1321369Sdim//===- TailDuplicator.cpp - Duplicate blocks into predecessors' tails -----===// 2303231Sdim// 3353358Sdim// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4353358Sdim// See https://llvm.org/LICENSE.txt for license information. 5353358Sdim// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6303231Sdim// 7303231Sdim//===----------------------------------------------------------------------===// 8303231Sdim// 9303231Sdim// This utility class duplicates basic blocks ending in unconditional branches 10303231Sdim// into the tails of their predecessors. 11303231Sdim// 12303231Sdim//===----------------------------------------------------------------------===// 13303231Sdim 14327952Sdim#include "llvm/CodeGen/TailDuplicator.h" 15321369Sdim#include "llvm/ADT/DenseMap.h" 16303231Sdim#include "llvm/ADT/DenseSet.h" 17327952Sdim#include "llvm/ADT/STLExtras.h" 18303231Sdim#include "llvm/ADT/SetVector.h" 19321369Sdim#include "llvm/ADT/SmallPtrSet.h" 20321369Sdim#include "llvm/ADT/SmallVector.h" 21303231Sdim#include "llvm/ADT/Statistic.h" 22360784Sdim#include "llvm/Analysis/ProfileSummaryInfo.h" 23321369Sdim#include "llvm/CodeGen/MachineBasicBlock.h" 24303231Sdim#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" 25360784Sdim#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" 26321369Sdim#include "llvm/CodeGen/MachineFunction.h" 27321369Sdim#include "llvm/CodeGen/MachineInstr.h" 28303231Sdim#include "llvm/CodeGen/MachineInstrBuilder.h" 29321369Sdim#include "llvm/CodeGen/MachineOperand.h" 30321369Sdim#include "llvm/CodeGen/MachineRegisterInfo.h" 31360784Sdim#include "llvm/CodeGen/MachineSizeOpts.h" 32321369Sdim#include "llvm/CodeGen/MachineSSAUpdater.h" 33327952Sdim#include "llvm/CodeGen/TargetInstrInfo.h" 34327952Sdim#include "llvm/CodeGen/TargetRegisterInfo.h" 35327952Sdim#include "llvm/CodeGen/TargetSubtargetInfo.h" 36321369Sdim#include "llvm/IR/DebugLoc.h" 37303231Sdim#include "llvm/IR/Function.h" 38303231Sdim#include "llvm/Support/CommandLine.h" 39303231Sdim#include "llvm/Support/Debug.h" 40303231Sdim#include "llvm/Support/ErrorHandling.h" 41303231Sdim#include "llvm/Support/raw_ostream.h" 42341825Sdim#include "llvm/Target/TargetMachine.h" 43321369Sdim#include <algorithm> 44321369Sdim#include <cassert> 45321369Sdim#include <iterator> 46321369Sdim#include <utility> 47321369Sdim 48303231Sdimusing namespace llvm; 49303231Sdim 50303231Sdim#define DEBUG_TYPE "tailduplication" 51303231Sdim 52303231SdimSTATISTIC(NumTails, "Number of tails duplicated"); 53303231SdimSTATISTIC(NumTailDups, "Number of tail duplicated blocks"); 54303231SdimSTATISTIC(NumTailDupAdded, 55303231Sdim "Number of instructions added due to tail duplication"); 56303231SdimSTATISTIC(NumTailDupRemoved, 57303231Sdim "Number of instructions removed due to tail duplication"); 58303231SdimSTATISTIC(NumDeadBlocks, "Number of dead blocks removed"); 59303231SdimSTATISTIC(NumAddedPHIs, "Number of phis added"); 60303231Sdim 61303231Sdim// Heuristic for tail duplication. 62303231Sdimstatic cl::opt<unsigned> TailDuplicateSize( 63303231Sdim "tail-dup-size", 64303231Sdim cl::desc("Maximum instructions to consider tail duplicating"), cl::init(2), 65303231Sdim cl::Hidden); 66303231Sdim 67321369Sdimstatic cl::opt<unsigned> TailDupIndirectBranchSize( 68314564Sdim "tail-dup-indirect-size", 69314564Sdim cl::desc("Maximum instructions to consider tail duplicating blocks that " 70314564Sdim "end with indirect branches."), cl::init(20), 71314564Sdim cl::Hidden); 72314564Sdim 73303231Sdimstatic cl::opt<bool> 74303231Sdim TailDupVerify("tail-dup-verify", 75303231Sdim cl::desc("Verify sanity of PHI instructions during taildup"), 76303231Sdim cl::init(false), cl::Hidden); 77303231Sdim 78303231Sdimstatic cl::opt<unsigned> TailDupLimit("tail-dup-limit", cl::init(~0U), 79303231Sdim cl::Hidden); 80303231Sdim 81327952Sdimvoid TailDuplicator::initMF(MachineFunction &MFin, bool PreRegAlloc, 82314564Sdim const MachineBranchProbabilityInfo *MBPIin, 83360784Sdim const MachineBlockFrequencyInfo *MBFIin, 84360784Sdim ProfileSummaryInfo *PSIin, 85314564Sdim bool LayoutModeIn, unsigned TailDupSizeIn) { 86314564Sdim MF = &MFin; 87314564Sdim TII = MF->getSubtarget().getInstrInfo(); 88314564Sdim TRI = MF->getSubtarget().getRegisterInfo(); 89314564Sdim MRI = &MF->getRegInfo(); 90314564Sdim MMI = &MF->getMMI(); 91303231Sdim MBPI = MBPIin; 92360784Sdim MBFI = MBFIin; 93360784Sdim PSI = PSIin; 94314564Sdim TailDupSize = TailDupSizeIn; 95303231Sdim 96303231Sdim assert(MBPI != nullptr && "Machine Branch Probability Info required"); 97303231Sdim 98314564Sdim LayoutMode = LayoutModeIn; 99327952Sdim this->PreRegAlloc = PreRegAlloc; 100303231Sdim} 101303231Sdim 102303231Sdimstatic void VerifyPHIs(MachineFunction &MF, bool CheckExtra) { 103303231Sdim for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ++I) { 104303231Sdim MachineBasicBlock *MBB = &*I; 105303231Sdim SmallSetVector<MachineBasicBlock *, 8> Preds(MBB->pred_begin(), 106303231Sdim MBB->pred_end()); 107303231Sdim MachineBasicBlock::iterator MI = MBB->begin(); 108303231Sdim while (MI != MBB->end()) { 109303231Sdim if (!MI->isPHI()) 110303231Sdim break; 111314564Sdim for (MachineBasicBlock *PredBB : Preds) { 112303231Sdim bool Found = false; 113303231Sdim for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) { 114303231Sdim MachineBasicBlock *PHIBB = MI->getOperand(i + 1).getMBB(); 115303231Sdim if (PHIBB == PredBB) { 116303231Sdim Found = true; 117303231Sdim break; 118303231Sdim } 119303231Sdim } 120303231Sdim if (!Found) { 121327952Sdim dbgs() << "Malformed PHI in " << printMBBReference(*MBB) << ": " 122327952Sdim << *MI; 123327952Sdim dbgs() << " missing input from predecessor " 124327952Sdim << printMBBReference(*PredBB) << '\n'; 125303231Sdim llvm_unreachable(nullptr); 126303231Sdim } 127303231Sdim } 128303231Sdim 129303231Sdim for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) { 130303231Sdim MachineBasicBlock *PHIBB = MI->getOperand(i + 1).getMBB(); 131303231Sdim if (CheckExtra && !Preds.count(PHIBB)) { 132327952Sdim dbgs() << "Warning: malformed PHI in " << printMBBReference(*MBB) 133327952Sdim << ": " << *MI; 134327952Sdim dbgs() << " extra input from predecessor " 135327952Sdim << printMBBReference(*PHIBB) << '\n'; 136303231Sdim llvm_unreachable(nullptr); 137303231Sdim } 138303231Sdim if (PHIBB->getNumber() < 0) { 139327952Sdim dbgs() << "Malformed PHI in " << printMBBReference(*MBB) << ": " 140327952Sdim << *MI; 141327952Sdim dbgs() << " non-existing " << printMBBReference(*PHIBB) << '\n'; 142303231Sdim llvm_unreachable(nullptr); 143303231Sdim } 144303231Sdim } 145303231Sdim ++MI; 146303231Sdim } 147303231Sdim } 148303231Sdim} 149303231Sdim 150303231Sdim/// Tail duplicate the block and cleanup. 151314564Sdim/// \p IsSimple - return value of isSimpleBB 152314564Sdim/// \p MBB - block to be duplicated 153314564Sdim/// \p ForcedLayoutPred - If non-null, treat this block as the layout 154314564Sdim/// predecessor, instead of using the ordering in MF 155314564Sdim/// \p DuplicatedPreds - if non-null, \p DuplicatedPreds will contain a list of 156314564Sdim/// all Preds that received a copy of \p MBB. 157314564Sdim/// \p RemovalCallback - if non-null, called just before MBB is deleted. 158314564Sdimbool TailDuplicator::tailDuplicateAndUpdate( 159314564Sdim bool IsSimple, MachineBasicBlock *MBB, 160314564Sdim MachineBasicBlock *ForcedLayoutPred, 161314564Sdim SmallVectorImpl<MachineBasicBlock*> *DuplicatedPreds, 162321369Sdim function_ref<void(MachineBasicBlock *)> *RemovalCallback) { 163303231Sdim // Save the successors list. 164303231Sdim SmallSetVector<MachineBasicBlock *, 8> Succs(MBB->succ_begin(), 165303231Sdim MBB->succ_end()); 166303231Sdim 167303231Sdim SmallVector<MachineBasicBlock *, 8> TDBBs; 168303231Sdim SmallVector<MachineInstr *, 16> Copies; 169314564Sdim if (!tailDuplicate(IsSimple, MBB, ForcedLayoutPred, TDBBs, Copies)) 170303231Sdim return false; 171303231Sdim 172303231Sdim ++NumTails; 173303231Sdim 174303231Sdim SmallVector<MachineInstr *, 8> NewPHIs; 175314564Sdim MachineSSAUpdater SSAUpdate(*MF, &NewPHIs); 176303231Sdim 177303231Sdim // TailBB's immediate successors are now successors of those predecessors 178303231Sdim // which duplicated TailBB. Add the predecessors as sources to the PHI 179303231Sdim // instructions. 180303231Sdim bool isDead = MBB->pred_empty() && !MBB->hasAddressTaken(); 181303231Sdim if (PreRegAlloc) 182303231Sdim updateSuccessorsPHIs(MBB, isDead, TDBBs, Succs); 183303231Sdim 184303231Sdim // If it is dead, remove it. 185303231Sdim if (isDead) { 186303231Sdim NumTailDupRemoved += MBB->size(); 187314564Sdim removeDeadBlock(MBB, RemovalCallback); 188303231Sdim ++NumDeadBlocks; 189303231Sdim } 190303231Sdim 191303231Sdim // Update SSA form. 192303231Sdim if (!SSAUpdateVRs.empty()) { 193303231Sdim for (unsigned i = 0, e = SSAUpdateVRs.size(); i != e; ++i) { 194303231Sdim unsigned VReg = SSAUpdateVRs[i]; 195303231Sdim SSAUpdate.Initialize(VReg); 196303231Sdim 197303231Sdim // If the original definition is still around, add it as an available 198303231Sdim // value. 199303231Sdim MachineInstr *DefMI = MRI->getVRegDef(VReg); 200303231Sdim MachineBasicBlock *DefBB = nullptr; 201303231Sdim if (DefMI) { 202303231Sdim DefBB = DefMI->getParent(); 203303231Sdim SSAUpdate.AddAvailableValue(DefBB, VReg); 204303231Sdim } 205303231Sdim 206303231Sdim // Add the new vregs as available values. 207303231Sdim DenseMap<unsigned, AvailableValsTy>::iterator LI = 208303231Sdim SSAUpdateVals.find(VReg); 209303231Sdim for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) { 210303231Sdim MachineBasicBlock *SrcBB = LI->second[j].first; 211303231Sdim unsigned SrcReg = LI->second[j].second; 212303231Sdim SSAUpdate.AddAvailableValue(SrcBB, SrcReg); 213303231Sdim } 214303231Sdim 215303231Sdim // Rewrite uses that are outside of the original def's block. 216303231Sdim MachineRegisterInfo::use_iterator UI = MRI->use_begin(VReg); 217303231Sdim while (UI != MRI->use_end()) { 218303231Sdim MachineOperand &UseMO = *UI; 219303231Sdim MachineInstr *UseMI = UseMO.getParent(); 220303231Sdim ++UI; 221303231Sdim if (UseMI->isDebugValue()) { 222303231Sdim // SSAUpdate can replace the use with an undef. That creates 223303231Sdim // a debug instruction that is a kill. 224303231Sdim // FIXME: Should it SSAUpdate job to delete debug instructions 225303231Sdim // instead of replacing the use with undef? 226303231Sdim UseMI->eraseFromParent(); 227303231Sdim continue; 228303231Sdim } 229303231Sdim if (UseMI->getParent() == DefBB && !UseMI->isPHI()) 230303231Sdim continue; 231303231Sdim SSAUpdate.RewriteUse(UseMO); 232303231Sdim } 233303231Sdim } 234303231Sdim 235303231Sdim SSAUpdateVRs.clear(); 236303231Sdim SSAUpdateVals.clear(); 237303231Sdim } 238303231Sdim 239303231Sdim // Eliminate some of the copies inserted by tail duplication to maintain 240303231Sdim // SSA form. 241303231Sdim for (unsigned i = 0, e = Copies.size(); i != e; ++i) { 242303231Sdim MachineInstr *Copy = Copies[i]; 243303231Sdim if (!Copy->isCopy()) 244303231Sdim continue; 245360784Sdim Register Dst = Copy->getOperand(0).getReg(); 246360784Sdim Register Src = Copy->getOperand(1).getReg(); 247303231Sdim if (MRI->hasOneNonDBGUse(Src) && 248303231Sdim MRI->constrainRegClass(Src, MRI->getRegClass(Dst))) { 249303231Sdim // Copy is the only use. Do trivial copy propagation here. 250303231Sdim MRI->replaceRegWith(Dst, Src); 251303231Sdim Copy->eraseFromParent(); 252303231Sdim } 253303231Sdim } 254303231Sdim 255303231Sdim if (NewPHIs.size()) 256303231Sdim NumAddedPHIs += NewPHIs.size(); 257303231Sdim 258314564Sdim if (DuplicatedPreds) 259314564Sdim *DuplicatedPreds = std::move(TDBBs); 260314564Sdim 261303231Sdim return true; 262303231Sdim} 263303231Sdim 264303231Sdim/// Look for small blocks that are unconditionally branched to and do not fall 265303231Sdim/// through. Tail-duplicate their instructions into their predecessors to 266303231Sdim/// eliminate (dynamic) branches. 267314564Sdimbool TailDuplicator::tailDuplicateBlocks() { 268303231Sdim bool MadeChange = false; 269303231Sdim 270303231Sdim if (PreRegAlloc && TailDupVerify) { 271341825Sdim LLVM_DEBUG(dbgs() << "\n*** Before tail-duplicating\n"); 272314564Sdim VerifyPHIs(*MF, true); 273303231Sdim } 274303231Sdim 275314564Sdim for (MachineFunction::iterator I = ++MF->begin(), E = MF->end(); I != E;) { 276303231Sdim MachineBasicBlock *MBB = &*I++; 277303231Sdim 278303231Sdim if (NumTails == TailDupLimit) 279303231Sdim break; 280303231Sdim 281303231Sdim bool IsSimple = isSimpleBB(MBB); 282303231Sdim 283314564Sdim if (!shouldTailDuplicate(IsSimple, *MBB)) 284303231Sdim continue; 285303231Sdim 286314564Sdim MadeChange |= tailDuplicateAndUpdate(IsSimple, MBB, nullptr); 287303231Sdim } 288303231Sdim 289303231Sdim if (PreRegAlloc && TailDupVerify) 290314564Sdim VerifyPHIs(*MF, false); 291303231Sdim 292303231Sdim return MadeChange; 293303231Sdim} 294303231Sdim 295303231Sdimstatic bool isDefLiveOut(unsigned Reg, MachineBasicBlock *BB, 296303231Sdim const MachineRegisterInfo *MRI) { 297303231Sdim for (MachineInstr &UseMI : MRI->use_instructions(Reg)) { 298303231Sdim if (UseMI.isDebugValue()) 299303231Sdim continue; 300303231Sdim if (UseMI.getParent() != BB) 301303231Sdim return true; 302303231Sdim } 303303231Sdim return false; 304303231Sdim} 305303231Sdim 306303231Sdimstatic unsigned getPHISrcRegOpIdx(MachineInstr *MI, MachineBasicBlock *SrcBB) { 307303231Sdim for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) 308303231Sdim if (MI->getOperand(i + 1).getMBB() == SrcBB) 309303231Sdim return i; 310303231Sdim return 0; 311303231Sdim} 312303231Sdim 313303231Sdim// Remember which registers are used by phis in this block. This is 314303231Sdim// used to determine which registers are liveout while modifying the 315303231Sdim// block (which is why we need to copy the information). 316303231Sdimstatic void getRegsUsedByPHIs(const MachineBasicBlock &BB, 317303231Sdim DenseSet<unsigned> *UsedByPhi) { 318303231Sdim for (const auto &MI : BB) { 319303231Sdim if (!MI.isPHI()) 320303231Sdim break; 321303231Sdim for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) { 322360784Sdim Register SrcReg = MI.getOperand(i).getReg(); 323303231Sdim UsedByPhi->insert(SrcReg); 324303231Sdim } 325303231Sdim } 326303231Sdim} 327303231Sdim 328303231Sdim/// Add a definition and source virtual registers pair for SSA update. 329303231Sdimvoid TailDuplicator::addSSAUpdateEntry(unsigned OrigReg, unsigned NewReg, 330303231Sdim MachineBasicBlock *BB) { 331303231Sdim DenseMap<unsigned, AvailableValsTy>::iterator LI = 332303231Sdim SSAUpdateVals.find(OrigReg); 333303231Sdim if (LI != SSAUpdateVals.end()) 334303231Sdim LI->second.push_back(std::make_pair(BB, NewReg)); 335303231Sdim else { 336303231Sdim AvailableValsTy Vals; 337303231Sdim Vals.push_back(std::make_pair(BB, NewReg)); 338303231Sdim SSAUpdateVals.insert(std::make_pair(OrigReg, Vals)); 339303231Sdim SSAUpdateVRs.push_back(OrigReg); 340303231Sdim } 341303231Sdim} 342303231Sdim 343303231Sdim/// Process PHI node in TailBB by turning it into a copy in PredBB. Remember the 344303231Sdim/// source register that's contributed by PredBB and update SSA update map. 345303231Sdimvoid TailDuplicator::processPHI( 346303231Sdim MachineInstr *MI, MachineBasicBlock *TailBB, MachineBasicBlock *PredBB, 347303231Sdim DenseMap<unsigned, RegSubRegPair> &LocalVRMap, 348303231Sdim SmallVectorImpl<std::pair<unsigned, RegSubRegPair>> &Copies, 349303231Sdim const DenseSet<unsigned> &RegsUsedByPhi, bool Remove) { 350360784Sdim Register DefReg = MI->getOperand(0).getReg(); 351303231Sdim unsigned SrcOpIdx = getPHISrcRegOpIdx(MI, PredBB); 352303231Sdim assert(SrcOpIdx && "Unable to find matching PHI source?"); 353360784Sdim Register SrcReg = MI->getOperand(SrcOpIdx).getReg(); 354303231Sdim unsigned SrcSubReg = MI->getOperand(SrcOpIdx).getSubReg(); 355303231Sdim const TargetRegisterClass *RC = MRI->getRegClass(DefReg); 356303231Sdim LocalVRMap.insert(std::make_pair(DefReg, RegSubRegPair(SrcReg, SrcSubReg))); 357303231Sdim 358303231Sdim // Insert a copy from source to the end of the block. The def register is the 359303231Sdim // available value liveout of the block. 360360784Sdim Register NewDef = MRI->createVirtualRegister(RC); 361303231Sdim Copies.push_back(std::make_pair(NewDef, RegSubRegPair(SrcReg, SrcSubReg))); 362303231Sdim if (isDefLiveOut(DefReg, TailBB, MRI) || RegsUsedByPhi.count(DefReg)) 363303231Sdim addSSAUpdateEntry(DefReg, NewDef, PredBB); 364303231Sdim 365303231Sdim if (!Remove) 366303231Sdim return; 367303231Sdim 368303231Sdim // Remove PredBB from the PHI node. 369303231Sdim MI->RemoveOperand(SrcOpIdx + 1); 370303231Sdim MI->RemoveOperand(SrcOpIdx); 371303231Sdim if (MI->getNumOperands() == 1) 372303231Sdim MI->eraseFromParent(); 373303231Sdim} 374303231Sdim 375303231Sdim/// Duplicate a TailBB instruction to PredBB and update 376303231Sdim/// the source operands due to earlier PHI translation. 377303231Sdimvoid TailDuplicator::duplicateInstruction( 378303231Sdim MachineInstr *MI, MachineBasicBlock *TailBB, MachineBasicBlock *PredBB, 379303231Sdim DenseMap<unsigned, RegSubRegPair> &LocalVRMap, 380303231Sdim const DenseSet<unsigned> &UsedByPhi) { 381341825Sdim // Allow duplication of CFI instructions. 382341825Sdim if (MI->isCFIInstruction()) { 383341825Sdim BuildMI(*PredBB, PredBB->end(), PredBB->findDebugLoc(PredBB->begin()), 384341825Sdim TII->get(TargetOpcode::CFI_INSTRUCTION)).addCFIIndex( 385341825Sdim MI->getOperand(0).getCFIIndex()); 386341825Sdim return; 387341825Sdim } 388327952Sdim MachineInstr &NewMI = TII->duplicate(*PredBB, PredBB->end(), *MI); 389303231Sdim if (PreRegAlloc) { 390327952Sdim for (unsigned i = 0, e = NewMI.getNumOperands(); i != e; ++i) { 391327952Sdim MachineOperand &MO = NewMI.getOperand(i); 392303231Sdim if (!MO.isReg()) 393303231Sdim continue; 394360784Sdim Register Reg = MO.getReg(); 395360784Sdim if (!Register::isVirtualRegister(Reg)) 396303231Sdim continue; 397303231Sdim if (MO.isDef()) { 398303231Sdim const TargetRegisterClass *RC = MRI->getRegClass(Reg); 399360784Sdim Register NewReg = MRI->createVirtualRegister(RC); 400303231Sdim MO.setReg(NewReg); 401303231Sdim LocalVRMap.insert(std::make_pair(Reg, RegSubRegPair(NewReg, 0))); 402303231Sdim if (isDefLiveOut(Reg, TailBB, MRI) || UsedByPhi.count(Reg)) 403303231Sdim addSSAUpdateEntry(Reg, NewReg, PredBB); 404303231Sdim } else { 405303231Sdim auto VI = LocalVRMap.find(Reg); 406303231Sdim if (VI != LocalVRMap.end()) { 407303231Sdim // Need to make sure that the register class of the mapped register 408303231Sdim // will satisfy the constraints of the class of the register being 409303231Sdim // replaced. 410303231Sdim auto *OrigRC = MRI->getRegClass(Reg); 411303231Sdim auto *MappedRC = MRI->getRegClass(VI->second.Reg); 412303231Sdim const TargetRegisterClass *ConstrRC; 413303231Sdim if (VI->second.SubReg != 0) { 414303231Sdim ConstrRC = TRI->getMatchingSuperRegClass(MappedRC, OrigRC, 415303231Sdim VI->second.SubReg); 416303231Sdim if (ConstrRC) { 417303231Sdim // The actual constraining (as in "find appropriate new class") 418303231Sdim // is done by getMatchingSuperRegClass, so now we only need to 419303231Sdim // change the class of the mapped register. 420303231Sdim MRI->setRegClass(VI->second.Reg, ConstrRC); 421303231Sdim } 422303231Sdim } else { 423303231Sdim // For mapped registers that do not have sub-registers, simply 424303231Sdim // restrict their class to match the original one. 425303231Sdim ConstrRC = MRI->constrainRegClass(VI->second.Reg, OrigRC); 426303231Sdim } 427303231Sdim 428303231Sdim if (ConstrRC) { 429303231Sdim // If the class constraining succeeded, we can simply replace 430303231Sdim // the old register with the mapped one. 431303231Sdim MO.setReg(VI->second.Reg); 432303231Sdim // We have Reg -> VI.Reg:VI.SubReg, so if Reg is used with a 433303231Sdim // sub-register, we need to compose the sub-register indices. 434303231Sdim MO.setSubReg(TRI->composeSubRegIndices(MO.getSubReg(), 435303231Sdim VI->second.SubReg)); 436303231Sdim } else { 437303231Sdim // The direct replacement is not possible, due to failing register 438303231Sdim // class constraints. An explicit COPY is necessary. Create one 439303231Sdim // that can be reused 440303231Sdim auto *NewRC = MI->getRegClassConstraint(i, TII, TRI); 441303231Sdim if (NewRC == nullptr) 442303231Sdim NewRC = OrigRC; 443360784Sdim Register NewReg = MRI->createVirtualRegister(NewRC); 444353358Sdim BuildMI(*PredBB, NewMI, NewMI.getDebugLoc(), 445303231Sdim TII->get(TargetOpcode::COPY), NewReg) 446303231Sdim .addReg(VI->second.Reg, 0, VI->second.SubReg); 447303231Sdim LocalVRMap.erase(VI); 448303231Sdim LocalVRMap.insert(std::make_pair(Reg, RegSubRegPair(NewReg, 0))); 449303231Sdim MO.setReg(NewReg); 450303231Sdim // The composed VI.Reg:VI.SubReg is replaced with NewReg, which 451303231Sdim // is equivalent to the whole register Reg. Hence, Reg:subreg 452303231Sdim // is same as NewReg:subreg, so keep the sub-register index 453303231Sdim // unchanged. 454303231Sdim } 455303231Sdim // Clear any kill flags from this operand. The new register could 456303231Sdim // have uses after this one, so kills are not valid here. 457303231Sdim MO.setIsKill(false); 458303231Sdim } 459303231Sdim } 460303231Sdim } 461303231Sdim } 462303231Sdim} 463303231Sdim 464303231Sdim/// After FromBB is tail duplicated into its predecessor blocks, the successors 465303231Sdim/// have gained new predecessors. Update the PHI instructions in them 466303231Sdim/// accordingly. 467303231Sdimvoid TailDuplicator::updateSuccessorsPHIs( 468303231Sdim MachineBasicBlock *FromBB, bool isDead, 469303231Sdim SmallVectorImpl<MachineBasicBlock *> &TDBBs, 470303231Sdim SmallSetVector<MachineBasicBlock *, 8> &Succs) { 471314564Sdim for (MachineBasicBlock *SuccBB : Succs) { 472314564Sdim for (MachineInstr &MI : *SuccBB) { 473314564Sdim if (!MI.isPHI()) 474303231Sdim break; 475314564Sdim MachineInstrBuilder MIB(*FromBB->getParent(), MI); 476303231Sdim unsigned Idx = 0; 477314564Sdim for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) { 478314564Sdim MachineOperand &MO = MI.getOperand(i + 1); 479303231Sdim if (MO.getMBB() == FromBB) { 480303231Sdim Idx = i; 481303231Sdim break; 482303231Sdim } 483303231Sdim } 484303231Sdim 485303231Sdim assert(Idx != 0); 486314564Sdim MachineOperand &MO0 = MI.getOperand(Idx); 487360784Sdim Register Reg = MO0.getReg(); 488303231Sdim if (isDead) { 489303231Sdim // Folded into the previous BB. 490303231Sdim // There could be duplicate phi source entries. FIXME: Should sdisel 491303231Sdim // or earlier pass fixed this? 492314564Sdim for (unsigned i = MI.getNumOperands() - 2; i != Idx; i -= 2) { 493314564Sdim MachineOperand &MO = MI.getOperand(i + 1); 494303231Sdim if (MO.getMBB() == FromBB) { 495314564Sdim MI.RemoveOperand(i + 1); 496314564Sdim MI.RemoveOperand(i); 497303231Sdim } 498303231Sdim } 499303231Sdim } else 500303231Sdim Idx = 0; 501303231Sdim 502303231Sdim // If Idx is set, the operands at Idx and Idx+1 must be removed. 503303231Sdim // We reuse the location to avoid expensive RemoveOperand calls. 504303231Sdim 505303231Sdim DenseMap<unsigned, AvailableValsTy>::iterator LI = 506303231Sdim SSAUpdateVals.find(Reg); 507303231Sdim if (LI != SSAUpdateVals.end()) { 508303231Sdim // This register is defined in the tail block. 509303231Sdim for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) { 510303231Sdim MachineBasicBlock *SrcBB = LI->second[j].first; 511303231Sdim // If we didn't duplicate a bb into a particular predecessor, we 512303231Sdim // might still have added an entry to SSAUpdateVals to correcly 513303231Sdim // recompute SSA. If that case, avoid adding a dummy extra argument 514303231Sdim // this PHI. 515303231Sdim if (!SrcBB->isSuccessor(SuccBB)) 516303231Sdim continue; 517303231Sdim 518303231Sdim unsigned SrcReg = LI->second[j].second; 519303231Sdim if (Idx != 0) { 520314564Sdim MI.getOperand(Idx).setReg(SrcReg); 521314564Sdim MI.getOperand(Idx + 1).setMBB(SrcBB); 522303231Sdim Idx = 0; 523303231Sdim } else { 524303231Sdim MIB.addReg(SrcReg).addMBB(SrcBB); 525303231Sdim } 526303231Sdim } 527303231Sdim } else { 528303231Sdim // Live in tail block, must also be live in predecessors. 529303231Sdim for (unsigned j = 0, ee = TDBBs.size(); j != ee; ++j) { 530303231Sdim MachineBasicBlock *SrcBB = TDBBs[j]; 531303231Sdim if (Idx != 0) { 532314564Sdim MI.getOperand(Idx).setReg(Reg); 533314564Sdim MI.getOperand(Idx + 1).setMBB(SrcBB); 534303231Sdim Idx = 0; 535303231Sdim } else { 536303231Sdim MIB.addReg(Reg).addMBB(SrcBB); 537303231Sdim } 538303231Sdim } 539303231Sdim } 540303231Sdim if (Idx != 0) { 541314564Sdim MI.RemoveOperand(Idx + 1); 542314564Sdim MI.RemoveOperand(Idx); 543303231Sdim } 544303231Sdim } 545303231Sdim } 546303231Sdim} 547303231Sdim 548303231Sdim/// Determine if it is profitable to duplicate this block. 549314564Sdimbool TailDuplicator::shouldTailDuplicate(bool IsSimple, 550303231Sdim MachineBasicBlock &TailBB) { 551314564Sdim // When doing tail-duplication during layout, the block ordering is in flux, 552314564Sdim // so canFallThrough returns a result based on incorrect information and 553314564Sdim // should just be ignored. 554314564Sdim if (!LayoutMode && TailBB.canFallThrough()) 555303231Sdim return false; 556303231Sdim 557303231Sdim // Don't try to tail-duplicate single-block loops. 558303231Sdim if (TailBB.isSuccessor(&TailBB)) 559303231Sdim return false; 560303231Sdim 561303231Sdim // Set the limit on the cost to duplicate. When optimizing for size, 562303231Sdim // duplicate only one, because one branch instruction can be eliminated to 563303231Sdim // compensate for the duplication. 564303231Sdim unsigned MaxDuplicateCount; 565360784Sdim bool OptForSize = MF->getFunction().hasOptSize() || 566360784Sdim llvm::shouldOptimizeForSize(&TailBB, PSI, MBFI); 567360784Sdim if (TailDupSize == 0) 568314564Sdim MaxDuplicateCount = TailDuplicateSize; 569303231Sdim else 570314564Sdim MaxDuplicateCount = TailDupSize; 571360784Sdim if (OptForSize) 572360784Sdim MaxDuplicateCount = 1; 573303231Sdim 574314564Sdim // If the block to be duplicated ends in an unanalyzable fallthrough, don't 575314564Sdim // duplicate it. 576314564Sdim // A similar check is necessary in MachineBlockPlacement to make sure pairs of 577314564Sdim // blocks with unanalyzable fallthrough get layed out contiguously. 578314564Sdim MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr; 579314564Sdim SmallVector<MachineOperand, 4> PredCond; 580314564Sdim if (TII->analyzeBranch(TailBB, PredTBB, PredFBB, PredCond) && 581314564Sdim TailBB.canFallThrough()) 582314564Sdim return false; 583314564Sdim 584303231Sdim // If the target has hardware branch prediction that can handle indirect 585303231Sdim // branches, duplicating them can often make them predictable when there 586303231Sdim // are common paths through the code. The limit needs to be high enough 587303231Sdim // to allow undoing the effects of tail merging and other optimizations 588303231Sdim // that rearrange the predecessors of the indirect branch. 589303231Sdim 590303231Sdim bool HasIndirectbr = false; 591303231Sdim if (!TailBB.empty()) 592303231Sdim HasIndirectbr = TailBB.back().isIndirectBranch(); 593303231Sdim 594303231Sdim if (HasIndirectbr && PreRegAlloc) 595314564Sdim MaxDuplicateCount = TailDupIndirectBranchSize; 596303231Sdim 597303231Sdim // Check the instructions in the block to determine whether tail-duplication 598303231Sdim // is invalid or unlikely to be profitable. 599303231Sdim unsigned InstrCount = 0; 600303231Sdim for (MachineInstr &MI : TailBB) { 601303231Sdim // Non-duplicable things shouldn't be tail-duplicated. 602341825Sdim // CFI instructions are marked as non-duplicable, because Darwin compact 603341825Sdim // unwind info emission can't handle multiple prologue setups. In case of 604341825Sdim // DWARF, allow them be duplicated, so that their existence doesn't prevent 605341825Sdim // tail duplication of some basic blocks, that would be duplicated otherwise. 606341825Sdim if (MI.isNotDuplicable() && 607341825Sdim (TailBB.getParent()->getTarget().getTargetTriple().isOSDarwin() || 608341825Sdim !MI.isCFIInstruction())) 609303231Sdim return false; 610303231Sdim 611303231Sdim // Convergent instructions can be duplicated only if doing so doesn't add 612303231Sdim // new control dependencies, which is what we're going to do here. 613303231Sdim if (MI.isConvergent()) 614303231Sdim return false; 615303231Sdim 616303231Sdim // Do not duplicate 'return' instructions if this is a pre-regalloc run. 617303231Sdim // A return may expand into a lot more instructions (e.g. reload of callee 618303231Sdim // saved registers) after PEI. 619303231Sdim if (PreRegAlloc && MI.isReturn()) 620303231Sdim return false; 621303231Sdim 622303231Sdim // Avoid duplicating calls before register allocation. Calls presents a 623303231Sdim // barrier to register allocation so duplicating them may end up increasing 624303231Sdim // spills. 625303231Sdim if (PreRegAlloc && MI.isCall()) 626303231Sdim return false; 627303231Sdim 628341825Sdim if (!MI.isPHI() && !MI.isMetaInstruction()) 629303231Sdim InstrCount += 1; 630303231Sdim 631303231Sdim if (InstrCount > MaxDuplicateCount) 632303231Sdim return false; 633303231Sdim } 634303231Sdim 635303231Sdim // Check if any of the successors of TailBB has a PHI node in which the 636303231Sdim // value corresponding to TailBB uses a subregister. 637303231Sdim // If a phi node uses a register paired with a subregister, the actual 638303231Sdim // "value type" of the phi may differ from the type of the register without 639303231Sdim // any subregisters. Due to a bug, tail duplication may add a new operand 640303231Sdim // without a necessary subregister, producing an invalid code. This is 641303231Sdim // demonstrated by test/CodeGen/Hexagon/tail-dup-subreg-abort.ll. 642303231Sdim // Disable tail duplication for this case for now, until the problem is 643303231Sdim // fixed. 644303231Sdim for (auto SB : TailBB.successors()) { 645303231Sdim for (auto &I : *SB) { 646303231Sdim if (!I.isPHI()) 647303231Sdim break; 648303231Sdim unsigned Idx = getPHISrcRegOpIdx(&I, &TailBB); 649303231Sdim assert(Idx != 0); 650303231Sdim MachineOperand &PU = I.getOperand(Idx); 651303231Sdim if (PU.getSubReg() != 0) 652303231Sdim return false; 653303231Sdim } 654303231Sdim } 655303231Sdim 656303231Sdim if (HasIndirectbr && PreRegAlloc) 657303231Sdim return true; 658303231Sdim 659303231Sdim if (IsSimple) 660303231Sdim return true; 661303231Sdim 662303231Sdim if (!PreRegAlloc) 663303231Sdim return true; 664303231Sdim 665303231Sdim return canCompletelyDuplicateBB(TailBB); 666303231Sdim} 667303231Sdim 668303231Sdim/// True if this BB has only one unconditional jump. 669303231Sdimbool TailDuplicator::isSimpleBB(MachineBasicBlock *TailBB) { 670303231Sdim if (TailBB->succ_size() != 1) 671303231Sdim return false; 672303231Sdim if (TailBB->pred_empty()) 673303231Sdim return false; 674303231Sdim MachineBasicBlock::iterator I = TailBB->getFirstNonDebugInstr(); 675303231Sdim if (I == TailBB->end()) 676303231Sdim return true; 677303231Sdim return I->isUnconditionalBranch(); 678303231Sdim} 679303231Sdim 680303231Sdimstatic bool bothUsedInPHI(const MachineBasicBlock &A, 681303231Sdim const SmallPtrSet<MachineBasicBlock *, 8> &SuccsB) { 682303231Sdim for (MachineBasicBlock *BB : A.successors()) 683303231Sdim if (SuccsB.count(BB) && !BB->empty() && BB->begin()->isPHI()) 684303231Sdim return true; 685303231Sdim 686303231Sdim return false; 687303231Sdim} 688303231Sdim 689303231Sdimbool TailDuplicator::canCompletelyDuplicateBB(MachineBasicBlock &BB) { 690303231Sdim for (MachineBasicBlock *PredBB : BB.predecessors()) { 691303231Sdim if (PredBB->succ_size() > 1) 692303231Sdim return false; 693303231Sdim 694303231Sdim MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr; 695303231Sdim SmallVector<MachineOperand, 4> PredCond; 696314564Sdim if (TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond)) 697303231Sdim return false; 698303231Sdim 699303231Sdim if (!PredCond.empty()) 700303231Sdim return false; 701303231Sdim } 702303231Sdim return true; 703303231Sdim} 704303231Sdim 705303231Sdimbool TailDuplicator::duplicateSimpleBB( 706303231Sdim MachineBasicBlock *TailBB, SmallVectorImpl<MachineBasicBlock *> &TDBBs, 707303231Sdim const DenseSet<unsigned> &UsedByPhi, 708303231Sdim SmallVectorImpl<MachineInstr *> &Copies) { 709303231Sdim SmallPtrSet<MachineBasicBlock *, 8> Succs(TailBB->succ_begin(), 710303231Sdim TailBB->succ_end()); 711303231Sdim SmallVector<MachineBasicBlock *, 8> Preds(TailBB->pred_begin(), 712303231Sdim TailBB->pred_end()); 713303231Sdim bool Changed = false; 714314564Sdim for (MachineBasicBlock *PredBB : Preds) { 715303231Sdim if (PredBB->hasEHPadSuccessor()) 716303231Sdim continue; 717303231Sdim 718303231Sdim if (bothUsedInPHI(*PredBB, Succs)) 719303231Sdim continue; 720303231Sdim 721303231Sdim MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr; 722303231Sdim SmallVector<MachineOperand, 4> PredCond; 723314564Sdim if (TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond)) 724303231Sdim continue; 725303231Sdim 726303231Sdim Changed = true; 727341825Sdim LLVM_DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB 728341825Sdim << "From simple Succ: " << *TailBB); 729303231Sdim 730303231Sdim MachineBasicBlock *NewTarget = *TailBB->succ_begin(); 731314564Sdim MachineBasicBlock *NextBB = PredBB->getNextNode(); 732303231Sdim 733303231Sdim // Make PredFBB explicit. 734303231Sdim if (PredCond.empty()) 735303231Sdim PredFBB = PredTBB; 736303231Sdim 737303231Sdim // Make fall through explicit. 738303231Sdim if (!PredTBB) 739303231Sdim PredTBB = NextBB; 740303231Sdim if (!PredFBB) 741303231Sdim PredFBB = NextBB; 742303231Sdim 743303231Sdim // Redirect 744303231Sdim if (PredFBB == TailBB) 745303231Sdim PredFBB = NewTarget; 746303231Sdim if (PredTBB == TailBB) 747303231Sdim PredTBB = NewTarget; 748303231Sdim 749303231Sdim // Make the branch unconditional if possible 750303231Sdim if (PredTBB == PredFBB) { 751303231Sdim PredCond.clear(); 752303231Sdim PredFBB = nullptr; 753303231Sdim } 754303231Sdim 755303231Sdim // Avoid adding fall through branches. 756303231Sdim if (PredFBB == NextBB) 757303231Sdim PredFBB = nullptr; 758303231Sdim if (PredTBB == NextBB && PredFBB == nullptr) 759303231Sdim PredTBB = nullptr; 760303231Sdim 761321369Sdim auto DL = PredBB->findBranchDebugLoc(); 762314564Sdim TII->removeBranch(*PredBB); 763303231Sdim 764303231Sdim if (!PredBB->isSuccessor(NewTarget)) 765303231Sdim PredBB->replaceSuccessor(TailBB, NewTarget); 766303231Sdim else { 767303231Sdim PredBB->removeSuccessor(TailBB, true); 768303231Sdim assert(PredBB->succ_size() <= 1); 769303231Sdim } 770303231Sdim 771303231Sdim if (PredTBB) 772321369Sdim TII->insertBranch(*PredBB, PredTBB, PredFBB, PredCond, DL); 773303231Sdim 774303231Sdim TDBBs.push_back(PredBB); 775303231Sdim } 776303231Sdim return Changed; 777303231Sdim} 778303231Sdim 779314564Sdimbool TailDuplicator::canTailDuplicate(MachineBasicBlock *TailBB, 780314564Sdim MachineBasicBlock *PredBB) { 781314564Sdim // EH edges are ignored by analyzeBranch. 782314564Sdim if (PredBB->succ_size() > 1) 783314564Sdim return false; 784314564Sdim 785321369Sdim MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr; 786314564Sdim SmallVector<MachineOperand, 4> PredCond; 787314564Sdim if (TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond)) 788314564Sdim return false; 789314564Sdim if (!PredCond.empty()) 790314564Sdim return false; 791314564Sdim return true; 792314564Sdim} 793314564Sdim 794303231Sdim/// If it is profitable, duplicate TailBB's contents in each 795303231Sdim/// of its predecessors. 796314564Sdim/// \p IsSimple result of isSimpleBB 797314564Sdim/// \p TailBB Block to be duplicated. 798314564Sdim/// \p ForcedLayoutPred When non-null, use this block as the layout predecessor 799314564Sdim/// instead of the previous block in MF's order. 800314564Sdim/// \p TDBBs A vector to keep track of all blocks tail-duplicated 801314564Sdim/// into. 802314564Sdim/// \p Copies A vector of copy instructions inserted. Used later to 803314564Sdim/// walk all the inserted copies and remove redundant ones. 804314564Sdimbool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB, 805314564Sdim MachineBasicBlock *ForcedLayoutPred, 806303231Sdim SmallVectorImpl<MachineBasicBlock *> &TDBBs, 807303231Sdim SmallVectorImpl<MachineInstr *> &Copies) { 808341825Sdim LLVM_DEBUG(dbgs() << "\n*** Tail-duplicating " << printMBBReference(*TailBB) 809341825Sdim << '\n'); 810303231Sdim 811303231Sdim DenseSet<unsigned> UsedByPhi; 812303231Sdim getRegsUsedByPHIs(*TailBB, &UsedByPhi); 813303231Sdim 814303231Sdim if (IsSimple) 815303231Sdim return duplicateSimpleBB(TailBB, TDBBs, UsedByPhi, Copies); 816303231Sdim 817303231Sdim // Iterate through all the unique predecessors and tail-duplicate this 818303231Sdim // block into them, if possible. Copying the list ahead of time also 819303231Sdim // avoids trouble with the predecessor list reallocating. 820303231Sdim bool Changed = false; 821303231Sdim SmallSetVector<MachineBasicBlock *, 8> Preds(TailBB->pred_begin(), 822303231Sdim TailBB->pred_end()); 823314564Sdim for (MachineBasicBlock *PredBB : Preds) { 824303231Sdim assert(TailBB != PredBB && 825303231Sdim "Single-block loop should have been rejected earlier!"); 826314564Sdim 827314564Sdim if (!canTailDuplicate(TailBB, PredBB)) 828303231Sdim continue; 829303231Sdim 830303231Sdim // Don't duplicate into a fall-through predecessor (at least for now). 831314564Sdim bool IsLayoutSuccessor = false; 832314564Sdim if (ForcedLayoutPred) 833314564Sdim IsLayoutSuccessor = (ForcedLayoutPred == PredBB); 834314564Sdim else if (PredBB->isLayoutSuccessor(TailBB) && PredBB->canFallThrough()) 835314564Sdim IsLayoutSuccessor = true; 836314564Sdim if (IsLayoutSuccessor) 837303231Sdim continue; 838303231Sdim 839341825Sdim LLVM_DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB 840341825Sdim << "From Succ: " << *TailBB); 841303231Sdim 842303231Sdim TDBBs.push_back(PredBB); 843303231Sdim 844303231Sdim // Remove PredBB's unconditional branch. 845314564Sdim TII->removeBranch(*PredBB); 846303231Sdim 847303231Sdim // Clone the contents of TailBB into PredBB. 848303231Sdim DenseMap<unsigned, RegSubRegPair> LocalVRMap; 849303231Sdim SmallVector<std::pair<unsigned, RegSubRegPair>, 4> CopyInfos; 850327952Sdim for (MachineBasicBlock::iterator I = TailBB->begin(), E = TailBB->end(); 851327952Sdim I != E; /* empty */) { 852303231Sdim MachineInstr *MI = &*I; 853303231Sdim ++I; 854303231Sdim if (MI->isPHI()) { 855303231Sdim // Replace the uses of the def of the PHI with the register coming 856303231Sdim // from PredBB. 857303231Sdim processPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, true); 858303231Sdim } else { 859303231Sdim // Replace def of virtual registers with new registers, and update 860303231Sdim // uses with PHI source register or the new registers. 861314564Sdim duplicateInstruction(MI, TailBB, PredBB, LocalVRMap, UsedByPhi); 862303231Sdim } 863303231Sdim } 864303231Sdim appendCopies(PredBB, CopyInfos, Copies); 865303231Sdim 866303231Sdim NumTailDupAdded += TailBB->size() - 1; // subtract one for removed branch 867303231Sdim 868303231Sdim // Update the CFG. 869303231Sdim PredBB->removeSuccessor(PredBB->succ_begin()); 870303231Sdim assert(PredBB->succ_empty() && 871303231Sdim "TailDuplicate called on block with multiple successors!"); 872314564Sdim for (MachineBasicBlock *Succ : TailBB->successors()) 873314564Sdim PredBB->addSuccessor(Succ, MBPI->getEdgeProbability(TailBB, Succ)); 874303231Sdim 875303231Sdim Changed = true; 876303231Sdim ++NumTailDups; 877303231Sdim } 878303231Sdim 879303231Sdim // If TailBB was duplicated into all its predecessors except for the prior 880303231Sdim // block, which falls through unconditionally, move the contents of this 881303231Sdim // block into the prior block. 882314564Sdim MachineBasicBlock *PrevBB = ForcedLayoutPred; 883314564Sdim if (!PrevBB) 884314564Sdim PrevBB = &*std::prev(TailBB->getIterator()); 885303231Sdim MachineBasicBlock *PriorTBB = nullptr, *PriorFBB = nullptr; 886303231Sdim SmallVector<MachineOperand, 4> PriorCond; 887303231Sdim // This has to check PrevBB->succ_size() because EH edges are ignored by 888314564Sdim // analyzeBranch. 889303231Sdim if (PrevBB->succ_size() == 1 && 890314564Sdim // Layout preds are not always CFG preds. Check. 891314564Sdim *PrevBB->succ_begin() == TailBB && 892314564Sdim !TII->analyzeBranch(*PrevBB, PriorTBB, PriorFBB, PriorCond) && 893314564Sdim PriorCond.empty() && 894314564Sdim (!PriorTBB || PriorTBB == TailBB) && 895314564Sdim TailBB->pred_size() == 1 && 896303231Sdim !TailBB->hasAddressTaken()) { 897341825Sdim LLVM_DEBUG(dbgs() << "\nMerging into block: " << *PrevBB 898341825Sdim << "From MBB: " << *TailBB); 899314564Sdim // There may be a branch to the layout successor. This is unlikely but it 900314564Sdim // happens. The correct thing to do is to remove the branch before 901314564Sdim // duplicating the instructions in all cases. 902314564Sdim TII->removeBranch(*PrevBB); 903303231Sdim if (PreRegAlloc) { 904303231Sdim DenseMap<unsigned, RegSubRegPair> LocalVRMap; 905303231Sdim SmallVector<std::pair<unsigned, RegSubRegPair>, 4> CopyInfos; 906303231Sdim MachineBasicBlock::iterator I = TailBB->begin(); 907303231Sdim // Process PHI instructions first. 908303231Sdim while (I != TailBB->end() && I->isPHI()) { 909303231Sdim // Replace the uses of the def of the PHI with the register coming 910303231Sdim // from PredBB. 911303231Sdim MachineInstr *MI = &*I++; 912303231Sdim processPHI(MI, TailBB, PrevBB, LocalVRMap, CopyInfos, UsedByPhi, true); 913303231Sdim } 914303231Sdim 915303231Sdim // Now copy the non-PHI instructions. 916303231Sdim while (I != TailBB->end()) { 917303231Sdim // Replace def of virtual registers with new registers, and update 918303231Sdim // uses with PHI source register or the new registers. 919303231Sdim MachineInstr *MI = &*I++; 920303231Sdim assert(!MI->isBundle() && "Not expecting bundles before regalloc!"); 921314564Sdim duplicateInstruction(MI, TailBB, PrevBB, LocalVRMap, UsedByPhi); 922303231Sdim MI->eraseFromParent(); 923303231Sdim } 924303231Sdim appendCopies(PrevBB, CopyInfos, Copies); 925303231Sdim } else { 926314564Sdim TII->removeBranch(*PrevBB); 927303231Sdim // No PHIs to worry about, just splice the instructions over. 928303231Sdim PrevBB->splice(PrevBB->end(), TailBB, TailBB->begin(), TailBB->end()); 929303231Sdim } 930303231Sdim PrevBB->removeSuccessor(PrevBB->succ_begin()); 931303231Sdim assert(PrevBB->succ_empty()); 932303231Sdim PrevBB->transferSuccessors(TailBB); 933303231Sdim TDBBs.push_back(PrevBB); 934303231Sdim Changed = true; 935303231Sdim } 936303231Sdim 937303231Sdim // If this is after register allocation, there are no phis to fix. 938303231Sdim if (!PreRegAlloc) 939303231Sdim return Changed; 940303231Sdim 941303231Sdim // If we made no changes so far, we are safe. 942303231Sdim if (!Changed) 943303231Sdim return Changed; 944303231Sdim 945303231Sdim // Handle the nasty case in that we duplicated a block that is part of a loop 946303231Sdim // into some but not all of its predecessors. For example: 947303231Sdim // 1 -> 2 <-> 3 | 948303231Sdim // \ | 949303231Sdim // \---> rest | 950303231Sdim // if we duplicate 2 into 1 but not into 3, we end up with 951303231Sdim // 12 -> 3 <-> 2 -> rest | 952303231Sdim // \ / | 953303231Sdim // \----->-----/ | 954303231Sdim // If there was a "var = phi(1, 3)" in 2, it has to be ultimately replaced 955303231Sdim // with a phi in 3 (which now dominates 2). 956303231Sdim // What we do here is introduce a copy in 3 of the register defined by the 957303231Sdim // phi, just like when we are duplicating 2 into 3, but we don't copy any 958303231Sdim // real instructions or remove the 3 -> 2 edge from the phi in 2. 959314564Sdim for (MachineBasicBlock *PredBB : Preds) { 960314564Sdim if (is_contained(TDBBs, PredBB)) 961303231Sdim continue; 962303231Sdim 963303231Sdim // EH edges 964303231Sdim if (PredBB->succ_size() != 1) 965303231Sdim continue; 966303231Sdim 967303231Sdim DenseMap<unsigned, RegSubRegPair> LocalVRMap; 968303231Sdim SmallVector<std::pair<unsigned, RegSubRegPair>, 4> CopyInfos; 969303231Sdim MachineBasicBlock::iterator I = TailBB->begin(); 970303231Sdim // Process PHI instructions first. 971303231Sdim while (I != TailBB->end() && I->isPHI()) { 972303231Sdim // Replace the uses of the def of the PHI with the register coming 973303231Sdim // from PredBB. 974303231Sdim MachineInstr *MI = &*I++; 975303231Sdim processPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, false); 976303231Sdim } 977303231Sdim appendCopies(PredBB, CopyInfos, Copies); 978303231Sdim } 979303231Sdim 980303231Sdim return Changed; 981303231Sdim} 982303231Sdim 983303231Sdim/// At the end of the block \p MBB generate COPY instructions between registers 984303231Sdim/// described by \p CopyInfos. Append resulting instructions to \p Copies. 985303231Sdimvoid TailDuplicator::appendCopies(MachineBasicBlock *MBB, 986303231Sdim SmallVectorImpl<std::pair<unsigned,RegSubRegPair>> &CopyInfos, 987303231Sdim SmallVectorImpl<MachineInstr*> &Copies) { 988303231Sdim MachineBasicBlock::iterator Loc = MBB->getFirstTerminator(); 989303231Sdim const MCInstrDesc &CopyD = TII->get(TargetOpcode::COPY); 990303231Sdim for (auto &CI : CopyInfos) { 991303231Sdim auto C = BuildMI(*MBB, Loc, DebugLoc(), CopyD, CI.first) 992303231Sdim .addReg(CI.second.Reg, 0, CI.second.SubReg); 993303231Sdim Copies.push_back(C); 994303231Sdim } 995303231Sdim} 996303231Sdim 997303231Sdim/// Remove the specified dead machine basic block from the function, updating 998303231Sdim/// the CFG. 999314564Sdimvoid TailDuplicator::removeDeadBlock( 1000314564Sdim MachineBasicBlock *MBB, 1001321369Sdim function_ref<void(MachineBasicBlock *)> *RemovalCallback) { 1002303231Sdim assert(MBB->pred_empty() && "MBB must be dead!"); 1003341825Sdim LLVM_DEBUG(dbgs() << "\nRemoving MBB: " << *MBB); 1004303231Sdim 1005314564Sdim if (RemovalCallback) 1006314564Sdim (*RemovalCallback)(MBB); 1007314564Sdim 1008303231Sdim // Remove all successors. 1009303231Sdim while (!MBB->succ_empty()) 1010303231Sdim MBB->removeSuccessor(MBB->succ_end() - 1); 1011303231Sdim 1012303231Sdim // Remove the block. 1013303231Sdim MBB->eraseFromParent(); 1014303231Sdim} 1015