TailDuplication.cpp revision 223017
1//===-- TailDuplication.cpp - Duplicate blocks into predecessors' tails ---===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This pass duplicates basic blocks ending in unconditional branches into 11// the tails of their predecessors. 12// 13//===----------------------------------------------------------------------===// 14 15#define DEBUG_TYPE "tailduplication" 16#include "llvm/Function.h" 17#include "llvm/CodeGen/Passes.h" 18#include "llvm/CodeGen/MachineModuleInfo.h" 19#include "llvm/CodeGen/MachineFunctionPass.h" 20#include "llvm/CodeGen/MachineInstrBuilder.h" 21#include "llvm/CodeGen/MachineRegisterInfo.h" 22#include "llvm/CodeGen/MachineSSAUpdater.h" 23#include "llvm/Target/TargetInstrInfo.h" 24#include "llvm/Support/CommandLine.h" 25#include "llvm/Support/Debug.h" 26#include "llvm/Support/ErrorHandling.h" 27#include "llvm/Support/raw_ostream.h" 28#include "llvm/ADT/SmallSet.h" 29#include "llvm/ADT/SetVector.h" 30#include "llvm/ADT/Statistic.h" 31using namespace llvm; 32 33STATISTIC(NumTails , "Number of tails duplicated"); 34STATISTIC(NumTailDups , "Number of tail duplicated blocks"); 35STATISTIC(NumInstrDups , "Additional instructions due to tail duplication"); 36STATISTIC(NumDeadBlocks, "Number of dead blocks removed"); 37STATISTIC(NumAddedPHIs , "Number of phis added"); 38 39// Heuristic for tail duplication. 40static cl::opt<unsigned> 41TailDuplicateSize("tail-dup-size", 42 cl::desc("Maximum instructions to consider tail duplicating"), 43 cl::init(2), cl::Hidden); 44 45static cl::opt<bool> 46TailDupVerify("tail-dup-verify", 47 cl::desc("Verify sanity of PHI instructions during taildup"), 48 cl::init(false), cl::Hidden); 49 50static cl::opt<unsigned> 51TailDupLimit("tail-dup-limit", cl::init(~0U), cl::Hidden); 52 53typedef std::vector<std::pair<MachineBasicBlock*,unsigned> > AvailableValsTy; 54 55namespace { 56 /// TailDuplicatePass - Perform tail duplication. 57 class TailDuplicatePass : public MachineFunctionPass { 58 bool PreRegAlloc; 59 const TargetInstrInfo *TII; 60 MachineModuleInfo *MMI; 61 MachineRegisterInfo *MRI; 62 63 // SSAUpdateVRs - A list of virtual registers for which to update SSA form. 64 SmallVector<unsigned, 16> SSAUpdateVRs; 65 66 // SSAUpdateVals - For each virtual register in SSAUpdateVals keep a list of 67 // source virtual registers. 68 DenseMap<unsigned, AvailableValsTy> SSAUpdateVals; 69 70 public: 71 static char ID; 72 explicit TailDuplicatePass(bool PreRA) : 73 MachineFunctionPass(ID), PreRegAlloc(PreRA) {} 74 75 virtual bool runOnMachineFunction(MachineFunction &MF); 76 virtual const char *getPassName() const { return "Tail Duplication"; } 77 78 private: 79 void AddSSAUpdateEntry(unsigned OrigReg, unsigned NewReg, 80 MachineBasicBlock *BB); 81 void ProcessPHI(MachineInstr *MI, MachineBasicBlock *TailBB, 82 MachineBasicBlock *PredBB, 83 DenseMap<unsigned, unsigned> &LocalVRMap, 84 SmallVector<std::pair<unsigned,unsigned>, 4> &Copies, 85 const DenseSet<unsigned> &UsedByPhi, 86 bool Remove); 87 void DuplicateInstruction(MachineInstr *MI, 88 MachineBasicBlock *TailBB, 89 MachineBasicBlock *PredBB, 90 MachineFunction &MF, 91 DenseMap<unsigned, unsigned> &LocalVRMap, 92 const DenseSet<unsigned> &UsedByPhi); 93 void UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead, 94 SmallVector<MachineBasicBlock*, 8> &TDBBs, 95 SmallSetVector<MachineBasicBlock*, 8> &Succs); 96 bool TailDuplicateBlocks(MachineFunction &MF); 97 bool shouldTailDuplicate(const MachineFunction &MF, 98 MachineBasicBlock &TailBB); 99 bool TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF, 100 SmallVector<MachineBasicBlock*, 8> &TDBBs, 101 SmallVector<MachineInstr*, 16> &Copies); 102 void RemoveDeadBlock(MachineBasicBlock *MBB); 103 }; 104 105 char TailDuplicatePass::ID = 0; 106} 107 108FunctionPass *llvm::createTailDuplicatePass(bool PreRegAlloc) { 109 return new TailDuplicatePass(PreRegAlloc); 110} 111 112bool TailDuplicatePass::runOnMachineFunction(MachineFunction &MF) { 113 TII = MF.getTarget().getInstrInfo(); 114 MRI = &MF.getRegInfo(); 115 MMI = getAnalysisIfAvailable<MachineModuleInfo>(); 116 117 bool MadeChange = false; 118 while (TailDuplicateBlocks(MF)) 119 MadeChange = true; 120 121 return MadeChange; 122} 123 124static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) { 125 for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ++I) { 126 MachineBasicBlock *MBB = I; 127 SmallSetVector<MachineBasicBlock*, 8> Preds(MBB->pred_begin(), 128 MBB->pred_end()); 129 MachineBasicBlock::iterator MI = MBB->begin(); 130 while (MI != MBB->end()) { 131 if (!MI->isPHI()) 132 break; 133 for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(), 134 PE = Preds.end(); PI != PE; ++PI) { 135 MachineBasicBlock *PredBB = *PI; 136 bool Found = false; 137 for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) { 138 MachineBasicBlock *PHIBB = MI->getOperand(i+1).getMBB(); 139 if (PHIBB == PredBB) { 140 Found = true; 141 break; 142 } 143 } 144 if (!Found) { 145 dbgs() << "Malformed PHI in BB#" << MBB->getNumber() << ": " << *MI; 146 dbgs() << " missing input from predecessor BB#" 147 << PredBB->getNumber() << '\n'; 148 llvm_unreachable(0); 149 } 150 } 151 152 for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) { 153 MachineBasicBlock *PHIBB = MI->getOperand(i+1).getMBB(); 154 if (CheckExtra && !Preds.count(PHIBB)) { 155 dbgs() << "Warning: malformed PHI in BB#" << MBB->getNumber() 156 << ": " << *MI; 157 dbgs() << " extra input from predecessor BB#" 158 << PHIBB->getNumber() << '\n'; 159 llvm_unreachable(0); 160 } 161 if (PHIBB->getNumber() < 0) { 162 dbgs() << "Malformed PHI in BB#" << MBB->getNumber() << ": " << *MI; 163 dbgs() << " non-existing BB#" << PHIBB->getNumber() << '\n'; 164 llvm_unreachable(0); 165 } 166 } 167 ++MI; 168 } 169 } 170} 171 172/// TailDuplicateBlocks - Look for small blocks that are unconditionally 173/// branched to and do not fall through. Tail-duplicate their instructions 174/// into their predecessors to eliminate (dynamic) branches. 175bool TailDuplicatePass::TailDuplicateBlocks(MachineFunction &MF) { 176 bool MadeChange = false; 177 178 if (PreRegAlloc && TailDupVerify) { 179 DEBUG(dbgs() << "\n*** Before tail-duplicating\n"); 180 VerifyPHIs(MF, true); 181 } 182 183 SmallVector<MachineInstr*, 8> NewPHIs; 184 MachineSSAUpdater SSAUpdate(MF, &NewPHIs); 185 186 for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ) { 187 MachineBasicBlock *MBB = I++; 188 189 if (NumTails == TailDupLimit) 190 break; 191 192 // Save the successors list. 193 SmallSetVector<MachineBasicBlock*, 8> Succs(MBB->succ_begin(), 194 MBB->succ_end()); 195 196 SmallVector<MachineBasicBlock*, 8> TDBBs; 197 SmallVector<MachineInstr*, 16> Copies; 198 if (TailDuplicate(MBB, MF, TDBBs, Copies)) { 199 ++NumTails; 200 201 // TailBB's immediate successors are now successors of those predecessors 202 // which duplicated TailBB. Add the predecessors as sources to the PHI 203 // instructions. 204 bool isDead = MBB->pred_empty(); 205 if (PreRegAlloc) 206 UpdateSuccessorsPHIs(MBB, isDead, TDBBs, Succs); 207 208 // If it is dead, remove it. 209 if (isDead) { 210 NumInstrDups -= MBB->size(); 211 RemoveDeadBlock(MBB); 212 ++NumDeadBlocks; 213 } 214 215 // Update SSA form. 216 if (!SSAUpdateVRs.empty()) { 217 for (unsigned i = 0, e = SSAUpdateVRs.size(); i != e; ++i) { 218 unsigned VReg = SSAUpdateVRs[i]; 219 SSAUpdate.Initialize(VReg); 220 221 // If the original definition is still around, add it as an available 222 // value. 223 MachineInstr *DefMI = MRI->getVRegDef(VReg); 224 MachineBasicBlock *DefBB = 0; 225 if (DefMI) { 226 DefBB = DefMI->getParent(); 227 SSAUpdate.AddAvailableValue(DefBB, VReg); 228 } 229 230 // Add the new vregs as available values. 231 DenseMap<unsigned, AvailableValsTy>::iterator LI = 232 SSAUpdateVals.find(VReg); 233 for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) { 234 MachineBasicBlock *SrcBB = LI->second[j].first; 235 unsigned SrcReg = LI->second[j].second; 236 SSAUpdate.AddAvailableValue(SrcBB, SrcReg); 237 } 238 239 // Rewrite uses that are outside of the original def's block. 240 MachineRegisterInfo::use_iterator UI = MRI->use_begin(VReg); 241 while (UI != MRI->use_end()) { 242 MachineOperand &UseMO = UI.getOperand(); 243 MachineInstr *UseMI = &*UI; 244 ++UI; 245 if (UseMI->getParent() == DefBB && !UseMI->isPHI()) 246 continue; 247 SSAUpdate.RewriteUse(UseMO); 248 } 249 } 250 251 SSAUpdateVRs.clear(); 252 SSAUpdateVals.clear(); 253 } 254 255 // Eliminate some of the copies inserted by tail duplication to maintain 256 // SSA form. 257 for (unsigned i = 0, e = Copies.size(); i != e; ++i) { 258 MachineInstr *Copy = Copies[i]; 259 if (!Copy->isCopy()) 260 continue; 261 unsigned Dst = Copy->getOperand(0).getReg(); 262 unsigned Src = Copy->getOperand(1).getReg(); 263 MachineRegisterInfo::use_iterator UI = MRI->use_begin(Src); 264 if (++UI == MRI->use_end()) { 265 // Copy is the only use. Do trivial copy propagation here. 266 MRI->replaceRegWith(Dst, Src); 267 Copy->eraseFromParent(); 268 } 269 } 270 271 if (PreRegAlloc && TailDupVerify) 272 VerifyPHIs(MF, false); 273 MadeChange = true; 274 } 275 } 276 NumAddedPHIs += NewPHIs.size(); 277 278 return MadeChange; 279} 280 281static bool isDefLiveOut(unsigned Reg, MachineBasicBlock *BB, 282 const MachineRegisterInfo *MRI) { 283 for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg), 284 UE = MRI->use_end(); UI != UE; ++UI) { 285 MachineInstr *UseMI = &*UI; 286 if (UseMI->getParent() != BB) 287 return true; 288 } 289 return false; 290} 291 292static unsigned getPHISrcRegOpIdx(MachineInstr *MI, MachineBasicBlock *SrcBB) { 293 for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) 294 if (MI->getOperand(i+1).getMBB() == SrcBB) 295 return i; 296 return 0; 297} 298 299 300// Remember which registers are used by phis in this block. This is 301// used to determine which registers are liveout while modifying the 302// block (which is why we need to copy the information). 303static void getRegsUsedByPHIs(const MachineBasicBlock &BB, 304 DenseSet<unsigned> *UsedByPhi) { 305 for(MachineBasicBlock::const_iterator I = BB.begin(), E = BB.end(); 306 I != E; ++I) { 307 const MachineInstr &MI = *I; 308 if (!MI.isPHI()) 309 break; 310 for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) { 311 unsigned SrcReg = MI.getOperand(i).getReg(); 312 UsedByPhi->insert(SrcReg); 313 } 314 } 315} 316 317/// AddSSAUpdateEntry - Add a definition and source virtual registers pair for 318/// SSA update. 319void TailDuplicatePass::AddSSAUpdateEntry(unsigned OrigReg, unsigned NewReg, 320 MachineBasicBlock *BB) { 321 DenseMap<unsigned, AvailableValsTy>::iterator LI= SSAUpdateVals.find(OrigReg); 322 if (LI != SSAUpdateVals.end()) 323 LI->second.push_back(std::make_pair(BB, NewReg)); 324 else { 325 AvailableValsTy Vals; 326 Vals.push_back(std::make_pair(BB, NewReg)); 327 SSAUpdateVals.insert(std::make_pair(OrigReg, Vals)); 328 SSAUpdateVRs.push_back(OrigReg); 329 } 330} 331 332/// ProcessPHI - Process PHI node in TailBB by turning it into a copy in PredBB. 333/// Remember the source register that's contributed by PredBB and update SSA 334/// update map. 335void TailDuplicatePass::ProcessPHI(MachineInstr *MI, 336 MachineBasicBlock *TailBB, 337 MachineBasicBlock *PredBB, 338 DenseMap<unsigned, unsigned> &LocalVRMap, 339 SmallVector<std::pair<unsigned,unsigned>, 4> &Copies, 340 const DenseSet<unsigned> &RegsUsedByPhi, 341 bool Remove) { 342 unsigned DefReg = MI->getOperand(0).getReg(); 343 unsigned SrcOpIdx = getPHISrcRegOpIdx(MI, PredBB); 344 assert(SrcOpIdx && "Unable to find matching PHI source?"); 345 unsigned SrcReg = MI->getOperand(SrcOpIdx).getReg(); 346 const TargetRegisterClass *RC = MRI->getRegClass(DefReg); 347 LocalVRMap.insert(std::make_pair(DefReg, SrcReg)); 348 349 // Insert a copy from source to the end of the block. The def register is the 350 // available value liveout of the block. 351 unsigned NewDef = MRI->createVirtualRegister(RC); 352 Copies.push_back(std::make_pair(NewDef, SrcReg)); 353 if (isDefLiveOut(DefReg, TailBB, MRI) || RegsUsedByPhi.count(DefReg)) 354 AddSSAUpdateEntry(DefReg, NewDef, PredBB); 355 356 if (!Remove) 357 return; 358 359 // Remove PredBB from the PHI node. 360 MI->RemoveOperand(SrcOpIdx+1); 361 MI->RemoveOperand(SrcOpIdx); 362 if (MI->getNumOperands() == 1) 363 MI->eraseFromParent(); 364} 365 366/// DuplicateInstruction - Duplicate a TailBB instruction to PredBB and update 367/// the source operands due to earlier PHI translation. 368void TailDuplicatePass::DuplicateInstruction(MachineInstr *MI, 369 MachineBasicBlock *TailBB, 370 MachineBasicBlock *PredBB, 371 MachineFunction &MF, 372 DenseMap<unsigned, unsigned> &LocalVRMap, 373 const DenseSet<unsigned> &UsedByPhi) { 374 MachineInstr *NewMI = TII->duplicate(MI, MF); 375 for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) { 376 MachineOperand &MO = NewMI->getOperand(i); 377 if (!MO.isReg()) 378 continue; 379 unsigned Reg = MO.getReg(); 380 if (!TargetRegisterInfo::isVirtualRegister(Reg)) 381 continue; 382 if (MO.isDef()) { 383 const TargetRegisterClass *RC = MRI->getRegClass(Reg); 384 unsigned NewReg = MRI->createVirtualRegister(RC); 385 MO.setReg(NewReg); 386 LocalVRMap.insert(std::make_pair(Reg, NewReg)); 387 if (isDefLiveOut(Reg, TailBB, MRI) || UsedByPhi.count(Reg)) 388 AddSSAUpdateEntry(Reg, NewReg, PredBB); 389 } else { 390 DenseMap<unsigned, unsigned>::iterator VI = LocalVRMap.find(Reg); 391 if (VI != LocalVRMap.end()) 392 MO.setReg(VI->second); 393 } 394 } 395 PredBB->insert(PredBB->end(), NewMI); 396} 397 398/// UpdateSuccessorsPHIs - After FromBB is tail duplicated into its predecessor 399/// blocks, the successors have gained new predecessors. Update the PHI 400/// instructions in them accordingly. 401void 402TailDuplicatePass::UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead, 403 SmallVector<MachineBasicBlock*, 8> &TDBBs, 404 SmallSetVector<MachineBasicBlock*,8> &Succs) { 405 for (SmallSetVector<MachineBasicBlock*, 8>::iterator SI = Succs.begin(), 406 SE = Succs.end(); SI != SE; ++SI) { 407 MachineBasicBlock *SuccBB = *SI; 408 for (MachineBasicBlock::iterator II = SuccBB->begin(), EE = SuccBB->end(); 409 II != EE; ++II) { 410 if (!II->isPHI()) 411 break; 412 unsigned Idx = 0; 413 for (unsigned i = 1, e = II->getNumOperands(); i != e; i += 2) { 414 MachineOperand &MO = II->getOperand(i+1); 415 if (MO.getMBB() == FromBB) { 416 Idx = i; 417 break; 418 } 419 } 420 421 assert(Idx != 0); 422 MachineOperand &MO0 = II->getOperand(Idx); 423 unsigned Reg = MO0.getReg(); 424 if (isDead) { 425 // Folded into the previous BB. 426 // There could be duplicate phi source entries. FIXME: Should sdisel 427 // or earlier pass fixed this? 428 for (unsigned i = II->getNumOperands()-2; i != Idx; i -= 2) { 429 MachineOperand &MO = II->getOperand(i+1); 430 if (MO.getMBB() == FromBB) { 431 II->RemoveOperand(i+1); 432 II->RemoveOperand(i); 433 } 434 } 435 } else 436 Idx = 0; 437 438 // If Idx is set, the operands at Idx and Idx+1 must be removed. 439 // We reuse the location to avoid expensive RemoveOperand calls. 440 441 DenseMap<unsigned,AvailableValsTy>::iterator LI=SSAUpdateVals.find(Reg); 442 if (LI != SSAUpdateVals.end()) { 443 // This register is defined in the tail block. 444 for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) { 445 MachineBasicBlock *SrcBB = LI->second[j].first; 446 // If we didn't duplicate a bb into a particular predecessor, we 447 // might still have added an entry to SSAUpdateVals to correcly 448 // recompute SSA. If that case, avoid adding a dummy extra argument 449 // this PHI. 450 if (!SrcBB->isSuccessor(SuccBB)) 451 continue; 452 453 unsigned SrcReg = LI->second[j].second; 454 if (Idx != 0) { 455 II->getOperand(Idx).setReg(SrcReg); 456 II->getOperand(Idx+1).setMBB(SrcBB); 457 Idx = 0; 458 } else { 459 II->addOperand(MachineOperand::CreateReg(SrcReg, false)); 460 II->addOperand(MachineOperand::CreateMBB(SrcBB)); 461 } 462 } 463 } else { 464 // Live in tail block, must also be live in predecessors. 465 for (unsigned j = 0, ee = TDBBs.size(); j != ee; ++j) { 466 MachineBasicBlock *SrcBB = TDBBs[j]; 467 if (Idx != 0) { 468 II->getOperand(Idx).setReg(Reg); 469 II->getOperand(Idx+1).setMBB(SrcBB); 470 Idx = 0; 471 } else { 472 II->addOperand(MachineOperand::CreateReg(Reg, false)); 473 II->addOperand(MachineOperand::CreateMBB(SrcBB)); 474 } 475 } 476 } 477 if (Idx != 0) { 478 II->RemoveOperand(Idx+1); 479 II->RemoveOperand(Idx); 480 } 481 } 482 } 483} 484 485/// shouldTailDuplicate - Determine if it is profitable to duplicate this block. 486bool 487TailDuplicatePass::shouldTailDuplicate(const MachineFunction &MF, 488 MachineBasicBlock &TailBB) { 489 // Only duplicate blocks that end with unconditional branches. 490 if (TailBB.canFallThrough()) 491 return false; 492 493 // Set the limit on the cost to duplicate. When optimizing for size, 494 // duplicate only one, because one branch instruction can be eliminated to 495 // compensate for the duplication. 496 unsigned MaxDuplicateCount; 497 if (TailDuplicateSize.getNumOccurrences() == 0 && 498 MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize)) 499 MaxDuplicateCount = 1; 500 else 501 MaxDuplicateCount = TailDuplicateSize; 502 503 if (PreRegAlloc) { 504 if (TailBB.empty()) 505 return false; 506 const TargetInstrDesc &TID = TailBB.back().getDesc(); 507 // Pre-regalloc tail duplication hurts compile time and doesn't help 508 // much except for indirect branches. 509 if (!TID.isIndirectBranch()) 510 return false; 511 // If the target has hardware branch prediction that can handle indirect 512 // branches, duplicating them can often make them predictable when there 513 // are common paths through the code. The limit needs to be high enough 514 // to allow undoing the effects of tail merging and other optimizations 515 // that rearrange the predecessors of the indirect branch. 516 MaxDuplicateCount = 20; 517 } 518 519 // Don't try to tail-duplicate single-block loops. 520 if (TailBB.isSuccessor(&TailBB)) 521 return false; 522 523 // Check the instructions in the block to determine whether tail-duplication 524 // is invalid or unlikely to be profitable. 525 unsigned InstrCount = 0; 526 bool HasCall = false; 527 for (MachineBasicBlock::const_iterator I = TailBB.begin(); I != TailBB.end(); 528 ++I) { 529 // Non-duplicable things shouldn't be tail-duplicated. 530 if (I->getDesc().isNotDuplicable()) return false; 531 // Do not duplicate 'return' instructions if this is a pre-regalloc run. 532 // A return may expand into a lot more instructions (e.g. reload of callee 533 // saved registers) after PEI. 534 if (PreRegAlloc && I->getDesc().isReturn()) return false; 535 // Don't duplicate more than the threshold. 536 if (InstrCount == MaxDuplicateCount) return false; 537 // Remember if we saw a call. 538 if (I->getDesc().isCall()) HasCall = true; 539 if (!I->isPHI() && !I->isDebugValue()) 540 InstrCount += 1; 541 } 542 // Don't tail-duplicate calls before register allocation. Calls presents a 543 // barrier to register allocation so duplicating them may end up increasing 544 // spills. 545 if (InstrCount > 1 && (PreRegAlloc && HasCall)) 546 return false; 547 548 return true; 549} 550 551/// TailDuplicate - If it is profitable, duplicate TailBB's contents in each 552/// of its predecessors. 553bool 554TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF, 555 SmallVector<MachineBasicBlock*, 8> &TDBBs, 556 SmallVector<MachineInstr*, 16> &Copies) { 557 if (!shouldTailDuplicate(MF, *TailBB)) 558 return false; 559 560 DEBUG(dbgs() << "\n*** Tail-duplicating BB#" << TailBB->getNumber() << '\n'); 561 562 // Iterate through all the unique predecessors and tail-duplicate this 563 // block into them, if possible. Copying the list ahead of time also 564 // avoids trouble with the predecessor list reallocating. 565 bool Changed = false; 566 SmallSetVector<MachineBasicBlock*, 8> Preds(TailBB->pred_begin(), 567 TailBB->pred_end()); 568 DenseSet<unsigned> UsedByPhi; 569 getRegsUsedByPHIs(*TailBB, &UsedByPhi); 570 for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(), 571 PE = Preds.end(); PI != PE; ++PI) { 572 MachineBasicBlock *PredBB = *PI; 573 574 assert(TailBB != PredBB && 575 "Single-block loop should have been rejected earlier!"); 576 // EH edges are ignored by AnalyzeBranch. 577 if (PredBB->succ_size() > 1) 578 continue; 579 580 MachineBasicBlock *PredTBB, *PredFBB; 581 SmallVector<MachineOperand, 4> PredCond; 582 if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true)) 583 continue; 584 if (!PredCond.empty()) 585 continue; 586 // Don't duplicate into a fall-through predecessor (at least for now). 587 if (PredBB->isLayoutSuccessor(TailBB) && PredBB->canFallThrough()) 588 continue; 589 590 DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB 591 << "From Succ: " << *TailBB); 592 593 TDBBs.push_back(PredBB); 594 595 // Remove PredBB's unconditional branch. 596 TII->RemoveBranch(*PredBB); 597 598 // Clone the contents of TailBB into PredBB. 599 DenseMap<unsigned, unsigned> LocalVRMap; 600 SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos; 601 MachineBasicBlock::iterator I = TailBB->begin(); 602 while (I != TailBB->end()) { 603 MachineInstr *MI = &*I; 604 ++I; 605 if (MI->isPHI()) { 606 // Replace the uses of the def of the PHI with the register coming 607 // from PredBB. 608 ProcessPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, true); 609 } else { 610 // Replace def of virtual registers with new registers, and update 611 // uses with PHI source register or the new registers. 612 DuplicateInstruction(MI, TailBB, PredBB, MF, LocalVRMap, UsedByPhi); 613 } 614 } 615 MachineBasicBlock::iterator Loc = PredBB->getFirstTerminator(); 616 for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) { 617 Copies.push_back(BuildMI(*PredBB, Loc, DebugLoc(), 618 TII->get(TargetOpcode::COPY), 619 CopyInfos[i].first).addReg(CopyInfos[i].second)); 620 } 621 NumInstrDups += TailBB->size() - 1; // subtract one for removed branch 622 623 // Update the CFG. 624 PredBB->removeSuccessor(PredBB->succ_begin()); 625 assert(PredBB->succ_empty() && 626 "TailDuplicate called on block with multiple successors!"); 627 for (MachineBasicBlock::succ_iterator I = TailBB->succ_begin(), 628 E = TailBB->succ_end(); I != E; ++I) 629 PredBB->addSuccessor(*I); 630 631 Changed = true; 632 ++NumTailDups; 633 } 634 635 // If TailBB was duplicated into all its predecessors except for the prior 636 // block, which falls through unconditionally, move the contents of this 637 // block into the prior block. 638 MachineBasicBlock *PrevBB = prior(MachineFunction::iterator(TailBB)); 639 MachineBasicBlock *PriorTBB = 0, *PriorFBB = 0; 640 SmallVector<MachineOperand, 4> PriorCond; 641 // This has to check PrevBB->succ_size() because EH edges are ignored by 642 // AnalyzeBranch. 643 if (PrevBB->succ_size() == 1 && 644 !TII->AnalyzeBranch(*PrevBB, PriorTBB, PriorFBB, PriorCond, true) && 645 PriorCond.empty() && !PriorTBB && TailBB->pred_size() == 1 && 646 !TailBB->hasAddressTaken()) { 647 DEBUG(dbgs() << "\nMerging into block: " << *PrevBB 648 << "From MBB: " << *TailBB); 649 if (PreRegAlloc) { 650 DenseMap<unsigned, unsigned> LocalVRMap; 651 SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos; 652 MachineBasicBlock::iterator I = TailBB->begin(); 653 // Process PHI instructions first. 654 while (I != TailBB->end() && I->isPHI()) { 655 // Replace the uses of the def of the PHI with the register coming 656 // from PredBB. 657 MachineInstr *MI = &*I++; 658 ProcessPHI(MI, TailBB, PrevBB, LocalVRMap, CopyInfos, UsedByPhi, true); 659 if (MI->getParent()) 660 MI->eraseFromParent(); 661 } 662 663 // Now copy the non-PHI instructions. 664 while (I != TailBB->end()) { 665 // Replace def of virtual registers with new registers, and update 666 // uses with PHI source register or the new registers. 667 MachineInstr *MI = &*I++; 668 DuplicateInstruction(MI, TailBB, PrevBB, MF, LocalVRMap, UsedByPhi); 669 MI->eraseFromParent(); 670 } 671 MachineBasicBlock::iterator Loc = PrevBB->getFirstTerminator(); 672 for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) { 673 Copies.push_back(BuildMI(*PrevBB, Loc, DebugLoc(), 674 TII->get(TargetOpcode::COPY), 675 CopyInfos[i].first) 676 .addReg(CopyInfos[i].second)); 677 } 678 } else { 679 // No PHIs to worry about, just splice the instructions over. 680 PrevBB->splice(PrevBB->end(), TailBB, TailBB->begin(), TailBB->end()); 681 } 682 PrevBB->removeSuccessor(PrevBB->succ_begin()); 683 assert(PrevBB->succ_empty()); 684 PrevBB->transferSuccessors(TailBB); 685 TDBBs.push_back(PrevBB); 686 Changed = true; 687 } 688 689 // If this is after register allocation, there are no phis to fix. 690 if (!PreRegAlloc) 691 return Changed; 692 693 // If we made no changes so far, we are safe. 694 if (!Changed) 695 return Changed; 696 697 698 // Handle the nasty case in that we duplicated a block that is part of a loop 699 // into some but not all of its predecessors. For example: 700 // 1 -> 2 <-> 3 | 701 // \ | 702 // \---> rest | 703 // if we duplicate 2 into 1 but not into 3, we end up with 704 // 12 -> 3 <-> 2 -> rest | 705 // \ / | 706 // \----->-----/ | 707 // If there was a "var = phi(1, 3)" in 2, it has to be ultimately replaced 708 // with a phi in 3 (which now dominates 2). 709 // What we do here is introduce a copy in 3 of the register defined by the 710 // phi, just like when we are duplicating 2 into 3, but we don't copy any 711 // real instructions or remove the 3 -> 2 edge from the phi in 2. 712 for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(), 713 PE = Preds.end(); PI != PE; ++PI) { 714 MachineBasicBlock *PredBB = *PI; 715 if (std::find(TDBBs.begin(), TDBBs.end(), PredBB) != TDBBs.end()) 716 continue; 717 718 // EH edges 719 if (PredBB->succ_size() != 1) 720 continue; 721 722 DenseMap<unsigned, unsigned> LocalVRMap; 723 SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos; 724 MachineBasicBlock::iterator I = TailBB->begin(); 725 // Process PHI instructions first. 726 while (I != TailBB->end() && I->isPHI()) { 727 // Replace the uses of the def of the PHI with the register coming 728 // from PredBB. 729 MachineInstr *MI = &*I++; 730 ProcessPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, false); 731 } 732 MachineBasicBlock::iterator Loc = PredBB->getFirstTerminator(); 733 for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) { 734 Copies.push_back(BuildMI(*PredBB, Loc, DebugLoc(), 735 TII->get(TargetOpcode::COPY), 736 CopyInfos[i].first).addReg(CopyInfos[i].second)); 737 } 738 } 739 740 return Changed; 741} 742 743/// RemoveDeadBlock - Remove the specified dead machine basic block from the 744/// function, updating the CFG. 745void TailDuplicatePass::RemoveDeadBlock(MachineBasicBlock *MBB) { 746 assert(MBB->pred_empty() && "MBB must be dead!"); 747 DEBUG(dbgs() << "\nRemoving MBB: " << *MBB); 748 749 // Remove all successors. 750 while (!MBB->succ_empty()) 751 MBB->removeSuccessor(MBB->succ_end()-1); 752 753 // Remove the block. 754 MBB->eraseFromParent(); 755} 756