1//===- RegisterCoalescer.cpp - Generic Register Coalescing Interface -------==// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file implements the generic RegisterCoalescer interface which 11// is used as the common interface used by all clients and 12// implementations of register coalescing. 13// 14//===----------------------------------------------------------------------===// 15 16#define DEBUG_TYPE "regalloc" 17#include "RegisterCoalescer.h" 18#include "LiveDebugVariables.h" 19#include "VirtRegMap.h" 20 21#include "llvm/Pass.h" 22#include "llvm/Value.h" 23#include "llvm/ADT/OwningPtr.h" 24#include "llvm/ADT/STLExtras.h" 25#include "llvm/ADT/SmallSet.h" 26#include "llvm/ADT/Statistic.h" 27#include "llvm/Analysis/AliasAnalysis.h" 28#include "llvm/CodeGen/LiveIntervalAnalysis.h" 29#include "llvm/CodeGen/LiveIntervalAnalysis.h" 30#include "llvm/CodeGen/LiveRangeEdit.h" 31#include "llvm/CodeGen/MachineFrameInfo.h" 32#include "llvm/CodeGen/MachineInstr.h" 33#include "llvm/CodeGen/MachineInstr.h" 34#include "llvm/CodeGen/MachineLoopInfo.h" 35#include "llvm/CodeGen/MachineRegisterInfo.h" 36#include "llvm/CodeGen/MachineRegisterInfo.h" 37#include "llvm/CodeGen/Passes.h" 38#include "llvm/CodeGen/RegisterClassInfo.h" 39#include "llvm/Support/CommandLine.h" 40#include "llvm/Support/Debug.h" 41#include "llvm/Support/ErrorHandling.h" 42#include "llvm/Support/raw_ostream.h" 43#include "llvm/Target/TargetInstrInfo.h" 44#include "llvm/Target/TargetInstrInfo.h" 45#include "llvm/Target/TargetMachine.h" 46#include "llvm/Target/TargetOptions.h" 47#include "llvm/Target/TargetRegisterInfo.h" 48#include <algorithm> 49#include <cmath> 50using namespace llvm; 51 52STATISTIC(numJoins , "Number of interval joins performed"); 53STATISTIC(numCrossRCs , "Number of cross class 
joins performed"); 54STATISTIC(numCommutes , "Number of instruction commuting performed"); 55STATISTIC(numExtends , "Number of copies extended"); 56STATISTIC(NumReMats , "Number of instructions re-materialized"); 57STATISTIC(NumInflated , "Number of register classes inflated"); 58STATISTIC(NumLaneConflicts, "Number of dead lane conflicts tested"); 59STATISTIC(NumLaneResolves, "Number of dead lane conflicts resolved"); 60 61static cl::opt<bool> 62EnableJoining("join-liveintervals", 63 cl::desc("Coalesce copies (default=true)"), 64 cl::init(true)); 65 66static cl::opt<bool> 67VerifyCoalescing("verify-coalescing", 68 cl::desc("Verify machine instrs before and after register coalescing"), 69 cl::Hidden); 70 71// Temporary option for testing new coalescer algo. 72static cl::opt<bool> 73NewCoalescer("new-coalescer", cl::Hidden, cl::init(true), 74 cl::desc("Use new coalescer algorithm")); 75 76namespace { 77 class RegisterCoalescer : public MachineFunctionPass, 78 private LiveRangeEdit::Delegate { 79 MachineFunction* MF; 80 MachineRegisterInfo* MRI; 81 const TargetMachine* TM; 82 const TargetRegisterInfo* TRI; 83 const TargetInstrInfo* TII; 84 LiveIntervals *LIS; 85 LiveDebugVariables *LDV; 86 const MachineLoopInfo* Loops; 87 AliasAnalysis *AA; 88 RegisterClassInfo RegClassInfo; 89 90 /// WorkList - Copy instructions yet to be coalesced. 91 SmallVector<MachineInstr*, 8> WorkList; 92 93 /// ErasedInstrs - Set of instruction pointers that have been erased, and 94 /// that may be present in WorkList. 95 SmallPtrSet<MachineInstr*, 8> ErasedInstrs; 96 97 /// Dead instructions that are about to be deleted. 98 SmallVector<MachineInstr*, 8> DeadDefs; 99 100 /// Virtual registers to be considered for register class inflation. 101 SmallVector<unsigned, 8> InflateRegs; 102 103 /// Recursively eliminate dead defs in DeadDefs. 104 void eliminateDeadDefs(); 105 106 /// LiveRangeEdit callback. 
107 void LRE_WillEraseInstruction(MachineInstr *MI); 108 109 /// joinAllIntervals - join compatible live intervals 110 void joinAllIntervals(); 111 112 /// copyCoalesceInMBB - Coalesce copies in the specified MBB, putting 113 /// copies that cannot yet be coalesced into WorkList. 114 void copyCoalesceInMBB(MachineBasicBlock *MBB); 115 116 /// copyCoalesceWorkList - Try to coalesce all copies in WorkList after 117 /// position From. Return true if any progress was made. 118 bool copyCoalesceWorkList(unsigned From = 0); 119 120 /// joinCopy - Attempt to join intervals corresponding to SrcReg/DstReg, 121 /// which are the src/dst of the copy instruction CopyMI. This returns 122 /// true if the copy was successfully coalesced away. If it is not 123 /// currently possible to coalesce this interval, but it may be possible if 124 /// other things get coalesced, then it returns true by reference in 125 /// 'Again'. 126 bool joinCopy(MachineInstr *TheCopy, bool &Again); 127 128 /// joinIntervals - Attempt to join these two intervals. On failure, this 129 /// returns false. The output "SrcInt" will not have been modified, so we 130 /// can use this information below to update aliases. 131 bool joinIntervals(CoalescerPair &CP); 132 133 /// Attempt joining two virtual registers. Return true on success. 134 bool joinVirtRegs(CoalescerPair &CP); 135 136 /// Attempt joining with a reserved physreg. 137 bool joinReservedPhysReg(CoalescerPair &CP); 138 139 /// adjustCopiesBackFrom - We found a non-trivially-coalescable copy. If 140 /// the source value number is defined by a copy from the destination reg 141 /// see if we can merge these two destination reg valno# into a single 142 /// value number, eliminating a copy. 143 bool adjustCopiesBackFrom(const CoalescerPair &CP, MachineInstr *CopyMI); 144 145 /// hasOtherReachingDefs - Return true if there are definitions of IntB 146 /// other than BValNo val# that can reach uses of AValno val# of IntA. 
147 bool hasOtherReachingDefs(LiveInterval &IntA, LiveInterval &IntB, 148 VNInfo *AValNo, VNInfo *BValNo); 149 150 /// removeCopyByCommutingDef - We found a non-trivially-coalescable copy. 151 /// If the source value number is defined by a commutable instruction and 152 /// its other operand is coalesced to the copy dest register, see if we 153 /// can transform the copy into a noop by commuting the definition. 154 bool removeCopyByCommutingDef(const CoalescerPair &CP,MachineInstr *CopyMI); 155 156 /// reMaterializeTrivialDef - If the source of a copy is defined by a 157 /// trivial computation, replace the copy by rematerialize the definition. 158 bool reMaterializeTrivialDef(LiveInterval &SrcInt, unsigned DstReg, 159 MachineInstr *CopyMI); 160 161 /// canJoinPhys - Return true if a physreg copy should be joined. 162 bool canJoinPhys(CoalescerPair &CP); 163 164 /// updateRegDefsUses - Replace all defs and uses of SrcReg to DstReg and 165 /// update the subregister number if it is not zero. If DstReg is a 166 /// physical register and the existing subregister number of the def / use 167 /// being updated is not zero, make sure to set it to the correct physical 168 /// subregister. 169 void updateRegDefsUses(unsigned SrcReg, unsigned DstReg, unsigned SubIdx); 170 171 /// eliminateUndefCopy - Handle copies of undef values. 172 bool eliminateUndefCopy(MachineInstr *CopyMI, const CoalescerPair &CP); 173 174 public: 175 static char ID; // Class identification, replacement for typeinfo 176 RegisterCoalescer() : MachineFunctionPass(ID) { 177 initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry()); 178 } 179 180 virtual void getAnalysisUsage(AnalysisUsage &AU) const; 181 182 virtual void releaseMemory(); 183 184 /// runOnMachineFunction - pass entry point 185 virtual bool runOnMachineFunction(MachineFunction&); 186 187 /// print - Implement the dump method. 
188 virtual void print(raw_ostream &O, const Module* = 0) const; 189 }; 190} /// end anonymous namespace 191 192char &llvm::RegisterCoalescerID = RegisterCoalescer::ID; 193 194INITIALIZE_PASS_BEGIN(RegisterCoalescer, "simple-register-coalescing", 195 "Simple Register Coalescing", false, false) 196INITIALIZE_PASS_DEPENDENCY(LiveIntervals) 197INITIALIZE_PASS_DEPENDENCY(LiveDebugVariables) 198INITIALIZE_PASS_DEPENDENCY(SlotIndexes) 199INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) 200INITIALIZE_AG_DEPENDENCY(AliasAnalysis) 201INITIALIZE_PASS_END(RegisterCoalescer, "simple-register-coalescing", 202 "Simple Register Coalescing", false, false) 203 204char RegisterCoalescer::ID = 0; 205 206static unsigned compose(const TargetRegisterInfo &tri, unsigned a, unsigned b) { 207 if (!a) return b; 208 if (!b) return a; 209 return tri.composeSubRegIndices(a, b); 210} 211 212static bool isMoveInstr(const TargetRegisterInfo &tri, const MachineInstr *MI, 213 unsigned &Src, unsigned &Dst, 214 unsigned &SrcSub, unsigned &DstSub) { 215 if (MI->isCopy()) { 216 Dst = MI->getOperand(0).getReg(); 217 DstSub = MI->getOperand(0).getSubReg(); 218 Src = MI->getOperand(1).getReg(); 219 SrcSub = MI->getOperand(1).getSubReg(); 220 } else if (MI->isSubregToReg()) { 221 Dst = MI->getOperand(0).getReg(); 222 DstSub = compose(tri, MI->getOperand(0).getSubReg(), 223 MI->getOperand(3).getImm()); 224 Src = MI->getOperand(2).getReg(); 225 SrcSub = MI->getOperand(2).getSubReg(); 226 } else 227 return false; 228 return true; 229} 230 231bool CoalescerPair::setRegisters(const MachineInstr *MI) { 232 SrcReg = DstReg = 0; 233 SrcIdx = DstIdx = 0; 234 NewRC = 0; 235 Flipped = CrossClass = false; 236 237 unsigned Src, Dst, SrcSub, DstSub; 238 if (!isMoveInstr(TRI, MI, Src, Dst, SrcSub, DstSub)) 239 return false; 240 Partial = SrcSub || DstSub; 241 242 // If one register is a physreg, it must be Dst. 
243 if (TargetRegisterInfo::isPhysicalRegister(Src)) { 244 if (TargetRegisterInfo::isPhysicalRegister(Dst)) 245 return false; 246 std::swap(Src, Dst); 247 std::swap(SrcSub, DstSub); 248 Flipped = true; 249 } 250 251 const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); 252 253 if (TargetRegisterInfo::isPhysicalRegister(Dst)) { 254 // Eliminate DstSub on a physreg. 255 if (DstSub) { 256 Dst = TRI.getSubReg(Dst, DstSub); 257 if (!Dst) return false; 258 DstSub = 0; 259 } 260 261 // Eliminate SrcSub by picking a corresponding Dst superregister. 262 if (SrcSub) { 263 Dst = TRI.getMatchingSuperReg(Dst, SrcSub, MRI.getRegClass(Src)); 264 if (!Dst) return false; 265 SrcSub = 0; 266 } else if (!MRI.getRegClass(Src)->contains(Dst)) { 267 return false; 268 } 269 } else { 270 // Both registers are virtual. 271 const TargetRegisterClass *SrcRC = MRI.getRegClass(Src); 272 const TargetRegisterClass *DstRC = MRI.getRegClass(Dst); 273 274 // Both registers have subreg indices. 275 if (SrcSub && DstSub) { 276 // Copies between different sub-registers are never coalescable. 277 if (Src == Dst && SrcSub != DstSub) 278 return false; 279 280 NewRC = TRI.getCommonSuperRegClass(SrcRC, SrcSub, DstRC, DstSub, 281 SrcIdx, DstIdx); 282 if (!NewRC) 283 return false; 284 } else if (DstSub) { 285 // SrcReg will be merged with a sub-register of DstReg. 286 SrcIdx = DstSub; 287 NewRC = TRI.getMatchingSuperRegClass(DstRC, SrcRC, DstSub); 288 } else if (SrcSub) { 289 // DstReg will be merged with a sub-register of SrcReg. 290 DstIdx = SrcSub; 291 NewRC = TRI.getMatchingSuperRegClass(SrcRC, DstRC, SrcSub); 292 } else { 293 // This is a straight copy without sub-registers. 294 NewRC = TRI.getCommonSubClass(DstRC, SrcRC); 295 } 296 297 // The combined constraint may be impossible to satisfy. 298 if (!NewRC) 299 return false; 300 301 // Prefer SrcReg to be a sub-register of DstReg. 302 // FIXME: Coalescer should support subregs symmetrically. 
303 if (DstIdx && !SrcIdx) { 304 std::swap(Src, Dst); 305 std::swap(SrcIdx, DstIdx); 306 Flipped = !Flipped; 307 } 308 309 CrossClass = NewRC != DstRC || NewRC != SrcRC; 310 } 311 // Check our invariants 312 assert(TargetRegisterInfo::isVirtualRegister(Src) && "Src must be virtual"); 313 assert(!(TargetRegisterInfo::isPhysicalRegister(Dst) && DstSub) && 314 "Cannot have a physical SubIdx"); 315 SrcReg = Src; 316 DstReg = Dst; 317 return true; 318} 319 320bool CoalescerPair::flip() { 321 if (TargetRegisterInfo::isPhysicalRegister(DstReg)) 322 return false; 323 std::swap(SrcReg, DstReg); 324 std::swap(SrcIdx, DstIdx); 325 Flipped = !Flipped; 326 return true; 327} 328 329bool CoalescerPair::isCoalescable(const MachineInstr *MI) const { 330 if (!MI) 331 return false; 332 unsigned Src, Dst, SrcSub, DstSub; 333 if (!isMoveInstr(TRI, MI, Src, Dst, SrcSub, DstSub)) 334 return false; 335 336 // Find the virtual register that is SrcReg. 337 if (Dst == SrcReg) { 338 std::swap(Src, Dst); 339 std::swap(SrcSub, DstSub); 340 } else if (Src != SrcReg) { 341 return false; 342 } 343 344 // Now check that Dst matches DstReg. 345 if (TargetRegisterInfo::isPhysicalRegister(DstReg)) { 346 if (!TargetRegisterInfo::isPhysicalRegister(Dst)) 347 return false; 348 assert(!DstIdx && !SrcIdx && "Inconsistent CoalescerPair state."); 349 // DstSub could be set for a physreg from INSERT_SUBREG. 350 if (DstSub) 351 Dst = TRI.getSubReg(Dst, DstSub); 352 // Full copy of Src. 353 if (!SrcSub) 354 return DstReg == Dst; 355 // This is a partial register copy. Check that the parts match. 356 return TRI.getSubReg(DstReg, SrcSub) == Dst; 357 } else { 358 // DstReg is virtual. 359 if (DstReg != Dst) 360 return false; 361 // Registers match, do the subregisters line up? 
362 return compose(TRI, SrcIdx, SrcSub) == compose(TRI, DstIdx, DstSub); 363 } 364} 365 366void RegisterCoalescer::getAnalysisUsage(AnalysisUsage &AU) const { 367 AU.setPreservesCFG(); 368 AU.addRequired<AliasAnalysis>(); 369 AU.addRequired<LiveIntervals>(); 370 AU.addPreserved<LiveIntervals>(); 371 AU.addRequired<LiveDebugVariables>(); 372 AU.addPreserved<LiveDebugVariables>(); 373 AU.addPreserved<SlotIndexes>(); 374 AU.addRequired<MachineLoopInfo>(); 375 AU.addPreserved<MachineLoopInfo>(); 376 AU.addPreservedID(MachineDominatorsID); 377 MachineFunctionPass::getAnalysisUsage(AU); 378} 379 380void RegisterCoalescer::eliminateDeadDefs() { 381 SmallVector<LiveInterval*, 8> NewRegs; 382 LiveRangeEdit(0, NewRegs, *MF, *LIS, 0, this).eliminateDeadDefs(DeadDefs); 383} 384 385// Callback from eliminateDeadDefs(). 386void RegisterCoalescer::LRE_WillEraseInstruction(MachineInstr *MI) { 387 // MI may be in WorkList. Make sure we don't visit it. 388 ErasedInstrs.insert(MI); 389} 390 391/// adjustCopiesBackFrom - We found a non-trivially-coalescable copy with IntA 392/// being the source and IntB being the dest, thus this defines a value number 393/// in IntB. If the source value number (in IntA) is defined by a copy from B, 394/// see if we can merge these two pieces of B into a single value number, 395/// eliminating a copy. For example: 396/// 397/// A3 = B0 398/// ... 399/// B1 = A3 <- this copy 400/// 401/// In this case, B0 can be extended to where the B1 copy lives, allowing the B1 402/// value number to be replaced with B0 (which simplifies the B liveinterval). 403/// 404/// This returns true if an interval was modified. 405/// 406bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP, 407 MachineInstr *CopyMI) { 408 assert(!CP.isPartial() && "This doesn't work for partial copies."); 409 assert(!CP.isPhys() && "This doesn't work for physreg copies."); 410 411 LiveInterval &IntA = 412 LIS->getInterval(CP.isFlipped() ? 
CP.getDstReg() : CP.getSrcReg()); 413 LiveInterval &IntB = 414 LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg()); 415 SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(); 416 417 // BValNo is a value number in B that is defined by a copy from A. 'B3' in 418 // the example above. 419 LiveInterval::iterator BLR = IntB.FindLiveRangeContaining(CopyIdx); 420 if (BLR == IntB.end()) return false; 421 VNInfo *BValNo = BLR->valno; 422 423 // Get the location that B is defined at. Two options: either this value has 424 // an unknown definition point or it is defined at CopyIdx. If unknown, we 425 // can't process it. 426 if (BValNo->def != CopyIdx) return false; 427 428 // AValNo is the value number in A that defines the copy, A3 in the example. 429 SlotIndex CopyUseIdx = CopyIdx.getRegSlot(true); 430 LiveInterval::iterator ALR = IntA.FindLiveRangeContaining(CopyUseIdx); 431 // The live range might not exist after fun with physreg coalescing. 432 if (ALR == IntA.end()) return false; 433 VNInfo *AValNo = ALR->valno; 434 435 // If AValNo is defined as a copy from IntB, we can potentially process this. 436 // Get the instruction that defines this value number. 437 MachineInstr *ACopyMI = LIS->getInstructionFromIndex(AValNo->def); 438 // Don't allow any partial copies, even if isCoalescable() allows them. 439 if (!CP.isCoalescable(ACopyMI) || !ACopyMI->isFullCopy()) 440 return false; 441 442 // Get the LiveRange in IntB that this value number starts with. 443 LiveInterval::iterator ValLR = 444 IntB.FindLiveRangeContaining(AValNo->def.getPrevSlot()); 445 if (ValLR == IntB.end()) 446 return false; 447 448 // Make sure that the end of the live range is inside the same block as 449 // CopyMI. 
450 MachineInstr *ValLREndInst = 451 LIS->getInstructionFromIndex(ValLR->end.getPrevSlot()); 452 if (!ValLREndInst || ValLREndInst->getParent() != CopyMI->getParent()) 453 return false; 454 455 // Okay, we now know that ValLR ends in the same block that the CopyMI 456 // live-range starts. If there are no intervening live ranges between them in 457 // IntB, we can merge them. 458 if (ValLR+1 != BLR) return false; 459 460 DEBUG(dbgs() << "Extending: " << PrintReg(IntB.reg, TRI)); 461 462 SlotIndex FillerStart = ValLR->end, FillerEnd = BLR->start; 463 // We are about to delete CopyMI, so need to remove it as the 'instruction 464 // that defines this value #'. Update the valnum with the new defining 465 // instruction #. 466 BValNo->def = FillerStart; 467 468 // Okay, we can merge them. We need to insert a new liverange: 469 // [ValLR.end, BLR.begin) of either value number, then we merge the 470 // two value numbers. 471 IntB.addRange(LiveRange(FillerStart, FillerEnd, BValNo)); 472 473 // Okay, merge "B1" into the same value number as "B0". 474 if (BValNo != ValLR->valno) 475 IntB.MergeValueNumberInto(BValNo, ValLR->valno); 476 DEBUG(dbgs() << " result = " << IntB << '\n'); 477 478 // If the source instruction was killing the source register before the 479 // merge, unset the isKill marker given the live range has been extended. 480 int UIdx = ValLREndInst->findRegisterUseOperandIdx(IntB.reg, true); 481 if (UIdx != -1) { 482 ValLREndInst->getOperand(UIdx).setIsKill(false); 483 } 484 485 // Rewrite the copy. If the copy instruction was killing the destination 486 // register before the merge, find the last use and trim the live range. That 487 // will also add the isKill marker. 
488 CopyMI->substituteRegister(IntA.reg, IntB.reg, 0, *TRI); 489 if (ALR->end == CopyIdx) 490 LIS->shrinkToUses(&IntA); 491 492 ++numExtends; 493 return true; 494} 495 496/// hasOtherReachingDefs - Return true if there are definitions of IntB 497/// other than BValNo val# that can reach uses of AValno val# of IntA. 498bool RegisterCoalescer::hasOtherReachingDefs(LiveInterval &IntA, 499 LiveInterval &IntB, 500 VNInfo *AValNo, 501 VNInfo *BValNo) { 502 // If AValNo has PHI kills, conservatively assume that IntB defs can reach 503 // the PHI values. 504 if (LIS->hasPHIKill(IntA, AValNo)) 505 return true; 506 507 for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end(); 508 AI != AE; ++AI) { 509 if (AI->valno != AValNo) continue; 510 LiveInterval::Ranges::iterator BI = 511 std::upper_bound(IntB.ranges.begin(), IntB.ranges.end(), AI->start); 512 if (BI != IntB.ranges.begin()) 513 --BI; 514 for (; BI != IntB.ranges.end() && AI->end >= BI->start; ++BI) { 515 if (BI->valno == BValNo) 516 continue; 517 if (BI->start <= AI->start && BI->end > AI->start) 518 return true; 519 if (BI->start > AI->start && BI->start < AI->end) 520 return true; 521 } 522 } 523 return false; 524} 525 526/// removeCopyByCommutingDef - We found a non-trivially-coalescable copy with 527/// IntA being the source and IntB being the dest, thus this defines a value 528/// number in IntB. If the source value number (in IntA) is defined by a 529/// commutable instruction and its other operand is coalesced to the copy dest 530/// register, see if we can transform the copy into a noop by commuting the 531/// definition. For example, 532/// 533/// A3 = op A2 B0<kill> 534/// ... 535/// B1 = A3 <- this copy 536/// ... 537/// = op A3 <- more uses 538/// 539/// ==> 540/// 541/// B2 = op B0 A2<kill> 542/// ... 543/// B1 = B2 <- now an identify copy 544/// ... 545/// = op B2 <- more uses 546/// 547/// This returns true if an interval was modified. 
548/// 549bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, 550 MachineInstr *CopyMI) { 551 assert (!CP.isPhys()); 552 553 SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(); 554 555 LiveInterval &IntA = 556 LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg()); 557 LiveInterval &IntB = 558 LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg()); 559 560 // BValNo is a value number in B that is defined by a copy from A. 'B3' in 561 // the example above. 562 VNInfo *BValNo = IntB.getVNInfoAt(CopyIdx); 563 if (!BValNo || BValNo->def != CopyIdx) 564 return false; 565 566 assert(BValNo->def == CopyIdx && "Copy doesn't define the value?"); 567 568 // AValNo is the value number in A that defines the copy, A3 in the example. 569 VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx.getRegSlot(true)); 570 assert(AValNo && "COPY source not live"); 571 if (AValNo->isPHIDef() || AValNo->isUnused()) 572 return false; 573 MachineInstr *DefMI = LIS->getInstructionFromIndex(AValNo->def); 574 if (!DefMI) 575 return false; 576 if (!DefMI->isCommutable()) 577 return false; 578 // If DefMI is a two-address instruction then commuting it will change the 579 // destination register. 580 int DefIdx = DefMI->findRegisterDefOperandIdx(IntA.reg); 581 assert(DefIdx != -1); 582 unsigned UseOpIdx; 583 if (!DefMI->isRegTiedToUseOperand(DefIdx, &UseOpIdx)) 584 return false; 585 unsigned Op1, Op2, NewDstIdx; 586 if (!TII->findCommutedOpIndices(DefMI, Op1, Op2)) 587 return false; 588 if (Op1 == UseOpIdx) 589 NewDstIdx = Op2; 590 else if (Op2 == UseOpIdx) 591 NewDstIdx = Op1; 592 else 593 return false; 594 595 MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx); 596 unsigned NewReg = NewDstMO.getReg(); 597 if (NewReg != IntB.reg || !LiveRangeQuery(IntB, AValNo->def).isKill()) 598 return false; 599 600 // Make sure there are no other definitions of IntB that would reach the 601 // uses which the new definition can reach. 
602 if (hasOtherReachingDefs(IntA, IntB, AValNo, BValNo)) 603 return false; 604 605 // If some of the uses of IntA.reg is already coalesced away, return false. 606 // It's not possible to determine whether it's safe to perform the coalescing. 607 for (MachineRegisterInfo::use_nodbg_iterator UI = 608 MRI->use_nodbg_begin(IntA.reg), 609 UE = MRI->use_nodbg_end(); UI != UE; ++UI) { 610 MachineInstr *UseMI = &*UI; 611 SlotIndex UseIdx = LIS->getInstructionIndex(UseMI); 612 LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx); 613 if (ULR == IntA.end() || ULR->valno != AValNo) 614 continue; 615 // If this use is tied to a def, we can't rewrite the register. 616 if (UseMI->isRegTiedToDefOperand(UI.getOperandNo())) 617 return false; 618 } 619 620 DEBUG(dbgs() << "\tremoveCopyByCommutingDef: " << AValNo->def << '\t' 621 << *DefMI); 622 623 // At this point we have decided that it is legal to do this 624 // transformation. Start by commuting the instruction. 625 MachineBasicBlock *MBB = DefMI->getParent(); 626 MachineInstr *NewMI = TII->commuteInstruction(DefMI); 627 if (!NewMI) 628 return false; 629 if (TargetRegisterInfo::isVirtualRegister(IntA.reg) && 630 TargetRegisterInfo::isVirtualRegister(IntB.reg) && 631 !MRI->constrainRegClass(IntB.reg, MRI->getRegClass(IntA.reg))) 632 return false; 633 if (NewMI != DefMI) { 634 LIS->ReplaceMachineInstrInMaps(DefMI, NewMI); 635 MachineBasicBlock::iterator Pos = DefMI; 636 MBB->insert(Pos, NewMI); 637 MBB->erase(DefMI); 638 } 639 unsigned OpIdx = NewMI->findRegisterUseOperandIdx(IntA.reg, false); 640 NewMI->getOperand(OpIdx).setIsKill(); 641 642 // If ALR and BLR overlaps and end of BLR extends beyond end of ALR, e.g. 643 // A = or A, B 644 // ... 645 // B = A 646 // ... 647 // C = A<kill> 648 // ... 649 // = B 650 651 // Update uses of IntA of the specific Val# with IntB. 
652 for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(IntA.reg), 653 UE = MRI->use_end(); UI != UE;) { 654 MachineOperand &UseMO = UI.getOperand(); 655 MachineInstr *UseMI = &*UI; 656 ++UI; 657 if (UseMI->isDebugValue()) { 658 // FIXME These don't have an instruction index. Not clear we have enough 659 // info to decide whether to do this replacement or not. For now do it. 660 UseMO.setReg(NewReg); 661 continue; 662 } 663 SlotIndex UseIdx = LIS->getInstructionIndex(UseMI).getRegSlot(true); 664 LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx); 665 if (ULR == IntA.end() || ULR->valno != AValNo) 666 continue; 667 // Kill flags are no longer accurate. They are recomputed after RA. 668 UseMO.setIsKill(false); 669 if (TargetRegisterInfo::isPhysicalRegister(NewReg)) 670 UseMO.substPhysReg(NewReg, *TRI); 671 else 672 UseMO.setReg(NewReg); 673 if (UseMI == CopyMI) 674 continue; 675 if (!UseMI->isCopy()) 676 continue; 677 if (UseMI->getOperand(0).getReg() != IntB.reg || 678 UseMI->getOperand(0).getSubReg()) 679 continue; 680 681 // This copy will become a noop. If it's defining a new val#, merge it into 682 // BValNo. 683 SlotIndex DefIdx = UseIdx.getRegSlot(); 684 VNInfo *DVNI = IntB.getVNInfoAt(DefIdx); 685 if (!DVNI) 686 continue; 687 DEBUG(dbgs() << "\t\tnoop: " << DefIdx << '\t' << *UseMI); 688 assert(DVNI->def == DefIdx); 689 BValNo = IntB.MergeValueNumberInto(BValNo, DVNI); 690 ErasedInstrs.insert(UseMI); 691 LIS->RemoveMachineInstrFromMaps(UseMI); 692 UseMI->eraseFromParent(); 693 } 694 695 // Extend BValNo by merging in IntA live ranges of AValNo. Val# definition 696 // is updated. 
697 VNInfo *ValNo = BValNo; 698 ValNo->def = AValNo->def; 699 for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end(); 700 AI != AE; ++AI) { 701 if (AI->valno != AValNo) continue; 702 IntB.addRange(LiveRange(AI->start, AI->end, ValNo)); 703 } 704 DEBUG(dbgs() << "\t\textended: " << IntB << '\n'); 705 706 IntA.removeValNo(AValNo); 707 DEBUG(dbgs() << "\t\ttrimmed: " << IntA << '\n'); 708 ++numCommutes; 709 return true; 710} 711 712/// reMaterializeTrivialDef - If the source of a copy is defined by a trivial 713/// computation, replace the copy by rematerialize the definition. 714bool RegisterCoalescer::reMaterializeTrivialDef(LiveInterval &SrcInt, 715 unsigned DstReg, 716 MachineInstr *CopyMI) { 717 SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(true); 718 LiveInterval::iterator SrcLR = SrcInt.FindLiveRangeContaining(CopyIdx); 719 assert(SrcLR != SrcInt.end() && "Live range not found!"); 720 VNInfo *ValNo = SrcLR->valno; 721 if (ValNo->isPHIDef() || ValNo->isUnused()) 722 return false; 723 MachineInstr *DefMI = LIS->getInstructionFromIndex(ValNo->def); 724 if (!DefMI) 725 return false; 726 assert(DefMI && "Defining instruction disappeared"); 727 if (!DefMI->isAsCheapAsAMove()) 728 return false; 729 if (!TII->isTriviallyReMaterializable(DefMI, AA)) 730 return false; 731 bool SawStore = false; 732 if (!DefMI->isSafeToMove(TII, AA, SawStore)) 733 return false; 734 const MCInstrDesc &MCID = DefMI->getDesc(); 735 if (MCID.getNumDefs() != 1) 736 return false; 737 if (!DefMI->isImplicitDef()) { 738 // Make sure the copy destination register class fits the instruction 739 // definition register class. The mismatch can happen as a result of earlier 740 // extract_subreg, insert_subreg, subreg_to_reg coalescing. 
741 const TargetRegisterClass *RC = TII->getRegClass(MCID, 0, TRI, *MF); 742 if (TargetRegisterInfo::isVirtualRegister(DstReg)) { 743 if (MRI->getRegClass(DstReg) != RC) 744 return false; 745 } else if (!RC->contains(DstReg)) 746 return false; 747 } 748 749 MachineBasicBlock *MBB = CopyMI->getParent(); 750 MachineBasicBlock::iterator MII = 751 llvm::next(MachineBasicBlock::iterator(CopyMI)); 752 TII->reMaterialize(*MBB, MII, DstReg, 0, DefMI, *TRI); 753 MachineInstr *NewMI = prior(MII); 754 755 // NewMI may have dead implicit defs (E.g. EFLAGS for MOV<bits>r0 on X86). 756 // We need to remember these so we can add intervals once we insert 757 // NewMI into SlotIndexes. 758 SmallVector<unsigned, 4> NewMIImplDefs; 759 for (unsigned i = NewMI->getDesc().getNumOperands(), 760 e = NewMI->getNumOperands(); i != e; ++i) { 761 MachineOperand &MO = NewMI->getOperand(i); 762 if (MO.isReg()) { 763 assert(MO.isDef() && MO.isImplicit() && MO.isDead() && 764 TargetRegisterInfo::isPhysicalRegister(MO.getReg())); 765 NewMIImplDefs.push_back(MO.getReg()); 766 } 767 } 768 769 // CopyMI may have implicit operands, transfer them over to the newly 770 // rematerialized instruction. And update implicit def interval valnos. 771 for (unsigned i = CopyMI->getDesc().getNumOperands(), 772 e = CopyMI->getNumOperands(); i != e; ++i) { 773 MachineOperand &MO = CopyMI->getOperand(i); 774 if (MO.isReg()) { 775 assert(MO.isImplicit() && "No explicit operands after implict operands."); 776 // Discard VReg implicit defs. 
      // NOTE(review): this span is the tail of reMaterializeTrivialDef (see
      // the "Remat:" debug output and ++NumReMats below); its head is above
      // this chunk.
      if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
        NewMI->addOperand(MO);
      }
    }
  }

  LIS->ReplaceMachineInstrInMaps(CopyMI, NewMI);

  // Record the implicit physreg defs on the rematerialized instruction as
  // dead defs in the cached live ranges of the affected register units.
  SlotIndex NewMIIdx = LIS->getInstructionIndex(NewMI);
  for (unsigned i = 0, e = NewMIImplDefs.size(); i != e; ++i) {
    unsigned Reg = NewMIImplDefs[i];
    for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
      if (LiveInterval *LI = LIS->getCachedRegUnit(*Units))
        LI->createDeadDef(NewMIIdx.getRegSlot(), LIS->getVNInfoAllocator());
  }

  CopyMI->eraseFromParent();
  ErasedInstrs.insert(CopyMI);
  DEBUG(dbgs() << "Remat: " << *NewMI);
  ++NumReMats;

  // The source interval can become smaller because we removed a use.
  LIS->shrinkToUses(&SrcInt, &DeadDefs);
  if (!DeadDefs.empty())
    eliminateDeadDefs();

  return true;
}

/// eliminateUndefCopy - ProcessImplicitDefs may leave some copies of <undef>
/// values, it only removes local variables. When we have a copy like:
///
///   %vreg1 = COPY %vreg2<undef>
///
/// We delete the copy and remove the corresponding value number from %vreg1.
/// Any uses of that value number are marked as <undef>.
bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI,
                                           const CoalescerPair &CP) {
  SlotIndex Idx = LIS->getInstructionIndex(CopyMI);
  LiveInterval *SrcInt = &LIS->getInterval(CP.getSrcReg());
  if (SrcInt->liveAt(Idx))
    return false;
  LiveInterval *DstInt = &LIS->getInterval(CP.getDstReg());
  if (DstInt->liveAt(Idx))
    return false;

  // No intervals are live-in to CopyMI - it is undef.
  if (CP.isFlipped())
    DstInt = SrcInt;
  SrcInt = 0;

  VNInfo *DeadVNI = DstInt->getVNInfoAt(Idx.getRegSlot());
  assert(DeadVNI && "No value defined in DstInt");
  DstInt->removeValNo(DeadVNI);

  // Find new undef uses: any remaining use of DstInt->reg that is no longer
  // inside a live segment must now read an undefined value.
  for (MachineRegisterInfo::reg_nodbg_iterator
         I = MRI->reg_nodbg_begin(DstInt->reg), E = MRI->reg_nodbg_end();
       I != E; ++I) {
    MachineOperand &MO = I.getOperand();
    if (MO.isDef() || MO.isUndef())
      continue;
    MachineInstr *MI = MO.getParent();
    // NOTE(review): this Idx intentionally shadows the function-level Idx of
    // CopyMI; it is the index of the use instruction being inspected.
    SlotIndex Idx = LIS->getInstructionIndex(MI);
    if (DstInt->liveAt(Idx))
      continue;
    MO.setIsUndef(true);
    DEBUG(dbgs() << "\tnew undef: " << Idx << '\t' << *MI);
  }
  return true;
}

/// updateRegDefsUses - Replace all defs and uses of SrcReg to DstReg and
/// update the subregister number if it is not zero. If DstReg is a
/// physical register and the existing subregister number of the def / use
/// being updated is not zero, make sure to set it to the correct physical
/// subregister.
void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg,
                                          unsigned DstReg,
                                          unsigned SubIdx) {
  bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg);
  LiveInterval *DstInt = DstIsPhys ? 0 : &LIS->getInterval(DstReg);

  // Update LiveDebugVariables.
  LDV->renameRegister(SrcReg, DstReg, SubIdx);

  // skipInstruction() advances the use-def iterator past the current
  // instruction before we rewrite its operands, so rewriting cannot
  // invalidate the iteration.
  SmallPtrSet<MachineInstr*, 8> Visited;
  for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(SrcReg);
       MachineInstr *UseMI = I.skipInstruction();) {
    // Each instruction can only be rewritten once because sub-register
    // composition is not always idempotent. When SrcReg != DstReg, rewriting
    // the UseMI operands removes them from the SrcReg use-def chain, but when
    // SrcReg is DstReg we could encounter UseMI twice if it has multiple
    // operands mentioning the virtual register.
    if (SrcReg == DstReg && !Visited.insert(UseMI))
      continue;

    SmallVector<unsigned,8> Ops;
    bool Reads, Writes;
    tie(Reads, Writes) = UseMI->readsWritesVirtualRegister(SrcReg, &Ops);

    // If SrcReg wasn't read, it may still be the case that DstReg is live-in
    // because SrcReg is a sub-register.
    if (DstInt && !Reads && SubIdx)
      Reads = DstInt->liveAt(LIS->getInstructionIndex(UseMI));

    // Replace SrcReg with DstReg in all UseMI operands.
    for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
      MachineOperand &MO = UseMI->getOperand(Ops[i]);

      // Adjust <undef> flags in case of sub-register joins. We don't want to
      // turn a full def into a read-modify-write sub-register def and vice
      // versa.
      if (SubIdx && MO.isDef())
        MO.setIsUndef(!Reads);

      if (DstIsPhys)
        MO.substPhysReg(DstReg, *TRI);
      else
        MO.substVirtReg(DstReg, SubIdx, *TRI);
    }

    DEBUG({
      dbgs() << "\t\tupdated: ";
      if (!UseMI->isDebugValue())
        dbgs() << LIS->getInstructionIndex(UseMI) << "\t";
      dbgs() << *UseMI;
    });
  }
}

/// canJoinPhys - Return true if a copy involving a physreg should be joined.
bool RegisterCoalescer::canJoinPhys(CoalescerPair &CP) {
  // Always join simple intervals that are defined by a single copy from a
  // reserved register. This doesn't increase register pressure, so it is
  // always beneficial.
  if (!MRI->isReserved(CP.getDstReg())) {
    DEBUG(dbgs() << "\tCan only merge into reserved registers.\n");
    return false;
  }

  LiveInterval &JoinVInt = LIS->getInterval(CP.getSrcReg());
  if (CP.isFlipped() && JoinVInt.containsOneValue())
    return true;

  DEBUG(dbgs() << "\tCannot join defs into reserved register.\n");
  return false;
}

/// joinCopy - Attempt to join intervals corresponding to SrcReg/DstReg,
/// which are the src/dst of the copy instruction CopyMI.  This returns true
/// if the copy was successfully coalesced away. If it is not currently
/// possible to coalesce this interval, but it may be possible if other
/// things get coalesced, then it returns true by reference in 'Again'.
bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {

  Again = false;
  DEBUG(dbgs() << LIS->getInstructionIndex(CopyMI) << '\t' << *CopyMI);

  CoalescerPair CP(*TRI);
  if (!CP.setRegisters(CopyMI)) {
    DEBUG(dbgs() << "\tNot coalescable.\n");
    return false;
  }

  // Dead code elimination. This really should be handled by MachineDCE, but
  // sometimes dead copies slip through, and we can't generate invalid live
  // ranges.
  if (!CP.isPhys() && CopyMI->allDefsAreDead()) {
    DEBUG(dbgs() << "\tCopy is dead.\n");
    DeadDefs.push_back(CopyMI);
    eliminateDeadDefs();
    return true;
  }

  // Eliminate undefs. If the copy was of an <undef> value, it is deleted
  // outright, so report the pair as not coalescable.
  if (!CP.isPhys() && eliminateUndefCopy(CopyMI, CP)) {
    DEBUG(dbgs() << "\tEliminated copy of <undef> value.\n");
    LIS->RemoveMachineInstrFromMaps(CopyMI);
    CopyMI->eraseFromParent();
    return false; // Not coalescable.
  }

  // Coalesced copies are normally removed immediately, but transformations
  // like removeCopyByCommutingDef() can inadvertently create identity copies.
  // When that happens, just join the values and remove the copy.
  if (CP.getSrcReg() == CP.getDstReg()) {
    LiveInterval &LI = LIS->getInterval(CP.getSrcReg());
    DEBUG(dbgs() << "\tCopy already coalesced: " << LI << '\n');
    LiveRangeQuery LRQ(LI, LIS->getInstructionIndex(CopyMI));
    if (VNInfo *DefVNI = LRQ.valueDefined()) {
      VNInfo *ReadVNI = LRQ.valueIn();
      assert(ReadVNI && "No value before copy and no <undef> flag.");
      assert(ReadVNI != DefVNI && "Cannot read and define the same value.");
      LI.MergeValueNumberInto(DefVNI, ReadVNI);
      DEBUG(dbgs() << "\tMerged values: " << LI << '\n');
    }
    LIS->RemoveMachineInstrFromMaps(CopyMI);
    CopyMI->eraseFromParent();
    return true;
  }

  // Enforce policies.
  if (CP.isPhys()) {
    DEBUG(dbgs() << "\tConsidering merging " << PrintReg(CP.getSrcReg(), TRI)
                 << " with " << PrintReg(CP.getDstReg(), TRI, CP.getSrcIdx())
                 << '\n');
    if (!canJoinPhys(CP)) {
      // Before giving up coalescing, if definition of source is defined by
      // trivial computation, try rematerializing it.
      if (!CP.isFlipped() &&
          reMaterializeTrivialDef(LIS->getInterval(CP.getSrcReg()),
                                  CP.getDstReg(), CopyMI))
        return true;
      return false;
    }
  } else {
    DEBUG({
      dbgs() << "\tConsidering merging to " << CP.getNewRC()->getName()
             << " with ";
      if (CP.getDstIdx() && CP.getSrcIdx())
        dbgs() << PrintReg(CP.getDstReg()) << " in "
               << TRI->getSubRegIndexName(CP.getDstIdx()) << " and "
               << PrintReg(CP.getSrcReg()) << " in "
               << TRI->getSubRegIndexName(CP.getSrcIdx()) << '\n';
      else
        dbgs() << PrintReg(CP.getSrcReg(), TRI) << " in "
               << PrintReg(CP.getDstReg(), TRI, CP.getSrcIdx()) << '\n';
    });

    // When possible, let DstReg be the larger interval. Fewer segments in
    // SrcReg means less live-range bookkeeping during the merge.
    if (!CP.isPartial() && LIS->getInterval(CP.getSrcReg()).ranges.size() >
                           LIS->getInterval(CP.getDstReg()).ranges.size())
      CP.flip();
  }

  // Okay, attempt to join these two intervals. On failure, this returns false.
  // Otherwise, if one of the intervals being joined is a physreg, this method
  // always canonicalizes DstInt to be it. The output "SrcInt" will not have
  // been modified, so we can use this information below to update aliases.
  if (!joinIntervals(CP)) {
    // Coalescing failed.

    // If definition of source is defined by trivial computation, try
    // rematerializing it.
    if (!CP.isFlipped() &&
        reMaterializeTrivialDef(LIS->getInterval(CP.getSrcReg()),
                                CP.getDstReg(), CopyMI))
      return true;

    // If we can eliminate the copy without merging the live ranges, do so now.
    if (!CP.isPartial() && !CP.isPhys()) {
      if (adjustCopiesBackFrom(CP, CopyMI) ||
          removeCopyByCommutingDef(CP, CopyMI)) {
        LIS->RemoveMachineInstrFromMaps(CopyMI);
        CopyMI->eraseFromParent();
        DEBUG(dbgs() << "\tTrivial!\n");
        return true;
      }
    }

    // Otherwise, we are unable to join the intervals.
    DEBUG(dbgs() << "\tInterference!\n");
    Again = true; // May be possible to coalesce later.
    return false;
  }

  // Coalescing to a virtual register that is of a sub-register class of the
  // other. Make sure the resulting register is set to the right register class.
  if (CP.isCrossClass()) {
    ++numCrossRCs;
    MRI->setRegClass(CP.getDstReg(), CP.getNewRC());
  }

  // Removing sub-register copies can ease the register class constraints.
  // Make sure we attempt to inflate the register class of DstReg.
  if (!CP.isPhys() && RegClassInfo.isProperSubClass(CP.getNewRC()))
    InflateRegs.push_back(CP.getDstReg());

  // CopyMI has been erased by joinIntervals at this point. Remove it from
  // ErasedInstrs since copyCoalesceWorkList() won't add a successful join back
  // to the work list. This keeps ErasedInstrs from growing needlessly.
  ErasedInstrs.erase(CopyMI);

  // Rewrite all SrcReg operands to DstReg.
  // Also update DstReg operands to include DstIdx if it is set.
  if (CP.getDstIdx())
    updateRegDefsUses(CP.getDstReg(), CP.getDstReg(), CP.getDstIdx());
  updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx());

  // SrcReg is guaranteed to be the register whose live interval that is
  // being merged.
  LIS->removeInterval(CP.getSrcReg());

  // Update regalloc hint.
  TRI->UpdateRegAllocHint(CP.getSrcReg(), CP.getDstReg(), *MF);

  DEBUG({
    dbgs() << "\tJoined. Result = " << PrintReg(CP.getDstReg(), TRI);
    if (!CP.isPhys())
      dbgs() << LIS->getInterval(CP.getDstReg());
    dbgs() << '\n';
  });

  ++numJoins;
  return true;
}

/// Attempt joining with a reserved physreg.
bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {
  assert(CP.isPhys() && "Must be a physreg copy");
  assert(MRI->isReserved(CP.getDstReg()) && "Not a reserved register");
  LiveInterval &RHS = LIS->getInterval(CP.getSrcReg());
  DEBUG(dbgs() << "\t\tRHS = " << PrintReg(CP.getSrcReg()) << ' ' << RHS
               << '\n');

  assert(CP.isFlipped() && RHS.containsOneValue() &&
         "Invalid join with reserved register");

  // Optimization for reserved registers like ESP. We can only merge with a
  // reserved physreg if RHS has a single value that is a copy of CP.DstReg().
  // The live range of the reserved register will look like a set of dead defs
  // - we don't properly track the live range of reserved registers.

  // Deny any overlapping intervals. This depends on all the reserved
  // register live ranges to look like dead defs.
  for (MCRegUnitIterator UI(CP.getDstReg(), TRI); UI.isValid(); ++UI)
    if (RHS.overlaps(LIS->getRegUnit(*UI))) {
      DEBUG(dbgs() << "\t\tInterference: " << PrintRegUnit(*UI, TRI) << '\n');
      return false;
    }

  // Skip any value computations, we are not adding new values to the
  // reserved register. Also skip merging the live ranges, the reserved
  // register live range doesn't need to be accurate as long as all the
  // defs are there.

  // Delete the identity copy.
  MachineInstr *CopyMI = MRI->getVRegDef(RHS.reg);
  LIS->RemoveMachineInstrFromMaps(CopyMI);
  CopyMI->eraseFromParent();

  // We don't track kills for reserved registers.
  MRI->clearKillFlags(CP.getSrcReg());

  return true;
}

//===----------------------------------------------------------------------===//
//                 Interference checking and interval joining
//===----------------------------------------------------------------------===//
//
// In the easiest case, the two live ranges being joined are disjoint, and
// there is no interference to consider. It is quite common, though, to have
// overlapping live ranges, and we need to check if the interference can be
// resolved.
//
// The live range of a single SSA value forms a sub-tree of the dominator tree.
// This means that two SSA values overlap if and only if the def of one value
// is contained in the live range of the other value. As a special case, the
// overlapping values can be defined at the same index.
//
// The interference from an overlapping def can be resolved in these cases:
//
// 1. Coalescable copies. The value is defined by a copy that would become an
//    identity copy after joining SrcReg and DstReg. The copy instruction will
//    be removed, and the value will be merged with the source value.
//
//    There can be several copies back and forth, causing many values to be
//    merged into one. We compute a list of ultimate values in the joined live
//    range as well as a mapping from the old value numbers.
//
// 2. IMPLICIT_DEF. This instruction is only inserted to ensure all PHI
//    predecessors have a live out value. It doesn't cause real interference,
//    and can be merged into the value it overlaps. Like a coalescable copy, it
//    can be erased after joining.
//
// 3. Copy of external value. The overlapping def may be a copy of a value that
//    is already in the other register. This is like a coalescable copy, but
//    the live range of the source register must be trimmed after erasing the
//    copy instruction:
//
//      %src = COPY %ext
//      %dst = COPY %ext  <-- Remove this COPY, trim the live range of %ext.
//
// 4. Clobbering undefined lanes. Vector registers are sometimes built by
//    defining one lane at a time:
//
//      %dst:ssub0<def,read-undef> = FOO
//      %src = BAR
//      %dst:ssub1<def> = COPY %src
//
//    The live range of %src overlaps the %dst value defined by FOO, but
//    merging %src into %dst:ssub1 is only going to clobber the ssub1 lane
//    which was undef anyway.
//
//    The value mapping is more complicated in this case. The final live range
//    will have different value numbers for both FOO and BAR, but there is no
//    simple mapping from old to new values. It may even be necessary to add
//    new PHI values.
//
// 5. Clobbering dead lanes. A def may clobber a lane of a vector register that
//    is live, but never read. This can happen because we don't compute
//    individual live ranges per lane.
//
//      %dst<def> = FOO
//      %src = BAR
//      %dst:ssub1<def> = COPY %src
//
//    This kind of interference is only resolved locally. If the clobbered
//    lane value escapes the block, the join is aborted.

namespace {
/// Track information about values in a single virtual register about to be
/// joined. Objects of this class are always created in pairs - one for each
/// side of the CoalescerPair.
class JoinVals {
  LiveInterval &LI;

  // Location of this register in the final joined register.
  // Either CP.DstIdx or CP.SrcIdx.
  unsigned SubIdx;

  // Values that will be present in the final live range.
  SmallVectorImpl<VNInfo*> &NewVNInfo;

  const CoalescerPair &CP;
  LiveIntervals *LIS;
  SlotIndexes *Indexes;
  const TargetRegisterInfo *TRI;

  // Value number assignments. Maps value numbers in LI to entries in NewVNInfo.
  // This is suitable for passing to LiveInterval::join().
  SmallVector<int, 8> Assignments;

  // Conflict resolution for overlapping values.
  enum ConflictResolution {
    // No overlap, simply keep this value.
    CR_Keep,

    // Merge this value into OtherVNI and erase the defining instruction.
    // Used for IMPLICIT_DEF, coalescable copies, and copies from external
    // values.
    CR_Erase,

    // Merge this value into OtherVNI but keep the defining instruction.
    // This is for the special case where OtherVNI is defined by the same
    // instruction.
    CR_Merge,

    // Keep this value, and have it replace OtherVNI where possible. This
    // complicates value mapping since OtherVNI maps to two different values
    // before and after this def.
    // Used when clobbering undefined or dead lanes.
    CR_Replace,

    // Unresolved conflict. Visit later when all values have been mapped.
    CR_Unresolved,

    // Unresolvable conflict. Abort the join.
    CR_Impossible
  };

  // Per-value info for LI. The lane bit masks are all relative to the final
  // joined register, so they can be compared directly between SrcReg and
  // DstReg.
  struct Val {
    ConflictResolution Resolution;

    // Lanes written by this def, 0 for unanalyzed values.
    unsigned WriteLanes;

    // Lanes with defined values in this register. Other lanes are undef and
    // safe to clobber.
    unsigned ValidLanes;

    // Value in LI being redefined by this def.
    VNInfo *RedefVNI;

    // Value in the other live range that overlaps this def, if any.
    VNInfo *OtherVNI;

    // Is this value an IMPLICIT_DEF?
    bool IsImplicitDef;

    // True when the live range of this value will be pruned because of an
    // overlapping CR_Replace value in the other live range.
    bool Pruned;

    // True once Pruned above has been computed.
    bool PrunedComputed;

    Val() : Resolution(CR_Keep), WriteLanes(0), ValidLanes(0),
            RedefVNI(0), OtherVNI(0), IsImplicitDef(false), Pruned(false),
            PrunedComputed(false) {}

    // A value is considered analyzed once its write-lane mask is non-zero
    // (unused values are marked with ~0u, see analyzeValue()).
    bool isAnalyzed() const { return WriteLanes != 0; }
  };

  // One entry per value number in LI.
  SmallVector<Val, 8> Vals;

  unsigned computeWriteLanes(const MachineInstr *DefMI, bool &Redef);
  VNInfo *stripCopies(VNInfo *VNI);
  ConflictResolution analyzeValue(unsigned ValNo, JoinVals &Other);
  void computeAssignment(unsigned ValNo, JoinVals &Other);
  bool taintExtent(unsigned, unsigned, JoinVals&,
                   SmallVectorImpl<std::pair<SlotIndex, unsigned> >&);
  bool usesLanes(MachineInstr *MI, unsigned, unsigned, unsigned);
  bool isPrunedValue(unsigned ValNo, JoinVals &Other);

public:
  JoinVals(LiveInterval &li, unsigned subIdx,
           SmallVectorImpl<VNInfo*> &newVNInfo,
           const CoalescerPair &cp,
           LiveIntervals *lis,
           const TargetRegisterInfo *tri)
    : LI(li), SubIdx(subIdx), NewVNInfo(newVNInfo), CP(cp), LIS(lis),
      Indexes(LIS->getSlotIndexes()), TRI(tri),
      Assignments(LI.getNumValNums(), -1), Vals(LI.getNumValNums())
  {}

  /// Analyze defs in LI and compute a value mapping in NewVNInfo.
  /// Returns false if any conflicts were impossible to resolve.
  bool mapValues(JoinVals &Other);

  /// Try to resolve conflicts that require all values to be mapped.
  /// Returns false if any conflicts were impossible to resolve.
  bool resolveConflicts(JoinVals &Other);

  /// Prune the live range of values in Other.LI where they would conflict with
  /// CR_Replace values in LI. Collect end points for restoring the live range
  /// after joining.
  void pruneValues(JoinVals &Other, SmallVectorImpl<SlotIndex> &EndPoints);

  /// Erase any machine instructions that have been coalesced away.
  /// Add erased instructions to ErasedInstrs.
  /// Add foreign virtual registers to ShrinkRegs if their live range ended at
  /// the erased instrs.
  void eraseInstrs(SmallPtrSet<MachineInstr*, 8> &ErasedInstrs,
                   SmallVectorImpl<unsigned> &ShrinkRegs);

  /// Get the value assignments suitable for passing to LiveInterval::join.
  const int *getAssignments() const { return Assignments.data(); }
};
} // end anonymous namespace

/// Compute the bitmask of lanes actually written by DefMI.
/// Set Redef if there are any partial register definitions that depend on the
/// previous value of the register.
unsigned JoinVals::computeWriteLanes(const MachineInstr *DefMI, bool &Redef) {
  unsigned L = 0;
  for (ConstMIOperands MO(DefMI); MO.isValid(); ++MO) {
    if (!MO->isReg() || MO->getReg() != LI.reg || !MO->isDef())
      continue;
    L |= TRI->getSubRegIndexLaneMask(compose(*TRI, SubIdx, MO->getSubReg()));
    if (MO->readsReg())
      Redef = true;
  }
  return L;
}

/// Find the ultimate value that VNI was copied from.
VNInfo *JoinVals::stripCopies(VNInfo *VNI) {
  while (!VNI->isPHIDef()) {
    MachineInstr *MI = Indexes->getInstructionFromIndex(VNI->def);
    assert(MI && "No defining instruction");
    if (!MI->isFullCopy())
      break;
    unsigned Reg = MI->getOperand(1).getReg();
    if (!TargetRegisterInfo::isVirtualRegister(Reg))
      break;
    LiveRangeQuery LRQ(LIS->getInterval(Reg), VNI->def);
    if (!LRQ.valueIn())
      break;
    VNI = LRQ.valueIn();
  }
  return VNI;
}

/// Analyze ValNo in this live range, and set all fields of Vals[ValNo].
1358/// Return a conflict resolution when possible, but leave the hard cases as 1359/// CR_Unresolved. 1360/// Recursively calls computeAssignment() on this and Other, guaranteeing that 1361/// both OtherVNI and RedefVNI have been analyzed and mapped before returning. 1362/// The recursion always goes upwards in the dominator tree, making loops 1363/// impossible. 1364JoinVals::ConflictResolution 1365JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) { 1366 Val &V = Vals[ValNo]; 1367 assert(!V.isAnalyzed() && "Value has already been analyzed!"); 1368 VNInfo *VNI = LI.getValNumInfo(ValNo); 1369 if (VNI->isUnused()) { 1370 V.WriteLanes = ~0u; 1371 return CR_Keep; 1372 } 1373 1374 // Get the instruction defining this value, compute the lanes written. 1375 const MachineInstr *DefMI = 0; 1376 if (VNI->isPHIDef()) { 1377 // Conservatively assume that all lanes in a PHI are valid. 1378 V.ValidLanes = V.WriteLanes = TRI->getSubRegIndexLaneMask(SubIdx); 1379 } else { 1380 DefMI = Indexes->getInstructionFromIndex(VNI->def); 1381 bool Redef = false; 1382 V.ValidLanes = V.WriteLanes = computeWriteLanes(DefMI, Redef); 1383 1384 // If this is a read-modify-write instruction, there may be more valid 1385 // lanes than the ones written by this instruction. 1386 // This only covers partial redef operands. DefMI may have normal use 1387 // operands reading the register. They don't contribute valid lanes. 1388 // 1389 // This adds ssub1 to the set of valid lanes in %src: 1390 // 1391 // %src:ssub1<def> = FOO 1392 // 1393 // This leaves only ssub1 valid, making any other lanes undef: 1394 // 1395 // %src:ssub1<def,read-undef> = FOO %src:ssub2 1396 // 1397 // The <read-undef> flag on the def operand means that old lane values are 1398 // not important. 
1399 if (Redef) { 1400 V.RedefVNI = LiveRangeQuery(LI, VNI->def).valueIn(); 1401 assert(V.RedefVNI && "Instruction is reading nonexistent value"); 1402 computeAssignment(V.RedefVNI->id, Other); 1403 V.ValidLanes |= Vals[V.RedefVNI->id].ValidLanes; 1404 } 1405 1406 // An IMPLICIT_DEF writes undef values. 1407 if (DefMI->isImplicitDef()) { 1408 V.IsImplicitDef = true; 1409 V.ValidLanes &= ~V.WriteLanes; 1410 } 1411 } 1412 1413 // Find the value in Other that overlaps VNI->def, if any. 1414 LiveRangeQuery OtherLRQ(Other.LI, VNI->def); 1415 1416 // It is possible that both values are defined by the same instruction, or 1417 // the values are PHIs defined in the same block. When that happens, the two 1418 // values should be merged into one, but not into any preceding value. 1419 // The first value defined or visited gets CR_Keep, the other gets CR_Merge. 1420 if (VNInfo *OtherVNI = OtherLRQ.valueDefined()) { 1421 assert(SlotIndex::isSameInstr(VNI->def, OtherVNI->def) && "Broken LRQ"); 1422 1423 // One value stays, the other is merged. Keep the earlier one, or the first 1424 // one we see. 1425 if (OtherVNI->def < VNI->def) 1426 Other.computeAssignment(OtherVNI->id, *this); 1427 else if (VNI->def < OtherVNI->def && OtherLRQ.valueIn()) { 1428 // This is an early-clobber def overlapping a live-in value in the other 1429 // register. Not mergeable. 1430 V.OtherVNI = OtherLRQ.valueIn(); 1431 return CR_Impossible; 1432 } 1433 V.OtherVNI = OtherVNI; 1434 Val &OtherV = Other.Vals[OtherVNI->id]; 1435 // Keep this value, check for conflicts when analyzing OtherVNI. 1436 if (!OtherV.isAnalyzed()) 1437 return CR_Keep; 1438 // Both sides have been analyzed now. 1439 // Allow overlapping PHI values. Any real interference would show up in a 1440 // predecessor, the PHI itself can't introduce any conflicts. 1441 if (VNI->isPHIDef()) 1442 return CR_Merge; 1443 if (V.ValidLanes & OtherV.ValidLanes) 1444 // Overlapping lanes can't be resolved. 
1445 return CR_Impossible; 1446 else 1447 return CR_Merge; 1448 } 1449 1450 // No simultaneous def. Is Other live at the def? 1451 V.OtherVNI = OtherLRQ.valueIn(); 1452 if (!V.OtherVNI) 1453 // No overlap, no conflict. 1454 return CR_Keep; 1455 1456 assert(!SlotIndex::isSameInstr(VNI->def, V.OtherVNI->def) && "Broken LRQ"); 1457 1458 // We have overlapping values, or possibly a kill of Other. 1459 // Recursively compute assignments up the dominator tree. 1460 Other.computeAssignment(V.OtherVNI->id, *this); 1461 const Val &OtherV = Other.Vals[V.OtherVNI->id]; 1462 1463 // Allow overlapping PHI values. Any real interference would show up in a 1464 // predecessor, the PHI itself can't introduce any conflicts. 1465 if (VNI->isPHIDef()) 1466 return CR_Replace; 1467 1468 // Check for simple erasable conflicts. 1469 if (DefMI->isImplicitDef()) 1470 return CR_Erase; 1471 1472 // Include the non-conflict where DefMI is a coalescable copy that kills 1473 // OtherVNI. We still want the copy erased and value numbers merged. 1474 if (CP.isCoalescable(DefMI)) { 1475 // Some of the lanes copied from OtherVNI may be undef, making them undef 1476 // here too. 1477 V.ValidLanes &= ~V.WriteLanes | OtherV.ValidLanes; 1478 return CR_Erase; 1479 } 1480 1481 // This may not be a real conflict if DefMI simply kills Other and defines 1482 // VNI. 1483 if (OtherLRQ.isKill() && OtherLRQ.endPoint() <= VNI->def) 1484 return CR_Keep; 1485 1486 // Handle the case where VNI and OtherVNI can be proven to be identical: 1487 // 1488 // %other = COPY %ext 1489 // %this = COPY %ext <-- Erase this copy 1490 // 1491 if (DefMI->isFullCopy() && !CP.isPartial() && 1492 stripCopies(VNI) == stripCopies(V.OtherVNI)) 1493 return CR_Erase; 1494 1495 // If the lanes written by this instruction were all undef in OtherVNI, it is 1496 // still safe to join the live ranges. 
This can't be done with a simple value 1497 // mapping, though - OtherVNI will map to multiple values: 1498 // 1499 // 1 %dst:ssub0 = FOO <-- OtherVNI 1500 // 2 %src = BAR <-- VNI 1501 // 3 %dst:ssub1 = COPY %src<kill> <-- Eliminate this copy. 1502 // 4 BAZ %dst<kill> 1503 // 5 QUUX %src<kill> 1504 // 1505 // Here OtherVNI will map to itself in [1;2), but to VNI in [2;5). CR_Replace 1506 // handles this complex value mapping. 1507 if ((V.WriteLanes & OtherV.ValidLanes) == 0) 1508 return CR_Replace; 1509 1510 // If the other live range is killed by DefMI and the live ranges are still 1511 // overlapping, it must be because we're looking at an early clobber def: 1512 // 1513 // %dst<def,early-clobber> = ASM %src<kill> 1514 // 1515 // In this case, it is illegal to merge the two live ranges since the early 1516 // clobber def would clobber %src before it was read. 1517 if (OtherLRQ.isKill()) { 1518 // This case where the def doesn't overlap the kill is handled above. 1519 assert(VNI->def.isEarlyClobber() && 1520 "Only early clobber defs can overlap a kill"); 1521 return CR_Impossible; 1522 } 1523 1524 // VNI is clobbering live lanes in OtherVNI, but there is still the 1525 // possibility that no instructions actually read the clobbered lanes. 1526 // If we're clobbering all the lanes in OtherVNI, at least one must be read. 1527 // Otherwise Other.LI wouldn't be live here. 1528 if ((TRI->getSubRegIndexLaneMask(Other.SubIdx) & ~V.WriteLanes) == 0) 1529 return CR_Impossible; 1530 1531 // We need to verify that no instructions are reading the clobbered lanes. To 1532 // save compile time, we'll only check that locally. Don't allow the tainted 1533 // value to escape the basic block. 
1534 MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def); 1535 if (OtherLRQ.endPoint() >= Indexes->getMBBEndIdx(MBB)) 1536 return CR_Impossible; 1537 1538 // There are still some things that could go wrong besides clobbered lanes 1539 // being read, for example OtherVNI may be only partially redefined in MBB, 1540 // and some clobbered lanes could escape the block. Save this analysis for 1541 // resolveConflicts() when all values have been mapped. We need to know 1542 // RedefVNI and WriteLanes for any later defs in MBB, and we can't compute 1543 // that now - the recursive analyzeValue() calls must go upwards in the 1544 // dominator tree. 1545 return CR_Unresolved; 1546} 1547 1548/// Compute the value assignment for ValNo in LI. 1549/// This may be called recursively by analyzeValue(), but never for a ValNo on 1550/// the stack. 1551void JoinVals::computeAssignment(unsigned ValNo, JoinVals &Other) { 1552 Val &V = Vals[ValNo]; 1553 if (V.isAnalyzed()) { 1554 // Recursion should always move up the dominator tree, so ValNo is not 1555 // supposed to reappear before it has been assigned. 1556 assert(Assignments[ValNo] != -1 && "Bad recursion?"); 1557 return; 1558 } 1559 switch ((V.Resolution = analyzeValue(ValNo, Other))) { 1560 case CR_Erase: 1561 case CR_Merge: 1562 // Merge this ValNo into OtherVNI. 1563 assert(V.OtherVNI && "OtherVNI not assigned, can't merge."); 1564 assert(Other.Vals[V.OtherVNI->id].isAnalyzed() && "Missing recursion"); 1565 Assignments[ValNo] = Other.Assignments[V.OtherVNI->id]; 1566 DEBUG(dbgs() << "\t\tmerge " << PrintReg(LI.reg) << ':' << ValNo << '@' 1567 << LI.getValNumInfo(ValNo)->def << " into " 1568 << PrintReg(Other.LI.reg) << ':' << V.OtherVNI->id << '@' 1569 << V.OtherVNI->def << " --> @" 1570 << NewVNInfo[Assignments[ValNo]]->def << '\n'); 1571 break; 1572 case CR_Replace: 1573 case CR_Unresolved: 1574 // The other value is going to be pruned if this join is successful. 
1575 assert(V.OtherVNI && "OtherVNI not assigned, can't prune"); 1576 Other.Vals[V.OtherVNI->id].Pruned = true; 1577 // Fall through. 1578 default: 1579 // This value number needs to go in the final joined live range. 1580 Assignments[ValNo] = NewVNInfo.size(); 1581 NewVNInfo.push_back(LI.getValNumInfo(ValNo)); 1582 break; 1583 } 1584} 1585 1586bool JoinVals::mapValues(JoinVals &Other) { 1587 for (unsigned i = 0, e = LI.getNumValNums(); i != e; ++i) { 1588 computeAssignment(i, Other); 1589 if (Vals[i].Resolution == CR_Impossible) { 1590 DEBUG(dbgs() << "\t\tinterference at " << PrintReg(LI.reg) << ':' << i 1591 << '@' << LI.getValNumInfo(i)->def << '\n'); 1592 return false; 1593 } 1594 } 1595 return true; 1596} 1597 1598/// Assuming ValNo is going to clobber some valid lanes in Other.LI, compute 1599/// the extent of the tainted lanes in the block. 1600/// 1601/// Multiple values in Other.LI can be affected since partial redefinitions can 1602/// preserve previously tainted lanes. 1603/// 1604/// 1 %dst = VLOAD <-- Define all lanes in %dst 1605/// 2 %src = FOO <-- ValNo to be joined with %dst:ssub0 1606/// 3 %dst:ssub1 = BAR <-- Partial redef doesn't clear taint in ssub0 1607/// 4 %dst:ssub0 = COPY %src <-- Conflict resolved, ssub0 wasn't read 1608/// 1609/// For each ValNo in Other that is affected, add an (EndIndex, TaintedLanes) 1610/// entry to TaintedVals. 1611/// 1612/// Returns false if the tainted lanes extend beyond the basic block. 1613bool JoinVals:: 1614taintExtent(unsigned ValNo, unsigned TaintedLanes, JoinVals &Other, 1615 SmallVectorImpl<std::pair<SlotIndex, unsigned> > &TaintExtent) { 1616 VNInfo *VNI = LI.getValNumInfo(ValNo); 1617 MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def); 1618 SlotIndex MBBEnd = Indexes->getMBBEndIdx(MBB); 1619 1620 // Scan Other.LI from VNI.def to MBBEnd. 
1621 LiveInterval::iterator OtherI = Other.LI.find(VNI->def); 1622 assert(OtherI != Other.LI.end() && "No conflict?"); 1623 do { 1624 // OtherI is pointing to a tainted value. Abort the join if the tainted 1625 // lanes escape the block. 1626 SlotIndex End = OtherI->end; 1627 if (End >= MBBEnd) { 1628 DEBUG(dbgs() << "\t\ttaints global " << PrintReg(Other.LI.reg) << ':' 1629 << OtherI->valno->id << '@' << OtherI->start << '\n'); 1630 return false; 1631 } 1632 DEBUG(dbgs() << "\t\ttaints local " << PrintReg(Other.LI.reg) << ':' 1633 << OtherI->valno->id << '@' << OtherI->start 1634 << " to " << End << '\n'); 1635 // A dead def is not a problem. 1636 if (End.isDead()) 1637 break; 1638 TaintExtent.push_back(std::make_pair(End, TaintedLanes)); 1639 1640 // Check for another def in the MBB. 1641 if (++OtherI == Other.LI.end() || OtherI->start >= MBBEnd) 1642 break; 1643 1644 // Lanes written by the new def are no longer tainted. 1645 const Val &OV = Other.Vals[OtherI->valno->id]; 1646 TaintedLanes &= ~OV.WriteLanes; 1647 if (!OV.RedefVNI) 1648 break; 1649 } while (TaintedLanes); 1650 return true; 1651} 1652 1653/// Return true if MI uses any of the given Lanes from Reg. 1654/// This does not include partial redefinitions of Reg. 
bool JoinVals::usesLanes(MachineInstr *MI, unsigned Reg, unsigned SubIdx,
                         unsigned Lanes) {
  // DBG_VALUE instructions never constrain coalescing; ignore them.
  if (MI->isDebugValue())
    return false;
  // Scan all operands of MI for reads of Reg.
  for (ConstMIOperands MO(MI); MO.isValid(); ++MO) {
    // Only interested in use operands of Reg itself.
    if (!MO->isReg() || MO->isDef() || MO->getReg() != Reg)
      continue;
    // Skip operands that don't actually read the register (e.g. <def,undef>
    // style operands filtered by readsReg()).
    if (!MO->readsReg())
      continue;
    // Translate the operand's sub-register index into the joined register's
    // lane mask space by composing it with SubIdx, then test for overlap
    // with the queried Lanes.
    if (Lanes &
        TRI->getSubRegIndexLaneMask(compose(*TRI, SubIdx, MO->getSubReg())))
      return true;
  }
  return false;
}

/// resolveConflicts - Attempt to downgrade CR_Unresolved conflicts to
/// CR_Replace. A conflict is resolvable when the lanes clobbered by this
/// value are never read from the other register before the other register
/// is redefined (all within a single basic block). Returns false if any
/// conflict cannot be resolved, aborting the join.
bool JoinVals::resolveConflicts(JoinVals &Other) {
  for (unsigned i = 0, e = LI.getNumValNums(); i != e; ++i) {
    Val &V = Vals[i];
    assert (V.Resolution != CR_Impossible && "Unresolvable conflict");
    if (V.Resolution != CR_Unresolved)
      continue;
    DEBUG(dbgs() << "\t\tconflict at " << PrintReg(LI.reg) << ':' << i
                 << '@' << LI.getValNumInfo(i)->def << '\n');
    ++NumLaneConflicts;
    assert(V.OtherVNI && "Inconsistent conflict resolution.");
    VNInfo *VNI = LI.getValNumInfo(i);
    const Val &OtherV = Other.Vals[V.OtherVNI->id];

    // VNI is known to clobber some lanes in OtherVNI. If we go ahead with the
    // join, those lanes will be tainted with a wrong value. Get the extent of
    // the tainted lanes.
    unsigned TaintedLanes = V.WriteLanes & OtherV.ValidLanes;
    SmallVector<std::pair<SlotIndex, unsigned>, 8> TaintExtent;
    if (!taintExtent(i, TaintedLanes, Other, TaintExtent))
      // Tainted lanes would extend beyond the basic block.
      return false;

    assert(!TaintExtent.empty() && "There should be at least one conflict.");

    // Now look at the instructions from VNI->def to TaintExtent (inclusive).
    MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def);
    MachineBasicBlock::iterator MI = MBB->begin();
    if (!VNI->isPHIDef()) {
      MI = Indexes->getInstructionFromIndex(VNI->def);
      // No need to check the instruction defining VNI for reads.
      ++MI;
    }
    assert(!SlotIndex::isSameInstr(VNI->def, TaintExtent.front().first) &&
           "Interference ends on VNI->def. Should have been handled earlier");
    MachineInstr *LastMI =
      Indexes->getInstructionFromIndex(TaintExtent.front().first);
    assert(LastMI && "Range must end at a proper instruction");
    unsigned TaintNum = 0;
    // Walk every instruction in the tainted region; if any of them reads a
    // tainted lane of Other.LI, the conflict is real and the join must fail.
    for(;;) {
      assert(MI != MBB->end() && "Bad LastMI");
      if (usesLanes(MI, Other.LI.reg, Other.SubIdx, TaintedLanes)) {
        DEBUG(dbgs() << "\t\ttainted lanes used by: " << *MI);
        return false;
      }
      // LastMI is the last instruction to use the current value.
      if (&*MI == LastMI) {
        // Advance to the next tainted segment, updating the lane set; stop
        // after the last one.
        if (++TaintNum == TaintExtent.size())
          break;
        LastMI = Indexes->getInstructionFromIndex(TaintExtent[TaintNum].first);
        assert(LastMI && "Range must end at a proper instruction");
        TaintedLanes = TaintExtent[TaintNum].second;
      }
      ++MI;
    }

    // The tainted lanes are unused.
    V.Resolution = CR_Replace;
    ++NumLaneResolves;
  }
  return true;
}

// Determine if ValNo is a copy of a value number in LI or Other.LI that will
// be pruned:
//
//   %dst = COPY %src
//   %src = COPY %dst  <-- This value to be pruned.
//   %dst = COPY %src  <-- This value is a copy of a pruned value.
//
bool JoinVals::isPrunedValue(unsigned ValNo, JoinVals &Other) {
  Val &V = Vals[ValNo];
  // Memoized: either already known pruned, or already computed on a previous
  // (possibly mutually recursive) visit.
  if (V.Pruned || V.PrunedComputed)
    return V.Pruned;

  // Only values that are merged into or erased in favor of another value can
  // inherit "pruned" status through the copy chain.
  if (V.Resolution != CR_Erase && V.Resolution != CR_Merge)
    return V.Pruned;

  // Follow copies up the dominator tree and check if any intermediate value
  // has been pruned.
  // Mark as computed first so the mutual recursion below terminates.
  V.PrunedComputed = true;
  V.Pruned = Other.isPrunedValue(V.OtherVNI->id, *this);
  return V.Pruned;
}

/// pruneValues - Remove from Other.LI (and, for pruned copy chains, from LI)
/// the live ranges that would conflict with CR_Replace values, collecting in
/// EndPoints the slot indices needed to restore liveness after the join.
void JoinVals::pruneValues(JoinVals &Other,
                           SmallVectorImpl<SlotIndex> &EndPoints) {
  for (unsigned i = 0, e = LI.getNumValNums(); i != e; ++i) {
    SlotIndex Def = LI.getValNumInfo(i)->def;
    switch (Vals[i].Resolution) {
    case CR_Keep:
      break;
    case CR_Replace: {
      // This value takes precedence over the value in Other.LI.
      LIS->pruneValue(&Other.LI, Def, &EndPoints);
      // Check if we're replacing an IMPLICIT_DEF value. The IMPLICIT_DEF
      // instructions are only inserted to provide a live-out value for PHI
      // predecessors, so the instruction should simply go away once its value
      // has been replaced.
      Val &OtherV = Other.Vals[Vals[i].OtherVNI->id];
      bool EraseImpDef = OtherV.IsImplicitDef && OtherV.Resolution == CR_Keep;
      if (!Def.isBlock()) {
        // Remove <def,read-undef> flags. This def is now a partial redef.
        // Also remove <def,dead> flags since the joined live range will
        // continue past this instruction.
        for (MIOperands MO(Indexes->getInstructionFromIndex(Def));
             MO.isValid(); ++MO)
          if (MO->isReg() && MO->isDef() && MO->getReg() == LI.reg) {
            MO->setIsUndef(EraseImpDef);
            MO->setIsDead(false);
          }
        // This value will reach instructions below, but we need to make sure
        // the live range also reaches the instruction at Def.
        if (!EraseImpDef)
          EndPoints.push_back(Def);
      }
      DEBUG(dbgs() << "\t\tpruned " << PrintReg(Other.LI.reg) << " at " << Def
                   << ": " << Other.LI << '\n');
      break;
    }
    case CR_Erase:
    case CR_Merge:
      if (isPrunedValue(i, Other)) {
        // This value is ultimately a copy of a pruned value in LI or Other.LI.
        // We can no longer trust the value mapping computed by
        // computeAssignment(), the value that was originally copied could have
        // been replaced.
        LIS->pruneValue(&LI, Def, &EndPoints);
        DEBUG(dbgs() << "\t\tpruned all of " << PrintReg(LI.reg) << " at "
                     << Def << ": " << LI << '\n');
      }
      break;
    case CR_Unresolved:
    case CR_Impossible:
      // resolveConflicts() must have run and succeeded before pruning.
      llvm_unreachable("Unresolved conflicts");
    }
  }
}

/// eraseInstrs - Erase the instructions made redundant by the join: dead
/// IMPLICIT_DEFs whose value was pruned (CR_Keep case), and all CR_Erase
/// defs. Erased instructions are recorded in ErasedInstrs; source registers
/// of erased copies that are unrelated to the coalesced pair are pushed to
/// ShrinkRegs so the caller can shrink their live ranges afterwards.
void JoinVals::eraseInstrs(SmallPtrSet<MachineInstr*, 8> &ErasedInstrs,
                           SmallVectorImpl<unsigned> &ShrinkRegs) {
  for (unsigned i = 0, e = LI.getNumValNums(); i != e; ++i) {
    // Get the def location before markUnused() below invalidates it.
    SlotIndex Def = LI.getValNumInfo(i)->def;
    switch (Vals[i].Resolution) {
    case CR_Keep:
      // If an IMPLICIT_DEF value is pruned, it doesn't serve a purpose any
      // longer. The IMPLICIT_DEF instructions are only inserted by
      // PHIElimination to guarantee that all PHI predecessors have a value.
      if (!Vals[i].IsImplicitDef || !Vals[i].Pruned)
        break;
      // Remove value number i from LI. Note that this VNInfo is still present
      // in NewVNInfo, so it will appear as an unused value number in the final
      // joined interval.
      LI.getValNumInfo(i)->markUnused();
      LI.removeValNo(LI.getValNumInfo(i));
      DEBUG(dbgs() << "\t\tremoved " << i << '@' << Def << ": " << LI << '\n');
      // FALL THROUGH.

    case CR_Erase: {
      MachineInstr *MI = Indexes->getInstructionFromIndex(Def);
      assert(MI && "No instruction to erase");
      if (MI->isCopy()) {
        // If the erased copy read a third register (not one of the two being
        // coalesced), its live range may now end earlier; remember it so the
        // caller can shrink it.
        unsigned Reg = MI->getOperand(1).getReg();
        if (TargetRegisterInfo::isVirtualRegister(Reg) &&
            Reg != CP.getSrcReg() && Reg != CP.getDstReg())
          ShrinkRegs.push_back(Reg);
      }
      // Record the pointer so stale entries in the coalescer work list can be
      // skipped, then remove the instruction from the slot index maps and
      // from its parent block.
      ErasedInstrs.insert(MI);
      DEBUG(dbgs() << "\t\terased:\t" << Def << '\t' << *MI);
      LIS->RemoveMachineInstrFromMaps(MI);
      MI->eraseFromParent();
      break;
    }
    default:
      break;
    }
  }
}

/// joinVirtRegs - Join the live ranges of two virtual registers using the
/// JoinVals value-mapping machinery. Returns false (leaving the function
/// unmodified except for conservative flag clearing done by helpers) if the
/// intervals cannot be joined.
bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
  SmallVector<VNInfo*, 16> NewVNInfo;
  LiveInterval &RHS = LIS->getInterval(CP.getSrcReg());
  LiveInterval &LHS = LIS->getInterval(CP.getDstReg());
  JoinVals RHSVals(RHS, CP.getSrcIdx(), NewVNInfo, CP, LIS, TRI);
  JoinVals LHSVals(LHS, CP.getDstIdx(), NewVNInfo, CP, LIS, TRI);

  DEBUG(dbgs() << "\t\tRHS = " << PrintReg(CP.getSrcReg()) << ' ' << RHS
               << "\n\t\tLHS = " << PrintReg(CP.getDstReg()) << ' ' << LHS
               << '\n');

  // First compute NewVNInfo and the simple value mappings.
  // Detect impossible conflicts early.
  if (!LHSVals.mapValues(RHSVals) || !RHSVals.mapValues(LHSVals))
    return false;

  // Some conflicts can only be resolved after all values have been mapped.
  if (!LHSVals.resolveConflicts(RHSVals) || !RHSVals.resolveConflicts(LHSVals))
    return false;

  // All clear, the live ranges can be merged.

  // The merging algorithm in LiveInterval::join() can't handle conflicting
  // value mappings, so we need to remove any live ranges that overlap a
  // CR_Replace resolution. Collect a set of end points that can be used to
  // restore the live range after joining.
  SmallVector<SlotIndex, 8> EndPoints;
  LHSVals.pruneValues(RHSVals, EndPoints);
  RHSVals.pruneValues(LHSVals, EndPoints);

  // Erase COPY and IMPLICIT_DEF instructions. This may cause some external
  // registers to require trimming.
  SmallVector<unsigned, 8> ShrinkRegs;
  LHSVals.eraseInstrs(ErasedInstrs, ShrinkRegs);
  RHSVals.eraseInstrs(ErasedInstrs, ShrinkRegs);
  while (!ShrinkRegs.empty())
    LIS->shrinkToUses(&LIS->getInterval(ShrinkRegs.pop_back_val()));

  // Join RHS into LHS.
  LHS.join(RHS, LHSVals.getAssignments(), RHSVals.getAssignments(), NewVNInfo,
           MRI);

  // Kill flags are going to be wrong if the live ranges were overlapping.
  // Eventually, we should simply clear all kill flags when computing live
  // ranges. They are reinserted after register allocation.
  MRI->clearKillFlags(LHS.reg);
  MRI->clearKillFlags(RHS.reg);

  if (EndPoints.empty())
    return true;

  // Recompute the parts of the live range we had to remove because of
  // CR_Replace conflicts.
  DEBUG(dbgs() << "\t\trestoring liveness to " << EndPoints.size()
               << " points: " << LHS << '\n');
  LIS->extendToIndices(&LHS, EndPoints);
  return true;
}

/// ComputeUltimateVN - Assuming we are going to join two live intervals,
/// compute what the resultant value numbers for each value in the input two
/// ranges will be. This is complicated by copies between the two which can
/// and will commonly cause multiple value numbers to be merged into one.
///
/// VN is the value number that we're trying to resolve. InstDefiningValue
/// keeps track of the new InstDefiningValue assignment for the result
/// LiveInterval. ThisFromOther/OtherFromThis are sets that keep track of
/// whether a value in this or other is a copy from the opposite set.
/// ThisValNoAssignments/OtherValNoAssignments keep track of value #'s that have
/// already been assigned.
///
/// ThisFromOther[x] - If x is defined as a copy from the other interval, this
/// contains the value number the copy is from.
///
/// Assignment entries use -1 for "not yet computed" and -2 as an
/// in-progress sentinel to detect cyclic value numbers during the mutual
/// recursion below.
static unsigned ComputeUltimateVN(VNInfo *VNI,
                                  SmallVector<VNInfo*, 16> &NewVNInfo,
                                  DenseMap<VNInfo*, VNInfo*> &ThisFromOther,
                                  DenseMap<VNInfo*, VNInfo*> &OtherFromThis,
                                  SmallVector<int, 16> &ThisValNoAssignments,
                                  SmallVector<int, 16> &OtherValNoAssignments) {
  unsigned VN = VNI->id;

  // If the VN has already been computed, just return it.
  if (ThisValNoAssignments[VN] >= 0)
    return ThisValNoAssignments[VN];
  assert(ThisValNoAssignments[VN] != -2 && "Cyclic value numbers");

  // If this val is not a copy from the other val, then it must be a new value
  // number in the destination.
  DenseMap<VNInfo*, VNInfo*>::iterator I = ThisFromOther.find(VNI);
  if (I == ThisFromOther.end()) {
    NewVNInfo.push_back(VNI);
    return ThisValNoAssignments[VN] = NewVNInfo.size()-1;
  }
  VNInfo *OtherValNo = I->second;

  // Otherwise, this *is* a copy from the RHS.  If the other side has already
  // been computed, return it.
  if (OtherValNoAssignments[OtherValNo->id] >= 0)
    return ThisValNoAssignments[VN] = OtherValNoAssignments[OtherValNo->id];

  // Mark this value number as currently being computed, then ask what the
  // ultimate value # of the other value is.
  ThisValNoAssignments[VN] = -2;
  // Note the swapped argument order: we recurse from the other interval's
  // point of view.
  unsigned UltimateVN =
    ComputeUltimateVN(OtherValNo, NewVNInfo, OtherFromThis, ThisFromOther,
                      OtherValNoAssignments, ThisValNoAssignments);
  return ThisValNoAssignments[VN] = UltimateVN;
}


// Find out if we have something like
//   A = X
//   B = X
// if so, we can pretend this is actually
//   A = X
//   B = A
// which allows us to coalesce A and B.
// VNI is the definition of B. LR is the life range of A that includes
// the slot just before B. If we return true, we add "B = X" to DupCopies.
// This implies that A dominates B.
static bool RegistersDefinedFromSameValue(LiveIntervals &li,
                                          const TargetRegisterInfo &tri,
                                          CoalescerPair &CP,
                                          VNInfo *VNI,
                                          VNInfo *OtherVNI,
                                          SmallVector<MachineInstr*, 8> &DupCopies) {
  // FIXME: This is very conservative. For example, we don't handle
  // physical registers.

  MachineInstr *MI = li.getInstructionFromIndex(VNI->def);

  // Bail out on partial or physical-register joins, or if B's def has no
  // instruction (e.g. a PHI value).
  if (!MI || CP.isPartial() || CP.isPhys())
    return false;

  unsigned A = CP.getDstReg();
  if (!TargetRegisterInfo::isVirtualRegister(A))
    return false;

  unsigned B = CP.getSrcReg();
  if (!TargetRegisterInfo::isVirtualRegister(B))
    return false;

  MachineInstr *OtherMI = li.getInstructionFromIndex(OtherVNI->def);
  if (!OtherMI)
    return false;

  if (MI->isImplicitDef()) {
    // An IMPLICIT_DEF can simply be removed; treat it like a duplicate copy.
    DupCopies.push_back(MI);
    return true;
  } else {
    // Both defs must be full copies of the same virtual register X.
    if (!MI->isFullCopy())
      return false;
    unsigned Src = MI->getOperand(1).getReg();
    if (!TargetRegisterInfo::isVirtualRegister(Src))
      return false;
    if (!OtherMI->isFullCopy())
      return false;
    unsigned OtherSrc = OtherMI->getOperand(1).getReg();
    if (!TargetRegisterInfo::isVirtualRegister(OtherSrc))
      return false;

    if (Src != OtherSrc)
      return false;

    // If the copies use two different value numbers of X, we cannot merge
    // A and B.
    LiveInterval &SrcInt = li.getInterval(Src);
    // getVNInfoBefore returns NULL for undef copies. In this case, the
    // optimization is still safe.
    if (SrcInt.getVNInfoBefore(OtherVNI->def) != SrcInt.getVNInfoBefore(VNI->def))
      return false;

    DupCopies.push_back(MI);
    return true;
  }
}

/// joinIntervals - Attempt to join these two intervals.
/// On failure, this returns false and leaves the intervals unjoined.
bool RegisterCoalescer::joinIntervals(CoalescerPair &CP) {
  // Handle physreg joins separately.
  if (CP.isPhys())
    return joinReservedPhysReg(CP);

  // The new JoinVals-based algorithm (default, see the -new-coalescer flag).
  if (NewCoalescer)
    return joinVirtRegs(CP);

  // --- Legacy virtual-register join algorithm below. ---

  LiveInterval &RHS = LIS->getInterval(CP.getSrcReg());
  DEBUG(dbgs() << "\t\tRHS = " << PrintReg(CP.getSrcReg()) << ' ' << RHS
               << '\n');

  // Compute the final value assignment, assuming that the live ranges can be
  // coalesced.
  SmallVector<int, 16> LHSValNoAssignments;
  SmallVector<int, 16> RHSValNoAssignments;
  DenseMap<VNInfo*, VNInfo*> LHSValsDefinedFromRHS;
  DenseMap<VNInfo*, VNInfo*> RHSValsDefinedFromLHS;
  SmallVector<VNInfo*, 16> NewVNInfo;

  SmallVector<MachineInstr*, 8> DupCopies;
  SmallVector<MachineInstr*, 8> DeadCopies;

  LiveInterval &LHS = LIS->getOrCreateInterval(CP.getDstReg());
  DEBUG(dbgs() << "\t\tLHS = " << PrintReg(CP.getDstReg(), TRI) << ' ' << LHS
               << '\n');

  // Loop over the value numbers of the LHS, seeing if any are defined from
  // the RHS.
  for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end();
       i != e; ++i) {
    VNInfo *VNI = *i;
    if (VNI->isUnused() || VNI->isPHIDef())
      continue;
    MachineInstr *MI = LIS->getInstructionFromIndex(VNI->def);
    assert(MI && "Missing def");
    if (!MI->isCopyLike() && !MI->isImplicitDef()) // Src not defined by a copy?
      continue;

    // Figure out the value # from the RHS.
    VNInfo *OtherVNI = RHS.getVNInfoBefore(VNI->def);
    // The copy could be to an aliased physreg.
    if (!OtherVNI)
      continue;

    // DstReg is known to be a register in the LHS interval.  If the src is
    // from the RHS interval, we can use its value #.
    if (CP.isCoalescable(MI))
      DeadCopies.push_back(MI);
    else if (!RegistersDefinedFromSameValue(*LIS, *TRI, CP, VNI, OtherVNI,
                                            DupCopies))
      continue;

    LHSValsDefinedFromRHS[VNI] = OtherVNI;
  }

  // Loop over the value numbers of the RHS, seeing if any are defined from
  // the LHS (the mirror image of the loop above).
  for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end();
       i != e; ++i) {
    VNInfo *VNI = *i;
    if (VNI->isUnused() || VNI->isPHIDef())
      continue;
    MachineInstr *MI = LIS->getInstructionFromIndex(VNI->def);
    assert(MI && "Missing def");
    if (!MI->isCopyLike() && !MI->isImplicitDef()) // Src not defined by a copy?
      continue;

    // Figure out the value # from the LHS.
    VNInfo *OtherVNI = LHS.getVNInfoBefore(VNI->def);
    // The copy could be to an aliased physreg.
    if (!OtherVNI)
      continue;

    // DstReg is known to be a register in the RHS interval.  If the src is
    // from the LHS interval, we can use its value #.
    if (CP.isCoalescable(MI))
      DeadCopies.push_back(MI);
    else if (!RegistersDefinedFromSameValue(*LIS, *TRI, CP, VNI, OtherVNI,
                                            DupCopies))
      continue;

    RHSValsDefinedFromLHS[VNI] = OtherVNI;
  }

  // -1 marks value numbers whose final assignment is not computed yet.
  LHSValNoAssignments.resize(LHS.getNumValNums(), -1);
  RHSValNoAssignments.resize(RHS.getNumValNums(), -1);
  NewVNInfo.reserve(LHS.getNumValNums() + RHS.getNumValNums());

  // Resolve the ultimate value number for every LHS value.
  for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end();
       i != e; ++i) {
    VNInfo *VNI = *i;
    unsigned VN = VNI->id;
    if (LHSValNoAssignments[VN] >= 0 || VNI->isUnused())
      continue;
    ComputeUltimateVN(VNI, NewVNInfo,
                      LHSValsDefinedFromRHS, RHSValsDefinedFromLHS,
                      LHSValNoAssignments, RHSValNoAssignments);
  }
  // Then for every RHS value.
  for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end();
       i != e; ++i) {
    VNInfo *VNI = *i;
    unsigned VN = VNI->id;
    if (RHSValNoAssignments[VN] >= 0 || VNI->isUnused())
      continue;
    // If this value number isn't a copy from the LHS, it's a new number.
    if (RHSValsDefinedFromLHS.find(VNI) == RHSValsDefinedFromLHS.end()) {
      NewVNInfo.push_back(VNI);
      RHSValNoAssignments[VN] = NewVNInfo.size()-1;
      continue;
    }

    ComputeUltimateVN(VNI, NewVNInfo,
                      RHSValsDefinedFromLHS, LHSValsDefinedFromRHS,
                      RHSValNoAssignments, LHSValNoAssignments);
  }

  // Armed with the mappings of LHS/RHS values to ultimate values, walk the
  // interval lists to see if these intervals are coalescable.
  LiveInterval::const_iterator I = LHS.begin();
  LiveInterval::const_iterator IE = LHS.end();
  LiveInterval::const_iterator J = RHS.begin();
  LiveInterval::const_iterator JE = RHS.end();

  // Collect interval end points that will no longer be kills.
  SmallVector<MachineInstr*, 8> LHSOldKills;
  SmallVector<MachineInstr*, 8> RHSOldKills;

  // Skip ahead until the first place of potential sharing.
  if (I != IE && J != JE) {
    if (I->start < J->start) {
      I = std::upper_bound(I, IE, J->start);
      if (I != LHS.begin()) --I;
    } else if (J->start < I->start) {
      J = std::upper_bound(J, JE, I->start);
      if (J != RHS.begin()) --J;
    }
  }

  // Merge-walk both range lists in parallel.
  while (I != IE && J != JE) {
    // Determine if these two live ranges overlap.
    // If so, check value # info to determine if they are really different.
    if (I->end > J->start && J->end > I->start) {
      // If the live range overlap will map to the same value number in the
      // result liverange, we can still coalesce them.  If not, we can't.
      if (LHSValNoAssignments[I->valno->id] !=
          RHSValNoAssignments[J->valno->id])
        return false;

      // Extended live ranges should no longer be killed.
      if (!I->end.isBlock() && I->end < J->end)
        if (MachineInstr *MI = LIS->getInstructionFromIndex(I->end))
          LHSOldKills.push_back(MI);
      if (!J->end.isBlock() && J->end < I->end)
        if (MachineInstr *MI = LIS->getInstructionFromIndex(J->end))
          RHSOldKills.push_back(MI);
    }

    // Advance whichever range ends first.
    if (I->end < J->end)
      ++I;
    else
      ++J;
  }

  // Clear kill flags where live ranges are extended.
  while (!LHSOldKills.empty())
    LHSOldKills.pop_back_val()->clearRegisterKills(LHS.reg, TRI);
  while (!RHSOldKills.empty())
    RHSOldKills.pop_back_val()->clearRegisterKills(RHS.reg, TRI);

  // join() below indexes into these arrays; make sure they are never empty.
  if (LHSValNoAssignments.empty())
    LHSValNoAssignments.push_back(-1);
  if (RHSValNoAssignments.empty())
    RHSValNoAssignments.push_back(-1);

  // Now erase all the redundant copies.
  for (unsigned i = 0, e = DeadCopies.size(); i != e; ++i) {
    MachineInstr *MI = DeadCopies[i];
    // insert() returns false if MI was already erased earlier.
    if (!ErasedInstrs.insert(MI))
      continue;
    DEBUG(dbgs() << "\t\terased:\t" << LIS->getInstructionIndex(MI)
                 << '\t' << *MI);
    LIS->RemoveMachineInstrFromMaps(MI);
    MI->eraseFromParent();
  }

  SmallVector<unsigned, 8> SourceRegisters;
  for (SmallVector<MachineInstr*, 8>::iterator I = DupCopies.begin(),
         E = DupCopies.end(); I != E; ++I) {
    MachineInstr *MI = *I;
    if (!ErasedInstrs.insert(MI))
      continue;

    // If MI is a copy, then we have pretended that the assignment to B in
    //   A = X
    //   B = X
    // was actually a copy from A.  Now that we decided to coalesce A and B,
    // transform the code into
    //   A = X
    // In the case of the implicit_def, we just have to remove it.
    if (!MI->isImplicitDef()) {
      unsigned Src = MI->getOperand(1).getReg();
      SourceRegisters.push_back(Src);
    }
    LIS->RemoveMachineInstrFromMaps(MI);
    MI->eraseFromParent();
  }

  // If B = X was the last use of X in a liverange, we have to shrink it now
  // that B = X is gone.
  for (SmallVector<unsigned, 8>::iterator I = SourceRegisters.begin(),
         E = SourceRegisters.end(); I != E; ++I) {
    LIS->shrinkToUses(&LIS->getInterval(*I));
  }

  // If we get here, we know that we can coalesce the live ranges.  Ask the
  // intervals to coalesce themselves now.
  LHS.join(RHS, &LHSValNoAssignments[0], &RHSValNoAssignments[0], NewVNInfo,
           MRI);
  return true;
}

namespace {
  // DepthMBBCompare - Comparison predicate that sort first based on the loop
  // depth of the basic block (the unsigned), and then on the MBB number.
  struct DepthMBBCompare {
    typedef std::pair<unsigned, MachineBasicBlock*> DepthMBBPair;
    // Strict-weak ordering used with std::sort in joinAllIntervals().
    bool operator()(const DepthMBBPair &LHS, const DepthMBBPair &RHS) const {
      // Deeper loops first
      if (LHS.first != RHS.first)
        return LHS.first > RHS.first;

      // Prefer blocks that are more connected in the CFG. This takes care of
      // the most difficult copies first while intervals are short.
      unsigned cl = LHS.second->pred_size() + LHS.second->succ_size();
      unsigned cr = RHS.second->pred_size() + RHS.second->succ_size();
      if (cl != cr)
        return cl > cr;

      // As a last resort, sort by block number (deterministic tiebreak).
      return LHS.second->getNumber() < RHS.second->getNumber();
    }
  };
}

// Try joining WorkList copies starting from index From.
// Null out any successful joins.
// Returns true if any copy was successfully joined (progress was made).
bool RegisterCoalescer::copyCoalesceWorkList(unsigned From) {
  assert(From <= WorkList.size() && "Out of range");
  bool Progress = false;
  for (unsigned i = From, e = WorkList.size(); i != e; ++i) {
    if (!WorkList[i])
      continue;
    // Skip instruction pointers that have already been erased, for example by
    // dead code elimination.
    if (ErasedInstrs.erase(WorkList[i])) {
      WorkList[i] = 0;
      continue;
    }
    bool Again = false;
    bool Success = joinCopy(WorkList[i], Again);
    Progress |= Success;
    // Keep the copy in the list only if joinCopy asked to retry it later.
    if (Success || !Again)
      WorkList[i] = 0;
  }
  return Progress;
}

/// copyCoalesceInMBB - Collect all copy-like instructions in MBB onto the
/// work list and attempt to coalesce them immediately.
void
RegisterCoalescer::copyCoalesceInMBB(MachineBasicBlock *MBB) {
  DEBUG(dbgs() << MBB->getName() << ":\n");

  // Collect all copy-like instructions in MBB. Don't start coalescing anything
  // yet, it might invalidate the iterator.
  const unsigned PrevSize = WorkList.size();
  for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end();
       MII != E; ++MII)
    if (MII->isCopyLike())
      WorkList.push_back(MII);

  // Try coalescing the collected copies immediately, and remove the nulls.
  // This prevents the WorkList from getting too large since most copies are
  // joinable on the first attempt.
  if (copyCoalesceWorkList(PrevSize))
    WorkList.erase(std::remove(WorkList.begin() + PrevSize, WorkList.end(),
                               (MachineInstr*)0), WorkList.end());
}

/// joinAllIntervals - Visit every basic block (innermost loops first when the
/// function has loops) and coalesce its copies, then iterate on the
/// remaining work list until no more progress is made.
void RegisterCoalescer::joinAllIntervals() {
  DEBUG(dbgs() << "********** JOINING INTERVALS ***********\n");
  assert(WorkList.empty() && "Old data still around.");

  if (Loops->empty()) {
    // If there are no loops in the function, join intervals in function order.
    for (MachineFunction::iterator I = MF->begin(), E = MF->end();
         I != E; ++I)
      copyCoalesceInMBB(I);
  } else {
    // Otherwise, join intervals in inner loops before other intervals.
    // Unfortunately we can't just iterate over loop hierarchy here because
    // there may be more MBB's than BB's.  Collect MBB's for sorting.

    // Join intervals in the function prolog first. We want to join physical
    // registers with virtual registers before the intervals got too long.
    std::vector<std::pair<unsigned, MachineBasicBlock*> > MBBs;
    for (MachineFunction::iterator I = MF->begin(), E = MF->end();I != E;++I){
      MachineBasicBlock *MBB = I;
      MBBs.push_back(std::make_pair(Loops->getLoopDepth(MBB), I));
    }

    // Sort by loop depth.
    std::sort(MBBs.begin(), MBBs.end(), DepthMBBCompare());

    // Finally, join intervals in loop nest order.
    for (unsigned i = 0, e = MBBs.size(); i != e; ++i)
      copyCoalesceInMBB(MBBs[i].second);
  }

  // Joining intervals can allow other intervals to be joined.  Iteratively join
  // until we make no progress.
  while (copyCoalesceWorkList())
    /* empty */ ;
}

/// releaseMemory - Drop all per-function state so the pass holds no stale
/// pointers between runs.
void RegisterCoalescer::releaseMemory() {
  ErasedInstrs.clear();
  WorkList.clear();
  DeadDefs.clear();
  InflateRegs.clear();
}

/// runOnMachineFunction - Pass entry point: cache the analyses, run the
/// joining phase if enabled, then try to inflate register classes that lost
/// sub-register constraints. Always returns true since instructions are
/// typically erased.
bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
  MF = &fn;
  MRI = &fn.getRegInfo();
  TM = &fn.getTarget();
  TRI = TM->getRegisterInfo();
  TII = TM->getInstrInfo();
  LIS = &getAnalysis<LiveIntervals>();
  LDV = &getAnalysis<LiveDebugVariables>();
  AA = &getAnalysis<AliasAnalysis>();
  Loops = &getAnalysis<MachineLoopInfo>();

  DEBUG(dbgs() << "********** SIMPLE REGISTER COALESCING **********\n"
               << "********** Function: " << MF->getName() << '\n');

  if (VerifyCoalescing)
    MF->verify(this, "Before register coalescing");

  RegClassInfo.runOnMachineFunction(fn);

  // Join (coalesce) intervals if requested.
  if (EnableJoining)
    joinAllIntervals();

  // After deleting a lot of copies, register classes may be less constrained.
  // Removing sub-register operands may allow GR32_ABCD -> GR32 and DPR_VFP2 ->
  // DPR inflation.
  // Sort and unique first: the same register may have been pushed repeatedly.
  array_pod_sort(InflateRegs.begin(), InflateRegs.end());
  InflateRegs.erase(std::unique(InflateRegs.begin(), InflateRegs.end()),
                    InflateRegs.end());
  DEBUG(dbgs() << "Trying to inflate " << InflateRegs.size() << " regs.\n");
  for (unsigned i = 0, e = InflateRegs.size(); i != e; ++i) {
    unsigned Reg = InflateRegs[i];
    // Skip registers with no remaining non-debug uses.
    if (MRI->reg_nodbg_empty(Reg))
      continue;
    if (MRI->recomputeRegClass(Reg, *TM)) {
      DEBUG(dbgs() << PrintReg(Reg) << " inflated to "
                   << MRI->getRegClass(Reg)->getName() << '\n');
      ++NumInflated;
    }
  }

  DEBUG(dump());
  DEBUG(LDV->dump());
  if (VerifyCoalescing)
    MF->verify(this, "After register coalescing");
  return true;
}

/// print - Implement the dump method.
// Printing is delegated entirely to the LiveIntervals analysis.
void RegisterCoalescer::print(raw_ostream &O, const Module* m) const {
  LIS->print(O, m);
}