RegisterCoalescer.cpp revision 327952
1327952Sdim//===- RegisterCoalescer.cpp - Generic Register Coalescing Interface ------===// 2193323Sed// 3193323Sed// The LLVM Compiler Infrastructure 4193323Sed// 5193323Sed// This file is distributed under the University of Illinois Open Source 6193323Sed// License. See LICENSE.TXT for details. 7193323Sed// 8193323Sed//===----------------------------------------------------------------------===// 9193323Sed// 10193323Sed// This file implements the generic RegisterCoalescer interface which 11193323Sed// is used as the common interface used by all clients and 12193323Sed// implementations of register coalescing. 13193323Sed// 14193323Sed//===----------------------------------------------------------------------===// 15193323Sed 16224145Sdim#include "RegisterCoalescer.h" 17327952Sdim#include "llvm/ADT/ArrayRef.h" 18327952Sdim#include "llvm/ADT/BitVector.h" 19239462Sdim#include "llvm/ADT/STLExtras.h" 20327952Sdim#include "llvm/ADT/SmallPtrSet.h" 21327952Sdim#include "llvm/ADT/SmallVector.h" 22239462Sdim#include "llvm/ADT/Statistic.h" 23239462Sdim#include "llvm/Analysis/AliasAnalysis.h" 24327952Sdim#include "llvm/CodeGen/LiveInterval.h" 25327952Sdim#include "llvm/CodeGen/LiveIntervals.h" 26239462Sdim#include "llvm/CodeGen/LiveRangeEdit.h" 27327952Sdim#include "llvm/CodeGen/MachineBasicBlock.h" 28327952Sdim#include "llvm/CodeGen/MachineFunction.h" 29327952Sdim#include "llvm/CodeGen/MachineFunctionPass.h" 30224145Sdim#include "llvm/CodeGen/MachineInstr.h" 31321369Sdim#include "llvm/CodeGen/MachineInstrBuilder.h" 32224145Sdim#include "llvm/CodeGen/MachineLoopInfo.h" 33327952Sdim#include "llvm/CodeGen/MachineOperand.h" 34224145Sdim#include "llvm/CodeGen/MachineRegisterInfo.h" 35224145Sdim#include "llvm/CodeGen/Passes.h" 36239462Sdim#include "llvm/CodeGen/RegisterClassInfo.h" 37327952Sdim#include "llvm/CodeGen/SlotIndexes.h" 38327952Sdim#include "llvm/CodeGen/TargetInstrInfo.h" 39327952Sdim#include "llvm/CodeGen/TargetOpcodes.h" 40327952Sdim#include "llvm/CodeGen/TargetRegisterInfo.h" 41327952Sdim#include "llvm/CodeGen/TargetSubtargetInfo.h" 42327952Sdim#include "llvm/IR/DebugLoc.h" 43327952Sdim#include "llvm/MC/LaneBitmask.h" 44327952Sdim#include "llvm/MC/MCInstrDesc.h" 45327952Sdim#include "llvm/MC/MCRegisterInfo.h" 46249423Sdim#include "llvm/Pass.h" 47224145Sdim#include "llvm/Support/CommandLine.h" 48327952Sdim#include "llvm/Support/Compiler.h" 49224145Sdim#include "llvm/Support/Debug.h" 50224145Sdim#include "llvm/Support/ErrorHandling.h" 51224145Sdim#include "llvm/Support/raw_ostream.h" 52224145Sdim#include <algorithm> 53327952Sdim#include <cassert> 54327952Sdim#include <iterator> 55327952Sdim#include <limits> 56327952Sdim#include <tuple> 57327952Sdim#include <utility> 58327952Sdim#include <vector> 59327952Sdim 60193323Sedusing namespace llvm; 61193323Sed 62276479Sdim#define DEBUG_TYPE "regalloc" 63276479Sdim 64224145SdimSTATISTIC(numJoins , "Number of interval joins performed"); 65224145SdimSTATISTIC(numCrossRCs , "Number of cross class joins performed"); 66224145SdimSTATISTIC(numCommutes , "Number of instruction commuting performed"); 67224145SdimSTATISTIC(numExtends , "Number of copies extended"); 68224145SdimSTATISTIC(NumReMats , "Number of instructions re-materialized"); 69226633SdimSTATISTIC(NumInflated , "Number of register classes inflated"); 70243830SdimSTATISTIC(NumLaneConflicts, "Number of dead lane conflicts tested"); 71243830SdimSTATISTIC(NumLaneResolves, "Number of dead lane conflicts resolved"); 72224145Sdim 73327952Sdimstatic cl::opt<bool> EnableJoining("join-liveintervals", 74327952Sdim cl::desc("Coalesce copies (default=true)"), 75327952Sdim cl::init(true), cl::Hidden); 76224145Sdim 77288943Sdimstatic cl::opt<bool> UseTerminalRule("terminal-rule", 78288943Sdim cl::desc("Apply the terminal rule"), 79288943Sdim cl::init(false), cl::Hidden); 80288943Sdim 81288943Sdim/// Temporary flag to test critical edge unsplitting. 82224145Sdimstatic cl::opt<bool> 83249423SdimEnableJoinSplits("join-splitedges", 84249423Sdim cl::desc("Coalesce copies on split edges (default=subtarget)"), cl::Hidden); 85249423Sdim 86288943Sdim/// Temporary flag to test global copy optimization. 87249423Sdimstatic cl::opt<cl::boolOrDefault> 88249423SdimEnableGlobalCopies("join-globalcopies", 89249423Sdim cl::desc("Coalesce copies that span blocks (default=subtarget)"), 90249423Sdim cl::init(cl::BOU_UNSET), cl::Hidden); 91249423Sdim 92249423Sdimstatic cl::opt<bool> 93224145SdimVerifyCoalescing("verify-coalescing", 94224145Sdim cl::desc("Verify machine instrs before and after register coalescing"), 95224145Sdim cl::Hidden); 96224145Sdim 97226633Sdimnamespace { 98327952Sdim 99239462Sdim class RegisterCoalescer : public MachineFunctionPass, 100239462Sdim private LiveRangeEdit::Delegate { 101226633Sdim MachineFunction* MF; 102226633Sdim MachineRegisterInfo* MRI; 103226633Sdim const TargetRegisterInfo* TRI; 104226633Sdim const TargetInstrInfo* TII; 105226633Sdim LiveIntervals *LIS; 106226633Sdim const MachineLoopInfo* Loops; 107226633Sdim AliasAnalysis *AA; 108226633Sdim RegisterClassInfo RegClassInfo; 109226633Sdim 110280031Sdim /// A LaneMask to remember on which subregister live ranges we need to call 111280031Sdim /// shrinkToUses() later. 112296417Sdim LaneBitmask ShrinkMask; 113280031Sdim 114280031Sdim /// True if the main range of the currently coalesced intervals should be 115280031Sdim /// checked for smaller live intervals. 116280031Sdim bool ShrinkMainRange; 117280031Sdim 118249423Sdim /// \brief True if the coalescer should aggressively coalesce global copies 119249423Sdim /// in favor of keeping local copies. 120249423Sdim bool JoinGlobalCopies; 121249423Sdim 122249423Sdim /// \brief True if the coalescer should aggressively coalesce fall-thru 123249423Sdim /// blocks exclusively containing copies. 124249423Sdim bool JoinSplitEdges; 125249423Sdim 126280031Sdim /// Copy instructions yet to be coalesced. 127239462Sdim SmallVector<MachineInstr*, 8> WorkList; 128249423Sdim SmallVector<MachineInstr*, 8> LocalWorkList; 129226633Sdim 130280031Sdim /// Set of instruction pointers that have been erased, and 131239462Sdim /// that may be present in WorkList. 132239462Sdim SmallPtrSet<MachineInstr*, 8> ErasedInstrs; 133226633Sdim 134239462Sdim /// Dead instructions that are about to be deleted. 135239462Sdim SmallVector<MachineInstr*, 8> DeadDefs; 136226633Sdim 137239462Sdim /// Virtual registers to be considered for register class inflation. 138239462Sdim SmallVector<unsigned, 8> InflateRegs; 139226633Sdim 140239462Sdim /// Recursively eliminate dead defs in DeadDefs. 141239462Sdim void eliminateDeadDefs(); 142226633Sdim 143288943Sdim /// LiveRangeEdit callback for eliminateDeadDefs(). 144276479Sdim void LRE_WillEraseInstruction(MachineInstr *MI) override; 145239462Sdim 146280031Sdim /// Coalesce the LocalWorkList. 147249423Sdim void coalesceLocals(); 148249423Sdim 149280031Sdim /// Join compatible live intervals 150239462Sdim void joinAllIntervals(); 151239462Sdim 152280031Sdim /// Coalesce copies in the specified MBB, putting 153239462Sdim /// copies that cannot yet be coalesced into WorkList. 154239462Sdim void copyCoalesceInMBB(MachineBasicBlock *MBB); 155239462Sdim 156288943Sdim /// Tries to coalesce all copies in CurrList. Returns true if any progress 157288943Sdim /// was made. 158249423Sdim bool copyCoalesceWorkList(MutableArrayRef<MachineInstr*> CurrList); 159239462Sdim 160288943Sdim /// Attempt to join intervals corresponding to SrcReg/DstReg, which are the 161288943Sdim /// src/dst of the copy instruction CopyMI. This returns true if the copy 162288943Sdim /// was successfully coalesced away. If it is not currently possible to 163288943Sdim /// coalesce this interval, but it may be possible if other things get 164288943Sdim /// coalesced, then it returns true by reference in 'Again'. 165239462Sdim bool joinCopy(MachineInstr *TheCopy, bool &Again); 166226633Sdim 167280031Sdim /// Attempt to join these two intervals. On failure, this 168226633Sdim /// returns false. The output "SrcInt" will not have been modified, so we 169226633Sdim /// can use this information below to update aliases. 170239462Sdim bool joinIntervals(CoalescerPair &CP); 171226633Sdim 172243830Sdim /// Attempt joining two virtual registers. Return true on success. 173243830Sdim bool joinVirtRegs(CoalescerPair &CP); 174243830Sdim 175239462Sdim /// Attempt joining with a reserved physreg. 176239462Sdim bool joinReservedPhysReg(CoalescerPair &CP); 177239462Sdim 178280031Sdim /// Add the LiveRange @p ToMerge as a subregister liverange of @p LI. 179280031Sdim /// Subranges in @p LI which only partially interfere with the desired 180280031Sdim /// LaneMask are split as necessary. @p LaneMask are the lanes that 181280031Sdim /// @p ToMerge will occupy in the coalescer register. @p LI has its subrange 182280031Sdim /// lanemasks already adjusted to the coalesced register. 183296417Sdim void mergeSubRangeInto(LiveInterval &LI, const LiveRange &ToMerge, 184296417Sdim LaneBitmask LaneMask, CoalescerPair &CP); 185280031Sdim 186280031Sdim /// Join the liveranges of two subregisters. Joins @p RRange into 187280031Sdim /// @p LRange, @p RRange may be invalid afterwards. 188296417Sdim void joinSubRegRanges(LiveRange &LRange, LiveRange &RRange, 189296417Sdim LaneBitmask LaneMask, const CoalescerPair &CP); 190280031Sdim 191288943Sdim /// We found a non-trivially-coalescable copy. If the source value number is 192288943Sdim /// defined by a copy from the destination reg see if we can merge these two 193288943Sdim /// destination reg valno# into a single value number, eliminating a copy. 194288943Sdim /// This returns true if an interval was modified. 195239462Sdim bool adjustCopiesBackFrom(const CoalescerPair &CP, MachineInstr *CopyMI); 196226633Sdim 197280031Sdim /// Return true if there are definitions of IntB 198226633Sdim /// other than BValNo val# that can reach uses of AValno val# of IntA. 199239462Sdim bool hasOtherReachingDefs(LiveInterval &IntA, LiveInterval &IntB, 200226633Sdim VNInfo *AValNo, VNInfo *BValNo); 201226633Sdim 202280031Sdim /// We found a non-trivially-coalescable copy. 203226633Sdim /// If the source value number is defined by a commutable instruction and 204226633Sdim /// its other operand is coalesced to the copy dest register, see if we 205226633Sdim /// can transform the copy into a noop by commuting the definition. 206288943Sdim /// This returns true if an interval was modified. 207239462Sdim bool removeCopyByCommutingDef(const CoalescerPair &CP,MachineInstr *CopyMI); 208226633Sdim 209321369Sdim /// We found a copy which can be moved to its less frequent predecessor. 210321369Sdim bool removePartialRedundancy(const CoalescerPair &CP, MachineInstr &CopyMI); 211321369Sdim 212280031Sdim /// If the source of a copy is defined by a 213226633Sdim /// trivial computation, replace the copy by rematerialize the definition. 214288943Sdim bool reMaterializeTrivialDef(const CoalescerPair &CP, MachineInstr *CopyMI, 215261991Sdim bool &IsDefCopy); 216226633Sdim 217288943Sdim /// Return true if a copy involving a physreg should be joined. 218249423Sdim bool canJoinPhys(const CoalescerPair &CP); 219226633Sdim 220288943Sdim /// Replace all defs and uses of SrcReg to DstReg and update the subregister 221288943Sdim /// number if it is not zero. If DstReg is a physical register and the 222288943Sdim /// existing subregister number of the def / use being updated is not zero, 223288943Sdim /// make sure to set it to the correct physical subregister. 224239462Sdim void updateRegDefsUses(unsigned SrcReg, unsigned DstReg, unsigned SubIdx); 225226633Sdim 226309124Sdim /// If the given machine operand reads only undefined lanes add an undef 227309124Sdim /// flag. 228309124Sdim /// This can happen when undef uses were previously concealed by a copy 229309124Sdim /// which we coalesced. Example: 230327952Sdim /// %0:sub0<def,read-undef> = ... 231327952Sdim /// %1 = COPY %0 <-- Coalescing COPY reveals undef 232327952Sdim /// = use %1:sub1 <-- hidden undef use 233309124Sdim void addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx, 234309124Sdim MachineOperand &MO, unsigned SubRegIdx); 235309124Sdim 236280031Sdim /// Handle copies of undef values. 237288943Sdim /// Returns true if @p CopyMI was a copy of an undef value and eliminated. 238280031Sdim bool eliminateUndefCopy(MachineInstr *CopyMI); 239226633Sdim 240288943Sdim /// Check whether or not we should apply the terminal rule on the 241288943Sdim /// destination (Dst) of \p Copy. 242288943Sdim /// When the terminal rule applies, Copy is not profitable to 243288943Sdim /// coalesce. 244288943Sdim /// Dst is terminal if it has exactly one affinity (Dst, Src) and 245288943Sdim /// at least one interference (Dst, Dst2). If Dst is terminal, the 246288943Sdim /// terminal rule consists in checking that at least one of 247288943Sdim /// interfering node, say Dst2, has an affinity of equal or greater 248288943Sdim /// weight with Src. 249288943Sdim /// In that case, Dst2 and Dst will not be able to be both coalesced 250288943Sdim /// with Src. Since Dst2 exposes more coalescing opportunities than 251288943Sdim /// Dst, we can drop \p Copy. 252288943Sdim bool applyTerminalRule(const MachineInstr &Copy) const; 253288943Sdim 254288943Sdim /// Wrapper method for \see LiveIntervals::shrinkToUses. 255288943Sdim /// This method does the proper fixing of the live-ranges when the afore 256288943Sdim /// mentioned method returns true. 257288943Sdim void shrinkToUses(LiveInterval *LI, 258288943Sdim SmallVectorImpl<MachineInstr * > *Dead = nullptr) { 259296417Sdim if (LIS->shrinkToUses(LI, Dead)) { 260296417Sdim /// Check whether or not \p LI is composed by multiple connected 261296417Sdim /// components and if that is the case, fix that. 262296417Sdim SmallVector<LiveInterval*, 8> SplitLIs; 263296417Sdim LIS->splitSeparateComponents(*LI, SplitLIs); 264296417Sdim } 265288943Sdim } 266288943Sdim 267327952Sdim /// Wrapper Method to do all the necessary work when an Instruction is 268327952Sdim /// deleted. 269327952Sdim /// Optimizations should use this to make sure that deleted instructions 270327952Sdim /// are always accounted for. 271327952Sdim void deleteInstr(MachineInstr* MI) { 272327952Sdim ErasedInstrs.insert(MI); 273327952Sdim LIS->RemoveMachineInstrFromMaps(*MI); 274327952Sdim MI->eraseFromParent(); 275327952Sdim } 276327952Sdim 277226633Sdim public: 278288943Sdim static char ID; ///< Class identification, replacement for typeinfo 279327952Sdim 280226633Sdim RegisterCoalescer() : MachineFunctionPass(ID) { 281226633Sdim initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry()); 282226633Sdim } 283226633Sdim 284276479Sdim void getAnalysisUsage(AnalysisUsage &AU) const override; 285226633Sdim 286276479Sdim void releaseMemory() override; 287226633Sdim 288280031Sdim /// This is the pass entry point. 289276479Sdim bool runOnMachineFunction(MachineFunction&) override; 290226633Sdim 291280031Sdim /// Implement the dump method. 292276479Sdim void print(raw_ostream &O, const Module* = nullptr) const override; 293226633Sdim }; 294327952Sdim 295288943Sdim} // end anonymous namespace 296226633Sdim 297327952Sdimchar RegisterCoalescer::ID = 0; 298327952Sdim 299234353Sdimchar &llvm::RegisterCoalescerID = RegisterCoalescer::ID; 300226633Sdim 301224145SdimINITIALIZE_PASS_BEGIN(RegisterCoalescer, "simple-register-coalescing", 302224145Sdim "Simple Register Coalescing", false, false) 303224145SdimINITIALIZE_PASS_DEPENDENCY(LiveIntervals) 304224145SdimINITIALIZE_PASS_DEPENDENCY(SlotIndexes) 305224145SdimINITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) 306296417SdimINITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) 307224145SdimINITIALIZE_PASS_END(RegisterCoalescer, "simple-register-coalescing", 308224145Sdim "Simple Register Coalescing", false, false) 309224145Sdim 310224145Sdimstatic bool isMoveInstr(const TargetRegisterInfo &tri, const MachineInstr *MI, 311224145Sdim unsigned &Src, unsigned &Dst, 312224145Sdim unsigned &SrcSub, unsigned &DstSub) { 313210299Sed if (MI->isCopy()) { 314210299Sed Dst = MI->getOperand(0).getReg(); 315210299Sed DstSub = MI->getOperand(0).getSubReg(); 316210299Sed Src = MI->getOperand(1).getReg(); 317210299Sed SrcSub = MI->getOperand(1).getSubReg(); 318210299Sed } else if (MI->isSubregToReg()) { 319210299Sed Dst = MI->getOperand(0).getReg(); 320243830Sdim DstSub = tri.composeSubRegIndices(MI->getOperand(0).getSubReg(), 321243830Sdim MI->getOperand(3).getImm()); 322210299Sed Src = MI->getOperand(2).getReg(); 323210299Sed SrcSub = MI->getOperand(2).getSubReg(); 324212904Sdim } else 325210299Sed return false; 326210299Sed return true; 327210299Sed} 328210299Sed 329288943Sdim/// Return true if this block should be vacated by the coalescer to eliminate 330288943Sdim/// branches. The important cases to handle in the coalescer are critical edges 331288943Sdim/// split during phi elimination which contain only copies. Simple blocks that 332288943Sdim/// contain non-branches should also be vacated, but this can be handled by an 333288943Sdim/// earlier pass similar to early if-conversion. 334249423Sdimstatic bool isSplitEdge(const MachineBasicBlock *MBB) { 335249423Sdim if (MBB->pred_size() != 1 || MBB->succ_size() != 1) 336249423Sdim return false; 337249423Sdim 338276479Sdim for (const auto &MI : *MBB) { 339276479Sdim if (!MI.isCopyLike() && !MI.isUnconditionalBranch()) 340249423Sdim return false; 341249423Sdim } 342249423Sdim return true; 343249423Sdim} 344249423Sdim 345210299Sedbool CoalescerPair::setRegisters(const MachineInstr *MI) { 346239462Sdim SrcReg = DstReg = 0; 347239462Sdim SrcIdx = DstIdx = 0; 348276479Sdim NewRC = nullptr; 349226633Sdim Flipped = CrossClass = false; 350210299Sed 351210299Sed unsigned Src, Dst, SrcSub, DstSub; 352226633Sdim if (!isMoveInstr(TRI, MI, Src, Dst, SrcSub, DstSub)) 353210299Sed return false; 354226633Sdim Partial = SrcSub || DstSub; 355210299Sed 356210299Sed // If one register is a physreg, it must be Dst. 357210299Sed if (TargetRegisterInfo::isPhysicalRegister(Src)) { 358210299Sed if (TargetRegisterInfo::isPhysicalRegister(Dst)) 359210299Sed return false; 360210299Sed std::swap(Src, Dst); 361210299Sed std::swap(SrcSub, DstSub); 362226633Sdim Flipped = true; 363210299Sed } 364210299Sed 365327952Sdim const MachineRegisterInfo &MRI = MI->getMF()->getRegInfo(); 366210299Sed 367210299Sed if (TargetRegisterInfo::isPhysicalRegister(Dst)) { 368210299Sed // Eliminate DstSub on a physreg. 369210299Sed if (DstSub) { 370226633Sdim Dst = TRI.getSubReg(Dst, DstSub); 371210299Sed if (!Dst) return false; 372210299Sed DstSub = 0; 373210299Sed } 374210299Sed 375210299Sed // Eliminate SrcSub by picking a corresponding Dst superregister. 376210299Sed if (SrcSub) { 377226633Sdim Dst = TRI.getMatchingSuperReg(Dst, SrcSub, MRI.getRegClass(Src)); 378210299Sed if (!Dst) return false; 379210299Sed } else if (!MRI.getRegClass(Src)->contains(Dst)) { 380210299Sed return false; 381210299Sed } 382210299Sed } else { 383210299Sed // Both registers are virtual. 384239462Sdim const TargetRegisterClass *SrcRC = MRI.getRegClass(Src); 385239462Sdim const TargetRegisterClass *DstRC = MRI.getRegClass(Dst); 386210299Sed 387210299Sed // Both registers have subreg indices. 388210299Sed if (SrcSub && DstSub) { 389239462Sdim // Copies between different sub-registers are never coalescable. 390239462Sdim if (Src == Dst && SrcSub != DstSub) 391210299Sed return false; 392239462Sdim 393239462Sdim NewRC = TRI.getCommonSuperRegClass(SrcRC, SrcSub, DstRC, DstSub, 394239462Sdim SrcIdx, DstIdx); 395239462Sdim if (!NewRC) 396210299Sed return false; 397239462Sdim } else if (DstSub) { 398239462Sdim // SrcReg will be merged with a sub-register of DstReg. 399239462Sdim SrcIdx = DstSub; 400239462Sdim NewRC = TRI.getMatchingSuperRegClass(DstRC, SrcRC, DstSub); 401239462Sdim } else if (SrcSub) { 402239462Sdim // DstReg will be merged with a sub-register of SrcReg. 403239462Sdim DstIdx = SrcSub; 404239462Sdim NewRC = TRI.getMatchingSuperRegClass(SrcRC, DstRC, SrcSub); 405239462Sdim } else { 406239462Sdim // This is a straight copy without sub-registers. 407239462Sdim NewRC = TRI.getCommonSubClass(DstRC, SrcRC); 408210299Sed } 409210299Sed 410239462Sdim // The combined constraint may be impossible to satisfy. 411239462Sdim if (!NewRC) 412239462Sdim return false; 413239462Sdim 414239462Sdim // Prefer SrcReg to be a sub-register of DstReg. 415239462Sdim // FIXME: Coalescer should support subregs symmetrically. 416239462Sdim if (DstIdx && !SrcIdx) { 417210299Sed std::swap(Src, Dst); 418239462Sdim std::swap(SrcIdx, DstIdx); 419239462Sdim Flipped = !Flipped; 420210299Sed } 421210299Sed 422226633Sdim CrossClass = NewRC != DstRC || NewRC != SrcRC; 423210299Sed } 424210299Sed // Check our invariants 425210299Sed assert(TargetRegisterInfo::isVirtualRegister(Src) && "Src must be virtual"); 426210299Sed assert(!(TargetRegisterInfo::isPhysicalRegister(Dst) && DstSub) && 427210299Sed "Cannot have a physical SubIdx"); 428226633Sdim SrcReg = Src; 429226633Sdim DstReg = Dst; 430210299Sed return true; 431210299Sed} 432210299Sed 433210299Sedbool CoalescerPair::flip() { 434239462Sdim if (TargetRegisterInfo::isPhysicalRegister(DstReg)) 435210299Sed return false; 436226633Sdim std::swap(SrcReg, DstReg); 437239462Sdim std::swap(SrcIdx, DstIdx); 438226633Sdim Flipped = !Flipped; 439210299Sed return true; 440210299Sed} 441210299Sed 442210299Sedbool CoalescerPair::isCoalescable(const MachineInstr *MI) const { 443210299Sed if (!MI) 444210299Sed return false; 445210299Sed unsigned Src, Dst, SrcSub, DstSub; 446226633Sdim if (!isMoveInstr(TRI, MI, Src, Dst, SrcSub, DstSub)) 447210299Sed return false; 448210299Sed 449226633Sdim // Find the virtual register that is SrcReg. 450226633Sdim if (Dst == SrcReg) { 451210299Sed std::swap(Src, Dst); 452210299Sed std::swap(SrcSub, DstSub); 453226633Sdim } else if (Src != SrcReg) { 454210299Sed return false; 455210299Sed } 456210299Sed 457226633Sdim // Now check that Dst matches DstReg. 458226633Sdim if (TargetRegisterInfo::isPhysicalRegister(DstReg)) { 459210299Sed if (!TargetRegisterInfo::isPhysicalRegister(Dst)) 460210299Sed return false; 461239462Sdim assert(!DstIdx && !SrcIdx && "Inconsistent CoalescerPair state."); 462210299Sed // DstSub could be set for a physreg from INSERT_SUBREG. 463210299Sed if (DstSub) 464226633Sdim Dst = TRI.getSubReg(Dst, DstSub); 465210299Sed // Full copy of Src. 466210299Sed if (!SrcSub) 467226633Sdim return DstReg == Dst; 468210299Sed // This is a partial register copy. Check that the parts match. 469226633Sdim return TRI.getSubReg(DstReg, SrcSub) == Dst; 470210299Sed } else { 471226633Sdim // DstReg is virtual. 472226633Sdim if (DstReg != Dst) 473210299Sed return false; 474210299Sed // Registers match, do the subregisters line up? 475243830Sdim return TRI.composeSubRegIndices(SrcIdx, SrcSub) == 476243830Sdim TRI.composeSubRegIndices(DstIdx, DstSub); 477210299Sed } 478210299Sed} 479210299Sed 480224145Sdimvoid RegisterCoalescer::getAnalysisUsage(AnalysisUsage &AU) const { 481224145Sdim AU.setPreservesCFG(); 482296417Sdim AU.addRequired<AAResultsWrapperPass>(); 483224145Sdim AU.addRequired<LiveIntervals>(); 484224145Sdim AU.addPreserved<LiveIntervals>(); 485224145Sdim AU.addPreserved<SlotIndexes>(); 486224145Sdim AU.addRequired<MachineLoopInfo>(); 487224145Sdim AU.addPreserved<MachineLoopInfo>(); 488224145Sdim AU.addPreservedID(MachineDominatorsID); 489224145Sdim MachineFunctionPass::getAnalysisUsage(AU); 490224145Sdim} 491224145Sdim 492239462Sdimvoid RegisterCoalescer::eliminateDeadDefs() { 493261991Sdim SmallVector<unsigned, 8> NewRegs; 494276479Sdim LiveRangeEdit(nullptr, NewRegs, *MF, *LIS, 495276479Sdim nullptr, this).eliminateDeadDefs(DeadDefs); 496239462Sdim} 497224145Sdim 498239462Sdimvoid RegisterCoalescer::LRE_WillEraseInstruction(MachineInstr *MI) { 499239462Sdim // MI may be in WorkList. Make sure we don't visit it. 500239462Sdim ErasedInstrs.insert(MI); 501224145Sdim} 502224145Sdim 503239462Sdimbool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP, 504239462Sdim MachineInstr *CopyMI) { 505239462Sdim assert(!CP.isPartial() && "This doesn't work for partial copies."); 506239462Sdim assert(!CP.isPhys() && "This doesn't work for physreg copies."); 507224145Sdim 508224145Sdim LiveInterval &IntA = 509226633Sdim LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg()); 510224145Sdim LiveInterval &IntB = 511226633Sdim LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg()); 512309124Sdim SlotIndex CopyIdx = LIS->getInstructionIndex(*CopyMI).getRegSlot(); 513224145Sdim 514288943Sdim // We have a non-trivially-coalescable copy with IntA being the source and 515288943Sdim // IntB being the dest, thus this defines a value number in IntB. If the 516288943Sdim // source value number (in IntA) is defined by a copy from B, see if we can 517288943Sdim // merge these two pieces of B into a single value number, eliminating a copy. 518288943Sdim // For example: 519288943Sdim // 520288943Sdim // A3 = B0 521288943Sdim // ... 522288943Sdim // B1 = A3 <- this copy 523288943Sdim // 524288943Sdim // In this case, B0 can be extended to where the B1 copy lives, allowing the 525288943Sdim // B1 value number to be replaced with B0 (which simplifies the B 526288943Sdim // liveinterval). 527288943Sdim 528261991Sdim // BValNo is a value number in B that is defined by a copy from A. 'B1' in 529224145Sdim // the example above. 530261991Sdim LiveInterval::iterator BS = IntB.FindSegmentContaining(CopyIdx); 531261991Sdim if (BS == IntB.end()) return false; 532261991Sdim VNInfo *BValNo = BS->valno; 533224145Sdim 534224145Sdim // Get the location that B is defined at. Two options: either this value has 535224145Sdim // an unknown definition point or it is defined at CopyIdx. If unknown, we 536224145Sdim // can't process it. 537234353Sdim if (BValNo->def != CopyIdx) return false; 538224145Sdim 539224145Sdim // AValNo is the value number in A that defines the copy, A3 in the example. 540234353Sdim SlotIndex CopyUseIdx = CopyIdx.getRegSlot(true); 541261991Sdim LiveInterval::iterator AS = IntA.FindSegmentContaining(CopyUseIdx); 542261991Sdim // The live segment might not exist after fun with physreg coalescing. 543261991Sdim if (AS == IntA.end()) return false; 544261991Sdim VNInfo *AValNo = AS->valno; 545224145Sdim 546224145Sdim // If AValNo is defined as a copy from IntB, we can potentially process this. 547224145Sdim // Get the instruction that defines this value number. 548234353Sdim MachineInstr *ACopyMI = LIS->getInstructionFromIndex(AValNo->def); 549243830Sdim // Don't allow any partial copies, even if isCoalescable() allows them. 550243830Sdim if (!CP.isCoalescable(ACopyMI) || !ACopyMI->isFullCopy()) 551224145Sdim return false; 552224145Sdim 553261991Sdim // Get the Segment in IntB that this value number starts with. 554261991Sdim LiveInterval::iterator ValS = 555261991Sdim IntB.FindSegmentContaining(AValNo->def.getPrevSlot()); 556261991Sdim if (ValS == IntB.end()) 557224145Sdim return false; 558224145Sdim 559261991Sdim // Make sure that the end of the live segment is inside the same block as 560224145Sdim // CopyMI. 561261991Sdim MachineInstr *ValSEndInst = 562261991Sdim LIS->getInstructionFromIndex(ValS->end.getPrevSlot()); 563261991Sdim if (!ValSEndInst || ValSEndInst->getParent() != CopyMI->getParent()) 564224145Sdim return false; 565224145Sdim 566261991Sdim // Okay, we now know that ValS ends in the same block that the CopyMI 567261991Sdim // live-range starts. If there are no intervening live segments between them 568261991Sdim // in IntB, we can merge them. 569261991Sdim if (ValS+1 != BS) return false; 570224145Sdim 571327952Sdim DEBUG(dbgs() << "Extending: " << printReg(IntB.reg, TRI)); 572224145Sdim 573261991Sdim SlotIndex FillerStart = ValS->end, FillerEnd = BS->start; 574224145Sdim // We are about to delete CopyMI, so need to remove it as the 'instruction 575224145Sdim // that defines this value #'. Update the valnum with the new defining 576224145Sdim // instruction #. 577234353Sdim BValNo->def = FillerStart; 578224145Sdim 579224145Sdim // Okay, we can merge them. We need to insert a new liverange: 580261991Sdim // [ValS.end, BS.begin) of either value number, then we merge the 581224145Sdim // two value numbers. 582261991Sdim IntB.addSegment(LiveInterval::Segment(FillerStart, FillerEnd, BValNo)); 583224145Sdim 584224145Sdim // Okay, merge "B1" into the same value number as "B0". 585261991Sdim if (BValNo != ValS->valno) 586261991Sdim IntB.MergeValueNumberInto(BValNo, ValS->valno); 587280031Sdim 588280031Sdim // Do the same for the subregister segments. 589280031Sdim for (LiveInterval::SubRange &S : IntB.subranges()) { 590280031Sdim VNInfo *SubBValNo = S.getVNInfoAt(CopyIdx); 591280031Sdim S.addSegment(LiveInterval::Segment(FillerStart, FillerEnd, SubBValNo)); 592280031Sdim VNInfo *SubValSNo = S.getVNInfoAt(AValNo->def.getPrevSlot()); 593280031Sdim if (SubBValNo != SubValSNo) 594280031Sdim S.MergeValueNumberInto(SubBValNo, SubValSNo); 595280031Sdim } 596280031Sdim 597239462Sdim DEBUG(dbgs() << " result = " << IntB << '\n'); 598224145Sdim 599224145Sdim // If the source instruction was killing the source register before the 600224145Sdim // merge, unset the isKill marker given the live range has been extended. 601261991Sdim int UIdx = ValSEndInst->findRegisterUseOperandIdx(IntB.reg, true); 602224145Sdim if (UIdx != -1) { 603261991Sdim ValSEndInst->getOperand(UIdx).setIsKill(false); 604224145Sdim } 605224145Sdim 606234353Sdim // Rewrite the copy. If the copy instruction was killing the destination 607234353Sdim // register before the merge, find the last use and trim the live range. That 608234353Sdim // will also add the isKill marker. 609239462Sdim CopyMI->substituteRegister(IntA.reg, IntB.reg, 0, *TRI); 610261991Sdim if (AS->end == CopyIdx) 611288943Sdim shrinkToUses(&IntA); 612224145Sdim 613224145Sdim ++numExtends; 614224145Sdim return true; 615224145Sdim} 616224145Sdim 617239462Sdimbool RegisterCoalescer::hasOtherReachingDefs(LiveInterval &IntA, 618239462Sdim LiveInterval &IntB, 619239462Sdim VNInfo *AValNo, 620239462Sdim VNInfo *BValNo) { 621239462Sdim // If AValNo has PHI kills, conservatively assume that IntB defs can reach 622239462Sdim // the PHI values. 623239462Sdim if (LIS->hasPHIKill(IntA, AValNo)) 624239462Sdim return true; 625239462Sdim 626280031Sdim for (LiveRange::Segment &ASeg : IntA.segments) { 627280031Sdim if (ASeg.valno != AValNo) continue; 628261991Sdim LiveInterval::iterator BI = 629280031Sdim std::upper_bound(IntB.begin(), IntB.end(), ASeg.start); 630261991Sdim if (BI != IntB.begin()) 631224145Sdim --BI; 632280031Sdim for (; BI != IntB.end() && ASeg.end >= BI->start; ++BI) { 633224145Sdim if (BI->valno == BValNo) 634224145Sdim continue; 635280031Sdim if (BI->start <= ASeg.start && BI->end > ASeg.start) 636224145Sdim return true; 637280031Sdim if (BI->start > ASeg.start && BI->start < ASeg.end) 638224145Sdim return true; 639224145Sdim } 640224145Sdim } 641224145Sdim return false; 642224145Sdim} 643224145Sdim 644280031Sdim/// Copy segements with value number @p SrcValNo from liverange @p Src to live 645280031Sdim/// range @Dst and use value number @p DstValNo there. 646280031Sdimstatic void addSegmentsWithValNo(LiveRange &Dst, VNInfo *DstValNo, 647327952Sdim const LiveRange &Src, const VNInfo *SrcValNo) { 648280031Sdim for (const LiveRange::Segment &S : Src.segments) { 649280031Sdim if (S.valno != SrcValNo) 650280031Sdim continue; 651280031Sdim Dst.addSegment(LiveRange::Segment(S.start, S.end, DstValNo)); 652280031Sdim } 653280031Sdim} 654280031Sdim 655239462Sdimbool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, 656239462Sdim MachineInstr *CopyMI) { 657280031Sdim assert(!CP.isPhys()); 658224145Sdim 659224145Sdim LiveInterval &IntA = 660280031Sdim LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg()); 661224145Sdim LiveInterval &IntB = 662280031Sdim LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg()); 663224145Sdim 664288943Sdim // We found a non-trivially-coalescable copy with IntA being the source and 665288943Sdim // IntB being the dest, thus this defines a value number in IntB. If the 666288943Sdim // source value number (in IntA) is defined by a commutable instruction and 667288943Sdim // its other operand is coalesced to the copy dest register, see if we can 668288943Sdim // transform the copy into a noop by commuting the definition. For example, 669288943Sdim // 670327952Sdim // A3 = op A2 killed B0 671288943Sdim // ... 672288943Sdim // B1 = A3 <- this copy 673288943Sdim // ... 674288943Sdim // = op A3 <- more uses 675288943Sdim // 676288943Sdim // ==> 677288943Sdim // 678327952Sdim // B2 = op B0 killed A2 679288943Sdim // ... 680288943Sdim // B1 = B2 <- now an identity copy 681288943Sdim // ... 682288943Sdim // = op B2 <- more uses 683288943Sdim 684261991Sdim // BValNo is a value number in B that is defined by a copy from A. 'B1' in 685224145Sdim // the example above. 686309124Sdim SlotIndex CopyIdx = LIS->getInstructionIndex(*CopyMI).getRegSlot(); 687224145Sdim VNInfo *BValNo = IntB.getVNInfoAt(CopyIdx); 688280031Sdim assert(BValNo != nullptr && BValNo->def == CopyIdx); 689224145Sdim 690224145Sdim // AValNo is the value number in A that defines the copy, A3 in the example. 691234353Sdim VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx.getRegSlot(true)); 692280031Sdim assert(AValNo && !AValNo->isUnused() && "COPY source not live"); 693280031Sdim if (AValNo->isPHIDef()) 694224145Sdim return false; 695226633Sdim MachineInstr *DefMI = LIS->getInstructionFromIndex(AValNo->def); 696224145Sdim if (!DefMI) 697224145Sdim return false; 698234353Sdim if (!DefMI->isCommutable()) 699224145Sdim return false; 700224145Sdim // If DefMI is a two-address instruction then commuting it will change the 701224145Sdim // destination register. 702224145Sdim int DefIdx = DefMI->findRegisterDefOperandIdx(IntA.reg); 703224145Sdim assert(DefIdx != -1); 704224145Sdim unsigned UseOpIdx; 705224145Sdim if (!DefMI->isRegTiedToUseOperand(DefIdx, &UseOpIdx)) 706224145Sdim return false; 707296417Sdim 708296417Sdim // FIXME: The code below tries to commute 'UseOpIdx' operand with some other 709296417Sdim // commutable operand which is expressed by 'CommuteAnyOperandIndex'value 710296417Sdim // passed to the method. That _other_ operand is chosen by 711296417Sdim // the findCommutedOpIndices() method. 712296417Sdim // 713296417Sdim // That is obviously an area for improvement in case of instructions having 714296417Sdim // more than 2 operands. For example, if some instruction has 3 commutable 715296417Sdim // operands then all possible variants (i.e. op#1<->op#2, op#1<->op#3, 716296417Sdim // op#2<->op#3) of commute transformation should be considered/tried here. 717296417Sdim unsigned NewDstIdx = TargetInstrInfo::CommuteAnyOperandIndex; 718309124Sdim if (!TII->findCommutedOpIndices(*DefMI, UseOpIdx, NewDstIdx)) 719224145Sdim return false; 720224145Sdim 721224145Sdim MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx); 722224145Sdim unsigned NewReg = NewDstMO.getReg(); 723261991Sdim if (NewReg != IntB.reg || !IntB.Query(AValNo->def).isKill()) 724224145Sdim return false; 725224145Sdim 726224145Sdim // Make sure there are no other definitions of IntB that would reach the 727224145Sdim // uses which the new definition can reach. 728239462Sdim if (hasOtherReachingDefs(IntA, IntB, AValNo, BValNo)) 729224145Sdim return false; 730224145Sdim 731224145Sdim // If some of the uses of IntA.reg is already coalesced away, return false. 732224145Sdim // It's not possible to determine whether it's safe to perform the coalescing. 733276479Sdim for (MachineOperand &MO : MRI->use_nodbg_operands(IntA.reg)) { 734276479Sdim MachineInstr *UseMI = MO.getParent(); 735276479Sdim unsigned OpNo = &MO - &UseMI->getOperand(0); 736309124Sdim SlotIndex UseIdx = LIS->getInstructionIndex(*UseMI); 737261991Sdim LiveInterval::iterator US = IntA.FindSegmentContaining(UseIdx); 738261991Sdim if (US == IntA.end() || US->valno != AValNo) 739224145Sdim continue; 740239462Sdim // If this use is tied to a def, we can't rewrite the register. 741276479Sdim if (UseMI->isRegTiedToDefOperand(OpNo)) 742224145Sdim return false; 743224145Sdim } 744224145Sdim 745239462Sdim DEBUG(dbgs() << "\tremoveCopyByCommutingDef: " << AValNo->def << '\t' 746224145Sdim << *DefMI); 747224145Sdim 748224145Sdim // At this point we have decided that it is legal to do this 749224145Sdim // transformation. Start by commuting the instruction. 750224145Sdim MachineBasicBlock *MBB = DefMI->getParent(); 751296417Sdim MachineInstr *NewMI = 752309124Sdim TII->commuteInstruction(*DefMI, false, UseOpIdx, NewDstIdx); 753224145Sdim if (!NewMI) 754224145Sdim return false; 755224145Sdim if (TargetRegisterInfo::isVirtualRegister(IntA.reg) && 756224145Sdim TargetRegisterInfo::isVirtualRegister(IntB.reg) && 757226633Sdim !MRI->constrainRegClass(IntB.reg, MRI->getRegClass(IntA.reg))) 758224145Sdim return false; 759224145Sdim if (NewMI != DefMI) { 760309124Sdim LIS->ReplaceMachineInstrInMaps(*DefMI, *NewMI); 761234353Sdim MachineBasicBlock::iterator Pos = DefMI; 762234353Sdim MBB->insert(Pos, NewMI); 763224145Sdim MBB->erase(DefMI); 764224145Sdim } 765224145Sdim 766224145Sdim // If ALR and BLR overlaps and end of BLR extends beyond end of ALR, e.g. 767224145Sdim // A = or A, B 768224145Sdim // ... 769224145Sdim // B = A 770224145Sdim // ... 771327952Sdim // C = killed A 772224145Sdim // ... 773224145Sdim // = B 774224145Sdim 775224145Sdim // Update uses of IntA of the specific Val# with IntB. 776226633Sdim for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(IntA.reg), 777280031Sdim UE = MRI->use_end(); 778280031Sdim UI != UE; /* ++UI is below because of possible MI removal */) { 779276479Sdim MachineOperand &UseMO = *UI; 780280031Sdim ++UI; 781280031Sdim if (UseMO.isUndef()) 782280031Sdim continue; 783276479Sdim MachineInstr *UseMI = UseMO.getParent(); 784224145Sdim if (UseMI->isDebugValue()) { 785224145Sdim // FIXME These don't have an instruction index. Not clear we have enough 786224145Sdim // info to decide whether to do this replacement or not. For now do it. 787224145Sdim UseMO.setReg(NewReg); 788224145Sdim continue; 789224145Sdim } 790309124Sdim SlotIndex UseIdx = LIS->getInstructionIndex(*UseMI).getRegSlot(true); 791261991Sdim LiveInterval::iterator US = IntA.FindSegmentContaining(UseIdx); 792280031Sdim assert(US != IntA.end() && "Use must be live"); 793280031Sdim if (US->valno != AValNo) 794224145Sdim continue; 795239462Sdim // Kill flags are no longer accurate. They are recomputed after RA. 796239462Sdim UseMO.setIsKill(false); 797224145Sdim if (TargetRegisterInfo::isPhysicalRegister(NewReg)) 798226633Sdim UseMO.substPhysReg(NewReg, *TRI); 799224145Sdim else 800224145Sdim UseMO.setReg(NewReg); 801224145Sdim if (UseMI == CopyMI) 802224145Sdim continue; 803224145Sdim if (!UseMI->isCopy()) 804224145Sdim continue; 805224145Sdim if (UseMI->getOperand(0).getReg() != IntB.reg || 806224145Sdim UseMI->getOperand(0).getSubReg()) 807224145Sdim continue; 808224145Sdim 809224145Sdim // This copy will become a noop. If it's defining a new val#, merge it into 810224145Sdim // BValNo. 811234353Sdim SlotIndex DefIdx = UseIdx.getRegSlot(); 812224145Sdim VNInfo *DVNI = IntB.getVNInfoAt(DefIdx); 813224145Sdim if (!DVNI) 814224145Sdim continue; 815224145Sdim DEBUG(dbgs() << "\t\tnoop: " << DefIdx << '\t' << *UseMI); 816224145Sdim assert(DVNI->def == DefIdx); 817288943Sdim BValNo = IntB.MergeValueNumberInto(DVNI, BValNo); 818280031Sdim for (LiveInterval::SubRange &S : IntB.subranges()) { 819280031Sdim VNInfo *SubDVNI = S.getVNInfoAt(DefIdx); 820280031Sdim if (!SubDVNI) 821280031Sdim continue; 822280031Sdim VNInfo *SubBValNo = S.getVNInfoAt(CopyIdx); 823280031Sdim assert(SubBValNo->def == CopyIdx); 824288943Sdim S.MergeValueNumberInto(SubDVNI, SubBValNo); 825280031Sdim } 826280031Sdim 827327952Sdim deleteInstr(UseMI); 828224145Sdim } 829224145Sdim 830261991Sdim // Extend BValNo by merging in IntA live segments of AValNo. Val# definition 831224145Sdim // is updated. 832280031Sdim BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator(); 833280031Sdim if (IntB.hasSubRanges()) { 834280031Sdim if (!IntA.hasSubRanges()) { 835296417Sdim LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(IntA.reg); 836280031Sdim IntA.createSubRangeFrom(Allocator, Mask, IntA); 837280031Sdim } 838280031Sdim SlotIndex AIdx = CopyIdx.getRegSlot(true); 839280031Sdim for (LiveInterval::SubRange &SA : IntA.subranges()) { 840280031Sdim VNInfo *ASubValNo = SA.getVNInfoAt(AIdx); 841280031Sdim assert(ASubValNo != nullptr); 842280031Sdim 843321369Sdim IntB.refineSubRanges(Allocator, SA.LaneMask, 844321369Sdim [&Allocator,&SA,CopyIdx,ASubValNo](LiveInterval::SubRange &SR) { 845321369Sdim VNInfo *BSubValNo = SR.empty() 846321369Sdim ? SR.getNextValue(CopyIdx, Allocator) 847321369Sdim : SR.getVNInfoAt(CopyIdx); 848321369Sdim assert(BSubValNo != nullptr); 849321369Sdim addSegmentsWithValNo(SR, BSubValNo, SA, ASubValNo); 850321369Sdim }); 851280031Sdim } 852224145Sdim } 853280031Sdim 854280031Sdim BValNo->def = AValNo->def; 855280031Sdim addSegmentsWithValNo(IntB, BValNo, IntA, AValNo); 856224145Sdim DEBUG(dbgs() << "\t\textended: " << IntB << '\n'); 857224145Sdim 858288943Sdim LIS->removeVRegDefAt(IntA, AValNo->def); 859288943Sdim 860224145Sdim DEBUG(dbgs() << "\t\ttrimmed: " << IntA << '\n'); 861224145Sdim ++numCommutes; 862224145Sdim return true; 863224145Sdim} 864224145Sdim 865321369Sdim/// For copy B = A in BB2, if A is defined by A = B in BB0 which is a 866321369Sdim/// predecessor of BB2, and if B is not redefined on the way from A = B 867321369Sdim/// in BB2 to B = A in BB2, B = A in BB2 is partially redundant if the 868321369Sdim/// execution goes through the path from BB0 to BB2. We may move B = A 869321369Sdim/// to the predecessor without such reversed copy. 870321369Sdim/// So we will transform the program from: 871321369Sdim/// BB0: 872321369Sdim/// A = B; BB1: 873321369Sdim/// ... ... 874321369Sdim/// / \ / 875321369Sdim/// BB2: 876321369Sdim/// ... 877321369Sdim/// B = A; 878321369Sdim/// 879321369Sdim/// to: 880321369Sdim/// 881321369Sdim/// BB0: BB1: 882321369Sdim/// A = B; ... 883321369Sdim/// ... B = A; 884321369Sdim/// / \ / 885321369Sdim/// BB2: 886321369Sdim/// ... 887321369Sdim/// 888321369Sdim/// A special case is when BB0 and BB2 are the same BB which is the only 889321369Sdim/// BB in a loop: 890321369Sdim/// BB1: 891321369Sdim/// ... 892321369Sdim/// BB0/BB2: ---- 893321369Sdim/// B = A; | 894321369Sdim/// ... | 895321369Sdim/// A = B; | 896321369Sdim/// |------- 897321369Sdim/// | 898321369Sdim/// We may hoist B = A from BB0/BB2 to BB1. 899321369Sdim/// 900321369Sdim/// The major preconditions for correctness to remove such partial 901321369Sdim/// redundancy include: 902321369Sdim/// 1. A in B = A in BB2 is defined by a PHI in BB2, and one operand of 903321369Sdim/// the PHI is defined by the reversed copy A = B in BB0. 904321369Sdim/// 2. No B is referenced from the start of BB2 to B = A. 905321369Sdim/// 3. No B is defined from A = B to the end of BB0. 906321369Sdim/// 4. BB1 has only one successor. 907321369Sdim/// 908321369Sdim/// 2 and 4 implicitly ensure B is not live at the end of BB1. 909321369Sdim/// 4 guarantees BB2 is hotter than BB1, so we can only move a copy to a 910321369Sdim/// colder place, which not only prevent endless loop, but also make sure 911321369Sdim/// the movement of copy is beneficial. 912321369Sdimbool RegisterCoalescer::removePartialRedundancy(const CoalescerPair &CP, 913321369Sdim MachineInstr &CopyMI) { 914321369Sdim assert(!CP.isPhys()); 915321369Sdim if (!CopyMI.isFullCopy()) 916321369Sdim return false; 917321369Sdim 918321369Sdim MachineBasicBlock &MBB = *CopyMI.getParent(); 919321369Sdim if (MBB.isEHPad()) 920321369Sdim return false; 921321369Sdim 922321369Sdim if (MBB.pred_size() != 2) 923321369Sdim return false; 924321369Sdim 925321369Sdim LiveInterval &IntA = 926321369Sdim LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg()); 927321369Sdim LiveInterval &IntB = 928321369Sdim LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg()); 929321369Sdim 930321369Sdim // A is defined by PHI at the entry of MBB. 931321369Sdim SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(true); 932321369Sdim VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx); 933321369Sdim assert(AValNo && !AValNo->isUnused() && "COPY source not live"); 934321369Sdim if (!AValNo->isPHIDef()) 935321369Sdim return false; 936321369Sdim 937321369Sdim // No B is referenced before CopyMI in MBB. 938321369Sdim if (IntB.overlaps(LIS->getMBBStartIdx(&MBB), CopyIdx)) 939321369Sdim return false; 940321369Sdim 941321369Sdim // MBB has two predecessors: one contains A = B so no copy will be inserted 942321369Sdim // for it. The other one will have a copy moved from MBB. 943321369Sdim bool FoundReverseCopy = false; 944321369Sdim MachineBasicBlock *CopyLeftBB = nullptr; 945321369Sdim for (MachineBasicBlock *Pred : MBB.predecessors()) { 946321369Sdim VNInfo *PVal = IntA.getVNInfoBefore(LIS->getMBBEndIdx(Pred)); 947321369Sdim MachineInstr *DefMI = LIS->getInstructionFromIndex(PVal->def); 948321369Sdim if (!DefMI || !DefMI->isFullCopy()) { 949321369Sdim CopyLeftBB = Pred; 950321369Sdim continue; 951321369Sdim } 952321369Sdim // Check DefMI is a reverse copy and it is in BB Pred. 953321369Sdim if (DefMI->getOperand(0).getReg() != IntA.reg || 954321369Sdim DefMI->getOperand(1).getReg() != IntB.reg || 955321369Sdim DefMI->getParent() != Pred) { 956321369Sdim CopyLeftBB = Pred; 957321369Sdim continue; 958321369Sdim } 959321369Sdim // If there is any other def of B after DefMI and before the end of Pred, 960321369Sdim // we need to keep the copy of B = A at the end of Pred if we remove 961321369Sdim // B = A from MBB. 962321369Sdim bool ValB_Changed = false; 963321369Sdim for (auto VNI : IntB.valnos) { 964321369Sdim if (VNI->isUnused()) 965321369Sdim continue; 966321369Sdim if (PVal->def < VNI->def && VNI->def < LIS->getMBBEndIdx(Pred)) { 967321369Sdim ValB_Changed = true; 968321369Sdim break; 969321369Sdim } 970321369Sdim } 971321369Sdim if (ValB_Changed) { 972321369Sdim CopyLeftBB = Pred; 973321369Sdim continue; 974321369Sdim } 975321369Sdim FoundReverseCopy = true; 976321369Sdim } 977321369Sdim 978321369Sdim // If no reverse copy is found in predecessors, nothing to do. 979321369Sdim if (!FoundReverseCopy) 980321369Sdim return false; 981321369Sdim 982321369Sdim // If CopyLeftBB is nullptr, it means every predecessor of MBB contains 983321369Sdim // reverse copy, CopyMI can be removed trivially if only IntA/IntB is updated. 984321369Sdim // If CopyLeftBB is not nullptr, move CopyMI from MBB to CopyLeftBB and 985321369Sdim // update IntA/IntB. 986321369Sdim // 987321369Sdim // If CopyLeftBB is not nullptr, ensure CopyLeftBB has a single succ so 988321369Sdim // MBB is hotter than CopyLeftBB. 989321369Sdim if (CopyLeftBB && CopyLeftBB->succ_size() > 1) 990321369Sdim return false; 991321369Sdim 992321369Sdim // Now ok to move copy. 993321369Sdim if (CopyLeftBB) { 994327952Sdim DEBUG(dbgs() << "\tremovePartialRedundancy: Move the copy to " 995327952Sdim << printMBBReference(*CopyLeftBB) << '\t' << CopyMI); 996321369Sdim 997321369Sdim // Insert new copy to CopyLeftBB. 998321369Sdim auto InsPos = CopyLeftBB->getFirstTerminator(); 999321369Sdim MachineInstr *NewCopyMI = BuildMI(*CopyLeftBB, InsPos, CopyMI.getDebugLoc(), 1000321369Sdim TII->get(TargetOpcode::COPY), IntB.reg) 1001321369Sdim .addReg(IntA.reg); 1002321369Sdim SlotIndex NewCopyIdx = 1003321369Sdim LIS->InsertMachineInstrInMaps(*NewCopyMI).getRegSlot(); 1004321369Sdim IntB.createDeadDef(NewCopyIdx, LIS->getVNInfoAllocator()); 1005321369Sdim for (LiveInterval::SubRange &SR : IntB.subranges()) 1006321369Sdim SR.createDeadDef(NewCopyIdx, LIS->getVNInfoAllocator()); 1007321369Sdim 1008321369Sdim // If the newly created Instruction has an address of an instruction that was 1009321369Sdim // deleted before (object recycled by the allocator) it needs to be removed from 1010321369Sdim // the deleted list. 1011321369Sdim ErasedInstrs.erase(NewCopyMI); 1012321369Sdim } else { 1013327952Sdim DEBUG(dbgs() << "\tremovePartialRedundancy: Remove the copy from " 1014327952Sdim << printMBBReference(MBB) << '\t' << CopyMI); 1015321369Sdim } 1016321369Sdim 1017321369Sdim // Remove CopyMI. 1018321369Sdim // Note: This is fine to remove the copy before updating the live-ranges. 1019321369Sdim // While updating the live-ranges, we only look at slot indices and 1020321369Sdim // never go back to the instruction. 1021321369Sdim // Mark instructions as deleted. 1022327952Sdim deleteInstr(&CopyMI); 1023321369Sdim 1024321369Sdim // Update the liveness. 1025321369Sdim SmallVector<SlotIndex, 8> EndPoints; 1026321369Sdim VNInfo *BValNo = IntB.Query(CopyIdx).valueOutOrDead(); 1027321369Sdim LIS->pruneValue(*static_cast<LiveRange *>(&IntB), CopyIdx.getRegSlot(), 1028321369Sdim &EndPoints); 1029321369Sdim BValNo->markUnused(); 1030321369Sdim // Extend IntB to the EndPoints of its original live interval. 1031321369Sdim LIS->extendToIndices(IntB, EndPoints); 1032321369Sdim 1033321369Sdim // Now, do the same for its subranges. 1034321369Sdim for (LiveInterval::SubRange &SR : IntB.subranges()) { 1035321369Sdim EndPoints.clear(); 1036321369Sdim VNInfo *BValNo = SR.Query(CopyIdx).valueOutOrDead(); 1037321369Sdim assert(BValNo && "All sublanes should be live"); 1038321369Sdim LIS->pruneValue(SR, CopyIdx.getRegSlot(), &EndPoints); 1039321369Sdim BValNo->markUnused(); 1040321369Sdim LIS->extendToIndices(SR, EndPoints); 1041321369Sdim } 1042321369Sdim 1043321369Sdim // Finally, update the live-range of IntA. 1044321369Sdim shrinkToUses(&IntA); 1045321369Sdim return true; 1046321369Sdim} 1047321369Sdim 1048288943Sdim/// Returns true if @p MI defines the full vreg @p Reg, as opposed to just 1049288943Sdim/// defining a subregister. 1050288943Sdimstatic bool definesFullReg(const MachineInstr &MI, unsigned Reg) { 1051288943Sdim assert(!TargetRegisterInfo::isPhysicalRegister(Reg) && 1052288943Sdim "This code cannot handle physreg aliasing"); 1053288943Sdim for (const MachineOperand &Op : MI.operands()) { 1054288943Sdim if (!Op.isReg() || !Op.isDef() || Op.getReg() != Reg) 1055288943Sdim continue; 1056288943Sdim // Return true if we define the full register or don't care about the value 1057288943Sdim // inside other subregisters. 1058288943Sdim if (Op.getSubReg() == 0 || Op.isUndef()) 1059288943Sdim return true; 1060288943Sdim } 1061288943Sdim return false; 1062288943Sdim} 1063288943Sdim 1064288943Sdimbool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, 1065261991Sdim MachineInstr *CopyMI, 1066261991Sdim bool &IsDefCopy) { 1067261991Sdim IsDefCopy = false; 1068249423Sdim unsigned SrcReg = CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg(); 1069261991Sdim unsigned SrcIdx = CP.isFlipped() ? CP.getDstIdx() : CP.getSrcIdx(); 1070249423Sdim unsigned DstReg = CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg(); 1071261991Sdim unsigned DstIdx = CP.isFlipped() ? CP.getSrcIdx() : CP.getDstIdx(); 1072249423Sdim if (TargetRegisterInfo::isPhysicalRegister(SrcReg)) 1073249423Sdim return false; 1074249423Sdim 1075249423Sdim LiveInterval &SrcInt = LIS->getInterval(SrcReg); 1076309124Sdim SlotIndex CopyIdx = LIS->getInstructionIndex(*CopyMI); 1077261991Sdim VNInfo *ValNo = SrcInt.Query(CopyIdx).valueIn(); 1078261991Sdim assert(ValNo && "CopyMI input register not live"); 1079226633Sdim if (ValNo->isPHIDef() || ValNo->isUnused()) 1080224145Sdim return false; 1081226633Sdim MachineInstr *DefMI = LIS->getInstructionFromIndex(ValNo->def); 1082224145Sdim if (!DefMI) 1083224145Sdim return false; 1084261991Sdim if (DefMI->isCopyLike()) { 1085261991Sdim IsDefCopy = true; 1086261991Sdim return false; 1087261991Sdim } 1088309124Sdim if (!TII->isAsCheapAsAMove(*DefMI)) 1089224145Sdim return false; 1090309124Sdim if (!TII->isTriviallyReMaterializable(*DefMI, AA)) 1091224145Sdim return false; 1092288943Sdim if (!definesFullReg(*DefMI, SrcReg)) 1093288943Sdim return false; 1094224145Sdim bool SawStore = false; 1095288943Sdim if (!DefMI->isSafeToMove(AA, SawStore)) 1096224145Sdim return false; 1097234353Sdim const MCInstrDesc &MCID = DefMI->getDesc(); 1098224145Sdim if (MCID.getNumDefs() != 1) 1099224145Sdim return false; 1100249423Sdim // Only support subregister destinations when the def is read-undef. 1101249423Sdim MachineOperand &DstOperand = CopyMI->getOperand(0); 1102261991Sdim unsigned CopyDstReg = DstOperand.getReg(); 1103249423Sdim if (DstOperand.getSubReg() && !DstOperand.isUndef()) 1104249423Sdim return false; 1105261991Sdim 1106276479Sdim // If both SrcIdx and DstIdx are set, correct rematerialization would widen 1107276479Sdim // the register substantially (beyond both source and dest size). This is bad 1108276479Sdim // for performance since it can cascade through a function, introducing many 1109276479Sdim // extra spills and fills (e.g. ARM can easily end up copying QQQQPR registers 1110276479Sdim // around after a few subreg copies). 1111276479Sdim if (SrcIdx && DstIdx) 1112276479Sdim return false; 1113276479Sdim 1114261991Sdim const TargetRegisterClass *DefRC = TII->getRegClass(MCID, 0, TRI, *MF); 1115224145Sdim if (!DefMI->isImplicitDef()) { 1116261991Sdim if (TargetRegisterInfo::isPhysicalRegister(DstReg)) { 1117261991Sdim unsigned NewDstReg = DstReg; 1118261991Sdim 1119261991Sdim unsigned NewDstIdx = TRI->composeSubRegIndices(CP.getSrcIdx(), 1120261991Sdim DefMI->getOperand(0).getSubReg()); 1121261991Sdim if (NewDstIdx) 1122261991Sdim NewDstReg = TRI->getSubReg(DstReg, NewDstIdx); 1123261991Sdim 1124261991Sdim // Finally, make sure that the physical subregister that will be 1125261991Sdim // constructed later is permitted for the instruction. 1126261991Sdim if (!DefRC->contains(NewDstReg)) 1127224145Sdim return false; 1128261991Sdim } else { 1129261991Sdim // Theoretically, some stack frame reference could exist. Just make sure 1130261991Sdim // it hasn't actually happened. 1131261991Sdim assert(TargetRegisterInfo::isVirtualRegister(DstReg) && 1132261991Sdim "Only expect to deal with virtual or physical registers"); 1133261991Sdim } 1134224145Sdim } 1135224145Sdim 1136309124Sdim DebugLoc DL = CopyMI->getDebugLoc(); 1137224145Sdim MachineBasicBlock *MBB = CopyMI->getParent(); 1138224145Sdim MachineBasicBlock::iterator MII = 1139276479Sdim std::next(MachineBasicBlock::iterator(CopyMI)); 1140309124Sdim TII->reMaterialize(*MBB, MII, DstReg, SrcIdx, *DefMI, *TRI); 1141309124Sdim MachineInstr &NewMI = *std::prev(MII); 1142309124Sdim NewMI.setDebugLoc(DL); 1143224145Sdim 1144288943Sdim // In a situation like the following: 1145327952Sdim // %0:subreg = instr ; DefMI, subreg = DstIdx 1146327952Sdim // %1 = copy %0:subreg ; CopyMI, SrcIdx = 0 1147327952Sdim // instead of widening %1 to the register class of %0 simply do: 1148327952Sdim // %1 = instr 1149288943Sdim const TargetRegisterClass *NewRC = CP.getNewRC(); 1150288943Sdim if (DstIdx != 0) { 1151309124Sdim MachineOperand &DefMO = NewMI.getOperand(0); 1152288943Sdim if (DefMO.getSubReg() == DstIdx) { 1153288943Sdim assert(SrcIdx == 0 && CP.isFlipped() 1154288943Sdim && "Shouldn't have SrcIdx+DstIdx at this point"); 1155288943Sdim const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg); 1156288943Sdim const TargetRegisterClass *CommonRC = 1157288943Sdim TRI->getCommonSubClass(DefRC, DstRC); 1158288943Sdim if (CommonRC != nullptr) { 1159288943Sdim NewRC = CommonRC; 1160288943Sdim DstIdx = 0; 1161288943Sdim DefMO.setSubReg(0); 1162314564Sdim DefMO.setIsUndef(false); // Only subregs can have def+undef. 1163288943Sdim } 1164288943Sdim } 1165288943Sdim } 1166288943Sdim 1167309124Sdim // CopyMI may have implicit operands, save them so that we can transfer them 1168309124Sdim // over to the newly materialized instruction after CopyMI is removed. 1169309124Sdim SmallVector<MachineOperand, 4> ImplicitOps; 1170309124Sdim ImplicitOps.reserve(CopyMI->getNumOperands() - 1171309124Sdim CopyMI->getDesc().getNumOperands()); 1172309124Sdim for (unsigned I = CopyMI->getDesc().getNumOperands(), 1173309124Sdim E = CopyMI->getNumOperands(); 1174309124Sdim I != E; ++I) { 1175309124Sdim MachineOperand &MO = CopyMI->getOperand(I); 1176309124Sdim if (MO.isReg()) { 1177309124Sdim assert(MO.isImplicit() && "No explicit operands after implict operands."); 1178309124Sdim // Discard VReg implicit defs. 1179309124Sdim if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) 1180309124Sdim ImplicitOps.push_back(MO); 1181309124Sdim } 1182309124Sdim } 1183309124Sdim 1184309124Sdim LIS->ReplaceMachineInstrInMaps(*CopyMI, NewMI); 1185261991Sdim CopyMI->eraseFromParent(); 1186261991Sdim ErasedInstrs.insert(CopyMI); 1187249423Sdim 1188234353Sdim // NewMI may have dead implicit defs (E.g. EFLAGS for MOV<bits>r0 on X86). 1189234353Sdim // We need to remember these so we can add intervals once we insert 1190234353Sdim // NewMI into SlotIndexes. 1191234353Sdim SmallVector<unsigned, 4> NewMIImplDefs; 1192309124Sdim for (unsigned i = NewMI.getDesc().getNumOperands(), 1193309124Sdim e = NewMI.getNumOperands(); 1194309124Sdim i != e; ++i) { 1195309124Sdim MachineOperand &MO = NewMI.getOperand(i); 1196288943Sdim if (MO.isReg() && MO.isDef()) { 1197288943Sdim assert(MO.isImplicit() && MO.isDead() && 1198234353Sdim TargetRegisterInfo::isPhysicalRegister(MO.getReg())); 1199234353Sdim NewMIImplDefs.push_back(MO.getReg()); 1200234353Sdim } 1201234353Sdim } 1202234353Sdim 1203261991Sdim if (TargetRegisterInfo::isVirtualRegister(DstReg)) { 1204309124Sdim unsigned NewIdx = NewMI.getOperand(0).getSubReg(); 1205276479Sdim 1206288943Sdim if (DefRC != nullptr) { 1207288943Sdim if (NewIdx) 1208288943Sdim NewRC = TRI->getMatchingSuperRegClass(NewRC, DefRC, NewIdx); 1209288943Sdim else 1210288943Sdim NewRC = TRI->getCommonSubClass(NewRC, DefRC); 1211288943Sdim assert(NewRC && "subreg chosen for remat incompatible with instruction"); 1212288943Sdim } 1213309124Sdim // Remap subranges to new lanemask and change register class. 1214309124Sdim LiveInterval &DstInt = LIS->getInterval(DstReg); 1215309124Sdim for (LiveInterval::SubRange &SR : DstInt.subranges()) { 1216309124Sdim SR.LaneMask = TRI->composeSubRegIndexLaneMask(DstIdx, SR.LaneMask); 1217309124Sdim } 1218276479Sdim MRI->setRegClass(DstReg, NewRC); 1219276479Sdim 1220309124Sdim // Update machine operands and add flags. 1221276479Sdim updateRegDefsUses(DstReg, DstReg, DstIdx); 1222309124Sdim NewMI.getOperand(0).setSubReg(NewIdx); 1223309124Sdim // Add dead subregister definitions if we are defining the whole register 1224309124Sdim // but only part of it is live. 1225309124Sdim // This could happen if the rematerialization instruction is rematerializing 1226309124Sdim // more than actually is used in the register. 1227309124Sdim // An example would be: 1228327952Sdim // %1 = LOAD CONSTANTS 5, 8 ; Loading both 5 and 8 in different subregs 1229309124Sdim // ; Copying only part of the register here, but the rest is undef. 1230327952Sdim // %2:sub_16bit<def, read-undef> = COPY %1:sub_16bit 1231309124Sdim // ==> 1232309124Sdim // ; Materialize all the constants but only using one 1233327952Sdim // %2 = LOAD_CONSTANTS 5, 8 1234309124Sdim // 1235309124Sdim // at this point for the part that wasn't defined before we could have 1236309124Sdim // subranges missing the definition. 1237309124Sdim if (NewIdx == 0 && DstInt.hasSubRanges()) { 1238309124Sdim SlotIndex CurrIdx = LIS->getInstructionIndex(NewMI); 1239309124Sdim SlotIndex DefIndex = 1240309124Sdim CurrIdx.getRegSlot(NewMI.getOperand(0).isEarlyClobber()); 1241309124Sdim LaneBitmask MaxMask = MRI->getMaxLaneMaskForVReg(DstReg); 1242309124Sdim VNInfo::Allocator& Alloc = LIS->getVNInfoAllocator(); 1243309124Sdim for (LiveInterval::SubRange &SR : DstInt.subranges()) { 1244309124Sdim if (!SR.liveAt(DefIndex)) 1245309124Sdim SR.createDeadDef(DefIndex, Alloc); 1246309124Sdim MaxMask &= ~SR.LaneMask; 1247309124Sdim } 1248314564Sdim if (MaxMask.any()) { 1249309124Sdim LiveInterval::SubRange *SR = DstInt.createSubRange(Alloc, MaxMask); 1250309124Sdim SR->createDeadDef(DefIndex, Alloc); 1251309124Sdim } 1252309124Sdim } 1253321369Sdim 1254321369Sdim // Make sure that the subrange for resultant undef is removed 1255321369Sdim // For example: 1256327952Sdim // %1:sub1<def,read-undef> = LOAD CONSTANT 1 1257327952Sdim // %2 = COPY %1 1258321369Sdim // ==> 1259327952Sdim // %2:sub1<def, read-undef> = LOAD CONSTANT 1 1260327952Sdim // ; Correct but need to remove the subrange for %2:sub0 1261321369Sdim // ; as it is now undef 1262321369Sdim if (NewIdx != 0 && DstInt.hasSubRanges()) { 1263321369Sdim // The affected subregister segments can be removed. 1264321369Sdim SlotIndex CurrIdx = LIS->getInstructionIndex(NewMI); 1265321369Sdim LaneBitmask DstMask = TRI->getSubRegIndexLaneMask(NewIdx); 1266321369Sdim bool UpdatedSubRanges = false; 1267321369Sdim for (LiveInterval::SubRange &SR : DstInt.subranges()) { 1268321369Sdim if ((SR.LaneMask & DstMask).none()) { 1269321369Sdim DEBUG(dbgs() << "Removing undefined SubRange " 1270321369Sdim << PrintLaneMask(SR.LaneMask) << " : " << SR << "\n"); 1271321369Sdim // VNI is in ValNo - remove any segments in this SubRange that have this ValNo 1272321369Sdim if (VNInfo *RmValNo = SR.getVNInfoAt(CurrIdx.getRegSlot())) { 1273321369Sdim SR.removeValNo(RmValNo); 1274321369Sdim UpdatedSubRanges = true; 1275321369Sdim } 1276321369Sdim } 1277321369Sdim } 1278321369Sdim if (UpdatedSubRanges) 1279321369Sdim DstInt.removeEmptySubRanges(); 1280321369Sdim } 1281309124Sdim } else if (NewMI.getOperand(0).getReg() != CopyDstReg) { 1282261991Sdim // The New instruction may be defining a sub-register of what's actually 1283261991Sdim // been asked for. If so it must implicitly define the whole thing. 1284261991Sdim assert(TargetRegisterInfo::isPhysicalRegister(DstReg) && 1285261991Sdim "Only expect virtual or physical registers in remat"); 1286309124Sdim NewMI.getOperand(0).setIsDead(true); 1287309124Sdim NewMI.addOperand(MachineOperand::CreateReg( 1288309124Sdim CopyDstReg, true /*IsDef*/, true /*IsImp*/, false /*IsKill*/)); 1289276479Sdim // Record small dead def live-ranges for all the subregisters 1290276479Sdim // of the destination register. 1291276479Sdim // Otherwise, variables that live through may miss some 1292276479Sdim // interferences, thus creating invalid allocation. 1293276479Sdim // E.g., i386 code: 1294327952Sdim // %1 = somedef ; %1 GR8 1295327952Sdim // %2 = remat ; %2 GR32 1296327952Sdim // CL = COPY %2.sub_8bit 1297327952Sdim // = somedef %1 ; %1 GR8 1298276479Sdim // => 1299327952Sdim // %1 = somedef ; %1 GR8 1300327952Sdim // dead ECX = remat ; implicit-def CL 1301327952Sdim // = somedef %1 ; %1 GR8 1302327952Sdim // %1 will see the inteferences with CL but not with CH since 1303276479Sdim // no live-ranges would have been created for ECX. 1304276479Sdim // Fix that! 1305276479Sdim SlotIndex NewMIIdx = LIS->getInstructionIndex(NewMI); 1306309124Sdim for (MCRegUnitIterator Units(NewMI.getOperand(0).getReg(), TRI); 1307276479Sdim Units.isValid(); ++Units) 1308276479Sdim if (LiveRange *LR = LIS->getCachedRegUnit(*Units)) 1309276479Sdim LR->createDeadDef(NewMIIdx.getRegSlot(), LIS->getVNInfoAllocator()); 1310261991Sdim } 1311261991Sdim 1312309124Sdim if (NewMI.getOperand(0).getSubReg()) 1313309124Sdim NewMI.getOperand(0).setIsUndef(); 1314261991Sdim 1315309124Sdim // Transfer over implicit operands to the rematerialized instruction. 1316309124Sdim for (MachineOperand &MO : ImplicitOps) 1317309124Sdim NewMI.addOperand(MO); 1318224145Sdim 1319234353Sdim SlotIndex NewMIIdx = LIS->getInstructionIndex(NewMI); 1320234353Sdim for (unsigned i = 0, e = NewMIImplDefs.size(); i != e; ++i) { 1321239462Sdim unsigned Reg = NewMIImplDefs[i]; 1322239462Sdim for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) 1323261991Sdim if (LiveRange *LR = LIS->getCachedRegUnit(*Units)) 1324261991Sdim LR->createDeadDef(NewMIIdx.getRegSlot(), LIS->getVNInfoAllocator()); 1325234353Sdim } 1326234353Sdim 1327309124Sdim DEBUG(dbgs() << "Remat: " << NewMI); 1328224145Sdim ++NumReMats; 1329224145Sdim 1330224145Sdim // The source interval can become smaller because we removed a use. 1331288943Sdim shrinkToUses(&SrcInt, &DeadDefs); 1332280031Sdim if (!DeadDefs.empty()) { 1333280031Sdim // If the virtual SrcReg is completely eliminated, update all DBG_VALUEs 1334280031Sdim // to describe DstReg instead. 1335280031Sdim for (MachineOperand &UseMO : MRI->use_operands(SrcReg)) { 1336280031Sdim MachineInstr *UseMI = UseMO.getParent(); 1337280031Sdim if (UseMI->isDebugValue()) { 1338280031Sdim UseMO.setReg(DstReg); 1339327952Sdim // Move the debug value directly after the def of the rematerialized 1340327952Sdim // value in DstReg. 1341327952Sdim MBB->splice(std::next(NewMI.getIterator()), UseMI->getParent(), UseMI); 1342280031Sdim DEBUG(dbgs() << "\t\tupdated: " << *UseMI); 1343280031Sdim } 1344280031Sdim } 1345239462Sdim eliminateDeadDefs(); 1346280031Sdim } 1347224145Sdim 1348224145Sdim return true; 1349224145Sdim} 1350224145Sdim 1351288943Sdimbool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) { 1352288943Sdim // ProcessImpicitDefs may leave some copies of <undef> values, it only removes 1353288943Sdim // local variables. When we have a copy like: 1354288943Sdim // 1355327952Sdim // %1 = COPY undef %2 1356288943Sdim // 1357327952Sdim // We delete the copy and remove the corresponding value number from %1. 1358288943Sdim // Any uses of that value number are marked as <undef>. 1359280031Sdim 1360280031Sdim // Note that we do not query CoalescerPair here but redo isMoveInstr as the 1361280031Sdim // CoalescerPair may have a new register class with adjusted subreg indices 1362280031Sdim // at this point. 1363280031Sdim unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; 1364280031Sdim isMoveInstr(*TRI, CopyMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx); 1365280031Sdim 1366309124Sdim SlotIndex Idx = LIS->getInstructionIndex(*CopyMI); 1367280031Sdim const LiveInterval &SrcLI = LIS->getInterval(SrcReg); 1368280031Sdim // CopyMI is undef iff SrcReg is not live before the instruction. 1369280031Sdim if (SrcSubIdx != 0 && SrcLI.hasSubRanges()) { 1370296417Sdim LaneBitmask SrcMask = TRI->getSubRegIndexLaneMask(SrcSubIdx); 1371280031Sdim for (const LiveInterval::SubRange &SR : SrcLI.subranges()) { 1372314564Sdim if ((SR.LaneMask & SrcMask).none()) 1373280031Sdim continue; 1374280031Sdim if (SR.liveAt(Idx)) 1375280031Sdim return false; 1376280031Sdim } 1377280031Sdim } else if (SrcLI.liveAt(Idx)) 1378226633Sdim return false; 1379226633Sdim 1380280031Sdim DEBUG(dbgs() << "\tEliminating copy of <undef> value\n"); 1381226633Sdim 1382280031Sdim // Remove any DstReg segments starting at the instruction. 1383280031Sdim LiveInterval &DstLI = LIS->getInterval(DstReg); 1384280031Sdim SlotIndex RegIndex = Idx.getRegSlot(); 1385280031Sdim // Remove value or merge with previous one in case of a subregister def. 1386280031Sdim if (VNInfo *PrevVNI = DstLI.getVNInfoAt(Idx)) { 1387288943Sdim VNInfo *VNI = DstLI.getVNInfoAt(RegIndex); 1388288943Sdim DstLI.MergeValueNumberInto(VNI, PrevVNI); 1389280031Sdim 1390288943Sdim // The affected subregister segments can be removed. 1391296417Sdim LaneBitmask DstMask = TRI->getSubRegIndexLaneMask(DstSubIdx); 1392288943Sdim for (LiveInterval::SubRange &SR : DstLI.subranges()) { 1393314564Sdim if ((SR.LaneMask & DstMask).none()) 1394288943Sdim continue; 1395288943Sdim 1396288943Sdim VNInfo *SVNI = SR.getVNInfoAt(RegIndex); 1397288943Sdim assert(SVNI != nullptr && SlotIndex::isSameInstr(SVNI->def, RegIndex)); 1398288943Sdim SR.removeValNo(SVNI); 1399288943Sdim } 1400288943Sdim DstLI.removeEmptySubRanges(); 1401288943Sdim } else 1402288943Sdim LIS->removeVRegDefAt(DstLI, RegIndex); 1403288943Sdim 1404280031Sdim // Mark uses as undef. 1405280031Sdim for (MachineOperand &MO : MRI->reg_nodbg_operands(DstReg)) { 1406280031Sdim if (MO.isDef() /*|| MO.isUndef()*/) 1407226633Sdim continue; 1408280031Sdim const MachineInstr &MI = *MO.getParent(); 1409309124Sdim SlotIndex UseIdx = LIS->getInstructionIndex(MI); 1410296417Sdim LaneBitmask UseMask = TRI->getSubRegIndexLaneMask(MO.getSubReg()); 1411280031Sdim bool isLive; 1412314564Sdim if (!UseMask.all() && DstLI.hasSubRanges()) { 1413280031Sdim isLive = false; 1414280031Sdim for (const LiveInterval::SubRange &SR : DstLI.subranges()) { 1415314564Sdim if ((SR.LaneMask & UseMask).none()) 1416280031Sdim continue; 1417280031Sdim if (SR.liveAt(UseIdx)) { 1418280031Sdim isLive = true; 1419280031Sdim break; 1420280031Sdim } 1421280031Sdim } 1422280031Sdim } else 1423280031Sdim isLive = DstLI.liveAt(UseIdx); 1424280031Sdim if (isLive) 1425226633Sdim continue; 1426226633Sdim MO.setIsUndef(true); 1427280031Sdim DEBUG(dbgs() << "\tnew undef: " << UseIdx << '\t' << MI); 1428226633Sdim } 1429314564Sdim 1430314564Sdim // A def of a subregister may be a use of the other subregisters, so 1431314564Sdim // deleting a def of a subregister may also remove uses. Since CopyMI 1432314564Sdim // is still part of the function (but about to be erased), mark all 1433314564Sdim // defs of DstReg in it as <undef>, so that shrinkToUses would 1434314564Sdim // ignore them. 1435314564Sdim for (MachineOperand &MO : CopyMI->operands()) 1436314564Sdim if (MO.isReg() && MO.isDef() && MO.getReg() == DstReg) 1437314564Sdim MO.setIsUndef(true); 1438314564Sdim LIS->shrinkToUses(&DstLI); 1439314564Sdim 1440226633Sdim return true; 1441226633Sdim} 1442226633Sdim 1443309124Sdimvoid RegisterCoalescer::addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx, 1444309124Sdim MachineOperand &MO, unsigned SubRegIdx) { 1445309124Sdim LaneBitmask Mask = TRI->getSubRegIndexLaneMask(SubRegIdx); 1446309124Sdim if (MO.isDef()) 1447309124Sdim Mask = ~Mask; 1448309124Sdim bool IsUndef = true; 1449309124Sdim for (const LiveInterval::SubRange &S : Int.subranges()) { 1450314564Sdim if ((S.LaneMask & Mask).none()) 1451309124Sdim continue; 1452309124Sdim if (S.liveAt(UseIdx)) { 1453309124Sdim IsUndef = false; 1454309124Sdim break; 1455309124Sdim } 1456309124Sdim } 1457309124Sdim if (IsUndef) { 1458309124Sdim MO.setIsUndef(true); 1459309124Sdim // We found out some subregister use is actually reading an undefined 1460309124Sdim // value. In some cases the whole vreg has become undefined at this 1461309124Sdim // point so we have to potentially shrink the main range if the 1462309124Sdim // use was ending a live segment there. 1463309124Sdim LiveQueryResult Q = Int.Query(UseIdx); 1464309124Sdim if (Q.valueOut() == nullptr) 1465309124Sdim ShrinkMainRange = true; 1466309124Sdim } 1467309124Sdim} 1468309124Sdim 1469239462Sdimvoid RegisterCoalescer::updateRegDefsUses(unsigned SrcReg, 1470239462Sdim unsigned DstReg, 1471239462Sdim unsigned SubIdx) { 1472239462Sdim bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg); 1473276479Sdim LiveInterval *DstInt = DstIsPhys ? nullptr : &LIS->getInterval(DstReg); 1474224145Sdim 1475309124Sdim if (DstInt && DstInt->hasSubRanges() && DstReg != SrcReg) { 1476309124Sdim for (MachineOperand &MO : MRI->reg_operands(DstReg)) { 1477309124Sdim unsigned SubReg = MO.getSubReg(); 1478309124Sdim if (SubReg == 0 || MO.isUndef()) 1479309124Sdim continue; 1480309124Sdim MachineInstr &MI = *MO.getParent(); 1481309124Sdim if (MI.isDebugValue()) 1482309124Sdim continue; 1483309124Sdim SlotIndex UseIdx = LIS->getInstructionIndex(MI).getRegSlot(true); 1484309124Sdim addUndefFlag(*DstInt, UseIdx, MO, SubReg); 1485309124Sdim } 1486309124Sdim } 1487309124Sdim 1488243830Sdim SmallPtrSet<MachineInstr*, 8> Visited; 1489276479Sdim for (MachineRegisterInfo::reg_instr_iterator 1490276479Sdim I = MRI->reg_instr_begin(SrcReg), E = MRI->reg_instr_end(); 1491276479Sdim I != E; ) { 1492276479Sdim MachineInstr *UseMI = &*(I++); 1493276479Sdim 1494243830Sdim // Each instruction can only be rewritten once because sub-register 1495243830Sdim // composition is not always idempotent. When SrcReg != DstReg, rewriting 1496243830Sdim // the UseMI operands removes them from the SrcReg use-def chain, but when 1497243830Sdim // SrcReg is DstReg we could encounter UseMI twice if it has multiple 1498243830Sdim // operands mentioning the virtual register. 1499280031Sdim if (SrcReg == DstReg && !Visited.insert(UseMI).second) 1500243830Sdim continue; 1501243830Sdim 1502224145Sdim SmallVector<unsigned,8> Ops; 1503224145Sdim bool Reads, Writes; 1504276479Sdim std::tie(Reads, Writes) = UseMI->readsWritesVirtualRegister(SrcReg, &Ops); 1505224145Sdim 1506239462Sdim // If SrcReg wasn't read, it may still be the case that DstReg is live-in 1507239462Sdim // because SrcReg is a sub-register. 1508321369Sdim if (DstInt && !Reads && SubIdx && !UseMI->isDebugValue()) 1509309124Sdim Reads = DstInt->liveAt(LIS->getInstructionIndex(*UseMI)); 1510239462Sdim 1511224145Sdim // Replace SrcReg with DstReg in all UseMI operands. 1512224145Sdim for (unsigned i = 0, e = Ops.size(); i != e; ++i) { 1513224145Sdim MachineOperand &MO = UseMI->getOperand(Ops[i]); 1514224145Sdim 1515239462Sdim // Adjust <undef> flags in case of sub-register joins. We don't want to 1516239462Sdim // turn a full def into a read-modify-write sub-register def and vice 1517239462Sdim // versa. 1518239462Sdim if (SubIdx && MO.isDef()) 1519239462Sdim MO.setIsUndef(!Reads); 1520226633Sdim 1521280031Sdim // A subreg use of a partially undef (super) register may be a complete 1522280031Sdim // undef use now and then has to be marked that way. 1523288943Sdim if (SubIdx != 0 && MO.isUse() && MRI->shouldTrackSubRegLiveness(DstReg)) { 1524280031Sdim if (!DstInt->hasSubRanges()) { 1525280031Sdim BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator(); 1526296417Sdim LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(DstInt->reg); 1527280031Sdim DstInt->createSubRangeFrom(Allocator, Mask, *DstInt); 1528280031Sdim } 1529280031Sdim SlotIndex MIIdx = UseMI->isDebugValue() 1530309124Sdim ? LIS->getSlotIndexes()->getIndexBefore(*UseMI) 1531309124Sdim : LIS->getInstructionIndex(*UseMI); 1532280031Sdim SlotIndex UseIdx = MIIdx.getRegSlot(true); 1533309124Sdim addUndefFlag(*DstInt, UseIdx, MO, SubIdx); 1534280031Sdim } 1535280031Sdim 1536224145Sdim if (DstIsPhys) 1537226633Sdim MO.substPhysReg(DstReg, *TRI); 1538224145Sdim else 1539226633Sdim MO.substVirtReg(DstReg, SubIdx, *TRI); 1540224145Sdim } 1541224145Sdim 1542224145Sdim DEBUG({ 1543224145Sdim dbgs() << "\t\tupdated: "; 1544224145Sdim if (!UseMI->isDebugValue()) 1545309124Sdim dbgs() << LIS->getInstructionIndex(*UseMI) << "\t"; 1546224145Sdim dbgs() << *UseMI; 1547224145Sdim }); 1548224145Sdim } 1549224145Sdim} 1550224145Sdim 1551249423Sdimbool RegisterCoalescer::canJoinPhys(const CoalescerPair &CP) { 1552288943Sdim // Always join simple intervals that are defined by a single copy from a 1553288943Sdim // reserved register. This doesn't increase register pressure, so it is 1554288943Sdim // always beneficial. 1555243830Sdim if (!MRI->isReserved(CP.getDstReg())) { 1556239462Sdim DEBUG(dbgs() << "\tCan only merge into reserved registers.\n"); 1557224145Sdim return false; 1558224145Sdim } 1559224145Sdim 1560239462Sdim LiveInterval &JoinVInt = LIS->getInterval(CP.getSrcReg()); 1561288943Sdim if (JoinVInt.containsOneValue()) 1562224145Sdim return true; 1563224145Sdim 1564288943Sdim DEBUG(dbgs() << "\tCannot join complex intervals into reserved register.\n"); 1565239462Sdim return false; 1566224145Sdim} 1567224145Sdim 1568239462Sdimbool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { 1569224145Sdim Again = false; 1570309124Sdim DEBUG(dbgs() << LIS->getInstructionIndex(*CopyMI) << '\t' << *CopyMI); 1571224145Sdim 1572239462Sdim CoalescerPair CP(*TRI); 1573224145Sdim if (!CP.setRegisters(CopyMI)) { 1574224145Sdim DEBUG(dbgs() << "\tNot coalescable.\n"); 1575224145Sdim return false; 1576224145Sdim } 1577224145Sdim 1578276479Sdim if (CP.getNewRC()) { 1579276479Sdim auto SrcRC = MRI->getRegClass(CP.getSrcReg()); 1580276479Sdim auto DstRC = MRI->getRegClass(CP.getDstReg()); 1581276479Sdim unsigned SrcIdx = CP.getSrcIdx(); 1582276479Sdim unsigned DstIdx = CP.getDstIdx(); 1583276479Sdim if (CP.isFlipped()) { 1584276479Sdim std::swap(SrcIdx, DstIdx); 1585276479Sdim std::swap(SrcRC, DstRC); 1586276479Sdim } 1587276479Sdim if (!TRI->shouldCoalesce(CopyMI, SrcRC, SrcIdx, DstRC, DstIdx, 1588327952Sdim CP.getNewRC(), *LIS)) { 1589276479Sdim DEBUG(dbgs() << "\tSubtarget bailed on coalescing.\n"); 1590276479Sdim return false; 1591276479Sdim } 1592276479Sdim } 1593276479Sdim 1594239462Sdim // Dead code elimination. This really should be handled by MachineDCE, but 1595239462Sdim // sometimes dead copies slip through, and we can't generate invalid live 1596239462Sdim // ranges. 1597239462Sdim if (!CP.isPhys() && CopyMI->allDefsAreDead()) { 1598239462Sdim DEBUG(dbgs() << "\tCopy is dead.\n"); 1599239462Sdim DeadDefs.push_back(CopyMI); 1600239462Sdim eliminateDeadDefs(); 1601239462Sdim return true; 1602224145Sdim } 1603224145Sdim 1604226633Sdim // Eliminate undefs. 1605280031Sdim if (!CP.isPhys() && eliminateUndefCopy(CopyMI)) { 1606327952Sdim deleteInstr(CopyMI); 1607226633Sdim return false; // Not coalescable. 1608226633Sdim } 1609226633Sdim 1610239462Sdim // Coalesced copies are normally removed immediately, but transformations 1611239462Sdim // like removeCopyByCommutingDef() can inadvertently create identity copies. 1612239462Sdim // When that happens, just join the values and remove the copy. 1613239462Sdim if (CP.getSrcReg() == CP.getDstReg()) { 1614239462Sdim LiveInterval &LI = LIS->getInterval(CP.getSrcReg()); 1615239462Sdim DEBUG(dbgs() << "\tCopy already coalesced: " << LI << '\n'); 1616309124Sdim const SlotIndex CopyIdx = LIS->getInstructionIndex(*CopyMI); 1617280031Sdim LiveQueryResult LRQ = LI.Query(CopyIdx); 1618239462Sdim if (VNInfo *DefVNI = LRQ.valueDefined()) { 1619239462Sdim VNInfo *ReadVNI = LRQ.valueIn(); 1620239462Sdim assert(ReadVNI && "No value before copy and no <undef> flag."); 1621239462Sdim assert(ReadVNI != DefVNI && "Cannot read and define the same value."); 1622239462Sdim LI.MergeValueNumberInto(DefVNI, ReadVNI); 1623280031Sdim 1624280031Sdim // Process subregister liveranges. 1625280031Sdim for (LiveInterval::SubRange &S : LI.subranges()) { 1626280031Sdim LiveQueryResult SLRQ = S.Query(CopyIdx); 1627280031Sdim if (VNInfo *SDefVNI = SLRQ.valueDefined()) { 1628280031Sdim VNInfo *SReadVNI = SLRQ.valueIn(); 1629280031Sdim S.MergeValueNumberInto(SDefVNI, SReadVNI); 1630280031Sdim } 1631280031Sdim } 1632239462Sdim DEBUG(dbgs() << "\tMerged values: " << LI << '\n'); 1633239462Sdim } 1634327952Sdim deleteInstr(CopyMI); 1635239462Sdim return true; 1636239462Sdim } 1637224145Sdim 1638224145Sdim // Enforce policies. 1639224145Sdim if (CP.isPhys()) { 1640327952Sdim DEBUG(dbgs() << "\tConsidering merging " << printReg(CP.getSrcReg(), TRI) 1641327952Sdim << " with " << printReg(CP.getDstReg(), TRI, CP.getSrcIdx()) 1642239462Sdim << '\n'); 1643239462Sdim if (!canJoinPhys(CP)) { 1644224145Sdim // Before giving up coalescing, if definition of source is defined by 1645224145Sdim // trivial computation, try rematerializing it. 1646261991Sdim bool IsDefCopy; 1647261991Sdim if (reMaterializeTrivialDef(CP, CopyMI, IsDefCopy)) 1648224145Sdim return true; 1649261991Sdim if (IsDefCopy) 1650261991Sdim Again = true; // May be possible to coalesce later. 1651224145Sdim return false; 1652224145Sdim } 1653224145Sdim } else { 1654280031Sdim // When possible, let DstReg be the larger interval. 1655280031Sdim if (!CP.isPartial() && LIS->getInterval(CP.getSrcReg()).size() > 1656280031Sdim LIS->getInterval(CP.getDstReg()).size()) 1657280031Sdim CP.flip(); 1658280031Sdim 1659239462Sdim DEBUG({ 1660280031Sdim dbgs() << "\tConsidering merging to " 1661280031Sdim << TRI->getRegClassName(CP.getNewRC()) << " with "; 1662239462Sdim if (CP.getDstIdx() && CP.getSrcIdx()) 1663327952Sdim dbgs() << printReg(CP.getDstReg()) << " in " 1664239462Sdim << TRI->getSubRegIndexName(CP.getDstIdx()) << " and " 1665327952Sdim << printReg(CP.getSrcReg()) << " in " 1666239462Sdim << TRI->getSubRegIndexName(CP.getSrcIdx()) << '\n'; 1667239462Sdim else 1668327952Sdim dbgs() << printReg(CP.getSrcReg(), TRI) << " in " 1669327952Sdim << printReg(CP.getDstReg(), TRI, CP.getSrcIdx()) << '\n'; 1670239462Sdim }); 1671224145Sdim } 1672224145Sdim 1673314564Sdim ShrinkMask = LaneBitmask::getNone(); 1674280031Sdim ShrinkMainRange = false; 1675280031Sdim 1676224145Sdim // Okay, attempt to join these two intervals. On failure, this returns false. 1677224145Sdim // Otherwise, if one of the intervals being joined is a physreg, this method 1678224145Sdim // always canonicalizes DstInt to be it. The output "SrcInt" will not have 1679224145Sdim // been modified, so we can use this information below to update aliases. 1680239462Sdim if (!joinIntervals(CP)) { 1681224145Sdim // Coalescing failed. 1682224145Sdim 1683224145Sdim // If definition of source is defined by trivial computation, try 1684224145Sdim // rematerializing it. 1685261991Sdim bool IsDefCopy; 1686261991Sdim if (reMaterializeTrivialDef(CP, CopyMI, IsDefCopy)) 1687224145Sdim return true; 1688224145Sdim 1689261991Sdim // If we can eliminate the copy without merging the live segments, do so 1690261991Sdim // now. 1691239462Sdim if (!CP.isPartial() && !CP.isPhys()) { 1692239462Sdim if (adjustCopiesBackFrom(CP, CopyMI) || 1693239462Sdim removeCopyByCommutingDef(CP, CopyMI)) { 1694327952Sdim deleteInstr(CopyMI); 1695224145Sdim DEBUG(dbgs() << "\tTrivial!\n"); 1696224145Sdim return true; 1697224145Sdim } 1698224145Sdim } 1699224145Sdim 1700321369Sdim // Try and see if we can partially eliminate the copy by moving the copy to 1701321369Sdim // its predecessor. 1702321369Sdim if (!CP.isPartial() && !CP.isPhys()) 1703321369Sdim if (removePartialRedundancy(CP, *CopyMI)) 1704321369Sdim return true; 1705321369Sdim 1706224145Sdim // Otherwise, we are unable to join the intervals. 1707224145Sdim DEBUG(dbgs() << "\tInterference!\n"); 1708224145Sdim Again = true; // May be possible to coalesce later. 1709224145Sdim return false; 1710224145Sdim } 1711224145Sdim 1712224145Sdim // Coalescing to a virtual register that is of a sub-register class of the 1713224145Sdim // other. Make sure the resulting register is set to the right register class. 1714224145Sdim if (CP.isCrossClass()) { 1715224145Sdim ++numCrossRCs; 1716226633Sdim MRI->setRegClass(CP.getDstReg(), CP.getNewRC()); 1717224145Sdim } 1718224145Sdim 1719239462Sdim // Removing sub-register copies can ease the register class constraints. 1720239462Sdim // Make sure we attempt to inflate the register class of DstReg. 1721239462Sdim if (!CP.isPhys() && RegClassInfo.isProperSubClass(CP.getNewRC())) 1722239462Sdim InflateRegs.push_back(CP.getDstReg()); 1723224145Sdim 1724239462Sdim // CopyMI has been erased by joinIntervals at this point. Remove it from 1725239462Sdim // ErasedInstrs since copyCoalesceWorkList() won't add a successful join back 1726239462Sdim // to the work list. This keeps ErasedInstrs from growing needlessly. 1727239462Sdim ErasedInstrs.erase(CopyMI); 1728224145Sdim 1729239462Sdim // Rewrite all SrcReg operands to DstReg. 1730239462Sdim // Also update DstReg operands to include DstIdx if it is set. 1731239462Sdim if (CP.getDstIdx()) 1732239462Sdim updateRegDefsUses(CP.getDstReg(), CP.getDstReg(), CP.getDstIdx()); 1733239462Sdim updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx()); 1734224145Sdim 1735280031Sdim // Shrink subregister ranges if necessary. 1736314564Sdim if (ShrinkMask.any()) { 1737280031Sdim LiveInterval &LI = LIS->getInterval(CP.getDstReg()); 1738280031Sdim for (LiveInterval::SubRange &S : LI.subranges()) { 1739314564Sdim if ((S.LaneMask & ShrinkMask).none()) 1740280031Sdim continue; 1741296417Sdim DEBUG(dbgs() << "Shrink LaneUses (Lane " << PrintLaneMask(S.LaneMask) 1742296417Sdim << ")\n"); 1743280031Sdim LIS->shrinkToUses(S, LI.reg); 1744280031Sdim } 1745288943Sdim LI.removeEmptySubRanges(); 1746280031Sdim } 1747280031Sdim if (ShrinkMainRange) { 1748280031Sdim LiveInterval &LI = LIS->getInterval(CP.getDstReg()); 1749288943Sdim shrinkToUses(&LI); 1750280031Sdim } 1751280031Sdim 1752234353Sdim // SrcReg is guaranteed to be the register whose live interval that is 1753224145Sdim // being merged. 1754226633Sdim LIS->removeInterval(CP.getSrcReg()); 1755224145Sdim 1756224145Sdim // Update regalloc hint. 1757288943Sdim TRI->updateRegAllocHint(CP.getSrcReg(), CP.getDstReg(), *MF); 1758224145Sdim 1759224145Sdim DEBUG({ 1760327952Sdim dbgs() << "\tSuccess: " << printReg(CP.getSrcReg(), TRI, CP.getSrcIdx()) 1761327952Sdim << " -> " << printReg(CP.getDstReg(), TRI, CP.getDstIdx()) << '\n'; 1762280031Sdim dbgs() << "\tResult = "; 1763261991Sdim if (CP.isPhys()) 1764327952Sdim dbgs() << printReg(CP.getDstReg(), TRI); 1765261991Sdim else 1766239462Sdim dbgs() << LIS->getInterval(CP.getDstReg()); 1767261991Sdim dbgs() << '\n'; 1768224145Sdim }); 1769224145Sdim 1770224145Sdim ++numJoins; 1771224145Sdim return true; 1772224145Sdim} 1773224145Sdim 1774239462Sdimbool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) { 1775288943Sdim unsigned DstReg = CP.getDstReg(); 1776314564Sdim unsigned SrcReg = CP.getSrcReg(); 1777239462Sdim assert(CP.isPhys() && "Must be a physreg copy"); 1778288943Sdim assert(MRI->isReserved(DstReg) && "Not a reserved register"); 1779314564Sdim LiveInterval &RHS = LIS->getInterval(SrcReg); 1780261991Sdim DEBUG(dbgs() << "\t\tRHS = " << RHS << '\n'); 1781239462Sdim 1782288943Sdim assert(RHS.containsOneValue() && "Invalid join with reserved register"); 1783239462Sdim 1784239462Sdim // Optimization for reserved registers like ESP. We can only merge with a 1785288943Sdim // reserved physreg if RHS has a single value that is a copy of DstReg. 1786239462Sdim // The live range of the reserved register will look like a set of dead defs 1787239462Sdim // - we don't properly track the live range of reserved registers. 1788239462Sdim 1789239462Sdim // Deny any overlapping intervals. This depends on all the reserved 1790239462Sdim // register live ranges to look like dead defs. 1791314564Sdim if (!MRI->isConstantPhysReg(DstReg)) { 1792314564Sdim for (MCRegUnitIterator UI(DstReg, TRI); UI.isValid(); ++UI) { 1793314564Sdim // Abort if not all the regunits are reserved. 1794314564Sdim for (MCRegUnitRootIterator RI(*UI, TRI); RI.isValid(); ++RI) { 1795314564Sdim if (!MRI->isReserved(*RI)) 1796314564Sdim return false; 1797314564Sdim } 1798314564Sdim if (RHS.overlaps(LIS->getRegUnit(*UI))) { 1799327952Sdim DEBUG(dbgs() << "\t\tInterference: " << printRegUnit(*UI, TRI) << '\n'); 1800314564Sdim return false; 1801314564Sdim } 1802239462Sdim } 1803321369Sdim 1804321369Sdim // We must also check for overlaps with regmask clobbers. 1805321369Sdim BitVector RegMaskUsable; 1806321369Sdim if (LIS->checkRegMaskInterference(RHS, RegMaskUsable) && 1807321369Sdim !RegMaskUsable.test(DstReg)) { 1808321369Sdim DEBUG(dbgs() << "\t\tRegMask interference\n"); 1809321369Sdim return false; 1810321369Sdim } 1811314564Sdim } 1812239462Sdim 1813239462Sdim // Skip any value computations, we are not adding new values to the 1814239462Sdim // reserved register. Also skip merging the live ranges, the reserved 1815239462Sdim // register live range doesn't need to be accurate as long as all the 1816239462Sdim // defs are there. 1817239462Sdim 1818239462Sdim // Delete the identity copy. 1819288943Sdim MachineInstr *CopyMI; 1820288943Sdim if (CP.isFlipped()) { 1821314564Sdim // Physreg is copied into vreg 1822327952Sdim // %y = COPY %physreg_x 1823327952Sdim // ... //< no other def of %x here 1824327952Sdim // use %y 1825314564Sdim // => 1826314564Sdim // ... 1827327952Sdim // use %x 1828314564Sdim CopyMI = MRI->getVRegDef(SrcReg); 1829288943Sdim } else { 1830314564Sdim // VReg is copied into physreg: 1831327952Sdim // %y = def 1832327952Sdim // ... //< no other def or use of %y here 1833327952Sdim // %y = COPY %physreg_x 1834314564Sdim // => 1835327952Sdim // %y = def 1836314564Sdim // ... 1837314564Sdim if (!MRI->hasOneNonDBGUse(SrcReg)) { 1838288943Sdim DEBUG(dbgs() << "\t\tMultiple vreg uses!\n"); 1839288943Sdim return false; 1840288943Sdim } 1841288943Sdim 1842314564Sdim if (!LIS->intervalIsInOneMBB(RHS)) { 1843314564Sdim DEBUG(dbgs() << "\t\tComplex control flow!\n"); 1844314564Sdim return false; 1845314564Sdim } 1846288943Sdim 1847314564Sdim MachineInstr &DestMI = *MRI->getVRegDef(SrcReg); 1848314564Sdim CopyMI = &*MRI->use_instr_nodbg_begin(SrcReg); 1849314564Sdim SlotIndex CopyRegIdx = LIS->getInstructionIndex(*CopyMI).getRegSlot(); 1850314564Sdim SlotIndex DestRegIdx = LIS->getInstructionIndex(DestMI).getRegSlot(); 1851288943Sdim 1852314564Sdim if (!MRI->isConstantPhysReg(DstReg)) { 1853314564Sdim // We checked above that there are no interfering defs of the physical 1854327952Sdim // register. However, for this case, where we intend to move up the def of 1855314564Sdim // the physical register, we also need to check for interfering uses. 1856314564Sdim SlotIndexes *Indexes = LIS->getSlotIndexes(); 1857314564Sdim for (SlotIndex SI = Indexes->getNextNonNullIndex(DestRegIdx); 1858314564Sdim SI != CopyRegIdx; SI = Indexes->getNextNonNullIndex(SI)) { 1859314564Sdim MachineInstr *MI = LIS->getInstructionFromIndex(SI); 1860314564Sdim if (MI->readsRegister(DstReg, TRI)) { 1861314564Sdim DEBUG(dbgs() << "\t\tInterference (read): " << *MI); 1862288943Sdim return false; 1863288943Sdim } 1864288943Sdim } 1865288943Sdim } 1866288943Sdim 1867288943Sdim // We're going to remove the copy which defines a physical reserved 1868288943Sdim // register, so remove its valno, etc. 1869327952Sdim DEBUG(dbgs() << "\t\tRemoving phys reg def of " << printReg(DstReg, TRI) 1870314564Sdim << " at " << CopyRegIdx << "\n"); 1871288943Sdim 1872288943Sdim LIS->removePhysRegDefAt(DstReg, CopyRegIdx); 1873288943Sdim // Create a new dead def at the new def location. 1874288943Sdim for (MCRegUnitIterator UI(DstReg, TRI); UI.isValid(); ++UI) { 1875288943Sdim LiveRange &LR = LIS->getRegUnit(*UI); 1876288943Sdim LR.createDeadDef(DestRegIdx, LIS->getVNInfoAllocator()); 1877288943Sdim } 1878288943Sdim } 1879288943Sdim 1880327952Sdim deleteInstr(CopyMI); 1881239462Sdim 1882239462Sdim // We don't track kills for reserved registers. 1883239462Sdim MRI->clearKillFlags(CP.getSrcReg()); 1884239462Sdim 1885239462Sdim return true; 1886239462Sdim} 1887239462Sdim 1888243830Sdim//===----------------------------------------------------------------------===// 1889243830Sdim// Interference checking and interval joining 1890243830Sdim//===----------------------------------------------------------------------===// 1891243830Sdim// 1892243830Sdim// In the easiest case, the two live ranges being joined are disjoint, and 1893243830Sdim// there is no interference to consider. It is quite common, though, to have 1894243830Sdim// overlapping live ranges, and we need to check if the interference can be 1895243830Sdim// resolved. 1896243830Sdim// 1897243830Sdim// The live range of a single SSA value forms a sub-tree of the dominator tree. 1898243830Sdim// This means that two SSA values overlap if and only if the def of one value 1899243830Sdim// is contained in the live range of the other value. As a special case, the 1900243830Sdim// overlapping values can be defined at the same index. 1901243830Sdim// 1902243830Sdim// The interference from an overlapping def can be resolved in these cases: 1903243830Sdim// 1904243830Sdim// 1. Coalescable copies. The value is defined by a copy that would become an 1905243830Sdim// identity copy after joining SrcReg and DstReg. The copy instruction will 1906243830Sdim// be removed, and the value will be merged with the source value. 1907243830Sdim// 1908243830Sdim// There can be several copies back and forth, causing many values to be 1909243830Sdim// merged into one. We compute a list of ultimate values in the joined live 1910243830Sdim// range as well as a mappings from the old value numbers. 1911243830Sdim// 1912243830Sdim// 2. IMPLICIT_DEF. This instruction is only inserted to ensure all PHI 1913243830Sdim// predecessors have a live out value. It doesn't cause real interference, 1914243830Sdim// and can be merged into the value it overlaps. Like a coalescable copy, it 1915243830Sdim// can be erased after joining. 1916243830Sdim// 1917243830Sdim// 3. Copy of external value. The overlapping def may be a copy of a value that 1918243830Sdim// is already in the other register. This is like a coalescable copy, but 1919243830Sdim// the live range of the source register must be trimmed after erasing the 1920243830Sdim// copy instruction: 1921243830Sdim// 1922243830Sdim// %src = COPY %ext 1923243830Sdim// %dst = COPY %ext <-- Remove this COPY, trim the live range of %ext. 1924243830Sdim// 1925243830Sdim// 4. Clobbering undefined lanes. Vector registers are sometimes built by 1926243830Sdim// defining one lane at a time: 1927243830Sdim// 1928243830Sdim// %dst:ssub0<def,read-undef> = FOO 1929243830Sdim// %src = BAR 1930327952Sdim// %dst:ssub1 = COPY %src 1931243830Sdim// 1932243830Sdim// The live range of %src overlaps the %dst value defined by FOO, but 1933243830Sdim// merging %src into %dst:ssub1 is only going to clobber the ssub1 lane 1934243830Sdim// which was undef anyway. 1935243830Sdim// 1936243830Sdim// The value mapping is more complicated in this case. The final live range 1937243830Sdim// will have different value numbers for both FOO and BAR, but there is no 1938243830Sdim// simple mapping from old to new values. It may even be necessary to add 1939243830Sdim// new PHI values. 1940243830Sdim// 1941243830Sdim// 5. Clobbering dead lanes. A def may clobber a lane of a vector register that 1942243830Sdim// is live, but never read. This can happen because we don't compute 1943243830Sdim// individual live ranges per lane. 1944243830Sdim// 1945327952Sdim// %dst = FOO 1946243830Sdim// %src = BAR 1947327952Sdim// %dst:ssub1 = COPY %src 1948243830Sdim// 1949243830Sdim// This kind of interference is only resolved locally. If the clobbered 1950243830Sdim// lane value escapes the block, the join is aborted. 1951224145Sdim 1952243830Sdimnamespace { 1953327952Sdim 1954243830Sdim/// Track information about values in a single virtual register about to be 1955243830Sdim/// joined. Objects of this class are always created in pairs - one for each 1956280031Sdim/// side of the CoalescerPair (or one for each lane of a side of the coalescer 1957280031Sdim/// pair) 1958243830Sdimclass JoinVals { 1959280031Sdim /// Live range we work on. 1960280031Sdim LiveRange &LR; 1961327952Sdim 1962280031Sdim /// (Main) register we work on. 1963280031Sdim const unsigned Reg; 1964224145Sdim 1965288943Sdim /// Reg (and therefore the values in this liverange) will end up as 1966288943Sdim /// subregister SubIdx in the coalesced register. Either CP.DstIdx or 1967288943Sdim /// CP.SrcIdx. 1968280031Sdim const unsigned SubIdx; 1969327952Sdim 1970288943Sdim /// The LaneMask that this liverange will occupy the coalesced register. May 1971288943Sdim /// be smaller than the lanemask produced by SubIdx when merging subranges. 1972296417Sdim const LaneBitmask LaneMask; 1973224145Sdim 1974280031Sdim /// This is true when joining sub register ranges, false when joining main 1975280031Sdim /// ranges. 1976280031Sdim const bool SubRangeJoin; 1977327952Sdim 1978280031Sdim /// Whether the current LiveInterval tracks subregister liveness. 1979280031Sdim const bool TrackSubRegLiveness; 1980280031Sdim 1981288943Sdim /// Values that will be present in the final live range. 1982243830Sdim SmallVectorImpl<VNInfo*> &NewVNInfo; 1983224145Sdim 1984243830Sdim const CoalescerPair &CP; 1985243830Sdim LiveIntervals *LIS; 1986243830Sdim SlotIndexes *Indexes; 1987243830Sdim const TargetRegisterInfo *TRI; 1988224145Sdim 1989288943Sdim /// Value number assignments. Maps value numbers in LI to entries in 1990288943Sdim /// NewVNInfo. This is suitable for passing to LiveInterval::join(). 1991243830Sdim SmallVector<int, 8> Assignments; 1992224145Sdim 1993288943Sdim /// Conflict resolution for overlapping values. 1994243830Sdim enum ConflictResolution { 1995288943Sdim /// No overlap, simply keep this value. 1996243830Sdim CR_Keep, 1997224145Sdim 1998288943Sdim /// Merge this value into OtherVNI and erase the defining instruction. 1999288943Sdim /// Used for IMPLICIT_DEF, coalescable copies, and copies from external 2000288943Sdim /// values. 2001243830Sdim CR_Erase, 2002224145Sdim 2003288943Sdim /// Merge this value into OtherVNI but keep the defining instruction. 2004288943Sdim /// This is for the special case where OtherVNI is defined by the same 2005288943Sdim /// instruction. 2006243830Sdim CR_Merge, 2007224145Sdim 2008288943Sdim /// Keep this value, and have it replace OtherVNI where possible. This 2009288943Sdim /// complicates value mapping since OtherVNI maps to two different values 2010288943Sdim /// before and after this def. 2011288943Sdim /// Used when clobbering undefined or dead lanes. 2012243830Sdim CR_Replace, 2013224145Sdim 2014288943Sdim /// Unresolved conflict. Visit later when all values have been mapped. 2015243830Sdim CR_Unresolved, 2016224145Sdim 2017288943Sdim /// Unresolvable conflict. Abort the join. 2018243830Sdim CR_Impossible 2019243830Sdim }; 2020224145Sdim 2021288943Sdim /// Per-value info for LI. The lane bit masks are all relative to the final 2022288943Sdim /// joined register, so they can be compared directly between SrcReg and 2023288943Sdim /// DstReg. 2024243830Sdim struct Val { 2025327952Sdim ConflictResolution Resolution = CR_Keep; 2026224145Sdim 2027288943Sdim /// Lanes written by this def, 0 for unanalyzed values. 2028296417Sdim LaneBitmask WriteLanes; 2029224145Sdim 2030288943Sdim /// Lanes with defined values in this register. Other lanes are undef and 2031288943Sdim /// safe to clobber. 2032296417Sdim LaneBitmask ValidLanes; 2033224145Sdim 2034288943Sdim /// Value in LI being redefined by this def. 2035327952Sdim VNInfo *RedefVNI = nullptr; 2036224145Sdim 2037288943Sdim /// Value in the other live range that overlaps this def, if any. 2038327952Sdim VNInfo *OtherVNI = nullptr; 2039239462Sdim 2040288943Sdim /// Is this value an IMPLICIT_DEF that can be erased? 2041288943Sdim /// 2042288943Sdim /// IMPLICIT_DEF values should only exist at the end of a basic block that 2043288943Sdim /// is a predecessor to a phi-value. These IMPLICIT_DEF instructions can be 2044288943Sdim /// safely erased if they are overlapping a live value in the other live 2045288943Sdim /// interval. 2046288943Sdim /// 2047288943Sdim /// Weird control flow graphs and incomplete PHI handling in 2048288943Sdim /// ProcessImplicitDefs can very rarely create IMPLICIT_DEF values with 2049288943Sdim /// longer live ranges. Such IMPLICIT_DEF values should be treated like 2050288943Sdim /// normal values. 2051327952Sdim bool ErasableImplicitDef = false; 2052224145Sdim 2053288943Sdim /// True when the live range of this value will be pruned because of an 2054288943Sdim /// overlapping CR_Replace value in the other live range. 2055327952Sdim bool Pruned = false; 2056224145Sdim 2057288943Sdim /// True once Pruned above has been computed. 2058327952Sdim bool PrunedComputed = false; 2059224145Sdim 2060327952Sdim Val() = default; 2061224145Sdim 2062314564Sdim bool isAnalyzed() const { return WriteLanes.any(); } 2063243830Sdim }; 2064224145Sdim 2065288943Sdim /// One entry per value number in LI. 2066243830Sdim SmallVector<Val, 8> Vals; 2067224145Sdim 2068288943Sdim /// Compute the bitmask of lanes actually written by DefMI. 2069288943Sdim /// Set Redef if there are any partial register definitions that depend on the 2070288943Sdim /// previous value of the register. 2071296417Sdim LaneBitmask computeWriteLanes(const MachineInstr *DefMI, bool &Redef) const; 2072288943Sdim 2073288943Sdim /// Find the ultimate value that VNI was copied from. 2074280031Sdim std::pair<const VNInfo*,unsigned> followCopyChain(const VNInfo *VNI) const; 2075288943Sdim 2076280031Sdim bool valuesIdentical(VNInfo *Val0, VNInfo *Val1, const JoinVals &Other) const; 2077288943Sdim 2078288943Sdim /// Analyze ValNo in this live range, and set all fields of Vals[ValNo]. 2079288943Sdim /// Return a conflict resolution when possible, but leave the hard cases as 2080288943Sdim /// CR_Unresolved. 2081288943Sdim /// Recursively calls computeAssignment() on this and Other, guaranteeing that 2082288943Sdim /// both OtherVNI and RedefVNI have been analyzed and mapped before returning. 2083288943Sdim /// The recursion always goes upwards in the dominator tree, making loops 2084288943Sdim /// impossible. 2085243830Sdim ConflictResolution analyzeValue(unsigned ValNo, JoinVals &Other); 2086288943Sdim 2087288943Sdim /// Compute the value assignment for ValNo in RI. 2088288943Sdim /// This may be called recursively by analyzeValue(), but never for a ValNo on 2089288943Sdim /// the stack. 2090243830Sdim void computeAssignment(unsigned ValNo, JoinVals &Other); 2091288943Sdim 2092288943Sdim /// Assuming ValNo is going to clobber some valid lanes in Other.LR, compute 2093288943Sdim /// the extent of the tainted lanes in the block. 2094288943Sdim /// 2095288943Sdim /// Multiple values in Other.LR can be affected since partial redefinitions 2096288943Sdim /// can preserve previously tainted lanes. 2097288943Sdim /// 2098288943Sdim /// 1 %dst = VLOAD <-- Define all lanes in %dst 2099288943Sdim /// 2 %src = FOO <-- ValNo to be joined with %dst:ssub0 2100288943Sdim /// 3 %dst:ssub1 = BAR <-- Partial redef doesn't clear taint in ssub0 2101288943Sdim /// 4 %dst:ssub0 = COPY %src <-- Conflict resolved, ssub0 wasn't read 2102288943Sdim /// 2103288943Sdim /// For each ValNo in Other that is affected, add an (EndIndex, TaintedLanes) 2104288943Sdim /// entry to TaintedVals. 2105288943Sdim /// 2106288943Sdim /// Returns false if the tainted lanes extend beyond the basic block. 2107327952Sdim bool 2108327952Sdim taintExtent(unsigned ValNo, LaneBitmask TaintedLanes, JoinVals &Other, 2109327952Sdim SmallVectorImpl<std::pair<SlotIndex, LaneBitmask>> &TaintExtent); 2110288943Sdim 2111288943Sdim /// Return true if MI uses any of the given Lanes from Reg. 2112288943Sdim /// This does not include partial redefinitions of Reg. 2113309124Sdim bool usesLanes(const MachineInstr &MI, unsigned, unsigned, LaneBitmask) const; 2114288943Sdim 2115288943Sdim /// Determine if ValNo is a copy of a value number in LR or Other.LR that will 2116288943Sdim /// be pruned: 2117288943Sdim /// 2118288943Sdim /// %dst = COPY %src 2119288943Sdim /// %src = COPY %dst <-- This value to be pruned. 2120288943Sdim /// %dst = COPY %src <-- This value is a copy of a pruned value. 2121243830Sdim bool isPrunedValue(unsigned ValNo, JoinVals &Other); 2122243830Sdim 2123243830Sdimpublic: 2124296417Sdim JoinVals(LiveRange &LR, unsigned Reg, unsigned SubIdx, LaneBitmask LaneMask, 2125280031Sdim SmallVectorImpl<VNInfo*> &newVNInfo, const CoalescerPair &cp, 2126280031Sdim LiveIntervals *lis, const TargetRegisterInfo *TRI, bool SubRangeJoin, 2127280031Sdim bool TrackSubRegLiveness) 2128280031Sdim : LR(LR), Reg(Reg), SubIdx(SubIdx), LaneMask(LaneMask), 2129280031Sdim SubRangeJoin(SubRangeJoin), TrackSubRegLiveness(TrackSubRegLiveness), 2130280031Sdim NewVNInfo(newVNInfo), CP(cp), LIS(lis), Indexes(LIS->getSlotIndexes()), 2131327952Sdim TRI(TRI), Assignments(LR.getNumValNums(), -1), Vals(LR.getNumValNums()) {} 2132243830Sdim 2133280031Sdim /// Analyze defs in LR and compute a value mapping in NewVNInfo. 2134243830Sdim /// Returns false if any conflicts were impossible to resolve. 2135243830Sdim bool mapValues(JoinVals &Other); 2136243830Sdim 2137243830Sdim /// Try to resolve conflicts that require all values to be mapped. 2138243830Sdim /// Returns false if any conflicts were impossible to resolve. 2139243830Sdim bool resolveConflicts(JoinVals &Other); 2140243830Sdim 2141280031Sdim /// Prune the live range of values in Other.LR where they would conflict with 2142280031Sdim /// CR_Replace values in LR. Collect end points for restoring the live range 2143243830Sdim /// after joining. 2144280031Sdim void pruneValues(JoinVals &Other, SmallVectorImpl<SlotIndex> &EndPoints, 2145280031Sdim bool changeInstrs); 2146243830Sdim 2147288943Sdim /// Removes subranges starting at copies that get removed. This sometimes 2148288943Sdim /// happens when undefined subranges are copied around. These ranges contain 2149296417Sdim /// no useful information and can be removed. 2150296417Sdim void pruneSubRegValues(LiveInterval &LI, LaneBitmask &ShrinkMask); 2151280031Sdim 2152314564Sdim /// Pruning values in subranges can lead to removing segments in these 2153314564Sdim /// subranges started by IMPLICIT_DEFs. The corresponding segments in 2154314564Sdim /// the main range also need to be removed. This function will mark 2155314564Sdim /// the corresponding values in the main range as pruned, so that 2156314564Sdim /// eraseInstrs can do the final cleanup. 2157314564Sdim /// The parameter @p LI must be the interval whose main range is the 2158314564Sdim /// live range LR. 2159314564Sdim void pruneMainSegments(LiveInterval &LI, bool &ShrinkMainRange); 2160314564Sdim 2161243830Sdim /// Erase any machine instructions that have been coalesced away. 2162243830Sdim /// Add erased instructions to ErasedInstrs. 2163243830Sdim /// Add foreign virtual registers to ShrinkRegs if their live range ended at 2164243830Sdim /// the erased instrs. 2165280031Sdim void eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs, 2166314564Sdim SmallVectorImpl<unsigned> &ShrinkRegs, 2167314564Sdim LiveInterval *LI = nullptr); 2168243830Sdim 2169288943Sdim /// Remove liverange defs at places where implicit defs will be removed. 2170288943Sdim void removeImplicitDefs(); 2171288943Sdim 2172243830Sdim /// Get the value assignments suitable for passing to LiveInterval::join. 2173243830Sdim const int *getAssignments() const { return Assignments.data(); } 2174243830Sdim}; 2175327952Sdim 2176243830Sdim} // end anonymous namespace 2177243830Sdim 2178296417SdimLaneBitmask JoinVals::computeWriteLanes(const MachineInstr *DefMI, bool &Redef) 2179280031Sdim const { 2180314564Sdim LaneBitmask L; 2181288943Sdim for (const MachineOperand &MO : DefMI->operands()) { 2182288943Sdim if (!MO.isReg() || MO.getReg() != Reg || !MO.isDef()) 2183224145Sdim continue; 2184243830Sdim L |= TRI->getSubRegIndexLaneMask( 2185288943Sdim TRI->composeSubRegIndices(SubIdx, MO.getSubReg())); 2186288943Sdim if (MO.readsReg()) 2187243830Sdim Redef = true; 2188243830Sdim } 2189243830Sdim return L; 2190243830Sdim} 2191224145Sdim 2192280031Sdimstd::pair<const VNInfo*, unsigned> JoinVals::followCopyChain( 2193280031Sdim const VNInfo *VNI) const { 2194280031Sdim unsigned Reg = this->Reg; 2195280031Sdim 2196243830Sdim while (!VNI->isPHIDef()) { 2197280031Sdim SlotIndex Def = VNI->def; 2198280031Sdim MachineInstr *MI = Indexes->getInstructionFromIndex(Def); 2199243830Sdim assert(MI && "No defining instruction"); 2200243830Sdim if (!MI->isFullCopy()) 2201280031Sdim return std::make_pair(VNI, Reg); 2202280031Sdim unsigned SrcReg = MI->getOperand(1).getReg(); 2203280031Sdim if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) 2204280031Sdim return std::make_pair(VNI, Reg); 2205280031Sdim 2206280031Sdim const LiveInterval &LI = LIS->getInterval(SrcReg); 2207280031Sdim const VNInfo *ValueIn; 2208280031Sdim // No subrange involved. 2209280031Sdim if (!SubRangeJoin || !LI.hasSubRanges()) { 2210280031Sdim LiveQueryResult LRQ = LI.Query(Def); 2211280031Sdim ValueIn = LRQ.valueIn(); 2212280031Sdim } else { 2213280031Sdim // Query subranges. Pick the first matching one. 2214280031Sdim ValueIn = nullptr; 2215280031Sdim for (const LiveInterval::SubRange &S : LI.subranges()) { 2216280031Sdim // Transform lanemask to a mask in the joined live interval. 2217296417Sdim LaneBitmask SMask = TRI->composeSubRegIndexLaneMask(SubIdx, S.LaneMask); 2218314564Sdim if ((SMask & LaneMask).none()) 2219280031Sdim continue; 2220280031Sdim LiveQueryResult LRQ = S.Query(Def); 2221280031Sdim ValueIn = LRQ.valueIn(); 2222280031Sdim break; 2223280031Sdim } 2224280031Sdim } 2225280031Sdim if (ValueIn == nullptr) 2226243830Sdim break; 2227280031Sdim VNI = ValueIn; 2228280031Sdim Reg = SrcReg; 2229224145Sdim } 2230280031Sdim return std::make_pair(VNI, Reg); 2231243830Sdim} 2232224145Sdim 2233280031Sdimbool JoinVals::valuesIdentical(VNInfo *Value0, VNInfo *Value1, 2234280031Sdim const JoinVals &Other) const { 2235280031Sdim const VNInfo *Orig0; 2236280031Sdim unsigned Reg0; 2237280031Sdim std::tie(Orig0, Reg0) = followCopyChain(Value0); 2238280031Sdim if (Orig0 == Value1) 2239280031Sdim return true; 2240280031Sdim 2241280031Sdim const VNInfo *Orig1; 2242280031Sdim unsigned Reg1; 2243280031Sdim std::tie(Orig1, Reg1) = Other.followCopyChain(Value1); 2244280031Sdim 2245280031Sdim // The values are equal if they are defined at the same place and use the 2246280031Sdim // same register. Note that we cannot compare VNInfos directly as some of 2247280031Sdim // them might be from a copy created in mergeSubRangeInto() while the other 2248280031Sdim // is from the original LiveInterval. 2249280031Sdim return Orig0->def == Orig1->def && Reg0 == Reg1; 2250280031Sdim} 2251280031Sdim 2252243830SdimJoinVals::ConflictResolution 2253243830SdimJoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) { 2254243830Sdim Val &V = Vals[ValNo]; 2255243830Sdim assert(!V.isAnalyzed() && "Value has already been analyzed!"); 2256280031Sdim VNInfo *VNI = LR.getValNumInfo(ValNo); 2257243830Sdim if (VNI->isUnused()) { 2258314564Sdim V.WriteLanes = LaneBitmask::getAll(); 2259243830Sdim return CR_Keep; 2260243830Sdim } 2261224145Sdim 2262243830Sdim // Get the instruction defining this value, compute the lanes written. 2263276479Sdim const MachineInstr *DefMI = nullptr; 2264243830Sdim if (VNI->isPHIDef()) { 2265243830Sdim // Conservatively assume that all lanes in a PHI are valid. 2266327952Sdim LaneBitmask Lanes = SubRangeJoin ? LaneBitmask::getLane(0) 2267314564Sdim : TRI->getSubRegIndexLaneMask(SubIdx); 2268280031Sdim V.ValidLanes = V.WriteLanes = Lanes; 2269243830Sdim } else { 2270243830Sdim DefMI = Indexes->getInstructionFromIndex(VNI->def); 2271280031Sdim assert(DefMI != nullptr); 2272280031Sdim if (SubRangeJoin) { 2273280031Sdim // We don't care about the lanes when joining subregister ranges. 2274327952Sdim V.WriteLanes = V.ValidLanes = LaneBitmask::getLane(0); 2275288943Sdim if (DefMI->isImplicitDef()) { 2276314564Sdim V.ValidLanes = LaneBitmask::getNone(); 2277288943Sdim V.ErasableImplicitDef = true; 2278288943Sdim } 2279280031Sdim } else { 2280280031Sdim bool Redef = false; 2281280031Sdim V.ValidLanes = V.WriteLanes = computeWriteLanes(DefMI, Redef); 2282224145Sdim 2283280031Sdim // If this is a read-modify-write instruction, there may be more valid 2284280031Sdim // lanes than the ones written by this instruction. 2285280031Sdim // This only covers partial redef operands. DefMI may have normal use 2286280031Sdim // operands reading the register. They don't contribute valid lanes. 2287280031Sdim // 2288280031Sdim // This adds ssub1 to the set of valid lanes in %src: 2289280031Sdim // 2290327952Sdim // %src:ssub1 = FOO 2291280031Sdim // 2292280031Sdim // This leaves only ssub1 valid, making any other lanes undef: 2293280031Sdim // 2294280031Sdim // %src:ssub1<def,read-undef> = FOO %src:ssub2 2295280031Sdim // 2296280031Sdim // The <read-undef> flag on the def operand means that old lane values are 2297280031Sdim // not important. 2298280031Sdim if (Redef) { 2299280031Sdim V.RedefVNI = LR.Query(VNI->def).valueIn(); 2300280031Sdim assert((TrackSubRegLiveness || V.RedefVNI) && 2301280031Sdim "Instruction is reading nonexistent value"); 2302280031Sdim if (V.RedefVNI != nullptr) { 2303280031Sdim computeAssignment(V.RedefVNI->id, Other); 2304280031Sdim V.ValidLanes |= Vals[V.RedefVNI->id].ValidLanes; 2305280031Sdim } 2306280031Sdim } 2307224145Sdim 2308280031Sdim // An IMPLICIT_DEF writes undef values. 2309280031Sdim if (DefMI->isImplicitDef()) { 2310280031Sdim // We normally expect IMPLICIT_DEF values to be live only until the end 2311280031Sdim // of their block. If the value is really live longer and gets pruned in 2312280031Sdim // another block, this flag is cleared again. 2313280031Sdim V.ErasableImplicitDef = true; 2314280031Sdim V.ValidLanes &= ~V.WriteLanes; 2315280031Sdim } 2316243830Sdim } 2317224145Sdim } 2318224145Sdim 2319243830Sdim // Find the value in Other that overlaps VNI->def, if any. 2320280031Sdim LiveQueryResult OtherLRQ = Other.LR.Query(VNI->def); 2321224145Sdim 2322243830Sdim // It is possible that both values are defined by the same instruction, or 2323243830Sdim // the values are PHIs defined in the same block. When that happens, the two 2324243830Sdim // values should be merged into one, but not into any preceding value. 2325243830Sdim // The first value defined or visited gets CR_Keep, the other gets CR_Merge. 2326243830Sdim if (VNInfo *OtherVNI = OtherLRQ.valueDefined()) { 2327243830Sdim assert(SlotIndex::isSameInstr(VNI->def, OtherVNI->def) && "Broken LRQ"); 2328243830Sdim 2329243830Sdim // One value stays, the other is merged. Keep the earlier one, or the first 2330243830Sdim // one we see. 2331243830Sdim if (OtherVNI->def < VNI->def) 2332243830Sdim Other.computeAssignment(OtherVNI->id, *this); 2333243830Sdim else if (VNI->def < OtherVNI->def && OtherLRQ.valueIn()) { 2334243830Sdim // This is an early-clobber def overlapping a live-in value in the other 2335243830Sdim // register. Not mergeable. 2336243830Sdim V.OtherVNI = OtherLRQ.valueIn(); 2337243830Sdim return CR_Impossible; 2338243830Sdim } 2339243830Sdim V.OtherVNI = OtherVNI; 2340243830Sdim Val &OtherV = Other.Vals[OtherVNI->id]; 2341243830Sdim // Keep this value, check for conflicts when analyzing OtherVNI. 2342243830Sdim if (!OtherV.isAnalyzed()) 2343243830Sdim return CR_Keep; 2344243830Sdim // Both sides have been analyzed now. 2345243830Sdim // Allow overlapping PHI values. Any real interference would show up in a 2346243830Sdim // predecessor, the PHI itself can't introduce any conflicts. 2347243830Sdim if (VNI->isPHIDef()) 2348243830Sdim return CR_Merge; 2349314564Sdim if ((V.ValidLanes & OtherV.ValidLanes).any()) 2350243830Sdim // Overlapping lanes can't be resolved. 2351243830Sdim return CR_Impossible; 2352243830Sdim else 2353243830Sdim return CR_Merge; 2354224145Sdim } 2355224145Sdim 2356243830Sdim // No simultaneous def. Is Other live at the def? 2357243830Sdim V.OtherVNI = OtherLRQ.valueIn(); 2358243830Sdim if (!V.OtherVNI) 2359243830Sdim // No overlap, no conflict. 2360243830Sdim return CR_Keep; 2361243830Sdim 2362243830Sdim assert(!SlotIndex::isSameInstr(VNI->def, V.OtherVNI->def) && "Broken LRQ"); 2363243830Sdim 2364243830Sdim // We have overlapping values, or possibly a kill of Other. 2365243830Sdim // Recursively compute assignments up the dominator tree. 2366243830Sdim Other.computeAssignment(V.OtherVNI->id, *this); 2367249423Sdim Val &OtherV = Other.Vals[V.OtherVNI->id]; 2368243830Sdim 2369249423Sdim // Check if OtherV is an IMPLICIT_DEF that extends beyond its basic block. 2370249423Sdim // This shouldn't normally happen, but ProcessImplicitDefs can leave such 2371249423Sdim // IMPLICIT_DEF instructions behind, and there is nothing wrong with it 2372249423Sdim // technically. 2373249423Sdim // 2374309124Sdim // When it happens, treat that IMPLICIT_DEF as a normal value, and don't try 2375249423Sdim // to erase the IMPLICIT_DEF instruction. 2376249423Sdim if (OtherV.ErasableImplicitDef && DefMI && 2377249423Sdim DefMI->getParent() != Indexes->getMBBFromIndex(V.OtherVNI->def)) { 2378249423Sdim DEBUG(dbgs() << "IMPLICIT_DEF defined at " << V.OtherVNI->def 2379327952Sdim << " extends into " << printMBBReference(*DefMI->getParent()) 2380249423Sdim << ", keeping it.\n"); 2381249423Sdim OtherV.ErasableImplicitDef = false; 2382249423Sdim } 2383249423Sdim 2384243830Sdim // Allow overlapping PHI values. Any real interference would show up in a 2385243830Sdim // predecessor, the PHI itself can't introduce any conflicts. 2386243830Sdim if (VNI->isPHIDef()) 2387243830Sdim return CR_Replace; 2388243830Sdim 2389243830Sdim // Check for simple erasable conflicts. 2390280031Sdim if (DefMI->isImplicitDef()) { 2391280031Sdim // We need the def for the subregister if there is nothing else live at the 2392280031Sdim // subrange at this point. 2393280031Sdim if (TrackSubRegLiveness 2394314564Sdim && (V.WriteLanes & (OtherV.ValidLanes | OtherV.WriteLanes)).none()) 2395280031Sdim return CR_Replace; 2396243830Sdim return CR_Erase; 2397280031Sdim } 2398243830Sdim 2399243830Sdim // Include the non-conflict where DefMI is a coalescable copy that kills 2400243830Sdim // OtherVNI. We still want the copy erased and value numbers merged. 2401243830Sdim if (CP.isCoalescable(DefMI)) { 2402243830Sdim // Some of the lanes copied from OtherVNI may be undef, making them undef 2403243830Sdim // here too. 2404243830Sdim V.ValidLanes &= ~V.WriteLanes | OtherV.ValidLanes; 2405243830Sdim return CR_Erase; 2406224145Sdim } 2407224145Sdim 2408243830Sdim // This may not be a real conflict if DefMI simply kills Other and defines 2409243830Sdim // VNI. 2410243830Sdim if (OtherLRQ.isKill() && OtherLRQ.endPoint() <= VNI->def) 2411243830Sdim return CR_Keep; 2412224145Sdim 2413243830Sdim // Handle the case where VNI and OtherVNI can be proven to be identical: 2414243830Sdim // 2415243830Sdim // %other = COPY %ext 2416243830Sdim // %this = COPY %ext <-- Erase this copy 2417243830Sdim // 2418280031Sdim if (DefMI->isFullCopy() && !CP.isPartial() 2419280031Sdim && valuesIdentical(VNI, V.OtherVNI, Other)) 2420243830Sdim return CR_Erase; 2421239462Sdim 2422243830Sdim // If the lanes written by this instruction were all undef in OtherVNI, it is 2423243830Sdim // still safe to join the live ranges. This can't be done with a simple value 2424243830Sdim // mapping, though - OtherVNI will map to multiple values: 2425243830Sdim // 2426243830Sdim // 1 %dst:ssub0 = FOO <-- OtherVNI 2427243830Sdim // 2 %src = BAR <-- VNI 2428327952Sdim // 3 %dst:ssub1 = COPY killed %src <-- Eliminate this copy. 2429327952Sdim // 4 BAZ killed %dst 2430327952Sdim // 5 QUUX killed %src 2431243830Sdim // 2432243830Sdim // Here OtherVNI will map to itself in [1;2), but to VNI in [2;5). CR_Replace 2433243830Sdim // handles this complex value mapping. 2434314564Sdim if ((V.WriteLanes & OtherV.ValidLanes).none()) 2435243830Sdim return CR_Replace; 2436243830Sdim 2437243830Sdim // If the other live range is killed by DefMI and the live ranges are still 2438243830Sdim // overlapping, it must be because we're looking at an early clobber def: 2439243830Sdim // 2440327952Sdim // %dst<def,early-clobber> = ASM killed %src 2441243830Sdim // 2442243830Sdim // In this case, it is illegal to merge the two live ranges since the early 2443243830Sdim // clobber def would clobber %src before it was read. 2444243830Sdim if (OtherLRQ.isKill()) { 2445243830Sdim // This case where the def doesn't overlap the kill is handled above. 2446243830Sdim assert(VNI->def.isEarlyClobber() && 2447243830Sdim "Only early clobber defs can overlap a kill"); 2448243830Sdim return CR_Impossible; 2449243830Sdim } 2450243830Sdim 2451243830Sdim // VNI is clobbering live lanes in OtherVNI, but there is still the 2452243830Sdim // possibility that no instructions actually read the clobbered lanes. 2453243830Sdim // If we're clobbering all the lanes in OtherVNI, at least one must be read. 2454280031Sdim // Otherwise Other.RI wouldn't be live here. 2455314564Sdim if ((TRI->getSubRegIndexLaneMask(Other.SubIdx) & ~V.WriteLanes).none()) 2456243830Sdim return CR_Impossible; 2457243830Sdim 2458243830Sdim // We need to verify that no instructions are reading the clobbered lanes. To 2459243830Sdim // save compile time, we'll only check that locally. Don't allow the tainted 2460243830Sdim // value to escape the basic block. 2461243830Sdim MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def); 2462243830Sdim if (OtherLRQ.endPoint() >= Indexes->getMBBEndIdx(MBB)) 2463243830Sdim return CR_Impossible; 2464243830Sdim 2465243830Sdim // There are still some things that could go wrong besides clobbered lanes 2466243830Sdim // being read, for example OtherVNI may be only partially redefined in MBB, 2467243830Sdim // and some clobbered lanes could escape the block. Save this analysis for 2468243830Sdim // resolveConflicts() when all values have been mapped. We need to know 2469243830Sdim // RedefVNI and WriteLanes for any later defs in MBB, and we can't compute 2470243830Sdim // that now - the recursive analyzeValue() calls must go upwards in the 2471243830Sdim // dominator tree. 2472243830Sdim return CR_Unresolved; 2473243830Sdim} 2474243830Sdim 2475243830Sdimvoid JoinVals::computeAssignment(unsigned ValNo, JoinVals &Other) { 2476243830Sdim Val &V = Vals[ValNo]; 2477243830Sdim if (V.isAnalyzed()) { 2478243830Sdim // Recursion should always move up the dominator tree, so ValNo is not 2479243830Sdim // supposed to reappear before it has been assigned. 2480243830Sdim assert(Assignments[ValNo] != -1 && "Bad recursion?"); 2481243830Sdim return; 2482243830Sdim } 2483243830Sdim switch ((V.Resolution = analyzeValue(ValNo, Other))) { 2484243830Sdim case CR_Erase: 2485243830Sdim case CR_Merge: 2486243830Sdim // Merge this ValNo into OtherVNI. 2487243830Sdim assert(V.OtherVNI && "OtherVNI not assigned, can't merge."); 2488243830Sdim assert(Other.Vals[V.OtherVNI->id].isAnalyzed() && "Missing recursion"); 2489243830Sdim Assignments[ValNo] = Other.Assignments[V.OtherVNI->id]; 2490327952Sdim DEBUG(dbgs() << "\t\tmerge " << printReg(Reg) << ':' << ValNo << '@' 2491280031Sdim << LR.getValNumInfo(ValNo)->def << " into " 2492327952Sdim << printReg(Other.Reg) << ':' << V.OtherVNI->id << '@' 2493243830Sdim << V.OtherVNI->def << " --> @" 2494243830Sdim << NewVNInfo[Assignments[ValNo]]->def << '\n'); 2495243830Sdim break; 2496243830Sdim case CR_Replace: 2497280031Sdim case CR_Unresolved: { 2498243830Sdim // The other value is going to be pruned if this join is successful. 2499243830Sdim assert(V.OtherVNI && "OtherVNI not assigned, can't prune"); 2500280031Sdim Val &OtherV = Other.Vals[V.OtherVNI->id]; 2501280031Sdim // We cannot erase an IMPLICIT_DEF if we don't have valid values for all 2502280031Sdim // its lanes. 2503314564Sdim if ((OtherV.WriteLanes & ~V.ValidLanes).any() && TrackSubRegLiveness) 2504280031Sdim OtherV.ErasableImplicitDef = false; 2505280031Sdim OtherV.Pruned = true; 2506314564Sdim LLVM_FALLTHROUGH; 2507280031Sdim } 2508243830Sdim default: 2509243830Sdim // This value number needs to go in the final joined live range. 2510243830Sdim Assignments[ValNo] = NewVNInfo.size(); 2511280031Sdim NewVNInfo.push_back(LR.getValNumInfo(ValNo)); 2512243830Sdim break; 2513243830Sdim } 2514243830Sdim} 2515243830Sdim 2516243830Sdimbool JoinVals::mapValues(JoinVals &Other) { 2517280031Sdim for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) { 2518243830Sdim computeAssignment(i, Other); 2519243830Sdim if (Vals[i].Resolution == CR_Impossible) { 2520327952Sdim DEBUG(dbgs() << "\t\tinterference at " << printReg(Reg) << ':' << i 2521280031Sdim << '@' << LR.getValNumInfo(i)->def << '\n'); 2522243830Sdim return false; 2523224145Sdim } 2524224145Sdim } 2525243830Sdim return true; 2526243830Sdim} 2527224145Sdim 2528243830Sdimbool JoinVals:: 2529296417SdimtaintExtent(unsigned ValNo, LaneBitmask TaintedLanes, JoinVals &Other, 2530327952Sdim SmallVectorImpl<std::pair<SlotIndex, LaneBitmask>> &TaintExtent) { 2531280031Sdim VNInfo *VNI = LR.getValNumInfo(ValNo); 2532243830Sdim MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def); 2533243830Sdim SlotIndex MBBEnd = Indexes->getMBBEndIdx(MBB); 2534239462Sdim 2535280031Sdim // Scan Other.LR from VNI.def to MBBEnd. 2536280031Sdim LiveInterval::iterator OtherI = Other.LR.find(VNI->def); 2537280031Sdim assert(OtherI != Other.LR.end() && "No conflict?"); 2538243830Sdim do { 2539243830Sdim // OtherI is pointing to a tainted value. Abort the join if the tainted 2540243830Sdim // lanes escape the block. 2541243830Sdim SlotIndex End = OtherI->end; 2542243830Sdim if (End >= MBBEnd) { 2543327952Sdim DEBUG(dbgs() << "\t\ttaints global " << printReg(Other.Reg) << ':' 2544243830Sdim << OtherI->valno->id << '@' << OtherI->start << '\n'); 2545243830Sdim return false; 2546224145Sdim } 2547327952Sdim DEBUG(dbgs() << "\t\ttaints local " << printReg(Other.Reg) << ':' 2548243830Sdim << OtherI->valno->id << '@' << OtherI->start 2549243830Sdim << " to " << End << '\n'); 2550243830Sdim // A dead def is not a problem. 2551243830Sdim if (End.isDead()) 2552243830Sdim break; 2553243830Sdim TaintExtent.push_back(std::make_pair(End, TaintedLanes)); 2554224145Sdim 2555243830Sdim // Check for another def in the MBB. 2556280031Sdim if (++OtherI == Other.LR.end() || OtherI->start >= MBBEnd) 2557243830Sdim break; 2558224145Sdim 2559243830Sdim // Lanes written by the new def are no longer tainted. 2560243830Sdim const Val &OV = Other.Vals[OtherI->valno->id]; 2561243830Sdim TaintedLanes &= ~OV.WriteLanes; 2562243830Sdim if (!OV.RedefVNI) 2563243830Sdim break; 2564314564Sdim } while (TaintedLanes.any()); 2565243830Sdim return true; 2566243830Sdim} 2567224145Sdim 2568309124Sdimbool JoinVals::usesLanes(const MachineInstr &MI, unsigned Reg, unsigned SubIdx, 2569296417Sdim LaneBitmask Lanes) const { 2570309124Sdim if (MI.isDebugValue()) 2571243830Sdim return false; 2572309124Sdim for (const MachineOperand &MO : MI.operands()) { 2573288943Sdim if (!MO.isReg() || MO.isDef() || MO.getReg() != Reg) 2574239462Sdim continue; 2575288943Sdim if (!MO.readsReg()) 2576243830Sdim continue; 2577314564Sdim unsigned S = TRI->composeSubRegIndices(SubIdx, MO.getSubReg()); 2578314564Sdim if ((Lanes & TRI->getSubRegIndexLaneMask(S)).any()) 2579243830Sdim return true; 2580239462Sdim } 2581243830Sdim return false; 2582243830Sdim} 2583239462Sdim 2584243830Sdimbool JoinVals::resolveConflicts(JoinVals &Other) { 2585280031Sdim for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) { 2586243830Sdim Val &V = Vals[i]; 2587327952Sdim assert(V.Resolution != CR_Impossible && "Unresolvable conflict"); 2588243830Sdim if (V.Resolution != CR_Unresolved) 2589239462Sdim continue; 2590327952Sdim DEBUG(dbgs() << "\t\tconflict at " << printReg(Reg) << ':' << i 2591280031Sdim << '@' << LR.getValNumInfo(i)->def << '\n'); 2592280031Sdim if (SubRangeJoin) 2593280031Sdim return false; 2594280031Sdim 2595243830Sdim ++NumLaneConflicts; 2596243830Sdim assert(V.OtherVNI && "Inconsistent conflict resolution."); 2597280031Sdim VNInfo *VNI = LR.getValNumInfo(i); 2598243830Sdim const Val &OtherV = Other.Vals[V.OtherVNI->id]; 2599224145Sdim 2600243830Sdim // VNI is known to clobber some lanes in OtherVNI. If we go ahead with the 2601243830Sdim // join, those lanes will be tainted with a wrong value. Get the extent of 2602243830Sdim // the tainted lanes. 2603296417Sdim LaneBitmask TaintedLanes = V.WriteLanes & OtherV.ValidLanes; 2604296417Sdim SmallVector<std::pair<SlotIndex, LaneBitmask>, 8> TaintExtent; 2605243830Sdim if (!taintExtent(i, TaintedLanes, Other, TaintExtent)) 2606243830Sdim // Tainted lanes would extend beyond the basic block. 2607243830Sdim return false; 2608243830Sdim 2609243830Sdim assert(!TaintExtent.empty() && "There should be at least one conflict."); 2610243830Sdim 2611243830Sdim // Now look at the instructions from VNI->def to TaintExtent (inclusive). 2612243830Sdim MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def); 2613243830Sdim MachineBasicBlock::iterator MI = MBB->begin(); 2614243830Sdim if (!VNI->isPHIDef()) { 2615243830Sdim MI = Indexes->getInstructionFromIndex(VNI->def); 2616243830Sdim // No need to check the instruction defining VNI for reads. 2617243830Sdim ++MI; 2618239462Sdim } 2619243830Sdim assert(!SlotIndex::isSameInstr(VNI->def, TaintExtent.front().first) && 2620243830Sdim "Interference ends on VNI->def. Should have been handled earlier"); 2621243830Sdim MachineInstr *LastMI = 2622243830Sdim Indexes->getInstructionFromIndex(TaintExtent.front().first); 2623243830Sdim assert(LastMI && "Range must end at a proper instruction"); 2624243830Sdim unsigned TaintNum = 0; 2625327952Sdim while (true) { 2626243830Sdim assert(MI != MBB->end() && "Bad LastMI"); 2627309124Sdim if (usesLanes(*MI, Other.Reg, Other.SubIdx, TaintedLanes)) { 2628243830Sdim DEBUG(dbgs() << "\t\ttainted lanes used by: " << *MI); 2629243830Sdim return false; 2630243830Sdim } 2631243830Sdim // LastMI is the last instruction to use the current value. 2632243830Sdim if (&*MI == LastMI) { 2633243830Sdim if (++TaintNum == TaintExtent.size()) 2634243830Sdim break; 2635243830Sdim LastMI = Indexes->getInstructionFromIndex(TaintExtent[TaintNum].first); 2636243830Sdim assert(LastMI && "Range must end at a proper instruction"); 2637243830Sdim TaintedLanes = TaintExtent[TaintNum].second; 2638243830Sdim } 2639243830Sdim ++MI; 2640243830Sdim } 2641243830Sdim 2642243830Sdim // The tainted lanes are unused. 2643243830Sdim V.Resolution = CR_Replace; 2644243830Sdim ++NumLaneResolves; 2645224145Sdim } 2646243830Sdim return true; 2647243830Sdim} 2648224145Sdim 2649243830Sdimbool JoinVals::isPrunedValue(unsigned ValNo, JoinVals &Other) { 2650243830Sdim Val &V = Vals[ValNo]; 2651243830Sdim if (V.Pruned || V.PrunedComputed) 2652243830Sdim return V.Pruned; 2653243830Sdim 2654243830Sdim if (V.Resolution != CR_Erase && V.Resolution != CR_Merge) 2655243830Sdim return V.Pruned; 2656243830Sdim 2657243830Sdim // Follow copies up the dominator tree and check if any intermediate value 2658243830Sdim // has been pruned. 2659243830Sdim V.PrunedComputed = true; 2660243830Sdim V.Pruned = Other.isPrunedValue(V.OtherVNI->id, *this); 2661243830Sdim return V.Pruned; 2662243830Sdim} 2663243830Sdim 2664243830Sdimvoid JoinVals::pruneValues(JoinVals &Other, 2665280031Sdim SmallVectorImpl<SlotIndex> &EndPoints, 2666280031Sdim bool changeInstrs) { 2667280031Sdim for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) { 2668280031Sdim SlotIndex Def = LR.getValNumInfo(i)->def; 2669243830Sdim switch (Vals[i].Resolution) { 2670243830Sdim case CR_Keep: 2671243830Sdim break; 2672243830Sdim case CR_Replace: { 2673280031Sdim // This value takes precedence over the value in Other.LR. 2674280031Sdim LIS->pruneValue(Other.LR, Def, &EndPoints); 2675243830Sdim // Check if we're replacing an IMPLICIT_DEF value. The IMPLICIT_DEF 2676243830Sdim // instructions are only inserted to provide a live-out value for PHI 2677243830Sdim // predecessors, so the instruction should simply go away once its value 2678243830Sdim // has been replaced. 2679243830Sdim Val &OtherV = Other.Vals[Vals[i].OtherVNI->id]; 2680249423Sdim bool EraseImpDef = OtherV.ErasableImplicitDef && 2681249423Sdim OtherV.Resolution == CR_Keep; 2682243830Sdim if (!Def.isBlock()) { 2683280031Sdim if (changeInstrs) { 2684280031Sdim // Remove <def,read-undef> flags. This def is now a partial redef. 2685327952Sdim // Also remove dead flags since the joined live range will 2686280031Sdim // continue past this instruction. 2687288943Sdim for (MachineOperand &MO : 2688288943Sdim Indexes->getInstructionFromIndex(Def)->operands()) { 2689288943Sdim if (MO.isReg() && MO.isDef() && MO.getReg() == Reg) { 2690327952Sdim if (MO.getSubReg() != 0 && MO.isUndef() && !EraseImpDef) 2691327952Sdim MO.setIsUndef(false); 2692288943Sdim MO.setIsDead(false); 2693280031Sdim } 2694243830Sdim } 2695280031Sdim } 2696243830Sdim // This value will reach instructions below, but we need to make sure 2697243830Sdim // the live range also reaches the instruction at Def. 2698243830Sdim if (!EraseImpDef) 2699243830Sdim EndPoints.push_back(Def); 2700243830Sdim } 2701327952Sdim DEBUG(dbgs() << "\t\tpruned " << printReg(Other.Reg) << " at " << Def 2702280031Sdim << ": " << Other.LR << '\n'); 2703243830Sdim break; 2704243830Sdim } 2705243830Sdim case CR_Erase: 2706243830Sdim case CR_Merge: 2707243830Sdim if (isPrunedValue(i, Other)) { 2708280031Sdim // This value is ultimately a copy of a pruned value in LR or Other.LR. 2709243830Sdim // We can no longer trust the value mapping computed by 2710243830Sdim // computeAssignment(), the value that was originally copied could have 2711243830Sdim // been replaced. 2712280031Sdim LIS->pruneValue(LR, Def, &EndPoints); 2713327952Sdim DEBUG(dbgs() << "\t\tpruned all of " << printReg(Reg) << " at " 2714280031Sdim << Def << ": " << LR << '\n'); 2715243830Sdim } 2716243830Sdim break; 2717243830Sdim case CR_Unresolved: 2718243830Sdim case CR_Impossible: 2719243830Sdim llvm_unreachable("Unresolved conflicts"); 2720243830Sdim } 2721224145Sdim } 2722243830Sdim} 2723224145Sdim 2724314564Sdimvoid JoinVals::pruneSubRegValues(LiveInterval &LI, LaneBitmask &ShrinkMask) { 2725280031Sdim // Look for values being erased. 2726280031Sdim bool DidPrune = false; 2727280031Sdim for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) { 2728321369Sdim // We should trigger in all cases in which eraseInstrs() does something. 2729321369Sdim // match what eraseInstrs() is doing, print a message so 2730321369Sdim if (Vals[i].Resolution != CR_Erase && 2731321369Sdim (Vals[i].Resolution != CR_Keep || !Vals[i].ErasableImplicitDef || 2732321369Sdim !Vals[i].Pruned)) 2733280031Sdim continue; 2734280031Sdim 2735280031Sdim // Check subranges at the point where the copy will be removed. 2736280031Sdim SlotIndex Def = LR.getValNumInfo(i)->def; 2737321369Sdim // Print message so mismatches with eraseInstrs() can be diagnosed. 2738321369Sdim DEBUG(dbgs() << "\t\tExpecting instruction removal at " << Def << '\n'); 2739280031Sdim for (LiveInterval::SubRange &S : LI.subranges()) { 2740280031Sdim LiveQueryResult Q = S.Query(Def); 2741280031Sdim 2742280031Sdim // If a subrange starts at the copy then an undefined value has been 2743280031Sdim // copied and we must remove that subrange value as well. 2744280031Sdim VNInfo *ValueOut = Q.valueOutOrDead(); 2745280031Sdim if (ValueOut != nullptr && Q.valueIn() == nullptr) { 2746296417Sdim DEBUG(dbgs() << "\t\tPrune sublane " << PrintLaneMask(S.LaneMask) 2747280031Sdim << " at " << Def << "\n"); 2748280031Sdim LIS->pruneValue(S, Def, nullptr); 2749280031Sdim DidPrune = true; 2750280031Sdim // Mark value number as unused. 2751280031Sdim ValueOut->markUnused(); 2752280031Sdim continue; 2753280031Sdim } 2754280031Sdim // If a subrange ends at the copy, then a value was copied but only 2755296417Sdim // partially used later. Shrink the subregister range appropriately. 2756280031Sdim if (Q.valueIn() != nullptr && Q.valueOut() == nullptr) { 2757296417Sdim DEBUG(dbgs() << "\t\tDead uses at sublane " << PrintLaneMask(S.LaneMask) 2758296417Sdim << " at " << Def << "\n"); 2759280031Sdim ShrinkMask |= S.LaneMask; 2760280031Sdim } 2761280031Sdim } 2762280031Sdim } 2763280031Sdim if (DidPrune) 2764280031Sdim LI.removeEmptySubRanges(); 2765280031Sdim} 2766280031Sdim 2767314564Sdim/// Check if any of the subranges of @p LI contain a definition at @p Def. 2768314564Sdimstatic bool isDefInSubRange(LiveInterval &LI, SlotIndex Def) { 2769314564Sdim for (LiveInterval::SubRange &SR : LI.subranges()) { 2770314564Sdim if (VNInfo *VNI = SR.Query(Def).valueOutOrDead()) 2771314564Sdim if (VNI->def == Def) 2772314564Sdim return true; 2773314564Sdim } 2774314564Sdim return false; 2775314564Sdim} 2776314564Sdim 2777314564Sdimvoid JoinVals::pruneMainSegments(LiveInterval &LI, bool &ShrinkMainRange) { 2778314564Sdim assert(&static_cast<LiveRange&>(LI) == &LR); 2779314564Sdim 2780314564Sdim for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) { 2781314564Sdim if (Vals[i].Resolution != CR_Keep) 2782314564Sdim continue; 2783314564Sdim VNInfo *VNI = LR.getValNumInfo(i); 2784314564Sdim if (VNI->isUnused() || VNI->isPHIDef() || isDefInSubRange(LI, VNI->def)) 2785314564Sdim continue; 2786314564Sdim Vals[i].Pruned = true; 2787314564Sdim ShrinkMainRange = true; 2788314564Sdim } 2789314564Sdim} 2790314564Sdim 2791288943Sdimvoid JoinVals::removeImplicitDefs() { 2792288943Sdim for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) { 2793288943Sdim Val &V = Vals[i]; 2794288943Sdim if (V.Resolution != CR_Keep || !V.ErasableImplicitDef || !V.Pruned) 2795288943Sdim continue; 2796288943Sdim 2797288943Sdim VNInfo *VNI = LR.getValNumInfo(i); 2798288943Sdim VNI->markUnused(); 2799288943Sdim LR.removeValNo(VNI); 2800288943Sdim } 2801288943Sdim} 2802288943Sdim 2803280031Sdimvoid JoinVals::eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs, 2804314564Sdim SmallVectorImpl<unsigned> &ShrinkRegs, 2805314564Sdim LiveInterval *LI) { 2806280031Sdim for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) { 2807243830Sdim // Get the def location before markUnused() below invalidates it. 2808280031Sdim SlotIndex Def = LR.getValNumInfo(i)->def; 2809243830Sdim switch (Vals[i].Resolution) { 2810288943Sdim case CR_Keep: { 2811243830Sdim // If an IMPLICIT_DEF value is pruned, it doesn't serve a purpose any 2812243830Sdim // longer. The IMPLICIT_DEF instructions are only inserted by 2813243830Sdim // PHIElimination to guarantee that all PHI predecessors have a value. 2814249423Sdim if (!Vals[i].ErasableImplicitDef || !Vals[i].Pruned) 2815243830Sdim break; 2816288943Sdim // Remove value number i from LR. 2817314564Sdim // For intervals with subranges, removing a segment from the main range 2818314564Sdim // may require extending the previous segment: for each definition of 2819314564Sdim // a subregister, there will be a corresponding def in the main range. 2820314564Sdim // That def may fall in the middle of a segment from another subrange. 2821314564Sdim // In such cases, removing this def from the main range must be 2822314564Sdim // complemented by extending the main range to account for the liveness 2823314564Sdim // of the other subrange. 2824288943Sdim VNInfo *VNI = LR.getValNumInfo(i); 2825314564Sdim SlotIndex Def = VNI->def; 2826314564Sdim // The new end point of the main range segment to be extended. 2827314564Sdim SlotIndex NewEnd; 2828314564Sdim if (LI != nullptr) { 2829314564Sdim LiveRange::iterator I = LR.FindSegmentContaining(Def); 2830314564Sdim assert(I != LR.end()); 2831314564Sdim // Do not extend beyond the end of the segment being removed. 2832314564Sdim // The segment may have been pruned in preparation for joining 2833314564Sdim // live ranges. 2834314564Sdim NewEnd = I->end; 2835314564Sdim } 2836314564Sdim 2837288943Sdim LR.removeValNo(VNI); 2838288943Sdim // Note that this VNInfo is reused and still referenced in NewVNInfo, 2839288943Sdim // make it appear like an unused value number. 2840288943Sdim VNI->markUnused(); 2841314564Sdim 2842314564Sdim if (LI != nullptr && LI->hasSubRanges()) { 2843314564Sdim assert(static_cast<LiveRange*>(LI) == &LR); 2844314564Sdim // Determine the end point based on the subrange information: 2845314564Sdim // minimum of (earliest def of next segment, 2846314564Sdim // latest end point of containing segment) 2847314564Sdim SlotIndex ED, LE; 2848314564Sdim for (LiveInterval::SubRange &SR : LI->subranges()) { 2849314564Sdim LiveRange::iterator I = SR.find(Def); 2850314564Sdim if (I == SR.end()) 2851314564Sdim continue; 2852314564Sdim if (I->start > Def) 2853314564Sdim ED = ED.isValid() ? std::min(ED, I->start) : I->start; 2854314564Sdim else 2855314564Sdim LE = LE.isValid() ? std::max(LE, I->end) : I->end; 2856314564Sdim } 2857314564Sdim if (LE.isValid()) 2858314564Sdim NewEnd = std::min(NewEnd, LE); 2859314564Sdim if (ED.isValid()) 2860314564Sdim NewEnd = std::min(NewEnd, ED); 2861314564Sdim 2862314564Sdim // We only want to do the extension if there was a subrange that 2863314564Sdim // was live across Def. 2864314564Sdim if (LE.isValid()) { 2865314564Sdim LiveRange::iterator S = LR.find(Def); 2866314564Sdim if (S != LR.begin()) 2867314564Sdim std::prev(S)->end = NewEnd; 2868314564Sdim } 2869314564Sdim } 2870314564Sdim DEBUG({ 2871314564Sdim dbgs() << "\t\tremoved " << i << '@' << Def << ": " << LR << '\n'; 2872314564Sdim if (LI != nullptr) 2873314564Sdim dbgs() << "\t\t LHS = " << *LI << '\n'; 2874314564Sdim }); 2875314564Sdim LLVM_FALLTHROUGH; 2876288943Sdim } 2877243830Sdim 2878243830Sdim case CR_Erase: { 2879243830Sdim MachineInstr *MI = Indexes->getInstructionFromIndex(Def); 2880243830Sdim assert(MI && "No instruction to erase"); 2881243830Sdim if (MI->isCopy()) { 2882243830Sdim unsigned Reg = MI->getOperand(1).getReg(); 2883243830Sdim if (TargetRegisterInfo::isVirtualRegister(Reg) && 2884243830Sdim Reg != CP.getSrcReg() && Reg != CP.getDstReg()) 2885243830Sdim ShrinkRegs.push_back(Reg); 2886243830Sdim } 2887243830Sdim ErasedInstrs.insert(MI); 2888243830Sdim DEBUG(dbgs() << "\t\terased:\t" << Def << '\t' << *MI); 2889309124Sdim LIS->RemoveMachineInstrFromMaps(*MI); 2890243830Sdim MI->eraseFromParent(); 2891243830Sdim break; 2892243830Sdim } 2893243830Sdim default: 2894243830Sdim break; 2895243830Sdim } 2896243830Sdim } 2897243830Sdim} 2898243830Sdim 2899296417Sdimvoid RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange, 2900296417Sdim LaneBitmask LaneMask, 2901280031Sdim const CoalescerPair &CP) { 2902280031Sdim SmallVector<VNInfo*, 16> NewVNInfo; 2903280031Sdim JoinVals RHSVals(RRange, CP.getSrcReg(), CP.getSrcIdx(), LaneMask, 2904280031Sdim NewVNInfo, CP, LIS, TRI, true, true); 2905280031Sdim JoinVals LHSVals(LRange, CP.getDstReg(), CP.getDstIdx(), LaneMask, 2906280031Sdim NewVNInfo, CP, LIS, TRI, true, true); 2907280031Sdim 2908288943Sdim // Compute NewVNInfo and resolve conflicts (see also joinVirtRegs()) 2909288943Sdim // We should be able to resolve all conflicts here as we could successfully do 2910288943Sdim // it on the mainrange already. There is however a problem when multiple 2911288943Sdim // ranges get mapped to the "overflow" lane mask bit which creates unexpected 2912288943Sdim // interferences. 2913288943Sdim if (!LHSVals.mapValues(RHSVals) || !RHSVals.mapValues(LHSVals)) { 2914296417Sdim // We already determined that it is legal to merge the intervals, so this 2915296417Sdim // should never fail. 2916296417Sdim llvm_unreachable("*** Couldn't join subrange!\n"); 2917288943Sdim } 2918288943Sdim if (!LHSVals.resolveConflicts(RHSVals) || 2919288943Sdim !RHSVals.resolveConflicts(LHSVals)) { 2920296417Sdim // We already determined that it is legal to merge the intervals, so this 2921296417Sdim // should never fail. 2922296417Sdim llvm_unreachable("*** Couldn't join subrange!\n"); 2923288943Sdim } 2924280031Sdim 2925280031Sdim // The merging algorithm in LiveInterval::join() can't handle conflicting 2926280031Sdim // value mappings, so we need to remove any live ranges that overlap a 2927280031Sdim // CR_Replace resolution. Collect a set of end points that can be used to 2928280031Sdim // restore the live range after joining. 2929280031Sdim SmallVector<SlotIndex, 8> EndPoints; 2930280031Sdim LHSVals.pruneValues(RHSVals, EndPoints, false); 2931280031Sdim RHSVals.pruneValues(LHSVals, EndPoints, false); 2932280031Sdim 2933288943Sdim LHSVals.removeImplicitDefs(); 2934288943Sdim RHSVals.removeImplicitDefs(); 2935288943Sdim 2936280031Sdim LRange.verify(); 2937280031Sdim RRange.verify(); 2938280031Sdim 2939280031Sdim // Join RRange into LHS. 2940280031Sdim LRange.join(RRange, LHSVals.getAssignments(), RHSVals.getAssignments(), 2941280031Sdim NewVNInfo); 2942280031Sdim 2943280031Sdim DEBUG(dbgs() << "\t\tjoined lanes: " << LRange << "\n"); 2944280031Sdim if (EndPoints.empty()) 2945296417Sdim return; 2946280031Sdim 2947280031Sdim // Recompute the parts of the live range we had to remove because of 2948280031Sdim // CR_Replace conflicts. 2949314564Sdim DEBUG({ 2950314564Sdim dbgs() << "\t\trestoring liveness to " << EndPoints.size() << " points: "; 2951314564Sdim for (unsigned i = 0, n = EndPoints.size(); i != n; ++i) { 2952314564Sdim dbgs() << EndPoints[i]; 2953314564Sdim if (i != n-1) 2954314564Sdim dbgs() << ','; 2955314564Sdim } 2956314564Sdim dbgs() << ": " << LRange << '\n'; 2957314564Sdim }); 2958280031Sdim LIS->extendToIndices(LRange, EndPoints); 2959280031Sdim} 2960280031Sdim 2961296417Sdimvoid RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI, 2962280031Sdim const LiveRange &ToMerge, 2963296417Sdim LaneBitmask LaneMask, 2964296417Sdim CoalescerPair &CP) { 2965280031Sdim BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator(); 2966321369Sdim LI.refineSubRanges(Allocator, LaneMask, 2967321369Sdim [this,&Allocator,&ToMerge,&CP](LiveInterval::SubRange &SR) { 2968321369Sdim if (SR.empty()) { 2969321369Sdim SR.assign(ToMerge, Allocator); 2970280031Sdim } else { 2971321369Sdim // joinSubRegRange() destroys the merged range, so we need a copy. 2972321369Sdim LiveRange RangeCopy(ToMerge, Allocator); 2973321369Sdim joinSubRegRanges(SR, RangeCopy, SR.LaneMask, CP); 2974280031Sdim } 2975321369Sdim }); 2976280031Sdim} 2977280031Sdim 2978243830Sdimbool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) { 2979243830Sdim SmallVector<VNInfo*, 16> NewVNInfo; 2980243830Sdim LiveInterval &RHS = LIS->getInterval(CP.getSrcReg()); 2981243830Sdim LiveInterval &LHS = LIS->getInterval(CP.getDstReg()); 2982288943Sdim bool TrackSubRegLiveness = MRI->shouldTrackSubRegLiveness(*CP.getNewRC()); 2983314564Sdim JoinVals RHSVals(RHS, CP.getSrcReg(), CP.getSrcIdx(), LaneBitmask::getNone(), 2984314564Sdim NewVNInfo, CP, LIS, TRI, false, TrackSubRegLiveness); 2985314564Sdim JoinVals LHSVals(LHS, CP.getDstReg(), CP.getDstIdx(), LaneBitmask::getNone(), 2986314564Sdim NewVNInfo, CP, LIS, TRI, false, TrackSubRegLiveness); 2987243830Sdim 2988261991Sdim DEBUG(dbgs() << "\t\tRHS = " << RHS 2989261991Sdim << "\n\t\tLHS = " << LHS 2990243830Sdim << '\n'); 2991243830Sdim 2992243830Sdim // First compute NewVNInfo and the simple value mappings. 2993243830Sdim // Detect impossible conflicts early. 2994243830Sdim if (!LHSVals.mapValues(RHSVals) || !RHSVals.mapValues(LHSVals)) 2995243830Sdim return false; 2996243830Sdim 2997243830Sdim // Some conflicts can only be resolved after all values have been mapped. 2998243830Sdim if (!LHSVals.resolveConflicts(RHSVals) || !RHSVals.resolveConflicts(LHSVals)) 2999243830Sdim return false; 3000243830Sdim 3001243830Sdim // All clear, the live ranges can be merged. 3002280031Sdim if (RHS.hasSubRanges() || LHS.hasSubRanges()) { 3003280031Sdim BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator(); 3004243830Sdim 3005280031Sdim // Transform lanemasks from the LHS to masks in the coalesced register and 3006280031Sdim // create initial subranges if necessary. 3007280031Sdim unsigned DstIdx = CP.getDstIdx(); 3008280031Sdim if (!LHS.hasSubRanges()) { 3009296417Sdim LaneBitmask Mask = DstIdx == 0 ? CP.getNewRC()->getLaneMask() 3010296417Sdim : TRI->getSubRegIndexLaneMask(DstIdx); 3011280031Sdim // LHS must support subregs or we wouldn't be in this codepath. 3012314564Sdim assert(Mask.any()); 3013280031Sdim LHS.createSubRangeFrom(Allocator, Mask, LHS); 3014280031Sdim } else if (DstIdx != 0) { 3015280031Sdim // Transform LHS lanemasks to new register class if necessary. 3016280031Sdim for (LiveInterval::SubRange &R : LHS.subranges()) { 3017296417Sdim LaneBitmask Mask = TRI->composeSubRegIndexLaneMask(DstIdx, R.LaneMask); 3018280031Sdim R.LaneMask = Mask; 3019280031Sdim } 3020280031Sdim } 3021327952Sdim DEBUG(dbgs() << "\t\tLHST = " << printReg(CP.getDstReg()) 3022280031Sdim << ' ' << LHS << '\n'); 3023280031Sdim 3024280031Sdim // Determine lanemasks of RHS in the coalesced register and merge subranges. 3025280031Sdim unsigned SrcIdx = CP.getSrcIdx(); 3026280031Sdim if (!RHS.hasSubRanges()) { 3027296417Sdim LaneBitmask Mask = SrcIdx == 0 ? CP.getNewRC()->getLaneMask() 3028296417Sdim : TRI->getSubRegIndexLaneMask(SrcIdx); 3029296417Sdim mergeSubRangeInto(LHS, RHS, Mask, CP); 3030280031Sdim } else { 3031280031Sdim // Pair up subranges and merge. 3032280031Sdim for (LiveInterval::SubRange &R : RHS.subranges()) { 3033296417Sdim LaneBitmask Mask = TRI->composeSubRegIndexLaneMask(SrcIdx, R.LaneMask); 3034296417Sdim mergeSubRangeInto(LHS, R, Mask, CP); 3035280031Sdim } 3036280031Sdim } 3037296417Sdim DEBUG(dbgs() << "\tJoined SubRanges " << LHS << "\n"); 3038280031Sdim 3039314564Sdim // Pruning implicit defs from subranges may result in the main range 3040314564Sdim // having stale segments. 3041314564Sdim LHSVals.pruneMainSegments(LHS, ShrinkMainRange); 3042314564Sdim 3043296417Sdim LHSVals.pruneSubRegValues(LHS, ShrinkMask); 3044296417Sdim RHSVals.pruneSubRegValues(LHS, ShrinkMask); 3045280031Sdim } 3046280031Sdim 3047243830Sdim // The merging algorithm in LiveInterval::join() can't handle conflicting 3048243830Sdim // value mappings, so we need to remove any live ranges that overlap a 3049243830Sdim // CR_Replace resolution. Collect a set of end points that can be used to 3050243830Sdim // restore the live range after joining. 3051243830Sdim SmallVector<SlotIndex, 8> EndPoints; 3052280031Sdim LHSVals.pruneValues(RHSVals, EndPoints, true); 3053280031Sdim RHSVals.pruneValues(LHSVals, EndPoints, true); 3054243830Sdim 3055243830Sdim // Erase COPY and IMPLICIT_DEF instructions. This may cause some external 3056243830Sdim // registers to require trimming. 3057243830Sdim SmallVector<unsigned, 8> ShrinkRegs; 3058314564Sdim LHSVals.eraseInstrs(ErasedInstrs, ShrinkRegs, &LHS); 3059243830Sdim RHSVals.eraseInstrs(ErasedInstrs, ShrinkRegs); 3060243830Sdim while (!ShrinkRegs.empty()) 3061288943Sdim shrinkToUses(&LIS->getInterval(ShrinkRegs.pop_back_val())); 3062243830Sdim 3063243830Sdim // Join RHS into LHS. 3064261991Sdim LHS.join(RHS, LHSVals.getAssignments(), RHSVals.getAssignments(), NewVNInfo); 3065243830Sdim 3066243830Sdim // Kill flags are going to be wrong if the live ranges were overlapping. 3067243830Sdim // Eventually, we should simply clear all kill flags when computing live 3068243830Sdim // ranges. They are reinserted after register allocation. 3069243830Sdim MRI->clearKillFlags(LHS.reg); 3070243830Sdim MRI->clearKillFlags(RHS.reg); 3071243830Sdim 3072280031Sdim if (!EndPoints.empty()) { 3073280031Sdim // Recompute the parts of the live range we had to remove because of 3074280031Sdim // CR_Replace conflicts. 3075314564Sdim DEBUG({ 3076314564Sdim dbgs() << "\t\trestoring liveness to " << EndPoints.size() << " points: "; 3077314564Sdim for (unsigned i = 0, n = EndPoints.size(); i != n; ++i) { 3078314564Sdim dbgs() << EndPoints[i]; 3079314564Sdim if (i != n-1) 3080314564Sdim dbgs() << ','; 3081314564Sdim } 3082314564Sdim dbgs() << ": " << LHS << '\n'; 3083314564Sdim }); 3084280031Sdim LIS->extendToIndices((LiveRange&)LHS, EndPoints); 3085280031Sdim } 3086243830Sdim 3087224145Sdim return true; 3088224145Sdim} 3089224145Sdim 3090243830Sdimbool RegisterCoalescer::joinIntervals(CoalescerPair &CP) { 3091243830Sdim return CP.isPhys() ? joinReservedPhysReg(CP) : joinVirtRegs(CP); 3092243830Sdim} 3093243830Sdim 3094224145Sdimnamespace { 3095327952Sdim 3096288943Sdim/// Information concerning MBB coalescing priority. 3097249423Sdimstruct MBBPriorityInfo { 3098249423Sdim MachineBasicBlock *MBB; 3099249423Sdim unsigned Depth; 3100249423Sdim bool IsSplit; 3101224145Sdim 3102249423Sdim MBBPriorityInfo(MachineBasicBlock *mbb, unsigned depth, bool issplit) 3103249423Sdim : MBB(mbb), Depth(depth), IsSplit(issplit) {} 3104249423Sdim}; 3105224145Sdim 3106327952Sdim} // end anonymous namespace 3107327952Sdim 3108288943Sdim/// C-style comparator that sorts first based on the loop depth of the basic 3109288943Sdim/// block (the unsigned), and then on the MBB number. 3110288943Sdim/// 3111288943Sdim/// EnableGlobalCopies assumes that the primary sort key is loop depth. 3112261991Sdimstatic int compareMBBPriority(const MBBPriorityInfo *LHS, 3113261991Sdim const MBBPriorityInfo *RHS) { 3114249423Sdim // Deeper loops first 3115249423Sdim if (LHS->Depth != RHS->Depth) 3116249423Sdim return LHS->Depth > RHS->Depth ? -1 : 1; 3117249423Sdim 3118249423Sdim // Try to unsplit critical edges next. 3119249423Sdim if (LHS->IsSplit != RHS->IsSplit) 3120249423Sdim return LHS->IsSplit ? -1 : 1; 3121249423Sdim 3122249423Sdim // Prefer blocks that are more connected in the CFG. This takes care of 3123249423Sdim // the most difficult copies first while intervals are short. 3124249423Sdim unsigned cl = LHS->MBB->pred_size() + LHS->MBB->succ_size(); 3125249423Sdim unsigned cr = RHS->MBB->pred_size() + RHS->MBB->succ_size(); 3126249423Sdim if (cl != cr) 3127249423Sdim return cl > cr ? -1 : 1; 3128249423Sdim 3129249423Sdim // As a last resort, sort by block number. 3130249423Sdim return LHS->MBB->getNumber() < RHS->MBB->getNumber() ? -1 : 1; 3131224145Sdim} 3132224145Sdim 3133249423Sdim/// \returns true if the given copy uses or defines a local live range. 3134249423Sdimstatic bool isLocalCopy(MachineInstr *Copy, const LiveIntervals *LIS) { 3135249423Sdim if (!Copy->isCopy()) 3136249423Sdim return false; 3137249423Sdim 3138261991Sdim if (Copy->getOperand(1).isUndef()) 3139261991Sdim return false; 3140261991Sdim 3141249423Sdim unsigned SrcReg = Copy->getOperand(1).getReg(); 3142249423Sdim unsigned DstReg = Copy->getOperand(0).getReg(); 3143249423Sdim if (TargetRegisterInfo::isPhysicalRegister(SrcReg) 3144249423Sdim || TargetRegisterInfo::isPhysicalRegister(DstReg)) 3145249423Sdim return false; 3146249423Sdim 3147249423Sdim return LIS->intervalIsInOneMBB(LIS->getInterval(SrcReg)) 3148249423Sdim || LIS->intervalIsInOneMBB(LIS->getInterval(DstReg)); 3149249423Sdim} 3150249423Sdim 3151249423Sdimbool RegisterCoalescer:: 3152249423SdimcopyCoalesceWorkList(MutableArrayRef<MachineInstr*> CurrList) { 3153239462Sdim bool Progress = false; 3154249423Sdim for (unsigned i = 0, e = CurrList.size(); i != e; ++i) { 3155249423Sdim if (!CurrList[i]) 3156239462Sdim continue; 3157239462Sdim // Skip instruction pointers that have already been erased, for example by 3158239462Sdim // dead code elimination. 3159321369Sdim if (ErasedInstrs.count(CurrList[i])) { 3160276479Sdim CurrList[i] = nullptr; 3161239462Sdim continue; 3162239462Sdim } 3163239462Sdim bool Again = false; 3164249423Sdim bool Success = joinCopy(CurrList[i], Again); 3165239462Sdim Progress |= Success; 3166239462Sdim if (Success || !Again) 3167276479Sdim CurrList[i] = nullptr; 3168239462Sdim } 3169239462Sdim return Progress; 3170239462Sdim} 3171239462Sdim 3172288943Sdim/// Check if DstReg is a terminal node. 3173288943Sdim/// I.e., it does not have any affinity other than \p Copy. 3174288943Sdimstatic bool isTerminalReg(unsigned DstReg, const MachineInstr &Copy, 3175288943Sdim const MachineRegisterInfo *MRI) { 3176288943Sdim assert(Copy.isCopyLike()); 3177288943Sdim // Check if the destination of this copy as any other affinity. 3178288943Sdim for (const MachineInstr &MI : MRI->reg_nodbg_instructions(DstReg)) 3179288943Sdim if (&MI != &Copy && MI.isCopyLike()) 3180288943Sdim return false; 3181288943Sdim return true; 3182288943Sdim} 3183288943Sdim 3184288943Sdimbool RegisterCoalescer::applyTerminalRule(const MachineInstr &Copy) const { 3185288943Sdim assert(Copy.isCopyLike()); 3186288943Sdim if (!UseTerminalRule) 3187288943Sdim return false; 3188288943Sdim unsigned DstReg, DstSubReg, SrcReg, SrcSubReg; 3189288943Sdim isMoveInstr(*TRI, &Copy, SrcReg, DstReg, SrcSubReg, DstSubReg); 3190288943Sdim // Check if the destination of this copy has any other affinity. 3191288943Sdim if (TargetRegisterInfo::isPhysicalRegister(DstReg) || 3192288943Sdim // If SrcReg is a physical register, the copy won't be coalesced. 3193288943Sdim // Ignoring it may have other side effect (like missing 3194288943Sdim // rematerialization). So keep it. 3195288943Sdim TargetRegisterInfo::isPhysicalRegister(SrcReg) || 3196288943Sdim !isTerminalReg(DstReg, Copy, MRI)) 3197288943Sdim return false; 3198288943Sdim 3199296417Sdim // DstReg is a terminal node. Check if it interferes with any other 3200288943Sdim // copy involving SrcReg. 3201288943Sdim const MachineBasicBlock *OrigBB = Copy.getParent(); 3202288943Sdim const LiveInterval &DstLI = LIS->getInterval(DstReg); 3203288943Sdim for (const MachineInstr &MI : MRI->reg_nodbg_instructions(SrcReg)) { 3204288943Sdim // Technically we should check if the weight of the new copy is 3205288943Sdim // interesting compared to the other one and update the weight 3206288943Sdim // of the copies accordingly. However, this would only work if 3207288943Sdim // we would gather all the copies first then coalesce, whereas 3208288943Sdim // right now we interleave both actions. 3209288943Sdim // For now, just consider the copies that are in the same block. 3210288943Sdim if (&MI == &Copy || !MI.isCopyLike() || MI.getParent() != OrigBB) 3211288943Sdim continue; 3212288943Sdim unsigned OtherReg, OtherSubReg, OtherSrcReg, OtherSrcSubReg; 3213288943Sdim isMoveInstr(*TRI, &Copy, OtherSrcReg, OtherReg, OtherSrcSubReg, 3214288943Sdim OtherSubReg); 3215288943Sdim if (OtherReg == SrcReg) 3216288943Sdim OtherReg = OtherSrcReg; 3217288943Sdim // Check if OtherReg is a non-terminal. 3218288943Sdim if (TargetRegisterInfo::isPhysicalRegister(OtherReg) || 3219288943Sdim isTerminalReg(OtherReg, MI, MRI)) 3220288943Sdim continue; 3221288943Sdim // Check that OtherReg interfere with DstReg. 3222288943Sdim if (LIS->getInterval(OtherReg).overlaps(DstLI)) { 3223327952Sdim DEBUG(dbgs() << "Apply terminal rule for: " << printReg(DstReg) << '\n'); 3224288943Sdim return true; 3225288943Sdim } 3226288943Sdim } 3227288943Sdim return false; 3228288943Sdim} 3229288943Sdim 3230239462Sdimvoid 3231239462SdimRegisterCoalescer::copyCoalesceInMBB(MachineBasicBlock *MBB) { 3232224145Sdim DEBUG(dbgs() << MBB->getName() << ":\n"); 3233224145Sdim 3234239462Sdim // Collect all copy-like instructions in MBB. Don't start coalescing anything 3235239462Sdim // yet, it might invalidate the iterator. 3236239462Sdim const unsigned PrevSize = WorkList.size(); 3237249423Sdim if (JoinGlobalCopies) { 3238288943Sdim SmallVector<MachineInstr*, 2> LocalTerminals; 3239288943Sdim SmallVector<MachineInstr*, 2> GlobalTerminals; 3240249423Sdim // Coalesce copies bottom-up to coalesce local defs before local uses. They 3241249423Sdim // are not inherently easier to resolve, but slightly preferable until we 3242249423Sdim // have local live range splitting. In particular this is required by 3243249423Sdim // cmp+jmp macro fusion. 3244261991Sdim for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end(); 3245261991Sdim MII != E; ++MII) { 3246249423Sdim if (!MII->isCopyLike()) 3247249423Sdim continue; 3248288943Sdim bool ApplyTerminalRule = applyTerminalRule(*MII); 3249288943Sdim if (isLocalCopy(&(*MII), LIS)) { 3250288943Sdim if (ApplyTerminalRule) 3251288943Sdim LocalTerminals.push_back(&(*MII)); 3252288943Sdim else 3253288943Sdim LocalWorkList.push_back(&(*MII)); 3254288943Sdim } else { 3255288943Sdim if (ApplyTerminalRule) 3256288943Sdim GlobalTerminals.push_back(&(*MII)); 3257288943Sdim else 3258288943Sdim WorkList.push_back(&(*MII)); 3259288943Sdim } 3260249423Sdim } 3261288943Sdim // Append the copies evicted by the terminal rule at the end of the list. 3262288943Sdim LocalWorkList.append(LocalTerminals.begin(), LocalTerminals.end()); 3263288943Sdim WorkList.append(GlobalTerminals.begin(), GlobalTerminals.end()); 3264249423Sdim } 3265249423Sdim else { 3266288943Sdim SmallVector<MachineInstr*, 2> Terminals; 3267309124Sdim for (MachineInstr &MII : *MBB) 3268309124Sdim if (MII.isCopyLike()) { 3269309124Sdim if (applyTerminalRule(MII)) 3270309124Sdim Terminals.push_back(&MII); 3271288943Sdim else 3272309124Sdim WorkList.push_back(&MII); 3273309124Sdim } 3274309124Sdim // Append the copies evicted by the terminal rule at the end of the list. 3275309124Sdim WorkList.append(Terminals.begin(), Terminals.end()); 3276249423Sdim } 3277239462Sdim // Try coalescing the collected copies immediately, and remove the nulls. 3278239462Sdim // This prevents the WorkList from getting too large since most copies are 3279239462Sdim // joinable on the first attempt. 3280249423Sdim MutableArrayRef<MachineInstr*> 3281249423Sdim CurrList(WorkList.begin() + PrevSize, WorkList.end()); 3282249423Sdim if (copyCoalesceWorkList(CurrList)) 3283239462Sdim WorkList.erase(std::remove(WorkList.begin() + PrevSize, WorkList.end(), 3284321369Sdim nullptr), WorkList.end()); 3285224145Sdim} 3286224145Sdim 3287249423Sdimvoid RegisterCoalescer::coalesceLocals() { 3288249423Sdim copyCoalesceWorkList(LocalWorkList); 3289249423Sdim for (unsigned j = 0, je = LocalWorkList.size(); j != je; ++j) { 3290249423Sdim if (LocalWorkList[j]) 3291249423Sdim WorkList.push_back(LocalWorkList[j]); 3292249423Sdim } 3293249423Sdim LocalWorkList.clear(); 3294249423Sdim} 3295249423Sdim 3296239462Sdimvoid RegisterCoalescer::joinAllIntervals() { 3297224145Sdim DEBUG(dbgs() << "********** JOINING INTERVALS ***********\n"); 3298249423Sdim assert(WorkList.empty() && LocalWorkList.empty() && "Old data still around."); 3299224145Sdim 3300249423Sdim std::vector<MBBPriorityInfo> MBBs; 3301249423Sdim MBBs.reserve(MF->size()); 3302296417Sdim for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) { 3303296417Sdim MachineBasicBlock *MBB = &*I; 3304249423Sdim MBBs.push_back(MBBPriorityInfo(MBB, Loops->getLoopDepth(MBB), 3305249423Sdim JoinSplitEdges && isSplitEdge(MBB))); 3306249423Sdim } 3307249423Sdim array_pod_sort(MBBs.begin(), MBBs.end(), compareMBBPriority); 3308224145Sdim 3309249423Sdim // Coalesce intervals in MBB priority order. 3310327952Sdim unsigned CurrDepth = std::numeric_limits<unsigned>::max(); 3311249423Sdim for (unsigned i = 0, e = MBBs.size(); i != e; ++i) { 3312249423Sdim // Try coalescing the collected local copies for deeper loops. 3313249423Sdim if (JoinGlobalCopies && MBBs[i].Depth < CurrDepth) { 3314249423Sdim coalesceLocals(); 3315249423Sdim CurrDepth = MBBs[i].Depth; 3316224145Sdim } 3317249423Sdim copyCoalesceInMBB(MBBs[i].MBB); 3318224145Sdim } 3319249423Sdim coalesceLocals(); 3320224145Sdim 3321224145Sdim // Joining intervals can allow other intervals to be joined. Iteratively join 3322224145Sdim // until we make no progress. 3323249423Sdim while (copyCoalesceWorkList(WorkList)) 3324239462Sdim /* empty */ ; 3325224145Sdim} 3326224145Sdim 3327224145Sdimvoid RegisterCoalescer::releaseMemory() { 3328239462Sdim ErasedInstrs.clear(); 3329239462Sdim WorkList.clear(); 3330239462Sdim DeadDefs.clear(); 3331239462Sdim InflateRegs.clear(); 3332224145Sdim} 3333224145Sdim 3334224145Sdimbool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { 3335226633Sdim MF = &fn; 3336226633Sdim MRI = &fn.getRegInfo(); 3337288943Sdim const TargetSubtargetInfo &STI = fn.getSubtarget(); 3338288943Sdim TRI = STI.getRegisterInfo(); 3339288943Sdim TII = STI.getInstrInfo(); 3340226633Sdim LIS = &getAnalysis<LiveIntervals>(); 3341296417Sdim AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); 3342226633Sdim Loops = &getAnalysis<MachineLoopInfo>(); 3343249423Sdim if (EnableGlobalCopies == cl::BOU_UNSET) 3344288943Sdim JoinGlobalCopies = STI.enableJoinGlobalCopies(); 3345249423Sdim else 3346249423Sdim JoinGlobalCopies = (EnableGlobalCopies == cl::BOU_TRUE); 3347249423Sdim 3348249423Sdim // The MachineScheduler does not currently require JoinSplitEdges. This will 3349249423Sdim // either be enabled unconditionally or replaced by a more general live range 3350249423Sdim // splitting optimization. 3351249423Sdim JoinSplitEdges = EnableJoinSplits; 3352249423Sdim 3353224145Sdim DEBUG(dbgs() << "********** SIMPLE REGISTER COALESCING **********\n" 3354243830Sdim << "********** Function: " << MF->getName() << '\n'); 3355224145Sdim 3356224145Sdim if (VerifyCoalescing) 3357226633Sdim MF->verify(this, "Before register coalescing"); 3358224145Sdim 3359224145Sdim RegClassInfo.runOnMachineFunction(fn); 3360224145Sdim 3361224145Sdim // Join (coalesce) intervals if requested. 3362239462Sdim if (EnableJoining) 3363239462Sdim joinAllIntervals(); 3364224145Sdim 3365226633Sdim // After deleting a lot of copies, register classes may be less constrained. 3366239462Sdim // Removing sub-register operands may allow GR32_ABCD -> GR32 and DPR_VFP2 -> 3367226633Sdim // DPR inflation. 3368226633Sdim array_pod_sort(InflateRegs.begin(), InflateRegs.end()); 3369226633Sdim InflateRegs.erase(std::unique(InflateRegs.begin(), InflateRegs.end()), 3370226633Sdim InflateRegs.end()); 3371226633Sdim DEBUG(dbgs() << "Trying to inflate " << InflateRegs.size() << " regs.\n"); 3372226633Sdim for (unsigned i = 0, e = InflateRegs.size(); i != e; ++i) { 3373226633Sdim unsigned Reg = InflateRegs[i]; 3374226633Sdim if (MRI->reg_nodbg_empty(Reg)) 3375226633Sdim continue; 3376288943Sdim if (MRI->recomputeRegClass(Reg)) { 3377327952Sdim DEBUG(dbgs() << printReg(Reg) << " inflated to " 3378280031Sdim << TRI->getRegClassName(MRI->getRegClass(Reg)) << '\n'); 3379296417Sdim ++NumInflated; 3380296417Sdim 3381280031Sdim LiveInterval &LI = LIS->getInterval(Reg); 3382296417Sdim if (LI.hasSubRanges()) { 3383280031Sdim // If the inflated register class does not support subregisters anymore 3384280031Sdim // remove the subranges. 3385296417Sdim if (!MRI->shouldTrackSubRegLiveness(Reg)) { 3386296417Sdim LI.clearSubRanges(); 3387296417Sdim } else { 3388288943Sdim#ifndef NDEBUG 3389296417Sdim LaneBitmask MaxMask = MRI->getMaxLaneMaskForVReg(Reg); 3390296417Sdim // If subranges are still supported, then the same subregs 3391296417Sdim // should still be supported. 3392296417Sdim for (LiveInterval::SubRange &S : LI.subranges()) { 3393314564Sdim assert((S.LaneMask & ~MaxMask).none()); 3394296417Sdim } 3395296417Sdim#endif 3396280031Sdim } 3397280031Sdim } 3398226633Sdim } 3399226633Sdim } 3400226633Sdim 3401224145Sdim DEBUG(dump()); 3402224145Sdim if (VerifyCoalescing) 3403226633Sdim MF->verify(this, "After register coalescing"); 3404224145Sdim return true; 3405224145Sdim} 3406224145Sdim 3407224145Sdimvoid RegisterCoalescer::print(raw_ostream &O, const Module* m) const { 3408226633Sdim LIS->print(O, m); 3409224145Sdim} 3410