1327952Sdim//===- RegisterCoalescer.cpp - Generic Register Coalescing Interface ------===// 2193323Sed// 3353358Sdim// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4353358Sdim// See https://llvm.org/LICENSE.txt for license information. 5353358Sdim// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6193323Sed// 7193323Sed//===----------------------------------------------------------------------===// 8193323Sed// 9193323Sed// This file implements the generic RegisterCoalescer interface which 10193323Sed// is used as the common interface used by all clients and 11193323Sed// implementations of register coalescing. 12193323Sed// 13193323Sed//===----------------------------------------------------------------------===// 14193323Sed 15224145Sdim#include "RegisterCoalescer.h" 16327952Sdim#include "llvm/ADT/ArrayRef.h" 17327952Sdim#include "llvm/ADT/BitVector.h" 18344779Sdim#include "llvm/ADT/DenseSet.h" 19239462Sdim#include "llvm/ADT/STLExtras.h" 20327952Sdim#include "llvm/ADT/SmallPtrSet.h" 21327952Sdim#include "llvm/ADT/SmallVector.h" 22239462Sdim#include "llvm/ADT/Statistic.h" 23239462Sdim#include "llvm/Analysis/AliasAnalysis.h" 24327952Sdim#include "llvm/CodeGen/LiveInterval.h" 25327952Sdim#include "llvm/CodeGen/LiveIntervals.h" 26239462Sdim#include "llvm/CodeGen/LiveRangeEdit.h" 27327952Sdim#include "llvm/CodeGen/MachineBasicBlock.h" 28327952Sdim#include "llvm/CodeGen/MachineFunction.h" 29327952Sdim#include "llvm/CodeGen/MachineFunctionPass.h" 30224145Sdim#include "llvm/CodeGen/MachineInstr.h" 31321369Sdim#include "llvm/CodeGen/MachineInstrBuilder.h" 32224145Sdim#include "llvm/CodeGen/MachineLoopInfo.h" 33327952Sdim#include "llvm/CodeGen/MachineOperand.h" 34224145Sdim#include "llvm/CodeGen/MachineRegisterInfo.h" 35224145Sdim#include "llvm/CodeGen/Passes.h" 36239462Sdim#include "llvm/CodeGen/RegisterClassInfo.h" 37327952Sdim#include "llvm/CodeGen/SlotIndexes.h" 38327952Sdim#include "llvm/CodeGen/TargetInstrInfo.h" 39327952Sdim#include "llvm/CodeGen/TargetOpcodes.h" 40327952Sdim#include "llvm/CodeGen/TargetRegisterInfo.h" 41327952Sdim#include "llvm/CodeGen/TargetSubtargetInfo.h" 42327952Sdim#include "llvm/IR/DebugLoc.h" 43360784Sdim#include "llvm/InitializePasses.h" 44327952Sdim#include "llvm/MC/LaneBitmask.h" 45327952Sdim#include "llvm/MC/MCInstrDesc.h" 46327952Sdim#include "llvm/MC/MCRegisterInfo.h" 47249423Sdim#include "llvm/Pass.h" 48224145Sdim#include "llvm/Support/CommandLine.h" 49327952Sdim#include "llvm/Support/Compiler.h" 50224145Sdim#include "llvm/Support/Debug.h" 51224145Sdim#include "llvm/Support/ErrorHandling.h" 52224145Sdim#include "llvm/Support/raw_ostream.h" 53224145Sdim#include <algorithm> 54327952Sdim#include <cassert> 55327952Sdim#include <iterator> 56327952Sdim#include <limits> 57327952Sdim#include <tuple> 58327952Sdim#include <utility> 59327952Sdim#include <vector> 60327952Sdim 61193323Sedusing namespace llvm; 62193323Sed 63276479Sdim#define DEBUG_TYPE "regalloc" 64276479Sdim 65224145SdimSTATISTIC(numJoins , "Number of interval joins performed"); 66224145SdimSTATISTIC(numCrossRCs , "Number of cross class joins performed"); 67224145SdimSTATISTIC(numCommutes , "Number of instruction commuting performed"); 68224145SdimSTATISTIC(numExtends , "Number of copies extended"); 69224145SdimSTATISTIC(NumReMats , "Number of instructions re-materialized"); 70226633SdimSTATISTIC(NumInflated , "Number of register classes inflated"); 71243830SdimSTATISTIC(NumLaneConflicts, "Number of dead lane conflicts tested"); 72243830SdimSTATISTIC(NumLaneResolves, "Number of dead lane conflicts resolved"); 73344779SdimSTATISTIC(NumShrinkToUses, "Number of shrinkToUses called"); 74224145Sdim 75327952Sdimstatic cl::opt<bool> EnableJoining("join-liveintervals", 76327952Sdim cl::desc("Coalesce copies (default=true)"), 77327952Sdim cl::init(true), cl::Hidden); 78224145Sdim 79288943Sdimstatic cl::opt<bool> UseTerminalRule("terminal-rule", 80288943Sdim cl::desc("Apply the terminal rule"), 81288943Sdim cl::init(false), cl::Hidden); 82288943Sdim 83288943Sdim/// Temporary flag to test critical edge unsplitting. 84224145Sdimstatic cl::opt<bool> 85249423SdimEnableJoinSplits("join-splitedges", 86249423Sdim cl::desc("Coalesce copies on split edges (default=subtarget)"), cl::Hidden); 87249423Sdim 88288943Sdim/// Temporary flag to test global copy optimization. 89249423Sdimstatic cl::opt<cl::boolOrDefault> 90249423SdimEnableGlobalCopies("join-globalcopies", 91249423Sdim cl::desc("Coalesce copies that span blocks (default=subtarget)"), 92249423Sdim cl::init(cl::BOU_UNSET), cl::Hidden); 93249423Sdim 94249423Sdimstatic cl::opt<bool> 95224145SdimVerifyCoalescing("verify-coalescing", 96224145Sdim cl::desc("Verify machine instrs before and after register coalescing"), 97224145Sdim cl::Hidden); 98224145Sdim 99344779Sdimstatic cl::opt<unsigned> LateRematUpdateThreshold( 100344779Sdim "late-remat-update-threshold", cl::Hidden, 101344779Sdim cl::desc("During rematerialization for a copy, if the def instruction has " 102344779Sdim "many other copy uses to be rematerialized, delay the multiple " 103344779Sdim "separate live interval update work and do them all at once after " 104344779Sdim "all those rematerialization are done. It will save a lot of " 105344779Sdim "repeated work. "), 106344779Sdim cl::init(100)); 107344779Sdim 108353358Sdimstatic cl::opt<unsigned> LargeIntervalSizeThreshold( 109353358Sdim "large-interval-size-threshold", cl::Hidden, 110353358Sdim cl::desc("If the valnos size of an interval is larger than the threshold, " 111353358Sdim "it is regarded as a large interval. "), 112353358Sdim cl::init(100)); 113353358Sdim 114353358Sdimstatic cl::opt<unsigned> LargeIntervalFreqThreshold( 115353358Sdim "large-interval-freq-threshold", cl::Hidden, 116353358Sdim cl::desc("For a large interval, if it is coalesed with other live " 117353358Sdim "intervals many times more than the threshold, stop its " 118353358Sdim "coalescing to control the compile time. "), 119353358Sdim cl::init(100)); 120353358Sdim 121226633Sdimnamespace { 122327952Sdim 123360784Sdim class JoinVals; 124360784Sdim 125239462Sdim class RegisterCoalescer : public MachineFunctionPass, 126239462Sdim private LiveRangeEdit::Delegate { 127360784Sdim MachineFunction* MF = nullptr; 128360784Sdim MachineRegisterInfo* MRI = nullptr; 129360784Sdim const TargetRegisterInfo* TRI = nullptr; 130360784Sdim const TargetInstrInfo* TII = nullptr; 131360784Sdim LiveIntervals *LIS = nullptr; 132360784Sdim const MachineLoopInfo* Loops = nullptr; 133360784Sdim AliasAnalysis *AA = nullptr; 134226633Sdim RegisterClassInfo RegClassInfo; 135226633Sdim 136360784Sdim /// Debug variable location tracking -- for each VReg, maintain an 137360784Sdim /// ordered-by-slot-index set of DBG_VALUEs, to help quick 138360784Sdim /// identification of whether coalescing may change location validity. 139360784Sdim using DbgValueLoc = std::pair<SlotIndex, MachineInstr*>; 140360784Sdim DenseMap<unsigned, std::vector<DbgValueLoc>> DbgVRegToValues; 141360784Sdim 142360784Sdim /// VRegs may be repeatedly coalesced, and have many DBG_VALUEs attached. 143360784Sdim /// To avoid repeatedly merging sets of DbgValueLocs, instead record 144360784Sdim /// which vregs have been coalesced, and where to. This map is from 145360784Sdim /// vreg => {set of vregs merged in}. 146360784Sdim DenseMap<unsigned, SmallVector<unsigned, 4>> DbgMergedVRegNums; 147360784Sdim 148280031Sdim /// A LaneMask to remember on which subregister live ranges we need to call 149280031Sdim /// shrinkToUses() later. 150296417Sdim LaneBitmask ShrinkMask; 151280031Sdim 152280031Sdim /// True if the main range of the currently coalesced intervals should be 153280031Sdim /// checked for smaller live intervals. 154360784Sdim bool ShrinkMainRange = false; 155280031Sdim 156341825Sdim /// True if the coalescer should aggressively coalesce global copies 157249423Sdim /// in favor of keeping local copies. 158360784Sdim bool JoinGlobalCopies = false; 159249423Sdim 160341825Sdim /// True if the coalescer should aggressively coalesce fall-thru 161249423Sdim /// blocks exclusively containing copies. 162360784Sdim bool JoinSplitEdges = false; 163249423Sdim 164280031Sdim /// Copy instructions yet to be coalesced. 165239462Sdim SmallVector<MachineInstr*, 8> WorkList; 166249423Sdim SmallVector<MachineInstr*, 8> LocalWorkList; 167226633Sdim 168280031Sdim /// Set of instruction pointers that have been erased, and 169239462Sdim /// that may be present in WorkList. 170239462Sdim SmallPtrSet<MachineInstr*, 8> ErasedInstrs; 171226633Sdim 172239462Sdim /// Dead instructions that are about to be deleted. 173239462Sdim SmallVector<MachineInstr*, 8> DeadDefs; 174226633Sdim 175239462Sdim /// Virtual registers to be considered for register class inflation. 176239462Sdim SmallVector<unsigned, 8> InflateRegs; 177226633Sdim 178344779Sdim /// The collection of live intervals which should have been updated 179344779Sdim /// immediately after rematerialiation but delayed until 180344779Sdim /// lateLiveIntervalUpdate is called. 181344779Sdim DenseSet<unsigned> ToBeUpdated; 182344779Sdim 183353358Sdim /// Record how many times the large live interval with many valnos 184353358Sdim /// has been tried to join with other live interval. 185353358Sdim DenseMap<unsigned, unsigned long> LargeLIVisitCounter; 186353358Sdim 187239462Sdim /// Recursively eliminate dead defs in DeadDefs. 188239462Sdim void eliminateDeadDefs(); 189226633Sdim 190288943Sdim /// LiveRangeEdit callback for eliminateDeadDefs(). 191276479Sdim void LRE_WillEraseInstruction(MachineInstr *MI) override; 192239462Sdim 193280031Sdim /// Coalesce the LocalWorkList. 194249423Sdim void coalesceLocals(); 195249423Sdim 196280031Sdim /// Join compatible live intervals 197239462Sdim void joinAllIntervals(); 198239462Sdim 199280031Sdim /// Coalesce copies in the specified MBB, putting 200239462Sdim /// copies that cannot yet be coalesced into WorkList. 201239462Sdim void copyCoalesceInMBB(MachineBasicBlock *MBB); 202239462Sdim 203288943Sdim /// Tries to coalesce all copies in CurrList. Returns true if any progress 204288943Sdim /// was made. 205249423Sdim bool copyCoalesceWorkList(MutableArrayRef<MachineInstr*> CurrList); 206239462Sdim 207344779Sdim /// If one def has many copy like uses, and those copy uses are all 208344779Sdim /// rematerialized, the live interval update needed for those 209344779Sdim /// rematerializations will be delayed and done all at once instead 210344779Sdim /// of being done multiple times. This is to save compile cost because 211344779Sdim /// live interval update is costly. 212344779Sdim void lateLiveIntervalUpdate(); 213344779Sdim 214288943Sdim /// Attempt to join intervals corresponding to SrcReg/DstReg, which are the 215288943Sdim /// src/dst of the copy instruction CopyMI. This returns true if the copy 216288943Sdim /// was successfully coalesced away. If it is not currently possible to 217288943Sdim /// coalesce this interval, but it may be possible if other things get 218288943Sdim /// coalesced, then it returns true by reference in 'Again'. 219341825Sdim bool joinCopy(MachineInstr *CopyMI, bool &Again); 220226633Sdim 221280031Sdim /// Attempt to join these two intervals. On failure, this 222226633Sdim /// returns false. The output "SrcInt" will not have been modified, so we 223226633Sdim /// can use this information below to update aliases. 224239462Sdim bool joinIntervals(CoalescerPair &CP); 225226633Sdim 226243830Sdim /// Attempt joining two virtual registers. Return true on success. 227243830Sdim bool joinVirtRegs(CoalescerPair &CP); 228243830Sdim 229353358Sdim /// If a live interval has many valnos and is coalesced with other 230353358Sdim /// live intervals many times, we regard such live interval as having 231353358Sdim /// high compile time cost. 232353358Sdim bool isHighCostLiveInterval(LiveInterval &LI); 233353358Sdim 234239462Sdim /// Attempt joining with a reserved physreg. 235239462Sdim bool joinReservedPhysReg(CoalescerPair &CP); 236239462Sdim 237280031Sdim /// Add the LiveRange @p ToMerge as a subregister liverange of @p LI. 238280031Sdim /// Subranges in @p LI which only partially interfere with the desired 239280031Sdim /// LaneMask are split as necessary. @p LaneMask are the lanes that 240280031Sdim /// @p ToMerge will occupy in the coalescer register. @p LI has its subrange 241280031Sdim /// lanemasks already adjusted to the coalesced register. 242296417Sdim void mergeSubRangeInto(LiveInterval &LI, const LiveRange &ToMerge, 243360784Sdim LaneBitmask LaneMask, CoalescerPair &CP, 244360784Sdim unsigned DstIdx); 245280031Sdim 246280031Sdim /// Join the liveranges of two subregisters. Joins @p RRange into 247280031Sdim /// @p LRange, @p RRange may be invalid afterwards. 248296417Sdim void joinSubRegRanges(LiveRange &LRange, LiveRange &RRange, 249296417Sdim LaneBitmask LaneMask, const CoalescerPair &CP); 250280031Sdim 251288943Sdim /// We found a non-trivially-coalescable copy. If the source value number is 252288943Sdim /// defined by a copy from the destination reg see if we can merge these two 253288943Sdim /// destination reg valno# into a single value number, eliminating a copy. 254288943Sdim /// This returns true if an interval was modified. 255239462Sdim bool adjustCopiesBackFrom(const CoalescerPair &CP, MachineInstr *CopyMI); 256226633Sdim 257280031Sdim /// Return true if there are definitions of IntB 258226633Sdim /// other than BValNo val# that can reach uses of AValno val# of IntA. 259239462Sdim bool hasOtherReachingDefs(LiveInterval &IntA, LiveInterval &IntB, 260226633Sdim VNInfo *AValNo, VNInfo *BValNo); 261226633Sdim 262280031Sdim /// We found a non-trivially-coalescable copy. 263226633Sdim /// If the source value number is defined by a commutable instruction and 264226633Sdim /// its other operand is coalesced to the copy dest register, see if we 265226633Sdim /// can transform the copy into a noop by commuting the definition. 266344779Sdim /// This returns a pair of two flags: 267344779Sdim /// - the first element is true if an interval was modified, 268344779Sdim /// - the second element is true if the destination interval needs 269344779Sdim /// to be shrunk after deleting the copy. 270344779Sdim std::pair<bool,bool> removeCopyByCommutingDef(const CoalescerPair &CP, 271344779Sdim MachineInstr *CopyMI); 272226633Sdim 273321369Sdim /// We found a copy which can be moved to its less frequent predecessor. 274321369Sdim bool removePartialRedundancy(const CoalescerPair &CP, MachineInstr &CopyMI); 275321369Sdim 276280031Sdim /// If the source of a copy is defined by a 277226633Sdim /// trivial computation, replace the copy by rematerialize the definition. 278288943Sdim bool reMaterializeTrivialDef(const CoalescerPair &CP, MachineInstr *CopyMI, 279261991Sdim bool &IsDefCopy); 280226633Sdim 281288943Sdim /// Return true if a copy involving a physreg should be joined. 282249423Sdim bool canJoinPhys(const CoalescerPair &CP); 283226633Sdim 284288943Sdim /// Replace all defs and uses of SrcReg to DstReg and update the subregister 285288943Sdim /// number if it is not zero. If DstReg is a physical register and the 286288943Sdim /// existing subregister number of the def / use being updated is not zero, 287288943Sdim /// make sure to set it to the correct physical subregister. 288239462Sdim void updateRegDefsUses(unsigned SrcReg, unsigned DstReg, unsigned SubIdx); 289226633Sdim 290309124Sdim /// If the given machine operand reads only undefined lanes add an undef 291309124Sdim /// flag. 292309124Sdim /// This can happen when undef uses were previously concealed by a copy 293309124Sdim /// which we coalesced. Example: 294327952Sdim /// %0:sub0<def,read-undef> = ... 295327952Sdim /// %1 = COPY %0 <-- Coalescing COPY reveals undef 296327952Sdim /// = use %1:sub1 <-- hidden undef use 297309124Sdim void addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx, 298309124Sdim MachineOperand &MO, unsigned SubRegIdx); 299309124Sdim 300341825Sdim /// Handle copies of undef values. If the undef value is an incoming 301341825Sdim /// PHI value, it will convert @p CopyMI to an IMPLICIT_DEF. 302341825Sdim /// Returns nullptr if @p CopyMI was not in any way eliminable. Otherwise, 303341825Sdim /// it returns @p CopyMI (which could be an IMPLICIT_DEF at this point). 304341825Sdim MachineInstr *eliminateUndefCopy(MachineInstr *CopyMI); 305226633Sdim 306288943Sdim /// Check whether or not we should apply the terminal rule on the 307288943Sdim /// destination (Dst) of \p Copy. 308288943Sdim /// When the terminal rule applies, Copy is not profitable to 309288943Sdim /// coalesce. 310288943Sdim /// Dst is terminal if it has exactly one affinity (Dst, Src) and 311288943Sdim /// at least one interference (Dst, Dst2). If Dst is terminal, the 312288943Sdim /// terminal rule consists in checking that at least one of 313288943Sdim /// interfering node, say Dst2, has an affinity of equal or greater 314288943Sdim /// weight with Src. 315288943Sdim /// In that case, Dst2 and Dst will not be able to be both coalesced 316288943Sdim /// with Src. Since Dst2 exposes more coalescing opportunities than 317288943Sdim /// Dst, we can drop \p Copy. 318288943Sdim bool applyTerminalRule(const MachineInstr &Copy) const; 319288943Sdim 320288943Sdim /// Wrapper method for \see LiveIntervals::shrinkToUses. 321288943Sdim /// This method does the proper fixing of the live-ranges when the afore 322288943Sdim /// mentioned method returns true. 323288943Sdim void shrinkToUses(LiveInterval *LI, 324288943Sdim SmallVectorImpl<MachineInstr * > *Dead = nullptr) { 325344779Sdim NumShrinkToUses++; 326296417Sdim if (LIS->shrinkToUses(LI, Dead)) { 327296417Sdim /// Check whether or not \p LI is composed by multiple connected 328296417Sdim /// components and if that is the case, fix that. 329296417Sdim SmallVector<LiveInterval*, 8> SplitLIs; 330296417Sdim LIS->splitSeparateComponents(*LI, SplitLIs); 331296417Sdim } 332288943Sdim } 333288943Sdim 334327952Sdim /// Wrapper Method to do all the necessary work when an Instruction is 335327952Sdim /// deleted. 336327952Sdim /// Optimizations should use this to make sure that deleted instructions 337327952Sdim /// are always accounted for. 338327952Sdim void deleteInstr(MachineInstr* MI) { 339327952Sdim ErasedInstrs.insert(MI); 340327952Sdim LIS->RemoveMachineInstrFromMaps(*MI); 341327952Sdim MI->eraseFromParent(); 342327952Sdim } 343327952Sdim 344360784Sdim /// Walk over function and initialize the DbgVRegToValues map. 345360784Sdim void buildVRegToDbgValueMap(MachineFunction &MF); 346360784Sdim 347360784Sdim /// Test whether, after merging, any DBG_VALUEs would refer to a 348360784Sdim /// different value number than before merging, and whether this can 349360784Sdim /// be resolved. If not, mark the DBG_VALUE as being undef. 350360784Sdim void checkMergingChangesDbgValues(CoalescerPair &CP, LiveRange &LHS, 351360784Sdim JoinVals &LHSVals, LiveRange &RHS, 352360784Sdim JoinVals &RHSVals); 353360784Sdim 354360784Sdim void checkMergingChangesDbgValuesImpl(unsigned Reg, LiveRange &OtherRange, 355360784Sdim LiveRange &RegRange, JoinVals &Vals2); 356360784Sdim 357226633Sdim public: 358288943Sdim static char ID; ///< Class identification, replacement for typeinfo 359327952Sdim 360226633Sdim RegisterCoalescer() : MachineFunctionPass(ID) { 361226633Sdim initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry()); 362226633Sdim } 363226633Sdim 364276479Sdim void getAnalysisUsage(AnalysisUsage &AU) const override; 365226633Sdim 366276479Sdim void releaseMemory() override; 367226633Sdim 368280031Sdim /// This is the pass entry point. 369276479Sdim bool runOnMachineFunction(MachineFunction&) override; 370226633Sdim 371280031Sdim /// Implement the dump method. 372276479Sdim void print(raw_ostream &O, const Module* = nullptr) const override; 373226633Sdim }; 374327952Sdim 375288943Sdim} // end anonymous namespace 376226633Sdim 377327952Sdimchar RegisterCoalescer::ID = 0; 378327952Sdim 379234353Sdimchar &llvm::RegisterCoalescerID = RegisterCoalescer::ID; 380226633Sdim 381224145SdimINITIALIZE_PASS_BEGIN(RegisterCoalescer, "simple-register-coalescing", 382224145Sdim "Simple Register Coalescing", false, false) 383224145SdimINITIALIZE_PASS_DEPENDENCY(LiveIntervals) 384224145SdimINITIALIZE_PASS_DEPENDENCY(SlotIndexes) 385224145SdimINITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) 386296417SdimINITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) 387224145SdimINITIALIZE_PASS_END(RegisterCoalescer, "simple-register-coalescing", 388224145Sdim "Simple Register Coalescing", false, false) 389224145Sdim 390353358SdimLLVM_NODISCARD static bool isMoveInstr(const TargetRegisterInfo &tri, 391353358Sdim const MachineInstr *MI, unsigned &Src, 392353358Sdim unsigned &Dst, unsigned &SrcSub, 393353358Sdim unsigned &DstSub) { 394210299Sed if (MI->isCopy()) { 395210299Sed Dst = MI->getOperand(0).getReg(); 396210299Sed DstSub = MI->getOperand(0).getSubReg(); 397210299Sed Src = MI->getOperand(1).getReg(); 398210299Sed SrcSub = MI->getOperand(1).getSubReg(); 399210299Sed } else if (MI->isSubregToReg()) { 400210299Sed Dst = MI->getOperand(0).getReg(); 401243830Sdim DstSub = tri.composeSubRegIndices(MI->getOperand(0).getSubReg(), 402243830Sdim MI->getOperand(3).getImm()); 403210299Sed Src = MI->getOperand(2).getReg(); 404210299Sed SrcSub = MI->getOperand(2).getSubReg(); 405212904Sdim } else 406210299Sed return false; 407210299Sed return true; 408210299Sed} 409210299Sed 410288943Sdim/// Return true if this block should be vacated by the coalescer to eliminate 411288943Sdim/// branches. The important cases to handle in the coalescer are critical edges 412288943Sdim/// split during phi elimination which contain only copies. Simple blocks that 413288943Sdim/// contain non-branches should also be vacated, but this can be handled by an 414288943Sdim/// earlier pass similar to early if-conversion. 415249423Sdimstatic bool isSplitEdge(const MachineBasicBlock *MBB) { 416249423Sdim if (MBB->pred_size() != 1 || MBB->succ_size() != 1) 417249423Sdim return false; 418249423Sdim 419276479Sdim for (const auto &MI : *MBB) { 420276479Sdim if (!MI.isCopyLike() && !MI.isUnconditionalBranch()) 421249423Sdim return false; 422249423Sdim } 423249423Sdim return true; 424249423Sdim} 425249423Sdim 426210299Sedbool CoalescerPair::setRegisters(const MachineInstr *MI) { 427239462Sdim SrcReg = DstReg = 0; 428239462Sdim SrcIdx = DstIdx = 0; 429276479Sdim NewRC = nullptr; 430226633Sdim Flipped = CrossClass = false; 431210299Sed 432210299Sed unsigned Src, Dst, SrcSub, DstSub; 433226633Sdim if (!isMoveInstr(TRI, MI, Src, Dst, SrcSub, DstSub)) 434210299Sed return false; 435226633Sdim Partial = SrcSub || DstSub; 436210299Sed 437210299Sed // If one register is a physreg, it must be Dst. 438360784Sdim if (Register::isPhysicalRegister(Src)) { 439360784Sdim if (Register::isPhysicalRegister(Dst)) 440210299Sed return false; 441210299Sed std::swap(Src, Dst); 442210299Sed std::swap(SrcSub, DstSub); 443226633Sdim Flipped = true; 444210299Sed } 445210299Sed 446327952Sdim const MachineRegisterInfo &MRI = MI->getMF()->getRegInfo(); 447210299Sed 448360784Sdim if (Register::isPhysicalRegister(Dst)) { 449210299Sed // Eliminate DstSub on a physreg. 450210299Sed if (DstSub) { 451226633Sdim Dst = TRI.getSubReg(Dst, DstSub); 452210299Sed if (!Dst) return false; 453210299Sed DstSub = 0; 454210299Sed } 455210299Sed 456210299Sed // Eliminate SrcSub by picking a corresponding Dst superregister. 457210299Sed if (SrcSub) { 458226633Sdim Dst = TRI.getMatchingSuperReg(Dst, SrcSub, MRI.getRegClass(Src)); 459210299Sed if (!Dst) return false; 460210299Sed } else if (!MRI.getRegClass(Src)->contains(Dst)) { 461210299Sed return false; 462210299Sed } 463210299Sed } else { 464210299Sed // Both registers are virtual. 465239462Sdim const TargetRegisterClass *SrcRC = MRI.getRegClass(Src); 466239462Sdim const TargetRegisterClass *DstRC = MRI.getRegClass(Dst); 467210299Sed 468210299Sed // Both registers have subreg indices. 469210299Sed if (SrcSub && DstSub) { 470239462Sdim // Copies between different sub-registers are never coalescable. 471239462Sdim if (Src == Dst && SrcSub != DstSub) 472210299Sed return false; 473239462Sdim 474239462Sdim NewRC = TRI.getCommonSuperRegClass(SrcRC, SrcSub, DstRC, DstSub, 475239462Sdim SrcIdx, DstIdx); 476239462Sdim if (!NewRC) 477210299Sed return false; 478239462Sdim } else if (DstSub) { 479239462Sdim // SrcReg will be merged with a sub-register of DstReg. 480239462Sdim SrcIdx = DstSub; 481239462Sdim NewRC = TRI.getMatchingSuperRegClass(DstRC, SrcRC, DstSub); 482239462Sdim } else if (SrcSub) { 483239462Sdim // DstReg will be merged with a sub-register of SrcReg. 484239462Sdim DstIdx = SrcSub; 485239462Sdim NewRC = TRI.getMatchingSuperRegClass(SrcRC, DstRC, SrcSub); 486239462Sdim } else { 487239462Sdim // This is a straight copy without sub-registers. 488239462Sdim NewRC = TRI.getCommonSubClass(DstRC, SrcRC); 489210299Sed } 490210299Sed 491239462Sdim // The combined constraint may be impossible to satisfy. 492239462Sdim if (!NewRC) 493239462Sdim return false; 494239462Sdim 495239462Sdim // Prefer SrcReg to be a sub-register of DstReg. 496239462Sdim // FIXME: Coalescer should support subregs symmetrically. 497239462Sdim if (DstIdx && !SrcIdx) { 498210299Sed std::swap(Src, Dst); 499239462Sdim std::swap(SrcIdx, DstIdx); 500239462Sdim Flipped = !Flipped; 501210299Sed } 502210299Sed 503226633Sdim CrossClass = NewRC != DstRC || NewRC != SrcRC; 504210299Sed } 505210299Sed // Check our invariants 506360784Sdim assert(Register::isVirtualRegister(Src) && "Src must be virtual"); 507360784Sdim assert(!(Register::isPhysicalRegister(Dst) && DstSub) && 508210299Sed "Cannot have a physical SubIdx"); 509226633Sdim SrcReg = Src; 510226633Sdim DstReg = Dst; 511210299Sed return true; 512210299Sed} 513210299Sed 514210299Sedbool CoalescerPair::flip() { 515360784Sdim if (Register::isPhysicalRegister(DstReg)) 516210299Sed return false; 517226633Sdim std::swap(SrcReg, DstReg); 518239462Sdim std::swap(SrcIdx, DstIdx); 519226633Sdim Flipped = !Flipped; 520210299Sed return true; 521210299Sed} 522210299Sed 523210299Sedbool CoalescerPair::isCoalescable(const MachineInstr *MI) const { 524210299Sed if (!MI) 525210299Sed return false; 526210299Sed unsigned Src, Dst, SrcSub, DstSub; 527226633Sdim if (!isMoveInstr(TRI, MI, Src, Dst, SrcSub, DstSub)) 528210299Sed return false; 529210299Sed 530226633Sdim // Find the virtual register that is SrcReg. 531226633Sdim if (Dst == SrcReg) { 532210299Sed std::swap(Src, Dst); 533210299Sed std::swap(SrcSub, DstSub); 534226633Sdim } else if (Src != SrcReg) { 535210299Sed return false; 536210299Sed } 537210299Sed 538226633Sdim // Now check that Dst matches DstReg. 539360784Sdim if (Register::isPhysicalRegister(DstReg)) { 540360784Sdim if (!Register::isPhysicalRegister(Dst)) 541210299Sed return false; 542239462Sdim assert(!DstIdx && !SrcIdx && "Inconsistent CoalescerPair state."); 543210299Sed // DstSub could be set for a physreg from INSERT_SUBREG. 544210299Sed if (DstSub) 545226633Sdim Dst = TRI.getSubReg(Dst, DstSub); 546210299Sed // Full copy of Src. 547210299Sed if (!SrcSub) 548226633Sdim return DstReg == Dst; 549210299Sed // This is a partial register copy. Check that the parts match. 550226633Sdim return TRI.getSubReg(DstReg, SrcSub) == Dst; 551210299Sed } else { 552226633Sdim // DstReg is virtual. 553226633Sdim if (DstReg != Dst) 554210299Sed return false; 555210299Sed // Registers match, do the subregisters line up? 556243830Sdim return TRI.composeSubRegIndices(SrcIdx, SrcSub) == 557243830Sdim TRI.composeSubRegIndices(DstIdx, DstSub); 558210299Sed } 559210299Sed} 560210299Sed 561224145Sdimvoid RegisterCoalescer::getAnalysisUsage(AnalysisUsage &AU) const { 562224145Sdim AU.setPreservesCFG(); 563296417Sdim AU.addRequired<AAResultsWrapperPass>(); 564224145Sdim AU.addRequired<LiveIntervals>(); 565224145Sdim AU.addPreserved<LiveIntervals>(); 566224145Sdim AU.addPreserved<SlotIndexes>(); 567224145Sdim AU.addRequired<MachineLoopInfo>(); 568224145Sdim AU.addPreserved<MachineLoopInfo>(); 569224145Sdim AU.addPreservedID(MachineDominatorsID); 570224145Sdim MachineFunctionPass::getAnalysisUsage(AU); 571224145Sdim} 572224145Sdim 573239462Sdimvoid RegisterCoalescer::eliminateDeadDefs() { 574261991Sdim SmallVector<unsigned, 8> NewRegs; 575276479Sdim LiveRangeEdit(nullptr, NewRegs, *MF, *LIS, 576276479Sdim nullptr, this).eliminateDeadDefs(DeadDefs); 577239462Sdim} 578224145Sdim 579239462Sdimvoid RegisterCoalescer::LRE_WillEraseInstruction(MachineInstr *MI) { 580239462Sdim // MI may be in WorkList. Make sure we don't visit it. 581239462Sdim ErasedInstrs.insert(MI); 582224145Sdim} 583224145Sdim 584239462Sdimbool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP, 585239462Sdim MachineInstr *CopyMI) { 586239462Sdim assert(!CP.isPartial() && "This doesn't work for partial copies."); 587239462Sdim assert(!CP.isPhys() && "This doesn't work for physreg copies."); 588224145Sdim 589224145Sdim LiveInterval &IntA = 590226633Sdim LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg()); 591224145Sdim LiveInterval &IntB = 592226633Sdim LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg()); 593309124Sdim SlotIndex CopyIdx = LIS->getInstructionIndex(*CopyMI).getRegSlot(); 594224145Sdim 595288943Sdim // We have a non-trivially-coalescable copy with IntA being the source and 596288943Sdim // IntB being the dest, thus this defines a value number in IntB. If the 597288943Sdim // source value number (in IntA) is defined by a copy from B, see if we can 598288943Sdim // merge these two pieces of B into a single value number, eliminating a copy. 599288943Sdim // For example: 600288943Sdim // 601288943Sdim // A3 = B0 602288943Sdim // ... 603288943Sdim // B1 = A3 <- this copy 604288943Sdim // 605288943Sdim // In this case, B0 can be extended to where the B1 copy lives, allowing the 606288943Sdim // B1 value number to be replaced with B0 (which simplifies the B 607288943Sdim // liveinterval). 608288943Sdim 609261991Sdim // BValNo is a value number in B that is defined by a copy from A. 'B1' in 610224145Sdim // the example above. 611261991Sdim LiveInterval::iterator BS = IntB.FindSegmentContaining(CopyIdx); 612261991Sdim if (BS == IntB.end()) return false; 613261991Sdim VNInfo *BValNo = BS->valno; 614224145Sdim 615224145Sdim // Get the location that B is defined at. Two options: either this value has 616224145Sdim // an unknown definition point or it is defined at CopyIdx. If unknown, we 617224145Sdim // can't process it. 618234353Sdim if (BValNo->def != CopyIdx) return false; 619224145Sdim 620224145Sdim // AValNo is the value number in A that defines the copy, A3 in the example. 621234353Sdim SlotIndex CopyUseIdx = CopyIdx.getRegSlot(true); 622261991Sdim LiveInterval::iterator AS = IntA.FindSegmentContaining(CopyUseIdx); 623261991Sdim // The live segment might not exist after fun with physreg coalescing. 624261991Sdim if (AS == IntA.end()) return false; 625261991Sdim VNInfo *AValNo = AS->valno; 626224145Sdim 627224145Sdim // If AValNo is defined as a copy from IntB, we can potentially process this. 628224145Sdim // Get the instruction that defines this value number. 629234353Sdim MachineInstr *ACopyMI = LIS->getInstructionFromIndex(AValNo->def); 630243830Sdim // Don't allow any partial copies, even if isCoalescable() allows them. 631243830Sdim if (!CP.isCoalescable(ACopyMI) || !ACopyMI->isFullCopy()) 632224145Sdim return false; 633224145Sdim 634261991Sdim // Get the Segment in IntB that this value number starts with. 635261991Sdim LiveInterval::iterator ValS = 636261991Sdim IntB.FindSegmentContaining(AValNo->def.getPrevSlot()); 637261991Sdim if (ValS == IntB.end()) 638224145Sdim return false; 639224145Sdim 640261991Sdim // Make sure that the end of the live segment is inside the same block as 641224145Sdim // CopyMI. 642261991Sdim MachineInstr *ValSEndInst = 643261991Sdim LIS->getInstructionFromIndex(ValS->end.getPrevSlot()); 644261991Sdim if (!ValSEndInst || ValSEndInst->getParent() != CopyMI->getParent()) 645224145Sdim return false; 646224145Sdim 647261991Sdim // Okay, we now know that ValS ends in the same block that the CopyMI 648261991Sdim // live-range starts. If there are no intervening live segments between them 649261991Sdim // in IntB, we can merge them. 650261991Sdim if (ValS+1 != BS) return false; 651224145Sdim 652341825Sdim LLVM_DEBUG(dbgs() << "Extending: " << printReg(IntB.reg, TRI)); 653224145Sdim 654261991Sdim SlotIndex FillerStart = ValS->end, FillerEnd = BS->start; 655224145Sdim // We are about to delete CopyMI, so need to remove it as the 'instruction 656224145Sdim // that defines this value #'. Update the valnum with the new defining 657224145Sdim // instruction #. 658234353Sdim BValNo->def = FillerStart; 659224145Sdim 660224145Sdim // Okay, we can merge them. We need to insert a new liverange: 661261991Sdim // [ValS.end, BS.begin) of either value number, then we merge the 662224145Sdim // two value numbers. 663261991Sdim IntB.addSegment(LiveInterval::Segment(FillerStart, FillerEnd, BValNo)); 664224145Sdim 665224145Sdim // Okay, merge "B1" into the same value number as "B0". 666261991Sdim if (BValNo != ValS->valno) 667261991Sdim IntB.MergeValueNumberInto(BValNo, ValS->valno); 668280031Sdim 669280031Sdim // Do the same for the subregister segments. 670280031Sdim for (LiveInterval::SubRange &S : IntB.subranges()) { 671341825Sdim // Check for SubRange Segments of the form [1234r,1234d:0) which can be 672341825Sdim // removed to prevent creating bogus SubRange Segments. 673341825Sdim LiveInterval::iterator SS = S.FindSegmentContaining(CopyIdx); 674341825Sdim if (SS != S.end() && SlotIndex::isSameInstr(SS->start, SS->end)) { 675341825Sdim S.removeSegment(*SS, true); 676341825Sdim continue; 677341825Sdim } 678280031Sdim VNInfo *SubBValNo = S.getVNInfoAt(CopyIdx); 679280031Sdim S.addSegment(LiveInterval::Segment(FillerStart, FillerEnd, SubBValNo)); 680280031Sdim VNInfo *SubValSNo = S.getVNInfoAt(AValNo->def.getPrevSlot()); 681280031Sdim if (SubBValNo != SubValSNo) 682280031Sdim S.MergeValueNumberInto(SubBValNo, SubValSNo); 683280031Sdim } 684280031Sdim 685341825Sdim LLVM_DEBUG(dbgs() << " result = " << IntB << '\n'); 686224145Sdim 687224145Sdim // If the source instruction was killing the source register before the 688224145Sdim // merge, unset the isKill marker given the live range has been extended. 689261991Sdim int UIdx = ValSEndInst->findRegisterUseOperandIdx(IntB.reg, true); 690224145Sdim if (UIdx != -1) { 691261991Sdim ValSEndInst->getOperand(UIdx).setIsKill(false); 692224145Sdim } 693224145Sdim 694341825Sdim // Rewrite the copy. 695239462Sdim CopyMI->substituteRegister(IntA.reg, IntB.reg, 0, *TRI); 696341825Sdim // If the copy instruction was killing the destination register or any 697341825Sdim // subrange before the merge trim the live range. 698341825Sdim bool RecomputeLiveRange = AS->end == CopyIdx; 699341825Sdim if (!RecomputeLiveRange) { 700341825Sdim for (LiveInterval::SubRange &S : IntA.subranges()) { 701341825Sdim LiveInterval::iterator SS = S.FindSegmentContaining(CopyUseIdx); 702341825Sdim if (SS != S.end() && SS->end == CopyIdx) { 703341825Sdim RecomputeLiveRange = true; 704341825Sdim break; 705341825Sdim } 706341825Sdim } 707341825Sdim } 708341825Sdim if (RecomputeLiveRange) 709288943Sdim shrinkToUses(&IntA); 710224145Sdim 711224145Sdim ++numExtends; 712224145Sdim return true; 713224145Sdim} 714224145Sdim 715239462Sdimbool RegisterCoalescer::hasOtherReachingDefs(LiveInterval &IntA, 716239462Sdim LiveInterval &IntB, 717239462Sdim VNInfo *AValNo, 718239462Sdim VNInfo *BValNo) { 719239462Sdim // If AValNo has PHI kills, conservatively assume that IntB defs can reach 720239462Sdim // the PHI values. 721239462Sdim if (LIS->hasPHIKill(IntA, AValNo)) 722239462Sdim return true; 723239462Sdim 724280031Sdim for (LiveRange::Segment &ASeg : IntA.segments) { 725280031Sdim if (ASeg.valno != AValNo) continue; 726353358Sdim LiveInterval::iterator BI = llvm::upper_bound(IntB, ASeg.start); 727261991Sdim if (BI != IntB.begin()) 728224145Sdim --BI; 729280031Sdim for (; BI != IntB.end() && ASeg.end >= BI->start; ++BI) { 730224145Sdim if (BI->valno == BValNo) 731224145Sdim continue; 732280031Sdim if (BI->start <= ASeg.start && BI->end > ASeg.start) 733224145Sdim return true; 734280031Sdim if (BI->start > ASeg.start && BI->start < ASeg.end) 735224145Sdim return true; 736224145Sdim } 737224145Sdim } 738224145Sdim return false; 739224145Sdim} 740224145Sdim 741341825Sdim/// Copy segments with value number @p SrcValNo from liverange @p Src to live 742280031Sdim/// range @Dst and use value number @p DstValNo there. 743344779Sdimstatic std::pair<bool,bool> 744344779SdimaddSegmentsWithValNo(LiveRange &Dst, VNInfo *DstValNo, const LiveRange &Src, 745344779Sdim const VNInfo *SrcValNo) { 746344779Sdim bool Changed = false; 747344779Sdim bool MergedWithDead = false; 748280031Sdim for (const LiveRange::Segment &S : Src.segments) { 749280031Sdim if (S.valno != SrcValNo) 750280031Sdim continue; 751344779Sdim // This is adding a segment from Src that ends in a copy that is about 752344779Sdim // to be removed. This segment is going to be merged with a pre-existing 753344779Sdim // segment in Dst. This works, except in cases when the corresponding 754344779Sdim // segment in Dst is dead. For example: adding [192r,208r:1) from Src 755344779Sdim // to [208r,208d:1) in Dst would create [192r,208d:1) in Dst. 756344779Sdim // Recognized such cases, so that the segments can be shrunk. 757344779Sdim LiveRange::Segment Added = LiveRange::Segment(S.start, S.end, DstValNo); 758344779Sdim LiveRange::Segment &Merged = *Dst.addSegment(Added); 759344779Sdim if (Merged.end.isDead()) 760344779Sdim MergedWithDead = true; 761344779Sdim Changed = true; 762280031Sdim } 763344779Sdim return std::make_pair(Changed, MergedWithDead); 764280031Sdim} 765280031Sdim 766344779Sdimstd::pair<bool,bool> 767344779SdimRegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, 768344779Sdim MachineInstr *CopyMI) { 769280031Sdim assert(!CP.isPhys()); 770224145Sdim 771224145Sdim LiveInterval &IntA = 772280031Sdim LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg()); 773224145Sdim LiveInterval &IntB = 774280031Sdim LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg()); 775224145Sdim 776288943Sdim // We found a non-trivially-coalescable copy with IntA being the source and 777288943Sdim // IntB being the dest, thus this defines a value number in IntB. If the 778288943Sdim // source value number (in IntA) is defined by a commutable instruction and 779288943Sdim // its other operand is coalesced to the copy dest register, see if we can 780288943Sdim // transform the copy into a noop by commuting the definition. For example, 781288943Sdim // 782327952Sdim // A3 = op A2 killed B0 783288943Sdim // ... 784288943Sdim // B1 = A3 <- this copy 785288943Sdim // ... 786288943Sdim // = op A3 <- more uses 787288943Sdim // 788288943Sdim // ==> 789288943Sdim // 790327952Sdim // B2 = op B0 killed A2 791288943Sdim // ... 792288943Sdim // B1 = B2 <- now an identity copy 793288943Sdim // ... 794288943Sdim // = op B2 <- more uses 795288943Sdim 796261991Sdim // BValNo is a value number in B that is defined by a copy from A. 'B1' in 797224145Sdim // the example above. 798309124Sdim SlotIndex CopyIdx = LIS->getInstructionIndex(*CopyMI).getRegSlot(); 799224145Sdim VNInfo *BValNo = IntB.getVNInfoAt(CopyIdx); 800280031Sdim assert(BValNo != nullptr && BValNo->def == CopyIdx); 801224145Sdim 802224145Sdim // AValNo is the value number in A that defines the copy, A3 in the example. 803234353Sdim VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx.getRegSlot(true)); 804280031Sdim assert(AValNo && !AValNo->isUnused() && "COPY source not live"); 805280031Sdim if (AValNo->isPHIDef()) 806344779Sdim return { false, false }; 807226633Sdim MachineInstr *DefMI = LIS->getInstructionFromIndex(AValNo->def); 808224145Sdim if (!DefMI) 809344779Sdim return { false, false }; 810234353Sdim if (!DefMI->isCommutable()) 811344779Sdim return { false, false }; 812224145Sdim // If DefMI is a two-address instruction then commuting it will change the 813224145Sdim // destination register. 814224145Sdim int DefIdx = DefMI->findRegisterDefOperandIdx(IntA.reg); 815224145Sdim assert(DefIdx != -1); 816224145Sdim unsigned UseOpIdx; 817224145Sdim if (!DefMI->isRegTiedToUseOperand(DefIdx, &UseOpIdx)) 818344779Sdim return { false, false }; 819296417Sdim 820296417Sdim // FIXME: The code below tries to commute 'UseOpIdx' operand with some other 821296417Sdim // commutable operand which is expressed by 'CommuteAnyOperandIndex'value 822296417Sdim // passed to the method. That _other_ operand is chosen by 823296417Sdim // the findCommutedOpIndices() method. 824296417Sdim // 825296417Sdim // That is obviously an area for improvement in case of instructions having 826296417Sdim // more than 2 operands. For example, if some instruction has 3 commutable 827296417Sdim // operands then all possible variants (i.e. op#1<->op#2, op#1<->op#3, 828296417Sdim // op#2<->op#3) of commute transformation should be considered/tried here. 829296417Sdim unsigned NewDstIdx = TargetInstrInfo::CommuteAnyOperandIndex; 830309124Sdim if (!TII->findCommutedOpIndices(*DefMI, UseOpIdx, NewDstIdx)) 831344779Sdim return { false, false }; 832224145Sdim 833224145Sdim MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx); 834360784Sdim Register NewReg = NewDstMO.getReg(); 835261991Sdim if (NewReg != IntB.reg || !IntB.Query(AValNo->def).isKill()) 836344779Sdim return { false, false }; 837224145Sdim 838224145Sdim // Make sure there are no other definitions of IntB that would reach the 839224145Sdim // uses which the new definition can reach. 840239462Sdim if (hasOtherReachingDefs(IntA, IntB, AValNo, BValNo)) 841344779Sdim return { false, false }; 842224145Sdim 843224145Sdim // If some of the uses of IntA.reg is already coalesced away, return false. 844224145Sdim // It's not possible to determine whether it's safe to perform the coalescing. 845276479Sdim for (MachineOperand &MO : MRI->use_nodbg_operands(IntA.reg)) { 846276479Sdim MachineInstr *UseMI = MO.getParent(); 847276479Sdim unsigned OpNo = &MO - &UseMI->getOperand(0); 848309124Sdim SlotIndex UseIdx = LIS->getInstructionIndex(*UseMI); 849261991Sdim LiveInterval::iterator US = IntA.FindSegmentContaining(UseIdx); 850261991Sdim if (US == IntA.end() || US->valno != AValNo) 851224145Sdim continue; 852239462Sdim // If this use is tied to a def, we can't rewrite the register. 853276479Sdim if (UseMI->isRegTiedToDefOperand(OpNo)) 854344779Sdim return { false, false }; 855224145Sdim } 856224145Sdim 857341825Sdim LLVM_DEBUG(dbgs() << "\tremoveCopyByCommutingDef: " << AValNo->def << '\t' 858341825Sdim << *DefMI); 859224145Sdim 860224145Sdim // At this point we have decided that it is legal to do this 861224145Sdim // transformation. Start by commuting the instruction. 862224145Sdim MachineBasicBlock *MBB = DefMI->getParent(); 863296417Sdim MachineInstr *NewMI = 864309124Sdim TII->commuteInstruction(*DefMI, false, UseOpIdx, NewDstIdx); 865224145Sdim if (!NewMI) 866344779Sdim return { false, false }; 867360784Sdim if (Register::isVirtualRegister(IntA.reg) && 868360784Sdim Register::isVirtualRegister(IntB.reg) && 869226633Sdim !MRI->constrainRegClass(IntB.reg, MRI->getRegClass(IntA.reg))) 870344779Sdim return { false, false }; 871224145Sdim if (NewMI != DefMI) { 872309124Sdim LIS->ReplaceMachineInstrInMaps(*DefMI, *NewMI); 873234353Sdim MachineBasicBlock::iterator Pos = DefMI; 874234353Sdim MBB->insert(Pos, NewMI); 875224145Sdim MBB->erase(DefMI); 876224145Sdim } 877224145Sdim 878224145Sdim // If ALR and BLR overlaps and end of BLR extends beyond end of ALR, e.g. 879224145Sdim // A = or A, B 880224145Sdim // ... 881224145Sdim // B = A 882224145Sdim // ... 883327952Sdim // C = killed A 884224145Sdim // ... 885224145Sdim // = B 886224145Sdim 887224145Sdim // Update uses of IntA of the specific Val# with IntB. 888226633Sdim for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(IntA.reg), 889280031Sdim UE = MRI->use_end(); 890280031Sdim UI != UE; /* ++UI is below because of possible MI removal */) { 891276479Sdim MachineOperand &UseMO = *UI; 892280031Sdim ++UI; 893280031Sdim if (UseMO.isUndef()) 894280031Sdim continue; 895276479Sdim MachineInstr *UseMI = UseMO.getParent(); 896224145Sdim if (UseMI->isDebugValue()) { 897224145Sdim // FIXME These don't have an instruction index. Not clear we have enough 898224145Sdim // info to decide whether to do this replacement or not. For now do it. 899224145Sdim UseMO.setReg(NewReg); 900224145Sdim continue; 901224145Sdim } 902309124Sdim SlotIndex UseIdx = LIS->getInstructionIndex(*UseMI).getRegSlot(true); 903261991Sdim LiveInterval::iterator US = IntA.FindSegmentContaining(UseIdx); 904280031Sdim assert(US != IntA.end() && "Use must be live"); 905280031Sdim if (US->valno != AValNo) 906224145Sdim continue; 907239462Sdim // Kill flags are no longer accurate. They are recomputed after RA. 908239462Sdim UseMO.setIsKill(false); 909360784Sdim if (Register::isPhysicalRegister(NewReg)) 910226633Sdim UseMO.substPhysReg(NewReg, *TRI); 911224145Sdim else 912224145Sdim UseMO.setReg(NewReg); 913224145Sdim if (UseMI == CopyMI) 914224145Sdim continue; 915224145Sdim if (!UseMI->isCopy()) 916224145Sdim continue; 917224145Sdim if (UseMI->getOperand(0).getReg() != IntB.reg || 918224145Sdim UseMI->getOperand(0).getSubReg()) 919224145Sdim continue; 920224145Sdim 921224145Sdim // This copy will become a noop. If it's defining a new val#, merge it into 922224145Sdim // BValNo. 923234353Sdim SlotIndex DefIdx = UseIdx.getRegSlot(); 924224145Sdim VNInfo *DVNI = IntB.getVNInfoAt(DefIdx); 925224145Sdim if (!DVNI) 926224145Sdim continue; 927341825Sdim LLVM_DEBUG(dbgs() << "\t\tnoop: " << DefIdx << '\t' << *UseMI); 928224145Sdim assert(DVNI->def == DefIdx); 929288943Sdim BValNo = IntB.MergeValueNumberInto(DVNI, BValNo); 930280031Sdim for (LiveInterval::SubRange &S : IntB.subranges()) { 931280031Sdim VNInfo *SubDVNI = S.getVNInfoAt(DefIdx); 932280031Sdim if (!SubDVNI) 933280031Sdim continue; 934280031Sdim VNInfo *SubBValNo = S.getVNInfoAt(CopyIdx); 935280031Sdim assert(SubBValNo->def == CopyIdx); 936288943Sdim S.MergeValueNumberInto(SubDVNI, SubBValNo); 937280031Sdim } 938280031Sdim 939327952Sdim deleteInstr(UseMI); 940224145Sdim } 941224145Sdim 942261991Sdim // Extend BValNo by merging in IntA live segments of AValNo. Val# definition 943224145Sdim // is updated. 944344779Sdim bool ShrinkB = false; 945280031Sdim BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator(); 946344779Sdim if (IntA.hasSubRanges() || IntB.hasSubRanges()) { 947280031Sdim if (!IntA.hasSubRanges()) { 948296417Sdim LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(IntA.reg); 949280031Sdim IntA.createSubRangeFrom(Allocator, Mask, IntA); 950344779Sdim } else if (!IntB.hasSubRanges()) { 951344779Sdim LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(IntB.reg); 952344779Sdim IntB.createSubRangeFrom(Allocator, Mask, IntB); 953280031Sdim } 954280031Sdim SlotIndex AIdx = CopyIdx.getRegSlot(true); 955344779Sdim LaneBitmask MaskA; 956353358Sdim const SlotIndexes &Indexes = *LIS->getSlotIndexes(); 957280031Sdim for (LiveInterval::SubRange &SA : IntA.subranges()) { 958280031Sdim VNInfo *ASubValNo = SA.getVNInfoAt(AIdx); 959353358Sdim // Even if we are dealing with a full copy, some lanes can 960353358Sdim // still be undefined. 961353358Sdim // E.g., 962353358Sdim // undef A.subLow = ... 963353358Sdim // B = COPY A <== A.subHigh is undefined here and does 964353358Sdim // not have a value number. 965353358Sdim if (!ASubValNo) 966353358Sdim continue; 967344779Sdim MaskA |= SA.LaneMask; 968280031Sdim 969353358Sdim IntB.refineSubRanges( 970353358Sdim Allocator, SA.LaneMask, 971353358Sdim [&Allocator, &SA, CopyIdx, ASubValNo, 972353358Sdim &ShrinkB](LiveInterval::SubRange &SR) { 973353358Sdim VNInfo *BSubValNo = SR.empty() ? SR.getNextValue(CopyIdx, Allocator) 974353358Sdim : SR.getVNInfoAt(CopyIdx); 975353358Sdim assert(BSubValNo != nullptr); 976353358Sdim auto P = addSegmentsWithValNo(SR, BSubValNo, SA, ASubValNo); 977353358Sdim ShrinkB |= P.second; 978353358Sdim if (P.first) 979353358Sdim BSubValNo->def = ASubValNo->def; 980353358Sdim }, 981353358Sdim Indexes, *TRI); 982280031Sdim } 983344779Sdim // Go over all subranges of IntB that have not been covered by IntA, 984344779Sdim // and delete the segments starting at CopyIdx. This can happen if 985344779Sdim // IntA has undef lanes that are defined in IntB. 986344779Sdim for (LiveInterval::SubRange &SB : IntB.subranges()) { 987344779Sdim if ((SB.LaneMask & MaskA).any()) 988344779Sdim continue; 989344779Sdim if (LiveRange::Segment *S = SB.getSegmentContaining(CopyIdx)) 990344779Sdim if (S->start.getBaseIndex() == CopyIdx.getBaseIndex()) 991344779Sdim SB.removeSegment(*S, true); 992344779Sdim } 993224145Sdim } 994280031Sdim 995280031Sdim BValNo->def = AValNo->def; 996344779Sdim auto P = addSegmentsWithValNo(IntB, BValNo, IntA, AValNo); 997344779Sdim ShrinkB |= P.second; 998341825Sdim LLVM_DEBUG(dbgs() << "\t\textended: " << IntB << '\n'); 999224145Sdim 1000288943Sdim LIS->removeVRegDefAt(IntA, AValNo->def); 1001288943Sdim 1002341825Sdim LLVM_DEBUG(dbgs() << "\t\ttrimmed: " << IntA << '\n'); 1003224145Sdim ++numCommutes; 1004344779Sdim return { true, ShrinkB }; 1005224145Sdim} 1006224145Sdim 1007321369Sdim/// For copy B = A in BB2, if A is defined by A = B in BB0 which is a 1008321369Sdim/// predecessor of BB2, and if B is not redefined on the way from A = B 1009353358Sdim/// in BB0 to B = A in BB2, B = A in BB2 is partially redundant if the 1010321369Sdim/// execution goes through the path from BB0 to BB2. We may move B = A 1011321369Sdim/// to the predecessor without such reversed copy. 1012321369Sdim/// So we will transform the program from: 1013321369Sdim/// BB0: 1014321369Sdim/// A = B; BB1: 1015321369Sdim/// ... ... 1016321369Sdim/// / \ / 1017321369Sdim/// BB2: 1018321369Sdim/// ... 1019321369Sdim/// B = A; 1020321369Sdim/// 1021321369Sdim/// to: 1022321369Sdim/// 1023321369Sdim/// BB0: BB1: 1024321369Sdim/// A = B; ... 1025321369Sdim/// ... B = A; 1026321369Sdim/// / \ / 1027321369Sdim/// BB2: 1028321369Sdim/// ... 1029321369Sdim/// 1030321369Sdim/// A special case is when BB0 and BB2 are the same BB which is the only 1031321369Sdim/// BB in a loop: 1032321369Sdim/// BB1: 1033321369Sdim/// ... 1034321369Sdim/// BB0/BB2: ---- 1035321369Sdim/// B = A; | 1036321369Sdim/// ... | 1037321369Sdim/// A = B; | 1038321369Sdim/// |------- 1039321369Sdim/// | 1040321369Sdim/// We may hoist B = A from BB0/BB2 to BB1. 1041321369Sdim/// 1042321369Sdim/// The major preconditions for correctness to remove such partial 1043321369Sdim/// redundancy include: 1044321369Sdim/// 1. A in B = A in BB2 is defined by a PHI in BB2, and one operand of 1045321369Sdim/// the PHI is defined by the reversed copy A = B in BB0. 1046321369Sdim/// 2. No B is referenced from the start of BB2 to B = A. 1047321369Sdim/// 3. No B is defined from A = B to the end of BB0. 1048321369Sdim/// 4. BB1 has only one successor. 1049321369Sdim/// 1050321369Sdim/// 2 and 4 implicitly ensure B is not live at the end of BB1. 1051321369Sdim/// 4 guarantees BB2 is hotter than BB1, so we can only move a copy to a 1052321369Sdim/// colder place, which not only prevent endless loop, but also make sure 1053321369Sdim/// the movement of copy is beneficial. 1054321369Sdimbool RegisterCoalescer::removePartialRedundancy(const CoalescerPair &CP, 1055321369Sdim MachineInstr &CopyMI) { 1056321369Sdim assert(!CP.isPhys()); 1057321369Sdim if (!CopyMI.isFullCopy()) 1058321369Sdim return false; 1059321369Sdim 1060321369Sdim MachineBasicBlock &MBB = *CopyMI.getParent(); 1061321369Sdim if (MBB.isEHPad()) 1062321369Sdim return false; 1063321369Sdim 1064321369Sdim if (MBB.pred_size() != 2) 1065321369Sdim return false; 1066321369Sdim 1067321369Sdim LiveInterval &IntA = 1068321369Sdim LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg()); 1069321369Sdim LiveInterval &IntB = 1070321369Sdim LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg()); 1071321369Sdim 1072321369Sdim // A is defined by PHI at the entry of MBB. 1073321369Sdim SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(true); 1074321369Sdim VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx); 1075321369Sdim assert(AValNo && !AValNo->isUnused() && "COPY source not live"); 1076321369Sdim if (!AValNo->isPHIDef()) 1077321369Sdim return false; 1078321369Sdim 1079321369Sdim // No B is referenced before CopyMI in MBB. 1080321369Sdim if (IntB.overlaps(LIS->getMBBStartIdx(&MBB), CopyIdx)) 1081321369Sdim return false; 1082321369Sdim 1083321369Sdim // MBB has two predecessors: one contains A = B so no copy will be inserted 1084321369Sdim // for it. The other one will have a copy moved from MBB. 1085321369Sdim bool FoundReverseCopy = false; 1086321369Sdim MachineBasicBlock *CopyLeftBB = nullptr; 1087321369Sdim for (MachineBasicBlock *Pred : MBB.predecessors()) { 1088321369Sdim VNInfo *PVal = IntA.getVNInfoBefore(LIS->getMBBEndIdx(Pred)); 1089321369Sdim MachineInstr *DefMI = LIS->getInstructionFromIndex(PVal->def); 1090321369Sdim if (!DefMI || !DefMI->isFullCopy()) { 1091321369Sdim CopyLeftBB = Pred; 1092321369Sdim continue; 1093321369Sdim } 1094321369Sdim // Check DefMI is a reverse copy and it is in BB Pred. 1095321369Sdim if (DefMI->getOperand(0).getReg() != IntA.reg || 1096321369Sdim DefMI->getOperand(1).getReg() != IntB.reg || 1097321369Sdim DefMI->getParent() != Pred) { 1098321369Sdim CopyLeftBB = Pred; 1099321369Sdim continue; 1100321369Sdim } 1101321369Sdim // If there is any other def of B after DefMI and before the end of Pred, 1102321369Sdim // we need to keep the copy of B = A at the end of Pred if we remove 1103321369Sdim // B = A from MBB. 1104321369Sdim bool ValB_Changed = false; 1105321369Sdim for (auto VNI : IntB.valnos) { 1106321369Sdim if (VNI->isUnused()) 1107321369Sdim continue; 1108321369Sdim if (PVal->def < VNI->def && VNI->def < LIS->getMBBEndIdx(Pred)) { 1109321369Sdim ValB_Changed = true; 1110321369Sdim break; 1111321369Sdim } 1112321369Sdim } 1113321369Sdim if (ValB_Changed) { 1114321369Sdim CopyLeftBB = Pred; 1115321369Sdim continue; 1116321369Sdim } 1117321369Sdim FoundReverseCopy = true; 1118321369Sdim } 1119321369Sdim 1120321369Sdim // If no reverse copy is found in predecessors, nothing to do. 1121321369Sdim if (!FoundReverseCopy) 1122321369Sdim return false; 1123321369Sdim 1124321369Sdim // If CopyLeftBB is nullptr, it means every predecessor of MBB contains 1125321369Sdim // reverse copy, CopyMI can be removed trivially if only IntA/IntB is updated. 1126321369Sdim // If CopyLeftBB is not nullptr, move CopyMI from MBB to CopyLeftBB and 1127321369Sdim // update IntA/IntB. 1128321369Sdim // 1129321369Sdim // If CopyLeftBB is not nullptr, ensure CopyLeftBB has a single succ so 1130321369Sdim // MBB is hotter than CopyLeftBB. 1131321369Sdim if (CopyLeftBB && CopyLeftBB->succ_size() > 1) 1132321369Sdim return false; 1133321369Sdim 1134341825Sdim // Now (almost sure it's) ok to move copy. 1135321369Sdim if (CopyLeftBB) { 1136341825Sdim // Position in CopyLeftBB where we should insert new copy. 1137341825Sdim auto InsPos = CopyLeftBB->getFirstTerminator(); 1138321369Sdim 1139341825Sdim // Make sure that B isn't referenced in the terminators (if any) at the end 1140341825Sdim // of the predecessor since we're about to insert a new definition of B 1141341825Sdim // before them. 1142341825Sdim if (InsPos != CopyLeftBB->end()) { 1143341825Sdim SlotIndex InsPosIdx = LIS->getInstructionIndex(*InsPos).getRegSlot(true); 1144341825Sdim if (IntB.overlaps(InsPosIdx, LIS->getMBBEndIdx(CopyLeftBB))) 1145341825Sdim return false; 1146341825Sdim } 1147341825Sdim 1148341825Sdim LLVM_DEBUG(dbgs() << "\tremovePartialRedundancy: Move the copy to " 1149341825Sdim << printMBBReference(*CopyLeftBB) << '\t' << CopyMI); 1150341825Sdim 1151321369Sdim // Insert new copy to CopyLeftBB. 1152321369Sdim MachineInstr *NewCopyMI = BuildMI(*CopyLeftBB, InsPos, CopyMI.getDebugLoc(), 1153321369Sdim TII->get(TargetOpcode::COPY), IntB.reg) 1154321369Sdim .addReg(IntA.reg); 1155321369Sdim SlotIndex NewCopyIdx = 1156321369Sdim LIS->InsertMachineInstrInMaps(*NewCopyMI).getRegSlot(); 1157321369Sdim IntB.createDeadDef(NewCopyIdx, LIS->getVNInfoAllocator()); 1158321369Sdim for (LiveInterval::SubRange &SR : IntB.subranges()) 1159321369Sdim SR.createDeadDef(NewCopyIdx, LIS->getVNInfoAllocator()); 1160321369Sdim 1161321369Sdim // If the newly created Instruction has an address of an instruction that was 1162321369Sdim // deleted before (object recycled by the allocator) it needs to be removed from 1163321369Sdim // the deleted list. 1164321369Sdim ErasedInstrs.erase(NewCopyMI); 1165321369Sdim } else { 1166341825Sdim LLVM_DEBUG(dbgs() << "\tremovePartialRedundancy: Remove the copy from " 1167341825Sdim << printMBBReference(MBB) << '\t' << CopyMI); 1168321369Sdim } 1169321369Sdim 1170321369Sdim // Remove CopyMI. 1171321369Sdim // Note: This is fine to remove the copy before updating the live-ranges. 1172321369Sdim // While updating the live-ranges, we only look at slot indices and 1173321369Sdim // never go back to the instruction. 1174321369Sdim // Mark instructions as deleted. 1175327952Sdim deleteInstr(&CopyMI); 1176321369Sdim 1177321369Sdim // Update the liveness. 1178321369Sdim SmallVector<SlotIndex, 8> EndPoints; 1179321369Sdim VNInfo *BValNo = IntB.Query(CopyIdx).valueOutOrDead(); 1180321369Sdim LIS->pruneValue(*static_cast<LiveRange *>(&IntB), CopyIdx.getRegSlot(), 1181321369Sdim &EndPoints); 1182321369Sdim BValNo->markUnused(); 1183321369Sdim // Extend IntB to the EndPoints of its original live interval. 1184321369Sdim LIS->extendToIndices(IntB, EndPoints); 1185321369Sdim 1186321369Sdim // Now, do the same for its subranges. 1187321369Sdim for (LiveInterval::SubRange &SR : IntB.subranges()) { 1188321369Sdim EndPoints.clear(); 1189321369Sdim VNInfo *BValNo = SR.Query(CopyIdx).valueOutOrDead(); 1190321369Sdim assert(BValNo && "All sublanes should be live"); 1191321369Sdim LIS->pruneValue(SR, CopyIdx.getRegSlot(), &EndPoints); 1192321369Sdim BValNo->markUnused(); 1193344779Sdim // We can have a situation where the result of the original copy is live, 1194344779Sdim // but is immediately dead in this subrange, e.g. [336r,336d:0). That makes 1195344779Sdim // the copy appear as an endpoint from pruneValue(), but we don't want it 1196344779Sdim // to because the copy has been removed. We can go ahead and remove that 1197344779Sdim // endpoint; there is no other situation here that there could be a use at 1198344779Sdim // the same place as we know that the copy is a full copy. 1199344779Sdim for (unsigned I = 0; I != EndPoints.size(); ) { 1200344779Sdim if (SlotIndex::isSameInstr(EndPoints[I], CopyIdx)) { 1201344779Sdim EndPoints[I] = EndPoints.back(); 1202344779Sdim EndPoints.pop_back(); 1203344779Sdim continue; 1204344779Sdim } 1205344779Sdim ++I; 1206344779Sdim } 1207321369Sdim LIS->extendToIndices(SR, EndPoints); 1208321369Sdim } 1209341825Sdim // If any dead defs were extended, truncate them. 1210341825Sdim shrinkToUses(&IntB); 1211321369Sdim 1212321369Sdim // Finally, update the live-range of IntA. 1213321369Sdim shrinkToUses(&IntA); 1214321369Sdim return true; 1215321369Sdim} 1216321369Sdim 1217288943Sdim/// Returns true if @p MI defines the full vreg @p Reg, as opposed to just 1218288943Sdim/// defining a subregister. 1219288943Sdimstatic bool definesFullReg(const MachineInstr &MI, unsigned Reg) { 1220360784Sdim assert(!Register::isPhysicalRegister(Reg) && 1221288943Sdim "This code cannot handle physreg aliasing"); 1222288943Sdim for (const MachineOperand &Op : MI.operands()) { 1223288943Sdim if (!Op.isReg() || !Op.isDef() || Op.getReg() != Reg) 1224288943Sdim continue; 1225288943Sdim // Return true if we define the full register or don't care about the value 1226288943Sdim // inside other subregisters. 1227288943Sdim if (Op.getSubReg() == 0 || Op.isUndef()) 1228288943Sdim return true; 1229288943Sdim } 1230288943Sdim return false; 1231288943Sdim} 1232288943Sdim 1233288943Sdimbool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP, 1234261991Sdim MachineInstr *CopyMI, 1235261991Sdim bool &IsDefCopy) { 1236261991Sdim IsDefCopy = false; 1237249423Sdim unsigned SrcReg = CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg(); 1238261991Sdim unsigned SrcIdx = CP.isFlipped() ? CP.getDstIdx() : CP.getSrcIdx(); 1239249423Sdim unsigned DstReg = CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg(); 1240261991Sdim unsigned DstIdx = CP.isFlipped() ? CP.getSrcIdx() : CP.getDstIdx(); 1241360784Sdim if (Register::isPhysicalRegister(SrcReg)) 1242249423Sdim return false; 1243249423Sdim 1244249423Sdim LiveInterval &SrcInt = LIS->getInterval(SrcReg); 1245309124Sdim SlotIndex CopyIdx = LIS->getInstructionIndex(*CopyMI); 1246261991Sdim VNInfo *ValNo = SrcInt.Query(CopyIdx).valueIn(); 1247344779Sdim if (!ValNo) 1248344779Sdim return false; 1249226633Sdim if (ValNo->isPHIDef() || ValNo->isUnused()) 1250224145Sdim return false; 1251226633Sdim MachineInstr *DefMI = LIS->getInstructionFromIndex(ValNo->def); 1252224145Sdim if (!DefMI) 1253224145Sdim return false; 1254261991Sdim if (DefMI->isCopyLike()) { 1255261991Sdim IsDefCopy = true; 1256261991Sdim return false; 1257261991Sdim } 1258309124Sdim if (!TII->isAsCheapAsAMove(*DefMI)) 1259224145Sdim return false; 1260309124Sdim if (!TII->isTriviallyReMaterializable(*DefMI, AA)) 1261224145Sdim return false; 1262288943Sdim if (!definesFullReg(*DefMI, SrcReg)) 1263288943Sdim return false; 1264224145Sdim bool SawStore = false; 1265288943Sdim if (!DefMI->isSafeToMove(AA, SawStore)) 1266224145Sdim return false; 1267234353Sdim const MCInstrDesc &MCID = DefMI->getDesc(); 1268224145Sdim if (MCID.getNumDefs() != 1) 1269224145Sdim return false; 1270249423Sdim // Only support subregister destinations when the def is read-undef. 1271249423Sdim MachineOperand &DstOperand = CopyMI->getOperand(0); 1272360784Sdim Register CopyDstReg = DstOperand.getReg(); 1273249423Sdim if (DstOperand.getSubReg() && !DstOperand.isUndef()) 1274249423Sdim return false; 1275261991Sdim 1276276479Sdim // If both SrcIdx and DstIdx are set, correct rematerialization would widen 1277276479Sdim // the register substantially (beyond both source and dest size). This is bad 1278276479Sdim // for performance since it can cascade through a function, introducing many 1279276479Sdim // extra spills and fills (e.g. ARM can easily end up copying QQQQPR registers 1280276479Sdim // around after a few subreg copies). 1281276479Sdim if (SrcIdx && DstIdx) 1282276479Sdim return false; 1283276479Sdim 1284261991Sdim const TargetRegisterClass *DefRC = TII->getRegClass(MCID, 0, TRI, *MF); 1285224145Sdim if (!DefMI->isImplicitDef()) { 1286360784Sdim if (Register::isPhysicalRegister(DstReg)) { 1287261991Sdim unsigned NewDstReg = DstReg; 1288261991Sdim 1289261991Sdim unsigned NewDstIdx = TRI->composeSubRegIndices(CP.getSrcIdx(), 1290261991Sdim DefMI->getOperand(0).getSubReg()); 1291261991Sdim if (NewDstIdx) 1292261991Sdim NewDstReg = TRI->getSubReg(DstReg, NewDstIdx); 1293261991Sdim 1294261991Sdim // Finally, make sure that the physical subregister that will be 1295261991Sdim // constructed later is permitted for the instruction. 1296261991Sdim if (!DefRC->contains(NewDstReg)) 1297224145Sdim return false; 1298261991Sdim } else { 1299261991Sdim // Theoretically, some stack frame reference could exist. Just make sure 1300261991Sdim // it hasn't actually happened. 1301360784Sdim assert(Register::isVirtualRegister(DstReg) && 1302261991Sdim "Only expect to deal with virtual or physical registers"); 1303261991Sdim } 1304224145Sdim } 1305224145Sdim 1306309124Sdim DebugLoc DL = CopyMI->getDebugLoc(); 1307224145Sdim MachineBasicBlock *MBB = CopyMI->getParent(); 1308224145Sdim MachineBasicBlock::iterator MII = 1309276479Sdim std::next(MachineBasicBlock::iterator(CopyMI)); 1310309124Sdim TII->reMaterialize(*MBB, MII, DstReg, SrcIdx, *DefMI, *TRI); 1311309124Sdim MachineInstr &NewMI = *std::prev(MII); 1312309124Sdim NewMI.setDebugLoc(DL); 1313224145Sdim 1314288943Sdim // In a situation like the following: 1315327952Sdim // %0:subreg = instr ; DefMI, subreg = DstIdx 1316327952Sdim // %1 = copy %0:subreg ; CopyMI, SrcIdx = 0 1317327952Sdim // instead of widening %1 to the register class of %0 simply do: 1318327952Sdim // %1 = instr 1319288943Sdim const TargetRegisterClass *NewRC = CP.getNewRC(); 1320288943Sdim if (DstIdx != 0) { 1321309124Sdim MachineOperand &DefMO = NewMI.getOperand(0); 1322288943Sdim if (DefMO.getSubReg() == DstIdx) { 1323288943Sdim assert(SrcIdx == 0 && CP.isFlipped() 1324288943Sdim && "Shouldn't have SrcIdx+DstIdx at this point"); 1325288943Sdim const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg); 1326288943Sdim const TargetRegisterClass *CommonRC = 1327288943Sdim TRI->getCommonSubClass(DefRC, DstRC); 1328288943Sdim if (CommonRC != nullptr) { 1329288943Sdim NewRC = CommonRC; 1330288943Sdim DstIdx = 0; 1331288943Sdim DefMO.setSubReg(0); 1332314564Sdim DefMO.setIsUndef(false); // Only subregs can have def+undef. 1333288943Sdim } 1334288943Sdim } 1335288943Sdim } 1336288943Sdim 1337309124Sdim // CopyMI may have implicit operands, save them so that we can transfer them 1338309124Sdim // over to the newly materialized instruction after CopyMI is removed. 1339309124Sdim SmallVector<MachineOperand, 4> ImplicitOps; 1340309124Sdim ImplicitOps.reserve(CopyMI->getNumOperands() - 1341309124Sdim CopyMI->getDesc().getNumOperands()); 1342309124Sdim for (unsigned I = CopyMI->getDesc().getNumOperands(), 1343309124Sdim E = CopyMI->getNumOperands(); 1344309124Sdim I != E; ++I) { 1345309124Sdim MachineOperand &MO = CopyMI->getOperand(I); 1346309124Sdim if (MO.isReg()) { 1347341825Sdim assert(MO.isImplicit() && "No explicit operands after implicit operands."); 1348309124Sdim // Discard VReg implicit defs. 1349360784Sdim if (Register::isPhysicalRegister(MO.getReg())) 1350309124Sdim ImplicitOps.push_back(MO); 1351309124Sdim } 1352309124Sdim } 1353309124Sdim 1354309124Sdim LIS->ReplaceMachineInstrInMaps(*CopyMI, NewMI); 1355261991Sdim CopyMI->eraseFromParent(); 1356261991Sdim ErasedInstrs.insert(CopyMI); 1357249423Sdim 1358234353Sdim // NewMI may have dead implicit defs (E.g. EFLAGS for MOV<bits>r0 on X86). 1359234353Sdim // We need to remember these so we can add intervals once we insert 1360234353Sdim // NewMI into SlotIndexes. 1361234353Sdim SmallVector<unsigned, 4> NewMIImplDefs; 1362309124Sdim for (unsigned i = NewMI.getDesc().getNumOperands(), 1363309124Sdim e = NewMI.getNumOperands(); 1364309124Sdim i != e; ++i) { 1365309124Sdim MachineOperand &MO = NewMI.getOperand(i); 1366288943Sdim if (MO.isReg() && MO.isDef()) { 1367288943Sdim assert(MO.isImplicit() && MO.isDead() && 1368360784Sdim Register::isPhysicalRegister(MO.getReg())); 1369234353Sdim NewMIImplDefs.push_back(MO.getReg()); 1370234353Sdim } 1371234353Sdim } 1372234353Sdim 1373360784Sdim if (Register::isVirtualRegister(DstReg)) { 1374309124Sdim unsigned NewIdx = NewMI.getOperand(0).getSubReg(); 1375276479Sdim 1376288943Sdim if (DefRC != nullptr) { 1377288943Sdim if (NewIdx) 1378288943Sdim NewRC = TRI->getMatchingSuperRegClass(NewRC, DefRC, NewIdx); 1379288943Sdim else 1380288943Sdim NewRC = TRI->getCommonSubClass(NewRC, DefRC); 1381288943Sdim assert(NewRC && "subreg chosen for remat incompatible with instruction"); 1382288943Sdim } 1383309124Sdim // Remap subranges to new lanemask and change register class. 1384309124Sdim LiveInterval &DstInt = LIS->getInterval(DstReg); 1385309124Sdim for (LiveInterval::SubRange &SR : DstInt.subranges()) { 1386309124Sdim SR.LaneMask = TRI->composeSubRegIndexLaneMask(DstIdx, SR.LaneMask); 1387309124Sdim } 1388276479Sdim MRI->setRegClass(DstReg, NewRC); 1389276479Sdim 1390309124Sdim // Update machine operands and add flags. 1391276479Sdim updateRegDefsUses(DstReg, DstReg, DstIdx); 1392309124Sdim NewMI.getOperand(0).setSubReg(NewIdx); 1393341825Sdim // updateRegDefUses can add an "undef" flag to the definition, since 1394341825Sdim // it will replace DstReg with DstReg.DstIdx. If NewIdx is 0, make 1395341825Sdim // sure that "undef" is not set. 1396341825Sdim if (NewIdx == 0) 1397341825Sdim NewMI.getOperand(0).setIsUndef(false); 1398309124Sdim // Add dead subregister definitions if we are defining the whole register 1399309124Sdim // but only part of it is live. 1400309124Sdim // This could happen if the rematerialization instruction is rematerializing 1401309124Sdim // more than actually is used in the register. 1402309124Sdim // An example would be: 1403327952Sdim // %1 = LOAD CONSTANTS 5, 8 ; Loading both 5 and 8 in different subregs 1404309124Sdim // ; Copying only part of the register here, but the rest is undef. 1405327952Sdim // %2:sub_16bit<def, read-undef> = COPY %1:sub_16bit 1406309124Sdim // ==> 1407309124Sdim // ; Materialize all the constants but only using one 1408327952Sdim // %2 = LOAD_CONSTANTS 5, 8 1409309124Sdim // 1410309124Sdim // at this point for the part that wasn't defined before we could have 1411309124Sdim // subranges missing the definition. 1412309124Sdim if (NewIdx == 0 && DstInt.hasSubRanges()) { 1413309124Sdim SlotIndex CurrIdx = LIS->getInstructionIndex(NewMI); 1414309124Sdim SlotIndex DefIndex = 1415309124Sdim CurrIdx.getRegSlot(NewMI.getOperand(0).isEarlyClobber()); 1416309124Sdim LaneBitmask MaxMask = MRI->getMaxLaneMaskForVReg(DstReg); 1417309124Sdim VNInfo::Allocator& Alloc = LIS->getVNInfoAllocator(); 1418309124Sdim for (LiveInterval::SubRange &SR : DstInt.subranges()) { 1419309124Sdim if (!SR.liveAt(DefIndex)) 1420309124Sdim SR.createDeadDef(DefIndex, Alloc); 1421309124Sdim MaxMask &= ~SR.LaneMask; 1422309124Sdim } 1423314564Sdim if (MaxMask.any()) { 1424309124Sdim LiveInterval::SubRange *SR = DstInt.createSubRange(Alloc, MaxMask); 1425309124Sdim SR->createDeadDef(DefIndex, Alloc); 1426309124Sdim } 1427309124Sdim } 1428321369Sdim 1429321369Sdim // Make sure that the subrange for resultant undef is removed 1430321369Sdim // For example: 1431327952Sdim // %1:sub1<def,read-undef> = LOAD CONSTANT 1 1432327952Sdim // %2 = COPY %1 1433321369Sdim // ==> 1434327952Sdim // %2:sub1<def, read-undef> = LOAD CONSTANT 1 1435327952Sdim // ; Correct but need to remove the subrange for %2:sub0 1436321369Sdim // ; as it is now undef 1437321369Sdim if (NewIdx != 0 && DstInt.hasSubRanges()) { 1438321369Sdim // The affected subregister segments can be removed. 1439321369Sdim SlotIndex CurrIdx = LIS->getInstructionIndex(NewMI); 1440321369Sdim LaneBitmask DstMask = TRI->getSubRegIndexLaneMask(NewIdx); 1441321369Sdim bool UpdatedSubRanges = false; 1442321369Sdim for (LiveInterval::SubRange &SR : DstInt.subranges()) { 1443321369Sdim if ((SR.LaneMask & DstMask).none()) { 1444341825Sdim LLVM_DEBUG(dbgs() 1445341825Sdim << "Removing undefined SubRange " 1446341825Sdim << PrintLaneMask(SR.LaneMask) << " : " << SR << "\n"); 1447321369Sdim // VNI is in ValNo - remove any segments in this SubRange that have this ValNo 1448321369Sdim if (VNInfo *RmValNo = SR.getVNInfoAt(CurrIdx.getRegSlot())) { 1449321369Sdim SR.removeValNo(RmValNo); 1450321369Sdim UpdatedSubRanges = true; 1451321369Sdim } 1452321369Sdim } 1453321369Sdim } 1454321369Sdim if (UpdatedSubRanges) 1455321369Sdim DstInt.removeEmptySubRanges(); 1456321369Sdim } 1457309124Sdim } else if (NewMI.getOperand(0).getReg() != CopyDstReg) { 1458261991Sdim // The New instruction may be defining a sub-register of what's actually 1459261991Sdim // been asked for. If so it must implicitly define the whole thing. 1460360784Sdim assert(Register::isPhysicalRegister(DstReg) && 1461261991Sdim "Only expect virtual or physical registers in remat"); 1462309124Sdim NewMI.getOperand(0).setIsDead(true); 1463309124Sdim NewMI.addOperand(MachineOperand::CreateReg( 1464309124Sdim CopyDstReg, true /*IsDef*/, true /*IsImp*/, false /*IsKill*/)); 1465276479Sdim // Record small dead def live-ranges for all the subregisters 1466276479Sdim // of the destination register. 1467276479Sdim // Otherwise, variables that live through may miss some 1468276479Sdim // interferences, thus creating invalid allocation. 1469276479Sdim // E.g., i386 code: 1470327952Sdim // %1 = somedef ; %1 GR8 1471327952Sdim // %2 = remat ; %2 GR32 1472327952Sdim // CL = COPY %2.sub_8bit 1473327952Sdim // = somedef %1 ; %1 GR8 1474276479Sdim // => 1475327952Sdim // %1 = somedef ; %1 GR8 1476327952Sdim // dead ECX = remat ; implicit-def CL 1477327952Sdim // = somedef %1 ; %1 GR8 1478341825Sdim // %1 will see the interferences with CL but not with CH since 1479276479Sdim // no live-ranges would have been created for ECX. 1480276479Sdim // Fix that! 1481276479Sdim SlotIndex NewMIIdx = LIS->getInstructionIndex(NewMI); 1482309124Sdim for (MCRegUnitIterator Units(NewMI.getOperand(0).getReg(), TRI); 1483276479Sdim Units.isValid(); ++Units) 1484276479Sdim if (LiveRange *LR = LIS->getCachedRegUnit(*Units)) 1485276479Sdim LR->createDeadDef(NewMIIdx.getRegSlot(), LIS->getVNInfoAllocator()); 1486261991Sdim } 1487261991Sdim 1488309124Sdim if (NewMI.getOperand(0).getSubReg()) 1489309124Sdim NewMI.getOperand(0).setIsUndef(); 1490261991Sdim 1491309124Sdim // Transfer over implicit operands to the rematerialized instruction. 1492309124Sdim for (MachineOperand &MO : ImplicitOps) 1493309124Sdim NewMI.addOperand(MO); 1494224145Sdim 1495234353Sdim SlotIndex NewMIIdx = LIS->getInstructionIndex(NewMI); 1496234353Sdim for (unsigned i = 0, e = NewMIImplDefs.size(); i != e; ++i) { 1497239462Sdim unsigned Reg = NewMIImplDefs[i]; 1498239462Sdim for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) 1499261991Sdim if (LiveRange *LR = LIS->getCachedRegUnit(*Units)) 1500261991Sdim LR->createDeadDef(NewMIIdx.getRegSlot(), LIS->getVNInfoAllocator()); 1501234353Sdim } 1502234353Sdim 1503341825Sdim LLVM_DEBUG(dbgs() << "Remat: " << NewMI); 1504224145Sdim ++NumReMats; 1505224145Sdim 1506344779Sdim // If the virtual SrcReg is completely eliminated, update all DBG_VALUEs 1507344779Sdim // to describe DstReg instead. 1508344779Sdim if (MRI->use_nodbg_empty(SrcReg)) { 1509280031Sdim for (MachineOperand &UseMO : MRI->use_operands(SrcReg)) { 1510280031Sdim MachineInstr *UseMI = UseMO.getParent(); 1511280031Sdim if (UseMI->isDebugValue()) { 1512360784Sdim if (Register::isPhysicalRegister(DstReg)) 1513344779Sdim UseMO.substPhysReg(DstReg, *TRI); 1514344779Sdim else 1515344779Sdim UseMO.setReg(DstReg); 1516327952Sdim // Move the debug value directly after the def of the rematerialized 1517327952Sdim // value in DstReg. 1518327952Sdim MBB->splice(std::next(NewMI.getIterator()), UseMI->getParent(), UseMI); 1519341825Sdim LLVM_DEBUG(dbgs() << "\t\tupdated: " << *UseMI); 1520280031Sdim } 1521280031Sdim } 1522280031Sdim } 1523224145Sdim 1524344779Sdim if (ToBeUpdated.count(SrcReg)) 1525344779Sdim return true; 1526344779Sdim 1527344779Sdim unsigned NumCopyUses = 0; 1528344779Sdim for (MachineOperand &UseMO : MRI->use_nodbg_operands(SrcReg)) { 1529344779Sdim if (UseMO.getParent()->isCopyLike()) 1530344779Sdim NumCopyUses++; 1531344779Sdim } 1532344779Sdim if (NumCopyUses < LateRematUpdateThreshold) { 1533344779Sdim // The source interval can become smaller because we removed a use. 1534344779Sdim shrinkToUses(&SrcInt, &DeadDefs); 1535344779Sdim if (!DeadDefs.empty()) 1536344779Sdim eliminateDeadDefs(); 1537344779Sdim } else { 1538344779Sdim ToBeUpdated.insert(SrcReg); 1539344779Sdim } 1540224145Sdim return true; 1541224145Sdim} 1542224145Sdim 1543341825SdimMachineInstr *RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) { 1544341825Sdim // ProcessImplicitDefs may leave some copies of <undef> values, it only 1545341825Sdim // removes local variables. When we have a copy like: 1546288943Sdim // 1547327952Sdim // %1 = COPY undef %2 1548288943Sdim // 1549327952Sdim // We delete the copy and remove the corresponding value number from %1. 1550288943Sdim // Any uses of that value number are marked as <undef>. 1551280031Sdim 1552280031Sdim // Note that we do not query CoalescerPair here but redo isMoveInstr as the 1553280031Sdim // CoalescerPair may have a new register class with adjusted subreg indices 1554280031Sdim // at this point. 1555280031Sdim unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; 1556353358Sdim if(!isMoveInstr(*TRI, CopyMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) 1557353358Sdim return nullptr; 1558280031Sdim 1559309124Sdim SlotIndex Idx = LIS->getInstructionIndex(*CopyMI); 1560280031Sdim const LiveInterval &SrcLI = LIS->getInterval(SrcReg); 1561280031Sdim // CopyMI is undef iff SrcReg is not live before the instruction. 1562280031Sdim if (SrcSubIdx != 0 && SrcLI.hasSubRanges()) { 1563296417Sdim LaneBitmask SrcMask = TRI->getSubRegIndexLaneMask(SrcSubIdx); 1564280031Sdim for (const LiveInterval::SubRange &SR : SrcLI.subranges()) { 1565314564Sdim if ((SR.LaneMask & SrcMask).none()) 1566280031Sdim continue; 1567280031Sdim if (SR.liveAt(Idx)) 1568341825Sdim return nullptr; 1569280031Sdim } 1570280031Sdim } else if (SrcLI.liveAt(Idx)) 1571341825Sdim return nullptr; 1572226633Sdim 1573341825Sdim // If the undef copy defines a live-out value (i.e. an input to a PHI def), 1574341825Sdim // then replace it with an IMPLICIT_DEF. 1575341825Sdim LiveInterval &DstLI = LIS->getInterval(DstReg); 1576341825Sdim SlotIndex RegIndex = Idx.getRegSlot(); 1577341825Sdim LiveRange::Segment *Seg = DstLI.getSegmentContaining(RegIndex); 1578341825Sdim assert(Seg != nullptr && "No segment for defining instruction"); 1579341825Sdim if (VNInfo *V = DstLI.getVNInfoAt(Seg->end)) { 1580341825Sdim if (V->isPHIDef()) { 1581341825Sdim CopyMI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF)); 1582341825Sdim for (unsigned i = CopyMI->getNumOperands(); i != 0; --i) { 1583341825Sdim MachineOperand &MO = CopyMI->getOperand(i-1); 1584341825Sdim if (MO.isReg() && MO.isUse()) 1585341825Sdim CopyMI->RemoveOperand(i-1); 1586341825Sdim } 1587341825Sdim LLVM_DEBUG(dbgs() << "\tReplaced copy of <undef> value with an " 1588341825Sdim "implicit def\n"); 1589341825Sdim return CopyMI; 1590341825Sdim } 1591341825Sdim } 1592226633Sdim 1593280031Sdim // Remove any DstReg segments starting at the instruction. 1594341825Sdim LLVM_DEBUG(dbgs() << "\tEliminating copy of <undef> value\n"); 1595341825Sdim 1596280031Sdim // Remove value or merge with previous one in case of a subregister def. 1597280031Sdim if (VNInfo *PrevVNI = DstLI.getVNInfoAt(Idx)) { 1598288943Sdim VNInfo *VNI = DstLI.getVNInfoAt(RegIndex); 1599288943Sdim DstLI.MergeValueNumberInto(VNI, PrevVNI); 1600280031Sdim 1601288943Sdim // The affected subregister segments can be removed. 1602296417Sdim LaneBitmask DstMask = TRI->getSubRegIndexLaneMask(DstSubIdx); 1603288943Sdim for (LiveInterval::SubRange &SR : DstLI.subranges()) { 1604314564Sdim if ((SR.LaneMask & DstMask).none()) 1605288943Sdim continue; 1606288943Sdim 1607288943Sdim VNInfo *SVNI = SR.getVNInfoAt(RegIndex); 1608288943Sdim assert(SVNI != nullptr && SlotIndex::isSameInstr(SVNI->def, RegIndex)); 1609288943Sdim SR.removeValNo(SVNI); 1610288943Sdim } 1611288943Sdim DstLI.removeEmptySubRanges(); 1612288943Sdim } else 1613288943Sdim LIS->removeVRegDefAt(DstLI, RegIndex); 1614288943Sdim 1615280031Sdim // Mark uses as undef. 1616280031Sdim for (MachineOperand &MO : MRI->reg_nodbg_operands(DstReg)) { 1617280031Sdim if (MO.isDef() /*|| MO.isUndef()*/) 1618226633Sdim continue; 1619280031Sdim const MachineInstr &MI = *MO.getParent(); 1620309124Sdim SlotIndex UseIdx = LIS->getInstructionIndex(MI); 1621296417Sdim LaneBitmask UseMask = TRI->getSubRegIndexLaneMask(MO.getSubReg()); 1622280031Sdim bool isLive; 1623314564Sdim if (!UseMask.all() && DstLI.hasSubRanges()) { 1624280031Sdim isLive = false; 1625280031Sdim for (const LiveInterval::SubRange &SR : DstLI.subranges()) { 1626314564Sdim if ((SR.LaneMask & UseMask).none()) 1627280031Sdim continue; 1628280031Sdim if (SR.liveAt(UseIdx)) { 1629280031Sdim isLive = true; 1630280031Sdim break; 1631280031Sdim } 1632280031Sdim } 1633280031Sdim } else 1634280031Sdim isLive = DstLI.liveAt(UseIdx); 1635280031Sdim if (isLive) 1636226633Sdim continue; 1637226633Sdim MO.setIsUndef(true); 1638341825Sdim LLVM_DEBUG(dbgs() << "\tnew undef: " << UseIdx << '\t' << MI); 1639226633Sdim } 1640314564Sdim 1641314564Sdim // A def of a subregister may be a use of the other subregisters, so 1642314564Sdim // deleting a def of a subregister may also remove uses. Since CopyMI 1643314564Sdim // is still part of the function (but about to be erased), mark all 1644314564Sdim // defs of DstReg in it as <undef>, so that shrinkToUses would 1645314564Sdim // ignore them. 1646314564Sdim for (MachineOperand &MO : CopyMI->operands()) 1647314564Sdim if (MO.isReg() && MO.isDef() && MO.getReg() == DstReg) 1648314564Sdim MO.setIsUndef(true); 1649314564Sdim LIS->shrinkToUses(&DstLI); 1650314564Sdim 1651341825Sdim return CopyMI; 1652226633Sdim} 1653226633Sdim 1654309124Sdimvoid RegisterCoalescer::addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx, 1655309124Sdim MachineOperand &MO, unsigned SubRegIdx) { 1656309124Sdim LaneBitmask Mask = TRI->getSubRegIndexLaneMask(SubRegIdx); 1657309124Sdim if (MO.isDef()) 1658309124Sdim Mask = ~Mask; 1659309124Sdim bool IsUndef = true; 1660309124Sdim for (const LiveInterval::SubRange &S : Int.subranges()) { 1661314564Sdim if ((S.LaneMask & Mask).none()) 1662309124Sdim continue; 1663309124Sdim if (S.liveAt(UseIdx)) { 1664309124Sdim IsUndef = false; 1665309124Sdim break; 1666309124Sdim } 1667309124Sdim } 1668309124Sdim if (IsUndef) { 1669309124Sdim MO.setIsUndef(true); 1670309124Sdim // We found out some subregister use is actually reading an undefined 1671309124Sdim // value. In some cases the whole vreg has become undefined at this 1672309124Sdim // point so we have to potentially shrink the main range if the 1673309124Sdim // use was ending a live segment there. 1674309124Sdim LiveQueryResult Q = Int.Query(UseIdx); 1675309124Sdim if (Q.valueOut() == nullptr) 1676309124Sdim ShrinkMainRange = true; 1677309124Sdim } 1678309124Sdim} 1679309124Sdim 1680360784Sdimvoid RegisterCoalescer::updateRegDefsUses(unsigned SrcReg, unsigned DstReg, 1681239462Sdim unsigned SubIdx) { 1682360784Sdim bool DstIsPhys = Register::isPhysicalRegister(DstReg); 1683276479Sdim LiveInterval *DstInt = DstIsPhys ? nullptr : &LIS->getInterval(DstReg); 1684224145Sdim 1685309124Sdim if (DstInt && DstInt->hasSubRanges() && DstReg != SrcReg) { 1686309124Sdim for (MachineOperand &MO : MRI->reg_operands(DstReg)) { 1687309124Sdim unsigned SubReg = MO.getSubReg(); 1688309124Sdim if (SubReg == 0 || MO.isUndef()) 1689309124Sdim continue; 1690309124Sdim MachineInstr &MI = *MO.getParent(); 1691309124Sdim if (MI.isDebugValue()) 1692309124Sdim continue; 1693309124Sdim SlotIndex UseIdx = LIS->getInstructionIndex(MI).getRegSlot(true); 1694309124Sdim addUndefFlag(*DstInt, UseIdx, MO, SubReg); 1695309124Sdim } 1696309124Sdim } 1697309124Sdim 1698243830Sdim SmallPtrSet<MachineInstr*, 8> Visited; 1699276479Sdim for (MachineRegisterInfo::reg_instr_iterator 1700276479Sdim I = MRI->reg_instr_begin(SrcReg), E = MRI->reg_instr_end(); 1701276479Sdim I != E; ) { 1702276479Sdim MachineInstr *UseMI = &*(I++); 1703276479Sdim 1704243830Sdim // Each instruction can only be rewritten once because sub-register 1705243830Sdim // composition is not always idempotent. When SrcReg != DstReg, rewriting 1706243830Sdim // the UseMI operands removes them from the SrcReg use-def chain, but when 1707243830Sdim // SrcReg is DstReg we could encounter UseMI twice if it has multiple 1708243830Sdim // operands mentioning the virtual register. 1709280031Sdim if (SrcReg == DstReg && !Visited.insert(UseMI).second) 1710243830Sdim continue; 1711243830Sdim 1712224145Sdim SmallVector<unsigned,8> Ops; 1713224145Sdim bool Reads, Writes; 1714276479Sdim std::tie(Reads, Writes) = UseMI->readsWritesVirtualRegister(SrcReg, &Ops); 1715224145Sdim 1716239462Sdim // If SrcReg wasn't read, it may still be the case that DstReg is live-in 1717239462Sdim // because SrcReg is a sub-register. 1718321369Sdim if (DstInt && !Reads && SubIdx && !UseMI->isDebugValue()) 1719309124Sdim Reads = DstInt->liveAt(LIS->getInstructionIndex(*UseMI)); 1720239462Sdim 1721224145Sdim // Replace SrcReg with DstReg in all UseMI operands. 1722224145Sdim for (unsigned i = 0, e = Ops.size(); i != e; ++i) { 1723224145Sdim MachineOperand &MO = UseMI->getOperand(Ops[i]); 1724224145Sdim 1725239462Sdim // Adjust <undef> flags in case of sub-register joins. We don't want to 1726239462Sdim // turn a full def into a read-modify-write sub-register def and vice 1727239462Sdim // versa. 1728239462Sdim if (SubIdx && MO.isDef()) 1729239462Sdim MO.setIsUndef(!Reads); 1730226633Sdim 1731280031Sdim // A subreg use of a partially undef (super) register may be a complete 1732280031Sdim // undef use now and then has to be marked that way. 1733288943Sdim if (SubIdx != 0 && MO.isUse() && MRI->shouldTrackSubRegLiveness(DstReg)) { 1734280031Sdim if (!DstInt->hasSubRanges()) { 1735280031Sdim BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator(); 1736360784Sdim LaneBitmask FullMask = MRI->getMaxLaneMaskForVReg(DstInt->reg); 1737360784Sdim LaneBitmask UsedLanes = TRI->getSubRegIndexLaneMask(SubIdx); 1738360784Sdim LaneBitmask UnusedLanes = FullMask & ~UsedLanes; 1739360784Sdim DstInt->createSubRangeFrom(Allocator, UsedLanes, *DstInt); 1740360784Sdim // The unused lanes are just empty live-ranges at this point. 1741360784Sdim // It is the caller responsibility to set the proper 1742360784Sdim // dead segments if there is an actual dead def of the 1743360784Sdim // unused lanes. This may happen with rematerialization. 1744360784Sdim DstInt->createSubRange(Allocator, UnusedLanes); 1745280031Sdim } 1746280031Sdim SlotIndex MIIdx = UseMI->isDebugValue() 1747309124Sdim ? LIS->getSlotIndexes()->getIndexBefore(*UseMI) 1748309124Sdim : LIS->getInstructionIndex(*UseMI); 1749280031Sdim SlotIndex UseIdx = MIIdx.getRegSlot(true); 1750309124Sdim addUndefFlag(*DstInt, UseIdx, MO, SubIdx); 1751280031Sdim } 1752280031Sdim 1753224145Sdim if (DstIsPhys) 1754226633Sdim MO.substPhysReg(DstReg, *TRI); 1755224145Sdim else 1756226633Sdim MO.substVirtReg(DstReg, SubIdx, *TRI); 1757224145Sdim } 1758224145Sdim 1759341825Sdim LLVM_DEBUG({ 1760341825Sdim dbgs() << "\t\tupdated: "; 1761341825Sdim if (!UseMI->isDebugValue()) 1762341825Sdim dbgs() << LIS->getInstructionIndex(*UseMI) << "\t"; 1763341825Sdim dbgs() << *UseMI; 1764341825Sdim }); 1765224145Sdim } 1766224145Sdim} 1767224145Sdim 1768249423Sdimbool RegisterCoalescer::canJoinPhys(const CoalescerPair &CP) { 1769288943Sdim // Always join simple intervals that are defined by a single copy from a 1770288943Sdim // reserved register. This doesn't increase register pressure, so it is 1771288943Sdim // always beneficial. 1772243830Sdim if (!MRI->isReserved(CP.getDstReg())) { 1773341825Sdim LLVM_DEBUG(dbgs() << "\tCan only merge into reserved registers.\n"); 1774224145Sdim return false; 1775224145Sdim } 1776224145Sdim 1777239462Sdim LiveInterval &JoinVInt = LIS->getInterval(CP.getSrcReg()); 1778288943Sdim if (JoinVInt.containsOneValue()) 1779224145Sdim return true; 1780224145Sdim 1781341825Sdim LLVM_DEBUG( 1782341825Sdim dbgs() << "\tCannot join complex intervals into reserved register.\n"); 1783239462Sdim return false; 1784224145Sdim} 1785224145Sdim 1786239462Sdimbool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { 1787224145Sdim Again = false; 1788341825Sdim LLVM_DEBUG(dbgs() << LIS->getInstructionIndex(*CopyMI) << '\t' << *CopyMI); 1789224145Sdim 1790239462Sdim CoalescerPair CP(*TRI); 1791224145Sdim if (!CP.setRegisters(CopyMI)) { 1792341825Sdim LLVM_DEBUG(dbgs() << "\tNot coalescable.\n"); 1793224145Sdim return false; 1794224145Sdim } 1795224145Sdim 1796276479Sdim if (CP.getNewRC()) { 1797276479Sdim auto SrcRC = MRI->getRegClass(CP.getSrcReg()); 1798276479Sdim auto DstRC = MRI->getRegClass(CP.getDstReg()); 1799276479Sdim unsigned SrcIdx = CP.getSrcIdx(); 1800276479Sdim unsigned DstIdx = CP.getDstIdx(); 1801276479Sdim if (CP.isFlipped()) { 1802276479Sdim std::swap(SrcIdx, DstIdx); 1803276479Sdim std::swap(SrcRC, DstRC); 1804276479Sdim } 1805276479Sdim if (!TRI->shouldCoalesce(CopyMI, SrcRC, SrcIdx, DstRC, DstIdx, 1806327952Sdim CP.getNewRC(), *LIS)) { 1807341825Sdim LLVM_DEBUG(dbgs() << "\tSubtarget bailed on coalescing.\n"); 1808276479Sdim return false; 1809276479Sdim } 1810276479Sdim } 1811276479Sdim 1812239462Sdim // Dead code elimination. This really should be handled by MachineDCE, but 1813239462Sdim // sometimes dead copies slip through, and we can't generate invalid live 1814239462Sdim // ranges. 1815239462Sdim if (!CP.isPhys() && CopyMI->allDefsAreDead()) { 1816341825Sdim LLVM_DEBUG(dbgs() << "\tCopy is dead.\n"); 1817239462Sdim DeadDefs.push_back(CopyMI); 1818239462Sdim eliminateDeadDefs(); 1819239462Sdim return true; 1820224145Sdim } 1821224145Sdim 1822226633Sdim // Eliminate undefs. 1823341825Sdim if (!CP.isPhys()) { 1824341825Sdim // If this is an IMPLICIT_DEF, leave it alone, but don't try to coalesce. 1825341825Sdim if (MachineInstr *UndefMI = eliminateUndefCopy(CopyMI)) { 1826341825Sdim if (UndefMI->isImplicitDef()) 1827341825Sdim return false; 1828341825Sdim deleteInstr(CopyMI); 1829341825Sdim return false; // Not coalescable. 1830341825Sdim } 1831226633Sdim } 1832226633Sdim 1833239462Sdim // Coalesced copies are normally removed immediately, but transformations 1834239462Sdim // like removeCopyByCommutingDef() can inadvertently create identity copies. 1835239462Sdim // When that happens, just join the values and remove the copy. 1836239462Sdim if (CP.getSrcReg() == CP.getDstReg()) { 1837239462Sdim LiveInterval &LI = LIS->getInterval(CP.getSrcReg()); 1838341825Sdim LLVM_DEBUG(dbgs() << "\tCopy already coalesced: " << LI << '\n'); 1839309124Sdim const SlotIndex CopyIdx = LIS->getInstructionIndex(*CopyMI); 1840280031Sdim LiveQueryResult LRQ = LI.Query(CopyIdx); 1841239462Sdim if (VNInfo *DefVNI = LRQ.valueDefined()) { 1842239462Sdim VNInfo *ReadVNI = LRQ.valueIn(); 1843239462Sdim assert(ReadVNI && "No value before copy and no <undef> flag."); 1844239462Sdim assert(ReadVNI != DefVNI && "Cannot read and define the same value."); 1845239462Sdim LI.MergeValueNumberInto(DefVNI, ReadVNI); 1846280031Sdim 1847280031Sdim // Process subregister liveranges. 1848280031Sdim for (LiveInterval::SubRange &S : LI.subranges()) { 1849280031Sdim LiveQueryResult SLRQ = S.Query(CopyIdx); 1850280031Sdim if (VNInfo *SDefVNI = SLRQ.valueDefined()) { 1851280031Sdim VNInfo *SReadVNI = SLRQ.valueIn(); 1852280031Sdim S.MergeValueNumberInto(SDefVNI, SReadVNI); 1853280031Sdim } 1854280031Sdim } 1855341825Sdim LLVM_DEBUG(dbgs() << "\tMerged values: " << LI << '\n'); 1856239462Sdim } 1857327952Sdim deleteInstr(CopyMI); 1858239462Sdim return true; 1859239462Sdim } 1860224145Sdim 1861224145Sdim // Enforce policies. 1862224145Sdim if (CP.isPhys()) { 1863341825Sdim LLVM_DEBUG(dbgs() << "\tConsidering merging " 1864341825Sdim << printReg(CP.getSrcReg(), TRI) << " with " 1865341825Sdim << printReg(CP.getDstReg(), TRI, CP.getSrcIdx()) << '\n'); 1866239462Sdim if (!canJoinPhys(CP)) { 1867224145Sdim // Before giving up coalescing, if definition of source is defined by 1868224145Sdim // trivial computation, try rematerializing it. 1869261991Sdim bool IsDefCopy; 1870261991Sdim if (reMaterializeTrivialDef(CP, CopyMI, IsDefCopy)) 1871224145Sdim return true; 1872261991Sdim if (IsDefCopy) 1873261991Sdim Again = true; // May be possible to coalesce later. 1874224145Sdim return false; 1875224145Sdim } 1876224145Sdim } else { 1877280031Sdim // When possible, let DstReg be the larger interval. 1878280031Sdim if (!CP.isPartial() && LIS->getInterval(CP.getSrcReg()).size() > 1879280031Sdim LIS->getInterval(CP.getDstReg()).size()) 1880280031Sdim CP.flip(); 1881280031Sdim 1882341825Sdim LLVM_DEBUG({ 1883280031Sdim dbgs() << "\tConsidering merging to " 1884280031Sdim << TRI->getRegClassName(CP.getNewRC()) << " with "; 1885239462Sdim if (CP.getDstIdx() && CP.getSrcIdx()) 1886327952Sdim dbgs() << printReg(CP.getDstReg()) << " in " 1887239462Sdim << TRI->getSubRegIndexName(CP.getDstIdx()) << " and " 1888327952Sdim << printReg(CP.getSrcReg()) << " in " 1889239462Sdim << TRI->getSubRegIndexName(CP.getSrcIdx()) << '\n'; 1890239462Sdim else 1891327952Sdim dbgs() << printReg(CP.getSrcReg(), TRI) << " in " 1892327952Sdim << printReg(CP.getDstReg(), TRI, CP.getSrcIdx()) << '\n'; 1893239462Sdim }); 1894224145Sdim } 1895224145Sdim 1896314564Sdim ShrinkMask = LaneBitmask::getNone(); 1897280031Sdim ShrinkMainRange = false; 1898280031Sdim 1899224145Sdim // Okay, attempt to join these two intervals. On failure, this returns false. 1900224145Sdim // Otherwise, if one of the intervals being joined is a physreg, this method 1901224145Sdim // always canonicalizes DstInt to be it. The output "SrcInt" will not have 1902224145Sdim // been modified, so we can use this information below to update aliases. 1903239462Sdim if (!joinIntervals(CP)) { 1904224145Sdim // Coalescing failed. 1905224145Sdim 1906224145Sdim // If definition of source is defined by trivial computation, try 1907224145Sdim // rematerializing it. 1908261991Sdim bool IsDefCopy; 1909261991Sdim if (reMaterializeTrivialDef(CP, CopyMI, IsDefCopy)) 1910224145Sdim return true; 1911224145Sdim 1912261991Sdim // If we can eliminate the copy without merging the live segments, do so 1913261991Sdim // now. 1914239462Sdim if (!CP.isPartial() && !CP.isPhys()) { 1915344779Sdim bool Changed = adjustCopiesBackFrom(CP, CopyMI); 1916344779Sdim bool Shrink = false; 1917344779Sdim if (!Changed) 1918344779Sdim std::tie(Changed, Shrink) = removeCopyByCommutingDef(CP, CopyMI); 1919344779Sdim if (Changed) { 1920327952Sdim deleteInstr(CopyMI); 1921344779Sdim if (Shrink) { 1922344779Sdim unsigned DstReg = CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg(); 1923344779Sdim LiveInterval &DstLI = LIS->getInterval(DstReg); 1924344779Sdim shrinkToUses(&DstLI); 1925344779Sdim LLVM_DEBUG(dbgs() << "\t\tshrunk: " << DstLI << '\n'); 1926344779Sdim } 1927341825Sdim LLVM_DEBUG(dbgs() << "\tTrivial!\n"); 1928224145Sdim return true; 1929224145Sdim } 1930224145Sdim } 1931224145Sdim 1932321369Sdim // Try and see if we can partially eliminate the copy by moving the copy to 1933321369Sdim // its predecessor. 1934321369Sdim if (!CP.isPartial() && !CP.isPhys()) 1935321369Sdim if (removePartialRedundancy(CP, *CopyMI)) 1936321369Sdim return true; 1937321369Sdim 1938224145Sdim // Otherwise, we are unable to join the intervals. 1939341825Sdim LLVM_DEBUG(dbgs() << "\tInterference!\n"); 1940224145Sdim Again = true; // May be possible to coalesce later. 1941224145Sdim return false; 1942224145Sdim } 1943224145Sdim 1944224145Sdim // Coalescing to a virtual register that is of a sub-register class of the 1945224145Sdim // other. Make sure the resulting register is set to the right register class. 1946224145Sdim if (CP.isCrossClass()) { 1947224145Sdim ++numCrossRCs; 1948226633Sdim MRI->setRegClass(CP.getDstReg(), CP.getNewRC()); 1949224145Sdim } 1950224145Sdim 1951239462Sdim // Removing sub-register copies can ease the register class constraints. 1952239462Sdim // Make sure we attempt to inflate the register class of DstReg. 1953239462Sdim if (!CP.isPhys() && RegClassInfo.isProperSubClass(CP.getNewRC())) 1954239462Sdim InflateRegs.push_back(CP.getDstReg()); 1955224145Sdim 1956239462Sdim // CopyMI has been erased by joinIntervals at this point. Remove it from 1957239462Sdim // ErasedInstrs since copyCoalesceWorkList() won't add a successful join back 1958239462Sdim // to the work list. This keeps ErasedInstrs from growing needlessly. 1959239462Sdim ErasedInstrs.erase(CopyMI); 1960224145Sdim 1961239462Sdim // Rewrite all SrcReg operands to DstReg. 1962239462Sdim // Also update DstReg operands to include DstIdx if it is set. 1963239462Sdim if (CP.getDstIdx()) 1964239462Sdim updateRegDefsUses(CP.getDstReg(), CP.getDstReg(), CP.getDstIdx()); 1965239462Sdim updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx()); 1966224145Sdim 1967280031Sdim // Shrink subregister ranges if necessary. 1968314564Sdim if (ShrinkMask.any()) { 1969280031Sdim LiveInterval &LI = LIS->getInterval(CP.getDstReg()); 1970280031Sdim for (LiveInterval::SubRange &S : LI.subranges()) { 1971314564Sdim if ((S.LaneMask & ShrinkMask).none()) 1972280031Sdim continue; 1973341825Sdim LLVM_DEBUG(dbgs() << "Shrink LaneUses (Lane " << PrintLaneMask(S.LaneMask) 1974341825Sdim << ")\n"); 1975280031Sdim LIS->shrinkToUses(S, LI.reg); 1976280031Sdim } 1977288943Sdim LI.removeEmptySubRanges(); 1978280031Sdim } 1979344779Sdim 1980344779Sdim // CP.getSrcReg()'s live interval has been merged into CP.getDstReg's live 1981344779Sdim // interval. Since CP.getSrcReg() is in ToBeUpdated set and its live interval 1982344779Sdim // is not up-to-date, need to update the merged live interval here. 1983344779Sdim if (ToBeUpdated.count(CP.getSrcReg())) 1984344779Sdim ShrinkMainRange = true; 1985344779Sdim 1986280031Sdim if (ShrinkMainRange) { 1987280031Sdim LiveInterval &LI = LIS->getInterval(CP.getDstReg()); 1988288943Sdim shrinkToUses(&LI); 1989280031Sdim } 1990280031Sdim 1991234353Sdim // SrcReg is guaranteed to be the register whose live interval that is 1992224145Sdim // being merged. 1993226633Sdim LIS->removeInterval(CP.getSrcReg()); 1994224145Sdim 1995224145Sdim // Update regalloc hint. 1996288943Sdim TRI->updateRegAllocHint(CP.getSrcReg(), CP.getDstReg(), *MF); 1997224145Sdim 1998341825Sdim LLVM_DEBUG({ 1999327952Sdim dbgs() << "\tSuccess: " << printReg(CP.getSrcReg(), TRI, CP.getSrcIdx()) 2000327952Sdim << " -> " << printReg(CP.getDstReg(), TRI, CP.getDstIdx()) << '\n'; 2001280031Sdim dbgs() << "\tResult = "; 2002261991Sdim if (CP.isPhys()) 2003327952Sdim dbgs() << printReg(CP.getDstReg(), TRI); 2004261991Sdim else 2005239462Sdim dbgs() << LIS->getInterval(CP.getDstReg()); 2006261991Sdim dbgs() << '\n'; 2007224145Sdim }); 2008224145Sdim 2009224145Sdim ++numJoins; 2010224145Sdim return true; 2011224145Sdim} 2012224145Sdim 2013239462Sdimbool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) { 2014288943Sdim unsigned DstReg = CP.getDstReg(); 2015314564Sdim unsigned SrcReg = CP.getSrcReg(); 2016239462Sdim assert(CP.isPhys() && "Must be a physreg copy"); 2017288943Sdim assert(MRI->isReserved(DstReg) && "Not a reserved register"); 2018314564Sdim LiveInterval &RHS = LIS->getInterval(SrcReg); 2019341825Sdim LLVM_DEBUG(dbgs() << "\t\tRHS = " << RHS << '\n'); 2020239462Sdim 2021288943Sdim assert(RHS.containsOneValue() && "Invalid join with reserved register"); 2022239462Sdim 2023239462Sdim // Optimization for reserved registers like ESP. We can only merge with a 2024288943Sdim // reserved physreg if RHS has a single value that is a copy of DstReg. 2025239462Sdim // The live range of the reserved register will look like a set of dead defs 2026239462Sdim // - we don't properly track the live range of reserved registers. 2027239462Sdim 2028239462Sdim // Deny any overlapping intervals. This depends on all the reserved 2029239462Sdim // register live ranges to look like dead defs. 2030314564Sdim if (!MRI->isConstantPhysReg(DstReg)) { 2031314564Sdim for (MCRegUnitIterator UI(DstReg, TRI); UI.isValid(); ++UI) { 2032314564Sdim // Abort if not all the regunits are reserved. 2033314564Sdim for (MCRegUnitRootIterator RI(*UI, TRI); RI.isValid(); ++RI) { 2034314564Sdim if (!MRI->isReserved(*RI)) 2035314564Sdim return false; 2036314564Sdim } 2037314564Sdim if (RHS.overlaps(LIS->getRegUnit(*UI))) { 2038341825Sdim LLVM_DEBUG(dbgs() << "\t\tInterference: " << printRegUnit(*UI, TRI) 2039341825Sdim << '\n'); 2040314564Sdim return false; 2041314564Sdim } 2042239462Sdim } 2043321369Sdim 2044321369Sdim // We must also check for overlaps with regmask clobbers. 2045321369Sdim BitVector RegMaskUsable; 2046321369Sdim if (LIS->checkRegMaskInterference(RHS, RegMaskUsable) && 2047321369Sdim !RegMaskUsable.test(DstReg)) { 2048341825Sdim LLVM_DEBUG(dbgs() << "\t\tRegMask interference\n"); 2049321369Sdim return false; 2050321369Sdim } 2051314564Sdim } 2052239462Sdim 2053239462Sdim // Skip any value computations, we are not adding new values to the 2054239462Sdim // reserved register. Also skip merging the live ranges, the reserved 2055239462Sdim // register live range doesn't need to be accurate as long as all the 2056239462Sdim // defs are there. 2057239462Sdim 2058239462Sdim // Delete the identity copy. 2059288943Sdim MachineInstr *CopyMI; 2060288943Sdim if (CP.isFlipped()) { 2061314564Sdim // Physreg is copied into vreg 2062327952Sdim // %y = COPY %physreg_x 2063353358Sdim // ... //< no other def of %physreg_x here 2064327952Sdim // use %y 2065314564Sdim // => 2066314564Sdim // ... 2067353358Sdim // use %physreg_x 2068314564Sdim CopyMI = MRI->getVRegDef(SrcReg); 2069288943Sdim } else { 2070314564Sdim // VReg is copied into physreg: 2071327952Sdim // %y = def 2072353358Sdim // ... //< no other def or use of %physreg_x here 2073353358Sdim // %physreg_x = COPY %y 2074314564Sdim // => 2075353358Sdim // %physreg_x = def 2076314564Sdim // ... 2077314564Sdim if (!MRI->hasOneNonDBGUse(SrcReg)) { 2078341825Sdim LLVM_DEBUG(dbgs() << "\t\tMultiple vreg uses!\n"); 2079288943Sdim return false; 2080288943Sdim } 2081288943Sdim 2082314564Sdim if (!LIS->intervalIsInOneMBB(RHS)) { 2083341825Sdim LLVM_DEBUG(dbgs() << "\t\tComplex control flow!\n"); 2084314564Sdim return false; 2085314564Sdim } 2086288943Sdim 2087314564Sdim MachineInstr &DestMI = *MRI->getVRegDef(SrcReg); 2088314564Sdim CopyMI = &*MRI->use_instr_nodbg_begin(SrcReg); 2089314564Sdim SlotIndex CopyRegIdx = LIS->getInstructionIndex(*CopyMI).getRegSlot(); 2090314564Sdim SlotIndex DestRegIdx = LIS->getInstructionIndex(DestMI).getRegSlot(); 2091288943Sdim 2092314564Sdim if (!MRI->isConstantPhysReg(DstReg)) { 2093314564Sdim // We checked above that there are no interfering defs of the physical 2094327952Sdim // register. However, for this case, where we intend to move up the def of 2095314564Sdim // the physical register, we also need to check for interfering uses. 2096314564Sdim SlotIndexes *Indexes = LIS->getSlotIndexes(); 2097314564Sdim for (SlotIndex SI = Indexes->getNextNonNullIndex(DestRegIdx); 2098314564Sdim SI != CopyRegIdx; SI = Indexes->getNextNonNullIndex(SI)) { 2099314564Sdim MachineInstr *MI = LIS->getInstructionFromIndex(SI); 2100314564Sdim if (MI->readsRegister(DstReg, TRI)) { 2101341825Sdim LLVM_DEBUG(dbgs() << "\t\tInterference (read): " << *MI); 2102288943Sdim return false; 2103288943Sdim } 2104288943Sdim } 2105288943Sdim } 2106288943Sdim 2107288943Sdim // We're going to remove the copy which defines a physical reserved 2108288943Sdim // register, so remove its valno, etc. 2109341825Sdim LLVM_DEBUG(dbgs() << "\t\tRemoving phys reg def of " 2110341825Sdim << printReg(DstReg, TRI) << " at " << CopyRegIdx << "\n"); 2111288943Sdim 2112288943Sdim LIS->removePhysRegDefAt(DstReg, CopyRegIdx); 2113288943Sdim // Create a new dead def at the new def location. 2114288943Sdim for (MCRegUnitIterator UI(DstReg, TRI); UI.isValid(); ++UI) { 2115288943Sdim LiveRange &LR = LIS->getRegUnit(*UI); 2116288943Sdim LR.createDeadDef(DestRegIdx, LIS->getVNInfoAllocator()); 2117288943Sdim } 2118288943Sdim } 2119288943Sdim 2120327952Sdim deleteInstr(CopyMI); 2121239462Sdim 2122239462Sdim // We don't track kills for reserved registers. 2123239462Sdim MRI->clearKillFlags(CP.getSrcReg()); 2124239462Sdim 2125239462Sdim return true; 2126239462Sdim} 2127239462Sdim 2128243830Sdim//===----------------------------------------------------------------------===// 2129243830Sdim// Interference checking and interval joining 2130243830Sdim//===----------------------------------------------------------------------===// 2131243830Sdim// 2132243830Sdim// In the easiest case, the two live ranges being joined are disjoint, and 2133243830Sdim// there is no interference to consider. It is quite common, though, to have 2134243830Sdim// overlapping live ranges, and we need to check if the interference can be 2135243830Sdim// resolved. 2136243830Sdim// 2137243830Sdim// The live range of a single SSA value forms a sub-tree of the dominator tree. 2138243830Sdim// This means that two SSA values overlap if and only if the def of one value 2139243830Sdim// is contained in the live range of the other value. As a special case, the 2140243830Sdim// overlapping values can be defined at the same index. 2141243830Sdim// 2142243830Sdim// The interference from an overlapping def can be resolved in these cases: 2143243830Sdim// 2144243830Sdim// 1. Coalescable copies. The value is defined by a copy that would become an 2145243830Sdim// identity copy after joining SrcReg and DstReg. The copy instruction will 2146243830Sdim// be removed, and the value will be merged with the source value. 2147243830Sdim// 2148243830Sdim// There can be several copies back and forth, causing many values to be 2149243830Sdim// merged into one. We compute a list of ultimate values in the joined live 2150243830Sdim// range as well as a mappings from the old value numbers. 2151243830Sdim// 2152243830Sdim// 2. IMPLICIT_DEF. This instruction is only inserted to ensure all PHI 2153243830Sdim// predecessors have a live out value. It doesn't cause real interference, 2154243830Sdim// and can be merged into the value it overlaps. Like a coalescable copy, it 2155243830Sdim// can be erased after joining. 2156243830Sdim// 2157243830Sdim// 3. Copy of external value. The overlapping def may be a copy of a value that 2158243830Sdim// is already in the other register. This is like a coalescable copy, but 2159243830Sdim// the live range of the source register must be trimmed after erasing the 2160243830Sdim// copy instruction: 2161243830Sdim// 2162243830Sdim// %src = COPY %ext 2163243830Sdim// %dst = COPY %ext <-- Remove this COPY, trim the live range of %ext. 2164243830Sdim// 2165243830Sdim// 4. Clobbering undefined lanes. Vector registers are sometimes built by 2166243830Sdim// defining one lane at a time: 2167243830Sdim// 2168243830Sdim// %dst:ssub0<def,read-undef> = FOO 2169243830Sdim// %src = BAR 2170327952Sdim// %dst:ssub1 = COPY %src 2171243830Sdim// 2172243830Sdim// The live range of %src overlaps the %dst value defined by FOO, but 2173243830Sdim// merging %src into %dst:ssub1 is only going to clobber the ssub1 lane 2174243830Sdim// which was undef anyway. 2175243830Sdim// 2176243830Sdim// The value mapping is more complicated in this case. The final live range 2177243830Sdim// will have different value numbers for both FOO and BAR, but there is no 2178243830Sdim// simple mapping from old to new values. It may even be necessary to add 2179243830Sdim// new PHI values. 2180243830Sdim// 2181243830Sdim// 5. Clobbering dead lanes. A def may clobber a lane of a vector register that 2182243830Sdim// is live, but never read. This can happen because we don't compute 2183243830Sdim// individual live ranges per lane. 2184243830Sdim// 2185327952Sdim// %dst = FOO 2186243830Sdim// %src = BAR 2187327952Sdim// %dst:ssub1 = COPY %src 2188243830Sdim// 2189243830Sdim// This kind of interference is only resolved locally. If the clobbered 2190243830Sdim// lane value escapes the block, the join is aborted. 2191224145Sdim 2192243830Sdimnamespace { 2193327952Sdim 2194243830Sdim/// Track information about values in a single virtual register about to be 2195243830Sdim/// joined. Objects of this class are always created in pairs - one for each 2196280031Sdim/// side of the CoalescerPair (or one for each lane of a side of the coalescer 2197280031Sdim/// pair) 2198243830Sdimclass JoinVals { 2199280031Sdim /// Live range we work on. 2200280031Sdim LiveRange &LR; 2201327952Sdim 2202280031Sdim /// (Main) register we work on. 2203280031Sdim const unsigned Reg; 2204224145Sdim 2205288943Sdim /// Reg (and therefore the values in this liverange) will end up as 2206288943Sdim /// subregister SubIdx in the coalesced register. Either CP.DstIdx or 2207288943Sdim /// CP.SrcIdx. 2208280031Sdim const unsigned SubIdx; 2209327952Sdim 2210288943Sdim /// The LaneMask that this liverange will occupy the coalesced register. May 2211288943Sdim /// be smaller than the lanemask produced by SubIdx when merging subranges. 2212296417Sdim const LaneBitmask LaneMask; 2213224145Sdim 2214280031Sdim /// This is true when joining sub register ranges, false when joining main 2215280031Sdim /// ranges. 2216280031Sdim const bool SubRangeJoin; 2217327952Sdim 2218280031Sdim /// Whether the current LiveInterval tracks subregister liveness. 2219280031Sdim const bool TrackSubRegLiveness; 2220280031Sdim 2221288943Sdim /// Values that will be present in the final live range. 2222243830Sdim SmallVectorImpl<VNInfo*> &NewVNInfo; 2223224145Sdim 2224243830Sdim const CoalescerPair &CP; 2225243830Sdim LiveIntervals *LIS; 2226243830Sdim SlotIndexes *Indexes; 2227243830Sdim const TargetRegisterInfo *TRI; 2228224145Sdim 2229288943Sdim /// Value number assignments. Maps value numbers in LI to entries in 2230288943Sdim /// NewVNInfo. This is suitable for passing to LiveInterval::join(). 2231243830Sdim SmallVector<int, 8> Assignments; 2232224145Sdim 2233360784Sdim public: 2234288943Sdim /// Conflict resolution for overlapping values. 2235243830Sdim enum ConflictResolution { 2236288943Sdim /// No overlap, simply keep this value. 2237243830Sdim CR_Keep, 2238224145Sdim 2239288943Sdim /// Merge this value into OtherVNI and erase the defining instruction. 2240288943Sdim /// Used for IMPLICIT_DEF, coalescable copies, and copies from external 2241288943Sdim /// values. 2242243830Sdim CR_Erase, 2243224145Sdim 2244288943Sdim /// Merge this value into OtherVNI but keep the defining instruction. 2245288943Sdim /// This is for the special case where OtherVNI is defined by the same 2246288943Sdim /// instruction. 2247243830Sdim CR_Merge, 2248224145Sdim 2249288943Sdim /// Keep this value, and have it replace OtherVNI where possible. This 2250288943Sdim /// complicates value mapping since OtherVNI maps to two different values 2251288943Sdim /// before and after this def. 2252288943Sdim /// Used when clobbering undefined or dead lanes. 2253243830Sdim CR_Replace, 2254224145Sdim 2255288943Sdim /// Unresolved conflict. Visit later when all values have been mapped. 2256243830Sdim CR_Unresolved, 2257224145Sdim 2258288943Sdim /// Unresolvable conflict. Abort the join. 2259243830Sdim CR_Impossible 2260243830Sdim }; 2261224145Sdim 2262360784Sdim private: 2263288943Sdim /// Per-value info for LI. The lane bit masks are all relative to the final 2264288943Sdim /// joined register, so they can be compared directly between SrcReg and 2265288943Sdim /// DstReg. 2266243830Sdim struct Val { 2267327952Sdim ConflictResolution Resolution = CR_Keep; 2268224145Sdim 2269288943Sdim /// Lanes written by this def, 0 for unanalyzed values. 2270296417Sdim LaneBitmask WriteLanes; 2271224145Sdim 2272288943Sdim /// Lanes with defined values in this register. Other lanes are undef and 2273288943Sdim /// safe to clobber. 2274296417Sdim LaneBitmask ValidLanes; 2275224145Sdim 2276288943Sdim /// Value in LI being redefined by this def. 2277327952Sdim VNInfo *RedefVNI = nullptr; 2278224145Sdim 2279288943Sdim /// Value in the other live range that overlaps this def, if any. 2280327952Sdim VNInfo *OtherVNI = nullptr; 2281239462Sdim 2282288943Sdim /// Is this value an IMPLICIT_DEF that can be erased? 2283288943Sdim /// 2284288943Sdim /// IMPLICIT_DEF values should only exist at the end of a basic block that 2285288943Sdim /// is a predecessor to a phi-value. These IMPLICIT_DEF instructions can be 2286288943Sdim /// safely erased if they are overlapping a live value in the other live 2287288943Sdim /// interval. 2288288943Sdim /// 2289288943Sdim /// Weird control flow graphs and incomplete PHI handling in 2290288943Sdim /// ProcessImplicitDefs can very rarely create IMPLICIT_DEF values with 2291288943Sdim /// longer live ranges. Such IMPLICIT_DEF values should be treated like 2292288943Sdim /// normal values. 2293327952Sdim bool ErasableImplicitDef = false; 2294224145Sdim 2295288943Sdim /// True when the live range of this value will be pruned because of an 2296288943Sdim /// overlapping CR_Replace value in the other live range. 2297327952Sdim bool Pruned = false; 2298224145Sdim 2299288943Sdim /// True once Pruned above has been computed. 2300327952Sdim bool PrunedComputed = false; 2301224145Sdim 2302341825Sdim /// True if this value is determined to be identical to OtherVNI 2303341825Sdim /// (in valuesIdentical). This is used with CR_Erase where the erased 2304341825Sdim /// copy is redundant, i.e. the source value is already the same as 2305341825Sdim /// the destination. In such cases the subranges need to be updated 2306341825Sdim /// properly. See comment at pruneSubRegValues for more info. 2307341825Sdim bool Identical = false; 2308341825Sdim 2309327952Sdim Val() = default; 2310224145Sdim 2311314564Sdim bool isAnalyzed() const { return WriteLanes.any(); } 2312243830Sdim }; 2313224145Sdim 2314288943Sdim /// One entry per value number in LI. 2315243830Sdim SmallVector<Val, 8> Vals; 2316224145Sdim 2317288943Sdim /// Compute the bitmask of lanes actually written by DefMI. 2318288943Sdim /// Set Redef if there are any partial register definitions that depend on the 2319288943Sdim /// previous value of the register. 2320296417Sdim LaneBitmask computeWriteLanes(const MachineInstr *DefMI, bool &Redef) const; 2321288943Sdim 2322288943Sdim /// Find the ultimate value that VNI was copied from. 2323280031Sdim std::pair<const VNInfo*,unsigned> followCopyChain(const VNInfo *VNI) const; 2324288943Sdim 2325341825Sdim bool valuesIdentical(VNInfo *Value0, VNInfo *Value1, const JoinVals &Other) const; 2326288943Sdim 2327288943Sdim /// Analyze ValNo in this live range, and set all fields of Vals[ValNo]. 2328288943Sdim /// Return a conflict resolution when possible, but leave the hard cases as 2329288943Sdim /// CR_Unresolved. 2330288943Sdim /// Recursively calls computeAssignment() on this and Other, guaranteeing that 2331288943Sdim /// both OtherVNI and RedefVNI have been analyzed and mapped before returning. 2332288943Sdim /// The recursion always goes upwards in the dominator tree, making loops 2333288943Sdim /// impossible. 2334243830Sdim ConflictResolution analyzeValue(unsigned ValNo, JoinVals &Other); 2335288943Sdim 2336288943Sdim /// Compute the value assignment for ValNo in RI. 2337288943Sdim /// This may be called recursively by analyzeValue(), but never for a ValNo on 2338288943Sdim /// the stack. 2339243830Sdim void computeAssignment(unsigned ValNo, JoinVals &Other); 2340288943Sdim 2341288943Sdim /// Assuming ValNo is going to clobber some valid lanes in Other.LR, compute 2342288943Sdim /// the extent of the tainted lanes in the block. 2343288943Sdim /// 2344288943Sdim /// Multiple values in Other.LR can be affected since partial redefinitions 2345288943Sdim /// can preserve previously tainted lanes. 2346288943Sdim /// 2347288943Sdim /// 1 %dst = VLOAD <-- Define all lanes in %dst 2348288943Sdim /// 2 %src = FOO <-- ValNo to be joined with %dst:ssub0 2349288943Sdim /// 3 %dst:ssub1 = BAR <-- Partial redef doesn't clear taint in ssub0 2350288943Sdim /// 4 %dst:ssub0 = COPY %src <-- Conflict resolved, ssub0 wasn't read 2351288943Sdim /// 2352288943Sdim /// For each ValNo in Other that is affected, add an (EndIndex, TaintedLanes) 2353288943Sdim /// entry to TaintedVals. 2354288943Sdim /// 2355288943Sdim /// Returns false if the tainted lanes extend beyond the basic block. 2356327952Sdim bool 2357327952Sdim taintExtent(unsigned ValNo, LaneBitmask TaintedLanes, JoinVals &Other, 2358327952Sdim SmallVectorImpl<std::pair<SlotIndex, LaneBitmask>> &TaintExtent); 2359288943Sdim 2360288943Sdim /// Return true if MI uses any of the given Lanes from Reg. 2361288943Sdim /// This does not include partial redefinitions of Reg. 2362309124Sdim bool usesLanes(const MachineInstr &MI, unsigned, unsigned, LaneBitmask) const; 2363288943Sdim 2364288943Sdim /// Determine if ValNo is a copy of a value number in LR or Other.LR that will 2365288943Sdim /// be pruned: 2366288943Sdim /// 2367288943Sdim /// %dst = COPY %src 2368288943Sdim /// %src = COPY %dst <-- This value to be pruned. 2369288943Sdim /// %dst = COPY %src <-- This value is a copy of a pruned value. 2370243830Sdim bool isPrunedValue(unsigned ValNo, JoinVals &Other); 2371243830Sdim 2372243830Sdimpublic: 2373296417Sdim JoinVals(LiveRange &LR, unsigned Reg, unsigned SubIdx, LaneBitmask LaneMask, 2374280031Sdim SmallVectorImpl<VNInfo*> &newVNInfo, const CoalescerPair &cp, 2375280031Sdim LiveIntervals *lis, const TargetRegisterInfo *TRI, bool SubRangeJoin, 2376280031Sdim bool TrackSubRegLiveness) 2377280031Sdim : LR(LR), Reg(Reg), SubIdx(SubIdx), LaneMask(LaneMask), 2378280031Sdim SubRangeJoin(SubRangeJoin), TrackSubRegLiveness(TrackSubRegLiveness), 2379280031Sdim NewVNInfo(newVNInfo), CP(cp), LIS(lis), Indexes(LIS->getSlotIndexes()), 2380327952Sdim TRI(TRI), Assignments(LR.getNumValNums(), -1), Vals(LR.getNumValNums()) {} 2381243830Sdim 2382280031Sdim /// Analyze defs in LR and compute a value mapping in NewVNInfo. 2383243830Sdim /// Returns false if any conflicts were impossible to resolve. 2384243830Sdim bool mapValues(JoinVals &Other); 2385243830Sdim 2386243830Sdim /// Try to resolve conflicts that require all values to be mapped. 2387243830Sdim /// Returns false if any conflicts were impossible to resolve. 2388243830Sdim bool resolveConflicts(JoinVals &Other); 2389243830Sdim 2390280031Sdim /// Prune the live range of values in Other.LR where they would conflict with 2391280031Sdim /// CR_Replace values in LR. Collect end points for restoring the live range 2392243830Sdim /// after joining. 2393280031Sdim void pruneValues(JoinVals &Other, SmallVectorImpl<SlotIndex> &EndPoints, 2394280031Sdim bool changeInstrs); 2395243830Sdim 2396288943Sdim /// Removes subranges starting at copies that get removed. This sometimes 2397288943Sdim /// happens when undefined subranges are copied around. These ranges contain 2398296417Sdim /// no useful information and can be removed. 2399296417Sdim void pruneSubRegValues(LiveInterval &LI, LaneBitmask &ShrinkMask); 2400280031Sdim 2401314564Sdim /// Pruning values in subranges can lead to removing segments in these 2402314564Sdim /// subranges started by IMPLICIT_DEFs. The corresponding segments in 2403314564Sdim /// the main range also need to be removed. This function will mark 2404314564Sdim /// the corresponding values in the main range as pruned, so that 2405314564Sdim /// eraseInstrs can do the final cleanup. 2406314564Sdim /// The parameter @p LI must be the interval whose main range is the 2407314564Sdim /// live range LR. 2408314564Sdim void pruneMainSegments(LiveInterval &LI, bool &ShrinkMainRange); 2409314564Sdim 2410243830Sdim /// Erase any machine instructions that have been coalesced away. 2411243830Sdim /// Add erased instructions to ErasedInstrs. 2412243830Sdim /// Add foreign virtual registers to ShrinkRegs if their live range ended at 2413243830Sdim /// the erased instrs. 2414280031Sdim void eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs, 2415314564Sdim SmallVectorImpl<unsigned> &ShrinkRegs, 2416314564Sdim LiveInterval *LI = nullptr); 2417243830Sdim 2418288943Sdim /// Remove liverange defs at places where implicit defs will be removed. 2419288943Sdim void removeImplicitDefs(); 2420288943Sdim 2421243830Sdim /// Get the value assignments suitable for passing to LiveInterval::join. 2422243830Sdim const int *getAssignments() const { return Assignments.data(); } 2423360784Sdim 2424360784Sdim /// Get the conflict resolution for a value number. 2425360784Sdim ConflictResolution getResolution(unsigned Num) const { 2426360784Sdim return Vals[Num].Resolution; 2427360784Sdim } 2428243830Sdim}; 2429327952Sdim 2430243830Sdim} // end anonymous namespace 2431243830Sdim 2432296417SdimLaneBitmask JoinVals::computeWriteLanes(const MachineInstr *DefMI, bool &Redef) 2433280031Sdim const { 2434314564Sdim LaneBitmask L; 2435288943Sdim for (const MachineOperand &MO : DefMI->operands()) { 2436288943Sdim if (!MO.isReg() || MO.getReg() != Reg || !MO.isDef()) 2437224145Sdim continue; 2438243830Sdim L |= TRI->getSubRegIndexLaneMask( 2439288943Sdim TRI->composeSubRegIndices(SubIdx, MO.getSubReg())); 2440288943Sdim if (MO.readsReg()) 2441243830Sdim Redef = true; 2442243830Sdim } 2443243830Sdim return L; 2444243830Sdim} 2445224145Sdim 2446280031Sdimstd::pair<const VNInfo*, unsigned> JoinVals::followCopyChain( 2447280031Sdim const VNInfo *VNI) const { 2448341825Sdim unsigned TrackReg = Reg; 2449280031Sdim 2450243830Sdim while (!VNI->isPHIDef()) { 2451280031Sdim SlotIndex Def = VNI->def; 2452280031Sdim MachineInstr *MI = Indexes->getInstructionFromIndex(Def); 2453243830Sdim assert(MI && "No defining instruction"); 2454243830Sdim if (!MI->isFullCopy()) 2455341825Sdim return std::make_pair(VNI, TrackReg); 2456360784Sdim Register SrcReg = MI->getOperand(1).getReg(); 2457360784Sdim if (!Register::isVirtualRegister(SrcReg)) 2458341825Sdim return std::make_pair(VNI, TrackReg); 2459280031Sdim 2460280031Sdim const LiveInterval &LI = LIS->getInterval(SrcReg); 2461280031Sdim const VNInfo *ValueIn; 2462280031Sdim // No subrange involved. 2463280031Sdim if (!SubRangeJoin || !LI.hasSubRanges()) { 2464280031Sdim LiveQueryResult LRQ = LI.Query(Def); 2465280031Sdim ValueIn = LRQ.valueIn(); 2466280031Sdim } else { 2467341825Sdim // Query subranges. Ensure that all matching ones take us to the same def 2468341825Sdim // (allowing some of them to be undef). 2469280031Sdim ValueIn = nullptr; 2470280031Sdim for (const LiveInterval::SubRange &S : LI.subranges()) { 2471280031Sdim // Transform lanemask to a mask in the joined live interval. 2472296417Sdim LaneBitmask SMask = TRI->composeSubRegIndexLaneMask(SubIdx, S.LaneMask); 2473314564Sdim if ((SMask & LaneMask).none()) 2474280031Sdim continue; 2475280031Sdim LiveQueryResult LRQ = S.Query(Def); 2476341825Sdim if (!ValueIn) { 2477341825Sdim ValueIn = LRQ.valueIn(); 2478341825Sdim continue; 2479341825Sdim } 2480341825Sdim if (LRQ.valueIn() && ValueIn != LRQ.valueIn()) 2481341825Sdim return std::make_pair(VNI, TrackReg); 2482280031Sdim } 2483280031Sdim } 2484341825Sdim if (ValueIn == nullptr) { 2485341825Sdim // Reaching an undefined value is legitimate, for example: 2486341825Sdim // 2487341825Sdim // 1 undef %0.sub1 = ... ;; %0.sub0 == undef 2488341825Sdim // 2 %1 = COPY %0 ;; %1 is defined here. 2489341825Sdim // 3 %0 = COPY %1 ;; Now %0.sub0 has a definition, 2490341825Sdim // ;; but it's equivalent to "undef". 2491341825Sdim return std::make_pair(nullptr, SrcReg); 2492341825Sdim } 2493280031Sdim VNI = ValueIn; 2494341825Sdim TrackReg = SrcReg; 2495224145Sdim } 2496341825Sdim return std::make_pair(VNI, TrackReg); 2497243830Sdim} 2498224145Sdim 2499280031Sdimbool JoinVals::valuesIdentical(VNInfo *Value0, VNInfo *Value1, 2500280031Sdim const JoinVals &Other) const { 2501280031Sdim const VNInfo *Orig0; 2502280031Sdim unsigned Reg0; 2503280031Sdim std::tie(Orig0, Reg0) = followCopyChain(Value0); 2504341825Sdim if (Orig0 == Value1 && Reg0 == Other.Reg) 2505280031Sdim return true; 2506280031Sdim 2507280031Sdim const VNInfo *Orig1; 2508280031Sdim unsigned Reg1; 2509280031Sdim std::tie(Orig1, Reg1) = Other.followCopyChain(Value1); 2510341825Sdim // If both values are undefined, and the source registers are the same 2511341825Sdim // register, the values are identical. Filter out cases where only one 2512341825Sdim // value is defined. 2513341825Sdim if (Orig0 == nullptr || Orig1 == nullptr) 2514341825Sdim return Orig0 == Orig1 && Reg0 == Reg1; 2515280031Sdim 2516280031Sdim // The values are equal if they are defined at the same place and use the 2517280031Sdim // same register. Note that we cannot compare VNInfos directly as some of 2518280031Sdim // them might be from a copy created in mergeSubRangeInto() while the other 2519280031Sdim // is from the original LiveInterval. 2520280031Sdim return Orig0->def == Orig1->def && Reg0 == Reg1; 2521280031Sdim} 2522280031Sdim 2523243830SdimJoinVals::ConflictResolution 2524243830SdimJoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) { 2525243830Sdim Val &V = Vals[ValNo]; 2526243830Sdim assert(!V.isAnalyzed() && "Value has already been analyzed!"); 2527280031Sdim VNInfo *VNI = LR.getValNumInfo(ValNo); 2528243830Sdim if (VNI->isUnused()) { 2529314564Sdim V.WriteLanes = LaneBitmask::getAll(); 2530243830Sdim return CR_Keep; 2531243830Sdim } 2532224145Sdim 2533243830Sdim // Get the instruction defining this value, compute the lanes written. 2534276479Sdim const MachineInstr *DefMI = nullptr; 2535243830Sdim if (VNI->isPHIDef()) { 2536243830Sdim // Conservatively assume that all lanes in a PHI are valid. 2537327952Sdim LaneBitmask Lanes = SubRangeJoin ? LaneBitmask::getLane(0) 2538314564Sdim : TRI->getSubRegIndexLaneMask(SubIdx); 2539280031Sdim V.ValidLanes = V.WriteLanes = Lanes; 2540243830Sdim } else { 2541243830Sdim DefMI = Indexes->getInstructionFromIndex(VNI->def); 2542280031Sdim assert(DefMI != nullptr); 2543280031Sdim if (SubRangeJoin) { 2544280031Sdim // We don't care about the lanes when joining subregister ranges. 2545327952Sdim V.WriteLanes = V.ValidLanes = LaneBitmask::getLane(0); 2546288943Sdim if (DefMI->isImplicitDef()) { 2547314564Sdim V.ValidLanes = LaneBitmask::getNone(); 2548288943Sdim V.ErasableImplicitDef = true; 2549288943Sdim } 2550280031Sdim } else { 2551280031Sdim bool Redef = false; 2552280031Sdim V.ValidLanes = V.WriteLanes = computeWriteLanes(DefMI, Redef); 2553224145Sdim 2554280031Sdim // If this is a read-modify-write instruction, there may be more valid 2555280031Sdim // lanes than the ones written by this instruction. 2556280031Sdim // This only covers partial redef operands. DefMI may have normal use 2557280031Sdim // operands reading the register. They don't contribute valid lanes. 2558280031Sdim // 2559280031Sdim // This adds ssub1 to the set of valid lanes in %src: 2560280031Sdim // 2561327952Sdim // %src:ssub1 = FOO 2562280031Sdim // 2563280031Sdim // This leaves only ssub1 valid, making any other lanes undef: 2564280031Sdim // 2565280031Sdim // %src:ssub1<def,read-undef> = FOO %src:ssub2 2566280031Sdim // 2567280031Sdim // The <read-undef> flag on the def operand means that old lane values are 2568280031Sdim // not important. 2569280031Sdim if (Redef) { 2570280031Sdim V.RedefVNI = LR.Query(VNI->def).valueIn(); 2571280031Sdim assert((TrackSubRegLiveness || V.RedefVNI) && 2572280031Sdim "Instruction is reading nonexistent value"); 2573280031Sdim if (V.RedefVNI != nullptr) { 2574280031Sdim computeAssignment(V.RedefVNI->id, Other); 2575280031Sdim V.ValidLanes |= Vals[V.RedefVNI->id].ValidLanes; 2576280031Sdim } 2577280031Sdim } 2578224145Sdim 2579280031Sdim // An IMPLICIT_DEF writes undef values. 2580280031Sdim if (DefMI->isImplicitDef()) { 2581280031Sdim // We normally expect IMPLICIT_DEF values to be live only until the end 2582280031Sdim // of their block. If the value is really live longer and gets pruned in 2583280031Sdim // another block, this flag is cleared again. 2584344779Sdim // 2585344779Sdim // Clearing the valid lanes is deferred until it is sure this can be 2586344779Sdim // erased. 2587280031Sdim V.ErasableImplicitDef = true; 2588280031Sdim } 2589243830Sdim } 2590224145Sdim } 2591224145Sdim 2592243830Sdim // Find the value in Other that overlaps VNI->def, if any. 2593280031Sdim LiveQueryResult OtherLRQ = Other.LR.Query(VNI->def); 2594224145Sdim 2595243830Sdim // It is possible that both values are defined by the same instruction, or 2596243830Sdim // the values are PHIs defined in the same block. When that happens, the two 2597243830Sdim // values should be merged into one, but not into any preceding value. 2598243830Sdim // The first value defined or visited gets CR_Keep, the other gets CR_Merge. 2599243830Sdim if (VNInfo *OtherVNI = OtherLRQ.valueDefined()) { 2600243830Sdim assert(SlotIndex::isSameInstr(VNI->def, OtherVNI->def) && "Broken LRQ"); 2601243830Sdim 2602243830Sdim // One value stays, the other is merged. Keep the earlier one, or the first 2603243830Sdim // one we see. 2604243830Sdim if (OtherVNI->def < VNI->def) 2605243830Sdim Other.computeAssignment(OtherVNI->id, *this); 2606243830Sdim else if (VNI->def < OtherVNI->def && OtherLRQ.valueIn()) { 2607243830Sdim // This is an early-clobber def overlapping a live-in value in the other 2608243830Sdim // register. Not mergeable. 2609243830Sdim V.OtherVNI = OtherLRQ.valueIn(); 2610243830Sdim return CR_Impossible; 2611243830Sdim } 2612243830Sdim V.OtherVNI = OtherVNI; 2613243830Sdim Val &OtherV = Other.Vals[OtherVNI->id]; 2614243830Sdim // Keep this value, check for conflicts when analyzing OtherVNI. 2615243830Sdim if (!OtherV.isAnalyzed()) 2616243830Sdim return CR_Keep; 2617243830Sdim // Both sides have been analyzed now. 2618243830Sdim // Allow overlapping PHI values. Any real interference would show up in a 2619243830Sdim // predecessor, the PHI itself can't introduce any conflicts. 2620243830Sdim if (VNI->isPHIDef()) 2621243830Sdim return CR_Merge; 2622314564Sdim if ((V.ValidLanes & OtherV.ValidLanes).any()) 2623243830Sdim // Overlapping lanes can't be resolved. 2624243830Sdim return CR_Impossible; 2625243830Sdim else 2626243830Sdim return CR_Merge; 2627224145Sdim } 2628224145Sdim 2629243830Sdim // No simultaneous def. Is Other live at the def? 2630243830Sdim V.OtherVNI = OtherLRQ.valueIn(); 2631243830Sdim if (!V.OtherVNI) 2632243830Sdim // No overlap, no conflict. 2633243830Sdim return CR_Keep; 2634243830Sdim 2635243830Sdim assert(!SlotIndex::isSameInstr(VNI->def, V.OtherVNI->def) && "Broken LRQ"); 2636243830Sdim 2637243830Sdim // We have overlapping values, or possibly a kill of Other. 2638243830Sdim // Recursively compute assignments up the dominator tree. 2639243830Sdim Other.computeAssignment(V.OtherVNI->id, *this); 2640249423Sdim Val &OtherV = Other.Vals[V.OtherVNI->id]; 2641243830Sdim 2642344779Sdim if (OtherV.ErasableImplicitDef) { 2643344779Sdim // Check if OtherV is an IMPLICIT_DEF that extends beyond its basic block. 2644344779Sdim // This shouldn't normally happen, but ProcessImplicitDefs can leave such 2645344779Sdim // IMPLICIT_DEF instructions behind, and there is nothing wrong with it 2646344779Sdim // technically. 2647344779Sdim // 2648344779Sdim // When it happens, treat that IMPLICIT_DEF as a normal value, and don't try 2649344779Sdim // to erase the IMPLICIT_DEF instruction. 2650344779Sdim if (DefMI && 2651344779Sdim DefMI->getParent() != Indexes->getMBBFromIndex(V.OtherVNI->def)) { 2652344779Sdim LLVM_DEBUG(dbgs() << "IMPLICIT_DEF defined at " << V.OtherVNI->def 2653344779Sdim << " extends into " 2654344779Sdim << printMBBReference(*DefMI->getParent()) 2655344779Sdim << ", keeping it.\n"); 2656344779Sdim OtherV.ErasableImplicitDef = false; 2657344779Sdim } else { 2658344779Sdim // We deferred clearing these lanes in case we needed to save them 2659344779Sdim OtherV.ValidLanes &= ~OtherV.WriteLanes; 2660344779Sdim } 2661249423Sdim } 2662249423Sdim 2663243830Sdim // Allow overlapping PHI values. Any real interference would show up in a 2664243830Sdim // predecessor, the PHI itself can't introduce any conflicts. 2665243830Sdim if (VNI->isPHIDef()) 2666243830Sdim return CR_Replace; 2667243830Sdim 2668243830Sdim // Check for simple erasable conflicts. 2669280031Sdim if (DefMI->isImplicitDef()) { 2670280031Sdim // We need the def for the subregister if there is nothing else live at the 2671280031Sdim // subrange at this point. 2672280031Sdim if (TrackSubRegLiveness 2673314564Sdim && (V.WriteLanes & (OtherV.ValidLanes | OtherV.WriteLanes)).none()) 2674280031Sdim return CR_Replace; 2675243830Sdim return CR_Erase; 2676280031Sdim } 2677243830Sdim 2678243830Sdim // Include the non-conflict where DefMI is a coalescable copy that kills 2679243830Sdim // OtherVNI. We still want the copy erased and value numbers merged. 2680243830Sdim if (CP.isCoalescable(DefMI)) { 2681243830Sdim // Some of the lanes copied from OtherVNI may be undef, making them undef 2682243830Sdim // here too. 2683243830Sdim V.ValidLanes &= ~V.WriteLanes | OtherV.ValidLanes; 2684243830Sdim return CR_Erase; 2685224145Sdim } 2686224145Sdim 2687243830Sdim // This may not be a real conflict if DefMI simply kills Other and defines 2688243830Sdim // VNI. 2689243830Sdim if (OtherLRQ.isKill() && OtherLRQ.endPoint() <= VNI->def) 2690243830Sdim return CR_Keep; 2691224145Sdim 2692243830Sdim // Handle the case where VNI and OtherVNI can be proven to be identical: 2693243830Sdim // 2694243830Sdim // %other = COPY %ext 2695243830Sdim // %this = COPY %ext <-- Erase this copy 2696243830Sdim // 2697341825Sdim if (DefMI->isFullCopy() && !CP.isPartial() && 2698341825Sdim valuesIdentical(VNI, V.OtherVNI, Other)) { 2699341825Sdim V.Identical = true; 2700243830Sdim return CR_Erase; 2701341825Sdim } 2702239462Sdim 2703344779Sdim // The remaining checks apply to the lanes, which aren't tracked here. This 2704344779Sdim // was already decided to be OK via the following CR_Replace condition. 2705344779Sdim // CR_Replace. 2706344779Sdim if (SubRangeJoin) 2707344779Sdim return CR_Replace; 2708344779Sdim 2709243830Sdim // If the lanes written by this instruction were all undef in OtherVNI, it is 2710243830Sdim // still safe to join the live ranges. This can't be done with a simple value 2711243830Sdim // mapping, though - OtherVNI will map to multiple values: 2712243830Sdim // 2713243830Sdim // 1 %dst:ssub0 = FOO <-- OtherVNI 2714243830Sdim // 2 %src = BAR <-- VNI 2715327952Sdim // 3 %dst:ssub1 = COPY killed %src <-- Eliminate this copy. 2716327952Sdim // 4 BAZ killed %dst 2717327952Sdim // 5 QUUX killed %src 2718243830Sdim // 2719243830Sdim // Here OtherVNI will map to itself in [1;2), but to VNI in [2;5). CR_Replace 2720243830Sdim // handles this complex value mapping. 2721314564Sdim if ((V.WriteLanes & OtherV.ValidLanes).none()) 2722243830Sdim return CR_Replace; 2723243830Sdim 2724243830Sdim // If the other live range is killed by DefMI and the live ranges are still 2725243830Sdim // overlapping, it must be because we're looking at an early clobber def: 2726243830Sdim // 2727327952Sdim // %dst<def,early-clobber> = ASM killed %src 2728243830Sdim // 2729243830Sdim // In this case, it is illegal to merge the two live ranges since the early 2730243830Sdim // clobber def would clobber %src before it was read. 2731243830Sdim if (OtherLRQ.isKill()) { 2732243830Sdim // This case where the def doesn't overlap the kill is handled above. 2733243830Sdim assert(VNI->def.isEarlyClobber() && 2734243830Sdim "Only early clobber defs can overlap a kill"); 2735243830Sdim return CR_Impossible; 2736243830Sdim } 2737243830Sdim 2738243830Sdim // VNI is clobbering live lanes in OtherVNI, but there is still the 2739243830Sdim // possibility that no instructions actually read the clobbered lanes. 2740243830Sdim // If we're clobbering all the lanes in OtherVNI, at least one must be read. 2741280031Sdim // Otherwise Other.RI wouldn't be live here. 2742314564Sdim if ((TRI->getSubRegIndexLaneMask(Other.SubIdx) & ~V.WriteLanes).none()) 2743243830Sdim return CR_Impossible; 2744243830Sdim 2745243830Sdim // We need to verify that no instructions are reading the clobbered lanes. To 2746243830Sdim // save compile time, we'll only check that locally. Don't allow the tainted 2747243830Sdim // value to escape the basic block. 2748243830Sdim MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def); 2749243830Sdim if (OtherLRQ.endPoint() >= Indexes->getMBBEndIdx(MBB)) 2750243830Sdim return CR_Impossible; 2751243830Sdim 2752243830Sdim // There are still some things that could go wrong besides clobbered lanes 2753243830Sdim // being read, for example OtherVNI may be only partially redefined in MBB, 2754243830Sdim // and some clobbered lanes could escape the block. Save this analysis for 2755243830Sdim // resolveConflicts() when all values have been mapped. We need to know 2756243830Sdim // RedefVNI and WriteLanes for any later defs in MBB, and we can't compute 2757243830Sdim // that now - the recursive analyzeValue() calls must go upwards in the 2758243830Sdim // dominator tree. 2759243830Sdim return CR_Unresolved; 2760243830Sdim} 2761243830Sdim 2762243830Sdimvoid JoinVals::computeAssignment(unsigned ValNo, JoinVals &Other) { 2763243830Sdim Val &V = Vals[ValNo]; 2764243830Sdim if (V.isAnalyzed()) { 2765243830Sdim // Recursion should always move up the dominator tree, so ValNo is not 2766243830Sdim // supposed to reappear before it has been assigned. 2767243830Sdim assert(Assignments[ValNo] != -1 && "Bad recursion?"); 2768243830Sdim return; 2769243830Sdim } 2770243830Sdim switch ((V.Resolution = analyzeValue(ValNo, Other))) { 2771243830Sdim case CR_Erase: 2772243830Sdim case CR_Merge: 2773243830Sdim // Merge this ValNo into OtherVNI. 2774243830Sdim assert(V.OtherVNI && "OtherVNI not assigned, can't merge."); 2775243830Sdim assert(Other.Vals[V.OtherVNI->id].isAnalyzed() && "Missing recursion"); 2776243830Sdim Assignments[ValNo] = Other.Assignments[V.OtherVNI->id]; 2777341825Sdim LLVM_DEBUG(dbgs() << "\t\tmerge " << printReg(Reg) << ':' << ValNo << '@' 2778341825Sdim << LR.getValNumInfo(ValNo)->def << " into " 2779341825Sdim << printReg(Other.Reg) << ':' << V.OtherVNI->id << '@' 2780341825Sdim << V.OtherVNI->def << " --> @" 2781341825Sdim << NewVNInfo[Assignments[ValNo]]->def << '\n'); 2782243830Sdim break; 2783243830Sdim case CR_Replace: 2784280031Sdim case CR_Unresolved: { 2785243830Sdim // The other value is going to be pruned if this join is successful. 2786243830Sdim assert(V.OtherVNI && "OtherVNI not assigned, can't prune"); 2787280031Sdim Val &OtherV = Other.Vals[V.OtherVNI->id]; 2788280031Sdim // We cannot erase an IMPLICIT_DEF if we don't have valid values for all 2789280031Sdim // its lanes. 2790344779Sdim if (OtherV.ErasableImplicitDef && 2791344779Sdim TrackSubRegLiveness && 2792344779Sdim (OtherV.WriteLanes & ~V.ValidLanes).any()) { 2793344779Sdim LLVM_DEBUG(dbgs() << "Cannot erase implicit_def with missing values\n"); 2794344779Sdim 2795280031Sdim OtherV.ErasableImplicitDef = false; 2796344779Sdim // The valid lanes written by the implicit_def were speculatively cleared 2797344779Sdim // before, so make this more conservative. It may be better to track this, 2798344779Sdim // I haven't found a testcase where it matters. 2799344779Sdim OtherV.ValidLanes = LaneBitmask::getAll(); 2800344779Sdim } 2801344779Sdim 2802280031Sdim OtherV.Pruned = true; 2803314564Sdim LLVM_FALLTHROUGH; 2804280031Sdim } 2805243830Sdim default: 2806243830Sdim // This value number needs to go in the final joined live range. 2807243830Sdim Assignments[ValNo] = NewVNInfo.size(); 2808280031Sdim NewVNInfo.push_back(LR.getValNumInfo(ValNo)); 2809243830Sdim break; 2810243830Sdim } 2811243830Sdim} 2812243830Sdim 2813243830Sdimbool JoinVals::mapValues(JoinVals &Other) { 2814280031Sdim for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) { 2815243830Sdim computeAssignment(i, Other); 2816243830Sdim if (Vals[i].Resolution == CR_Impossible) { 2817341825Sdim LLVM_DEBUG(dbgs() << "\t\tinterference at " << printReg(Reg) << ':' << i 2818341825Sdim << '@' << LR.getValNumInfo(i)->def << '\n'); 2819243830Sdim return false; 2820224145Sdim } 2821224145Sdim } 2822243830Sdim return true; 2823243830Sdim} 2824224145Sdim 2825243830Sdimbool JoinVals:: 2826296417SdimtaintExtent(unsigned ValNo, LaneBitmask TaintedLanes, JoinVals &Other, 2827327952Sdim SmallVectorImpl<std::pair<SlotIndex, LaneBitmask>> &TaintExtent) { 2828280031Sdim VNInfo *VNI = LR.getValNumInfo(ValNo); 2829243830Sdim MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def); 2830243830Sdim SlotIndex MBBEnd = Indexes->getMBBEndIdx(MBB); 2831239462Sdim 2832280031Sdim // Scan Other.LR from VNI.def to MBBEnd. 2833280031Sdim LiveInterval::iterator OtherI = Other.LR.find(VNI->def); 2834280031Sdim assert(OtherI != Other.LR.end() && "No conflict?"); 2835243830Sdim do { 2836243830Sdim // OtherI is pointing to a tainted value. Abort the join if the tainted 2837243830Sdim // lanes escape the block. 2838243830Sdim SlotIndex End = OtherI->end; 2839243830Sdim if (End >= MBBEnd) { 2840341825Sdim LLVM_DEBUG(dbgs() << "\t\ttaints global " << printReg(Other.Reg) << ':' 2841341825Sdim << OtherI->valno->id << '@' << OtherI->start << '\n'); 2842243830Sdim return false; 2843224145Sdim } 2844341825Sdim LLVM_DEBUG(dbgs() << "\t\ttaints local " << printReg(Other.Reg) << ':' 2845341825Sdim << OtherI->valno->id << '@' << OtherI->start << " to " 2846341825Sdim << End << '\n'); 2847243830Sdim // A dead def is not a problem. 2848243830Sdim if (End.isDead()) 2849243830Sdim break; 2850243830Sdim TaintExtent.push_back(std::make_pair(End, TaintedLanes)); 2851224145Sdim 2852243830Sdim // Check for another def in the MBB. 2853280031Sdim if (++OtherI == Other.LR.end() || OtherI->start >= MBBEnd) 2854243830Sdim break; 2855224145Sdim 2856243830Sdim // Lanes written by the new def are no longer tainted. 2857243830Sdim const Val &OV = Other.Vals[OtherI->valno->id]; 2858243830Sdim TaintedLanes &= ~OV.WriteLanes; 2859243830Sdim if (!OV.RedefVNI) 2860243830Sdim break; 2861314564Sdim } while (TaintedLanes.any()); 2862243830Sdim return true; 2863243830Sdim} 2864224145Sdim 2865309124Sdimbool JoinVals::usesLanes(const MachineInstr &MI, unsigned Reg, unsigned SubIdx, 2866296417Sdim LaneBitmask Lanes) const { 2867341825Sdim if (MI.isDebugInstr()) 2868243830Sdim return false; 2869309124Sdim for (const MachineOperand &MO : MI.operands()) { 2870288943Sdim if (!MO.isReg() || MO.isDef() || MO.getReg() != Reg) 2871239462Sdim continue; 2872288943Sdim if (!MO.readsReg()) 2873243830Sdim continue; 2874314564Sdim unsigned S = TRI->composeSubRegIndices(SubIdx, MO.getSubReg()); 2875314564Sdim if ((Lanes & TRI->getSubRegIndexLaneMask(S)).any()) 2876243830Sdim return true; 2877239462Sdim } 2878243830Sdim return false; 2879243830Sdim} 2880239462Sdim 2881243830Sdimbool JoinVals::resolveConflicts(JoinVals &Other) { 2882280031Sdim for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) { 2883243830Sdim Val &V = Vals[i]; 2884327952Sdim assert(V.Resolution != CR_Impossible && "Unresolvable conflict"); 2885243830Sdim if (V.Resolution != CR_Unresolved) 2886239462Sdim continue; 2887341825Sdim LLVM_DEBUG(dbgs() << "\t\tconflict at " << printReg(Reg) << ':' << i << '@' 2888341825Sdim << LR.getValNumInfo(i)->def << '\n'); 2889280031Sdim if (SubRangeJoin) 2890280031Sdim return false; 2891280031Sdim 2892243830Sdim ++NumLaneConflicts; 2893243830Sdim assert(V.OtherVNI && "Inconsistent conflict resolution."); 2894280031Sdim VNInfo *VNI = LR.getValNumInfo(i); 2895243830Sdim const Val &OtherV = Other.Vals[V.OtherVNI->id]; 2896224145Sdim 2897243830Sdim // VNI is known to clobber some lanes in OtherVNI. If we go ahead with the 2898243830Sdim // join, those lanes will be tainted with a wrong value. Get the extent of 2899243830Sdim // the tainted lanes. 2900296417Sdim LaneBitmask TaintedLanes = V.WriteLanes & OtherV.ValidLanes; 2901296417Sdim SmallVector<std::pair<SlotIndex, LaneBitmask>, 8> TaintExtent; 2902243830Sdim if (!taintExtent(i, TaintedLanes, Other, TaintExtent)) 2903243830Sdim // Tainted lanes would extend beyond the basic block. 2904243830Sdim return false; 2905243830Sdim 2906243830Sdim assert(!TaintExtent.empty() && "There should be at least one conflict."); 2907243830Sdim 2908243830Sdim // Now look at the instructions from VNI->def to TaintExtent (inclusive). 2909243830Sdim MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def); 2910243830Sdim MachineBasicBlock::iterator MI = MBB->begin(); 2911243830Sdim if (!VNI->isPHIDef()) { 2912243830Sdim MI = Indexes->getInstructionFromIndex(VNI->def); 2913243830Sdim // No need to check the instruction defining VNI for reads. 2914243830Sdim ++MI; 2915239462Sdim } 2916243830Sdim assert(!SlotIndex::isSameInstr(VNI->def, TaintExtent.front().first) && 2917243830Sdim "Interference ends on VNI->def. Should have been handled earlier"); 2918243830Sdim MachineInstr *LastMI = 2919243830Sdim Indexes->getInstructionFromIndex(TaintExtent.front().first); 2920243830Sdim assert(LastMI && "Range must end at a proper instruction"); 2921243830Sdim unsigned TaintNum = 0; 2922327952Sdim while (true) { 2923243830Sdim assert(MI != MBB->end() && "Bad LastMI"); 2924309124Sdim if (usesLanes(*MI, Other.Reg, Other.SubIdx, TaintedLanes)) { 2925341825Sdim LLVM_DEBUG(dbgs() << "\t\ttainted lanes used by: " << *MI); 2926243830Sdim return false; 2927243830Sdim } 2928243830Sdim // LastMI is the last instruction to use the current value. 2929243830Sdim if (&*MI == LastMI) { 2930243830Sdim if (++TaintNum == TaintExtent.size()) 2931243830Sdim break; 2932243830Sdim LastMI = Indexes->getInstructionFromIndex(TaintExtent[TaintNum].first); 2933243830Sdim assert(LastMI && "Range must end at a proper instruction"); 2934243830Sdim TaintedLanes = TaintExtent[TaintNum].second; 2935243830Sdim } 2936243830Sdim ++MI; 2937243830Sdim } 2938243830Sdim 2939243830Sdim // The tainted lanes are unused. 2940243830Sdim V.Resolution = CR_Replace; 2941243830Sdim ++NumLaneResolves; 2942224145Sdim } 2943243830Sdim return true; 2944243830Sdim} 2945224145Sdim 2946243830Sdimbool JoinVals::isPrunedValue(unsigned ValNo, JoinVals &Other) { 2947243830Sdim Val &V = Vals[ValNo]; 2948243830Sdim if (V.Pruned || V.PrunedComputed) 2949243830Sdim return V.Pruned; 2950243830Sdim 2951243830Sdim if (V.Resolution != CR_Erase && V.Resolution != CR_Merge) 2952243830Sdim return V.Pruned; 2953243830Sdim 2954243830Sdim // Follow copies up the dominator tree and check if any intermediate value 2955243830Sdim // has been pruned. 2956243830Sdim V.PrunedComputed = true; 2957243830Sdim V.Pruned = Other.isPrunedValue(V.OtherVNI->id, *this); 2958243830Sdim return V.Pruned; 2959243830Sdim} 2960243830Sdim 2961243830Sdimvoid JoinVals::pruneValues(JoinVals &Other, 2962280031Sdim SmallVectorImpl<SlotIndex> &EndPoints, 2963280031Sdim bool changeInstrs) { 2964280031Sdim for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) { 2965280031Sdim SlotIndex Def = LR.getValNumInfo(i)->def; 2966243830Sdim switch (Vals[i].Resolution) { 2967243830Sdim case CR_Keep: 2968243830Sdim break; 2969243830Sdim case CR_Replace: { 2970280031Sdim // This value takes precedence over the value in Other.LR. 2971280031Sdim LIS->pruneValue(Other.LR, Def, &EndPoints); 2972243830Sdim // Check if we're replacing an IMPLICIT_DEF value. The IMPLICIT_DEF 2973243830Sdim // instructions are only inserted to provide a live-out value for PHI 2974243830Sdim // predecessors, so the instruction should simply go away once its value 2975243830Sdim // has been replaced. 2976243830Sdim Val &OtherV = Other.Vals[Vals[i].OtherVNI->id]; 2977249423Sdim bool EraseImpDef = OtherV.ErasableImplicitDef && 2978249423Sdim OtherV.Resolution == CR_Keep; 2979243830Sdim if (!Def.isBlock()) { 2980280031Sdim if (changeInstrs) { 2981280031Sdim // Remove <def,read-undef> flags. This def is now a partial redef. 2982327952Sdim // Also remove dead flags since the joined live range will 2983280031Sdim // continue past this instruction. 2984288943Sdim for (MachineOperand &MO : 2985288943Sdim Indexes->getInstructionFromIndex(Def)->operands()) { 2986288943Sdim if (MO.isReg() && MO.isDef() && MO.getReg() == Reg) { 2987327952Sdim if (MO.getSubReg() != 0 && MO.isUndef() && !EraseImpDef) 2988327952Sdim MO.setIsUndef(false); 2989288943Sdim MO.setIsDead(false); 2990280031Sdim } 2991243830Sdim } 2992280031Sdim } 2993243830Sdim // This value will reach instructions below, but we need to make sure 2994243830Sdim // the live range also reaches the instruction at Def. 2995243830Sdim if (!EraseImpDef) 2996243830Sdim EndPoints.push_back(Def); 2997243830Sdim } 2998341825Sdim LLVM_DEBUG(dbgs() << "\t\tpruned " << printReg(Other.Reg) << " at " << Def 2999341825Sdim << ": " << Other.LR << '\n'); 3000243830Sdim break; 3001243830Sdim } 3002243830Sdim case CR_Erase: 3003243830Sdim case CR_Merge: 3004243830Sdim if (isPrunedValue(i, Other)) { 3005280031Sdim // This value is ultimately a copy of a pruned value in LR or Other.LR. 3006243830Sdim // We can no longer trust the value mapping computed by 3007243830Sdim // computeAssignment(), the value that was originally copied could have 3008243830Sdim // been replaced. 3009280031Sdim LIS->pruneValue(LR, Def, &EndPoints); 3010341825Sdim LLVM_DEBUG(dbgs() << "\t\tpruned all of " << printReg(Reg) << " at " 3011341825Sdim << Def << ": " << LR << '\n'); 3012243830Sdim } 3013243830Sdim break; 3014243830Sdim case CR_Unresolved: 3015243830Sdim case CR_Impossible: 3016243830Sdim llvm_unreachable("Unresolved conflicts"); 3017243830Sdim } 3018224145Sdim } 3019243830Sdim} 3020224145Sdim 3021341825Sdim/// Consider the following situation when coalescing the copy between 3022341825Sdim/// %31 and %45 at 800. (The vertical lines represent live range segments.) 3023341825Sdim/// 3024341825Sdim/// Main range Subrange 0004 (sub2) 3025341825Sdim/// %31 %45 %31 %45 3026341825Sdim/// 544 %45 = COPY %28 + + 3027341825Sdim/// | v1 | v1 3028341825Sdim/// 560B bb.1: + + 3029341825Sdim/// 624 = %45.sub2 | v2 | v2 3030341825Sdim/// 800 %31 = COPY %45 + + + + 3031341825Sdim/// | v0 | v0 3032341825Sdim/// 816 %31.sub1 = ... + | 3033341825Sdim/// 880 %30 = COPY %31 | v1 + 3034341825Sdim/// 928 %45 = COPY %30 | + + 3035341825Sdim/// | | v0 | v0 <--+ 3036341825Sdim/// 992B ; backedge -> bb.1 | + + | 3037341825Sdim/// 1040 = %31.sub0 + | 3038341825Sdim/// This value must remain 3039341825Sdim/// live-out! 3040341825Sdim/// 3041341825Sdim/// Assuming that %31 is coalesced into %45, the copy at 928 becomes 3042341825Sdim/// redundant, since it copies the value from %45 back into it. The 3043341825Sdim/// conflict resolution for the main range determines that %45.v0 is 3044341825Sdim/// to be erased, which is ok since %31.v1 is identical to it. 3045341825Sdim/// The problem happens with the subrange for sub2: it has to be live 3046341825Sdim/// on exit from the block, but since 928 was actually a point of 3047341825Sdim/// definition of %45.sub2, %45.sub2 was not live immediately prior 3048341825Sdim/// to that definition. As a result, when 928 was erased, the value v0 3049341825Sdim/// for %45.sub2 was pruned in pruneSubRegValues. Consequently, an 3050341825Sdim/// IMPLICIT_DEF was inserted as a "backedge" definition for %45.sub2, 3051341825Sdim/// providing an incorrect value to the use at 624. 3052341825Sdim/// 3053341825Sdim/// Since the main-range values %31.v1 and %45.v0 were proved to be 3054341825Sdim/// identical, the corresponding values in subranges must also be the 3055341825Sdim/// same. A redundant copy is removed because it's not needed, and not 3056341825Sdim/// because it copied an undefined value, so any liveness that originated 3057341825Sdim/// from that copy cannot disappear. When pruning a value that started 3058341825Sdim/// at the removed copy, the corresponding identical value must be 3059341825Sdim/// extended to replace it. 3060314564Sdimvoid JoinVals::pruneSubRegValues(LiveInterval &LI, LaneBitmask &ShrinkMask) { 3061280031Sdim // Look for values being erased. 3062280031Sdim bool DidPrune = false; 3063280031Sdim for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) { 3064341825Sdim Val &V = Vals[i]; 3065321369Sdim // We should trigger in all cases in which eraseInstrs() does something. 3066321369Sdim // match what eraseInstrs() is doing, print a message so 3067341825Sdim if (V.Resolution != CR_Erase && 3068341825Sdim (V.Resolution != CR_Keep || !V.ErasableImplicitDef || !V.Pruned)) 3069280031Sdim continue; 3070280031Sdim 3071280031Sdim // Check subranges at the point where the copy will be removed. 3072280031Sdim SlotIndex Def = LR.getValNumInfo(i)->def; 3073341825Sdim SlotIndex OtherDef; 3074341825Sdim if (V.Identical) 3075341825Sdim OtherDef = V.OtherVNI->def; 3076341825Sdim 3077321369Sdim // Print message so mismatches with eraseInstrs() can be diagnosed. 3078341825Sdim LLVM_DEBUG(dbgs() << "\t\tExpecting instruction removal at " << Def 3079341825Sdim << '\n'); 3080280031Sdim for (LiveInterval::SubRange &S : LI.subranges()) { 3081280031Sdim LiveQueryResult Q = S.Query(Def); 3082280031Sdim 3083280031Sdim // If a subrange starts at the copy then an undefined value has been 3084280031Sdim // copied and we must remove that subrange value as well. 3085280031Sdim VNInfo *ValueOut = Q.valueOutOrDead(); 3086353358Sdim if (ValueOut != nullptr && (Q.valueIn() == nullptr || 3087353358Sdim (V.Identical && V.Resolution == CR_Erase && 3088353358Sdim ValueOut->def == Def))) { 3089341825Sdim LLVM_DEBUG(dbgs() << "\t\tPrune sublane " << PrintLaneMask(S.LaneMask) 3090341825Sdim << " at " << Def << "\n"); 3091341825Sdim SmallVector<SlotIndex,8> EndPoints; 3092341825Sdim LIS->pruneValue(S, Def, &EndPoints); 3093280031Sdim DidPrune = true; 3094280031Sdim // Mark value number as unused. 3095280031Sdim ValueOut->markUnused(); 3096341825Sdim 3097353358Sdim if (V.Identical && S.Query(OtherDef).valueOutOrDead()) { 3098341825Sdim // If V is identical to V.OtherVNI (and S was live at OtherDef), 3099341825Sdim // then we can't simply prune V from S. V needs to be replaced 3100341825Sdim // with V.OtherVNI. 3101341825Sdim LIS->extendToIndices(S, EndPoints); 3102341825Sdim } 3103280031Sdim continue; 3104280031Sdim } 3105280031Sdim // If a subrange ends at the copy, then a value was copied but only 3106296417Sdim // partially used later. Shrink the subregister range appropriately. 3107280031Sdim if (Q.valueIn() != nullptr && Q.valueOut() == nullptr) { 3108341825Sdim LLVM_DEBUG(dbgs() << "\t\tDead uses at sublane " 3109341825Sdim << PrintLaneMask(S.LaneMask) << " at " << Def 3110341825Sdim << "\n"); 3111280031Sdim ShrinkMask |= S.LaneMask; 3112280031Sdim } 3113280031Sdim } 3114280031Sdim } 3115280031Sdim if (DidPrune) 3116280031Sdim LI.removeEmptySubRanges(); 3117280031Sdim} 3118280031Sdim 3119314564Sdim/// Check if any of the subranges of @p LI contain a definition at @p Def. 3120314564Sdimstatic bool isDefInSubRange(LiveInterval &LI, SlotIndex Def) { 3121314564Sdim for (LiveInterval::SubRange &SR : LI.subranges()) { 3122314564Sdim if (VNInfo *VNI = SR.Query(Def).valueOutOrDead()) 3123314564Sdim if (VNI->def == Def) 3124314564Sdim return true; 3125314564Sdim } 3126314564Sdim return false; 3127314564Sdim} 3128314564Sdim 3129314564Sdimvoid JoinVals::pruneMainSegments(LiveInterval &LI, bool &ShrinkMainRange) { 3130314564Sdim assert(&static_cast<LiveRange&>(LI) == &LR); 3131314564Sdim 3132314564Sdim for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) { 3133314564Sdim if (Vals[i].Resolution != CR_Keep) 3134314564Sdim continue; 3135314564Sdim VNInfo *VNI = LR.getValNumInfo(i); 3136314564Sdim if (VNI->isUnused() || VNI->isPHIDef() || isDefInSubRange(LI, VNI->def)) 3137314564Sdim continue; 3138314564Sdim Vals[i].Pruned = true; 3139314564Sdim ShrinkMainRange = true; 3140314564Sdim } 3141314564Sdim} 3142314564Sdim 3143288943Sdimvoid JoinVals::removeImplicitDefs() { 3144288943Sdim for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) { 3145288943Sdim Val &V = Vals[i]; 3146288943Sdim if (V.Resolution != CR_Keep || !V.ErasableImplicitDef || !V.Pruned) 3147288943Sdim continue; 3148288943Sdim 3149288943Sdim VNInfo *VNI = LR.getValNumInfo(i); 3150288943Sdim VNI->markUnused(); 3151288943Sdim LR.removeValNo(VNI); 3152288943Sdim } 3153288943Sdim} 3154288943Sdim 3155280031Sdimvoid JoinVals::eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs, 3156314564Sdim SmallVectorImpl<unsigned> &ShrinkRegs, 3157314564Sdim LiveInterval *LI) { 3158280031Sdim for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) { 3159243830Sdim // Get the def location before markUnused() below invalidates it. 3160360784Sdim VNInfo *VNI = LR.getValNumInfo(i); 3161360784Sdim SlotIndex Def = VNI->def; 3162243830Sdim switch (Vals[i].Resolution) { 3163288943Sdim case CR_Keep: { 3164243830Sdim // If an IMPLICIT_DEF value is pruned, it doesn't serve a purpose any 3165243830Sdim // longer. The IMPLICIT_DEF instructions are only inserted by 3166243830Sdim // PHIElimination to guarantee that all PHI predecessors have a value. 3167249423Sdim if (!Vals[i].ErasableImplicitDef || !Vals[i].Pruned) 3168243830Sdim break; 3169288943Sdim // Remove value number i from LR. 3170314564Sdim // For intervals with subranges, removing a segment from the main range 3171314564Sdim // may require extending the previous segment: for each definition of 3172314564Sdim // a subregister, there will be a corresponding def in the main range. 3173314564Sdim // That def may fall in the middle of a segment from another subrange. 3174314564Sdim // In such cases, removing this def from the main range must be 3175314564Sdim // complemented by extending the main range to account for the liveness 3176314564Sdim // of the other subrange. 3177314564Sdim // The new end point of the main range segment to be extended. 3178314564Sdim SlotIndex NewEnd; 3179314564Sdim if (LI != nullptr) { 3180314564Sdim LiveRange::iterator I = LR.FindSegmentContaining(Def); 3181314564Sdim assert(I != LR.end()); 3182314564Sdim // Do not extend beyond the end of the segment being removed. 3183314564Sdim // The segment may have been pruned in preparation for joining 3184314564Sdim // live ranges. 3185314564Sdim NewEnd = I->end; 3186314564Sdim } 3187314564Sdim 3188288943Sdim LR.removeValNo(VNI); 3189288943Sdim // Note that this VNInfo is reused and still referenced in NewVNInfo, 3190288943Sdim // make it appear like an unused value number. 3191288943Sdim VNI->markUnused(); 3192314564Sdim 3193314564Sdim if (LI != nullptr && LI->hasSubRanges()) { 3194314564Sdim assert(static_cast<LiveRange*>(LI) == &LR); 3195314564Sdim // Determine the end point based on the subrange information: 3196314564Sdim // minimum of (earliest def of next segment, 3197314564Sdim // latest end point of containing segment) 3198314564Sdim SlotIndex ED, LE; 3199314564Sdim for (LiveInterval::SubRange &SR : LI->subranges()) { 3200314564Sdim LiveRange::iterator I = SR.find(Def); 3201314564Sdim if (I == SR.end()) 3202314564Sdim continue; 3203314564Sdim if (I->start > Def) 3204314564Sdim ED = ED.isValid() ? std::min(ED, I->start) : I->start; 3205314564Sdim else 3206314564Sdim LE = LE.isValid() ? std::max(LE, I->end) : I->end; 3207314564Sdim } 3208314564Sdim if (LE.isValid()) 3209314564Sdim NewEnd = std::min(NewEnd, LE); 3210314564Sdim if (ED.isValid()) 3211314564Sdim NewEnd = std::min(NewEnd, ED); 3212314564Sdim 3213314564Sdim // We only want to do the extension if there was a subrange that 3214314564Sdim // was live across Def. 3215314564Sdim if (LE.isValid()) { 3216314564Sdim LiveRange::iterator S = LR.find(Def); 3217314564Sdim if (S != LR.begin()) 3218314564Sdim std::prev(S)->end = NewEnd; 3219314564Sdim } 3220314564Sdim } 3221341825Sdim LLVM_DEBUG({ 3222314564Sdim dbgs() << "\t\tremoved " << i << '@' << Def << ": " << LR << '\n'; 3223314564Sdim if (LI != nullptr) 3224314564Sdim dbgs() << "\t\t LHS = " << *LI << '\n'; 3225314564Sdim }); 3226314564Sdim LLVM_FALLTHROUGH; 3227288943Sdim } 3228243830Sdim 3229243830Sdim case CR_Erase: { 3230243830Sdim MachineInstr *MI = Indexes->getInstructionFromIndex(Def); 3231243830Sdim assert(MI && "No instruction to erase"); 3232243830Sdim if (MI->isCopy()) { 3233360784Sdim Register Reg = MI->getOperand(1).getReg(); 3234360784Sdim if (Register::isVirtualRegister(Reg) && Reg != CP.getSrcReg() && 3235360784Sdim Reg != CP.getDstReg()) 3236243830Sdim ShrinkRegs.push_back(Reg); 3237243830Sdim } 3238243830Sdim ErasedInstrs.insert(MI); 3239341825Sdim LLVM_DEBUG(dbgs() << "\t\terased:\t" << Def << '\t' << *MI); 3240309124Sdim LIS->RemoveMachineInstrFromMaps(*MI); 3241243830Sdim MI->eraseFromParent(); 3242243830Sdim break; 3243243830Sdim } 3244243830Sdim default: 3245243830Sdim break; 3246243830Sdim } 3247243830Sdim } 3248243830Sdim} 3249243830Sdim 3250296417Sdimvoid RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange, 3251296417Sdim LaneBitmask LaneMask, 3252280031Sdim const CoalescerPair &CP) { 3253280031Sdim SmallVector<VNInfo*, 16> NewVNInfo; 3254280031Sdim JoinVals RHSVals(RRange, CP.getSrcReg(), CP.getSrcIdx(), LaneMask, 3255280031Sdim NewVNInfo, CP, LIS, TRI, true, true); 3256280031Sdim JoinVals LHSVals(LRange, CP.getDstReg(), CP.getDstIdx(), LaneMask, 3257280031Sdim NewVNInfo, CP, LIS, TRI, true, true); 3258280031Sdim 3259288943Sdim // Compute NewVNInfo and resolve conflicts (see also joinVirtRegs()) 3260288943Sdim // We should be able to resolve all conflicts here as we could successfully do 3261288943Sdim // it on the mainrange already. There is however a problem when multiple 3262288943Sdim // ranges get mapped to the "overflow" lane mask bit which creates unexpected 3263288943Sdim // interferences. 3264288943Sdim if (!LHSVals.mapValues(RHSVals) || !RHSVals.mapValues(LHSVals)) { 3265296417Sdim // We already determined that it is legal to merge the intervals, so this 3266296417Sdim // should never fail. 3267296417Sdim llvm_unreachable("*** Couldn't join subrange!\n"); 3268288943Sdim } 3269288943Sdim if (!LHSVals.resolveConflicts(RHSVals) || 3270288943Sdim !RHSVals.resolveConflicts(LHSVals)) { 3271296417Sdim // We already determined that it is legal to merge the intervals, so this 3272296417Sdim // should never fail. 3273296417Sdim llvm_unreachable("*** Couldn't join subrange!\n"); 3274288943Sdim } 3275280031Sdim 3276280031Sdim // The merging algorithm in LiveInterval::join() can't handle conflicting 3277280031Sdim // value mappings, so we need to remove any live ranges that overlap a 3278280031Sdim // CR_Replace resolution. Collect a set of end points that can be used to 3279280031Sdim // restore the live range after joining. 3280280031Sdim SmallVector<SlotIndex, 8> EndPoints; 3281280031Sdim LHSVals.pruneValues(RHSVals, EndPoints, false); 3282280031Sdim RHSVals.pruneValues(LHSVals, EndPoints, false); 3283280031Sdim 3284288943Sdim LHSVals.removeImplicitDefs(); 3285288943Sdim RHSVals.removeImplicitDefs(); 3286288943Sdim 3287280031Sdim LRange.verify(); 3288280031Sdim RRange.verify(); 3289280031Sdim 3290280031Sdim // Join RRange into LHS. 3291280031Sdim LRange.join(RRange, LHSVals.getAssignments(), RHSVals.getAssignments(), 3292280031Sdim NewVNInfo); 3293280031Sdim 3294341825Sdim LLVM_DEBUG(dbgs() << "\t\tjoined lanes: " << PrintLaneMask(LaneMask) 3295341825Sdim << ' ' << LRange << "\n"); 3296280031Sdim if (EndPoints.empty()) 3297296417Sdim return; 3298280031Sdim 3299280031Sdim // Recompute the parts of the live range we had to remove because of 3300280031Sdim // CR_Replace conflicts. 3301341825Sdim LLVM_DEBUG({ 3302314564Sdim dbgs() << "\t\trestoring liveness to " << EndPoints.size() << " points: "; 3303314564Sdim for (unsigned i = 0, n = EndPoints.size(); i != n; ++i) { 3304314564Sdim dbgs() << EndPoints[i]; 3305314564Sdim if (i != n-1) 3306314564Sdim dbgs() << ','; 3307314564Sdim } 3308314564Sdim dbgs() << ": " << LRange << '\n'; 3309314564Sdim }); 3310280031Sdim LIS->extendToIndices(LRange, EndPoints); 3311280031Sdim} 3312280031Sdim 3313296417Sdimvoid RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI, 3314280031Sdim const LiveRange &ToMerge, 3315296417Sdim LaneBitmask LaneMask, 3316360784Sdim CoalescerPair &CP, 3317360784Sdim unsigned ComposeSubRegIdx) { 3318280031Sdim BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator(); 3319353358Sdim LI.refineSubRanges( 3320353358Sdim Allocator, LaneMask, 3321353358Sdim [this, &Allocator, &ToMerge, &CP](LiveInterval::SubRange &SR) { 3322353358Sdim if (SR.empty()) { 3323353358Sdim SR.assign(ToMerge, Allocator); 3324353358Sdim } else { 3325353358Sdim // joinSubRegRange() destroys the merged range, so we need a copy. 3326353358Sdim LiveRange RangeCopy(ToMerge, Allocator); 3327353358Sdim joinSubRegRanges(SR, RangeCopy, SR.LaneMask, CP); 3328353358Sdim } 3329353358Sdim }, 3330360784Sdim *LIS->getSlotIndexes(), *TRI, ComposeSubRegIdx); 3331280031Sdim} 3332280031Sdim 3333353358Sdimbool RegisterCoalescer::isHighCostLiveInterval(LiveInterval &LI) { 3334353358Sdim if (LI.valnos.size() < LargeIntervalSizeThreshold) 3335353358Sdim return false; 3336353358Sdim auto &Counter = LargeLIVisitCounter[LI.reg]; 3337353358Sdim if (Counter < LargeIntervalFreqThreshold) { 3338353358Sdim Counter++; 3339353358Sdim return false; 3340353358Sdim } 3341353358Sdim return true; 3342353358Sdim} 3343353358Sdim 3344243830Sdimbool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) { 3345243830Sdim SmallVector<VNInfo*, 16> NewVNInfo; 3346243830Sdim LiveInterval &RHS = LIS->getInterval(CP.getSrcReg()); 3347243830Sdim LiveInterval &LHS = LIS->getInterval(CP.getDstReg()); 3348288943Sdim bool TrackSubRegLiveness = MRI->shouldTrackSubRegLiveness(*CP.getNewRC()); 3349314564Sdim JoinVals RHSVals(RHS, CP.getSrcReg(), CP.getSrcIdx(), LaneBitmask::getNone(), 3350314564Sdim NewVNInfo, CP, LIS, TRI, false, TrackSubRegLiveness); 3351314564Sdim JoinVals LHSVals(LHS, CP.getDstReg(), CP.getDstIdx(), LaneBitmask::getNone(), 3352314564Sdim NewVNInfo, CP, LIS, TRI, false, TrackSubRegLiveness); 3353243830Sdim 3354341825Sdim LLVM_DEBUG(dbgs() << "\t\tRHS = " << RHS << "\n\t\tLHS = " << LHS << '\n'); 3355243830Sdim 3356353358Sdim if (isHighCostLiveInterval(LHS) || isHighCostLiveInterval(RHS)) 3357353358Sdim return false; 3358353358Sdim 3359243830Sdim // First compute NewVNInfo and the simple value mappings. 3360243830Sdim // Detect impossible conflicts early. 3361243830Sdim if (!LHSVals.mapValues(RHSVals) || !RHSVals.mapValues(LHSVals)) 3362243830Sdim return false; 3363243830Sdim 3364243830Sdim // Some conflicts can only be resolved after all values have been mapped. 3365243830Sdim if (!LHSVals.resolveConflicts(RHSVals) || !RHSVals.resolveConflicts(LHSVals)) 3366243830Sdim return false; 3367243830Sdim 3368243830Sdim // All clear, the live ranges can be merged. 3369280031Sdim if (RHS.hasSubRanges() || LHS.hasSubRanges()) { 3370280031Sdim BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator(); 3371243830Sdim 3372280031Sdim // Transform lanemasks from the LHS to masks in the coalesced register and 3373280031Sdim // create initial subranges if necessary. 3374280031Sdim unsigned DstIdx = CP.getDstIdx(); 3375280031Sdim if (!LHS.hasSubRanges()) { 3376296417Sdim LaneBitmask Mask = DstIdx == 0 ? CP.getNewRC()->getLaneMask() 3377296417Sdim : TRI->getSubRegIndexLaneMask(DstIdx); 3378280031Sdim // LHS must support subregs or we wouldn't be in this codepath. 3379314564Sdim assert(Mask.any()); 3380280031Sdim LHS.createSubRangeFrom(Allocator, Mask, LHS); 3381280031Sdim } else if (DstIdx != 0) { 3382280031Sdim // Transform LHS lanemasks to new register class if necessary. 3383280031Sdim for (LiveInterval::SubRange &R : LHS.subranges()) { 3384296417Sdim LaneBitmask Mask = TRI->composeSubRegIndexLaneMask(DstIdx, R.LaneMask); 3385280031Sdim R.LaneMask = Mask; 3386280031Sdim } 3387280031Sdim } 3388341825Sdim LLVM_DEBUG(dbgs() << "\t\tLHST = " << printReg(CP.getDstReg()) << ' ' << LHS 3389341825Sdim << '\n'); 3390280031Sdim 3391280031Sdim // Determine lanemasks of RHS in the coalesced register and merge subranges. 3392280031Sdim unsigned SrcIdx = CP.getSrcIdx(); 3393280031Sdim if (!RHS.hasSubRanges()) { 3394296417Sdim LaneBitmask Mask = SrcIdx == 0 ? CP.getNewRC()->getLaneMask() 3395296417Sdim : TRI->getSubRegIndexLaneMask(SrcIdx); 3396360784Sdim mergeSubRangeInto(LHS, RHS, Mask, CP, DstIdx); 3397280031Sdim } else { 3398280031Sdim // Pair up subranges and merge. 3399280031Sdim for (LiveInterval::SubRange &R : RHS.subranges()) { 3400296417Sdim LaneBitmask Mask = TRI->composeSubRegIndexLaneMask(SrcIdx, R.LaneMask); 3401360784Sdim mergeSubRangeInto(LHS, R, Mask, CP, DstIdx); 3402280031Sdim } 3403280031Sdim } 3404341825Sdim LLVM_DEBUG(dbgs() << "\tJoined SubRanges " << LHS << "\n"); 3405280031Sdim 3406314564Sdim // Pruning implicit defs from subranges may result in the main range 3407314564Sdim // having stale segments. 3408314564Sdim LHSVals.pruneMainSegments(LHS, ShrinkMainRange); 3409314564Sdim 3410296417Sdim LHSVals.pruneSubRegValues(LHS, ShrinkMask); 3411296417Sdim RHSVals.pruneSubRegValues(LHS, ShrinkMask); 3412280031Sdim } 3413280031Sdim 3414243830Sdim // The merging algorithm in LiveInterval::join() can't handle conflicting 3415243830Sdim // value mappings, so we need to remove any live ranges that overlap a 3416243830Sdim // CR_Replace resolution. Collect a set of end points that can be used to 3417243830Sdim // restore the live range after joining. 3418243830Sdim SmallVector<SlotIndex, 8> EndPoints; 3419280031Sdim LHSVals.pruneValues(RHSVals, EndPoints, true); 3420280031Sdim RHSVals.pruneValues(LHSVals, EndPoints, true); 3421243830Sdim 3422243830Sdim // Erase COPY and IMPLICIT_DEF instructions. This may cause some external 3423243830Sdim // registers to require trimming. 3424243830Sdim SmallVector<unsigned, 8> ShrinkRegs; 3425314564Sdim LHSVals.eraseInstrs(ErasedInstrs, ShrinkRegs, &LHS); 3426243830Sdim RHSVals.eraseInstrs(ErasedInstrs, ShrinkRegs); 3427243830Sdim while (!ShrinkRegs.empty()) 3428288943Sdim shrinkToUses(&LIS->getInterval(ShrinkRegs.pop_back_val())); 3429243830Sdim 3430360784Sdim // Scan and mark undef any DBG_VALUEs that would refer to a different value. 3431360784Sdim checkMergingChangesDbgValues(CP, LHS, LHSVals, RHS, RHSVals); 3432360784Sdim 3433243830Sdim // Join RHS into LHS. 3434261991Sdim LHS.join(RHS, LHSVals.getAssignments(), RHSVals.getAssignments(), NewVNInfo); 3435243830Sdim 3436243830Sdim // Kill flags are going to be wrong if the live ranges were overlapping. 3437243830Sdim // Eventually, we should simply clear all kill flags when computing live 3438243830Sdim // ranges. They are reinserted after register allocation. 3439243830Sdim MRI->clearKillFlags(LHS.reg); 3440243830Sdim MRI->clearKillFlags(RHS.reg); 3441243830Sdim 3442280031Sdim if (!EndPoints.empty()) { 3443280031Sdim // Recompute the parts of the live range we had to remove because of 3444280031Sdim // CR_Replace conflicts. 3445341825Sdim LLVM_DEBUG({ 3446314564Sdim dbgs() << "\t\trestoring liveness to " << EndPoints.size() << " points: "; 3447314564Sdim for (unsigned i = 0, n = EndPoints.size(); i != n; ++i) { 3448314564Sdim dbgs() << EndPoints[i]; 3449314564Sdim if (i != n-1) 3450314564Sdim dbgs() << ','; 3451314564Sdim } 3452314564Sdim dbgs() << ": " << LHS << '\n'; 3453314564Sdim }); 3454280031Sdim LIS->extendToIndices((LiveRange&)LHS, EndPoints); 3455280031Sdim } 3456243830Sdim 3457224145Sdim return true; 3458224145Sdim} 3459224145Sdim 3460243830Sdimbool RegisterCoalescer::joinIntervals(CoalescerPair &CP) { 3461243830Sdim return CP.isPhys() ? joinReservedPhysReg(CP) : joinVirtRegs(CP); 3462243830Sdim} 3463243830Sdim 3464360784Sdimvoid RegisterCoalescer::buildVRegToDbgValueMap(MachineFunction &MF) 3465360784Sdim{ 3466360784Sdim const SlotIndexes &Slots = *LIS->getSlotIndexes(); 3467360784Sdim SmallVector<MachineInstr *, 8> ToInsert; 3468360784Sdim 3469360784Sdim // After collecting a block of DBG_VALUEs into ToInsert, enter them into the 3470360784Sdim // vreg => DbgValueLoc map. 3471360784Sdim auto CloseNewDVRange = [this, &ToInsert](SlotIndex Slot) { 3472360784Sdim for (auto *X : ToInsert) 3473360784Sdim DbgVRegToValues[X->getOperand(0).getReg()].push_back({Slot, X}); 3474360784Sdim 3475360784Sdim ToInsert.clear(); 3476360784Sdim }; 3477360784Sdim 3478360784Sdim // Iterate over all instructions, collecting them into the ToInsert vector. 3479360784Sdim // Once a non-debug instruction is found, record the slot index of the 3480360784Sdim // collected DBG_VALUEs. 3481360784Sdim for (auto &MBB : MF) { 3482360784Sdim SlotIndex CurrentSlot = Slots.getMBBStartIdx(&MBB); 3483360784Sdim 3484360784Sdim for (auto &MI : MBB) { 3485360784Sdim if (MI.isDebugValue() && MI.getOperand(0).isReg() && 3486360784Sdim MI.getOperand(0).getReg().isVirtual()) { 3487360784Sdim ToInsert.push_back(&MI); 3488360784Sdim } else if (!MI.isDebugInstr()) { 3489360784Sdim CurrentSlot = Slots.getInstructionIndex(MI); 3490360784Sdim CloseNewDVRange(CurrentSlot); 3491360784Sdim } 3492360784Sdim } 3493360784Sdim 3494360784Sdim // Close range of DBG_VALUEs at the end of blocks. 3495360784Sdim CloseNewDVRange(Slots.getMBBEndIdx(&MBB)); 3496360784Sdim } 3497360784Sdim 3498360784Sdim // Sort all DBG_VALUEs we've seen by slot number. 3499360784Sdim for (auto &Pair : DbgVRegToValues) 3500360784Sdim llvm::sort(Pair.second); 3501360784Sdim} 3502360784Sdim 3503360784Sdimvoid RegisterCoalescer::checkMergingChangesDbgValues(CoalescerPair &CP, 3504360784Sdim LiveRange &LHS, 3505360784Sdim JoinVals &LHSVals, 3506360784Sdim LiveRange &RHS, 3507360784Sdim JoinVals &RHSVals) { 3508360784Sdim auto ScanForDstReg = [&](unsigned Reg) { 3509360784Sdim checkMergingChangesDbgValuesImpl(Reg, RHS, LHS, LHSVals); 3510360784Sdim }; 3511360784Sdim 3512360784Sdim auto ScanForSrcReg = [&](unsigned Reg) { 3513360784Sdim checkMergingChangesDbgValuesImpl(Reg, LHS, RHS, RHSVals); 3514360784Sdim }; 3515360784Sdim 3516360784Sdim // Scan for potentially unsound DBG_VALUEs: examine first the register number 3517360784Sdim // Reg, and then any other vregs that may have been merged into it. 3518360784Sdim auto PerformScan = [this](unsigned Reg, std::function<void(unsigned)> Func) { 3519360784Sdim Func(Reg); 3520360784Sdim if (DbgMergedVRegNums.count(Reg)) 3521360784Sdim for (unsigned X : DbgMergedVRegNums[Reg]) 3522360784Sdim Func(X); 3523360784Sdim }; 3524360784Sdim 3525360784Sdim // Scan for unsound updates of both the source and destination register. 3526360784Sdim PerformScan(CP.getSrcReg(), ScanForSrcReg); 3527360784Sdim PerformScan(CP.getDstReg(), ScanForDstReg); 3528360784Sdim} 3529360784Sdim 3530360784Sdimvoid RegisterCoalescer::checkMergingChangesDbgValuesImpl(unsigned Reg, 3531360784Sdim LiveRange &OtherLR, 3532360784Sdim LiveRange &RegLR, 3533360784Sdim JoinVals &RegVals) { 3534360784Sdim // Are there any DBG_VALUEs to examine? 3535360784Sdim auto VRegMapIt = DbgVRegToValues.find(Reg); 3536360784Sdim if (VRegMapIt == DbgVRegToValues.end()) 3537360784Sdim return; 3538360784Sdim 3539360784Sdim auto &DbgValueSet = VRegMapIt->second; 3540360784Sdim auto DbgValueSetIt = DbgValueSet.begin(); 3541360784Sdim auto SegmentIt = OtherLR.begin(); 3542360784Sdim 3543360784Sdim bool LastUndefResult = false; 3544360784Sdim SlotIndex LastUndefIdx; 3545360784Sdim 3546360784Sdim // If the "Other" register is live at a slot Idx, test whether Reg can 3547360784Sdim // safely be merged with it, or should be marked undef. 3548360784Sdim auto ShouldUndef = [&RegVals, &RegLR, &LastUndefResult, 3549360784Sdim &LastUndefIdx](SlotIndex Idx) -> bool { 3550360784Sdim // Our worst-case performance typically happens with asan, causing very 3551360784Sdim // many DBG_VALUEs of the same location. Cache a copy of the most recent 3552360784Sdim // result for this edge-case. 3553360784Sdim if (LastUndefIdx == Idx) 3554360784Sdim return LastUndefResult; 3555360784Sdim 3556360784Sdim // If the other range was live, and Reg's was not, the register coalescer 3557360784Sdim // will not have tried to resolve any conflicts. We don't know whether 3558360784Sdim // the DBG_VALUE will refer to the same value number, so it must be made 3559360784Sdim // undef. 3560360784Sdim auto OtherIt = RegLR.find(Idx); 3561360784Sdim if (OtherIt == RegLR.end()) 3562360784Sdim return true; 3563360784Sdim 3564360784Sdim // Both the registers were live: examine the conflict resolution record for 3565360784Sdim // the value number Reg refers to. CR_Keep meant that this value number 3566360784Sdim // "won" and the merged register definitely refers to that value. CR_Erase 3567360784Sdim // means the value number was a redundant copy of the other value, which 3568360784Sdim // was coalesced and Reg deleted. It's safe to refer to the other register 3569360784Sdim // (which will be the source of the copy). 3570360784Sdim auto Resolution = RegVals.getResolution(OtherIt->valno->id); 3571360784Sdim LastUndefResult = Resolution != JoinVals::CR_Keep && 3572360784Sdim Resolution != JoinVals::CR_Erase; 3573360784Sdim LastUndefIdx = Idx; 3574360784Sdim return LastUndefResult; 3575360784Sdim }; 3576360784Sdim 3577360784Sdim // Iterate over both the live-range of the "Other" register, and the set of 3578360784Sdim // DBG_VALUEs for Reg at the same time. Advance whichever one has the lowest 3579360784Sdim // slot index. This relies on the DbgValueSet being ordered. 3580360784Sdim while (DbgValueSetIt != DbgValueSet.end() && SegmentIt != OtherLR.end()) { 3581360784Sdim if (DbgValueSetIt->first < SegmentIt->end) { 3582360784Sdim // "Other" is live and there is a DBG_VALUE of Reg: test if we should 3583360784Sdim // set it undef. 3584360784Sdim if (DbgValueSetIt->first >= SegmentIt->start && 3585360784Sdim DbgValueSetIt->second->getOperand(0).getReg() != 0 && 3586360784Sdim ShouldUndef(DbgValueSetIt->first)) { 3587360784Sdim // Mark undef, erase record of this DBG_VALUE to avoid revisiting. 3588360784Sdim DbgValueSetIt->second->getOperand(0).setReg(0); 3589360784Sdim continue; 3590360784Sdim } 3591360784Sdim ++DbgValueSetIt; 3592360784Sdim } else { 3593360784Sdim ++SegmentIt; 3594360784Sdim } 3595360784Sdim } 3596360784Sdim} 3597360784Sdim 3598224145Sdimnamespace { 3599327952Sdim 3600288943Sdim/// Information concerning MBB coalescing priority. 3601249423Sdimstruct MBBPriorityInfo { 3602249423Sdim MachineBasicBlock *MBB; 3603249423Sdim unsigned Depth; 3604249423Sdim bool IsSplit; 3605224145Sdim 3606249423Sdim MBBPriorityInfo(MachineBasicBlock *mbb, unsigned depth, bool issplit) 3607249423Sdim : MBB(mbb), Depth(depth), IsSplit(issplit) {} 3608249423Sdim}; 3609224145Sdim 3610327952Sdim} // end anonymous namespace 3611327952Sdim 3612288943Sdim/// C-style comparator that sorts first based on the loop depth of the basic 3613288943Sdim/// block (the unsigned), and then on the MBB number. 3614288943Sdim/// 3615288943Sdim/// EnableGlobalCopies assumes that the primary sort key is loop depth. 3616261991Sdimstatic int compareMBBPriority(const MBBPriorityInfo *LHS, 3617261991Sdim const MBBPriorityInfo *RHS) { 3618249423Sdim // Deeper loops first 3619249423Sdim if (LHS->Depth != RHS->Depth) 3620249423Sdim return LHS->Depth > RHS->Depth ? -1 : 1; 3621249423Sdim 3622249423Sdim // Try to unsplit critical edges next. 3623249423Sdim if (LHS->IsSplit != RHS->IsSplit) 3624249423Sdim return LHS->IsSplit ? -1 : 1; 3625249423Sdim 3626249423Sdim // Prefer blocks that are more connected in the CFG. This takes care of 3627249423Sdim // the most difficult copies first while intervals are short. 3628249423Sdim unsigned cl = LHS->MBB->pred_size() + LHS->MBB->succ_size(); 3629249423Sdim unsigned cr = RHS->MBB->pred_size() + RHS->MBB->succ_size(); 3630249423Sdim if (cl != cr) 3631249423Sdim return cl > cr ? -1 : 1; 3632249423Sdim 3633249423Sdim // As a last resort, sort by block number. 3634249423Sdim return LHS->MBB->getNumber() < RHS->MBB->getNumber() ? -1 : 1; 3635224145Sdim} 3636224145Sdim 3637249423Sdim/// \returns true if the given copy uses or defines a local live range. 3638249423Sdimstatic bool isLocalCopy(MachineInstr *Copy, const LiveIntervals *LIS) { 3639249423Sdim if (!Copy->isCopy()) 3640249423Sdim return false; 3641249423Sdim 3642261991Sdim if (Copy->getOperand(1).isUndef()) 3643261991Sdim return false; 3644261991Sdim 3645360784Sdim Register SrcReg = Copy->getOperand(1).getReg(); 3646360784Sdim Register DstReg = Copy->getOperand(0).getReg(); 3647360784Sdim if (Register::isPhysicalRegister(SrcReg) || 3648360784Sdim Register::isPhysicalRegister(DstReg)) 3649249423Sdim return false; 3650249423Sdim 3651249423Sdim return LIS->intervalIsInOneMBB(LIS->getInterval(SrcReg)) 3652249423Sdim || LIS->intervalIsInOneMBB(LIS->getInterval(DstReg)); 3653249423Sdim} 3654249423Sdim 3655344779Sdimvoid RegisterCoalescer::lateLiveIntervalUpdate() { 3656344779Sdim for (unsigned reg : ToBeUpdated) { 3657344779Sdim if (!LIS->hasInterval(reg)) 3658344779Sdim continue; 3659344779Sdim LiveInterval &LI = LIS->getInterval(reg); 3660344779Sdim shrinkToUses(&LI, &DeadDefs); 3661344779Sdim if (!DeadDefs.empty()) 3662344779Sdim eliminateDeadDefs(); 3663344779Sdim } 3664344779Sdim ToBeUpdated.clear(); 3665344779Sdim} 3666344779Sdim 3667249423Sdimbool RegisterCoalescer:: 3668249423SdimcopyCoalesceWorkList(MutableArrayRef<MachineInstr*> CurrList) { 3669239462Sdim bool Progress = false; 3670249423Sdim for (unsigned i = 0, e = CurrList.size(); i != e; ++i) { 3671249423Sdim if (!CurrList[i]) 3672239462Sdim continue; 3673239462Sdim // Skip instruction pointers that have already been erased, for example by 3674239462Sdim // dead code elimination. 3675321369Sdim if (ErasedInstrs.count(CurrList[i])) { 3676276479Sdim CurrList[i] = nullptr; 3677239462Sdim continue; 3678239462Sdim } 3679239462Sdim bool Again = false; 3680249423Sdim bool Success = joinCopy(CurrList[i], Again); 3681239462Sdim Progress |= Success; 3682239462Sdim if (Success || !Again) 3683276479Sdim CurrList[i] = nullptr; 3684239462Sdim } 3685239462Sdim return Progress; 3686239462Sdim} 3687239462Sdim 3688288943Sdim/// Check if DstReg is a terminal node. 3689288943Sdim/// I.e., it does not have any affinity other than \p Copy. 3690288943Sdimstatic bool isTerminalReg(unsigned DstReg, const MachineInstr &Copy, 3691288943Sdim const MachineRegisterInfo *MRI) { 3692288943Sdim assert(Copy.isCopyLike()); 3693288943Sdim // Check if the destination of this copy as any other affinity. 3694288943Sdim for (const MachineInstr &MI : MRI->reg_nodbg_instructions(DstReg)) 3695288943Sdim if (&MI != &Copy && MI.isCopyLike()) 3696288943Sdim return false; 3697288943Sdim return true; 3698288943Sdim} 3699288943Sdim 3700288943Sdimbool RegisterCoalescer::applyTerminalRule(const MachineInstr &Copy) const { 3701288943Sdim assert(Copy.isCopyLike()); 3702288943Sdim if (!UseTerminalRule) 3703288943Sdim return false; 3704288943Sdim unsigned DstReg, DstSubReg, SrcReg, SrcSubReg; 3705353358Sdim if (!isMoveInstr(*TRI, &Copy, SrcReg, DstReg, SrcSubReg, DstSubReg)) 3706353358Sdim return false; 3707288943Sdim // Check if the destination of this copy has any other affinity. 3708360784Sdim if (Register::isPhysicalRegister(DstReg) || 3709288943Sdim // If SrcReg is a physical register, the copy won't be coalesced. 3710288943Sdim // Ignoring it may have other side effect (like missing 3711288943Sdim // rematerialization). So keep it. 3712360784Sdim Register::isPhysicalRegister(SrcReg) || !isTerminalReg(DstReg, Copy, MRI)) 3713288943Sdim return false; 3714288943Sdim 3715296417Sdim // DstReg is a terminal node. Check if it interferes with any other 3716288943Sdim // copy involving SrcReg. 3717288943Sdim const MachineBasicBlock *OrigBB = Copy.getParent(); 3718288943Sdim const LiveInterval &DstLI = LIS->getInterval(DstReg); 3719288943Sdim for (const MachineInstr &MI : MRI->reg_nodbg_instructions(SrcReg)) { 3720288943Sdim // Technically we should check if the weight of the new copy is 3721288943Sdim // interesting compared to the other one and update the weight 3722288943Sdim // of the copies accordingly. However, this would only work if 3723288943Sdim // we would gather all the copies first then coalesce, whereas 3724288943Sdim // right now we interleave both actions. 3725288943Sdim // For now, just consider the copies that are in the same block. 3726288943Sdim if (&MI == &Copy || !MI.isCopyLike() || MI.getParent() != OrigBB) 3727288943Sdim continue; 3728288943Sdim unsigned OtherReg, OtherSubReg, OtherSrcReg, OtherSrcSubReg; 3729353358Sdim if (!isMoveInstr(*TRI, &Copy, OtherSrcReg, OtherReg, OtherSrcSubReg, 3730353358Sdim OtherSubReg)) 3731353358Sdim return false; 3732288943Sdim if (OtherReg == SrcReg) 3733288943Sdim OtherReg = OtherSrcReg; 3734288943Sdim // Check if OtherReg is a non-terminal. 3735360784Sdim if (Register::isPhysicalRegister(OtherReg) || 3736288943Sdim isTerminalReg(OtherReg, MI, MRI)) 3737288943Sdim continue; 3738288943Sdim // Check that OtherReg interfere with DstReg. 3739288943Sdim if (LIS->getInterval(OtherReg).overlaps(DstLI)) { 3740341825Sdim LLVM_DEBUG(dbgs() << "Apply terminal rule for: " << printReg(DstReg) 3741341825Sdim << '\n'); 3742288943Sdim return true; 3743288943Sdim } 3744288943Sdim } 3745288943Sdim return false; 3746288943Sdim} 3747288943Sdim 3748239462Sdimvoid 3749239462SdimRegisterCoalescer::copyCoalesceInMBB(MachineBasicBlock *MBB) { 3750341825Sdim LLVM_DEBUG(dbgs() << MBB->getName() << ":\n"); 3751224145Sdim 3752239462Sdim // Collect all copy-like instructions in MBB. Don't start coalescing anything 3753239462Sdim // yet, it might invalidate the iterator. 3754239462Sdim const unsigned PrevSize = WorkList.size(); 3755249423Sdim if (JoinGlobalCopies) { 3756288943Sdim SmallVector<MachineInstr*, 2> LocalTerminals; 3757288943Sdim SmallVector<MachineInstr*, 2> GlobalTerminals; 3758249423Sdim // Coalesce copies bottom-up to coalesce local defs before local uses. They 3759249423Sdim // are not inherently easier to resolve, but slightly preferable until we 3760249423Sdim // have local live range splitting. In particular this is required by 3761249423Sdim // cmp+jmp macro fusion. 3762261991Sdim for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end(); 3763261991Sdim MII != E; ++MII) { 3764249423Sdim if (!MII->isCopyLike()) 3765249423Sdim continue; 3766288943Sdim bool ApplyTerminalRule = applyTerminalRule(*MII); 3767288943Sdim if (isLocalCopy(&(*MII), LIS)) { 3768288943Sdim if (ApplyTerminalRule) 3769288943Sdim LocalTerminals.push_back(&(*MII)); 3770288943Sdim else 3771288943Sdim LocalWorkList.push_back(&(*MII)); 3772288943Sdim } else { 3773288943Sdim if (ApplyTerminalRule) 3774288943Sdim GlobalTerminals.push_back(&(*MII)); 3775288943Sdim else 3776288943Sdim WorkList.push_back(&(*MII)); 3777288943Sdim } 3778249423Sdim } 3779288943Sdim // Append the copies evicted by the terminal rule at the end of the list. 3780288943Sdim LocalWorkList.append(LocalTerminals.begin(), LocalTerminals.end()); 3781288943Sdim WorkList.append(GlobalTerminals.begin(), GlobalTerminals.end()); 3782249423Sdim } 3783249423Sdim else { 3784288943Sdim SmallVector<MachineInstr*, 2> Terminals; 3785309124Sdim for (MachineInstr &MII : *MBB) 3786309124Sdim if (MII.isCopyLike()) { 3787309124Sdim if (applyTerminalRule(MII)) 3788309124Sdim Terminals.push_back(&MII); 3789288943Sdim else 3790309124Sdim WorkList.push_back(&MII); 3791309124Sdim } 3792309124Sdim // Append the copies evicted by the terminal rule at the end of the list. 3793309124Sdim WorkList.append(Terminals.begin(), Terminals.end()); 3794249423Sdim } 3795239462Sdim // Try coalescing the collected copies immediately, and remove the nulls. 3796239462Sdim // This prevents the WorkList from getting too large since most copies are 3797239462Sdim // joinable on the first attempt. 3798249423Sdim MutableArrayRef<MachineInstr*> 3799249423Sdim CurrList(WorkList.begin() + PrevSize, WorkList.end()); 3800249423Sdim if (copyCoalesceWorkList(CurrList)) 3801239462Sdim WorkList.erase(std::remove(WorkList.begin() + PrevSize, WorkList.end(), 3802321369Sdim nullptr), WorkList.end()); 3803224145Sdim} 3804224145Sdim 3805249423Sdimvoid RegisterCoalescer::coalesceLocals() { 3806249423Sdim copyCoalesceWorkList(LocalWorkList); 3807249423Sdim for (unsigned j = 0, je = LocalWorkList.size(); j != je; ++j) { 3808249423Sdim if (LocalWorkList[j]) 3809249423Sdim WorkList.push_back(LocalWorkList[j]); 3810249423Sdim } 3811249423Sdim LocalWorkList.clear(); 3812249423Sdim} 3813249423Sdim 3814239462Sdimvoid RegisterCoalescer::joinAllIntervals() { 3815341825Sdim LLVM_DEBUG(dbgs() << "********** JOINING INTERVALS ***********\n"); 3816249423Sdim assert(WorkList.empty() && LocalWorkList.empty() && "Old data still around."); 3817224145Sdim 3818249423Sdim std::vector<MBBPriorityInfo> MBBs; 3819249423Sdim MBBs.reserve(MF->size()); 3820296417Sdim for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) { 3821296417Sdim MachineBasicBlock *MBB = &*I; 3822249423Sdim MBBs.push_back(MBBPriorityInfo(MBB, Loops->getLoopDepth(MBB), 3823249423Sdim JoinSplitEdges && isSplitEdge(MBB))); 3824249423Sdim } 3825249423Sdim array_pod_sort(MBBs.begin(), MBBs.end(), compareMBBPriority); 3826224145Sdim 3827249423Sdim // Coalesce intervals in MBB priority order. 3828327952Sdim unsigned CurrDepth = std::numeric_limits<unsigned>::max(); 3829249423Sdim for (unsigned i = 0, e = MBBs.size(); i != e; ++i) { 3830249423Sdim // Try coalescing the collected local copies for deeper loops. 3831249423Sdim if (JoinGlobalCopies && MBBs[i].Depth < CurrDepth) { 3832249423Sdim coalesceLocals(); 3833249423Sdim CurrDepth = MBBs[i].Depth; 3834224145Sdim } 3835249423Sdim copyCoalesceInMBB(MBBs[i].MBB); 3836224145Sdim } 3837344779Sdim lateLiveIntervalUpdate(); 3838249423Sdim coalesceLocals(); 3839224145Sdim 3840224145Sdim // Joining intervals can allow other intervals to be joined. Iteratively join 3841224145Sdim // until we make no progress. 3842249423Sdim while (copyCoalesceWorkList(WorkList)) 3843239462Sdim /* empty */ ; 3844344779Sdim lateLiveIntervalUpdate(); 3845224145Sdim} 3846224145Sdim 3847224145Sdimvoid RegisterCoalescer::releaseMemory() { 3848239462Sdim ErasedInstrs.clear(); 3849239462Sdim WorkList.clear(); 3850239462Sdim DeadDefs.clear(); 3851239462Sdim InflateRegs.clear(); 3852353358Sdim LargeLIVisitCounter.clear(); 3853224145Sdim} 3854224145Sdim 3855224145Sdimbool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { 3856226633Sdim MF = &fn; 3857226633Sdim MRI = &fn.getRegInfo(); 3858288943Sdim const TargetSubtargetInfo &STI = fn.getSubtarget(); 3859288943Sdim TRI = STI.getRegisterInfo(); 3860288943Sdim TII = STI.getInstrInfo(); 3861226633Sdim LIS = &getAnalysis<LiveIntervals>(); 3862296417Sdim AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); 3863226633Sdim Loops = &getAnalysis<MachineLoopInfo>(); 3864249423Sdim if (EnableGlobalCopies == cl::BOU_UNSET) 3865288943Sdim JoinGlobalCopies = STI.enableJoinGlobalCopies(); 3866249423Sdim else 3867249423Sdim JoinGlobalCopies = (EnableGlobalCopies == cl::BOU_TRUE); 3868249423Sdim 3869249423Sdim // The MachineScheduler does not currently require JoinSplitEdges. This will 3870249423Sdim // either be enabled unconditionally or replaced by a more general live range 3871249423Sdim // splitting optimization. 3872249423Sdim JoinSplitEdges = EnableJoinSplits; 3873249423Sdim 3874341825Sdim LLVM_DEBUG(dbgs() << "********** SIMPLE REGISTER COALESCING **********\n" 3875341825Sdim << "********** Function: " << MF->getName() << '\n'); 3876224145Sdim 3877224145Sdim if (VerifyCoalescing) 3878226633Sdim MF->verify(this, "Before register coalescing"); 3879224145Sdim 3880360784Sdim DbgVRegToValues.clear(); 3881360784Sdim DbgMergedVRegNums.clear(); 3882360784Sdim buildVRegToDbgValueMap(fn); 3883360784Sdim 3884224145Sdim RegClassInfo.runOnMachineFunction(fn); 3885224145Sdim 3886224145Sdim // Join (coalesce) intervals if requested. 3887239462Sdim if (EnableJoining) 3888239462Sdim joinAllIntervals(); 3889224145Sdim 3890226633Sdim // After deleting a lot of copies, register classes may be less constrained. 3891239462Sdim // Removing sub-register operands may allow GR32_ABCD -> GR32 and DPR_VFP2 -> 3892226633Sdim // DPR inflation. 3893226633Sdim array_pod_sort(InflateRegs.begin(), InflateRegs.end()); 3894226633Sdim InflateRegs.erase(std::unique(InflateRegs.begin(), InflateRegs.end()), 3895226633Sdim InflateRegs.end()); 3896341825Sdim LLVM_DEBUG(dbgs() << "Trying to inflate " << InflateRegs.size() 3897341825Sdim << " regs.\n"); 3898226633Sdim for (unsigned i = 0, e = InflateRegs.size(); i != e; ++i) { 3899226633Sdim unsigned Reg = InflateRegs[i]; 3900226633Sdim if (MRI->reg_nodbg_empty(Reg)) 3901226633Sdim continue; 3902288943Sdim if (MRI->recomputeRegClass(Reg)) { 3903341825Sdim LLVM_DEBUG(dbgs() << printReg(Reg) << " inflated to " 3904341825Sdim << TRI->getRegClassName(MRI->getRegClass(Reg)) << '\n'); 3905296417Sdim ++NumInflated; 3906296417Sdim 3907280031Sdim LiveInterval &LI = LIS->getInterval(Reg); 3908296417Sdim if (LI.hasSubRanges()) { 3909280031Sdim // If the inflated register class does not support subregisters anymore 3910280031Sdim // remove the subranges. 3911296417Sdim if (!MRI->shouldTrackSubRegLiveness(Reg)) { 3912296417Sdim LI.clearSubRanges(); 3913296417Sdim } else { 3914288943Sdim#ifndef NDEBUG 3915296417Sdim LaneBitmask MaxMask = MRI->getMaxLaneMaskForVReg(Reg); 3916296417Sdim // If subranges are still supported, then the same subregs 3917296417Sdim // should still be supported. 3918296417Sdim for (LiveInterval::SubRange &S : LI.subranges()) { 3919314564Sdim assert((S.LaneMask & ~MaxMask).none()); 3920296417Sdim } 3921296417Sdim#endif 3922280031Sdim } 3923280031Sdim } 3924226633Sdim } 3925226633Sdim } 3926226633Sdim 3927341825Sdim LLVM_DEBUG(dump()); 3928224145Sdim if (VerifyCoalescing) 3929226633Sdim MF->verify(this, "After register coalescing"); 3930224145Sdim return true; 3931224145Sdim} 3932224145Sdim 3933224145Sdimvoid RegisterCoalescer::print(raw_ostream &O, const Module* m) const { 3934226633Sdim LIS->print(O, m); 3935224145Sdim} 3936