RegisterCoalescer.cpp revision 327952
1327952Sdim//===- RegisterCoalescer.cpp - Generic Register Coalescing Interface ------===//
2193323Sed//
3193323Sed//                     The LLVM Compiler Infrastructure
4193323Sed//
5193323Sed// This file is distributed under the University of Illinois Open Source
6193323Sed// License. See LICENSE.TXT for details.
7193323Sed//
8193323Sed//===----------------------------------------------------------------------===//
9193323Sed//
10193323Sed// This file implements the generic RegisterCoalescer interface which
11193323Sed// is used as the common interface used by all clients and
12193323Sed// implementations of register coalescing.
13193323Sed//
14193323Sed//===----------------------------------------------------------------------===//
15193323Sed
16224145Sdim#include "RegisterCoalescer.h"
17327952Sdim#include "llvm/ADT/ArrayRef.h"
18327952Sdim#include "llvm/ADT/BitVector.h"
19239462Sdim#include "llvm/ADT/STLExtras.h"
20327952Sdim#include "llvm/ADT/SmallPtrSet.h"
21327952Sdim#include "llvm/ADT/SmallVector.h"
22239462Sdim#include "llvm/ADT/Statistic.h"
23239462Sdim#include "llvm/Analysis/AliasAnalysis.h"
24327952Sdim#include "llvm/CodeGen/LiveInterval.h"
25327952Sdim#include "llvm/CodeGen/LiveIntervals.h"
26239462Sdim#include "llvm/CodeGen/LiveRangeEdit.h"
27327952Sdim#include "llvm/CodeGen/MachineBasicBlock.h"
28327952Sdim#include "llvm/CodeGen/MachineFunction.h"
29327952Sdim#include "llvm/CodeGen/MachineFunctionPass.h"
30224145Sdim#include "llvm/CodeGen/MachineInstr.h"
31321369Sdim#include "llvm/CodeGen/MachineInstrBuilder.h"
32224145Sdim#include "llvm/CodeGen/MachineLoopInfo.h"
33327952Sdim#include "llvm/CodeGen/MachineOperand.h"
34224145Sdim#include "llvm/CodeGen/MachineRegisterInfo.h"
35224145Sdim#include "llvm/CodeGen/Passes.h"
36239462Sdim#include "llvm/CodeGen/RegisterClassInfo.h"
37327952Sdim#include "llvm/CodeGen/SlotIndexes.h"
38327952Sdim#include "llvm/CodeGen/TargetInstrInfo.h"
39327952Sdim#include "llvm/CodeGen/TargetOpcodes.h"
40327952Sdim#include "llvm/CodeGen/TargetRegisterInfo.h"
41327952Sdim#include "llvm/CodeGen/TargetSubtargetInfo.h"
42327952Sdim#include "llvm/IR/DebugLoc.h"
43327952Sdim#include "llvm/MC/LaneBitmask.h"
44327952Sdim#include "llvm/MC/MCInstrDesc.h"
45327952Sdim#include "llvm/MC/MCRegisterInfo.h"
46249423Sdim#include "llvm/Pass.h"
47224145Sdim#include "llvm/Support/CommandLine.h"
48327952Sdim#include "llvm/Support/Compiler.h"
49224145Sdim#include "llvm/Support/Debug.h"
50224145Sdim#include "llvm/Support/ErrorHandling.h"
51224145Sdim#include "llvm/Support/raw_ostream.h"
52224145Sdim#include <algorithm>
53327952Sdim#include <cassert>
54327952Sdim#include <iterator>
55327952Sdim#include <limits>
56327952Sdim#include <tuple>
57327952Sdim#include <utility>
58327952Sdim#include <vector>
59327952Sdim
60193323Sedusing namespace llvm;
61193323Sed
62276479Sdim#define DEBUG_TYPE "regalloc"
63276479Sdim
64224145SdimSTATISTIC(numJoins    , "Number of interval joins performed");
65224145SdimSTATISTIC(numCrossRCs , "Number of cross class joins performed");
66224145SdimSTATISTIC(numCommutes , "Number of instruction commuting performed");
67224145SdimSTATISTIC(numExtends  , "Number of copies extended");
68224145SdimSTATISTIC(NumReMats   , "Number of instructions re-materialized");
69226633SdimSTATISTIC(NumInflated , "Number of register classes inflated");
70243830SdimSTATISTIC(NumLaneConflicts, "Number of dead lane conflicts tested");
71243830SdimSTATISTIC(NumLaneResolves,  "Number of dead lane conflicts resolved");
72224145Sdim
73327952Sdimstatic cl::opt<bool> EnableJoining("join-liveintervals",
74327952Sdim                                   cl::desc("Coalesce copies (default=true)"),
75327952Sdim                                   cl::init(true), cl::Hidden);
76224145Sdim
77288943Sdimstatic cl::opt<bool> UseTerminalRule("terminal-rule",
78288943Sdim                                     cl::desc("Apply the terminal rule"),
79288943Sdim                                     cl::init(false), cl::Hidden);
80288943Sdim
81288943Sdim/// Temporary flag to test critical edge unsplitting.
82224145Sdimstatic cl::opt<bool>
83249423SdimEnableJoinSplits("join-splitedges",
84249423Sdim  cl::desc("Coalesce copies on split edges (default=subtarget)"), cl::Hidden);
85249423Sdim
86288943Sdim/// Temporary flag to test global copy optimization.
87249423Sdimstatic cl::opt<cl::boolOrDefault>
88249423SdimEnableGlobalCopies("join-globalcopies",
89249423Sdim  cl::desc("Coalesce copies that span blocks (default=subtarget)"),
90249423Sdim  cl::init(cl::BOU_UNSET), cl::Hidden);
91249423Sdim
92249423Sdimstatic cl::opt<bool>
93224145SdimVerifyCoalescing("verify-coalescing",
94224145Sdim         cl::desc("Verify machine instrs before and after register coalescing"),
95224145Sdim         cl::Hidden);
96224145Sdim
97226633Sdimnamespace {
98327952Sdim
99239462Sdim  class RegisterCoalescer : public MachineFunctionPass,
100239462Sdim                            private LiveRangeEdit::Delegate {
101226633Sdim    MachineFunction* MF;
102226633Sdim    MachineRegisterInfo* MRI;
103226633Sdim    const TargetRegisterInfo* TRI;
104226633Sdim    const TargetInstrInfo* TII;
105226633Sdim    LiveIntervals *LIS;
106226633Sdim    const MachineLoopInfo* Loops;
107226633Sdim    AliasAnalysis *AA;
108226633Sdim    RegisterClassInfo RegClassInfo;
109226633Sdim
110280031Sdim    /// A LaneMask to remember on which subregister live ranges we need to call
111280031Sdim    /// shrinkToUses() later.
112296417Sdim    LaneBitmask ShrinkMask;
113280031Sdim
114280031Sdim    /// True if the main range of the currently coalesced intervals should be
115280031Sdim    /// checked for smaller live intervals.
116280031Sdim    bool ShrinkMainRange;
117280031Sdim
118249423Sdim    /// \brief True if the coalescer should aggressively coalesce global copies
119249423Sdim    /// in favor of keeping local copies.
120249423Sdim    bool JoinGlobalCopies;
121249423Sdim
122249423Sdim    /// \brief True if the coalescer should aggressively coalesce fall-thru
123249423Sdim    /// blocks exclusively containing copies.
124249423Sdim    bool JoinSplitEdges;
125249423Sdim
126280031Sdim    /// Copy instructions yet to be coalesced.
127239462Sdim    SmallVector<MachineInstr*, 8> WorkList;
128249423Sdim    SmallVector<MachineInstr*, 8> LocalWorkList;
129226633Sdim
130280031Sdim    /// Set of instruction pointers that have been erased, and
131239462Sdim    /// that may be present in WorkList.
132239462Sdim    SmallPtrSet<MachineInstr*, 8> ErasedInstrs;
133226633Sdim
134239462Sdim    /// Dead instructions that are about to be deleted.
135239462Sdim    SmallVector<MachineInstr*, 8> DeadDefs;
136226633Sdim
137239462Sdim    /// Virtual registers to be considered for register class inflation.
138239462Sdim    SmallVector<unsigned, 8> InflateRegs;
139226633Sdim
140239462Sdim    /// Recursively eliminate dead defs in DeadDefs.
141239462Sdim    void eliminateDeadDefs();
142226633Sdim
143288943Sdim    /// LiveRangeEdit callback for eliminateDeadDefs().
144276479Sdim    void LRE_WillEraseInstruction(MachineInstr *MI) override;
145239462Sdim
146280031Sdim    /// Coalesce the LocalWorkList.
147249423Sdim    void coalesceLocals();
148249423Sdim
149280031Sdim    /// Join compatible live intervals
150239462Sdim    void joinAllIntervals();
151239462Sdim
152280031Sdim    /// Coalesce copies in the specified MBB, putting
153239462Sdim    /// copies that cannot yet be coalesced into WorkList.
154239462Sdim    void copyCoalesceInMBB(MachineBasicBlock *MBB);
155239462Sdim
156288943Sdim    /// Tries to coalesce all copies in CurrList. Returns true if any progress
157288943Sdim    /// was made.
158249423Sdim    bool copyCoalesceWorkList(MutableArrayRef<MachineInstr*> CurrList);
159239462Sdim
160288943Sdim    /// Attempt to join intervals corresponding to SrcReg/DstReg, which are the
161288943Sdim    /// src/dst of the copy instruction CopyMI.  This returns true if the copy
162288943Sdim    /// was successfully coalesced away. If it is not currently possible to
163288943Sdim    /// coalesce this interval, but it may be possible if other things get
164288943Sdim    /// coalesced, then it returns true by reference in 'Again'.
165239462Sdim    bool joinCopy(MachineInstr *TheCopy, bool &Again);
166226633Sdim
167280031Sdim    /// Attempt to join these two intervals.  On failure, this
168226633Sdim    /// returns false.  The output "SrcInt" will not have been modified, so we
169226633Sdim    /// can use this information below to update aliases.
170239462Sdim    bool joinIntervals(CoalescerPair &CP);
171226633Sdim
172243830Sdim    /// Attempt joining two virtual registers. Return true on success.
173243830Sdim    bool joinVirtRegs(CoalescerPair &CP);
174243830Sdim
175239462Sdim    /// Attempt joining with a reserved physreg.
176239462Sdim    bool joinReservedPhysReg(CoalescerPair &CP);
177239462Sdim
178280031Sdim    /// Add the LiveRange @p ToMerge as a subregister liverange of @p LI.
179280031Sdim    /// Subranges in @p LI which only partially interfere with the desired
180280031Sdim    /// LaneMask are split as necessary. @p LaneMask are the lanes that
181280031Sdim    /// @p ToMerge will occupy in the coalescer register. @p LI has its subrange
182280031Sdim    /// lanemasks already adjusted to the coalesced register.
183296417Sdim    void mergeSubRangeInto(LiveInterval &LI, const LiveRange &ToMerge,
184296417Sdim                           LaneBitmask LaneMask, CoalescerPair &CP);
185280031Sdim
186280031Sdim    /// Join the liveranges of two subregisters. Joins @p RRange into
187280031Sdim    /// @p LRange, @p RRange may be invalid afterwards.
188296417Sdim    void joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
189296417Sdim                          LaneBitmask LaneMask, const CoalescerPair &CP);
190280031Sdim
191288943Sdim    /// We found a non-trivially-coalescable copy. If the source value number is
192288943Sdim    /// defined by a copy from the destination reg see if we can merge these two
193288943Sdim    /// destination reg valno# into a single value number, eliminating a copy.
194288943Sdim    /// This returns true if an interval was modified.
195239462Sdim    bool adjustCopiesBackFrom(const CoalescerPair &CP, MachineInstr *CopyMI);
196226633Sdim
197280031Sdim    /// Return true if there are definitions of IntB
198226633Sdim    /// other than BValNo val# that can reach uses of AValno val# of IntA.
199239462Sdim    bool hasOtherReachingDefs(LiveInterval &IntA, LiveInterval &IntB,
200226633Sdim                              VNInfo *AValNo, VNInfo *BValNo);
201226633Sdim
202280031Sdim    /// We found a non-trivially-coalescable copy.
203226633Sdim    /// If the source value number is defined by a commutable instruction and
204226633Sdim    /// its other operand is coalesced to the copy dest register, see if we
205226633Sdim    /// can transform the copy into a noop by commuting the definition.
206288943Sdim    /// This returns true if an interval was modified.
207239462Sdim    bool removeCopyByCommutingDef(const CoalescerPair &CP,MachineInstr *CopyMI);
208226633Sdim
209321369Sdim    /// We found a copy which can be moved to its less frequent predecessor.
210321369Sdim    bool removePartialRedundancy(const CoalescerPair &CP, MachineInstr &CopyMI);
211321369Sdim
212280031Sdim    /// If the source of a copy is defined by a
213226633Sdim    /// trivial computation, replace the copy by rematerialize the definition.
214288943Sdim    bool reMaterializeTrivialDef(const CoalescerPair &CP, MachineInstr *CopyMI,
215261991Sdim                                 bool &IsDefCopy);
216226633Sdim
217288943Sdim    /// Return true if a copy involving a physreg should be joined.
218249423Sdim    bool canJoinPhys(const CoalescerPair &CP);
219226633Sdim
220288943Sdim    /// Replace all defs and uses of SrcReg to DstReg and update the subregister
221288943Sdim    /// number if it is not zero. If DstReg is a physical register and the
222288943Sdim    /// existing subregister number of the def / use being updated is not zero,
223288943Sdim    /// make sure to set it to the correct physical subregister.
224239462Sdim    void updateRegDefsUses(unsigned SrcReg, unsigned DstReg, unsigned SubIdx);
225226633Sdim
226309124Sdim    /// If the given machine operand reads only undefined lanes add an undef
227309124Sdim    /// flag.
228309124Sdim    /// This can happen when undef uses were previously concealed by a copy
229309124Sdim    /// which we coalesced. Example:
230327952Sdim    ///    %0:sub0<def,read-undef> = ...
231327952Sdim    ///    %1 = COPY %0           <-- Coalescing COPY reveals undef
232327952Sdim    ///       = use %1:sub1       <-- hidden undef use
233309124Sdim    void addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx,
234309124Sdim                      MachineOperand &MO, unsigned SubRegIdx);
235309124Sdim
236280031Sdim    /// Handle copies of undef values.
237288943Sdim    /// Returns true if @p CopyMI was a copy of an undef value and eliminated.
238280031Sdim    bool eliminateUndefCopy(MachineInstr *CopyMI);
239226633Sdim
240288943Sdim    /// Check whether or not we should apply the terminal rule on the
241288943Sdim    /// destination (Dst) of \p Copy.
242288943Sdim    /// When the terminal rule applies, Copy is not profitable to
243288943Sdim    /// coalesce.
244288943Sdim    /// Dst is terminal if it has exactly one affinity (Dst, Src) and
245288943Sdim    /// at least one interference (Dst, Dst2). If Dst is terminal, the
246288943Sdim    /// terminal rule consists in checking that at least one of
247288943Sdim    /// interfering node, say Dst2, has an affinity of equal or greater
248288943Sdim    /// weight with Src.
249288943Sdim    /// In that case, Dst2 and Dst will not be able to be both coalesced
250288943Sdim    /// with Src. Since Dst2 exposes more coalescing opportunities than
251288943Sdim    /// Dst, we can drop \p Copy.
252288943Sdim    bool applyTerminalRule(const MachineInstr &Copy) const;
253288943Sdim
254288943Sdim    /// Wrapper method for \see LiveIntervals::shrinkToUses.
255288943Sdim    /// This method does the proper fixing of the live-ranges when the afore
256288943Sdim    /// mentioned method returns true.
257288943Sdim    void shrinkToUses(LiveInterval *LI,
258288943Sdim                      SmallVectorImpl<MachineInstr * > *Dead = nullptr) {
259296417Sdim      if (LIS->shrinkToUses(LI, Dead)) {
260296417Sdim        /// Check whether or not \p LI is composed by multiple connected
261296417Sdim        /// components and if that is the case, fix that.
262296417Sdim        SmallVector<LiveInterval*, 8> SplitLIs;
263296417Sdim        LIS->splitSeparateComponents(*LI, SplitLIs);
264296417Sdim      }
265288943Sdim    }
266288943Sdim
267327952Sdim    /// Wrapper Method to do all the necessary work when an Instruction is
268327952Sdim    /// deleted.
269327952Sdim    /// Optimizations should use this to make sure that deleted instructions
270327952Sdim    /// are always accounted for.
271327952Sdim    void deleteInstr(MachineInstr* MI) {
272327952Sdim      ErasedInstrs.insert(MI);
273327952Sdim      LIS->RemoveMachineInstrFromMaps(*MI);
274327952Sdim      MI->eraseFromParent();
275327952Sdim    }
276327952Sdim
277226633Sdim  public:
278288943Sdim    static char ID; ///< Class identification, replacement for typeinfo
279327952Sdim
280226633Sdim    RegisterCoalescer() : MachineFunctionPass(ID) {
281226633Sdim      initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry());
282226633Sdim    }
283226633Sdim
284276479Sdim    void getAnalysisUsage(AnalysisUsage &AU) const override;
285226633Sdim
286276479Sdim    void releaseMemory() override;
287226633Sdim
288280031Sdim    /// This is the pass entry point.
289276479Sdim    bool runOnMachineFunction(MachineFunction&) override;
290226633Sdim
291280031Sdim    /// Implement the dump method.
292276479Sdim    void print(raw_ostream &O, const Module* = nullptr) const override;
293226633Sdim  };
294327952Sdim
295288943Sdim} // end anonymous namespace
296226633Sdim
297327952Sdimchar RegisterCoalescer::ID = 0;
298327952Sdim
299234353Sdimchar &llvm::RegisterCoalescerID = RegisterCoalescer::ID;
300226633Sdim
301224145SdimINITIALIZE_PASS_BEGIN(RegisterCoalescer, "simple-register-coalescing",
302224145Sdim                      "Simple Register Coalescing", false, false)
303224145SdimINITIALIZE_PASS_DEPENDENCY(LiveIntervals)
304224145SdimINITIALIZE_PASS_DEPENDENCY(SlotIndexes)
305224145SdimINITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
306296417SdimINITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
307224145SdimINITIALIZE_PASS_END(RegisterCoalescer, "simple-register-coalescing",
308224145Sdim                    "Simple Register Coalescing", false, false)
309224145Sdim
310224145Sdimstatic bool isMoveInstr(const TargetRegisterInfo &tri, const MachineInstr *MI,
311224145Sdim                        unsigned &Src, unsigned &Dst,
312224145Sdim                        unsigned &SrcSub, unsigned &DstSub) {
313210299Sed  if (MI->isCopy()) {
314210299Sed    Dst = MI->getOperand(0).getReg();
315210299Sed    DstSub = MI->getOperand(0).getSubReg();
316210299Sed    Src = MI->getOperand(1).getReg();
317210299Sed    SrcSub = MI->getOperand(1).getSubReg();
318210299Sed  } else if (MI->isSubregToReg()) {
319210299Sed    Dst = MI->getOperand(0).getReg();
320243830Sdim    DstSub = tri.composeSubRegIndices(MI->getOperand(0).getSubReg(),
321243830Sdim                                      MI->getOperand(3).getImm());
322210299Sed    Src = MI->getOperand(2).getReg();
323210299Sed    SrcSub = MI->getOperand(2).getSubReg();
324212904Sdim  } else
325210299Sed    return false;
326210299Sed  return true;
327210299Sed}
328210299Sed
329288943Sdim/// Return true if this block should be vacated by the coalescer to eliminate
330288943Sdim/// branches. The important cases to handle in the coalescer are critical edges
331288943Sdim/// split during phi elimination which contain only copies. Simple blocks that
332288943Sdim/// contain non-branches should also be vacated, but this can be handled by an
333288943Sdim/// earlier pass similar to early if-conversion.
334249423Sdimstatic bool isSplitEdge(const MachineBasicBlock *MBB) {
335249423Sdim  if (MBB->pred_size() != 1 || MBB->succ_size() != 1)
336249423Sdim    return false;
337249423Sdim
338276479Sdim  for (const auto &MI : *MBB) {
339276479Sdim    if (!MI.isCopyLike() && !MI.isUnconditionalBranch())
340249423Sdim      return false;
341249423Sdim  }
342249423Sdim  return true;
343249423Sdim}
344249423Sdim
345210299Sedbool CoalescerPair::setRegisters(const MachineInstr *MI) {
346239462Sdim  SrcReg = DstReg = 0;
347239462Sdim  SrcIdx = DstIdx = 0;
348276479Sdim  NewRC = nullptr;
349226633Sdim  Flipped = CrossClass = false;
350210299Sed
351210299Sed  unsigned Src, Dst, SrcSub, DstSub;
352226633Sdim  if (!isMoveInstr(TRI, MI, Src, Dst, SrcSub, DstSub))
353210299Sed    return false;
354226633Sdim  Partial = SrcSub || DstSub;
355210299Sed
356210299Sed  // If one register is a physreg, it must be Dst.
357210299Sed  if (TargetRegisterInfo::isPhysicalRegister(Src)) {
358210299Sed    if (TargetRegisterInfo::isPhysicalRegister(Dst))
359210299Sed      return false;
360210299Sed    std::swap(Src, Dst);
361210299Sed    std::swap(SrcSub, DstSub);
362226633Sdim    Flipped = true;
363210299Sed  }
364210299Sed
365327952Sdim  const MachineRegisterInfo &MRI = MI->getMF()->getRegInfo();
366210299Sed
367210299Sed  if (TargetRegisterInfo::isPhysicalRegister(Dst)) {
368210299Sed    // Eliminate DstSub on a physreg.
369210299Sed    if (DstSub) {
370226633Sdim      Dst = TRI.getSubReg(Dst, DstSub);
371210299Sed      if (!Dst) return false;
372210299Sed      DstSub = 0;
373210299Sed    }
374210299Sed
375210299Sed    // Eliminate SrcSub by picking a corresponding Dst superregister.
376210299Sed    if (SrcSub) {
377226633Sdim      Dst = TRI.getMatchingSuperReg(Dst, SrcSub, MRI.getRegClass(Src));
378210299Sed      if (!Dst) return false;
379210299Sed    } else if (!MRI.getRegClass(Src)->contains(Dst)) {
380210299Sed      return false;
381210299Sed    }
382210299Sed  } else {
383210299Sed    // Both registers are virtual.
384239462Sdim    const TargetRegisterClass *SrcRC = MRI.getRegClass(Src);
385239462Sdim    const TargetRegisterClass *DstRC = MRI.getRegClass(Dst);
386210299Sed
387210299Sed    // Both registers have subreg indices.
388210299Sed    if (SrcSub && DstSub) {
389239462Sdim      // Copies between different sub-registers are never coalescable.
390239462Sdim      if (Src == Dst && SrcSub != DstSub)
391210299Sed        return false;
392239462Sdim
393239462Sdim      NewRC = TRI.getCommonSuperRegClass(SrcRC, SrcSub, DstRC, DstSub,
394239462Sdim                                         SrcIdx, DstIdx);
395239462Sdim      if (!NewRC)
396210299Sed        return false;
397239462Sdim    } else if (DstSub) {
398239462Sdim      // SrcReg will be merged with a sub-register of DstReg.
399239462Sdim      SrcIdx = DstSub;
400239462Sdim      NewRC = TRI.getMatchingSuperRegClass(DstRC, SrcRC, DstSub);
401239462Sdim    } else if (SrcSub) {
402239462Sdim      // DstReg will be merged with a sub-register of SrcReg.
403239462Sdim      DstIdx = SrcSub;
404239462Sdim      NewRC = TRI.getMatchingSuperRegClass(SrcRC, DstRC, SrcSub);
405239462Sdim    } else {
406239462Sdim      // This is a straight copy without sub-registers.
407239462Sdim      NewRC = TRI.getCommonSubClass(DstRC, SrcRC);
408210299Sed    }
409210299Sed
410239462Sdim    // The combined constraint may be impossible to satisfy.
411239462Sdim    if (!NewRC)
412239462Sdim      return false;
413239462Sdim
414239462Sdim    // Prefer SrcReg to be a sub-register of DstReg.
415239462Sdim    // FIXME: Coalescer should support subregs symmetrically.
416239462Sdim    if (DstIdx && !SrcIdx) {
417210299Sed      std::swap(Src, Dst);
418239462Sdim      std::swap(SrcIdx, DstIdx);
419239462Sdim      Flipped = !Flipped;
420210299Sed    }
421210299Sed
422226633Sdim    CrossClass = NewRC != DstRC || NewRC != SrcRC;
423210299Sed  }
424210299Sed  // Check our invariants
425210299Sed  assert(TargetRegisterInfo::isVirtualRegister(Src) && "Src must be virtual");
426210299Sed  assert(!(TargetRegisterInfo::isPhysicalRegister(Dst) && DstSub) &&
427210299Sed         "Cannot have a physical SubIdx");
428226633Sdim  SrcReg = Src;
429226633Sdim  DstReg = Dst;
430210299Sed  return true;
431210299Sed}
432210299Sed
433210299Sedbool CoalescerPair::flip() {
434239462Sdim  if (TargetRegisterInfo::isPhysicalRegister(DstReg))
435210299Sed    return false;
436226633Sdim  std::swap(SrcReg, DstReg);
437239462Sdim  std::swap(SrcIdx, DstIdx);
438226633Sdim  Flipped = !Flipped;
439210299Sed  return true;
440210299Sed}
441210299Sed
442210299Sedbool CoalescerPair::isCoalescable(const MachineInstr *MI) const {
443210299Sed  if (!MI)
444210299Sed    return false;
445210299Sed  unsigned Src, Dst, SrcSub, DstSub;
446226633Sdim  if (!isMoveInstr(TRI, MI, Src, Dst, SrcSub, DstSub))
447210299Sed    return false;
448210299Sed
449226633Sdim  // Find the virtual register that is SrcReg.
450226633Sdim  if (Dst == SrcReg) {
451210299Sed    std::swap(Src, Dst);
452210299Sed    std::swap(SrcSub, DstSub);
453226633Sdim  } else if (Src != SrcReg) {
454210299Sed    return false;
455210299Sed  }
456210299Sed
457226633Sdim  // Now check that Dst matches DstReg.
458226633Sdim  if (TargetRegisterInfo::isPhysicalRegister(DstReg)) {
459210299Sed    if (!TargetRegisterInfo::isPhysicalRegister(Dst))
460210299Sed      return false;
461239462Sdim    assert(!DstIdx && !SrcIdx && "Inconsistent CoalescerPair state.");
462210299Sed    // DstSub could be set for a physreg from INSERT_SUBREG.
463210299Sed    if (DstSub)
464226633Sdim      Dst = TRI.getSubReg(Dst, DstSub);
465210299Sed    // Full copy of Src.
466210299Sed    if (!SrcSub)
467226633Sdim      return DstReg == Dst;
468210299Sed    // This is a partial register copy. Check that the parts match.
469226633Sdim    return TRI.getSubReg(DstReg, SrcSub) == Dst;
470210299Sed  } else {
471226633Sdim    // DstReg is virtual.
472226633Sdim    if (DstReg != Dst)
473210299Sed      return false;
474210299Sed    // Registers match, do the subregisters line up?
475243830Sdim    return TRI.composeSubRegIndices(SrcIdx, SrcSub) ==
476243830Sdim           TRI.composeSubRegIndices(DstIdx, DstSub);
477210299Sed  }
478210299Sed}
479210299Sed
480224145Sdimvoid RegisterCoalescer::getAnalysisUsage(AnalysisUsage &AU) const {
481224145Sdim  AU.setPreservesCFG();
482296417Sdim  AU.addRequired<AAResultsWrapperPass>();
483224145Sdim  AU.addRequired<LiveIntervals>();
484224145Sdim  AU.addPreserved<LiveIntervals>();
485224145Sdim  AU.addPreserved<SlotIndexes>();
486224145Sdim  AU.addRequired<MachineLoopInfo>();
487224145Sdim  AU.addPreserved<MachineLoopInfo>();
488224145Sdim  AU.addPreservedID(MachineDominatorsID);
489224145Sdim  MachineFunctionPass::getAnalysisUsage(AU);
490224145Sdim}
491224145Sdim
492239462Sdimvoid RegisterCoalescer::eliminateDeadDefs() {
493261991Sdim  SmallVector<unsigned, 8> NewRegs;
494276479Sdim  LiveRangeEdit(nullptr, NewRegs, *MF, *LIS,
495276479Sdim                nullptr, this).eliminateDeadDefs(DeadDefs);
496239462Sdim}
497224145Sdim
498239462Sdimvoid RegisterCoalescer::LRE_WillEraseInstruction(MachineInstr *MI) {
499239462Sdim  // MI may be in WorkList. Make sure we don't visit it.
500239462Sdim  ErasedInstrs.insert(MI);
501224145Sdim}
502224145Sdim
503239462Sdimbool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP,
504239462Sdim                                             MachineInstr *CopyMI) {
505239462Sdim  assert(!CP.isPartial() && "This doesn't work for partial copies.");
506239462Sdim  assert(!CP.isPhys() && "This doesn't work for physreg copies.");
507224145Sdim
508224145Sdim  LiveInterval &IntA =
509226633Sdim    LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
510224145Sdim  LiveInterval &IntB =
511226633Sdim    LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg());
512309124Sdim  SlotIndex CopyIdx = LIS->getInstructionIndex(*CopyMI).getRegSlot();
513224145Sdim
514288943Sdim  // We have a non-trivially-coalescable copy with IntA being the source and
515288943Sdim  // IntB being the dest, thus this defines a value number in IntB.  If the
516288943Sdim  // source value number (in IntA) is defined by a copy from B, see if we can
517288943Sdim  // merge these two pieces of B into a single value number, eliminating a copy.
518288943Sdim  // For example:
519288943Sdim  //
520288943Sdim  //  A3 = B0
521288943Sdim  //    ...
522288943Sdim  //  B1 = A3      <- this copy
523288943Sdim  //
524288943Sdim  // In this case, B0 can be extended to where the B1 copy lives, allowing the
525288943Sdim  // B1 value number to be replaced with B0 (which simplifies the B
526288943Sdim  // liveinterval).
527288943Sdim
528261991Sdim  // BValNo is a value number in B that is defined by a copy from A.  'B1' in
529224145Sdim  // the example above.
530261991Sdim  LiveInterval::iterator BS = IntB.FindSegmentContaining(CopyIdx);
531261991Sdim  if (BS == IntB.end()) return false;
532261991Sdim  VNInfo *BValNo = BS->valno;
533224145Sdim
534224145Sdim  // Get the location that B is defined at.  Two options: either this value has
535224145Sdim  // an unknown definition point or it is defined at CopyIdx.  If unknown, we
536224145Sdim  // can't process it.
537234353Sdim  if (BValNo->def != CopyIdx) return false;
538224145Sdim
539224145Sdim  // AValNo is the value number in A that defines the copy, A3 in the example.
540234353Sdim  SlotIndex CopyUseIdx = CopyIdx.getRegSlot(true);
541261991Sdim  LiveInterval::iterator AS = IntA.FindSegmentContaining(CopyUseIdx);
542261991Sdim  // The live segment might not exist after fun with physreg coalescing.
543261991Sdim  if (AS == IntA.end()) return false;
544261991Sdim  VNInfo *AValNo = AS->valno;
545224145Sdim
546224145Sdim  // If AValNo is defined as a copy from IntB, we can potentially process this.
547224145Sdim  // Get the instruction that defines this value number.
548234353Sdim  MachineInstr *ACopyMI = LIS->getInstructionFromIndex(AValNo->def);
549243830Sdim  // Don't allow any partial copies, even if isCoalescable() allows them.
550243830Sdim  if (!CP.isCoalescable(ACopyMI) || !ACopyMI->isFullCopy())
551224145Sdim    return false;
552224145Sdim
553261991Sdim  // Get the Segment in IntB that this value number starts with.
554261991Sdim  LiveInterval::iterator ValS =
555261991Sdim    IntB.FindSegmentContaining(AValNo->def.getPrevSlot());
556261991Sdim  if (ValS == IntB.end())
557224145Sdim    return false;
558224145Sdim
559261991Sdim  // Make sure that the end of the live segment is inside the same block as
560224145Sdim  // CopyMI.
561261991Sdim  MachineInstr *ValSEndInst =
562261991Sdim    LIS->getInstructionFromIndex(ValS->end.getPrevSlot());
563261991Sdim  if (!ValSEndInst || ValSEndInst->getParent() != CopyMI->getParent())
564224145Sdim    return false;
565224145Sdim
566261991Sdim  // Okay, we now know that ValS ends in the same block that the CopyMI
567261991Sdim  // live-range starts.  If there are no intervening live segments between them
568261991Sdim  // in IntB, we can merge them.
569261991Sdim  if (ValS+1 != BS) return false;
570224145Sdim
571327952Sdim  DEBUG(dbgs() << "Extending: " << printReg(IntB.reg, TRI));
572224145Sdim
573261991Sdim  SlotIndex FillerStart = ValS->end, FillerEnd = BS->start;
574224145Sdim  // We are about to delete CopyMI, so need to remove it as the 'instruction
575224145Sdim  // that defines this value #'. Update the valnum with the new defining
576224145Sdim  // instruction #.
577234353Sdim  BValNo->def = FillerStart;
578224145Sdim
579224145Sdim  // Okay, we can merge them.  We need to insert a new liverange:
580261991Sdim  // [ValS.end, BS.begin) of either value number, then we merge the
581224145Sdim  // two value numbers.
582261991Sdim  IntB.addSegment(LiveInterval::Segment(FillerStart, FillerEnd, BValNo));
583224145Sdim
584224145Sdim  // Okay, merge "B1" into the same value number as "B0".
585261991Sdim  if (BValNo != ValS->valno)
586261991Sdim    IntB.MergeValueNumberInto(BValNo, ValS->valno);
587280031Sdim
588280031Sdim  // Do the same for the subregister segments.
589280031Sdim  for (LiveInterval::SubRange &S : IntB.subranges()) {
590280031Sdim    VNInfo *SubBValNo = S.getVNInfoAt(CopyIdx);
591280031Sdim    S.addSegment(LiveInterval::Segment(FillerStart, FillerEnd, SubBValNo));
592280031Sdim    VNInfo *SubValSNo = S.getVNInfoAt(AValNo->def.getPrevSlot());
593280031Sdim    if (SubBValNo != SubValSNo)
594280031Sdim      S.MergeValueNumberInto(SubBValNo, SubValSNo);
595280031Sdim  }
596280031Sdim
597239462Sdim  DEBUG(dbgs() << "   result = " << IntB << '\n');
598224145Sdim
599224145Sdim  // If the source instruction was killing the source register before the
600224145Sdim  // merge, unset the isKill marker given the live range has been extended.
601261991Sdim  int UIdx = ValSEndInst->findRegisterUseOperandIdx(IntB.reg, true);
602224145Sdim  if (UIdx != -1) {
603261991Sdim    ValSEndInst->getOperand(UIdx).setIsKill(false);
604224145Sdim  }
605224145Sdim
606234353Sdim  // Rewrite the copy. If the copy instruction was killing the destination
607234353Sdim  // register before the merge, find the last use and trim the live range. That
608234353Sdim  // will also add the isKill marker.
609239462Sdim  CopyMI->substituteRegister(IntA.reg, IntB.reg, 0, *TRI);
610261991Sdim  if (AS->end == CopyIdx)
611288943Sdim    shrinkToUses(&IntA);
612224145Sdim
613224145Sdim  ++numExtends;
614224145Sdim  return true;
615224145Sdim}
616224145Sdim
617239462Sdimbool RegisterCoalescer::hasOtherReachingDefs(LiveInterval &IntA,
618239462Sdim                                             LiveInterval &IntB,
619239462Sdim                                             VNInfo *AValNo,
620239462Sdim                                             VNInfo *BValNo) {
621239462Sdim  // If AValNo has PHI kills, conservatively assume that IntB defs can reach
622239462Sdim  // the PHI values.
623239462Sdim  if (LIS->hasPHIKill(IntA, AValNo))
624239462Sdim    return true;
625239462Sdim
626280031Sdim  for (LiveRange::Segment &ASeg : IntA.segments) {
627280031Sdim    if (ASeg.valno != AValNo) continue;
628261991Sdim    LiveInterval::iterator BI =
629280031Sdim      std::upper_bound(IntB.begin(), IntB.end(), ASeg.start);
630261991Sdim    if (BI != IntB.begin())
631224145Sdim      --BI;
632280031Sdim    for (; BI != IntB.end() && ASeg.end >= BI->start; ++BI) {
633224145Sdim      if (BI->valno == BValNo)
634224145Sdim        continue;
635280031Sdim      if (BI->start <= ASeg.start && BI->end > ASeg.start)
636224145Sdim        return true;
637280031Sdim      if (BI->start > ASeg.start && BI->start < ASeg.end)
638224145Sdim        return true;
639224145Sdim    }
640224145Sdim  }
641224145Sdim  return false;
642224145Sdim}
643224145Sdim
644280031Sdim/// Copy segements with value number @p SrcValNo from liverange @p Src to live
645280031Sdim/// range @Dst and use value number @p DstValNo there.
646280031Sdimstatic void addSegmentsWithValNo(LiveRange &Dst, VNInfo *DstValNo,
647327952Sdim                                 const LiveRange &Src, const VNInfo *SrcValNo) {
648280031Sdim  for (const LiveRange::Segment &S : Src.segments) {
649280031Sdim    if (S.valno != SrcValNo)
650280031Sdim      continue;
651280031Sdim    Dst.addSegment(LiveRange::Segment(S.start, S.end, DstValNo));
652280031Sdim  }
653280031Sdim}
654280031Sdim
655239462Sdimbool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
656239462Sdim                                                 MachineInstr *CopyMI) {
657280031Sdim  assert(!CP.isPhys());
658224145Sdim
659224145Sdim  LiveInterval &IntA =
660280031Sdim      LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
661224145Sdim  LiveInterval &IntB =
662280031Sdim      LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg());
663224145Sdim
664288943Sdim  // We found a non-trivially-coalescable copy with IntA being the source and
665288943Sdim  // IntB being the dest, thus this defines a value number in IntB.  If the
666288943Sdim  // source value number (in IntA) is defined by a commutable instruction and
667288943Sdim  // its other operand is coalesced to the copy dest register, see if we can
668288943Sdim  // transform the copy into a noop by commuting the definition. For example,
669288943Sdim  //
670327952Sdim  //  A3 = op A2 killed B0
671288943Sdim  //    ...
672288943Sdim  //  B1 = A3      <- this copy
673288943Sdim  //    ...
674288943Sdim  //     = op A3   <- more uses
675288943Sdim  //
676288943Sdim  // ==>
677288943Sdim  //
678327952Sdim  //  B2 = op B0 killed A2
679288943Sdim  //    ...
680288943Sdim  //  B1 = B2      <- now an identity copy
681288943Sdim  //    ...
682288943Sdim  //     = op B2   <- more uses
683288943Sdim
684261991Sdim  // BValNo is a value number in B that is defined by a copy from A. 'B1' in
685224145Sdim  // the example above.
686309124Sdim  SlotIndex CopyIdx = LIS->getInstructionIndex(*CopyMI).getRegSlot();
687224145Sdim  VNInfo *BValNo = IntB.getVNInfoAt(CopyIdx);
688280031Sdim  assert(BValNo != nullptr && BValNo->def == CopyIdx);
689224145Sdim
690224145Sdim  // AValNo is the value number in A that defines the copy, A3 in the example.
691234353Sdim  VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx.getRegSlot(true));
692280031Sdim  assert(AValNo && !AValNo->isUnused() && "COPY source not live");
693280031Sdim  if (AValNo->isPHIDef())
694224145Sdim    return false;
695226633Sdim  MachineInstr *DefMI = LIS->getInstructionFromIndex(AValNo->def);
696224145Sdim  if (!DefMI)
697224145Sdim    return false;
698234353Sdim  if (!DefMI->isCommutable())
699224145Sdim    return false;
700224145Sdim  // If DefMI is a two-address instruction then commuting it will change the
701224145Sdim  // destination register.
702224145Sdim  int DefIdx = DefMI->findRegisterDefOperandIdx(IntA.reg);
703224145Sdim  assert(DefIdx != -1);
704224145Sdim  unsigned UseOpIdx;
705224145Sdim  if (!DefMI->isRegTiedToUseOperand(DefIdx, &UseOpIdx))
706224145Sdim    return false;
707296417Sdim
708296417Sdim  // FIXME: The code below tries to commute 'UseOpIdx' operand with some other
709296417Sdim  // commutable operand which is expressed by 'CommuteAnyOperandIndex'value
710296417Sdim  // passed to the method. That _other_ operand is chosen by
711296417Sdim  // the findCommutedOpIndices() method.
712296417Sdim  //
713296417Sdim  // That is obviously an area for improvement in case of instructions having
714296417Sdim  // more than 2 operands. For example, if some instruction has 3 commutable
715296417Sdim  // operands then all possible variants (i.e. op#1<->op#2, op#1<->op#3,
716296417Sdim  // op#2<->op#3) of commute transformation should be considered/tried here.
717296417Sdim  unsigned NewDstIdx = TargetInstrInfo::CommuteAnyOperandIndex;
718309124Sdim  if (!TII->findCommutedOpIndices(*DefMI, UseOpIdx, NewDstIdx))
719224145Sdim    return false;
720224145Sdim
721224145Sdim  MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx);
722224145Sdim  unsigned NewReg = NewDstMO.getReg();
723261991Sdim  if (NewReg != IntB.reg || !IntB.Query(AValNo->def).isKill())
724224145Sdim    return false;
725224145Sdim
726224145Sdim  // Make sure there are no other definitions of IntB that would reach the
727224145Sdim  // uses which the new definition can reach.
728239462Sdim  if (hasOtherReachingDefs(IntA, IntB, AValNo, BValNo))
729224145Sdim    return false;
730224145Sdim
731224145Sdim  // If some of the uses of IntA.reg is already coalesced away, return false.
732224145Sdim  // It's not possible to determine whether it's safe to perform the coalescing.
733276479Sdim  for (MachineOperand &MO : MRI->use_nodbg_operands(IntA.reg)) {
734276479Sdim    MachineInstr *UseMI = MO.getParent();
735276479Sdim    unsigned OpNo = &MO - &UseMI->getOperand(0);
736309124Sdim    SlotIndex UseIdx = LIS->getInstructionIndex(*UseMI);
737261991Sdim    LiveInterval::iterator US = IntA.FindSegmentContaining(UseIdx);
738261991Sdim    if (US == IntA.end() || US->valno != AValNo)
739224145Sdim      continue;
740239462Sdim    // If this use is tied to a def, we can't rewrite the register.
741276479Sdim    if (UseMI->isRegTiedToDefOperand(OpNo))
742224145Sdim      return false;
743224145Sdim  }
744224145Sdim
745239462Sdim  DEBUG(dbgs() << "\tremoveCopyByCommutingDef: " << AValNo->def << '\t'
746224145Sdim               << *DefMI);
747224145Sdim
748224145Sdim  // At this point we have decided that it is legal to do this
749224145Sdim  // transformation.  Start by commuting the instruction.
750224145Sdim  MachineBasicBlock *MBB = DefMI->getParent();
751296417Sdim  MachineInstr *NewMI =
752309124Sdim      TII->commuteInstruction(*DefMI, false, UseOpIdx, NewDstIdx);
753224145Sdim  if (!NewMI)
754224145Sdim    return false;
755224145Sdim  if (TargetRegisterInfo::isVirtualRegister(IntA.reg) &&
756224145Sdim      TargetRegisterInfo::isVirtualRegister(IntB.reg) &&
757226633Sdim      !MRI->constrainRegClass(IntB.reg, MRI->getRegClass(IntA.reg)))
758224145Sdim    return false;
759224145Sdim  if (NewMI != DefMI) {
760309124Sdim    LIS->ReplaceMachineInstrInMaps(*DefMI, *NewMI);
761234353Sdim    MachineBasicBlock::iterator Pos = DefMI;
762234353Sdim    MBB->insert(Pos, NewMI);
763224145Sdim    MBB->erase(DefMI);
764224145Sdim  }
765224145Sdim
766224145Sdim  // If ALR and BLR overlaps and end of BLR extends beyond end of ALR, e.g.
767224145Sdim  // A = or A, B
768224145Sdim  // ...
769224145Sdim  // B = A
770224145Sdim  // ...
771327952Sdim  // C = killed A
772224145Sdim  // ...
773224145Sdim  //   = B
774224145Sdim
775224145Sdim  // Update uses of IntA of the specific Val# with IntB.
776226633Sdim  for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(IntA.reg),
777280031Sdim                                         UE = MRI->use_end();
778280031Sdim       UI != UE; /* ++UI is below because of possible MI removal */) {
779276479Sdim    MachineOperand &UseMO = *UI;
780280031Sdim    ++UI;
781280031Sdim    if (UseMO.isUndef())
782280031Sdim      continue;
783276479Sdim    MachineInstr *UseMI = UseMO.getParent();
784224145Sdim    if (UseMI->isDebugValue()) {
785224145Sdim      // FIXME These don't have an instruction index.  Not clear we have enough
786224145Sdim      // info to decide whether to do this replacement or not.  For now do it.
787224145Sdim      UseMO.setReg(NewReg);
788224145Sdim      continue;
789224145Sdim    }
790309124Sdim    SlotIndex UseIdx = LIS->getInstructionIndex(*UseMI).getRegSlot(true);
791261991Sdim    LiveInterval::iterator US = IntA.FindSegmentContaining(UseIdx);
792280031Sdim    assert(US != IntA.end() && "Use must be live");
793280031Sdim    if (US->valno != AValNo)
794224145Sdim      continue;
795239462Sdim    // Kill flags are no longer accurate. They are recomputed after RA.
796239462Sdim    UseMO.setIsKill(false);
797224145Sdim    if (TargetRegisterInfo::isPhysicalRegister(NewReg))
798226633Sdim      UseMO.substPhysReg(NewReg, *TRI);
799224145Sdim    else
800224145Sdim      UseMO.setReg(NewReg);
801224145Sdim    if (UseMI == CopyMI)
802224145Sdim      continue;
803224145Sdim    if (!UseMI->isCopy())
804224145Sdim      continue;
805224145Sdim    if (UseMI->getOperand(0).getReg() != IntB.reg ||
806224145Sdim        UseMI->getOperand(0).getSubReg())
807224145Sdim      continue;
808224145Sdim
809224145Sdim    // This copy will become a noop. If it's defining a new val#, merge it into
810224145Sdim    // BValNo.
811234353Sdim    SlotIndex DefIdx = UseIdx.getRegSlot();
812224145Sdim    VNInfo *DVNI = IntB.getVNInfoAt(DefIdx);
813224145Sdim    if (!DVNI)
814224145Sdim      continue;
815224145Sdim    DEBUG(dbgs() << "\t\tnoop: " << DefIdx << '\t' << *UseMI);
816224145Sdim    assert(DVNI->def == DefIdx);
817288943Sdim    BValNo = IntB.MergeValueNumberInto(DVNI, BValNo);
818280031Sdim    for (LiveInterval::SubRange &S : IntB.subranges()) {
819280031Sdim      VNInfo *SubDVNI = S.getVNInfoAt(DefIdx);
820280031Sdim      if (!SubDVNI)
821280031Sdim        continue;
822280031Sdim      VNInfo *SubBValNo = S.getVNInfoAt(CopyIdx);
823280031Sdim      assert(SubBValNo->def == CopyIdx);
824288943Sdim      S.MergeValueNumberInto(SubDVNI, SubBValNo);
825280031Sdim    }
826280031Sdim
827327952Sdim    deleteInstr(UseMI);
828224145Sdim  }
829224145Sdim
830261991Sdim  // Extend BValNo by merging in IntA live segments of AValNo. Val# definition
831224145Sdim  // is updated.
832280031Sdim  BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
833280031Sdim  if (IntB.hasSubRanges()) {
834280031Sdim    if (!IntA.hasSubRanges()) {
835296417Sdim      LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(IntA.reg);
836280031Sdim      IntA.createSubRangeFrom(Allocator, Mask, IntA);
837280031Sdim    }
838280031Sdim    SlotIndex AIdx = CopyIdx.getRegSlot(true);
839280031Sdim    for (LiveInterval::SubRange &SA : IntA.subranges()) {
840280031Sdim      VNInfo *ASubValNo = SA.getVNInfoAt(AIdx);
841280031Sdim      assert(ASubValNo != nullptr);
842280031Sdim
843321369Sdim      IntB.refineSubRanges(Allocator, SA.LaneMask,
844321369Sdim          [&Allocator,&SA,CopyIdx,ASubValNo](LiveInterval::SubRange &SR) {
845321369Sdim        VNInfo *BSubValNo = SR.empty()
846321369Sdim          ? SR.getNextValue(CopyIdx, Allocator)
847321369Sdim          : SR.getVNInfoAt(CopyIdx);
848321369Sdim        assert(BSubValNo != nullptr);
849321369Sdim        addSegmentsWithValNo(SR, BSubValNo, SA, ASubValNo);
850321369Sdim      });
851280031Sdim    }
852224145Sdim  }
853280031Sdim
854280031Sdim  BValNo->def = AValNo->def;
855280031Sdim  addSegmentsWithValNo(IntB, BValNo, IntA, AValNo);
856224145Sdim  DEBUG(dbgs() << "\t\textended: " << IntB << '\n');
857224145Sdim
858288943Sdim  LIS->removeVRegDefAt(IntA, AValNo->def);
859288943Sdim
860224145Sdim  DEBUG(dbgs() << "\t\ttrimmed:  " << IntA << '\n');
861224145Sdim  ++numCommutes;
862224145Sdim  return true;
863224145Sdim}
864224145Sdim
865321369Sdim/// For copy B = A in BB2, if A is defined by A = B in BB0 which is a
866321369Sdim/// predecessor of BB2, and if B is not redefined on the way from A = B
867321369Sdim/// in BB2 to B = A in BB2, B = A in BB2 is partially redundant if the
868321369Sdim/// execution goes through the path from BB0 to BB2. We may move B = A
869321369Sdim/// to the predecessor without such reversed copy.
870321369Sdim/// So we will transform the program from:
871321369Sdim///   BB0:
872321369Sdim///      A = B;    BB1:
873321369Sdim///       ...         ...
874321369Sdim///     /     \      /
875321369Sdim///             BB2:
876321369Sdim///               ...
877321369Sdim///               B = A;
878321369Sdim///
879321369Sdim/// to:
880321369Sdim///
881321369Sdim///   BB0:         BB1:
882321369Sdim///      A = B;        ...
883321369Sdim///       ...          B = A;
884321369Sdim///     /     \       /
885321369Sdim///             BB2:
886321369Sdim///               ...
887321369Sdim///
888321369Sdim/// A special case is when BB0 and BB2 are the same BB which is the only
889321369Sdim/// BB in a loop:
890321369Sdim///   BB1:
891321369Sdim///        ...
892321369Sdim///   BB0/BB2:  ----
893321369Sdim///        B = A;   |
894321369Sdim///        ...      |
895321369Sdim///        A = B;   |
896321369Sdim///          |-------
897321369Sdim///          |
898321369Sdim/// We may hoist B = A from BB0/BB2 to BB1.
899321369Sdim///
900321369Sdim/// The major preconditions for correctness to remove such partial
901321369Sdim/// redundancy include:
902321369Sdim/// 1. A in B = A in BB2 is defined by a PHI in BB2, and one operand of
903321369Sdim///    the PHI is defined by the reversed copy A = B in BB0.
904321369Sdim/// 2. No B is referenced from the start of BB2 to B = A.
905321369Sdim/// 3. No B is defined from A = B to the end of BB0.
906321369Sdim/// 4. BB1 has only one successor.
907321369Sdim///
908321369Sdim/// 2 and 4 implicitly ensure B is not live at the end of BB1.
909321369Sdim/// 4 guarantees BB2 is hotter than BB1, so we can only move a copy to a
910321369Sdim/// colder place, which not only prevent endless loop, but also make sure
911321369Sdim/// the movement of copy is beneficial.
912321369Sdimbool RegisterCoalescer::removePartialRedundancy(const CoalescerPair &CP,
913321369Sdim                                                MachineInstr &CopyMI) {
914321369Sdim  assert(!CP.isPhys());
915321369Sdim  if (!CopyMI.isFullCopy())
916321369Sdim    return false;
917321369Sdim
918321369Sdim  MachineBasicBlock &MBB = *CopyMI.getParent();
919321369Sdim  if (MBB.isEHPad())
920321369Sdim    return false;
921321369Sdim
922321369Sdim  if (MBB.pred_size() != 2)
923321369Sdim    return false;
924321369Sdim
925321369Sdim  LiveInterval &IntA =
926321369Sdim      LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
927321369Sdim  LiveInterval &IntB =
928321369Sdim      LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg());
929321369Sdim
930321369Sdim  // A is defined by PHI at the entry of MBB.
931321369Sdim  SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(true);
932321369Sdim  VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx);
933321369Sdim  assert(AValNo && !AValNo->isUnused() && "COPY source not live");
934321369Sdim  if (!AValNo->isPHIDef())
935321369Sdim    return false;
936321369Sdim
937321369Sdim  // No B is referenced before CopyMI in MBB.
938321369Sdim  if (IntB.overlaps(LIS->getMBBStartIdx(&MBB), CopyIdx))
939321369Sdim    return false;
940321369Sdim
941321369Sdim  // MBB has two predecessors: one contains A = B so no copy will be inserted
942321369Sdim  // for it. The other one will have a copy moved from MBB.
943321369Sdim  bool FoundReverseCopy = false;
944321369Sdim  MachineBasicBlock *CopyLeftBB = nullptr;
945321369Sdim  for (MachineBasicBlock *Pred : MBB.predecessors()) {
946321369Sdim    VNInfo *PVal = IntA.getVNInfoBefore(LIS->getMBBEndIdx(Pred));
947321369Sdim    MachineInstr *DefMI = LIS->getInstructionFromIndex(PVal->def);
948321369Sdim    if (!DefMI || !DefMI->isFullCopy()) {
949321369Sdim      CopyLeftBB = Pred;
950321369Sdim      continue;
951321369Sdim    }
952321369Sdim    // Check DefMI is a reverse copy and it is in BB Pred.
953321369Sdim    if (DefMI->getOperand(0).getReg() != IntA.reg ||
954321369Sdim        DefMI->getOperand(1).getReg() != IntB.reg ||
955321369Sdim        DefMI->getParent() != Pred) {
956321369Sdim      CopyLeftBB = Pred;
957321369Sdim      continue;
958321369Sdim    }
959321369Sdim    // If there is any other def of B after DefMI and before the end of Pred,
960321369Sdim    // we need to keep the copy of B = A at the end of Pred if we remove
961321369Sdim    // B = A from MBB.
962321369Sdim    bool ValB_Changed = false;
963321369Sdim    for (auto VNI : IntB.valnos) {
964321369Sdim      if (VNI->isUnused())
965321369Sdim        continue;
966321369Sdim      if (PVal->def < VNI->def && VNI->def < LIS->getMBBEndIdx(Pred)) {
967321369Sdim        ValB_Changed = true;
968321369Sdim        break;
969321369Sdim      }
970321369Sdim    }
971321369Sdim    if (ValB_Changed) {
972321369Sdim      CopyLeftBB = Pred;
973321369Sdim      continue;
974321369Sdim    }
975321369Sdim    FoundReverseCopy = true;
976321369Sdim  }
977321369Sdim
978321369Sdim  // If no reverse copy is found in predecessors, nothing to do.
979321369Sdim  if (!FoundReverseCopy)
980321369Sdim    return false;
981321369Sdim
982321369Sdim  // If CopyLeftBB is nullptr, it means every predecessor of MBB contains
983321369Sdim  // reverse copy, CopyMI can be removed trivially if only IntA/IntB is updated.
984321369Sdim  // If CopyLeftBB is not nullptr, move CopyMI from MBB to CopyLeftBB and
985321369Sdim  // update IntA/IntB.
986321369Sdim  //
987321369Sdim  // If CopyLeftBB is not nullptr, ensure CopyLeftBB has a single succ so
988321369Sdim  // MBB is hotter than CopyLeftBB.
989321369Sdim  if (CopyLeftBB && CopyLeftBB->succ_size() > 1)
990321369Sdim    return false;
991321369Sdim
992321369Sdim  // Now ok to move copy.
993321369Sdim  if (CopyLeftBB) {
994327952Sdim    DEBUG(dbgs() << "\tremovePartialRedundancy: Move the copy to "
995327952Sdim                 << printMBBReference(*CopyLeftBB) << '\t' << CopyMI);
996321369Sdim
997321369Sdim    // Insert new copy to CopyLeftBB.
998321369Sdim    auto InsPos = CopyLeftBB->getFirstTerminator();
999321369Sdim    MachineInstr *NewCopyMI = BuildMI(*CopyLeftBB, InsPos, CopyMI.getDebugLoc(),
1000321369Sdim                                      TII->get(TargetOpcode::COPY), IntB.reg)
1001321369Sdim                                  .addReg(IntA.reg);
1002321369Sdim    SlotIndex NewCopyIdx =
1003321369Sdim        LIS->InsertMachineInstrInMaps(*NewCopyMI).getRegSlot();
1004321369Sdim    IntB.createDeadDef(NewCopyIdx, LIS->getVNInfoAllocator());
1005321369Sdim    for (LiveInterval::SubRange &SR : IntB.subranges())
1006321369Sdim      SR.createDeadDef(NewCopyIdx, LIS->getVNInfoAllocator());
1007321369Sdim
1008321369Sdim    // If the newly created Instruction has an address of an instruction that was
1009321369Sdim    // deleted before (object recycled by the allocator) it needs to be removed from
1010321369Sdim    // the deleted list.
1011321369Sdim    ErasedInstrs.erase(NewCopyMI);
1012321369Sdim  } else {
1013327952Sdim    DEBUG(dbgs() << "\tremovePartialRedundancy: Remove the copy from "
1014327952Sdim                 << printMBBReference(MBB) << '\t' << CopyMI);
1015321369Sdim  }
1016321369Sdim
1017321369Sdim  // Remove CopyMI.
1018321369Sdim  // Note: This is fine to remove the copy before updating the live-ranges.
1019321369Sdim  // While updating the live-ranges, we only look at slot indices and
1020321369Sdim  // never go back to the instruction.
1021321369Sdim  // Mark instructions as deleted.
1022327952Sdim  deleteInstr(&CopyMI);
1023321369Sdim
1024321369Sdim  // Update the liveness.
1025321369Sdim  SmallVector<SlotIndex, 8> EndPoints;
1026321369Sdim  VNInfo *BValNo = IntB.Query(CopyIdx).valueOutOrDead();
1027321369Sdim  LIS->pruneValue(*static_cast<LiveRange *>(&IntB), CopyIdx.getRegSlot(),
1028321369Sdim                  &EndPoints);
1029321369Sdim  BValNo->markUnused();
1030321369Sdim  // Extend IntB to the EndPoints of its original live interval.
1031321369Sdim  LIS->extendToIndices(IntB, EndPoints);
1032321369Sdim
1033321369Sdim  // Now, do the same for its subranges.
1034321369Sdim  for (LiveInterval::SubRange &SR : IntB.subranges()) {
1035321369Sdim    EndPoints.clear();
1036321369Sdim    VNInfo *BValNo = SR.Query(CopyIdx).valueOutOrDead();
1037321369Sdim    assert(BValNo && "All sublanes should be live");
1038321369Sdim    LIS->pruneValue(SR, CopyIdx.getRegSlot(), &EndPoints);
1039321369Sdim    BValNo->markUnused();
1040321369Sdim    LIS->extendToIndices(SR, EndPoints);
1041321369Sdim  }
1042321369Sdim
1043321369Sdim  // Finally, update the live-range of IntA.
1044321369Sdim  shrinkToUses(&IntA);
1045321369Sdim  return true;
1046321369Sdim}
1047321369Sdim
1048288943Sdim/// Returns true if @p MI defines the full vreg @p Reg, as opposed to just
1049288943Sdim/// defining a subregister.
1050288943Sdimstatic bool definesFullReg(const MachineInstr &MI, unsigned Reg) {
1051288943Sdim  assert(!TargetRegisterInfo::isPhysicalRegister(Reg) &&
1052288943Sdim         "This code cannot handle physreg aliasing");
1053288943Sdim  for (const MachineOperand &Op : MI.operands()) {
1054288943Sdim    if (!Op.isReg() || !Op.isDef() || Op.getReg() != Reg)
1055288943Sdim      continue;
1056288943Sdim    // Return true if we define the full register or don't care about the value
1057288943Sdim    // inside other subregisters.
1058288943Sdim    if (Op.getSubReg() == 0 || Op.isUndef())
1059288943Sdim      return true;
1060288943Sdim  }
1061288943Sdim  return false;
1062288943Sdim}
1063288943Sdim
1064288943Sdimbool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
1065261991Sdim                                                MachineInstr *CopyMI,
1066261991Sdim                                                bool &IsDefCopy) {
1067261991Sdim  IsDefCopy = false;
1068249423Sdim  unsigned SrcReg = CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg();
1069261991Sdim  unsigned SrcIdx = CP.isFlipped() ? CP.getDstIdx() : CP.getSrcIdx();
1070249423Sdim  unsigned DstReg = CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg();
1071261991Sdim  unsigned DstIdx = CP.isFlipped() ? CP.getSrcIdx() : CP.getDstIdx();
1072249423Sdim  if (TargetRegisterInfo::isPhysicalRegister(SrcReg))
1073249423Sdim    return false;
1074249423Sdim
1075249423Sdim  LiveInterval &SrcInt = LIS->getInterval(SrcReg);
1076309124Sdim  SlotIndex CopyIdx = LIS->getInstructionIndex(*CopyMI);
1077261991Sdim  VNInfo *ValNo = SrcInt.Query(CopyIdx).valueIn();
1078261991Sdim  assert(ValNo && "CopyMI input register not live");
1079226633Sdim  if (ValNo->isPHIDef() || ValNo->isUnused())
1080224145Sdim    return false;
1081226633Sdim  MachineInstr *DefMI = LIS->getInstructionFromIndex(ValNo->def);
1082224145Sdim  if (!DefMI)
1083224145Sdim    return false;
1084261991Sdim  if (DefMI->isCopyLike()) {
1085261991Sdim    IsDefCopy = true;
1086261991Sdim    return false;
1087261991Sdim  }
1088309124Sdim  if (!TII->isAsCheapAsAMove(*DefMI))
1089224145Sdim    return false;
1090309124Sdim  if (!TII->isTriviallyReMaterializable(*DefMI, AA))
1091224145Sdim    return false;
1092288943Sdim  if (!definesFullReg(*DefMI, SrcReg))
1093288943Sdim    return false;
1094224145Sdim  bool SawStore = false;
1095288943Sdim  if (!DefMI->isSafeToMove(AA, SawStore))
1096224145Sdim    return false;
1097234353Sdim  const MCInstrDesc &MCID = DefMI->getDesc();
1098224145Sdim  if (MCID.getNumDefs() != 1)
1099224145Sdim    return false;
1100249423Sdim  // Only support subregister destinations when the def is read-undef.
1101249423Sdim  MachineOperand &DstOperand = CopyMI->getOperand(0);
1102261991Sdim  unsigned CopyDstReg = DstOperand.getReg();
1103249423Sdim  if (DstOperand.getSubReg() && !DstOperand.isUndef())
1104249423Sdim    return false;
1105261991Sdim
1106276479Sdim  // If both SrcIdx and DstIdx are set, correct rematerialization would widen
1107276479Sdim  // the register substantially (beyond both source and dest size). This is bad
1108276479Sdim  // for performance since it can cascade through a function, introducing many
1109276479Sdim  // extra spills and fills (e.g. ARM can easily end up copying QQQQPR registers
1110276479Sdim  // around after a few subreg copies).
1111276479Sdim  if (SrcIdx && DstIdx)
1112276479Sdim    return false;
1113276479Sdim
1114261991Sdim  const TargetRegisterClass *DefRC = TII->getRegClass(MCID, 0, TRI, *MF);
1115224145Sdim  if (!DefMI->isImplicitDef()) {
1116261991Sdim    if (TargetRegisterInfo::isPhysicalRegister(DstReg)) {
1117261991Sdim      unsigned NewDstReg = DstReg;
1118261991Sdim
1119261991Sdim      unsigned NewDstIdx = TRI->composeSubRegIndices(CP.getSrcIdx(),
1120261991Sdim                                              DefMI->getOperand(0).getSubReg());
1121261991Sdim      if (NewDstIdx)
1122261991Sdim        NewDstReg = TRI->getSubReg(DstReg, NewDstIdx);
1123261991Sdim
1124261991Sdim      // Finally, make sure that the physical subregister that will be
1125261991Sdim      // constructed later is permitted for the instruction.
1126261991Sdim      if (!DefRC->contains(NewDstReg))
1127224145Sdim        return false;
1128261991Sdim    } else {
1129261991Sdim      // Theoretically, some stack frame reference could exist. Just make sure
1130261991Sdim      // it hasn't actually happened.
1131261991Sdim      assert(TargetRegisterInfo::isVirtualRegister(DstReg) &&
1132261991Sdim             "Only expect to deal with virtual or physical registers");
1133261991Sdim    }
1134224145Sdim  }
1135224145Sdim
1136309124Sdim  DebugLoc DL = CopyMI->getDebugLoc();
1137224145Sdim  MachineBasicBlock *MBB = CopyMI->getParent();
1138224145Sdim  MachineBasicBlock::iterator MII =
1139276479Sdim    std::next(MachineBasicBlock::iterator(CopyMI));
1140309124Sdim  TII->reMaterialize(*MBB, MII, DstReg, SrcIdx, *DefMI, *TRI);
1141309124Sdim  MachineInstr &NewMI = *std::prev(MII);
1142309124Sdim  NewMI.setDebugLoc(DL);
1143224145Sdim
1144288943Sdim  // In a situation like the following:
1145327952Sdim  //     %0:subreg = instr              ; DefMI, subreg = DstIdx
1146327952Sdim  //     %1        = copy %0:subreg ; CopyMI, SrcIdx = 0
1147327952Sdim  // instead of widening %1 to the register class of %0 simply do:
1148327952Sdim  //     %1 = instr
1149288943Sdim  const TargetRegisterClass *NewRC = CP.getNewRC();
1150288943Sdim  if (DstIdx != 0) {
1151309124Sdim    MachineOperand &DefMO = NewMI.getOperand(0);
1152288943Sdim    if (DefMO.getSubReg() == DstIdx) {
1153288943Sdim      assert(SrcIdx == 0 && CP.isFlipped()
1154288943Sdim             && "Shouldn't have SrcIdx+DstIdx at this point");
1155288943Sdim      const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg);
1156288943Sdim      const TargetRegisterClass *CommonRC =
1157288943Sdim        TRI->getCommonSubClass(DefRC, DstRC);
1158288943Sdim      if (CommonRC != nullptr) {
1159288943Sdim        NewRC = CommonRC;
1160288943Sdim        DstIdx = 0;
1161288943Sdim        DefMO.setSubReg(0);
1162314564Sdim        DefMO.setIsUndef(false); // Only subregs can have def+undef.
1163288943Sdim      }
1164288943Sdim    }
1165288943Sdim  }
1166288943Sdim
1167309124Sdim  // CopyMI may have implicit operands, save them so that we can transfer them
1168309124Sdim  // over to the newly materialized instruction after CopyMI is removed.
1169309124Sdim  SmallVector<MachineOperand, 4> ImplicitOps;
1170309124Sdim  ImplicitOps.reserve(CopyMI->getNumOperands() -
1171309124Sdim                      CopyMI->getDesc().getNumOperands());
1172309124Sdim  for (unsigned I = CopyMI->getDesc().getNumOperands(),
1173309124Sdim                E = CopyMI->getNumOperands();
1174309124Sdim       I != E; ++I) {
1175309124Sdim    MachineOperand &MO = CopyMI->getOperand(I);
1176309124Sdim    if (MO.isReg()) {
1177309124Sdim      assert(MO.isImplicit() && "No explicit operands after implict operands.");
1178309124Sdim      // Discard VReg implicit defs.
1179309124Sdim      if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
1180309124Sdim        ImplicitOps.push_back(MO);
1181309124Sdim    }
1182309124Sdim  }
1183309124Sdim
1184309124Sdim  LIS->ReplaceMachineInstrInMaps(*CopyMI, NewMI);
1185261991Sdim  CopyMI->eraseFromParent();
1186261991Sdim  ErasedInstrs.insert(CopyMI);
1187249423Sdim
1188234353Sdim  // NewMI may have dead implicit defs (E.g. EFLAGS for MOV<bits>r0 on X86).
1189234353Sdim  // We need to remember these so we can add intervals once we insert
1190234353Sdim  // NewMI into SlotIndexes.
1191234353Sdim  SmallVector<unsigned, 4> NewMIImplDefs;
1192309124Sdim  for (unsigned i = NewMI.getDesc().getNumOperands(),
1193309124Sdim                e = NewMI.getNumOperands();
1194309124Sdim       i != e; ++i) {
1195309124Sdim    MachineOperand &MO = NewMI.getOperand(i);
1196288943Sdim    if (MO.isReg() && MO.isDef()) {
1197288943Sdim      assert(MO.isImplicit() && MO.isDead() &&
1198234353Sdim             TargetRegisterInfo::isPhysicalRegister(MO.getReg()));
1199234353Sdim      NewMIImplDefs.push_back(MO.getReg());
1200234353Sdim    }
1201234353Sdim  }
1202234353Sdim
1203261991Sdim  if (TargetRegisterInfo::isVirtualRegister(DstReg)) {
1204309124Sdim    unsigned NewIdx = NewMI.getOperand(0).getSubReg();
1205276479Sdim
1206288943Sdim    if (DefRC != nullptr) {
1207288943Sdim      if (NewIdx)
1208288943Sdim        NewRC = TRI->getMatchingSuperRegClass(NewRC, DefRC, NewIdx);
1209288943Sdim      else
1210288943Sdim        NewRC = TRI->getCommonSubClass(NewRC, DefRC);
1211288943Sdim      assert(NewRC && "subreg chosen for remat incompatible with instruction");
1212288943Sdim    }
1213309124Sdim    // Remap subranges to new lanemask and change register class.
1214309124Sdim    LiveInterval &DstInt = LIS->getInterval(DstReg);
1215309124Sdim    for (LiveInterval::SubRange &SR : DstInt.subranges()) {
1216309124Sdim      SR.LaneMask = TRI->composeSubRegIndexLaneMask(DstIdx, SR.LaneMask);
1217309124Sdim    }
1218276479Sdim    MRI->setRegClass(DstReg, NewRC);
1219276479Sdim
1220309124Sdim    // Update machine operands and add flags.
1221276479Sdim    updateRegDefsUses(DstReg, DstReg, DstIdx);
1222309124Sdim    NewMI.getOperand(0).setSubReg(NewIdx);
1223309124Sdim    // Add dead subregister definitions if we are defining the whole register
1224309124Sdim    // but only part of it is live.
1225309124Sdim    // This could happen if the rematerialization instruction is rematerializing
1226309124Sdim    // more than actually is used in the register.
1227309124Sdim    // An example would be:
1228327952Sdim    // %1 = LOAD CONSTANTS 5, 8 ; Loading both 5 and 8 in different subregs
1229309124Sdim    // ; Copying only part of the register here, but the rest is undef.
1230327952Sdim    // %2:sub_16bit<def, read-undef> = COPY %1:sub_16bit
1231309124Sdim    // ==>
1232309124Sdim    // ; Materialize all the constants but only using one
1233327952Sdim    // %2 = LOAD_CONSTANTS 5, 8
1234309124Sdim    //
1235309124Sdim    // at this point for the part that wasn't defined before we could have
1236309124Sdim    // subranges missing the definition.
1237309124Sdim    if (NewIdx == 0 && DstInt.hasSubRanges()) {
1238309124Sdim      SlotIndex CurrIdx = LIS->getInstructionIndex(NewMI);
1239309124Sdim      SlotIndex DefIndex =
1240309124Sdim          CurrIdx.getRegSlot(NewMI.getOperand(0).isEarlyClobber());
1241309124Sdim      LaneBitmask MaxMask = MRI->getMaxLaneMaskForVReg(DstReg);
1242309124Sdim      VNInfo::Allocator& Alloc = LIS->getVNInfoAllocator();
1243309124Sdim      for (LiveInterval::SubRange &SR : DstInt.subranges()) {
1244309124Sdim        if (!SR.liveAt(DefIndex))
1245309124Sdim          SR.createDeadDef(DefIndex, Alloc);
1246309124Sdim        MaxMask &= ~SR.LaneMask;
1247309124Sdim      }
1248314564Sdim      if (MaxMask.any()) {
1249309124Sdim        LiveInterval::SubRange *SR = DstInt.createSubRange(Alloc, MaxMask);
1250309124Sdim        SR->createDeadDef(DefIndex, Alloc);
1251309124Sdim      }
1252309124Sdim    }
1253321369Sdim
1254321369Sdim    // Make sure that the subrange for resultant undef is removed
1255321369Sdim    // For example:
1256327952Sdim    //   %1:sub1<def,read-undef> = LOAD CONSTANT 1
1257327952Sdim    //   %2 = COPY %1
1258321369Sdim    // ==>
1259327952Sdim    //   %2:sub1<def, read-undef> = LOAD CONSTANT 1
1260327952Sdim    //     ; Correct but need to remove the subrange for %2:sub0
1261321369Sdim    //     ; as it is now undef
1262321369Sdim    if (NewIdx != 0 && DstInt.hasSubRanges()) {
1263321369Sdim      // The affected subregister segments can be removed.
1264321369Sdim      SlotIndex CurrIdx = LIS->getInstructionIndex(NewMI);
1265321369Sdim      LaneBitmask DstMask = TRI->getSubRegIndexLaneMask(NewIdx);
1266321369Sdim      bool UpdatedSubRanges = false;
1267321369Sdim      for (LiveInterval::SubRange &SR : DstInt.subranges()) {
1268321369Sdim        if ((SR.LaneMask & DstMask).none()) {
1269321369Sdim          DEBUG(dbgs() << "Removing undefined SubRange "
1270321369Sdim                << PrintLaneMask(SR.LaneMask) << " : " << SR << "\n");
1271321369Sdim          // VNI is in ValNo - remove any segments in this SubRange that have this ValNo
1272321369Sdim          if (VNInfo *RmValNo = SR.getVNInfoAt(CurrIdx.getRegSlot())) {
1273321369Sdim            SR.removeValNo(RmValNo);
1274321369Sdim            UpdatedSubRanges = true;
1275321369Sdim          }
1276321369Sdim        }
1277321369Sdim      }
1278321369Sdim      if (UpdatedSubRanges)
1279321369Sdim        DstInt.removeEmptySubRanges();
1280321369Sdim    }
1281309124Sdim  } else if (NewMI.getOperand(0).getReg() != CopyDstReg) {
1282261991Sdim    // The New instruction may be defining a sub-register of what's actually
1283261991Sdim    // been asked for. If so it must implicitly define the whole thing.
1284261991Sdim    assert(TargetRegisterInfo::isPhysicalRegister(DstReg) &&
1285261991Sdim           "Only expect virtual or physical registers in remat");
1286309124Sdim    NewMI.getOperand(0).setIsDead(true);
1287309124Sdim    NewMI.addOperand(MachineOperand::CreateReg(
1288309124Sdim        CopyDstReg, true /*IsDef*/, true /*IsImp*/, false /*IsKill*/));
1289276479Sdim    // Record small dead def live-ranges for all the subregisters
1290276479Sdim    // of the destination register.
1291276479Sdim    // Otherwise, variables that live through may miss some
1292276479Sdim    // interferences, thus creating invalid allocation.
1293276479Sdim    // E.g., i386 code:
1294327952Sdim    // %1 = somedef ; %1 GR8
1295327952Sdim    // %2 = remat ; %2 GR32
1296327952Sdim    // CL = COPY %2.sub_8bit
1297327952Sdim    // = somedef %1 ; %1 GR8
1298276479Sdim    // =>
1299327952Sdim    // %1 = somedef ; %1 GR8
1300327952Sdim    // dead ECX = remat ; implicit-def CL
1301327952Sdim    // = somedef %1 ; %1 GR8
1302327952Sdim    // %1 will see the inteferences with CL but not with CH since
1303276479Sdim    // no live-ranges would have been created for ECX.
1304276479Sdim    // Fix that!
1305276479Sdim    SlotIndex NewMIIdx = LIS->getInstructionIndex(NewMI);
1306309124Sdim    for (MCRegUnitIterator Units(NewMI.getOperand(0).getReg(), TRI);
1307276479Sdim         Units.isValid(); ++Units)
1308276479Sdim      if (LiveRange *LR = LIS->getCachedRegUnit(*Units))
1309276479Sdim        LR->createDeadDef(NewMIIdx.getRegSlot(), LIS->getVNInfoAllocator());
1310261991Sdim  }
1311261991Sdim
1312309124Sdim  if (NewMI.getOperand(0).getSubReg())
1313309124Sdim    NewMI.getOperand(0).setIsUndef();
1314261991Sdim
1315309124Sdim  // Transfer over implicit operands to the rematerialized instruction.
1316309124Sdim  for (MachineOperand &MO : ImplicitOps)
1317309124Sdim    NewMI.addOperand(MO);
1318224145Sdim
1319234353Sdim  SlotIndex NewMIIdx = LIS->getInstructionIndex(NewMI);
1320234353Sdim  for (unsigned i = 0, e = NewMIImplDefs.size(); i != e; ++i) {
1321239462Sdim    unsigned Reg = NewMIImplDefs[i];
1322239462Sdim    for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
1323261991Sdim      if (LiveRange *LR = LIS->getCachedRegUnit(*Units))
1324261991Sdim        LR->createDeadDef(NewMIIdx.getRegSlot(), LIS->getVNInfoAllocator());
1325234353Sdim  }
1326234353Sdim
1327309124Sdim  DEBUG(dbgs() << "Remat: " << NewMI);
1328224145Sdim  ++NumReMats;
1329224145Sdim
1330224145Sdim  // The source interval can become smaller because we removed a use.
1331288943Sdim  shrinkToUses(&SrcInt, &DeadDefs);
1332280031Sdim  if (!DeadDefs.empty()) {
1333280031Sdim    // If the virtual SrcReg is completely eliminated, update all DBG_VALUEs
1334280031Sdim    // to describe DstReg instead.
1335280031Sdim    for (MachineOperand &UseMO : MRI->use_operands(SrcReg)) {
1336280031Sdim      MachineInstr *UseMI = UseMO.getParent();
1337280031Sdim      if (UseMI->isDebugValue()) {
1338280031Sdim        UseMO.setReg(DstReg);
1339327952Sdim        // Move the debug value directly after the def of the rematerialized
1340327952Sdim        // value in DstReg.
1341327952Sdim        MBB->splice(std::next(NewMI.getIterator()), UseMI->getParent(), UseMI);
1342280031Sdim        DEBUG(dbgs() << "\t\tupdated: " << *UseMI);
1343280031Sdim      }
1344280031Sdim    }
1345239462Sdim    eliminateDeadDefs();
1346280031Sdim  }
1347224145Sdim
1348224145Sdim  return true;
1349224145Sdim}
1350224145Sdim
1351288943Sdimbool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) {
1352288943Sdim  // ProcessImpicitDefs may leave some copies of <undef> values, it only removes
1353288943Sdim  // local variables. When we have a copy like:
1354288943Sdim  //
1355327952Sdim  //   %1 = COPY undef %2
1356288943Sdim  //
1357327952Sdim  // We delete the copy and remove the corresponding value number from %1.
1358288943Sdim  // Any uses of that value number are marked as <undef>.
1359280031Sdim
1360280031Sdim  // Note that we do not query CoalescerPair here but redo isMoveInstr as the
1361280031Sdim  // CoalescerPair may have a new register class with adjusted subreg indices
1362280031Sdim  // at this point.
1363280031Sdim  unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
1364280031Sdim  isMoveInstr(*TRI, CopyMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx);
1365280031Sdim
1366309124Sdim  SlotIndex Idx = LIS->getInstructionIndex(*CopyMI);
1367280031Sdim  const LiveInterval &SrcLI = LIS->getInterval(SrcReg);
1368280031Sdim  // CopyMI is undef iff SrcReg is not live before the instruction.
1369280031Sdim  if (SrcSubIdx != 0 && SrcLI.hasSubRanges()) {
1370296417Sdim    LaneBitmask SrcMask = TRI->getSubRegIndexLaneMask(SrcSubIdx);
1371280031Sdim    for (const LiveInterval::SubRange &SR : SrcLI.subranges()) {
1372314564Sdim      if ((SR.LaneMask & SrcMask).none())
1373280031Sdim        continue;
1374280031Sdim      if (SR.liveAt(Idx))
1375280031Sdim        return false;
1376280031Sdim    }
1377280031Sdim  } else if (SrcLI.liveAt(Idx))
1378226633Sdim    return false;
1379226633Sdim
1380280031Sdim  DEBUG(dbgs() << "\tEliminating copy of <undef> value\n");
1381226633Sdim
1382280031Sdim  // Remove any DstReg segments starting at the instruction.
1383280031Sdim  LiveInterval &DstLI = LIS->getInterval(DstReg);
1384280031Sdim  SlotIndex RegIndex = Idx.getRegSlot();
1385280031Sdim  // Remove value or merge with previous one in case of a subregister def.
1386280031Sdim  if (VNInfo *PrevVNI = DstLI.getVNInfoAt(Idx)) {
1387288943Sdim    VNInfo *VNI = DstLI.getVNInfoAt(RegIndex);
1388288943Sdim    DstLI.MergeValueNumberInto(VNI, PrevVNI);
1389280031Sdim
1390288943Sdim    // The affected subregister segments can be removed.
1391296417Sdim    LaneBitmask DstMask = TRI->getSubRegIndexLaneMask(DstSubIdx);
1392288943Sdim    for (LiveInterval::SubRange &SR : DstLI.subranges()) {
1393314564Sdim      if ((SR.LaneMask & DstMask).none())
1394288943Sdim        continue;
1395288943Sdim
1396288943Sdim      VNInfo *SVNI = SR.getVNInfoAt(RegIndex);
1397288943Sdim      assert(SVNI != nullptr && SlotIndex::isSameInstr(SVNI->def, RegIndex));
1398288943Sdim      SR.removeValNo(SVNI);
1399288943Sdim    }
1400288943Sdim    DstLI.removeEmptySubRanges();
1401288943Sdim  } else
1402288943Sdim    LIS->removeVRegDefAt(DstLI, RegIndex);
1403288943Sdim
1404280031Sdim  // Mark uses as undef.
1405280031Sdim  for (MachineOperand &MO : MRI->reg_nodbg_operands(DstReg)) {
1406280031Sdim    if (MO.isDef() /*|| MO.isUndef()*/)
1407226633Sdim      continue;
1408280031Sdim    const MachineInstr &MI = *MO.getParent();
1409309124Sdim    SlotIndex UseIdx = LIS->getInstructionIndex(MI);
1410296417Sdim    LaneBitmask UseMask = TRI->getSubRegIndexLaneMask(MO.getSubReg());
1411280031Sdim    bool isLive;
1412314564Sdim    if (!UseMask.all() && DstLI.hasSubRanges()) {
1413280031Sdim      isLive = false;
1414280031Sdim      for (const LiveInterval::SubRange &SR : DstLI.subranges()) {
1415314564Sdim        if ((SR.LaneMask & UseMask).none())
1416280031Sdim          continue;
1417280031Sdim        if (SR.liveAt(UseIdx)) {
1418280031Sdim          isLive = true;
1419280031Sdim          break;
1420280031Sdim        }
1421280031Sdim      }
1422280031Sdim    } else
1423280031Sdim      isLive = DstLI.liveAt(UseIdx);
1424280031Sdim    if (isLive)
1425226633Sdim      continue;
1426226633Sdim    MO.setIsUndef(true);
1427280031Sdim    DEBUG(dbgs() << "\tnew undef: " << UseIdx << '\t' << MI);
1428226633Sdim  }
1429314564Sdim
1430314564Sdim  // A def of a subregister may be a use of the other subregisters, so
1431314564Sdim  // deleting a def of a subregister may also remove uses. Since CopyMI
1432314564Sdim  // is still part of the function (but about to be erased), mark all
1433314564Sdim  // defs of DstReg in it as <undef>, so that shrinkToUses would
1434314564Sdim  // ignore them.
1435314564Sdim  for (MachineOperand &MO : CopyMI->operands())
1436314564Sdim    if (MO.isReg() && MO.isDef() && MO.getReg() == DstReg)
1437314564Sdim      MO.setIsUndef(true);
1438314564Sdim  LIS->shrinkToUses(&DstLI);
1439314564Sdim
1440226633Sdim  return true;
1441226633Sdim}
1442226633Sdim
1443309124Sdimvoid RegisterCoalescer::addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx,
1444309124Sdim                                     MachineOperand &MO, unsigned SubRegIdx) {
1445309124Sdim  LaneBitmask Mask = TRI->getSubRegIndexLaneMask(SubRegIdx);
1446309124Sdim  if (MO.isDef())
1447309124Sdim    Mask = ~Mask;
1448309124Sdim  bool IsUndef = true;
1449309124Sdim  for (const LiveInterval::SubRange &S : Int.subranges()) {
1450314564Sdim    if ((S.LaneMask & Mask).none())
1451309124Sdim      continue;
1452309124Sdim    if (S.liveAt(UseIdx)) {
1453309124Sdim      IsUndef = false;
1454309124Sdim      break;
1455309124Sdim    }
1456309124Sdim  }
1457309124Sdim  if (IsUndef) {
1458309124Sdim    MO.setIsUndef(true);
1459309124Sdim    // We found out some subregister use is actually reading an undefined
1460309124Sdim    // value. In some cases the whole vreg has become undefined at this
1461309124Sdim    // point so we have to potentially shrink the main range if the
1462309124Sdim    // use was ending a live segment there.
1463309124Sdim    LiveQueryResult Q = Int.Query(UseIdx);
1464309124Sdim    if (Q.valueOut() == nullptr)
1465309124Sdim      ShrinkMainRange = true;
1466309124Sdim  }
1467309124Sdim}
1468309124Sdim
1469239462Sdimvoid RegisterCoalescer::updateRegDefsUses(unsigned SrcReg,
1470239462Sdim                                          unsigned DstReg,
1471239462Sdim                                          unsigned SubIdx) {
1472239462Sdim  bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg);
1473276479Sdim  LiveInterval *DstInt = DstIsPhys ? nullptr : &LIS->getInterval(DstReg);
1474224145Sdim
1475309124Sdim  if (DstInt && DstInt->hasSubRanges() && DstReg != SrcReg) {
1476309124Sdim    for (MachineOperand &MO : MRI->reg_operands(DstReg)) {
1477309124Sdim      unsigned SubReg = MO.getSubReg();
1478309124Sdim      if (SubReg == 0 || MO.isUndef())
1479309124Sdim        continue;
1480309124Sdim      MachineInstr &MI = *MO.getParent();
1481309124Sdim      if (MI.isDebugValue())
1482309124Sdim        continue;
1483309124Sdim      SlotIndex UseIdx = LIS->getInstructionIndex(MI).getRegSlot(true);
1484309124Sdim      addUndefFlag(*DstInt, UseIdx, MO, SubReg);
1485309124Sdim    }
1486309124Sdim  }
1487309124Sdim
1488243830Sdim  SmallPtrSet<MachineInstr*, 8> Visited;
1489276479Sdim  for (MachineRegisterInfo::reg_instr_iterator
1490276479Sdim       I = MRI->reg_instr_begin(SrcReg), E = MRI->reg_instr_end();
1491276479Sdim       I != E; ) {
1492276479Sdim    MachineInstr *UseMI = &*(I++);
1493276479Sdim
1494243830Sdim    // Each instruction can only be rewritten once because sub-register
1495243830Sdim    // composition is not always idempotent. When SrcReg != DstReg, rewriting
1496243830Sdim    // the UseMI operands removes them from the SrcReg use-def chain, but when
1497243830Sdim    // SrcReg is DstReg we could encounter UseMI twice if it has multiple
1498243830Sdim    // operands mentioning the virtual register.
1499280031Sdim    if (SrcReg == DstReg && !Visited.insert(UseMI).second)
1500243830Sdim      continue;
1501243830Sdim
1502224145Sdim    SmallVector<unsigned,8> Ops;
1503224145Sdim    bool Reads, Writes;
1504276479Sdim    std::tie(Reads, Writes) = UseMI->readsWritesVirtualRegister(SrcReg, &Ops);
1505224145Sdim
1506239462Sdim    // If SrcReg wasn't read, it may still be the case that DstReg is live-in
1507239462Sdim    // because SrcReg is a sub-register.
1508321369Sdim    if (DstInt && !Reads && SubIdx && !UseMI->isDebugValue())
1509309124Sdim      Reads = DstInt->liveAt(LIS->getInstructionIndex(*UseMI));
1510239462Sdim
1511224145Sdim    // Replace SrcReg with DstReg in all UseMI operands.
1512224145Sdim    for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
1513224145Sdim      MachineOperand &MO = UseMI->getOperand(Ops[i]);
1514224145Sdim
1515239462Sdim      // Adjust <undef> flags in case of sub-register joins. We don't want to
1516239462Sdim      // turn a full def into a read-modify-write sub-register def and vice
1517239462Sdim      // versa.
1518239462Sdim      if (SubIdx && MO.isDef())
1519239462Sdim        MO.setIsUndef(!Reads);
1520226633Sdim
1521280031Sdim      // A subreg use of a partially undef (super) register may be a complete
1522280031Sdim      // undef use now and then has to be marked that way.
1523288943Sdim      if (SubIdx != 0 && MO.isUse() && MRI->shouldTrackSubRegLiveness(DstReg)) {
1524280031Sdim        if (!DstInt->hasSubRanges()) {
1525280031Sdim          BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
1526296417Sdim          LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(DstInt->reg);
1527280031Sdim          DstInt->createSubRangeFrom(Allocator, Mask, *DstInt);
1528280031Sdim        }
1529280031Sdim        SlotIndex MIIdx = UseMI->isDebugValue()
1530309124Sdim                              ? LIS->getSlotIndexes()->getIndexBefore(*UseMI)
1531309124Sdim                              : LIS->getInstructionIndex(*UseMI);
1532280031Sdim        SlotIndex UseIdx = MIIdx.getRegSlot(true);
1533309124Sdim        addUndefFlag(*DstInt, UseIdx, MO, SubIdx);
1534280031Sdim      }
1535280031Sdim
1536224145Sdim      if (DstIsPhys)
1537226633Sdim        MO.substPhysReg(DstReg, *TRI);
1538224145Sdim      else
1539226633Sdim        MO.substVirtReg(DstReg, SubIdx, *TRI);
1540224145Sdim    }
1541224145Sdim
1542224145Sdim    DEBUG({
1543224145Sdim        dbgs() << "\t\tupdated: ";
1544224145Sdim        if (!UseMI->isDebugValue())
1545309124Sdim          dbgs() << LIS->getInstructionIndex(*UseMI) << "\t";
1546224145Sdim        dbgs() << *UseMI;
1547224145Sdim      });
1548224145Sdim  }
1549224145Sdim}
1550224145Sdim
1551249423Sdimbool RegisterCoalescer::canJoinPhys(const CoalescerPair &CP) {
1552288943Sdim  // Always join simple intervals that are defined by a single copy from a
1553288943Sdim  // reserved register. This doesn't increase register pressure, so it is
1554288943Sdim  // always beneficial.
1555243830Sdim  if (!MRI->isReserved(CP.getDstReg())) {
1556239462Sdim    DEBUG(dbgs() << "\tCan only merge into reserved registers.\n");
1557224145Sdim    return false;
1558224145Sdim  }
1559224145Sdim
1560239462Sdim  LiveInterval &JoinVInt = LIS->getInterval(CP.getSrcReg());
1561288943Sdim  if (JoinVInt.containsOneValue())
1562224145Sdim    return true;
1563224145Sdim
1564288943Sdim  DEBUG(dbgs() << "\tCannot join complex intervals into reserved register.\n");
1565239462Sdim  return false;
1566224145Sdim}
1567224145Sdim
1568239462Sdimbool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
1569224145Sdim  Again = false;
1570309124Sdim  DEBUG(dbgs() << LIS->getInstructionIndex(*CopyMI) << '\t' << *CopyMI);
1571224145Sdim
1572239462Sdim  CoalescerPair CP(*TRI);
1573224145Sdim  if (!CP.setRegisters(CopyMI)) {
1574224145Sdim    DEBUG(dbgs() << "\tNot coalescable.\n");
1575224145Sdim    return false;
1576224145Sdim  }
1577224145Sdim
1578276479Sdim  if (CP.getNewRC()) {
1579276479Sdim    auto SrcRC = MRI->getRegClass(CP.getSrcReg());
1580276479Sdim    auto DstRC = MRI->getRegClass(CP.getDstReg());
1581276479Sdim    unsigned SrcIdx = CP.getSrcIdx();
1582276479Sdim    unsigned DstIdx = CP.getDstIdx();
1583276479Sdim    if (CP.isFlipped()) {
1584276479Sdim      std::swap(SrcIdx, DstIdx);
1585276479Sdim      std::swap(SrcRC, DstRC);
1586276479Sdim    }
1587276479Sdim    if (!TRI->shouldCoalesce(CopyMI, SrcRC, SrcIdx, DstRC, DstIdx,
1588327952Sdim                             CP.getNewRC(), *LIS)) {
1589276479Sdim      DEBUG(dbgs() << "\tSubtarget bailed on coalescing.\n");
1590276479Sdim      return false;
1591276479Sdim    }
1592276479Sdim  }
1593276479Sdim
1594239462Sdim  // Dead code elimination. This really should be handled by MachineDCE, but
1595239462Sdim  // sometimes dead copies slip through, and we can't generate invalid live
1596239462Sdim  // ranges.
1597239462Sdim  if (!CP.isPhys() && CopyMI->allDefsAreDead()) {
1598239462Sdim    DEBUG(dbgs() << "\tCopy is dead.\n");
1599239462Sdim    DeadDefs.push_back(CopyMI);
1600239462Sdim    eliminateDeadDefs();
1601239462Sdim    return true;
1602224145Sdim  }
1603224145Sdim
1604226633Sdim  // Eliminate undefs.
1605280031Sdim  if (!CP.isPhys() && eliminateUndefCopy(CopyMI)) {
1606327952Sdim    deleteInstr(CopyMI);
1607226633Sdim    return false;  // Not coalescable.
1608226633Sdim  }
1609226633Sdim
1610239462Sdim  // Coalesced copies are normally removed immediately, but transformations
1611239462Sdim  // like removeCopyByCommutingDef() can inadvertently create identity copies.
1612239462Sdim  // When that happens, just join the values and remove the copy.
1613239462Sdim  if (CP.getSrcReg() == CP.getDstReg()) {
1614239462Sdim    LiveInterval &LI = LIS->getInterval(CP.getSrcReg());
1615239462Sdim    DEBUG(dbgs() << "\tCopy already coalesced: " << LI << '\n');
1616309124Sdim    const SlotIndex CopyIdx = LIS->getInstructionIndex(*CopyMI);
1617280031Sdim    LiveQueryResult LRQ = LI.Query(CopyIdx);
1618239462Sdim    if (VNInfo *DefVNI = LRQ.valueDefined()) {
1619239462Sdim      VNInfo *ReadVNI = LRQ.valueIn();
1620239462Sdim      assert(ReadVNI && "No value before copy and no <undef> flag.");
1621239462Sdim      assert(ReadVNI != DefVNI && "Cannot read and define the same value.");
1622239462Sdim      LI.MergeValueNumberInto(DefVNI, ReadVNI);
1623280031Sdim
1624280031Sdim      // Process subregister liveranges.
1625280031Sdim      for (LiveInterval::SubRange &S : LI.subranges()) {
1626280031Sdim        LiveQueryResult SLRQ = S.Query(CopyIdx);
1627280031Sdim        if (VNInfo *SDefVNI = SLRQ.valueDefined()) {
1628280031Sdim          VNInfo *SReadVNI = SLRQ.valueIn();
1629280031Sdim          S.MergeValueNumberInto(SDefVNI, SReadVNI);
1630280031Sdim        }
1631280031Sdim      }
1632239462Sdim      DEBUG(dbgs() << "\tMerged values:          " << LI << '\n');
1633239462Sdim    }
1634327952Sdim    deleteInstr(CopyMI);
1635239462Sdim    return true;
1636239462Sdim  }
1637224145Sdim
1638224145Sdim  // Enforce policies.
1639224145Sdim  if (CP.isPhys()) {
1640327952Sdim    DEBUG(dbgs() << "\tConsidering merging " << printReg(CP.getSrcReg(), TRI)
1641327952Sdim                 << " with " << printReg(CP.getDstReg(), TRI, CP.getSrcIdx())
1642239462Sdim                 << '\n');
1643239462Sdim    if (!canJoinPhys(CP)) {
1644224145Sdim      // Before giving up coalescing, if definition of source is defined by
1645224145Sdim      // trivial computation, try rematerializing it.
1646261991Sdim      bool IsDefCopy;
1647261991Sdim      if (reMaterializeTrivialDef(CP, CopyMI, IsDefCopy))
1648224145Sdim        return true;
1649261991Sdim      if (IsDefCopy)
1650261991Sdim        Again = true;  // May be possible to coalesce later.
1651224145Sdim      return false;
1652224145Sdim    }
1653224145Sdim  } else {
1654280031Sdim    // When possible, let DstReg be the larger interval.
1655280031Sdim    if (!CP.isPartial() && LIS->getInterval(CP.getSrcReg()).size() >
1656280031Sdim                           LIS->getInterval(CP.getDstReg()).size())
1657280031Sdim      CP.flip();
1658280031Sdim
1659239462Sdim    DEBUG({
1660280031Sdim      dbgs() << "\tConsidering merging to "
1661280031Sdim             << TRI->getRegClassName(CP.getNewRC()) << " with ";
1662239462Sdim      if (CP.getDstIdx() && CP.getSrcIdx())
1663327952Sdim        dbgs() << printReg(CP.getDstReg()) << " in "
1664239462Sdim               << TRI->getSubRegIndexName(CP.getDstIdx()) << " and "
1665327952Sdim               << printReg(CP.getSrcReg()) << " in "
1666239462Sdim               << TRI->getSubRegIndexName(CP.getSrcIdx()) << '\n';
1667239462Sdim      else
1668327952Sdim        dbgs() << printReg(CP.getSrcReg(), TRI) << " in "
1669327952Sdim               << printReg(CP.getDstReg(), TRI, CP.getSrcIdx()) << '\n';
1670239462Sdim    });
1671224145Sdim  }
1672224145Sdim
1673314564Sdim  ShrinkMask = LaneBitmask::getNone();
1674280031Sdim  ShrinkMainRange = false;
1675280031Sdim
1676224145Sdim  // Okay, attempt to join these two intervals.  On failure, this returns false.
1677224145Sdim  // Otherwise, if one of the intervals being joined is a physreg, this method
1678224145Sdim  // always canonicalizes DstInt to be it.  The output "SrcInt" will not have
1679224145Sdim  // been modified, so we can use this information below to update aliases.
1680239462Sdim  if (!joinIntervals(CP)) {
1681224145Sdim    // Coalescing failed.
1682224145Sdim
1683224145Sdim    // If definition of source is defined by trivial computation, try
1684224145Sdim    // rematerializing it.
1685261991Sdim    bool IsDefCopy;
1686261991Sdim    if (reMaterializeTrivialDef(CP, CopyMI, IsDefCopy))
1687224145Sdim      return true;
1688224145Sdim
1689261991Sdim    // If we can eliminate the copy without merging the live segments, do so
1690261991Sdim    // now.
1691239462Sdim    if (!CP.isPartial() && !CP.isPhys()) {
1692239462Sdim      if (adjustCopiesBackFrom(CP, CopyMI) ||
1693239462Sdim          removeCopyByCommutingDef(CP, CopyMI)) {
1694327952Sdim        deleteInstr(CopyMI);
1695224145Sdim        DEBUG(dbgs() << "\tTrivial!\n");
1696224145Sdim        return true;
1697224145Sdim      }
1698224145Sdim    }
1699224145Sdim
1700321369Sdim    // Try and see if we can partially eliminate the copy by moving the copy to
1701321369Sdim    // its predecessor.
1702321369Sdim    if (!CP.isPartial() && !CP.isPhys())
1703321369Sdim      if (removePartialRedundancy(CP, *CopyMI))
1704321369Sdim        return true;
1705321369Sdim
1706224145Sdim    // Otherwise, we are unable to join the intervals.
1707224145Sdim    DEBUG(dbgs() << "\tInterference!\n");
1708224145Sdim    Again = true;  // May be possible to coalesce later.
1709224145Sdim    return false;
1710224145Sdim  }
1711224145Sdim
1712224145Sdim  // Coalescing to a virtual register that is of a sub-register class of the
1713224145Sdim  // other. Make sure the resulting register is set to the right register class.
1714224145Sdim  if (CP.isCrossClass()) {
1715224145Sdim    ++numCrossRCs;
1716226633Sdim    MRI->setRegClass(CP.getDstReg(), CP.getNewRC());
1717224145Sdim  }
1718224145Sdim
1719239462Sdim  // Removing sub-register copies can ease the register class constraints.
1720239462Sdim  // Make sure we attempt to inflate the register class of DstReg.
1721239462Sdim  if (!CP.isPhys() && RegClassInfo.isProperSubClass(CP.getNewRC()))
1722239462Sdim    InflateRegs.push_back(CP.getDstReg());
1723224145Sdim
1724239462Sdim  // CopyMI has been erased by joinIntervals at this point. Remove it from
1725239462Sdim  // ErasedInstrs since copyCoalesceWorkList() won't add a successful join back
1726239462Sdim  // to the work list. This keeps ErasedInstrs from growing needlessly.
1727239462Sdim  ErasedInstrs.erase(CopyMI);
1728224145Sdim
1729239462Sdim  // Rewrite all SrcReg operands to DstReg.
1730239462Sdim  // Also update DstReg operands to include DstIdx if it is set.
1731239462Sdim  if (CP.getDstIdx())
1732239462Sdim    updateRegDefsUses(CP.getDstReg(), CP.getDstReg(), CP.getDstIdx());
1733239462Sdim  updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx());
1734224145Sdim
1735280031Sdim  // Shrink subregister ranges if necessary.
1736314564Sdim  if (ShrinkMask.any()) {
1737280031Sdim    LiveInterval &LI = LIS->getInterval(CP.getDstReg());
1738280031Sdim    for (LiveInterval::SubRange &S : LI.subranges()) {
1739314564Sdim      if ((S.LaneMask & ShrinkMask).none())
1740280031Sdim        continue;
1741296417Sdim      DEBUG(dbgs() << "Shrink LaneUses (Lane " << PrintLaneMask(S.LaneMask)
1742296417Sdim                   << ")\n");
1743280031Sdim      LIS->shrinkToUses(S, LI.reg);
1744280031Sdim    }
1745288943Sdim    LI.removeEmptySubRanges();
1746280031Sdim  }
1747280031Sdim  if (ShrinkMainRange) {
1748280031Sdim    LiveInterval &LI = LIS->getInterval(CP.getDstReg());
1749288943Sdim    shrinkToUses(&LI);
1750280031Sdim  }
1751280031Sdim
1752234353Sdim  // SrcReg is guaranteed to be the register whose live interval that is
1753224145Sdim  // being merged.
1754226633Sdim  LIS->removeInterval(CP.getSrcReg());
1755224145Sdim
1756224145Sdim  // Update regalloc hint.
1757288943Sdim  TRI->updateRegAllocHint(CP.getSrcReg(), CP.getDstReg(), *MF);
1758224145Sdim
1759224145Sdim  DEBUG({
1760327952Sdim    dbgs() << "\tSuccess: " << printReg(CP.getSrcReg(), TRI, CP.getSrcIdx())
1761327952Sdim           << " -> " << printReg(CP.getDstReg(), TRI, CP.getDstIdx()) << '\n';
1762280031Sdim    dbgs() << "\tResult = ";
1763261991Sdim    if (CP.isPhys())
1764327952Sdim      dbgs() << printReg(CP.getDstReg(), TRI);
1765261991Sdim    else
1766239462Sdim      dbgs() << LIS->getInterval(CP.getDstReg());
1767261991Sdim    dbgs() << '\n';
1768224145Sdim  });
1769224145Sdim
1770224145Sdim  ++numJoins;
1771224145Sdim  return true;
1772224145Sdim}
1773224145Sdim
1774239462Sdimbool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {
1775288943Sdim  unsigned DstReg = CP.getDstReg();
1776314564Sdim  unsigned SrcReg = CP.getSrcReg();
1777239462Sdim  assert(CP.isPhys() && "Must be a physreg copy");
1778288943Sdim  assert(MRI->isReserved(DstReg) && "Not a reserved register");
1779314564Sdim  LiveInterval &RHS = LIS->getInterval(SrcReg);
1780261991Sdim  DEBUG(dbgs() << "\t\tRHS = " << RHS << '\n');
1781239462Sdim
1782288943Sdim  assert(RHS.containsOneValue() && "Invalid join with reserved register");
1783239462Sdim
1784239462Sdim  // Optimization for reserved registers like ESP. We can only merge with a
1785288943Sdim  // reserved physreg if RHS has a single value that is a copy of DstReg.
1786239462Sdim  // The live range of the reserved register will look like a set of dead defs
1787239462Sdim  // - we don't properly track the live range of reserved registers.
1788239462Sdim
1789239462Sdim  // Deny any overlapping intervals.  This depends on all the reserved
1790239462Sdim  // register live ranges to look like dead defs.
1791314564Sdim  if (!MRI->isConstantPhysReg(DstReg)) {
1792314564Sdim    for (MCRegUnitIterator UI(DstReg, TRI); UI.isValid(); ++UI) {
1793314564Sdim      // Abort if not all the regunits are reserved.
1794314564Sdim      for (MCRegUnitRootIterator RI(*UI, TRI); RI.isValid(); ++RI) {
1795314564Sdim        if (!MRI->isReserved(*RI))
1796314564Sdim          return false;
1797314564Sdim      }
1798314564Sdim      if (RHS.overlaps(LIS->getRegUnit(*UI))) {
1799327952Sdim        DEBUG(dbgs() << "\t\tInterference: " << printRegUnit(*UI, TRI) << '\n');
1800314564Sdim        return false;
1801314564Sdim      }
1802239462Sdim    }
1803321369Sdim
1804321369Sdim    // We must also check for overlaps with regmask clobbers.
1805321369Sdim    BitVector RegMaskUsable;
1806321369Sdim    if (LIS->checkRegMaskInterference(RHS, RegMaskUsable) &&
1807321369Sdim        !RegMaskUsable.test(DstReg)) {
1808321369Sdim      DEBUG(dbgs() << "\t\tRegMask interference\n");
1809321369Sdim      return false;
1810321369Sdim    }
1811314564Sdim  }
1812239462Sdim
1813239462Sdim  // Skip any value computations, we are not adding new values to the
1814239462Sdim  // reserved register.  Also skip merging the live ranges, the reserved
1815239462Sdim  // register live range doesn't need to be accurate as long as all the
1816239462Sdim  // defs are there.
1817239462Sdim
1818239462Sdim  // Delete the identity copy.
1819288943Sdim  MachineInstr *CopyMI;
1820288943Sdim  if (CP.isFlipped()) {
1821314564Sdim    // Physreg is copied into vreg
1822327952Sdim    //   %y = COPY %physreg_x
1823327952Sdim    //   ...  //< no other def of %x here
1824327952Sdim    //   use %y
1825314564Sdim    // =>
1826314564Sdim    //   ...
1827327952Sdim    //   use %x
1828314564Sdim    CopyMI = MRI->getVRegDef(SrcReg);
1829288943Sdim  } else {
1830314564Sdim    // VReg is copied into physreg:
1831327952Sdim    //   %y = def
1832327952Sdim    //   ... //< no other def or use of %y here
1833327952Sdim    //   %y = COPY %physreg_x
1834314564Sdim    // =>
1835327952Sdim    //   %y = def
1836314564Sdim    //   ...
1837314564Sdim    if (!MRI->hasOneNonDBGUse(SrcReg)) {
1838288943Sdim      DEBUG(dbgs() << "\t\tMultiple vreg uses!\n");
1839288943Sdim      return false;
1840288943Sdim    }
1841288943Sdim
1842314564Sdim    if (!LIS->intervalIsInOneMBB(RHS)) {
1843314564Sdim      DEBUG(dbgs() << "\t\tComplex control flow!\n");
1844314564Sdim      return false;
1845314564Sdim    }
1846288943Sdim
1847314564Sdim    MachineInstr &DestMI = *MRI->getVRegDef(SrcReg);
1848314564Sdim    CopyMI = &*MRI->use_instr_nodbg_begin(SrcReg);
1849314564Sdim    SlotIndex CopyRegIdx = LIS->getInstructionIndex(*CopyMI).getRegSlot();
1850314564Sdim    SlotIndex DestRegIdx = LIS->getInstructionIndex(DestMI).getRegSlot();
1851288943Sdim
1852314564Sdim    if (!MRI->isConstantPhysReg(DstReg)) {
1853314564Sdim      // We checked above that there are no interfering defs of the physical
1854327952Sdim      // register. However, for this case, where we intend to move up the def of
1855314564Sdim      // the physical register, we also need to check for interfering uses.
1856314564Sdim      SlotIndexes *Indexes = LIS->getSlotIndexes();
1857314564Sdim      for (SlotIndex SI = Indexes->getNextNonNullIndex(DestRegIdx);
1858314564Sdim           SI != CopyRegIdx; SI = Indexes->getNextNonNullIndex(SI)) {
1859314564Sdim        MachineInstr *MI = LIS->getInstructionFromIndex(SI);
1860314564Sdim        if (MI->readsRegister(DstReg, TRI)) {
1861314564Sdim          DEBUG(dbgs() << "\t\tInterference (read): " << *MI);
1862288943Sdim          return false;
1863288943Sdim        }
1864288943Sdim      }
1865288943Sdim    }
1866288943Sdim
1867288943Sdim    // We're going to remove the copy which defines a physical reserved
1868288943Sdim    // register, so remove its valno, etc.
1869327952Sdim    DEBUG(dbgs() << "\t\tRemoving phys reg def of " << printReg(DstReg, TRI)
1870314564Sdim          << " at " << CopyRegIdx << "\n");
1871288943Sdim
1872288943Sdim    LIS->removePhysRegDefAt(DstReg, CopyRegIdx);
1873288943Sdim    // Create a new dead def at the new def location.
1874288943Sdim    for (MCRegUnitIterator UI(DstReg, TRI); UI.isValid(); ++UI) {
1875288943Sdim      LiveRange &LR = LIS->getRegUnit(*UI);
1876288943Sdim      LR.createDeadDef(DestRegIdx, LIS->getVNInfoAllocator());
1877288943Sdim    }
1878288943Sdim  }
1879288943Sdim
1880327952Sdim  deleteInstr(CopyMI);
1881239462Sdim
1882239462Sdim  // We don't track kills for reserved registers.
1883239462Sdim  MRI->clearKillFlags(CP.getSrcReg());
1884239462Sdim
1885239462Sdim  return true;
1886239462Sdim}
1887239462Sdim
1888243830Sdim//===----------------------------------------------------------------------===//
1889243830Sdim//                 Interference checking and interval joining
1890243830Sdim//===----------------------------------------------------------------------===//
1891243830Sdim//
1892243830Sdim// In the easiest case, the two live ranges being joined are disjoint, and
1893243830Sdim// there is no interference to consider. It is quite common, though, to have
1894243830Sdim// overlapping live ranges, and we need to check if the interference can be
1895243830Sdim// resolved.
1896243830Sdim//
1897243830Sdim// The live range of a single SSA value forms a sub-tree of the dominator tree.
1898243830Sdim// This means that two SSA values overlap if and only if the def of one value
1899243830Sdim// is contained in the live range of the other value. As a special case, the
1900243830Sdim// overlapping values can be defined at the same index.
1901243830Sdim//
1902243830Sdim// The interference from an overlapping def can be resolved in these cases:
1903243830Sdim//
1904243830Sdim// 1. Coalescable copies. The value is defined by a copy that would become an
1905243830Sdim//    identity copy after joining SrcReg and DstReg. The copy instruction will
1906243830Sdim//    be removed, and the value will be merged with the source value.
1907243830Sdim//
1908243830Sdim//    There can be several copies back and forth, causing many values to be
1909243830Sdim//    merged into one. We compute a list of ultimate values in the joined live
1910243830Sdim//    range as well as a mappings from the old value numbers.
1911243830Sdim//
1912243830Sdim// 2. IMPLICIT_DEF. This instruction is only inserted to ensure all PHI
1913243830Sdim//    predecessors have a live out value. It doesn't cause real interference,
1914243830Sdim//    and can be merged into the value it overlaps. Like a coalescable copy, it
1915243830Sdim//    can be erased after joining.
1916243830Sdim//
1917243830Sdim// 3. Copy of external value. The overlapping def may be a copy of a value that
1918243830Sdim//    is already in the other register. This is like a coalescable copy, but
1919243830Sdim//    the live range of the source register must be trimmed after erasing the
1920243830Sdim//    copy instruction:
1921243830Sdim//
1922243830Sdim//      %src = COPY %ext
1923243830Sdim//      %dst = COPY %ext  <-- Remove this COPY, trim the live range of %ext.
1924243830Sdim//
1925243830Sdim// 4. Clobbering undefined lanes. Vector registers are sometimes built by
1926243830Sdim//    defining one lane at a time:
1927243830Sdim//
1928243830Sdim//      %dst:ssub0<def,read-undef> = FOO
1929243830Sdim//      %src = BAR
1930327952Sdim//      %dst:ssub1 = COPY %src
1931243830Sdim//
1932243830Sdim//    The live range of %src overlaps the %dst value defined by FOO, but
1933243830Sdim//    merging %src into %dst:ssub1 is only going to clobber the ssub1 lane
1934243830Sdim//    which was undef anyway.
1935243830Sdim//
1936243830Sdim//    The value mapping is more complicated in this case. The final live range
1937243830Sdim//    will have different value numbers for both FOO and BAR, but there is no
1938243830Sdim//    simple mapping from old to new values. It may even be necessary to add
1939243830Sdim//    new PHI values.
1940243830Sdim//
1941243830Sdim// 5. Clobbering dead lanes. A def may clobber a lane of a vector register that
1942243830Sdim//    is live, but never read. This can happen because we don't compute
1943243830Sdim//    individual live ranges per lane.
1944243830Sdim//
1945327952Sdim//      %dst = FOO
1946243830Sdim//      %src = BAR
1947327952Sdim//      %dst:ssub1 = COPY %src
1948243830Sdim//
1949243830Sdim//    This kind of interference is only resolved locally. If the clobbered
1950243830Sdim//    lane value escapes the block, the join is aborted.
1951224145Sdim
1952243830Sdimnamespace {
1953327952Sdim
1954243830Sdim/// Track information about values in a single virtual register about to be
1955243830Sdim/// joined. Objects of this class are always created in pairs - one for each
1956280031Sdim/// side of the CoalescerPair (or one for each lane of a side of the coalescer
1957280031Sdim/// pair)
1958243830Sdimclass JoinVals {
1959280031Sdim  /// Live range we work on.
1960280031Sdim  LiveRange &LR;
1961327952Sdim
1962280031Sdim  /// (Main) register we work on.
1963280031Sdim  const unsigned Reg;
1964224145Sdim
1965288943Sdim  /// Reg (and therefore the values in this liverange) will end up as
1966288943Sdim  /// subregister SubIdx in the coalesced register. Either CP.DstIdx or
1967288943Sdim  /// CP.SrcIdx.
1968280031Sdim  const unsigned SubIdx;
1969327952Sdim
1970288943Sdim  /// The LaneMask that this liverange will occupy the coalesced register. May
1971288943Sdim  /// be smaller than the lanemask produced by SubIdx when merging subranges.
1972296417Sdim  const LaneBitmask LaneMask;
1973224145Sdim
1974280031Sdim  /// This is true when joining sub register ranges, false when joining main
1975280031Sdim  /// ranges.
1976280031Sdim  const bool SubRangeJoin;
1977327952Sdim
1978280031Sdim  /// Whether the current LiveInterval tracks subregister liveness.
1979280031Sdim  const bool TrackSubRegLiveness;
1980280031Sdim
1981288943Sdim  /// Values that will be present in the final live range.
1982243830Sdim  SmallVectorImpl<VNInfo*> &NewVNInfo;
1983224145Sdim
1984243830Sdim  const CoalescerPair &CP;
1985243830Sdim  LiveIntervals *LIS;
1986243830Sdim  SlotIndexes *Indexes;
1987243830Sdim  const TargetRegisterInfo *TRI;
1988224145Sdim
1989288943Sdim  /// Value number assignments. Maps value numbers in LI to entries in
1990288943Sdim  /// NewVNInfo. This is suitable for passing to LiveInterval::join().
1991243830Sdim  SmallVector<int, 8> Assignments;
1992224145Sdim
1993288943Sdim  /// Conflict resolution for overlapping values.
1994243830Sdim  enum ConflictResolution {
1995288943Sdim    /// No overlap, simply keep this value.
1996243830Sdim    CR_Keep,
1997224145Sdim
1998288943Sdim    /// Merge this value into OtherVNI and erase the defining instruction.
1999288943Sdim    /// Used for IMPLICIT_DEF, coalescable copies, and copies from external
2000288943Sdim    /// values.
2001243830Sdim    CR_Erase,
2002224145Sdim
2003288943Sdim    /// Merge this value into OtherVNI but keep the defining instruction.
2004288943Sdim    /// This is for the special case where OtherVNI is defined by the same
2005288943Sdim    /// instruction.
2006243830Sdim    CR_Merge,
2007224145Sdim
2008288943Sdim    /// Keep this value, and have it replace OtherVNI where possible. This
2009288943Sdim    /// complicates value mapping since OtherVNI maps to two different values
2010288943Sdim    /// before and after this def.
2011288943Sdim    /// Used when clobbering undefined or dead lanes.
2012243830Sdim    CR_Replace,
2013224145Sdim
2014288943Sdim    /// Unresolved conflict. Visit later when all values have been mapped.
2015243830Sdim    CR_Unresolved,
2016224145Sdim
2017288943Sdim    /// Unresolvable conflict. Abort the join.
2018243830Sdim    CR_Impossible
2019243830Sdim  };
2020224145Sdim
2021288943Sdim  /// Per-value info for LI. The lane bit masks are all relative to the final
2022288943Sdim  /// joined register, so they can be compared directly between SrcReg and
2023288943Sdim  /// DstReg.
2024243830Sdim  struct Val {
2025327952Sdim    ConflictResolution Resolution = CR_Keep;
2026224145Sdim
2027288943Sdim    /// Lanes written by this def, 0 for unanalyzed values.
2028296417Sdim    LaneBitmask WriteLanes;
2029224145Sdim
2030288943Sdim    /// Lanes with defined values in this register. Other lanes are undef and
2031288943Sdim    /// safe to clobber.
2032296417Sdim    LaneBitmask ValidLanes;
2033224145Sdim
2034288943Sdim    /// Value in LI being redefined by this def.
2035327952Sdim    VNInfo *RedefVNI = nullptr;
2036224145Sdim
2037288943Sdim    /// Value in the other live range that overlaps this def, if any.
2038327952Sdim    VNInfo *OtherVNI = nullptr;
2039239462Sdim
2040288943Sdim    /// Is this value an IMPLICIT_DEF that can be erased?
2041288943Sdim    ///
2042288943Sdim    /// IMPLICIT_DEF values should only exist at the end of a basic block that
2043288943Sdim    /// is a predecessor to a phi-value. These IMPLICIT_DEF instructions can be
2044288943Sdim    /// safely erased if they are overlapping a live value in the other live
2045288943Sdim    /// interval.
2046288943Sdim    ///
2047288943Sdim    /// Weird control flow graphs and incomplete PHI handling in
2048288943Sdim    /// ProcessImplicitDefs can very rarely create IMPLICIT_DEF values with
2049288943Sdim    /// longer live ranges. Such IMPLICIT_DEF values should be treated like
2050288943Sdim    /// normal values.
2051327952Sdim    bool ErasableImplicitDef = false;
2052224145Sdim
2053288943Sdim    /// True when the live range of this value will be pruned because of an
2054288943Sdim    /// overlapping CR_Replace value in the other live range.
2055327952Sdim    bool Pruned = false;
2056224145Sdim
2057288943Sdim    /// True once Pruned above has been computed.
2058327952Sdim    bool PrunedComputed = false;
2059224145Sdim
2060327952Sdim    Val() = default;
2061224145Sdim
2062314564Sdim    bool isAnalyzed() const { return WriteLanes.any(); }
2063243830Sdim  };
2064224145Sdim
2065288943Sdim  /// One entry per value number in LI.
2066243830Sdim  SmallVector<Val, 8> Vals;
2067224145Sdim
2068288943Sdim  /// Compute the bitmask of lanes actually written by DefMI.
2069288943Sdim  /// Set Redef if there are any partial register definitions that depend on the
2070288943Sdim  /// previous value of the register.
2071296417Sdim  LaneBitmask computeWriteLanes(const MachineInstr *DefMI, bool &Redef) const;
2072288943Sdim
2073288943Sdim  /// Find the ultimate value that VNI was copied from.
2074280031Sdim  std::pair<const VNInfo*,unsigned> followCopyChain(const VNInfo *VNI) const;
2075288943Sdim
2076280031Sdim  bool valuesIdentical(VNInfo *Val0, VNInfo *Val1, const JoinVals &Other) const;
2077288943Sdim
2078288943Sdim  /// Analyze ValNo in this live range, and set all fields of Vals[ValNo].
2079288943Sdim  /// Return a conflict resolution when possible, but leave the hard cases as
2080288943Sdim  /// CR_Unresolved.
2081288943Sdim  /// Recursively calls computeAssignment() on this and Other, guaranteeing that
2082288943Sdim  /// both OtherVNI and RedefVNI have been analyzed and mapped before returning.
2083288943Sdim  /// The recursion always goes upwards in the dominator tree, making loops
2084288943Sdim  /// impossible.
2085243830Sdim  ConflictResolution analyzeValue(unsigned ValNo, JoinVals &Other);
2086288943Sdim
2087288943Sdim  /// Compute the value assignment for ValNo in RI.
2088288943Sdim  /// This may be called recursively by analyzeValue(), but never for a ValNo on
2089288943Sdim  /// the stack.
2090243830Sdim  void computeAssignment(unsigned ValNo, JoinVals &Other);
2091288943Sdim
2092288943Sdim  /// Assuming ValNo is going to clobber some valid lanes in Other.LR, compute
2093288943Sdim  /// the extent of the tainted lanes in the block.
2094288943Sdim  ///
2095288943Sdim  /// Multiple values in Other.LR can be affected since partial redefinitions
2096288943Sdim  /// can preserve previously tainted lanes.
2097288943Sdim  ///
2098288943Sdim  ///   1 %dst = VLOAD           <-- Define all lanes in %dst
2099288943Sdim  ///   2 %src = FOO             <-- ValNo to be joined with %dst:ssub0
2100288943Sdim  ///   3 %dst:ssub1 = BAR       <-- Partial redef doesn't clear taint in ssub0
2101288943Sdim  ///   4 %dst:ssub0 = COPY %src <-- Conflict resolved, ssub0 wasn't read
2102288943Sdim  ///
2103288943Sdim  /// For each ValNo in Other that is affected, add an (EndIndex, TaintedLanes)
2104288943Sdim  /// entry to TaintedVals.
2105288943Sdim  ///
2106288943Sdim  /// Returns false if the tainted lanes extend beyond the basic block.
2107327952Sdim  bool
2108327952Sdim  taintExtent(unsigned ValNo, LaneBitmask TaintedLanes, JoinVals &Other,
2109327952Sdim              SmallVectorImpl<std::pair<SlotIndex, LaneBitmask>> &TaintExtent);
2110288943Sdim
2111288943Sdim  /// Return true if MI uses any of the given Lanes from Reg.
2112288943Sdim  /// This does not include partial redefinitions of Reg.
2113309124Sdim  bool usesLanes(const MachineInstr &MI, unsigned, unsigned, LaneBitmask) const;
2114288943Sdim
2115288943Sdim  /// Determine if ValNo is a copy of a value number in LR or Other.LR that will
2116288943Sdim  /// be pruned:
2117288943Sdim  ///
2118288943Sdim  ///   %dst = COPY %src
2119288943Sdim  ///   %src = COPY %dst  <-- This value to be pruned.
2120288943Sdim  ///   %dst = COPY %src  <-- This value is a copy of a pruned value.
2121243830Sdim  bool isPrunedValue(unsigned ValNo, JoinVals &Other);
2122243830Sdim
2123243830Sdimpublic:
2124296417Sdim  JoinVals(LiveRange &LR, unsigned Reg, unsigned SubIdx, LaneBitmask LaneMask,
2125280031Sdim           SmallVectorImpl<VNInfo*> &newVNInfo, const CoalescerPair &cp,
2126280031Sdim           LiveIntervals *lis, const TargetRegisterInfo *TRI, bool SubRangeJoin,
2127280031Sdim           bool TrackSubRegLiveness)
2128280031Sdim    : LR(LR), Reg(Reg), SubIdx(SubIdx), LaneMask(LaneMask),
2129280031Sdim      SubRangeJoin(SubRangeJoin), TrackSubRegLiveness(TrackSubRegLiveness),
2130280031Sdim      NewVNInfo(newVNInfo), CP(cp), LIS(lis), Indexes(LIS->getSlotIndexes()),
2131327952Sdim      TRI(TRI), Assignments(LR.getNumValNums(), -1), Vals(LR.getNumValNums()) {}
2132243830Sdim
2133280031Sdim  /// Analyze defs in LR and compute a value mapping in NewVNInfo.
2134243830Sdim  /// Returns false if any conflicts were impossible to resolve.
2135243830Sdim  bool mapValues(JoinVals &Other);
2136243830Sdim
2137243830Sdim  /// Try to resolve conflicts that require all values to be mapped.
2138243830Sdim  /// Returns false if any conflicts were impossible to resolve.
2139243830Sdim  bool resolveConflicts(JoinVals &Other);
2140243830Sdim
2141280031Sdim  /// Prune the live range of values in Other.LR where they would conflict with
2142280031Sdim  /// CR_Replace values in LR. Collect end points for restoring the live range
2143243830Sdim  /// after joining.
2144280031Sdim  void pruneValues(JoinVals &Other, SmallVectorImpl<SlotIndex> &EndPoints,
2145280031Sdim                   bool changeInstrs);
2146243830Sdim
2147288943Sdim  /// Removes subranges starting at copies that get removed. This sometimes
2148288943Sdim  /// happens when undefined subranges are copied around. These ranges contain
2149296417Sdim  /// no useful information and can be removed.
2150296417Sdim  void pruneSubRegValues(LiveInterval &LI, LaneBitmask &ShrinkMask);
2151280031Sdim
2152314564Sdim  /// Pruning values in subranges can lead to removing segments in these
2153314564Sdim  /// subranges started by IMPLICIT_DEFs. The corresponding segments in
2154314564Sdim  /// the main range also need to be removed. This function will mark
2155314564Sdim  /// the corresponding values in the main range as pruned, so that
2156314564Sdim  /// eraseInstrs can do the final cleanup.
2157314564Sdim  /// The parameter @p LI must be the interval whose main range is the
2158314564Sdim  /// live range LR.
2159314564Sdim  void pruneMainSegments(LiveInterval &LI, bool &ShrinkMainRange);
2160314564Sdim
2161243830Sdim  /// Erase any machine instructions that have been coalesced away.
2162243830Sdim  /// Add erased instructions to ErasedInstrs.
2163243830Sdim  /// Add foreign virtual registers to ShrinkRegs if their live range ended at
2164243830Sdim  /// the erased instrs.
2165280031Sdim  void eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs,
2166314564Sdim                   SmallVectorImpl<unsigned> &ShrinkRegs,
2167314564Sdim                   LiveInterval *LI = nullptr);
2168243830Sdim
2169288943Sdim  /// Remove liverange defs at places where implicit defs will be removed.
2170288943Sdim  void removeImplicitDefs();
2171288943Sdim
2172243830Sdim  /// Get the value assignments suitable for passing to LiveInterval::join.
2173243830Sdim  const int *getAssignments() const { return Assignments.data(); }
2174243830Sdim};
2175327952Sdim
2176243830Sdim} // end anonymous namespace
2177243830Sdim
2178296417SdimLaneBitmask JoinVals::computeWriteLanes(const MachineInstr *DefMI, bool &Redef)
2179280031Sdim  const {
2180314564Sdim  LaneBitmask L;
2181288943Sdim  for (const MachineOperand &MO : DefMI->operands()) {
2182288943Sdim    if (!MO.isReg() || MO.getReg() != Reg || !MO.isDef())
2183224145Sdim      continue;
2184243830Sdim    L |= TRI->getSubRegIndexLaneMask(
2185288943Sdim           TRI->composeSubRegIndices(SubIdx, MO.getSubReg()));
2186288943Sdim    if (MO.readsReg())
2187243830Sdim      Redef = true;
2188243830Sdim  }
2189243830Sdim  return L;
2190243830Sdim}
2191224145Sdim
2192280031Sdimstd::pair<const VNInfo*, unsigned> JoinVals::followCopyChain(
2193280031Sdim    const VNInfo *VNI) const {
2194280031Sdim  unsigned Reg = this->Reg;
2195280031Sdim
2196243830Sdim  while (!VNI->isPHIDef()) {
2197280031Sdim    SlotIndex Def = VNI->def;
2198280031Sdim    MachineInstr *MI = Indexes->getInstructionFromIndex(Def);
2199243830Sdim    assert(MI && "No defining instruction");
2200243830Sdim    if (!MI->isFullCopy())
2201280031Sdim      return std::make_pair(VNI, Reg);
2202280031Sdim    unsigned SrcReg = MI->getOperand(1).getReg();
2203280031Sdim    if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
2204280031Sdim      return std::make_pair(VNI, Reg);
2205280031Sdim
2206280031Sdim    const LiveInterval &LI = LIS->getInterval(SrcReg);
2207280031Sdim    const VNInfo *ValueIn;
2208280031Sdim    // No subrange involved.
2209280031Sdim    if (!SubRangeJoin || !LI.hasSubRanges()) {
2210280031Sdim      LiveQueryResult LRQ = LI.Query(Def);
2211280031Sdim      ValueIn = LRQ.valueIn();
2212280031Sdim    } else {
2213280031Sdim      // Query subranges. Pick the first matching one.
2214280031Sdim      ValueIn = nullptr;
2215280031Sdim      for (const LiveInterval::SubRange &S : LI.subranges()) {
2216280031Sdim        // Transform lanemask to a mask in the joined live interval.
2217296417Sdim        LaneBitmask SMask = TRI->composeSubRegIndexLaneMask(SubIdx, S.LaneMask);
2218314564Sdim        if ((SMask & LaneMask).none())
2219280031Sdim          continue;
2220280031Sdim        LiveQueryResult LRQ = S.Query(Def);
2221280031Sdim        ValueIn = LRQ.valueIn();
2222280031Sdim        break;
2223280031Sdim      }
2224280031Sdim    }
2225280031Sdim    if (ValueIn == nullptr)
2226243830Sdim      break;
2227280031Sdim    VNI = ValueIn;
2228280031Sdim    Reg = SrcReg;
2229224145Sdim  }
2230280031Sdim  return std::make_pair(VNI, Reg);
2231243830Sdim}
2232224145Sdim
2233280031Sdimbool JoinVals::valuesIdentical(VNInfo *Value0, VNInfo *Value1,
2234280031Sdim                               const JoinVals &Other) const {
2235280031Sdim  const VNInfo *Orig0;
2236280031Sdim  unsigned Reg0;
2237280031Sdim  std::tie(Orig0, Reg0) = followCopyChain(Value0);
2238280031Sdim  if (Orig0 == Value1)
2239280031Sdim    return true;
2240280031Sdim
2241280031Sdim  const VNInfo *Orig1;
2242280031Sdim  unsigned Reg1;
2243280031Sdim  std::tie(Orig1, Reg1) = Other.followCopyChain(Value1);
2244280031Sdim
2245280031Sdim  // The values are equal if they are defined at the same place and use the
2246280031Sdim  // same register. Note that we cannot compare VNInfos directly as some of
2247280031Sdim  // them might be from a copy created in mergeSubRangeInto()  while the other
2248280031Sdim  // is from the original LiveInterval.
2249280031Sdim  return Orig0->def == Orig1->def && Reg0 == Reg1;
2250280031Sdim}
2251280031Sdim
2252243830SdimJoinVals::ConflictResolution
2253243830SdimJoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
2254243830Sdim  Val &V = Vals[ValNo];
2255243830Sdim  assert(!V.isAnalyzed() && "Value has already been analyzed!");
2256280031Sdim  VNInfo *VNI = LR.getValNumInfo(ValNo);
2257243830Sdim  if (VNI->isUnused()) {
2258314564Sdim    V.WriteLanes = LaneBitmask::getAll();
2259243830Sdim    return CR_Keep;
2260243830Sdim  }
2261224145Sdim
2262243830Sdim  // Get the instruction defining this value, compute the lanes written.
2263276479Sdim  const MachineInstr *DefMI = nullptr;
2264243830Sdim  if (VNI->isPHIDef()) {
2265243830Sdim    // Conservatively assume that all lanes in a PHI are valid.
2266327952Sdim    LaneBitmask Lanes = SubRangeJoin ? LaneBitmask::getLane(0)
2267314564Sdim                                     : TRI->getSubRegIndexLaneMask(SubIdx);
2268280031Sdim    V.ValidLanes = V.WriteLanes = Lanes;
2269243830Sdim  } else {
2270243830Sdim    DefMI = Indexes->getInstructionFromIndex(VNI->def);
2271280031Sdim    assert(DefMI != nullptr);
2272280031Sdim    if (SubRangeJoin) {
2273280031Sdim      // We don't care about the lanes when joining subregister ranges.
2274327952Sdim      V.WriteLanes = V.ValidLanes = LaneBitmask::getLane(0);
2275288943Sdim      if (DefMI->isImplicitDef()) {
2276314564Sdim        V.ValidLanes = LaneBitmask::getNone();
2277288943Sdim        V.ErasableImplicitDef = true;
2278288943Sdim      }
2279280031Sdim    } else {
2280280031Sdim      bool Redef = false;
2281280031Sdim      V.ValidLanes = V.WriteLanes = computeWriteLanes(DefMI, Redef);
2282224145Sdim
2283280031Sdim      // If this is a read-modify-write instruction, there may be more valid
2284280031Sdim      // lanes than the ones written by this instruction.
2285280031Sdim      // This only covers partial redef operands. DefMI may have normal use
2286280031Sdim      // operands reading the register. They don't contribute valid lanes.
2287280031Sdim      //
2288280031Sdim      // This adds ssub1 to the set of valid lanes in %src:
2289280031Sdim      //
2290327952Sdim      //   %src:ssub1 = FOO
2291280031Sdim      //
2292280031Sdim      // This leaves only ssub1 valid, making any other lanes undef:
2293280031Sdim      //
2294280031Sdim      //   %src:ssub1<def,read-undef> = FOO %src:ssub2
2295280031Sdim      //
2296280031Sdim      // The <read-undef> flag on the def operand means that old lane values are
2297280031Sdim      // not important.
2298280031Sdim      if (Redef) {
2299280031Sdim        V.RedefVNI = LR.Query(VNI->def).valueIn();
2300280031Sdim        assert((TrackSubRegLiveness || V.RedefVNI) &&
2301280031Sdim               "Instruction is reading nonexistent value");
2302280031Sdim        if (V.RedefVNI != nullptr) {
2303280031Sdim          computeAssignment(V.RedefVNI->id, Other);
2304280031Sdim          V.ValidLanes |= Vals[V.RedefVNI->id].ValidLanes;
2305280031Sdim        }
2306280031Sdim      }
2307224145Sdim
2308280031Sdim      // An IMPLICIT_DEF writes undef values.
2309280031Sdim      if (DefMI->isImplicitDef()) {
2310280031Sdim        // We normally expect IMPLICIT_DEF values to be live only until the end
2311280031Sdim        // of their block. If the value is really live longer and gets pruned in
2312280031Sdim        // another block, this flag is cleared again.
2313280031Sdim        V.ErasableImplicitDef = true;
2314280031Sdim        V.ValidLanes &= ~V.WriteLanes;
2315280031Sdim      }
2316243830Sdim    }
2317224145Sdim  }
2318224145Sdim
2319243830Sdim  // Find the value in Other that overlaps VNI->def, if any.
2320280031Sdim  LiveQueryResult OtherLRQ = Other.LR.Query(VNI->def);
2321224145Sdim
2322243830Sdim  // It is possible that both values are defined by the same instruction, or
2323243830Sdim  // the values are PHIs defined in the same block. When that happens, the two
2324243830Sdim  // values should be merged into one, but not into any preceding value.
2325243830Sdim  // The first value defined or visited gets CR_Keep, the other gets CR_Merge.
2326243830Sdim  if (VNInfo *OtherVNI = OtherLRQ.valueDefined()) {
2327243830Sdim    assert(SlotIndex::isSameInstr(VNI->def, OtherVNI->def) && "Broken LRQ");
2328243830Sdim
2329243830Sdim    // One value stays, the other is merged. Keep the earlier one, or the first
2330243830Sdim    // one we see.
2331243830Sdim    if (OtherVNI->def < VNI->def)
2332243830Sdim      Other.computeAssignment(OtherVNI->id, *this);
2333243830Sdim    else if (VNI->def < OtherVNI->def && OtherLRQ.valueIn()) {
2334243830Sdim      // This is an early-clobber def overlapping a live-in value in the other
2335243830Sdim      // register. Not mergeable.
2336243830Sdim      V.OtherVNI = OtherLRQ.valueIn();
2337243830Sdim      return CR_Impossible;
2338243830Sdim    }
2339243830Sdim    V.OtherVNI = OtherVNI;
2340243830Sdim    Val &OtherV = Other.Vals[OtherVNI->id];
2341243830Sdim    // Keep this value, check for conflicts when analyzing OtherVNI.
2342243830Sdim    if (!OtherV.isAnalyzed())
2343243830Sdim      return CR_Keep;
2344243830Sdim    // Both sides have been analyzed now.
2345243830Sdim    // Allow overlapping PHI values. Any real interference would show up in a
2346243830Sdim    // predecessor, the PHI itself can't introduce any conflicts.
2347243830Sdim    if (VNI->isPHIDef())
2348243830Sdim      return CR_Merge;
2349314564Sdim    if ((V.ValidLanes & OtherV.ValidLanes).any())
2350243830Sdim      // Overlapping lanes can't be resolved.
2351243830Sdim      return CR_Impossible;
2352243830Sdim    else
2353243830Sdim      return CR_Merge;
2354224145Sdim  }
2355224145Sdim
2356243830Sdim  // No simultaneous def. Is Other live at the def?
2357243830Sdim  V.OtherVNI = OtherLRQ.valueIn();
2358243830Sdim  if (!V.OtherVNI)
2359243830Sdim    // No overlap, no conflict.
2360243830Sdim    return CR_Keep;
2361243830Sdim
2362243830Sdim  assert(!SlotIndex::isSameInstr(VNI->def, V.OtherVNI->def) && "Broken LRQ");
2363243830Sdim
2364243830Sdim  // We have overlapping values, or possibly a kill of Other.
2365243830Sdim  // Recursively compute assignments up the dominator tree.
2366243830Sdim  Other.computeAssignment(V.OtherVNI->id, *this);
2367249423Sdim  Val &OtherV = Other.Vals[V.OtherVNI->id];
2368243830Sdim
2369249423Sdim  // Check if OtherV is an IMPLICIT_DEF that extends beyond its basic block.
2370249423Sdim  // This shouldn't normally happen, but ProcessImplicitDefs can leave such
2371249423Sdim  // IMPLICIT_DEF instructions behind, and there is nothing wrong with it
2372249423Sdim  // technically.
2373249423Sdim  //
2374309124Sdim  // When it happens, treat that IMPLICIT_DEF as a normal value, and don't try
2375249423Sdim  // to erase the IMPLICIT_DEF instruction.
2376249423Sdim  if (OtherV.ErasableImplicitDef && DefMI &&
2377249423Sdim      DefMI->getParent() != Indexes->getMBBFromIndex(V.OtherVNI->def)) {
2378249423Sdim    DEBUG(dbgs() << "IMPLICIT_DEF defined at " << V.OtherVNI->def
2379327952Sdim                 << " extends into " << printMBBReference(*DefMI->getParent())
2380249423Sdim                 << ", keeping it.\n");
2381249423Sdim    OtherV.ErasableImplicitDef = false;
2382249423Sdim  }
2383249423Sdim
2384243830Sdim  // Allow overlapping PHI values. Any real interference would show up in a
2385243830Sdim  // predecessor, the PHI itself can't introduce any conflicts.
2386243830Sdim  if (VNI->isPHIDef())
2387243830Sdim    return CR_Replace;
2388243830Sdim
2389243830Sdim  // Check for simple erasable conflicts.
2390280031Sdim  if (DefMI->isImplicitDef()) {
2391280031Sdim    // We need the def for the subregister if there is nothing else live at the
2392280031Sdim    // subrange at this point.
2393280031Sdim    if (TrackSubRegLiveness
2394314564Sdim        && (V.WriteLanes & (OtherV.ValidLanes | OtherV.WriteLanes)).none())
2395280031Sdim      return CR_Replace;
2396243830Sdim    return CR_Erase;
2397280031Sdim  }
2398243830Sdim
2399243830Sdim  // Include the non-conflict where DefMI is a coalescable copy that kills
2400243830Sdim  // OtherVNI. We still want the copy erased and value numbers merged.
2401243830Sdim  if (CP.isCoalescable(DefMI)) {
2402243830Sdim    // Some of the lanes copied from OtherVNI may be undef, making them undef
2403243830Sdim    // here too.
2404243830Sdim    V.ValidLanes &= ~V.WriteLanes | OtherV.ValidLanes;
2405243830Sdim    return CR_Erase;
2406224145Sdim  }
2407224145Sdim
2408243830Sdim  // This may not be a real conflict if DefMI simply kills Other and defines
2409243830Sdim  // VNI.
2410243830Sdim  if (OtherLRQ.isKill() && OtherLRQ.endPoint() <= VNI->def)
2411243830Sdim    return CR_Keep;
2412224145Sdim
2413243830Sdim  // Handle the case where VNI and OtherVNI can be proven to be identical:
2414243830Sdim  //
2415243830Sdim  //   %other = COPY %ext
2416243830Sdim  //   %this  = COPY %ext <-- Erase this copy
2417243830Sdim  //
2418280031Sdim  if (DefMI->isFullCopy() && !CP.isPartial()
2419280031Sdim      && valuesIdentical(VNI, V.OtherVNI, Other))
2420243830Sdim    return CR_Erase;
2421239462Sdim
2422243830Sdim  // If the lanes written by this instruction were all undef in OtherVNI, it is
2423243830Sdim  // still safe to join the live ranges. This can't be done with a simple value
2424243830Sdim  // mapping, though - OtherVNI will map to multiple values:
2425243830Sdim  //
2426243830Sdim  //   1 %dst:ssub0 = FOO                <-- OtherVNI
2427243830Sdim  //   2 %src = BAR                      <-- VNI
2428327952Sdim  //   3 %dst:ssub1 = COPY killed %src    <-- Eliminate this copy.
2429327952Sdim  //   4 BAZ killed %dst
2430327952Sdim  //   5 QUUX killed %src
2431243830Sdim  //
2432243830Sdim  // Here OtherVNI will map to itself in [1;2), but to VNI in [2;5). CR_Replace
2433243830Sdim  // handles this complex value mapping.
2434314564Sdim  if ((V.WriteLanes & OtherV.ValidLanes).none())
2435243830Sdim    return CR_Replace;
2436243830Sdim
2437243830Sdim  // If the other live range is killed by DefMI and the live ranges are still
2438243830Sdim  // overlapping, it must be because we're looking at an early clobber def:
2439243830Sdim  //
2440327952Sdim  //   %dst<def,early-clobber> = ASM killed %src
2441243830Sdim  //
2442243830Sdim  // In this case, it is illegal to merge the two live ranges since the early
2443243830Sdim  // clobber def would clobber %src before it was read.
2444243830Sdim  if (OtherLRQ.isKill()) {
2445243830Sdim    // This case where the def doesn't overlap the kill is handled above.
2446243830Sdim    assert(VNI->def.isEarlyClobber() &&
2447243830Sdim           "Only early clobber defs can overlap a kill");
2448243830Sdim    return CR_Impossible;
2449243830Sdim  }
2450243830Sdim
2451243830Sdim  // VNI is clobbering live lanes in OtherVNI, but there is still the
2452243830Sdim  // possibility that no instructions actually read the clobbered lanes.
2453243830Sdim  // If we're clobbering all the lanes in OtherVNI, at least one must be read.
2454280031Sdim  // Otherwise Other.RI wouldn't be live here.
2455314564Sdim  if ((TRI->getSubRegIndexLaneMask(Other.SubIdx) & ~V.WriteLanes).none())
2456243830Sdim    return CR_Impossible;
2457243830Sdim
2458243830Sdim  // We need to verify that no instructions are reading the clobbered lanes. To
2459243830Sdim  // save compile time, we'll only check that locally. Don't allow the tainted
2460243830Sdim  // value to escape the basic block.
2461243830Sdim  MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def);
2462243830Sdim  if (OtherLRQ.endPoint() >= Indexes->getMBBEndIdx(MBB))
2463243830Sdim    return CR_Impossible;
2464243830Sdim
2465243830Sdim  // There are still some things that could go wrong besides clobbered lanes
2466243830Sdim  // being read, for example OtherVNI may be only partially redefined in MBB,
2467243830Sdim  // and some clobbered lanes could escape the block. Save this analysis for
2468243830Sdim  // resolveConflicts() when all values have been mapped. We need to know
2469243830Sdim  // RedefVNI and WriteLanes for any later defs in MBB, and we can't compute
2470243830Sdim  // that now - the recursive analyzeValue() calls must go upwards in the
2471243830Sdim  // dominator tree.
2472243830Sdim  return CR_Unresolved;
2473243830Sdim}
2474243830Sdim
2475243830Sdimvoid JoinVals::computeAssignment(unsigned ValNo, JoinVals &Other) {
2476243830Sdim  Val &V = Vals[ValNo];
2477243830Sdim  if (V.isAnalyzed()) {
2478243830Sdim    // Recursion should always move up the dominator tree, so ValNo is not
2479243830Sdim    // supposed to reappear before it has been assigned.
2480243830Sdim    assert(Assignments[ValNo] != -1 && "Bad recursion?");
2481243830Sdim    return;
2482243830Sdim  }
2483243830Sdim  switch ((V.Resolution = analyzeValue(ValNo, Other))) {
2484243830Sdim  case CR_Erase:
2485243830Sdim  case CR_Merge:
2486243830Sdim    // Merge this ValNo into OtherVNI.
2487243830Sdim    assert(V.OtherVNI && "OtherVNI not assigned, can't merge.");
2488243830Sdim    assert(Other.Vals[V.OtherVNI->id].isAnalyzed() && "Missing recursion");
2489243830Sdim    Assignments[ValNo] = Other.Assignments[V.OtherVNI->id];
2490327952Sdim    DEBUG(dbgs() << "\t\tmerge " << printReg(Reg) << ':' << ValNo << '@'
2491280031Sdim                 << LR.getValNumInfo(ValNo)->def << " into "
2492327952Sdim                 << printReg(Other.Reg) << ':' << V.OtherVNI->id << '@'
2493243830Sdim                 << V.OtherVNI->def << " --> @"
2494243830Sdim                 << NewVNInfo[Assignments[ValNo]]->def << '\n');
2495243830Sdim    break;
2496243830Sdim  case CR_Replace:
2497280031Sdim  case CR_Unresolved: {
2498243830Sdim    // The other value is going to be pruned if this join is successful.
2499243830Sdim    assert(V.OtherVNI && "OtherVNI not assigned, can't prune");
2500280031Sdim    Val &OtherV = Other.Vals[V.OtherVNI->id];
2501280031Sdim    // We cannot erase an IMPLICIT_DEF if we don't have valid values for all
2502280031Sdim    // its lanes.
2503314564Sdim    if ((OtherV.WriteLanes & ~V.ValidLanes).any() && TrackSubRegLiveness)
2504280031Sdim      OtherV.ErasableImplicitDef = false;
2505280031Sdim    OtherV.Pruned = true;
2506314564Sdim    LLVM_FALLTHROUGH;
2507280031Sdim  }
2508243830Sdim  default:
2509243830Sdim    // This value number needs to go in the final joined live range.
2510243830Sdim    Assignments[ValNo] = NewVNInfo.size();
2511280031Sdim    NewVNInfo.push_back(LR.getValNumInfo(ValNo));
2512243830Sdim    break;
2513243830Sdim  }
2514243830Sdim}
2515243830Sdim
2516243830Sdimbool JoinVals::mapValues(JoinVals &Other) {
2517280031Sdim  for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
2518243830Sdim    computeAssignment(i, Other);
2519243830Sdim    if (Vals[i].Resolution == CR_Impossible) {
2520327952Sdim      DEBUG(dbgs() << "\t\tinterference at " << printReg(Reg) << ':' << i
2521280031Sdim                   << '@' << LR.getValNumInfo(i)->def << '\n');
2522243830Sdim      return false;
2523224145Sdim    }
2524224145Sdim  }
2525243830Sdim  return true;
2526243830Sdim}
2527224145Sdim
2528243830Sdimbool JoinVals::
2529296417SdimtaintExtent(unsigned ValNo, LaneBitmask TaintedLanes, JoinVals &Other,
2530327952Sdim            SmallVectorImpl<std::pair<SlotIndex, LaneBitmask>> &TaintExtent) {
2531280031Sdim  VNInfo *VNI = LR.getValNumInfo(ValNo);
2532243830Sdim  MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def);
2533243830Sdim  SlotIndex MBBEnd = Indexes->getMBBEndIdx(MBB);
2534239462Sdim
2535280031Sdim  // Scan Other.LR from VNI.def to MBBEnd.
2536280031Sdim  LiveInterval::iterator OtherI = Other.LR.find(VNI->def);
2537280031Sdim  assert(OtherI != Other.LR.end() && "No conflict?");
2538243830Sdim  do {
2539243830Sdim    // OtherI is pointing to a tainted value. Abort the join if the tainted
2540243830Sdim    // lanes escape the block.
2541243830Sdim    SlotIndex End = OtherI->end;
2542243830Sdim    if (End >= MBBEnd) {
2543327952Sdim      DEBUG(dbgs() << "\t\ttaints global " << printReg(Other.Reg) << ':'
2544243830Sdim                   << OtherI->valno->id << '@' << OtherI->start << '\n');
2545243830Sdim      return false;
2546224145Sdim    }
2547327952Sdim    DEBUG(dbgs() << "\t\ttaints local " << printReg(Other.Reg) << ':'
2548243830Sdim                 << OtherI->valno->id << '@' << OtherI->start
2549243830Sdim                 << " to " << End << '\n');
2550243830Sdim    // A dead def is not a problem.
2551243830Sdim    if (End.isDead())
2552243830Sdim      break;
2553243830Sdim    TaintExtent.push_back(std::make_pair(End, TaintedLanes));
2554224145Sdim
2555243830Sdim    // Check for another def in the MBB.
2556280031Sdim    if (++OtherI == Other.LR.end() || OtherI->start >= MBBEnd)
2557243830Sdim      break;
2558224145Sdim
2559243830Sdim    // Lanes written by the new def are no longer tainted.
2560243830Sdim    const Val &OV = Other.Vals[OtherI->valno->id];
2561243830Sdim    TaintedLanes &= ~OV.WriteLanes;
2562243830Sdim    if (!OV.RedefVNI)
2563243830Sdim      break;
2564314564Sdim  } while (TaintedLanes.any());
2565243830Sdim  return true;
2566243830Sdim}
2567224145Sdim
2568309124Sdimbool JoinVals::usesLanes(const MachineInstr &MI, unsigned Reg, unsigned SubIdx,
2569296417Sdim                         LaneBitmask Lanes) const {
2570309124Sdim  if (MI.isDebugValue())
2571243830Sdim    return false;
2572309124Sdim  for (const MachineOperand &MO : MI.operands()) {
2573288943Sdim    if (!MO.isReg() || MO.isDef() || MO.getReg() != Reg)
2574239462Sdim      continue;
2575288943Sdim    if (!MO.readsReg())
2576243830Sdim      continue;
2577314564Sdim    unsigned S = TRI->composeSubRegIndices(SubIdx, MO.getSubReg());
2578314564Sdim    if ((Lanes & TRI->getSubRegIndexLaneMask(S)).any())
2579243830Sdim      return true;
2580239462Sdim  }
2581243830Sdim  return false;
2582243830Sdim}
2583239462Sdim
2584243830Sdimbool JoinVals::resolveConflicts(JoinVals &Other) {
2585280031Sdim  for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
2586243830Sdim    Val &V = Vals[i];
2587327952Sdim    assert(V.Resolution != CR_Impossible && "Unresolvable conflict");
2588243830Sdim    if (V.Resolution != CR_Unresolved)
2589239462Sdim      continue;
2590327952Sdim    DEBUG(dbgs() << "\t\tconflict at " << printReg(Reg) << ':' << i
2591280031Sdim                 << '@' << LR.getValNumInfo(i)->def << '\n');
2592280031Sdim    if (SubRangeJoin)
2593280031Sdim      return false;
2594280031Sdim
2595243830Sdim    ++NumLaneConflicts;
2596243830Sdim    assert(V.OtherVNI && "Inconsistent conflict resolution.");
2597280031Sdim    VNInfo *VNI = LR.getValNumInfo(i);
2598243830Sdim    const Val &OtherV = Other.Vals[V.OtherVNI->id];
2599224145Sdim
2600243830Sdim    // VNI is known to clobber some lanes in OtherVNI. If we go ahead with the
2601243830Sdim    // join, those lanes will be tainted with a wrong value. Get the extent of
2602243830Sdim    // the tainted lanes.
2603296417Sdim    LaneBitmask TaintedLanes = V.WriteLanes & OtherV.ValidLanes;
2604296417Sdim    SmallVector<std::pair<SlotIndex, LaneBitmask>, 8> TaintExtent;
2605243830Sdim    if (!taintExtent(i, TaintedLanes, Other, TaintExtent))
2606243830Sdim      // Tainted lanes would extend beyond the basic block.
2607243830Sdim      return false;
2608243830Sdim
2609243830Sdim    assert(!TaintExtent.empty() && "There should be at least one conflict.");
2610243830Sdim
2611243830Sdim    // Now look at the instructions from VNI->def to TaintExtent (inclusive).
2612243830Sdim    MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def);
2613243830Sdim    MachineBasicBlock::iterator MI = MBB->begin();
2614243830Sdim    if (!VNI->isPHIDef()) {
2615243830Sdim      MI = Indexes->getInstructionFromIndex(VNI->def);
2616243830Sdim      // No need to check the instruction defining VNI for reads.
2617243830Sdim      ++MI;
2618239462Sdim    }
2619243830Sdim    assert(!SlotIndex::isSameInstr(VNI->def, TaintExtent.front().first) &&
2620243830Sdim           "Interference ends on VNI->def. Should have been handled earlier");
2621243830Sdim    MachineInstr *LastMI =
2622243830Sdim      Indexes->getInstructionFromIndex(TaintExtent.front().first);
2623243830Sdim    assert(LastMI && "Range must end at a proper instruction");
2624243830Sdim    unsigned TaintNum = 0;
2625327952Sdim    while (true) {
2626243830Sdim      assert(MI != MBB->end() && "Bad LastMI");
2627309124Sdim      if (usesLanes(*MI, Other.Reg, Other.SubIdx, TaintedLanes)) {
2628243830Sdim        DEBUG(dbgs() << "\t\ttainted lanes used by: " << *MI);
2629243830Sdim        return false;
2630243830Sdim      }
2631243830Sdim      // LastMI is the last instruction to use the current value.
2632243830Sdim      if (&*MI == LastMI) {
2633243830Sdim        if (++TaintNum == TaintExtent.size())
2634243830Sdim          break;
2635243830Sdim        LastMI = Indexes->getInstructionFromIndex(TaintExtent[TaintNum].first);
2636243830Sdim        assert(LastMI && "Range must end at a proper instruction");
2637243830Sdim        TaintedLanes = TaintExtent[TaintNum].second;
2638243830Sdim      }
2639243830Sdim      ++MI;
2640243830Sdim    }
2641243830Sdim
2642243830Sdim    // The tainted lanes are unused.
2643243830Sdim    V.Resolution = CR_Replace;
2644243830Sdim    ++NumLaneResolves;
2645224145Sdim  }
2646243830Sdim  return true;
2647243830Sdim}
2648224145Sdim
2649243830Sdimbool JoinVals::isPrunedValue(unsigned ValNo, JoinVals &Other) {
2650243830Sdim  Val &V = Vals[ValNo];
2651243830Sdim  if (V.Pruned || V.PrunedComputed)
2652243830Sdim    return V.Pruned;
2653243830Sdim
2654243830Sdim  if (V.Resolution != CR_Erase && V.Resolution != CR_Merge)
2655243830Sdim    return V.Pruned;
2656243830Sdim
2657243830Sdim  // Follow copies up the dominator tree and check if any intermediate value
2658243830Sdim  // has been pruned.
2659243830Sdim  V.PrunedComputed = true;
2660243830Sdim  V.Pruned = Other.isPrunedValue(V.OtherVNI->id, *this);
2661243830Sdim  return V.Pruned;
2662243830Sdim}
2663243830Sdim
2664243830Sdimvoid JoinVals::pruneValues(JoinVals &Other,
2665280031Sdim                           SmallVectorImpl<SlotIndex> &EndPoints,
2666280031Sdim                           bool changeInstrs) {
2667280031Sdim  for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
2668280031Sdim    SlotIndex Def = LR.getValNumInfo(i)->def;
2669243830Sdim    switch (Vals[i].Resolution) {
2670243830Sdim    case CR_Keep:
2671243830Sdim      break;
2672243830Sdim    case CR_Replace: {
2673280031Sdim      // This value takes precedence over the value in Other.LR.
2674280031Sdim      LIS->pruneValue(Other.LR, Def, &EndPoints);
2675243830Sdim      // Check if we're replacing an IMPLICIT_DEF value. The IMPLICIT_DEF
2676243830Sdim      // instructions are only inserted to provide a live-out value for PHI
2677243830Sdim      // predecessors, so the instruction should simply go away once its value
2678243830Sdim      // has been replaced.
2679243830Sdim      Val &OtherV = Other.Vals[Vals[i].OtherVNI->id];
2680249423Sdim      bool EraseImpDef = OtherV.ErasableImplicitDef &&
2681249423Sdim                         OtherV.Resolution == CR_Keep;
2682243830Sdim      if (!Def.isBlock()) {
2683280031Sdim        if (changeInstrs) {
2684280031Sdim          // Remove <def,read-undef> flags. This def is now a partial redef.
2685327952Sdim          // Also remove dead flags since the joined live range will
2686280031Sdim          // continue past this instruction.
2687288943Sdim          for (MachineOperand &MO :
2688288943Sdim               Indexes->getInstructionFromIndex(Def)->operands()) {
2689288943Sdim            if (MO.isReg() && MO.isDef() && MO.getReg() == Reg) {
2690327952Sdim              if (MO.getSubReg() != 0 && MO.isUndef() && !EraseImpDef)
2691327952Sdim                MO.setIsUndef(false);
2692288943Sdim              MO.setIsDead(false);
2693280031Sdim            }
2694243830Sdim          }
2695280031Sdim        }
2696243830Sdim        // This value will reach instructions below, but we need to make sure
2697243830Sdim        // the live range also reaches the instruction at Def.
2698243830Sdim        if (!EraseImpDef)
2699243830Sdim          EndPoints.push_back(Def);
2700243830Sdim      }
2701327952Sdim      DEBUG(dbgs() << "\t\tpruned " << printReg(Other.Reg) << " at " << Def
2702280031Sdim                   << ": " << Other.LR << '\n');
2703243830Sdim      break;
2704243830Sdim    }
2705243830Sdim    case CR_Erase:
2706243830Sdim    case CR_Merge:
2707243830Sdim      if (isPrunedValue(i, Other)) {
2708280031Sdim        // This value is ultimately a copy of a pruned value in LR or Other.LR.
2709243830Sdim        // We can no longer trust the value mapping computed by
2710243830Sdim        // computeAssignment(), the value that was originally copied could have
2711243830Sdim        // been replaced.
2712280031Sdim        LIS->pruneValue(LR, Def, &EndPoints);
2713327952Sdim        DEBUG(dbgs() << "\t\tpruned all of " << printReg(Reg) << " at "
2714280031Sdim                     << Def << ": " << LR << '\n');
2715243830Sdim      }
2716243830Sdim      break;
2717243830Sdim    case CR_Unresolved:
2718243830Sdim    case CR_Impossible:
2719243830Sdim      llvm_unreachable("Unresolved conflicts");
2720243830Sdim    }
2721224145Sdim  }
2722243830Sdim}
2723224145Sdim
2724314564Sdimvoid JoinVals::pruneSubRegValues(LiveInterval &LI, LaneBitmask &ShrinkMask) {
2725280031Sdim  // Look for values being erased.
2726280031Sdim  bool DidPrune = false;
2727280031Sdim  for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
2728321369Sdim    // We should trigger in all cases in which eraseInstrs() does something.
2729321369Sdim    // match what eraseInstrs() is doing, print a message so
2730321369Sdim    if (Vals[i].Resolution != CR_Erase &&
2731321369Sdim        (Vals[i].Resolution != CR_Keep || !Vals[i].ErasableImplicitDef ||
2732321369Sdim         !Vals[i].Pruned))
2733280031Sdim      continue;
2734280031Sdim
2735280031Sdim    // Check subranges at the point where the copy will be removed.
2736280031Sdim    SlotIndex Def = LR.getValNumInfo(i)->def;
2737321369Sdim    // Print message so mismatches with eraseInstrs() can be diagnosed.
2738321369Sdim    DEBUG(dbgs() << "\t\tExpecting instruction removal at " << Def << '\n');
2739280031Sdim    for (LiveInterval::SubRange &S : LI.subranges()) {
2740280031Sdim      LiveQueryResult Q = S.Query(Def);
2741280031Sdim
2742280031Sdim      // If a subrange starts at the copy then an undefined value has been
2743280031Sdim      // copied and we must remove that subrange value as well.
2744280031Sdim      VNInfo *ValueOut = Q.valueOutOrDead();
2745280031Sdim      if (ValueOut != nullptr && Q.valueIn() == nullptr) {
2746296417Sdim        DEBUG(dbgs() << "\t\tPrune sublane " << PrintLaneMask(S.LaneMask)
2747280031Sdim                     << " at " << Def << "\n");
2748280031Sdim        LIS->pruneValue(S, Def, nullptr);
2749280031Sdim        DidPrune = true;
2750280031Sdim        // Mark value number as unused.
2751280031Sdim        ValueOut->markUnused();
2752280031Sdim        continue;
2753280031Sdim      }
2754280031Sdim      // If a subrange ends at the copy, then a value was copied but only
2755296417Sdim      // partially used later. Shrink the subregister range appropriately.
2756280031Sdim      if (Q.valueIn() != nullptr && Q.valueOut() == nullptr) {
2757296417Sdim        DEBUG(dbgs() << "\t\tDead uses at sublane " << PrintLaneMask(S.LaneMask)
2758296417Sdim                     << " at " << Def << "\n");
2759280031Sdim        ShrinkMask |= S.LaneMask;
2760280031Sdim      }
2761280031Sdim    }
2762280031Sdim  }
2763280031Sdim  if (DidPrune)
2764280031Sdim    LI.removeEmptySubRanges();
2765280031Sdim}
2766280031Sdim
2767314564Sdim/// Check if any of the subranges of @p LI contain a definition at @p Def.
2768314564Sdimstatic bool isDefInSubRange(LiveInterval &LI, SlotIndex Def) {
2769314564Sdim  for (LiveInterval::SubRange &SR : LI.subranges()) {
2770314564Sdim    if (VNInfo *VNI = SR.Query(Def).valueOutOrDead())
2771314564Sdim      if (VNI->def == Def)
2772314564Sdim        return true;
2773314564Sdim  }
2774314564Sdim  return false;
2775314564Sdim}
2776314564Sdim
2777314564Sdimvoid JoinVals::pruneMainSegments(LiveInterval &LI, bool &ShrinkMainRange) {
2778314564Sdim  assert(&static_cast<LiveRange&>(LI) == &LR);
2779314564Sdim
2780314564Sdim  for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
2781314564Sdim    if (Vals[i].Resolution != CR_Keep)
2782314564Sdim      continue;
2783314564Sdim    VNInfo *VNI = LR.getValNumInfo(i);
2784314564Sdim    if (VNI->isUnused() || VNI->isPHIDef() || isDefInSubRange(LI, VNI->def))
2785314564Sdim      continue;
2786314564Sdim    Vals[i].Pruned = true;
2787314564Sdim    ShrinkMainRange = true;
2788314564Sdim  }
2789314564Sdim}
2790314564Sdim
2791288943Sdimvoid JoinVals::removeImplicitDefs() {
2792288943Sdim  for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
2793288943Sdim    Val &V = Vals[i];
2794288943Sdim    if (V.Resolution != CR_Keep || !V.ErasableImplicitDef || !V.Pruned)
2795288943Sdim      continue;
2796288943Sdim
2797288943Sdim    VNInfo *VNI = LR.getValNumInfo(i);
2798288943Sdim    VNI->markUnused();
2799288943Sdim    LR.removeValNo(VNI);
2800288943Sdim  }
2801288943Sdim}
2802288943Sdim
2803280031Sdimvoid JoinVals::eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs,
2804314564Sdim                           SmallVectorImpl<unsigned> &ShrinkRegs,
2805314564Sdim                           LiveInterval *LI) {
2806280031Sdim  for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
2807243830Sdim    // Get the def location before markUnused() below invalidates it.
2808280031Sdim    SlotIndex Def = LR.getValNumInfo(i)->def;
2809243830Sdim    switch (Vals[i].Resolution) {
2810288943Sdim    case CR_Keep: {
2811243830Sdim      // If an IMPLICIT_DEF value is pruned, it doesn't serve a purpose any
2812243830Sdim      // longer. The IMPLICIT_DEF instructions are only inserted by
2813243830Sdim      // PHIElimination to guarantee that all PHI predecessors have a value.
2814249423Sdim      if (!Vals[i].ErasableImplicitDef || !Vals[i].Pruned)
2815243830Sdim        break;
2816288943Sdim      // Remove value number i from LR.
2817314564Sdim      // For intervals with subranges, removing a segment from the main range
2818314564Sdim      // may require extending the previous segment: for each definition of
2819314564Sdim      // a subregister, there will be a corresponding def in the main range.
2820314564Sdim      // That def may fall in the middle of a segment from another subrange.
2821314564Sdim      // In such cases, removing this def from the main range must be
2822314564Sdim      // complemented by extending the main range to account for the liveness
2823314564Sdim      // of the other subrange.
2824288943Sdim      VNInfo *VNI = LR.getValNumInfo(i);
2825314564Sdim      SlotIndex Def = VNI->def;
2826314564Sdim      // The new end point of the main range segment to be extended.
2827314564Sdim      SlotIndex NewEnd;
2828314564Sdim      if (LI != nullptr) {
2829314564Sdim        LiveRange::iterator I = LR.FindSegmentContaining(Def);
2830314564Sdim        assert(I != LR.end());
2831314564Sdim        // Do not extend beyond the end of the segment being removed.
2832314564Sdim        // The segment may have been pruned in preparation for joining
2833314564Sdim        // live ranges.
2834314564Sdim        NewEnd = I->end;
2835314564Sdim      }
2836314564Sdim
2837288943Sdim      LR.removeValNo(VNI);
2838288943Sdim      // Note that this VNInfo is reused and still referenced in NewVNInfo,
2839288943Sdim      // make it appear like an unused value number.
2840288943Sdim      VNI->markUnused();
2841314564Sdim
2842314564Sdim      if (LI != nullptr && LI->hasSubRanges()) {
2843314564Sdim        assert(static_cast<LiveRange*>(LI) == &LR);
2844314564Sdim        // Determine the end point based on the subrange information:
2845314564Sdim        // minimum of (earliest def of next segment,
2846314564Sdim        //             latest end point of containing segment)
2847314564Sdim        SlotIndex ED, LE;
2848314564Sdim        for (LiveInterval::SubRange &SR : LI->subranges()) {
2849314564Sdim          LiveRange::iterator I = SR.find(Def);
2850314564Sdim          if (I == SR.end())
2851314564Sdim            continue;
2852314564Sdim          if (I->start > Def)
2853314564Sdim            ED = ED.isValid() ? std::min(ED, I->start) : I->start;
2854314564Sdim          else
2855314564Sdim            LE = LE.isValid() ? std::max(LE, I->end) : I->end;
2856314564Sdim        }
2857314564Sdim        if (LE.isValid())
2858314564Sdim          NewEnd = std::min(NewEnd, LE);
2859314564Sdim        if (ED.isValid())
2860314564Sdim          NewEnd = std::min(NewEnd, ED);
2861314564Sdim
2862314564Sdim        // We only want to do the extension if there was a subrange that
2863314564Sdim        // was live across Def.
2864314564Sdim        if (LE.isValid()) {
2865314564Sdim          LiveRange::iterator S = LR.find(Def);
2866314564Sdim          if (S != LR.begin())
2867314564Sdim            std::prev(S)->end = NewEnd;
2868314564Sdim        }
2869314564Sdim      }
2870314564Sdim      DEBUG({
2871314564Sdim        dbgs() << "\t\tremoved " << i << '@' << Def << ": " << LR << '\n';
2872314564Sdim        if (LI != nullptr)
2873314564Sdim          dbgs() << "\t\t  LHS = " << *LI << '\n';
2874314564Sdim      });
2875314564Sdim      LLVM_FALLTHROUGH;
2876288943Sdim    }
2877243830Sdim
2878243830Sdim    case CR_Erase: {
2879243830Sdim      MachineInstr *MI = Indexes->getInstructionFromIndex(Def);
2880243830Sdim      assert(MI && "No instruction to erase");
2881243830Sdim      if (MI->isCopy()) {
2882243830Sdim        unsigned Reg = MI->getOperand(1).getReg();
2883243830Sdim        if (TargetRegisterInfo::isVirtualRegister(Reg) &&
2884243830Sdim            Reg != CP.getSrcReg() && Reg != CP.getDstReg())
2885243830Sdim          ShrinkRegs.push_back(Reg);
2886243830Sdim      }
2887243830Sdim      ErasedInstrs.insert(MI);
2888243830Sdim      DEBUG(dbgs() << "\t\terased:\t" << Def << '\t' << *MI);
2889309124Sdim      LIS->RemoveMachineInstrFromMaps(*MI);
2890243830Sdim      MI->eraseFromParent();
2891243830Sdim      break;
2892243830Sdim    }
2893243830Sdim    default:
2894243830Sdim      break;
2895243830Sdim    }
2896243830Sdim  }
2897243830Sdim}
2898243830Sdim
2899296417Sdimvoid RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
2900296417Sdim                                         LaneBitmask LaneMask,
2901280031Sdim                                         const CoalescerPair &CP) {
2902280031Sdim  SmallVector<VNInfo*, 16> NewVNInfo;
2903280031Sdim  JoinVals RHSVals(RRange, CP.getSrcReg(), CP.getSrcIdx(), LaneMask,
2904280031Sdim                   NewVNInfo, CP, LIS, TRI, true, true);
2905280031Sdim  JoinVals LHSVals(LRange, CP.getDstReg(), CP.getDstIdx(), LaneMask,
2906280031Sdim                   NewVNInfo, CP, LIS, TRI, true, true);
2907280031Sdim
2908288943Sdim  // Compute NewVNInfo and resolve conflicts (see also joinVirtRegs())
2909288943Sdim  // We should be able to resolve all conflicts here as we could successfully do
2910288943Sdim  // it on the mainrange already. There is however a problem when multiple
2911288943Sdim  // ranges get mapped to the "overflow" lane mask bit which creates unexpected
2912288943Sdim  // interferences.
2913288943Sdim  if (!LHSVals.mapValues(RHSVals) || !RHSVals.mapValues(LHSVals)) {
2914296417Sdim    // We already determined that it is legal to merge the intervals, so this
2915296417Sdim    // should never fail.
2916296417Sdim    llvm_unreachable("*** Couldn't join subrange!\n");
2917288943Sdim  }
2918288943Sdim  if (!LHSVals.resolveConflicts(RHSVals) ||
2919288943Sdim      !RHSVals.resolveConflicts(LHSVals)) {
2920296417Sdim    // We already determined that it is legal to merge the intervals, so this
2921296417Sdim    // should never fail.
2922296417Sdim    llvm_unreachable("*** Couldn't join subrange!\n");
2923288943Sdim  }
2924280031Sdim
2925280031Sdim  // The merging algorithm in LiveInterval::join() can't handle conflicting
2926280031Sdim  // value mappings, so we need to remove any live ranges that overlap a
2927280031Sdim  // CR_Replace resolution. Collect a set of end points that can be used to
2928280031Sdim  // restore the live range after joining.
2929280031Sdim  SmallVector<SlotIndex, 8> EndPoints;
2930280031Sdim  LHSVals.pruneValues(RHSVals, EndPoints, false);
2931280031Sdim  RHSVals.pruneValues(LHSVals, EndPoints, false);
2932280031Sdim
2933288943Sdim  LHSVals.removeImplicitDefs();
2934288943Sdim  RHSVals.removeImplicitDefs();
2935288943Sdim
2936280031Sdim  LRange.verify();
2937280031Sdim  RRange.verify();
2938280031Sdim
2939280031Sdim  // Join RRange into LHS.
2940280031Sdim  LRange.join(RRange, LHSVals.getAssignments(), RHSVals.getAssignments(),
2941280031Sdim              NewVNInfo);
2942280031Sdim
2943280031Sdim  DEBUG(dbgs() << "\t\tjoined lanes: " << LRange << "\n");
2944280031Sdim  if (EndPoints.empty())
2945296417Sdim    return;
2946280031Sdim
2947280031Sdim  // Recompute the parts of the live range we had to remove because of
2948280031Sdim  // CR_Replace conflicts.
2949314564Sdim  DEBUG({
2950314564Sdim    dbgs() << "\t\trestoring liveness to " << EndPoints.size() << " points: ";
2951314564Sdim    for (unsigned i = 0, n = EndPoints.size(); i != n; ++i) {
2952314564Sdim      dbgs() << EndPoints[i];
2953314564Sdim      if (i != n-1)
2954314564Sdim        dbgs() << ',';
2955314564Sdim    }
2956314564Sdim    dbgs() << ":  " << LRange << '\n';
2957314564Sdim  });
2958280031Sdim  LIS->extendToIndices(LRange, EndPoints);
2959280031Sdim}
2960280031Sdim
2961296417Sdimvoid RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI,
2962280031Sdim                                          const LiveRange &ToMerge,
2963296417Sdim                                          LaneBitmask LaneMask,
2964296417Sdim                                          CoalescerPair &CP) {
2965280031Sdim  BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
2966321369Sdim  LI.refineSubRanges(Allocator, LaneMask,
2967321369Sdim      [this,&Allocator,&ToMerge,&CP](LiveInterval::SubRange &SR) {
2968321369Sdim    if (SR.empty()) {
2969321369Sdim      SR.assign(ToMerge, Allocator);
2970280031Sdim    } else {
2971321369Sdim      // joinSubRegRange() destroys the merged range, so we need a copy.
2972321369Sdim      LiveRange RangeCopy(ToMerge, Allocator);
2973321369Sdim      joinSubRegRanges(SR, RangeCopy, SR.LaneMask, CP);
2974280031Sdim    }
2975321369Sdim  });
2976280031Sdim}
2977280031Sdim
2978243830Sdimbool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
2979243830Sdim  SmallVector<VNInfo*, 16> NewVNInfo;
2980243830Sdim  LiveInterval &RHS = LIS->getInterval(CP.getSrcReg());
2981243830Sdim  LiveInterval &LHS = LIS->getInterval(CP.getDstReg());
2982288943Sdim  bool TrackSubRegLiveness = MRI->shouldTrackSubRegLiveness(*CP.getNewRC());
2983314564Sdim  JoinVals RHSVals(RHS, CP.getSrcReg(), CP.getSrcIdx(), LaneBitmask::getNone(),
2984314564Sdim                   NewVNInfo, CP, LIS, TRI, false, TrackSubRegLiveness);
2985314564Sdim  JoinVals LHSVals(LHS, CP.getDstReg(), CP.getDstIdx(), LaneBitmask::getNone(),
2986314564Sdim                   NewVNInfo, CP, LIS, TRI, false, TrackSubRegLiveness);
2987243830Sdim
2988261991Sdim  DEBUG(dbgs() << "\t\tRHS = " << RHS
2989261991Sdim               << "\n\t\tLHS = " << LHS
2990243830Sdim               << '\n');
2991243830Sdim
2992243830Sdim  // First compute NewVNInfo and the simple value mappings.
2993243830Sdim  // Detect impossible conflicts early.
2994243830Sdim  if (!LHSVals.mapValues(RHSVals) || !RHSVals.mapValues(LHSVals))
2995243830Sdim    return false;
2996243830Sdim
2997243830Sdim  // Some conflicts can only be resolved after all values have been mapped.
2998243830Sdim  if (!LHSVals.resolveConflicts(RHSVals) || !RHSVals.resolveConflicts(LHSVals))
2999243830Sdim    return false;
3000243830Sdim
3001243830Sdim  // All clear, the live ranges can be merged.
3002280031Sdim  if (RHS.hasSubRanges() || LHS.hasSubRanges()) {
3003280031Sdim    BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
3004243830Sdim
3005280031Sdim    // Transform lanemasks from the LHS to masks in the coalesced register and
3006280031Sdim    // create initial subranges if necessary.
3007280031Sdim    unsigned DstIdx = CP.getDstIdx();
3008280031Sdim    if (!LHS.hasSubRanges()) {
3009296417Sdim      LaneBitmask Mask = DstIdx == 0 ? CP.getNewRC()->getLaneMask()
3010296417Sdim                                     : TRI->getSubRegIndexLaneMask(DstIdx);
3011280031Sdim      // LHS must support subregs or we wouldn't be in this codepath.
3012314564Sdim      assert(Mask.any());
3013280031Sdim      LHS.createSubRangeFrom(Allocator, Mask, LHS);
3014280031Sdim    } else if (DstIdx != 0) {
3015280031Sdim      // Transform LHS lanemasks to new register class if necessary.
3016280031Sdim      for (LiveInterval::SubRange &R : LHS.subranges()) {
3017296417Sdim        LaneBitmask Mask = TRI->composeSubRegIndexLaneMask(DstIdx, R.LaneMask);
3018280031Sdim        R.LaneMask = Mask;
3019280031Sdim      }
3020280031Sdim    }
3021327952Sdim    DEBUG(dbgs() << "\t\tLHST = " << printReg(CP.getDstReg())
3022280031Sdim                 << ' ' << LHS << '\n');
3023280031Sdim
3024280031Sdim    // Determine lanemasks of RHS in the coalesced register and merge subranges.
3025280031Sdim    unsigned SrcIdx = CP.getSrcIdx();
3026280031Sdim    if (!RHS.hasSubRanges()) {
3027296417Sdim      LaneBitmask Mask = SrcIdx == 0 ? CP.getNewRC()->getLaneMask()
3028296417Sdim                                     : TRI->getSubRegIndexLaneMask(SrcIdx);
3029296417Sdim      mergeSubRangeInto(LHS, RHS, Mask, CP);
3030280031Sdim    } else {
3031280031Sdim      // Pair up subranges and merge.
3032280031Sdim      for (LiveInterval::SubRange &R : RHS.subranges()) {
3033296417Sdim        LaneBitmask Mask = TRI->composeSubRegIndexLaneMask(SrcIdx, R.LaneMask);
3034296417Sdim        mergeSubRangeInto(LHS, R, Mask, CP);
3035280031Sdim      }
3036280031Sdim    }
3037296417Sdim    DEBUG(dbgs() << "\tJoined SubRanges " << LHS << "\n");
3038280031Sdim
3039314564Sdim    // Pruning implicit defs from subranges may result in the main range
3040314564Sdim    // having stale segments.
3041314564Sdim    LHSVals.pruneMainSegments(LHS, ShrinkMainRange);
3042314564Sdim
3043296417Sdim    LHSVals.pruneSubRegValues(LHS, ShrinkMask);
3044296417Sdim    RHSVals.pruneSubRegValues(LHS, ShrinkMask);
3045280031Sdim  }
3046280031Sdim
3047243830Sdim  // The merging algorithm in LiveInterval::join() can't handle conflicting
3048243830Sdim  // value mappings, so we need to remove any live ranges that overlap a
3049243830Sdim  // CR_Replace resolution. Collect a set of end points that can be used to
3050243830Sdim  // restore the live range after joining.
3051243830Sdim  SmallVector<SlotIndex, 8> EndPoints;
3052280031Sdim  LHSVals.pruneValues(RHSVals, EndPoints, true);
3053280031Sdim  RHSVals.pruneValues(LHSVals, EndPoints, true);
3054243830Sdim
3055243830Sdim  // Erase COPY and IMPLICIT_DEF instructions. This may cause some external
3056243830Sdim  // registers to require trimming.
3057243830Sdim  SmallVector<unsigned, 8> ShrinkRegs;
3058314564Sdim  LHSVals.eraseInstrs(ErasedInstrs, ShrinkRegs, &LHS);
3059243830Sdim  RHSVals.eraseInstrs(ErasedInstrs, ShrinkRegs);
3060243830Sdim  while (!ShrinkRegs.empty())
3061288943Sdim    shrinkToUses(&LIS->getInterval(ShrinkRegs.pop_back_val()));
3062243830Sdim
3063243830Sdim  // Join RHS into LHS.
3064261991Sdim  LHS.join(RHS, LHSVals.getAssignments(), RHSVals.getAssignments(), NewVNInfo);
3065243830Sdim
3066243830Sdim  // Kill flags are going to be wrong if the live ranges were overlapping.
3067243830Sdim  // Eventually, we should simply clear all kill flags when computing live
3068243830Sdim  // ranges. They are reinserted after register allocation.
3069243830Sdim  MRI->clearKillFlags(LHS.reg);
3070243830Sdim  MRI->clearKillFlags(RHS.reg);
3071243830Sdim
3072280031Sdim  if (!EndPoints.empty()) {
3073280031Sdim    // Recompute the parts of the live range we had to remove because of
3074280031Sdim    // CR_Replace conflicts.
3075314564Sdim    DEBUG({
3076314564Sdim      dbgs() << "\t\trestoring liveness to " << EndPoints.size() << " points: ";
3077314564Sdim      for (unsigned i = 0, n = EndPoints.size(); i != n; ++i) {
3078314564Sdim        dbgs() << EndPoints[i];
3079314564Sdim        if (i != n-1)
3080314564Sdim          dbgs() << ',';
3081314564Sdim      }
3082314564Sdim      dbgs() << ":  " << LHS << '\n';
3083314564Sdim    });
3084280031Sdim    LIS->extendToIndices((LiveRange&)LHS, EndPoints);
3085280031Sdim  }
3086243830Sdim
3087224145Sdim  return true;
3088224145Sdim}
3089224145Sdim
3090243830Sdimbool RegisterCoalescer::joinIntervals(CoalescerPair &CP) {
3091243830Sdim  return CP.isPhys() ? joinReservedPhysReg(CP) : joinVirtRegs(CP);
3092243830Sdim}
3093243830Sdim
3094224145Sdimnamespace {
3095327952Sdim
3096288943Sdim/// Information concerning MBB coalescing priority.
3097249423Sdimstruct MBBPriorityInfo {
3098249423Sdim  MachineBasicBlock *MBB;
3099249423Sdim  unsigned Depth;
3100249423Sdim  bool IsSplit;
3101224145Sdim
3102249423Sdim  MBBPriorityInfo(MachineBasicBlock *mbb, unsigned depth, bool issplit)
3103249423Sdim    : MBB(mbb), Depth(depth), IsSplit(issplit) {}
3104249423Sdim};
3105224145Sdim
3106327952Sdim} // end anonymous namespace
3107327952Sdim
3108288943Sdim/// C-style comparator that sorts first based on the loop depth of the basic
3109288943Sdim/// block (the unsigned), and then on the MBB number.
3110288943Sdim///
3111288943Sdim/// EnableGlobalCopies assumes that the primary sort key is loop depth.
3112261991Sdimstatic int compareMBBPriority(const MBBPriorityInfo *LHS,
3113261991Sdim                              const MBBPriorityInfo *RHS) {
3114249423Sdim  // Deeper loops first
3115249423Sdim  if (LHS->Depth != RHS->Depth)
3116249423Sdim    return LHS->Depth > RHS->Depth ? -1 : 1;
3117249423Sdim
3118249423Sdim  // Try to unsplit critical edges next.
3119249423Sdim  if (LHS->IsSplit != RHS->IsSplit)
3120249423Sdim    return LHS->IsSplit ? -1 : 1;
3121249423Sdim
3122249423Sdim  // Prefer blocks that are more connected in the CFG. This takes care of
3123249423Sdim  // the most difficult copies first while intervals are short.
3124249423Sdim  unsigned cl = LHS->MBB->pred_size() + LHS->MBB->succ_size();
3125249423Sdim  unsigned cr = RHS->MBB->pred_size() + RHS->MBB->succ_size();
3126249423Sdim  if (cl != cr)
3127249423Sdim    return cl > cr ? -1 : 1;
3128249423Sdim
3129249423Sdim  // As a last resort, sort by block number.
3130249423Sdim  return LHS->MBB->getNumber() < RHS->MBB->getNumber() ? -1 : 1;
3131224145Sdim}
3132224145Sdim
3133249423Sdim/// \returns true if the given copy uses or defines a local live range.
3134249423Sdimstatic bool isLocalCopy(MachineInstr *Copy, const LiveIntervals *LIS) {
3135249423Sdim  if (!Copy->isCopy())
3136249423Sdim    return false;
3137249423Sdim
3138261991Sdim  if (Copy->getOperand(1).isUndef())
3139261991Sdim    return false;
3140261991Sdim
3141249423Sdim  unsigned SrcReg = Copy->getOperand(1).getReg();
3142249423Sdim  unsigned DstReg = Copy->getOperand(0).getReg();
3143249423Sdim  if (TargetRegisterInfo::isPhysicalRegister(SrcReg)
3144249423Sdim      || TargetRegisterInfo::isPhysicalRegister(DstReg))
3145249423Sdim    return false;
3146249423Sdim
3147249423Sdim  return LIS->intervalIsInOneMBB(LIS->getInterval(SrcReg))
3148249423Sdim    || LIS->intervalIsInOneMBB(LIS->getInterval(DstReg));
3149249423Sdim}
3150249423Sdim
3151249423Sdimbool RegisterCoalescer::
3152249423SdimcopyCoalesceWorkList(MutableArrayRef<MachineInstr*> CurrList) {
3153239462Sdim  bool Progress = false;
3154249423Sdim  for (unsigned i = 0, e = CurrList.size(); i != e; ++i) {
3155249423Sdim    if (!CurrList[i])
3156239462Sdim      continue;
3157239462Sdim    // Skip instruction pointers that have already been erased, for example by
3158239462Sdim    // dead code elimination.
3159321369Sdim    if (ErasedInstrs.count(CurrList[i])) {
3160276479Sdim      CurrList[i] = nullptr;
3161239462Sdim      continue;
3162239462Sdim    }
3163239462Sdim    bool Again = false;
3164249423Sdim    bool Success = joinCopy(CurrList[i], Again);
3165239462Sdim    Progress |= Success;
3166239462Sdim    if (Success || !Again)
3167276479Sdim      CurrList[i] = nullptr;
3168239462Sdim  }
3169239462Sdim  return Progress;
3170239462Sdim}
3171239462Sdim
3172288943Sdim/// Check if DstReg is a terminal node.
3173288943Sdim/// I.e., it does not have any affinity other than \p Copy.
3174288943Sdimstatic bool isTerminalReg(unsigned DstReg, const MachineInstr &Copy,
3175288943Sdim                          const MachineRegisterInfo *MRI) {
3176288943Sdim  assert(Copy.isCopyLike());
3177288943Sdim  // Check if the destination of this copy as any other affinity.
3178288943Sdim  for (const MachineInstr &MI : MRI->reg_nodbg_instructions(DstReg))
3179288943Sdim    if (&MI != &Copy && MI.isCopyLike())
3180288943Sdim      return false;
3181288943Sdim  return true;
3182288943Sdim}
3183288943Sdim
3184288943Sdimbool RegisterCoalescer::applyTerminalRule(const MachineInstr &Copy) const {
3185288943Sdim  assert(Copy.isCopyLike());
3186288943Sdim  if (!UseTerminalRule)
3187288943Sdim    return false;
3188288943Sdim  unsigned DstReg, DstSubReg, SrcReg, SrcSubReg;
3189288943Sdim  isMoveInstr(*TRI, &Copy, SrcReg, DstReg, SrcSubReg, DstSubReg);
3190288943Sdim  // Check if the destination of this copy has any other affinity.
3191288943Sdim  if (TargetRegisterInfo::isPhysicalRegister(DstReg) ||
3192288943Sdim      // If SrcReg is a physical register, the copy won't be coalesced.
3193288943Sdim      // Ignoring it may have other side effect (like missing
3194288943Sdim      // rematerialization). So keep it.
3195288943Sdim      TargetRegisterInfo::isPhysicalRegister(SrcReg) ||
3196288943Sdim      !isTerminalReg(DstReg, Copy, MRI))
3197288943Sdim    return false;
3198288943Sdim
3199296417Sdim  // DstReg is a terminal node. Check if it interferes with any other
3200288943Sdim  // copy involving SrcReg.
3201288943Sdim  const MachineBasicBlock *OrigBB = Copy.getParent();
3202288943Sdim  const LiveInterval &DstLI = LIS->getInterval(DstReg);
3203288943Sdim  for (const MachineInstr &MI : MRI->reg_nodbg_instructions(SrcReg)) {
3204288943Sdim    // Technically we should check if the weight of the new copy is
3205288943Sdim    // interesting compared to the other one and update the weight
3206288943Sdim    // of the copies accordingly. However, this would only work if
3207288943Sdim    // we would gather all the copies first then coalesce, whereas
3208288943Sdim    // right now we interleave both actions.
3209288943Sdim    // For now, just consider the copies that are in the same block.
3210288943Sdim    if (&MI == &Copy || !MI.isCopyLike() || MI.getParent() != OrigBB)
3211288943Sdim      continue;
3212288943Sdim    unsigned OtherReg, OtherSubReg, OtherSrcReg, OtherSrcSubReg;
3213288943Sdim    isMoveInstr(*TRI, &Copy, OtherSrcReg, OtherReg, OtherSrcSubReg,
3214288943Sdim                OtherSubReg);
3215288943Sdim    if (OtherReg == SrcReg)
3216288943Sdim      OtherReg = OtherSrcReg;
3217288943Sdim    // Check if OtherReg is a non-terminal.
3218288943Sdim    if (TargetRegisterInfo::isPhysicalRegister(OtherReg) ||
3219288943Sdim        isTerminalReg(OtherReg, MI, MRI))
3220288943Sdim      continue;
3221288943Sdim    // Check that OtherReg interfere with DstReg.
3222288943Sdim    if (LIS->getInterval(OtherReg).overlaps(DstLI)) {
3223327952Sdim      DEBUG(dbgs() << "Apply terminal rule for: " << printReg(DstReg) << '\n');
3224288943Sdim      return true;
3225288943Sdim    }
3226288943Sdim  }
3227288943Sdim  return false;
3228288943Sdim}
3229288943Sdim
3230239462Sdimvoid
3231239462SdimRegisterCoalescer::copyCoalesceInMBB(MachineBasicBlock *MBB) {
3232224145Sdim  DEBUG(dbgs() << MBB->getName() << ":\n");
3233224145Sdim
3234239462Sdim  // Collect all copy-like instructions in MBB. Don't start coalescing anything
3235239462Sdim  // yet, it might invalidate the iterator.
3236239462Sdim  const unsigned PrevSize = WorkList.size();
3237249423Sdim  if (JoinGlobalCopies) {
3238288943Sdim    SmallVector<MachineInstr*, 2> LocalTerminals;
3239288943Sdim    SmallVector<MachineInstr*, 2> GlobalTerminals;
3240249423Sdim    // Coalesce copies bottom-up to coalesce local defs before local uses. They
3241249423Sdim    // are not inherently easier to resolve, but slightly preferable until we
3242249423Sdim    // have local live range splitting. In particular this is required by
3243249423Sdim    // cmp+jmp macro fusion.
3244261991Sdim    for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end();
3245261991Sdim         MII != E; ++MII) {
3246249423Sdim      if (!MII->isCopyLike())
3247249423Sdim        continue;
3248288943Sdim      bool ApplyTerminalRule = applyTerminalRule(*MII);
3249288943Sdim      if (isLocalCopy(&(*MII), LIS)) {
3250288943Sdim        if (ApplyTerminalRule)
3251288943Sdim          LocalTerminals.push_back(&(*MII));
3252288943Sdim        else
3253288943Sdim          LocalWorkList.push_back(&(*MII));
3254288943Sdim      } else {
3255288943Sdim        if (ApplyTerminalRule)
3256288943Sdim          GlobalTerminals.push_back(&(*MII));
3257288943Sdim        else
3258288943Sdim          WorkList.push_back(&(*MII));
3259288943Sdim      }
3260249423Sdim    }
3261288943Sdim    // Append the copies evicted by the terminal rule at the end of the list.
3262288943Sdim    LocalWorkList.append(LocalTerminals.begin(), LocalTerminals.end());
3263288943Sdim    WorkList.append(GlobalTerminals.begin(), GlobalTerminals.end());
3264249423Sdim  }
3265249423Sdim  else {
3266288943Sdim    SmallVector<MachineInstr*, 2> Terminals;
3267309124Sdim    for (MachineInstr &MII : *MBB)
3268309124Sdim      if (MII.isCopyLike()) {
3269309124Sdim        if (applyTerminalRule(MII))
3270309124Sdim          Terminals.push_back(&MII);
3271288943Sdim        else
3272309124Sdim          WorkList.push_back(&MII);
3273309124Sdim      }
3274309124Sdim    // Append the copies evicted by the terminal rule at the end of the list.
3275309124Sdim    WorkList.append(Terminals.begin(), Terminals.end());
3276249423Sdim  }
3277239462Sdim  // Try coalescing the collected copies immediately, and remove the nulls.
3278239462Sdim  // This prevents the WorkList from getting too large since most copies are
3279239462Sdim  // joinable on the first attempt.
3280249423Sdim  MutableArrayRef<MachineInstr*>
3281249423Sdim    CurrList(WorkList.begin() + PrevSize, WorkList.end());
3282249423Sdim  if (copyCoalesceWorkList(CurrList))
3283239462Sdim    WorkList.erase(std::remove(WorkList.begin() + PrevSize, WorkList.end(),
3284321369Sdim                               nullptr), WorkList.end());
3285224145Sdim}
3286224145Sdim
3287249423Sdimvoid RegisterCoalescer::coalesceLocals() {
3288249423Sdim  copyCoalesceWorkList(LocalWorkList);
3289249423Sdim  for (unsigned j = 0, je = LocalWorkList.size(); j != je; ++j) {
3290249423Sdim    if (LocalWorkList[j])
3291249423Sdim      WorkList.push_back(LocalWorkList[j]);
3292249423Sdim  }
3293249423Sdim  LocalWorkList.clear();
3294249423Sdim}
3295249423Sdim
3296239462Sdimvoid RegisterCoalescer::joinAllIntervals() {
3297224145Sdim  DEBUG(dbgs() << "********** JOINING INTERVALS ***********\n");
3298249423Sdim  assert(WorkList.empty() && LocalWorkList.empty() && "Old data still around.");
3299224145Sdim
3300249423Sdim  std::vector<MBBPriorityInfo> MBBs;
3301249423Sdim  MBBs.reserve(MF->size());
3302296417Sdim  for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) {
3303296417Sdim    MachineBasicBlock *MBB = &*I;
3304249423Sdim    MBBs.push_back(MBBPriorityInfo(MBB, Loops->getLoopDepth(MBB),
3305249423Sdim                                   JoinSplitEdges && isSplitEdge(MBB)));
3306249423Sdim  }
3307249423Sdim  array_pod_sort(MBBs.begin(), MBBs.end(), compareMBBPriority);
3308224145Sdim
3309249423Sdim  // Coalesce intervals in MBB priority order.
3310327952Sdim  unsigned CurrDepth = std::numeric_limits<unsigned>::max();
3311249423Sdim  for (unsigned i = 0, e = MBBs.size(); i != e; ++i) {
3312249423Sdim    // Try coalescing the collected local copies for deeper loops.
3313249423Sdim    if (JoinGlobalCopies && MBBs[i].Depth < CurrDepth) {
3314249423Sdim      coalesceLocals();
3315249423Sdim      CurrDepth = MBBs[i].Depth;
3316224145Sdim    }
3317249423Sdim    copyCoalesceInMBB(MBBs[i].MBB);
3318224145Sdim  }
3319249423Sdim  coalesceLocals();
3320224145Sdim
3321224145Sdim  // Joining intervals can allow other intervals to be joined.  Iteratively join
3322224145Sdim  // until we make no progress.
3323249423Sdim  while (copyCoalesceWorkList(WorkList))
3324239462Sdim    /* empty */ ;
3325224145Sdim}
3326224145Sdim
3327224145Sdimvoid RegisterCoalescer::releaseMemory() {
3328239462Sdim  ErasedInstrs.clear();
3329239462Sdim  WorkList.clear();
3330239462Sdim  DeadDefs.clear();
3331239462Sdim  InflateRegs.clear();
3332224145Sdim}
3333224145Sdim
3334224145Sdimbool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
3335226633Sdim  MF = &fn;
3336226633Sdim  MRI = &fn.getRegInfo();
3337288943Sdim  const TargetSubtargetInfo &STI = fn.getSubtarget();
3338288943Sdim  TRI = STI.getRegisterInfo();
3339288943Sdim  TII = STI.getInstrInfo();
3340226633Sdim  LIS = &getAnalysis<LiveIntervals>();
3341296417Sdim  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
3342226633Sdim  Loops = &getAnalysis<MachineLoopInfo>();
3343249423Sdim  if (EnableGlobalCopies == cl::BOU_UNSET)
3344288943Sdim    JoinGlobalCopies = STI.enableJoinGlobalCopies();
3345249423Sdim  else
3346249423Sdim    JoinGlobalCopies = (EnableGlobalCopies == cl::BOU_TRUE);
3347249423Sdim
3348249423Sdim  // The MachineScheduler does not currently require JoinSplitEdges. This will
3349249423Sdim  // either be enabled unconditionally or replaced by a more general live range
3350249423Sdim  // splitting optimization.
3351249423Sdim  JoinSplitEdges = EnableJoinSplits;
3352249423Sdim
3353224145Sdim  DEBUG(dbgs() << "********** SIMPLE REGISTER COALESCING **********\n"
3354243830Sdim               << "********** Function: " << MF->getName() << '\n');
3355224145Sdim
3356224145Sdim  if (VerifyCoalescing)
3357226633Sdim    MF->verify(this, "Before register coalescing");
3358224145Sdim
3359224145Sdim  RegClassInfo.runOnMachineFunction(fn);
3360224145Sdim
3361224145Sdim  // Join (coalesce) intervals if requested.
3362239462Sdim  if (EnableJoining)
3363239462Sdim    joinAllIntervals();
3364224145Sdim
3365226633Sdim  // After deleting a lot of copies, register classes may be less constrained.
3366239462Sdim  // Removing sub-register operands may allow GR32_ABCD -> GR32 and DPR_VFP2 ->
3367226633Sdim  // DPR inflation.
3368226633Sdim  array_pod_sort(InflateRegs.begin(), InflateRegs.end());
3369226633Sdim  InflateRegs.erase(std::unique(InflateRegs.begin(), InflateRegs.end()),
3370226633Sdim                    InflateRegs.end());
3371226633Sdim  DEBUG(dbgs() << "Trying to inflate " << InflateRegs.size() << " regs.\n");
3372226633Sdim  for (unsigned i = 0, e = InflateRegs.size(); i != e; ++i) {
3373226633Sdim    unsigned Reg = InflateRegs[i];
3374226633Sdim    if (MRI->reg_nodbg_empty(Reg))
3375226633Sdim      continue;
3376288943Sdim    if (MRI->recomputeRegClass(Reg)) {
3377327952Sdim      DEBUG(dbgs() << printReg(Reg) << " inflated to "
3378280031Sdim                   << TRI->getRegClassName(MRI->getRegClass(Reg)) << '\n');
3379296417Sdim      ++NumInflated;
3380296417Sdim
3381280031Sdim      LiveInterval &LI = LIS->getInterval(Reg);
3382296417Sdim      if (LI.hasSubRanges()) {
3383280031Sdim        // If the inflated register class does not support subregisters anymore
3384280031Sdim        // remove the subranges.
3385296417Sdim        if (!MRI->shouldTrackSubRegLiveness(Reg)) {
3386296417Sdim          LI.clearSubRanges();
3387296417Sdim        } else {
3388288943Sdim#ifndef NDEBUG
3389296417Sdim          LaneBitmask MaxMask = MRI->getMaxLaneMaskForVReg(Reg);
3390296417Sdim          // If subranges are still supported, then the same subregs
3391296417Sdim          // should still be supported.
3392296417Sdim          for (LiveInterval::SubRange &S : LI.subranges()) {
3393314564Sdim            assert((S.LaneMask & ~MaxMask).none());
3394296417Sdim          }
3395296417Sdim#endif
3396280031Sdim        }
3397280031Sdim      }
3398226633Sdim    }
3399226633Sdim  }
3400226633Sdim
3401224145Sdim  DEBUG(dump());
3402224145Sdim  if (VerifyCoalescing)
3403226633Sdim    MF->verify(this, "After register coalescing");
3404224145Sdim  return true;
3405224145Sdim}
3406224145Sdim
3407224145Sdimvoid RegisterCoalescer::print(raw_ostream &O, const Module* m) const {
3408226633Sdim   LIS->print(O, m);
3409224145Sdim}
3410