RegisterCoalescer.cpp revision 243830
1//===- RegisterCoalescer.cpp - Generic Register Coalescing Interface -------==//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements the generic RegisterCoalescer interface which
11// is used as the common interface used by all clients and
12// implementations of register coalescing.
13//
14//===----------------------------------------------------------------------===//
15
16#define DEBUG_TYPE "regalloc"
17#include "RegisterCoalescer.h"
18#include "LiveDebugVariables.h"
19#include "VirtRegMap.h"
20
21#include "llvm/Pass.h"
22#include "llvm/Value.h"
23#include "llvm/ADT/OwningPtr.h"
24#include "llvm/ADT/STLExtras.h"
25#include "llvm/ADT/SmallSet.h"
26#include "llvm/ADT/Statistic.h"
27#include "llvm/Analysis/AliasAnalysis.h"
28#include "llvm/CodeGen/LiveIntervalAnalysis.h"
29#include "llvm/CodeGen/LiveIntervalAnalysis.h"
30#include "llvm/CodeGen/LiveRangeEdit.h"
31#include "llvm/CodeGen/MachineFrameInfo.h"
32#include "llvm/CodeGen/MachineInstr.h"
33#include "llvm/CodeGen/MachineInstr.h"
34#include "llvm/CodeGen/MachineLoopInfo.h"
35#include "llvm/CodeGen/MachineRegisterInfo.h"
36#include "llvm/CodeGen/MachineRegisterInfo.h"
37#include "llvm/CodeGen/Passes.h"
38#include "llvm/CodeGen/RegisterClassInfo.h"
39#include "llvm/Support/CommandLine.h"
40#include "llvm/Support/Debug.h"
41#include "llvm/Support/ErrorHandling.h"
42#include "llvm/Support/raw_ostream.h"
43#include "llvm/Target/TargetInstrInfo.h"
44#include "llvm/Target/TargetInstrInfo.h"
45#include "llvm/Target/TargetMachine.h"
46#include "llvm/Target/TargetOptions.h"
47#include "llvm/Target/TargetRegisterInfo.h"
48#include <algorithm>
49#include <cmath>
50using namespace llvm;
51
52STATISTIC(numJoins    , "Number of interval joins performed");
53STATISTIC(numCrossRCs , "Number of cross class joins performed");
54STATISTIC(numCommutes , "Number of instruction commuting performed");
55STATISTIC(numExtends  , "Number of copies extended");
56STATISTIC(NumReMats   , "Number of instructions re-materialized");
57STATISTIC(NumInflated , "Number of register classes inflated");
58STATISTIC(NumLaneConflicts, "Number of dead lane conflicts tested");
59STATISTIC(NumLaneResolves,  "Number of dead lane conflicts resolved");
60
61static cl::opt<bool>
62EnableJoining("join-liveintervals",
63              cl::desc("Coalesce copies (default=true)"),
64              cl::init(true));
65
66static cl::opt<bool>
67VerifyCoalescing("verify-coalescing",
68         cl::desc("Verify machine instrs before and after register coalescing"),
69         cl::Hidden);
70
71namespace {
72  class RegisterCoalescer : public MachineFunctionPass,
73                            private LiveRangeEdit::Delegate {
74    MachineFunction* MF;
75    MachineRegisterInfo* MRI;
76    const TargetMachine* TM;
77    const TargetRegisterInfo* TRI;
78    const TargetInstrInfo* TII;
79    LiveIntervals *LIS;
80    LiveDebugVariables *LDV;
81    const MachineLoopInfo* Loops;
82    AliasAnalysis *AA;
83    RegisterClassInfo RegClassInfo;
84
85    /// WorkList - Copy instructions yet to be coalesced.
86    SmallVector<MachineInstr*, 8> WorkList;
87
88    /// ErasedInstrs - Set of instruction pointers that have been erased, and
89    /// that may be present in WorkList.
90    SmallPtrSet<MachineInstr*, 8> ErasedInstrs;
91
92    /// Dead instructions that are about to be deleted.
93    SmallVector<MachineInstr*, 8> DeadDefs;
94
95    /// Virtual registers to be considered for register class inflation.
96    SmallVector<unsigned, 8> InflateRegs;
97
98    /// Recursively eliminate dead defs in DeadDefs.
99    void eliminateDeadDefs();
100
101    /// LiveRangeEdit callback.
102    void LRE_WillEraseInstruction(MachineInstr *MI);
103
104    /// joinAllIntervals - join compatible live intervals
105    void joinAllIntervals();
106
107    /// copyCoalesceInMBB - Coalesce copies in the specified MBB, putting
108    /// copies that cannot yet be coalesced into WorkList.
109    void copyCoalesceInMBB(MachineBasicBlock *MBB);
110
111    /// copyCoalesceWorkList - Try to coalesce all copies in WorkList after
112    /// position From. Return true if any progress was made.
113    bool copyCoalesceWorkList(unsigned From = 0);
114
115    /// joinCopy - Attempt to join intervals corresponding to SrcReg/DstReg,
116    /// which are the src/dst of the copy instruction CopyMI.  This returns
117    /// true if the copy was successfully coalesced away. If it is not
118    /// currently possible to coalesce this interval, but it may be possible if
119    /// other things get coalesced, then it returns true by reference in
120    /// 'Again'.
121    bool joinCopy(MachineInstr *TheCopy, bool &Again);
122
123    /// joinIntervals - Attempt to join these two intervals.  On failure, this
124    /// returns false.  The output "SrcInt" will not have been modified, so we
125    /// can use this information below to update aliases.
126    bool joinIntervals(CoalescerPair &CP);
127
128    /// Attempt joining two virtual registers. Return true on success.
129    bool joinVirtRegs(CoalescerPair &CP);
130
131    /// Attempt joining with a reserved physreg.
132    bool joinReservedPhysReg(CoalescerPair &CP);
133
134    /// adjustCopiesBackFrom - We found a non-trivially-coalescable copy. If
135    /// the source value number is defined by a copy from the destination reg
136    /// see if we can merge these two destination reg valno# into a single
137    /// value number, eliminating a copy.
138    bool adjustCopiesBackFrom(const CoalescerPair &CP, MachineInstr *CopyMI);
139
140    /// hasOtherReachingDefs - Return true if there are definitions of IntB
141    /// other than BValNo val# that can reach uses of AValno val# of IntA.
142    bool hasOtherReachingDefs(LiveInterval &IntA, LiveInterval &IntB,
143                              VNInfo *AValNo, VNInfo *BValNo);
144
145    /// removeCopyByCommutingDef - We found a non-trivially-coalescable copy.
146    /// If the source value number is defined by a commutable instruction and
147    /// its other operand is coalesced to the copy dest register, see if we
148    /// can transform the copy into a noop by commuting the definition.
149    bool removeCopyByCommutingDef(const CoalescerPair &CP,MachineInstr *CopyMI);
150
151    /// reMaterializeTrivialDef - If the source of a copy is defined by a
152    /// trivial computation, replace the copy by rematerialize the definition.
153    bool reMaterializeTrivialDef(LiveInterval &SrcInt, unsigned DstReg,
154                                 MachineInstr *CopyMI);
155
156    /// canJoinPhys - Return true if a physreg copy should be joined.
157    bool canJoinPhys(CoalescerPair &CP);
158
159    /// updateRegDefsUses - Replace all defs and uses of SrcReg to DstReg and
160    /// update the subregister number if it is not zero. If DstReg is a
161    /// physical register and the existing subregister number of the def / use
162    /// being updated is not zero, make sure to set it to the correct physical
163    /// subregister.
164    void updateRegDefsUses(unsigned SrcReg, unsigned DstReg, unsigned SubIdx);
165
166    /// eliminateUndefCopy - Handle copies of undef values.
167    bool eliminateUndefCopy(MachineInstr *CopyMI, const CoalescerPair &CP);
168
169  public:
170    static char ID; // Class identification, replacement for typeinfo
171    RegisterCoalescer() : MachineFunctionPass(ID) {
172      initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry());
173    }
174
175    virtual void getAnalysisUsage(AnalysisUsage &AU) const;
176
177    virtual void releaseMemory();
178
179    /// runOnMachineFunction - pass entry point
180    virtual bool runOnMachineFunction(MachineFunction&);
181
182    /// print - Implement the dump method.
183    virtual void print(raw_ostream &O, const Module* = 0) const;
184  };
185} /// end anonymous namespace
186
187char &llvm::RegisterCoalescerID = RegisterCoalescer::ID;
188
189INITIALIZE_PASS_BEGIN(RegisterCoalescer, "simple-register-coalescing",
190                      "Simple Register Coalescing", false, false)
191INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
192INITIALIZE_PASS_DEPENDENCY(LiveDebugVariables)
193INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
194INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
195INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
196INITIALIZE_PASS_END(RegisterCoalescer, "simple-register-coalescing",
197                    "Simple Register Coalescing", false, false)
198
199char RegisterCoalescer::ID = 0;
200
201static bool isMoveInstr(const TargetRegisterInfo &tri, const MachineInstr *MI,
202                        unsigned &Src, unsigned &Dst,
203                        unsigned &SrcSub, unsigned &DstSub) {
204  if (MI->isCopy()) {
205    Dst = MI->getOperand(0).getReg();
206    DstSub = MI->getOperand(0).getSubReg();
207    Src = MI->getOperand(1).getReg();
208    SrcSub = MI->getOperand(1).getSubReg();
209  } else if (MI->isSubregToReg()) {
210    Dst = MI->getOperand(0).getReg();
211    DstSub = tri.composeSubRegIndices(MI->getOperand(0).getSubReg(),
212                                      MI->getOperand(3).getImm());
213    Src = MI->getOperand(2).getReg();
214    SrcSub = MI->getOperand(2).getSubReg();
215  } else
216    return false;
217  return true;
218}
219
220bool CoalescerPair::setRegisters(const MachineInstr *MI) {
221  SrcReg = DstReg = 0;
222  SrcIdx = DstIdx = 0;
223  NewRC = 0;
224  Flipped = CrossClass = false;
225
226  unsigned Src, Dst, SrcSub, DstSub;
227  if (!isMoveInstr(TRI, MI, Src, Dst, SrcSub, DstSub))
228    return false;
229  Partial = SrcSub || DstSub;
230
231  // If one register is a physreg, it must be Dst.
232  if (TargetRegisterInfo::isPhysicalRegister(Src)) {
233    if (TargetRegisterInfo::isPhysicalRegister(Dst))
234      return false;
235    std::swap(Src, Dst);
236    std::swap(SrcSub, DstSub);
237    Flipped = true;
238  }
239
240  const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
241
242  if (TargetRegisterInfo::isPhysicalRegister(Dst)) {
243    // Eliminate DstSub on a physreg.
244    if (DstSub) {
245      Dst = TRI.getSubReg(Dst, DstSub);
246      if (!Dst) return false;
247      DstSub = 0;
248    }
249
250    // Eliminate SrcSub by picking a corresponding Dst superregister.
251    if (SrcSub) {
252      Dst = TRI.getMatchingSuperReg(Dst, SrcSub, MRI.getRegClass(Src));
253      if (!Dst) return false;
254      SrcSub = 0;
255    } else if (!MRI.getRegClass(Src)->contains(Dst)) {
256      return false;
257    }
258  } else {
259    // Both registers are virtual.
260    const TargetRegisterClass *SrcRC = MRI.getRegClass(Src);
261    const TargetRegisterClass *DstRC = MRI.getRegClass(Dst);
262
263    // Both registers have subreg indices.
264    if (SrcSub && DstSub) {
265      // Copies between different sub-registers are never coalescable.
266      if (Src == Dst && SrcSub != DstSub)
267        return false;
268
269      NewRC = TRI.getCommonSuperRegClass(SrcRC, SrcSub, DstRC, DstSub,
270                                         SrcIdx, DstIdx);
271      if (!NewRC)
272        return false;
273    } else if (DstSub) {
274      // SrcReg will be merged with a sub-register of DstReg.
275      SrcIdx = DstSub;
276      NewRC = TRI.getMatchingSuperRegClass(DstRC, SrcRC, DstSub);
277    } else if (SrcSub) {
278      // DstReg will be merged with a sub-register of SrcReg.
279      DstIdx = SrcSub;
280      NewRC = TRI.getMatchingSuperRegClass(SrcRC, DstRC, SrcSub);
281    } else {
282      // This is a straight copy without sub-registers.
283      NewRC = TRI.getCommonSubClass(DstRC, SrcRC);
284    }
285
286    // The combined constraint may be impossible to satisfy.
287    if (!NewRC)
288      return false;
289
290    // Prefer SrcReg to be a sub-register of DstReg.
291    // FIXME: Coalescer should support subregs symmetrically.
292    if (DstIdx && !SrcIdx) {
293      std::swap(Src, Dst);
294      std::swap(SrcIdx, DstIdx);
295      Flipped = !Flipped;
296    }
297
298    CrossClass = NewRC != DstRC || NewRC != SrcRC;
299  }
300  // Check our invariants
301  assert(TargetRegisterInfo::isVirtualRegister(Src) && "Src must be virtual");
302  assert(!(TargetRegisterInfo::isPhysicalRegister(Dst) && DstSub) &&
303         "Cannot have a physical SubIdx");
304  SrcReg = Src;
305  DstReg = Dst;
306  return true;
307}
308
309bool CoalescerPair::flip() {
310  if (TargetRegisterInfo::isPhysicalRegister(DstReg))
311    return false;
312  std::swap(SrcReg, DstReg);
313  std::swap(SrcIdx, DstIdx);
314  Flipped = !Flipped;
315  return true;
316}
317
318bool CoalescerPair::isCoalescable(const MachineInstr *MI) const {
319  if (!MI)
320    return false;
321  unsigned Src, Dst, SrcSub, DstSub;
322  if (!isMoveInstr(TRI, MI, Src, Dst, SrcSub, DstSub))
323    return false;
324
325  // Find the virtual register that is SrcReg.
326  if (Dst == SrcReg) {
327    std::swap(Src, Dst);
328    std::swap(SrcSub, DstSub);
329  } else if (Src != SrcReg) {
330    return false;
331  }
332
333  // Now check that Dst matches DstReg.
334  if (TargetRegisterInfo::isPhysicalRegister(DstReg)) {
335    if (!TargetRegisterInfo::isPhysicalRegister(Dst))
336      return false;
337    assert(!DstIdx && !SrcIdx && "Inconsistent CoalescerPair state.");
338    // DstSub could be set for a physreg from INSERT_SUBREG.
339    if (DstSub)
340      Dst = TRI.getSubReg(Dst, DstSub);
341    // Full copy of Src.
342    if (!SrcSub)
343      return DstReg == Dst;
344    // This is a partial register copy. Check that the parts match.
345    return TRI.getSubReg(DstReg, SrcSub) == Dst;
346  } else {
347    // DstReg is virtual.
348    if (DstReg != Dst)
349      return false;
350    // Registers match, do the subregisters line up?
351    return TRI.composeSubRegIndices(SrcIdx, SrcSub) ==
352           TRI.composeSubRegIndices(DstIdx, DstSub);
353  }
354}
355
356void RegisterCoalescer::getAnalysisUsage(AnalysisUsage &AU) const {
357  AU.setPreservesCFG();
358  AU.addRequired<AliasAnalysis>();
359  AU.addRequired<LiveIntervals>();
360  AU.addPreserved<LiveIntervals>();
361  AU.addRequired<LiveDebugVariables>();
362  AU.addPreserved<LiveDebugVariables>();
363  AU.addPreserved<SlotIndexes>();
364  AU.addRequired<MachineLoopInfo>();
365  AU.addPreserved<MachineLoopInfo>();
366  AU.addPreservedID(MachineDominatorsID);
367  MachineFunctionPass::getAnalysisUsage(AU);
368}
369
370void RegisterCoalescer::eliminateDeadDefs() {
371  SmallVector<LiveInterval*, 8> NewRegs;
372  LiveRangeEdit(0, NewRegs, *MF, *LIS, 0, this).eliminateDeadDefs(DeadDefs);
373}
374
375// Callback from eliminateDeadDefs().
376void RegisterCoalescer::LRE_WillEraseInstruction(MachineInstr *MI) {
377  // MI may be in WorkList. Make sure we don't visit it.
378  ErasedInstrs.insert(MI);
379}
380
381/// adjustCopiesBackFrom - We found a non-trivially-coalescable copy with IntA
382/// being the source and IntB being the dest, thus this defines a value number
383/// in IntB.  If the source value number (in IntA) is defined by a copy from B,
384/// see if we can merge these two pieces of B into a single value number,
385/// eliminating a copy.  For example:
386///
387///  A3 = B0
388///    ...
389///  B1 = A3      <- this copy
390///
391/// In this case, B0 can be extended to where the B1 copy lives, allowing the B1
392/// value number to be replaced with B0 (which simplifies the B liveinterval).
393///
394/// This returns true if an interval was modified.
395///
396bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP,
397                                             MachineInstr *CopyMI) {
398  assert(!CP.isPartial() && "This doesn't work for partial copies.");
399  assert(!CP.isPhys() && "This doesn't work for physreg copies.");
400
401  LiveInterval &IntA =
402    LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
403  LiveInterval &IntB =
404    LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg());
405  SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot();
406
407  // BValNo is a value number in B that is defined by a copy from A.  'B3' in
408  // the example above.
409  LiveInterval::iterator BLR = IntB.FindLiveRangeContaining(CopyIdx);
410  if (BLR == IntB.end()) return false;
411  VNInfo *BValNo = BLR->valno;
412
413  // Get the location that B is defined at.  Two options: either this value has
414  // an unknown definition point or it is defined at CopyIdx.  If unknown, we
415  // can't process it.
416  if (BValNo->def != CopyIdx) return false;
417
418  // AValNo is the value number in A that defines the copy, A3 in the example.
419  SlotIndex CopyUseIdx = CopyIdx.getRegSlot(true);
420  LiveInterval::iterator ALR = IntA.FindLiveRangeContaining(CopyUseIdx);
421  // The live range might not exist after fun with physreg coalescing.
422  if (ALR == IntA.end()) return false;
423  VNInfo *AValNo = ALR->valno;
424
425  // If AValNo is defined as a copy from IntB, we can potentially process this.
426  // Get the instruction that defines this value number.
427  MachineInstr *ACopyMI = LIS->getInstructionFromIndex(AValNo->def);
428  // Don't allow any partial copies, even if isCoalescable() allows them.
429  if (!CP.isCoalescable(ACopyMI) || !ACopyMI->isFullCopy())
430    return false;
431
432  // Get the LiveRange in IntB that this value number starts with.
433  LiveInterval::iterator ValLR =
434    IntB.FindLiveRangeContaining(AValNo->def.getPrevSlot());
435  if (ValLR == IntB.end())
436    return false;
437
438  // Make sure that the end of the live range is inside the same block as
439  // CopyMI.
440  MachineInstr *ValLREndInst =
441    LIS->getInstructionFromIndex(ValLR->end.getPrevSlot());
442  if (!ValLREndInst || ValLREndInst->getParent() != CopyMI->getParent())
443    return false;
444
445  // Okay, we now know that ValLR ends in the same block that the CopyMI
446  // live-range starts.  If there are no intervening live ranges between them in
447  // IntB, we can merge them.
448  if (ValLR+1 != BLR) return false;
449
450  DEBUG(dbgs() << "Extending: " << PrintReg(IntB.reg, TRI));
451
452  SlotIndex FillerStart = ValLR->end, FillerEnd = BLR->start;
453  // We are about to delete CopyMI, so need to remove it as the 'instruction
454  // that defines this value #'. Update the valnum with the new defining
455  // instruction #.
456  BValNo->def = FillerStart;
457
458  // Okay, we can merge them.  We need to insert a new liverange:
459  // [ValLR.end, BLR.begin) of either value number, then we merge the
460  // two value numbers.
461  IntB.addRange(LiveRange(FillerStart, FillerEnd, BValNo));
462
463  // Okay, merge "B1" into the same value number as "B0".
464  if (BValNo != ValLR->valno)
465    IntB.MergeValueNumberInto(BValNo, ValLR->valno);
466  DEBUG(dbgs() << "   result = " << IntB << '\n');
467
468  // If the source instruction was killing the source register before the
469  // merge, unset the isKill marker given the live range has been extended.
470  int UIdx = ValLREndInst->findRegisterUseOperandIdx(IntB.reg, true);
471  if (UIdx != -1) {
472    ValLREndInst->getOperand(UIdx).setIsKill(false);
473  }
474
475  // Rewrite the copy. If the copy instruction was killing the destination
476  // register before the merge, find the last use and trim the live range. That
477  // will also add the isKill marker.
478  CopyMI->substituteRegister(IntA.reg, IntB.reg, 0, *TRI);
479  if (ALR->end == CopyIdx)
480    LIS->shrinkToUses(&IntA);
481
482  ++numExtends;
483  return true;
484}
485
486/// hasOtherReachingDefs - Return true if there are definitions of IntB
487/// other than BValNo val# that can reach uses of AValno val# of IntA.
488bool RegisterCoalescer::hasOtherReachingDefs(LiveInterval &IntA,
489                                             LiveInterval &IntB,
490                                             VNInfo *AValNo,
491                                             VNInfo *BValNo) {
492  // If AValNo has PHI kills, conservatively assume that IntB defs can reach
493  // the PHI values.
494  if (LIS->hasPHIKill(IntA, AValNo))
495    return true;
496
497  for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end();
498       AI != AE; ++AI) {
499    if (AI->valno != AValNo) continue;
500    LiveInterval::Ranges::iterator BI =
501      std::upper_bound(IntB.ranges.begin(), IntB.ranges.end(), AI->start);
502    if (BI != IntB.ranges.begin())
503      --BI;
504    for (; BI != IntB.ranges.end() && AI->end >= BI->start; ++BI) {
505      if (BI->valno == BValNo)
506        continue;
507      if (BI->start <= AI->start && BI->end > AI->start)
508        return true;
509      if (BI->start > AI->start && BI->start < AI->end)
510        return true;
511    }
512  }
513  return false;
514}
515
516/// removeCopyByCommutingDef - We found a non-trivially-coalescable copy with
517/// IntA being the source and IntB being the dest, thus this defines a value
518/// number in IntB.  If the source value number (in IntA) is defined by a
519/// commutable instruction and its other operand is coalesced to the copy dest
520/// register, see if we can transform the copy into a noop by commuting the
521/// definition. For example,
522///
523///  A3 = op A2 B0<kill>
524///    ...
525///  B1 = A3      <- this copy
526///    ...
527///     = op A3   <- more uses
528///
529/// ==>
530///
531///  B2 = op B0 A2<kill>
532///    ...
533///  B1 = B2      <- now an identify copy
534///    ...
535///     = op B2   <- more uses
536///
537/// This returns true if an interval was modified.
538///
539bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
540                                                 MachineInstr *CopyMI) {
541  assert (!CP.isPhys());
542
543  SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot();
544
545  LiveInterval &IntA =
546    LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
547  LiveInterval &IntB =
548    LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg());
549
550  // BValNo is a value number in B that is defined by a copy from A. 'B3' in
551  // the example above.
552  VNInfo *BValNo = IntB.getVNInfoAt(CopyIdx);
553  if (!BValNo || BValNo->def != CopyIdx)
554    return false;
555
556  assert(BValNo->def == CopyIdx && "Copy doesn't define the value?");
557
558  // AValNo is the value number in A that defines the copy, A3 in the example.
559  VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx.getRegSlot(true));
560  assert(AValNo && "COPY source not live");
561  if (AValNo->isPHIDef() || AValNo->isUnused())
562    return false;
563  MachineInstr *DefMI = LIS->getInstructionFromIndex(AValNo->def);
564  if (!DefMI)
565    return false;
566  if (!DefMI->isCommutable())
567    return false;
568  // If DefMI is a two-address instruction then commuting it will change the
569  // destination register.
570  int DefIdx = DefMI->findRegisterDefOperandIdx(IntA.reg);
571  assert(DefIdx != -1);
572  unsigned UseOpIdx;
573  if (!DefMI->isRegTiedToUseOperand(DefIdx, &UseOpIdx))
574    return false;
575  unsigned Op1, Op2, NewDstIdx;
576  if (!TII->findCommutedOpIndices(DefMI, Op1, Op2))
577    return false;
578  if (Op1 == UseOpIdx)
579    NewDstIdx = Op2;
580  else if (Op2 == UseOpIdx)
581    NewDstIdx = Op1;
582  else
583    return false;
584
585  MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx);
586  unsigned NewReg = NewDstMO.getReg();
587  if (NewReg != IntB.reg || !LiveRangeQuery(IntB, AValNo->def).isKill())
588    return false;
589
590  // Make sure there are no other definitions of IntB that would reach the
591  // uses which the new definition can reach.
592  if (hasOtherReachingDefs(IntA, IntB, AValNo, BValNo))
593    return false;
594
595  // If some of the uses of IntA.reg is already coalesced away, return false.
596  // It's not possible to determine whether it's safe to perform the coalescing.
597  for (MachineRegisterInfo::use_nodbg_iterator UI =
598         MRI->use_nodbg_begin(IntA.reg),
599       UE = MRI->use_nodbg_end(); UI != UE; ++UI) {
600    MachineInstr *UseMI = &*UI;
601    SlotIndex UseIdx = LIS->getInstructionIndex(UseMI);
602    LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx);
603    if (ULR == IntA.end() || ULR->valno != AValNo)
604      continue;
605    // If this use is tied to a def, we can't rewrite the register.
606    if (UseMI->isRegTiedToDefOperand(UI.getOperandNo()))
607      return false;
608  }
609
610  DEBUG(dbgs() << "\tremoveCopyByCommutingDef: " << AValNo->def << '\t'
611               << *DefMI);
612
613  // At this point we have decided that it is legal to do this
614  // transformation.  Start by commuting the instruction.
615  MachineBasicBlock *MBB = DefMI->getParent();
616  MachineInstr *NewMI = TII->commuteInstruction(DefMI);
617  if (!NewMI)
618    return false;
619  if (TargetRegisterInfo::isVirtualRegister(IntA.reg) &&
620      TargetRegisterInfo::isVirtualRegister(IntB.reg) &&
621      !MRI->constrainRegClass(IntB.reg, MRI->getRegClass(IntA.reg)))
622    return false;
623  if (NewMI != DefMI) {
624    LIS->ReplaceMachineInstrInMaps(DefMI, NewMI);
625    MachineBasicBlock::iterator Pos = DefMI;
626    MBB->insert(Pos, NewMI);
627    MBB->erase(DefMI);
628  }
629  unsigned OpIdx = NewMI->findRegisterUseOperandIdx(IntA.reg, false);
630  NewMI->getOperand(OpIdx).setIsKill();
631
632  // If ALR and BLR overlaps and end of BLR extends beyond end of ALR, e.g.
633  // A = or A, B
634  // ...
635  // B = A
636  // ...
637  // C = A<kill>
638  // ...
639  //   = B
640
641  // Update uses of IntA of the specific Val# with IntB.
642  for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(IntA.reg),
643         UE = MRI->use_end(); UI != UE;) {
644    MachineOperand &UseMO = UI.getOperand();
645    MachineInstr *UseMI = &*UI;
646    ++UI;
647    if (UseMI->isDebugValue()) {
648      // FIXME These don't have an instruction index.  Not clear we have enough
649      // info to decide whether to do this replacement or not.  For now do it.
650      UseMO.setReg(NewReg);
651      continue;
652    }
653    SlotIndex UseIdx = LIS->getInstructionIndex(UseMI).getRegSlot(true);
654    LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx);
655    if (ULR == IntA.end() || ULR->valno != AValNo)
656      continue;
657    // Kill flags are no longer accurate. They are recomputed after RA.
658    UseMO.setIsKill(false);
659    if (TargetRegisterInfo::isPhysicalRegister(NewReg))
660      UseMO.substPhysReg(NewReg, *TRI);
661    else
662      UseMO.setReg(NewReg);
663    if (UseMI == CopyMI)
664      continue;
665    if (!UseMI->isCopy())
666      continue;
667    if (UseMI->getOperand(0).getReg() != IntB.reg ||
668        UseMI->getOperand(0).getSubReg())
669      continue;
670
671    // This copy will become a noop. If it's defining a new val#, merge it into
672    // BValNo.
673    SlotIndex DefIdx = UseIdx.getRegSlot();
674    VNInfo *DVNI = IntB.getVNInfoAt(DefIdx);
675    if (!DVNI)
676      continue;
677    DEBUG(dbgs() << "\t\tnoop: " << DefIdx << '\t' << *UseMI);
678    assert(DVNI->def == DefIdx);
679    BValNo = IntB.MergeValueNumberInto(BValNo, DVNI);
680    ErasedInstrs.insert(UseMI);
681    LIS->RemoveMachineInstrFromMaps(UseMI);
682    UseMI->eraseFromParent();
683  }
684
685  // Extend BValNo by merging in IntA live ranges of AValNo. Val# definition
686  // is updated.
687  VNInfo *ValNo = BValNo;
688  ValNo->def = AValNo->def;
689  for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end();
690       AI != AE; ++AI) {
691    if (AI->valno != AValNo) continue;
692    IntB.addRange(LiveRange(AI->start, AI->end, ValNo));
693  }
694  DEBUG(dbgs() << "\t\textended: " << IntB << '\n');
695
696  IntA.removeValNo(AValNo);
697  DEBUG(dbgs() << "\t\ttrimmed:  " << IntA << '\n');
698  ++numCommutes;
699  return true;
700}
701
702/// reMaterializeTrivialDef - If the source of a copy is defined by a trivial
703/// computation, replace the copy by rematerialize the definition.
704bool RegisterCoalescer::reMaterializeTrivialDef(LiveInterval &SrcInt,
705                                                unsigned DstReg,
706                                                MachineInstr *CopyMI) {
707  SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(true);
708  LiveInterval::iterator SrcLR = SrcInt.FindLiveRangeContaining(CopyIdx);
709  assert(SrcLR != SrcInt.end() && "Live range not found!");
710  VNInfo *ValNo = SrcLR->valno;
711  if (ValNo->isPHIDef() || ValNo->isUnused())
712    return false;
713  MachineInstr *DefMI = LIS->getInstructionFromIndex(ValNo->def);
714  if (!DefMI)
715    return false;
716  assert(DefMI && "Defining instruction disappeared");
717  if (!DefMI->isAsCheapAsAMove())
718    return false;
719  if (!TII->isTriviallyReMaterializable(DefMI, AA))
720    return false;
721  bool SawStore = false;
722  if (!DefMI->isSafeToMove(TII, AA, SawStore))
723    return false;
724  const MCInstrDesc &MCID = DefMI->getDesc();
725  if (MCID.getNumDefs() != 1)
726    return false;
727  if (!DefMI->isImplicitDef()) {
728    // Make sure the copy destination register class fits the instruction
729    // definition register class. The mismatch can happen as a result of earlier
730    // extract_subreg, insert_subreg, subreg_to_reg coalescing.
731    const TargetRegisterClass *RC = TII->getRegClass(MCID, 0, TRI, *MF);
732    if (TargetRegisterInfo::isVirtualRegister(DstReg)) {
733      if (MRI->getRegClass(DstReg) != RC)
734        return false;
735    } else if (!RC->contains(DstReg))
736      return false;
737  }
738
739  MachineBasicBlock *MBB = CopyMI->getParent();
740  MachineBasicBlock::iterator MII =
741    llvm::next(MachineBasicBlock::iterator(CopyMI));
742  TII->reMaterialize(*MBB, MII, DstReg, 0, DefMI, *TRI);
743  MachineInstr *NewMI = prior(MII);
744
745  // NewMI may have dead implicit defs (E.g. EFLAGS for MOV<bits>r0 on X86).
746  // We need to remember these so we can add intervals once we insert
747  // NewMI into SlotIndexes.
748  SmallVector<unsigned, 4> NewMIImplDefs;
749  for (unsigned i = NewMI->getDesc().getNumOperands(),
750         e = NewMI->getNumOperands(); i != e; ++i) {
751    MachineOperand &MO = NewMI->getOperand(i);
752    if (MO.isReg()) {
753      assert(MO.isDef() && MO.isImplicit() && MO.isDead() &&
754             TargetRegisterInfo::isPhysicalRegister(MO.getReg()));
755      NewMIImplDefs.push_back(MO.getReg());
756    }
757  }
758
759  // CopyMI may have implicit operands, transfer them over to the newly
760  // rematerialized instruction. And update implicit def interval valnos.
761  for (unsigned i = CopyMI->getDesc().getNumOperands(),
762         e = CopyMI->getNumOperands(); i != e; ++i) {
763    MachineOperand &MO = CopyMI->getOperand(i);
764    if (MO.isReg()) {
765      assert(MO.isImplicit() && "No explicit operands after implict operands.");
766      // Discard VReg implicit defs.
767      if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
768        NewMI->addOperand(MO);
769      }
770    }
771  }
772
773  LIS->ReplaceMachineInstrInMaps(CopyMI, NewMI);
774
775  SlotIndex NewMIIdx = LIS->getInstructionIndex(NewMI);
776  for (unsigned i = 0, e = NewMIImplDefs.size(); i != e; ++i) {
777    unsigned Reg = NewMIImplDefs[i];
778    for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
779      if (LiveInterval *LI = LIS->getCachedRegUnit(*Units))
780        LI->createDeadDef(NewMIIdx.getRegSlot(), LIS->getVNInfoAllocator());
781  }
782
783  CopyMI->eraseFromParent();
784  ErasedInstrs.insert(CopyMI);
785  DEBUG(dbgs() << "Remat: " << *NewMI);
786  ++NumReMats;
787
788  // The source interval can become smaller because we removed a use.
789  LIS->shrinkToUses(&SrcInt, &DeadDefs);
790  if (!DeadDefs.empty())
791    eliminateDeadDefs();
792
793  return true;
794}
795
796/// eliminateUndefCopy - ProcessImpicitDefs may leave some copies of <undef>
797/// values, it only removes local variables. When we have a copy like:
798///
799///   %vreg1 = COPY %vreg2<undef>
800///
801/// We delete the copy and remove the corresponding value number from %vreg1.
802/// Any uses of that value number are marked as <undef>.
803bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI,
804                                           const CoalescerPair &CP) {
805  SlotIndex Idx = LIS->getInstructionIndex(CopyMI);
806  LiveInterval *SrcInt = &LIS->getInterval(CP.getSrcReg());
807  if (SrcInt->liveAt(Idx))
808    return false;
809  LiveInterval *DstInt = &LIS->getInterval(CP.getDstReg());
810  if (DstInt->liveAt(Idx))
811    return false;
812
813  // No intervals are live-in to CopyMI - it is undef.
814  if (CP.isFlipped())
815    DstInt = SrcInt;
816  SrcInt = 0;
817
818  VNInfo *DeadVNI = DstInt->getVNInfoAt(Idx.getRegSlot());
819  assert(DeadVNI && "No value defined in DstInt");
820  DstInt->removeValNo(DeadVNI);
821
822  // Find new undef uses.
823  for (MachineRegisterInfo::reg_nodbg_iterator
824         I = MRI->reg_nodbg_begin(DstInt->reg), E = MRI->reg_nodbg_end();
825       I != E; ++I) {
826    MachineOperand &MO = I.getOperand();
827    if (MO.isDef() || MO.isUndef())
828      continue;
829    MachineInstr *MI = MO.getParent();
830    SlotIndex Idx = LIS->getInstructionIndex(MI);
831    if (DstInt->liveAt(Idx))
832      continue;
833    MO.setIsUndef(true);
834    DEBUG(dbgs() << "\tnew undef: " << Idx << '\t' << *MI);
835  }
836  return true;
837}
838
839/// updateRegDefsUses - Replace all defs and uses of SrcReg to DstReg and
840/// update the subregister number if it is not zero. If DstReg is a
841/// physical register and the existing subregister number of the def / use
842/// being updated is not zero, make sure to set it to the correct physical
843/// subregister.
844void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg,
845                                          unsigned DstReg,
846                                          unsigned SubIdx) {
847  bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg);
848  LiveInterval *DstInt = DstIsPhys ? 0 : &LIS->getInterval(DstReg);
849
850  // Update LiveDebugVariables.
851  LDV->renameRegister(SrcReg, DstReg, SubIdx);
852
853  SmallPtrSet<MachineInstr*, 8> Visited;
854  for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(SrcReg);
855       MachineInstr *UseMI = I.skipInstruction();) {
856    // Each instruction can only be rewritten once because sub-register
857    // composition is not always idempotent. When SrcReg != DstReg, rewriting
858    // the UseMI operands removes them from the SrcReg use-def chain, but when
859    // SrcReg is DstReg we could encounter UseMI twice if it has multiple
860    // operands mentioning the virtual register.
861    if (SrcReg == DstReg && !Visited.insert(UseMI))
862      continue;
863
864    SmallVector<unsigned,8> Ops;
865    bool Reads, Writes;
866    tie(Reads, Writes) = UseMI->readsWritesVirtualRegister(SrcReg, &Ops);
867
868    // If SrcReg wasn't read, it may still be the case that DstReg is live-in
869    // because SrcReg is a sub-register.
870    if (DstInt && !Reads && SubIdx)
871      Reads = DstInt->liveAt(LIS->getInstructionIndex(UseMI));
872
873    // Replace SrcReg with DstReg in all UseMI operands.
874    for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
875      MachineOperand &MO = UseMI->getOperand(Ops[i]);
876
877      // Adjust <undef> flags in case of sub-register joins. We don't want to
878      // turn a full def into a read-modify-write sub-register def and vice
879      // versa.
880      if (SubIdx && MO.isDef())
881        MO.setIsUndef(!Reads);
882
883      if (DstIsPhys)
884        MO.substPhysReg(DstReg, *TRI);
885      else
886        MO.substVirtReg(DstReg, SubIdx, *TRI);
887    }
888
889    DEBUG({
890        dbgs() << "\t\tupdated: ";
891        if (!UseMI->isDebugValue())
892          dbgs() << LIS->getInstructionIndex(UseMI) << "\t";
893        dbgs() << *UseMI;
894      });
895  }
896}
897
898/// canJoinPhys - Return true if a copy involving a physreg should be joined.
899bool RegisterCoalescer::canJoinPhys(CoalescerPair &CP) {
900  /// Always join simple intervals that are defined by a single copy from a
901  /// reserved register. This doesn't increase register pressure, so it is
902  /// always beneficial.
903  if (!MRI->isReserved(CP.getDstReg())) {
904    DEBUG(dbgs() << "\tCan only merge into reserved registers.\n");
905    return false;
906  }
907
908  LiveInterval &JoinVInt = LIS->getInterval(CP.getSrcReg());
909  if (CP.isFlipped() && JoinVInt.containsOneValue())
910    return true;
911
912  DEBUG(dbgs() << "\tCannot join defs into reserved register.\n");
913  return false;
914}
915
916/// joinCopy - Attempt to join intervals corresponding to SrcReg/DstReg,
917/// which are the src/dst of the copy instruction CopyMI.  This returns true
918/// if the copy was successfully coalesced away. If it is not currently
919/// possible to coalesce this interval, but it may be possible if other
920/// things get coalesced, then it returns true by reference in 'Again'.
921bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
922
923  Again = false;
924  DEBUG(dbgs() << LIS->getInstructionIndex(CopyMI) << '\t' << *CopyMI);
925
926  CoalescerPair CP(*TRI);
927  if (!CP.setRegisters(CopyMI)) {
928    DEBUG(dbgs() << "\tNot coalescable.\n");
929    return false;
930  }
931
932  // Dead code elimination. This really should be handled by MachineDCE, but
933  // sometimes dead copies slip through, and we can't generate invalid live
934  // ranges.
935  if (!CP.isPhys() && CopyMI->allDefsAreDead()) {
936    DEBUG(dbgs() << "\tCopy is dead.\n");
937    DeadDefs.push_back(CopyMI);
938    eliminateDeadDefs();
939    return true;
940  }
941
942  // Eliminate undefs.
943  if (!CP.isPhys() && eliminateUndefCopy(CopyMI, CP)) {
944    DEBUG(dbgs() << "\tEliminated copy of <undef> value.\n");
945    LIS->RemoveMachineInstrFromMaps(CopyMI);
946    CopyMI->eraseFromParent();
947    return false;  // Not coalescable.
948  }
949
950  // Coalesced copies are normally removed immediately, but transformations
951  // like removeCopyByCommutingDef() can inadvertently create identity copies.
952  // When that happens, just join the values and remove the copy.
953  if (CP.getSrcReg() == CP.getDstReg()) {
954    LiveInterval &LI = LIS->getInterval(CP.getSrcReg());
955    DEBUG(dbgs() << "\tCopy already coalesced: " << LI << '\n');
956    LiveRangeQuery LRQ(LI, LIS->getInstructionIndex(CopyMI));
957    if (VNInfo *DefVNI = LRQ.valueDefined()) {
958      VNInfo *ReadVNI = LRQ.valueIn();
959      assert(ReadVNI && "No value before copy and no <undef> flag.");
960      assert(ReadVNI != DefVNI && "Cannot read and define the same value.");
961      LI.MergeValueNumberInto(DefVNI, ReadVNI);
962      DEBUG(dbgs() << "\tMerged values:          " << LI << '\n');
963    }
964    LIS->RemoveMachineInstrFromMaps(CopyMI);
965    CopyMI->eraseFromParent();
966    return true;
967  }
968
969  // Enforce policies.
970  if (CP.isPhys()) {
971    DEBUG(dbgs() << "\tConsidering merging " << PrintReg(CP.getSrcReg(), TRI)
972                 << " with " << PrintReg(CP.getDstReg(), TRI, CP.getSrcIdx())
973                 << '\n');
974    if (!canJoinPhys(CP)) {
975      // Before giving up coalescing, if definition of source is defined by
976      // trivial computation, try rematerializing it.
977      if (!CP.isFlipped() &&
978          reMaterializeTrivialDef(LIS->getInterval(CP.getSrcReg()),
979                                  CP.getDstReg(), CopyMI))
980        return true;
981      return false;
982    }
983  } else {
984    DEBUG({
985      dbgs() << "\tConsidering merging to " << CP.getNewRC()->getName()
986             << " with ";
987      if (CP.getDstIdx() && CP.getSrcIdx())
988        dbgs() << PrintReg(CP.getDstReg()) << " in "
989               << TRI->getSubRegIndexName(CP.getDstIdx()) << " and "
990               << PrintReg(CP.getSrcReg()) << " in "
991               << TRI->getSubRegIndexName(CP.getSrcIdx()) << '\n';
992      else
993        dbgs() << PrintReg(CP.getSrcReg(), TRI) << " in "
994               << PrintReg(CP.getDstReg(), TRI, CP.getSrcIdx()) << '\n';
995    });
996
997    // When possible, let DstReg be the larger interval.
998    if (!CP.isPartial() && LIS->getInterval(CP.getSrcReg()).ranges.size() >
999                           LIS->getInterval(CP.getDstReg()).ranges.size())
1000      CP.flip();
1001  }
1002
1003  // Okay, attempt to join these two intervals.  On failure, this returns false.
1004  // Otherwise, if one of the intervals being joined is a physreg, this method
1005  // always canonicalizes DstInt to be it.  The output "SrcInt" will not have
1006  // been modified, so we can use this information below to update aliases.
1007  if (!joinIntervals(CP)) {
1008    // Coalescing failed.
1009
1010    // If definition of source is defined by trivial computation, try
1011    // rematerializing it.
1012    if (!CP.isFlipped() &&
1013        reMaterializeTrivialDef(LIS->getInterval(CP.getSrcReg()),
1014                                CP.getDstReg(), CopyMI))
1015      return true;
1016
1017    // If we can eliminate the copy without merging the live ranges, do so now.
1018    if (!CP.isPartial() && !CP.isPhys()) {
1019      if (adjustCopiesBackFrom(CP, CopyMI) ||
1020          removeCopyByCommutingDef(CP, CopyMI)) {
1021        LIS->RemoveMachineInstrFromMaps(CopyMI);
1022        CopyMI->eraseFromParent();
1023        DEBUG(dbgs() << "\tTrivial!\n");
1024        return true;
1025      }
1026    }
1027
1028    // Otherwise, we are unable to join the intervals.
1029    DEBUG(dbgs() << "\tInterference!\n");
1030    Again = true;  // May be possible to coalesce later.
1031    return false;
1032  }
1033
1034  // Coalescing to a virtual register that is of a sub-register class of the
1035  // other. Make sure the resulting register is set to the right register class.
1036  if (CP.isCrossClass()) {
1037    ++numCrossRCs;
1038    MRI->setRegClass(CP.getDstReg(), CP.getNewRC());
1039  }
1040
1041  // Removing sub-register copies can ease the register class constraints.
1042  // Make sure we attempt to inflate the register class of DstReg.
1043  if (!CP.isPhys() && RegClassInfo.isProperSubClass(CP.getNewRC()))
1044    InflateRegs.push_back(CP.getDstReg());
1045
1046  // CopyMI has been erased by joinIntervals at this point. Remove it from
1047  // ErasedInstrs since copyCoalesceWorkList() won't add a successful join back
1048  // to the work list. This keeps ErasedInstrs from growing needlessly.
1049  ErasedInstrs.erase(CopyMI);
1050
1051  // Rewrite all SrcReg operands to DstReg.
1052  // Also update DstReg operands to include DstIdx if it is set.
1053  if (CP.getDstIdx())
1054    updateRegDefsUses(CP.getDstReg(), CP.getDstReg(), CP.getDstIdx());
1055  updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx());
1056
1057  // SrcReg is guaranteed to be the register whose live interval that is
1058  // being merged.
1059  LIS->removeInterval(CP.getSrcReg());
1060
1061  // Update regalloc hint.
1062  TRI->UpdateRegAllocHint(CP.getSrcReg(), CP.getDstReg(), *MF);
1063
1064  DEBUG({
1065    dbgs() << "\tJoined. Result = " << PrintReg(CP.getDstReg(), TRI);
1066    if (!CP.isPhys())
1067      dbgs() << LIS->getInterval(CP.getDstReg());
1068     dbgs() << '\n';
1069  });
1070
1071  ++numJoins;
1072  return true;
1073}
1074
1075/// Attempt joining with a reserved physreg.
1076bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {
1077  assert(CP.isPhys() && "Must be a physreg copy");
1078  assert(MRI->isReserved(CP.getDstReg()) && "Not a reserved register");
1079  LiveInterval &RHS = LIS->getInterval(CP.getSrcReg());
1080  DEBUG(dbgs() << "\t\tRHS = " << PrintReg(CP.getSrcReg()) << ' ' << RHS
1081               << '\n');
1082
1083  assert(CP.isFlipped() && RHS.containsOneValue() &&
1084         "Invalid join with reserved register");
1085
1086  // Optimization for reserved registers like ESP. We can only merge with a
1087  // reserved physreg if RHS has a single value that is a copy of CP.DstReg().
1088  // The live range of the reserved register will look like a set of dead defs
1089  // - we don't properly track the live range of reserved registers.
1090
1091  // Deny any overlapping intervals.  This depends on all the reserved
1092  // register live ranges to look like dead defs.
1093  for (MCRegUnitIterator UI(CP.getDstReg(), TRI); UI.isValid(); ++UI)
1094    if (RHS.overlaps(LIS->getRegUnit(*UI))) {
1095      DEBUG(dbgs() << "\t\tInterference: " << PrintRegUnit(*UI, TRI) << '\n');
1096      return false;
1097    }
1098
1099  // Skip any value computations, we are not adding new values to the
1100  // reserved register.  Also skip merging the live ranges, the reserved
1101  // register live range doesn't need to be accurate as long as all the
1102  // defs are there.
1103
1104  // Delete the identity copy.
1105  MachineInstr *CopyMI = MRI->getVRegDef(RHS.reg);
1106  LIS->RemoveMachineInstrFromMaps(CopyMI);
1107  CopyMI->eraseFromParent();
1108
1109  // We don't track kills for reserved registers.
1110  MRI->clearKillFlags(CP.getSrcReg());
1111
1112  return true;
1113}
1114
1115//===----------------------------------------------------------------------===//
1116//                 Interference checking and interval joining
1117//===----------------------------------------------------------------------===//
1118//
1119// In the easiest case, the two live ranges being joined are disjoint, and
1120// there is no interference to consider. It is quite common, though, to have
1121// overlapping live ranges, and we need to check if the interference can be
1122// resolved.
1123//
1124// The live range of a single SSA value forms a sub-tree of the dominator tree.
1125// This means that two SSA values overlap if and only if the def of one value
1126// is contained in the live range of the other value. As a special case, the
1127// overlapping values can be defined at the same index.
1128//
1129// The interference from an overlapping def can be resolved in these cases:
1130//
1131// 1. Coalescable copies. The value is defined by a copy that would become an
1132//    identity copy after joining SrcReg and DstReg. The copy instruction will
1133//    be removed, and the value will be merged with the source value.
1134//
1135//    There can be several copies back and forth, causing many values to be
1136//    merged into one. We compute a list of ultimate values in the joined live
1137//    range as well as a mappings from the old value numbers.
1138//
1139// 2. IMPLICIT_DEF. This instruction is only inserted to ensure all PHI
1140//    predecessors have a live out value. It doesn't cause real interference,
1141//    and can be merged into the value it overlaps. Like a coalescable copy, it
1142//    can be erased after joining.
1143//
1144// 3. Copy of external value. The overlapping def may be a copy of a value that
1145//    is already in the other register. This is like a coalescable copy, but
1146//    the live range of the source register must be trimmed after erasing the
1147//    copy instruction:
1148//
1149//      %src = COPY %ext
1150//      %dst = COPY %ext  <-- Remove this COPY, trim the live range of %ext.
1151//
1152// 4. Clobbering undefined lanes. Vector registers are sometimes built by
1153//    defining one lane at a time:
1154//
1155//      %dst:ssub0<def,read-undef> = FOO
1156//      %src = BAR
1157//      %dst:ssub1<def> = COPY %src
1158//
1159//    The live range of %src overlaps the %dst value defined by FOO, but
1160//    merging %src into %dst:ssub1 is only going to clobber the ssub1 lane
1161//    which was undef anyway.
1162//
1163//    The value mapping is more complicated in this case. The final live range
1164//    will have different value numbers for both FOO and BAR, but there is no
1165//    simple mapping from old to new values. It may even be necessary to add
1166//    new PHI values.
1167//
1168// 5. Clobbering dead lanes. A def may clobber a lane of a vector register that
1169//    is live, but never read. This can happen because we don't compute
1170//    individual live ranges per lane.
1171//
1172//      %dst<def> = FOO
1173//      %src = BAR
1174//      %dst:ssub1<def> = COPY %src
1175//
1176//    This kind of interference is only resolved locally. If the clobbered
1177//    lane value escapes the block, the join is aborted.
1178
1179namespace {
1180/// Track information about values in a single virtual register about to be
1181/// joined. Objects of this class are always created in pairs - one for each
1182/// side of the CoalescerPair.
1183class JoinVals {
1184  LiveInterval &LI;
1185
1186  // Location of this register in the final joined register.
1187  // Either CP.DstIdx or CP.SrcIdx.
1188  unsigned SubIdx;
1189
1190  // Values that will be present in the final live range.
1191  SmallVectorImpl<VNInfo*> &NewVNInfo;
1192
1193  const CoalescerPair &CP;
1194  LiveIntervals *LIS;
1195  SlotIndexes *Indexes;
1196  const TargetRegisterInfo *TRI;
1197
1198  // Value number assignments. Maps value numbers in LI to entries in NewVNInfo.
1199  // This is suitable for passing to LiveInterval::join().
1200  SmallVector<int, 8> Assignments;
1201
1202  // Conflict resolution for overlapping values.
1203  enum ConflictResolution {
1204    // No overlap, simply keep this value.
1205    CR_Keep,
1206
1207    // Merge this value into OtherVNI and erase the defining instruction.
1208    // Used for IMPLICIT_DEF, coalescable copies, and copies from external
1209    // values.
1210    CR_Erase,
1211
1212    // Merge this value into OtherVNI but keep the defining instruction.
1213    // This is for the special case where OtherVNI is defined by the same
1214    // instruction.
1215    CR_Merge,
1216
1217    // Keep this value, and have it replace OtherVNI where possible. This
1218    // complicates value mapping since OtherVNI maps to two different values
1219    // before and after this def.
1220    // Used when clobbering undefined or dead lanes.
1221    CR_Replace,
1222
1223    // Unresolved conflict. Visit later when all values have been mapped.
1224    CR_Unresolved,
1225
1226    // Unresolvable conflict. Abort the join.
1227    CR_Impossible
1228  };
1229
1230  // Per-value info for LI. The lane bit masks are all relative to the final
1231  // joined register, so they can be compared directly between SrcReg and
1232  // DstReg.
1233  struct Val {
1234    ConflictResolution Resolution;
1235
1236    // Lanes written by this def, 0 for unanalyzed values.
1237    unsigned WriteLanes;
1238
1239    // Lanes with defined values in this register. Other lanes are undef and
1240    // safe to clobber.
1241    unsigned ValidLanes;
1242
1243    // Value in LI being redefined by this def.
1244    VNInfo *RedefVNI;
1245
1246    // Value in the other live range that overlaps this def, if any.
1247    VNInfo *OtherVNI;
1248
1249    // Is this value an IMPLICIT_DEF?
1250    bool IsImplicitDef;
1251
1252    // True when the live range of this value will be pruned because of an
1253    // overlapping CR_Replace value in the other live range.
1254    bool Pruned;
1255
1256    // True once Pruned above has been computed.
1257    bool PrunedComputed;
1258
1259    Val() : Resolution(CR_Keep), WriteLanes(0), ValidLanes(0),
1260            RedefVNI(0), OtherVNI(0), IsImplicitDef(false), Pruned(false),
1261            PrunedComputed(false) {}
1262
1263    bool isAnalyzed() const { return WriteLanes != 0; }
1264  };
1265
1266  // One entry per value number in LI.
1267  SmallVector<Val, 8> Vals;
1268
1269  unsigned computeWriteLanes(const MachineInstr *DefMI, bool &Redef);
1270  VNInfo *stripCopies(VNInfo *VNI);
1271  ConflictResolution analyzeValue(unsigned ValNo, JoinVals &Other);
1272  void computeAssignment(unsigned ValNo, JoinVals &Other);
1273  bool taintExtent(unsigned, unsigned, JoinVals&,
1274                   SmallVectorImpl<std::pair<SlotIndex, unsigned> >&);
1275  bool usesLanes(MachineInstr *MI, unsigned, unsigned, unsigned);
1276  bool isPrunedValue(unsigned ValNo, JoinVals &Other);
1277
1278public:
1279  JoinVals(LiveInterval &li, unsigned subIdx,
1280           SmallVectorImpl<VNInfo*> &newVNInfo,
1281           const CoalescerPair &cp,
1282           LiveIntervals *lis,
1283           const TargetRegisterInfo *tri)
1284    : LI(li), SubIdx(subIdx), NewVNInfo(newVNInfo), CP(cp), LIS(lis),
1285      Indexes(LIS->getSlotIndexes()), TRI(tri),
1286      Assignments(LI.getNumValNums(), -1), Vals(LI.getNumValNums())
1287  {}
1288
1289  /// Analyze defs in LI and compute a value mapping in NewVNInfo.
1290  /// Returns false if any conflicts were impossible to resolve.
1291  bool mapValues(JoinVals &Other);
1292
1293  /// Try to resolve conflicts that require all values to be mapped.
1294  /// Returns false if any conflicts were impossible to resolve.
1295  bool resolveConflicts(JoinVals &Other);
1296
1297  /// Prune the live range of values in Other.LI where they would conflict with
1298  /// CR_Replace values in LI. Collect end points for restoring the live range
1299  /// after joining.
1300  void pruneValues(JoinVals &Other, SmallVectorImpl<SlotIndex> &EndPoints);
1301
1302  /// Erase any machine instructions that have been coalesced away.
1303  /// Add erased instructions to ErasedInstrs.
1304  /// Add foreign virtual registers to ShrinkRegs if their live range ended at
1305  /// the erased instrs.
1306  void eraseInstrs(SmallPtrSet<MachineInstr*, 8> &ErasedInstrs,
1307                   SmallVectorImpl<unsigned> &ShrinkRegs);
1308
1309  /// Get the value assignments suitable for passing to LiveInterval::join.
1310  const int *getAssignments() const { return Assignments.data(); }
1311};
1312} // end anonymous namespace
1313
1314/// Compute the bitmask of lanes actually written by DefMI.
1315/// Set Redef if there are any partial register definitions that depend on the
1316/// previous value of the register.
1317unsigned JoinVals::computeWriteLanes(const MachineInstr *DefMI, bool &Redef) {
1318  unsigned L = 0;
1319  for (ConstMIOperands MO(DefMI); MO.isValid(); ++MO) {
1320    if (!MO->isReg() || MO->getReg() != LI.reg || !MO->isDef())
1321      continue;
1322    L |= TRI->getSubRegIndexLaneMask(
1323           TRI->composeSubRegIndices(SubIdx, MO->getSubReg()));
1324    if (MO->readsReg())
1325      Redef = true;
1326  }
1327  return L;
1328}
1329
1330/// Find the ultimate value that VNI was copied from.
1331VNInfo *JoinVals::stripCopies(VNInfo *VNI) {
1332  while (!VNI->isPHIDef()) {
1333    MachineInstr *MI = Indexes->getInstructionFromIndex(VNI->def);
1334    assert(MI && "No defining instruction");
1335    if (!MI->isFullCopy())
1336      break;
1337    unsigned Reg = MI->getOperand(1).getReg();
1338    if (!TargetRegisterInfo::isVirtualRegister(Reg))
1339      break;
1340    LiveRangeQuery LRQ(LIS->getInterval(Reg), VNI->def);
1341    if (!LRQ.valueIn())
1342      break;
1343    VNI = LRQ.valueIn();
1344  }
1345  return VNI;
1346}
1347
1348/// Analyze ValNo in this live range, and set all fields of Vals[ValNo].
1349/// Return a conflict resolution when possible, but leave the hard cases as
1350/// CR_Unresolved.
1351/// Recursively calls computeAssignment() on this and Other, guaranteeing that
1352/// both OtherVNI and RedefVNI have been analyzed and mapped before returning.
1353/// The recursion always goes upwards in the dominator tree, making loops
1354/// impossible.
1355JoinVals::ConflictResolution
1356JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
1357  Val &V = Vals[ValNo];
1358  assert(!V.isAnalyzed() && "Value has already been analyzed!");
1359  VNInfo *VNI = LI.getValNumInfo(ValNo);
1360  if (VNI->isUnused()) {
1361    V.WriteLanes = ~0u;
1362    return CR_Keep;
1363  }
1364
1365  // Get the instruction defining this value, compute the lanes written.
1366  const MachineInstr *DefMI = 0;
1367  if (VNI->isPHIDef()) {
1368    // Conservatively assume that all lanes in a PHI are valid.
1369    V.ValidLanes = V.WriteLanes = TRI->getSubRegIndexLaneMask(SubIdx);
1370  } else {
1371    DefMI = Indexes->getInstructionFromIndex(VNI->def);
1372    bool Redef = false;
1373    V.ValidLanes = V.WriteLanes = computeWriteLanes(DefMI, Redef);
1374
1375    // If this is a read-modify-write instruction, there may be more valid
1376    // lanes than the ones written by this instruction.
1377    // This only covers partial redef operands. DefMI may have normal use
1378    // operands reading the register. They don't contribute valid lanes.
1379    //
1380    // This adds ssub1 to the set of valid lanes in %src:
1381    //
1382    //   %src:ssub1<def> = FOO
1383    //
1384    // This leaves only ssub1 valid, making any other lanes undef:
1385    //
1386    //   %src:ssub1<def,read-undef> = FOO %src:ssub2
1387    //
1388    // The <read-undef> flag on the def operand means that old lane values are
1389    // not important.
1390    if (Redef) {
1391      V.RedefVNI = LiveRangeQuery(LI, VNI->def).valueIn();
1392      assert(V.RedefVNI && "Instruction is reading nonexistent value");
1393      computeAssignment(V.RedefVNI->id, Other);
1394      V.ValidLanes |= Vals[V.RedefVNI->id].ValidLanes;
1395    }
1396
1397    // An IMPLICIT_DEF writes undef values.
1398    if (DefMI->isImplicitDef()) {
1399      V.IsImplicitDef = true;
1400      V.ValidLanes &= ~V.WriteLanes;
1401    }
1402  }
1403
1404  // Find the value in Other that overlaps VNI->def, if any.
1405  LiveRangeQuery OtherLRQ(Other.LI, VNI->def);
1406
1407  // It is possible that both values are defined by the same instruction, or
1408  // the values are PHIs defined in the same block. When that happens, the two
1409  // values should be merged into one, but not into any preceding value.
1410  // The first value defined or visited gets CR_Keep, the other gets CR_Merge.
1411  if (VNInfo *OtherVNI = OtherLRQ.valueDefined()) {
1412    assert(SlotIndex::isSameInstr(VNI->def, OtherVNI->def) && "Broken LRQ");
1413
1414    // One value stays, the other is merged. Keep the earlier one, or the first
1415    // one we see.
1416    if (OtherVNI->def < VNI->def)
1417      Other.computeAssignment(OtherVNI->id, *this);
1418    else if (VNI->def < OtherVNI->def && OtherLRQ.valueIn()) {
1419      // This is an early-clobber def overlapping a live-in value in the other
1420      // register. Not mergeable.
1421      V.OtherVNI = OtherLRQ.valueIn();
1422      return CR_Impossible;
1423    }
1424    V.OtherVNI = OtherVNI;
1425    Val &OtherV = Other.Vals[OtherVNI->id];
1426    // Keep this value, check for conflicts when analyzing OtherVNI.
1427    if (!OtherV.isAnalyzed())
1428      return CR_Keep;
1429    // Both sides have been analyzed now.
1430    // Allow overlapping PHI values. Any real interference would show up in a
1431    // predecessor, the PHI itself can't introduce any conflicts.
1432    if (VNI->isPHIDef())
1433      return CR_Merge;
1434    if (V.ValidLanes & OtherV.ValidLanes)
1435      // Overlapping lanes can't be resolved.
1436      return CR_Impossible;
1437    else
1438      return CR_Merge;
1439  }
1440
1441  // No simultaneous def. Is Other live at the def?
1442  V.OtherVNI = OtherLRQ.valueIn();
1443  if (!V.OtherVNI)
1444    // No overlap, no conflict.
1445    return CR_Keep;
1446
1447  assert(!SlotIndex::isSameInstr(VNI->def, V.OtherVNI->def) && "Broken LRQ");
1448
1449  // We have overlapping values, or possibly a kill of Other.
1450  // Recursively compute assignments up the dominator tree.
1451  Other.computeAssignment(V.OtherVNI->id, *this);
1452  const Val &OtherV = Other.Vals[V.OtherVNI->id];
1453
1454  // Allow overlapping PHI values. Any real interference would show up in a
1455  // predecessor, the PHI itself can't introduce any conflicts.
1456  if (VNI->isPHIDef())
1457    return CR_Replace;
1458
1459  // Check for simple erasable conflicts.
1460  if (DefMI->isImplicitDef())
1461    return CR_Erase;
1462
1463  // Include the non-conflict where DefMI is a coalescable copy that kills
1464  // OtherVNI. We still want the copy erased and value numbers merged.
1465  if (CP.isCoalescable(DefMI)) {
1466    // Some of the lanes copied from OtherVNI may be undef, making them undef
1467    // here too.
1468    V.ValidLanes &= ~V.WriteLanes | OtherV.ValidLanes;
1469    return CR_Erase;
1470  }
1471
1472  // This may not be a real conflict if DefMI simply kills Other and defines
1473  // VNI.
1474  if (OtherLRQ.isKill() && OtherLRQ.endPoint() <= VNI->def)
1475    return CR_Keep;
1476
1477  // Handle the case where VNI and OtherVNI can be proven to be identical:
1478  //
1479  //   %other = COPY %ext
1480  //   %this  = COPY %ext <-- Erase this copy
1481  //
1482  if (DefMI->isFullCopy() && !CP.isPartial() &&
1483      stripCopies(VNI) == stripCopies(V.OtherVNI))
1484    return CR_Erase;
1485
1486  // If the lanes written by this instruction were all undef in OtherVNI, it is
1487  // still safe to join the live ranges. This can't be done with a simple value
1488  // mapping, though - OtherVNI will map to multiple values:
1489  //
1490  //   1 %dst:ssub0 = FOO                <-- OtherVNI
1491  //   2 %src = BAR                      <-- VNI
1492  //   3 %dst:ssub1 = COPY %src<kill>    <-- Eliminate this copy.
1493  //   4 BAZ %dst<kill>
1494  //   5 QUUX %src<kill>
1495  //
1496  // Here OtherVNI will map to itself in [1;2), but to VNI in [2;5). CR_Replace
1497  // handles this complex value mapping.
1498  if ((V.WriteLanes & OtherV.ValidLanes) == 0)
1499    return CR_Replace;
1500
1501  // If the other live range is killed by DefMI and the live ranges are still
1502  // overlapping, it must be because we're looking at an early clobber def:
1503  //
1504  //   %dst<def,early-clobber> = ASM %src<kill>
1505  //
1506  // In this case, it is illegal to merge the two live ranges since the early
1507  // clobber def would clobber %src before it was read.
1508  if (OtherLRQ.isKill()) {
1509    // This case where the def doesn't overlap the kill is handled above.
1510    assert(VNI->def.isEarlyClobber() &&
1511           "Only early clobber defs can overlap a kill");
1512    return CR_Impossible;
1513  }
1514
1515  // VNI is clobbering live lanes in OtherVNI, but there is still the
1516  // possibility that no instructions actually read the clobbered lanes.
1517  // If we're clobbering all the lanes in OtherVNI, at least one must be read.
1518  // Otherwise Other.LI wouldn't be live here.
1519  if ((TRI->getSubRegIndexLaneMask(Other.SubIdx) & ~V.WriteLanes) == 0)
1520    return CR_Impossible;
1521
1522  // We need to verify that no instructions are reading the clobbered lanes. To
1523  // save compile time, we'll only check that locally. Don't allow the tainted
1524  // value to escape the basic block.
1525  MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def);
1526  if (OtherLRQ.endPoint() >= Indexes->getMBBEndIdx(MBB))
1527    return CR_Impossible;
1528
1529  // There are still some things that could go wrong besides clobbered lanes
1530  // being read, for example OtherVNI may be only partially redefined in MBB,
1531  // and some clobbered lanes could escape the block. Save this analysis for
1532  // resolveConflicts() when all values have been mapped. We need to know
1533  // RedefVNI and WriteLanes for any later defs in MBB, and we can't compute
1534  // that now - the recursive analyzeValue() calls must go upwards in the
1535  // dominator tree.
1536  return CR_Unresolved;
1537}
1538
1539/// Compute the value assignment for ValNo in LI.
1540/// This may be called recursively by analyzeValue(), but never for a ValNo on
1541/// the stack.
1542void JoinVals::computeAssignment(unsigned ValNo, JoinVals &Other) {
1543  Val &V = Vals[ValNo];
1544  if (V.isAnalyzed()) {
1545    // Recursion should always move up the dominator tree, so ValNo is not
1546    // supposed to reappear before it has been assigned.
1547    assert(Assignments[ValNo] != -1 && "Bad recursion?");
1548    return;
1549  }
1550  switch ((V.Resolution = analyzeValue(ValNo, Other))) {
1551  case CR_Erase:
1552  case CR_Merge:
1553    // Merge this ValNo into OtherVNI.
1554    assert(V.OtherVNI && "OtherVNI not assigned, can't merge.");
1555    assert(Other.Vals[V.OtherVNI->id].isAnalyzed() && "Missing recursion");
1556    Assignments[ValNo] = Other.Assignments[V.OtherVNI->id];
1557    DEBUG(dbgs() << "\t\tmerge " << PrintReg(LI.reg) << ':' << ValNo << '@'
1558                 << LI.getValNumInfo(ValNo)->def << " into "
1559                 << PrintReg(Other.LI.reg) << ':' << V.OtherVNI->id << '@'
1560                 << V.OtherVNI->def << " --> @"
1561                 << NewVNInfo[Assignments[ValNo]]->def << '\n');
1562    break;
1563  case CR_Replace:
1564  case CR_Unresolved:
1565    // The other value is going to be pruned if this join is successful.
1566    assert(V.OtherVNI && "OtherVNI not assigned, can't prune");
1567    Other.Vals[V.OtherVNI->id].Pruned = true;
1568    // Fall through.
1569  default:
1570    // This value number needs to go in the final joined live range.
1571    Assignments[ValNo] = NewVNInfo.size();
1572    NewVNInfo.push_back(LI.getValNumInfo(ValNo));
1573    break;
1574  }
1575}
1576
1577bool JoinVals::mapValues(JoinVals &Other) {
1578  for (unsigned i = 0, e = LI.getNumValNums(); i != e; ++i) {
1579    computeAssignment(i, Other);
1580    if (Vals[i].Resolution == CR_Impossible) {
1581      DEBUG(dbgs() << "\t\tinterference at " << PrintReg(LI.reg) << ':' << i
1582                   << '@' << LI.getValNumInfo(i)->def << '\n');
1583      return false;
1584    }
1585  }
1586  return true;
1587}
1588
1589/// Assuming ValNo is going to clobber some valid lanes in Other.LI, compute
1590/// the extent of the tainted lanes in the block.
1591///
1592/// Multiple values in Other.LI can be affected since partial redefinitions can
1593/// preserve previously tainted lanes.
1594///
1595///   1 %dst = VLOAD           <-- Define all lanes in %dst
1596///   2 %src = FOO             <-- ValNo to be joined with %dst:ssub0
1597///   3 %dst:ssub1 = BAR       <-- Partial redef doesn't clear taint in ssub0
1598///   4 %dst:ssub0 = COPY %src <-- Conflict resolved, ssub0 wasn't read
1599///
1600/// For each ValNo in Other that is affected, add an (EndIndex, TaintedLanes)
1601/// entry to TaintedVals.
1602///
1603/// Returns false if the tainted lanes extend beyond the basic block.
1604bool JoinVals::
1605taintExtent(unsigned ValNo, unsigned TaintedLanes, JoinVals &Other,
1606            SmallVectorImpl<std::pair<SlotIndex, unsigned> > &TaintExtent) {
1607  VNInfo *VNI = LI.getValNumInfo(ValNo);
1608  MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def);
1609  SlotIndex MBBEnd = Indexes->getMBBEndIdx(MBB);
1610
1611  // Scan Other.LI from VNI.def to MBBEnd.
1612  LiveInterval::iterator OtherI = Other.LI.find(VNI->def);
1613  assert(OtherI != Other.LI.end() && "No conflict?");
1614  do {
1615    // OtherI is pointing to a tainted value. Abort the join if the tainted
1616    // lanes escape the block.
1617    SlotIndex End = OtherI->end;
1618    if (End >= MBBEnd) {
1619      DEBUG(dbgs() << "\t\ttaints global " << PrintReg(Other.LI.reg) << ':'
1620                   << OtherI->valno->id << '@' << OtherI->start << '\n');
1621      return false;
1622    }
1623    DEBUG(dbgs() << "\t\ttaints local " << PrintReg(Other.LI.reg) << ':'
1624                 << OtherI->valno->id << '@' << OtherI->start
1625                 << " to " << End << '\n');
1626    // A dead def is not a problem.
1627    if (End.isDead())
1628      break;
1629    TaintExtent.push_back(std::make_pair(End, TaintedLanes));
1630
1631    // Check for another def in the MBB.
1632    if (++OtherI == Other.LI.end() || OtherI->start >= MBBEnd)
1633      break;
1634
1635    // Lanes written by the new def are no longer tainted.
1636    const Val &OV = Other.Vals[OtherI->valno->id];
1637    TaintedLanes &= ~OV.WriteLanes;
1638    if (!OV.RedefVNI)
1639      break;
1640  } while (TaintedLanes);
1641  return true;
1642}
1643
1644/// Return true if MI uses any of the given Lanes from Reg.
1645/// This does not include partial redefinitions of Reg.
1646bool JoinVals::usesLanes(MachineInstr *MI, unsigned Reg, unsigned SubIdx,
1647                         unsigned Lanes) {
1648  if (MI->isDebugValue())
1649    return false;
1650  for (ConstMIOperands MO(MI); MO.isValid(); ++MO) {
1651    if (!MO->isReg() || MO->isDef() || MO->getReg() != Reg)
1652      continue;
1653    if (!MO->readsReg())
1654      continue;
1655    if (Lanes & TRI->getSubRegIndexLaneMask(
1656                  TRI->composeSubRegIndices(SubIdx, MO->getSubReg())))
1657      return true;
1658  }
1659  return false;
1660}
1661
1662bool JoinVals::resolveConflicts(JoinVals &Other) {
1663  for (unsigned i = 0, e = LI.getNumValNums(); i != e; ++i) {
1664    Val &V = Vals[i];
1665    assert (V.Resolution != CR_Impossible && "Unresolvable conflict");
1666    if (V.Resolution != CR_Unresolved)
1667      continue;
1668    DEBUG(dbgs() << "\t\tconflict at " << PrintReg(LI.reg) << ':' << i
1669                 << '@' << LI.getValNumInfo(i)->def << '\n');
1670    ++NumLaneConflicts;
1671    assert(V.OtherVNI && "Inconsistent conflict resolution.");
1672    VNInfo *VNI = LI.getValNumInfo(i);
1673    const Val &OtherV = Other.Vals[V.OtherVNI->id];
1674
1675    // VNI is known to clobber some lanes in OtherVNI. If we go ahead with the
1676    // join, those lanes will be tainted with a wrong value. Get the extent of
1677    // the tainted lanes.
1678    unsigned TaintedLanes = V.WriteLanes & OtherV.ValidLanes;
1679    SmallVector<std::pair<SlotIndex, unsigned>, 8> TaintExtent;
1680    if (!taintExtent(i, TaintedLanes, Other, TaintExtent))
1681      // Tainted lanes would extend beyond the basic block.
1682      return false;
1683
1684    assert(!TaintExtent.empty() && "There should be at least one conflict.");
1685
1686    // Now look at the instructions from VNI->def to TaintExtent (inclusive).
1687    MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def);
1688    MachineBasicBlock::iterator MI = MBB->begin();
1689    if (!VNI->isPHIDef()) {
1690      MI = Indexes->getInstructionFromIndex(VNI->def);
1691      // No need to check the instruction defining VNI for reads.
1692      ++MI;
1693    }
1694    assert(!SlotIndex::isSameInstr(VNI->def, TaintExtent.front().first) &&
1695           "Interference ends on VNI->def. Should have been handled earlier");
1696    MachineInstr *LastMI =
1697      Indexes->getInstructionFromIndex(TaintExtent.front().first);
1698    assert(LastMI && "Range must end at a proper instruction");
1699    unsigned TaintNum = 0;
1700    for(;;) {
1701      assert(MI != MBB->end() && "Bad LastMI");
1702      if (usesLanes(MI, Other.LI.reg, Other.SubIdx, TaintedLanes)) {
1703        DEBUG(dbgs() << "\t\ttainted lanes used by: " << *MI);
1704        return false;
1705      }
1706      // LastMI is the last instruction to use the current value.
1707      if (&*MI == LastMI) {
1708        if (++TaintNum == TaintExtent.size())
1709          break;
1710        LastMI = Indexes->getInstructionFromIndex(TaintExtent[TaintNum].first);
1711        assert(LastMI && "Range must end at a proper instruction");
1712        TaintedLanes = TaintExtent[TaintNum].second;
1713      }
1714      ++MI;
1715    }
1716
1717    // The tainted lanes are unused.
1718    V.Resolution = CR_Replace;
1719    ++NumLaneResolves;
1720  }
1721  return true;
1722}
1723
1724// Determine if ValNo is a copy of a value number in LI or Other.LI that will
1725// be pruned:
1726//
1727//   %dst = COPY %src
1728//   %src = COPY %dst  <-- This value to be pruned.
1729//   %dst = COPY %src  <-- This value is a copy of a pruned value.
1730//
1731bool JoinVals::isPrunedValue(unsigned ValNo, JoinVals &Other) {
1732  Val &V = Vals[ValNo];
1733  if (V.Pruned || V.PrunedComputed)
1734    return V.Pruned;
1735
1736  if (V.Resolution != CR_Erase && V.Resolution != CR_Merge)
1737    return V.Pruned;
1738
1739  // Follow copies up the dominator tree and check if any intermediate value
1740  // has been pruned.
1741  V.PrunedComputed = true;
1742  V.Pruned = Other.isPrunedValue(V.OtherVNI->id, *this);
1743  return V.Pruned;
1744}
1745
1746void JoinVals::pruneValues(JoinVals &Other,
1747                           SmallVectorImpl<SlotIndex> &EndPoints) {
1748  for (unsigned i = 0, e = LI.getNumValNums(); i != e; ++i) {
1749    SlotIndex Def = LI.getValNumInfo(i)->def;
1750    switch (Vals[i].Resolution) {
1751    case CR_Keep:
1752      break;
1753    case CR_Replace: {
1754      // This value takes precedence over the value in Other.LI.
1755      LIS->pruneValue(&Other.LI, Def, &EndPoints);
1756      // Check if we're replacing an IMPLICIT_DEF value. The IMPLICIT_DEF
1757      // instructions are only inserted to provide a live-out value for PHI
1758      // predecessors, so the instruction should simply go away once its value
1759      // has been replaced.
1760      Val &OtherV = Other.Vals[Vals[i].OtherVNI->id];
1761      bool EraseImpDef = OtherV.IsImplicitDef && OtherV.Resolution == CR_Keep;
1762      if (!Def.isBlock()) {
1763        // Remove <def,read-undef> flags. This def is now a partial redef.
1764        // Also remove <def,dead> flags since the joined live range will
1765        // continue past this instruction.
1766        for (MIOperands MO(Indexes->getInstructionFromIndex(Def));
1767             MO.isValid(); ++MO)
1768          if (MO->isReg() && MO->isDef() && MO->getReg() == LI.reg) {
1769            MO->setIsUndef(EraseImpDef);
1770            MO->setIsDead(false);
1771          }
1772        // This value will reach instructions below, but we need to make sure
1773        // the live range also reaches the instruction at Def.
1774        if (!EraseImpDef)
1775          EndPoints.push_back(Def);
1776      }
1777      DEBUG(dbgs() << "\t\tpruned " << PrintReg(Other.LI.reg) << " at " << Def
1778                   << ": " << Other.LI << '\n');
1779      break;
1780    }
1781    case CR_Erase:
1782    case CR_Merge:
1783      if (isPrunedValue(i, Other)) {
1784        // This value is ultimately a copy of a pruned value in LI or Other.LI.
1785        // We can no longer trust the value mapping computed by
1786        // computeAssignment(), the value that was originally copied could have
1787        // been replaced.
1788        LIS->pruneValue(&LI, Def, &EndPoints);
1789        DEBUG(dbgs() << "\t\tpruned all of " << PrintReg(LI.reg) << " at "
1790                     << Def << ": " << LI << '\n');
1791      }
1792      break;
1793    case CR_Unresolved:
1794    case CR_Impossible:
1795      llvm_unreachable("Unresolved conflicts");
1796    }
1797  }
1798}
1799
1800void JoinVals::eraseInstrs(SmallPtrSet<MachineInstr*, 8> &ErasedInstrs,
1801                           SmallVectorImpl<unsigned> &ShrinkRegs) {
1802  for (unsigned i = 0, e = LI.getNumValNums(); i != e; ++i) {
1803    // Get the def location before markUnused() below invalidates it.
1804    SlotIndex Def = LI.getValNumInfo(i)->def;
1805    switch (Vals[i].Resolution) {
1806    case CR_Keep:
1807      // If an IMPLICIT_DEF value is pruned, it doesn't serve a purpose any
1808      // longer. The IMPLICIT_DEF instructions are only inserted by
1809      // PHIElimination to guarantee that all PHI predecessors have a value.
1810      if (!Vals[i].IsImplicitDef || !Vals[i].Pruned)
1811        break;
1812      // Remove value number i from LI. Note that this VNInfo is still present
1813      // in NewVNInfo, so it will appear as an unused value number in the final
1814      // joined interval.
1815      LI.getValNumInfo(i)->markUnused();
1816      LI.removeValNo(LI.getValNumInfo(i));
1817      DEBUG(dbgs() << "\t\tremoved " << i << '@' << Def << ": " << LI << '\n');
1818      // FALL THROUGH.
1819
1820    case CR_Erase: {
1821      MachineInstr *MI = Indexes->getInstructionFromIndex(Def);
1822      assert(MI && "No instruction to erase");
1823      if (MI->isCopy()) {
1824        unsigned Reg = MI->getOperand(1).getReg();
1825        if (TargetRegisterInfo::isVirtualRegister(Reg) &&
1826            Reg != CP.getSrcReg() && Reg != CP.getDstReg())
1827          ShrinkRegs.push_back(Reg);
1828      }
1829      ErasedInstrs.insert(MI);
1830      DEBUG(dbgs() << "\t\terased:\t" << Def << '\t' << *MI);
1831      LIS->RemoveMachineInstrFromMaps(MI);
1832      MI->eraseFromParent();
1833      break;
1834    }
1835    default:
1836      break;
1837    }
1838  }
1839}
1840
1841bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
1842  SmallVector<VNInfo*, 16> NewVNInfo;
1843  LiveInterval &RHS = LIS->getInterval(CP.getSrcReg());
1844  LiveInterval &LHS = LIS->getInterval(CP.getDstReg());
1845  JoinVals RHSVals(RHS, CP.getSrcIdx(), NewVNInfo, CP, LIS, TRI);
1846  JoinVals LHSVals(LHS, CP.getDstIdx(), NewVNInfo, CP, LIS, TRI);
1847
1848  DEBUG(dbgs() << "\t\tRHS = " << PrintReg(CP.getSrcReg()) << ' ' << RHS
1849               << "\n\t\tLHS = " << PrintReg(CP.getDstReg()) << ' ' << LHS
1850               << '\n');
1851
1852  // First compute NewVNInfo and the simple value mappings.
1853  // Detect impossible conflicts early.
1854  if (!LHSVals.mapValues(RHSVals) || !RHSVals.mapValues(LHSVals))
1855    return false;
1856
1857  // Some conflicts can only be resolved after all values have been mapped.
1858  if (!LHSVals.resolveConflicts(RHSVals) || !RHSVals.resolveConflicts(LHSVals))
1859    return false;
1860
1861  // All clear, the live ranges can be merged.
1862
1863  // The merging algorithm in LiveInterval::join() can't handle conflicting
1864  // value mappings, so we need to remove any live ranges that overlap a
1865  // CR_Replace resolution. Collect a set of end points that can be used to
1866  // restore the live range after joining.
1867  SmallVector<SlotIndex, 8> EndPoints;
1868  LHSVals.pruneValues(RHSVals, EndPoints);
1869  RHSVals.pruneValues(LHSVals, EndPoints);
1870
1871  // Erase COPY and IMPLICIT_DEF instructions. This may cause some external
1872  // registers to require trimming.
1873  SmallVector<unsigned, 8> ShrinkRegs;
1874  LHSVals.eraseInstrs(ErasedInstrs, ShrinkRegs);
1875  RHSVals.eraseInstrs(ErasedInstrs, ShrinkRegs);
1876  while (!ShrinkRegs.empty())
1877    LIS->shrinkToUses(&LIS->getInterval(ShrinkRegs.pop_back_val()));
1878
1879  // Join RHS into LHS.
1880  LHS.join(RHS, LHSVals.getAssignments(), RHSVals.getAssignments(), NewVNInfo,
1881           MRI);
1882
1883  // Kill flags are going to be wrong if the live ranges were overlapping.
1884  // Eventually, we should simply clear all kill flags when computing live
1885  // ranges. They are reinserted after register allocation.
1886  MRI->clearKillFlags(LHS.reg);
1887  MRI->clearKillFlags(RHS.reg);
1888
1889  if (EndPoints.empty())
1890    return true;
1891
1892  // Recompute the parts of the live range we had to remove because of
1893  // CR_Replace conflicts.
1894  DEBUG(dbgs() << "\t\trestoring liveness to " << EndPoints.size()
1895               << " points: " << LHS << '\n');
1896  LIS->extendToIndices(&LHS, EndPoints);
1897  return true;
1898}
1899
1900/// joinIntervals - Attempt to join these two intervals.  On failure, this
1901/// returns false.
1902bool RegisterCoalescer::joinIntervals(CoalescerPair &CP) {
1903  return CP.isPhys() ? joinReservedPhysReg(CP) : joinVirtRegs(CP);
1904}
1905
1906namespace {
1907  // DepthMBBCompare - Comparison predicate that sort first based on the loop
1908  // depth of the basic block (the unsigned), and then on the MBB number.
1909  struct DepthMBBCompare {
1910    typedef std::pair<unsigned, MachineBasicBlock*> DepthMBBPair;
1911    bool operator()(const DepthMBBPair &LHS, const DepthMBBPair &RHS) const {
1912      // Deeper loops first
1913      if (LHS.first != RHS.first)
1914        return LHS.first > RHS.first;
1915
1916      // Prefer blocks that are more connected in the CFG. This takes care of
1917      // the most difficult copies first while intervals are short.
1918      unsigned cl = LHS.second->pred_size() + LHS.second->succ_size();
1919      unsigned cr = RHS.second->pred_size() + RHS.second->succ_size();
1920      if (cl != cr)
1921        return cl > cr;
1922
1923      // As a last resort, sort by block number.
1924      return LHS.second->getNumber() < RHS.second->getNumber();
1925    }
1926  };
1927}
1928
1929// Try joining WorkList copies starting from index From.
1930// Null out any successful joins.
1931bool RegisterCoalescer::copyCoalesceWorkList(unsigned From) {
1932  assert(From <= WorkList.size() && "Out of range");
1933  bool Progress = false;
1934  for (unsigned i = From, e = WorkList.size(); i != e; ++i) {
1935    if (!WorkList[i])
1936      continue;
1937    // Skip instruction pointers that have already been erased, for example by
1938    // dead code elimination.
1939    if (ErasedInstrs.erase(WorkList[i])) {
1940      WorkList[i] = 0;
1941      continue;
1942    }
1943    bool Again = false;
1944    bool Success = joinCopy(WorkList[i], Again);
1945    Progress |= Success;
1946    if (Success || !Again)
1947      WorkList[i] = 0;
1948  }
1949  return Progress;
1950}
1951
1952void
1953RegisterCoalescer::copyCoalesceInMBB(MachineBasicBlock *MBB) {
1954  DEBUG(dbgs() << MBB->getName() << ":\n");
1955
1956  // Collect all copy-like instructions in MBB. Don't start coalescing anything
1957  // yet, it might invalidate the iterator.
1958  const unsigned PrevSize = WorkList.size();
1959  for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end();
1960       MII != E; ++MII)
1961    if (MII->isCopyLike())
1962      WorkList.push_back(MII);
1963
1964  // Try coalescing the collected copies immediately, and remove the nulls.
1965  // This prevents the WorkList from getting too large since most copies are
1966  // joinable on the first attempt.
1967  if (copyCoalesceWorkList(PrevSize))
1968    WorkList.erase(std::remove(WorkList.begin() + PrevSize, WorkList.end(),
1969                               (MachineInstr*)0), WorkList.end());
1970}
1971
1972void RegisterCoalescer::joinAllIntervals() {
1973  DEBUG(dbgs() << "********** JOINING INTERVALS ***********\n");
1974  assert(WorkList.empty() && "Old data still around.");
1975
1976  if (Loops->empty()) {
1977    // If there are no loops in the function, join intervals in function order.
1978    for (MachineFunction::iterator I = MF->begin(), E = MF->end();
1979         I != E; ++I)
1980      copyCoalesceInMBB(I);
1981  } else {
1982    // Otherwise, join intervals in inner loops before other intervals.
1983    // Unfortunately we can't just iterate over loop hierarchy here because
1984    // there may be more MBB's than BB's.  Collect MBB's for sorting.
1985
1986    // Join intervals in the function prolog first. We want to join physical
1987    // registers with virtual registers before the intervals got too long.
1988    std::vector<std::pair<unsigned, MachineBasicBlock*> > MBBs;
1989    for (MachineFunction::iterator I = MF->begin(), E = MF->end();I != E;++I){
1990      MachineBasicBlock *MBB = I;
1991      MBBs.push_back(std::make_pair(Loops->getLoopDepth(MBB), I));
1992    }
1993
1994    // Sort by loop depth.
1995    std::sort(MBBs.begin(), MBBs.end(), DepthMBBCompare());
1996
1997    // Finally, join intervals in loop nest order.
1998    for (unsigned i = 0, e = MBBs.size(); i != e; ++i)
1999      copyCoalesceInMBB(MBBs[i].second);
2000  }
2001
2002  // Joining intervals can allow other intervals to be joined.  Iteratively join
2003  // until we make no progress.
2004  while (copyCoalesceWorkList())
2005    /* empty */ ;
2006}
2007
2008void RegisterCoalescer::releaseMemory() {
2009  ErasedInstrs.clear();
2010  WorkList.clear();
2011  DeadDefs.clear();
2012  InflateRegs.clear();
2013}
2014
2015bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
2016  MF = &fn;
2017  MRI = &fn.getRegInfo();
2018  TM = &fn.getTarget();
2019  TRI = TM->getRegisterInfo();
2020  TII = TM->getInstrInfo();
2021  LIS = &getAnalysis<LiveIntervals>();
2022  LDV = &getAnalysis<LiveDebugVariables>();
2023  AA = &getAnalysis<AliasAnalysis>();
2024  Loops = &getAnalysis<MachineLoopInfo>();
2025
2026  DEBUG(dbgs() << "********** SIMPLE REGISTER COALESCING **********\n"
2027               << "********** Function: " << MF->getName() << '\n');
2028
2029  if (VerifyCoalescing)
2030    MF->verify(this, "Before register coalescing");
2031
2032  RegClassInfo.runOnMachineFunction(fn);
2033
2034  // Join (coalesce) intervals if requested.
2035  if (EnableJoining)
2036    joinAllIntervals();
2037
2038  // After deleting a lot of copies, register classes may be less constrained.
2039  // Removing sub-register operands may allow GR32_ABCD -> GR32 and DPR_VFP2 ->
2040  // DPR inflation.
2041  array_pod_sort(InflateRegs.begin(), InflateRegs.end());
2042  InflateRegs.erase(std::unique(InflateRegs.begin(), InflateRegs.end()),
2043                    InflateRegs.end());
2044  DEBUG(dbgs() << "Trying to inflate " << InflateRegs.size() << " regs.\n");
2045  for (unsigned i = 0, e = InflateRegs.size(); i != e; ++i) {
2046    unsigned Reg = InflateRegs[i];
2047    if (MRI->reg_nodbg_empty(Reg))
2048      continue;
2049    if (MRI->recomputeRegClass(Reg, *TM)) {
2050      DEBUG(dbgs() << PrintReg(Reg) << " inflated to "
2051                   << MRI->getRegClass(Reg)->getName() << '\n');
2052      ++NumInflated;
2053    }
2054  }
2055
2056  DEBUG(dump());
2057  DEBUG(LDV->dump());
2058  if (VerifyCoalescing)
2059    MF->verify(this, "After register coalescing");
2060  return true;
2061}
2062
2063/// print - Implement the dump method.
2064void RegisterCoalescer::print(raw_ostream &O, const Module* m) const {
2065   LIS->print(O, m);
2066}
2067