//===- MachineCSE.cpp - Machine Common Subexpression Elimination Pass -----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass performs global common subexpression elimination on machine
// instructions using a scoped hash table based value numbering scheme. It
// must be run while the machine function is still in SSA form.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/ScopedHashTable.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/InitializePasses.h"
#include "llvm/MC/MCRegister.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/RecyclingAllocator.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <iterator>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "machine-cse"

STATISTIC(NumCoalesces, "Number of copies coalesced");
STATISTIC(NumCSEs,      "Number of common subexpressions eliminated");
STATISTIC(NumPREs,      "Number of partially redundant expressions"
                        " transformed to fully redundant");
STATISTIC(NumPhysCSEs,
          "Number of physreg-referencing common subexprs eliminated");
STATISTIC(NumCrossBBCSEs,
          "Number of cross-MBB physreg-referencing CSes eliminated");
STATISTIC(NumCommutes,  "Number of copies coalesced after commuting");

// Threshold to avoid the excessive cost of computing isProfitableToCSE.
static cl::opt<int>
    CSUsesThreshold("csuses-threshold", cl::Hidden, cl::init(1024),
                    cl::desc("Threshold for the size of CSUses"));

static cl::opt<bool> AggressiveMachineCSE(
    "aggressive-machine-cse", cl::Hidden, cl::init(false),
    cl::desc("Override the profitability heuristics for Machine CSE"));

namespace {

  class MachineCSE : public MachineFunctionPass {
    const TargetInstrInfo *TII = nullptr;
    const TargetRegisterInfo *TRI = nullptr;
    AliasAnalysis *AA = nullptr;
    MachineDominatorTree *DT = nullptr;
    MachineRegisterInfo *MRI = nullptr;
    MachineBlockFrequencyInfo *MBFI = nullptr;

  public:
    static char ID; // Pass identification

    MachineCSE() : MachineFunctionPass(ID) {
      initializeMachineCSEPass(*PassRegistry::getPassRegistry());
    }

    bool runOnMachineFunction(MachineFunction &MF) override;

    void getAnalysisUsage(AnalysisUsage &AU) const override {
      AU.setPreservesCFG();
      MachineFunctionPass::getAnalysisUsage(AU);
      AU.addRequired<AAResultsWrapperPass>();
      AU.addPreservedID(MachineLoopInfoID);
      AU.addRequired<MachineDominatorTree>();
      AU.addPreserved<MachineDominatorTree>();
      AU.addRequired<MachineBlockFrequencyInfo>();
      AU.addPreserved<MachineBlockFrequencyInfo>();
    }

    MachineFunctionProperties getRequiredProperties() const override {
      return MachineFunctionProperties()
        .set(MachineFunctionProperties::Property::IsSSA);
    }

    void releaseMemory() override {
      ScopeMap.clear();
      PREMap.clear();
      Exps.clear();
    }

  private:
    using AllocatorTy = RecyclingAllocator<BumpPtrAllocator,
                            ScopedHashTableVal<MachineInstr *, unsigned>>;
    using ScopedHTType =
        ScopedHashTable<MachineInstr *, unsigned, MachineInstrExpressionTrait,
                        AllocatorTy>;
    using ScopeType = ScopedHTType::ScopeTy;
    using PhysDefVector = SmallVector<std::pair<unsigned, unsigned>, 2>;

    unsigned LookAheadLimit = 0;
    DenseMap<MachineBasicBlock *, ScopeType *> ScopeMap;
    DenseMap<MachineInstr *, MachineBasicBlock *, MachineInstrExpressionTrait>
        PREMap;
    ScopedHTType VNT;
    SmallVector<MachineInstr *, 64> Exps;
    unsigned CurrVN = 0;

    bool PerformTrivialCopyPropagation(MachineInstr *MI,
                                       MachineBasicBlock *MBB);
    bool isPhysDefTriviallyDead(MCRegister Reg,
                                MachineBasicBlock::const_iterator I,
                                MachineBasicBlock::const_iterator E) const;
    bool hasLivePhysRegDefUses(const MachineInstr *MI,
                               const MachineBasicBlock *MBB,
                               SmallSet<MCRegister, 8> &PhysRefs,
                               PhysDefVector &PhysDefs, bool &PhysUseDef) const;
    bool PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
                          SmallSet<MCRegister, 8> &PhysRefs,
                          PhysDefVector &PhysDefs, bool &NonLocal) const;
    bool isCSECandidate(MachineInstr *MI);
    bool isProfitableToCSE(Register CSReg, Register Reg,
                           MachineBasicBlock *CSBB, MachineInstr *MI);
    void EnterScope(MachineBasicBlock *MBB);
    void ExitScope(MachineBasicBlock *MBB);
    bool ProcessBlockCSE(MachineBasicBlock *MBB);
    void ExitScopeIfDone(MachineDomTreeNode *Node,
                         DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren);
    bool PerformCSE(MachineDomTreeNode *Node);

    bool isPRECandidate(MachineInstr *MI, SmallSet<MCRegister, 8> &PhysRefs);
    bool ProcessBlockPRE(MachineDominatorTree *MDT, MachineBasicBlock *MBB);
    bool PerformSimplePRE(MachineDominatorTree *DT);
    /// Heuristics to see if it's profitable to move common computations of MBB
    /// and MBB1 to CandidateBB.
    bool isProfitableToHoistInto(MachineBasicBlock *CandidateBB,
                                 MachineBasicBlock *MBB,
                                 MachineBasicBlock *MBB1);
  };

} // end anonymous namespace

char MachineCSE::ID = 0;

char &llvm::MachineCSEID = MachineCSE::ID;

INITIALIZE_PASS_BEGIN(MachineCSE, DEBUG_TYPE,
                      "Machine Common Subexpression Elimination", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(MachineCSE, DEBUG_TYPE,
                    "Machine Common Subexpression Elimination", false, false)

/// The source register of a COPY machine instruction can be propagated to all
/// its users, and this propagation could increase the probability of finding
/// common subexpressions. If the COPY has only one user, the COPY itself can
/// be removed.
bool MachineCSE::PerformTrivialCopyPropagation(MachineInstr *MI,
                                               MachineBasicBlock *MBB) {
  bool Changed = false;
  for (MachineOperand &MO : MI->all_uses()) {
    Register Reg = MO.getReg();
    if (!Reg.isVirtual())
      continue;
    bool OnlyOneUse = MRI->hasOneNonDBGUse(Reg);
    MachineInstr *DefMI = MRI->getVRegDef(Reg);
    if (!DefMI->isCopy())
      continue;
    Register SrcReg = DefMI->getOperand(1).getReg();
    if (!SrcReg.isVirtual())
      continue;
    if (DefMI->getOperand(0).getSubReg())
      continue;
    // FIXME: We should trivially coalesce subregister copies to expose CSE
    // opportunities on instructions with truncated operands (see
    // cse-add-with-overflow.ll). This can be done here as follows:
    // if (SrcSubReg)
    //  RC = TRI->getMatchingSuperRegClass(MRI->getRegClass(SrcReg), RC,
    //                                     SrcSubReg);
    // MO.substVirtReg(SrcReg, SrcSubReg, *TRI);
    //
    // The 2-addr pass has been updated to handle coalesced subregs. However,
    // some machine-specific code still can't handle it.
    // To handle it properly we also need a way to find a constrained
    // subregister class given a super-reg class and subreg index.
    if (DefMI->getOperand(1).getSubReg())
      continue;
    if (!MRI->constrainRegAttrs(SrcReg, Reg))
      continue;
    LLVM_DEBUG(dbgs() << "Coalescing: " << *DefMI);
    LLVM_DEBUG(dbgs() << "***     to: " << *MI);

    // Propagate SrcReg of copies to MI.
    MO.setReg(SrcReg);
    MRI->clearKillFlags(SrcReg);
    // Coalesce single use copies.
    if (OnlyOneUse) {
      // If (and only if) we've eliminated all uses of the copy, also
      // copy-propagate to any debug-users of MI, or they'll be left using
      // an undefined value.
      DefMI->changeDebugValuesDefReg(SrcReg);

      DefMI->eraseFromParent();
      ++NumCoalesces;
    }
    Changed = true;
  }

  return Changed;
}

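/// Return true if a forward scan of at most LookAheadLimit instructions in
/// [I, E) sees a def of \p Reg (or an overlapping register) before any use,
/// i.e. the physical register def is trivially dead. Conservatively returns
/// false if the end of the block or the look-ahead limit is reached first.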
bool MachineCSE::isPhysDefTriviallyDead(
    MCRegister Reg, MachineBasicBlock::const_iterator I,
    MachineBasicBlock::const_iterator E) const {
  unsigned LookAheadLeft = LookAheadLimit;
  while (LookAheadLeft) {
    // Skip over dbg_value's.
    I = skipDebugInstructionsForward(I, E);

    if (I == E)
      // Reached end of block, we don't know if register is dead or not.
      return false;

    bool SeenDef = false;
    for (const MachineOperand &MO : I->operands()) {
      if (MO.isRegMask() && MO.clobbersPhysReg(Reg))
        SeenDef = true;
      if (!MO.isReg() || !MO.getReg())
        continue;
      if (!TRI->regsOverlap(MO.getReg(), Reg))
        continue;
      if (MO.isUse())
        // Found a use!
        return false;
      SeenDef = true;
    }
    if (SeenDef)
      // We saw a def of Reg (or an alias) before encountering any use; it's
      // trivially dead.
      return true;

    --LookAheadLeft;
    ++I;
  }
  return false;
}

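/// Return true if reading \p Reg does not block CSE: the register is either
/// caller preserved, an ignorable use for the target, or a constant physical
/// register (the latter is only checked once the reserved registers have been
/// frozen).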
static bool isCallerPreservedOrConstPhysReg(MCRegister Reg,
                                            const MachineOperand &MO,
                                            const MachineFunction &MF,
                                            const TargetRegisterInfo &TRI,
                                            const TargetInstrInfo &TII) {
  // MachineRegisterInfo::isConstantPhysReg directly called by
  // MachineRegisterInfo::isCallerPreservedOrConstPhysReg expects the
  // reserved registers to be frozen. That doesn't cause a problem post-ISel as
  // most (if not all) targets freeze reserved registers right after ISel.
  //
  // It does cause issues mid-GlobalISel, however, hence the additional
  // reservedRegsFrozen check.
  const MachineRegisterInfo &MRI = MF.getRegInfo();
  return TRI.isCallerPreservedPhysReg(Reg, MF) || TII.isIgnorableUse(MO) ||
         (MRI.reservedRegsFrozen() && MRI.isConstantPhysReg(Reg));
}

/// hasLivePhysRegDefUses - Return true if the specified instruction reads or
/// writes physical registers (except for dead defs of physical registers).
/// The live physical register defs are returned by reference in PhysDefs, and
/// PhysUseDef is set if the instruction both uses and defines the same
/// physical register.
bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
                                       const MachineBasicBlock *MBB,
                                       SmallSet<MCRegister, 8> &PhysRefs,
                                       PhysDefVector &PhysDefs,
                                       bool &PhysUseDef) const {
  // First, add all uses to PhysRefs.
  for (const MachineOperand &MO : MI->all_uses()) {
    Register Reg = MO.getReg();
    if (!Reg)
      continue;
    if (Reg.isVirtual())
      continue;
    // Reading either caller preserved or constant physregs is ok.
    if (!isCallerPreservedOrConstPhysReg(Reg.asMCReg(), MO, *MI->getMF(), *TRI,
                                         *TII))
      for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
        PhysRefs.insert(*AI);
  }

  // Next, collect all defs into PhysDefs.  If any is already in PhysRefs
  // (which currently contains only uses), set the PhysUseDef flag.
  PhysUseDef = false;
  MachineBasicBlock::const_iterator I = MI; I = std::next(I);
  for (const auto &MOP : llvm::enumerate(MI->operands())) {
    const MachineOperand &MO = MOP.value();
    if (!MO.isReg() || !MO.isDef())
      continue;
    Register Reg = MO.getReg();
    if (!Reg)
      continue;
    if (Reg.isVirtual())
      continue;
    // Check against PhysRefs even if the def is "dead".
    if (PhysRefs.count(Reg.asMCReg()))
      PhysUseDef = true;
    // If the def is dead, it's ok. But the def may not be marked "dead". That
    // is common since this pass is run before live variable analysis. Scan
    // forward a few instructions and check if it is obviously dead.
    if (!MO.isDead() && !isPhysDefTriviallyDead(Reg.asMCReg(), I, MBB->end()))
      PhysDefs.push_back(std::make_pair(MOP.index(), Reg));
  }

  // Finally, add all defs to PhysRefs as well.
  for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i)
    for (MCRegAliasIterator AI(PhysDefs[i].second, TRI, true); AI.isValid();
         ++AI)
      PhysRefs.insert(*AI);

  return !PhysRefs.empty();
}

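/// Return true if none of the physical registers in \p PhysRefs is clobbered
/// between the candidate common subexpression \p CSMI and \p MI, scanning at
/// most LookAheadLimit instructions. \p NonLocal is set to true when the scan
/// has to cross from CSMI's block into MI's (sole-predecessor) block.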
bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
                                  SmallSet<MCRegister, 8> &PhysRefs,
                                  PhysDefVector &PhysDefs,
                                  bool &NonLocal) const {
  // For now conservatively returns false if the common subexpression is
  // not in the same basic block as the given instruction. The only exception
  // is if the common subexpression is in the sole predecessor block.
  const MachineBasicBlock *MBB = MI->getParent();
  const MachineBasicBlock *CSMBB = CSMI->getParent();

  bool CrossMBB = false;
  if (CSMBB != MBB) {
    if (MBB->pred_size() != 1 || *MBB->pred_begin() != CSMBB)
      return false;

    for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i) {
      if (MRI->isAllocatable(PhysDefs[i].second) ||
          MRI->isReserved(PhysDefs[i].second))
        // Avoid extending the live range of physical registers if they are
        // allocatable or reserved.
        return false;
    }
    CrossMBB = true;
  }
  MachineBasicBlock::const_iterator I = CSMI; I = std::next(I);
  MachineBasicBlock::const_iterator E = MI;
  MachineBasicBlock::const_iterator EE = CSMBB->end();
  unsigned LookAheadLeft = LookAheadLimit;
  while (LookAheadLeft) {
    // Skip over dbg_value's.
    while (I != E && I != EE && I->isDebugInstr())
      ++I;

    if (I == EE) {
      assert(CrossMBB && "Reaching end-of-MBB without finding MI?");
      (void)CrossMBB;
      CrossMBB = false;
      NonLocal = true;
      I = MBB->begin();
      EE = MBB->end();
      continue;
    }

    if (I == E)
      return true;

    for (const MachineOperand &MO : I->operands()) {
      // RegMasks go on instructions like calls that clobber lots of physregs.
      // Don't attempt to CSE across such an instruction.
      if (MO.isRegMask())
        return false;
      if (!MO.isReg() || !MO.isDef())
        continue;
      Register MOReg = MO.getReg();
      if (MOReg.isVirtual())
        continue;
      if (PhysRefs.count(MOReg.asMCReg()))
        return false;
    }

    --LookAheadLeft;
    ++I;
  }

  return false;
}

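/// Return true if \p MI is a candidate for common subexpression elimination:
/// it is none of the position/PHI/copy-like/inline-asm/debug kinds, has no
/// unmodeled side effects, and any load it performs is a dereferenceable
/// invariant load (stack guard loads are excluded as well).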
bool MachineCSE::isCSECandidate(MachineInstr *MI) {
  if (MI->isPosition() || MI->isPHI() || MI->isImplicitDef() || MI->isKill() ||
      MI->isInlineAsm() || MI->isDebugInstr() || MI->isJumpTableDebugInfo())
    return false;

  // Ignore copies.
  if (MI->isCopyLike())
    return false;

  // Ignore stuff that we obviously can't move.
  if (MI->mayStore() || MI->isCall() || MI->isTerminator() ||
      MI->mayRaiseFPException() || MI->hasUnmodeledSideEffects())
    return false;

  if (MI->mayLoad()) {
    // Okay, this instruction does a load. As a refinement, we allow the target
    // to decide whether the loaded value is actually a constant. If so, we can
    // treat the load like any other side-effect-free computation.
    if (!MI->isDereferenceableInvariantLoad())
      // FIXME: we should be able to hoist loads with no other side effects if
      // there are no other instructions which can change memory in this loop.
      // This is a trivial form of alias analysis.
      return false;
  }

  // Ignore stack guard loads, otherwise the register that holds the CSE'd
  // value may be spilled and get loaded back with corrupted data.
  if (MI->getOpcode() == TargetOpcode::LOAD_STACK_GUARD)
    return false;

  return true;
}

/// isProfitableToCSE - Return true if it's profitable to eliminate MI with a
/// common expression that defines Reg. CSBB is the basic block where CSReg is
/// defined.
bool MachineCSE::isProfitableToCSE(Register CSReg, Register Reg,
                                   MachineBasicBlock *CSBB, MachineInstr *MI) {
  if (AggressiveMachineCSE)
    return true;

  // FIXME: Heuristics that work around the lack of live range splitting.

  // If CSReg is used at all uses of Reg, CSE should not increase register
  // pressure of CSReg.
  bool MayIncreasePressure = true;
  if (CSReg.isVirtual() && Reg.isVirtual()) {
    MayIncreasePressure = false;
    SmallPtrSet<MachineInstr*, 8> CSUses;
    int NumOfUses = 0;
    for (MachineInstr &MI : MRI->use_nodbg_instructions(CSReg)) {
      CSUses.insert(&MI);
      // Too costly to compute if NumOfUses is very large. Conservatively
      // assume MayIncreasePressure to avoid spending too much time here.
      if (++NumOfUses > CSUsesThreshold) {
        MayIncreasePressure = true;
        break;
      }
    }
    if (!MayIncreasePressure)
      for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) {
        if (!CSUses.count(&MI)) {
          MayIncreasePressure = true;
          break;
        }
      }
  }
  if (!MayIncreasePressure) return true;

  // Heuristic #1: Don't CSE "cheap" computations if the def is not local or in
  // an immediate predecessor. We don't want to increase register pressure and
  // end up causing other computations to be spilled.
  if (TII->isAsCheapAsAMove(*MI)) {
    MachineBasicBlock *BB = MI->getParent();
    if (CSBB != BB && !CSBB->isSuccessor(BB))
      return false;
  }

  // Heuristic #2: If the expression doesn't use a virtual register and the
  // only uses of the redundant computation are copies, do not CSE.
  bool HasVRegUse = false;
  for (const MachineOperand &MO : MI->all_uses()) {
    if (MO.getReg().isVirtual()) {
      HasVRegUse = true;
      break;
    }
  }
  if (!HasVRegUse) {
    bool HasNonCopyUse = false;
    for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) {
      // Ignore copies.
      if (!MI.isCopyLike()) {
        HasNonCopyUse = true;
        break;
      }
    }
    if (!HasNonCopyUse)
      return false;
  }

  // Heuristic #3: If the common subexpression is used by PHIs, do not reuse
  // it unless the defined value is already used in the BB of the new use.
  bool HasPHI = false;
  for (MachineInstr &UseMI : MRI->use_nodbg_instructions(CSReg)) {
    HasPHI |= UseMI.isPHI();
    if (UseMI.getParent() == MI->getParent())
      return true;
  }

  return !HasPHI;
}

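/// Open a new value-numbering scope for \p MBB and remember it in ScopeMap so
/// it can be destroyed once the dominator subtree rooted at MBB is done.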
void MachineCSE::EnterScope(MachineBasicBlock *MBB) {
  LLVM_DEBUG(dbgs() << "Entering: " << MBB->getName() << '\n');
  ScopeType *Scope = new ScopeType(VNT);
  ScopeMap[MBB] = Scope;
}

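/// Pop and destroy the value-numbering scope previously opened for \p MBB.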
void MachineCSE::ExitScope(MachineBasicBlock *MBB) {
  LLVM_DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n');
  DenseMap<MachineBasicBlock*, ScopeType*>::iterator SI = ScopeMap.find(MBB);
  assert(SI != ScopeMap.end());
  delete SI->second;
  ScopeMap.erase(SI);
}

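/// Value-number the instructions in \p MBB and replace any instruction that
/// is redundant with an already numbered expression, subject to the physical
/// register and profitability checks below.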
bool MachineCSE::ProcessBlockCSE(MachineBasicBlock *MBB) {
  bool Changed = false;

  SmallVector<std::pair<unsigned, unsigned>, 8> CSEPairs;
  SmallVector<unsigned, 2> ImplicitDefsToUpdate;
  SmallVector<unsigned, 2> ImplicitDefs;
  for (MachineInstr &MI : llvm::make_early_inc_range(*MBB)) {
    if (!isCSECandidate(&MI))
      continue;

    bool FoundCSE = VNT.count(&MI);
    if (!FoundCSE) {
      // Use trivial copy propagation to find more CSE opportunities.
      if (PerformTrivialCopyPropagation(&MI, MBB)) {
        Changed = true;

        // After coalescing MI itself may become a copy.
        if (MI.isCopyLike())
          continue;

        // Try again to see if CSE is possible.
        FoundCSE = VNT.count(&MI);
      }
    }

    // Commute commutable instructions.
    bool Commuted = false;
    if (!FoundCSE && MI.isCommutable()) {
      if (MachineInstr *NewMI = TII->commuteInstruction(MI)) {
        Commuted = true;
        FoundCSE = VNT.count(NewMI);
        if (NewMI != &MI) {
          // New instruction. It doesn't need to be kept.
          NewMI->eraseFromParent();
          Changed = true;
        } else if (!FoundCSE)
          // MI was changed but it didn't help, commute it back!
          (void)TII->commuteInstruction(MI);
      }
    }

    // If the instruction defines physical registers and the values *may* be
    // used, then it's not safe to replace it with a common subexpression.
    // It's also not safe if the instruction uses physical registers.
    bool CrossMBBPhysDef = false;
    SmallSet<MCRegister, 8> PhysRefs;
    PhysDefVector PhysDefs;
    bool PhysUseDef = false;
    if (FoundCSE &&
        hasLivePhysRegDefUses(&MI, MBB, PhysRefs, PhysDefs, PhysUseDef)) {
      FoundCSE = false;

      // ... Unless the CS is local or is in the sole predecessor block, it
      // also defines the physical register, and neither that register nor the
      // physical register uses are clobbered in between. This can never be
      // the case if the instruction both uses and defines the same physical
      // register, which was detected above.
      if (!PhysUseDef) {
        unsigned CSVN = VNT.lookup(&MI);
        MachineInstr *CSMI = Exps[CSVN];
        if (PhysRegDefsReach(CSMI, &MI, PhysRefs, PhysDefs, CrossMBBPhysDef))
          FoundCSE = true;
      }
    }

    if (!FoundCSE) {
      VNT.insert(&MI, CurrVN++);
      Exps.push_back(&MI);
      continue;
    }

    // Found a common subexpression, eliminate it.
    unsigned CSVN = VNT.lookup(&MI);
    MachineInstr *CSMI = Exps[CSVN];
    LLVM_DEBUG(dbgs() << "Examining: " << MI);
    LLVM_DEBUG(dbgs() << "*** Found a common subexpression: " << *CSMI);

    // Prevent CSE-ing non-local convergent instructions.
    // LLVM's current definition of `isConvergent` does not necessarily prove
    // that non-local CSE is illegal. The following check extends the definition
    // of `isConvergent` to assume a convergent instruction is dependent not
    // only on additional conditions, but also on fewer conditions. LLVM does
    // not have a MachineInstr attribute which expresses this extended
    // definition, so it's necessary to use `isConvergent` to prevent illegally
    // CSE-ing the subset of `isConvergent` instructions which do fall into this
    // extended definition.
    if (MI.isConvergent() && MI.getParent() != CSMI->getParent()) {
      LLVM_DEBUG(dbgs() << "*** Convergent MI and subexpression exist in "
                           "different BBs, avoid CSE!\n");
      VNT.insert(&MI, CurrVN++);
      Exps.push_back(&MI);
      continue;
    }

    // Check if it's profitable to perform this CSE.
    bool DoCSE = true;
    unsigned NumDefs = MI.getNumDefs();

    for (unsigned i = 0, e = MI.getNumOperands(); NumDefs && i != e; ++i) {
      MachineOperand &MO = MI.getOperand(i);
      if (!MO.isReg() || !MO.isDef())
        continue;
      Register OldReg = MO.getReg();
      Register NewReg = CSMI->getOperand(i).getReg();

      // Go through implicit defs of CSMI and MI; if a def is not dead at MI,
      // make sure it is not marked dead at CSMI either.
      if (MO.isImplicit() && !MO.isDead() && CSMI->getOperand(i).isDead())
        ImplicitDefsToUpdate.push_back(i);

      // Keep track of implicit defs of CSMI and MI, to clear possibly
      // made-redundant kill flags.
      if (MO.isImplicit() && !MO.isDead() && OldReg == NewReg)
        ImplicitDefs.push_back(OldReg);

      if (OldReg == NewReg) {
        --NumDefs;
        continue;
      }

      assert(OldReg.isVirtual() && NewReg.isVirtual() &&
             "Do not CSE physical register defs!");

      if (!isProfitableToCSE(NewReg, OldReg, CSMI->getParent(), &MI)) {
        LLVM_DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n");
        DoCSE = false;
        break;
      }

      // Don't perform CSE if the result of the new instruction cannot exist
      // within the constraints (register class, bank, or low-level type) of
      // the old instruction.
      if (!MRI->constrainRegAttrs(NewReg, OldReg)) {
        LLVM_DEBUG(
            dbgs() << "*** Not the same register constraints, avoid CSE!\n");
        DoCSE = false;
        break;
      }

      CSEPairs.push_back(std::make_pair(OldReg, NewReg));
      --NumDefs;
    }

    // Actually perform the elimination.
    if (DoCSE) {
      for (const std::pair<unsigned, unsigned> &CSEPair : CSEPairs) {
        unsigned OldReg = CSEPair.first;
        unsigned NewReg = CSEPair.second;
        // OldReg may have been unused but is used now, so clear the Dead flag.
        MachineInstr *Def = MRI->getUniqueVRegDef(NewReg);
        assert(Def != nullptr && "CSEd register has no unique definition?");
        Def->clearRegisterDeads(NewReg);
        // Replace with NewReg and clear kill flags which may be wrong now.
        MRI->replaceRegWith(OldReg, NewReg);
        MRI->clearKillFlags(NewReg);
      }

      // Go through implicit defs of CSMI and MI; if a def is not dead at MI,
      // make sure it is not marked dead at CSMI either.
      for (unsigned ImplicitDefToUpdate : ImplicitDefsToUpdate)
        CSMI->getOperand(ImplicitDefToUpdate).setIsDead(false);
      for (const auto &PhysDef : PhysDefs)
        if (!MI.getOperand(PhysDef.first).isDead())
          CSMI->getOperand(PhysDef.first).setIsDead(false);

      // Go through implicit defs of CSMI and MI, and clear the kill flags on
      // their uses in all the instructions between CSMI and MI.
      // We might have made some of the kill flags redundant, consider:
      //   subs  ... implicit-def %nzcv    <- CSMI
      //   csinc ... implicit killed %nzcv <- this kill flag isn't valid anymore
      //   subs  ... implicit-def %nzcv    <- MI, to be eliminated
      //   csinc ... implicit killed %nzcv
      // Since we eliminated MI, and reused a register imp-def'd by CSMI
      // (here %nzcv), that register, if it was killed before MI, should have
      // that kill flag removed, because its lifetime was extended.
      if (CSMI->getParent() == MI.getParent()) {
        for (MachineBasicBlock::iterator II = CSMI, IE = &MI; II != IE; ++II)
          for (auto ImplicitDef : ImplicitDefs)
            if (MachineOperand *MO = II->findRegisterUseOperand(
                    ImplicitDef, /*isKill=*/true, TRI))
              MO->setIsKill(false);
      } else {
        // If the instructions aren't in the same BB, bail out and clear the
        // kill flag on all uses of the imp-def'd register.
        for (auto ImplicitDef : ImplicitDefs)
          MRI->clearKillFlags(ImplicitDef);
      }

      if (CrossMBBPhysDef) {
        // Add physical register defs now coming in from a predecessor to MBB
        // livein list.
        while (!PhysDefs.empty()) {
          auto LiveIn = PhysDefs.pop_back_val();
          if (!MBB->isLiveIn(LiveIn.second))
            MBB->addLiveIn(LiveIn.second);
        }
        ++NumCrossBBCSEs;
      }

      MI.eraseFromParent();
      ++NumCSEs;
      if (!PhysRefs.empty())
        ++NumPhysCSEs;
      if (Commuted)
        ++NumCommutes;
      Changed = true;
    } else {
      VNT.insert(&MI, CurrVN++);
      Exps.push_back(&MI);
    }
    CSEPairs.clear();
    ImplicitDefsToUpdate.clear();
    ImplicitDefs.clear();
  }

  return Changed;
}

/// ExitScopeIfDone - Destroy the scope for the MBB that corresponds to the
/// given dominator tree node if it's a leaf or all of its children are done.
/// Walk up the dominator tree to destroy ancestors which are now done.
void
MachineCSE::ExitScopeIfDone(MachineDomTreeNode *Node,
                        DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren) {
  if (OpenChildren[Node])
    return;

  // Pop scope.
  ExitScope(Node->getBlock());

  // Now traverse upwards to pop ancestors whose children are all done.
  while (MachineDomTreeNode *Parent = Node->getIDom()) {
    unsigned Left = --OpenChildren[Parent];
    if (Left != 0)
      break;
    ExitScope(Parent->getBlock());
    Node = Parent;
  }
}

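/// Perform CSE over the dominator subtree rooted at \p Node, opening a
/// value-numbering scope per basic block so that numbered expressions are
/// only visible to the blocks they dominate.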
bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) {
  SmallVector<MachineDomTreeNode*, 32> Scopes;
  SmallVector<MachineDomTreeNode*, 8> WorkList;
  DenseMap<MachineDomTreeNode*, unsigned> OpenChildren;

  CurrVN = 0;

  // Perform a DFS walk to determine the order of visit.
  WorkList.push_back(Node);
  do {
    Node = WorkList.pop_back_val();
    Scopes.push_back(Node);
    OpenChildren[Node] = Node->getNumChildren();
    append_range(WorkList, Node->children());
  } while (!WorkList.empty());

  // Now perform CSE.
  bool Changed = false;
  for (MachineDomTreeNode *Node : Scopes) {
    MachineBasicBlock *MBB = Node->getBlock();
    EnterScope(MBB);
    Changed |= ProcessBlockCSE(MBB);
    // If it's a leaf node, it's done. Traverse upwards to pop ancestors.
    ExitScopeIfDone(Node, OpenChildren);
  }

  return Changed;
}

// We use stronger checks for PRE candidates than for CSE ones so that the
// checks inside ProcessBlockCSE(), not only those inside isCSECandidate(),
// are taken into account. This helps to exclude instructions created by PRE
// that won't be CSE'd later.
bool MachineCSE::isPRECandidate(MachineInstr *MI,
                                SmallSet<MCRegister, 8> &PhysRefs) {
  if (!isCSECandidate(MI) ||
      MI->isNotDuplicable() ||
      MI->mayLoad() ||
      TII->isAsCheapAsAMove(*MI) ||
      MI->getNumDefs() != 1 ||
      MI->getNumExplicitDefs() != 1)
    return false;

  for (const MachineOperand &MO : MI->operands()) {
    if (MO.isReg() && !MO.getReg().isVirtual()) {
      if (MO.isDef())
        return false;
      else
        PhysRefs.insert(MO.getReg());
    }
  }

  return true;
}

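/// Look for instructions in \p MBB that are identical to one already recorded
/// in PREMap from another block, and hoist a copy into the nearest common
/// dominator when legal and profitable, turning the partial redundancy into a
/// full redundancy for the following CSE walk.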
bool MachineCSE::ProcessBlockPRE(MachineDominatorTree *DT,
                                 MachineBasicBlock *MBB) {
  bool Changed = false;
  for (MachineInstr &MI : llvm::make_early_inc_range(*MBB)) {
    SmallSet<MCRegister, 8> PhysRefs;
    if (!isPRECandidate(&MI, PhysRefs))
      continue;

    if (!PREMap.count(&MI)) {
      PREMap[&MI] = MBB;
      continue;
    }

    auto MBB1 = PREMap[&MI];
    assert(
        !DT->properlyDominates(MBB, MBB1) &&
        "MBB cannot properly dominate MBB1 while DFS through dominators tree!");
    auto CMBB = DT->findNearestCommonDominator(MBB, MBB1);
    if (!CMBB->isLegalToHoistInto())
      continue;

    if (!isProfitableToHoistInto(CMBB, MBB, MBB1))
      continue;

    // Two instructions are partially redundant if their basic blocks are
    // reachable from one another but neither dominates the other.
    if (CMBB != MBB1) {
      auto BB = MBB->getBasicBlock(), BB1 = MBB1->getBasicBlock();
      if (BB != nullptr && BB1 != nullptr &&
          (isPotentiallyReachable(BB1, BB) ||
           isPotentiallyReachable(BB, BB1))) {
        // The following check extends the definition of `isConvergent` to
        // assume a convergent instruction is dependent not only on additional
        // conditions, but also on fewer conditions. LLVM does not have a
        // MachineInstr attribute which expresses this extended definition, so
        // it's necessary to use `isConvergent` to prevent illegally PRE-ing the
        // subset of `isConvergent` instructions which do fall into this
        // extended definition.
        if (MI.isConvergent() && CMBB != MBB)
          continue;

        // If this instruction uses physical registers then we can only do PRE
        // if it's using the value that is live at the place we're hoisting to.
        bool NonLocal;
        PhysDefVector PhysDefs;
        if (!PhysRefs.empty() &&
            !PhysRegDefsReach(&*(CMBB->getFirstTerminator()), &MI, PhysRefs,
                              PhysDefs, NonLocal))
          continue;

        assert(MI.getOperand(0).isDef() &&
               "First operand of instr with one explicit def must be this def");
        Register VReg = MI.getOperand(0).getReg();
        Register NewReg = MRI->cloneVirtualRegister(VReg);
        if (!isProfitableToCSE(NewReg, VReg, CMBB, &MI))
          continue;
        MachineInstr &NewMI =
            TII->duplicate(*CMBB, CMBB->getFirstTerminator(), MI);

        // When hoisting, make sure we don't carry the debug location of
        // the original instruction, as that's not correct and can cause
        // unexpected jumps when debugging optimized code.
        auto EmptyDL = DebugLoc();
        NewMI.setDebugLoc(EmptyDL);

        NewMI.getOperand(0).setReg(NewReg);

        PREMap[&MI] = CMBB;
        ++NumPREs;
        Changed = true;
      }
    }
  }
  return Changed;
}

// This simple PRE (partial redundancy elimination) pass doesn't actually
// eliminate partial redundancy but transforms it into full redundancy,
// anticipating that the next CSE step will eliminate this created redundancy.
// If CSE doesn't eliminate it, the created instruction will remain dead and
// be eliminated later by the Remove Dead Machine Instructions pass.
bool MachineCSE::PerformSimplePRE(MachineDominatorTree *DT) {
  SmallVector<MachineDomTreeNode *, 32> BBs;

  PREMap.clear();
  bool Changed = false;
  BBs.push_back(DT->getRootNode());
  do {
    auto Node = BBs.pop_back_val();
    append_range(BBs, Node->children());

    MachineBasicBlock *MBB = Node->getBlock();
    Changed |= ProcessBlockPRE(DT, MBB);

  } while (!BBs.empty());

  return Changed;
}

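/// Hoisting into CandidateBB is considered profitable when the function is
/// optimized for minimum size, or when CandidateBB is not expected to execute
/// more frequently than MBB and MBB1 combined.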
bool MachineCSE::isProfitableToHoistInto(MachineBasicBlock *CandidateBB,
                                         MachineBasicBlock *MBB,
                                         MachineBasicBlock *MBB1) {
  if (CandidateBB->getParent()->getFunction().hasMinSize())
    return true;
  assert(DT->dominates(CandidateBB, MBB) && "CandidateBB should dominate MBB");
  assert(DT->dominates(CandidateBB, MBB1) &&
         "CandidateBB should dominate MBB1");
  return MBFI->getBlockFreq(CandidateBB) <=
         MBFI->getBlockFreq(MBB) + MBFI->getBlockFreq(MBB1);
}

bool MachineCSE::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  TII = MF.getSubtarget().getInstrInfo();
  TRI = MF.getSubtarget().getRegisterInfo();
  MRI = &MF.getRegInfo();
  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
  DT = &getAnalysis<MachineDominatorTree>();
  MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
  LookAheadLimit = TII->getMachineCSELookAheadLimit();
  bool ChangedPRE = PerformSimplePRE(DT);
  bool ChangedCSE = PerformCSE(DT->getRootNode());
  return ChangedPRE || ChangedCSE;
}