//===- EarlyCSE.cpp - Simple and fast CSE pass ----------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass performs a simple dominator tree walk that eliminates trivially
// redundant instructions.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/EarlyCSE.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/ScopedHashTable.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/RecyclingAllocator.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
#include <deque>
using namespace llvm;
using namespace llvm::PatternMatch;

#define DEBUG_TYPE "early-cse"

STATISTIC(NumSimplify, "Number of instructions simplified or DCE'd");
STATISTIC(NumCSE,      "Number of instructions CSE'd");
STATISTIC(NumCSELoad,  "Number of load instructions CSE'd");
STATISTIC(NumCSECall,  "Number of call instructions CSE'd");
STATISTIC(NumDSE,      "Number of trivial dead stores removed");

//===----------------------------------------------------------------------===//
// SimpleValue
//===----------------------------------------------------------------------===//

namespace {
/// \brief Struct representing the available values in the scoped hash table.
struct SimpleValue {
  Instruction *Inst;

  SimpleValue(Instruction *I) : Inst(I) {
    assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
  }

  bool isSentinel() const {
    return Inst == DenseMapInfo<Instruction *>::getEmptyKey() ||
           Inst == DenseMapInfo<Instruction *>::getTombstoneKey();
  }

  static bool canHandle(Instruction *Inst) {
    // This can only handle non-void readnone functions.
    if (CallInst *CI = dyn_cast<CallInst>(Inst))
      return CI->doesNotAccessMemory() && !CI->getType()->isVoidTy();
    return isa<CastInst>(Inst) || isa<BinaryOperator>(Inst) ||
           isa<GetElementPtrInst>(Inst) || isa<CmpInst>(Inst) ||
           isa<SelectInst>(Inst) || isa<ExtractElementInst>(Inst) ||
           isa<InsertElementInst>(Inst) || isa<ShuffleVectorInst>(Inst) ||
           isa<ExtractValueInst>(Inst) || isa<InsertValueInst>(Inst);
  }
};
}

namespace llvm {
template <> struct DenseMapInfo<SimpleValue> {
  static inline SimpleValue getEmptyKey() {
    return DenseMapInfo<Instruction *>::getEmptyKey();
  }
  static inline SimpleValue getTombstoneKey() {
    return DenseMapInfo<Instruction *>::getTombstoneKey();
  }
  static unsigned getHashValue(SimpleValue Val);
  static bool isEqual(SimpleValue LHS, SimpleValue RHS);
};
}

unsigned DenseMapInfo<SimpleValue>::getHashValue(SimpleValue Val) {
  Instruction *Inst = Val.Inst;
  // Hash in all of the operands as pointers.
  if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst)) {
    Value *LHS = BinOp->getOperand(0);
    Value *RHS = BinOp->getOperand(1);
    if (BinOp->isCommutative() && BinOp->getOperand(0) > BinOp->getOperand(1))
      std::swap(LHS, RHS);

    if (isa<OverflowingBinaryOperator>(BinOp)) {
      // Hash the overflow behavior
      unsigned Overflow =
          BinOp->hasNoSignedWrap() * OverflowingBinaryOperator::NoSignedWrap |
          BinOp->hasNoUnsignedWrap() *
              OverflowingBinaryOperator::NoUnsignedWrap;
      return hash_combine(BinOp->getOpcode(), Overflow, LHS, RHS);
    }

    return hash_combine(BinOp->getOpcode(), LHS, RHS);
  }

  if (CmpInst *CI = dyn_cast<CmpInst>(Inst)) {
    Value *LHS = CI->getOperand(0);
    Value *RHS = CI->getOperand(1);
    CmpInst::Predicate Pred = CI->getPredicate();
    if (Inst->getOperand(0) > Inst->getOperand(1)) {
      std::swap(LHS, RHS);
      Pred = CI->getSwappedPredicate();
    }
    return hash_combine(Inst->getOpcode(), Pred, LHS, RHS);
  }

  if (CastInst *CI = dyn_cast<CastInst>(Inst))
    return hash_combine(CI->getOpcode(), CI->getType(), CI->getOperand(0));

  if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(Inst))
    return hash_combine(EVI->getOpcode(), EVI->getOperand(0),
                        hash_combine_range(EVI->idx_begin(), EVI->idx_end()));

  if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(Inst))
    return hash_combine(IVI->getOpcode(), IVI->getOperand(0),
                        IVI->getOperand(1),
                        hash_combine_range(IVI->idx_begin(), IVI->idx_end()));

  assert((isa<CallInst>(Inst) || isa<BinaryOperator>(Inst) ||
          isa<GetElementPtrInst>(Inst) || isa<SelectInst>(Inst) ||
          isa<ExtractElementInst>(Inst) || isa<InsertElementInst>(Inst) ||
          isa<ShuffleVectorInst>(Inst)) &&
         "Invalid/unknown instruction");

  // Mix in the opcode.
  return hash_combine(
      Inst->getOpcode(),
      hash_combine_range(Inst->value_op_begin(), Inst->value_op_end()));
}

bool DenseMapInfo<SimpleValue>::isEqual(SimpleValue LHS, SimpleValue RHS) {
  Instruction *LHSI = LHS.Inst, *RHSI = RHS.Inst;

  if (LHS.isSentinel() || RHS.isSentinel())
    return LHSI == RHSI;

  if (LHSI->getOpcode() != RHSI->getOpcode())
    return false;
  if (LHSI->isIdenticalTo(RHSI))
    return true;

  // If we're not strictly identical, we still might be a commutable instruction
  if (BinaryOperator *LHSBinOp = dyn_cast<BinaryOperator>(LHSI)) {
    if (!LHSBinOp->isCommutative())
      return false;

    assert(isa<BinaryOperator>(RHSI) &&
           "same opcode, but different instruction type?");
    BinaryOperator *RHSBinOp = cast<BinaryOperator>(RHSI);

    // Check overflow attributes
    if (isa<OverflowingBinaryOperator>(LHSBinOp)) {
      assert(isa<OverflowingBinaryOperator>(RHSBinOp) &&
             "same opcode, but different operator type?");
      if (LHSBinOp->hasNoUnsignedWrap() != RHSBinOp->hasNoUnsignedWrap() ||
          LHSBinOp->hasNoSignedWrap() != RHSBinOp->hasNoSignedWrap())
        return false;
    }

    // Commuted equality
    return LHSBinOp->getOperand(0) == RHSBinOp->getOperand(1) &&
           LHSBinOp->getOperand(1) == RHSBinOp->getOperand(0);
  }
  if (CmpInst *LHSCmp = dyn_cast<CmpInst>(LHSI)) {
    assert(isa<CmpInst>(RHSI) &&
           "same opcode, but different instruction type?");
    CmpInst *RHSCmp = cast<CmpInst>(RHSI);
    // Commuted equality
    return LHSCmp->getOperand(0) == RHSCmp->getOperand(1) &&
           LHSCmp->getOperand(1) == RHSCmp->getOperand(0) &&
           LHSCmp->getSwappedPredicate() == RHSCmp->getPredicate();
  }

  return false;
}

//===----------------------------------------------------------------------===//
// CallValue
//===----------------------------------------------------------------------===//

namespace {
/// \brief Struct representing the available call values in the scoped hash
/// table.
struct CallValue {
  Instruction *Inst;

  CallValue(Instruction *I) : Inst(I) {
    assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
  }

  bool isSentinel() const {
    return Inst == DenseMapInfo<Instruction *>::getEmptyKey() ||
           Inst == DenseMapInfo<Instruction *>::getTombstoneKey();
  }

  static bool canHandle(Instruction *Inst) {
    // Don't value number anything that returns void.
    if (Inst->getType()->isVoidTy())
      return false;

    CallInst *CI = dyn_cast<CallInst>(Inst);
    if (!CI || !CI->onlyReadsMemory())
      return false;
    return true;
  }
};
}

namespace llvm {
template <> struct DenseMapInfo<CallValue> {
  static inline CallValue getEmptyKey() {
    return DenseMapInfo<Instruction *>::getEmptyKey();
  }
  static inline CallValue getTombstoneKey() {
    return DenseMapInfo<Instruction *>::getTombstoneKey();
  }
  static unsigned getHashValue(CallValue Val);
  static bool isEqual(CallValue LHS, CallValue RHS);
};
}

unsigned DenseMapInfo<CallValue>::getHashValue(CallValue Val) {
  Instruction *Inst = Val.Inst;
  // Hash all of the operands as pointers and mix in the opcode.
  return hash_combine(
      Inst->getOpcode(),
      hash_combine_range(Inst->value_op_begin(), Inst->value_op_end()));
}

bool DenseMapInfo<CallValue>::isEqual(CallValue LHS, CallValue RHS) {
  Instruction *LHSI = LHS.Inst, *RHSI = RHS.Inst;
  if (LHS.isSentinel() || RHS.isSentinel())
    return LHSI == RHSI;
  return LHSI->isIdenticalTo(RHSI);
}

//===----------------------------------------------------------------------===//
// EarlyCSE implementation
//===----------------------------------------------------------------------===//

namespace {
/// \brief A simple and fast domtree-based CSE pass.
///
/// This pass does a simple depth-first walk over the dominator tree,
/// eliminating trivially redundant instructions and using instsimplify to
/// canonicalize things as it goes. It is intended to be fast and catch obvious
/// cases so that instcombine and other passes are more effective. It is
/// expected that a later pass of GVN will catch the interesting/hard cases.
class EarlyCSE {
public:
  const TargetLibraryInfo &TLI;
  const TargetTransformInfo &TTI;
  DominatorTree &DT;
  AssumptionCache &AC;
  typedef RecyclingAllocator<
      BumpPtrAllocator, ScopedHashTableVal<SimpleValue, Value *>> AllocatorTy;
  typedef ScopedHashTable<SimpleValue, Value *, DenseMapInfo<SimpleValue>,
                          AllocatorTy> ScopedHTType;

  /// \brief A scoped hash table of the current values of all of our simple
  /// scalar expressions.
  ///
  /// As we walk down the domtree, we look to see if instructions are in this:
  /// if so, we replace them with what we find, otherwise we insert them so
  /// that dominated values can succeed in their lookup.
  ScopedHTType AvailableValues;

  /// A scoped hash table of the current values of previously encountered
  /// memory locations.
  ///
  /// This allows us to get efficient access to dominating loads or stores when
  /// we have a fully redundant load.  In addition to the most recent load, we
  /// keep track of a generation count of the read, which is compared against
  /// the current generation count.  The current generation count is incremented
  /// after every possibly writing memory operation, which ensures that we only
  /// CSE loads with other loads that have no intervening store.  Ordering
  /// events (such as fences or atomic instructions) increment the generation
  /// count as well; essentially, we model these as writes to all possible
  /// locations.  Note that atomic and/or volatile loads and stores can be
  /// present in the table; it is the responsibility of the consumer to inspect
  /// the atomicity/volatility if needed.
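  ///
  /// For example (illustrative IR, not from a test): the second load below can
  /// only reuse %v1 if no instruction between the two loads may write memory.
  /// A call that may write bumps the generation, so the loads are not CSE'd:
  ///   %v1 = load i32, i32* %p
  ///   call void @clobber()       ; may write memory, new generation
  ///   %v2 = load i32, i32* %p    ; different generation, not CSE'd with %v1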
  struct LoadValue {
    Value *Data;
    unsigned Generation;
    int MatchingId;
    bool IsAtomic;
    LoadValue()
      : Data(nullptr), Generation(0), MatchingId(-1), IsAtomic(false) {}
    LoadValue(Value *Data, unsigned Generation, unsigned MatchingId,
              bool IsAtomic)
      : Data(Data), Generation(Generation), MatchingId(MatchingId),
        IsAtomic(IsAtomic) {}
  };
  typedef RecyclingAllocator<BumpPtrAllocator,
                             ScopedHashTableVal<Value *, LoadValue>>
      LoadMapAllocator;
  typedef ScopedHashTable<Value *, LoadValue, DenseMapInfo<Value *>,
                          LoadMapAllocator> LoadHTType;
  LoadHTType AvailableLoads;

  /// \brief A scoped hash table of the current values of read-only call
  /// values.
  ///
  /// It uses the same generation count as loads.
  typedef ScopedHashTable<CallValue, std::pair<Value *, unsigned>> CallHTType;
  CallHTType AvailableCalls;

  /// \brief This is the current generation of the memory value.
  unsigned CurrentGeneration;

  /// \brief Set up the EarlyCSE runner for a particular function.
  EarlyCSE(const TargetLibraryInfo &TLI, const TargetTransformInfo &TTI,
           DominatorTree &DT, AssumptionCache &AC)
      : TLI(TLI), TTI(TTI), DT(DT), AC(AC), CurrentGeneration(0) {}

  bool run();

private:
  // Almost a POD, but needs to call the constructors for the scoped hash
  // tables so that a new scope gets pushed on. These are RAII so that the
  // scope gets popped when the NodeScope is destroyed.
  class NodeScope {
  public:
    NodeScope(ScopedHTType &AvailableValues, LoadHTType &AvailableLoads,
              CallHTType &AvailableCalls)
        : Scope(AvailableValues), LoadScope(AvailableLoads),
          CallScope(AvailableCalls) {}

  private:
    NodeScope(const NodeScope &) = delete;
    void operator=(const NodeScope &) = delete;

    ScopedHTType::ScopeTy Scope;
    LoadHTType::ScopeTy LoadScope;
    CallHTType::ScopeTy CallScope;
  };

  // Contains all the needed information to create a stack for doing a depth
  // first traversal of the tree. This includes scopes for values, loads, and
  // calls as well as the generation. There is a child iterator so that the
  // children do not need to be stored separately.
  class StackNode {
  public:
    StackNode(ScopedHTType &AvailableValues, LoadHTType &AvailableLoads,
              CallHTType &AvailableCalls, unsigned cg, DomTreeNode *n,
              DomTreeNode::iterator child, DomTreeNode::iterator end)
        : CurrentGeneration(cg), ChildGeneration(cg), Node(n), ChildIter(child),
          EndIter(end), Scopes(AvailableValues, AvailableLoads, AvailableCalls),
          Processed(false) {}

    // Accessors.
    unsigned currentGeneration() { return CurrentGeneration; }
    unsigned childGeneration() { return ChildGeneration; }
    void childGeneration(unsigned generation) { ChildGeneration = generation; }
    DomTreeNode *node() { return Node; }
    DomTreeNode::iterator childIter() { return ChildIter; }
    DomTreeNode *nextChild() {
      DomTreeNode *child = *ChildIter;
      ++ChildIter;
      return child;
    }
    DomTreeNode::iterator end() { return EndIter; }
    bool isProcessed() { return Processed; }
    void process() { Processed = true; }

  private:
    StackNode(const StackNode &) = delete;
    void operator=(const StackNode &) = delete;

    // Members.
    unsigned CurrentGeneration;
    unsigned ChildGeneration;
    DomTreeNode *Node;
    DomTreeNode::iterator ChildIter;
    DomTreeNode::iterator EndIter;
    NodeScope Scopes;
    bool Processed;
  };

  /// \brief Wrapper class to handle memory instructions, including loads,
  /// stores and intrinsic loads and stores defined by the target.
  class ParseMemoryInst {
  public:
    ParseMemoryInst(Instruction *Inst, const TargetTransformInfo &TTI)
      : IsTargetMemInst(false), Inst(Inst) {
      if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
        if (TTI.getTgtMemIntrinsic(II, Info) && Info.NumMemRefs == 1)
          IsTargetMemInst = true;
    }
    bool isLoad() const {
      if (IsTargetMemInst) return Info.ReadMem;
      return isa<LoadInst>(Inst);
    }
    bool isStore() const {
      if (IsTargetMemInst) return Info.WriteMem;
      return isa<StoreInst>(Inst);
    }
    bool isAtomic() const {
      if (IsTargetMemInst) {
        assert(Info.IsSimple && "need to refine IsSimple in TTI");
        return false;
      }
      return Inst->isAtomic();
    }
    bool isUnordered() const {
      if (IsTargetMemInst) {
        assert(Info.IsSimple && "need to refine IsSimple in TTI");
        return true;
      }
      if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
        return LI->isUnordered();
      } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
        return SI->isUnordered();
      }
      // Conservative answer
      return !Inst->isAtomic();
    }

    bool isVolatile() const {
      if (IsTargetMemInst) {
        assert(Info.IsSimple && "need to refine IsSimple in TTI");
        return false;
      }
      if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
        return LI->isVolatile();
      } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
        return SI->isVolatile();
      }
      // Conservative answer
      return true;
    }

    bool isMatchingMemLoc(const ParseMemoryInst &Inst) const {
      return (getPointerOperand() == Inst.getPointerOperand() &&
              getMatchingId() == Inst.getMatchingId());
    }
    bool isValid() const { return getPointerOperand() != nullptr; }

    // For regular (non-intrinsic) loads/stores, this is set to -1. For
    // intrinsic loads/stores, the id is retrieved from the corresponding
    // field in the MemIntrinsicInfo structure.  That field contains
    // non-negative values only.
    int getMatchingId() const {
      if (IsTargetMemInst) return Info.MatchingId;
      return -1;
    }
    Value *getPointerOperand() const {
      if (IsTargetMemInst) return Info.PtrVal;
      if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
        return LI->getPointerOperand();
      } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
        return SI->getPointerOperand();
      }
      return nullptr;
    }
    bool mayReadFromMemory() const {
      if (IsTargetMemInst) return Info.ReadMem;
      return Inst->mayReadFromMemory();
    }
    bool mayWriteToMemory() const {
      if (IsTargetMemInst) return Info.WriteMem;
      return Inst->mayWriteToMemory();
    }

  private:
    bool IsTargetMemInst;
    MemIntrinsicInfo Info;
    Instruction *Inst;
  };

  bool processNode(DomTreeNode *Node);

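  /// Return the value that \p Inst makes available at its memory location:
  /// the load itself for loads, the stored value operand for stores, or a
  /// value of type \p ExpectedType materialized via TTI for target memory
  /// intrinsics.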
  Value *getOrCreateResult(Value *Inst, Type *ExpectedType) const {
    if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
      return LI;
    else if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
      return SI->getValueOperand();
    assert(isa<IntrinsicInst>(Inst) && "Instruction not supported");
    return TTI.getOrCreateResultFromMemIntrinsic(cast<IntrinsicInst>(Inst),
                                                 ExpectedType);
  }
};
}

bool EarlyCSE::processNode(DomTreeNode *Node) {
  BasicBlock *BB = Node->getBlock();

  // If this block has a single predecessor, then the predecessor is the parent
  // of the domtree node and all of the live out memory values are still current
  // in this block.  If this block has multiple predecessors, then they could
  // have invalidated the live-out memory values of our parent value.  For now,
  // just be conservative and invalidate memory if this block has multiple
  // predecessors.
  if (!BB->getSinglePredecessor())
    ++CurrentGeneration;

  // If this node has a single predecessor which ends in a conditional branch,
  // we can infer the value of the branch condition given that we took this
  // path.  We need the single predecessor to ensure there's not another path
  // which reaches this block where the condition might hold a different
  // value.  Since we're adding this to the scoped hash table (like any other
  // def), it will have been popped if we encounter a future merge block.
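  //
  // For example (illustrative IR, not from a test), if the predecessor ends in
  //   br i1 %cmp, label %this.block, label %other.block
  // then %cmp is known to be 'true' on entry to %this.block, and dominated
  // uses of %cmp can be replaced with that constant.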
  if (BasicBlock *Pred = BB->getSinglePredecessor())
    if (auto *BI = dyn_cast<BranchInst>(Pred->getTerminator()))
      if (BI->isConditional())
        if (auto *CondInst = dyn_cast<Instruction>(BI->getCondition()))
          if (SimpleValue::canHandle(CondInst)) {
            assert(BI->getSuccessor(0) == BB || BI->getSuccessor(1) == BB);
            auto *ConditionalConstant = (BI->getSuccessor(0) == BB) ?
              ConstantInt::getTrue(BB->getContext()) :
              ConstantInt::getFalse(BB->getContext());
            AvailableValues.insert(CondInst, ConditionalConstant);
            DEBUG(dbgs() << "EarlyCSE CVP: Add conditional value for '"
                  << CondInst->getName() << "' as " << *ConditionalConstant
                  << " in " << BB->getName() << "\n");
            // Replace all dominated uses with the known value
            replaceDominatedUsesWith(CondInst, ConditionalConstant, DT,
                                     BasicBlockEdge(Pred, BB));
          }

  /// LastStore - Keep track of the last non-volatile store that we saw... for
  /// as long as there is no instruction that reads memory.  If we see a store
  /// to the same location, we delete the dead store.  This zaps trivial dead
  /// stores which can occur in bitfield code among other things.
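  ///
  /// For example (illustrative IR, not from a test), the first store below is
  /// dead because the second store overwrites it with no intervening read:
  ///   store i32 %a, i32* %p
  ///   store i32 %b, i32* %p    ; makes the earlier store of %a removable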
  Instruction *LastStore = nullptr;

  bool Changed = false;
  const DataLayout &DL = BB->getModule()->getDataLayout();

  // See if any instructions in the block can be eliminated.  If so, do it.  If
  // not, add them to AvailableValues.
  for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) {
    Instruction *Inst = &*I++;

    // Dead instructions should just be removed.
    if (isInstructionTriviallyDead(Inst, &TLI)) {
      DEBUG(dbgs() << "EarlyCSE DCE: " << *Inst << '\n');
      Inst->eraseFromParent();
      Changed = true;
      ++NumSimplify;
      continue;
    }

    // Skip assume intrinsics, they don't really have side effects (although
    // they're marked as such to ensure preservation of control dependencies),
    // and this pass will not disturb any of the assumption's control
    // dependencies.
    if (match(Inst, m_Intrinsic<Intrinsic::assume>())) {
      DEBUG(dbgs() << "EarlyCSE skipping assumption: " << *Inst << '\n');
      continue;
    }

    // If the instruction can be simplified (e.g. X+0 = X) then replace it with
    // its simpler value.
    if (Value *V = SimplifyInstruction(Inst, DL, &TLI, &DT, &AC)) {
      DEBUG(dbgs() << "EarlyCSE Simplify: " << *Inst << "  to: " << *V << '\n');
      Inst->replaceAllUsesWith(V);
      Inst->eraseFromParent();
      Changed = true;
      ++NumSimplify;
      continue;
    }

    // If this is a simple instruction that we can value number, process it.
    if (SimpleValue::canHandle(Inst)) {
      // See if the instruction has an available value.  If so, use it.
      if (Value *V = AvailableValues.lookup(Inst)) {
        DEBUG(dbgs() << "EarlyCSE CSE: " << *Inst << "  to: " << *V << '\n');
        Inst->replaceAllUsesWith(V);
        Inst->eraseFromParent();
        Changed = true;
        ++NumCSE;
        continue;
      }

      // Otherwise, just remember that this value is available.
      AvailableValues.insert(Inst, Inst);
      continue;
    }

    ParseMemoryInst MemInst(Inst, TTI);
    // If this is a non-volatile load, process it.
    if (MemInst.isValid() && MemInst.isLoad()) {
      // (Conservatively) we can't peek past the ordering implied by this
      // operation, but we can add this load to our set of available values.
      if (MemInst.isVolatile() || !MemInst.isUnordered()) {
        LastStore = nullptr;
        ++CurrentGeneration;
      }

      // If we have an available version of this load, and if it is the right
      // generation, replace this instruction.
      LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand());
      if (InVal.Data != nullptr && InVal.Generation == CurrentGeneration &&
          InVal.MatchingId == MemInst.getMatchingId() &&
          // We don't yet handle removing loads with ordering of any kind.
          !MemInst.isVolatile() && MemInst.isUnordered() &&
          // We can't replace an atomic load with one which isn't also atomic.
          InVal.IsAtomic >= MemInst.isAtomic()) {
        Value *Op = getOrCreateResult(InVal.Data, Inst->getType());
        if (Op != nullptr) {
          DEBUG(dbgs() << "EarlyCSE CSE LOAD: " << *Inst
                       << "  to: " << *InVal.Data << '\n');
          if (!Inst->use_empty())
            Inst->replaceAllUsesWith(Op);
          Inst->eraseFromParent();
          Changed = true;
          ++NumCSELoad;
          continue;
        }
      }

      // Otherwise, remember that we have this instruction.
      AvailableLoads.insert(
          MemInst.getPointerOperand(),
          LoadValue(Inst, CurrentGeneration, MemInst.getMatchingId(),
                    MemInst.isAtomic()));
      LastStore = nullptr;
      continue;
    }

    // If this instruction may read from memory, forget LastStore.
    // Load/store intrinsics will indicate both a read and a write to
    // memory.  The target may override this (e.g. so that a store intrinsic
    // does not read from memory, and thus will be treated the same as a
    // regular store for commoning purposes).
    if (Inst->mayReadFromMemory() &&
        !(MemInst.isValid() && !MemInst.mayReadFromMemory()))
      LastStore = nullptr;

    // If this is a read-only call, process it.
    if (CallValue::canHandle(Inst)) {
      // If we have an available version of this call, and if it is the right
      // generation, replace this instruction.
      std::pair<Value *, unsigned> InVal = AvailableCalls.lookup(Inst);
      if (InVal.first != nullptr && InVal.second == CurrentGeneration) {
        DEBUG(dbgs() << "EarlyCSE CSE CALL: " << *Inst
                     << "  to: " << *InVal.first << '\n');
        if (!Inst->use_empty())
          Inst->replaceAllUsesWith(InVal.first);
        Inst->eraseFromParent();
        Changed = true;
        ++NumCSECall;
        continue;
      }

      // Otherwise, remember that we have this instruction.
      AvailableCalls.insert(
          Inst, std::pair<Value *, unsigned>(Inst, CurrentGeneration));
      continue;
    }

    // A release fence requires that all stores complete before it, but does
    // not prevent the reordering of following loads 'before' the fence.  As a
    // result, we don't need to consider it as writing to memory and don't need
    // to advance the generation.  We do need to prevent DSE across the fence,
    // but that's handled above.
    if (FenceInst *FI = dyn_cast<FenceInst>(Inst))
      if (FI->getOrdering() == Release) {
        assert(Inst->mayReadFromMemory() && "relied on to prevent DSE above");
        continue;
      }

    // write back DSE - If we write back the same value we just loaded from
    // the same location and haven't passed any intervening writes or ordering
    // operations, we can remove the write.  The primary benefit is in allowing
    // the available load table to remain valid and value forward past where
    // the store originally was.
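    //
    // For example (illustrative IR, not from a test):
    //   %v = load i32, i32* %p
    //   ...                     ; no intervening writes or ordering operations
    //   store i32 %v, i32* %p   ; stores back the value just loaded, removable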
    if (MemInst.isValid() && MemInst.isStore()) {
      LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand());
      if (InVal.Data &&
          InVal.Data == getOrCreateResult(Inst, InVal.Data->getType()) &&
          InVal.Generation == CurrentGeneration &&
          InVal.MatchingId == MemInst.getMatchingId() &&
          // We don't yet handle removing stores with ordering of any kind.
          !MemInst.isVolatile() && MemInst.isUnordered()) {
        assert((!LastStore ||
                ParseMemoryInst(LastStore, TTI).getPointerOperand() ==
                MemInst.getPointerOperand()) &&
               "can't have an intervening store!");
        DEBUG(dbgs() << "EarlyCSE DSE (writeback): " << *Inst << '\n');
        Inst->eraseFromParent();
        Changed = true;
        ++NumDSE;
        // We can avoid incrementing the generation count since we were able
        // to eliminate this store.
        continue;
      }
    }

    // Okay, this isn't something we can CSE at all.  Check to see if it is
    // something that could modify memory.  If so, our available memory values
    // cannot be used so bump the generation count.
    if (Inst->mayWriteToMemory()) {
      ++CurrentGeneration;

      if (MemInst.isValid() && MemInst.isStore()) {
        // We do a trivial form of DSE if there are two stores to the same
        // location with no intervening loads.  Delete the earlier store.
        // At the moment, we don't remove ordered stores, but do remove
        // unordered atomic stores.  There's no special requirement (for
        // unordered atomics) about removing atomic stores only in favor of
        // other atomic stores since we're going to execute the non-atomic
        // one anyway and the atomic one might never have become visible.
        if (LastStore) {
          ParseMemoryInst LastStoreMemInst(LastStore, TTI);
          assert(LastStoreMemInst.isUnordered() &&
                 !LastStoreMemInst.isVolatile() &&
                 "Violated invariant");
          if (LastStoreMemInst.isMatchingMemLoc(MemInst)) {
            DEBUG(dbgs() << "EarlyCSE DEAD STORE: " << *LastStore
                         << "  due to: " << *Inst << '\n');
            LastStore->eraseFromParent();
            Changed = true;
            ++NumDSE;
            LastStore = nullptr;
          }
          // fallthrough - we can exploit information about this store
        }

        // Okay, we just invalidated anything we knew about loaded values.  Try
        // to salvage *something* by remembering that the stored value is a live
        // version of the pointer.  It is safe to forward from volatile stores
        // to non-volatile loads, so we don't have to check for volatility of
        // the store.
        AvailableLoads.insert(
            MemInst.getPointerOperand(),
            LoadValue(Inst, CurrentGeneration, MemInst.getMatchingId(),
                      MemInst.isAtomic()));

        // Remember that this was the last unordered store we saw for DSE. We
        // don't yet handle DSE on ordered or volatile stores since we don't
        // have a good way to model the ordering requirement for following
        // passes once the store is removed.  We could insert a fence, but
        // since fences are slightly stronger than stores in their ordering,
        // it's not clear this is a profitable transform. Another option would
        // be to merge the ordering with that of the post dominating store.
        if (MemInst.isUnordered() && !MemInst.isVolatile())
          LastStore = Inst;
        else
          LastStore = nullptr;
      }
    }
  }

  return Changed;
}

bool EarlyCSE::run() {
  // Note, deque is being used here because there are significant performance
  // gains over vector when the container becomes very large due to the
  // specific access patterns. For more information see the mailing list
  // discussion on this:
  // http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20120116/135228.html
  std::deque<StackNode *> nodesToProcess;

  bool Changed = false;

  // Process the root node.
  nodesToProcess.push_back(new StackNode(
      AvailableValues, AvailableLoads, AvailableCalls, CurrentGeneration,
      DT.getRootNode(), DT.getRootNode()->begin(), DT.getRootNode()->end()));

  // Save the current generation.
  unsigned LiveOutGeneration = CurrentGeneration;

  // Process the stack.
  while (!nodesToProcess.empty()) {
    // Grab the first item off the stack. Set the current generation, remove
    // the node from the stack, and process it.
    StackNode *NodeToProcess = nodesToProcess.back();

    // Initialize class members.
    CurrentGeneration = NodeToProcess->currentGeneration();

    // Check if the node needs to be processed.
    if (!NodeToProcess->isProcessed()) {
      // Process the node.
      Changed |= processNode(NodeToProcess->node());
      NodeToProcess->childGeneration(CurrentGeneration);
      NodeToProcess->process();
    } else if (NodeToProcess->childIter() != NodeToProcess->end()) {
      // Push the next child onto the stack.
      DomTreeNode *child = NodeToProcess->nextChild();
      nodesToProcess.push_back(
          new StackNode(AvailableValues, AvailableLoads, AvailableCalls,
                        NodeToProcess->childGeneration(), child, child->begin(),
                        child->end()));
    } else {
      // It has been processed, and there are no more children to process,
      // so delete it and pop it off the stack.
      delete NodeToProcess;
      nodesToProcess.pop_back();
    }
  } // while (!nodes...)

  // Reset the current generation.
  CurrentGeneration = LiveOutGeneration;

  return Changed;
}

PreservedAnalyses EarlyCSEPass::run(Function &F,
                                    AnalysisManager<Function> *AM) {
  auto &TLI = AM->getResult<TargetLibraryAnalysis>(F);
  auto &TTI = AM->getResult<TargetIRAnalysis>(F);
  auto &DT = AM->getResult<DominatorTreeAnalysis>(F);
  auto &AC = AM->getResult<AssumptionAnalysis>(F);

  EarlyCSE CSE(TLI, TTI, DT, AC);

  if (!CSE.run())
    return PreservedAnalyses::all();

  // CSE preserves the dominator tree because it doesn't mutate the CFG.
  // FIXME: Bundle this with other CFG-preservation.
  PreservedAnalyses PA;
  PA.preserve<DominatorTreeAnalysis>();
  return PA;
}

namespace {
/// \brief A simple and fast domtree-based CSE pass.
///
/// This pass does a simple depth-first walk over the dominator tree,
/// eliminating trivially redundant instructions and using instsimplify to
/// canonicalize things as it goes. It is intended to be fast and catch obvious
/// cases so that instcombine and other passes are more effective. It is
/// expected that a later pass of GVN will catch the interesting/hard cases.
class EarlyCSELegacyPass : public FunctionPass {
public:
  static char ID;

  EarlyCSELegacyPass() : FunctionPass(ID) {
    initializeEarlyCSELegacyPassPass(*PassRegistry::getPassRegistry());
  }

  bool runOnFunction(Function &F) override {
    if (skipOptnoneFunction(F))
      return false;

    auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
    auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
    auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);

    EarlyCSE CSE(TLI, TTI, DT, AC);

    return CSE.run();
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AssumptionCacheTracker>();
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<TargetLibraryInfoWrapperPass>();
    AU.addRequired<TargetTransformInfoWrapperPass>();
    AU.addPreserved<GlobalsAAWrapperPass>();
    AU.setPreservesCFG();
  }
};
}

char EarlyCSELegacyPass::ID = 0;

FunctionPass *llvm::createEarlyCSEPass() { return new EarlyCSELegacyPass(); }

INITIALIZE_PASS_BEGIN(EarlyCSELegacyPass, "early-cse", "Early CSE", false,
                      false)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(EarlyCSELegacyPass, "early-cse", "Early CSE", false, false)