1//===- GVNSink.cpp - sink expressions into successors ---------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file GVNSink.cpp
10/// This pass attempts to sink instructions into successors, reducing static
11/// instruction count and enabling if-conversion.
12///
13/// We use a variant of global value numbering to decide what can be sunk.
14/// Consider:
15///
16/// [ %a1 = add i32 %b, 1  ]   [ %c1 = add i32 %d, 1  ]
17/// [ %a2 = xor i32 %a1, 1 ]   [ %c2 = xor i32 %c1, 1 ]
18///                  \           /
19///            [ %e = phi i32 %a2, %c2 ]
20///            [ add i32 %e, 4         ]
21///
22///
23/// GVN would number %a1 and %c1 differently because they compute different
24/// results - the VN of an instruction is a function of its opcode and the
25/// transitive closure of its operands. This is the key property for hoisting
26/// and CSE.
27///
28/// What we want when sinking however is for a numbering that is a function of
29/// the *uses* of an instruction, which allows us to answer the question "if I
30/// replace %a1 with %c1, will it contribute in an equivalent way to all
/// successive instructions?". The ValueTable class in this file provides this
/// mapping.
33//
34//===----------------------------------------------------------------------===//
35
36#include "llvm/ADT/ArrayRef.h"
37#include "llvm/ADT/DenseMap.h"
38#include "llvm/ADT/DenseMapInfo.h"
39#include "llvm/ADT/DenseSet.h"
40#include "llvm/ADT/Hashing.h"
41#include "llvm/ADT/None.h"
42#include "llvm/ADT/Optional.h"
43#include "llvm/ADT/PostOrderIterator.h"
44#include "llvm/ADT/STLExtras.h"
45#include "llvm/ADT/SmallPtrSet.h"
46#include "llvm/ADT/SmallVector.h"
47#include "llvm/ADT/Statistic.h"
48#include "llvm/ADT/StringExtras.h"
49#include "llvm/Analysis/GlobalsModRef.h"
50#include "llvm/IR/BasicBlock.h"
51#include "llvm/IR/CFG.h"
52#include "llvm/IR/Constants.h"
53#include "llvm/IR/Function.h"
54#include "llvm/IR/InstrTypes.h"
55#include "llvm/IR/Instruction.h"
56#include "llvm/IR/Instructions.h"
57#include "llvm/IR/PassManager.h"
58#include "llvm/IR/Type.h"
59#include "llvm/IR/Use.h"
60#include "llvm/IR/Value.h"
61#include "llvm/InitializePasses.h"
62#include "llvm/Pass.h"
63#include "llvm/Support/Allocator.h"
64#include "llvm/Support/ArrayRecycler.h"
65#include "llvm/Support/AtomicOrdering.h"
66#include "llvm/Support/Casting.h"
67#include "llvm/Support/Compiler.h"
68#include "llvm/Support/Debug.h"
69#include "llvm/Support/raw_ostream.h"
70#include "llvm/Transforms/Scalar.h"
71#include "llvm/Transforms/Scalar/GVN.h"
72#include "llvm/Transforms/Scalar/GVNExpression.h"
73#include "llvm/Transforms/Utils/BasicBlockUtils.h"
74#include "llvm/Transforms/Utils/Local.h"
75#include <algorithm>
76#include <cassert>
77#include <cstddef>
78#include <cstdint>
79#include <iterator>
80#include <utility>
81
82using namespace llvm;
83
84#define DEBUG_TYPE "gvn-sink"
85
86STATISTIC(NumRemoved, "Number of instructions removed");
87
88namespace llvm {
89namespace GVNExpression {
90
91LLVM_DUMP_METHOD void Expression::dump() const {
92  print(dbgs());
93  dbgs() << "\n";
94}
95
96} // end namespace GVNExpression
97} // end namespace llvm
98
99namespace {
100
101static bool isMemoryInst(const Instruction *I) {
102  return isa<LoadInst>(I) || isa<StoreInst>(I) ||
103         (isa<InvokeInst>(I) && !cast<InvokeInst>(I)->doesNotAccessMemory()) ||
104         (isa<CallInst>(I) && !cast<CallInst>(I)->doesNotAccessMemory());
105}
106
107/// Iterates through instructions in a set of blocks in reverse order from the
108/// first non-terminator. For example (assume all blocks have size n):
109///   LockstepReverseIterator I([B1, B2, B3]);
110///   *I-- = [B1[n], B2[n], B3[n]];
111///   *I-- = [B1[n-1], B2[n-1], B3[n-1]];
112///   *I-- = [B1[n-2], B2[n-2], B3[n-2]];
113///   ...
114///
115/// It continues until all blocks have been exhausted. Use \c getActiveBlocks()
116/// to
117/// determine which blocks are still going and the order they appear in the
118/// list returned by operator*.
119class LockstepReverseIterator {
120  ArrayRef<BasicBlock *> Blocks;
121  SmallSetVector<BasicBlock *, 4> ActiveBlocks;
122  SmallVector<Instruction *, 4> Insts;
123  bool Fail;
124
125public:
126  LockstepReverseIterator(ArrayRef<BasicBlock *> Blocks) : Blocks(Blocks) {
127    reset();
128  }
129
130  void reset() {
131    Fail = false;
132    ActiveBlocks.clear();
133    for (BasicBlock *BB : Blocks)
134      ActiveBlocks.insert(BB);
135    Insts.clear();
136    for (BasicBlock *BB : Blocks) {
137      if (BB->size() <= 1) {
138        // Block wasn't big enough - only contained a terminator.
139        ActiveBlocks.remove(BB);
140        continue;
141      }
142      Insts.push_back(BB->getTerminator()->getPrevNode());
143    }
144    if (Insts.empty())
145      Fail = true;
146  }
147
148  bool isValid() const { return !Fail; }
149  ArrayRef<Instruction *> operator*() const { return Insts; }
150
151  // Note: This needs to return a SmallSetVector as the elements of
152  // ActiveBlocks will be later copied to Blocks using std::copy. The
153  // resultant order of elements in Blocks needs to be deterministic.
154  // Using SmallPtrSet instead causes non-deterministic order while
155  // copying. And we cannot simply sort Blocks as they need to match the
156  // corresponding Values.
157  SmallSetVector<BasicBlock *, 4> &getActiveBlocks() { return ActiveBlocks; }
158
159  void restrictToBlocks(SmallSetVector<BasicBlock *, 4> &Blocks) {
160    for (auto II = Insts.begin(); II != Insts.end();) {
161      if (std::find(Blocks.begin(), Blocks.end(), (*II)->getParent()) ==
162          Blocks.end()) {
163        ActiveBlocks.remove((*II)->getParent());
164        II = Insts.erase(II);
165      } else {
166        ++II;
167      }
168    }
169  }
170
171  void operator--() {
172    if (Fail)
173      return;
174    SmallVector<Instruction *, 4> NewInsts;
175    for (auto *Inst : Insts) {
176      if (Inst == &Inst->getParent()->front())
177        ActiveBlocks.remove(Inst->getParent());
178      else
179        NewInsts.push_back(Inst->getPrevNode());
180    }
181    if (NewInsts.empty()) {
182      Fail = true;
183      return;
184    }
185    Insts = NewInsts;
186  }
187};
188
189//===----------------------------------------------------------------------===//
190
191/// Candidate solution for sinking. There may be different ways to
192/// sink instructions, differing in the number of instructions sunk,
193/// the number of predecessors sunk from and the number of PHIs
194/// required.
195struct SinkingInstructionCandidate {
196  unsigned NumBlocks;
197  unsigned NumInstructions;
198  unsigned NumPHIs;
199  unsigned NumMemoryInsts;
200  int Cost = -1;
201  SmallVector<BasicBlock *, 4> Blocks;
202
203  void calculateCost(unsigned NumOrigPHIs, unsigned NumOrigBlocks) {
204    unsigned NumExtraPHIs = NumPHIs - NumOrigPHIs;
205    unsigned SplitEdgeCost = (NumOrigBlocks > NumBlocks) ? 2 : 0;
206    Cost = (NumInstructions * (NumBlocks - 1)) -
207           (NumExtraPHIs *
208            NumExtraPHIs) // PHIs are expensive, so make sure they're worth it.
209           - SplitEdgeCost;
210  }
211
212  bool operator>(const SinkingInstructionCandidate &Other) const {
213    return Cost > Other.Cost;
214  }
215};
216
217#ifndef NDEBUG
218raw_ostream &operator<<(raw_ostream &OS, const SinkingInstructionCandidate &C) {
219  OS << "<Candidate Cost=" << C.Cost << " #Blocks=" << C.NumBlocks
220     << " #Insts=" << C.NumInstructions << " #PHIs=" << C.NumPHIs << ">";
221  return OS;
222}
223#endif
224
225//===----------------------------------------------------------------------===//
226
227/// Describes a PHI node that may or may not exist. These track the PHIs
228/// that must be created if we sunk a sequence of instructions. It provides
229/// a hash function for efficient equality comparisons.
230class ModelledPHI {
231  SmallVector<Value *, 4> Values;
232  SmallVector<BasicBlock *, 4> Blocks;
233
234public:
235  ModelledPHI() = default;
236
237  ModelledPHI(const PHINode *PN) {
238    // BasicBlock comes first so we sort by basic block pointer order, then by value pointer order.
239    SmallVector<std::pair<BasicBlock *, Value *>, 4> Ops;
240    for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I)
241      Ops.push_back({PN->getIncomingBlock(I), PN->getIncomingValue(I)});
242    llvm::sort(Ops);
243    for (auto &P : Ops) {
244      Blocks.push_back(P.first);
245      Values.push_back(P.second);
246    }
247  }
248
249  /// Create a dummy ModelledPHI that will compare unequal to any other ModelledPHI
250  /// without the same ID.
251  /// \note This is specifically for DenseMapInfo - do not use this!
252  static ModelledPHI createDummy(size_t ID) {
253    ModelledPHI M;
254    M.Values.push_back(reinterpret_cast<Value*>(ID));
255    return M;
256  }
257
258  /// Create a PHI from an array of incoming values and incoming blocks.
259  template <typename VArray, typename BArray>
260  ModelledPHI(const VArray &V, const BArray &B) {
261    llvm::copy(V, std::back_inserter(Values));
262    llvm::copy(B, std::back_inserter(Blocks));
263  }
264
265  /// Create a PHI from [I[OpNum] for I in Insts].
266  template <typename BArray>
267  ModelledPHI(ArrayRef<Instruction *> Insts, unsigned OpNum, const BArray &B) {
268    llvm::copy(B, std::back_inserter(Blocks));
269    for (auto *I : Insts)
270      Values.push_back(I->getOperand(OpNum));
271  }
272
273  /// Restrict the PHI's contents down to only \c NewBlocks.
274  /// \c NewBlocks must be a subset of \c this->Blocks.
275  void restrictToBlocks(const SmallSetVector<BasicBlock *, 4> &NewBlocks) {
276    auto BI = Blocks.begin();
277    auto VI = Values.begin();
278    while (BI != Blocks.end()) {
279      assert(VI != Values.end());
280      if (std::find(NewBlocks.begin(), NewBlocks.end(), *BI) ==
281          NewBlocks.end()) {
282        BI = Blocks.erase(BI);
283        VI = Values.erase(VI);
284      } else {
285        ++BI;
286        ++VI;
287      }
288    }
289    assert(Blocks.size() == NewBlocks.size());
290  }
291
292  ArrayRef<Value *> getValues() const { return Values; }
293
294  bool areAllIncomingValuesSame() const {
295    return llvm::all_of(Values, [&](Value *V) { return V == Values[0]; });
296  }
297
298  bool areAllIncomingValuesSameType() const {
299    return llvm::all_of(
300        Values, [&](Value *V) { return V->getType() == Values[0]->getType(); });
301  }
302
303  bool areAnyIncomingValuesConstant() const {
304    return llvm::any_of(Values, [&](Value *V) { return isa<Constant>(V); });
305  }
306
307  // Hash functor
308  unsigned hash() const {
309      return (unsigned)hash_combine_range(Values.begin(), Values.end());
310  }
311
312  bool operator==(const ModelledPHI &Other) const {
313    return Values == Other.Values && Blocks == Other.Blocks;
314  }
315};
316
/// Key traits allowing ModelledPHI to be used as a DenseSet element. This is
/// a local (anonymous-namespace) template named DenseMapInfo, used by the
/// ModelledPHISet alias below; the empty/tombstone keys are dummy PHIs whose
/// single Value pointer is a small integer ID (see ModelledPHI::createDummy),
/// so they compare unequal to any real ModelledPHI and to each other.
template <typename ModelledPHI> struct DenseMapInfo {
  static inline ModelledPHI &getEmptyKey() {
    static ModelledPHI Dummy = ModelledPHI::createDummy(0);
    return Dummy;
  }

  static inline ModelledPHI &getTombstoneKey() {
    static ModelledPHI Dummy = ModelledPHI::createDummy(1);
    return Dummy;
  }

  static unsigned getHashValue(const ModelledPHI &V) { return V.hash(); }

  static bool isEqual(const ModelledPHI &LHS, const ModelledPHI &RHS) {
    return LHS == RHS;
  }
};

// Set of candidate PHIs, deduplicated via the traits above.
using ModelledPHISet = DenseSet<ModelledPHI, DenseMapInfo<ModelledPHI>>;
336
337//===----------------------------------------------------------------------===//
338//                             ValueTable
339//===----------------------------------------------------------------------===//
340// This is a value number table where the value number is a function of the
341// *uses* of a value, rather than its operands. Thus, if VN(A) == VN(B) we know
342// that the program would be equivalent if we replaced A with PHI(A, B).
343//===----------------------------------------------------------------------===//
344
/// A GVN expression describing how an instruction is used. The operands
/// field of BasicExpression is used to store uses, not operands.
///
/// This class also contains fields for discriminators used when determining
/// equivalence of instructions with sideeffects.
class InstructionUseExpr : public GVNExpression::BasicExpression {
  // ID of the next memory-defining instruction; see
  // ValueTable::getMemoryUseOrder().
  unsigned MemoryUseOrder = -1;
  // Discriminator for volatile loads/stores.
  bool Volatile = false;
  // Shuffle mask, set only for shufflevector instructions.
  ArrayRef<int> ShuffleMask;

public:
  InstructionUseExpr(Instruction *I, ArrayRecycler<Value *> &R,
                     BumpPtrAllocator &A)
      : GVNExpression::BasicExpression(I->getNumUses()) {
    allocateOperands(R, A);
    setOpcode(I->getOpcode());
    setType(I->getType());

    if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(I))
      ShuffleMask = SVI->getShuffleMask().copy(A);

    // Store the *users* of I rather than its operands: equality of these
    // expressions means "used equivalently", which is what sinking needs.
    // Sort by pointer so operand order is consistent for comparison.
    for (auto &U : I->uses())
      op_push_back(U.getUser());
    llvm::sort(op_begin(), op_end());
  }

  void setMemoryUseOrder(unsigned MUO) { MemoryUseOrder = MUO; }
  void setVolatile(bool V) { Volatile = V; }

  hash_code getHashValue() const override {
    return hash_combine(GVNExpression::BasicExpression::getHashValue(),
                        MemoryUseOrder, Volatile, ShuffleMask);
  }

  /// Hash this expression with each operand (i.e. each user) first mapped
  /// through \p MapFn — used by ValueTable to hash in terms of value
  /// numbers rather than raw pointers.
  template <typename Function> hash_code getHashValue(Function MapFn) {
    hash_code H = hash_combine(getOpcode(), getType(), MemoryUseOrder, Volatile,
                               ShuffleMask);
    for (auto *V : operands())
      H = hash_combine(H, MapFn(V));
    return H;
  }
};
387
class ValueTable {
  // Number assigned to each Value seen so far.
  DenseMap<Value *, uint32_t> ValueNumbering;
  // Number assigned to each use-expression.
  DenseMap<GVNExpression::Expression *, uint32_t> ExpressionNumbering;
  // Number keyed by an expression hash computed over the value numbers of
  // the uses; distinct expressions that hash alike share a number.
  DenseMap<size_t, uint32_t> HashNumbering;
  BumpPtrAllocator Allocator;
  ArrayRecycler<Value *> Recycler;
  uint32_t nextValueNumber = 1;

  /// Create an expression for I based on its opcode and its uses. If I
  /// touches or reads memory, the expression is also based upon its memory
  /// order - see \c getMemoryUseOrder().
  InstructionUseExpr *createExpr(Instruction *I) {
    InstructionUseExpr *E =
        new (Allocator) InstructionUseExpr(I, Recycler, Allocator);
    if (isMemoryInst(I))
      E->setMemoryUseOrder(getMemoryUseOrder(I));

    if (CmpInst *C = dyn_cast<CmpInst>(I)) {
      // Fold the predicate into the opcode so compares with different
      // predicates get different expressions.
      CmpInst::Predicate Predicate = C->getPredicate();
      E->setOpcode((C->getOpcode() << 8) | Predicate);
    }
    return E;
  }

  /// Helper to compute the value number for a memory instruction
  /// (LoadInst/StoreInst), including checking the memory ordering and
  /// volatility. Returns nullptr (unsinkable) for atomic or
  /// stronger-than-unordered accesses.
  template <class Inst> InstructionUseExpr *createMemoryExpr(Inst *I) {
    if (isStrongerThanUnordered(I->getOrdering()) || I->isAtomic())
      return nullptr;
    InstructionUseExpr *E = createExpr(I);
    E->setVolatile(I->isVolatile());
    return E;
  }

public:
  ValueTable() = default;

  /// Returns the value number for the specified value, assigning
  /// it a new number if it did not have one before.
  uint32_t lookupOrAdd(Value *V) {
    auto VI = ValueNumbering.find(V);
    if (VI != ValueNumbering.end())
      return VI->second;

    // Non-instructions (arguments, constants, ...) get a fresh number each.
    if (!isa<Instruction>(V)) {
      ValueNumbering[V] = nextValueNumber;
      return nextValueNumber++;
    }

    Instruction *I = cast<Instruction>(V);
    InstructionUseExpr *exp = nullptr;
    switch (I->getOpcode()) {
    case Instruction::Load:
      exp = createMemoryExpr(cast<LoadInst>(I));
      break;
    case Instruction::Store:
      exp = createMemoryExpr(cast<StoreInst>(I));
      break;
    case Instruction::Call:
    case Instruction::Invoke:
    case Instruction::FNeg:
    case Instruction::Add:
    case Instruction::FAdd:
    case Instruction::Sub:
    case Instruction::FSub:
    case Instruction::Mul:
    case Instruction::FMul:
    case Instruction::UDiv:
    case Instruction::SDiv:
    case Instruction::FDiv:
    case Instruction::URem:
    case Instruction::SRem:
    case Instruction::FRem:
    case Instruction::Shl:
    case Instruction::LShr:
    case Instruction::AShr:
    case Instruction::And:
    case Instruction::Or:
    case Instruction::Xor:
    case Instruction::ICmp:
    case Instruction::FCmp:
    case Instruction::Trunc:
    case Instruction::ZExt:
    case Instruction::SExt:
    case Instruction::FPToUI:
    case Instruction::FPToSI:
    case Instruction::UIToFP:
    case Instruction::SIToFP:
    case Instruction::FPTrunc:
    case Instruction::FPExt:
    case Instruction::PtrToInt:
    case Instruction::IntToPtr:
    case Instruction::BitCast:
    case Instruction::AddrSpaceCast:
    case Instruction::Select:
    case Instruction::ExtractElement:
    case Instruction::InsertElement:
    case Instruction::ShuffleVector:
    case Instruction::InsertValue:
    case Instruction::GetElementPtr:
      exp = createExpr(I);
      break;
    default:
      // Opcode we don't model (terminators, PHIs, atomics, ...).
      break;
    }

    // No expression: the instruction is only equivalent to itself, so give
    // it a unique number.
    if (!exp) {
      ValueNumbering[V] = nextValueNumber;
      return nextValueNumber++;
    }

    uint32_t e = ExpressionNumbering[exp];
    if (!e) {
      // Hash in terms of value numbers of the users; note this recurses
      // back into lookupOrAdd for each user.
      hash_code H = exp->getHashValue([=](Value *V) { return lookupOrAdd(V); });
      auto I = HashNumbering.find(H);
      if (I != HashNumbering.end()) {
        e = I->second;
      } else {
        e = nextValueNumber++;
        HashNumbering[H] = e;
        ExpressionNumbering[exp] = e;
      }
    }
    ValueNumbering[V] = e;
    return e;
  }

  /// Returns the value number of the specified value. Fails if the value has
  /// not yet been numbered.
  uint32_t lookup(Value *V) const {
    auto VI = ValueNumbering.find(V);
    assert(VI != ValueNumbering.end() && "Value not numbered?");
    return VI->second;
  }

  /// Removes all value numberings and resets the value table.
  void clear() {
    ValueNumbering.clear();
    ExpressionNumbering.clear();
    HashNumbering.clear();
    Recycler.clear(Allocator);
    nextValueNumber = 1;
  }

  /// \c Inst uses or touches memory. Return an ID describing the memory state
  /// at \c Inst such that if getMemoryUseOrder(I1) == getMemoryUseOrder(I2),
  /// the exact same memory operations happen after I1 and I2.
  ///
  /// This is a very hard problem in general, so we use domain-specific
  /// knowledge that we only ever check for equivalence between blocks sharing a
  /// single immediate successor that is common, and when determining if I1 ==
  /// I2 we will have already determined that next(I1) == next(I2). This
  /// inductive property allows us to simply return the value number of the next
  /// instruction that defines memory.
  uint32_t getMemoryUseOrder(Instruction *Inst) {
    auto *BB = Inst->getParent();
    for (auto I = std::next(Inst->getIterator()), E = BB->end();
         I != E && !I->isTerminator(); ++I) {
      // Skip instructions that don't define memory: non-memory instructions,
      // loads, and read-only calls/invokes.
      if (!isMemoryInst(&*I))
        continue;
      if (isa<LoadInst>(&*I))
        continue;
      CallInst *CI = dyn_cast<CallInst>(&*I);
      if (CI && CI->onlyReadsMemory())
        continue;
      InvokeInst *II = dyn_cast<InvokeInst>(&*I);
      if (II && II->onlyReadsMemory())
        continue;
      return lookupOrAdd(&*I);
    }
    // No memory-defining instruction before the terminator.
    return 0;
  }
};
562
563//===----------------------------------------------------------------------===//
564
class GVNSink {
public:
  GVNSink() = default;

  /// Attempt sinking in every block of \p F; returns true if any block
  /// reported sunk instructions.
  bool run(Function &F) {
    LLVM_DEBUG(dbgs() << "GVNSink: running on function @" << F.getName()
                      << "\n");

    unsigned NumSunk = 0;
    ReversePostOrderTraversal<Function*> RPOT(&F);
    for (auto *N : RPOT)
      NumSunk += sinkBB(N);

    return NumSunk > 0;
  }

private:
  // Use-based value numbering shared across all blocks of the function.
  ValueTable VN;

  /// Returns true if \p I must not be moved: PHIs, EH pads, allocas, and
  /// token-typed instructions.
  bool shouldAvoidSinkingInstruction(Instruction *I) {
    // These instructions may change or break semantics if moved.
    if (isa<PHINode>(I) || I->isEHPad() || isa<AllocaInst>(I) ||
        I->getType()->isTokenTy())
      return true;
    return false;
  }

  /// The main heuristic function. Analyze the set of instructions pointed to by
  /// LRI and return a candidate solution if these instructions can be sunk, or
  /// None otherwise.
  Optional<SinkingInstructionCandidate> analyzeInstructionForSinking(
      LockstepReverseIterator &LRI, unsigned &InstNum, unsigned &MemoryInstNum,
      ModelledPHISet &NeededPHIs, SmallPtrSetImpl<Value *> &PHIContents);

  /// Create a ModelledPHI for each PHI in BB, adding to PHIs.
  void analyzeInitialPHIs(BasicBlock *BB, ModelledPHISet &PHIs,
                          SmallPtrSetImpl<Value *> &PHIContents) {
    for (PHINode &PN : BB->phis()) {
      auto MPHI = ModelledPHI(&PN);
      PHIs.insert(MPHI);
      for (auto *V : MPHI.getValues())
        PHIContents.insert(V);
    }
  }

  /// The main instruction sinking driver. Set up state and try and sink
  /// instructions into BBEnd from its predecessors.
  unsigned sinkBB(BasicBlock *BBEnd);

  /// Perform the actual mechanics of sinking an instruction from Blocks into
  /// BBEnd, which is their only successor.
  void sinkLastInstruction(ArrayRef<BasicBlock *> Blocks, BasicBlock *BBEnd);

  /// Remove PHIs that all have the same incoming value.
  void foldPointlessPHINodes(BasicBlock *BB) {
    auto I = BB->begin();
    // Note: I is post-incremented before PN can be erased, so the iterator
    // stays valid across eraseFromParent().
    while (PHINode *PN = dyn_cast<PHINode>(I++)) {
      if (!llvm::all_of(PN->incoming_values(), [&](const Value *V) {
            return V == PN->getIncomingValue(0);
          }))
        continue;
      // A PHI whose only incoming value is itself carries no value at all;
      // replace its uses with undef instead.
      if (PN->getIncomingValue(0) != PN)
        PN->replaceAllUsesWith(PN->getIncomingValue(0));
      else
        PN->replaceAllUsesWith(UndefValue::get(PN->getType()));
      PN->eraseFromParent();
    }
  }
};
634
Optional<SinkingInstructionCandidate> GVNSink::analyzeInstructionForSinking(
  LockstepReverseIterator &LRI, unsigned &InstNum, unsigned &MemoryInstNum,
  ModelledPHISet &NeededPHIs, SmallPtrSetImpl<Value *> &PHIContents) {
  auto Insts = *LRI;
  LLVM_DEBUG(dbgs() << " -- Analyzing instruction set: [\n"; for (auto *I
                                                                  : Insts) {
    I->dump();
  } dbgs() << " ]\n";);

  // Number each instruction and count how often each number occurs; the most
  // frequent number identifies the largest group of mutually-sinkable
  // instructions.
  DenseMap<uint32_t, unsigned> VNums;
  for (auto *I : Insts) {
    uint32_t N = VN.lookupOrAdd(I);
    LLVM_DEBUG(dbgs() << " VN=" << Twine::utohexstr(N) << " for" << *I << "\n");
    // NOTE(review): ~0U is treated as "unsinkable"; lookupOrAdd as written
    // doesn't obviously produce it — confirm the intended sentinel.
    if (N == ~0U)
      return None;
    VNums[N]++;
  }
  unsigned VNumToSink =
      std::max_element(VNums.begin(), VNums.end(),
                       [](const std::pair<uint32_t, unsigned> &I,
                          const std::pair<uint32_t, unsigned> &J) {
                         return I.second < J.second;
                       })
          ->first;

  if (VNums[VNumToSink] == 1)
    // Can't sink anything!
    return None;

  // Now restrict the number of incoming blocks down to only those with
  // VNumToSink.
  auto &ActivePreds = LRI.getActiveBlocks();
  unsigned InitialActivePredSize = ActivePreds.size();
  SmallVector<Instruction *, 4> NewInsts;
  for (auto *I : Insts) {
    if (VN.lookup(I) != VNumToSink)
      ActivePreds.remove(I->getParent());
    else
      NewInsts.push_back(I);
  }
  for (auto *I : NewInsts)
    if (shouldAvoidSinkingInstruction(I))
      return None;

  // If we've restricted the incoming blocks, restrict all needed PHIs also
  // to that set.
  bool RecomputePHIContents = false;
  if (ActivePreds.size() != InitialActivePredSize) {
    ModelledPHISet NewNeededPHIs;
    // P is deliberately a copy: restrictToBlocks mutates it before it is
    // re-inserted into the new set.
    for (auto P : NeededPHIs) {
      P.restrictToBlocks(ActivePreds);
      NewNeededPHIs.insert(P);
    }
    NeededPHIs = NewNeededPHIs;
    LRI.restrictToBlocks(ActivePreds);
    RecomputePHIContents = true;
  }

  // The sunk instruction's results.
  ModelledPHI NewPHI(NewInsts, ActivePreds);

  // Does sinking this instruction render previous PHIs redundant?
  if (NeededPHIs.find(NewPHI) != NeededPHIs.end()) {
    NeededPHIs.erase(NewPHI);
    RecomputePHIContents = true;
  }

  if (RecomputePHIContents) {
    // The needed PHIs have changed, so recompute the set of all needed
    // values.
    PHIContents.clear();
    for (auto &PHI : NeededPHIs)
      PHIContents.insert(PHI.getValues().begin(), PHI.getValues().end());
  }

  // Is this instruction required by a later PHI that doesn't match this PHI?
  // if so, we can't sink this instruction.
  for (auto *V : NewPHI.getValues())
    if (PHIContents.count(V))
      // V exists in this PHI, but the whole PHI is different to NewPHI
      // (else it would have been removed earlier). We cannot continue
      // because this isn't representable.
      return None;

  // Which operands need PHIs?
  // FIXME: If any of these fail, we should partition up the candidates to
  // try and continue making progress.
  Instruction *I0 = NewInsts[0];

  // If all instructions that are going to participate don't have the same
  // number of operands, we can't do any useful PHI analysis for all operands.
  auto hasDifferentNumOperands = [&I0](Instruction *I) {
    return I->getNumOperands() != I0->getNumOperands();
  };
  if (any_of(NewInsts, hasDifferentNumOperands))
    return None;

  for (unsigned OpNum = 0, E = I0->getNumOperands(); OpNum != E; ++OpNum) {
    ModelledPHI PHI(NewInsts, OpNum, ActivePreds);
    if (PHI.areAllIncomingValuesSame())
      continue;
    if (!canReplaceOperandWithVariable(I0, OpNum))
      // We can't create a PHI from this instruction!
      return None;
    if (NeededPHIs.count(PHI))
      continue;
    if (!PHI.areAllIncomingValuesSameType())
      return None;
    // Don't create indirect calls! The called value is the final operand.
    if ((isa<CallInst>(I0) || isa<InvokeInst>(I0)) && OpNum == E - 1 &&
        PHI.areAnyIncomingValuesConstant())
      return None;

    // NOTE(review): reserve(size()) looks like a no-op — confirm whether a
    // growth hint (e.g. size()+1) was intended here.
    NeededPHIs.reserve(NeededPHIs.size());
    NeededPHIs.insert(PHI);
    PHIContents.insert(PHI.getValues().begin(), PHI.getValues().end());
  }

  if (isMemoryInst(NewInsts[0]))
    ++MemoryInstNum;

  // Record how deep we've sunk and how the candidate looks at this depth.
  SinkingInstructionCandidate Cand;
  Cand.NumInstructions = ++InstNum;
  Cand.NumMemoryInsts = MemoryInstNum;
  Cand.NumBlocks = ActivePreds.size();
  Cand.NumPHIs = NeededPHIs.size();
  for (auto *C : ActivePreds)
    Cand.Blocks.push_back(C);

  return Cand;
}
766
767unsigned GVNSink::sinkBB(BasicBlock *BBEnd) {
768  LLVM_DEBUG(dbgs() << "GVNSink: running on basic block ";
769             BBEnd->printAsOperand(dbgs()); dbgs() << "\n");
770  SmallVector<BasicBlock *, 4> Preds;
771  for (auto *B : predecessors(BBEnd)) {
772    auto *T = B->getTerminator();
773    if (isa<BranchInst>(T) || isa<SwitchInst>(T))
774      Preds.push_back(B);
775    else
776      return 0;
777  }
778  if (Preds.size() < 2)
779    return 0;
780  llvm::sort(Preds);
781
782  unsigned NumOrigPreds = Preds.size();
783  // We can only sink instructions through unconditional branches.
784  for (auto I = Preds.begin(); I != Preds.end();) {
785    if ((*I)->getTerminator()->getNumSuccessors() != 1)
786      I = Preds.erase(I);
787    else
788      ++I;
789  }
790
791  LockstepReverseIterator LRI(Preds);
792  SmallVector<SinkingInstructionCandidate, 4> Candidates;
793  unsigned InstNum = 0, MemoryInstNum = 0;
794  ModelledPHISet NeededPHIs;
795  SmallPtrSet<Value *, 4> PHIContents;
796  analyzeInitialPHIs(BBEnd, NeededPHIs, PHIContents);
797  unsigned NumOrigPHIs = NeededPHIs.size();
798
799  while (LRI.isValid()) {
800    auto Cand = analyzeInstructionForSinking(LRI, InstNum, MemoryInstNum,
801                                             NeededPHIs, PHIContents);
802    if (!Cand)
803      break;
804    Cand->calculateCost(NumOrigPHIs, Preds.size());
805    Candidates.emplace_back(*Cand);
806    --LRI;
807  }
808
809  llvm::stable_sort(Candidates, std::greater<SinkingInstructionCandidate>());
810  LLVM_DEBUG(dbgs() << " -- Sinking candidates:\n"; for (auto &C
811                                                         : Candidates) dbgs()
812                                                    << "  " << C << "\n";);
813
814  // Pick the top candidate, as long it is positive!
815  if (Candidates.empty() || Candidates.front().Cost <= 0)
816    return 0;
817  auto C = Candidates.front();
818
819  LLVM_DEBUG(dbgs() << " -- Sinking: " << C << "\n");
820  BasicBlock *InsertBB = BBEnd;
821  if (C.Blocks.size() < NumOrigPreds) {
822    LLVM_DEBUG(dbgs() << " -- Splitting edge to ";
823               BBEnd->printAsOperand(dbgs()); dbgs() << "\n");
824    InsertBB = SplitBlockPredecessors(BBEnd, C.Blocks, ".gvnsink.split");
825    if (!InsertBB) {
826      LLVM_DEBUG(dbgs() << " -- FAILED to split edge!\n");
827      // Edge couldn't be split.
828      return 0;
829    }
830  }
831
832  for (unsigned I = 0; I < C.NumInstructions; ++I)
833    sinkLastInstruction(C.Blocks, InsertBB);
834
835  return C.NumInstructions;
836}
837
void GVNSink::sinkLastInstruction(ArrayRef<BasicBlock *> Blocks,
                                  BasicBlock *BBEnd) {
  // Gather the last non-terminator instruction from each block; these have
  // already been proven equivalent by the analysis.
  SmallVector<Instruction *, 4> Insts;
  for (BasicBlock *BB : Blocks)
    Insts.push_back(BB->getTerminator()->getPrevNode());
  Instruction *I0 = Insts.front();

  // For each operand slot, either all instructions agree (reuse the value)
  // or we synthesize a PHI in BBEnd merging the differing operands.
  SmallVector<Value *, 4> NewOperands;
  for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) {
    bool NeedPHI = llvm::any_of(Insts, [&I0, O](const Instruction *I) {
      return I->getOperand(O) != I0->getOperand(O);
    });
    if (!NeedPHI) {
      NewOperands.push_back(I0->getOperand(O));
      continue;
    }

    // Create a new PHI in the successor block and populate it.
    auto *Op = I0->getOperand(O);
    assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!");
    auto *PN = PHINode::Create(Op->getType(), Insts.size(),
                               Op->getName() + ".sink", &BBEnd->front());
    for (auto *I : Insts)
      PN->addIncoming(I->getOperand(O), I->getParent());
    NewOperands.push_back(PN);
  }

  // Arbitrarily use I0 as the new "common" instruction; remap its operands
  // and move it to the start of the successor block.
  for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O)
    I0->getOperandUse(O).set(NewOperands[O]);
  I0->moveBefore(&*BBEnd->getFirstInsertionPt());

  // Update metadata and IR flags.
  for (auto *I : Insts)
    if (I != I0) {
      // Intersect metadata/flags so I0 remains correct standing in for all
      // of the sunk instructions.
      combineMetadataForCSE(I0, I, true);
      I0->andIRFlags(I);
    }

  // Redirect all uses of the duplicates to the surviving instruction before
  // deleting them.
  for (auto *I : Insts)
    if (I != I0)
      I->replaceAllUsesWith(I0);
  // Sinking may have made some of BBEnd's PHIs trivial; clean them up.
  foldPointlessPHINodes(BBEnd);

  // Finally nuke all instructions apart from the common instruction.
  for (auto *I : Insts)
    if (I != I0)
      I->eraseFromParent();

  NumRemoved += Insts.size() - 1;
}
890
891////////////////////////////////////////////////////////////////////////////////
892// Pass machinery / boilerplate
893
894class GVNSinkLegacyPass : public FunctionPass {
895public:
896  static char ID;
897
898  GVNSinkLegacyPass() : FunctionPass(ID) {
899    initializeGVNSinkLegacyPassPass(*PassRegistry::getPassRegistry());
900  }
901
902  bool runOnFunction(Function &F) override {
903    if (skipFunction(F))
904      return false;
905    GVNSink G;
906    return G.run(F);
907  }
908
909  void getAnalysisUsage(AnalysisUsage &AU) const override {
910    AU.addPreserved<GlobalsAAWrapperPass>();
911  }
912};
913
914} // end anonymous namespace
915
916PreservedAnalyses GVNSinkPass::run(Function &F, FunctionAnalysisManager &AM) {
917  GVNSink G;
918  if (!G.run(F))
919    return PreservedAnalyses::all();
920
921  PreservedAnalyses PA;
922  PA.preserve<GlobalsAA>();
923  return PA;
924}
925
char GVNSinkLegacyPass::ID = 0;

// Register the legacy pass under the "gvn-sink" command-line name.
INITIALIZE_PASS_BEGIN(GVNSinkLegacyPass, "gvn-sink",
                      "Early GVN sinking of Expressions", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
INITIALIZE_PASS_END(GVNSinkLegacyPass, "gvn-sink",
                    "Early GVN sinking of Expressions", false, false)

// Factory used by the legacy pass manager to construct the pass.
FunctionPass *llvm::createGVNSinkPass() { return new GVNSinkLegacyPass(); }
936