1//===- SimplifyCFG.cpp - Code to perform CFG simplification ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Peephole optimize the CFG.
10//
11//===----------------------------------------------------------------------===//
12
13#include "llvm/ADT/APInt.h"
14#include "llvm/ADT/ArrayRef.h"
15#include "llvm/ADT/DenseMap.h"
16#include "llvm/ADT/Optional.h"
17#include "llvm/ADT/STLExtras.h"
18#include "llvm/ADT/SetOperations.h"
19#include "llvm/ADT/SetVector.h"
20#include "llvm/ADT/SmallPtrSet.h"
21#include "llvm/ADT/SmallVector.h"
22#include "llvm/ADT/Statistic.h"
23#include "llvm/ADT/StringRef.h"
24#include "llvm/Analysis/AssumptionCache.h"
25#include "llvm/Analysis/ConstantFolding.h"
26#include "llvm/Analysis/EHPersonalities.h"
27#include "llvm/Analysis/GuardUtils.h"
28#include "llvm/Analysis/InstructionSimplify.h"
29#include "llvm/Analysis/MemorySSA.h"
30#include "llvm/Analysis/MemorySSAUpdater.h"
31#include "llvm/Analysis/TargetTransformInfo.h"
32#include "llvm/Analysis/ValueTracking.h"
33#include "llvm/IR/Attributes.h"
34#include "llvm/IR/BasicBlock.h"
35#include "llvm/IR/CFG.h"
36#include "llvm/IR/Constant.h"
37#include "llvm/IR/ConstantRange.h"
38#include "llvm/IR/Constants.h"
39#include "llvm/IR/DataLayout.h"
40#include "llvm/IR/DerivedTypes.h"
41#include "llvm/IR/Function.h"
42#include "llvm/IR/GlobalValue.h"
43#include "llvm/IR/GlobalVariable.h"
44#include "llvm/IR/IRBuilder.h"
45#include "llvm/IR/InstrTypes.h"
46#include "llvm/IR/Instruction.h"
47#include "llvm/IR/Instructions.h"
48#include "llvm/IR/IntrinsicInst.h"
49#include "llvm/IR/Intrinsics.h"
50#include "llvm/IR/LLVMContext.h"
51#include "llvm/IR/MDBuilder.h"
52#include "llvm/IR/Metadata.h"
53#include "llvm/IR/Module.h"
54#include "llvm/IR/NoFolder.h"
55#include "llvm/IR/Operator.h"
56#include "llvm/IR/PatternMatch.h"
57#include "llvm/IR/Type.h"
58#include "llvm/IR/Use.h"
59#include "llvm/IR/User.h"
60#include "llvm/IR/Value.h"
61#include "llvm/Support/Casting.h"
62#include "llvm/Support/CommandLine.h"
63#include "llvm/Support/Debug.h"
64#include "llvm/Support/ErrorHandling.h"
65#include "llvm/Support/KnownBits.h"
66#include "llvm/Support/MathExtras.h"
67#include "llvm/Support/raw_ostream.h"
68#include "llvm/Transforms/Utils/BasicBlockUtils.h"
69#include "llvm/Transforms/Utils/Local.h"
70#include "llvm/Transforms/Utils/ValueMapper.h"
71#include <algorithm>
72#include <cassert>
73#include <climits>
74#include <cstddef>
75#include <cstdint>
76#include <iterator>
77#include <map>
78#include <set>
79#include <tuple>
80#include <utility>
81#include <vector>
82
83using namespace llvm;
84using namespace PatternMatch;
85
86#define DEBUG_TYPE "simplifycfg"
87
88// Chosen as 2 so as to be cheap, but still to have enough power to fold
89// a select, so the "clamp" idiom (of a min followed by a max) will be caught.
90// To catch this, we need to fold a compare and a select, hence '2' being the
91// minimum reasonable default.
92static cl::opt<unsigned> PHINodeFoldingThreshold(
93    "phi-node-folding-threshold", cl::Hidden, cl::init(2),
94    cl::desc(
95        "Control the amount of phi node folding to perform (default = 2)"));
96
97static cl::opt<unsigned> TwoEntryPHINodeFoldingThreshold(
98    "two-entry-phi-node-folding-threshold", cl::Hidden, cl::init(4),
99    cl::desc("Control the maximal total instruction cost that we are willing "
100             "to speculatively execute to fold a 2-entry PHI node into a "
101             "select (default = 4)"));
102
103static cl::opt<bool> DupRet(
104    "simplifycfg-dup-ret", cl::Hidden, cl::init(false),
105    cl::desc("Duplicate return instructions into unconditional branches"));
106
107static cl::opt<bool>
108    SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true),
109               cl::desc("Sink common instructions down to the end block"));
110
111static cl::opt<bool> HoistCondStores(
112    "simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true),
113    cl::desc("Hoist conditional stores if an unconditional store precedes"));
114
115static cl::opt<bool> MergeCondStores(
116    "simplifycfg-merge-cond-stores", cl::Hidden, cl::init(true),
117    cl::desc("Hoist conditional stores even if an unconditional store does not "
118             "precede - hoist multiple conditional stores into a single "
119             "predicated store"));
120
121static cl::opt<bool> MergeCondStoresAggressively(
122    "simplifycfg-merge-cond-stores-aggressively", cl::Hidden, cl::init(false),
123    cl::desc("When merging conditional stores, do so even if the resultant "
124             "basic blocks are unlikely to be if-converted as a result"));
125
126static cl::opt<bool> SpeculateOneExpensiveInst(
127    "speculate-one-expensive-inst", cl::Hidden, cl::init(true),
128    cl::desc("Allow exactly one expensive instruction to be speculatively "
129             "executed"));
130
131static cl::opt<unsigned> MaxSpeculationDepth(
132    "max-speculation-depth", cl::Hidden, cl::init(10),
133    cl::desc("Limit maximum recursion depth when calculating costs of "
134             "speculatively executed instructions"));
135
136static cl::opt<int>
137MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden, cl::init(10),
138                  cl::desc("Max size of a block which is still considered "
139                           "small enough to thread through"));
140
141STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
142STATISTIC(NumLinearMaps,
143          "Number of switch instructions turned into linear mapping");
144STATISTIC(NumLookupTables,
145          "Number of switch instructions turned into lookup tables");
146STATISTIC(
147    NumLookupTablesHoles,
148    "Number of switch instructions turned into lookup tables (holes checked)");
149STATISTIC(NumTableCmpReuses, "Number of reused switch table lookup compares");
150STATISTIC(NumSinkCommons,
151          "Number of common instructions sunk down to the end block");
152STATISTIC(NumSpeculations, "Number of speculative executed instructions");
153
154namespace {
155
156// The first field contains the value that the switch produces when a certain
157// case group is selected, and the second field is a vector containing the
158// cases composing the case group.
159using SwitchCaseResultVectorTy =
160    SmallVector<std::pair<Constant *, SmallVector<ConstantInt *, 4>>, 2>;
161
162// The first field contains the phi node that generates a result of the switch
163// and the second field contains the value generated for a certain case in the
164// switch for that PHI.
165using SwitchCaseResultsTy = SmallVector<std::pair<PHINode *, Constant *>, 4>;
166
167/// ValueEqualityComparisonCase - Represents a case of a switch.
168struct ValueEqualityComparisonCase {
169  ConstantInt *Value;
170  BasicBlock *Dest;
171
172  ValueEqualityComparisonCase(ConstantInt *Value, BasicBlock *Dest)
173      : Value(Value), Dest(Dest) {}
174
175  bool operator<(ValueEqualityComparisonCase RHS) const {
176    // Comparing pointers is ok as we only rely on the order for uniquing.
177    return Value < RHS.Value;
178  }
179
180  bool operator==(BasicBlock *RHSDest) const { return Dest == RHSDest; }
181};
182
183class SimplifyCFGOpt {
184  const TargetTransformInfo &TTI;
185  const DataLayout &DL;
186  SmallPtrSetImpl<BasicBlock *> *LoopHeaders;
187  const SimplifyCFGOptions &Options;
188  bool Resimplify;
189
190  Value *isValueEqualityComparison(Instruction *TI);
191  BasicBlock *GetValueEqualityComparisonCases(
192      Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases);
193  bool SimplifyEqualityComparisonWithOnlyPredecessor(Instruction *TI,
194                                                     BasicBlock *Pred,
195                                                     IRBuilder<> &Builder);
196  bool FoldValueComparisonIntoPredecessors(Instruction *TI,
197                                           IRBuilder<> &Builder);
198
199  bool simplifyReturn(ReturnInst *RI, IRBuilder<> &Builder);
200  bool simplifyResume(ResumeInst *RI, IRBuilder<> &Builder);
201  bool simplifySingleResume(ResumeInst *RI);
202  bool simplifyCommonResume(ResumeInst *RI);
203  bool simplifyCleanupReturn(CleanupReturnInst *RI);
204  bool simplifyUnreachable(UnreachableInst *UI);
205  bool simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder);
206  bool simplifyIndirectBr(IndirectBrInst *IBI);
207  bool simplifyBranch(BranchInst *Branch, IRBuilder<> &Builder);
208  bool simplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder);
209  bool simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder);
210  bool SimplifyCondBranchToTwoReturns(BranchInst *BI, IRBuilder<> &Builder);
211
212  bool tryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
213                                             IRBuilder<> &Builder);
214
215  bool HoistThenElseCodeToIf(BranchInst *BI, const TargetTransformInfo &TTI);
216  bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
217                              const TargetTransformInfo &TTI);
218  bool SimplifyTerminatorOnSelect(Instruction *OldTerm, Value *Cond,
219                                  BasicBlock *TrueBB, BasicBlock *FalseBB,
220                                  uint32_t TrueWeight, uint32_t FalseWeight);
221  bool SimplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder,
222                                 const DataLayout &DL);
223  bool SimplifySwitchOnSelect(SwitchInst *SI, SelectInst *Select);
224  bool SimplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI);
225  bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder);
226
227public:
228  SimplifyCFGOpt(const TargetTransformInfo &TTI, const DataLayout &DL,
229                 SmallPtrSetImpl<BasicBlock *> *LoopHeaders,
230                 const SimplifyCFGOptions &Opts)
231      : TTI(TTI), DL(DL), LoopHeaders(LoopHeaders), Options(Opts) {}
232
233  bool run(BasicBlock *BB);
234  bool simplifyOnce(BasicBlock *BB);
235
236  // Helper to set Resimplify and return change indication.
237  bool requestResimplify() {
238    Resimplify = true;
239    return true;
240  }
241};
242
243} // end anonymous namespace
244
245/// Return true if it is safe to merge these two
246/// terminator instructions together.
247static bool
248SafeToMergeTerminators(Instruction *SI1, Instruction *SI2,
249                       SmallSetVector<BasicBlock *, 4> *FailBlocks = nullptr) {
250  if (SI1 == SI2)
251    return false; // Can't merge with self!
252
253  // It is not safe to merge these two switch instructions if they have a common
254  // successor, and if that successor has a PHI node, and if *that* PHI node has
255  // conflicting incoming values from the two switch blocks.
256  BasicBlock *SI1BB = SI1->getParent();
257  BasicBlock *SI2BB = SI2->getParent();
258
259  SmallPtrSet<BasicBlock *, 16> SI1Succs(succ_begin(SI1BB), succ_end(SI1BB));
260  bool Fail = false;
261  for (BasicBlock *Succ : successors(SI2BB))
262    if (SI1Succs.count(Succ))
263      for (BasicBlock::iterator BBI = Succ->begin(); isa<PHINode>(BBI); ++BBI) {
264        PHINode *PN = cast<PHINode>(BBI);
265        if (PN->getIncomingValueForBlock(SI1BB) !=
266            PN->getIncomingValueForBlock(SI2BB)) {
267          if (FailBlocks)
268            FailBlocks->insert(Succ);
269          Fail = true;
270        }
271      }
272
273  return !Fail;
274}
275
276/// Return true if it is safe and profitable to merge these two terminator
277/// instructions together, where SI1 is an unconditional branch. PhiNodes will
278/// store all PHI nodes in common successors.
279static bool
280isProfitableToFoldUnconditional(BranchInst *SI1, BranchInst *SI2,
281                                Instruction *Cond,
282                                SmallVectorImpl<PHINode *> &PhiNodes) {
283  if (SI1 == SI2)
284    return false; // Can't merge with self!
285  assert(SI1->isUnconditional() && SI2->isConditional());
286
287  // We fold the unconditional branch if we can easily update all PHI nodes in
288  // common successors:
289  // 1> We have a constant incoming value for the conditional branch;
290  // 2> We have "Cond" as the incoming value for the unconditional branch;
291  // 3> SI2->getCondition() and Cond have same operands.
292  CmpInst *Ci2 = dyn_cast<CmpInst>(SI2->getCondition());
293  if (!Ci2)
294    return false;
295  if (!(Cond->getOperand(0) == Ci2->getOperand(0) &&
296        Cond->getOperand(1) == Ci2->getOperand(1)) &&
297      !(Cond->getOperand(0) == Ci2->getOperand(1) &&
298        Cond->getOperand(1) == Ci2->getOperand(0)))
299    return false;
300
301  BasicBlock *SI1BB = SI1->getParent();
302  BasicBlock *SI2BB = SI2->getParent();
303  SmallPtrSet<BasicBlock *, 16> SI1Succs(succ_begin(SI1BB), succ_end(SI1BB));
304  for (BasicBlock *Succ : successors(SI2BB))
305    if (SI1Succs.count(Succ))
306      for (BasicBlock::iterator BBI = Succ->begin(); isa<PHINode>(BBI); ++BBI) {
307        PHINode *PN = cast<PHINode>(BBI);
308        if (PN->getIncomingValueForBlock(SI1BB) != Cond ||
309            !isa<ConstantInt>(PN->getIncomingValueForBlock(SI2BB)))
310          return false;
311        PhiNodes.push_back(PN);
312      }
313  return true;
314}
315
316/// Update PHI nodes in Succ to indicate that there will now be entries in it
317/// from the 'NewPred' block. The values that will be flowing into the PHI nodes
318/// will be the same as those coming in from ExistPred, an existing predecessor
319/// of Succ.
320static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred,
321                                  BasicBlock *ExistPred,
322                                  MemorySSAUpdater *MSSAU = nullptr) {
323  for (PHINode &PN : Succ->phis())
324    PN.addIncoming(PN.getIncomingValueForBlock(ExistPred), NewPred);
325  if (MSSAU)
326    if (auto *MPhi = MSSAU->getMemorySSA()->getMemoryAccess(Succ))
327      MPhi->addIncoming(MPhi->getIncomingValueForBlock(ExistPred), NewPred);
328}
329
330/// Compute an abstract "cost" of speculating the given instruction,
331/// which is assumed to be safe to speculate. TCC_Free means cheap,
332/// TCC_Basic means less cheap, and TCC_Expensive means prohibitively
333/// expensive.
334static unsigned ComputeSpeculationCost(const User *I,
335                                       const TargetTransformInfo &TTI) {
336  assert(isSafeToSpeculativelyExecute(I) &&
337         "Instruction is not safe to speculatively execute!");
338  return TTI.getUserCost(I, TargetTransformInfo::TCK_SizeAndLatency);
339}
340
341/// If we have a merge point of an "if condition" as accepted above,
342/// return true if the specified value dominates the block.  We
343/// don't handle the true generality of domination here, just a special case
344/// which works well enough for us.
345///
346/// If AggressiveInsts is non-null, and if V does not dominate BB, we check to
347/// see if V (which must be an instruction) and its recursive operands
348/// that do not dominate BB have a combined cost lower than CostRemaining and
349/// are non-trapping.  If both are true, the instruction is inserted into the
350/// set and true is returned.
351///
352/// The cost for most non-trapping instructions is defined as 1 except for
353/// Select whose cost is 2.
354///
355/// After this function returns, CostRemaining is decreased by the cost of
356/// V plus its non-dominating operands.  If that cost is greater than
357/// CostRemaining, false is returned and CostRemaining is undefined.
358static bool DominatesMergePoint(Value *V, BasicBlock *BB,
359                                SmallPtrSetImpl<Instruction *> &AggressiveInsts,
360                                int &BudgetRemaining,
361                                const TargetTransformInfo &TTI,
362                                unsigned Depth = 0) {
363  // It is possible to hit a zero-cost cycle (phi/gep instructions for example),
364  // so limit the recursion depth.
365  // TODO: While this recursion limit does prevent pathological behavior, it
366  // would be better to track visited instructions to avoid cycles.
367  if (Depth == MaxSpeculationDepth)
368    return false;
369
370  Instruction *I = dyn_cast<Instruction>(V);
371  if (!I) {
372    // Non-instructions all dominate instructions, but not all constantexprs
373    // can be executed unconditionally.
374    if (ConstantExpr *C = dyn_cast<ConstantExpr>(V))
375      if (C->canTrap())
376        return false;
377    return true;
378  }
379  BasicBlock *PBB = I->getParent();
380
381  // We don't want to allow weird loops that might have the "if condition" in
382  // the bottom of this block.
383  if (PBB == BB)
384    return false;
385
386  // If this instruction is defined in a block that contains an unconditional
387  // branch to BB, then it must be in the 'conditional' part of the "if
388  // statement".  If not, it definitely dominates the region.
389  BranchInst *BI = dyn_cast<BranchInst>(PBB->getTerminator());
390  if (!BI || BI->isConditional() || BI->getSuccessor(0) != BB)
391    return true;
392
393  // If we have seen this instruction before, don't count it again.
394  if (AggressiveInsts.count(I))
395    return true;
396
397  // Okay, it looks like the instruction IS in the "condition".  Check to
398  // see if it's a cheap instruction to unconditionally compute, and if it
399  // only uses stuff defined outside of the condition.  If so, hoist it out.
400  if (!isSafeToSpeculativelyExecute(I))
401    return false;
402
403  BudgetRemaining -= ComputeSpeculationCost(I, TTI);
404
405  // Allow exactly one instruction to be speculated regardless of its cost
406  // (as long as it is safe to do so).
407  // This is intended to flatten the CFG even if the instruction is a division
408  // or other expensive operation. The speculation of an expensive instruction
409  // is expected to be undone in CodeGenPrepare if the speculation has not
410  // enabled further IR optimizations.
411  if (BudgetRemaining < 0 &&
412      (!SpeculateOneExpensiveInst || !AggressiveInsts.empty() || Depth > 0))
413    return false;
414
415  // Okay, we can only really hoist these out if their operands do
416  // not take us over the cost threshold.
417  for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i)
418    if (!DominatesMergePoint(*i, BB, AggressiveInsts, BudgetRemaining, TTI,
419                             Depth + 1))
420      return false;
421  // Okay, it's safe to do this!  Remember this instruction.
422  AggressiveInsts.insert(I);
423  return true;
424}
425
426/// Extract ConstantInt from value, looking through IntToPtr
427/// and PointerNullValue. Return NULL if value is not a constant int.
428static ConstantInt *GetConstantInt(Value *V, const DataLayout &DL) {
429  // Normal constant int.
430  ConstantInt *CI = dyn_cast<ConstantInt>(V);
431  if (CI || !isa<Constant>(V) || !V->getType()->isPointerTy())
432    return CI;
433
434  // This is some kind of pointer constant. Turn it into a pointer-sized
435  // ConstantInt if possible.
436  IntegerType *PtrTy = cast<IntegerType>(DL.getIntPtrType(V->getType()));
437
438  // Null pointer means 0, see SelectionDAGBuilder::getValue(const Value*).
439  if (isa<ConstantPointerNull>(V))
440    return ConstantInt::get(PtrTy, 0);
441
442  // IntToPtr const int.
443  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
444    if (CE->getOpcode() == Instruction::IntToPtr)
445      if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(0))) {
446        // The constant is very likely to have the right type already.
447        if (CI->getType() == PtrTy)
448          return CI;
449        else
450          return cast<ConstantInt>(
451              ConstantExpr::getIntegerCast(CI, PtrTy, /*isSigned=*/false));
452      }
453  return nullptr;
454}
455
456namespace {
457
458/// Given a chain of or (||) or and (&&) comparison of a value against a
459/// constant, this will try to recover the information required for a switch
460/// structure.
461/// It will depth-first traverse the chain of comparison, seeking for patterns
462/// like %a == 12 or %a < 4 and combine them to produce a set of integer
463/// representing the different cases for the switch.
464/// Note that if the chain is composed of '||' it will build the set of elements
465/// that matches the comparisons (i.e. any of this value validate the chain)
466/// while for a chain of '&&' it will build the set elements that make the test
467/// fail.
468struct ConstantComparesGatherer {
469  const DataLayout &DL;
470
471  /// Value found for the switch comparison
472  Value *CompValue = nullptr;
473
474  /// Extra clause to be checked before the switch
475  Value *Extra = nullptr;
476
477  /// Set of integers to match in switch
478  SmallVector<ConstantInt *, 8> Vals;
479
480  /// Number of comparisons matched in the and/or chain
481  unsigned UsedICmps = 0;
482
483  /// Construct and compute the result for the comparison instruction Cond
484  ConstantComparesGatherer(Instruction *Cond, const DataLayout &DL) : DL(DL) {
485    gather(Cond);
486  }
487
488  ConstantComparesGatherer(const ConstantComparesGatherer &) = delete;
489  ConstantComparesGatherer &
490  operator=(const ConstantComparesGatherer &) = delete;
491
492private:
493  /// Try to set the current value used for the comparison, it succeeds only if
494  /// it wasn't set before or if the new value is the same as the old one
495  bool setValueOnce(Value *NewVal) {
496    if (CompValue && CompValue != NewVal)
497      return false;
498    CompValue = NewVal;
499    return (CompValue != nullptr);
500  }
501
502  /// Try to match Instruction "I" as a comparison against a constant and
503  /// populates the array Vals with the set of values that match (or do not
504  /// match depending on isEQ).
505  /// Return false on failure. On success, the Value the comparison matched
506  /// against is placed in CompValue.
507  /// If CompValue is already set, the function is expected to fail if a match
508  /// is found but the value compared to is different.
509  bool matchInstruction(Instruction *I, bool isEQ) {
510    // If this is an icmp against a constant, handle this as one of the cases.
511    ICmpInst *ICI;
512    ConstantInt *C;
513    if (!((ICI = dyn_cast<ICmpInst>(I)) &&
514          (C = GetConstantInt(I->getOperand(1), DL)))) {
515      return false;
516    }
517
518    Value *RHSVal;
519    const APInt *RHSC;
520
521    // Pattern match a special case
522    // (x & ~2^z) == y --> x == y || x == y|2^z
523    // This undoes a transformation done by instcombine to fuse 2 compares.
524    if (ICI->getPredicate() == (isEQ ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE)) {
525      // It's a little bit hard to see why the following transformations are
526      // correct. Here is a CVC3 program to verify them for 64-bit values:
527
528      /*
529         ONE  : BITVECTOR(64) = BVZEROEXTEND(0bin1, 63);
530         x    : BITVECTOR(64);
531         y    : BITVECTOR(64);
532         z    : BITVECTOR(64);
533         mask : BITVECTOR(64) = BVSHL(ONE, z);
534         QUERY( (y & ~mask = y) =>
535                ((x & ~mask = y) <=> (x = y OR x = (y |  mask)))
536         );
537         QUERY( (y |  mask = y) =>
538                ((x |  mask = y) <=> (x = y OR x = (y & ~mask)))
539         );
540      */
541
542      // Please note that each pattern must be a dual implication (<--> or
543      // iff). One directional implication can create spurious matches. If the
544      // implication is only one-way, an unsatisfiable condition on the left
545      // side can imply a satisfiable condition on the right side. Dual
546      // implication ensures that satisfiable conditions are transformed to
547      // other satisfiable conditions and unsatisfiable conditions are
548      // transformed to other unsatisfiable conditions.
549
550      // Here is a concrete example of a unsatisfiable condition on the left
551      // implying a satisfiable condition on the right:
552      //
553      // mask = (1 << z)
554      // (x & ~mask) == y  --> (x == y || x == (y | mask))
555      //
556      // Substituting y = 3, z = 0 yields:
557      // (x & -2) == 3 --> (x == 3 || x == 2)
558
559      // Pattern match a special case:
560      /*
561        QUERY( (y & ~mask = y) =>
562               ((x & ~mask = y) <=> (x = y OR x = (y |  mask)))
563        );
564      */
565      if (match(ICI->getOperand(0),
566                m_And(m_Value(RHSVal), m_APInt(RHSC)))) {
567        APInt Mask = ~*RHSC;
568        if (Mask.isPowerOf2() && (C->getValue() & ~Mask) == C->getValue()) {
569          // If we already have a value for the switch, it has to match!
570          if (!setValueOnce(RHSVal))
571            return false;
572
573          Vals.push_back(C);
574          Vals.push_back(
575              ConstantInt::get(C->getContext(),
576                               C->getValue() | Mask));
577          UsedICmps++;
578          return true;
579        }
580      }
581
582      // Pattern match a special case:
583      /*
584        QUERY( (y |  mask = y) =>
585               ((x |  mask = y) <=> (x = y OR x = (y & ~mask)))
586        );
587      */
588      if (match(ICI->getOperand(0),
589                m_Or(m_Value(RHSVal), m_APInt(RHSC)))) {
590        APInt Mask = *RHSC;
591        if (Mask.isPowerOf2() && (C->getValue() | Mask) == C->getValue()) {
592          // If we already have a value for the switch, it has to match!
593          if (!setValueOnce(RHSVal))
594            return false;
595
596          Vals.push_back(C);
597          Vals.push_back(ConstantInt::get(C->getContext(),
598                                          C->getValue() & ~Mask));
599          UsedICmps++;
600          return true;
601        }
602      }
603
604      // If we already have a value for the switch, it has to match!
605      if (!setValueOnce(ICI->getOperand(0)))
606        return false;
607
608      UsedICmps++;
609      Vals.push_back(C);
610      return ICI->getOperand(0);
611    }
612
613    // If we have "x ult 3", for example, then we can add 0,1,2 to the set.
614    ConstantRange Span = ConstantRange::makeAllowedICmpRegion(
615        ICI->getPredicate(), C->getValue());
616
617    // Shift the range if the compare is fed by an add. This is the range
618    // compare idiom as emitted by instcombine.
619    Value *CandidateVal = I->getOperand(0);
620    if (match(I->getOperand(0), m_Add(m_Value(RHSVal), m_APInt(RHSC)))) {
621      Span = Span.subtract(*RHSC);
622      CandidateVal = RHSVal;
623    }
624
625    // If this is an and/!= check, then we are looking to build the set of
626    // value that *don't* pass the and chain. I.e. to turn "x ugt 2" into
627    // x != 0 && x != 1.
628    if (!isEQ)
629      Span = Span.inverse();
630
631    // If there are a ton of values, we don't want to make a ginormous switch.
632    if (Span.isSizeLargerThan(8) || Span.isEmptySet()) {
633      return false;
634    }
635
636    // If we already have a value for the switch, it has to match!
637    if (!setValueOnce(CandidateVal))
638      return false;
639
640    // Add all values from the range to the set
641    for (APInt Tmp = Span.getLower(); Tmp != Span.getUpper(); ++Tmp)
642      Vals.push_back(ConstantInt::get(I->getContext(), Tmp));
643
644    UsedICmps++;
645    return true;
646  }
647
648  /// Given a potentially 'or'd or 'and'd together collection of icmp
649  /// eq/ne/lt/gt instructions that compare a value against a constant, extract
650  /// the value being compared, and stick the list constants into the Vals
651  /// vector.
652  /// One "Extra" case is allowed to differ from the other.
653  void gather(Value *V) {
654    bool isEQ = (cast<Instruction>(V)->getOpcode() == Instruction::Or);
655
656    // Keep a stack (SmallVector for efficiency) for depth-first traversal
657    SmallVector<Value *, 8> DFT;
658    SmallPtrSet<Value *, 8> Visited;
659
660    // Initialize
661    Visited.insert(V);
662    DFT.push_back(V);
663
664    while (!DFT.empty()) {
665      V = DFT.pop_back_val();
666
667      if (Instruction *I = dyn_cast<Instruction>(V)) {
668        // If it is a || (or && depending on isEQ), process the operands.
669        if (I->getOpcode() == (isEQ ? Instruction::Or : Instruction::And)) {
670          if (Visited.insert(I->getOperand(1)).second)
671            DFT.push_back(I->getOperand(1));
672          if (Visited.insert(I->getOperand(0)).second)
673            DFT.push_back(I->getOperand(0));
674          continue;
675        }
676
677        // Try to match the current instruction
678        if (matchInstruction(I, isEQ))
679          // Match succeed, continue the loop
680          continue;
681      }
682
683      // One element of the sequence of || (or &&) could not be match as a
684      // comparison against the same value as the others.
685      // We allow only one "Extra" case to be checked before the switch
686      if (!Extra) {
687        Extra = V;
688        continue;
689      }
690      // Failed to parse a proper sequence, abort now
691      CompValue = nullptr;
692      break;
693    }
694  }
695};
696
697} // end anonymous namespace
698
699static void EraseTerminatorAndDCECond(Instruction *TI,
700                                      MemorySSAUpdater *MSSAU = nullptr) {
701  Instruction *Cond = nullptr;
702  if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
703    Cond = dyn_cast<Instruction>(SI->getCondition());
704  } else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
705    if (BI->isConditional())
706      Cond = dyn_cast<Instruction>(BI->getCondition());
707  } else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(TI)) {
708    Cond = dyn_cast<Instruction>(IBI->getAddress());
709  }
710
711  TI->eraseFromParent();
712  if (Cond)
713    RecursivelyDeleteTriviallyDeadInstructions(Cond, nullptr, MSSAU);
714}
715
716/// Return true if the specified terminator checks
717/// to see if a value is equal to constant integer value.
718Value *SimplifyCFGOpt::isValueEqualityComparison(Instruction *TI) {
719  Value *CV = nullptr;
720  if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
721    // Do not permit merging of large switch instructions into their
722    // predecessors unless there is only one predecessor.
723    if (!SI->getParent()->hasNPredecessorsOrMore(128 / SI->getNumSuccessors()))
724      CV = SI->getCondition();
725  } else if (BranchInst *BI = dyn_cast<BranchInst>(TI))
726    if (BI->isConditional() && BI->getCondition()->hasOneUse())
727      if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition())) {
728        if (ICI->isEquality() && GetConstantInt(ICI->getOperand(1), DL))
729          CV = ICI->getOperand(0);
730      }
731
732  // Unwrap any lossless ptrtoint cast.
733  if (CV) {
734    if (PtrToIntInst *PTII = dyn_cast<PtrToIntInst>(CV)) {
735      Value *Ptr = PTII->getPointerOperand();
736      if (PTII->getType() == DL.getIntPtrType(Ptr->getType()))
737        CV = Ptr;
738    }
739  }
740  return CV;
741}
742
743/// Given a value comparison instruction,
744/// decode all of the 'cases' that it represents and return the 'default' block.
745BasicBlock *SimplifyCFGOpt::GetValueEqualityComparisonCases(
746    Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases) {
747  if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
748    Cases.reserve(SI->getNumCases());
749    for (auto Case : SI->cases())
750      Cases.push_back(ValueEqualityComparisonCase(Case.getCaseValue(),
751                                                  Case.getCaseSuccessor()));
752    return SI->getDefaultDest();
753  }
754
755  BranchInst *BI = cast<BranchInst>(TI);
756  ICmpInst *ICI = cast<ICmpInst>(BI->getCondition());
757  BasicBlock *Succ = BI->getSuccessor(ICI->getPredicate() == ICmpInst::ICMP_NE);
758  Cases.push_back(ValueEqualityComparisonCase(
759      GetConstantInt(ICI->getOperand(1), DL), Succ));
760  return BI->getSuccessor(ICI->getPredicate() == ICmpInst::ICMP_EQ);
761}
762
763/// Given a vector of bb/value pairs, remove any entries
764/// in the list that match the specified block.
765static void
766EliminateBlockCases(BasicBlock *BB,
767                    std::vector<ValueEqualityComparisonCase> &Cases) {
768  Cases.erase(std::remove(Cases.begin(), Cases.end(), BB), Cases.end());
769}
770
771/// Return true if there are any keys in C1 that exist in C2 as well.
772static bool ValuesOverlap(std::vector<ValueEqualityComparisonCase> &C1,
773                          std::vector<ValueEqualityComparisonCase> &C2) {
774  std::vector<ValueEqualityComparisonCase> *V1 = &C1, *V2 = &C2;
775
776  // Make V1 be smaller than V2.
777  if (V1->size() > V2->size())
778    std::swap(V1, V2);
779
780  if (V1->empty())
781    return false;
782  if (V1->size() == 1) {
783    // Just scan V2.
784    ConstantInt *TheVal = (*V1)[0].Value;
785    for (unsigned i = 0, e = V2->size(); i != e; ++i)
786      if (TheVal == (*V2)[i].Value)
787        return true;
788  }
789
790  // Otherwise, just sort both lists and compare element by element.
791  array_pod_sort(V1->begin(), V1->end());
792  array_pod_sort(V2->begin(), V2->end());
793  unsigned i1 = 0, i2 = 0, e1 = V1->size(), e2 = V2->size();
794  while (i1 != e1 && i2 != e2) {
795    if ((*V1)[i1].Value == (*V2)[i2].Value)
796      return true;
797    if ((*V1)[i1].Value < (*V2)[i2].Value)
798      ++i1;
799    else
800      ++i2;
801  }
802  return false;
803}
804
805// Set branch weights on SwitchInst. This sets the metadata if there is at
806// least one non-zero weight.
807static void setBranchWeights(SwitchInst *SI, ArrayRef<uint32_t> Weights) {
808  // Check that there is at least one non-zero weight. Otherwise, pass
809  // nullptr to setMetadata which will erase the existing metadata.
810  MDNode *N = nullptr;
811  if (llvm::any_of(Weights, [](uint32_t W) { return W != 0; }))
812    N = MDBuilder(SI->getParent()->getContext()).createBranchWeights(Weights);
813  SI->setMetadata(LLVMContext::MD_prof, N);
814}
815
816// Similar to the above, but for branch and select instructions that take
817// exactly 2 weights.
818static void setBranchWeights(Instruction *I, uint32_t TrueWeight,
819                             uint32_t FalseWeight) {
820  assert(isa<BranchInst>(I) || isa<SelectInst>(I));
821  // Check that there is at least one non-zero weight. Otherwise, pass
822  // nullptr to setMetadata which will erase the existing metadata.
823  MDNode *N = nullptr;
824  if (TrueWeight || FalseWeight)
825    N = MDBuilder(I->getParent()->getContext())
826            .createBranchWeights(TrueWeight, FalseWeight);
827  I->setMetadata(LLVMContext::MD_prof, N);
828}
829
830/// If TI is known to be a terminator instruction and its block is known to
831/// only have a single predecessor block, check to see if that predecessor is
832/// also a value comparison with the same value, and if that comparison
833/// determines the outcome of this comparison. If so, simplify TI. This does a
834/// very limited form of jump threading.
835bool SimplifyCFGOpt::SimplifyEqualityComparisonWithOnlyPredecessor(
836    Instruction *TI, BasicBlock *Pred, IRBuilder<> &Builder) {
837  Value *PredVal = isValueEqualityComparison(Pred->getTerminator());
838  if (!PredVal)
839    return false; // Not a value comparison in predecessor.
840
841  Value *ThisVal = isValueEqualityComparison(TI);
842  assert(ThisVal && "This isn't a value comparison!!");
843  if (ThisVal != PredVal)
844    return false; // Different predicates.
845
846  // TODO: Preserve branch weight metadata, similarly to how
847  // FoldValueComparisonIntoPredecessors preserves it.
848
849  // Find out information about when control will move from Pred to TI's block.
850  std::vector<ValueEqualityComparisonCase> PredCases;
851  BasicBlock *PredDef =
852      GetValueEqualityComparisonCases(Pred->getTerminator(), PredCases);
853  EliminateBlockCases(PredDef, PredCases); // Remove default from cases.
854
855  // Find information about how control leaves this block.
856  std::vector<ValueEqualityComparisonCase> ThisCases;
857  BasicBlock *ThisDef = GetValueEqualityComparisonCases(TI, ThisCases);
858  EliminateBlockCases(ThisDef, ThisCases); // Remove default from cases.
859
860  // If TI's block is the default block from Pred's comparison, potentially
861  // simplify TI based on this knowledge.
862  if (PredDef == TI->getParent()) {
863    // If we are here, we know that the value is none of those cases listed in
864    // PredCases.  If there are any cases in ThisCases that are in PredCases, we
865    // can simplify TI.
866    if (!ValuesOverlap(PredCases, ThisCases))
867      return false;
868
869    if (isa<BranchInst>(TI)) {
870      // Okay, one of the successors of this condbr is dead.  Convert it to a
871      // uncond br.
872      assert(ThisCases.size() == 1 && "Branch can only have one case!");
873      // Insert the new branch.
874      Instruction *NI = Builder.CreateBr(ThisDef);
875      (void)NI;
876
877      // Remove PHI node entries for the dead edge.
878      ThisCases[0].Dest->removePredecessor(TI->getParent());
879
880      LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
881                        << "Through successor TI: " << *TI << "Leaving: " << *NI
882                        << "\n");
883
884      EraseTerminatorAndDCECond(TI);
885      return true;
886    }
887
888    SwitchInstProfUpdateWrapper SI = *cast<SwitchInst>(TI);
889    // Okay, TI has cases that are statically dead, prune them away.
890    SmallPtrSet<Constant *, 16> DeadCases;
891    for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
892      DeadCases.insert(PredCases[i].Value);
893
894    LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
895                      << "Through successor TI: " << *TI);
896
897    for (SwitchInst::CaseIt i = SI->case_end(), e = SI->case_begin(); i != e;) {
898      --i;
899      if (DeadCases.count(i->getCaseValue())) {
900        i->getCaseSuccessor()->removePredecessor(TI->getParent());
901        SI.removeCase(i);
902      }
903    }
904    LLVM_DEBUG(dbgs() << "Leaving: " << *TI << "\n");
905    return true;
906  }
907
908  // Otherwise, TI's block must correspond to some matched value.  Find out
909  // which value (or set of values) this is.
910  ConstantInt *TIV = nullptr;
911  BasicBlock *TIBB = TI->getParent();
912  for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
913    if (PredCases[i].Dest == TIBB) {
914      if (TIV)
915        return false; // Cannot handle multiple values coming to this block.
916      TIV = PredCases[i].Value;
917    }
918  assert(TIV && "No edge from pred to succ?");
919
920  // Okay, we found the one constant that our value can be if we get into TI's
921  // BB.  Find out which successor will unconditionally be branched to.
922  BasicBlock *TheRealDest = nullptr;
923  for (unsigned i = 0, e = ThisCases.size(); i != e; ++i)
924    if (ThisCases[i].Value == TIV) {
925      TheRealDest = ThisCases[i].Dest;
926      break;
927    }
928
929  // If not handled by any explicit cases, it is handled by the default case.
930  if (!TheRealDest)
931    TheRealDest = ThisDef;
932
933  // Remove PHI node entries for dead edges.
934  BasicBlock *CheckEdge = TheRealDest;
935  for (BasicBlock *Succ : successors(TIBB))
936    if (Succ != CheckEdge)
937      Succ->removePredecessor(TIBB);
938    else
939      CheckEdge = nullptr;
940
941  // Insert the new branch.
942  Instruction *NI = Builder.CreateBr(TheRealDest);
943  (void)NI;
944
945  LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
946                    << "Through successor TI: " << *TI << "Leaving: " << *NI
947                    << "\n");
948
949  EraseTerminatorAndDCECond(TI);
950  return true;
951}
952
953namespace {
954
955/// This class implements a stable ordering of constant
956/// integers that does not depend on their address.  This is important for
957/// applications that sort ConstantInt's to ensure uniqueness.
958struct ConstantIntOrdering {
959  bool operator()(const ConstantInt *LHS, const ConstantInt *RHS) const {
960    return LHS->getValue().ult(RHS->getValue());
961  }
962};
963
964} // end anonymous namespace
965
966static int ConstantIntSortPredicate(ConstantInt *const *P1,
967                                    ConstantInt *const *P2) {
968  const ConstantInt *LHS = *P1;
969  const ConstantInt *RHS = *P2;
970  if (LHS == RHS)
971    return 0;
972  return LHS->getValue().ult(RHS->getValue()) ? 1 : -1;
973}
974
975static inline bool HasBranchWeights(const Instruction *I) {
976  MDNode *ProfMD = I->getMetadata(LLVMContext::MD_prof);
977  if (ProfMD && ProfMD->getOperand(0))
978    if (MDString *MDS = dyn_cast<MDString>(ProfMD->getOperand(0)))
979      return MDS->getString().equals("branch_weights");
980
981  return false;
982}
983
984/// Get Weights of a given terminator, the default weight is at the front
985/// of the vector. If TI is a conditional eq, we need to swap the branch-weight
986/// metadata.
987static void GetBranchWeights(Instruction *TI,
988                             SmallVectorImpl<uint64_t> &Weights) {
989  MDNode *MD = TI->getMetadata(LLVMContext::MD_prof);
990  assert(MD);
991  for (unsigned i = 1, e = MD->getNumOperands(); i < e; ++i) {
992    ConstantInt *CI = mdconst::extract<ConstantInt>(MD->getOperand(i));
993    Weights.push_back(CI->getValue().getZExtValue());
994  }
995
996  // If TI is a conditional eq, the default case is the false case,
997  // and the corresponding branch-weight data is at index 2. We swap the
998  // default weight to be the first entry.
999  if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
1000    assert(Weights.size() == 2);
1001    ICmpInst *ICI = cast<ICmpInst>(BI->getCondition());
1002    if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
1003      std::swap(Weights.front(), Weights.back());
1004  }
1005}
1006
1007/// Keep halving the weights until all can fit in uint32_t.
1008static void FitWeights(MutableArrayRef<uint64_t> Weights) {
1009  uint64_t Max = *std::max_element(Weights.begin(), Weights.end());
1010  if (Max > UINT_MAX) {
1011    unsigned Offset = 32 - countLeadingZeros(Max);
1012    for (uint64_t &I : Weights)
1013      I >>= Offset;
1014  }
1015}
1016
1017/// The specified terminator is a value equality comparison instruction
1018/// (either a switch or a branch on "X == c").
1019/// See if any of the predecessors of the terminator block are value comparisons
1020/// on the same value.  If so, and if safe to do so, fold them together.
1021bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(Instruction *TI,
1022                                                         IRBuilder<> &Builder) {
1023  BasicBlock *BB = TI->getParent();
1024  Value *CV = isValueEqualityComparison(TI); // CondVal
1025  assert(CV && "Not a comparison?");
1026  bool Changed = false;
1027
1028  SmallVector<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB));
1029  while (!Preds.empty()) {
1030    BasicBlock *Pred = Preds.pop_back_val();
1031
1032    // See if the predecessor is a comparison with the same value.
1033    Instruction *PTI = Pred->getTerminator();
1034    Value *PCV = isValueEqualityComparison(PTI); // PredCondVal
1035
1036    if (PCV == CV && TI != PTI) {
1037      SmallSetVector<BasicBlock*, 4> FailBlocks;
1038      if (!SafeToMergeTerminators(TI, PTI, &FailBlocks)) {
1039        for (auto *Succ : FailBlocks) {
1040          if (!SplitBlockPredecessors(Succ, TI->getParent(), ".fold.split"))
1041            return false;
1042        }
1043      }
1044
1045      // Figure out which 'cases' to copy from SI to PSI.
1046      std::vector<ValueEqualityComparisonCase> BBCases;
1047      BasicBlock *BBDefault = GetValueEqualityComparisonCases(TI, BBCases);
1048
1049      std::vector<ValueEqualityComparisonCase> PredCases;
1050      BasicBlock *PredDefault = GetValueEqualityComparisonCases(PTI, PredCases);
1051
1052      // Based on whether the default edge from PTI goes to BB or not, fill in
1053      // PredCases and PredDefault with the new switch cases we would like to
1054      // build.
1055      SmallVector<BasicBlock *, 8> NewSuccessors;
1056
1057      // Update the branch weight metadata along the way
1058      SmallVector<uint64_t, 8> Weights;
1059      bool PredHasWeights = HasBranchWeights(PTI);
1060      bool SuccHasWeights = HasBranchWeights(TI);
1061
1062      if (PredHasWeights) {
1063        GetBranchWeights(PTI, Weights);
1064        // branch-weight metadata is inconsistent here.
1065        if (Weights.size() != 1 + PredCases.size())
1066          PredHasWeights = SuccHasWeights = false;
1067      } else if (SuccHasWeights)
1068        // If there are no predecessor weights but there are successor weights,
1069        // populate Weights with 1, which will later be scaled to the sum of
1070        // successor's weights
1071        Weights.assign(1 + PredCases.size(), 1);
1072
1073      SmallVector<uint64_t, 8> SuccWeights;
1074      if (SuccHasWeights) {
1075        GetBranchWeights(TI, SuccWeights);
1076        // branch-weight metadata is inconsistent here.
1077        if (SuccWeights.size() != 1 + BBCases.size())
1078          PredHasWeights = SuccHasWeights = false;
1079      } else if (PredHasWeights)
1080        SuccWeights.assign(1 + BBCases.size(), 1);
1081
1082      if (PredDefault == BB) {
1083        // If this is the default destination from PTI, only the edges in TI
1084        // that don't occur in PTI, or that branch to BB will be activated.
1085        std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
1086        for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
1087          if (PredCases[i].Dest != BB)
1088            PTIHandled.insert(PredCases[i].Value);
1089          else {
1090            // The default destination is BB, we don't need explicit targets.
1091            std::swap(PredCases[i], PredCases.back());
1092
1093            if (PredHasWeights || SuccHasWeights) {
1094              // Increase weight for the default case.
1095              Weights[0] += Weights[i + 1];
1096              std::swap(Weights[i + 1], Weights.back());
1097              Weights.pop_back();
1098            }
1099
1100            PredCases.pop_back();
1101            --i;
1102            --e;
1103          }
1104
1105        // Reconstruct the new switch statement we will be building.
1106        if (PredDefault != BBDefault) {
1107          PredDefault->removePredecessor(Pred);
1108          PredDefault = BBDefault;
1109          NewSuccessors.push_back(BBDefault);
1110        }
1111
1112        unsigned CasesFromPred = Weights.size();
1113        uint64_t ValidTotalSuccWeight = 0;
1114        for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
1115          if (!PTIHandled.count(BBCases[i].Value) &&
1116              BBCases[i].Dest != BBDefault) {
1117            PredCases.push_back(BBCases[i]);
1118            NewSuccessors.push_back(BBCases[i].Dest);
1119            if (SuccHasWeights || PredHasWeights) {
1120              // The default weight is at index 0, so weight for the ith case
1121              // should be at index i+1. Scale the cases from successor by
1122              // PredDefaultWeight (Weights[0]).
1123              Weights.push_back(Weights[0] * SuccWeights[i + 1]);
1124              ValidTotalSuccWeight += SuccWeights[i + 1];
1125            }
1126          }
1127
1128        if (SuccHasWeights || PredHasWeights) {
1129          ValidTotalSuccWeight += SuccWeights[0];
1130          // Scale the cases from predecessor by ValidTotalSuccWeight.
1131          for (unsigned i = 1; i < CasesFromPred; ++i)
1132            Weights[i] *= ValidTotalSuccWeight;
1133          // Scale the default weight by SuccDefaultWeight (SuccWeights[0]).
1134          Weights[0] *= SuccWeights[0];
1135        }
1136      } else {
1137        // If this is not the default destination from PSI, only the edges
1138        // in SI that occur in PSI with a destination of BB will be
1139        // activated.
1140        std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
1141        std::map<ConstantInt *, uint64_t> WeightsForHandled;
1142        for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
1143          if (PredCases[i].Dest == BB) {
1144            PTIHandled.insert(PredCases[i].Value);
1145
1146            if (PredHasWeights || SuccHasWeights) {
1147              WeightsForHandled[PredCases[i].Value] = Weights[i + 1];
1148              std::swap(Weights[i + 1], Weights.back());
1149              Weights.pop_back();
1150            }
1151
1152            std::swap(PredCases[i], PredCases.back());
1153            PredCases.pop_back();
1154            --i;
1155            --e;
1156          }
1157
1158        // Okay, now we know which constants were sent to BB from the
1159        // predecessor.  Figure out where they will all go now.
1160        for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
1161          if (PTIHandled.count(BBCases[i].Value)) {
1162            // If this is one we are capable of getting...
1163            if (PredHasWeights || SuccHasWeights)
1164              Weights.push_back(WeightsForHandled[BBCases[i].Value]);
1165            PredCases.push_back(BBCases[i]);
1166            NewSuccessors.push_back(BBCases[i].Dest);
1167            PTIHandled.erase(
1168                BBCases[i].Value); // This constant is taken care of
1169          }
1170
1171        // If there are any constants vectored to BB that TI doesn't handle,
1172        // they must go to the default destination of TI.
1173        for (ConstantInt *I : PTIHandled) {
1174          if (PredHasWeights || SuccHasWeights)
1175            Weights.push_back(WeightsForHandled[I]);
1176          PredCases.push_back(ValueEqualityComparisonCase(I, BBDefault));
1177          NewSuccessors.push_back(BBDefault);
1178        }
1179      }
1180
1181      // Okay, at this point, we know which new successor Pred will get.  Make
1182      // sure we update the number of entries in the PHI nodes for these
1183      // successors.
1184      for (BasicBlock *NewSuccessor : NewSuccessors)
1185        AddPredecessorToBlock(NewSuccessor, Pred, BB);
1186
1187      Builder.SetInsertPoint(PTI);
1188      // Convert pointer to int before we switch.
1189      if (CV->getType()->isPointerTy()) {
1190        CV = Builder.CreatePtrToInt(CV, DL.getIntPtrType(CV->getType()),
1191                                    "magicptr");
1192      }
1193
1194      // Now that the successors are updated, create the new Switch instruction.
1195      SwitchInst *NewSI =
1196          Builder.CreateSwitch(CV, PredDefault, PredCases.size());
1197      NewSI->setDebugLoc(PTI->getDebugLoc());
1198      for (ValueEqualityComparisonCase &V : PredCases)
1199        NewSI->addCase(V.Value, V.Dest);
1200
1201      if (PredHasWeights || SuccHasWeights) {
1202        // Halve the weights if any of them cannot fit in an uint32_t
1203        FitWeights(Weights);
1204
1205        SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end());
1206
1207        setBranchWeights(NewSI, MDWeights);
1208      }
1209
1210      EraseTerminatorAndDCECond(PTI);
1211
1212      // Okay, last check.  If BB is still a successor of PSI, then we must
1213      // have an infinite loop case.  If so, add an infinitely looping block
1214      // to handle the case to preserve the behavior of the code.
1215      BasicBlock *InfLoopBlock = nullptr;
1216      for (unsigned i = 0, e = NewSI->getNumSuccessors(); i != e; ++i)
1217        if (NewSI->getSuccessor(i) == BB) {
1218          if (!InfLoopBlock) {
1219            // Insert it at the end of the function, because it's either code,
1220            // or it won't matter if it's hot. :)
1221            InfLoopBlock = BasicBlock::Create(BB->getContext(), "infloop",
1222                                              BB->getParent());
1223            BranchInst::Create(InfLoopBlock, InfLoopBlock);
1224          }
1225          NewSI->setSuccessor(i, InfLoopBlock);
1226        }
1227
1228      Changed = true;
1229    }
1230  }
1231  return Changed;
1232}
1233
1234// If we would need to insert a select that uses the value of this invoke
1235// (comments in HoistThenElseCodeToIf explain why we would need to do this), we
1236// can't hoist the invoke, as there is nowhere to put the select in this case.
1237static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2,
1238                                Instruction *I1, Instruction *I2) {
1239  for (BasicBlock *Succ : successors(BB1)) {
1240    for (const PHINode &PN : Succ->phis()) {
1241      Value *BB1V = PN.getIncomingValueForBlock(BB1);
1242      Value *BB2V = PN.getIncomingValueForBlock(BB2);
1243      if (BB1V != BB2V && (BB1V == I1 || BB2V == I2)) {
1244        return false;
1245      }
1246    }
1247  }
1248  return true;
1249}
1250
1251static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I);
1252
1253/// Given a conditional branch that goes to BB1 and BB2, hoist any common code
1254/// in the two blocks up into the branch block. The caller of this function
1255/// guarantees that BI's block dominates BB1 and BB2.
1256bool SimplifyCFGOpt::HoistThenElseCodeToIf(BranchInst *BI,
1257                                           const TargetTransformInfo &TTI) {
1258  // This does very trivial matching, with limited scanning, to find identical
1259  // instructions in the two blocks.  In particular, we don't want to get into
1260  // O(M*N) situations here where M and N are the sizes of BB1 and BB2.  As
1261  // such, we currently just scan for obviously identical instructions in an
1262  // identical order.
1263  BasicBlock *BB1 = BI->getSuccessor(0); // The true destination.
1264  BasicBlock *BB2 = BI->getSuccessor(1); // The false destination
1265
1266  BasicBlock::iterator BB1_Itr = BB1->begin();
1267  BasicBlock::iterator BB2_Itr = BB2->begin();
1268
1269  Instruction *I1 = &*BB1_Itr++, *I2 = &*BB2_Itr++;
1270  // Skip debug info if it is not identical.
1271  DbgInfoIntrinsic *DBI1 = dyn_cast<DbgInfoIntrinsic>(I1);
1272  DbgInfoIntrinsic *DBI2 = dyn_cast<DbgInfoIntrinsic>(I2);
1273  if (!DBI1 || !DBI2 || !DBI1->isIdenticalToWhenDefined(DBI2)) {
1274    while (isa<DbgInfoIntrinsic>(I1))
1275      I1 = &*BB1_Itr++;
1276    while (isa<DbgInfoIntrinsic>(I2))
1277      I2 = &*BB2_Itr++;
1278  }
1279  // FIXME: Can we define a safety predicate for CallBr?
1280  if (isa<PHINode>(I1) || !I1->isIdenticalToWhenDefined(I2) ||
1281      (isa<InvokeInst>(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2)) ||
1282      isa<CallBrInst>(I1))
1283    return false;
1284
1285  BasicBlock *BIParent = BI->getParent();
1286
1287  bool Changed = false;
1288  do {
1289    // If we are hoisting the terminator instruction, don't move one (making a
1290    // broken BB), instead clone it, and remove BI.
1291    if (I1->isTerminator())
1292      goto HoistTerminator;
1293
1294    // If we're going to hoist a call, make sure that the two instructions we're
1295    // commoning/hoisting are both marked with musttail, or neither of them is
1296    // marked as such. Otherwise, we might end up in a situation where we hoist
1297    // from a block where the terminator is a `ret` to a block where the terminator
1298    // is a `br`, and `musttail` calls expect to be followed by a return.
1299    auto *C1 = dyn_cast<CallInst>(I1);
1300    auto *C2 = dyn_cast<CallInst>(I2);
1301    if (C1 && C2)
1302      if (C1->isMustTailCall() != C2->isMustTailCall())
1303        return Changed;
1304
1305    if (!TTI.isProfitableToHoist(I1) || !TTI.isProfitableToHoist(I2))
1306      return Changed;
1307
1308    // If any of the two call sites has nomerge attribute, stop hoisting.
1309    if (const auto *CB1 = dyn_cast<CallBase>(I1))
1310      if (CB1->cannotMerge())
1311        return Changed;
1312    if (const auto *CB2 = dyn_cast<CallBase>(I2))
1313      if (CB2->cannotMerge())
1314        return Changed;
1315
1316    if (isa<DbgInfoIntrinsic>(I1) || isa<DbgInfoIntrinsic>(I2)) {
1317      assert (isa<DbgInfoIntrinsic>(I1) && isa<DbgInfoIntrinsic>(I2));
1318      // The debug location is an integral part of a debug info intrinsic
1319      // and can't be separated from it or replaced.  Instead of attempting
1320      // to merge locations, simply hoist both copies of the intrinsic.
1321      BIParent->getInstList().splice(BI->getIterator(),
1322                                     BB1->getInstList(), I1);
1323      BIParent->getInstList().splice(BI->getIterator(),
1324                                     BB2->getInstList(), I2);
1325      Changed = true;
1326    } else {
1327      // For a normal instruction, we just move one to right before the branch,
1328      // then replace all uses of the other with the first.  Finally, we remove
1329      // the now redundant second instruction.
1330      BIParent->getInstList().splice(BI->getIterator(),
1331                                     BB1->getInstList(), I1);
1332      if (!I2->use_empty())
1333        I2->replaceAllUsesWith(I1);
1334      I1->andIRFlags(I2);
1335      unsigned KnownIDs[] = {LLVMContext::MD_tbaa,
1336                             LLVMContext::MD_range,
1337                             LLVMContext::MD_fpmath,
1338                             LLVMContext::MD_invariant_load,
1339                             LLVMContext::MD_nonnull,
1340                             LLVMContext::MD_invariant_group,
1341                             LLVMContext::MD_align,
1342                             LLVMContext::MD_dereferenceable,
1343                             LLVMContext::MD_dereferenceable_or_null,
1344                             LLVMContext::MD_mem_parallel_loop_access,
1345                             LLVMContext::MD_access_group,
1346                             LLVMContext::MD_preserve_access_index};
1347      combineMetadata(I1, I2, KnownIDs, true);
1348
1349      // I1 and I2 are being combined into a single instruction.  Its debug
1350      // location is the merged locations of the original instructions.
1351      I1->applyMergedLocation(I1->getDebugLoc(), I2->getDebugLoc());
1352
1353      I2->eraseFromParent();
1354      Changed = true;
1355    }
1356
1357    I1 = &*BB1_Itr++;
1358    I2 = &*BB2_Itr++;
1359    // Skip debug info if it is not identical.
1360    DbgInfoIntrinsic *DBI1 = dyn_cast<DbgInfoIntrinsic>(I1);
1361    DbgInfoIntrinsic *DBI2 = dyn_cast<DbgInfoIntrinsic>(I2);
1362    if (!DBI1 || !DBI2 || !DBI1->isIdenticalToWhenDefined(DBI2)) {
1363      while (isa<DbgInfoIntrinsic>(I1))
1364        I1 = &*BB1_Itr++;
1365      while (isa<DbgInfoIntrinsic>(I2))
1366        I2 = &*BB2_Itr++;
1367    }
1368  } while (I1->isIdenticalToWhenDefined(I2));
1369
1370  return true;
1371
1372HoistTerminator:
1373  // It may not be possible to hoist an invoke.
1374  // FIXME: Can we define a safety predicate for CallBr?
1375  if (isa<InvokeInst>(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2))
1376    return Changed;
1377
1378  // TODO: callbr hoisting currently disabled pending further study.
1379  if (isa<CallBrInst>(I1))
1380    return Changed;
1381
1382  for (BasicBlock *Succ : successors(BB1)) {
1383    for (PHINode &PN : Succ->phis()) {
1384      Value *BB1V = PN.getIncomingValueForBlock(BB1);
1385      Value *BB2V = PN.getIncomingValueForBlock(BB2);
1386      if (BB1V == BB2V)
1387        continue;
1388
1389      // Check for passingValueIsAlwaysUndefined here because we would rather
1390      // eliminate undefined control flow then converting it to a select.
1391      if (passingValueIsAlwaysUndefined(BB1V, &PN) ||
1392          passingValueIsAlwaysUndefined(BB2V, &PN))
1393        return Changed;
1394
1395      if (isa<ConstantExpr>(BB1V) && !isSafeToSpeculativelyExecute(BB1V))
1396        return Changed;
1397      if (isa<ConstantExpr>(BB2V) && !isSafeToSpeculativelyExecute(BB2V))
1398        return Changed;
1399    }
1400  }
1401
1402  // Okay, it is safe to hoist the terminator.
1403  Instruction *NT = I1->clone();
1404  BIParent->getInstList().insert(BI->getIterator(), NT);
1405  if (!NT->getType()->isVoidTy()) {
1406    I1->replaceAllUsesWith(NT);
1407    I2->replaceAllUsesWith(NT);
1408    NT->takeName(I1);
1409  }
1410
1411  // Ensure terminator gets a debug location, even an unknown one, in case
1412  // it involves inlinable calls.
1413  NT->applyMergedLocation(I1->getDebugLoc(), I2->getDebugLoc());
1414
1415  // PHIs created below will adopt NT's merged DebugLoc.
1416  IRBuilder<NoFolder> Builder(NT);
1417
1418  // Hoisting one of the terminators from our successor is a great thing.
1419  // Unfortunately, the successors of the if/else blocks may have PHI nodes in
1420  // them.  If they do, all PHI entries for BB1/BB2 must agree for all PHI
1421  // nodes, so we insert select instruction to compute the final result.
1422  std::map<std::pair<Value *, Value *>, SelectInst *> InsertedSelects;
1423  for (BasicBlock *Succ : successors(BB1)) {
1424    for (PHINode &PN : Succ->phis()) {
1425      Value *BB1V = PN.getIncomingValueForBlock(BB1);
1426      Value *BB2V = PN.getIncomingValueForBlock(BB2);
1427      if (BB1V == BB2V)
1428        continue;
1429
1430      // These values do not agree.  Insert a select instruction before NT
1431      // that determines the right value.
1432      SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)];
1433      if (!SI) {
1434        // Propagate fast-math-flags from phi node to its replacement select.
1435        IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
1436        if (isa<FPMathOperator>(PN))
1437          Builder.setFastMathFlags(PN.getFastMathFlags());
1438
1439        SI = cast<SelectInst>(
1440            Builder.CreateSelect(BI->getCondition(), BB1V, BB2V,
1441                                 BB1V->getName() + "." + BB2V->getName(), BI));
1442      }
1443
1444      // Make the PHI node use the select for all incoming values for BB1/BB2
1445      for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
1446        if (PN.getIncomingBlock(i) == BB1 || PN.getIncomingBlock(i) == BB2)
1447          PN.setIncomingValue(i, SI);
1448    }
1449  }
1450
1451  // Update any PHI nodes in our new successors.
1452  for (BasicBlock *Succ : successors(BB1))
1453    AddPredecessorToBlock(Succ, BIParent, BB1);
1454
1455  EraseTerminatorAndDCECond(BI);
1456  return true;
1457}
1458
1459// Check lifetime markers.
1460static bool isLifeTimeMarker(const Instruction *I) {
1461  if (auto II = dyn_cast<IntrinsicInst>(I)) {
1462    switch (II->getIntrinsicID()) {
1463    default:
1464      break;
1465    case Intrinsic::lifetime_start:
1466    case Intrinsic::lifetime_end:
1467      return true;
1468    }
1469  }
1470  return false;
1471}
1472
1473// TODO: Refine this. This should avoid cases like turning constant memcpy sizes
1474// into variables.
1475static bool replacingOperandWithVariableIsCheap(const Instruction *I,
1476                                                int OpIdx) {
1477  return !isa<IntrinsicInst>(I);
1478}
1479
1480// All instructions in Insts belong to different blocks that all unconditionally
1481// branch to a common successor. Analyze each instruction and return true if it
1482// would be possible to sink them into their successor, creating one common
1483// instruction instead. For every value that would be required to be provided by
1484// PHI node (because an operand varies in each input block), add to PHIOperands.
1485static bool canSinkInstructions(
1486    ArrayRef<Instruction *> Insts,
1487    DenseMap<Instruction *, SmallVector<Value *, 4>> &PHIOperands) {
1488  // Prune out obviously bad instructions to move. Each instruction must have
1489  // exactly zero or one use, and we check later that use is by a single, common
1490  // PHI instruction in the successor.
1491  bool HasUse = !Insts.front()->user_empty();
1492  for (auto *I : Insts) {
1493    // These instructions may change or break semantics if moved.
1494    if (isa<PHINode>(I) || I->isEHPad() || isa<AllocaInst>(I) ||
1495        I->getType()->isTokenTy())
1496      return false;
1497
1498    // Conservatively return false if I is an inline-asm instruction. Sinking
1499    // and merging inline-asm instructions can potentially create arguments
1500    // that cannot satisfy the inline-asm constraints.
1501    // If the instruction has nomerge attribute, return false.
1502    if (const auto *C = dyn_cast<CallBase>(I))
1503      if (C->isInlineAsm() || C->cannotMerge())
1504        return false;
1505
1506    // Each instruction must have zero or one use.
1507    if (HasUse && !I->hasOneUse())
1508      return false;
1509    if (!HasUse && !I->user_empty())
1510      return false;
1511  }
1512
1513  const Instruction *I0 = Insts.front();
1514  for (auto *I : Insts)
1515    if (!I->isSameOperationAs(I0))
1516      return false;
1517
1518  // All instructions in Insts are known to be the same opcode. If they have a
1519  // use, check that the only user is a PHI or in the same block as the
1520  // instruction, because if a user is in the same block as an instruction we're
1521  // contemplating sinking, it must already be determined to be sinkable.
1522  if (HasUse) {
1523    auto *PNUse = dyn_cast<PHINode>(*I0->user_begin());
1524    auto *Succ = I0->getParent()->getTerminator()->getSuccessor(0);
1525    if (!all_of(Insts, [&PNUse,&Succ](const Instruction *I) -> bool {
1526          auto *U = cast<Instruction>(*I->user_begin());
1527          return (PNUse &&
1528                  PNUse->getParent() == Succ &&
1529                  PNUse->getIncomingValueForBlock(I->getParent()) == I) ||
1530                 U->getParent() == I->getParent();
1531        }))
1532      return false;
1533  }
1534
1535  // Because SROA can't handle speculating stores of selects, try not to sink
1536  // loads, stores or lifetime markers of allocas when we'd have to create a
1537  // PHI for the address operand. Also, because it is likely that loads or
1538  // stores of allocas will disappear when Mem2Reg/SROA is run, don't sink
1539  // them.
1540  // This can cause code churn which can have unintended consequences down
1541  // the line - see https://llvm.org/bugs/show_bug.cgi?id=30244.
1542  // FIXME: This is a workaround for a deficiency in SROA - see
1543  // https://llvm.org/bugs/show_bug.cgi?id=30188
1544  if (isa<StoreInst>(I0) && any_of(Insts, [](const Instruction *I) {
1545        return isa<AllocaInst>(I->getOperand(1)->stripPointerCasts());
1546      }))
1547    return false;
1548  if (isa<LoadInst>(I0) && any_of(Insts, [](const Instruction *I) {
1549        return isa<AllocaInst>(I->getOperand(0)->stripPointerCasts());
1550      }))
1551    return false;
1552  if (isLifeTimeMarker(I0) && any_of(Insts, [](const Instruction *I) {
1553        return isa<AllocaInst>(I->getOperand(1)->stripPointerCasts());
1554      }))
1555    return false;
1556
1557  for (unsigned OI = 0, OE = I0->getNumOperands(); OI != OE; ++OI) {
1558    Value *Op = I0->getOperand(OI);
1559    if (Op->getType()->isTokenTy())
1560      // Don't touch any operand of token type.
1561      return false;
1562
1563    auto SameAsI0 = [&I0, OI](const Instruction *I) {
1564      assert(I->getNumOperands() == I0->getNumOperands());
1565      return I->getOperand(OI) == I0->getOperand(OI);
1566    };
1567    if (!all_of(Insts, SameAsI0)) {
1568      if ((isa<Constant>(Op) && !replacingOperandWithVariableIsCheap(I0, OI)) ||
1569          !canReplaceOperandWithVariable(I0, OI))
1570        // We can't create a PHI from this GEP.
1571        return false;
1572      // Don't create indirect calls! The called value is the final operand.
1573      if (isa<CallBase>(I0) && OI == OE - 1) {
1574        // FIXME: if the call was *already* indirect, we should do this.
1575        return false;
1576      }
1577      for (auto *I : Insts)
1578        PHIOperands[I].push_back(I->getOperand(OI));
1579    }
1580  }
1581  return true;
1582}
1583
1584// Assuming canSinkLastInstruction(Blocks) has returned true, sink the last
1585// instruction of every block in Blocks to their common successor, commoning
1586// into one instruction.
1587static bool sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) {
1588  auto *BBEnd = Blocks[0]->getTerminator()->getSuccessor(0);
1589
1590  // canSinkLastInstruction returning true guarantees that every block has at
1591  // least one non-terminator instruction.
1592  SmallVector<Instruction*,4> Insts;
1593  for (auto *BB : Blocks) {
1594    Instruction *I = BB->getTerminator();
1595    do {
1596      I = I->getPrevNode();
1597    } while (isa<DbgInfoIntrinsic>(I) && I != &BB->front());
1598    if (!isa<DbgInfoIntrinsic>(I))
1599      Insts.push_back(I);
1600  }
1601
1602  // The only checking we need to do now is that all users of all instructions
1603  // are the same PHI node. canSinkLastInstruction should have checked this but
1604  // it is slightly over-aggressive - it gets confused by commutative instructions
1605  // so double-check it here.
1606  Instruction *I0 = Insts.front();
1607  if (!I0->user_empty()) {
1608    auto *PNUse = dyn_cast<PHINode>(*I0->user_begin());
1609    if (!all_of(Insts, [&PNUse](const Instruction *I) -> bool {
1610          auto *U = cast<Instruction>(*I->user_begin());
1611          return U == PNUse;
1612        }))
1613      return false;
1614  }
1615
1616  // We don't need to do any more checking here; canSinkLastInstruction should
1617  // have done it all for us.
1618  SmallVector<Value*, 4> NewOperands;
1619  for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) {
1620    // This check is different to that in canSinkLastInstruction. There, we
1621    // cared about the global view once simplifycfg (and instcombine) have
1622    // completed - it takes into account PHIs that become trivially
1623    // simplifiable.  However here we need a more local view; if an operand
1624    // differs we create a PHI and rely on instcombine to clean up the very
1625    // small mess we may make.
1626    bool NeedPHI = any_of(Insts, [&I0, O](const Instruction *I) {
1627      return I->getOperand(O) != I0->getOperand(O);
1628    });
1629    if (!NeedPHI) {
1630      NewOperands.push_back(I0->getOperand(O));
1631      continue;
1632    }
1633
1634    // Create a new PHI in the successor block and populate it.
1635    auto *Op = I0->getOperand(O);
1636    assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!");
1637    auto *PN = PHINode::Create(Op->getType(), Insts.size(),
1638                               Op->getName() + ".sink", &BBEnd->front());
1639    for (auto *I : Insts)
1640      PN->addIncoming(I->getOperand(O), I->getParent());
1641    NewOperands.push_back(PN);
1642  }
1643
1644  // Arbitrarily use I0 as the new "common" instruction; remap its operands
1645  // and move it to the start of the successor block.
1646  for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O)
1647    I0->getOperandUse(O).set(NewOperands[O]);
1648  I0->moveBefore(&*BBEnd->getFirstInsertionPt());
1649
1650  // Update metadata and IR flags, and merge debug locations.
1651  for (auto *I : Insts)
1652    if (I != I0) {
1653      // The debug location for the "common" instruction is the merged locations
1654      // of all the commoned instructions.  We start with the original location
1655      // of the "common" instruction and iteratively merge each location in the
1656      // loop below.
1657      // This is an N-way merge, which will be inefficient if I0 is a CallInst.
1658      // However, as N-way merge for CallInst is rare, so we use simplified API
1659      // instead of using complex API for N-way merge.
1660      I0->applyMergedLocation(I0->getDebugLoc(), I->getDebugLoc());
1661      combineMetadataForCSE(I0, I, true);
1662      I0->andIRFlags(I);
1663    }
1664
1665  if (!I0->user_empty()) {
1666    // canSinkLastInstruction checked that all instructions were used by
1667    // one and only one PHI node. Find that now, RAUW it to our common
1668    // instruction and nuke it.
1669    auto *PN = cast<PHINode>(*I0->user_begin());
1670    PN->replaceAllUsesWith(I0);
1671    PN->eraseFromParent();
1672  }
1673
1674  // Finally nuke all instructions apart from the common instruction.
1675  for (auto *I : Insts)
1676    if (I != I0)
1677      I->eraseFromParent();
1678
1679  return true;
1680}
1681
1682namespace {
1683
1684  // LockstepReverseIterator - Iterates through instructions
1685  // in a set of blocks in reverse order from the first non-terminator.
1686  // For example (assume all blocks have size n):
1687  //   LockstepReverseIterator I([B1, B2, B3]);
1688  //   *I-- = [B1[n], B2[n], B3[n]];
1689  //   *I-- = [B1[n-1], B2[n-1], B3[n-1]];
1690  //   *I-- = [B1[n-2], B2[n-2], B3[n-2]];
1691  //   ...
1692  class LockstepReverseIterator {
1693    ArrayRef<BasicBlock*> Blocks;
1694    SmallVector<Instruction*,4> Insts;
1695    bool Fail;
1696
1697  public:
1698    LockstepReverseIterator(ArrayRef<BasicBlock*> Blocks) : Blocks(Blocks) {
1699      reset();
1700    }
1701
1702    void reset() {
1703      Fail = false;
1704      Insts.clear();
1705      for (auto *BB : Blocks) {
1706        Instruction *Inst = BB->getTerminator();
1707        for (Inst = Inst->getPrevNode(); Inst && isa<DbgInfoIntrinsic>(Inst);)
1708          Inst = Inst->getPrevNode();
1709        if (!Inst) {
1710          // Block wasn't big enough.
1711          Fail = true;
1712          return;
1713        }
1714        Insts.push_back(Inst);
1715      }
1716    }
1717
1718    bool isValid() const {
1719      return !Fail;
1720    }
1721
1722    void operator--() {
1723      if (Fail)
1724        return;
1725      for (auto *&Inst : Insts) {
1726        for (Inst = Inst->getPrevNode(); Inst && isa<DbgInfoIntrinsic>(Inst);)
1727          Inst = Inst->getPrevNode();
1728        // Already at beginning of block.
1729        if (!Inst) {
1730          Fail = true;
1731          return;
1732        }
1733      }
1734    }
1735
1736    ArrayRef<Instruction*> operator * () const {
1737      return Insts;
1738    }
1739  };
1740
1741} // end anonymous namespace
1742
1743/// Check whether BB's predecessors end with unconditional branches. If it is
1744/// true, sink any common code from the predecessors to BB.
1745/// We also allow one predecessor to end with conditional branch (but no more
1746/// than one).
1747static bool SinkCommonCodeFromPredecessors(BasicBlock *BB) {
1748  // We support two situations:
1749  //   (1) all incoming arcs are unconditional
1750  //   (2) one incoming arc is conditional
1751  //
1752  // (2) is very common in switch defaults and
1753  // else-if patterns;
1754  //
1755  //   if (a) f(1);
1756  //   else if (b) f(2);
1757  //
1758  // produces:
1759  //
1760  //       [if]
1761  //      /    \
1762  //    [f(1)] [if]
1763  //      |     | \
1764  //      |     |  |
1765  //      |  [f(2)]|
1766  //       \    | /
1767  //        [ end ]
1768  //
1769  // [end] has two unconditional predecessor arcs and one conditional. The
1770  // conditional refers to the implicit empty 'else' arc. This conditional
1771  // arc can also be caused by an empty default block in a switch.
1772  //
1773  // In this case, we attempt to sink code from all *unconditional* arcs.
1774  // If we can sink instructions from these arcs (determined during the scan
1775  // phase below) we insert a common successor for all unconditional arcs and
1776  // connect that to [end], to enable sinking:
1777  //
1778  //       [if]
1779  //      /    \
1780  //    [x(1)] [if]
1781  //      |     | \
1782  //      |     |  \
1783  //      |  [x(2)] |
1784  //       \   /    |
1785  //   [sink.split] |
1786  //         \     /
1787  //         [ end ]
1788  //
1789  SmallVector<BasicBlock*,4> UnconditionalPreds;
1790  Instruction *Cond = nullptr;
1791  for (auto *B : predecessors(BB)) {
1792    auto *T = B->getTerminator();
1793    if (isa<BranchInst>(T) && cast<BranchInst>(T)->isUnconditional())
1794      UnconditionalPreds.push_back(B);
1795    else if ((isa<BranchInst>(T) || isa<SwitchInst>(T)) && !Cond)
1796      Cond = T;
1797    else
1798      return false;
1799  }
1800  if (UnconditionalPreds.size() < 2)
1801    return false;
1802
1803  bool Changed = false;
1804  // We take a two-step approach to tail sinking. First we scan from the end of
1805  // each block upwards in lockstep. If the n'th instruction from the end of each
1806  // block can be sunk, those instructions are added to ValuesToSink and we
1807  // carry on. If we can sink an instruction but need to PHI-merge some operands
1808  // (because they're not identical in each instruction) we add these to
1809  // PHIOperands.
1810  unsigned ScanIdx = 0;
1811  SmallPtrSet<Value*,4> InstructionsToSink;
1812  DenseMap<Instruction*, SmallVector<Value*,4>> PHIOperands;
1813  LockstepReverseIterator LRI(UnconditionalPreds);
1814  while (LRI.isValid() &&
1815         canSinkInstructions(*LRI, PHIOperands)) {
1816    LLVM_DEBUG(dbgs() << "SINK: instruction can be sunk: " << *(*LRI)[0]
1817                      << "\n");
1818    InstructionsToSink.insert((*LRI).begin(), (*LRI).end());
1819    ++ScanIdx;
1820    --LRI;
1821  }
1822
1823  auto ProfitableToSinkInstruction = [&](LockstepReverseIterator &LRI) {
1824    unsigned NumPHIdValues = 0;
1825    for (auto *I : *LRI)
1826      for (auto *V : PHIOperands[I])
1827        if (InstructionsToSink.count(V) == 0)
1828          ++NumPHIdValues;
1829    LLVM_DEBUG(dbgs() << "SINK: #phid values: " << NumPHIdValues << "\n");
1830    unsigned NumPHIInsts = NumPHIdValues / UnconditionalPreds.size();
1831    if ((NumPHIdValues % UnconditionalPreds.size()) != 0)
1832        NumPHIInsts++;
1833
1834    return NumPHIInsts <= 1;
1835  };
1836
1837  if (ScanIdx > 0 && Cond) {
1838    // Check if we would actually sink anything first! This mutates the CFG and
1839    // adds an extra block. The goal in doing this is to allow instructions that
1840    // couldn't be sunk before to be sunk - obviously, speculatable instructions
1841    // (such as trunc, add) can be sunk and predicated already. So we check that
1842    // we're going to sink at least one non-speculatable instruction.
1843    LRI.reset();
1844    unsigned Idx = 0;
1845    bool Profitable = false;
1846    while (ProfitableToSinkInstruction(LRI) && Idx < ScanIdx) {
1847      if (!isSafeToSpeculativelyExecute((*LRI)[0])) {
1848        Profitable = true;
1849        break;
1850      }
1851      --LRI;
1852      ++Idx;
1853    }
1854    if (!Profitable)
1855      return false;
1856
1857    LLVM_DEBUG(dbgs() << "SINK: Splitting edge\n");
1858    // We have a conditional edge and we're going to sink some instructions.
1859    // Insert a new block postdominating all blocks we're going to sink from.
1860    if (!SplitBlockPredecessors(BB, UnconditionalPreds, ".sink.split"))
1861      // Edges couldn't be split.
1862      return false;
1863    Changed = true;
1864  }
1865
1866  // Now that we've analyzed all potential sinking candidates, perform the
1867  // actual sink. We iteratively sink the last non-terminator of the source
1868  // blocks into their common successor unless doing so would require too
1869  // many PHI instructions to be generated (currently only one PHI is allowed
1870  // per sunk instruction).
1871  //
1872  // We can use InstructionsToSink to discount values needing PHI-merging that will
1873  // actually be sunk in a later iteration. This allows us to be more
1874  // aggressive in what we sink. This does allow a false positive where we
1875  // sink presuming a later value will also be sunk, but stop half way through
1876  // and never actually sink it which means we produce more PHIs than intended.
1877  // This is unlikely in practice though.
1878  for (unsigned SinkIdx = 0; SinkIdx != ScanIdx; ++SinkIdx) {
1879    LLVM_DEBUG(dbgs() << "SINK: Sink: "
1880                      << *UnconditionalPreds[0]->getTerminator()->getPrevNode()
1881                      << "\n");
1882
1883    // Because we've sunk every instruction in turn, the current instruction to
1884    // sink is always at index 0.
1885    LRI.reset();
1886    if (!ProfitableToSinkInstruction(LRI)) {
1887      // Too many PHIs would be created.
1888      LLVM_DEBUG(
1889          dbgs() << "SINK: stopping here, too many PHIs would be created!\n");
1890      break;
1891    }
1892
1893    if (!sinkLastInstruction(UnconditionalPreds))
1894      return Changed;
1895    NumSinkCommons++;
1896    Changed = true;
1897  }
1898  return Changed;
1899}
1900
1901/// Determine if we can hoist sink a sole store instruction out of a
1902/// conditional block.
1903///
1904/// We are looking for code like the following:
1905///   BrBB:
1906///     store i32 %add, i32* %arrayidx2
1907///     ... // No other stores or function calls (we could be calling a memory
1908///     ... // function).
1909///     %cmp = icmp ult %x, %y
1910///     br i1 %cmp, label %EndBB, label %ThenBB
1911///   ThenBB:
1912///     store i32 %add5, i32* %arrayidx2
1913///     br label EndBB
1914///   EndBB:
1915///     ...
1916///   We are going to transform this into:
1917///   BrBB:
1918///     store i32 %add, i32* %arrayidx2
1919///     ... //
1920///     %cmp = icmp ult %x, %y
1921///     %add.add5 = select i1 %cmp, i32 %add, %add5
1922///     store i32 %add.add5, i32* %arrayidx2
1923///     ...
1924///
1925/// \return The pointer to the value of the previous store if the store can be
1926///         hoisted into the predecessor block. 0 otherwise.
1927static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB,
1928                                     BasicBlock *StoreBB, BasicBlock *EndBB) {
1929  StoreInst *StoreToHoist = dyn_cast<StoreInst>(I);
1930  if (!StoreToHoist)
1931    return nullptr;
1932
1933  // Volatile or atomic.
1934  if (!StoreToHoist->isSimple())
1935    return nullptr;
1936
1937  Value *StorePtr = StoreToHoist->getPointerOperand();
1938
1939  // Look for a store to the same pointer in BrBB.
1940  unsigned MaxNumInstToLookAt = 9;
1941  for (Instruction &CurI : reverse(BrBB->instructionsWithoutDebug())) {
1942    if (!MaxNumInstToLookAt)
1943      break;
1944    --MaxNumInstToLookAt;
1945
1946    // Could be calling an instruction that affects memory like free().
1947    if (CurI.mayHaveSideEffects() && !isa<StoreInst>(CurI))
1948      return nullptr;
1949
1950    if (auto *SI = dyn_cast<StoreInst>(&CurI)) {
1951      // Found the previous store make sure it stores to the same location.
1952      if (SI->getPointerOperand() == StorePtr)
1953        // Found the previous store, return its value operand.
1954        return SI->getValueOperand();
1955      return nullptr; // Unknown store.
1956    }
1957  }
1958
1959  return nullptr;
1960}
1961
1962/// Speculate a conditional basic block flattening the CFG.
1963///
1964/// Note that this is a very risky transform currently. Speculating
1965/// instructions like this is most often not desirable. Instead, there is an MI
1966/// pass which can do it with full awareness of the resource constraints.
1967/// However, some cases are "obvious" and we should do directly. An example of
1968/// this is speculating a single, reasonably cheap instruction.
1969///
1970/// There is only one distinct advantage to flattening the CFG at the IR level:
1971/// it makes very common but simplistic optimizations such as are common in
1972/// instcombine and the DAG combiner more powerful by removing CFG edges and
1973/// modeling their effects with easier to reason about SSA value graphs.
1974///
1975///
1976/// An illustration of this transform is turning this IR:
1977/// \code
1978///   BB:
1979///     %cmp = icmp ult %x, %y
1980///     br i1 %cmp, label %EndBB, label %ThenBB
1981///   ThenBB:
1982///     %sub = sub %x, %y
1983///     br label BB2
1984///   EndBB:
1985///     %phi = phi [ %sub, %ThenBB ], [ 0, %EndBB ]
1986///     ...
1987/// \endcode
1988///
1989/// Into this IR:
1990/// \code
1991///   BB:
1992///     %cmp = icmp ult %x, %y
1993///     %sub = sub %x, %y
1994///     %cond = select i1 %cmp, 0, %sub
1995///     ...
1996/// \endcode
1997///
1998/// \returns true if the conditional block is removed.
1999bool SimplifyCFGOpt::SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
2000                                            const TargetTransformInfo &TTI) {
2001  // Be conservative for now. FP select instruction can often be expensive.
2002  Value *BrCond = BI->getCondition();
2003  if (isa<FCmpInst>(BrCond))
2004    return false;
2005
2006  BasicBlock *BB = BI->getParent();
2007  BasicBlock *EndBB = ThenBB->getTerminator()->getSuccessor(0);
2008
2009  // If ThenBB is actually on the false edge of the conditional branch, remember
2010  // to swap the select operands later.
2011  bool Invert = false;
2012  if (ThenBB != BI->getSuccessor(0)) {
2013    assert(ThenBB == BI->getSuccessor(1) && "No edge from 'if' block?");
2014    Invert = true;
2015  }
2016  assert(EndBB == BI->getSuccessor(!Invert) && "No edge from to end block");
2017
2018  // Keep a count of how many times instructions are used within ThenBB when
2019  // they are candidates for sinking into ThenBB. Specifically:
2020  // - They are defined in BB, and
2021  // - They have no side effects, and
2022  // - All of their uses are in ThenBB.
2023  SmallDenseMap<Instruction *, unsigned, 4> SinkCandidateUseCounts;
2024
2025  SmallVector<Instruction *, 4> SpeculatedDbgIntrinsics;
2026
2027  unsigned SpeculatedInstructions = 0;
2028  Value *SpeculatedStoreValue = nullptr;
2029  StoreInst *SpeculatedStore = nullptr;
2030  for (BasicBlock::iterator BBI = ThenBB->begin(),
2031                            BBE = std::prev(ThenBB->end());
2032       BBI != BBE; ++BBI) {
2033    Instruction *I = &*BBI;
2034    // Skip debug info.
2035    if (isa<DbgInfoIntrinsic>(I)) {
2036      SpeculatedDbgIntrinsics.push_back(I);
2037      continue;
2038    }
2039
2040    // Only speculatively execute a single instruction (not counting the
2041    // terminator) for now.
2042    ++SpeculatedInstructions;
2043    if (SpeculatedInstructions > 1)
2044      return false;
2045
2046    // Don't hoist the instruction if it's unsafe or expensive.
2047    if (!isSafeToSpeculativelyExecute(I) &&
2048        !(HoistCondStores && (SpeculatedStoreValue = isSafeToSpeculateStore(
2049                                  I, BB, ThenBB, EndBB))))
2050      return false;
2051    if (!SpeculatedStoreValue &&
2052        ComputeSpeculationCost(I, TTI) >
2053            PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic)
2054      return false;
2055
2056    // Store the store speculation candidate.
2057    if (SpeculatedStoreValue)
2058      SpeculatedStore = cast<StoreInst>(I);
2059
2060    // Do not hoist the instruction if any of its operands are defined but not
2061    // used in BB. The transformation will prevent the operand from
2062    // being sunk into the use block.
2063    for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i) {
2064      Instruction *OpI = dyn_cast<Instruction>(*i);
2065      if (!OpI || OpI->getParent() != BB || OpI->mayHaveSideEffects())
2066        continue; // Not a candidate for sinking.
2067
2068      ++SinkCandidateUseCounts[OpI];
2069    }
2070  }
2071
2072  // Consider any sink candidates which are only used in ThenBB as costs for
2073  // speculation. Note, while we iterate over a DenseMap here, we are summing
2074  // and so iteration order isn't significant.
2075  for (SmallDenseMap<Instruction *, unsigned, 4>::iterator
2076           I = SinkCandidateUseCounts.begin(),
2077           E = SinkCandidateUseCounts.end();
2078       I != E; ++I)
2079    if (I->first->hasNUses(I->second)) {
2080      ++SpeculatedInstructions;
2081      if (SpeculatedInstructions > 1)
2082        return false;
2083    }
2084
2085  // Check that the PHI nodes can be converted to selects.
2086  bool HaveRewritablePHIs = false;
2087  for (PHINode &PN : EndBB->phis()) {
2088    Value *OrigV = PN.getIncomingValueForBlock(BB);
2089    Value *ThenV = PN.getIncomingValueForBlock(ThenBB);
2090
2091    // FIXME: Try to remove some of the duplication with HoistThenElseCodeToIf.
2092    // Skip PHIs which are trivial.
2093    if (ThenV == OrigV)
2094      continue;
2095
2096    // Don't convert to selects if we could remove undefined behavior instead.
2097    if (passingValueIsAlwaysUndefined(OrigV, &PN) ||
2098        passingValueIsAlwaysUndefined(ThenV, &PN))
2099      return false;
2100
2101    HaveRewritablePHIs = true;
2102    ConstantExpr *OrigCE = dyn_cast<ConstantExpr>(OrigV);
2103    ConstantExpr *ThenCE = dyn_cast<ConstantExpr>(ThenV);
2104    if (!OrigCE && !ThenCE)
2105      continue; // Known safe and cheap.
2106
2107    if ((ThenCE && !isSafeToSpeculativelyExecute(ThenCE)) ||
2108        (OrigCE && !isSafeToSpeculativelyExecute(OrigCE)))
2109      return false;
2110    unsigned OrigCost = OrigCE ? ComputeSpeculationCost(OrigCE, TTI) : 0;
2111    unsigned ThenCost = ThenCE ? ComputeSpeculationCost(ThenCE, TTI) : 0;
2112    unsigned MaxCost =
2113        2 * PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;
2114    if (OrigCost + ThenCost > MaxCost)
2115      return false;
2116
2117    // Account for the cost of an unfolded ConstantExpr which could end up
2118    // getting expanded into Instructions.
2119    // FIXME: This doesn't account for how many operations are combined in the
2120    // constant expression.
2121    ++SpeculatedInstructions;
2122    if (SpeculatedInstructions > 1)
2123      return false;
2124  }
2125
2126  // If there are no PHIs to process, bail early. This helps ensure idempotence
2127  // as well.
2128  if (!HaveRewritablePHIs && !(HoistCondStores && SpeculatedStoreValue))
2129    return false;
2130
2131  // If we get here, we can hoist the instruction and if-convert.
2132  LLVM_DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";);
2133
2134  // Insert a select of the value of the speculated store.
2135  if (SpeculatedStoreValue) {
2136    IRBuilder<NoFolder> Builder(BI);
2137    Value *TrueV = SpeculatedStore->getValueOperand();
2138    Value *FalseV = SpeculatedStoreValue;
2139    if (Invert)
2140      std::swap(TrueV, FalseV);
2141    Value *S = Builder.CreateSelect(
2142        BrCond, TrueV, FalseV, "spec.store.select", BI);
2143    SpeculatedStore->setOperand(0, S);
2144    SpeculatedStore->applyMergedLocation(BI->getDebugLoc(),
2145                                         SpeculatedStore->getDebugLoc());
2146  }
2147
2148  // Metadata can be dependent on the condition we are hoisting above.
2149  // Conservatively strip all metadata on the instruction. Drop the debug loc
2150  // to avoid making it appear as if the condition is a constant, which would
2151  // be misleading while debugging.
2152  for (auto &I : *ThenBB) {
2153    if (!SpeculatedStoreValue || &I != SpeculatedStore)
2154      I.setDebugLoc(DebugLoc());
2155    I.dropUnknownNonDebugMetadata();
2156  }
2157
2158  // Hoist the instructions.
2159  BB->getInstList().splice(BI->getIterator(), ThenBB->getInstList(),
2160                           ThenBB->begin(), std::prev(ThenBB->end()));
2161
2162  // Insert selects and rewrite the PHI operands.
2163  IRBuilder<NoFolder> Builder(BI);
2164  for (PHINode &PN : EndBB->phis()) {
2165    unsigned OrigI = PN.getBasicBlockIndex(BB);
2166    unsigned ThenI = PN.getBasicBlockIndex(ThenBB);
2167    Value *OrigV = PN.getIncomingValue(OrigI);
2168    Value *ThenV = PN.getIncomingValue(ThenI);
2169
2170    // Skip PHIs which are trivial.
2171    if (OrigV == ThenV)
2172      continue;
2173
2174    // Create a select whose true value is the speculatively executed value and
2175    // false value is the pre-existing value. Swap them if the branch
2176    // destinations were inverted.
2177    Value *TrueV = ThenV, *FalseV = OrigV;
2178    if (Invert)
2179      std::swap(TrueV, FalseV);
2180    Value *V = Builder.CreateSelect(BrCond, TrueV, FalseV, "spec.select", BI);
2181    PN.setIncomingValue(OrigI, V);
2182    PN.setIncomingValue(ThenI, V);
2183  }
2184
2185  // Remove speculated dbg intrinsics.
2186  // FIXME: Is it possible to do this in a more elegant way? Moving/merging the
2187  // dbg value for the different flows and inserting it after the select.
2188  for (Instruction *I : SpeculatedDbgIntrinsics)
2189    I->eraseFromParent();
2190
2191  ++NumSpeculations;
2192  return true;
2193}
2194
2195/// Return true if we can thread a branch across this block.
2196static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) {
2197  int Size = 0;
2198
2199  for (Instruction &I : BB->instructionsWithoutDebug()) {
2200    if (Size > MaxSmallBlockSize)
2201      return false; // Don't clone large BB's.
2202    // We will delete Phis while threading, so Phis should not be accounted in
2203    // block's size
2204    if (!isa<PHINode>(I))
2205      ++Size;
2206
2207    // We can only support instructions that do not define values that are
2208    // live outside of the current basic block.
2209    for (User *U : I.users()) {
2210      Instruction *UI = cast<Instruction>(U);
2211      if (UI->getParent() != BB || isa<PHINode>(UI))
2212        return false;
2213    }
2214
2215    // Looks ok, continue checking.
2216  }
2217
2218  return true;
2219}
2220
2221/// If we have a conditional branch on a PHI node value that is defined in the
2222/// same block as the branch and if any PHI entries are constants, thread edges
2223/// corresponding to that entry to be branches to their ultimate destination.
2224static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout &DL,
2225                                AssumptionCache *AC) {
2226  BasicBlock *BB = BI->getParent();
2227  PHINode *PN = dyn_cast<PHINode>(BI->getCondition());
2228  // NOTE: we currently cannot transform this case if the PHI node is used
2229  // outside of the block.
2230  if (!PN || PN->getParent() != BB || !PN->hasOneUse())
2231    return false;
2232
2233  // Degenerate case of a single entry PHI.
2234  if (PN->getNumIncomingValues() == 1) {
2235    FoldSingleEntryPHINodes(PN->getParent());
2236    return true;
2237  }
2238
2239  // Now we know that this block has multiple preds and two succs.
2240  if (!BlockIsSimpleEnoughToThreadThrough(BB))
2241    return false;
2242
2243  // Can't fold blocks that contain noduplicate or convergent calls.
2244  if (any_of(*BB, [](const Instruction &I) {
2245        const CallInst *CI = dyn_cast<CallInst>(&I);
2246        return CI && (CI->cannotDuplicate() || CI->isConvergent());
2247      }))
2248    return false;
2249
2250  // Okay, this is a simple enough basic block.  See if any phi values are
2251  // constants.
2252  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
2253    ConstantInt *CB = dyn_cast<ConstantInt>(PN->getIncomingValue(i));
2254    if (!CB || !CB->getType()->isIntegerTy(1))
2255      continue;
2256
2257    // Okay, we now know that all edges from PredBB should be revectored to
2258    // branch to RealDest.
2259    BasicBlock *PredBB = PN->getIncomingBlock(i);
2260    BasicBlock *RealDest = BI->getSuccessor(!CB->getZExtValue());
2261
2262    if (RealDest == BB)
2263      continue; // Skip self loops.
2264    // Skip if the predecessor's terminator is an indirect branch.
2265    if (isa<IndirectBrInst>(PredBB->getTerminator()))
2266      continue;
2267
2268    // The dest block might have PHI nodes, other predecessors and other
2269    // difficult cases.  Instead of being smart about this, just insert a new
2270    // block that jumps to the destination block, effectively splitting
2271    // the edge we are about to create.
2272    BasicBlock *EdgeBB =
2273        BasicBlock::Create(BB->getContext(), RealDest->getName() + ".critedge",
2274                           RealDest->getParent(), RealDest);
2275    BranchInst *CritEdgeBranch = BranchInst::Create(RealDest, EdgeBB);
2276    CritEdgeBranch->setDebugLoc(BI->getDebugLoc());
2277
2278    // Update PHI nodes.
2279    AddPredecessorToBlock(RealDest, EdgeBB, BB);
2280
2281    // BB may have instructions that are being threaded over.  Clone these
2282    // instructions into EdgeBB.  We know that there will be no uses of the
2283    // cloned instructions outside of EdgeBB.
2284    BasicBlock::iterator InsertPt = EdgeBB->begin();
2285    DenseMap<Value *, Value *> TranslateMap; // Track translated values.
2286    for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) {
2287      if (PHINode *PN = dyn_cast<PHINode>(BBI)) {
2288        TranslateMap[PN] = PN->getIncomingValueForBlock(PredBB);
2289        continue;
2290      }
2291      // Clone the instruction.
2292      Instruction *N = BBI->clone();
2293      if (BBI->hasName())
2294        N->setName(BBI->getName() + ".c");
2295
2296      // Update operands due to translation.
2297      for (User::op_iterator i = N->op_begin(), e = N->op_end(); i != e; ++i) {
2298        DenseMap<Value *, Value *>::iterator PI = TranslateMap.find(*i);
2299        if (PI != TranslateMap.end())
2300          *i = PI->second;
2301      }
2302
2303      // Check for trivial simplification.
2304      if (Value *V = SimplifyInstruction(N, {DL, nullptr, nullptr, AC})) {
2305        if (!BBI->use_empty())
2306          TranslateMap[&*BBI] = V;
2307        if (!N->mayHaveSideEffects()) {
2308          N->deleteValue(); // Instruction folded away, don't need actual inst
2309          N = nullptr;
2310        }
2311      } else {
2312        if (!BBI->use_empty())
2313          TranslateMap[&*BBI] = N;
2314      }
2315      if (N) {
2316        // Insert the new instruction into its new home.
2317        EdgeBB->getInstList().insert(InsertPt, N);
2318
2319        // Register the new instruction with the assumption cache if necessary.
2320        if (AC && match(N, m_Intrinsic<Intrinsic::assume>()))
2321          AC->registerAssumption(cast<IntrinsicInst>(N));
2322      }
2323    }
2324
2325    // Loop over all of the edges from PredBB to BB, changing them to branch
2326    // to EdgeBB instead.
2327    Instruction *PredBBTI = PredBB->getTerminator();
2328    for (unsigned i = 0, e = PredBBTI->getNumSuccessors(); i != e; ++i)
2329      if (PredBBTI->getSuccessor(i) == BB) {
2330        BB->removePredecessor(PredBB);
2331        PredBBTI->setSuccessor(i, EdgeBB);
2332      }
2333
2334    // Recurse, simplifying any other constants.
2335    return FoldCondBranchOnPHI(BI, DL, AC) || true;
2336  }
2337
2338  return false;
2339}
2340
2341/// Given a BB that starts with the specified two-entry PHI node,
2342/// see if we can eliminate it.
2343static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
2344                                const DataLayout &DL) {
2345  // Ok, this is a two entry PHI node.  Check to see if this is a simple "if
2346  // statement", which has a very simple dominance structure.  Basically, we
2347  // are trying to find the condition that is being branched on, which
2348  // subsequently causes this merge to happen.  We really want control
2349  // dependence information for this check, but simplifycfg can't keep it up
2350  // to date, and this catches most of the cases we care about anyway.
2351  BasicBlock *BB = PN->getParent();
2352
2353  BasicBlock *IfTrue, *IfFalse;
2354  Value *IfCond = GetIfCondition(BB, IfTrue, IfFalse);
2355  if (!IfCond ||
2356      // Don't bother if the branch will be constant folded trivially.
2357      isa<ConstantInt>(IfCond))
2358    return false;
2359
2360  // Okay, we found that we can merge this two-entry phi node into a select.
2361  // Doing so would require us to fold *all* two entry phi nodes in this block.
2362  // At some point this becomes non-profitable (particularly if the target
2363  // doesn't support cmov's).  Only do this transformation if there are two or
2364  // fewer PHI nodes in this block.
2365  unsigned NumPhis = 0;
2366  for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++NumPhis, ++I)
2367    if (NumPhis > 2)
2368      return false;
2369
2370  // Loop over the PHI's seeing if we can promote them all to select
2371  // instructions.  While we are at it, keep track of the instructions
2372  // that need to be moved to the dominating block.
2373  SmallPtrSet<Instruction *, 4> AggressiveInsts;
2374  int BudgetRemaining =
2375      TwoEntryPHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;
2376
2377  for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) {
2378    PHINode *PN = cast<PHINode>(II++);
2379    if (Value *V = SimplifyInstruction(PN, {DL, PN})) {
2380      PN->replaceAllUsesWith(V);
2381      PN->eraseFromParent();
2382      continue;
2383    }
2384
2385    if (!DominatesMergePoint(PN->getIncomingValue(0), BB, AggressiveInsts,
2386                             BudgetRemaining, TTI) ||
2387        !DominatesMergePoint(PN->getIncomingValue(1), BB, AggressiveInsts,
2388                             BudgetRemaining, TTI))
2389      return false;
2390  }
2391
2392  // If we folded the first phi, PN dangles at this point.  Refresh it.  If
2393  // we ran out of PHIs then we simplified them all.
2394  PN = dyn_cast<PHINode>(BB->begin());
2395  if (!PN)
2396    return true;
2397
2398  // Return true if at least one of these is a 'not', and another is either
2399  // a 'not' too, or a constant.
2400  auto CanHoistNotFromBothValues = [](Value *V0, Value *V1) {
2401    if (!match(V0, m_Not(m_Value())))
2402      std::swap(V0, V1);
2403    auto Invertible = m_CombineOr(m_Not(m_Value()), m_AnyIntegralConstant());
2404    return match(V0, m_Not(m_Value())) && match(V1, Invertible);
2405  };
2406
2407  // Don't fold i1 branches on PHIs which contain binary operators, unless one
2408  // of the incoming values is an 'not' and another one is freely invertible.
2409  // These can often be turned into switches and other things.
2410  if (PN->getType()->isIntegerTy(1) &&
2411      (isa<BinaryOperator>(PN->getIncomingValue(0)) ||
2412       isa<BinaryOperator>(PN->getIncomingValue(1)) ||
2413       isa<BinaryOperator>(IfCond)) &&
2414      !CanHoistNotFromBothValues(PN->getIncomingValue(0),
2415                                 PN->getIncomingValue(1)))
2416    return false;
2417
2418  // If all PHI nodes are promotable, check to make sure that all instructions
2419  // in the predecessor blocks can be promoted as well. If not, we won't be able
2420  // to get rid of the control flow, so it's not worth promoting to select
2421  // instructions.
2422  BasicBlock *DomBlock = nullptr;
2423  BasicBlock *IfBlock1 = PN->getIncomingBlock(0);
2424  BasicBlock *IfBlock2 = PN->getIncomingBlock(1);
2425  if (cast<BranchInst>(IfBlock1->getTerminator())->isConditional()) {
2426    IfBlock1 = nullptr;
2427  } else {
2428    DomBlock = *pred_begin(IfBlock1);
2429    for (BasicBlock::iterator I = IfBlock1->begin(); !I->isTerminator(); ++I)
2430      if (!AggressiveInsts.count(&*I) && !isa<DbgInfoIntrinsic>(I)) {
2431        // This is not an aggressive instruction that we can promote.
2432        // Because of this, we won't be able to get rid of the control flow, so
2433        // the xform is not worth it.
2434        return false;
2435      }
2436  }
2437
2438  if (cast<BranchInst>(IfBlock2->getTerminator())->isConditional()) {
2439    IfBlock2 = nullptr;
2440  } else {
2441    DomBlock = *pred_begin(IfBlock2);
2442    for (BasicBlock::iterator I = IfBlock2->begin(); !I->isTerminator(); ++I)
2443      if (!AggressiveInsts.count(&*I) && !isa<DbgInfoIntrinsic>(I)) {
2444        // This is not an aggressive instruction that we can promote.
2445        // Because of this, we won't be able to get rid of the control flow, so
2446        // the xform is not worth it.
2447        return false;
2448      }
2449  }
2450  assert(DomBlock && "Failed to find root DomBlock");
2451
2452  LLVM_DEBUG(dbgs() << "FOUND IF CONDITION!  " << *IfCond
2453                    << "  T: " << IfTrue->getName()
2454                    << "  F: " << IfFalse->getName() << "\n");
2455
2456  // If we can still promote the PHI nodes after this gauntlet of tests,
2457  // do all of the PHI's now.
2458  Instruction *InsertPt = DomBlock->getTerminator();
2459  IRBuilder<NoFolder> Builder(InsertPt);
2460
2461  // Move all 'aggressive' instructions, which are defined in the
2462  // conditional parts of the if's up to the dominating block.
2463  if (IfBlock1)
2464    hoistAllInstructionsInto(DomBlock, InsertPt, IfBlock1);
2465  if (IfBlock2)
2466    hoistAllInstructionsInto(DomBlock, InsertPt, IfBlock2);
2467
2468  // Propagate fast-math-flags from phi nodes to replacement selects.
2469  IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
2470  while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
2471    if (isa<FPMathOperator>(PN))
2472      Builder.setFastMathFlags(PN->getFastMathFlags());
2473
2474    // Change the PHI node into a select instruction.
2475    Value *TrueVal = PN->getIncomingValue(PN->getIncomingBlock(0) == IfFalse);
2476    Value *FalseVal = PN->getIncomingValue(PN->getIncomingBlock(0) == IfTrue);
2477
2478    Value *Sel = Builder.CreateSelect(IfCond, TrueVal, FalseVal, "", InsertPt);
2479    PN->replaceAllUsesWith(Sel);
2480    Sel->takeName(PN);
2481    PN->eraseFromParent();
2482  }
2483
2484  // At this point, IfBlock1 and IfBlock2 are both empty, so our if statement
2485  // has been flattened.  Change DomBlock to jump directly to our new block to
2486  // avoid other simplifycfg's kicking in on the diamond.
2487  Instruction *OldTI = DomBlock->getTerminator();
2488  Builder.SetInsertPoint(OldTI);
2489  Builder.CreateBr(BB);
2490  OldTI->eraseFromParent();
2491  return true;
2492}
2493
2494/// If we found a conditional branch that goes to two returning blocks,
2495/// try to merge them together into one return,
2496/// introducing a select if the return values disagree.
2497bool SimplifyCFGOpt::SimplifyCondBranchToTwoReturns(BranchInst *BI,
2498                                                    IRBuilder<> &Builder) {
2499  assert(BI->isConditional() && "Must be a conditional branch");
2500  BasicBlock *TrueSucc = BI->getSuccessor(0);
2501  BasicBlock *FalseSucc = BI->getSuccessor(1);
2502  ReturnInst *TrueRet = cast<ReturnInst>(TrueSucc->getTerminator());
2503  ReturnInst *FalseRet = cast<ReturnInst>(FalseSucc->getTerminator());
2504
2505  // Check to ensure both blocks are empty (just a return) or optionally empty
2506  // with PHI nodes.  If there are other instructions, merging would cause extra
2507  // computation on one path or the other.
2508  if (!TrueSucc->getFirstNonPHIOrDbg()->isTerminator())
2509    return false;
2510  if (!FalseSucc->getFirstNonPHIOrDbg()->isTerminator())
2511    return false;
2512
2513  Builder.SetInsertPoint(BI);
2514  // Okay, we found a branch that is going to two return nodes.  If
2515  // there is no return value for this function, just change the
2516  // branch into a return.
2517  if (FalseRet->getNumOperands() == 0) {
2518    TrueSucc->removePredecessor(BI->getParent());
2519    FalseSucc->removePredecessor(BI->getParent());
2520    Builder.CreateRetVoid();
2521    EraseTerminatorAndDCECond(BI);
2522    return true;
2523  }
2524
2525  // Otherwise, figure out what the true and false return values are
2526  // so we can insert a new select instruction.
2527  Value *TrueValue = TrueRet->getReturnValue();
2528  Value *FalseValue = FalseRet->getReturnValue();
2529
2530  // Unwrap any PHI nodes in the return blocks.
2531  if (PHINode *TVPN = dyn_cast_or_null<PHINode>(TrueValue))
2532    if (TVPN->getParent() == TrueSucc)
2533      TrueValue = TVPN->getIncomingValueForBlock(BI->getParent());
2534  if (PHINode *FVPN = dyn_cast_or_null<PHINode>(FalseValue))
2535    if (FVPN->getParent() == FalseSucc)
2536      FalseValue = FVPN->getIncomingValueForBlock(BI->getParent());
2537
2538  // In order for this transformation to be safe, we must be able to
2539  // unconditionally execute both operands to the return.  This is
2540  // normally the case, but we could have a potentially-trapping
2541  // constant expression that prevents this transformation from being
2542  // safe.
2543  if (ConstantExpr *TCV = dyn_cast_or_null<ConstantExpr>(TrueValue))
2544    if (TCV->canTrap())
2545      return false;
2546  if (ConstantExpr *FCV = dyn_cast_or_null<ConstantExpr>(FalseValue))
2547    if (FCV->canTrap())
2548      return false;
2549
2550  // Okay, we collected all the mapped values and checked them for sanity, and
2551  // defined to really do this transformation.  First, update the CFG.
2552  TrueSucc->removePredecessor(BI->getParent());
2553  FalseSucc->removePredecessor(BI->getParent());
2554
2555  // Insert select instructions where needed.
2556  Value *BrCond = BI->getCondition();
2557  if (TrueValue) {
2558    // Insert a select if the results differ.
2559    if (TrueValue == FalseValue || isa<UndefValue>(FalseValue)) {
2560    } else if (isa<UndefValue>(TrueValue)) {
2561      TrueValue = FalseValue;
2562    } else {
2563      TrueValue =
2564          Builder.CreateSelect(BrCond, TrueValue, FalseValue, "retval", BI);
2565    }
2566  }
2567
2568  Value *RI =
2569      !TrueValue ? Builder.CreateRetVoid() : Builder.CreateRet(TrueValue);
2570
2571  (void)RI;
2572
2573  LLVM_DEBUG(dbgs() << "\nCHANGING BRANCH TO TWO RETURNS INTO SELECT:"
2574                    << "\n  " << *BI << "\nNewRet = " << *RI << "\nTRUEBLOCK: "
2575                    << *TrueSucc << "\nFALSEBLOCK: " << *FalseSucc);
2576
2577  EraseTerminatorAndDCECond(BI);
2578
2579  return true;
2580}
2581
2582/// Return true if the given instruction is available
2583/// in its predecessor block. If yes, the instruction will be removed.
2584static bool tryCSEWithPredecessor(Instruction *Inst, BasicBlock *PB) {
2585  if (!isa<BinaryOperator>(Inst) && !isa<CmpInst>(Inst))
2586    return false;
2587  for (Instruction &I : *PB) {
2588    Instruction *PBI = &I;
2589    // Check whether Inst and PBI generate the same value.
2590    if (Inst->isIdenticalTo(PBI)) {
2591      Inst->replaceAllUsesWith(PBI);
2592      Inst->eraseFromParent();
2593      return true;
2594    }
2595  }
2596  return false;
2597}
2598
2599/// Return true if either PBI or BI has branch weight available, and store
2600/// the weights in {Pred|Succ}{True|False}Weight. If one of PBI and BI does
2601/// not have branch weight, use 1:1 as its weight.
2602static bool extractPredSuccWeights(BranchInst *PBI, BranchInst *BI,
2603                                   uint64_t &PredTrueWeight,
2604                                   uint64_t &PredFalseWeight,
2605                                   uint64_t &SuccTrueWeight,
2606                                   uint64_t &SuccFalseWeight) {
2607  bool PredHasWeights =
2608      PBI->extractProfMetadata(PredTrueWeight, PredFalseWeight);
2609  bool SuccHasWeights =
2610      BI->extractProfMetadata(SuccTrueWeight, SuccFalseWeight);
2611  if (PredHasWeights || SuccHasWeights) {
2612    if (!PredHasWeights)
2613      PredTrueWeight = PredFalseWeight = 1;
2614    if (!SuccHasWeights)
2615      SuccTrueWeight = SuccFalseWeight = 1;
2616    return true;
2617  } else {
2618    return false;
2619  }
2620}
2621
2622/// If this basic block is simple enough, and if a predecessor branches to us
2623/// and one of our successors, fold the block into the predecessor and use
2624/// logical operations to pick the right destination.
2625bool llvm::FoldBranchToCommonDest(BranchInst *BI, MemorySSAUpdater *MSSAU,
2626                                  unsigned BonusInstThreshold) {
2627  BasicBlock *BB = BI->getParent();
2628
2629  const unsigned PredCount = pred_size(BB);
2630
2631  bool Changed = false;
2632
2633  Instruction *Cond = nullptr;
2634  if (BI->isConditional())
2635    Cond = dyn_cast<Instruction>(BI->getCondition());
2636  else {
2637    // For unconditional branch, check for a simple CFG pattern, where
2638    // BB has a single predecessor and BB's successor is also its predecessor's
2639    // successor. If such pattern exists, check for CSE between BB and its
2640    // predecessor.
2641    if (BasicBlock *PB = BB->getSinglePredecessor())
2642      if (BranchInst *PBI = dyn_cast<BranchInst>(PB->getTerminator()))
2643        if (PBI->isConditional() &&
2644            (BI->getSuccessor(0) == PBI->getSuccessor(0) ||
2645             BI->getSuccessor(0) == PBI->getSuccessor(1))) {
2646          for (auto I = BB->instructionsWithoutDebug().begin(),
2647                    E = BB->instructionsWithoutDebug().end();
2648               I != E;) {
2649            Instruction *Curr = &*I++;
2650            if (isa<CmpInst>(Curr)) {
2651              Cond = Curr;
2652              break;
2653            }
2654            // Quit if we can't remove this instruction.
2655            if (!tryCSEWithPredecessor(Curr, PB))
2656              return Changed;
2657            Changed = true;
2658          }
2659        }
2660
2661    if (!Cond)
2662      return Changed;
2663  }
2664
2665  if (!Cond || (!isa<CmpInst>(Cond) && !isa<BinaryOperator>(Cond)) ||
2666      Cond->getParent() != BB || !Cond->hasOneUse())
2667    return Changed;
2668
2669  // Make sure the instruction after the condition is the cond branch.
2670  BasicBlock::iterator CondIt = ++Cond->getIterator();
2671
2672  // Ignore dbg intrinsics.
2673  while (isa<DbgInfoIntrinsic>(CondIt))
2674    ++CondIt;
2675
2676  if (&*CondIt != BI)
2677    return Changed;
2678
2679  // Only allow this transformation if computing the condition doesn't involve
2680  // too many instructions and these involved instructions can be executed
2681  // unconditionally. We denote all involved instructions except the condition
2682  // as "bonus instructions", and only allow this transformation when the
2683  // number of the bonus instructions we'll need to create when cloning into
2684  // each predecessor does not exceed a certain threshold.
2685  unsigned NumBonusInsts = 0;
2686  for (auto I = BB->begin(); Cond != &*I; ++I) {
2687    // Ignore dbg intrinsics.
2688    if (isa<DbgInfoIntrinsic>(I))
2689      continue;
2690    if (!I->hasOneUse() || !isSafeToSpeculativelyExecute(&*I))
2691      return Changed;
2692    // I has only one use and can be executed unconditionally.
2693    Instruction *User = dyn_cast<Instruction>(I->user_back());
2694    if (User == nullptr || User->getParent() != BB)
2695      return Changed;
2696    // I is used in the same BB. Since BI uses Cond and doesn't have more slots
2697    // to use any other instruction, User must be an instruction between next(I)
2698    // and Cond.
2699
2700    // Account for the cost of duplicating this instruction into each
2701    // predecessor.
2702    NumBonusInsts += PredCount;
2703    // Early exits once we reach the limit.
2704    if (NumBonusInsts > BonusInstThreshold)
2705      return Changed;
2706  }
2707
2708  // Cond is known to be a compare or binary operator.  Check to make sure that
2709  // neither operand is a potentially-trapping constant expression.
2710  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Cond->getOperand(0)))
2711    if (CE->canTrap())
2712      return Changed;
2713  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Cond->getOperand(1)))
2714    if (CE->canTrap())
2715      return Changed;
2716
2717  // Finally, don't infinitely unroll conditional loops.
2718  BasicBlock *TrueDest = BI->getSuccessor(0);
2719  BasicBlock *FalseDest = (BI->isConditional()) ? BI->getSuccessor(1) : nullptr;
2720  if (TrueDest == BB || FalseDest == BB)
2721    return Changed;
2722
2723  for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
2724    BasicBlock *PredBlock = *PI;
2725    BranchInst *PBI = dyn_cast<BranchInst>(PredBlock->getTerminator());
2726
2727    // Check that we have two conditional branches.  If there is a PHI node in
2728    // the common successor, verify that the same value flows in from both
2729    // blocks.
2730    SmallVector<PHINode *, 4> PHIs;
2731    if (!PBI || PBI->isUnconditional() ||
2732        (BI->isConditional() && !SafeToMergeTerminators(BI, PBI)) ||
2733        (!BI->isConditional() &&
2734         !isProfitableToFoldUnconditional(BI, PBI, Cond, PHIs)))
2735      continue;
2736
2737    // Determine if the two branches share a common destination.
2738    Instruction::BinaryOps Opc = Instruction::BinaryOpsEnd;
2739    bool InvertPredCond = false;
2740
2741    if (BI->isConditional()) {
2742      if (PBI->getSuccessor(0) == TrueDest) {
2743        Opc = Instruction::Or;
2744      } else if (PBI->getSuccessor(1) == FalseDest) {
2745        Opc = Instruction::And;
2746      } else if (PBI->getSuccessor(0) == FalseDest) {
2747        Opc = Instruction::And;
2748        InvertPredCond = true;
2749      } else if (PBI->getSuccessor(1) == TrueDest) {
2750        Opc = Instruction::Or;
2751        InvertPredCond = true;
2752      } else {
2753        continue;
2754      }
2755    } else {
2756      if (PBI->getSuccessor(0) != TrueDest && PBI->getSuccessor(1) != TrueDest)
2757        continue;
2758    }
2759
2760    LLVM_DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB);
2761    Changed = true;
2762
2763    IRBuilder<> Builder(PBI);
2764
2765    // If we need to invert the condition in the pred block to match, do so now.
2766    if (InvertPredCond) {
2767      Value *NewCond = PBI->getCondition();
2768
2769      if (NewCond->hasOneUse() && isa<CmpInst>(NewCond)) {
2770        CmpInst *CI = cast<CmpInst>(NewCond);
2771        CI->setPredicate(CI->getInversePredicate());
2772      } else {
2773        NewCond =
2774            Builder.CreateNot(NewCond, PBI->getCondition()->getName() + ".not");
2775      }
2776
2777      PBI->setCondition(NewCond);
2778      PBI->swapSuccessors();
2779    }
2780
2781    // If we have bonus instructions, clone them into the predecessor block.
2782    // Note that there may be multiple predecessor blocks, so we cannot move
2783    // bonus instructions to a predecessor block.
2784    ValueToValueMapTy VMap; // maps original values to cloned values
2785    // We already make sure Cond is the last instruction before BI. Therefore,
2786    // all instructions before Cond other than DbgInfoIntrinsic are bonus
2787    // instructions.
2788    for (auto BonusInst = BB->begin(); Cond != &*BonusInst; ++BonusInst) {
2789      if (isa<DbgInfoIntrinsic>(BonusInst))
2790        continue;
2791      Instruction *NewBonusInst = BonusInst->clone();
2792
2793      // When we fold the bonus instructions we want to make sure we
2794      // reset their debug locations in order to avoid stepping on dead
2795      // code caused by folding dead branches.
2796      NewBonusInst->setDebugLoc(DebugLoc());
2797
2798      RemapInstruction(NewBonusInst, VMap,
2799                       RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
2800      VMap[&*BonusInst] = NewBonusInst;
2801
2802      // If we moved a load, we cannot any longer claim any knowledge about
2803      // its potential value. The previous information might have been valid
2804      // only given the branch precondition.
2805      // For an analogous reason, we must also drop all the metadata whose
2806      // semantics we don't understand.
2807      NewBonusInst->dropUnknownNonDebugMetadata();
2808
2809      PredBlock->getInstList().insert(PBI->getIterator(), NewBonusInst);
2810      NewBonusInst->takeName(&*BonusInst);
2811      BonusInst->setName(BonusInst->getName() + ".old");
2812    }
2813
2814    // Clone Cond into the predecessor basic block, and or/and the
2815    // two conditions together.
2816    Instruction *CondInPred = Cond->clone();
2817
2818    // Reset the condition debug location to avoid jumping on dead code
2819    // as the result of folding dead branches.
2820    CondInPred->setDebugLoc(DebugLoc());
2821
2822    RemapInstruction(CondInPred, VMap,
2823                     RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
2824    PredBlock->getInstList().insert(PBI->getIterator(), CondInPred);
2825    CondInPred->takeName(Cond);
2826    Cond->setName(CondInPred->getName() + ".old");
2827
2828    if (BI->isConditional()) {
2829      Instruction *NewCond = cast<Instruction>(
2830          Builder.CreateBinOp(Opc, PBI->getCondition(), CondInPred, "or.cond"));
2831      PBI->setCondition(NewCond);
2832
2833      uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
2834      bool HasWeights =
2835          extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
2836                                 SuccTrueWeight, SuccFalseWeight);
2837      SmallVector<uint64_t, 8> NewWeights;
2838
2839      if (PBI->getSuccessor(0) == BB) {
2840        if (HasWeights) {
2841          // PBI: br i1 %x, BB, FalseDest
2842          // BI:  br i1 %y, TrueDest, FalseDest
2843          // TrueWeight is TrueWeight for PBI * TrueWeight for BI.
2844          NewWeights.push_back(PredTrueWeight * SuccTrueWeight);
2845          // FalseWeight is FalseWeight for PBI * TotalWeight for BI +
2846          //               TrueWeight for PBI * FalseWeight for BI.
2847          // We assume that total weights of a BranchInst can fit into 32 bits.
2848          // Therefore, we will not have overflow using 64-bit arithmetic.
2849          NewWeights.push_back(PredFalseWeight *
2850                                   (SuccFalseWeight + SuccTrueWeight) +
2851                               PredTrueWeight * SuccFalseWeight);
2852        }
2853        AddPredecessorToBlock(TrueDest, PredBlock, BB, MSSAU);
2854        PBI->setSuccessor(0, TrueDest);
2855      }
2856      if (PBI->getSuccessor(1) == BB) {
2857        if (HasWeights) {
2858          // PBI: br i1 %x, TrueDest, BB
2859          // BI:  br i1 %y, TrueDest, FalseDest
2860          // TrueWeight is TrueWeight for PBI * TotalWeight for BI +
2861          //              FalseWeight for PBI * TrueWeight for BI.
2862          NewWeights.push_back(PredTrueWeight *
2863                                   (SuccFalseWeight + SuccTrueWeight) +
2864                               PredFalseWeight * SuccTrueWeight);
2865          // FalseWeight is FalseWeight for PBI * FalseWeight for BI.
2866          NewWeights.push_back(PredFalseWeight * SuccFalseWeight);
2867        }
2868        AddPredecessorToBlock(FalseDest, PredBlock, BB, MSSAU);
2869        PBI->setSuccessor(1, FalseDest);
2870      }
2871      if (NewWeights.size() == 2) {
2872        // Halve the weights if any of them cannot fit in an uint32_t
2873        FitWeights(NewWeights);
2874
2875        SmallVector<uint32_t, 8> MDWeights(NewWeights.begin(),
2876                                           NewWeights.end());
2877        setBranchWeights(PBI, MDWeights[0], MDWeights[1]);
2878      } else
2879        PBI->setMetadata(LLVMContext::MD_prof, nullptr);
2880    } else {
2881      // Update PHI nodes in the common successors.
2882      for (unsigned i = 0, e = PHIs.size(); i != e; ++i) {
2883        ConstantInt *PBI_C = cast<ConstantInt>(
2884            PHIs[i]->getIncomingValueForBlock(PBI->getParent()));
2885        assert(PBI_C->getType()->isIntegerTy(1));
2886        Instruction *MergedCond = nullptr;
2887        if (PBI->getSuccessor(0) == TrueDest) {
2888          // Create (PBI_Cond and PBI_C) or (!PBI_Cond and BI_Value)
2889          // PBI_C is true: PBI_Cond or (!PBI_Cond and BI_Value)
2890          //       is false: !PBI_Cond and BI_Value
2891          Instruction *NotCond = cast<Instruction>(
2892              Builder.CreateNot(PBI->getCondition(), "not.cond"));
2893          MergedCond = cast<Instruction>(
2894               Builder.CreateBinOp(Instruction::And, NotCond, CondInPred,
2895                                   "and.cond"));
2896          if (PBI_C->isOne())
2897            MergedCond = cast<Instruction>(Builder.CreateBinOp(
2898                Instruction::Or, PBI->getCondition(), MergedCond, "or.cond"));
2899        } else {
2900          // Create (PBI_Cond and BI_Value) or (!PBI_Cond and PBI_C)
2901          // PBI_C is true: (PBI_Cond and BI_Value) or (!PBI_Cond)
2902          //       is false: PBI_Cond and BI_Value
2903          MergedCond = cast<Instruction>(Builder.CreateBinOp(
2904              Instruction::And, PBI->getCondition(), CondInPred, "and.cond"));
2905          if (PBI_C->isOne()) {
2906            Instruction *NotCond = cast<Instruction>(
2907                Builder.CreateNot(PBI->getCondition(), "not.cond"));
2908            MergedCond = cast<Instruction>(Builder.CreateBinOp(
2909                Instruction::Or, NotCond, MergedCond, "or.cond"));
2910          }
2911        }
2912        // Update PHI Node.
2913	PHIs[i]->setIncomingValueForBlock(PBI->getParent(), MergedCond);
2914      }
2915
2916      // PBI is changed to branch to TrueDest below. Remove itself from
2917      // potential phis from all other successors.
2918      if (MSSAU)
2919        MSSAU->changeCondBranchToUnconditionalTo(PBI, TrueDest);
2920
2921      // Change PBI from Conditional to Unconditional.
2922      BranchInst *New_PBI = BranchInst::Create(TrueDest, PBI);
2923      EraseTerminatorAndDCECond(PBI, MSSAU);
2924      PBI = New_PBI;
2925    }
2926
2927    // If BI was a loop latch, it may have had associated loop metadata.
2928    // We need to copy it to the new latch, that is, PBI.
2929    if (MDNode *LoopMD = BI->getMetadata(LLVMContext::MD_loop))
2930      PBI->setMetadata(LLVMContext::MD_loop, LoopMD);
2931
2932    // TODO: If BB is reachable from all paths through PredBlock, then we
2933    // could replace PBI's branch probabilities with BI's.
2934
2935    // Copy any debug value intrinsics into the end of PredBlock.
2936    for (Instruction &I : *BB) {
2937      if (isa<DbgInfoIntrinsic>(I)) {
2938        Instruction *NewI = I.clone();
2939        RemapInstruction(NewI, VMap,
2940                         RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
2941        NewI->insertBefore(PBI);
2942      }
2943    }
2944
2945    return Changed;
2946  }
2947  return Changed;
2948}
2949
2950// If there is only one store in BB1 and BB2, return it, otherwise return
2951// nullptr.
2952static StoreInst *findUniqueStoreInBlocks(BasicBlock *BB1, BasicBlock *BB2) {
2953  StoreInst *S = nullptr;
2954  for (auto *BB : {BB1, BB2}) {
2955    if (!BB)
2956      continue;
2957    for (auto &I : *BB)
2958      if (auto *SI = dyn_cast<StoreInst>(&I)) {
2959        if (S)
2960          // Multiple stores seen.
2961          return nullptr;
2962        else
2963          S = SI;
2964      }
2965  }
2966  return S;
2967}
2968
2969static Value *ensureValueAvailableInSuccessor(Value *V, BasicBlock *BB,
2970                                              Value *AlternativeV = nullptr) {
2971  // PHI is going to be a PHI node that allows the value V that is defined in
2972  // BB to be referenced in BB's only successor.
2973  //
2974  // If AlternativeV is nullptr, the only value we care about in PHI is V. It
2975  // doesn't matter to us what the other operand is (it'll never get used). We
2976  // could just create a new PHI with an undef incoming value, but that could
2977  // increase register pressure if EarlyCSE/InstCombine can't fold it with some
2978  // other PHI. So here we directly look for some PHI in BB's successor with V
2979  // as an incoming operand. If we find one, we use it, else we create a new
2980  // one.
2981  //
2982  // If AlternativeV is not nullptr, we care about both incoming values in PHI.
2983  // PHI must be exactly: phi <ty> [ %BB, %V ], [ %OtherBB, %AlternativeV]
2984  // where OtherBB is the single other predecessor of BB's only successor.
2985  PHINode *PHI = nullptr;
2986  BasicBlock *Succ = BB->getSingleSuccessor();
2987
2988  for (auto I = Succ->begin(); isa<PHINode>(I); ++I)
2989    if (cast<PHINode>(I)->getIncomingValueForBlock(BB) == V) {
2990      PHI = cast<PHINode>(I);
2991      if (!AlternativeV)
2992        break;
2993
2994      assert(Succ->hasNPredecessors(2));
2995      auto PredI = pred_begin(Succ);
2996      BasicBlock *OtherPredBB = *PredI == BB ? *++PredI : *PredI;
2997      if (PHI->getIncomingValueForBlock(OtherPredBB) == AlternativeV)
2998        break;
2999      PHI = nullptr;
3000    }
3001  if (PHI)
3002    return PHI;
3003
3004  // If V is not an instruction defined in BB, just return it.
3005  if (!AlternativeV &&
3006      (!isa<Instruction>(V) || cast<Instruction>(V)->getParent() != BB))
3007    return V;
3008
3009  PHI = PHINode::Create(V->getType(), 2, "simplifycfg.merge", &Succ->front());
3010  PHI->addIncoming(V, BB);
3011  for (BasicBlock *PredBB : predecessors(Succ))
3012    if (PredBB != BB)
3013      PHI->addIncoming(
3014          AlternativeV ? AlternativeV : UndefValue::get(V->getType()), PredBB);
3015  return PHI;
3016}
3017
3018static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB,
3019                                           BasicBlock *QTB, BasicBlock *QFB,
3020                                           BasicBlock *PostBB, Value *Address,
3021                                           bool InvertPCond, bool InvertQCond,
3022                                           const DataLayout &DL,
3023                                           const TargetTransformInfo &TTI) {
3024  // For every pointer, there must be exactly two stores, one coming from
3025  // PTB or PFB, and the other from QTB or QFB. We don't support more than one
3026  // store (to any address) in PTB,PFB or QTB,QFB.
3027  // FIXME: We could relax this restriction with a bit more work and performance
3028  // testing.
3029  StoreInst *PStore = findUniqueStoreInBlocks(PTB, PFB);
3030  StoreInst *QStore = findUniqueStoreInBlocks(QTB, QFB);
3031  if (!PStore || !QStore)
3032    return false;
3033
3034  // Now check the stores are compatible.
3035  if (!QStore->isUnordered() || !PStore->isUnordered())
3036    return false;
3037
3038  // Check that sinking the store won't cause program behavior changes. Sinking
3039  // the store out of the Q blocks won't change any behavior as we're sinking
3040  // from a block to its unconditional successor. But we're moving a store from
3041  // the P blocks down through the middle block (QBI) and past both QFB and QTB.
3042  // So we need to check that there are no aliasing loads or stores in
3043  // QBI, QTB and QFB. We also need to check there are no conflicting memory
3044  // operations between PStore and the end of its parent block.
3045  //
3046  // The ideal way to do this is to query AliasAnalysis, but we don't
3047  // preserve AA currently so that is dangerous. Be super safe and just
3048  // check there are no other memory operations at all.
3049  for (auto &I : *QFB->getSinglePredecessor())
3050    if (I.mayReadOrWriteMemory())
3051      return false;
3052  for (auto &I : *QFB)
3053    if (&I != QStore && I.mayReadOrWriteMemory())
3054      return false;
3055  if (QTB)
3056    for (auto &I : *QTB)
3057      if (&I != QStore && I.mayReadOrWriteMemory())
3058        return false;
3059  for (auto I = BasicBlock::iterator(PStore), E = PStore->getParent()->end();
3060       I != E; ++I)
3061    if (&*I != PStore && I->mayReadOrWriteMemory())
3062      return false;
3063
3064  // If we're not in aggressive mode, we only optimize if we have some
3065  // confidence that by optimizing we'll allow P and/or Q to be if-converted.
3066  auto IsWorthwhile = [&](BasicBlock *BB, ArrayRef<StoreInst *> FreeStores) {
3067    if (!BB)
3068      return true;
3069    // Heuristic: if the block can be if-converted/phi-folded and the
3070    // instructions inside are all cheap (arithmetic/GEPs), it's worthwhile to
3071    // thread this store.
3072    int BudgetRemaining =
3073        PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;
3074    for (auto &I : BB->instructionsWithoutDebug()) {
3075      // Consider terminator instruction to be free.
3076      if (I.isTerminator())
3077        continue;
3078      // If this is one the stores that we want to speculate out of this BB,
3079      // then don't count it's cost, consider it to be free.
3080      if (auto *S = dyn_cast<StoreInst>(&I))
3081        if (llvm::find(FreeStores, S))
3082          continue;
3083      // Else, we have a white-list of instructions that we are ak speculating.
3084      if (!isa<BinaryOperator>(I) && !isa<GetElementPtrInst>(I))
3085        return false; // Not in white-list - not worthwhile folding.
3086      // And finally, if this is a non-free instruction that we are okay
3087      // speculating, ensure that we consider the speculation budget.
3088      BudgetRemaining -= TTI.getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency);
3089      if (BudgetRemaining < 0)
3090        return false; // Eagerly refuse to fold as soon as we're out of budget.
3091    }
3092    assert(BudgetRemaining >= 0 &&
3093           "When we run out of budget we will eagerly return from within the "
3094           "per-instruction loop.");
3095    return true;
3096  };
3097
3098  const SmallVector<StoreInst *, 2> FreeStores = {PStore, QStore};
3099  if (!MergeCondStoresAggressively &&
3100      (!IsWorthwhile(PTB, FreeStores) || !IsWorthwhile(PFB, FreeStores) ||
3101       !IsWorthwhile(QTB, FreeStores) || !IsWorthwhile(QFB, FreeStores)))
3102    return false;
3103
3104  // If PostBB has more than two predecessors, we need to split it so we can
3105  // sink the store.
3106  if (std::next(pred_begin(PostBB), 2) != pred_end(PostBB)) {
3107    // We know that QFB's only successor is PostBB. And QFB has a single
3108    // predecessor. If QTB exists, then its only successor is also PostBB.
3109    // If QTB does not exist, then QFB's only predecessor has a conditional
3110    // branch to QFB and PostBB.
3111    BasicBlock *TruePred = QTB ? QTB : QFB->getSinglePredecessor();
3112    BasicBlock *NewBB = SplitBlockPredecessors(PostBB, { QFB, TruePred},
3113                                               "condstore.split");
3114    if (!NewBB)
3115      return false;
3116    PostBB = NewBB;
3117  }
3118
3119  // OK, we're going to sink the stores to PostBB. The store has to be
3120  // conditional though, so first create the predicate.
3121  Value *PCond = cast<BranchInst>(PFB->getSinglePredecessor()->getTerminator())
3122                     ->getCondition();
3123  Value *QCond = cast<BranchInst>(QFB->getSinglePredecessor()->getTerminator())
3124                     ->getCondition();
3125
3126  Value *PPHI = ensureValueAvailableInSuccessor(PStore->getValueOperand(),
3127                                                PStore->getParent());
3128  Value *QPHI = ensureValueAvailableInSuccessor(QStore->getValueOperand(),
3129                                                QStore->getParent(), PPHI);
3130
3131  IRBuilder<> QB(&*PostBB->getFirstInsertionPt());
3132
3133  Value *PPred = PStore->getParent() == PTB ? PCond : QB.CreateNot(PCond);
3134  Value *QPred = QStore->getParent() == QTB ? QCond : QB.CreateNot(QCond);
3135
3136  if (InvertPCond)
3137    PPred = QB.CreateNot(PPred);
3138  if (InvertQCond)
3139    QPred = QB.CreateNot(QPred);
3140  Value *CombinedPred = QB.CreateOr(PPred, QPred);
3141
3142  auto *T =
3143      SplitBlockAndInsertIfThen(CombinedPred, &*QB.GetInsertPoint(), false);
3144  QB.SetInsertPoint(T);
3145  StoreInst *SI = cast<StoreInst>(QB.CreateStore(QPHI, Address));
3146  AAMDNodes AAMD;
3147  PStore->getAAMetadata(AAMD, /*Merge=*/false);
3148  PStore->getAAMetadata(AAMD, /*Merge=*/true);
3149  SI->setAAMetadata(AAMD);
3150  // Choose the minimum alignment. If we could prove both stores execute, we
3151  // could use biggest one.  In this case, though, we only know that one of the
3152  // stores executes.  And we don't know it's safe to take the alignment from a
3153  // store that doesn't execute.
3154  SI->setAlignment(std::min(PStore->getAlign(), QStore->getAlign()));
3155
3156  QStore->eraseFromParent();
3157  PStore->eraseFromParent();
3158
3159  return true;
3160}
3161
3162static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI,
3163                                   const DataLayout &DL,
3164                                   const TargetTransformInfo &TTI) {
3165  // The intention here is to find diamonds or triangles (see below) where each
3166  // conditional block contains a store to the same address. Both of these
3167  // stores are conditional, so they can't be unconditionally sunk. But it may
3168  // be profitable to speculatively sink the stores into one merged store at the
3169  // end, and predicate the merged store on the union of the two conditions of
3170  // PBI and QBI.
3171  //
3172  // This can reduce the number of stores executed if both of the conditions are
3173  // true, and can allow the blocks to become small enough to be if-converted.
3174  // This optimization will also chain, so that ladders of test-and-set
3175  // sequences can be if-converted away.
3176  //
3177  // We only deal with simple diamonds or triangles:
3178  //
3179  //     PBI       or      PBI        or a combination of the two
3180  //    /   \               | \
3181  //   PTB  PFB             |  PFB
3182  //    \   /               | /
3183  //     QBI                QBI
3184  //    /  \                | \
3185  //   QTB  QFB             |  QFB
3186  //    \  /                | /
3187  //    PostBB            PostBB
3188  //
3189  // We model triangles as a type of diamond with a nullptr "true" block.
3190  // Triangles are canonicalized so that the fallthrough edge is represented by
3191  // a true condition, as in the diagram above.
3192  BasicBlock *PTB = PBI->getSuccessor(0);
3193  BasicBlock *PFB = PBI->getSuccessor(1);
3194  BasicBlock *QTB = QBI->getSuccessor(0);
3195  BasicBlock *QFB = QBI->getSuccessor(1);
3196  BasicBlock *PostBB = QFB->getSingleSuccessor();
3197
3198  // Make sure we have a good guess for PostBB. If QTB's only successor is
3199  // QFB, then QFB is a better PostBB.
3200  if (QTB->getSingleSuccessor() == QFB)
3201    PostBB = QFB;
3202
3203  // If we couldn't find a good PostBB, stop.
3204  if (!PostBB)
3205    return false;
3206
3207  bool InvertPCond = false, InvertQCond = false;
3208  // Canonicalize fallthroughs to the true branches.
3209  if (PFB == QBI->getParent()) {
3210    std::swap(PFB, PTB);
3211    InvertPCond = true;
3212  }
3213  if (QFB == PostBB) {
3214    std::swap(QFB, QTB);
3215    InvertQCond = true;
3216  }
3217
3218  // From this point on we can assume PTB or QTB may be fallthroughs but PFB
3219  // and QFB may not. Model fallthroughs as a nullptr block.
3220  if (PTB == QBI->getParent())
3221    PTB = nullptr;
3222  if (QTB == PostBB)
3223    QTB = nullptr;
3224
3225  // Legality bailouts. We must have at least the non-fallthrough blocks and
3226  // the post-dominating block, and the non-fallthroughs must only have one
3227  // predecessor.
3228  auto HasOnePredAndOneSucc = [](BasicBlock *BB, BasicBlock *P, BasicBlock *S) {
3229    return BB->getSinglePredecessor() == P && BB->getSingleSuccessor() == S;
3230  };
3231  if (!HasOnePredAndOneSucc(PFB, PBI->getParent(), QBI->getParent()) ||
3232      !HasOnePredAndOneSucc(QFB, QBI->getParent(), PostBB))
3233    return false;
3234  if ((PTB && !HasOnePredAndOneSucc(PTB, PBI->getParent(), QBI->getParent())) ||
3235      (QTB && !HasOnePredAndOneSucc(QTB, QBI->getParent(), PostBB)))
3236    return false;
3237  if (!QBI->getParent()->hasNUses(2))
3238    return false;
3239
3240  // OK, this is a sequence of two diamonds or triangles.
3241  // Check if there are stores in PTB or PFB that are repeated in QTB or QFB.
3242  SmallPtrSet<Value *, 4> PStoreAddresses, QStoreAddresses;
3243  for (auto *BB : {PTB, PFB}) {
3244    if (!BB)
3245      continue;
3246    for (auto &I : *BB)
3247      if (StoreInst *SI = dyn_cast<StoreInst>(&I))
3248        PStoreAddresses.insert(SI->getPointerOperand());
3249  }
3250  for (auto *BB : {QTB, QFB}) {
3251    if (!BB)
3252      continue;
3253    for (auto &I : *BB)
3254      if (StoreInst *SI = dyn_cast<StoreInst>(&I))
3255        QStoreAddresses.insert(SI->getPointerOperand());
3256  }
3257
3258  set_intersect(PStoreAddresses, QStoreAddresses);
3259  // set_intersect mutates PStoreAddresses in place. Rename it here to make it
3260  // clear what it contains.
3261  auto &CommonAddresses = PStoreAddresses;
3262
3263  bool Changed = false;
3264  for (auto *Address : CommonAddresses)
3265    Changed |= mergeConditionalStoreToAddress(
3266        PTB, PFB, QTB, QFB, PostBB, Address, InvertPCond, InvertQCond, DL, TTI);
3267  return Changed;
3268}
3269
3270
3271/// If the previous block ended with a widenable branch, determine if reusing
3272/// the target block is profitable and legal.  This will have the effect of
3273/// "widening" PBI, but doesn't require us to reason about hosting safety.
3274static bool tryWidenCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
3275  // TODO: This can be generalized in two important ways:
3276  // 1) We can allow phi nodes in IfFalseBB and simply reuse all the input
3277  //    values from the PBI edge.
3278  // 2) We can sink side effecting instructions into BI's fallthrough
3279  //    successor provided they doesn't contribute to computation of
3280  //    BI's condition.
3281  Value *CondWB, *WC;
3282  BasicBlock *IfTrueBB, *IfFalseBB;
3283  if (!parseWidenableBranch(PBI, CondWB, WC, IfTrueBB, IfFalseBB) ||
3284      IfTrueBB != BI->getParent() || !BI->getParent()->getSinglePredecessor())
3285    return false;
3286  if (!IfFalseBB->phis().empty())
3287    return false; // TODO
3288  // Use lambda to lazily compute expensive condition after cheap ones.
3289  auto NoSideEffects = [](BasicBlock &BB) {
3290    return !llvm::any_of(BB, [](const Instruction &I) {
3291        return I.mayWriteToMemory() || I.mayHaveSideEffects();
3292      });
3293  };
3294  if (BI->getSuccessor(1) != IfFalseBB && // no inf looping
3295      BI->getSuccessor(1)->getTerminatingDeoptimizeCall() && // profitability
3296      NoSideEffects(*BI->getParent())) {
3297    BI->getSuccessor(1)->removePredecessor(BI->getParent());
3298    BI->setSuccessor(1, IfFalseBB);
3299    return true;
3300  }
3301  if (BI->getSuccessor(0) != IfFalseBB && // no inf looping
3302      BI->getSuccessor(0)->getTerminatingDeoptimizeCall() && // profitability
3303      NoSideEffects(*BI->getParent())) {
3304    BI->getSuccessor(0)->removePredecessor(BI->getParent());
3305    BI->setSuccessor(0, IfFalseBB);
3306    return true;
3307  }
3308  return false;
3309}
3310
3311/// If we have a conditional branch as a predecessor of another block,
3312/// this function tries to simplify it.  We know
3313/// that PBI and BI are both conditional branches, and BI is in one of the
3314/// successor blocks of PBI - PBI branches to BI.
3315static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
3316                                           const DataLayout &DL,
3317                                           const TargetTransformInfo &TTI) {
3318  assert(PBI->isConditional() && BI->isConditional());
3319  BasicBlock *BB = BI->getParent();
3320
3321  // If this block ends with a branch instruction, and if there is a
3322  // predecessor that ends on a branch of the same condition, make
3323  // this conditional branch redundant.
3324  if (PBI->getCondition() == BI->getCondition() &&
3325      PBI->getSuccessor(0) != PBI->getSuccessor(1)) {
3326    // Okay, the outcome of this conditional branch is statically
3327    // knowable.  If this block had a single pred, handle specially.
3328    if (BB->getSinglePredecessor()) {
3329      // Turn this into a branch on constant.
3330      bool CondIsTrue = PBI->getSuccessor(0) == BB;
3331      BI->setCondition(
3332          ConstantInt::get(Type::getInt1Ty(BB->getContext()), CondIsTrue));
3333      return true; // Nuke the branch on constant.
3334    }
3335
3336    // Otherwise, if there are multiple predecessors, insert a PHI that merges
3337    // in the constant and simplify the block result.  Subsequent passes of
3338    // simplifycfg will thread the block.
3339    if (BlockIsSimpleEnoughToThreadThrough(BB)) {
3340      pred_iterator PB = pred_begin(BB), PE = pred_end(BB);
3341      PHINode *NewPN = PHINode::Create(
3342          Type::getInt1Ty(BB->getContext()), std::distance(PB, PE),
3343          BI->getCondition()->getName() + ".pr", &BB->front());
3344      // Okay, we're going to insert the PHI node.  Since PBI is not the only
3345      // predecessor, compute the PHI'd conditional value for all of the preds.
3346      // Any predecessor where the condition is not computable we keep symbolic.
3347      for (pred_iterator PI = PB; PI != PE; ++PI) {
3348        BasicBlock *P = *PI;
3349        if ((PBI = dyn_cast<BranchInst>(P->getTerminator())) && PBI != BI &&
3350            PBI->isConditional() && PBI->getCondition() == BI->getCondition() &&
3351            PBI->getSuccessor(0) != PBI->getSuccessor(1)) {
3352          bool CondIsTrue = PBI->getSuccessor(0) == BB;
3353          NewPN->addIncoming(
3354              ConstantInt::get(Type::getInt1Ty(BB->getContext()), CondIsTrue),
3355              P);
3356        } else {
3357          NewPN->addIncoming(BI->getCondition(), P);
3358        }
3359      }
3360
3361      BI->setCondition(NewPN);
3362      return true;
3363    }
3364  }
3365
3366  // If the previous block ended with a widenable branch, determine if reusing
3367  // the target block is profitable and legal.  This will have the effect of
3368  // "widening" PBI, but doesn't require us to reason about hosting safety.
3369  if (tryWidenCondBranchToCondBranch(PBI, BI))
3370    return true;
3371
3372  if (auto *CE = dyn_cast<ConstantExpr>(BI->getCondition()))
3373    if (CE->canTrap())
3374      return false;
3375
3376  // If both branches are conditional and both contain stores to the same
3377  // address, remove the stores from the conditionals and create a conditional
3378  // merged store at the end.
3379  if (MergeCondStores && mergeConditionalStores(PBI, BI, DL, TTI))
3380    return true;
3381
3382  // If this is a conditional branch in an empty block, and if any
3383  // predecessors are a conditional branch to one of our destinations,
3384  // fold the conditions into logical ops and one cond br.
3385
3386  // Ignore dbg intrinsics.
3387  if (&*BB->instructionsWithoutDebug().begin() != BI)
3388    return false;
3389
3390  int PBIOp, BIOp;
3391  if (PBI->getSuccessor(0) == BI->getSuccessor(0)) {
3392    PBIOp = 0;
3393    BIOp = 0;
3394  } else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) {
3395    PBIOp = 0;
3396    BIOp = 1;
3397  } else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) {
3398    PBIOp = 1;
3399    BIOp = 0;
3400  } else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) {
3401    PBIOp = 1;
3402    BIOp = 1;
3403  } else {
3404    return false;
3405  }
3406
3407  // Check to make sure that the other destination of this branch
3408  // isn't BB itself.  If so, this is an infinite loop that will
3409  // keep getting unwound.
3410  if (PBI->getSuccessor(PBIOp) == BB)
3411    return false;
3412
3413  // Do not perform this transformation if it would require
3414  // insertion of a large number of select instructions. For targets
3415  // without predication/cmovs, this is a big pessimization.
3416
3417  // Also do not perform this transformation if any phi node in the common
3418  // destination block can trap when reached by BB or PBB (PR17073). In that
3419  // case, it would be unsafe to hoist the operation into a select instruction.
3420
3421  BasicBlock *CommonDest = PBI->getSuccessor(PBIOp);
3422  unsigned NumPhis = 0;
3423  for (BasicBlock::iterator II = CommonDest->begin(); isa<PHINode>(II);
3424       ++II, ++NumPhis) {
3425    if (NumPhis > 2) // Disable this xform.
3426      return false;
3427
3428    PHINode *PN = cast<PHINode>(II);
3429    Value *BIV = PN->getIncomingValueForBlock(BB);
3430    if (ConstantExpr *CE = dyn_cast<ConstantExpr>(BIV))
3431      if (CE->canTrap())
3432        return false;
3433
3434    unsigned PBBIdx = PN->getBasicBlockIndex(PBI->getParent());
3435    Value *PBIV = PN->getIncomingValue(PBBIdx);
3436    if (ConstantExpr *CE = dyn_cast<ConstantExpr>(PBIV))
3437      if (CE->canTrap())
3438        return false;
3439  }
3440
3441  // Finally, if everything is ok, fold the branches to logical ops.
3442  BasicBlock *OtherDest = BI->getSuccessor(BIOp ^ 1);
3443
3444  LLVM_DEBUG(dbgs() << "FOLDING BRs:" << *PBI->getParent()
3445                    << "AND: " << *BI->getParent());
3446
3447  // If OtherDest *is* BB, then BB is a basic block with a single conditional
3448  // branch in it, where one edge (OtherDest) goes back to itself but the other
3449  // exits.  We don't *know* that the program avoids the infinite loop
3450  // (even though that seems likely).  If we do this xform naively, we'll end up
3451  // recursively unpeeling the loop.  Since we know that (after the xform is
3452  // done) that the block *is* infinite if reached, we just make it an obviously
3453  // infinite loop with no cond branch.
3454  if (OtherDest == BB) {
3455    // Insert it at the end of the function, because it's either code,
3456    // or it won't matter if it's hot. :)
3457    BasicBlock *InfLoopBlock =
3458        BasicBlock::Create(BB->getContext(), "infloop", BB->getParent());
3459    BranchInst::Create(InfLoopBlock, InfLoopBlock);
3460    OtherDest = InfLoopBlock;
3461  }
3462
3463  LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent());
3464
3465  // BI may have other predecessors.  Because of this, we leave
3466  // it alone, but modify PBI.
3467
3468  // Make sure we get to CommonDest on True&True directions.
3469  Value *PBICond = PBI->getCondition();
3470  IRBuilder<NoFolder> Builder(PBI);
3471  if (PBIOp)
3472    PBICond = Builder.CreateNot(PBICond, PBICond->getName() + ".not");
3473
3474  Value *BICond = BI->getCondition();
3475  if (BIOp)
3476    BICond = Builder.CreateNot(BICond, BICond->getName() + ".not");
3477
3478  // Merge the conditions.
3479  Value *Cond = Builder.CreateOr(PBICond, BICond, "brmerge");
3480
3481  // Modify PBI to branch on the new condition to the new dests.
3482  PBI->setCondition(Cond);
3483  PBI->setSuccessor(0, CommonDest);
3484  PBI->setSuccessor(1, OtherDest);
3485
3486  // Update branch weight for PBI.
3487  uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
3488  uint64_t PredCommon, PredOther, SuccCommon, SuccOther;
3489  bool HasWeights =
3490      extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
3491                             SuccTrueWeight, SuccFalseWeight);
3492  if (HasWeights) {
3493    PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight;
3494    PredOther = PBIOp ? PredTrueWeight : PredFalseWeight;
3495    SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight;
3496    SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight;
3497    // The weight to CommonDest should be PredCommon * SuccTotal +
3498    //                                    PredOther * SuccCommon.
3499    // The weight to OtherDest should be PredOther * SuccOther.
3500    uint64_t NewWeights[2] = {PredCommon * (SuccCommon + SuccOther) +
3501                                  PredOther * SuccCommon,
3502                              PredOther * SuccOther};
3503    // Halve the weights if any of them cannot fit in an uint32_t
3504    FitWeights(NewWeights);
3505
3506    setBranchWeights(PBI, NewWeights[0], NewWeights[1]);
3507  }
3508
3509  // OtherDest may have phi nodes.  If so, add an entry from PBI's
3510  // block that are identical to the entries for BI's block.
3511  AddPredecessorToBlock(OtherDest, PBI->getParent(), BB);
3512
3513  // We know that the CommonDest already had an edge from PBI to
3514  // it.  If it has PHIs though, the PHIs may have different
3515  // entries for BB and PBI's BB.  If so, insert a select to make
3516  // them agree.
3517  for (PHINode &PN : CommonDest->phis()) {
3518    Value *BIV = PN.getIncomingValueForBlock(BB);
3519    unsigned PBBIdx = PN.getBasicBlockIndex(PBI->getParent());
3520    Value *PBIV = PN.getIncomingValue(PBBIdx);
3521    if (BIV != PBIV) {
3522      // Insert a select in PBI to pick the right value.
3523      SelectInst *NV = cast<SelectInst>(
3524          Builder.CreateSelect(PBICond, PBIV, BIV, PBIV->getName() + ".mux"));
3525      PN.setIncomingValue(PBBIdx, NV);
3526      // Although the select has the same condition as PBI, the original branch
3527      // weights for PBI do not apply to the new select because the select's
3528      // 'logical' edges are incoming edges of the phi that is eliminated, not
3529      // the outgoing edges of PBI.
3530      if (HasWeights) {
3531        uint64_t PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight;
3532        uint64_t PredOther = PBIOp ? PredTrueWeight : PredFalseWeight;
3533        uint64_t SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight;
3534        uint64_t SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight;
3535        // The weight to PredCommonDest should be PredCommon * SuccTotal.
3536        // The weight to PredOtherDest should be PredOther * SuccCommon.
3537        uint64_t NewWeights[2] = {PredCommon * (SuccCommon + SuccOther),
3538                                  PredOther * SuccCommon};
3539
3540        FitWeights(NewWeights);
3541
3542        setBranchWeights(NV, NewWeights[0], NewWeights[1]);
3543      }
3544    }
3545  }
3546
3547  LLVM_DEBUG(dbgs() << "INTO: " << *PBI->getParent());
3548  LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent());
3549
3550  // This basic block is probably dead.  We know it has at least
3551  // one fewer predecessor.
3552  return true;
3553}
3554
3555// Simplifies a terminator by replacing it with a branch to TrueBB if Cond is
3556// true or to FalseBB if Cond is false.
3557// Takes care of updating the successors and removing the old terminator.
3558// Also makes sure not to introduce new successors by assuming that edges to
3559// non-successor TrueBBs and FalseBBs aren't reachable.
3560bool SimplifyCFGOpt::SimplifyTerminatorOnSelect(Instruction *OldTerm,
3561                                                Value *Cond, BasicBlock *TrueBB,
3562                                                BasicBlock *FalseBB,
3563                                                uint32_t TrueWeight,
3564                                                uint32_t FalseWeight) {
3565  // Remove any superfluous successor edges from the CFG.
3566  // First, figure out which successors to preserve.
3567  // If TrueBB and FalseBB are equal, only try to preserve one copy of that
3568  // successor.
3569  BasicBlock *KeepEdge1 = TrueBB;
3570  BasicBlock *KeepEdge2 = TrueBB != FalseBB ? FalseBB : nullptr;
3571
3572  // Then remove the rest.
3573  for (BasicBlock *Succ : successors(OldTerm)) {
3574    // Make sure only to keep exactly one copy of each edge.
3575    if (Succ == KeepEdge1)
3576      KeepEdge1 = nullptr;
3577    else if (Succ == KeepEdge2)
3578      KeepEdge2 = nullptr;
3579    else
3580      Succ->removePredecessor(OldTerm->getParent(),
3581                              /*KeepOneInputPHIs=*/true);
3582  }
3583
3584  IRBuilder<> Builder(OldTerm);
3585  Builder.SetCurrentDebugLocation(OldTerm->getDebugLoc());
3586
3587  // Insert an appropriate new terminator.
3588  if (!KeepEdge1 && !KeepEdge2) {
3589    if (TrueBB == FalseBB)
3590      // We were only looking for one successor, and it was present.
3591      // Create an unconditional branch to it.
3592      Builder.CreateBr(TrueBB);
3593    else {
3594      // We found both of the successors we were looking for.
3595      // Create a conditional branch sharing the condition of the select.
3596      BranchInst *NewBI = Builder.CreateCondBr(Cond, TrueBB, FalseBB);
3597      if (TrueWeight != FalseWeight)
3598        setBranchWeights(NewBI, TrueWeight, FalseWeight);
3599    }
3600  } else if (KeepEdge1 && (KeepEdge2 || TrueBB == FalseBB)) {
3601    // Neither of the selected blocks were successors, so this
3602    // terminator must be unreachable.
3603    new UnreachableInst(OldTerm->getContext(), OldTerm);
3604  } else {
3605    // One of the selected values was a successor, but the other wasn't.
3606    // Insert an unconditional branch to the one that was found;
3607    // the edge to the one that wasn't must be unreachable.
3608    if (!KeepEdge1)
3609      // Only TrueBB was found.
3610      Builder.CreateBr(TrueBB);
3611    else
3612      // Only FalseBB was found.
3613      Builder.CreateBr(FalseBB);
3614  }
3615
3616  EraseTerminatorAndDCECond(OldTerm);
3617  return true;
3618}
3619
3620// Replaces
3621//   (switch (select cond, X, Y)) on constant X, Y
3622// with a branch - conditional if X and Y lead to distinct BBs,
3623// unconditional otherwise.
3624bool SimplifyCFGOpt::SimplifySwitchOnSelect(SwitchInst *SI,
3625                                            SelectInst *Select) {
3626  // Check for constant integer values in the select.
3627  ConstantInt *TrueVal = dyn_cast<ConstantInt>(Select->getTrueValue());
3628  ConstantInt *FalseVal = dyn_cast<ConstantInt>(Select->getFalseValue());
3629  if (!TrueVal || !FalseVal)
3630    return false;
3631
3632  // Find the relevant condition and destinations.
3633  Value *Condition = Select->getCondition();
3634  BasicBlock *TrueBB = SI->findCaseValue(TrueVal)->getCaseSuccessor();
3635  BasicBlock *FalseBB = SI->findCaseValue(FalseVal)->getCaseSuccessor();
3636
3637  // Get weight for TrueBB and FalseBB.
3638  uint32_t TrueWeight = 0, FalseWeight = 0;
3639  SmallVector<uint64_t, 8> Weights;
3640  bool HasWeights = HasBranchWeights(SI);
3641  if (HasWeights) {
3642    GetBranchWeights(SI, Weights);
3643    if (Weights.size() == 1 + SI->getNumCases()) {
3644      TrueWeight =
3645          (uint32_t)Weights[SI->findCaseValue(TrueVal)->getSuccessorIndex()];
3646      FalseWeight =
3647          (uint32_t)Weights[SI->findCaseValue(FalseVal)->getSuccessorIndex()];
3648    }
3649  }
3650
3651  // Perform the actual simplification.
3652  return SimplifyTerminatorOnSelect(SI, Condition, TrueBB, FalseBB, TrueWeight,
3653                                    FalseWeight);
3654}
3655
3656// Replaces
3657//   (indirectbr (select cond, blockaddress(@fn, BlockA),
3658//                             blockaddress(@fn, BlockB)))
3659// with
3660//   (br cond, BlockA, BlockB).
3661bool SimplifyCFGOpt::SimplifyIndirectBrOnSelect(IndirectBrInst *IBI,
3662                                                SelectInst *SI) {
3663  // Check that both operands of the select are block addresses.
3664  BlockAddress *TBA = dyn_cast<BlockAddress>(SI->getTrueValue());
3665  BlockAddress *FBA = dyn_cast<BlockAddress>(SI->getFalseValue());
3666  if (!TBA || !FBA)
3667    return false;
3668
3669  // Extract the actual blocks.
3670  BasicBlock *TrueBB = TBA->getBasicBlock();
3671  BasicBlock *FalseBB = FBA->getBasicBlock();
3672
3673  // Perform the actual simplification.
3674  return SimplifyTerminatorOnSelect(IBI, SI->getCondition(), TrueBB, FalseBB, 0,
3675                                    0);
3676}
3677
3678/// This is called when we find an icmp instruction
3679/// (a seteq/setne with a constant) as the only instruction in a
3680/// block that ends with an uncond branch.  We are looking for a very specific
3681/// pattern that occurs when "A == 1 || A == 2 || A == 3" gets simplified.  In
3682/// this case, we merge the first two "or's of icmp" into a switch, but then the
3683/// default value goes to an uncond block with a seteq in it, we get something
3684/// like:
3685///
3686///   switch i8 %A, label %DEFAULT [ i8 1, label %end    i8 2, label %end ]
3687/// DEFAULT:
3688///   %tmp = icmp eq i8 %A, 92
3689///   br label %end
3690/// end:
3691///   ... = phi i1 [ true, %entry ], [ %tmp, %DEFAULT ], [ true, %entry ]
3692///
3693/// We prefer to split the edge to 'end' so that there is a true/false entry to
3694/// the PHI, merging the third icmp into the switch.
3695bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpInIt(
3696    ICmpInst *ICI, IRBuilder<> &Builder) {
3697  BasicBlock *BB = ICI->getParent();
3698
3699  // If the block has any PHIs in it or the icmp has multiple uses, it is too
3700  // complex.
3701  if (isa<PHINode>(BB->begin()) || !ICI->hasOneUse())
3702    return false;
3703
3704  Value *V = ICI->getOperand(0);
3705  ConstantInt *Cst = cast<ConstantInt>(ICI->getOperand(1));
3706
3707  // The pattern we're looking for is where our only predecessor is a switch on
3708  // 'V' and this block is the default case for the switch.  In this case we can
3709  // fold the compared value into the switch to simplify things.
3710  BasicBlock *Pred = BB->getSinglePredecessor();
3711  if (!Pred || !isa<SwitchInst>(Pred->getTerminator()))
3712    return false;
3713
3714  SwitchInst *SI = cast<SwitchInst>(Pred->getTerminator());
3715  if (SI->getCondition() != V)
3716    return false;
3717
3718  // If BB is reachable on a non-default case, then we simply know the value of
3719  // V in this block.  Substitute it and constant fold the icmp instruction
3720  // away.
3721  if (SI->getDefaultDest() != BB) {
3722    ConstantInt *VVal = SI->findCaseDest(BB);
3723    assert(VVal && "Should have a unique destination value");
3724    ICI->setOperand(0, VVal);
3725
3726    if (Value *V = SimplifyInstruction(ICI, {DL, ICI})) {
3727      ICI->replaceAllUsesWith(V);
3728      ICI->eraseFromParent();
3729    }
3730    // BB is now empty, so it is likely to simplify away.
3731    return requestResimplify();
3732  }
3733
3734  // Ok, the block is reachable from the default dest.  If the constant we're
3735  // comparing exists in one of the other edges, then we can constant fold ICI
3736  // and zap it.
3737  if (SI->findCaseValue(Cst) != SI->case_default()) {
3738    Value *V;
3739    if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
3740      V = ConstantInt::getFalse(BB->getContext());
3741    else
3742      V = ConstantInt::getTrue(BB->getContext());
3743
3744    ICI->replaceAllUsesWith(V);
3745    ICI->eraseFromParent();
3746    // BB is now empty, so it is likely to simplify away.
3747    return requestResimplify();
3748  }
3749
3750  // The use of the icmp has to be in the 'end' block, by the only PHI node in
3751  // the block.
3752  BasicBlock *SuccBlock = BB->getTerminator()->getSuccessor(0);
3753  PHINode *PHIUse = dyn_cast<PHINode>(ICI->user_back());
3754  if (PHIUse == nullptr || PHIUse != &SuccBlock->front() ||
3755      isa<PHINode>(++BasicBlock::iterator(PHIUse)))
3756    return false;
3757
3758  // If the icmp is a SETEQ, then the default dest gets false, the new edge gets
3759  // true in the PHI.
3760  Constant *DefaultCst = ConstantInt::getTrue(BB->getContext());
3761  Constant *NewCst = ConstantInt::getFalse(BB->getContext());
3762
3763  if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
3764    std::swap(DefaultCst, NewCst);
3765
3766  // Replace ICI (which is used by the PHI for the default value) with true or
3767  // false depending on if it is EQ or NE.
3768  ICI->replaceAllUsesWith(DefaultCst);
3769  ICI->eraseFromParent();
3770
3771  // Okay, the switch goes to this block on a default value.  Add an edge from
3772  // the switch to the merge point on the compared value.
3773  BasicBlock *NewBB =
3774      BasicBlock::Create(BB->getContext(), "switch.edge", BB->getParent(), BB);
3775  {
3776    SwitchInstProfUpdateWrapper SIW(*SI);
3777    auto W0 = SIW.getSuccessorWeight(0);
3778    SwitchInstProfUpdateWrapper::CaseWeightOpt NewW;
3779    if (W0) {
3780      NewW = ((uint64_t(*W0) + 1) >> 1);
3781      SIW.setSuccessorWeight(0, *NewW);
3782    }
3783    SIW.addCase(Cst, NewBB, NewW);
3784  }
3785
3786  // NewBB branches to the phi block, add the uncond branch and the phi entry.
3787  Builder.SetInsertPoint(NewBB);
3788  Builder.SetCurrentDebugLocation(SI->getDebugLoc());
3789  Builder.CreateBr(SuccBlock);
3790  PHIUse->addIncoming(NewCst, NewBB);
3791  return true;
3792}
3793
3794/// The specified branch is a conditional branch.
3795/// Check to see if it is branching on an or/and chain of icmp instructions, and
3796/// fold it into a switch instruction if so.
3797bool SimplifyCFGOpt::SimplifyBranchOnICmpChain(BranchInst *BI,
3798                                               IRBuilder<> &Builder,
3799                                               const DataLayout &DL) {
3800  Instruction *Cond = dyn_cast<Instruction>(BI->getCondition());
3801  if (!Cond)
3802    return false;
3803
3804  // Change br (X == 0 | X == 1), T, F into a switch instruction.
3805  // If this is a bunch of seteq's or'd together, or if it's a bunch of
3806  // 'setne's and'ed together, collect them.
3807
3808  // Try to gather values from a chain of and/or to be turned into a switch
3809  ConstantComparesGatherer ConstantCompare(Cond, DL);
3810  // Unpack the result
3811  SmallVectorImpl<ConstantInt *> &Values = ConstantCompare.Vals;
3812  Value *CompVal = ConstantCompare.CompValue;
3813  unsigned UsedICmps = ConstantCompare.UsedICmps;
3814  Value *ExtraCase = ConstantCompare.Extra;
3815
3816  // If we didn't have a multiply compared value, fail.
3817  if (!CompVal)
3818    return false;
3819
3820  // Avoid turning single icmps into a switch.
3821  if (UsedICmps <= 1)
3822    return false;
3823
3824  bool TrueWhenEqual = (Cond->getOpcode() == Instruction::Or);
3825
3826  // There might be duplicate constants in the list, which the switch
3827  // instruction can't handle, remove them now.
3828  array_pod_sort(Values.begin(), Values.end(), ConstantIntSortPredicate);
3829  Values.erase(std::unique(Values.begin(), Values.end()), Values.end());
3830
3831  // If Extra was used, we require at least two switch values to do the
3832  // transformation.  A switch with one value is just a conditional branch.
3833  if (ExtraCase && Values.size() < 2)
3834    return false;
3835
3836  // TODO: Preserve branch weight metadata, similarly to how
3837  // FoldValueComparisonIntoPredecessors preserves it.
3838
3839  // Figure out which block is which destination.
3840  BasicBlock *DefaultBB = BI->getSuccessor(1);
3841  BasicBlock *EdgeBB = BI->getSuccessor(0);
3842  if (!TrueWhenEqual)
3843    std::swap(DefaultBB, EdgeBB);
3844
3845  BasicBlock *BB = BI->getParent();
3846
3847  // MSAN does not like undefs as branch condition which can be introduced
3848  // with "explicit branch".
3849  if (ExtraCase && BB->getParent()->hasFnAttribute(Attribute::SanitizeMemory))
3850    return false;
3851
3852  LLVM_DEBUG(dbgs() << "Converting 'icmp' chain with " << Values.size()
3853                    << " cases into SWITCH.  BB is:\n"
3854                    << *BB);
3855
3856  // If there are any extra values that couldn't be folded into the switch
3857  // then we evaluate them with an explicit branch first. Split the block
3858  // right before the condbr to handle it.
3859  if (ExtraCase) {
3860    BasicBlock *NewBB =
3861        BB->splitBasicBlock(BI->getIterator(), "switch.early.test");
3862    // Remove the uncond branch added to the old block.
3863    Instruction *OldTI = BB->getTerminator();
3864    Builder.SetInsertPoint(OldTI);
3865
3866    if (TrueWhenEqual)
3867      Builder.CreateCondBr(ExtraCase, EdgeBB, NewBB);
3868    else
3869      Builder.CreateCondBr(ExtraCase, NewBB, EdgeBB);
3870
3871    OldTI->eraseFromParent();
3872
3873    // If there are PHI nodes in EdgeBB, then we need to add a new entry to them
3874    // for the edge we just added.
3875    AddPredecessorToBlock(EdgeBB, BB, NewBB);
3876
3877    LLVM_DEBUG(dbgs() << "  ** 'icmp' chain unhandled condition: " << *ExtraCase
3878                      << "\nEXTRABB = " << *BB);
3879    BB = NewBB;
3880  }
3881
3882  Builder.SetInsertPoint(BI);
3883  // Convert pointer to int before we switch.
3884  if (CompVal->getType()->isPointerTy()) {
3885    CompVal = Builder.CreatePtrToInt(
3886        CompVal, DL.getIntPtrType(CompVal->getType()), "magicptr");
3887  }
3888
3889  // Create the new switch instruction now.
3890  SwitchInst *New = Builder.CreateSwitch(CompVal, DefaultBB, Values.size());
3891
3892  // Add all of the 'cases' to the switch instruction.
3893  for (unsigned i = 0, e = Values.size(); i != e; ++i)
3894    New->addCase(Values[i], EdgeBB);
3895
3896  // We added edges from PI to the EdgeBB.  As such, if there were any
3897  // PHI nodes in EdgeBB, they need entries to be added corresponding to
3898  // the number of edges added.
3899  for (BasicBlock::iterator BBI = EdgeBB->begin(); isa<PHINode>(BBI); ++BBI) {
3900    PHINode *PN = cast<PHINode>(BBI);
3901    Value *InVal = PN->getIncomingValueForBlock(BB);
3902    for (unsigned i = 0, e = Values.size() - 1; i != e; ++i)
3903      PN->addIncoming(InVal, BB);
3904  }
3905
3906  // Erase the old branch instruction.
3907  EraseTerminatorAndDCECond(BI);
3908
3909  LLVM_DEBUG(dbgs() << "  ** 'icmp' chain result is:\n" << *BB << '\n');
3910  return true;
3911}
3912
3913bool SimplifyCFGOpt::simplifyResume(ResumeInst *RI, IRBuilder<> &Builder) {
3914  if (isa<PHINode>(RI->getValue()))
3915    return simplifyCommonResume(RI);
3916  else if (isa<LandingPadInst>(RI->getParent()->getFirstNonPHI()) &&
3917           RI->getValue() == RI->getParent()->getFirstNonPHI())
3918    // The resume must unwind the exception that caused control to branch here.
3919    return simplifySingleResume(RI);
3920
3921  return false;
3922}
3923
3924// Simplify resume that is shared by several landing pads (phi of landing pad).
3925bool SimplifyCFGOpt::simplifyCommonResume(ResumeInst *RI) {
3926  BasicBlock *BB = RI->getParent();
3927
3928  // Check that there are no other instructions except for debug intrinsics
3929  // between the phi of landing pads (RI->getValue()) and resume instruction.
3930  BasicBlock::iterator I = cast<Instruction>(RI->getValue())->getIterator(),
3931                       E = RI->getIterator();
3932  while (++I != E)
3933    if (!isa<DbgInfoIntrinsic>(I))
3934      return false;
3935
3936  SmallSetVector<BasicBlock *, 4> TrivialUnwindBlocks;
3937  auto *PhiLPInst = cast<PHINode>(RI->getValue());
3938
3939  // Check incoming blocks to see if any of them are trivial.
3940  for (unsigned Idx = 0, End = PhiLPInst->getNumIncomingValues(); Idx != End;
3941       Idx++) {
3942    auto *IncomingBB = PhiLPInst->getIncomingBlock(Idx);
3943    auto *IncomingValue = PhiLPInst->getIncomingValue(Idx);
3944
3945    // If the block has other successors, we can not delete it because
3946    // it has other dependents.
3947    if (IncomingBB->getUniqueSuccessor() != BB)
3948      continue;
3949
3950    auto *LandingPad = dyn_cast<LandingPadInst>(IncomingBB->getFirstNonPHI());
3951    // Not the landing pad that caused the control to branch here.
3952    if (IncomingValue != LandingPad)
3953      continue;
3954
3955    bool isTrivial = true;
3956
3957    I = IncomingBB->getFirstNonPHI()->getIterator();
3958    E = IncomingBB->getTerminator()->getIterator();
3959    while (++I != E)
3960      if (!isa<DbgInfoIntrinsic>(I)) {
3961        isTrivial = false;
3962        break;
3963      }
3964
3965    if (isTrivial)
3966      TrivialUnwindBlocks.insert(IncomingBB);
3967  }
3968
3969  // If no trivial unwind blocks, don't do any simplifications.
3970  if (TrivialUnwindBlocks.empty())
3971    return false;
3972
3973  // Turn all invokes that unwind here into calls.
3974  for (auto *TrivialBB : TrivialUnwindBlocks) {
3975    // Blocks that will be simplified should be removed from the phi node.
3976    // Note there could be multiple edges to the resume block, and we need
3977    // to remove them all.
3978    while (PhiLPInst->getBasicBlockIndex(TrivialBB) != -1)
3979      BB->removePredecessor(TrivialBB, true);
3980
3981    for (pred_iterator PI = pred_begin(TrivialBB), PE = pred_end(TrivialBB);
3982         PI != PE;) {
3983      BasicBlock *Pred = *PI++;
3984      removeUnwindEdge(Pred);
3985    }
3986
3987    // In each SimplifyCFG run, only the current processed block can be erased.
3988    // Otherwise, it will break the iteration of SimplifyCFG pass. So instead
3989    // of erasing TrivialBB, we only remove the branch to the common resume
3990    // block so that we can later erase the resume block since it has no
3991    // predecessors.
3992    TrivialBB->getTerminator()->eraseFromParent();
3993    new UnreachableInst(RI->getContext(), TrivialBB);
3994  }
3995
3996  // Delete the resume block if all its predecessors have been removed.
3997  if (pred_empty(BB))
3998    BB->eraseFromParent();
3999
4000  return !TrivialUnwindBlocks.empty();
4001}
4002
4003// Check if cleanup block is empty
4004static bool isCleanupBlockEmpty(Instruction *Inst, Instruction *RI) {
4005  BasicBlock::iterator I = Inst->getIterator(), E = RI->getIterator();
4006  while (++I != E) {
4007    auto *II = dyn_cast<IntrinsicInst>(I);
4008    if (!II)
4009      return false;
4010
4011    Intrinsic::ID IntrinsicID = II->getIntrinsicID();
4012    switch (IntrinsicID) {
4013    case Intrinsic::dbg_declare:
4014    case Intrinsic::dbg_value:
4015    case Intrinsic::dbg_label:
4016    case Intrinsic::lifetime_end:
4017      break;
4018    default:
4019      return false;
4020    }
4021  }
4022  return true;
4023}
4024
4025// Simplify resume that is only used by a single (non-phi) landing pad.
4026bool SimplifyCFGOpt::simplifySingleResume(ResumeInst *RI) {
4027  BasicBlock *BB = RI->getParent();
4028  auto *LPInst = cast<LandingPadInst>(BB->getFirstNonPHI());
4029  assert(RI->getValue() == LPInst &&
4030         "Resume must unwind the exception that caused control to here");
4031
4032  // Check that there are no other instructions except for debug intrinsics.
4033  if (!isCleanupBlockEmpty(LPInst, RI))
4034    return false;
4035
4036  // Turn all invokes that unwind here into calls and delete the basic block.
4037  for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE;) {
4038    BasicBlock *Pred = *PI++;
4039    removeUnwindEdge(Pred);
4040  }
4041
4042  // The landingpad is now unreachable.  Zap it.
4043  if (LoopHeaders)
4044    LoopHeaders->erase(BB);
4045  BB->eraseFromParent();
4046  return true;
4047}
4048
4049static bool removeEmptyCleanup(CleanupReturnInst *RI) {
4050  // If this is a trivial cleanup pad that executes no instructions, it can be
4051  // eliminated.  If the cleanup pad continues to the caller, any predecessor
4052  // that is an EH pad will be updated to continue to the caller and any
4053  // predecessor that terminates with an invoke instruction will have its invoke
4054  // instruction converted to a call instruction.  If the cleanup pad being
4055  // simplified does not continue to the caller, each predecessor will be
4056  // updated to continue to the unwind destination of the cleanup pad being
4057  // simplified.
4058  BasicBlock *BB = RI->getParent();
4059  CleanupPadInst *CPInst = RI->getCleanupPad();
4060  if (CPInst->getParent() != BB)
4061    // This isn't an empty cleanup.
4062    return false;
4063
4064  // We cannot kill the pad if it has multiple uses.  This typically arises
4065  // from unreachable basic blocks.
4066  if (!CPInst->hasOneUse())
4067    return false;
4068
4069  // Check that there are no other instructions except for benign intrinsics.
4070  if (!isCleanupBlockEmpty(CPInst, RI))
4071    return false;
4072
4073  // If the cleanup return we are simplifying unwinds to the caller, this will
4074  // set UnwindDest to nullptr.
4075  BasicBlock *UnwindDest = RI->getUnwindDest();
4076  Instruction *DestEHPad = UnwindDest ? UnwindDest->getFirstNonPHI() : nullptr;
4077
4078  // We're about to remove BB from the control flow.  Before we do, sink any
4079  // PHINodes into the unwind destination.  Doing this before changing the
4080  // control flow avoids some potentially slow checks, since we can currently
4081  // be certain that UnwindDest and BB have no common predecessors (since they
4082  // are both EH pads).
4083  if (UnwindDest) {
4084    // First, go through the PHI nodes in UnwindDest and update any nodes that
4085    // reference the block we are removing
4086    for (BasicBlock::iterator I = UnwindDest->begin(),
4087                              IE = DestEHPad->getIterator();
4088         I != IE; ++I) {
4089      PHINode *DestPN = cast<PHINode>(I);
4090
4091      int Idx = DestPN->getBasicBlockIndex(BB);
4092      // Since BB unwinds to UnwindDest, it has to be in the PHI node.
4093      assert(Idx != -1);
4094      // This PHI node has an incoming value that corresponds to a control
4095      // path through the cleanup pad we are removing.  If the incoming
4096      // value is in the cleanup pad, it must be a PHINode (because we
4097      // verified above that the block is otherwise empty).  Otherwise, the
4098      // value is either a constant or a value that dominates the cleanup
4099      // pad being removed.
4100      //
4101      // Because BB and UnwindDest are both EH pads, all of their
4102      // predecessors must unwind to these blocks, and since no instruction
4103      // can have multiple unwind destinations, there will be no overlap in
4104      // incoming blocks between SrcPN and DestPN.
4105      Value *SrcVal = DestPN->getIncomingValue(Idx);
4106      PHINode *SrcPN = dyn_cast<PHINode>(SrcVal);
4107
4108      // Remove the entry for the block we are deleting.
4109      DestPN->removeIncomingValue(Idx, false);
4110
4111      if (SrcPN && SrcPN->getParent() == BB) {
4112        // If the incoming value was a PHI node in the cleanup pad we are
4113        // removing, we need to merge that PHI node's incoming values into
4114        // DestPN.
4115        for (unsigned SrcIdx = 0, SrcE = SrcPN->getNumIncomingValues();
4116             SrcIdx != SrcE; ++SrcIdx) {
4117          DestPN->addIncoming(SrcPN->getIncomingValue(SrcIdx),
4118                              SrcPN->getIncomingBlock(SrcIdx));
4119        }
4120      } else {
4121        // Otherwise, the incoming value came from above BB and
4122        // so we can just reuse it.  We must associate all of BB's
4123        // predecessors with this value.
4124        for (auto *pred : predecessors(BB)) {
4125          DestPN->addIncoming(SrcVal, pred);
4126        }
4127      }
4128    }
4129
4130    // Sink any remaining PHI nodes directly into UnwindDest.
4131    Instruction *InsertPt = DestEHPad;
4132    for (BasicBlock::iterator I = BB->begin(),
4133                              IE = BB->getFirstNonPHI()->getIterator();
4134         I != IE;) {
4135      // The iterator must be incremented here because the instructions are
4136      // being moved to another block.
4137      PHINode *PN = cast<PHINode>(I++);
4138      if (PN->use_empty() || !PN->isUsedOutsideOfBlock(BB))
4139        // If the PHI node has no uses or all of its uses are in this basic
4140        // block (meaning they are debug or lifetime intrinsics), just leave
4141        // it.  It will be erased when we erase BB below.
4142        continue;
4143
4144      // Otherwise, sink this PHI node into UnwindDest.
4145      // Any predecessors to UnwindDest which are not already represented
4146      // must be back edges which inherit the value from the path through
4147      // BB.  In this case, the PHI value must reference itself.
4148      for (auto *pred : predecessors(UnwindDest))
4149        if (pred != BB)
4150          PN->addIncoming(PN, pred);
4151      PN->moveBefore(InsertPt);
4152    }
4153  }
4154
4155  for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE;) {
4156    // The iterator must be updated here because we are removing this pred.
4157    BasicBlock *PredBB = *PI++;
4158    if (UnwindDest == nullptr) {
4159      removeUnwindEdge(PredBB);
4160    } else {
4161      Instruction *TI = PredBB->getTerminator();
4162      TI->replaceUsesOfWith(BB, UnwindDest);
4163    }
4164  }
4165
4166  // The cleanup pad is now unreachable.  Zap it.
4167  BB->eraseFromParent();
4168  return true;
4169}
4170
4171// Try to merge two cleanuppads together.
4172static bool mergeCleanupPad(CleanupReturnInst *RI) {
4173  // Skip any cleanuprets which unwind to caller, there is nothing to merge
4174  // with.
4175  BasicBlock *UnwindDest = RI->getUnwindDest();
4176  if (!UnwindDest)
4177    return false;
4178
4179  // This cleanupret isn't the only predecessor of this cleanuppad, it wouldn't
4180  // be safe to merge without code duplication.
4181  if (UnwindDest->getSinglePredecessor() != RI->getParent())
4182    return false;
4183
4184  // Verify that our cleanuppad's unwind destination is another cleanuppad.
4185  auto *SuccessorCleanupPad = dyn_cast<CleanupPadInst>(&UnwindDest->front());
4186  if (!SuccessorCleanupPad)
4187    return false;
4188
4189  CleanupPadInst *PredecessorCleanupPad = RI->getCleanupPad();
4190  // Replace any uses of the successor cleanupad with the predecessor pad
4191  // The only cleanuppad uses should be this cleanupret, it's cleanupret and
4192  // funclet bundle operands.
4193  SuccessorCleanupPad->replaceAllUsesWith(PredecessorCleanupPad);
4194  // Remove the old cleanuppad.
4195  SuccessorCleanupPad->eraseFromParent();
4196  // Now, we simply replace the cleanupret with a branch to the unwind
4197  // destination.
4198  BranchInst::Create(UnwindDest, RI->getParent());
4199  RI->eraseFromParent();
4200
4201  return true;
4202}
4203
4204bool SimplifyCFGOpt::simplifyCleanupReturn(CleanupReturnInst *RI) {
4205  // It is possible to transiantly have an undef cleanuppad operand because we
4206  // have deleted some, but not all, dead blocks.
4207  // Eventually, this block will be deleted.
4208  if (isa<UndefValue>(RI->getOperand(0)))
4209    return false;
4210
4211  if (mergeCleanupPad(RI))
4212    return true;
4213
4214  if (removeEmptyCleanup(RI))
4215    return true;
4216
4217  return false;
4218}
4219
4220bool SimplifyCFGOpt::simplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) {
4221  BasicBlock *BB = RI->getParent();
4222  if (!BB->getFirstNonPHIOrDbg()->isTerminator())
4223    return false;
4224
4225  // Find predecessors that end with branches.
4226  SmallVector<BasicBlock *, 8> UncondBranchPreds;
4227  SmallVector<BranchInst *, 8> CondBranchPreds;
4228  for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
4229    BasicBlock *P = *PI;
4230    Instruction *PTI = P->getTerminator();
4231    if (BranchInst *BI = dyn_cast<BranchInst>(PTI)) {
4232      if (BI->isUnconditional())
4233        UncondBranchPreds.push_back(P);
4234      else
4235        CondBranchPreds.push_back(BI);
4236    }
4237  }
4238
4239  // If we found some, do the transformation!
4240  if (!UncondBranchPreds.empty() && DupRet) {
4241    while (!UncondBranchPreds.empty()) {
4242      BasicBlock *Pred = UncondBranchPreds.pop_back_val();
4243      LLVM_DEBUG(dbgs() << "FOLDING: " << *BB
4244                        << "INTO UNCOND BRANCH PRED: " << *Pred);
4245      (void)FoldReturnIntoUncondBranch(RI, BB, Pred);
4246    }
4247
4248    // If we eliminated all predecessors of the block, delete the block now.
4249    if (pred_empty(BB)) {
4250      // We know there are no successors, so just nuke the block.
4251      if (LoopHeaders)
4252        LoopHeaders->erase(BB);
4253      BB->eraseFromParent();
4254    }
4255
4256    return true;
4257  }
4258
4259  // Check out all of the conditional branches going to this return
4260  // instruction.  If any of them just select between returns, change the
4261  // branch itself into a select/return pair.
4262  while (!CondBranchPreds.empty()) {
4263    BranchInst *BI = CondBranchPreds.pop_back_val();
4264
4265    // Check to see if the non-BB successor is also a return block.
4266    if (isa<ReturnInst>(BI->getSuccessor(0)->getTerminator()) &&
4267        isa<ReturnInst>(BI->getSuccessor(1)->getTerminator()) &&
4268        SimplifyCondBranchToTwoReturns(BI, Builder))
4269      return true;
4270  }
4271  return false;
4272}
4273
4274bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) {
4275  BasicBlock *BB = UI->getParent();
4276
4277  bool Changed = false;
4278
4279  // If there are any instructions immediately before the unreachable that can
4280  // be removed, do so.
4281  while (UI->getIterator() != BB->begin()) {
4282    BasicBlock::iterator BBI = UI->getIterator();
4283    --BBI;
4284    // Do not delete instructions that can have side effects which might cause
4285    // the unreachable to not be reachable; specifically, calls and volatile
4286    // operations may have this effect.
4287    if (isa<CallInst>(BBI) && !isa<DbgInfoIntrinsic>(BBI))
4288      break;
4289
4290    if (BBI->mayHaveSideEffects()) {
4291      if (auto *SI = dyn_cast<StoreInst>(BBI)) {
4292        if (SI->isVolatile())
4293          break;
4294      } else if (auto *LI = dyn_cast<LoadInst>(BBI)) {
4295        if (LI->isVolatile())
4296          break;
4297      } else if (auto *RMWI = dyn_cast<AtomicRMWInst>(BBI)) {
4298        if (RMWI->isVolatile())
4299          break;
4300      } else if (auto *CXI = dyn_cast<AtomicCmpXchgInst>(BBI)) {
4301        if (CXI->isVolatile())
4302          break;
4303      } else if (isa<CatchPadInst>(BBI)) {
4304        // A catchpad may invoke exception object constructors and such, which
4305        // in some languages can be arbitrary code, so be conservative by
4306        // default.
4307        // For CoreCLR, it just involves a type test, so can be removed.
4308        if (classifyEHPersonality(BB->getParent()->getPersonalityFn()) !=
4309            EHPersonality::CoreCLR)
4310          break;
4311      } else if (!isa<FenceInst>(BBI) && !isa<VAArgInst>(BBI) &&
4312                 !isa<LandingPadInst>(BBI)) {
4313        break;
4314      }
4315      // Note that deleting LandingPad's here is in fact okay, although it
4316      // involves a bit of subtle reasoning. If this inst is a LandingPad,
4317      // all the predecessors of this block will be the unwind edges of Invokes,
4318      // and we can therefore guarantee this block will be erased.
4319    }
4320
4321    // Delete this instruction (any uses are guaranteed to be dead)
4322    if (!BBI->use_empty())
4323      BBI->replaceAllUsesWith(UndefValue::get(BBI->getType()));
4324    BBI->eraseFromParent();
4325    Changed = true;
4326  }
4327
4328  // If the unreachable instruction is the first in the block, take a gander
4329  // at all of the predecessors of this instruction, and simplify them.
4330  if (&BB->front() != UI)
4331    return Changed;
4332
4333  SmallVector<BasicBlock *, 8> Preds(pred_begin(BB), pred_end(BB));
4334  for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
4335    Instruction *TI = Preds[i]->getTerminator();
4336    IRBuilder<> Builder(TI);
4337    if (auto *BI = dyn_cast<BranchInst>(TI)) {
4338      if (BI->isUnconditional()) {
4339        assert(BI->getSuccessor(0) == BB && "Incorrect CFG");
4340        new UnreachableInst(TI->getContext(), TI);
4341        TI->eraseFromParent();
4342        Changed = true;
4343      } else {
4344        Value* Cond = BI->getCondition();
4345        if (BI->getSuccessor(0) == BB) {
4346          Builder.CreateAssumption(Builder.CreateNot(Cond));
4347          Builder.CreateBr(BI->getSuccessor(1));
4348        } else {
4349          assert(BI->getSuccessor(1) == BB && "Incorrect CFG");
4350          Builder.CreateAssumption(Cond);
4351          Builder.CreateBr(BI->getSuccessor(0));
4352        }
4353        EraseTerminatorAndDCECond(BI);
4354        Changed = true;
4355      }
4356    } else if (auto *SI = dyn_cast<SwitchInst>(TI)) {
4357      SwitchInstProfUpdateWrapper SU(*SI);
4358      for (auto i = SU->case_begin(), e = SU->case_end(); i != e;) {
4359        if (i->getCaseSuccessor() != BB) {
4360          ++i;
4361          continue;
4362        }
4363        BB->removePredecessor(SU->getParent());
4364        i = SU.removeCase(i);
4365        e = SU->case_end();
4366        Changed = true;
4367      }
4368    } else if (auto *II = dyn_cast<InvokeInst>(TI)) {
4369      if (II->getUnwindDest() == BB) {
4370        removeUnwindEdge(TI->getParent());
4371        Changed = true;
4372      }
4373    } else if (auto *CSI = dyn_cast<CatchSwitchInst>(TI)) {
4374      if (CSI->getUnwindDest() == BB) {
4375        removeUnwindEdge(TI->getParent());
4376        Changed = true;
4377        continue;
4378      }
4379
4380      for (CatchSwitchInst::handler_iterator I = CSI->handler_begin(),
4381                                             E = CSI->handler_end();
4382           I != E; ++I) {
4383        if (*I == BB) {
4384          CSI->removeHandler(I);
4385          --I;
4386          --E;
4387          Changed = true;
4388        }
4389      }
4390      if (CSI->getNumHandlers() == 0) {
4391        BasicBlock *CatchSwitchBB = CSI->getParent();
4392        if (CSI->hasUnwindDest()) {
4393          // Redirect preds to the unwind dest
4394          CatchSwitchBB->replaceAllUsesWith(CSI->getUnwindDest());
4395        } else {
4396          // Rewrite all preds to unwind to caller (or from invoke to call).
4397          SmallVector<BasicBlock *, 8> EHPreds(predecessors(CatchSwitchBB));
4398          for (BasicBlock *EHPred : EHPreds)
4399            removeUnwindEdge(EHPred);
4400        }
4401        // The catchswitch is no longer reachable.
4402        new UnreachableInst(CSI->getContext(), CSI);
4403        CSI->eraseFromParent();
4404        Changed = true;
4405      }
4406    } else if (isa<CleanupReturnInst>(TI)) {
4407      new UnreachableInst(TI->getContext(), TI);
4408      TI->eraseFromParent();
4409      Changed = true;
4410    }
4411  }
4412
4413  // If this block is now dead, remove it.
4414  if (pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) {
4415    // We know there are no successors, so just nuke the block.
4416    if (LoopHeaders)
4417      LoopHeaders->erase(BB);
4418    BB->eraseFromParent();
4419    return true;
4420  }
4421
4422  return Changed;
4423}
4424
4425static bool CasesAreContiguous(SmallVectorImpl<ConstantInt *> &Cases) {
4426  assert(Cases.size() >= 1);
4427
4428  array_pod_sort(Cases.begin(), Cases.end(), ConstantIntSortPredicate);
4429  for (size_t I = 1, E = Cases.size(); I != E; ++I) {
4430    if (Cases[I - 1]->getValue() != Cases[I]->getValue() + 1)
4431      return false;
4432  }
4433  return true;
4434}
4435
4436static void createUnreachableSwitchDefault(SwitchInst *Switch) {
4437  LLVM_DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n");
4438  BasicBlock *NewDefaultBlock =
4439     SplitBlockPredecessors(Switch->getDefaultDest(), Switch->getParent(), "");
4440  Switch->setDefaultDest(&*NewDefaultBlock);
4441  SplitBlock(&*NewDefaultBlock, &NewDefaultBlock->front());
4442  auto *NewTerminator = NewDefaultBlock->getTerminator();
4443  new UnreachableInst(Switch->getContext(), NewTerminator);
4444  EraseTerminatorAndDCECond(NewTerminator);
4445}
4446
4447/// Turn a switch with two reachable destinations into an integer range
4448/// comparison and branch.
4449bool SimplifyCFGOpt::TurnSwitchRangeIntoICmp(SwitchInst *SI,
4450                                             IRBuilder<> &Builder) {
4451  assert(SI->getNumCases() > 1 && "Degenerate switch?");
4452
4453  bool HasDefault =
4454      !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg());
4455
4456  // Partition the cases into two sets with different destinations.
4457  BasicBlock *DestA = HasDefault ? SI->getDefaultDest() : nullptr;
4458  BasicBlock *DestB = nullptr;
4459  SmallVector<ConstantInt *, 16> CasesA;
4460  SmallVector<ConstantInt *, 16> CasesB;
4461
4462  for (auto Case : SI->cases()) {
4463    BasicBlock *Dest = Case.getCaseSuccessor();
4464    if (!DestA)
4465      DestA = Dest;
4466    if (Dest == DestA) {
4467      CasesA.push_back(Case.getCaseValue());
4468      continue;
4469    }
4470    if (!DestB)
4471      DestB = Dest;
4472    if (Dest == DestB) {
4473      CasesB.push_back(Case.getCaseValue());
4474      continue;
4475    }
4476    return false; // More than two destinations.
4477  }
4478
4479  assert(DestA && DestB &&
4480         "Single-destination switch should have been folded.");
4481  assert(DestA != DestB);
4482  assert(DestB != SI->getDefaultDest());
4483  assert(!CasesB.empty() && "There must be non-default cases.");
4484  assert(!CasesA.empty() || HasDefault);
4485
4486  // Figure out if one of the sets of cases form a contiguous range.
4487  SmallVectorImpl<ConstantInt *> *ContiguousCases = nullptr;
4488  BasicBlock *ContiguousDest = nullptr;
4489  BasicBlock *OtherDest = nullptr;
4490  if (!CasesA.empty() && CasesAreContiguous(CasesA)) {
4491    ContiguousCases = &CasesA;
4492    ContiguousDest = DestA;
4493    OtherDest = DestB;
4494  } else if (CasesAreContiguous(CasesB)) {
4495    ContiguousCases = &CasesB;
4496    ContiguousDest = DestB;
4497    OtherDest = DestA;
4498  } else
4499    return false;
4500
4501  // Start building the compare and branch.
4502
4503  Constant *Offset = ConstantExpr::getNeg(ContiguousCases->back());
4504  Constant *NumCases =
4505      ConstantInt::get(Offset->getType(), ContiguousCases->size());
4506
4507  Value *Sub = SI->getCondition();
4508  if (!Offset->isNullValue())
4509    Sub = Builder.CreateAdd(Sub, Offset, Sub->getName() + ".off");
4510
4511  Value *Cmp;
4512  // If NumCases overflowed, then all possible values jump to the successor.
4513  if (NumCases->isNullValue() && !ContiguousCases->empty())
4514    Cmp = ConstantInt::getTrue(SI->getContext());
4515  else
4516    Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch");
4517  BranchInst *NewBI = Builder.CreateCondBr(Cmp, ContiguousDest, OtherDest);
4518
4519  // Update weight for the newly-created conditional branch.
4520  if (HasBranchWeights(SI)) {
4521    SmallVector<uint64_t, 8> Weights;
4522    GetBranchWeights(SI, Weights);
4523    if (Weights.size() == 1 + SI->getNumCases()) {
4524      uint64_t TrueWeight = 0;
4525      uint64_t FalseWeight = 0;
4526      for (size_t I = 0, E = Weights.size(); I != E; ++I) {
4527        if (SI->getSuccessor(I) == ContiguousDest)
4528          TrueWeight += Weights[I];
4529        else
4530          FalseWeight += Weights[I];
4531      }
4532      while (TrueWeight > UINT32_MAX || FalseWeight > UINT32_MAX) {
4533        TrueWeight /= 2;
4534        FalseWeight /= 2;
4535      }
4536      setBranchWeights(NewBI, TrueWeight, FalseWeight);
4537    }
4538  }
4539
4540  // Prune obsolete incoming values off the successors' PHI nodes.
4541  for (auto BBI = ContiguousDest->begin(); isa<PHINode>(BBI); ++BBI) {
4542    unsigned PreviousEdges = ContiguousCases->size();
4543    if (ContiguousDest == SI->getDefaultDest())
4544      ++PreviousEdges;
4545    for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I)
4546      cast<PHINode>(BBI)->removeIncomingValue(SI->getParent());
4547  }
4548  for (auto BBI = OtherDest->begin(); isa<PHINode>(BBI); ++BBI) {
4549    unsigned PreviousEdges = SI->getNumCases() - ContiguousCases->size();
4550    if (OtherDest == SI->getDefaultDest())
4551      ++PreviousEdges;
4552    for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I)
4553      cast<PHINode>(BBI)->removeIncomingValue(SI->getParent());
4554  }
4555
4556  // Clean up the default block - it may have phis or other instructions before
4557  // the unreachable terminator.
4558  if (!HasDefault)
4559    createUnreachableSwitchDefault(SI);
4560
4561  // Drop the switch.
4562  SI->eraseFromParent();
4563
4564  return true;
4565}
4566
4567/// Compute masked bits for the condition of a switch
4568/// and use it to remove dead cases.
4569static bool eliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC,
4570                                     const DataLayout &DL) {
4571  Value *Cond = SI->getCondition();
4572  unsigned Bits = Cond->getType()->getIntegerBitWidth();
4573  KnownBits Known = computeKnownBits(Cond, DL, 0, AC, SI);
4574
4575  // We can also eliminate cases by determining that their values are outside of
4576  // the limited range of the condition based on how many significant (non-sign)
4577  // bits are in the condition value.
4578  unsigned ExtraSignBits = ComputeNumSignBits(Cond, DL, 0, AC, SI) - 1;
4579  unsigned MaxSignificantBitsInCond = Bits - ExtraSignBits;
4580
4581  // Gather dead cases.
4582  SmallVector<ConstantInt *, 8> DeadCases;
4583  for (auto &Case : SI->cases()) {
4584    const APInt &CaseVal = Case.getCaseValue()->getValue();
4585    if (Known.Zero.intersects(CaseVal) || !Known.One.isSubsetOf(CaseVal) ||
4586        (CaseVal.getMinSignedBits() > MaxSignificantBitsInCond)) {
4587      DeadCases.push_back(Case.getCaseValue());
4588      LLVM_DEBUG(dbgs() << "SimplifyCFG: switch case " << CaseVal
4589                        << " is dead.\n");
4590    }
4591  }
4592
4593  // If we can prove that the cases must cover all possible values, the
4594  // default destination becomes dead and we can remove it.  If we know some
4595  // of the bits in the value, we can use that to more precisely compute the
4596  // number of possible unique case values.
4597  bool HasDefault =
4598      !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg());
4599  const unsigned NumUnknownBits =
4600      Bits - (Known.Zero | Known.One).countPopulation();
4601  assert(NumUnknownBits <= Bits);
4602  if (HasDefault && DeadCases.empty() &&
4603      NumUnknownBits < 64 /* avoid overflow */ &&
4604      SI->getNumCases() == (1ULL << NumUnknownBits)) {
4605    createUnreachableSwitchDefault(SI);
4606    return true;
4607  }
4608
4609  if (DeadCases.empty())
4610    return false;
4611
4612  SwitchInstProfUpdateWrapper SIW(*SI);
4613  for (ConstantInt *DeadCase : DeadCases) {
4614    SwitchInst::CaseIt CaseI = SI->findCaseValue(DeadCase);
4615    assert(CaseI != SI->case_default() &&
4616           "Case was not found. Probably mistake in DeadCases forming.");
4617    // Prune unused values from PHI nodes.
4618    CaseI->getCaseSuccessor()->removePredecessor(SI->getParent());
4619    SIW.removeCase(CaseI);
4620  }
4621
4622  return true;
4623}
4624
4625/// If BB would be eligible for simplification by
4626/// TryToSimplifyUncondBranchFromEmptyBlock (i.e. it is empty and terminated
4627/// by an unconditional branch), look at the phi node for BB in the successor
4628/// block and see if the incoming value is equal to CaseValue. If so, return
4629/// the phi node, and set PhiIndex to BB's index in the phi node.
4630static PHINode *FindPHIForConditionForwarding(ConstantInt *CaseValue,
4631                                              BasicBlock *BB, int *PhiIndex) {
4632  if (BB->getFirstNonPHIOrDbg() != BB->getTerminator())
4633    return nullptr; // BB must be empty to be a candidate for simplification.
4634  if (!BB->getSinglePredecessor())
4635    return nullptr; // BB must be dominated by the switch.
4636
4637  BranchInst *Branch = dyn_cast<BranchInst>(BB->getTerminator());
4638  if (!Branch || !Branch->isUnconditional())
4639    return nullptr; // Terminator must be unconditional branch.
4640
4641  BasicBlock *Succ = Branch->getSuccessor(0);
4642
4643  for (PHINode &PHI : Succ->phis()) {
4644    int Idx = PHI.getBasicBlockIndex(BB);
4645    assert(Idx >= 0 && "PHI has no entry for predecessor?");
4646
4647    Value *InValue = PHI.getIncomingValue(Idx);
4648    if (InValue != CaseValue)
4649      continue;
4650
4651    *PhiIndex = Idx;
4652    return &PHI;
4653  }
4654
4655  return nullptr;
4656}
4657
4658/// Try to forward the condition of a switch instruction to a phi node
4659/// dominated by the switch, if that would mean that some of the destination
4660/// blocks of the switch can be folded away. Return true if a change is made.
4661static bool ForwardSwitchConditionToPHI(SwitchInst *SI) {
4662  using ForwardingNodesMap = DenseMap<PHINode *, SmallVector<int, 4>>;
4663
4664  ForwardingNodesMap ForwardingNodes;
4665  BasicBlock *SwitchBlock = SI->getParent();
4666  bool Changed = false;
4667  for (auto &Case : SI->cases()) {
4668    ConstantInt *CaseValue = Case.getCaseValue();
4669    BasicBlock *CaseDest = Case.getCaseSuccessor();
4670
4671    // Replace phi operands in successor blocks that are using the constant case
4672    // value rather than the switch condition variable:
4673    //   switchbb:
4674    //   switch i32 %x, label %default [
4675    //     i32 17, label %succ
4676    //   ...
4677    //   succ:
4678    //     %r = phi i32 ... [ 17, %switchbb ] ...
4679    // -->
4680    //     %r = phi i32 ... [ %x, %switchbb ] ...
4681
4682    for (PHINode &Phi : CaseDest->phis()) {
4683      // This only works if there is exactly 1 incoming edge from the switch to
4684      // a phi. If there is >1, that means multiple cases of the switch map to 1
4685      // value in the phi, and that phi value is not the switch condition. Thus,
4686      // this transform would not make sense (the phi would be invalid because
4687      // a phi can't have different incoming values from the same block).
4688      int SwitchBBIdx = Phi.getBasicBlockIndex(SwitchBlock);
4689      if (Phi.getIncomingValue(SwitchBBIdx) == CaseValue &&
4690          count(Phi.blocks(), SwitchBlock) == 1) {
4691        Phi.setIncomingValue(SwitchBBIdx, SI->getCondition());
4692        Changed = true;
4693      }
4694    }
4695
4696    // Collect phi nodes that are indirectly using this switch's case constants.
4697    int PhiIdx;
4698    if (auto *Phi = FindPHIForConditionForwarding(CaseValue, CaseDest, &PhiIdx))
4699      ForwardingNodes[Phi].push_back(PhiIdx);
4700  }
4701
4702  for (auto &ForwardingNode : ForwardingNodes) {
4703    PHINode *Phi = ForwardingNode.first;
4704    SmallVectorImpl<int> &Indexes = ForwardingNode.second;
4705    if (Indexes.size() < 2)
4706      continue;
4707
4708    for (int Index : Indexes)
4709      Phi->setIncomingValue(Index, SI->getCondition());
4710    Changed = true;
4711  }
4712
4713  return Changed;
4714}
4715
4716/// Return true if the backend will be able to handle
4717/// initializing an array of constants like C.
4718static bool ValidLookupTableConstant(Constant *C, const TargetTransformInfo &TTI) {
4719  if (C->isThreadDependent())
4720    return false;
4721  if (C->isDLLImportDependent())
4722    return false;
4723
4724  if (!isa<ConstantFP>(C) && !isa<ConstantInt>(C) &&
4725      !isa<ConstantPointerNull>(C) && !isa<GlobalValue>(C) &&
4726      !isa<UndefValue>(C) && !isa<ConstantExpr>(C))
4727    return false;
4728
4729  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
4730    if (!CE->isGEPWithNoNotionalOverIndexing())
4731      return false;
4732    if (!ValidLookupTableConstant(CE->getOperand(0), TTI))
4733      return false;
4734  }
4735
4736  if (!TTI.shouldBuildLookupTablesForConstant(C))
4737    return false;
4738
4739  return true;
4740}
4741
4742/// If V is a Constant, return it. Otherwise, try to look up
4743/// its constant value in ConstantPool, returning 0 if it's not there.
4744static Constant *
4745LookupConstant(Value *V,
4746               const SmallDenseMap<Value *, Constant *> &ConstantPool) {
4747  if (Constant *C = dyn_cast<Constant>(V))
4748    return C;
4749  return ConstantPool.lookup(V);
4750}
4751
4752/// Try to fold instruction I into a constant. This works for
4753/// simple instructions such as binary operations where both operands are
4754/// constant or can be replaced by constants from the ConstantPool. Returns the
4755/// resulting constant on success, 0 otherwise.
4756static Constant *
4757ConstantFold(Instruction *I, const DataLayout &DL,
4758             const SmallDenseMap<Value *, Constant *> &ConstantPool) {
4759  if (SelectInst *Select = dyn_cast<SelectInst>(I)) {
4760    Constant *A = LookupConstant(Select->getCondition(), ConstantPool);
4761    if (!A)
4762      return nullptr;
4763    if (A->isAllOnesValue())
4764      return LookupConstant(Select->getTrueValue(), ConstantPool);
4765    if (A->isNullValue())
4766      return LookupConstant(Select->getFalseValue(), ConstantPool);
4767    return nullptr;
4768  }
4769
4770  SmallVector<Constant *, 4> COps;
4771  for (unsigned N = 0, E = I->getNumOperands(); N != E; ++N) {
4772    if (Constant *A = LookupConstant(I->getOperand(N), ConstantPool))
4773      COps.push_back(A);
4774    else
4775      return nullptr;
4776  }
4777
4778  if (CmpInst *Cmp = dyn_cast<CmpInst>(I)) {
4779    return ConstantFoldCompareInstOperands(Cmp->getPredicate(), COps[0],
4780                                           COps[1], DL);
4781  }
4782
4783  return ConstantFoldInstOperands(I, COps, DL);
4784}
4785
4786/// Try to determine the resulting constant values in phi nodes
4787/// at the common destination basic block, *CommonDest, for one of the case
4788/// destionations CaseDest corresponding to value CaseVal (0 for the default
4789/// case), of a switch instruction SI.
4790static bool
4791GetCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest,
4792               BasicBlock **CommonDest,
4793               SmallVectorImpl<std::pair<PHINode *, Constant *>> &Res,
4794               const DataLayout &DL, const TargetTransformInfo &TTI) {
4795  // The block from which we enter the common destination.
4796  BasicBlock *Pred = SI->getParent();
4797
4798  // If CaseDest is empty except for some side-effect free instructions through
4799  // which we can constant-propagate the CaseVal, continue to its successor.
4800  SmallDenseMap<Value *, Constant *> ConstantPool;
4801  ConstantPool.insert(std::make_pair(SI->getCondition(), CaseVal));
4802  for (Instruction &I :CaseDest->instructionsWithoutDebug()) {
4803    if (I.isTerminator()) {
4804      // If the terminator is a simple branch, continue to the next block.
4805      if (I.getNumSuccessors() != 1 || I.isExceptionalTerminator())
4806        return false;
4807      Pred = CaseDest;
4808      CaseDest = I.getSuccessor(0);
4809    } else if (Constant *C = ConstantFold(&I, DL, ConstantPool)) {
4810      // Instruction is side-effect free and constant.
4811
4812      // If the instruction has uses outside this block or a phi node slot for
4813      // the block, it is not safe to bypass the instruction since it would then
4814      // no longer dominate all its uses.
4815      for (auto &Use : I.uses()) {
4816        User *User = Use.getUser();
4817        if (Instruction *I = dyn_cast<Instruction>(User))
4818          if (I->getParent() == CaseDest)
4819            continue;
4820        if (PHINode *Phi = dyn_cast<PHINode>(User))
4821          if (Phi->getIncomingBlock(Use) == CaseDest)
4822            continue;
4823        return false;
4824      }
4825
4826      ConstantPool.insert(std::make_pair(&I, C));
4827    } else {
4828      break;
4829    }
4830  }
4831
4832  // If we did not have a CommonDest before, use the current one.
4833  if (!*CommonDest)
4834    *CommonDest = CaseDest;
4835  // If the destination isn't the common one, abort.
4836  if (CaseDest != *CommonDest)
4837    return false;
4838
4839  // Get the values for this case from phi nodes in the destination block.
4840  for (PHINode &PHI : (*CommonDest)->phis()) {
4841    int Idx = PHI.getBasicBlockIndex(Pred);
4842    if (Idx == -1)
4843      continue;
4844
4845    Constant *ConstVal =
4846        LookupConstant(PHI.getIncomingValue(Idx), ConstantPool);
4847    if (!ConstVal)
4848      return false;
4849
4850    // Be conservative about which kinds of constants we support.
4851    if (!ValidLookupTableConstant(ConstVal, TTI))
4852      return false;
4853
4854    Res.push_back(std::make_pair(&PHI, ConstVal));
4855  }
4856
4857  return Res.size() > 0;
4858}
4859
4860// Helper function used to add CaseVal to the list of cases that generate
4861// Result. Returns the updated number of cases that generate this result.
4862static uintptr_t MapCaseToResult(ConstantInt *CaseVal,
4863                                 SwitchCaseResultVectorTy &UniqueResults,
4864                                 Constant *Result) {
4865  for (auto &I : UniqueResults) {
4866    if (I.first == Result) {
4867      I.second.push_back(CaseVal);
4868      return I.second.size();
4869    }
4870  }
4871  UniqueResults.push_back(
4872      std::make_pair(Result, SmallVector<ConstantInt *, 4>(1, CaseVal)));
4873  return 1;
4874}
4875
4876// Helper function that initializes a map containing
4877// results for the PHI node of the common destination block for a switch
4878// instruction. Returns false if multiple PHI nodes have been found or if
4879// there is not a common destination block for the switch.
4880static bool
4881InitializeUniqueCases(SwitchInst *SI, PHINode *&PHI, BasicBlock *&CommonDest,
4882                      SwitchCaseResultVectorTy &UniqueResults,
4883                      Constant *&DefaultResult, const DataLayout &DL,
4884                      const TargetTransformInfo &TTI,
4885                      uintptr_t MaxUniqueResults, uintptr_t MaxCasesPerResult) {
4886  for (auto &I : SI->cases()) {
4887    ConstantInt *CaseVal = I.getCaseValue();
4888
4889    // Resulting value at phi nodes for this case value.
4890    SwitchCaseResultsTy Results;
4891    if (!GetCaseResults(SI, CaseVal, I.getCaseSuccessor(), &CommonDest, Results,
4892                        DL, TTI))
4893      return false;
4894
4895    // Only one value per case is permitted.
4896    if (Results.size() > 1)
4897      return false;
4898
4899    // Add the case->result mapping to UniqueResults.
4900    const uintptr_t NumCasesForResult =
4901        MapCaseToResult(CaseVal, UniqueResults, Results.begin()->second);
4902
4903    // Early out if there are too many cases for this result.
4904    if (NumCasesForResult > MaxCasesPerResult)
4905      return false;
4906
4907    // Early out if there are too many unique results.
4908    if (UniqueResults.size() > MaxUniqueResults)
4909      return false;
4910
4911    // Check the PHI consistency.
4912    if (!PHI)
4913      PHI = Results[0].first;
4914    else if (PHI != Results[0].first)
4915      return false;
4916  }
4917  // Find the default result value.
4918  SmallVector<std::pair<PHINode *, Constant *>, 1> DefaultResults;
4919  BasicBlock *DefaultDest = SI->getDefaultDest();
4920  GetCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest, DefaultResults,
4921                 DL, TTI);
4922  // If the default value is not found abort unless the default destination
4923  // is unreachable.
4924  DefaultResult =
4925      DefaultResults.size() == 1 ? DefaultResults.begin()->second : nullptr;
4926  if ((!DefaultResult &&
4927       !isa<UnreachableInst>(DefaultDest->getFirstNonPHIOrDbg())))
4928    return false;
4929
4930  return true;
4931}
4932
4933// Helper function that checks if it is possible to transform a switch with only
4934// two cases (or two cases + default) that produces a result into a select.
4935// Example:
4936// switch (a) {
4937//   case 10:                %0 = icmp eq i32 %a, 10
4938//     return 10;            %1 = select i1 %0, i32 10, i32 4
4939//   case 20:        ---->   %2 = icmp eq i32 %a, 20
4940//     return 2;             %3 = select i1 %2, i32 2, i32 %1
4941//   default:
4942//     return 4;
4943// }
4944static Value *ConvertTwoCaseSwitch(const SwitchCaseResultVectorTy &ResultVector,
4945                                   Constant *DefaultResult, Value *Condition,
4946                                   IRBuilder<> &Builder) {
4947  assert(ResultVector.size() == 2 &&
4948         "We should have exactly two unique results at this point");
4949  // If we are selecting between only two cases transform into a simple
4950  // select or a two-way select if default is possible.
4951  if (ResultVector[0].second.size() == 1 &&
4952      ResultVector[1].second.size() == 1) {
4953    ConstantInt *const FirstCase = ResultVector[0].second[0];
4954    ConstantInt *const SecondCase = ResultVector[1].second[0];
4955
4956    bool DefaultCanTrigger = DefaultResult;
4957    Value *SelectValue = ResultVector[1].first;
4958    if (DefaultCanTrigger) {
4959      Value *const ValueCompare =
4960          Builder.CreateICmpEQ(Condition, SecondCase, "switch.selectcmp");
4961      SelectValue = Builder.CreateSelect(ValueCompare, ResultVector[1].first,
4962                                         DefaultResult, "switch.select");
4963    }
4964    Value *const ValueCompare =
4965        Builder.CreateICmpEQ(Condition, FirstCase, "switch.selectcmp");
4966    return Builder.CreateSelect(ValueCompare, ResultVector[0].first,
4967                                SelectValue, "switch.select");
4968  }
4969
4970  return nullptr;
4971}
4972
4973// Helper function to cleanup a switch instruction that has been converted into
4974// a select, fixing up PHI nodes and basic blocks.
4975static void RemoveSwitchAfterSelectConversion(SwitchInst *SI, PHINode *PHI,
4976                                              Value *SelectValue,
4977                                              IRBuilder<> &Builder) {
4978  BasicBlock *SelectBB = SI->getParent();
4979  while (PHI->getBasicBlockIndex(SelectBB) >= 0)
4980    PHI->removeIncomingValue(SelectBB);
4981  PHI->addIncoming(SelectValue, SelectBB);
4982
4983  Builder.CreateBr(PHI->getParent());
4984
4985  // Remove the switch.
4986  for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) {
4987    BasicBlock *Succ = SI->getSuccessor(i);
4988
4989    if (Succ == PHI->getParent())
4990      continue;
4991    Succ->removePredecessor(SelectBB);
4992  }
4993  SI->eraseFromParent();
4994}
4995
4996/// If the switch is only used to initialize one or more
4997/// phi nodes in a common successor block with only two different
4998/// constant values, replace the switch with select.
4999static bool switchToSelect(SwitchInst *SI, IRBuilder<> &Builder,
5000                           const DataLayout &DL,
5001                           const TargetTransformInfo &TTI) {
5002  Value *const Cond = SI->getCondition();
5003  PHINode *PHI = nullptr;
5004  BasicBlock *CommonDest = nullptr;
5005  Constant *DefaultResult;
5006  SwitchCaseResultVectorTy UniqueResults;
5007  // Collect all the cases that will deliver the same value from the switch.
5008  if (!InitializeUniqueCases(SI, PHI, CommonDest, UniqueResults, DefaultResult,
5009                             DL, TTI, 2, 1))
5010    return false;
5011  // Selects choose between maximum two values.
5012  if (UniqueResults.size() != 2)
5013    return false;
5014  assert(PHI != nullptr && "PHI for value select not found");
5015
5016  Builder.SetInsertPoint(SI);
5017  Value *SelectValue =
5018      ConvertTwoCaseSwitch(UniqueResults, DefaultResult, Cond, Builder);
5019  if (SelectValue) {
5020    RemoveSwitchAfterSelectConversion(SI, PHI, SelectValue, Builder);
5021    return true;
5022  }
5023  // The switch couldn't be converted into a select.
5024  return false;
5025}
5026
5027namespace {
5028
5029/// This class represents a lookup table that can be used to replace a switch.
5030class SwitchLookupTable {
5031public:
5032  /// Create a lookup table to use as a switch replacement with the contents
5033  /// of Values, using DefaultValue to fill any holes in the table.
5034  SwitchLookupTable(
5035      Module &M, uint64_t TableSize, ConstantInt *Offset,
5036      const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
5037      Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName);
5038
5039  /// Build instructions with Builder to retrieve the value at
5040  /// the position given by Index in the lookup table.
5041  Value *BuildLookup(Value *Index, IRBuilder<> &Builder);
5042
5043  /// Return true if a table with TableSize elements of
5044  /// type ElementType would fit in a target-legal register.
5045  static bool WouldFitInRegister(const DataLayout &DL, uint64_t TableSize,
5046                                 Type *ElementType);
5047
5048private:
5049  // Depending on the contents of the table, it can be represented in
5050  // different ways.
5051  enum {
5052    // For tables where each element contains the same value, we just have to
5053    // store that single value and return it for each lookup.
5054    SingleValueKind,
5055
5056    // For tables where there is a linear relationship between table index
5057    // and values. We calculate the result with a simple multiplication
5058    // and addition instead of a table lookup.
5059    LinearMapKind,
5060
5061    // For small tables with integer elements, we can pack them into a bitmap
5062    // that fits into a target-legal register. Values are retrieved by
5063    // shift and mask operations.
5064    BitMapKind,
5065
5066    // The table is stored as an array of values. Values are retrieved by load
5067    // instructions from the table.
5068    ArrayKind
5069  } Kind;
5070
5071  // For SingleValueKind, this is the single value.
5072  Constant *SingleValue = nullptr;
5073
5074  // For BitMapKind, this is the bitmap.
5075  ConstantInt *BitMap = nullptr;
5076  IntegerType *BitMapElementTy = nullptr;
5077
5078  // For LinearMapKind, these are the constants used to derive the value.
5079  ConstantInt *LinearOffset = nullptr;
5080  ConstantInt *LinearMultiplier = nullptr;
5081
5082  // For ArrayKind, this is the array.
5083  GlobalVariable *Array = nullptr;
5084};
5085
5086} // end anonymous namespace
5087
5088SwitchLookupTable::SwitchLookupTable(
5089    Module &M, uint64_t TableSize, ConstantInt *Offset,
5090    const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
5091    Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName) {
5092  assert(Values.size() && "Can't build lookup table without values!");
5093  assert(TableSize >= Values.size() && "Can't fit values in table!");
5094
5095  // If all values in the table are equal, this is that value.
5096  SingleValue = Values.begin()->second;
5097
5098  Type *ValueType = Values.begin()->second->getType();
5099
5100  // Build up the table contents.
5101  SmallVector<Constant *, 64> TableContents(TableSize);
5102  for (size_t I = 0, E = Values.size(); I != E; ++I) {
5103    ConstantInt *CaseVal = Values[I].first;
5104    Constant *CaseRes = Values[I].second;
5105    assert(CaseRes->getType() == ValueType);
5106
5107    uint64_t Idx = (CaseVal->getValue() - Offset->getValue()).getLimitedValue();
5108    TableContents[Idx] = CaseRes;
5109
5110    if (CaseRes != SingleValue)
5111      SingleValue = nullptr;
5112  }
5113
5114  // Fill in any holes in the table with the default result.
5115  if (Values.size() < TableSize) {
5116    assert(DefaultValue &&
5117           "Need a default value to fill the lookup table holes.");
5118    assert(DefaultValue->getType() == ValueType);
5119    for (uint64_t I = 0; I < TableSize; ++I) {
5120      if (!TableContents[I])
5121        TableContents[I] = DefaultValue;
5122    }
5123
5124    if (DefaultValue != SingleValue)
5125      SingleValue = nullptr;
5126  }
5127
5128  // If each element in the table contains the same value, we only need to store
5129  // that single value.
5130  if (SingleValue) {
5131    Kind = SingleValueKind;
5132    return;
5133  }
5134
5135  // Check if we can derive the value with a linear transformation from the
5136  // table index.
5137  if (isa<IntegerType>(ValueType)) {
5138    bool LinearMappingPossible = true;
5139    APInt PrevVal;
5140    APInt DistToPrev;
5141    assert(TableSize >= 2 && "Should be a SingleValue table.");
5142    // Check if there is the same distance between two consecutive values.
5143    for (uint64_t I = 0; I < TableSize; ++I) {
5144      ConstantInt *ConstVal = dyn_cast<ConstantInt>(TableContents[I]);
5145      if (!ConstVal) {
5146        // This is an undef. We could deal with it, but undefs in lookup tables
5147        // are very seldom. It's probably not worth the additional complexity.
5148        LinearMappingPossible = false;
5149        break;
5150      }
5151      const APInt &Val = ConstVal->getValue();
5152      if (I != 0) {
5153        APInt Dist = Val - PrevVal;
5154        if (I == 1) {
5155          DistToPrev = Dist;
5156        } else if (Dist != DistToPrev) {
5157          LinearMappingPossible = false;
5158          break;
5159        }
5160      }
5161      PrevVal = Val;
5162    }
5163    if (LinearMappingPossible) {
5164      LinearOffset = cast<ConstantInt>(TableContents[0]);
5165      LinearMultiplier = ConstantInt::get(M.getContext(), DistToPrev);
5166      Kind = LinearMapKind;
5167      ++NumLinearMaps;
5168      return;
5169    }
5170  }
5171
5172  // If the type is integer and the table fits in a register, build a bitmap.
5173  if (WouldFitInRegister(DL, TableSize, ValueType)) {
5174    IntegerType *IT = cast<IntegerType>(ValueType);
5175    APInt TableInt(TableSize * IT->getBitWidth(), 0);
5176    for (uint64_t I = TableSize; I > 0; --I) {
5177      TableInt <<= IT->getBitWidth();
5178      // Insert values into the bitmap. Undef values are set to zero.
5179      if (!isa<UndefValue>(TableContents[I - 1])) {
5180        ConstantInt *Val = cast<ConstantInt>(TableContents[I - 1]);
5181        TableInt |= Val->getValue().zext(TableInt.getBitWidth());
5182      }
5183    }
5184    BitMap = ConstantInt::get(M.getContext(), TableInt);
5185    BitMapElementTy = IT;
5186    Kind = BitMapKind;
5187    ++NumBitMaps;
5188    return;
5189  }
5190
5191  // Store the table in an array.
5192  ArrayType *ArrayTy = ArrayType::get(ValueType, TableSize);
5193  Constant *Initializer = ConstantArray::get(ArrayTy, TableContents);
5194
5195  Array = new GlobalVariable(M, ArrayTy, /*isConstant=*/true,
5196                             GlobalVariable::PrivateLinkage, Initializer,
5197                             "switch.table." + FuncName);
5198  Array->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
5199  // Set the alignment to that of an array items. We will be only loading one
5200  // value out of it.
5201  Array->setAlignment(Align(DL.getPrefTypeAlignment(ValueType)));
5202  Kind = ArrayKind;
5203}
5204
5205Value *SwitchLookupTable::BuildLookup(Value *Index, IRBuilder<> &Builder) {
5206  switch (Kind) {
5207  case SingleValueKind:
5208    return SingleValue;
5209  case LinearMapKind: {
5210    // Derive the result value from the input value.
5211    Value *Result = Builder.CreateIntCast(Index, LinearMultiplier->getType(),
5212                                          false, "switch.idx.cast");
5213    if (!LinearMultiplier->isOne())
5214      Result = Builder.CreateMul(Result, LinearMultiplier, "switch.idx.mult");
5215    if (!LinearOffset->isZero())
5216      Result = Builder.CreateAdd(Result, LinearOffset, "switch.offset");
5217    return Result;
5218  }
5219  case BitMapKind: {
5220    // Type of the bitmap (e.g. i59).
5221    IntegerType *MapTy = BitMap->getType();
5222
5223    // Cast Index to the same type as the bitmap.
5224    // Note: The Index is <= the number of elements in the table, so
5225    // truncating it to the width of the bitmask is safe.
5226    Value *ShiftAmt = Builder.CreateZExtOrTrunc(Index, MapTy, "switch.cast");
5227
5228    // Multiply the shift amount by the element width.
5229    ShiftAmt = Builder.CreateMul(
5230        ShiftAmt, ConstantInt::get(MapTy, BitMapElementTy->getBitWidth()),
5231        "switch.shiftamt");
5232
5233    // Shift down.
5234    Value *DownShifted =
5235        Builder.CreateLShr(BitMap, ShiftAmt, "switch.downshift");
5236    // Mask off.
5237    return Builder.CreateTrunc(DownShifted, BitMapElementTy, "switch.masked");
5238  }
5239  case ArrayKind: {
5240    // Make sure the table index will not overflow when treated as signed.
5241    IntegerType *IT = cast<IntegerType>(Index->getType());
5242    uint64_t TableSize =
5243        Array->getInitializer()->getType()->getArrayNumElements();
5244    if (TableSize > (1ULL << (IT->getBitWidth() - 1)))
5245      Index = Builder.CreateZExt(
5246          Index, IntegerType::get(IT->getContext(), IT->getBitWidth() + 1),
5247          "switch.tableidx.zext");
5248
5249    Value *GEPIndices[] = {Builder.getInt32(0), Index};
5250    Value *GEP = Builder.CreateInBoundsGEP(Array->getValueType(), Array,
5251                                           GEPIndices, "switch.gep");
5252    return Builder.CreateLoad(
5253        cast<ArrayType>(Array->getValueType())->getElementType(), GEP,
5254        "switch.load");
5255  }
5256  }
5257  llvm_unreachable("Unknown lookup table kind!");
5258}
5259
5260bool SwitchLookupTable::WouldFitInRegister(const DataLayout &DL,
5261                                           uint64_t TableSize,
5262                                           Type *ElementType) {
5263  auto *IT = dyn_cast<IntegerType>(ElementType);
5264  if (!IT)
5265    return false;
5266  // FIXME: If the type is wider than it needs to be, e.g. i8 but all values
5267  // are <= 15, we could try to narrow the type.
5268
5269  // Avoid overflow, fitsInLegalInteger uses unsigned int for the width.
5270  if (TableSize >= UINT_MAX / IT->getBitWidth())
5271    return false;
5272  return DL.fitsInLegalInteger(TableSize * IT->getBitWidth());
5273}
5274
5275/// Determine whether a lookup table should be built for this switch, based on
5276/// the number of cases, size of the table, and the types of the results.
5277static bool
5278ShouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize,
5279                       const TargetTransformInfo &TTI, const DataLayout &DL,
5280                       const SmallDenseMap<PHINode *, Type *> &ResultTypes) {
5281  if (SI->getNumCases() > TableSize || TableSize >= UINT64_MAX / 10)
5282    return false; // TableSize overflowed, or mul below might overflow.
5283
5284  bool AllTablesFitInRegister = true;
5285  bool HasIllegalType = false;
5286  for (const auto &I : ResultTypes) {
5287    Type *Ty = I.second;
5288
5289    // Saturate this flag to true.
5290    HasIllegalType = HasIllegalType || !TTI.isTypeLegal(Ty);
5291
5292    // Saturate this flag to false.
5293    AllTablesFitInRegister =
5294        AllTablesFitInRegister &&
5295        SwitchLookupTable::WouldFitInRegister(DL, TableSize, Ty);
5296
5297    // If both flags saturate, we're done. NOTE: This *only* works with
5298    // saturating flags, and all flags have to saturate first due to the
5299    // non-deterministic behavior of iterating over a dense map.
5300    if (HasIllegalType && !AllTablesFitInRegister)
5301      break;
5302  }
5303
5304  // If each table would fit in a register, we should build it anyway.
5305  if (AllTablesFitInRegister)
5306    return true;
5307
5308  // Don't build a table that doesn't fit in-register if it has illegal types.
5309  if (HasIllegalType)
5310    return false;
5311
5312  // The table density should be at least 40%. This is the same criterion as for
5313  // jump tables, see SelectionDAGBuilder::handleJTSwitchCase.
5314  // FIXME: Find the best cut-off.
5315  return SI->getNumCases() * 10 >= TableSize * 4;
5316}
5317
5318/// Try to reuse the switch table index compare. Following pattern:
5319/// \code
5320///     if (idx < tablesize)
5321///        r = table[idx]; // table does not contain default_value
5322///     else
5323///        r = default_value;
5324///     if (r != default_value)
5325///        ...
5326/// \endcode
5327/// Is optimized to:
5328/// \code
5329///     cond = idx < tablesize;
5330///     if (cond)
5331///        r = table[idx];
5332///     else
5333///        r = default_value;
5334///     if (cond)
5335///        ...
5336/// \endcode
5337/// Jump threading will then eliminate the second if(cond).
5338static void reuseTableCompare(
5339    User *PhiUser, BasicBlock *PhiBlock, BranchInst *RangeCheckBranch,
5340    Constant *DefaultValue,
5341    const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values) {
5342  ICmpInst *CmpInst = dyn_cast<ICmpInst>(PhiUser);
5343  if (!CmpInst)
5344    return;
5345
5346  // We require that the compare is in the same block as the phi so that jump
5347  // threading can do its work afterwards.
5348  if (CmpInst->getParent() != PhiBlock)
5349    return;
5350
5351  Constant *CmpOp1 = dyn_cast<Constant>(CmpInst->getOperand(1));
5352  if (!CmpOp1)
5353    return;
5354
5355  Value *RangeCmp = RangeCheckBranch->getCondition();
5356  Constant *TrueConst = ConstantInt::getTrue(RangeCmp->getType());
5357  Constant *FalseConst = ConstantInt::getFalse(RangeCmp->getType());
5358
5359  // Check if the compare with the default value is constant true or false.
5360  Constant *DefaultConst = ConstantExpr::getICmp(CmpInst->getPredicate(),
5361                                                 DefaultValue, CmpOp1, true);
5362  if (DefaultConst != TrueConst && DefaultConst != FalseConst)
5363    return;
5364
5365  // Check if the compare with the case values is distinct from the default
5366  // compare result.
5367  for (auto ValuePair : Values) {
5368    Constant *CaseConst = ConstantExpr::getICmp(CmpInst->getPredicate(),
5369                                                ValuePair.second, CmpOp1, true);
5370    if (!CaseConst || CaseConst == DefaultConst || isa<UndefValue>(CaseConst))
5371      return;
5372    assert((CaseConst == TrueConst || CaseConst == FalseConst) &&
5373           "Expect true or false as compare result.");
5374  }
5375
5376  // Check if the branch instruction dominates the phi node. It's a simple
5377  // dominance check, but sufficient for our needs.
5378  // Although this check is invariant in the calling loops, it's better to do it
5379  // at this late stage. Practically we do it at most once for a switch.
5380  BasicBlock *BranchBlock = RangeCheckBranch->getParent();
5381  for (auto PI = pred_begin(PhiBlock), E = pred_end(PhiBlock); PI != E; ++PI) {
5382    BasicBlock *Pred = *PI;
5383    if (Pred != BranchBlock && Pred->getUniquePredecessor() != BranchBlock)
5384      return;
5385  }
5386
5387  if (DefaultConst == FalseConst) {
5388    // The compare yields the same result. We can replace it.
5389    CmpInst->replaceAllUsesWith(RangeCmp);
5390    ++NumTableCmpReuses;
5391  } else {
5392    // The compare yields the same result, just inverted. We can replace it.
5393    Value *InvertedTableCmp = BinaryOperator::CreateXor(
5394        RangeCmp, ConstantInt::get(RangeCmp->getType(), 1), "inverted.cmp",
5395        RangeCheckBranch);
5396    CmpInst->replaceAllUsesWith(InvertedTableCmp);
5397    ++NumTableCmpReuses;
5398  }
5399}
5400
5401/// If the switch is only used to initialize one or more phi nodes in a common
5402/// successor block with different constant values, replace the switch with
5403/// lookup tables.
5404static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
5405                                const DataLayout &DL,
5406                                const TargetTransformInfo &TTI) {
5407  assert(SI->getNumCases() > 1 && "Degenerate switch?");
5408
5409  Function *Fn = SI->getParent()->getParent();
5410  // Only build lookup table when we have a target that supports it or the
5411  // attribute is not set.
5412  if (!TTI.shouldBuildLookupTables() ||
5413      (Fn->getFnAttribute("no-jump-tables").getValueAsString() == "true"))
5414    return false;
5415
5416  // FIXME: If the switch is too sparse for a lookup table, perhaps we could
5417  // split off a dense part and build a lookup table for that.
5418
5419  // FIXME: This creates arrays of GEPs to constant strings, which means each
5420  // GEP needs a runtime relocation in PIC code. We should just build one big
5421  // string and lookup indices into that.
5422
5423  // Ignore switches with less than three cases. Lookup tables will not make
5424  // them faster, so we don't analyze them.
5425  if (SI->getNumCases() < 3)
5426    return false;
5427
5428  // Figure out the corresponding result for each case value and phi node in the
5429  // common destination, as well as the min and max case values.
5430  assert(!SI->cases().empty());
5431  SwitchInst::CaseIt CI = SI->case_begin();
5432  ConstantInt *MinCaseVal = CI->getCaseValue();
5433  ConstantInt *MaxCaseVal = CI->getCaseValue();
5434
5435  BasicBlock *CommonDest = nullptr;
5436
5437  using ResultListTy = SmallVector<std::pair<ConstantInt *, Constant *>, 4>;
5438  SmallDenseMap<PHINode *, ResultListTy> ResultLists;
5439
5440  SmallDenseMap<PHINode *, Constant *> DefaultResults;
5441  SmallDenseMap<PHINode *, Type *> ResultTypes;
5442  SmallVector<PHINode *, 4> PHIs;
5443
5444  for (SwitchInst::CaseIt E = SI->case_end(); CI != E; ++CI) {
5445    ConstantInt *CaseVal = CI->getCaseValue();
5446    if (CaseVal->getValue().slt(MinCaseVal->getValue()))
5447      MinCaseVal = CaseVal;
5448    if (CaseVal->getValue().sgt(MaxCaseVal->getValue()))
5449      MaxCaseVal = CaseVal;
5450
5451    // Resulting value at phi nodes for this case value.
5452    using ResultsTy = SmallVector<std::pair<PHINode *, Constant *>, 4>;
5453    ResultsTy Results;
5454    if (!GetCaseResults(SI, CaseVal, CI->getCaseSuccessor(), &CommonDest,
5455                        Results, DL, TTI))
5456      return false;
5457
5458    // Append the result from this case to the list for each phi.
5459    for (const auto &I : Results) {
5460      PHINode *PHI = I.first;
5461      Constant *Value = I.second;
5462      if (!ResultLists.count(PHI))
5463        PHIs.push_back(PHI);
5464      ResultLists[PHI].push_back(std::make_pair(CaseVal, Value));
5465    }
5466  }
5467
5468  // Keep track of the result types.
5469  for (PHINode *PHI : PHIs) {
5470    ResultTypes[PHI] = ResultLists[PHI][0].second->getType();
5471  }
5472
5473  uint64_t NumResults = ResultLists[PHIs[0]].size();
5474  APInt RangeSpread = MaxCaseVal->getValue() - MinCaseVal->getValue();
5475  uint64_t TableSize = RangeSpread.getLimitedValue() + 1;
5476  bool TableHasHoles = (NumResults < TableSize);
5477
5478  // If the table has holes, we need a constant result for the default case
5479  // or a bitmask that fits in a register.
5480  SmallVector<std::pair<PHINode *, Constant *>, 4> DefaultResultsList;
5481  bool HasDefaultResults =
5482      GetCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest,
5483                     DefaultResultsList, DL, TTI);
5484
5485  bool NeedMask = (TableHasHoles && !HasDefaultResults);
5486  if (NeedMask) {
5487    // As an extra penalty for the validity test we require more cases.
5488    if (SI->getNumCases() < 4) // FIXME: Find best threshold value (benchmark).
5489      return false;
5490    if (!DL.fitsInLegalInteger(TableSize))
5491      return false;
5492  }
5493
5494  for (const auto &I : DefaultResultsList) {
5495    PHINode *PHI = I.first;
5496    Constant *Result = I.second;
5497    DefaultResults[PHI] = Result;
5498  }
5499
5500  if (!ShouldBuildLookupTable(SI, TableSize, TTI, DL, ResultTypes))
5501    return false;
5502
5503  // Create the BB that does the lookups.
5504  Module &Mod = *CommonDest->getParent()->getParent();
5505  BasicBlock *LookupBB = BasicBlock::Create(
5506      Mod.getContext(), "switch.lookup", CommonDest->getParent(), CommonDest);
5507
5508  // Compute the table index value.
5509  Builder.SetInsertPoint(SI);
5510  Value *TableIndex;
5511  if (MinCaseVal->isNullValue())
5512    TableIndex = SI->getCondition();
5513  else
5514    TableIndex = Builder.CreateSub(SI->getCondition(), MinCaseVal,
5515                                   "switch.tableidx");
5516
5517  // Compute the maximum table size representable by the integer type we are
5518  // switching upon.
5519  unsigned CaseSize = MinCaseVal->getType()->getPrimitiveSizeInBits();
5520  uint64_t MaxTableSize = CaseSize > 63 ? UINT64_MAX : 1ULL << CaseSize;
5521  assert(MaxTableSize >= TableSize &&
5522         "It is impossible for a switch to have more entries than the max "
5523         "representable value of its input integer type's size.");
5524
5525  // If the default destination is unreachable, or if the lookup table covers
5526  // all values of the conditional variable, branch directly to the lookup table
5527  // BB. Otherwise, check that the condition is within the case range.
5528  const bool DefaultIsReachable =
5529      !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg());
5530  const bool GeneratingCoveredLookupTable = (MaxTableSize == TableSize);
5531  BranchInst *RangeCheckBranch = nullptr;
5532
5533  if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
5534    Builder.CreateBr(LookupBB);
5535    // Note: We call removeProdecessor later since we need to be able to get the
5536    // PHI value for the default case in case we're using a bit mask.
5537  } else {
5538    Value *Cmp = Builder.CreateICmpULT(
5539        TableIndex, ConstantInt::get(MinCaseVal->getType(), TableSize));
5540    RangeCheckBranch =
5541        Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest());
5542  }
5543
5544  // Populate the BB that does the lookups.
5545  Builder.SetInsertPoint(LookupBB);
5546
5547  if (NeedMask) {
5548    // Before doing the lookup, we do the hole check. The LookupBB is therefore
5549    // re-purposed to do the hole check, and we create a new LookupBB.
5550    BasicBlock *MaskBB = LookupBB;
5551    MaskBB->setName("switch.hole_check");
5552    LookupBB = BasicBlock::Create(Mod.getContext(), "switch.lookup",
5553                                  CommonDest->getParent(), CommonDest);
5554
5555    // Make the mask's bitwidth at least 8-bit and a power-of-2 to avoid
5556    // unnecessary illegal types.
5557    uint64_t TableSizePowOf2 = NextPowerOf2(std::max(7ULL, TableSize - 1ULL));
5558    APInt MaskInt(TableSizePowOf2, 0);
5559    APInt One(TableSizePowOf2, 1);
5560    // Build bitmask; fill in a 1 bit for every case.
5561    const ResultListTy &ResultList = ResultLists[PHIs[0]];
5562    for (size_t I = 0, E = ResultList.size(); I != E; ++I) {
5563      uint64_t Idx = (ResultList[I].first->getValue() - MinCaseVal->getValue())
5564                         .getLimitedValue();
5565      MaskInt |= One << Idx;
5566    }
5567    ConstantInt *TableMask = ConstantInt::get(Mod.getContext(), MaskInt);
5568
5569    // Get the TableIndex'th bit of the bitmask.
5570    // If this bit is 0 (meaning hole) jump to the default destination,
5571    // else continue with table lookup.
5572    IntegerType *MapTy = TableMask->getType();
5573    Value *MaskIndex =
5574        Builder.CreateZExtOrTrunc(TableIndex, MapTy, "switch.maskindex");
5575    Value *Shifted = Builder.CreateLShr(TableMask, MaskIndex, "switch.shifted");
5576    Value *LoBit = Builder.CreateTrunc(
5577        Shifted, Type::getInt1Ty(Mod.getContext()), "switch.lobit");
5578    Builder.CreateCondBr(LoBit, LookupBB, SI->getDefaultDest());
5579
5580    Builder.SetInsertPoint(LookupBB);
5581    AddPredecessorToBlock(SI->getDefaultDest(), MaskBB, SI->getParent());
5582  }
5583
5584  if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
5585    // We cached PHINodes in PHIs. To avoid accessing deleted PHINodes later,
5586    // do not delete PHINodes here.
5587    SI->getDefaultDest()->removePredecessor(SI->getParent(),
5588                                            /*KeepOneInputPHIs=*/true);
5589  }
5590
5591  bool ReturnedEarly = false;
5592  for (PHINode *PHI : PHIs) {
5593    const ResultListTy &ResultList = ResultLists[PHI];
5594
5595    // If using a bitmask, use any value to fill the lookup table holes.
5596    Constant *DV = NeedMask ? ResultLists[PHI][0].second : DefaultResults[PHI];
5597    StringRef FuncName = Fn->getName();
5598    SwitchLookupTable Table(Mod, TableSize, MinCaseVal, ResultList, DV, DL,
5599                            FuncName);
5600
5601    Value *Result = Table.BuildLookup(TableIndex, Builder);
5602
5603    // If the result is used to return immediately from the function, we want to
5604    // do that right here.
5605    if (PHI->hasOneUse() && isa<ReturnInst>(*PHI->user_begin()) &&
5606        PHI->user_back() == CommonDest->getFirstNonPHIOrDbg()) {
5607      Builder.CreateRet(Result);
5608      ReturnedEarly = true;
5609      break;
5610    }
5611
5612    // Do a small peephole optimization: re-use the switch table compare if
5613    // possible.
5614    if (!TableHasHoles && HasDefaultResults && RangeCheckBranch) {
5615      BasicBlock *PhiBlock = PHI->getParent();
5616      // Search for compare instructions which use the phi.
5617      for (auto *User : PHI->users()) {
5618        reuseTableCompare(User, PhiBlock, RangeCheckBranch, DV, ResultList);
5619      }
5620    }
5621
5622    PHI->addIncoming(Result, LookupBB);
5623  }
5624
5625  if (!ReturnedEarly)
5626    Builder.CreateBr(CommonDest);
5627
5628  // Remove the switch.
5629  for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) {
5630    BasicBlock *Succ = SI->getSuccessor(i);
5631
5632    if (Succ == SI->getDefaultDest())
5633      continue;
5634    Succ->removePredecessor(SI->getParent());
5635  }
5636  SI->eraseFromParent();
5637
5638  ++NumLookupTables;
5639  if (NeedMask)
5640    ++NumLookupTablesHoles;
5641  return true;
5642}
5643
5644static bool isSwitchDense(ArrayRef<int64_t> Values) {
5645  // See also SelectionDAGBuilder::isDense(), which this function was based on.
5646  uint64_t Diff = (uint64_t)Values.back() - (uint64_t)Values.front();
5647  uint64_t Range = Diff + 1;
5648  uint64_t NumCases = Values.size();
5649  // 40% is the default density for building a jump table in optsize/minsize mode.
5650  uint64_t MinDensity = 40;
5651
5652  return NumCases * 100 >= Range * MinDensity;
5653}
5654
5655/// Try to transform a switch that has "holes" in it to a contiguous sequence
5656/// of cases.
5657///
5658/// A switch such as: switch(i) {case 5: case 9: case 13: case 17:} can be
5659/// range-reduced to: switch ((i-5) / 4) {case 0: case 1: case 2: case 3:}.
5660///
5661/// This converts a sparse switch into a dense switch which allows better
5662/// lowering and could also allow transforming into a lookup table.
5663static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,
5664                              const DataLayout &DL,
5665                              const TargetTransformInfo &TTI) {
5666  auto *CondTy = cast<IntegerType>(SI->getCondition()->getType());
5667  if (CondTy->getIntegerBitWidth() > 64 ||
5668      !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
5669    return false;
5670  // Only bother with this optimization if there are more than 3 switch cases;
5671  // SDAG will only bother creating jump tables for 4 or more cases.
5672  if (SI->getNumCases() < 4)
5673    return false;
5674
5675  // This transform is agnostic to the signedness of the input or case values. We
5676  // can treat the case values as signed or unsigned. We can optimize more common
5677  // cases such as a sequence crossing zero {-4,0,4,8} if we interpret case values
5678  // as signed.
5679  SmallVector<int64_t,4> Values;
5680  for (auto &C : SI->cases())
5681    Values.push_back(C.getCaseValue()->getValue().getSExtValue());
5682  llvm::sort(Values);
5683
5684  // If the switch is already dense, there's nothing useful to do here.
5685  if (isSwitchDense(Values))
5686    return false;
5687
5688  // First, transform the values such that they start at zero and ascend.
5689  int64_t Base = Values[0];
5690  for (auto &V : Values)
5691    V -= (uint64_t)(Base);
5692
5693  // Now we have signed numbers that have been shifted so that, given enough
5694  // precision, there are no negative values. Since the rest of the transform
5695  // is bitwise only, we switch now to an unsigned representation.
5696
5697  // This transform can be done speculatively because it is so cheap - it
5698  // results in a single rotate operation being inserted.
5699  // FIXME: It's possible that optimizing a switch on powers of two might also
5700  // be beneficial - flag values are often powers of two and we could use a CLZ
5701  // as the key function.
5702
5703  // countTrailingZeros(0) returns 64. As Values is guaranteed to have more than
5704  // one element and LLVM disallows duplicate cases, Shift is guaranteed to be
5705  // less than 64.
5706  unsigned Shift = 64;
5707  for (auto &V : Values)
5708    Shift = std::min(Shift, countTrailingZeros((uint64_t)V));
5709  assert(Shift < 64);
5710  if (Shift > 0)
5711    for (auto &V : Values)
5712      V = (int64_t)((uint64_t)V >> Shift);
5713
5714  if (!isSwitchDense(Values))
5715    // Transform didn't create a dense switch.
5716    return false;
5717
5718  // The obvious transform is to shift the switch condition right and emit a
5719  // check that the condition actually cleanly divided by GCD, i.e.
5720  //   C & (1 << Shift - 1) == 0
5721  // inserting a new CFG edge to handle the case where it didn't divide cleanly.
5722  //
5723  // A cheaper way of doing this is a simple ROTR(C, Shift). This performs the
5724  // shift and puts the shifted-off bits in the uppermost bits. If any of these
5725  // are nonzero then the switch condition will be very large and will hit the
5726  // default case.
5727
5728  auto *Ty = cast<IntegerType>(SI->getCondition()->getType());
5729  Builder.SetInsertPoint(SI);
5730  auto *ShiftC = ConstantInt::get(Ty, Shift);
5731  auto *Sub = Builder.CreateSub(SI->getCondition(), ConstantInt::get(Ty, Base));
5732  auto *LShr = Builder.CreateLShr(Sub, ShiftC);
5733  auto *Shl = Builder.CreateShl(Sub, Ty->getBitWidth() - Shift);
5734  auto *Rot = Builder.CreateOr(LShr, Shl);
5735  SI->replaceUsesOfWith(SI->getCondition(), Rot);
5736
5737  for (auto Case : SI->cases()) {
5738    auto *Orig = Case.getCaseValue();
5739    auto Sub = Orig->getValue() - APInt(Ty->getBitWidth(), Base);
5740    Case.setValue(
5741        cast<ConstantInt>(ConstantInt::get(Ty, Sub.lshr(ShiftC->getValue()))));
5742  }
5743  return true;
5744}
5745
5746bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
5747  BasicBlock *BB = SI->getParent();
5748
5749  if (isValueEqualityComparison(SI)) {
5750    // If we only have one predecessor, and if it is a branch on this value,
5751    // see if that predecessor totally determines the outcome of this switch.
5752    if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
5753      if (SimplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred, Builder))
5754        return requestResimplify();
5755
5756    Value *Cond = SI->getCondition();
5757    if (SelectInst *Select = dyn_cast<SelectInst>(Cond))
5758      if (SimplifySwitchOnSelect(SI, Select))
5759        return requestResimplify();
5760
5761    // If the block only contains the switch, see if we can fold the block
5762    // away into any preds.
5763    if (SI == &*BB->instructionsWithoutDebug().begin())
5764      if (FoldValueComparisonIntoPredecessors(SI, Builder))
5765        return requestResimplify();
5766  }
5767
5768  // Try to transform the switch into an icmp and a branch.
5769  if (TurnSwitchRangeIntoICmp(SI, Builder))
5770    return requestResimplify();
5771
5772  // Remove unreachable cases.
5773  if (eliminateDeadSwitchCases(SI, Options.AC, DL))
5774    return requestResimplify();
5775
5776  if (switchToSelect(SI, Builder, DL, TTI))
5777    return requestResimplify();
5778
5779  if (Options.ForwardSwitchCondToPhi && ForwardSwitchConditionToPHI(SI))
5780    return requestResimplify();
5781
5782  // The conversion from switch to lookup tables results in difficult-to-analyze
5783  // code and makes pruning branches much harder. This is a problem if the
5784  // switch expression itself can still be restricted as a result of inlining or
5785  // CVP. Therefore, only apply this transformation during late stages of the
5786  // optimisation pipeline.
5787  if (Options.ConvertSwitchToLookupTable &&
5788      SwitchToLookupTable(SI, Builder, DL, TTI))
5789    return requestResimplify();
5790
5791  if (ReduceSwitchRange(SI, Builder, DL, TTI))
5792    return requestResimplify();
5793
5794  return false;
5795}
5796
5797bool SimplifyCFGOpt::simplifyIndirectBr(IndirectBrInst *IBI) {
5798  BasicBlock *BB = IBI->getParent();
5799  bool Changed = false;
5800
5801  // Eliminate redundant destinations.
5802  SmallPtrSet<Value *, 8> Succs;
5803  for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) {
5804    BasicBlock *Dest = IBI->getDestination(i);
5805    if (!Dest->hasAddressTaken() || !Succs.insert(Dest).second) {
5806      Dest->removePredecessor(BB);
5807      IBI->removeDestination(i);
5808      --i;
5809      --e;
5810      Changed = true;
5811    }
5812  }
5813
5814  if (IBI->getNumDestinations() == 0) {
5815    // If the indirectbr has no successors, change it to unreachable.
5816    new UnreachableInst(IBI->getContext(), IBI);
5817    EraseTerminatorAndDCECond(IBI);
5818    return true;
5819  }
5820
5821  if (IBI->getNumDestinations() == 1) {
5822    // If the indirectbr has one successor, change it to a direct branch.
5823    BranchInst::Create(IBI->getDestination(0), IBI);
5824    EraseTerminatorAndDCECond(IBI);
5825    return true;
5826  }
5827
5828  if (SelectInst *SI = dyn_cast<SelectInst>(IBI->getAddress())) {
5829    if (SimplifyIndirectBrOnSelect(IBI, SI))
5830      return requestResimplify();
5831  }
5832  return Changed;
5833}
5834
5835/// Given an block with only a single landing pad and a unconditional branch
5836/// try to find another basic block which this one can be merged with.  This
5837/// handles cases where we have multiple invokes with unique landing pads, but
5838/// a shared handler.
5839///
5840/// We specifically choose to not worry about merging non-empty blocks
5841/// here.  That is a PRE/scheduling problem and is best solved elsewhere.  In
5842/// practice, the optimizer produces empty landing pad blocks quite frequently
5843/// when dealing with exception dense code.  (see: instcombine, gvn, if-else
5844/// sinking in this file)
5845///
5846/// This is primarily a code size optimization.  We need to avoid performing
5847/// any transform which might inhibit optimization (such as our ability to
5848/// specialize a particular handler via tail commoning).  We do this by not
5849/// merging any blocks which require us to introduce a phi.  Since the same
5850/// values are flowing through both blocks, we don't lose any ability to
5851/// specialize.  If anything, we make such specialization more likely.
5852///
5853/// TODO - This transformation could remove entries from a phi in the target
5854/// block when the inputs in the phi are the same for the two blocks being
5855/// merged.  In some cases, this could result in removal of the PHI entirely.
5856static bool TryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI,
5857                                 BasicBlock *BB) {
5858  auto Succ = BB->getUniqueSuccessor();
5859  assert(Succ);
5860  // If there's a phi in the successor block, we'd likely have to introduce
5861  // a phi into the merged landing pad block.
5862  if (isa<PHINode>(*Succ->begin()))
5863    return false;
5864
5865  for (BasicBlock *OtherPred : predecessors(Succ)) {
5866    if (BB == OtherPred)
5867      continue;
5868    BasicBlock::iterator I = OtherPred->begin();
5869    LandingPadInst *LPad2 = dyn_cast<LandingPadInst>(I);
5870    if (!LPad2 || !LPad2->isIdenticalTo(LPad))
5871      continue;
5872    for (++I; isa<DbgInfoIntrinsic>(I); ++I)
5873      ;
5874    BranchInst *BI2 = dyn_cast<BranchInst>(I);
5875    if (!BI2 || !BI2->isIdenticalTo(BI))
5876      continue;
5877
5878    // We've found an identical block.  Update our predecessors to take that
5879    // path instead and make ourselves dead.
5880    SmallPtrSet<BasicBlock *, 16> Preds;
5881    Preds.insert(pred_begin(BB), pred_end(BB));
5882    for (BasicBlock *Pred : Preds) {
5883      InvokeInst *II = cast<InvokeInst>(Pred->getTerminator());
5884      assert(II->getNormalDest() != BB && II->getUnwindDest() == BB &&
5885             "unexpected successor");
5886      II->setUnwindDest(OtherPred);
5887    }
5888
5889    // The debug info in OtherPred doesn't cover the merged control flow that
5890    // used to go through BB.  We need to delete it or update it.
5891    for (auto I = OtherPred->begin(), E = OtherPred->end(); I != E;) {
5892      Instruction &Inst = *I;
5893      I++;
5894      if (isa<DbgInfoIntrinsic>(Inst))
5895        Inst.eraseFromParent();
5896    }
5897
5898    SmallPtrSet<BasicBlock *, 16> Succs;
5899    Succs.insert(succ_begin(BB), succ_end(BB));
5900    for (BasicBlock *Succ : Succs) {
5901      Succ->removePredecessor(BB);
5902    }
5903
5904    IRBuilder<> Builder(BI);
5905    Builder.CreateUnreachable();
5906    BI->eraseFromParent();
5907    return true;
5908  }
5909  return false;
5910}
5911
5912bool SimplifyCFGOpt::simplifyBranch(BranchInst *Branch, IRBuilder<> &Builder) {
5913  return Branch->isUnconditional() ? simplifyUncondBranch(Branch, Builder)
5914                                   : simplifyCondBranch(Branch, Builder);
5915}
5916
5917bool SimplifyCFGOpt::simplifyUncondBranch(BranchInst *BI,
5918                                          IRBuilder<> &Builder) {
5919  BasicBlock *BB = BI->getParent();
5920  BasicBlock *Succ = BI->getSuccessor(0);
5921
5922  // If the Terminator is the only non-phi instruction, simplify the block.
5923  // If LoopHeader is provided, check if the block or its successor is a loop
5924  // header. (This is for early invocations before loop simplify and
5925  // vectorization to keep canonical loop forms for nested loops. These blocks
5926  // can be eliminated when the pass is invoked later in the back-end.)
5927  // Note that if BB has only one predecessor then we do not introduce new
5928  // backedge, so we can eliminate BB.
5929  bool NeedCanonicalLoop =
5930      Options.NeedCanonicalLoop &&
5931      (LoopHeaders && BB->hasNPredecessorsOrMore(2) &&
5932       (LoopHeaders->count(BB) || LoopHeaders->count(Succ)));
5933  BasicBlock::iterator I = BB->getFirstNonPHIOrDbg()->getIterator();
5934  if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() &&
5935      !NeedCanonicalLoop && TryToSimplifyUncondBranchFromEmptyBlock(BB))
5936    return true;
5937
5938  // If the only instruction in the block is a seteq/setne comparison against a
5939  // constant, try to simplify the block.
5940  if (ICmpInst *ICI = dyn_cast<ICmpInst>(I))
5941    if (ICI->isEquality() && isa<ConstantInt>(ICI->getOperand(1))) {
5942      for (++I; isa<DbgInfoIntrinsic>(I); ++I)
5943        ;
5944      if (I->isTerminator() &&
5945          tryToSimplifyUncondBranchWithICmpInIt(ICI, Builder))
5946        return true;
5947    }
5948
5949  // See if we can merge an empty landing pad block with another which is
5950  // equivalent.
5951  if (LandingPadInst *LPad = dyn_cast<LandingPadInst>(I)) {
5952    for (++I; isa<DbgInfoIntrinsic>(I); ++I)
5953      ;
5954    if (I->isTerminator() && TryToMergeLandingPad(LPad, BI, BB))
5955      return true;
5956  }
5957
5958  // If this basic block is ONLY a compare and a branch, and if a predecessor
5959  // branches to us and our successor, fold the comparison into the
5960  // predecessor and use logical operations to update the incoming value
5961  // for PHI nodes in common successor.
5962  if (FoldBranchToCommonDest(BI, nullptr, Options.BonusInstThreshold))
5963    return requestResimplify();
5964  return false;
5965}
5966
5967static BasicBlock *allPredecessorsComeFromSameSource(BasicBlock *BB) {
5968  BasicBlock *PredPred = nullptr;
5969  for (auto *P : predecessors(BB)) {
5970    BasicBlock *PPred = P->getSinglePredecessor();
5971    if (!PPred || (PredPred && PredPred != PPred))
5972      return nullptr;
5973    PredPred = PPred;
5974  }
5975  return PredPred;
5976}
5977
5978bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
5979  BasicBlock *BB = BI->getParent();
5980  if (!Options.SimplifyCondBranch)
5981    return false;
5982
5983  // Conditional branch
5984  if (isValueEqualityComparison(BI)) {
5985    // If we only have one predecessor, and if it is a branch on this value,
5986    // see if that predecessor totally determines the outcome of this
5987    // switch.
5988    if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
5989      if (SimplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred, Builder))
5990        return requestResimplify();
5991
5992    // This block must be empty, except for the setcond inst, if it exists.
5993    // Ignore dbg intrinsics.
5994    auto I = BB->instructionsWithoutDebug().begin();
5995    if (&*I == BI) {
5996      if (FoldValueComparisonIntoPredecessors(BI, Builder))
5997        return requestResimplify();
5998    } else if (&*I == cast<Instruction>(BI->getCondition())) {
5999      ++I;
6000      if (&*I == BI && FoldValueComparisonIntoPredecessors(BI, Builder))
6001        return requestResimplify();
6002    }
6003  }
6004
6005  // Try to turn "br (X == 0 | X == 1), T, F" into a switch instruction.
6006  if (SimplifyBranchOnICmpChain(BI, Builder, DL))
6007    return true;
6008
6009  // If this basic block has dominating predecessor blocks and the dominating
6010  // blocks' conditions imply BI's condition, we know the direction of BI.
6011  Optional<bool> Imp = isImpliedByDomCondition(BI->getCondition(), BI, DL);
6012  if (Imp) {
6013    // Turn this into a branch on constant.
6014    auto *OldCond = BI->getCondition();
6015    ConstantInt *TorF = *Imp ? ConstantInt::getTrue(BB->getContext())
6016                             : ConstantInt::getFalse(BB->getContext());
6017    BI->setCondition(TorF);
6018    RecursivelyDeleteTriviallyDeadInstructions(OldCond);
6019    return requestResimplify();
6020  }
6021
6022  // If this basic block is ONLY a compare and a branch, and if a predecessor
6023  // branches to us and one of our successors, fold the comparison into the
6024  // predecessor and use logical operations to pick the right destination.
6025  if (FoldBranchToCommonDest(BI, nullptr, Options.BonusInstThreshold))
6026    return requestResimplify();
6027
6028  // We have a conditional branch to two blocks that are only reachable
6029  // from BI.  We know that the condbr dominates the two blocks, so see if
6030  // there is any identical code in the "then" and "else" blocks.  If so, we
6031  // can hoist it up to the branching block.
6032  if (BI->getSuccessor(0)->getSinglePredecessor()) {
6033    if (BI->getSuccessor(1)->getSinglePredecessor()) {
6034      if (HoistThenElseCodeToIf(BI, TTI))
6035        return requestResimplify();
6036    } else {
6037      // If Successor #1 has multiple preds, we may be able to conditionally
6038      // execute Successor #0 if it branches to Successor #1.
6039      Instruction *Succ0TI = BI->getSuccessor(0)->getTerminator();
6040      if (Succ0TI->getNumSuccessors() == 1 &&
6041          Succ0TI->getSuccessor(0) == BI->getSuccessor(1))
6042        if (SpeculativelyExecuteBB(BI, BI->getSuccessor(0), TTI))
6043          return requestResimplify();
6044    }
6045  } else if (BI->getSuccessor(1)->getSinglePredecessor()) {
6046    // If Successor #0 has multiple preds, we may be able to conditionally
6047    // execute Successor #1 if it branches to Successor #0.
6048    Instruction *Succ1TI = BI->getSuccessor(1)->getTerminator();
6049    if (Succ1TI->getNumSuccessors() == 1 &&
6050        Succ1TI->getSuccessor(0) == BI->getSuccessor(0))
6051      if (SpeculativelyExecuteBB(BI, BI->getSuccessor(1), TTI))
6052        return requestResimplify();
6053  }
6054
6055  // If this is a branch on a phi node in the current block, thread control
6056  // through this block if any PHI node entries are constants.
6057  if (PHINode *PN = dyn_cast<PHINode>(BI->getCondition()))
6058    if (PN->getParent() == BI->getParent())
6059      if (FoldCondBranchOnPHI(BI, DL, Options.AC))
6060        return requestResimplify();
6061
6062  // Scan predecessor blocks for conditional branches.
6063  for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
6064    if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator()))
6065      if (PBI != BI && PBI->isConditional())
6066        if (SimplifyCondBranchToCondBranch(PBI, BI, DL, TTI))
6067          return requestResimplify();
6068
6069  // Look for diamond patterns.
6070  if (MergeCondStores)
6071    if (BasicBlock *PrevBB = allPredecessorsComeFromSameSource(BB))
6072      if (BranchInst *PBI = dyn_cast<BranchInst>(PrevBB->getTerminator()))
6073        if (PBI != BI && PBI->isConditional())
6074          if (mergeConditionalStores(PBI, BI, DL, TTI))
6075            return requestResimplify();
6076
6077  return false;
6078}
6079
6080/// Check if passing a value to an instruction will cause undefined behavior.
6081static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I) {
6082  Constant *C = dyn_cast<Constant>(V);
6083  if (!C)
6084    return false;
6085
6086  if (I->use_empty())
6087    return false;
6088
6089  if (C->isNullValue() || isa<UndefValue>(C)) {
6090    // Only look at the first use, avoid hurting compile time with long uselists
6091    User *Use = *I->user_begin();
6092
6093    // Now make sure that there are no instructions in between that can alter
6094    // control flow (eg. calls)
6095    for (BasicBlock::iterator
6096             i = ++BasicBlock::iterator(I),
6097             UI = BasicBlock::iterator(dyn_cast<Instruction>(Use));
6098         i != UI; ++i)
6099      if (i == I->getParent()->end() || i->mayHaveSideEffects())
6100        return false;
6101
6102    // Look through GEPs. A load from a GEP derived from NULL is still undefined
6103    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Use))
6104      if (GEP->getPointerOperand() == I)
6105        return passingValueIsAlwaysUndefined(V, GEP);
6106
6107    // Look through bitcasts.
6108    if (BitCastInst *BC = dyn_cast<BitCastInst>(Use))
6109      return passingValueIsAlwaysUndefined(V, BC);
6110
6111    // Load from null is undefined.
6112    if (LoadInst *LI = dyn_cast<LoadInst>(Use))
6113      if (!LI->isVolatile())
6114        return !NullPointerIsDefined(LI->getFunction(),
6115                                     LI->getPointerAddressSpace());
6116
6117    // Store to null is undefined.
6118    if (StoreInst *SI = dyn_cast<StoreInst>(Use))
6119      if (!SI->isVolatile())
6120        return (!NullPointerIsDefined(SI->getFunction(),
6121                                      SI->getPointerAddressSpace())) &&
6122               SI->getPointerOperand() == I;
6123
6124    // A call to null is undefined.
6125    if (auto *CB = dyn_cast<CallBase>(Use))
6126      return !NullPointerIsDefined(CB->getFunction()) &&
6127             CB->getCalledOperand() == I;
6128  }
6129  return false;
6130}
6131
6132/// If BB has an incoming value that will always trigger undefined behavior
6133/// (eg. null pointer dereference), remove the branch leading here.
6134static bool removeUndefIntroducingPredecessor(BasicBlock *BB) {
6135  for (PHINode &PHI : BB->phis())
6136    for (unsigned i = 0, e = PHI.getNumIncomingValues(); i != e; ++i)
6137      if (passingValueIsAlwaysUndefined(PHI.getIncomingValue(i), &PHI)) {
6138        Instruction *T = PHI.getIncomingBlock(i)->getTerminator();
6139        IRBuilder<> Builder(T);
6140        if (BranchInst *BI = dyn_cast<BranchInst>(T)) {
6141          BB->removePredecessor(PHI.getIncomingBlock(i));
6142          // Turn uncoditional branches into unreachables and remove the dead
6143          // destination from conditional branches.
6144          if (BI->isUnconditional())
6145            Builder.CreateUnreachable();
6146          else
6147            Builder.CreateBr(BI->getSuccessor(0) == BB ? BI->getSuccessor(1)
6148                                                       : BI->getSuccessor(0));
6149          BI->eraseFromParent();
6150          return true;
6151        }
6152        // TODO: SwitchInst.
6153      }
6154
6155  return false;
6156}
6157
6158bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) {
6159  bool Changed = false;
6160
6161  assert(BB && BB->getParent() && "Block not embedded in function!");
6162  assert(BB->getTerminator() && "Degenerate basic block encountered!");
6163
6164  // Remove basic blocks that have no predecessors (except the entry block)...
6165  // or that just have themself as a predecessor.  These are unreachable.
6166  if ((pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) ||
6167      BB->getSinglePredecessor() == BB) {
6168    LLVM_DEBUG(dbgs() << "Removing BB: \n" << *BB);
6169    DeleteDeadBlock(BB);
6170    return true;
6171  }
6172
6173  // Check to see if we can constant propagate this terminator instruction
6174  // away...
6175  Changed |= ConstantFoldTerminator(BB, true);
6176
6177  // Check for and eliminate duplicate PHI nodes in this block.
6178  Changed |= EliminateDuplicatePHINodes(BB);
6179
6180  // Check for and remove branches that will always cause undefined behavior.
6181  Changed |= removeUndefIntroducingPredecessor(BB);
6182
6183  // Merge basic blocks into their predecessor if there is only one distinct
6184  // pred, and if there is only one distinct successor of the predecessor, and
6185  // if there are no PHI nodes.
6186  if (MergeBlockIntoPredecessor(BB))
6187    return true;
6188
6189  if (SinkCommon && Options.SinkCommonInsts)
6190    Changed |= SinkCommonCodeFromPredecessors(BB);
6191
6192  IRBuilder<> Builder(BB);
6193
6194  if (Options.FoldTwoEntryPHINode) {
6195    // If there is a trivial two-entry PHI node in this basic block, and we can
6196    // eliminate it, do so now.
6197    if (auto *PN = dyn_cast<PHINode>(BB->begin()))
6198      if (PN->getNumIncomingValues() == 2)
6199        Changed |= FoldTwoEntryPHINode(PN, TTI, DL);
6200  }
6201
6202  Instruction *Terminator = BB->getTerminator();
6203  Builder.SetInsertPoint(Terminator);
6204  switch (Terminator->getOpcode()) {
6205  case Instruction::Br:
6206    Changed |= simplifyBranch(cast<BranchInst>(Terminator), Builder);
6207    break;
6208  case Instruction::Ret:
6209    Changed |= simplifyReturn(cast<ReturnInst>(Terminator), Builder);
6210    break;
6211  case Instruction::Resume:
6212    Changed |= simplifyResume(cast<ResumeInst>(Terminator), Builder);
6213    break;
6214  case Instruction::CleanupRet:
6215    Changed |= simplifyCleanupReturn(cast<CleanupReturnInst>(Terminator));
6216    break;
6217  case Instruction::Switch:
6218    Changed |= simplifySwitch(cast<SwitchInst>(Terminator), Builder);
6219    break;
6220  case Instruction::Unreachable:
6221    Changed |= simplifyUnreachable(cast<UnreachableInst>(Terminator));
6222    break;
6223  case Instruction::IndirectBr:
6224    Changed |= simplifyIndirectBr(cast<IndirectBrInst>(Terminator));
6225    break;
6226  }
6227
6228  return Changed;
6229}
6230
6231bool SimplifyCFGOpt::run(BasicBlock *BB) {
6232  bool Changed = false;
6233
6234  // Repeated simplify BB as long as resimplification is requested.
6235  do {
6236    Resimplify = false;
6237
6238    // Perform one round of simplifcation. Resimplify flag will be set if
6239    // another iteration is requested.
6240    Changed |= simplifyOnce(BB);
6241  } while (Resimplify);
6242
6243  return Changed;
6244}
6245
6246bool llvm::simplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI,
6247                       const SimplifyCFGOptions &Options,
6248                       SmallPtrSetImpl<BasicBlock *> *LoopHeaders) {
6249  return SimplifyCFGOpt(TTI, BB->getModule()->getDataLayout(), LoopHeaders,
6250                        Options)
6251      .run(BB);
6252}
6253