1//===- SimplifyCFG.cpp - Code to perform CFG simplification ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Peephole optimize the CFG.
10//
11//===----------------------------------------------------------------------===//
12
13#include "llvm/ADT/APInt.h"
14#include "llvm/ADT/ArrayRef.h"
15#include "llvm/ADT/DenseMap.h"
16#include "llvm/ADT/MapVector.h"
17#include "llvm/ADT/STLExtras.h"
18#include "llvm/ADT/Sequence.h"
19#include "llvm/ADT/SetOperations.h"
20#include "llvm/ADT/SetVector.h"
21#include "llvm/ADT/SmallPtrSet.h"
22#include "llvm/ADT/SmallVector.h"
23#include "llvm/ADT/Statistic.h"
24#include "llvm/ADT/StringRef.h"
25#include "llvm/Analysis/AssumptionCache.h"
26#include "llvm/Analysis/CaptureTracking.h"
27#include "llvm/Analysis/ConstantFolding.h"
28#include "llvm/Analysis/DomTreeUpdater.h"
29#include "llvm/Analysis/GuardUtils.h"
30#include "llvm/Analysis/InstructionSimplify.h"
31#include "llvm/Analysis/MemorySSA.h"
32#include "llvm/Analysis/MemorySSAUpdater.h"
33#include "llvm/Analysis/TargetTransformInfo.h"
34#include "llvm/Analysis/ValueTracking.h"
35#include "llvm/IR/Attributes.h"
36#include "llvm/IR/BasicBlock.h"
37#include "llvm/IR/CFG.h"
38#include "llvm/IR/Constant.h"
39#include "llvm/IR/ConstantRange.h"
40#include "llvm/IR/Constants.h"
41#include "llvm/IR/DataLayout.h"
42#include "llvm/IR/DebugInfo.h"
43#include "llvm/IR/DerivedTypes.h"
44#include "llvm/IR/Function.h"
45#include "llvm/IR/GlobalValue.h"
46#include "llvm/IR/GlobalVariable.h"
47#include "llvm/IR/IRBuilder.h"
48#include "llvm/IR/InstrTypes.h"
49#include "llvm/IR/Instruction.h"
50#include "llvm/IR/Instructions.h"
51#include "llvm/IR/IntrinsicInst.h"
52#include "llvm/IR/LLVMContext.h"
53#include "llvm/IR/MDBuilder.h"
54#include "llvm/IR/Metadata.h"
55#include "llvm/IR/Module.h"
56#include "llvm/IR/NoFolder.h"
57#include "llvm/IR/Operator.h"
58#include "llvm/IR/PatternMatch.h"
59#include "llvm/IR/ProfDataUtils.h"
60#include "llvm/IR/Type.h"
61#include "llvm/IR/Use.h"
62#include "llvm/IR/User.h"
63#include "llvm/IR/Value.h"
64#include "llvm/IR/ValueHandle.h"
65#include "llvm/Support/BranchProbability.h"
66#include "llvm/Support/Casting.h"
67#include "llvm/Support/CommandLine.h"
68#include "llvm/Support/Debug.h"
69#include "llvm/Support/ErrorHandling.h"
70#include "llvm/Support/KnownBits.h"
71#include "llvm/Support/MathExtras.h"
72#include "llvm/Support/raw_ostream.h"
73#include "llvm/Transforms/Utils/BasicBlockUtils.h"
74#include "llvm/Transforms/Utils/Local.h"
75#include "llvm/Transforms/Utils/ValueMapper.h"
76#include <algorithm>
77#include <cassert>
78#include <climits>
79#include <cstddef>
80#include <cstdint>
81#include <iterator>
82#include <map>
83#include <optional>
84#include <set>
85#include <tuple>
86#include <utility>
87#include <vector>
88
89using namespace llvm;
90using namespace PatternMatch;
91
92#define DEBUG_TYPE "simplifycfg"
93
94cl::opt<bool> llvm::RequireAndPreserveDomTree(
95    "simplifycfg-require-and-preserve-domtree", cl::Hidden,
96
97    cl::desc("Temorary development switch used to gradually uplift SimplifyCFG "
98             "into preserving DomTree,"));
99
// File-local, hidden command-line tunables. Each option's cl::desc string is
// the authoritative description of what it controls.

// Chosen as 2 so as to be cheap, but still to have enough power to fold
// a select, so the "clamp" idiom (of a min followed by a max) will be caught.
// To catch this, we need to fold a compare and a select, hence '2' being the
// minimum reasonable default.
static cl::opt<unsigned> PHINodeFoldingThreshold(
    "phi-node-folding-threshold", cl::Hidden, cl::init(2),
    cl::desc(
        "Control the amount of phi node folding to perform (default = 2)"));

// Upper bound on the total cost of instructions speculated in order to turn a
// two-entry PHI node into a select.
static cl::opt<unsigned> TwoEntryPHINodeFoldingThreshold(
    "two-entry-phi-node-folding-threshold", cl::Hidden, cl::init(4),
    cl::desc("Control the maximal total instruction cost that we are willing "
             "to speculatively execute to fold a 2-entry PHI node into a "
             "select (default = 4)"));

// On/off switch for hoisting common instructions (enabled by default).
static cl::opt<bool>
    HoistCommon("simplifycfg-hoist-common", cl::Hidden, cl::init(true),
                cl::desc("Hoist common instructions up to the parent block"));

// Maximum number of instructions hoisting may reorder across.
static cl::opt<unsigned>
    HoistCommonSkipLimit("simplifycfg-hoist-common-skip-limit", cl::Hidden,
                         cl::init(20),
                         cl::desc("Allow reordering across at most this many "
                                  "instructions when hoisting"));

// On/off switch for sinking common instructions (enabled by default).
static cl::opt<bool>
    SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true),
               cl::desc("Sink common instructions down to the end block"));

// On/off switch for hoisting a conditional store that follows an
// unconditional store (enabled by default).
static cl::opt<bool> HoistCondStores(
    "simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true),
    cl::desc("Hoist conditional stores if an unconditional store precedes"));

// On/off switch for merging several conditional stores into one predicated
// store (enabled by default).
static cl::opt<bool> MergeCondStores(
    "simplifycfg-merge-cond-stores", cl::Hidden, cl::init(true),
    cl::desc("Hoist conditional stores even if an unconditional store does not "
             "precede - hoist multiple conditional stores into a single "
             "predicated store"));

// More aggressive variant of conditional-store merging (disabled by default).
static cl::opt<bool> MergeCondStoresAggressively(
    "simplifycfg-merge-cond-stores-aggressively", cl::Hidden, cl::init(false),
    cl::desc("When merging conditional stores, do so even if the resultant "
             "basic blocks are unlikely to be if-converted as a result"));

// Permits a single over-budget instruction to be speculated; consulted by
// dominatesMergePoint() below.
static cl::opt<bool> SpeculateOneExpensiveInst(
    "speculate-one-expensive-inst", cl::Hidden, cl::init(true),
    cl::desc("Allow exactly one expensive instruction to be speculatively "
             "executed"));

// Recursion cut-off used by dominatesMergePoint() below.
static cl::opt<unsigned> MaxSpeculationDepth(
    "max-speculation-depth", cl::Hidden, cl::init(10),
    cl::desc("Limit maximum recursion depth when calculating costs of "
             "speculatively executed instructions"));

// Size limit for blocks that are considered for threading.
static cl::opt<int>
    MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden,
                      cl::init(10),
                      cl::desc("Max size of a block which is still considered "
                               "small enough to thread through"));

// Two is chosen to allow one negation and a logical combine.
static cl::opt<unsigned>
    BranchFoldThreshold("simplifycfg-branch-fold-threshold", cl::Hidden,
                        cl::init(2),
                        cl::desc("Maximum cost of combining conditions when "
                                 "folding branches"));

// Scale factor applied to the branch-fold threshold when vector operations
// are involved.
static cl::opt<unsigned> BranchFoldToCommonDestVectorMultiplier(
    "simplifycfg-branch-fold-common-dest-vector-multiplier", cl::Hidden,
    cl::init(2),
    cl::desc("Multiplier to apply to threshold when determining whether or not "
             "to fold branch to common destination when vector operations are "
             "present"));

// On/off switch for merging compatible invokes (enabled by default).
static cl::opt<bool> EnableMergeCompatibleInvokes(
    "simplifycfg-merge-compatible-invokes", cl::Hidden, cl::init(true),
    cl::desc("Allow SimplifyCFG to merge invokes together when appropriate"));

// Cap on the number of cases examined when converting a switch to a select.
static cl::opt<unsigned> MaxSwitchCasesPerResult(
    "max-switch-cases-per-result", cl::Hidden, cl::init(16),
    cl::desc("Limit cases to analyze when converting a switch to select"));
181
// Statistic counters for the transformations in this file. The first macro
// argument is the counter's identifier; the string is its human-readable
// description.
STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
STATISTIC(NumLinearMaps,
          "Number of switch instructions turned into linear mapping");
STATISTIC(NumLookupTables,
          "Number of switch instructions turned into lookup tables");
STATISTIC(
    NumLookupTablesHoles,
    "Number of switch instructions turned into lookup tables (holes checked)");
STATISTIC(NumTableCmpReuses, "Number of reused switch table lookup compares");
STATISTIC(NumFoldValueComparisonIntoPredecessors,
          "Number of value comparisons folded into predecessor basic blocks");
STATISTIC(NumFoldBranchToCommonDest,
          "Number of branches folded into predecessor basic block");
STATISTIC(
    NumHoistCommonCode,
    "Number of common instruction 'blocks' hoisted up to the begin block");
STATISTIC(NumHoistCommonInstrs,
          "Number of common instructions hoisted up to the begin block");
STATISTIC(NumSinkCommonCode,
          "Number of common instruction 'blocks' sunk down to the end block");
STATISTIC(NumSinkCommonInstrs,
          "Number of common instructions sunk down to the end block");
STATISTIC(NumSpeculations, "Number of speculative executed instructions");
STATISTIC(NumInvokes,
          "Number of invokes with empty resume blocks simplified into calls");
STATISTIC(NumInvokesMerged, "Number of invokes that were merged together");
STATISTIC(NumInvokeSetsFormed, "Number of invoke sets that were formed");
209
210namespace {
211
// A list of case groups. For each entry, `first` is the constant the switch
// produces when that case group is selected, and `second` is the vector of
// case values that make up the group.
using SwitchCaseResultVectorTy =
    SmallVector<std::pair<Constant *, SmallVector<ConstantInt *, 4>>, 2>;

// The results of one switch case, one entry per PHI. For each entry, `first`
// is the PHI node that receives a result of the switch and `second` is the
// constant generated for that PHI by the case in question.
using SwitchCaseResultsTy = SmallVector<std::pair<PHINode *, Constant *>, 4>;
222
223/// ValueEqualityComparisonCase - Represents a case of a switch.
224struct ValueEqualityComparisonCase {
225  ConstantInt *Value;
226  BasicBlock *Dest;
227
228  ValueEqualityComparisonCase(ConstantInt *Value, BasicBlock *Dest)
229      : Value(Value), Dest(Dest) {}
230
231  bool operator<(ValueEqualityComparisonCase RHS) const {
232    // Comparing pointers is ok as we only rely on the order for uniquing.
233    return Value < RHS.Value;
234  }
235
236  bool operator==(BasicBlock *RHSDest) const { return Dest == RHSDest; }
237};
238
239class SimplifyCFGOpt {
240  const TargetTransformInfo &TTI;
241  DomTreeUpdater *DTU;
242  const DataLayout &DL;
243  ArrayRef<WeakVH> LoopHeaders;
244  const SimplifyCFGOptions &Options;
245  bool Resimplify;
246
247  Value *isValueEqualityComparison(Instruction *TI);
248  BasicBlock *GetValueEqualityComparisonCases(
249      Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases);
250  bool SimplifyEqualityComparisonWithOnlyPredecessor(Instruction *TI,
251                                                     BasicBlock *Pred,
252                                                     IRBuilder<> &Builder);
253  bool PerformValueComparisonIntoPredecessorFolding(Instruction *TI, Value *&CV,
254                                                    Instruction *PTI,
255                                                    IRBuilder<> &Builder);
256  bool FoldValueComparisonIntoPredecessors(Instruction *TI,
257                                           IRBuilder<> &Builder);
258
259  bool simplifyResume(ResumeInst *RI, IRBuilder<> &Builder);
260  bool simplifySingleResume(ResumeInst *RI);
261  bool simplifyCommonResume(ResumeInst *RI);
262  bool simplifyCleanupReturn(CleanupReturnInst *RI);
263  bool simplifyUnreachable(UnreachableInst *UI);
264  bool simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder);
265  bool simplifyIndirectBr(IndirectBrInst *IBI);
266  bool simplifyBranch(BranchInst *Branch, IRBuilder<> &Builder);
267  bool simplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder);
268  bool simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder);
269
270  bool tryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
271                                             IRBuilder<> &Builder);
272
273  bool hoistCommonCodeFromSuccessors(BasicBlock *BB, bool EqTermsOnly);
274  bool hoistSuccIdenticalTerminatorToSwitchOrIf(
275      Instruction *TI, Instruction *I1,
276      SmallVectorImpl<Instruction *> &OtherSuccTIs);
277  bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB);
278  bool SimplifyTerminatorOnSelect(Instruction *OldTerm, Value *Cond,
279                                  BasicBlock *TrueBB, BasicBlock *FalseBB,
280                                  uint32_t TrueWeight, uint32_t FalseWeight);
281  bool SimplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder,
282                                 const DataLayout &DL);
283  bool SimplifySwitchOnSelect(SwitchInst *SI, SelectInst *Select);
284  bool SimplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI);
285  bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder);
286
287public:
288  SimplifyCFGOpt(const TargetTransformInfo &TTI, DomTreeUpdater *DTU,
289                 const DataLayout &DL, ArrayRef<WeakVH> LoopHeaders,
290                 const SimplifyCFGOptions &Opts)
291      : TTI(TTI), DTU(DTU), DL(DL), LoopHeaders(LoopHeaders), Options(Opts) {
292    assert((!DTU || !DTU->hasPostDomTree()) &&
293           "SimplifyCFG is not yet capable of maintaining validity of a "
294           "PostDomTree, so don't ask for it.");
295  }
296
297  bool simplifyOnce(BasicBlock *BB);
298  bool run(BasicBlock *BB);
299
300  // Helper to set Resimplify and return change indication.
301  bool requestResimplify() {
302    Resimplify = true;
303    return true;
304  }
305};
306
307} // end anonymous namespace
308
309/// Return true if all the PHI nodes in the basic block \p BB
310/// receive compatible (identical) incoming values when coming from
311/// all of the predecessor blocks that are specified in \p IncomingBlocks.
312///
313/// Note that if the values aren't exactly identical, but \p EquivalenceSet
314/// is provided, and *both* of the values are present in the set,
315/// then they are considered equal.
316static bool IncomingValuesAreCompatible(
317    BasicBlock *BB, ArrayRef<BasicBlock *> IncomingBlocks,
318    SmallPtrSetImpl<Value *> *EquivalenceSet = nullptr) {
319  assert(IncomingBlocks.size() == 2 &&
320         "Only for a pair of incoming blocks at the time!");
321
322  // FIXME: it is okay if one of the incoming values is an `undef` value,
323  //        iff the other incoming value is guaranteed to be a non-poison value.
324  // FIXME: it is okay if one of the incoming values is a `poison` value.
325  return all_of(BB->phis(), [IncomingBlocks, EquivalenceSet](PHINode &PN) {
326    Value *IV0 = PN.getIncomingValueForBlock(IncomingBlocks[0]);
327    Value *IV1 = PN.getIncomingValueForBlock(IncomingBlocks[1]);
328    if (IV0 == IV1)
329      return true;
330    if (EquivalenceSet && EquivalenceSet->contains(IV0) &&
331        EquivalenceSet->contains(IV1))
332      return true;
333    return false;
334  });
335}
336
337/// Return true if it is safe to merge these two
338/// terminator instructions together.
339static bool
340SafeToMergeTerminators(Instruction *SI1, Instruction *SI2,
341                       SmallSetVector<BasicBlock *, 4> *FailBlocks = nullptr) {
342  if (SI1 == SI2)
343    return false; // Can't merge with self!
344
345  // It is not safe to merge these two switch instructions if they have a common
346  // successor, and if that successor has a PHI node, and if *that* PHI node has
347  // conflicting incoming values from the two switch blocks.
348  BasicBlock *SI1BB = SI1->getParent();
349  BasicBlock *SI2BB = SI2->getParent();
350
351  SmallPtrSet<BasicBlock *, 16> SI1Succs(succ_begin(SI1BB), succ_end(SI1BB));
352  bool Fail = false;
353  for (BasicBlock *Succ : successors(SI2BB)) {
354    if (!SI1Succs.count(Succ))
355      continue;
356    if (IncomingValuesAreCompatible(Succ, {SI1BB, SI2BB}))
357      continue;
358    Fail = true;
359    if (FailBlocks)
360      FailBlocks->insert(Succ);
361    else
362      break;
363  }
364
365  return !Fail;
366}
367
368/// Update PHI nodes in Succ to indicate that there will now be entries in it
369/// from the 'NewPred' block. The values that will be flowing into the PHI nodes
370/// will be the same as those coming in from ExistPred, an existing predecessor
371/// of Succ.
372static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred,
373                                  BasicBlock *ExistPred,
374                                  MemorySSAUpdater *MSSAU = nullptr) {
375  for (PHINode &PN : Succ->phis())
376    PN.addIncoming(PN.getIncomingValueForBlock(ExistPred), NewPred);
377  if (MSSAU)
378    if (auto *MPhi = MSSAU->getMemorySSA()->getMemoryAccess(Succ))
379      MPhi->addIncoming(MPhi->getIncomingValueForBlock(ExistPred), NewPred);
380}
381
382/// Compute an abstract "cost" of speculating the given instruction,
383/// which is assumed to be safe to speculate. TCC_Free means cheap,
384/// TCC_Basic means less cheap, and TCC_Expensive means prohibitively
385/// expensive.
386static InstructionCost computeSpeculationCost(const User *I,
387                                              const TargetTransformInfo &TTI) {
388  assert((!isa<Instruction>(I) ||
389          isSafeToSpeculativelyExecute(cast<Instruction>(I))) &&
390         "Instruction is not safe to speculatively execute!");
391  return TTI.getInstructionCost(I, TargetTransformInfo::TCK_SizeAndLatency);
392}
393
394/// If we have a merge point of an "if condition" as accepted above,
395/// return true if the specified value dominates the block.  We
396/// don't handle the true generality of domination here, just a special case
397/// which works well enough for us.
398///
399/// If AggressiveInsts is non-null, and if V does not dominate BB, we check to
400/// see if V (which must be an instruction) and its recursive operands
401/// that do not dominate BB have a combined cost lower than Budget and
402/// are non-trapping.  If both are true, the instruction is inserted into the
403/// set and true is returned.
404///
405/// The cost for most non-trapping instructions is defined as 1 except for
406/// Select whose cost is 2.
407///
408/// After this function returns, Cost is increased by the cost of
409/// V plus its non-dominating operands.  If that cost is greater than
410/// Budget, false is returned and Cost is undefined.
411static bool dominatesMergePoint(Value *V, BasicBlock *BB,
412                                SmallPtrSetImpl<Instruction *> &AggressiveInsts,
413                                InstructionCost &Cost,
414                                InstructionCost Budget,
415                                const TargetTransformInfo &TTI,
416                                unsigned Depth = 0) {
417  // It is possible to hit a zero-cost cycle (phi/gep instructions for example),
418  // so limit the recursion depth.
419  // TODO: While this recursion limit does prevent pathological behavior, it
420  // would be better to track visited instructions to avoid cycles.
421  if (Depth == MaxSpeculationDepth)
422    return false;
423
424  Instruction *I = dyn_cast<Instruction>(V);
425  if (!I) {
426    // Non-instructions dominate all instructions and can be executed
427    // unconditionally.
428    return true;
429  }
430  BasicBlock *PBB = I->getParent();
431
432  // We don't want to allow weird loops that might have the "if condition" in
433  // the bottom of this block.
434  if (PBB == BB)
435    return false;
436
437  // If this instruction is defined in a block that contains an unconditional
438  // branch to BB, then it must be in the 'conditional' part of the "if
439  // statement".  If not, it definitely dominates the region.
440  BranchInst *BI = dyn_cast<BranchInst>(PBB->getTerminator());
441  if (!BI || BI->isConditional() || BI->getSuccessor(0) != BB)
442    return true;
443
444  // If we have seen this instruction before, don't count it again.
445  if (AggressiveInsts.count(I))
446    return true;
447
448  // Okay, it looks like the instruction IS in the "condition".  Check to
449  // see if it's a cheap instruction to unconditionally compute, and if it
450  // only uses stuff defined outside of the condition.  If so, hoist it out.
451  if (!isSafeToSpeculativelyExecute(I))
452    return false;
453
454  Cost += computeSpeculationCost(I, TTI);
455
456  // Allow exactly one instruction to be speculated regardless of its cost
457  // (as long as it is safe to do so).
458  // This is intended to flatten the CFG even if the instruction is a division
459  // or other expensive operation. The speculation of an expensive instruction
460  // is expected to be undone in CodeGenPrepare if the speculation has not
461  // enabled further IR optimizations.
462  if (Cost > Budget &&
463      (!SpeculateOneExpensiveInst || !AggressiveInsts.empty() || Depth > 0 ||
464       !Cost.isValid()))
465    return false;
466
467  // Okay, we can only really hoist these out if their operands do
468  // not take us over the cost threshold.
469  for (Use &Op : I->operands())
470    if (!dominatesMergePoint(Op, BB, AggressiveInsts, Cost, Budget, TTI,
471                             Depth + 1))
472      return false;
473  // Okay, it's safe to do this!  Remember this instruction.
474  AggressiveInsts.insert(I);
475  return true;
476}
477
478/// Extract ConstantInt from value, looking through IntToPtr
479/// and PointerNullValue. Return NULL if value is not a constant int.
480static ConstantInt *GetConstantInt(Value *V, const DataLayout &DL) {
481  // Normal constant int.
482  ConstantInt *CI = dyn_cast<ConstantInt>(V);
483  if (CI || !isa<Constant>(V) || !V->getType()->isPointerTy() ||
484      DL.isNonIntegralPointerType(V->getType()))
485    return CI;
486
487  // This is some kind of pointer constant. Turn it into a pointer-sized
488  // ConstantInt if possible.
489  IntegerType *PtrTy = cast<IntegerType>(DL.getIntPtrType(V->getType()));
490
491  // Null pointer means 0, see SelectionDAGBuilder::getValue(const Value*).
492  if (isa<ConstantPointerNull>(V))
493    return ConstantInt::get(PtrTy, 0);
494
495  // IntToPtr const int.
496  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
497    if (CE->getOpcode() == Instruction::IntToPtr)
498      if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(0))) {
499        // The constant is very likely to have the right type already.
500        if (CI->getType() == PtrTy)
501          return CI;
502        else
503          return cast<ConstantInt>(
504              ConstantFoldIntegerCast(CI, PtrTy, /*isSigned=*/false, DL));
505      }
506  return nullptr;
507}
508
509namespace {
510
511/// Given a chain of or (||) or and (&&) comparison of a value against a
512/// constant, this will try to recover the information required for a switch
513/// structure.
514/// It will depth-first traverse the chain of comparison, seeking for patterns
515/// like %a == 12 or %a < 4 and combine them to produce a set of integer
516/// representing the different cases for the switch.
517/// Note that if the chain is composed of '||' it will build the set of elements
518/// that matches the comparisons (i.e. any of this value validate the chain)
519/// while for a chain of '&&' it will build the set elements that make the test
520/// fail.
521struct ConstantComparesGatherer {
522  const DataLayout &DL;
523
524  /// Value found for the switch comparison
525  Value *CompValue = nullptr;
526
527  /// Extra clause to be checked before the switch
528  Value *Extra = nullptr;
529
530  /// Set of integers to match in switch
531  SmallVector<ConstantInt *, 8> Vals;
532
533  /// Number of comparisons matched in the and/or chain
534  unsigned UsedICmps = 0;
535
536  /// Construct and compute the result for the comparison instruction Cond
537  ConstantComparesGatherer(Instruction *Cond, const DataLayout &DL) : DL(DL) {
538    gather(Cond);
539  }
540
541  ConstantComparesGatherer(const ConstantComparesGatherer &) = delete;
542  ConstantComparesGatherer &
543  operator=(const ConstantComparesGatherer &) = delete;
544
545private:
546  /// Try to set the current value used for the comparison, it succeeds only if
547  /// it wasn't set before or if the new value is the same as the old one
548  bool setValueOnce(Value *NewVal) {
549    if (CompValue && CompValue != NewVal)
550      return false;
551    CompValue = NewVal;
552    return (CompValue != nullptr);
553  }
554
555  /// Try to match Instruction "I" as a comparison against a constant and
556  /// populates the array Vals with the set of values that match (or do not
557  /// match depending on isEQ).
558  /// Return false on failure. On success, the Value the comparison matched
559  /// against is placed in CompValue.
560  /// If CompValue is already set, the function is expected to fail if a match
561  /// is found but the value compared to is different.
562  bool matchInstruction(Instruction *I, bool isEQ) {
563    // If this is an icmp against a constant, handle this as one of the cases.
564    ICmpInst *ICI;
565    ConstantInt *C;
566    if (!((ICI = dyn_cast<ICmpInst>(I)) &&
567          (C = GetConstantInt(I->getOperand(1), DL)))) {
568      return false;
569    }
570
571    Value *RHSVal;
572    const APInt *RHSC;
573
574    // Pattern match a special case
575    // (x & ~2^z) == y --> x == y || x == y|2^z
576    // This undoes a transformation done by instcombine to fuse 2 compares.
577    if (ICI->getPredicate() == (isEQ ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE)) {
578      // It's a little bit hard to see why the following transformations are
579      // correct. Here is a CVC3 program to verify them for 64-bit values:
580
581      /*
582         ONE  : BITVECTOR(64) = BVZEROEXTEND(0bin1, 63);
583         x    : BITVECTOR(64);
584         y    : BITVECTOR(64);
585         z    : BITVECTOR(64);
586         mask : BITVECTOR(64) = BVSHL(ONE, z);
587         QUERY( (y & ~mask = y) =>
588                ((x & ~mask = y) <=> (x = y OR x = (y |  mask)))
589         );
590         QUERY( (y |  mask = y) =>
591                ((x |  mask = y) <=> (x = y OR x = (y & ~mask)))
592         );
593      */
594
595      // Please note that each pattern must be a dual implication (<--> or
596      // iff). One directional implication can create spurious matches. If the
597      // implication is only one-way, an unsatisfiable condition on the left
598      // side can imply a satisfiable condition on the right side. Dual
599      // implication ensures that satisfiable conditions are transformed to
600      // other satisfiable conditions and unsatisfiable conditions are
601      // transformed to other unsatisfiable conditions.
602
603      // Here is a concrete example of a unsatisfiable condition on the left
604      // implying a satisfiable condition on the right:
605      //
606      // mask = (1 << z)
607      // (x & ~mask) == y  --> (x == y || x == (y | mask))
608      //
609      // Substituting y = 3, z = 0 yields:
610      // (x & -2) == 3 --> (x == 3 || x == 2)
611
612      // Pattern match a special case:
613      /*
614        QUERY( (y & ~mask = y) =>
615               ((x & ~mask = y) <=> (x = y OR x = (y |  mask)))
616        );
617      */
618      if (match(ICI->getOperand(0),
619                m_And(m_Value(RHSVal), m_APInt(RHSC)))) {
620        APInt Mask = ~*RHSC;
621        if (Mask.isPowerOf2() && (C->getValue() & ~Mask) == C->getValue()) {
622          // If we already have a value for the switch, it has to match!
623          if (!setValueOnce(RHSVal))
624            return false;
625
626          Vals.push_back(C);
627          Vals.push_back(
628              ConstantInt::get(C->getContext(),
629                               C->getValue() | Mask));
630          UsedICmps++;
631          return true;
632        }
633      }
634
635      // Pattern match a special case:
636      /*
637        QUERY( (y |  mask = y) =>
638               ((x |  mask = y) <=> (x = y OR x = (y & ~mask)))
639        );
640      */
641      if (match(ICI->getOperand(0),
642                m_Or(m_Value(RHSVal), m_APInt(RHSC)))) {
643        APInt Mask = *RHSC;
644        if (Mask.isPowerOf2() && (C->getValue() | Mask) == C->getValue()) {
645          // If we already have a value for the switch, it has to match!
646          if (!setValueOnce(RHSVal))
647            return false;
648
649          Vals.push_back(C);
650          Vals.push_back(ConstantInt::get(C->getContext(),
651                                          C->getValue() & ~Mask));
652          UsedICmps++;
653          return true;
654        }
655      }
656
657      // If we already have a value for the switch, it has to match!
658      if (!setValueOnce(ICI->getOperand(0)))
659        return false;
660
661      UsedICmps++;
662      Vals.push_back(C);
663      return ICI->getOperand(0);
664    }
665
666    // If we have "x ult 3", for example, then we can add 0,1,2 to the set.
667    ConstantRange Span =
668        ConstantRange::makeExactICmpRegion(ICI->getPredicate(), C->getValue());
669
670    // Shift the range if the compare is fed by an add. This is the range
671    // compare idiom as emitted by instcombine.
672    Value *CandidateVal = I->getOperand(0);
673    if (match(I->getOperand(0), m_Add(m_Value(RHSVal), m_APInt(RHSC)))) {
674      Span = Span.subtract(*RHSC);
675      CandidateVal = RHSVal;
676    }
677
678    // If this is an and/!= check, then we are looking to build the set of
679    // value that *don't* pass the and chain. I.e. to turn "x ugt 2" into
680    // x != 0 && x != 1.
681    if (!isEQ)
682      Span = Span.inverse();
683
684    // If there are a ton of values, we don't want to make a ginormous switch.
685    if (Span.isSizeLargerThan(8) || Span.isEmptySet()) {
686      return false;
687    }
688
689    // If we already have a value for the switch, it has to match!
690    if (!setValueOnce(CandidateVal))
691      return false;
692
693    // Add all values from the range to the set
694    for (APInt Tmp = Span.getLower(); Tmp != Span.getUpper(); ++Tmp)
695      Vals.push_back(ConstantInt::get(I->getContext(), Tmp));
696
697    UsedICmps++;
698    return true;
699  }
700
701  /// Given a potentially 'or'd or 'and'd together collection of icmp
702  /// eq/ne/lt/gt instructions that compare a value against a constant, extract
703  /// the value being compared, and stick the list constants into the Vals
704  /// vector.
705  /// One "Extra" case is allowed to differ from the other.
706  void gather(Value *V) {
707    bool isEQ = match(V, m_LogicalOr(m_Value(), m_Value()));
708
709    // Keep a stack (SmallVector for efficiency) for depth-first traversal
710    SmallVector<Value *, 8> DFT;
711    SmallPtrSet<Value *, 8> Visited;
712
713    // Initialize
714    Visited.insert(V);
715    DFT.push_back(V);
716
717    while (!DFT.empty()) {
718      V = DFT.pop_back_val();
719
720      if (Instruction *I = dyn_cast<Instruction>(V)) {
721        // If it is a || (or && depending on isEQ), process the operands.
722        Value *Op0, *Op1;
723        if (isEQ ? match(I, m_LogicalOr(m_Value(Op0), m_Value(Op1)))
724                 : match(I, m_LogicalAnd(m_Value(Op0), m_Value(Op1)))) {
725          if (Visited.insert(Op1).second)
726            DFT.push_back(Op1);
727          if (Visited.insert(Op0).second)
728            DFT.push_back(Op0);
729
730          continue;
731        }
732
733        // Try to match the current instruction
734        if (matchInstruction(I, isEQ))
735          // Match succeed, continue the loop
736          continue;
737      }
738
739      // One element of the sequence of || (or &&) could not be match as a
740      // comparison against the same value as the others.
741      // We allow only one "Extra" case to be checked before the switch
742      if (!Extra) {
743        Extra = V;
744        continue;
745      }
746      // Failed to parse a proper sequence, abort now
747      CompValue = nullptr;
748      break;
749    }
750  }
751};
752
753} // end anonymous namespace
754
755static void EraseTerminatorAndDCECond(Instruction *TI,
756                                      MemorySSAUpdater *MSSAU = nullptr) {
757  Instruction *Cond = nullptr;
758  if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
759    Cond = dyn_cast<Instruction>(SI->getCondition());
760  } else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
761    if (BI->isConditional())
762      Cond = dyn_cast<Instruction>(BI->getCondition());
763  } else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(TI)) {
764    Cond = dyn_cast<Instruction>(IBI->getAddress());
765  }
766
767  TI->eraseFromParent();
768  if (Cond)
769    RecursivelyDeleteTriviallyDeadInstructions(Cond, nullptr, MSSAU);
770}
771
772/// Return true if the specified terminator checks
773/// to see if a value is equal to constant integer value.
774Value *SimplifyCFGOpt::isValueEqualityComparison(Instruction *TI) {
775  Value *CV = nullptr;
776  if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
777    // Do not permit merging of large switch instructions into their
778    // predecessors unless there is only one predecessor.
779    if (!SI->getParent()->hasNPredecessorsOrMore(128 / SI->getNumSuccessors()))
780      CV = SI->getCondition();
781  } else if (BranchInst *BI = dyn_cast<BranchInst>(TI))
782    if (BI->isConditional() && BI->getCondition()->hasOneUse())
783      if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition())) {
784        if (ICI->isEquality() && GetConstantInt(ICI->getOperand(1), DL))
785          CV = ICI->getOperand(0);
786      }
787
788  // Unwrap any lossless ptrtoint cast.
789  if (CV) {
790    if (PtrToIntInst *PTII = dyn_cast<PtrToIntInst>(CV)) {
791      Value *Ptr = PTII->getPointerOperand();
792      if (PTII->getType() == DL.getIntPtrType(Ptr->getType()))
793        CV = Ptr;
794    }
795  }
796  return CV;
797}
798
799/// Given a value comparison instruction,
800/// decode all of the 'cases' that it represents and return the 'default' block.
801BasicBlock *SimplifyCFGOpt::GetValueEqualityComparisonCases(
802    Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases) {
803  if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
804    Cases.reserve(SI->getNumCases());
805    for (auto Case : SI->cases())
806      Cases.push_back(ValueEqualityComparisonCase(Case.getCaseValue(),
807                                                  Case.getCaseSuccessor()));
808    return SI->getDefaultDest();
809  }
810
811  BranchInst *BI = cast<BranchInst>(TI);
812  ICmpInst *ICI = cast<ICmpInst>(BI->getCondition());
813  BasicBlock *Succ = BI->getSuccessor(ICI->getPredicate() == ICmpInst::ICMP_NE);
814  Cases.push_back(ValueEqualityComparisonCase(
815      GetConstantInt(ICI->getOperand(1), DL), Succ));
816  return BI->getSuccessor(ICI->getPredicate() == ICmpInst::ICMP_EQ);
817}
818
819/// Given a vector of bb/value pairs, remove any entries
820/// in the list that match the specified block.
821static void
822EliminateBlockCases(BasicBlock *BB,
823                    std::vector<ValueEqualityComparisonCase> &Cases) {
824  llvm::erase(Cases, BB);
825}
826
827/// Return true if there are any keys in C1 that exist in C2 as well.
828static bool ValuesOverlap(std::vector<ValueEqualityComparisonCase> &C1,
829                          std::vector<ValueEqualityComparisonCase> &C2) {
830  std::vector<ValueEqualityComparisonCase> *V1 = &C1, *V2 = &C2;
831
832  // Make V1 be smaller than V2.
833  if (V1->size() > V2->size())
834    std::swap(V1, V2);
835
836  if (V1->empty())
837    return false;
838  if (V1->size() == 1) {
839    // Just scan V2.
840    ConstantInt *TheVal = (*V1)[0].Value;
841    for (const ValueEqualityComparisonCase &VECC : *V2)
842      if (TheVal == VECC.Value)
843        return true;
844  }
845
846  // Otherwise, just sort both lists and compare element by element.
847  array_pod_sort(V1->begin(), V1->end());
848  array_pod_sort(V2->begin(), V2->end());
849  unsigned i1 = 0, i2 = 0, e1 = V1->size(), e2 = V2->size();
850  while (i1 != e1 && i2 != e2) {
851    if ((*V1)[i1].Value == (*V2)[i2].Value)
852      return true;
853    if ((*V1)[i1].Value < (*V2)[i2].Value)
854      ++i1;
855    else
856      ++i2;
857  }
858  return false;
859}
860
861// Set branch weights on SwitchInst. This sets the metadata if there is at
862// least one non-zero weight.
863static void setBranchWeights(SwitchInst *SI, ArrayRef<uint32_t> Weights) {
864  // Check that there is at least one non-zero weight. Otherwise, pass
865  // nullptr to setMetadata which will erase the existing metadata.
866  MDNode *N = nullptr;
867  if (llvm::any_of(Weights, [](uint32_t W) { return W != 0; }))
868    N = MDBuilder(SI->getParent()->getContext()).createBranchWeights(Weights);
869  SI->setMetadata(LLVMContext::MD_prof, N);
870}
871
872// Similar to the above, but for branch and select instructions that take
873// exactly 2 weights.
874static void setBranchWeights(Instruction *I, uint32_t TrueWeight,
875                             uint32_t FalseWeight) {
876  assert(isa<BranchInst>(I) || isa<SelectInst>(I));
877  // Check that there is at least one non-zero weight. Otherwise, pass
878  // nullptr to setMetadata which will erase the existing metadata.
879  MDNode *N = nullptr;
880  if (TrueWeight || FalseWeight)
881    N = MDBuilder(I->getParent()->getContext())
882            .createBranchWeights(TrueWeight, FalseWeight);
883  I->setMetadata(LLVMContext::MD_prof, N);
884}
885
/// If TI is known to be a terminator instruction and its block is known to
/// only have a single predecessor block, check to see if that predecessor is
/// also a value comparison with the same value, and if that comparison
/// determines the outcome of this comparison. If so, simplify TI. This does a
/// very limited form of jump threading.
///
/// \param TI      Value-comparison terminator to simplify (asserted below).
/// \param Pred    The predecessor block whose terminator is inspected.
/// \param Builder Used to create the replacement unconditional branch at its
///                current insertion point (presumably set to TI by the
///                caller; not visible here).
/// \returns true if TI was rewritten or had cases pruned; false if nothing
///          was changed (all 'return false' paths precede any IR mutation).
bool SimplifyCFGOpt::SimplifyEqualityComparisonWithOnlyPredecessor(
    Instruction *TI, BasicBlock *Pred, IRBuilder<> &Builder) {
  Value *PredVal = isValueEqualityComparison(Pred->getTerminator());
  if (!PredVal)
    return false; // Not a value comparison in predecessor.

  Value *ThisVal = isValueEqualityComparison(TI);
  assert(ThisVal && "This isn't a value comparison!!");
  if (ThisVal != PredVal)
    return false; // Different predicates.

  // TODO: Preserve branch weight metadata, similarly to how
  // FoldValueComparisonIntoPredecessors preserves it.

  // Find out information about when control will move from Pred to TI's block.
  std::vector<ValueEqualityComparisonCase> PredCases;
  BasicBlock *PredDef =
      GetValueEqualityComparisonCases(Pred->getTerminator(), PredCases);
  EliminateBlockCases(PredDef, PredCases); // Remove default from cases.

  // Find information about how control leaves this block.
  std::vector<ValueEqualityComparisonCase> ThisCases;
  BasicBlock *ThisDef = GetValueEqualityComparisonCases(TI, ThisCases);
  EliminateBlockCases(ThisDef, ThisCases); // Remove default from cases.

  // If TI's block is the default block from Pred's comparison, potentially
  // simplify TI based on this knowledge.
  if (PredDef == TI->getParent()) {
    // If we are here, we know that the value is none of those cases listed in
    // PredCases.  If there are any cases in ThisCases that are in PredCases, we
    // can simplify TI.
    if (!ValuesOverlap(PredCases, ThisCases))
      return false;

    if (isa<BranchInst>(TI)) {
      // Okay, one of the successors of this condbr is dead.  Convert it to a
      // uncond br.
      assert(ThisCases.size() == 1 && "Branch can only have one case!");
      // Insert the new branch.
      Instruction *NI = Builder.CreateBr(ThisDef);
      (void)NI; // Only referenced by LLVM_DEBUG below.

      // Remove PHI node entries for the dead edge.
      // (PredDef is TI's own block in this branch of the code.)
      ThisCases[0].Dest->removePredecessor(PredDef);

      LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
                        << "Through successor TI: " << *TI << "Leaving: " << *NI
                        << "\n");

      EraseTerminatorAndDCECond(TI);

      if (DTU)
        DTU->applyUpdates(
            {{DominatorTree::Delete, PredDef, ThisCases[0].Dest}});

      return true;
    }

    SwitchInstProfUpdateWrapper SI = *cast<SwitchInst>(TI);
    // Okay, TI has cases that are statically dead, prune them away.
    SmallPtrSet<Constant *, 16> DeadCases;
    for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
      DeadCases.insert(PredCases[i].Value);

    LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
                      << "Through successor TI: " << *TI);

    // Per successor, the number of case edges that survive the pruning.  Only
    // maintained when a DomTreeUpdater is present.
    SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases;
    // Visit the cases from last to first, removing the dead ones as we go
    // (presumably so that removal does not disturb cases not yet visited).
    for (SwitchInst::CaseIt i = SI->case_end(), e = SI->case_begin(); i != e;) {
      --i;
      auto *Successor = i->getCaseSuccessor();
      if (DTU)
        ++NumPerSuccessorCases[Successor];
      if (DeadCases.count(i->getCaseValue())) {
        Successor->removePredecessor(PredDef);
        SI.removeCase(i);
        if (DTU)
          --NumPerSuccessorCases[Successor];
      }
    }

    if (DTU) {
      // A successor whose surviving case count dropped to zero gets a Delete
      // update for the edge from TI's block.
      std::vector<DominatorTree::UpdateType> Updates;
      for (const std::pair<BasicBlock *, int> &I : NumPerSuccessorCases)
        if (I.second == 0)
          Updates.push_back({DominatorTree::Delete, PredDef, I.first});
      DTU->applyUpdates(Updates);
    }

    LLVM_DEBUG(dbgs() << "Leaving: " << *TI << "\n");
    return true;
  }

  // Otherwise, TI's block must correspond to some matched value.  Find out
  // which value (or set of values) this is.
  ConstantInt *TIV = nullptr;
  BasicBlock *TIBB = TI->getParent();
  for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
    if (PredCases[i].Dest == TIBB) {
      if (TIV)
        return false; // Cannot handle multiple values coming to this block.
      TIV = PredCases[i].Value;
    }
  assert(TIV && "No edge from pred to succ?");

  // Okay, we found the one constant that our value can be if we get into TI's
  // BB.  Find out which successor will unconditionally be branched to.
  BasicBlock *TheRealDest = nullptr;
  for (unsigned i = 0, e = ThisCases.size(); i != e; ++i)
    if (ThisCases[i].Value == TIV) {
      TheRealDest = ThisCases[i].Dest;
      break;
    }

  // If not handled by any explicit cases, it is handled by the default case.
  if (!TheRealDest)
    TheRealDest = ThisDef;

  // Successors other than TheRealDest that lose their edge from TIBB; these
  // are reported to the DomTreeUpdater below.
  SmallPtrSet<BasicBlock *, 2> RemovedSuccs;

  // Remove PHI node entries for dead edges.
  // CheckEdge lets exactly one edge to TheRealDest survive: the first time it
  // is seen CheckEdge is cleared, so any further (duplicate) edge to
  // TheRealDest also has its PHI entry removed.
  BasicBlock *CheckEdge = TheRealDest;
  for (BasicBlock *Succ : successors(TIBB))
    if (Succ != CheckEdge) {
      if (Succ != TheRealDest)
        RemovedSuccs.insert(Succ);
      Succ->removePredecessor(TIBB);
    } else
      CheckEdge = nullptr;

  // Insert the new branch.
  Instruction *NI = Builder.CreateBr(TheRealDest);
  (void)NI; // Only referenced by LLVM_DEBUG below.

  LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
                    << "Through successor TI: " << *TI << "Leaving: " << *NI
                    << "\n");

  EraseTerminatorAndDCECond(TI);
  if (DTU) {
    SmallVector<DominatorTree::UpdateType, 2> Updates;
    Updates.reserve(RemovedSuccs.size());
    for (auto *RemovedSucc : RemovedSuccs)
      Updates.push_back({DominatorTree::Delete, TIBB, RemovedSucc});
    DTU->applyUpdates(Updates);
  }
  return true;
}
1039
1040namespace {
1041
1042/// This class implements a stable ordering of constant
1043/// integers that does not depend on their address.  This is important for
1044/// applications that sort ConstantInt's to ensure uniqueness.
1045struct ConstantIntOrdering {
1046  bool operator()(const ConstantInt *LHS, const ConstantInt *RHS) const {
1047    return LHS->getValue().ult(RHS->getValue());
1048  }
1049};
1050
1051} // end anonymous namespace
1052
1053static int ConstantIntSortPredicate(ConstantInt *const *P1,
1054                                    ConstantInt *const *P2) {
1055  const ConstantInt *LHS = *P1;
1056  const ConstantInt *RHS = *P2;
1057  if (LHS == RHS)
1058    return 0;
1059  return LHS->getValue().ult(RHS->getValue()) ? 1 : -1;
1060}
1061
1062/// Get Weights of a given terminator, the default weight is at the front
1063/// of the vector. If TI is a conditional eq, we need to swap the branch-weight
1064/// metadata.
1065static void GetBranchWeights(Instruction *TI,
1066                             SmallVectorImpl<uint64_t> &Weights) {
1067  MDNode *MD = TI->getMetadata(LLVMContext::MD_prof);
1068  assert(MD);
1069  for (unsigned i = 1, e = MD->getNumOperands(); i < e; ++i) {
1070    ConstantInt *CI = mdconst::extract<ConstantInt>(MD->getOperand(i));
1071    Weights.push_back(CI->getValue().getZExtValue());
1072  }
1073
1074  // If TI is a conditional eq, the default case is the false case,
1075  // and the corresponding branch-weight data is at index 2. We swap the
1076  // default weight to be the first entry.
1077  if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
1078    assert(Weights.size() == 2);
1079    ICmpInst *ICI = cast<ICmpInst>(BI->getCondition());
1080    if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
1081      std::swap(Weights.front(), Weights.back());
1082  }
1083}
1084
1085/// Keep halving the weights until all can fit in uint32_t.
1086static void FitWeights(MutableArrayRef<uint64_t> Weights) {
1087  uint64_t Max = *std::max_element(Weights.begin(), Weights.end());
1088  if (Max > UINT_MAX) {
1089    unsigned Offset = 32 - llvm::countl_zero(Max);
1090    for (uint64_t &I : Weights)
1091      I >>= Offset;
1092  }
1093}
1094
1095static void CloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(
1096    BasicBlock *BB, BasicBlock *PredBlock, ValueToValueMapTy &VMap) {
1097  Instruction *PTI = PredBlock->getTerminator();
1098
1099  // If we have bonus instructions, clone them into the predecessor block.
1100  // Note that there may be multiple predecessor blocks, so we cannot move
1101  // bonus instructions to a predecessor block.
1102  for (Instruction &BonusInst : *BB) {
1103    if (BonusInst.isTerminator())
1104      continue;
1105
1106    Instruction *NewBonusInst = BonusInst.clone();
1107
1108    if (!isa<DbgInfoIntrinsic>(BonusInst) &&
1109        PTI->getDebugLoc() != NewBonusInst->getDebugLoc()) {
1110      // Unless the instruction has the same !dbg location as the original
1111      // branch, drop it. When we fold the bonus instructions we want to make
1112      // sure we reset their debug locations in order to avoid stepping on
1113      // dead code caused by folding dead branches.
1114      NewBonusInst->setDebugLoc(DebugLoc());
1115    }
1116
1117    RemapInstruction(NewBonusInst, VMap,
1118                     RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
1119
1120    // If we speculated an instruction, we need to drop any metadata that may
1121    // result in undefined behavior, as the metadata might have been valid
1122    // only given the branch precondition.
1123    // Similarly strip attributes on call parameters that may cause UB in
1124    // location the call is moved to.
1125    NewBonusInst->dropUBImplyingAttrsAndMetadata();
1126
1127    NewBonusInst->insertInto(PredBlock, PTI->getIterator());
1128    auto Range = NewBonusInst->cloneDebugInfoFrom(&BonusInst);
1129    RemapDPValueRange(NewBonusInst->getModule(), Range, VMap,
1130                      RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
1131
1132    if (isa<DbgInfoIntrinsic>(BonusInst))
1133      continue;
1134
1135    NewBonusInst->takeName(&BonusInst);
1136    BonusInst.setName(NewBonusInst->getName() + ".old");
1137    VMap[&BonusInst] = NewBonusInst;
1138
1139    // Update (liveout) uses of bonus instructions,
1140    // now that the bonus instruction has been cloned into predecessor.
1141    // Note that we expect to be in a block-closed SSA form for this to work!
1142    for (Use &U : make_early_inc_range(BonusInst.uses())) {
1143      auto *UI = cast<Instruction>(U.getUser());
1144      auto *PN = dyn_cast<PHINode>(UI);
1145      if (!PN) {
1146        assert(UI->getParent() == BB && BonusInst.comesBefore(UI) &&
1147               "If the user is not a PHI node, then it should be in the same "
1148               "block as, and come after, the original bonus instruction.");
1149        continue; // Keep using the original bonus instruction.
1150      }
1151      // Is this the block-closed SSA form PHI node?
1152      if (PN->getIncomingBlock(U) == BB)
1153        continue; // Great, keep using the original bonus instruction.
1154      // The only other alternative is an "use" when coming from
1155      // the predecessor block - here we should refer to the cloned bonus instr.
1156      assert(PN->getIncomingBlock(U) == PredBlock &&
1157             "Not in block-closed SSA form?");
1158      U.set(NewBonusInst);
1159    }
1160  }
1161}
1162
/// Fold the value comparison terminator \p TI into the value comparison
/// terminator \p PTI of one of its predecessors: PTI is replaced by a new
/// switch on \p CV whose cases combine those of PTI and TI, so control from
/// the predecessor no longer passes through TI's block.
///
/// Along the way this merges branch-weight metadata (when either terminator
/// has it), adds PHI entries in the new successors, queues dominator-tree
/// updates when a DomTreeUpdater is available, and redirects any remaining
/// edge to TI's block to a fresh infinitely looping block.
///
/// \param TI      The value comparison terminator being folded away from.
/// \param CV      In/out: the compared value.  If it has pointer type it is
///                replaced by the ptrtoint created for the new switch.
/// \param PTI     The predecessor's terminator; erased by this function.
/// \param Builder Repositioned at PTI and used to create the new switch.
/// \returns Always true.
bool SimplifyCFGOpt::PerformValueComparisonIntoPredecessorFolding(
    Instruction *TI, Value *&CV, Instruction *PTI, IRBuilder<> &Builder) {
  BasicBlock *BB = TI->getParent();
  BasicBlock *Pred = PTI->getParent();

  SmallVector<DominatorTree::UpdateType, 32> Updates;

  // Figure out which 'cases' to copy from SI to PSI.
  std::vector<ValueEqualityComparisonCase> BBCases;
  BasicBlock *BBDefault = GetValueEqualityComparisonCases(TI, BBCases);

  std::vector<ValueEqualityComparisonCase> PredCases;
  BasicBlock *PredDefault = GetValueEqualityComparisonCases(PTI, PredCases);

  // Based on whether the default edge from PTI goes to BB or not, fill in
  // PredCases and PredDefault with the new switch cases we would like to
  // build.
  // NewSuccessors counts, per block, how many new edges from Pred it gains.
  SmallMapVector<BasicBlock *, int, 8> NewSuccessors;

  // Update the branch weight metadata along the way
  // Layout of Weights: index 0 is the default destination, index i + 1 is
  // PredCases[i].  SuccWeights uses the same layout for BBCases.
  SmallVector<uint64_t, 8> Weights;
  bool PredHasWeights = hasBranchWeightMD(*PTI);
  bool SuccHasWeights = hasBranchWeightMD(*TI);

  if (PredHasWeights) {
    GetBranchWeights(PTI, Weights);
    // branch-weight metadata is inconsistent here.
    if (Weights.size() != 1 + PredCases.size())
      PredHasWeights = SuccHasWeights = false;
  } else if (SuccHasWeights)
    // If there are no predecessor weights but there are successor weights,
    // populate Weights with 1, which will later be scaled to the sum of
    // successor's weights
    Weights.assign(1 + PredCases.size(), 1);

  SmallVector<uint64_t, 8> SuccWeights;
  if (SuccHasWeights) {
    GetBranchWeights(TI, SuccWeights);
    // branch-weight metadata is inconsistent here.
    if (SuccWeights.size() != 1 + BBCases.size())
      PredHasWeights = SuccHasWeights = false;
  } else if (PredHasWeights)
    // Mirror of the case above: no successor weights, so use 1 everywhere.
    SuccWeights.assign(1 + BBCases.size(), 1);

  if (PredDefault == BB) {
    // If this is the default destination from PTI, only the edges in TI
    // that don't occur in PTI, or that branch to BB will be activated.
    std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
    for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
      if (PredCases[i].Dest != BB)
        PTIHandled.insert(PredCases[i].Value);
      else {
        // The default destination is BB, we don't need explicit targets.
        // Remove entry i by swapping it with the last entry and popping.
        std::swap(PredCases[i], PredCases.back());

        if (PredHasWeights || SuccHasWeights) {
          // Increase weight for the default case.
          Weights[0] += Weights[i + 1];
          std::swap(Weights[i + 1], Weights.back());
          Weights.pop_back();
        }

        PredCases.pop_back();
        // Revisit index i, which now holds the element swapped in from the
        // back, and shrink the loop bound accordingly.
        --i;
        --e;
      }

    // Reconstruct the new switch statement we will be building.
    if (PredDefault != BBDefault) {
      PredDefault->removePredecessor(Pred);
      if (DTU && PredDefault != BB)
        Updates.push_back({DominatorTree::Delete, Pred, PredDefault});
      PredDefault = BBDefault;
      ++NewSuccessors[BBDefault];
    }

    // Weights[1 .. CasesFromPred) belong to cases that came from PTI; entries
    // appended below belong to cases copied from TI.
    unsigned CasesFromPred = Weights.size();
    uint64_t ValidTotalSuccWeight = 0;
    for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
      if (!PTIHandled.count(BBCases[i].Value) && BBCases[i].Dest != BBDefault) {
        PredCases.push_back(BBCases[i]);
        ++NewSuccessors[BBCases[i].Dest];
        if (SuccHasWeights || PredHasWeights) {
          // The default weight is at index 0, so weight for the ith case
          // should be at index i+1. Scale the cases from successor by
          // PredDefaultWeight (Weights[0]).
          Weights.push_back(Weights[0] * SuccWeights[i + 1]);
          ValidTotalSuccWeight += SuccWeights[i + 1];
        }
      }

    if (SuccHasWeights || PredHasWeights) {
      ValidTotalSuccWeight += SuccWeights[0];
      // Scale the cases from predecessor by ValidTotalSuccWeight.
      for (unsigned i = 1; i < CasesFromPred; ++i)
        Weights[i] *= ValidTotalSuccWeight;
      // Scale the default weight by SuccDefaultWeight (SuccWeights[0]).
      Weights[0] *= SuccWeights[0];
    }
  } else {
    // If this is not the default destination from PSI, only the edges
    // in SI that occur in PSI with a destination of BB will be
    // activated.
    std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
    // Weight each removed PTI case had, keyed by its constant, so it can be
    // re-attached to the case that replaces it.
    std::map<ConstantInt *, uint64_t> WeightsForHandled;
    for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
      if (PredCases[i].Dest == BB) {
        PTIHandled.insert(PredCases[i].Value);

        if (PredHasWeights || SuccHasWeights) {
          WeightsForHandled[PredCases[i].Value] = Weights[i + 1];
          std::swap(Weights[i + 1], Weights.back());
          Weights.pop_back();
        }

        // Swap-and-pop removal; revisit index i afterwards.
        std::swap(PredCases[i], PredCases.back());
        PredCases.pop_back();
        --i;
        --e;
      }

    // Okay, now we know which constants were sent to BB from the
    // predecessor.  Figure out where they will all go now.
    for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
      if (PTIHandled.count(BBCases[i].Value)) {
        // If this is one we are capable of getting...
        if (PredHasWeights || SuccHasWeights)
          Weights.push_back(WeightsForHandled[BBCases[i].Value]);
        PredCases.push_back(BBCases[i]);
        ++NewSuccessors[BBCases[i].Dest];
        PTIHandled.erase(BBCases[i].Value); // This constant is taken care of
      }

    // If there are any constants vectored to BB that TI doesn't handle,
    // they must go to the default destination of TI.
    for (ConstantInt *I : PTIHandled) {
      if (PredHasWeights || SuccHasWeights)
        Weights.push_back(WeightsForHandled[I]);
      PredCases.push_back(ValueEqualityComparisonCase(I, BBDefault));
      ++NewSuccessors[BBDefault];
    }
  }

  // Okay, at this point, we know which new successor Pred will get.  Make
  // sure we update the number of entries in the PHI nodes for these
  // successors.
  // SuccsOfPred snapshots Pred's current successors so that an Insert update
  // is only queued for edges that do not exist yet.
  SmallPtrSet<BasicBlock *, 2> SuccsOfPred;
  if (DTU) {
    SuccsOfPred = {succ_begin(Pred), succ_end(Pred)};
    Updates.reserve(Updates.size() + NewSuccessors.size());
  }
  for (const std::pair<BasicBlock *, int /*Num*/> &NewSuccessor :
       NewSuccessors) {
    // One AddPredecessorToBlock call per new edge to this successor.
    for (auto I : seq(NewSuccessor.second)) {
      (void)I;
      AddPredecessorToBlock(NewSuccessor.first, Pred, BB);
    }
    if (DTU && !SuccsOfPred.contains(NewSuccessor.first))
      Updates.push_back({DominatorTree::Insert, Pred, NewSuccessor.first});
  }

  Builder.SetInsertPoint(PTI);
  // Convert pointer to int before we switch.
  // Note that this writes through the CV reference, so the caller's value
  // changes too.
  if (CV->getType()->isPointerTy()) {
    CV =
        Builder.CreatePtrToInt(CV, DL.getIntPtrType(CV->getType()), "magicptr");
  }

  // Now that the successors are updated, create the new Switch instruction.
  SwitchInst *NewSI = Builder.CreateSwitch(CV, PredDefault, PredCases.size());
  NewSI->setDebugLoc(PTI->getDebugLoc());
  for (ValueEqualityComparisonCase &V : PredCases)
    NewSI->addCase(V.Value, V.Dest);

  if (PredHasWeights || SuccHasWeights) {
    // Halve the weights if any of them cannot fit in an uint32_t
    FitWeights(Weights);

    SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end());

    setBranchWeights(NewSI, MDWeights);
  }

  EraseTerminatorAndDCECond(PTI);

  // Okay, last check.  If BB is still a successor of PSI, then we must
  // have an infinite loop case.  If so, add an infinitely looping block
  // to handle the case to preserve the behavior of the code.
  // The block is created lazily, at most once, and shared by every such edge.
  BasicBlock *InfLoopBlock = nullptr;
  for (unsigned i = 0, e = NewSI->getNumSuccessors(); i != e; ++i)
    if (NewSI->getSuccessor(i) == BB) {
      if (!InfLoopBlock) {
        // Insert it at the end of the function, because it's either code,
        // or it won't matter if it's hot. :)
        InfLoopBlock =
            BasicBlock::Create(BB->getContext(), "infloop", BB->getParent());
        BranchInst::Create(InfLoopBlock, InfLoopBlock);
        if (DTU)
          Updates.push_back(
              {DominatorTree::Insert, InfLoopBlock, InfLoopBlock});
      }
      NewSI->setSuccessor(i, InfLoopBlock);
    }

  if (DTU) {
    if (InfLoopBlock)
      Updates.push_back({DominatorTree::Insert, Pred, InfLoopBlock});

    // After the rewrite above no successor of the new switch is BB.
    Updates.push_back({DominatorTree::Delete, Pred, BB});

    DTU->applyUpdates(Updates);
  }

  ++NumFoldValueComparisonIntoPredecessors;
  return true;
}
1379
1380/// The specified terminator is a value equality comparison instruction
1381/// (either a switch or a branch on "X == c").
1382/// See if any of the predecessors of the terminator block are value comparisons
1383/// on the same value.  If so, and if safe to do so, fold them together.
1384bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(Instruction *TI,
1385                                                         IRBuilder<> &Builder) {
1386  BasicBlock *BB = TI->getParent();
1387  Value *CV = isValueEqualityComparison(TI); // CondVal
1388  assert(CV && "Not a comparison?");
1389
1390  bool Changed = false;
1391
1392  SmallSetVector<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB));
1393  while (!Preds.empty()) {
1394    BasicBlock *Pred = Preds.pop_back_val();
1395    Instruction *PTI = Pred->getTerminator();
1396
1397    // Don't try to fold into itself.
1398    if (Pred == BB)
1399      continue;
1400
1401    // See if the predecessor is a comparison with the same value.
1402    Value *PCV = isValueEqualityComparison(PTI); // PredCondVal
1403    if (PCV != CV)
1404      continue;
1405
1406    SmallSetVector<BasicBlock *, 4> FailBlocks;
1407    if (!SafeToMergeTerminators(TI, PTI, &FailBlocks)) {
1408      for (auto *Succ : FailBlocks) {
1409        if (!SplitBlockPredecessors(Succ, TI->getParent(), ".fold.split", DTU))
1410          return false;
1411      }
1412    }
1413
1414    PerformValueComparisonIntoPredecessorFolding(TI, CV, PTI, Builder);
1415    Changed = true;
1416  }
1417  return Changed;
1418}
1419
1420// If we would need to insert a select that uses the value of this invoke
1421// (comments in hoistSuccIdenticalTerminatorToSwitchOrIf explain why we would
1422// need to do this), we can't hoist the invoke, as there is nowhere to put the
1423// select in this case.
1424static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2,
1425                                Instruction *I1, Instruction *I2) {
1426  for (BasicBlock *Succ : successors(BB1)) {
1427    for (const PHINode &PN : Succ->phis()) {
1428      Value *BB1V = PN.getIncomingValueForBlock(BB1);
1429      Value *BB2V = PN.getIncomingValueForBlock(BB2);
1430      if (BB1V != BB2V && (BB1V == I1 || BB2V == I2)) {
1431        return false;
1432      }
1433    }
1434  }
1435  return true;
1436}
1437
// Bitmask describing instructions that `hoistCommonCodeFromSuccessors` did
// not hoist and therefore skipped over.  Each bit restricts what kind of
// instruction may later be reordered across the skipped ones.
enum SkipFlags {
  SkipReadMem = 1 << 0,            // A skipped instruction may read memory.
  SkipSideEffect = 1 << 1,         // ... may have side effects (or is an alloca).
  SkipImplicitControlFlow = 1 << 2 // ... may not transfer execution onward.
};
1446
1447static unsigned skippedInstrFlags(Instruction *I) {
1448  unsigned Flags = 0;
1449  if (I->mayReadFromMemory())
1450    Flags |= SkipReadMem;
1451  // We can't arbitrarily move around allocas, e.g. moving allocas (especially
1452  // inalloca) across stacksave/stackrestore boundaries.
1453  if (I->mayHaveSideEffects() || isa<AllocaInst>(I))
1454    Flags |= SkipSideEffect;
1455  if (!isGuaranteedToTransferExecutionToSuccessor(I))
1456    Flags |= SkipImplicitControlFlow;
1457  return Flags;
1458}
1459
1460// Returns true if it is safe to reorder an instruction across preceding
1461// instructions in a basic block.
1462static bool isSafeToHoistInstr(Instruction *I, unsigned Flags) {
1463  // Don't reorder a store over a load.
1464  if ((Flags & SkipReadMem) && I->mayWriteToMemory())
1465    return false;
1466
1467  // If we have seen an instruction with side effects, it's unsafe to reorder an
1468  // instruction which reads memory or itself has side effects.
1469  if ((Flags & SkipSideEffect) &&
1470      (I->mayReadFromMemory() || I->mayHaveSideEffects() || isa<AllocaInst>(I)))
1471    return false;
1472
1473  // Reordering across an instruction which does not necessarily transfer
1474  // control to the next instruction is speculation.
1475  if ((Flags & SkipImplicitControlFlow) && !isSafeToSpeculativelyExecute(I))
1476    return false;
1477
1478  // Hoisting of llvm.deoptimize is only legal together with the next return
1479  // instruction, which this pass is not always able to do.
1480  if (auto *CB = dyn_cast<CallBase>(I))
1481    if (CB->getIntrinsicID() == Intrinsic::experimental_deoptimize)
1482      return false;
1483
1484  // It's also unsafe/illegal to hoist an instruction above its instruction
1485  // operands
1486  BasicBlock *BB = I->getParent();
1487  for (Value *Op : I->operands()) {
1488    if (auto *J = dyn_cast<Instruction>(Op))
1489      if (J->getParent() == BB)
1490        return false;
1491  }
1492
1493  return true;
1494}
1495
1496static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified = false);
1497
1498/// Helper function for hoistCommonCodeFromSuccessors. Return true if identical
1499/// instructions \p I1 and \p I2 can and should be hoisted.
1500static bool shouldHoistCommonInstructions(Instruction *I1, Instruction *I2,
1501                                          const TargetTransformInfo &TTI) {
1502  // If we're going to hoist a call, make sure that the two instructions
1503  // we're commoning/hoisting are both marked with musttail, or neither of
1504  // them is marked as such. Otherwise, we might end up in a situation where
1505  // we hoist from a block where the terminator is a `ret` to a block where
1506  // the terminator is a `br`, and `musttail` calls expect to be followed by
1507  // a return.
1508  auto *C1 = dyn_cast<CallInst>(I1);
1509  auto *C2 = dyn_cast<CallInst>(I2);
1510  if (C1 && C2)
1511    if (C1->isMustTailCall() != C2->isMustTailCall())
1512      return false;
1513
1514  if (!TTI.isProfitableToHoist(I1) || !TTI.isProfitableToHoist(I2))
1515    return false;
1516
1517  // If any of the two call sites has nomerge or convergent attribute, stop
1518  // hoisting.
1519  if (const auto *CB1 = dyn_cast<CallBase>(I1))
1520    if (CB1->cannotMerge() || CB1->isConvergent())
1521      return false;
1522  if (const auto *CB2 = dyn_cast<CallBase>(I2))
1523    if (CB2->cannotMerge() || CB2->isConvergent())
1524      return false;
1525
1526  return true;
1527}
1528
1529/// Hoist any common code in the successor blocks up into the block. This
1530/// function guarantees that BB dominates all successors. If EqTermsOnly is
1531/// given, only perform hoisting in case both blocks only contain a terminator.
1532/// In that case, only the original BI will be replaced and selects for PHIs are
1533/// added.
1534bool SimplifyCFGOpt::hoistCommonCodeFromSuccessors(BasicBlock *BB,
1535                                                   bool EqTermsOnly) {
1536  // This does very trivial matching, with limited scanning, to find identical
1537  // instructions in the two blocks. In particular, we don't want to get into
1538  // O(N1*N2*...) situations here where Ni are the sizes of these successors. As
1539  // such, we currently just scan for obviously identical instructions in an
1540  // identical order, possibly separated by the same number of non-identical
1541  // instructions.
1542  unsigned int SuccSize = succ_size(BB);
1543  if (SuccSize < 2)
1544    return false;
1545
1546  // If either of the blocks has it's address taken, then we can't do this fold,
1547  // because the code we'd hoist would no longer run when we jump into the block
1548  // by it's address.
1549  for (auto *Succ : successors(BB))
1550    if (Succ->hasAddressTaken() || !Succ->getSinglePredecessor())
1551      return false;
1552
1553  auto *TI = BB->getTerminator();
1554
1555  // The second of pair is a SkipFlags bitmask.
1556  using SuccIterPair = std::pair<BasicBlock::iterator, unsigned>;
1557  SmallVector<SuccIterPair, 8> SuccIterPairs;
1558  for (auto *Succ : successors(BB)) {
1559    BasicBlock::iterator SuccItr = Succ->begin();
1560    if (isa<PHINode>(*SuccItr))
1561      return false;
1562    SuccIterPairs.push_back(SuccIterPair(SuccItr, 0));
1563  }
1564
1565  // Check if only hoisting terminators is allowed. This does not add new
1566  // instructions to the hoist location.
1567  if (EqTermsOnly) {
1568    // Skip any debug intrinsics, as they are free to hoist.
1569    for (auto &SuccIter : make_first_range(SuccIterPairs)) {
1570      auto *INonDbg = &*skipDebugIntrinsics(SuccIter);
1571      if (!INonDbg->isTerminator())
1572        return false;
1573    }
1574    // Now we know that we only need to hoist debug intrinsics and the
1575    // terminator. Let the loop below handle those 2 cases.
1576  }
1577
1578  // Count how many instructions were not hoisted so far. There's a limit on how
1579  // many instructions we skip, serving as a compilation time control as well as
1580  // preventing excessive increase of life ranges.
1581  unsigned NumSkipped = 0;
1582  // If we find an unreachable instruction at the beginning of a basic block, we
1583  // can still hoist instructions from the rest of the basic blocks.
1584  if (SuccIterPairs.size() > 2) {
1585    erase_if(SuccIterPairs,
1586             [](const auto &Pair) { return isa<UnreachableInst>(Pair.first); });
1587    if (SuccIterPairs.size() < 2)
1588      return false;
1589  }
1590
1591  bool Changed = false;
1592
1593  for (;;) {
1594    auto *SuccIterPairBegin = SuccIterPairs.begin();
1595    auto &BB1ItrPair = *SuccIterPairBegin++;
1596    auto OtherSuccIterPairRange =
1597        iterator_range(SuccIterPairBegin, SuccIterPairs.end());
1598    auto OtherSuccIterRange = make_first_range(OtherSuccIterPairRange);
1599
1600    Instruction *I1 = &*BB1ItrPair.first;
1601    auto *BB1 = I1->getParent();
1602
1603    // Skip debug info if it is not identical.
1604    bool AllDbgInstsAreIdentical = all_of(OtherSuccIterRange, [I1](auto &Iter) {
1605      Instruction *I2 = &*Iter;
1606      return I1->isIdenticalToWhenDefined(I2);
1607    });
1608    if (!AllDbgInstsAreIdentical) {
1609      while (isa<DbgInfoIntrinsic>(I1))
1610        I1 = &*++BB1ItrPair.first;
1611      for (auto &SuccIter : OtherSuccIterRange) {
1612        Instruction *I2 = &*SuccIter;
1613        while (isa<DbgInfoIntrinsic>(I2))
1614          I2 = &*++SuccIter;
1615      }
1616    }
1617
1618    bool AllInstsAreIdentical = true;
1619    bool HasTerminator = I1->isTerminator();
1620    for (auto &SuccIter : OtherSuccIterRange) {
1621      Instruction *I2 = &*SuccIter;
1622      HasTerminator |= I2->isTerminator();
1623      if (AllInstsAreIdentical && !I1->isIdenticalToWhenDefined(I2))
1624        AllInstsAreIdentical = false;
1625    }
1626
1627    // If we are hoisting the terminator instruction, don't move one (making a
1628    // broken BB), instead clone it, and remove BI.
1629    if (HasTerminator) {
1630      // Even if BB, which contains only one unreachable instruction, is ignored
1631      // at the beginning of the loop, we can hoist the terminator instruction.
1632      // If any instructions remain in the block, we cannot hoist terminators.
1633      if (NumSkipped || !AllInstsAreIdentical)
1634        return Changed;
1635      SmallVector<Instruction *, 8> Insts;
1636      for (auto &SuccIter : OtherSuccIterRange)
1637        Insts.push_back(&*SuccIter);
1638      return hoistSuccIdenticalTerminatorToSwitchOrIf(TI, I1, Insts) || Changed;
1639    }
1640
1641    if (AllInstsAreIdentical) {
1642      unsigned SkipFlagsBB1 = BB1ItrPair.second;
1643      AllInstsAreIdentical =
1644          isSafeToHoistInstr(I1, SkipFlagsBB1) &&
1645          all_of(OtherSuccIterPairRange, [=](const auto &Pair) {
1646            Instruction *I2 = &*Pair.first;
1647            unsigned SkipFlagsBB2 = Pair.second;
1648            // Even if the instructions are identical, it may not
1649            // be safe to hoist them if we have skipped over
1650            // instructions with side effects or their operands
1651            // weren't hoisted.
1652            return isSafeToHoistInstr(I2, SkipFlagsBB2) &&
1653                   shouldHoistCommonInstructions(I1, I2, TTI);
1654          });
1655    }
1656
1657    if (AllInstsAreIdentical) {
1658      BB1ItrPair.first++;
1659      if (isa<DbgInfoIntrinsic>(I1)) {
1660        // The debug location is an integral part of a debug info intrinsic
1661        // and can't be separated from it or replaced.  Instead of attempting
1662        // to merge locations, simply hoist both copies of the intrinsic.
1663        I1->moveBeforePreserving(TI);
1664        for (auto &SuccIter : OtherSuccIterRange) {
1665          auto *I2 = &*SuccIter++;
1666          assert(isa<DbgInfoIntrinsic>(I2));
1667          I2->moveBeforePreserving(TI);
1668        }
1669      } else {
1670        // For a normal instruction, we just move one to right before the
1671        // branch, then replace all uses of the other with the first.  Finally,
1672        // we remove the now redundant second instruction.
1673        I1->moveBeforePreserving(TI);
1674        BB->splice(TI->getIterator(), BB1, I1->getIterator());
1675        for (auto &SuccIter : OtherSuccIterRange) {
1676          Instruction *I2 = &*SuccIter++;
1677          assert(I2 != I1);
1678          if (!I2->use_empty())
1679            I2->replaceAllUsesWith(I1);
1680          I1->andIRFlags(I2);
1681          combineMetadataForCSE(I1, I2, true);
1682          // I1 and I2 are being combined into a single instruction.  Its debug
1683          // location is the merged locations of the original instructions.
1684          I1->applyMergedLocation(I1->getDebugLoc(), I2->getDebugLoc());
1685          I2->eraseFromParent();
1686        }
1687      }
1688      if (!Changed)
1689        NumHoistCommonCode += SuccIterPairs.size();
1690      Changed = true;
1691      NumHoistCommonInstrs += SuccIterPairs.size();
1692    } else {
1693      if (NumSkipped >= HoistCommonSkipLimit)
1694        return Changed;
1695      // We are about to skip over a pair of non-identical instructions. Record
1696      // if any have characteristics that would prevent reordering instructions
1697      // across them.
1698      for (auto &SuccIterPair : SuccIterPairs) {
1699        Instruction *I = &*SuccIterPair.first++;
1700        SuccIterPair.second |= skippedInstrFlags(I);
1701      }
1702      ++NumSkipped;
1703    }
1704  }
1705}
1706
1707bool SimplifyCFGOpt::hoistSuccIdenticalTerminatorToSwitchOrIf(
1708    Instruction *TI, Instruction *I1,
1709    SmallVectorImpl<Instruction *> &OtherSuccTIs) {
1710
1711  auto *BI = dyn_cast<BranchInst>(TI);
1712
1713  bool Changed = false;
1714  BasicBlock *TIParent = TI->getParent();
1715  BasicBlock *BB1 = I1->getParent();
1716
1717  // Use only for an if statement.
1718  auto *I2 = *OtherSuccTIs.begin();
1719  auto *BB2 = I2->getParent();
1720  if (BI) {
1721    assert(OtherSuccTIs.size() == 1);
1722    assert(BI->getSuccessor(0) == I1->getParent());
1723    assert(BI->getSuccessor(1) == I2->getParent());
1724  }
1725
1726  // In the case of an if statement, we try to hoist an invoke.
1727  // FIXME: Can we define a safety predicate for CallBr?
1728  // FIXME: Test case llvm/test/Transforms/SimplifyCFG/2009-06-15-InvokeCrash.ll
1729  // removed in 4c923b3b3fd0ac1edebf0603265ca3ba51724937 commit?
1730  if (isa<InvokeInst>(I1) && (!BI || !isSafeToHoistInvoke(BB1, BB2, I1, I2)))
1731    return false;
1732
1733  // TODO: callbr hoisting currently disabled pending further study.
1734  if (isa<CallBrInst>(I1))
1735    return false;
1736
1737  for (BasicBlock *Succ : successors(BB1)) {
1738    for (PHINode &PN : Succ->phis()) {
1739      Value *BB1V = PN.getIncomingValueForBlock(BB1);
1740      for (Instruction *OtherSuccTI : OtherSuccTIs) {
1741        Value *BB2V = PN.getIncomingValueForBlock(OtherSuccTI->getParent());
1742        if (BB1V == BB2V)
1743          continue;
1744
1745        // In the case of an if statement, check for
1746        // passingValueIsAlwaysUndefined here because we would rather eliminate
1747        // undefined control flow then converting it to a select.
1748        if (!BI || passingValueIsAlwaysUndefined(BB1V, &PN) ||
1749            passingValueIsAlwaysUndefined(BB2V, &PN))
1750          return false;
1751      }
1752    }
1753  }
1754
1755  // Okay, it is safe to hoist the terminator.
1756  Instruction *NT = I1->clone();
1757  NT->insertInto(TIParent, TI->getIterator());
1758  if (!NT->getType()->isVoidTy()) {
1759    I1->replaceAllUsesWith(NT);
1760    for (Instruction *OtherSuccTI : OtherSuccTIs)
1761      OtherSuccTI->replaceAllUsesWith(NT);
1762    NT->takeName(I1);
1763  }
1764  Changed = true;
1765  NumHoistCommonInstrs += OtherSuccTIs.size() + 1;
1766
1767  // Ensure terminator gets a debug location, even an unknown one, in case
1768  // it involves inlinable calls.
1769  SmallVector<DILocation *, 4> Locs;
1770  Locs.push_back(I1->getDebugLoc());
1771  for (auto *OtherSuccTI : OtherSuccTIs)
1772    Locs.push_back(OtherSuccTI->getDebugLoc());
1773  // Also clone DPValues from the existing terminator, and all others (to
1774  // duplicate existing hoisting behaviour).
1775  NT->cloneDebugInfoFrom(I1);
1776  for (Instruction *OtherSuccTI : OtherSuccTIs)
1777    NT->cloneDebugInfoFrom(OtherSuccTI);
1778  NT->setDebugLoc(DILocation::getMergedLocations(Locs));
1779
1780  // PHIs created below will adopt NT's merged DebugLoc.
1781  IRBuilder<NoFolder> Builder(NT);
1782
1783  // In the case of an if statement, hoisting one of the terminators from our
1784  // successor is a great thing. Unfortunately, the successors of the if/else
1785  // blocks may have PHI nodes in them.  If they do, all PHI entries for BB1/BB2
1786  // must agree for all PHI nodes, so we insert select instruction to compute
1787  // the final result.
1788  if (BI) {
1789    std::map<std::pair<Value *, Value *>, SelectInst *> InsertedSelects;
1790    for (BasicBlock *Succ : successors(BB1)) {
1791      for (PHINode &PN : Succ->phis()) {
1792        Value *BB1V = PN.getIncomingValueForBlock(BB1);
1793        Value *BB2V = PN.getIncomingValueForBlock(BB2);
1794        if (BB1V == BB2V)
1795          continue;
1796
1797        // These values do not agree.  Insert a select instruction before NT
1798        // that determines the right value.
1799        SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)];
1800        if (!SI) {
1801          // Propagate fast-math-flags from phi node to its replacement select.
1802          IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
1803          if (isa<FPMathOperator>(PN))
1804            Builder.setFastMathFlags(PN.getFastMathFlags());
1805
1806          SI = cast<SelectInst>(Builder.CreateSelect(
1807              BI->getCondition(), BB1V, BB2V,
1808              BB1V->getName() + "." + BB2V->getName(), BI));
1809        }
1810
1811        // Make the PHI node use the select for all incoming values for BB1/BB2
1812        for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
1813          if (PN.getIncomingBlock(i) == BB1 || PN.getIncomingBlock(i) == BB2)
1814            PN.setIncomingValue(i, SI);
1815      }
1816    }
1817  }
1818
1819  SmallVector<DominatorTree::UpdateType, 4> Updates;
1820
1821  // Update any PHI nodes in our new successors.
1822  for (BasicBlock *Succ : successors(BB1)) {
1823    AddPredecessorToBlock(Succ, TIParent, BB1);
1824    if (DTU)
1825      Updates.push_back({DominatorTree::Insert, TIParent, Succ});
1826  }
1827
1828  if (DTU)
1829    for (BasicBlock *Succ : successors(TI))
1830      Updates.push_back({DominatorTree::Delete, TIParent, Succ});
1831
1832  EraseTerminatorAndDCECond(TI);
1833  if (DTU)
1834    DTU->applyUpdates(Updates);
1835  return Changed;
1836}
1837
1838// Check lifetime markers.
1839static bool isLifeTimeMarker(const Instruction *I) {
1840  if (auto II = dyn_cast<IntrinsicInst>(I)) {
1841    switch (II->getIntrinsicID()) {
1842    default:
1843      break;
1844    case Intrinsic::lifetime_start:
1845    case Intrinsic::lifetime_end:
1846      return true;
1847    }
1848  }
1849  return false;
1850}
1851
1852// TODO: Refine this. This should avoid cases like turning constant memcpy sizes
1853// into variables.
1854static bool replacingOperandWithVariableIsCheap(const Instruction *I,
1855                                                int OpIdx) {
1856  return !isa<IntrinsicInst>(I);
1857}
1858
1859// All instructions in Insts belong to different blocks that all unconditionally
1860// branch to a common successor. Analyze each instruction and return true if it
1861// would be possible to sink them into their successor, creating one common
1862// instruction instead. For every value that would be required to be provided by
1863// PHI node (because an operand varies in each input block), add to PHIOperands.
1864static bool canSinkInstructions(
1865    ArrayRef<Instruction *> Insts,
1866    DenseMap<Instruction *, SmallVector<Value *, 4>> &PHIOperands) {
1867  // Prune out obviously bad instructions to move. Each instruction must have
1868  // exactly zero or one use, and we check later that use is by a single, common
1869  // PHI instruction in the successor.
1870  bool HasUse = !Insts.front()->user_empty();
1871  for (auto *I : Insts) {
1872    // These instructions may change or break semantics if moved.
1873    if (isa<PHINode>(I) || I->isEHPad() || isa<AllocaInst>(I) ||
1874        I->getType()->isTokenTy())
1875      return false;
1876
1877    // Do not try to sink an instruction in an infinite loop - it can cause
1878    // this algorithm to infinite loop.
1879    if (I->getParent()->getSingleSuccessor() == I->getParent())
1880      return false;
1881
1882    // Conservatively return false if I is an inline-asm instruction. Sinking
1883    // and merging inline-asm instructions can potentially create arguments
1884    // that cannot satisfy the inline-asm constraints.
1885    // If the instruction has nomerge or convergent attribute, return false.
1886    if (const auto *C = dyn_cast<CallBase>(I))
1887      if (C->isInlineAsm() || C->cannotMerge() || C->isConvergent())
1888        return false;
1889
1890    // Each instruction must have zero or one use.
1891    if (HasUse && !I->hasOneUse())
1892      return false;
1893    if (!HasUse && !I->user_empty())
1894      return false;
1895  }
1896
1897  const Instruction *I0 = Insts.front();
1898  for (auto *I : Insts) {
1899    if (!I->isSameOperationAs(I0))
1900      return false;
1901
1902    // swifterror pointers can only be used by a load or store; sinking a load
1903    // or store would require introducing a select for the pointer operand,
1904    // which isn't allowed for swifterror pointers.
1905    if (isa<StoreInst>(I) && I->getOperand(1)->isSwiftError())
1906      return false;
1907    if (isa<LoadInst>(I) && I->getOperand(0)->isSwiftError())
1908      return false;
1909  }
1910
1911  // All instructions in Insts are known to be the same opcode. If they have a
1912  // use, check that the only user is a PHI or in the same block as the
1913  // instruction, because if a user is in the same block as an instruction we're
1914  // contemplating sinking, it must already be determined to be sinkable.
1915  if (HasUse) {
1916    auto *PNUse = dyn_cast<PHINode>(*I0->user_begin());
1917    auto *Succ = I0->getParent()->getTerminator()->getSuccessor(0);
1918    if (!all_of(Insts, [&PNUse,&Succ](const Instruction *I) -> bool {
1919          auto *U = cast<Instruction>(*I->user_begin());
1920          return (PNUse &&
1921                  PNUse->getParent() == Succ &&
1922                  PNUse->getIncomingValueForBlock(I->getParent()) == I) ||
1923                 U->getParent() == I->getParent();
1924        }))
1925      return false;
1926  }
1927
1928  // Because SROA can't handle speculating stores of selects, try not to sink
1929  // loads, stores or lifetime markers of allocas when we'd have to create a
1930  // PHI for the address operand. Also, because it is likely that loads or
1931  // stores of allocas will disappear when Mem2Reg/SROA is run, don't sink
1932  // them.
1933  // This can cause code churn which can have unintended consequences down
1934  // the line - see https://llvm.org/bugs/show_bug.cgi?id=30244.
1935  // FIXME: This is a workaround for a deficiency in SROA - see
1936  // https://llvm.org/bugs/show_bug.cgi?id=30188
1937  if (isa<StoreInst>(I0) && any_of(Insts, [](const Instruction *I) {
1938        return isa<AllocaInst>(I->getOperand(1)->stripPointerCasts());
1939      }))
1940    return false;
1941  if (isa<LoadInst>(I0) && any_of(Insts, [](const Instruction *I) {
1942        return isa<AllocaInst>(I->getOperand(0)->stripPointerCasts());
1943      }))
1944    return false;
1945  if (isLifeTimeMarker(I0) && any_of(Insts, [](const Instruction *I) {
1946        return isa<AllocaInst>(I->getOperand(1)->stripPointerCasts());
1947      }))
1948    return false;
1949
1950  // For calls to be sinkable, they must all be indirect, or have same callee.
1951  // I.e. if we have two direct calls to different callees, we don't want to
1952  // turn that into an indirect call. Likewise, if we have an indirect call,
1953  // and a direct call, we don't actually want to have a single indirect call.
1954  if (isa<CallBase>(I0)) {
1955    auto IsIndirectCall = [](const Instruction *I) {
1956      return cast<CallBase>(I)->isIndirectCall();
1957    };
1958    bool HaveIndirectCalls = any_of(Insts, IsIndirectCall);
1959    bool AllCallsAreIndirect = all_of(Insts, IsIndirectCall);
1960    if (HaveIndirectCalls) {
1961      if (!AllCallsAreIndirect)
1962        return false;
1963    } else {
1964      // All callees must be identical.
1965      Value *Callee = nullptr;
1966      for (const Instruction *I : Insts) {
1967        Value *CurrCallee = cast<CallBase>(I)->getCalledOperand();
1968        if (!Callee)
1969          Callee = CurrCallee;
1970        else if (Callee != CurrCallee)
1971          return false;
1972      }
1973    }
1974  }
1975
1976  for (unsigned OI = 0, OE = I0->getNumOperands(); OI != OE; ++OI) {
1977    Value *Op = I0->getOperand(OI);
1978    if (Op->getType()->isTokenTy())
1979      // Don't touch any operand of token type.
1980      return false;
1981
1982    auto SameAsI0 = [&I0, OI](const Instruction *I) {
1983      assert(I->getNumOperands() == I0->getNumOperands());
1984      return I->getOperand(OI) == I0->getOperand(OI);
1985    };
1986    if (!all_of(Insts, SameAsI0)) {
1987      if ((isa<Constant>(Op) && !replacingOperandWithVariableIsCheap(I0, OI)) ||
1988          !canReplaceOperandWithVariable(I0, OI))
1989        // We can't create a PHI from this GEP.
1990        return false;
1991      for (auto *I : Insts)
1992        PHIOperands[I].push_back(I->getOperand(OI));
1993    }
1994  }
1995  return true;
1996}
1997
1998// Assuming canSinkInstructions(Blocks) has returned true, sink the last
1999// instruction of every block in Blocks to their common successor, commoning
2000// into one instruction.
2001static bool sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) {
2002  auto *BBEnd = Blocks[0]->getTerminator()->getSuccessor(0);
2003
2004  // canSinkInstructions returning true guarantees that every block has at
2005  // least one non-terminator instruction.
2006  SmallVector<Instruction*,4> Insts;
2007  for (auto *BB : Blocks) {
2008    Instruction *I = BB->getTerminator();
2009    do {
2010      I = I->getPrevNode();
2011    } while (isa<DbgInfoIntrinsic>(I) && I != &BB->front());
2012    if (!isa<DbgInfoIntrinsic>(I))
2013      Insts.push_back(I);
2014  }
2015
2016  // The only checking we need to do now is that all users of all instructions
2017  // are the same PHI node. canSinkInstructions should have checked this but
2018  // it is slightly over-aggressive - it gets confused by commutative
2019  // instructions so double-check it here.
2020  Instruction *I0 = Insts.front();
2021  if (!I0->user_empty()) {
2022    auto *PNUse = dyn_cast<PHINode>(*I0->user_begin());
2023    if (!all_of(Insts, [&PNUse](const Instruction *I) -> bool {
2024          auto *U = cast<Instruction>(*I->user_begin());
2025          return U == PNUse;
2026        }))
2027      return false;
2028  }
2029
2030  // We don't need to do any more checking here; canSinkInstructions should
2031  // have done it all for us.
2032  SmallVector<Value*, 4> NewOperands;
2033  for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) {
2034    // This check is different to that in canSinkInstructions. There, we
2035    // cared about the global view once simplifycfg (and instcombine) have
2036    // completed - it takes into account PHIs that become trivially
2037    // simplifiable.  However here we need a more local view; if an operand
2038    // differs we create a PHI and rely on instcombine to clean up the very
2039    // small mess we may make.
2040    bool NeedPHI = any_of(Insts, [&I0, O](const Instruction *I) {
2041      return I->getOperand(O) != I0->getOperand(O);
2042    });
2043    if (!NeedPHI) {
2044      NewOperands.push_back(I0->getOperand(O));
2045      continue;
2046    }
2047
2048    // Create a new PHI in the successor block and populate it.
2049    auto *Op = I0->getOperand(O);
2050    assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!");
2051    auto *PN =
2052        PHINode::Create(Op->getType(), Insts.size(), Op->getName() + ".sink");
2053    PN->insertBefore(BBEnd->begin());
2054    for (auto *I : Insts)
2055      PN->addIncoming(I->getOperand(O), I->getParent());
2056    NewOperands.push_back(PN);
2057  }
2058
2059  // Arbitrarily use I0 as the new "common" instruction; remap its operands
2060  // and move it to the start of the successor block.
2061  for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O)
2062    I0->getOperandUse(O).set(NewOperands[O]);
2063
2064  I0->moveBefore(*BBEnd, BBEnd->getFirstInsertionPt());
2065
2066  // Update metadata and IR flags, and merge debug locations.
2067  for (auto *I : Insts)
2068    if (I != I0) {
2069      // The debug location for the "common" instruction is the merged locations
2070      // of all the commoned instructions.  We start with the original location
2071      // of the "common" instruction and iteratively merge each location in the
2072      // loop below.
2073      // This is an N-way merge, which will be inefficient if I0 is a CallInst.
2074      // However, as N-way merge for CallInst is rare, so we use simplified API
2075      // instead of using complex API for N-way merge.
2076      I0->applyMergedLocation(I0->getDebugLoc(), I->getDebugLoc());
2077      combineMetadataForCSE(I0, I, true);
2078      I0->andIRFlags(I);
2079    }
2080
2081  if (!I0->user_empty()) {
2082    // canSinkLastInstruction checked that all instructions were used by
2083    // one and only one PHI node. Find that now, RAUW it to our common
2084    // instruction and nuke it.
2085    auto *PN = cast<PHINode>(*I0->user_begin());
2086    PN->replaceAllUsesWith(I0);
2087    PN->eraseFromParent();
2088  }
2089
2090  // Finally nuke all instructions apart from the common instruction.
2091  for (auto *I : Insts) {
2092    if (I == I0)
2093      continue;
2094    // The remaining uses are debug users, replace those with the common inst.
2095    // In most (all?) cases this just introduces a use-before-def.
2096    assert(I->user_empty() && "Inst unexpectedly still has non-dbg users");
2097    I->replaceAllUsesWith(I0);
2098    I->eraseFromParent();
2099  }
2100
2101  return true;
2102}
2103
2104namespace {
2105
2106  // LockstepReverseIterator - Iterates through instructions
2107  // in a set of blocks in reverse order from the first non-terminator.
2108  // For example (assume all blocks have size n):
2109  //   LockstepReverseIterator I([B1, B2, B3]);
2110  //   *I-- = [B1[n], B2[n], B3[n]];
2111  //   *I-- = [B1[n-1], B2[n-1], B3[n-1]];
2112  //   *I-- = [B1[n-2], B2[n-2], B3[n-2]];
2113  //   ...
2114  class LockstepReverseIterator {
2115    ArrayRef<BasicBlock*> Blocks;
2116    SmallVector<Instruction*,4> Insts;
2117    bool Fail;
2118
2119  public:
2120    LockstepReverseIterator(ArrayRef<BasicBlock*> Blocks) : Blocks(Blocks) {
2121      reset();
2122    }
2123
2124    void reset() {
2125      Fail = false;
2126      Insts.clear();
2127      for (auto *BB : Blocks) {
2128        Instruction *Inst = BB->getTerminator();
2129        for (Inst = Inst->getPrevNode(); Inst && isa<DbgInfoIntrinsic>(Inst);)
2130          Inst = Inst->getPrevNode();
2131        if (!Inst) {
2132          // Block wasn't big enough.
2133          Fail = true;
2134          return;
2135        }
2136        Insts.push_back(Inst);
2137      }
2138    }
2139
2140    bool isValid() const {
2141      return !Fail;
2142    }
2143
2144    void operator--() {
2145      if (Fail)
2146        return;
2147      for (auto *&Inst : Insts) {
2148        for (Inst = Inst->getPrevNode(); Inst && isa<DbgInfoIntrinsic>(Inst);)
2149          Inst = Inst->getPrevNode();
2150        // Already at beginning of block.
2151        if (!Inst) {
2152          Fail = true;
2153          return;
2154        }
2155      }
2156    }
2157
2158    void operator++() {
2159      if (Fail)
2160        return;
2161      for (auto *&Inst : Insts) {
2162        for (Inst = Inst->getNextNode(); Inst && isa<DbgInfoIntrinsic>(Inst);)
2163          Inst = Inst->getNextNode();
2164        // Already at end of block.
2165        if (!Inst) {
2166          Fail = true;
2167          return;
2168        }
2169      }
2170    }
2171
2172    ArrayRef<Instruction*> operator * () const {
2173      return Insts;
2174    }
2175  };
2176
2177} // end anonymous namespace
2178
2179/// Check whether BB's predecessors end with unconditional branches. If it is
2180/// true, sink any common code from the predecessors to BB.
2181static bool SinkCommonCodeFromPredecessors(BasicBlock *BB,
2182                                           DomTreeUpdater *DTU) {
2183  // We support two situations:
2184  //   (1) all incoming arcs are unconditional
2185  //   (2) there are non-unconditional incoming arcs
2186  //
2187  // (2) is very common in switch defaults and
2188  // else-if patterns;
2189  //
2190  //   if (a) f(1);
2191  //   else if (b) f(2);
2192  //
2193  // produces:
2194  //
2195  //       [if]
2196  //      /    \
2197  //    [f(1)] [if]
2198  //      |     | \
2199  //      |     |  |
2200  //      |  [f(2)]|
2201  //       \    | /
2202  //        [ end ]
2203  //
2204  // [end] has two unconditional predecessor arcs and one conditional. The
2205  // conditional refers to the implicit empty 'else' arc. This conditional
2206  // arc can also be caused by an empty default block in a switch.
2207  //
2208  // In this case, we attempt to sink code from all *unconditional* arcs.
2209  // If we can sink instructions from these arcs (determined during the scan
2210  // phase below) we insert a common successor for all unconditional arcs and
2211  // connect that to [end], to enable sinking:
2212  //
2213  //       [if]
2214  //      /    \
2215  //    [x(1)] [if]
2216  //      |     | \
2217  //      |     |  \
2218  //      |  [x(2)] |
2219  //       \   /    |
2220  //   [sink.split] |
2221  //         \     /
2222  //         [ end ]
2223  //
2224  SmallVector<BasicBlock*,4> UnconditionalPreds;
2225  bool HaveNonUnconditionalPredecessors = false;
2226  for (auto *PredBB : predecessors(BB)) {
2227    auto *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator());
2228    if (PredBr && PredBr->isUnconditional())
2229      UnconditionalPreds.push_back(PredBB);
2230    else
2231      HaveNonUnconditionalPredecessors = true;
2232  }
2233  if (UnconditionalPreds.size() < 2)
2234    return false;
2235
2236  // We take a two-step approach to tail sinking. First we scan from the end of
2237  // each block upwards in lockstep. If the n'th instruction from the end of each
2238  // block can be sunk, those instructions are added to ValuesToSink and we
2239  // carry on. If we can sink an instruction but need to PHI-merge some operands
2240  // (because they're not identical in each instruction) we add these to
2241  // PHIOperands.
2242  int ScanIdx = 0;
2243  SmallPtrSet<Value*,4> InstructionsToSink;
2244  DenseMap<Instruction*, SmallVector<Value*,4>> PHIOperands;
2245  LockstepReverseIterator LRI(UnconditionalPreds);
2246  while (LRI.isValid() &&
2247         canSinkInstructions(*LRI, PHIOperands)) {
2248    LLVM_DEBUG(dbgs() << "SINK: instruction can be sunk: " << *(*LRI)[0]
2249                      << "\n");
2250    InstructionsToSink.insert((*LRI).begin(), (*LRI).end());
2251    ++ScanIdx;
2252    --LRI;
2253  }
2254
2255  // If no instructions can be sunk, early-return.
2256  if (ScanIdx == 0)
2257    return false;
2258
2259  bool followedByDeoptOrUnreachable = IsBlockFollowedByDeoptOrUnreachable(BB);
2260
2261  if (!followedByDeoptOrUnreachable) {
2262    // Okay, we *could* sink last ScanIdx instructions. But how many can we
2263    // actually sink before encountering instruction that is unprofitable to
2264    // sink?
2265    auto ProfitableToSinkInstruction = [&](LockstepReverseIterator &LRI) {
2266      unsigned NumPHIdValues = 0;
2267      for (auto *I : *LRI)
2268        for (auto *V : PHIOperands[I]) {
2269          if (!InstructionsToSink.contains(V))
2270            ++NumPHIdValues;
2271          // FIXME: this check is overly optimistic. We may end up not sinking
2272          // said instruction, due to the very same profitability check.
2273          // See @creating_too_many_phis in sink-common-code.ll.
2274        }
2275      LLVM_DEBUG(dbgs() << "SINK: #phid values: " << NumPHIdValues << "\n");
2276      unsigned NumPHIInsts = NumPHIdValues / UnconditionalPreds.size();
2277      if ((NumPHIdValues % UnconditionalPreds.size()) != 0)
2278        NumPHIInsts++;
2279
2280      return NumPHIInsts <= 1;
2281    };
2282
2283    // We've determined that we are going to sink last ScanIdx instructions,
2284    // and recorded them in InstructionsToSink. Now, some instructions may be
2285    // unprofitable to sink. But that determination depends on the instructions
2286    // that we are going to sink.
2287
2288    // First, forward scan: find the first instruction unprofitable to sink,
2289    // recording all the ones that are profitable to sink.
2290    // FIXME: would it be better, after we detect that not all are profitable.
2291    // to either record the profitable ones, or erase the unprofitable ones?
2292    // Maybe we need to choose (at runtime) the one that will touch least
2293    // instrs?
2294    LRI.reset();
2295    int Idx = 0;
2296    SmallPtrSet<Value *, 4> InstructionsProfitableToSink;
2297    while (Idx < ScanIdx) {
2298      if (!ProfitableToSinkInstruction(LRI)) {
2299        // Too many PHIs would be created.
2300        LLVM_DEBUG(
2301            dbgs() << "SINK: stopping here, too many PHIs would be created!\n");
2302        break;
2303      }
2304      InstructionsProfitableToSink.insert((*LRI).begin(), (*LRI).end());
2305      --LRI;
2306      ++Idx;
2307    }
2308
2309    // If no instructions can be sunk, early-return.
2310    if (Idx == 0)
2311      return false;
2312
2313    // Did we determine that (only) some instructions are unprofitable to sink?
2314    if (Idx < ScanIdx) {
2315      // Okay, some instructions are unprofitable.
2316      ScanIdx = Idx;
2317      InstructionsToSink = InstructionsProfitableToSink;
2318
2319      // But, that may make other instructions unprofitable, too.
2320      // So, do a backward scan, do any earlier instructions become
2321      // unprofitable?
2322      assert(
2323          !ProfitableToSinkInstruction(LRI) &&
2324          "We already know that the last instruction is unprofitable to sink");
2325      ++LRI;
2326      --Idx;
2327      while (Idx >= 0) {
2328        // If we detect that an instruction becomes unprofitable to sink,
2329        // all earlier instructions won't be sunk either,
2330        // so preemptively keep InstructionsProfitableToSink in sync.
2331        // FIXME: is this the most performant approach?
2332        for (auto *I : *LRI)
2333          InstructionsProfitableToSink.erase(I);
2334        if (!ProfitableToSinkInstruction(LRI)) {
2335          // Everything starting with this instruction won't be sunk.
2336          ScanIdx = Idx;
2337          InstructionsToSink = InstructionsProfitableToSink;
2338        }
2339        ++LRI;
2340        --Idx;
2341      }
2342    }
2343
2344    // If no instructions can be sunk, early-return.
2345    if (ScanIdx == 0)
2346      return false;
2347  }
2348
2349  bool Changed = false;
2350
2351  if (HaveNonUnconditionalPredecessors) {
2352    if (!followedByDeoptOrUnreachable) {
2353      // It is always legal to sink common instructions from unconditional
2354      // predecessors. However, if not all predecessors are unconditional,
2355      // this transformation might be pessimizing. So as a rule of thumb,
2356      // don't do it unless we'd sink at least one non-speculatable instruction.
2357      // See https://bugs.llvm.org/show_bug.cgi?id=30244
2358      LRI.reset();
2359      int Idx = 0;
2360      bool Profitable = false;
2361      while (Idx < ScanIdx) {
2362        if (!isSafeToSpeculativelyExecute((*LRI)[0])) {
2363          Profitable = true;
2364          break;
2365        }
2366        --LRI;
2367        ++Idx;
2368      }
2369      if (!Profitable)
2370        return false;
2371    }
2372
2373    LLVM_DEBUG(dbgs() << "SINK: Splitting edge\n");
2374    // We have a conditional edge and we're going to sink some instructions.
2375    // Insert a new block postdominating all blocks we're going to sink from.
2376    if (!SplitBlockPredecessors(BB, UnconditionalPreds, ".sink.split", DTU))
2377      // Edges couldn't be split.
2378      return false;
2379    Changed = true;
2380  }
2381
2382  // Now that we've analyzed all potential sinking candidates, perform the
2383  // actual sink. We iteratively sink the last non-terminator of the source
2384  // blocks into their common successor unless doing so would require too
2385  // many PHI instructions to be generated (currently only one PHI is allowed
2386  // per sunk instruction).
2387  //
2388  // We can use InstructionsToSink to discount values needing PHI-merging that will
2389  // actually be sunk in a later iteration. This allows us to be more
2390  // aggressive in what we sink. This does allow a false positive where we
2391  // sink presuming a later value will also be sunk, but stop half way through
2392  // and never actually sink it which means we produce more PHIs than intended.
2393  // This is unlikely in practice though.
2394  int SinkIdx = 0;
2395  for (; SinkIdx != ScanIdx; ++SinkIdx) {
2396    LLVM_DEBUG(dbgs() << "SINK: Sink: "
2397                      << *UnconditionalPreds[0]->getTerminator()->getPrevNode()
2398                      << "\n");
2399
2400    // Because we've sunk every instruction in turn, the current instruction to
2401    // sink is always at index 0.
2402    LRI.reset();
2403
2404    if (!sinkLastInstruction(UnconditionalPreds)) {
2405      LLVM_DEBUG(
2406          dbgs()
2407          << "SINK: stopping here, failed to actually sink instruction!\n");
2408      break;
2409    }
2410
2411    NumSinkCommonInstrs++;
2412    Changed = true;
2413  }
2414  if (SinkIdx != 0)
2415    ++NumSinkCommonCode;
2416  return Changed;
2417}
2418
2419namespace {
2420
2421struct CompatibleSets {
2422  using SetTy = SmallVector<InvokeInst *, 2>;
2423
2424  SmallVector<SetTy, 1> Sets;
2425
2426  static bool shouldBelongToSameSet(ArrayRef<InvokeInst *> Invokes);
2427
2428  SetTy &getCompatibleSet(InvokeInst *II);
2429
2430  void insert(InvokeInst *II);
2431};
2432
2433CompatibleSets::SetTy &CompatibleSets::getCompatibleSet(InvokeInst *II) {
2434  // Perform a linear scan over all the existing sets, see if the new `invoke`
2435  // is compatible with any particular set. Since we know that all the `invokes`
2436  // within a set are compatible, only check the first `invoke` in each set.
2437  // WARNING: at worst, this has quadratic complexity.
2438  for (CompatibleSets::SetTy &Set : Sets) {
2439    if (CompatibleSets::shouldBelongToSameSet({Set.front(), II}))
2440      return Set;
2441  }
2442
2443  // Otherwise, we either had no sets yet, or this invoke forms a new set.
2444  return Sets.emplace_back();
2445}
2446
2447void CompatibleSets::insert(InvokeInst *II) {
2448  getCompatibleSet(II).emplace_back(II);
2449}
2450
2451bool CompatibleSets::shouldBelongToSameSet(ArrayRef<InvokeInst *> Invokes) {
2452  assert(Invokes.size() == 2 && "Always called with exactly two candidates.");
2453
2454  // Can we theoretically merge these `invoke`s?
2455  auto IsIllegalToMerge = [](InvokeInst *II) {
2456    return II->cannotMerge() || II->isInlineAsm();
2457  };
2458  if (any_of(Invokes, IsIllegalToMerge))
2459    return false;
2460
2461  // Either both `invoke`s must be   direct,
2462  // or     both `invoke`s must be indirect.
2463  auto IsIndirectCall = [](InvokeInst *II) { return II->isIndirectCall(); };
2464  bool HaveIndirectCalls = any_of(Invokes, IsIndirectCall);
2465  bool AllCallsAreIndirect = all_of(Invokes, IsIndirectCall);
2466  if (HaveIndirectCalls) {
2467    if (!AllCallsAreIndirect)
2468      return false;
2469  } else {
2470    // All callees must be identical.
2471    Value *Callee = nullptr;
2472    for (InvokeInst *II : Invokes) {
2473      Value *CurrCallee = II->getCalledOperand();
2474      assert(CurrCallee && "There is always a called operand.");
2475      if (!Callee)
2476        Callee = CurrCallee;
2477      else if (Callee != CurrCallee)
2478        return false;
2479    }
2480  }
2481
2482  // Either both `invoke`s must not have a normal destination,
2483  // or     both `invoke`s must     have a normal destination,
2484  auto HasNormalDest = [](InvokeInst *II) {
2485    return !isa<UnreachableInst>(II->getNormalDest()->getFirstNonPHIOrDbg());
2486  };
2487  if (any_of(Invokes, HasNormalDest)) {
2488    // Do not merge `invoke` that does not have a normal destination with one
2489    // that does have a normal destination, even though doing so would be legal.
2490    if (!all_of(Invokes, HasNormalDest))
2491      return false;
2492
2493    // All normal destinations must be identical.
2494    BasicBlock *NormalBB = nullptr;
2495    for (InvokeInst *II : Invokes) {
2496      BasicBlock *CurrNormalBB = II->getNormalDest();
2497      assert(CurrNormalBB && "There is always a 'continue to' basic block.");
2498      if (!NormalBB)
2499        NormalBB = CurrNormalBB;
2500      else if (NormalBB != CurrNormalBB)
2501        return false;
2502    }
2503
2504    // In the normal destination, the incoming values for these two `invoke`s
2505    // must be compatible.
2506    SmallPtrSet<Value *, 16> EquivalenceSet(Invokes.begin(), Invokes.end());
2507    if (!IncomingValuesAreCompatible(
2508            NormalBB, {Invokes[0]->getParent(), Invokes[1]->getParent()},
2509            &EquivalenceSet))
2510      return false;
2511  }
2512
2513#ifndef NDEBUG
2514  // All unwind destinations must be identical.
2515  // We know that because we have started from said unwind destination.
2516  BasicBlock *UnwindBB = nullptr;
2517  for (InvokeInst *II : Invokes) {
2518    BasicBlock *CurrUnwindBB = II->getUnwindDest();
2519    assert(CurrUnwindBB && "There is always an 'unwind to' basic block.");
2520    if (!UnwindBB)
2521      UnwindBB = CurrUnwindBB;
2522    else
2523      assert(UnwindBB == CurrUnwindBB && "Unexpected unwind destination.");
2524  }
2525#endif
2526
2527  // In the unwind destination, the incoming values for these two `invoke`s
2528  // must be compatible.
2529  if (!IncomingValuesAreCompatible(
2530          Invokes.front()->getUnwindDest(),
2531          {Invokes[0]->getParent(), Invokes[1]->getParent()}))
2532    return false;
2533
2534  // Ignoring arguments, these `invoke`s must be identical,
2535  // including operand bundles.
2536  const InvokeInst *II0 = Invokes.front();
2537  for (auto *II : Invokes.drop_front())
2538    if (!II->isSameOperationAs(II0))
2539      return false;
2540
2541  // Can we theoretically form the data operands for the merged `invoke`?
2542  auto IsIllegalToMergeArguments = [](auto Ops) {
2543    Use &U0 = std::get<0>(Ops);
2544    Use &U1 = std::get<1>(Ops);
2545    if (U0 == U1)
2546      return false;
2547    return U0->getType()->isTokenTy() ||
2548           !canReplaceOperandWithVariable(cast<Instruction>(U0.getUser()),
2549                                          U0.getOperandNo());
2550  };
2551  assert(Invokes.size() == 2 && "Always called with exactly two candidates.");
2552  if (any_of(zip(Invokes[0]->data_ops(), Invokes[1]->data_ops()),
2553             IsIllegalToMergeArguments))
2554    return false;
2555
2556  return true;
2557}
2558
2559} // namespace
2560
2561// Merge all invokes in the provided set, all of which are compatible
2562// as per the `CompatibleSets::shouldBelongToSameSet()`.
2563static void MergeCompatibleInvokesImpl(ArrayRef<InvokeInst *> Invokes,
2564                                       DomTreeUpdater *DTU) {
2565  assert(Invokes.size() >= 2 && "Must have at least two invokes to merge.");
2566
2567  SmallVector<DominatorTree::UpdateType, 8> Updates;
2568  if (DTU)
2569    Updates.reserve(2 + 3 * Invokes.size());
2570
2571  bool HasNormalDest =
2572      !isa<UnreachableInst>(Invokes[0]->getNormalDest()->getFirstNonPHIOrDbg());
2573
2574  // Clone one of the invokes into a new basic block.
2575  // Since they are all compatible, it doesn't matter which invoke is cloned.
2576  InvokeInst *MergedInvoke = [&Invokes, HasNormalDest]() {
2577    InvokeInst *II0 = Invokes.front();
2578    BasicBlock *II0BB = II0->getParent();
2579    BasicBlock *InsertBeforeBlock =
2580        II0->getParent()->getIterator()->getNextNode();
2581    Function *Func = II0BB->getParent();
2582    LLVMContext &Ctx = II0->getContext();
2583
2584    BasicBlock *MergedInvokeBB = BasicBlock::Create(
2585        Ctx, II0BB->getName() + ".invoke", Func, InsertBeforeBlock);
2586
2587    auto *MergedInvoke = cast<InvokeInst>(II0->clone());
2588    // NOTE: all invokes have the same attributes, so no handling needed.
2589    MergedInvoke->insertInto(MergedInvokeBB, MergedInvokeBB->end());
2590
2591    if (!HasNormalDest) {
2592      // This set does not have a normal destination,
2593      // so just form a new block with unreachable terminator.
2594      BasicBlock *MergedNormalDest = BasicBlock::Create(
2595          Ctx, II0BB->getName() + ".cont", Func, InsertBeforeBlock);
2596      new UnreachableInst(Ctx, MergedNormalDest);
2597      MergedInvoke->setNormalDest(MergedNormalDest);
2598    }
2599
2600    // The unwind destination, however, remainds identical for all invokes here.
2601
2602    return MergedInvoke;
2603  }();
2604
2605  if (DTU) {
2606    // Predecessor blocks that contained these invokes will now branch to
2607    // the new block that contains the merged invoke, ...
2608    for (InvokeInst *II : Invokes)
2609      Updates.push_back(
2610          {DominatorTree::Insert, II->getParent(), MergedInvoke->getParent()});
2611
2612    // ... which has the new `unreachable` block as normal destination,
2613    // or unwinds to the (same for all `invoke`s in this set) `landingpad`,
2614    for (BasicBlock *SuccBBOfMergedInvoke : successors(MergedInvoke))
2615      Updates.push_back({DominatorTree::Insert, MergedInvoke->getParent(),
2616                         SuccBBOfMergedInvoke});
2617
2618    // Since predecessor blocks now unconditionally branch to a new block,
2619    // they no longer branch to their original successors.
2620    for (InvokeInst *II : Invokes)
2621      for (BasicBlock *SuccOfPredBB : successors(II->getParent()))
2622        Updates.push_back(
2623            {DominatorTree::Delete, II->getParent(), SuccOfPredBB});
2624  }
2625
2626  bool IsIndirectCall = Invokes[0]->isIndirectCall();
2627
2628  // Form the merged operands for the merged invoke.
2629  for (Use &U : MergedInvoke->operands()) {
2630    // Only PHI together the indirect callees and data operands.
2631    if (MergedInvoke->isCallee(&U)) {
2632      if (!IsIndirectCall)
2633        continue;
2634    } else if (!MergedInvoke->isDataOperand(&U))
2635      continue;
2636
2637    // Don't create trivial PHI's with all-identical incoming values.
2638    bool NeedPHI = any_of(Invokes, [&U](InvokeInst *II) {
2639      return II->getOperand(U.getOperandNo()) != U.get();
2640    });
2641    if (!NeedPHI)
2642      continue;
2643
2644    // Form a PHI out of all the data ops under this index.
2645    PHINode *PN = PHINode::Create(
2646        U->getType(), /*NumReservedValues=*/Invokes.size(), "", MergedInvoke);
2647    for (InvokeInst *II : Invokes)
2648      PN->addIncoming(II->getOperand(U.getOperandNo()), II->getParent());
2649
2650    U.set(PN);
2651  }
2652
2653  // We've ensured that each PHI node has compatible (identical) incoming values
2654  // when coming from each of the `invoke`s in the current merge set,
2655  // so update the PHI nodes accordingly.
2656  for (BasicBlock *Succ : successors(MergedInvoke))
2657    AddPredecessorToBlock(Succ, /*NewPred=*/MergedInvoke->getParent(),
2658                          /*ExistPred=*/Invokes.front()->getParent());
2659
2660  // And finally, replace the original `invoke`s with an unconditional branch
2661  // to the block with the merged `invoke`. Also, give that merged `invoke`
2662  // the merged debugloc of all the original `invoke`s.
2663  DILocation *MergedDebugLoc = nullptr;
2664  for (InvokeInst *II : Invokes) {
2665    // Compute the debug location common to all the original `invoke`s.
2666    if (!MergedDebugLoc)
2667      MergedDebugLoc = II->getDebugLoc();
2668    else
2669      MergedDebugLoc =
2670          DILocation::getMergedLocation(MergedDebugLoc, II->getDebugLoc());
2671
2672    // And replace the old `invoke` with an unconditionally branch
2673    // to the block with the merged `invoke`.
2674    for (BasicBlock *OrigSuccBB : successors(II->getParent()))
2675      OrigSuccBB->removePredecessor(II->getParent());
2676    BranchInst::Create(MergedInvoke->getParent(), II->getParent());
2677    II->replaceAllUsesWith(MergedInvoke);
2678    II->eraseFromParent();
2679    ++NumInvokesMerged;
2680  }
2681  MergedInvoke->setDebugLoc(MergedDebugLoc);
2682  ++NumInvokeSetsFormed;
2683
2684  if (DTU)
2685    DTU->applyUpdates(Updates);
2686}
2687
2688/// If this block is a `landingpad` exception handling block, categorize all
2689/// the predecessor `invoke`s into sets, with all `invoke`s in each set
2690/// being "mergeable" together, and then merge invokes in each set together.
2691///
2692/// This is a weird mix of hoisting and sinking. Visually, it goes from:
2693///          [...]        [...]
2694///            |            |
2695///        [invoke0]    [invoke1]
2696///           / \          / \
2697///     [cont0] [landingpad] [cont1]
2698/// to:
2699///      [...] [...]
2700///          \ /
2701///       [invoke]
2702///          / \
2703///     [cont] [landingpad]
2704///
2705/// But of course we can only do that if the invokes share the `landingpad`,
2706/// edges invoke0->cont0 and invoke1->cont1 are "compatible",
2707/// and the invoked functions are "compatible".
2708static bool MergeCompatibleInvokes(BasicBlock *BB, DomTreeUpdater *DTU) {
2709  if (!EnableMergeCompatibleInvokes)
2710    return false;
2711
2712  bool Changed = false;
2713
2714  // FIXME: generalize to all exception handling blocks?
2715  if (!BB->isLandingPad())
2716    return Changed;
2717
2718  CompatibleSets Grouper;
2719
2720  // Record all the predecessors of this `landingpad`. As per verifier,
2721  // the only allowed predecessor is the unwind edge of an `invoke`.
2722  // We want to group "compatible" `invokes` into the same set to be merged.
2723  for (BasicBlock *PredBB : predecessors(BB))
2724    Grouper.insert(cast<InvokeInst>(PredBB->getTerminator()));
2725
2726  // And now, merge `invoke`s that were grouped togeter.
2727  for (ArrayRef<InvokeInst *> Invokes : Grouper.Sets) {
2728    if (Invokes.size() < 2)
2729      continue;
2730    Changed = true;
2731    MergeCompatibleInvokesImpl(Invokes, DTU);
2732  }
2733
2734  return Changed;
2735}
2736
2737namespace {
2738/// Track ephemeral values, which should be ignored for cost-modelling
2739/// purposes. Requires walking instructions in reverse order.
2740class EphemeralValueTracker {
2741  SmallPtrSet<const Instruction *, 32> EphValues;
2742
2743  bool isEphemeral(const Instruction *I) {
2744    if (isa<AssumeInst>(I))
2745      return true;
2746    return !I->mayHaveSideEffects() && !I->isTerminator() &&
2747           all_of(I->users(), [&](const User *U) {
2748             return EphValues.count(cast<Instruction>(U));
2749           });
2750  }
2751
2752public:
2753  bool track(const Instruction *I) {
2754    if (isEphemeral(I)) {
2755      EphValues.insert(I);
2756      return true;
2757    }
2758    return false;
2759  }
2760
2761  bool contains(const Instruction *I) const { return EphValues.contains(I); }
2762};
2763} // namespace
2764
2765/// Determine if we can hoist sink a sole store instruction out of a
2766/// conditional block.
2767///
2768/// We are looking for code like the following:
2769///   BrBB:
2770///     store i32 %add, i32* %arrayidx2
2771///     ... // No other stores or function calls (we could be calling a memory
2772///     ... // function).
2773///     %cmp = icmp ult %x, %y
2774///     br i1 %cmp, label %EndBB, label %ThenBB
2775///   ThenBB:
2776///     store i32 %add5, i32* %arrayidx2
2777///     br label EndBB
2778///   EndBB:
2779///     ...
2780///   We are going to transform this into:
2781///   BrBB:
2782///     store i32 %add, i32* %arrayidx2
2783///     ... //
2784///     %cmp = icmp ult %x, %y
2785///     %add.add5 = select i1 %cmp, i32 %add, %add5
2786///     store i32 %add.add5, i32* %arrayidx2
2787///     ...
2788///
2789/// \return The pointer to the value of the previous store if the store can be
2790///         hoisted into the predecessor block. 0 otherwise.
2791static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB,
2792                                     BasicBlock *StoreBB, BasicBlock *EndBB) {
2793  StoreInst *StoreToHoist = dyn_cast<StoreInst>(I);
2794  if (!StoreToHoist)
2795    return nullptr;
2796
2797  // Volatile or atomic.
2798  if (!StoreToHoist->isSimple())
2799    return nullptr;
2800
2801  Value *StorePtr = StoreToHoist->getPointerOperand();
2802  Type *StoreTy = StoreToHoist->getValueOperand()->getType();
2803
2804  // Look for a store to the same pointer in BrBB.
2805  unsigned MaxNumInstToLookAt = 9;
2806  // Skip pseudo probe intrinsic calls which are not really killing any memory
2807  // accesses.
2808  for (Instruction &CurI : reverse(BrBB->instructionsWithoutDebug(true))) {
2809    if (!MaxNumInstToLookAt)
2810      break;
2811    --MaxNumInstToLookAt;
2812
2813    // Could be calling an instruction that affects memory like free().
2814    if (CurI.mayWriteToMemory() && !isa<StoreInst>(CurI))
2815      return nullptr;
2816
2817    if (auto *SI = dyn_cast<StoreInst>(&CurI)) {
2818      // Found the previous store to same location and type. Make sure it is
2819      // simple, to avoid introducing a spurious non-atomic write after an
2820      // atomic write.
2821      if (SI->getPointerOperand() == StorePtr &&
2822          SI->getValueOperand()->getType() == StoreTy && SI->isSimple())
2823        // Found the previous store, return its value operand.
2824        return SI->getValueOperand();
2825      return nullptr; // Unknown store.
2826    }
2827
2828    if (auto *LI = dyn_cast<LoadInst>(&CurI)) {
2829      if (LI->getPointerOperand() == StorePtr && LI->getType() == StoreTy &&
2830          LI->isSimple()) {
2831        // Local objects (created by an `alloca` instruction) are always
2832        // writable, so once we are past a read from a location it is valid to
2833        // also write to that same location.
2834        // If the address of the local object never escapes the function, that
2835        // means it's never concurrently read or written, hence moving the store
2836        // from under the condition will not introduce a data race.
2837        auto *AI = dyn_cast<AllocaInst>(getUnderlyingObject(StorePtr));
2838        if (AI && !PointerMayBeCaptured(AI, false, true))
2839          // Found a previous load, return it.
2840          return LI;
2841      }
2842      // The load didn't work out, but we may still find a store.
2843    }
2844  }
2845
2846  return nullptr;
2847}
2848
2849/// Estimate the cost of the insertion(s) and check that the PHI nodes can be
2850/// converted to selects.
2851static bool validateAndCostRequiredSelects(BasicBlock *BB, BasicBlock *ThenBB,
2852                                           BasicBlock *EndBB,
2853                                           unsigned &SpeculatedInstructions,
2854                                           InstructionCost &Cost,
2855                                           const TargetTransformInfo &TTI) {
2856  TargetTransformInfo::TargetCostKind CostKind =
2857    BB->getParent()->hasMinSize()
2858    ? TargetTransformInfo::TCK_CodeSize
2859    : TargetTransformInfo::TCK_SizeAndLatency;
2860
2861  bool HaveRewritablePHIs = false;
2862  for (PHINode &PN : EndBB->phis()) {
2863    Value *OrigV = PN.getIncomingValueForBlock(BB);
2864    Value *ThenV = PN.getIncomingValueForBlock(ThenBB);
2865
2866    // FIXME: Try to remove some of the duplication with
2867    // hoistCommonCodeFromSuccessors. Skip PHIs which are trivial.
2868    if (ThenV == OrigV)
2869      continue;
2870
2871    Cost += TTI.getCmpSelInstrCost(Instruction::Select, PN.getType(), nullptr,
2872                                   CmpInst::BAD_ICMP_PREDICATE, CostKind);
2873
2874    // Don't convert to selects if we could remove undefined behavior instead.
2875    if (passingValueIsAlwaysUndefined(OrigV, &PN) ||
2876        passingValueIsAlwaysUndefined(ThenV, &PN))
2877      return false;
2878
2879    HaveRewritablePHIs = true;
2880    ConstantExpr *OrigCE = dyn_cast<ConstantExpr>(OrigV);
2881    ConstantExpr *ThenCE = dyn_cast<ConstantExpr>(ThenV);
2882    if (!OrigCE && !ThenCE)
2883      continue; // Known cheap (FIXME: Maybe not true for aggregates).
2884
2885    InstructionCost OrigCost = OrigCE ? computeSpeculationCost(OrigCE, TTI) : 0;
2886    InstructionCost ThenCost = ThenCE ? computeSpeculationCost(ThenCE, TTI) : 0;
2887    InstructionCost MaxCost =
2888        2 * PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;
2889    if (OrigCost + ThenCost > MaxCost)
2890      return false;
2891
2892    // Account for the cost of an unfolded ConstantExpr which could end up
2893    // getting expanded into Instructions.
2894    // FIXME: This doesn't account for how many operations are combined in the
2895    // constant expression.
2896    ++SpeculatedInstructions;
2897    if (SpeculatedInstructions > 1)
2898      return false;
2899  }
2900
2901  return HaveRewritablePHIs;
2902}
2903
2904/// Speculate a conditional basic block flattening the CFG.
2905///
2906/// Note that this is a very risky transform currently. Speculating
2907/// instructions like this is most often not desirable. Instead, there is an MI
2908/// pass which can do it with full awareness of the resource constraints.
2909/// However, some cases are "obvious" and we should do directly. An example of
2910/// this is speculating a single, reasonably cheap instruction.
2911///
2912/// There is only one distinct advantage to flattening the CFG at the IR level:
2913/// it makes very common but simplistic optimizations such as are common in
2914/// instcombine and the DAG combiner more powerful by removing CFG edges and
2915/// modeling their effects with easier to reason about SSA value graphs.
2916///
2917///
2918/// An illustration of this transform is turning this IR:
2919/// \code
2920///   BB:
2921///     %cmp = icmp ult %x, %y
2922///     br i1 %cmp, label %EndBB, label %ThenBB
///   ThenBB:
///     %sub = sub %x, %y
///     br label %EndBB
///   EndBB:
///     %phi = phi [ %sub, %ThenBB ], [ 0, %BB ]
///     ...
2929/// \endcode
2930///
2931/// Into this IR:
2932/// \code
2933///   BB:
2934///     %cmp = icmp ult %x, %y
2935///     %sub = sub %x, %y
2936///     %cond = select i1 %cmp, 0, %sub
2937///     ...
2938/// \endcode
2939///
2940/// \returns true if the conditional block is removed.
2941bool SimplifyCFGOpt::SpeculativelyExecuteBB(BranchInst *BI,
2942                                            BasicBlock *ThenBB) {
2943  if (!Options.SpeculateBlocks)
2944    return false;
2945
2946  // Be conservative for now. FP select instruction can often be expensive.
2947  Value *BrCond = BI->getCondition();
2948  if (isa<FCmpInst>(BrCond))
2949    return false;
2950
2951  BasicBlock *BB = BI->getParent();
2952  BasicBlock *EndBB = ThenBB->getTerminator()->getSuccessor(0);
2953  InstructionCost Budget =
2954      PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;
2955
2956  // If ThenBB is actually on the false edge of the conditional branch, remember
2957  // to swap the select operands later.
2958  bool Invert = false;
2959  if (ThenBB != BI->getSuccessor(0)) {
2960    assert(ThenBB == BI->getSuccessor(1) && "No edge from 'if' block?");
2961    Invert = true;
2962  }
2963  assert(EndBB == BI->getSuccessor(!Invert) && "No edge from to end block");
2964
2965  // If the branch is non-unpredictable, and is predicted to *not* branch to
2966  // the `then` block, then avoid speculating it.
2967  if (!BI->getMetadata(LLVMContext::MD_unpredictable)) {
2968    uint64_t TWeight, FWeight;
2969    if (extractBranchWeights(*BI, TWeight, FWeight) &&
2970        (TWeight + FWeight) != 0) {
2971      uint64_t EndWeight = Invert ? TWeight : FWeight;
2972      BranchProbability BIEndProb =
2973          BranchProbability::getBranchProbability(EndWeight, TWeight + FWeight);
2974      BranchProbability Likely = TTI.getPredictableBranchThreshold();
2975      if (BIEndProb >= Likely)
2976        return false;
2977    }
2978  }
2979
2980  // Keep a count of how many times instructions are used within ThenBB when
2981  // they are candidates for sinking into ThenBB. Specifically:
2982  // - They are defined in BB, and
2983  // - They have no side effects, and
2984  // - All of their uses are in ThenBB.
2985  SmallDenseMap<Instruction *, unsigned, 4> SinkCandidateUseCounts;
2986
2987  SmallVector<Instruction *, 4> SpeculatedDbgIntrinsics;
2988
2989  unsigned SpeculatedInstructions = 0;
2990  Value *SpeculatedStoreValue = nullptr;
2991  StoreInst *SpeculatedStore = nullptr;
2992  EphemeralValueTracker EphTracker;
2993  for (Instruction &I : reverse(drop_end(*ThenBB))) {
2994    // Skip debug info.
2995    if (isa<DbgInfoIntrinsic>(I)) {
2996      SpeculatedDbgIntrinsics.push_back(&I);
2997      continue;
2998    }
2999
3000    // Skip pseudo probes. The consequence is we lose track of the branch
3001    // probability for ThenBB, which is fine since the optimization here takes
3002    // place regardless of the branch probability.
3003    if (isa<PseudoProbeInst>(I)) {
3004      // The probe should be deleted so that it will not be over-counted when
3005      // the samples collected on the non-conditional path are counted towards
3006      // the conditional path. We leave it for the counts inference algorithm to
3007      // figure out a proper count for an unknown probe.
3008      SpeculatedDbgIntrinsics.push_back(&I);
3009      continue;
3010    }
3011
3012    // Ignore ephemeral values, they will be dropped by the transform.
3013    if (EphTracker.track(&I))
3014      continue;
3015
3016    // Only speculatively execute a single instruction (not counting the
3017    // terminator) for now.
3018    ++SpeculatedInstructions;
3019    if (SpeculatedInstructions > 1)
3020      return false;
3021
3022    // Don't hoist the instruction if it's unsafe or expensive.
3023    if (!isSafeToSpeculativelyExecute(&I) &&
3024        !(HoistCondStores && (SpeculatedStoreValue = isSafeToSpeculateStore(
3025                                  &I, BB, ThenBB, EndBB))))
3026      return false;
3027    if (!SpeculatedStoreValue &&
3028        computeSpeculationCost(&I, TTI) >
3029            PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic)
3030      return false;
3031
3032    // Store the store speculation candidate.
3033    if (SpeculatedStoreValue)
3034      SpeculatedStore = cast<StoreInst>(&I);
3035
3036    // Do not hoist the instruction if any of its operands are defined but not
3037    // used in BB. The transformation will prevent the operand from
3038    // being sunk into the use block.
3039    for (Use &Op : I.operands()) {
3040      Instruction *OpI = dyn_cast<Instruction>(Op);
3041      if (!OpI || OpI->getParent() != BB || OpI->mayHaveSideEffects())
3042        continue; // Not a candidate for sinking.
3043
3044      ++SinkCandidateUseCounts[OpI];
3045    }
3046  }
3047
3048  // Consider any sink candidates which are only used in ThenBB as costs for
3049  // speculation. Note, while we iterate over a DenseMap here, we are summing
3050  // and so iteration order isn't significant.
3051  for (const auto &[Inst, Count] : SinkCandidateUseCounts)
3052    if (Inst->hasNUses(Count)) {
3053      ++SpeculatedInstructions;
3054      if (SpeculatedInstructions > 1)
3055        return false;
3056    }
3057
3058  // Check that we can insert the selects and that it's not too expensive to do
3059  // so.
3060  bool Convert = SpeculatedStore != nullptr;
3061  InstructionCost Cost = 0;
3062  Convert |= validateAndCostRequiredSelects(BB, ThenBB, EndBB,
3063                                            SpeculatedInstructions,
3064                                            Cost, TTI);
3065  if (!Convert || Cost > Budget)
3066    return false;
3067
3068  // If we get here, we can hoist the instruction and if-convert.
3069  LLVM_DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";);
3070
3071  // Insert a select of the value of the speculated store.
3072  if (SpeculatedStoreValue) {
3073    IRBuilder<NoFolder> Builder(BI);
3074    Value *OrigV = SpeculatedStore->getValueOperand();
3075    Value *TrueV = SpeculatedStore->getValueOperand();
3076    Value *FalseV = SpeculatedStoreValue;
3077    if (Invert)
3078      std::swap(TrueV, FalseV);
3079    Value *S = Builder.CreateSelect(
3080        BrCond, TrueV, FalseV, "spec.store.select", BI);
3081    SpeculatedStore->setOperand(0, S);
3082    SpeculatedStore->applyMergedLocation(BI->getDebugLoc(),
3083                                         SpeculatedStore->getDebugLoc());
3084    // The value stored is still conditional, but the store itself is now
    // unconditionally executed, so we must be sure that any linked dbg.assign
3086    // intrinsics are tracking the new stored value (the result of the
3087    // select). If we don't, and the store were to be removed by another pass
3088    // (e.g. DSE), then we'd eventually end up emitting a location describing
3089    // the conditional value, unconditionally.
3090    //
3091    // === Before this transformation ===
3092    // pred:
3093    //   store %one, %x.dest, !DIAssignID !1
3094    //   dbg.assign %one, "x", ..., !1, ...
3095    //   br %cond if.then
3096    //
3097    // if.then:
3098    //   store %two, %x.dest, !DIAssignID !2
3099    //   dbg.assign %two, "x", ..., !2, ...
3100    //
3101    // === After this transformation ===
3102    // pred:
3103    //   store %one, %x.dest, !DIAssignID !1
3104    //   dbg.assign %one, "x", ..., !1
    //   ...
3106    //   %merge = select %cond, %two, %one
3107    //   store %merge, %x.dest, !DIAssignID !2
3108    //   dbg.assign %merge, "x", ..., !2
3109    auto replaceVariable = [OrigV, S](auto *DbgAssign) {
3110      if (llvm::is_contained(DbgAssign->location_ops(), OrigV))
3111        DbgAssign->replaceVariableLocationOp(OrigV, S);
3112    };
3113    for_each(at::getAssignmentMarkers(SpeculatedStore), replaceVariable);
3114    for_each(at::getDPVAssignmentMarkers(SpeculatedStore), replaceVariable);
3115  }
3116
3117  // Metadata can be dependent on the condition we are hoisting above.
3118  // Strip all UB-implying metadata on the instruction. Drop the debug loc
3119  // to avoid making it appear as if the condition is a constant, which would
3120  // be misleading while debugging.
3121  // Similarly strip attributes that maybe dependent on condition we are
3122  // hoisting above.
3123  for (auto &I : make_early_inc_range(*ThenBB)) {
3124    if (!SpeculatedStoreValue || &I != SpeculatedStore) {
3125      // Don't update the DILocation of dbg.assign intrinsics.
3126      if (!isa<DbgAssignIntrinsic>(&I))
3127        I.setDebugLoc(DebugLoc());
3128    }
3129    I.dropUBImplyingAttrsAndMetadata();
3130
3131    // Drop ephemeral values.
3132    if (EphTracker.contains(&I)) {
3133      I.replaceAllUsesWith(PoisonValue::get(I.getType()));
3134      I.eraseFromParent();
3135    }
3136  }
3137
3138  // Hoist the instructions.
3139  // In "RemoveDIs" non-instr debug-info mode, drop DPValues attached to these
3140  // instructions, in the same way that dbg.value intrinsics are dropped at the
3141  // end of this block.
3142  for (auto &It : make_range(ThenBB->begin(), ThenBB->end()))
3143    for (DPValue &DPV : make_early_inc_range(It.getDbgValueRange()))
3144      if (!DPV.isDbgAssign())
3145        It.dropOneDbgValue(&DPV);
3146  BB->splice(BI->getIterator(), ThenBB, ThenBB->begin(),
3147             std::prev(ThenBB->end()));
3148
3149  // Insert selects and rewrite the PHI operands.
3150  IRBuilder<NoFolder> Builder(BI);
3151  for (PHINode &PN : EndBB->phis()) {
3152    unsigned OrigI = PN.getBasicBlockIndex(BB);
3153    unsigned ThenI = PN.getBasicBlockIndex(ThenBB);
3154    Value *OrigV = PN.getIncomingValue(OrigI);
3155    Value *ThenV = PN.getIncomingValue(ThenI);
3156
3157    // Skip PHIs which are trivial.
3158    if (OrigV == ThenV)
3159      continue;
3160
3161    // Create a select whose true value is the speculatively executed value and
3162    // false value is the pre-existing value. Swap them if the branch
3163    // destinations were inverted.
3164    Value *TrueV = ThenV, *FalseV = OrigV;
3165    if (Invert)
3166      std::swap(TrueV, FalseV);
3167    Value *V = Builder.CreateSelect(BrCond, TrueV, FalseV, "spec.select", BI);
3168    PN.setIncomingValue(OrigI, V);
3169    PN.setIncomingValue(ThenI, V);
3170  }
3171
3172  // Remove speculated dbg intrinsics.
3173  // FIXME: Is it possible to do this in a more elegant way? Moving/merging the
3174  // dbg value for the different flows and inserting it after the select.
3175  for (Instruction *I : SpeculatedDbgIntrinsics) {
3176    // We still want to know that an assignment took place so don't remove
3177    // dbg.assign intrinsics.
3178    if (!isa<DbgAssignIntrinsic>(I))
3179      I->eraseFromParent();
3180  }
3181
3182  ++NumSpeculations;
3183  return true;
3184}
3185
3186/// Return true if we can thread a branch across this block.
3187static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) {
3188  int Size = 0;
3189  EphemeralValueTracker EphTracker;
3190
3191  // Walk the loop in reverse so that we can identify ephemeral values properly
3192  // (values only feeding assumes).
3193  for (Instruction &I : reverse(BB->instructionsWithoutDebug(false))) {
3194    // Can't fold blocks that contain noduplicate or convergent calls.
3195    if (CallInst *CI = dyn_cast<CallInst>(&I))
3196      if (CI->cannotDuplicate() || CI->isConvergent())
3197        return false;
3198
3199    // Ignore ephemeral values which are deleted during codegen.
3200    // We will delete Phis while threading, so Phis should not be accounted in
3201    // block's size.
3202    if (!EphTracker.track(&I) && !isa<PHINode>(I)) {
3203      if (Size++ > MaxSmallBlockSize)
3204        return false; // Don't clone large BB's.
3205    }
3206
3207    // We can only support instructions that do not define values that are
3208    // live outside of the current basic block.
3209    for (User *U : I.users()) {
3210      Instruction *UI = cast<Instruction>(U);
3211      if (UI->getParent() != BB || isa<PHINode>(UI))
3212        return false;
3213    }
3214
3215    // Looks ok, continue checking.
3216  }
3217
3218  return true;
3219}
3220
3221static ConstantInt *getKnownValueOnEdge(Value *V, BasicBlock *From,
3222                                        BasicBlock *To) {
3223  // Don't look past the block defining the value, we might get the value from
3224  // a previous loop iteration.
3225  auto *I = dyn_cast<Instruction>(V);
3226  if (I && I->getParent() == To)
3227    return nullptr;
3228
3229  // We know the value if the From block branches on it.
3230  auto *BI = dyn_cast<BranchInst>(From->getTerminator());
3231  if (BI && BI->isConditional() && BI->getCondition() == V &&
3232      BI->getSuccessor(0) != BI->getSuccessor(1))
3233    return BI->getSuccessor(0) == To ? ConstantInt::getTrue(BI->getContext())
3234                                     : ConstantInt::getFalse(BI->getContext());
3235
3236  return nullptr;
3237}
3238
/// If we have a conditional branch on something for which we know the constant
/// value in predecessors (e.g. a phi node in the current block), thread edges
/// from the predecessor to their ultimate destination.
///
/// At most one group of predecessors is threaded per call. The result is
/// tri-state:
///  - std::nullopt: a group of predecessors was threaded; the caller should
///    call again to handle any other known constants.
///  - true: the condition was a single-entry PHI in this block and the
///    block's single-entry PHIs were folded.
///  - false: no transformation was performed.
///
/// \p DTU and \p AC are null-checked before being dereferenced directly in
/// this function; they are otherwise only forwarded to helpers.
static std::optional<bool>
FoldCondBranchOnValueKnownInPredecessorImpl(BranchInst *BI, DomTreeUpdater *DTU,
                                            const DataLayout &DL,
                                            AssumptionCache *AC) {
  // Maps each constant the condition is known to take to the set of
  // predecessor blocks from which it takes that value.
  SmallMapVector<ConstantInt *, SmallSetVector<BasicBlock *, 2>, 2> KnownValues;
  BasicBlock *BB = BI->getParent();
  Value *Cond = BI->getCondition();
  PHINode *PN = dyn_cast<PHINode>(Cond);
  if (PN && PN->getParent() == BB) {
    // Degenerate case of a single entry PHI.
    if (PN->getNumIncomingValues() == 1) {
      FoldSingleEntryPHINodes(PN->getParent());
      return true;
    }

    // The condition is a PHI in this block: every constant incoming value is
    // a known value on the corresponding incoming edge.
    for (Use &U : PN->incoming_values())
      if (auto *CB = dyn_cast<ConstantInt>(U))
        KnownValues[CB].insert(PN->getIncomingBlock(U));
  } else {
    // Otherwise, the condition is known on an edge if the predecessor itself
    // branches on it.
    for (BasicBlock *Pred : predecessors(BB)) {
      if (ConstantInt *CB = getKnownValueOnEdge(Cond, Pred, BB))
        KnownValues[CB].insert(Pred);
    }
  }

  if (KnownValues.empty())
    return false;

  // Now we know that this block has multiple preds and two succs.
  // Check that the block is small enough and values defined in the block are
  // not used outside of it.
  if (!BlockIsSimpleEnoughToThreadThrough(BB))
    return false;

  for (const auto &Pair : KnownValues) {
    // Okay, we now know that all edges from PredBB should be revectored to
    // branch to RealDest.
    ConstantInt *CB = Pair.first;
    ArrayRef<BasicBlock *> PredBBs = Pair.second.getArrayRef();
    // A true (non-zero) condition selects successor 0, a false one selects
    // successor 1.
    BasicBlock *RealDest = BI->getSuccessor(!CB->getZExtValue());

    if (RealDest == BB)
      continue; // Skip self loops.

    // Skip if the predecessor's terminator is an indirect branch.
    if (any_of(PredBBs, [](BasicBlock *PredBB) {
          return isa<IndirectBrInst>(PredBB->getTerminator());
        }))
      continue;

    LLVM_DEBUG({
      dbgs() << "Condition " << *Cond << " in " << BB->getName()
             << " has value " << *Pair.first << " in predecessors:\n";
      for (const BasicBlock *PredBB : Pair.second)
        dbgs() << "  " << PredBB->getName() << "\n";
      dbgs() << "Threading to destination " << RealDest->getName() << ".\n";
    });

    // Split the predecessors we are threading into a new edge block. We'll
    // clone the instructions into this block, and then redirect it to RealDest.
    BasicBlock *EdgeBB = SplitBlockPredecessors(BB, PredBBs, ".critedge", DTU);

    // TODO: These just exist to reduce test diff, we can drop them if we like.
    EdgeBB->setName(RealDest->getName() + ".critedge");
    EdgeBB->moveBefore(RealDest);

    // Update PHI nodes.
    AddPredecessorToBlock(RealDest, EdgeBB, BB);

    // BB may have instructions that are being threaded over.  Clone these
    // instructions into EdgeBB.  We know that there will be no uses of the
    // cloned instructions outside of EdgeBB.
    BasicBlock::iterator InsertPt = EdgeBB->getFirstInsertionPt();
    DenseMap<Value *, Value *> TranslateMap; // Track translated values.
    // On this edge the branch condition is the known constant.
    TranslateMap[Cond] = CB;

    // RemoveDIs: track instructions that we optimise away while folding, so
    // that we can copy DPValues from them later.
    BasicBlock::iterator SrcDbgCursor = BB->begin();
    for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) {
      if (PHINode *PN = dyn_cast<PHINode>(BBI)) {
        // PHIs are not cloned; on this edge they are simply the value
        // incoming from EdgeBB.
        TranslateMap[PN] = PN->getIncomingValueForBlock(EdgeBB);
        continue;
      }
      // Clone the instruction.
      Instruction *N = BBI->clone();
      // Insert the new instruction into its new home.
      N->insertInto(EdgeBB, InsertPt);

      if (BBI->hasName())
        N->setName(BBI->getName() + ".c");

      // Update operands due to translation.
      for (Use &Op : N->operands()) {
        DenseMap<Value *, Value *>::iterator PI = TranslateMap.find(Op);
        if (PI != TranslateMap.end())
          Op = PI->second;
      }

      // Check for trivial simplification.
      if (Value *V = simplifyInstruction(N, {DL, nullptr, nullptr, AC})) {
        if (!BBI->use_empty())
          TranslateMap[&*BBI] = V;
        // The clone is kept if it may have side effects, even though its
        // value simplified.
        if (!N->mayHaveSideEffects()) {
          N->eraseFromParent(); // Instruction folded away, don't need actual
                                // inst
          N = nullptr;
        }
      } else {
        if (!BBI->use_empty())
          TranslateMap[&*BBI] = N;
      }
      if (N) {
        // Copy all debug-info attached to instructions from the last we
        // successfully clone, up to this instruction (they might have been
        // folded away).
        for (; SrcDbgCursor != BBI; ++SrcDbgCursor)
          N->cloneDebugInfoFrom(&*SrcDbgCursor);
        SrcDbgCursor = std::next(BBI);
        // Clone debug-info on this instruction too.
        N->cloneDebugInfoFrom(&*BBI);

        // Register the new instruction with the assumption cache if necessary.
        if (auto *Assume = dyn_cast<AssumeInst>(N))
          if (AC)
            AC->registerAssumption(Assume);
      }
    }

    // Copy debug-info from any trailing source instructions whose clones were
    // folded away, and from the branch itself, onto the instruction at the
    // insertion point.
    for (; &*SrcDbgCursor != BI; ++SrcDbgCursor)
      InsertPt->cloneDebugInfoFrom(&*SrcDbgCursor);
    InsertPt->cloneDebugInfoFrom(BI);

    // Redirect the edge block from BB to the real destination.
    BB->removePredecessor(EdgeBB);
    BranchInst *EdgeBI = cast<BranchInst>(EdgeBB->getTerminator());
    EdgeBI->setSuccessor(0, RealDest);
    EdgeBI->setDebugLoc(BI->getDebugLoc());

    if (DTU) {
      SmallVector<DominatorTree::UpdateType, 2> Updates;
      Updates.push_back({DominatorTree::Delete, EdgeBB, BB});
      Updates.push_back({DominatorTree::Insert, EdgeBB, RealDest});
      DTU->applyUpdates(Updates);
    }

    // For simplicity, we created a separate basic block for the edge. Merge
    // it back into the predecessor if possible. This not only avoids
    // unnecessary SimplifyCFG iterations, but also makes sure that we don't
    // bypass the check for trivial cycles above.
    MergeBlockIntoPredecessor(EdgeBB, DTU);

    // Signal repeat, simplifying any other constants. Only the first
    // threadable entry of KnownValues is handled per call.
    return std::nullopt;
  }

  // Every known value was skipped; nothing was threaded.
  return false;
}
3399
3400static bool FoldCondBranchOnValueKnownInPredecessor(BranchInst *BI,
3401                                                    DomTreeUpdater *DTU,
3402                                                    const DataLayout &DL,
3403                                                    AssumptionCache *AC) {
3404  std::optional<bool> Result;
3405  bool EverChanged = false;
3406  do {
3407    // Note that None means "we changed things, but recurse further."
3408    Result = FoldCondBranchOnValueKnownInPredecessorImpl(BI, DTU, DL, AC);
3409    EverChanged |= Result == std::nullopt || *Result;
3410  } while (Result == std::nullopt);
3411  return EverChanged;
3412}
3413
/// Given a BB that starts with the specified two-entry PHI node,
/// see if we can eliminate it.
///
/// On success every PHI node at the start of the block is replaced by a select
/// on the dominating branch condition, the instructions of the conditional
/// block(s) are moved up into the dominating block, and the dominating
/// conditional branch is replaced by an unconditional branch to the block.
///
/// \param PN  A PHI node of the block to flatten; its parent is the merge
///            block.
/// \param TTI Supplies the predictable-branch threshold and is forwarded to
///            the dominatesMergePoint cost checks.
/// \param DTU Dominator tree updater; null-checked before use.
/// \param DL  Data layout used when simplifying the PHI nodes.
/// \returns true if the IR was modified (PHIs were simplified away and/or the
///          if statement was flattened), false otherwise.
static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
                                DomTreeUpdater *DTU, const DataLayout &DL) {
  // Ok, this is a two entry PHI node.  Check to see if this is a simple "if
  // statement", which has a very simple dominance structure.  Basically, we
  // are trying to find the condition that is being branched on, which
  // subsequently causes this merge to happen.  We really want control
  // dependence information for this check, but simplifycfg can't keep it up
  // to date, and this catches most of the cases we care about anyway.
  BasicBlock *BB = PN->getParent();

  BasicBlock *IfTrue, *IfFalse;
  BranchInst *DomBI = GetIfCondition(BB, IfTrue, IfFalse);
  if (!DomBI)
    return false;
  Value *IfCond = DomBI->getCondition();
  // Don't bother if the branch will be constant folded trivially.
  if (isa<ConstantInt>(IfCond))
    return false;

  BasicBlock *DomBlock = DomBI->getParent();
  // Collect the incoming blocks of the PHI that end in an unconditional
  // branch; these are the blocks whose contents would be speculated.
  SmallVector<BasicBlock *, 2> IfBlocks;
  llvm::copy_if(
      PN->blocks(), std::back_inserter(IfBlocks), [](BasicBlock *IfBlock) {
        return cast<BranchInst>(IfBlock->getTerminator())->isUnconditional();
      });
  assert((IfBlocks.size() == 1 || IfBlocks.size() == 2) &&
         "Will have either one or two blocks to speculate.");

  // If the branch is non-unpredictable, see if we either predictably jump to
  // the merge bb (if we have only a single 'then' block), or if we predictably
  // jump to one specific 'then' block (if we have two of them).
  // It isn't beneficial to speculatively execute the code
  // from the block that we know is predictably not entered.
  if (!DomBI->getMetadata(LLVMContext::MD_unpredictable)) {
    uint64_t TWeight, FWeight;
    if (extractBranchWeights(*DomBI, TWeight, FWeight) &&
        (TWeight + FWeight) != 0) {
      BranchProbability BITrueProb =
          BranchProbability::getBranchProbability(TWeight, TWeight + FWeight);
      BranchProbability Likely = TTI.getPredictableBranchThreshold();
      BranchProbability BIFalseProb = BITrueProb.getCompl();
      if (IfBlocks.size() == 1) {
        // Probability of branching straight to the merge block.
        BranchProbability BIBBProb =
            DomBI->getSuccessor(0) == BB ? BITrueProb : BIFalseProb;
        if (BIBBProb >= Likely)
          return false;
      } else {
        if (BITrueProb >= Likely || BIFalseProb >= Likely)
          return false;
      }
    }
  }

  // Don't try to fold an unreachable block. For example, the phi node itself
  // can't be the candidate if-condition for a select that we want to form.
  if (auto *IfCondPhiInst = dyn_cast<PHINode>(IfCond))
    if (IfCondPhiInst->getParent() == BB)
      return false;

  // Okay, we found that we can merge this two-entry phi node into a select.
  // Doing so would require us to fold *all* two entry phi nodes in this block.
  // At some point this becomes non-profitable (particularly if the target
  // doesn't support cmov's).  Only do this transformation if there are two or
  // fewer PHI nodes in this block.
  // NOTE(review): as written, the loop below only bails out once it reaches a
  // fourth PHI (NumPhis == 3), so blocks with up to three PHI nodes are
  // accepted, one more than "two or fewer" suggests. Confirm which is
  // intended before changing either the comment or the check.
  unsigned NumPhis = 0;
  for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++NumPhis, ++I)
    if (NumPhis > 2)
      return false;

  // Loop over the PHI's seeing if we can promote them all to select
  // instructions.  While we are at it, keep track of the instructions
  // that need to be moved to the dominating block.
  SmallPtrSet<Instruction *, 4> AggressiveInsts;
  InstructionCost Cost = 0;
  InstructionCost Budget =
      TwoEntryPHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;

  bool Changed = false;
  for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) {
    // Advance the iterator before possibly erasing the PHI it points at.
    PHINode *PN = cast<PHINode>(II++);
    if (Value *V = simplifyInstruction(PN, {DL, PN})) {
      PN->replaceAllUsesWith(V);
      PN->eraseFromParent();
      Changed = true;
      continue;
    }

    if (!dominatesMergePoint(PN->getIncomingValue(0), BB, AggressiveInsts,
                             Cost, Budget, TTI) ||
        !dominatesMergePoint(PN->getIncomingValue(1), BB, AggressiveInsts,
                             Cost, Budget, TTI))
      return Changed;
  }

  // If we folded the first phi, PN dangles at this point.  Refresh it.  If
  // we ran out of PHIs then we simplified them all.
  PN = dyn_cast<PHINode>(BB->begin());
  if (!PN)
    return true;

  // Return true if at least one of these is a 'not', and another is either
  // a 'not' too, or a constant.
  auto CanHoistNotFromBothValues = [](Value *V0, Value *V1) {
    // Normalize so that, if either value is a 'not', V0 is one.
    if (!match(V0, m_Not(m_Value())))
      std::swap(V0, V1);
    auto Invertible = m_CombineOr(m_Not(m_Value()), m_AnyIntegralConstant());
    return match(V0, m_Not(m_Value())) && match(V1, Invertible);
  };

  // Don't fold i1 branches on PHIs which contain binary operators or
  // (possibly inverted) select form of or/ands,  unless one of
  // the incoming values is a 'not' and another one is freely invertible.
  // These can often be turned into switches and other things.
  auto IsBinOpOrAnd = [](Value *V) {
    return match(
        V, m_CombineOr(
               m_BinOp(),
               m_CombineOr(m_Select(m_Value(), m_ImmConstant(), m_Value()),
                           m_Select(m_Value(), m_Value(), m_ImmConstant()))));
  };
  if (PN->getType()->isIntegerTy(1) &&
      (IsBinOpOrAnd(PN->getIncomingValue(0)) ||
       IsBinOpOrAnd(PN->getIncomingValue(1)) || IsBinOpOrAnd(IfCond)) &&
      !CanHoistNotFromBothValues(PN->getIncomingValue(0),
                                 PN->getIncomingValue(1)))
    return Changed;

  // If all PHI nodes are promotable, check to make sure that all instructions
  // in the predecessor blocks can be promoted as well. If not, we won't be able
  // to get rid of the control flow, so it's not worth promoting to select
  // instructions.
  for (BasicBlock *IfBlock : IfBlocks)
    for (BasicBlock::iterator I = IfBlock->begin(); !I->isTerminator(); ++I)
      if (!AggressiveInsts.count(&*I) && !I->isDebugOrPseudoInst()) {
        // This is not an aggressive instruction that we can promote.
        // Because of this, we won't be able to get rid of the control flow, so
        // the xform is not worth it.
        return Changed;
      }

  // If either of the blocks has its address taken, we can't do this fold.
  if (any_of(IfBlocks,
             [](BasicBlock *IfBlock) { return IfBlock->hasAddressTaken(); }))
    return Changed;

  LLVM_DEBUG(dbgs() << "FOUND IF CONDITION!  " << *IfCond
                    << "  T: " << IfTrue->getName()
                    << "  F: " << IfFalse->getName() << "\n");

  // If we can still promote the PHI nodes after this gauntlet of tests,
  // do all of the PHI's now.

  // Move all 'aggressive' instructions, which are defined in the
  // conditional parts of the if's up to the dominating block.
  for (BasicBlock *IfBlock : IfBlocks)
      hoistAllInstructionsInto(DomBlock, DomBI, IfBlock);

  IRBuilder<NoFolder> Builder(DomBI);
  // Propagate fast-math-flags from phi nodes to replacement selects.
  IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
  // Each iteration erases the first PHI of the block, so the loop ends once
  // the block no longer starts with a PHI.
  while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
    if (isa<FPMathOperator>(PN))
      Builder.setFastMathFlags(PN->getFastMathFlags());

    // Change the PHI node into a select instruction.
    Value *TrueVal = PN->getIncomingValueForBlock(IfTrue);
    Value *FalseVal = PN->getIncomingValueForBlock(IfFalse);

    Value *Sel = Builder.CreateSelect(IfCond, TrueVal, FalseVal, "", DomBI);
    PN->replaceAllUsesWith(Sel);
    Sel->takeName(PN);
    PN->eraseFromParent();
  }

  // At this point, all IfBlocks are empty, so our if statement
  // has been flattened.  Change DomBlock to jump directly to our new block to
  // avoid other simplifycfg's kicking in on the diamond.
  Builder.CreateBr(BB);

  // Record the dominator tree updates while the old conditional branch (and
  // with it the old successor list of DomBlock) still exists; they are
  // applied only after that branch has been erased.
  SmallVector<DominatorTree::UpdateType, 3> Updates;
  if (DTU) {
    Updates.push_back({DominatorTree::Insert, DomBlock, BB});
    for (auto *Successor : successors(DomBlock))
      Updates.push_back({DominatorTree::Delete, DomBlock, Successor});
  }

  DomBI->eraseFromParent();
  if (DTU)
    DTU->applyUpdates(Updates);

  return true;
}
3608
3609static Value *createLogicalOp(IRBuilderBase &Builder,
3610                              Instruction::BinaryOps Opc, Value *LHS,
3611                              Value *RHS, const Twine &Name = "") {
3612  // Try to relax logical op to binary op.
3613  if (impliesPoison(RHS, LHS))
3614    return Builder.CreateBinOp(Opc, LHS, RHS, Name);
3615  if (Opc == Instruction::And)
3616    return Builder.CreateLogicalAnd(LHS, RHS, Name);
3617  if (Opc == Instruction::Or)
3618    return Builder.CreateLogicalOr(LHS, RHS, Name);
3619  llvm_unreachable("Invalid logical opcode");
3620}
3621
3622/// Return true if either PBI or BI has branch weight available, and store
3623/// the weights in {Pred|Succ}{True|False}Weight. If one of PBI and BI does
3624/// not have branch weight, use 1:1 as its weight.
3625static bool extractPredSuccWeights(BranchInst *PBI, BranchInst *BI,
3626                                   uint64_t &PredTrueWeight,
3627                                   uint64_t &PredFalseWeight,
3628                                   uint64_t &SuccTrueWeight,
3629                                   uint64_t &SuccFalseWeight) {
3630  bool PredHasWeights =
3631      extractBranchWeights(*PBI, PredTrueWeight, PredFalseWeight);
3632  bool SuccHasWeights =
3633      extractBranchWeights(*BI, SuccTrueWeight, SuccFalseWeight);
3634  if (PredHasWeights || SuccHasWeights) {
3635    if (!PredHasWeights)
3636      PredTrueWeight = PredFalseWeight = 1;
3637    if (!SuccHasWeights)
3638      SuccTrueWeight = SuccFalseWeight = 1;
3639    return true;
3640  } else {
3641    return false;
3642  }
3643}
3644
3645/// Determine if the two branches share a common destination and deduce a glue
3646/// that joins the branches' conditions to arrive at the common destination if
3647/// that would be profitable.
3648static std::optional<std::tuple<BasicBlock *, Instruction::BinaryOps, bool>>
3649shouldFoldCondBranchesToCommonDestination(BranchInst *BI, BranchInst *PBI,
3650                                          const TargetTransformInfo *TTI) {
3651  assert(BI && PBI && BI->isConditional() && PBI->isConditional() &&
3652         "Both blocks must end with a conditional branches.");
3653  assert(is_contained(predecessors(BI->getParent()), PBI->getParent()) &&
3654         "PredBB must be a predecessor of BB.");
3655
3656  // We have the potential to fold the conditions together, but if the
3657  // predecessor branch is predictable, we may not want to merge them.
3658  uint64_t PTWeight, PFWeight;
3659  BranchProbability PBITrueProb, Likely;
3660  if (TTI && !PBI->getMetadata(LLVMContext::MD_unpredictable) &&
3661      extractBranchWeights(*PBI, PTWeight, PFWeight) &&
3662      (PTWeight + PFWeight) != 0) {
3663    PBITrueProb =
3664        BranchProbability::getBranchProbability(PTWeight, PTWeight + PFWeight);
3665    Likely = TTI->getPredictableBranchThreshold();
3666  }
3667
3668  if (PBI->getSuccessor(0) == BI->getSuccessor(0)) {
3669    // Speculate the 2nd condition unless the 1st is probably true.
3670    if (PBITrueProb.isUnknown() || PBITrueProb < Likely)
3671      return {{BI->getSuccessor(0), Instruction::Or, false}};
3672  } else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) {
3673    // Speculate the 2nd condition unless the 1st is probably false.
3674    if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely)
3675      return {{BI->getSuccessor(1), Instruction::And, false}};
3676  } else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) {
3677    // Speculate the 2nd condition unless the 1st is probably true.
3678    if (PBITrueProb.isUnknown() || PBITrueProb < Likely)
3679      return {{BI->getSuccessor(1), Instruction::And, true}};
3680  } else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) {
3681    // Speculate the 2nd condition unless the 1st is probably false.
3682    if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely)
3683      return {{BI->getSuccessor(0), Instruction::Or, true}};
3684  }
3685  return std::nullopt;
3686}
3687
/// Fold the conditional branch \p BI (the terminator of BB) into the
/// conditional branch \p PBI of a predecessor block.
///
/// PBI's edge to BB is redirected to one of BI's successors (UniqueSucc), the
/// instructions of BB are cloned into the predecessor block, and PBI's
/// condition is replaced by the and/or of its previous condition and the
/// cloned condition of BI. PBI's branch weights are recomputed (or dropped
/// when neither branch has weights), and loop metadata on BI is copied to PBI.
///
/// Precondition: shouldFoldCondBranchesToCommonDestination(BI, PBI, TTI) must
/// yield a value; its result is dereferenced here without a check.
///
/// \p DTU is null-checked before use in this function; \p MSSAU is only
/// forwarded to AddPredecessorToBlock. Always returns true.
static bool performBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI,
                                             DomTreeUpdater *DTU,
                                             MemorySSAUpdater *MSSAU,
                                             const TargetTransformInfo *TTI) {
  BasicBlock *BB = BI->getParent();
  BasicBlock *PredBlock = PBI->getParent();

  // Determine if the two branches share a common destination.
  BasicBlock *CommonSucc;
  Instruction::BinaryOps Opc;
  bool InvertPredCond;
  std::tie(CommonSucc, Opc, InvertPredCond) =
      *shouldFoldCondBranchesToCommonDestination(BI, PBI, TTI);

  LLVM_DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB);

  IRBuilder<> Builder(PBI);
  // The builder is used to create instructions to eliminate the branch in BB.
  // If BB's terminator has !annotation metadata, add it to the new
  // instructions.
  Builder.CollectMetadataToCopy(BB->getTerminator(),
                                {LLVMContext::MD_annotation});

  // If we need to invert the condition in the pred block to match, do so now.
  if (InvertPredCond) {
    InvertBranch(PBI, Builder);
  }

  // The successor of BI that PBI's edge to BB will be redirected to: BI's
  // true successor if PBI reaches BB on true, BI's false successor otherwise.
  // This is computed after any inversion of PBI above.
  BasicBlock *UniqueSucc =
      PBI->getSuccessor(0) == BB ? BI->getSuccessor(0) : BI->getSuccessor(1);

  // Before cloning instructions, notify the successor basic block that it
  // is about to have a new predecessor. This will update PHI nodes,
  // which will allow us to update live-out uses of bonus instructions.
  AddPredecessorToBlock(UniqueSucc, PredBlock, BB, MSSAU);

  // Try to update branch weights.
  uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
  if (extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
                             SuccTrueWeight, SuccFalseWeight)) {
    SmallVector<uint64_t, 8> NewWeights;

    if (PBI->getSuccessor(0) == BB) {
      // PBI: br i1 %x, BB, FalseDest
      // BI:  br i1 %y, UniqueSucc, FalseDest
      // TrueWeight is TrueWeight for PBI * TrueWeight for BI.
      NewWeights.push_back(PredTrueWeight * SuccTrueWeight);
      // FalseWeight is FalseWeight for PBI * TotalWeight for BI +
      //               TrueWeight for PBI * FalseWeight for BI.
      // We assume that total weights of a BranchInst can fit into 32 bits.
      // Therefore, we will not have overflow using 64-bit arithmetic.
      NewWeights.push_back(PredFalseWeight *
                               (SuccFalseWeight + SuccTrueWeight) +
                           PredTrueWeight * SuccFalseWeight);
    } else {
      // PBI: br i1 %x, TrueDest, BB
      // BI:  br i1 %y, TrueDest, UniqueSucc
      // TrueWeight is TrueWeight for PBI * TotalWeight for BI +
      //              FalseWeight for PBI * TrueWeight for BI.
      NewWeights.push_back(PredTrueWeight * (SuccFalseWeight + SuccTrueWeight) +
                           PredFalseWeight * SuccTrueWeight);
      // FalseWeight is FalseWeight for PBI * FalseWeight for BI.
      NewWeights.push_back(PredFalseWeight * SuccFalseWeight);
    }

    // Halve the weights if any of them cannot fit in an uint32_t
    FitWeights(NewWeights);

    SmallVector<uint32_t, 8> MDWeights(NewWeights.begin(), NewWeights.end());
    setBranchWeights(PBI, MDWeights[0], MDWeights[1]);

    // TODO: If BB is reachable from all paths through PredBlock, then we
    // could replace PBI's branch probabilities with BI's.
  } else
    // Neither branch has weights: clear any profile metadata on PBI.
    PBI->setMetadata(LLVMContext::MD_prof, nullptr);

  // Now, update the CFG. The index expression selects whichever successor
  // slot of PBI currently points at BB (0 if successor 0 is BB, else 1).
  PBI->setSuccessor(PBI->getSuccessor(0) != BB, UniqueSucc);

  if (DTU)
    DTU->applyUpdates({{DominatorTree::Insert, PredBlock, UniqueSucc},
                       {DominatorTree::Delete, PredBlock, BB}});

  // If BI was a loop latch, it may have had associated loop metadata.
  // We need to copy it to the new latch, that is, PBI.
  if (MDNode *LoopMD = BI->getMetadata(LLVMContext::MD_loop))
    PBI->setMetadata(LLVMContext::MD_loop, LoopMD);

  ValueToValueMapTy VMap; // maps original values to cloned values
  CloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(BB, PredBlock, VMap);

  Module *M = BB->getModule();

  // In the new debug-info format, also carry over the debug records attached
  // to BB's terminator and remap them through VMap.
  if (PredBlock->IsNewDbgInfoFormat) {
    PredBlock->getTerminator()->cloneDebugInfoFrom(BB->getTerminator());
    for (DPValue &DPV : PredBlock->getTerminator()->getDbgValueRange()) {
      RemapDPValue(M, &DPV, VMap,
                   RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
    }
  }

  // Now that the Cond was cloned into the predecessor basic block,
  // or/and the two conditions together. Note that the result is named
  // "or.cond" regardless of whether Opc is And or Or.
  Value *BICond = VMap[BI->getCondition()];
  PBI->setCondition(
      createLogicalOp(Builder, Opc, PBI->getCondition(), BICond, "or.cond"));

  ++NumFoldBranchToCommonDest;
  return true;
}
3798
3799/// Return if an instruction's type or any of its operands' types are a vector
3800/// type.
3801static bool isVectorOp(Instruction &I) {
3802  return I.getType()->isVectorTy() || any_of(I.operands(), [](Use &U) {
3803           return U->getType()->isVectorTy();
3804         });
3805}
3806
3807/// If this basic block is simple enough, and if a predecessor branches to us
3808/// and one of our successors, fold the block into the predecessor and use
3809/// logical operations to pick the right destination.
3810bool llvm::FoldBranchToCommonDest(BranchInst *BI, DomTreeUpdater *DTU,
3811                                  MemorySSAUpdater *MSSAU,
3812                                  const TargetTransformInfo *TTI,
3813                                  unsigned BonusInstThreshold) {
3814  // If this block ends with an unconditional branch,
3815  // let SpeculativelyExecuteBB() deal with it.
3816  if (!BI->isConditional())
3817    return false;
3818
3819  BasicBlock *BB = BI->getParent();
3820  TargetTransformInfo::TargetCostKind CostKind =
3821    BB->getParent()->hasMinSize() ? TargetTransformInfo::TCK_CodeSize
3822                                  : TargetTransformInfo::TCK_SizeAndLatency;
3823
3824  Instruction *Cond = dyn_cast<Instruction>(BI->getCondition());
3825
3826  if (!Cond ||
3827      (!isa<CmpInst>(Cond) && !isa<BinaryOperator>(Cond) &&
3828       !isa<SelectInst>(Cond)) ||
3829      Cond->getParent() != BB || !Cond->hasOneUse())
3830    return false;
3831
3832  // Finally, don't infinitely unroll conditional loops.
3833  if (is_contained(successors(BB), BB))
3834    return false;
3835
3836  // With which predecessors will we want to deal with?
3837  SmallVector<BasicBlock *, 8> Preds;
3838  for (BasicBlock *PredBlock : predecessors(BB)) {
3839    BranchInst *PBI = dyn_cast<BranchInst>(PredBlock->getTerminator());
3840
3841    // Check that we have two conditional branches.  If there is a PHI node in
3842    // the common successor, verify that the same value flows in from both
3843    // blocks.
3844    if (!PBI || PBI->isUnconditional() || !SafeToMergeTerminators(BI, PBI))
3845      continue;
3846
3847    // Determine if the two branches share a common destination.
3848    BasicBlock *CommonSucc;
3849    Instruction::BinaryOps Opc;
3850    bool InvertPredCond;
3851    if (auto Recipe = shouldFoldCondBranchesToCommonDestination(BI, PBI, TTI))
3852      std::tie(CommonSucc, Opc, InvertPredCond) = *Recipe;
3853    else
3854      continue;
3855
3856    // Check the cost of inserting the necessary logic before performing the
3857    // transformation.
3858    if (TTI) {
3859      Type *Ty = BI->getCondition()->getType();
3860      InstructionCost Cost = TTI->getArithmeticInstrCost(Opc, Ty, CostKind);
3861      if (InvertPredCond && (!PBI->getCondition()->hasOneUse() ||
3862                             !isa<CmpInst>(PBI->getCondition())))
3863        Cost += TTI->getArithmeticInstrCost(Instruction::Xor, Ty, CostKind);
3864
3865      if (Cost > BranchFoldThreshold)
3866        continue;
3867    }
3868
3869    // Ok, we do want to deal with this predecessor. Record it.
3870    Preds.emplace_back(PredBlock);
3871  }
3872
3873  // If there aren't any predecessors into which we can fold,
3874  // don't bother checking the cost.
3875  if (Preds.empty())
3876    return false;
3877
3878  // Only allow this transformation if computing the condition doesn't involve
3879  // too many instructions and these involved instructions can be executed
3880  // unconditionally. We denote all involved instructions except the condition
3881  // as "bonus instructions", and only allow this transformation when the
3882  // number of the bonus instructions we'll need to create when cloning into
3883  // each predecessor does not exceed a certain threshold.
3884  unsigned NumBonusInsts = 0;
3885  bool SawVectorOp = false;
3886  const unsigned PredCount = Preds.size();
3887  for (Instruction &I : *BB) {
3888    // Don't check the branch condition comparison itself.
3889    if (&I == Cond)
3890      continue;
3891    // Ignore dbg intrinsics, and the terminator.
3892    if (isa<DbgInfoIntrinsic>(I) || isa<BranchInst>(I))
3893      continue;
3894    // I must be safe to execute unconditionally.
3895    if (!isSafeToSpeculativelyExecute(&I))
3896      return false;
3897    SawVectorOp |= isVectorOp(I);
3898
3899    // Account for the cost of duplicating this instruction into each
3900    // predecessor. Ignore free instructions.
3901    if (!TTI || TTI->getInstructionCost(&I, CostKind) !=
3902                    TargetTransformInfo::TCC_Free) {
3903      NumBonusInsts += PredCount;
3904
3905      // Early exits once we reach the limit.
3906      if (NumBonusInsts >
3907          BonusInstThreshold * BranchFoldToCommonDestVectorMultiplier)
3908        return false;
3909    }
3910
3911    auto IsBCSSAUse = [BB, &I](Use &U) {
3912      auto *UI = cast<Instruction>(U.getUser());
3913      if (auto *PN = dyn_cast<PHINode>(UI))
3914        return PN->getIncomingBlock(U) == BB;
3915      return UI->getParent() == BB && I.comesBefore(UI);
3916    };
3917
3918    // Does this instruction require rewriting of uses?
3919    if (!all_of(I.uses(), IsBCSSAUse))
3920      return false;
3921  }
3922  if (NumBonusInsts >
3923      BonusInstThreshold *
3924          (SawVectorOp ? BranchFoldToCommonDestVectorMultiplier : 1))
3925    return false;
3926
3927  // Ok, we have the budget. Perform the transformation.
3928  for (BasicBlock *PredBlock : Preds) {
3929    auto *PBI = cast<BranchInst>(PredBlock->getTerminator());
3930    return performBranchToCommonDestFolding(BI, PBI, DTU, MSSAU, TTI);
3931  }
3932  return false;
3933}
3934
3935// If there is only one store in BB1 and BB2, return it, otherwise return
3936// nullptr.
3937static StoreInst *findUniqueStoreInBlocks(BasicBlock *BB1, BasicBlock *BB2) {
3938  StoreInst *S = nullptr;
3939  for (auto *BB : {BB1, BB2}) {
3940    if (!BB)
3941      continue;
3942    for (auto &I : *BB)
3943      if (auto *SI = dyn_cast<StoreInst>(&I)) {
3944        if (S)
3945          // Multiple stores seen.
3946          return nullptr;
3947        else
3948          S = SI;
3949      }
3950  }
3951  return S;
3952}
3953
3954static Value *ensureValueAvailableInSuccessor(Value *V, BasicBlock *BB,
3955                                              Value *AlternativeV = nullptr) {
3956  // PHI is going to be a PHI node that allows the value V that is defined in
3957  // BB to be referenced in BB's only successor.
3958  //
3959  // If AlternativeV is nullptr, the only value we care about in PHI is V. It
3960  // doesn't matter to us what the other operand is (it'll never get used). We
3961  // could just create a new PHI with an undef incoming value, but that could
3962  // increase register pressure if EarlyCSE/InstCombine can't fold it with some
3963  // other PHI. So here we directly look for some PHI in BB's successor with V
3964  // as an incoming operand. If we find one, we use it, else we create a new
3965  // one.
3966  //
3967  // If AlternativeV is not nullptr, we care about both incoming values in PHI.
3968  // PHI must be exactly: phi <ty> [ %BB, %V ], [ %OtherBB, %AlternativeV]
3969  // where OtherBB is the single other predecessor of BB's only successor.
3970  PHINode *PHI = nullptr;
3971  BasicBlock *Succ = BB->getSingleSuccessor();
3972
3973  for (auto I = Succ->begin(); isa<PHINode>(I); ++I)
3974    if (cast<PHINode>(I)->getIncomingValueForBlock(BB) == V) {
3975      PHI = cast<PHINode>(I);
3976      if (!AlternativeV)
3977        break;
3978
3979      assert(Succ->hasNPredecessors(2));
3980      auto PredI = pred_begin(Succ);
3981      BasicBlock *OtherPredBB = *PredI == BB ? *++PredI : *PredI;
3982      if (PHI->getIncomingValueForBlock(OtherPredBB) == AlternativeV)
3983        break;
3984      PHI = nullptr;
3985    }
3986  if (PHI)
3987    return PHI;
3988
3989  // If V is not an instruction defined in BB, just return it.
3990  if (!AlternativeV &&
3991      (!isa<Instruction>(V) || cast<Instruction>(V)->getParent() != BB))
3992    return V;
3993
3994  PHI = PHINode::Create(V->getType(), 2, "simplifycfg.merge");
3995  PHI->insertBefore(Succ->begin());
3996  PHI->addIncoming(V, BB);
3997  for (BasicBlock *PredBB : predecessors(Succ))
3998    if (PredBB != BB)
3999      PHI->addIncoming(
4000          AlternativeV ? AlternativeV : PoisonValue::get(V->getType()), PredBB);
4001  return PHI;
4002}
4003
4004static bool mergeConditionalStoreToAddress(
4005    BasicBlock *PTB, BasicBlock *PFB, BasicBlock *QTB, BasicBlock *QFB,
4006    BasicBlock *PostBB, Value *Address, bool InvertPCond, bool InvertQCond,
4007    DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI) {
4008  // For every pointer, there must be exactly two stores, one coming from
4009  // PTB or PFB, and the other from QTB or QFB. We don't support more than one
4010  // store (to any address) in PTB,PFB or QTB,QFB.
4011  // FIXME: We could relax this restriction with a bit more work and performance
4012  // testing.
4013  StoreInst *PStore = findUniqueStoreInBlocks(PTB, PFB);
4014  StoreInst *QStore = findUniqueStoreInBlocks(QTB, QFB);
4015  if (!PStore || !QStore)
4016    return false;
4017
4018  // Now check the stores are compatible.
4019  if (!QStore->isUnordered() || !PStore->isUnordered() ||
4020      PStore->getValueOperand()->getType() !=
4021          QStore->getValueOperand()->getType())
4022    return false;
4023
4024  // Check that sinking the store won't cause program behavior changes. Sinking
4025  // the store out of the Q blocks won't change any behavior as we're sinking
4026  // from a block to its unconditional successor. But we're moving a store from
4027  // the P blocks down through the middle block (QBI) and past both QFB and QTB.
4028  // So we need to check that there are no aliasing loads or stores in
4029  // QBI, QTB and QFB. We also need to check there are no conflicting memory
4030  // operations between PStore and the end of its parent block.
4031  //
4032  // The ideal way to do this is to query AliasAnalysis, but we don't
4033  // preserve AA currently so that is dangerous. Be super safe and just
4034  // check there are no other memory operations at all.
4035  for (auto &I : *QFB->getSinglePredecessor())
4036    if (I.mayReadOrWriteMemory())
4037      return false;
4038  for (auto &I : *QFB)
4039    if (&I != QStore && I.mayReadOrWriteMemory())
4040      return false;
4041  if (QTB)
4042    for (auto &I : *QTB)
4043      if (&I != QStore && I.mayReadOrWriteMemory())
4044        return false;
4045  for (auto I = BasicBlock::iterator(PStore), E = PStore->getParent()->end();
4046       I != E; ++I)
4047    if (&*I != PStore && I->mayReadOrWriteMemory())
4048      return false;
4049
4050  // If we're not in aggressive mode, we only optimize if we have some
4051  // confidence that by optimizing we'll allow P and/or Q to be if-converted.
4052  auto IsWorthwhile = [&](BasicBlock *BB, ArrayRef<StoreInst *> FreeStores) {
4053    if (!BB)
4054      return true;
4055    // Heuristic: if the block can be if-converted/phi-folded and the
4056    // instructions inside are all cheap (arithmetic/GEPs), it's worthwhile to
4057    // thread this store.
4058    InstructionCost Cost = 0;
4059    InstructionCost Budget =
4060        PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;
4061    for (auto &I : BB->instructionsWithoutDebug(false)) {
4062      // Consider terminator instruction to be free.
4063      if (I.isTerminator())
4064        continue;
4065      // If this is one the stores that we want to speculate out of this BB,
4066      // then don't count it's cost, consider it to be free.
4067      if (auto *S = dyn_cast<StoreInst>(&I))
4068        if (llvm::find(FreeStores, S))
4069          continue;
4070      // Else, we have a white-list of instructions that we are ak speculating.
4071      if (!isa<BinaryOperator>(I) && !isa<GetElementPtrInst>(I))
4072        return false; // Not in white-list - not worthwhile folding.
4073      // And finally, if this is a non-free instruction that we are okay
4074      // speculating, ensure that we consider the speculation budget.
4075      Cost +=
4076          TTI.getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency);
4077      if (Cost > Budget)
4078        return false; // Eagerly refuse to fold as soon as we're out of budget.
4079    }
4080    assert(Cost <= Budget &&
4081           "When we run out of budget we will eagerly return from within the "
4082           "per-instruction loop.");
4083    return true;
4084  };
4085
4086  const std::array<StoreInst *, 2> FreeStores = {PStore, QStore};
4087  if (!MergeCondStoresAggressively &&
4088      (!IsWorthwhile(PTB, FreeStores) || !IsWorthwhile(PFB, FreeStores) ||
4089       !IsWorthwhile(QTB, FreeStores) || !IsWorthwhile(QFB, FreeStores)))
4090    return false;
4091
4092  // If PostBB has more than two predecessors, we need to split it so we can
4093  // sink the store.
4094  if (std::next(pred_begin(PostBB), 2) != pred_end(PostBB)) {
4095    // We know that QFB's only successor is PostBB. And QFB has a single
4096    // predecessor. If QTB exists, then its only successor is also PostBB.
4097    // If QTB does not exist, then QFB's only predecessor has a conditional
4098    // branch to QFB and PostBB.
4099    BasicBlock *TruePred = QTB ? QTB : QFB->getSinglePredecessor();
4100    BasicBlock *NewBB =
4101        SplitBlockPredecessors(PostBB, {QFB, TruePred}, "condstore.split", DTU);
4102    if (!NewBB)
4103      return false;
4104    PostBB = NewBB;
4105  }
4106
4107  // OK, we're going to sink the stores to PostBB. The store has to be
4108  // conditional though, so first create the predicate.
4109  Value *PCond = cast<BranchInst>(PFB->getSinglePredecessor()->getTerminator())
4110                     ->getCondition();
4111  Value *QCond = cast<BranchInst>(QFB->getSinglePredecessor()->getTerminator())
4112                     ->getCondition();
4113
4114  Value *PPHI = ensureValueAvailableInSuccessor(PStore->getValueOperand(),
4115                                                PStore->getParent());
4116  Value *QPHI = ensureValueAvailableInSuccessor(QStore->getValueOperand(),
4117                                                QStore->getParent(), PPHI);
4118
4119  BasicBlock::iterator PostBBFirst = PostBB->getFirstInsertionPt();
4120  IRBuilder<> QB(PostBB, PostBBFirst);
4121  QB.SetCurrentDebugLocation(PostBBFirst->getStableDebugLoc());
4122
4123  Value *PPred = PStore->getParent() == PTB ? PCond : QB.CreateNot(PCond);
4124  Value *QPred = QStore->getParent() == QTB ? QCond : QB.CreateNot(QCond);
4125
4126  if (InvertPCond)
4127    PPred = QB.CreateNot(PPred);
4128  if (InvertQCond)
4129    QPred = QB.CreateNot(QPred);
4130  Value *CombinedPred = QB.CreateOr(PPred, QPred);
4131
4132  BasicBlock::iterator InsertPt = QB.GetInsertPoint();
4133  auto *T = SplitBlockAndInsertIfThen(CombinedPred, InsertPt,
4134                                      /*Unreachable=*/false,
4135                                      /*BranchWeights=*/nullptr, DTU);
4136
4137  QB.SetInsertPoint(T);
4138  StoreInst *SI = cast<StoreInst>(QB.CreateStore(QPHI, Address));
4139  SI->setAAMetadata(PStore->getAAMetadata().merge(QStore->getAAMetadata()));
4140  // Choose the minimum alignment. If we could prove both stores execute, we
4141  // could use biggest one.  In this case, though, we only know that one of the
4142  // stores executes.  And we don't know it's safe to take the alignment from a
4143  // store that doesn't execute.
4144  SI->setAlignment(std::min(PStore->getAlign(), QStore->getAlign()));
4145
4146  QStore->eraseFromParent();
4147  PStore->eraseFromParent();
4148
4149  return true;
4150}
4151
4152static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI,
4153                                   DomTreeUpdater *DTU, const DataLayout &DL,
4154                                   const TargetTransformInfo &TTI) {
4155  // The intention here is to find diamonds or triangles (see below) where each
4156  // conditional block contains a store to the same address. Both of these
4157  // stores are conditional, so they can't be unconditionally sunk. But it may
4158  // be profitable to speculatively sink the stores into one merged store at the
4159  // end, and predicate the merged store on the union of the two conditions of
4160  // PBI and QBI.
4161  //
4162  // This can reduce the number of stores executed if both of the conditions are
4163  // true, and can allow the blocks to become small enough to be if-converted.
4164  // This optimization will also chain, so that ladders of test-and-set
4165  // sequences can be if-converted away.
4166  //
4167  // We only deal with simple diamonds or triangles:
4168  //
4169  //     PBI       or      PBI        or a combination of the two
4170  //    /   \               | \
4171  //   PTB  PFB             |  PFB
4172  //    \   /               | /
4173  //     QBI                QBI
4174  //    /  \                | \
4175  //   QTB  QFB             |  QFB
4176  //    \  /                | /
4177  //    PostBB            PostBB
4178  //
4179  // We model triangles as a type of diamond with a nullptr "true" block.
4180  // Triangles are canonicalized so that the fallthrough edge is represented by
4181  // a true condition, as in the diagram above.
4182  BasicBlock *PTB = PBI->getSuccessor(0);
4183  BasicBlock *PFB = PBI->getSuccessor(1);
4184  BasicBlock *QTB = QBI->getSuccessor(0);
4185  BasicBlock *QFB = QBI->getSuccessor(1);
4186  BasicBlock *PostBB = QFB->getSingleSuccessor();
4187
4188  // Make sure we have a good guess for PostBB. If QTB's only successor is
4189  // QFB, then QFB is a better PostBB.
4190  if (QTB->getSingleSuccessor() == QFB)
4191    PostBB = QFB;
4192
4193  // If we couldn't find a good PostBB, stop.
4194  if (!PostBB)
4195    return false;
4196
4197  bool InvertPCond = false, InvertQCond = false;
4198  // Canonicalize fallthroughs to the true branches.
4199  if (PFB == QBI->getParent()) {
4200    std::swap(PFB, PTB);
4201    InvertPCond = true;
4202  }
4203  if (QFB == PostBB) {
4204    std::swap(QFB, QTB);
4205    InvertQCond = true;
4206  }
4207
4208  // From this point on we can assume PTB or QTB may be fallthroughs but PFB
4209  // and QFB may not. Model fallthroughs as a nullptr block.
4210  if (PTB == QBI->getParent())
4211    PTB = nullptr;
4212  if (QTB == PostBB)
4213    QTB = nullptr;
4214
4215  // Legality bailouts. We must have at least the non-fallthrough blocks and
4216  // the post-dominating block, and the non-fallthroughs must only have one
4217  // predecessor.
4218  auto HasOnePredAndOneSucc = [](BasicBlock *BB, BasicBlock *P, BasicBlock *S) {
4219    return BB->getSinglePredecessor() == P && BB->getSingleSuccessor() == S;
4220  };
4221  if (!HasOnePredAndOneSucc(PFB, PBI->getParent(), QBI->getParent()) ||
4222      !HasOnePredAndOneSucc(QFB, QBI->getParent(), PostBB))
4223    return false;
4224  if ((PTB && !HasOnePredAndOneSucc(PTB, PBI->getParent(), QBI->getParent())) ||
4225      (QTB && !HasOnePredAndOneSucc(QTB, QBI->getParent(), PostBB)))
4226    return false;
4227  if (!QBI->getParent()->hasNUses(2))
4228    return false;
4229
4230  // OK, this is a sequence of two diamonds or triangles.
4231  // Check if there are stores in PTB or PFB that are repeated in QTB or QFB.
4232  SmallPtrSet<Value *, 4> PStoreAddresses, QStoreAddresses;
4233  for (auto *BB : {PTB, PFB}) {
4234    if (!BB)
4235      continue;
4236    for (auto &I : *BB)
4237      if (StoreInst *SI = dyn_cast<StoreInst>(&I))
4238        PStoreAddresses.insert(SI->getPointerOperand());
4239  }
4240  for (auto *BB : {QTB, QFB}) {
4241    if (!BB)
4242      continue;
4243    for (auto &I : *BB)
4244      if (StoreInst *SI = dyn_cast<StoreInst>(&I))
4245        QStoreAddresses.insert(SI->getPointerOperand());
4246  }
4247
4248  set_intersect(PStoreAddresses, QStoreAddresses);
4249  // set_intersect mutates PStoreAddresses in place. Rename it here to make it
4250  // clear what it contains.
4251  auto &CommonAddresses = PStoreAddresses;
4252
4253  bool Changed = false;
4254  for (auto *Address : CommonAddresses)
4255    Changed |=
4256        mergeConditionalStoreToAddress(PTB, PFB, QTB, QFB, PostBB, Address,
4257                                       InvertPCond, InvertQCond, DTU, DL, TTI);
4258  return Changed;
4259}
4260
4261/// If the previous block ended with a widenable branch, determine if reusing
4262/// the target block is profitable and legal.  This will have the effect of
4263/// "widening" PBI, but doesn't require us to reason about hosting safety.
4264static bool tryWidenCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
4265                                           DomTreeUpdater *DTU) {
4266  // TODO: This can be generalized in two important ways:
4267  // 1) We can allow phi nodes in IfFalseBB and simply reuse all the input
4268  //    values from the PBI edge.
4269  // 2) We can sink side effecting instructions into BI's fallthrough
4270  //    successor provided they doesn't contribute to computation of
4271  //    BI's condition.
4272  BasicBlock *IfTrueBB = PBI->getSuccessor(0);
4273  BasicBlock *IfFalseBB = PBI->getSuccessor(1);
4274  if (!isWidenableBranch(PBI) || IfTrueBB != BI->getParent() ||
4275      !BI->getParent()->getSinglePredecessor())
4276    return false;
4277  if (!IfFalseBB->phis().empty())
4278    return false; // TODO
4279  // This helps avoid infinite loop with SimplifyCondBranchToCondBranch which
4280  // may undo the transform done here.
4281  // TODO: There might be a more fine-grained solution to this.
4282  if (!llvm::succ_empty(IfFalseBB))
4283    return false;
4284  // Use lambda to lazily compute expensive condition after cheap ones.
4285  auto NoSideEffects = [](BasicBlock &BB) {
4286    return llvm::none_of(BB, [](const Instruction &I) {
4287        return I.mayWriteToMemory() || I.mayHaveSideEffects();
4288      });
4289  };
4290  if (BI->getSuccessor(1) != IfFalseBB && // no inf looping
4291      BI->getSuccessor(1)->getTerminatingDeoptimizeCall() && // profitability
4292      NoSideEffects(*BI->getParent())) {
4293    auto *OldSuccessor = BI->getSuccessor(1);
4294    OldSuccessor->removePredecessor(BI->getParent());
4295    BI->setSuccessor(1, IfFalseBB);
4296    if (DTU)
4297      DTU->applyUpdates(
4298          {{DominatorTree::Insert, BI->getParent(), IfFalseBB},
4299           {DominatorTree::Delete, BI->getParent(), OldSuccessor}});
4300    return true;
4301  }
4302  if (BI->getSuccessor(0) != IfFalseBB && // no inf looping
4303      BI->getSuccessor(0)->getTerminatingDeoptimizeCall() && // profitability
4304      NoSideEffects(*BI->getParent())) {
4305    auto *OldSuccessor = BI->getSuccessor(0);
4306    OldSuccessor->removePredecessor(BI->getParent());
4307    BI->setSuccessor(0, IfFalseBB);
4308    if (DTU)
4309      DTU->applyUpdates(
4310          {{DominatorTree::Insert, BI->getParent(), IfFalseBB},
4311           {DominatorTree::Delete, BI->getParent(), OldSuccessor}});
4312    return true;
4313  }
4314  return false;
4315}
4316
4317/// If we have a conditional branch as a predecessor of another block,
4318/// this function tries to simplify it.  We know
4319/// that PBI and BI are both conditional branches, and BI is in one of the
4320/// successor blocks of PBI - PBI branches to BI.
4321static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
4322                                           DomTreeUpdater *DTU,
4323                                           const DataLayout &DL,
4324                                           const TargetTransformInfo &TTI) {
4325  assert(PBI->isConditional() && BI->isConditional());
4326  BasicBlock *BB = BI->getParent();
4327
4328  // If this block ends with a branch instruction, and if there is a
4329  // predecessor that ends on a branch of the same condition, make
4330  // this conditional branch redundant.
4331  if (PBI->getCondition() == BI->getCondition() &&
4332      PBI->getSuccessor(0) != PBI->getSuccessor(1)) {
4333    // Okay, the outcome of this conditional branch is statically
4334    // knowable.  If this block had a single pred, handle specially, otherwise
4335    // FoldCondBranchOnValueKnownInPredecessor() will handle it.
4336    if (BB->getSinglePredecessor()) {
4337      // Turn this into a branch on constant.
4338      bool CondIsTrue = PBI->getSuccessor(0) == BB;
4339      BI->setCondition(
4340          ConstantInt::get(Type::getInt1Ty(BB->getContext()), CondIsTrue));
4341      return true; // Nuke the branch on constant.
4342    }
4343  }
4344
4345  // If the previous block ended with a widenable branch, determine if reusing
4346  // the target block is profitable and legal.  This will have the effect of
4347  // "widening" PBI, but doesn't require us to reason about hosting safety.
4348  if (tryWidenCondBranchToCondBranch(PBI, BI, DTU))
4349    return true;
4350
4351  // If both branches are conditional and both contain stores to the same
4352  // address, remove the stores from the conditionals and create a conditional
4353  // merged store at the end.
4354  if (MergeCondStores && mergeConditionalStores(PBI, BI, DTU, DL, TTI))
4355    return true;
4356
4357  // If this is a conditional branch in an empty block, and if any
4358  // predecessors are a conditional branch to one of our destinations,
4359  // fold the conditions into logical ops and one cond br.
4360
4361  // Ignore dbg intrinsics.
4362  if (&*BB->instructionsWithoutDebug(false).begin() != BI)
4363    return false;
4364
4365  int PBIOp, BIOp;
4366  if (PBI->getSuccessor(0) == BI->getSuccessor(0)) {
4367    PBIOp = 0;
4368    BIOp = 0;
4369  } else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) {
4370    PBIOp = 0;
4371    BIOp = 1;
4372  } else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) {
4373    PBIOp = 1;
4374    BIOp = 0;
4375  } else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) {
4376    PBIOp = 1;
4377    BIOp = 1;
4378  } else {
4379    return false;
4380  }
4381
4382  // Check to make sure that the other destination of this branch
4383  // isn't BB itself.  If so, this is an infinite loop that will
4384  // keep getting unwound.
4385  if (PBI->getSuccessor(PBIOp) == BB)
4386    return false;
4387
4388  // If predecessor's branch probability to BB is too low don't merge branches.
4389  SmallVector<uint32_t, 2> PredWeights;
4390  if (!PBI->getMetadata(LLVMContext::MD_unpredictable) &&
4391      extractBranchWeights(*PBI, PredWeights) &&
4392      (static_cast<uint64_t>(PredWeights[0]) + PredWeights[1]) != 0) {
4393
4394    BranchProbability CommonDestProb = BranchProbability::getBranchProbability(
4395        PredWeights[PBIOp],
4396        static_cast<uint64_t>(PredWeights[0]) + PredWeights[1]);
4397
4398    BranchProbability Likely = TTI.getPredictableBranchThreshold();
4399    if (CommonDestProb >= Likely)
4400      return false;
4401  }
4402
4403  // Do not perform this transformation if it would require
4404  // insertion of a large number of select instructions. For targets
4405  // without predication/cmovs, this is a big pessimization.
4406
4407  BasicBlock *CommonDest = PBI->getSuccessor(PBIOp);
4408  BasicBlock *RemovedDest = PBI->getSuccessor(PBIOp ^ 1);
4409  unsigned NumPhis = 0;
4410  for (BasicBlock::iterator II = CommonDest->begin(); isa<PHINode>(II);
4411       ++II, ++NumPhis) {
4412    if (NumPhis > 2) // Disable this xform.
4413      return false;
4414  }
4415
4416  // Finally, if everything is ok, fold the branches to logical ops.
4417  BasicBlock *OtherDest = BI->getSuccessor(BIOp ^ 1);
4418
4419  LLVM_DEBUG(dbgs() << "FOLDING BRs:" << *PBI->getParent()
4420                    << "AND: " << *BI->getParent());
4421
4422  SmallVector<DominatorTree::UpdateType, 5> Updates;
4423
4424  // If OtherDest *is* BB, then BB is a basic block with a single conditional
4425  // branch in it, where one edge (OtherDest) goes back to itself but the other
4426  // exits.  We don't *know* that the program avoids the infinite loop
4427  // (even though that seems likely).  If we do this xform naively, we'll end up
4428  // recursively unpeeling the loop.  Since we know that (after the xform is
4429  // done) that the block *is* infinite if reached, we just make it an obviously
4430  // infinite loop with no cond branch.
4431  if (OtherDest == BB) {
4432    // Insert it at the end of the function, because it's either code,
4433    // or it won't matter if it's hot. :)
4434    BasicBlock *InfLoopBlock =
4435        BasicBlock::Create(BB->getContext(), "infloop", BB->getParent());
4436    BranchInst::Create(InfLoopBlock, InfLoopBlock);
4437    if (DTU)
4438      Updates.push_back({DominatorTree::Insert, InfLoopBlock, InfLoopBlock});
4439    OtherDest = InfLoopBlock;
4440  }
4441
4442  LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent());
4443
4444  // BI may have other predecessors.  Because of this, we leave
4445  // it alone, but modify PBI.
4446
4447  // Make sure we get to CommonDest on True&True directions.
4448  Value *PBICond = PBI->getCondition();
4449  IRBuilder<NoFolder> Builder(PBI);
4450  if (PBIOp)
4451    PBICond = Builder.CreateNot(PBICond, PBICond->getName() + ".not");
4452
4453  Value *BICond = BI->getCondition();
4454  if (BIOp)
4455    BICond = Builder.CreateNot(BICond, BICond->getName() + ".not");
4456
4457  // Merge the conditions.
4458  Value *Cond =
4459      createLogicalOp(Builder, Instruction::Or, PBICond, BICond, "brmerge");
4460
4461  // Modify PBI to branch on the new condition to the new dests.
4462  PBI->setCondition(Cond);
4463  PBI->setSuccessor(0, CommonDest);
4464  PBI->setSuccessor(1, OtherDest);
4465
4466  if (DTU) {
4467    Updates.push_back({DominatorTree::Insert, PBI->getParent(), OtherDest});
4468    Updates.push_back({DominatorTree::Delete, PBI->getParent(), RemovedDest});
4469
4470    DTU->applyUpdates(Updates);
4471  }
4472
4473  // Update branch weight for PBI.
4474  uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
4475  uint64_t PredCommon, PredOther, SuccCommon, SuccOther;
4476  bool HasWeights =
4477      extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
4478                             SuccTrueWeight, SuccFalseWeight);
4479  if (HasWeights) {
4480    PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight;
4481    PredOther = PBIOp ? PredTrueWeight : PredFalseWeight;
4482    SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight;
4483    SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight;
4484    // The weight to CommonDest should be PredCommon * SuccTotal +
4485    //                                    PredOther * SuccCommon.
4486    // The weight to OtherDest should be PredOther * SuccOther.
4487    uint64_t NewWeights[2] = {PredCommon * (SuccCommon + SuccOther) +
4488                                  PredOther * SuccCommon,
4489                              PredOther * SuccOther};
4490    // Halve the weights if any of them cannot fit in an uint32_t
4491    FitWeights(NewWeights);
4492
4493    setBranchWeights(PBI, NewWeights[0], NewWeights[1]);
4494  }
4495
4496  // OtherDest may have phi nodes.  If so, add an entry from PBI's
4497  // block that are identical to the entries for BI's block.
4498  AddPredecessorToBlock(OtherDest, PBI->getParent(), BB);
4499
4500  // We know that the CommonDest already had an edge from PBI to
4501  // it.  If it has PHIs though, the PHIs may have different
4502  // entries for BB and PBI's BB.  If so, insert a select to make
4503  // them agree.
4504  for (PHINode &PN : CommonDest->phis()) {
4505    Value *BIV = PN.getIncomingValueForBlock(BB);
4506    unsigned PBBIdx = PN.getBasicBlockIndex(PBI->getParent());
4507    Value *PBIV = PN.getIncomingValue(PBBIdx);
4508    if (BIV != PBIV) {
4509      // Insert a select in PBI to pick the right value.
4510      SelectInst *NV = cast<SelectInst>(
4511          Builder.CreateSelect(PBICond, PBIV, BIV, PBIV->getName() + ".mux"));
4512      PN.setIncomingValue(PBBIdx, NV);
4513      // Although the select has the same condition as PBI, the original branch
4514      // weights for PBI do not apply to the new select because the select's
4515      // 'logical' edges are incoming edges of the phi that is eliminated, not
4516      // the outgoing edges of PBI.
4517      if (HasWeights) {
4518        uint64_t PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight;
4519        uint64_t PredOther = PBIOp ? PredTrueWeight : PredFalseWeight;
4520        uint64_t SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight;
4521        uint64_t SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight;
4522        // The weight to PredCommonDest should be PredCommon * SuccTotal.
4523        // The weight to PredOtherDest should be PredOther * SuccCommon.
4524        uint64_t NewWeights[2] = {PredCommon * (SuccCommon + SuccOther),
4525                                  PredOther * SuccCommon};
4526
4527        FitWeights(NewWeights);
4528
4529        setBranchWeights(NV, NewWeights[0], NewWeights[1]);
4530      }
4531    }
4532  }
4533
4534  LLVM_DEBUG(dbgs() << "INTO: " << *PBI->getParent());
4535  LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent());
4536
4537  // This basic block is probably dead.  We know it has at least
4538  // one fewer predecessor.
4539  return true;
4540}
4541
4542// Simplifies a terminator by replacing it with a branch to TrueBB if Cond is
4543// true or to FalseBB if Cond is false.
4544// Takes care of updating the successors and removing the old terminator.
4545// Also makes sure not to introduce new successors by assuming that edges to
4546// non-successor TrueBBs and FalseBBs aren't reachable.
4547bool SimplifyCFGOpt::SimplifyTerminatorOnSelect(Instruction *OldTerm,
4548                                                Value *Cond, BasicBlock *TrueBB,
4549                                                BasicBlock *FalseBB,
4550                                                uint32_t TrueWeight,
4551                                                uint32_t FalseWeight) {
4552  auto *BB = OldTerm->getParent();
4553  // Remove any superfluous successor edges from the CFG.
4554  // First, figure out which successors to preserve.
4555  // If TrueBB and FalseBB are equal, only try to preserve one copy of that
4556  // successor.
4557  BasicBlock *KeepEdge1 = TrueBB;
4558  BasicBlock *KeepEdge2 = TrueBB != FalseBB ? FalseBB : nullptr;
4559
4560  SmallSetVector<BasicBlock *, 2> RemovedSuccessors;
4561
4562  // Then remove the rest.
4563  for (BasicBlock *Succ : successors(OldTerm)) {
4564    // Make sure only to keep exactly one copy of each edge.
4565    if (Succ == KeepEdge1)
4566      KeepEdge1 = nullptr;
4567    else if (Succ == KeepEdge2)
4568      KeepEdge2 = nullptr;
4569    else {
4570      Succ->removePredecessor(BB,
4571                              /*KeepOneInputPHIs=*/true);
4572
4573      if (Succ != TrueBB && Succ != FalseBB)
4574        RemovedSuccessors.insert(Succ);
4575    }
4576  }
4577
4578  IRBuilder<> Builder(OldTerm);
4579  Builder.SetCurrentDebugLocation(OldTerm->getDebugLoc());
4580
4581  // Insert an appropriate new terminator.
4582  if (!KeepEdge1 && !KeepEdge2) {
4583    if (TrueBB == FalseBB) {
4584      // We were only looking for one successor, and it was present.
4585      // Create an unconditional branch to it.
4586      Builder.CreateBr(TrueBB);
4587    } else {
4588      // We found both of the successors we were looking for.
4589      // Create a conditional branch sharing the condition of the select.
4590      BranchInst *NewBI = Builder.CreateCondBr(Cond, TrueBB, FalseBB);
4591      if (TrueWeight != FalseWeight)
4592        setBranchWeights(NewBI, TrueWeight, FalseWeight);
4593    }
4594  } else if (KeepEdge1 && (KeepEdge2 || TrueBB == FalseBB)) {
4595    // Neither of the selected blocks were successors, so this
4596    // terminator must be unreachable.
4597    new UnreachableInst(OldTerm->getContext(), OldTerm);
4598  } else {
4599    // One of the selected values was a successor, but the other wasn't.
4600    // Insert an unconditional branch to the one that was found;
4601    // the edge to the one that wasn't must be unreachable.
4602    if (!KeepEdge1) {
4603      // Only TrueBB was found.
4604      Builder.CreateBr(TrueBB);
4605    } else {
4606      // Only FalseBB was found.
4607      Builder.CreateBr(FalseBB);
4608    }
4609  }
4610
4611  EraseTerminatorAndDCECond(OldTerm);
4612
4613  if (DTU) {
4614    SmallVector<DominatorTree::UpdateType, 2> Updates;
4615    Updates.reserve(RemovedSuccessors.size());
4616    for (auto *RemovedSuccessor : RemovedSuccessors)
4617      Updates.push_back({DominatorTree::Delete, BB, RemovedSuccessor});
4618    DTU->applyUpdates(Updates);
4619  }
4620
4621  return true;
4622}
4623
4624// Replaces
4625//   (switch (select cond, X, Y)) on constant X, Y
4626// with a branch - conditional if X and Y lead to distinct BBs,
4627// unconditional otherwise.
4628bool SimplifyCFGOpt::SimplifySwitchOnSelect(SwitchInst *SI,
4629                                            SelectInst *Select) {
4630  // Check for constant integer values in the select.
4631  ConstantInt *TrueVal = dyn_cast<ConstantInt>(Select->getTrueValue());
4632  ConstantInt *FalseVal = dyn_cast<ConstantInt>(Select->getFalseValue());
4633  if (!TrueVal || !FalseVal)
4634    return false;
4635
4636  // Find the relevant condition and destinations.
4637  Value *Condition = Select->getCondition();
4638  BasicBlock *TrueBB = SI->findCaseValue(TrueVal)->getCaseSuccessor();
4639  BasicBlock *FalseBB = SI->findCaseValue(FalseVal)->getCaseSuccessor();
4640
4641  // Get weight for TrueBB and FalseBB.
4642  uint32_t TrueWeight = 0, FalseWeight = 0;
4643  SmallVector<uint64_t, 8> Weights;
4644  bool HasWeights = hasBranchWeightMD(*SI);
4645  if (HasWeights) {
4646    GetBranchWeights(SI, Weights);
4647    if (Weights.size() == 1 + SI->getNumCases()) {
4648      TrueWeight =
4649          (uint32_t)Weights[SI->findCaseValue(TrueVal)->getSuccessorIndex()];
4650      FalseWeight =
4651          (uint32_t)Weights[SI->findCaseValue(FalseVal)->getSuccessorIndex()];
4652    }
4653  }
4654
4655  // Perform the actual simplification.
4656  return SimplifyTerminatorOnSelect(SI, Condition, TrueBB, FalseBB, TrueWeight,
4657                                    FalseWeight);
4658}
4659
4660// Replaces
4661//   (indirectbr (select cond, blockaddress(@fn, BlockA),
4662//                             blockaddress(@fn, BlockB)))
4663// with
4664//   (br cond, BlockA, BlockB).
4665bool SimplifyCFGOpt::SimplifyIndirectBrOnSelect(IndirectBrInst *IBI,
4666                                                SelectInst *SI) {
4667  // Check that both operands of the select are block addresses.
4668  BlockAddress *TBA = dyn_cast<BlockAddress>(SI->getTrueValue());
4669  BlockAddress *FBA = dyn_cast<BlockAddress>(SI->getFalseValue());
4670  if (!TBA || !FBA)
4671    return false;
4672
4673  // Extract the actual blocks.
4674  BasicBlock *TrueBB = TBA->getBasicBlock();
4675  BasicBlock *FalseBB = FBA->getBasicBlock();
4676
4677  // Perform the actual simplification.
4678  return SimplifyTerminatorOnSelect(IBI, SI->getCondition(), TrueBB, FalseBB, 0,
4679                                    0);
4680}
4681
4682/// This is called when we find an icmp instruction
4683/// (a seteq/setne with a constant) as the only instruction in a
4684/// block that ends with an uncond branch.  We are looking for a very specific
4685/// pattern that occurs when "A == 1 || A == 2 || A == 3" gets simplified.  In
4686/// this case, we merge the first two "or's of icmp" into a switch, but then the
4687/// default value goes to an uncond block with a seteq in it, we get something
4688/// like:
4689///
4690///   switch i8 %A, label %DEFAULT [ i8 1, label %end    i8 2, label %end ]
4691/// DEFAULT:
4692///   %tmp = icmp eq i8 %A, 92
4693///   br label %end
4694/// end:
4695///   ... = phi i1 [ true, %entry ], [ %tmp, %DEFAULT ], [ true, %entry ]
4696///
4697/// We prefer to split the edge to 'end' so that there is a true/false entry to
4698/// the PHI, merging the third icmp into the switch.
4699bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpInIt(
4700    ICmpInst *ICI, IRBuilder<> &Builder) {
4701  BasicBlock *BB = ICI->getParent();
4702
4703  // If the block has any PHIs in it or the icmp has multiple uses, it is too
4704  // complex.
4705  if (isa<PHINode>(BB->begin()) || !ICI->hasOneUse())
4706    return false;
4707
4708  Value *V = ICI->getOperand(0);
4709  ConstantInt *Cst = cast<ConstantInt>(ICI->getOperand(1));
4710
4711  // The pattern we're looking for is where our only predecessor is a switch on
4712  // 'V' and this block is the default case for the switch.  In this case we can
4713  // fold the compared value into the switch to simplify things.
4714  BasicBlock *Pred = BB->getSinglePredecessor();
4715  if (!Pred || !isa<SwitchInst>(Pred->getTerminator()))
4716    return false;
4717
4718  SwitchInst *SI = cast<SwitchInst>(Pred->getTerminator());
4719  if (SI->getCondition() != V)
4720    return false;
4721
4722  // If BB is reachable on a non-default case, then we simply know the value of
4723  // V in this block.  Substitute it and constant fold the icmp instruction
4724  // away.
4725  if (SI->getDefaultDest() != BB) {
4726    ConstantInt *VVal = SI->findCaseDest(BB);
4727    assert(VVal && "Should have a unique destination value");
4728    ICI->setOperand(0, VVal);
4729
4730    if (Value *V = simplifyInstruction(ICI, {DL, ICI})) {
4731      ICI->replaceAllUsesWith(V);
4732      ICI->eraseFromParent();
4733    }
4734    // BB is now empty, so it is likely to simplify away.
4735    return requestResimplify();
4736  }
4737
4738  // Ok, the block is reachable from the default dest.  If the constant we're
4739  // comparing exists in one of the other edges, then we can constant fold ICI
4740  // and zap it.
4741  if (SI->findCaseValue(Cst) != SI->case_default()) {
4742    Value *V;
4743    if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
4744      V = ConstantInt::getFalse(BB->getContext());
4745    else
4746      V = ConstantInt::getTrue(BB->getContext());
4747
4748    ICI->replaceAllUsesWith(V);
4749    ICI->eraseFromParent();
4750    // BB is now empty, so it is likely to simplify away.
4751    return requestResimplify();
4752  }
4753
4754  // The use of the icmp has to be in the 'end' block, by the only PHI node in
4755  // the block.
4756  BasicBlock *SuccBlock = BB->getTerminator()->getSuccessor(0);
4757  PHINode *PHIUse = dyn_cast<PHINode>(ICI->user_back());
4758  if (PHIUse == nullptr || PHIUse != &SuccBlock->front() ||
4759      isa<PHINode>(++BasicBlock::iterator(PHIUse)))
4760    return false;
4761
4762  // If the icmp is a SETEQ, then the default dest gets false, the new edge gets
4763  // true in the PHI.
4764  Constant *DefaultCst = ConstantInt::getTrue(BB->getContext());
4765  Constant *NewCst = ConstantInt::getFalse(BB->getContext());
4766
4767  if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
4768    std::swap(DefaultCst, NewCst);
4769
4770  // Replace ICI (which is used by the PHI for the default value) with true or
4771  // false depending on if it is EQ or NE.
4772  ICI->replaceAllUsesWith(DefaultCst);
4773  ICI->eraseFromParent();
4774
4775  SmallVector<DominatorTree::UpdateType, 2> Updates;
4776
4777  // Okay, the switch goes to this block on a default value.  Add an edge from
4778  // the switch to the merge point on the compared value.
4779  BasicBlock *NewBB =
4780      BasicBlock::Create(BB->getContext(), "switch.edge", BB->getParent(), BB);
4781  {
4782    SwitchInstProfUpdateWrapper SIW(*SI);
4783    auto W0 = SIW.getSuccessorWeight(0);
4784    SwitchInstProfUpdateWrapper::CaseWeightOpt NewW;
4785    if (W0) {
4786      NewW = ((uint64_t(*W0) + 1) >> 1);
4787      SIW.setSuccessorWeight(0, *NewW);
4788    }
4789    SIW.addCase(Cst, NewBB, NewW);
4790    if (DTU)
4791      Updates.push_back({DominatorTree::Insert, Pred, NewBB});
4792  }
4793
4794  // NewBB branches to the phi block, add the uncond branch and the phi entry.
4795  Builder.SetInsertPoint(NewBB);
4796  Builder.SetCurrentDebugLocation(SI->getDebugLoc());
4797  Builder.CreateBr(SuccBlock);
4798  PHIUse->addIncoming(NewCst, NewBB);
4799  if (DTU) {
4800    Updates.push_back({DominatorTree::Insert, NewBB, SuccBlock});
4801    DTU->applyUpdates(Updates);
4802  }
4803  return true;
4804}
4805
4806/// The specified branch is a conditional branch.
4807/// Check to see if it is branching on an or/and chain of icmp instructions, and
4808/// fold it into a switch instruction if so.
4809bool SimplifyCFGOpt::SimplifyBranchOnICmpChain(BranchInst *BI,
4810                                               IRBuilder<> &Builder,
4811                                               const DataLayout &DL) {
4812  Instruction *Cond = dyn_cast<Instruction>(BI->getCondition());
4813  if (!Cond)
4814    return false;
4815
4816  // Change br (X == 0 | X == 1), T, F into a switch instruction.
4817  // If this is a bunch of seteq's or'd together, or if it's a bunch of
4818  // 'setne's and'ed together, collect them.
4819
4820  // Try to gather values from a chain of and/or to be turned into a switch
4821  ConstantComparesGatherer ConstantCompare(Cond, DL);
4822  // Unpack the result
4823  SmallVectorImpl<ConstantInt *> &Values = ConstantCompare.Vals;
4824  Value *CompVal = ConstantCompare.CompValue;
4825  unsigned UsedICmps = ConstantCompare.UsedICmps;
4826  Value *ExtraCase = ConstantCompare.Extra;
4827
4828  // If we didn't have a multiply compared value, fail.
4829  if (!CompVal)
4830    return false;
4831
4832  // Avoid turning single icmps into a switch.
4833  if (UsedICmps <= 1)
4834    return false;
4835
4836  bool TrueWhenEqual = match(Cond, m_LogicalOr(m_Value(), m_Value()));
4837
4838  // There might be duplicate constants in the list, which the switch
4839  // instruction can't handle, remove them now.
4840  array_pod_sort(Values.begin(), Values.end(), ConstantIntSortPredicate);
4841  Values.erase(std::unique(Values.begin(), Values.end()), Values.end());
4842
4843  // If Extra was used, we require at least two switch values to do the
4844  // transformation.  A switch with one value is just a conditional branch.
4845  if (ExtraCase && Values.size() < 2)
4846    return false;
4847
4848  // TODO: Preserve branch weight metadata, similarly to how
4849  // FoldValueComparisonIntoPredecessors preserves it.
4850
4851  // Figure out which block is which destination.
4852  BasicBlock *DefaultBB = BI->getSuccessor(1);
4853  BasicBlock *EdgeBB = BI->getSuccessor(0);
4854  if (!TrueWhenEqual)
4855    std::swap(DefaultBB, EdgeBB);
4856
4857  BasicBlock *BB = BI->getParent();
4858
4859  LLVM_DEBUG(dbgs() << "Converting 'icmp' chain with " << Values.size()
4860                    << " cases into SWITCH.  BB is:\n"
4861                    << *BB);
4862
4863  SmallVector<DominatorTree::UpdateType, 2> Updates;
4864
4865  // If there are any extra values that couldn't be folded into the switch
4866  // then we evaluate them with an explicit branch first. Split the block
4867  // right before the condbr to handle it.
4868  if (ExtraCase) {
4869    BasicBlock *NewBB = SplitBlock(BB, BI, DTU, /*LI=*/nullptr,
4870                                   /*MSSAU=*/nullptr, "switch.early.test");
4871
4872    // Remove the uncond branch added to the old block.
4873    Instruction *OldTI = BB->getTerminator();
4874    Builder.SetInsertPoint(OldTI);
4875
4876    // There can be an unintended UB if extra values are Poison. Before the
4877    // transformation, extra values may not be evaluated according to the
4878    // condition, and it will not raise UB. But after transformation, we are
4879    // evaluating extra values before checking the condition, and it will raise
4880    // UB. It can be solved by adding freeze instruction to extra values.
4881    AssumptionCache *AC = Options.AC;
4882
4883    if (!isGuaranteedNotToBeUndefOrPoison(ExtraCase, AC, BI, nullptr))
4884      ExtraCase = Builder.CreateFreeze(ExtraCase);
4885
4886    if (TrueWhenEqual)
4887      Builder.CreateCondBr(ExtraCase, EdgeBB, NewBB);
4888    else
4889      Builder.CreateCondBr(ExtraCase, NewBB, EdgeBB);
4890
4891    OldTI->eraseFromParent();
4892
4893    if (DTU)
4894      Updates.push_back({DominatorTree::Insert, BB, EdgeBB});
4895
4896    // If there are PHI nodes in EdgeBB, then we need to add a new entry to them
4897    // for the edge we just added.
4898    AddPredecessorToBlock(EdgeBB, BB, NewBB);
4899
4900    LLVM_DEBUG(dbgs() << "  ** 'icmp' chain unhandled condition: " << *ExtraCase
4901                      << "\nEXTRABB = " << *BB);
4902    BB = NewBB;
4903  }
4904
4905  Builder.SetInsertPoint(BI);
4906  // Convert pointer to int before we switch.
4907  if (CompVal->getType()->isPointerTy()) {
4908    CompVal = Builder.CreatePtrToInt(
4909        CompVal, DL.getIntPtrType(CompVal->getType()), "magicptr");
4910  }
4911
4912  // Create the new switch instruction now.
4913  SwitchInst *New = Builder.CreateSwitch(CompVal, DefaultBB, Values.size());
4914
4915  // Add all of the 'cases' to the switch instruction.
4916  for (unsigned i = 0, e = Values.size(); i != e; ++i)
4917    New->addCase(Values[i], EdgeBB);
4918
4919  // We added edges from PI to the EdgeBB.  As such, if there were any
4920  // PHI nodes in EdgeBB, they need entries to be added corresponding to
4921  // the number of edges added.
4922  for (BasicBlock::iterator BBI = EdgeBB->begin(); isa<PHINode>(BBI); ++BBI) {
4923    PHINode *PN = cast<PHINode>(BBI);
4924    Value *InVal = PN->getIncomingValueForBlock(BB);
4925    for (unsigned i = 0, e = Values.size() - 1; i != e; ++i)
4926      PN->addIncoming(InVal, BB);
4927  }
4928
4929  // Erase the old branch instruction.
4930  EraseTerminatorAndDCECond(BI);
4931  if (DTU)
4932    DTU->applyUpdates(Updates);
4933
4934  LLVM_DEBUG(dbgs() << "  ** 'icmp' chain result is:\n" << *BB << '\n');
4935  return true;
4936}
4937
4938bool SimplifyCFGOpt::simplifyResume(ResumeInst *RI, IRBuilder<> &Builder) {
4939  if (isa<PHINode>(RI->getValue()))
4940    return simplifyCommonResume(RI);
4941  else if (isa<LandingPadInst>(RI->getParent()->getFirstNonPHI()) &&
4942           RI->getValue() == RI->getParent()->getFirstNonPHI())
4943    // The resume must unwind the exception that caused control to branch here.
4944    return simplifySingleResume(RI);
4945
4946  return false;
4947}
4948
4949// Check if cleanup block is empty
4950static bool isCleanupBlockEmpty(iterator_range<BasicBlock::iterator> R) {
4951  for (Instruction &I : R) {
4952    auto *II = dyn_cast<IntrinsicInst>(&I);
4953    if (!II)
4954      return false;
4955
4956    Intrinsic::ID IntrinsicID = II->getIntrinsicID();
4957    switch (IntrinsicID) {
4958    case Intrinsic::dbg_declare:
4959    case Intrinsic::dbg_value:
4960    case Intrinsic::dbg_label:
4961    case Intrinsic::lifetime_end:
4962      break;
4963    default:
4964      return false;
4965    }
4966  }
4967  return true;
4968}
4969
4970// Simplify resume that is shared by several landing pads (phi of landing pad).
4971bool SimplifyCFGOpt::simplifyCommonResume(ResumeInst *RI) {
4972  BasicBlock *BB = RI->getParent();
4973
4974  // Check that there are no other instructions except for debug and lifetime
4975  // intrinsics between the phi's and resume instruction.
4976  if (!isCleanupBlockEmpty(
4977          make_range(RI->getParent()->getFirstNonPHI(), BB->getTerminator())))
4978    return false;
4979
4980  SmallSetVector<BasicBlock *, 4> TrivialUnwindBlocks;
4981  auto *PhiLPInst = cast<PHINode>(RI->getValue());
4982
4983  // Check incoming blocks to see if any of them are trivial.
4984  for (unsigned Idx = 0, End = PhiLPInst->getNumIncomingValues(); Idx != End;
4985       Idx++) {
4986    auto *IncomingBB = PhiLPInst->getIncomingBlock(Idx);
4987    auto *IncomingValue = PhiLPInst->getIncomingValue(Idx);
4988
4989    // If the block has other successors, we can not delete it because
4990    // it has other dependents.
4991    if (IncomingBB->getUniqueSuccessor() != BB)
4992      continue;
4993
4994    auto *LandingPad = dyn_cast<LandingPadInst>(IncomingBB->getFirstNonPHI());
4995    // Not the landing pad that caused the control to branch here.
4996    if (IncomingValue != LandingPad)
4997      continue;
4998
4999    if (isCleanupBlockEmpty(
5000            make_range(LandingPad->getNextNode(), IncomingBB->getTerminator())))
5001      TrivialUnwindBlocks.insert(IncomingBB);
5002  }
5003
5004  // If no trivial unwind blocks, don't do any simplifications.
5005  if (TrivialUnwindBlocks.empty())
5006    return false;
5007
5008  // Turn all invokes that unwind here into calls.
5009  for (auto *TrivialBB : TrivialUnwindBlocks) {
5010    // Blocks that will be simplified should be removed from the phi node.
5011    // Note there could be multiple edges to the resume block, and we need
5012    // to remove them all.
5013    while (PhiLPInst->getBasicBlockIndex(TrivialBB) != -1)
5014      BB->removePredecessor(TrivialBB, true);
5015
5016    for (BasicBlock *Pred :
5017         llvm::make_early_inc_range(predecessors(TrivialBB))) {
5018      removeUnwindEdge(Pred, DTU);
5019      ++NumInvokes;
5020    }
5021
5022    // In each SimplifyCFG run, only the current processed block can be erased.
5023    // Otherwise, it will break the iteration of SimplifyCFG pass. So instead
5024    // of erasing TrivialBB, we only remove the branch to the common resume
5025    // block so that we can later erase the resume block since it has no
5026    // predecessors.
5027    TrivialBB->getTerminator()->eraseFromParent();
5028    new UnreachableInst(RI->getContext(), TrivialBB);
5029    if (DTU)
5030      DTU->applyUpdates({{DominatorTree::Delete, TrivialBB, BB}});
5031  }
5032
5033  // Delete the resume block if all its predecessors have been removed.
5034  if (pred_empty(BB))
5035    DeleteDeadBlock(BB, DTU);
5036
5037  return !TrivialUnwindBlocks.empty();
5038}
5039
5040// Simplify resume that is only used by a single (non-phi) landing pad.
5041bool SimplifyCFGOpt::simplifySingleResume(ResumeInst *RI) {
5042  BasicBlock *BB = RI->getParent();
5043  auto *LPInst = cast<LandingPadInst>(BB->getFirstNonPHI());
5044  assert(RI->getValue() == LPInst &&
5045         "Resume must unwind the exception that caused control to here");
5046
5047  // Check that there are no other instructions except for debug intrinsics.
5048  if (!isCleanupBlockEmpty(
5049          make_range<Instruction *>(LPInst->getNextNode(), RI)))
5050    return false;
5051
5052  // Turn all invokes that unwind here into calls and delete the basic block.
5053  for (BasicBlock *Pred : llvm::make_early_inc_range(predecessors(BB))) {
5054    removeUnwindEdge(Pred, DTU);
5055    ++NumInvokes;
5056  }
5057
5058  // The landingpad is now unreachable.  Zap it.
5059  DeleteDeadBlock(BB, DTU);
5060  return true;
5061}
5062
/// Removes the cleanup pad block ended by \p RI when that block performs no
/// work, redirecting its predecessors past it.
///
/// \param RI  The cleanupret terminating the candidate block.
/// \param DTU Dominator tree updater to keep in sync; may be null, in which
///            case no dominator tree updates are recorded here.
/// \returns true if the block was removed, false if the cleanup pad is not in
///          the same block as \p RI, has more than one use, or the block
///          contains anything other than the intrinsics accepted by
///          isCleanupBlockEmpty().
static bool removeEmptyCleanup(CleanupReturnInst *RI, DomTreeUpdater *DTU) {
  // If this is a trivial cleanup pad that executes no instructions, it can be
  // eliminated.  If the cleanup pad continues to the caller, any predecessor
  // that is an EH pad will be updated to continue to the caller and any
  // predecessor that terminates with an invoke instruction will have its invoke
  // instruction converted to a call instruction.  If the cleanup pad being
  // simplified does not continue to the caller, each predecessor will be
  // updated to continue to the unwind destination of the cleanup pad being
  // simplified.
  BasicBlock *BB = RI->getParent();
  CleanupPadInst *CPInst = RI->getCleanupPad();
  if (CPInst->getParent() != BB)
    // This isn't an empty cleanup.
    return false;

  // We cannot kill the pad if it has multiple uses.  This typically arises
  // from unreachable basic blocks.
  if (!CPInst->hasOneUse())
    return false;

  // Check that there are no other instructions except for benign intrinsics.
  if (!isCleanupBlockEmpty(
          make_range<Instruction *>(CPInst->getNextNode(), RI)))
    return false;

  // If the cleanup return we are simplifying unwinds to the caller, this will
  // set UnwindDest to nullptr.
  BasicBlock *UnwindDest = RI->getUnwindDest();
  Instruction *DestEHPad = UnwindDest ? UnwindDest->getFirstNonPHI() : nullptr;

  // We're about to remove BB from the control flow.  Before we do, sink any
  // PHINodes into the unwind destination.  Doing this before changing the
  // control flow avoids some potentially slow checks, since we can currently
  // be certain that UnwindDest and BB have no common predecessors (since they
  // are both EH pads).
  if (UnwindDest) {
    // First, go through the PHI nodes in UnwindDest and update any nodes that
    // reference the block we are removing
    for (PHINode &DestPN : UnwindDest->phis()) {
      int Idx = DestPN.getBasicBlockIndex(BB);
      // Since BB unwinds to UnwindDest, it has to be in the PHI node.
      assert(Idx != -1);
      // This PHI node has an incoming value that corresponds to a control
      // path through the cleanup pad we are removing.  If the incoming
      // value is in the cleanup pad, it must be a PHINode (because we
      // verified above that the block is otherwise empty).  Otherwise, the
      // value is either a constant or a value that dominates the cleanup
      // pad being removed.
      //
      // Because BB and UnwindDest are both EH pads, all of their
      // predecessors must unwind to these blocks, and since no instruction
      // can have multiple unwind destinations, there will be no overlap in
      // incoming blocks between SrcPN and DestPN.
      Value *SrcVal = DestPN.getIncomingValue(Idx);
      PHINode *SrcPN = dyn_cast<PHINode>(SrcVal);

      // Only a PHI that lives in BB itself needs to be looked through per
      // predecessor; any other value is forwarded unchanged.
      bool NeedPHITranslation = SrcPN && SrcPN->getParent() == BB;
      for (auto *Pred : predecessors(BB)) {
        Value *Incoming =
            NeedPHITranslation ? SrcPN->getIncomingValueForBlock(Pred) : SrcVal;
        DestPN.addIncoming(Incoming, Pred);
      }
    }

    // Sink any remaining PHI nodes directly into UnwindDest.
    Instruction *InsertPt = DestEHPad;
    // Early-inc iteration: PN is moved out of BB inside the loop body.
    for (PHINode &PN : make_early_inc_range(BB->phis())) {
      if (PN.use_empty() || !PN.isUsedOutsideOfBlock(BB))
        // If the PHI node has no uses or all of its uses are in this basic
        // block (meaning they are debug or lifetime intrinsics), just leave
        // it.  It will be erased when we erase BB below.
        continue;

      // Otherwise, sink this PHI node into UnwindDest.
      // Any predecessors to UnwindDest which are not already represented
      // must be back edges which inherit the value from the path through
      // BB.  In this case, the PHI value must reference itself.
      for (auto *pred : predecessors(UnwindDest))
        if (pred != BB)
          PN.addIncoming(&PN, pred);
      PN.moveBefore(InsertPt);
      // Also, add a dummy incoming value for the original BB itself,
      // so that the PHI is well-formed until we drop said predecessor.
      PN.addIncoming(PoisonValue::get(PN.getType()), BB);
    }
  }

  // Dominator tree edge changes collected while rewiring the predecessors.
  std::vector<DominatorTree::UpdateType> Updates;

  // We use make_early_inc_range here because we will remove all predecessors.
  // UnwindDest does not change inside this loop, so every iteration takes the
  // same one of the two branches below.
  for (BasicBlock *PredBB : llvm::make_early_inc_range(predecessors(BB))) {
    if (UnwindDest == nullptr) {
      // Unwinds to the caller: the predecessor's unwind edge is dropped.
      // Pending updates are flushed first -- presumably so that
      // removeUnwindEdge, which is handed DTU itself, does not run with
      // updates still queued here.
      if (DTU) {
        DTU->applyUpdates(Updates);
        Updates.clear();
      }
      removeUnwindEdge(PredBB, DTU);
      ++NumInvokes;
    } else {
      // Unwinds elsewhere: retarget the predecessor from BB to UnwindDest.
      BB->removePredecessor(PredBB);
      Instruction *TI = PredBB->getTerminator();
      TI->replaceUsesOfWith(BB, UnwindDest);
      if (DTU) {
        Updates.push_back({DominatorTree::Insert, PredBB, UnwindDest});
        Updates.push_back({DominatorTree::Delete, PredBB, BB});
      }
    }
  }

  if (DTU)
    DTU->applyUpdates(Updates);

  // All predecessors have been processed above; delete the cleanup block.
  DeleteDeadBlock(BB, DTU);

  return true;
}
5179
5180// Try to merge two cleanuppads together.
5181static bool mergeCleanupPad(CleanupReturnInst *RI) {
5182  // Skip any cleanuprets which unwind to caller, there is nothing to merge
5183  // with.
5184  BasicBlock *UnwindDest = RI->getUnwindDest();
5185  if (!UnwindDest)
5186    return false;
5187
5188  // This cleanupret isn't the only predecessor of this cleanuppad, it wouldn't
5189  // be safe to merge without code duplication.
5190  if (UnwindDest->getSinglePredecessor() != RI->getParent())
5191    return false;
5192
5193  // Verify that our cleanuppad's unwind destination is another cleanuppad.
5194  auto *SuccessorCleanupPad = dyn_cast<CleanupPadInst>(&UnwindDest->front());
5195  if (!SuccessorCleanupPad)
5196    return false;
5197
5198  CleanupPadInst *PredecessorCleanupPad = RI->getCleanupPad();
5199  // Replace any uses of the successor cleanupad with the predecessor pad
5200  // The only cleanuppad uses should be this cleanupret, it's cleanupret and
5201  // funclet bundle operands.
5202  SuccessorCleanupPad->replaceAllUsesWith(PredecessorCleanupPad);
5203  // Remove the old cleanuppad.
5204  SuccessorCleanupPad->eraseFromParent();
5205  // Now, we simply replace the cleanupret with a branch to the unwind
5206  // destination.
5207  BranchInst::Create(UnwindDest, RI->getParent());
5208  RI->eraseFromParent();
5209
5210  return true;
5211}
5212
5213bool SimplifyCFGOpt::simplifyCleanupReturn(CleanupReturnInst *RI) {
5214  // It is possible to transiantly have an undef cleanuppad operand because we
5215  // have deleted some, but not all, dead blocks.
5216  // Eventually, this block will be deleted.
5217  if (isa<UndefValue>(RI->getOperand(0)))
5218    return false;
5219
5220  if (mergeCleanupPad(RI))
5221    return true;
5222
5223  if (removeEmptyCleanup(RI, DTU))
5224    return true;
5225
5226  return false;
5227}
5228
// WARNING: keep in sync with InstCombinerImpl::visitUnreachableInst()!
/// Simplify the code around the unreachable instruction \p UI.
///
/// First, instructions directly preceding \p UI are erased for as long as they
/// are guaranteed to transfer execution to their successor. Then, if \p UI has
/// become the first instruction of its block, the terminator of every
/// predecessor is rewritten so that it no longer targets the block, and the
/// block itself is deleted once it has no predecessors left (unless it is the
/// entry block).
///
/// \returns true if the IR was changed.
bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) {
  BasicBlock *BB = UI->getParent();

  bool Changed = false;

  // Ensure that any debug-info records that used to occur after the Unreachable
  // are moved to in front of it -- otherwise they'll "dangle" at the end of
  // the block.
  BB->flushTerminatorDbgValues();

  // Debug-info records on the unreachable inst itself should be deleted, as
  // below we delete everything past the final executable instruction.
  UI->dropDbgValues();

  // If there are any instructions immediately before the unreachable that can
  // be removed, do so.
  while (UI->getIterator() != BB->begin()) {
    // Look at the instruction directly in front of the unreachable.
    BasicBlock::iterator BBI = UI->getIterator();
    --BBI;

    if (!isGuaranteedToTransferExecutionToSuccessor(&*BBI))
      break; // Can not drop any more instructions. We're done here.
    // Otherwise, this instruction can be freely erased,
    // even if it is not side-effect free.

    // Note that deleting EH's here is in fact okay, although it involves a bit
    // of subtle reasoning. If this inst is an EH, all the predecessors of this
    // block will be the unwind edges of Invoke/CatchSwitch/CleanupReturn,
    // and we can therefore guarantee this block will be erased.

    // If we're deleting this, we're deleting any subsequent dbg.values, so
    // delete DPValue records of variable information.
    BBI->dropDbgValues();

    // Delete this instruction (any uses are guaranteed to be dead)
    BBI->replaceAllUsesWith(PoisonValue::get(BBI->getType()));
    BBI->eraseFromParent();
    Changed = true;
  }

  // If the unreachable instruction is the first in the block, take a gander
  // at all of the predecessors of this instruction, and simplify them.
  if (&BB->front() != UI)
    return Changed;

  // Dominator-tree edge updates are batched here and applied either at the
  // end or right before a helper that is handed the updater itself.
  std::vector<DominatorTree::UpdateType> Updates;

  // Snapshot the predecessors up front: the loop below rewrites their
  // terminators, which changes BB's predecessor list. The set also visits a
  // predecessor with several edges into BB only once.
  SmallSetVector<BasicBlock *, 8> Preds(pred_begin(BB), pred_end(BB));
  for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
    auto *Predecessor = Preds[i];
    Instruction *TI = Predecessor->getTerminator();
    // New instructions are inserted in front of the old terminator.
    IRBuilder<> Builder(TI);
    if (auto *BI = dyn_cast<BranchInst>(TI)) {
      // We could either have a proper unconditional branch,
      // or a degenerate conditional branch with matching destinations.
      if (all_of(BI->successors(),
                 [BB](auto *Successor) { return Successor == BB; })) {
        // Every successor is BB, so the predecessor itself ends in unreachable.
        new UnreachableInst(TI->getContext(), TI);
        TI->eraseFromParent();
        Changed = true;
      } else {
        assert(BI->isConditional() && "Can't get here with an uncond branch.");
        Value* Cond = BI->getCondition();
        assert(BI->getSuccessor(0) != BI->getSuccessor(1) &&
               "The destinations are guaranteed to be different here.");
        // Only one edge leads to BB. Record the condition value that avoids
        // BB as an assumption and branch unconditionally to the other
        // successor.
        CallInst *Assumption;
        if (BI->getSuccessor(0) == BB) {
          Assumption = Builder.CreateAssumption(Builder.CreateNot(Cond));
          Builder.CreateBr(BI->getSuccessor(1));
        } else {
          assert(BI->getSuccessor(1) == BB && "Incorrect CFG");
          Assumption = Builder.CreateAssumption(Cond);
          Builder.CreateBr(BI->getSuccessor(0));
        }
        if (Options.AC)
          Options.AC->registerAssumption(cast<AssumeInst>(Assumption));

        EraseTerminatorAndDCECond(BI);
        Changed = true;
      }
      if (DTU)
        Updates.push_back({DominatorTree::Delete, Predecessor, BB});
    } else if (auto *SI = dyn_cast<SwitchInst>(TI)) {
      // Drop every case that targets BB; the wrapper is used for the removal
      // (presumably so profile metadata stays consistent -- see its
      // definition).
      SwitchInstProfUpdateWrapper SU(*SI);
      for (auto i = SU->case_begin(), e = SU->case_end(); i != e;) {
        if (i->getCaseSuccessor() != BB) {
          ++i;
          continue;
        }
        BB->removePredecessor(SU->getParent());
        // removeCase hands back the iterator to continue from; the end
        // iterator is re-read after each removal.
        i = SU.removeCase(i);
        e = SU->case_end();
        Changed = true;
      }
      // Note that the default destination can't be removed!
      if (DTU && SI->getDefaultDest() != BB)
        Updates.push_back({DominatorTree::Delete, Predecessor, BB});
    } else if (auto *II = dyn_cast<InvokeInst>(TI)) {
      if (II->getUnwindDest() == BB) {
        // Flush the batched updates before calling removeUnwindEdge, which is
        // given DTU directly.
        if (DTU) {
          DTU->applyUpdates(Updates);
          Updates.clear();
        }
        // The cast relies on removeUnwindEdge returning the call that
        // replaces the invoke; mark that call as non-throwing.
        auto *CI = cast<CallInst>(removeUnwindEdge(TI->getParent(), DTU));
        if (!CI->doesNotThrow())
          CI->setDoesNotThrow();
        Changed = true;
      }
    } else if (auto *CSI = dyn_cast<CatchSwitchInst>(TI)) {
      if (CSI->getUnwindDest() == BB) {
        // Same flushing discipline as for invokes above.
        if (DTU) {
          DTU->applyUpdates(Updates);
          Updates.clear();
        }
        removeUnwindEdge(TI->getParent(), DTU);
        Changed = true;
        continue;
      }

      // BB is not the unwind destination, so look for it among the handlers.
      for (CatchSwitchInst::handler_iterator I = CSI->handler_begin(),
                                             E = CSI->handler_end();
           I != E; ++I) {
        if (*I == BB) {
          CSI->removeHandler(I);
          // Step both iterators back so that the loop's ++I revisits the
          // current slot and E matches the shortened list.
          // NOTE(review): presumably removeHandler shifts later handlers
          // down by one -- confirm.
          --I;
          --E;
          Changed = true;
        }
      }
      if (DTU)
        Updates.push_back({DominatorTree::Delete, Predecessor, BB});
      if (CSI->getNumHandlers() == 0) {
        if (CSI->hasUnwindDest()) {
          // Redirect all predecessors of the block containing CatchSwitchInst
          // to instead branch to the CatchSwitchInst's unwind destination.
          if (DTU) {
            for (auto *PredecessorOfPredecessor : predecessors(Predecessor)) {
              Updates.push_back({DominatorTree::Insert,
                                 PredecessorOfPredecessor,
                                 CSI->getUnwindDest()});
              Updates.push_back({DominatorTree::Delete,
                                 PredecessorOfPredecessor, Predecessor});
            }
          }
          Predecessor->replaceAllUsesWith(CSI->getUnwindDest());
        } else {
          // Rewrite all preds to unwind to caller (or from invoke to call).
          if (DTU) {
            DTU->applyUpdates(Updates);
            Updates.clear();
          }
          // Copy the predecessor list first: removeUnwindEdge rewrites the
          // terminators being iterated over.
          SmallVector<BasicBlock *, 8> EHPreds(predecessors(Predecessor));
          for (BasicBlock *EHPred : EHPreds)
            removeUnwindEdge(EHPred, DTU);
        }
        // The catchswitch is no longer reachable.
        new UnreachableInst(CSI->getContext(), CSI);
        CSI->eraseFromParent();
        Changed = true;
      }
    } else if (auto *CRI = dyn_cast<CleanupReturnInst>(TI)) {
      // CRI is only read by the assert below; the cast silences the unused
      // variable warning in builds without assertions.
      (void)CRI;
      assert(CRI->hasUnwindDest() && CRI->getUnwindDest() == BB &&
             "Expected to always have an unwind to BB.");
      if (DTU)
        Updates.push_back({DominatorTree::Delete, Predecessor, BB});
      // The cleanupret can only lead to BB, so it becomes unreachable itself.
      new UnreachableInst(TI->getContext(), TI);
      TI->eraseFromParent();
      Changed = true;
    }
  }

  // Apply whatever updates are still pending.
  if (DTU)
    DTU->applyUpdates(Updates);

  // If this block is now dead, remove it.
  if (pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) {
    DeleteDeadBlock(BB, DTU);
    return true;
  }

  return Changed;
}
5413
5414static bool CasesAreContiguous(SmallVectorImpl<ConstantInt *> &Cases) {
5415  assert(Cases.size() >= 1);
5416
5417  array_pod_sort(Cases.begin(), Cases.end(), ConstantIntSortPredicate);
5418  for (size_t I = 1, E = Cases.size(); I != E; ++I) {
5419    if (Cases[I - 1]->getValue() != Cases[I]->getValue() + 1)
5420      return false;
5421  }
5422  return true;
5423}
5424
5425static void createUnreachableSwitchDefault(SwitchInst *Switch,
5426                                           DomTreeUpdater *DTU) {
5427  LLVM_DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n");
5428  auto *BB = Switch->getParent();
5429  auto *OrigDefaultBlock = Switch->getDefaultDest();
5430  OrigDefaultBlock->removePredecessor(BB);
5431  BasicBlock *NewDefaultBlock = BasicBlock::Create(
5432      BB->getContext(), BB->getName() + ".unreachabledefault", BB->getParent(),
5433      OrigDefaultBlock);
5434  new UnreachableInst(Switch->getContext(), NewDefaultBlock);
5435  Switch->setDefaultDest(&*NewDefaultBlock);
5436  if (DTU) {
5437    SmallVector<DominatorTree::UpdateType, 2> Updates;
5438    Updates.push_back({DominatorTree::Insert, BB, &*NewDefaultBlock});
5439    if (!is_contained(successors(BB), OrigDefaultBlock))
5440      Updates.push_back({DominatorTree::Delete, BB, &*OrigDefaultBlock});
5441    DTU->applyUpdates(Updates);
5442  }
5443}
5444
5445/// Turn a switch into an integer range comparison and branch.
5446/// Switches with more than 2 destinations are ignored.
5447/// Switches with 1 destination are also ignored.
5448bool SimplifyCFGOpt::TurnSwitchRangeIntoICmp(SwitchInst *SI,
5449                                             IRBuilder<> &Builder) {
5450  assert(SI->getNumCases() > 1 && "Degenerate switch?");
5451
5452  bool HasDefault =
5453      !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg());
5454
5455  auto *BB = SI->getParent();
5456
5457  // Partition the cases into two sets with different destinations.
5458  BasicBlock *DestA = HasDefault ? SI->getDefaultDest() : nullptr;
5459  BasicBlock *DestB = nullptr;
5460  SmallVector<ConstantInt *, 16> CasesA;
5461  SmallVector<ConstantInt *, 16> CasesB;
5462
5463  for (auto Case : SI->cases()) {
5464    BasicBlock *Dest = Case.getCaseSuccessor();
5465    if (!DestA)
5466      DestA = Dest;
5467    if (Dest == DestA) {
5468      CasesA.push_back(Case.getCaseValue());
5469      continue;
5470    }
5471    if (!DestB)
5472      DestB = Dest;
5473    if (Dest == DestB) {
5474      CasesB.push_back(Case.getCaseValue());
5475      continue;
5476    }
5477    return false; // More than two destinations.
5478  }
5479  if (!DestB)
5480    return false; // All destinations are the same and the default is unreachable
5481
5482  assert(DestA && DestB &&
5483         "Single-destination switch should have been folded.");
5484  assert(DestA != DestB);
5485  assert(DestB != SI->getDefaultDest());
5486  assert(!CasesB.empty() && "There must be non-default cases.");
5487  assert(!CasesA.empty() || HasDefault);
5488
5489  // Figure out if one of the sets of cases form a contiguous range.
5490  SmallVectorImpl<ConstantInt *> *ContiguousCases = nullptr;
5491  BasicBlock *ContiguousDest = nullptr;
5492  BasicBlock *OtherDest = nullptr;
5493  if (!CasesA.empty() && CasesAreContiguous(CasesA)) {
5494    ContiguousCases = &CasesA;
5495    ContiguousDest = DestA;
5496    OtherDest = DestB;
5497  } else if (CasesAreContiguous(CasesB)) {
5498    ContiguousCases = &CasesB;
5499    ContiguousDest = DestB;
5500    OtherDest = DestA;
5501  } else
5502    return false;
5503
5504  // Start building the compare and branch.
5505
5506  Constant *Offset = ConstantExpr::getNeg(ContiguousCases->back());
5507  Constant *NumCases =
5508      ConstantInt::get(Offset->getType(), ContiguousCases->size());
5509
5510  Value *Sub = SI->getCondition();
5511  if (!Offset->isNullValue())
5512    Sub = Builder.CreateAdd(Sub, Offset, Sub->getName() + ".off");
5513
5514  Value *Cmp;
5515  // If NumCases overflowed, then all possible values jump to the successor.
5516  if (NumCases->isNullValue() && !ContiguousCases->empty())
5517    Cmp = ConstantInt::getTrue(SI->getContext());
5518  else
5519    Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch");
5520  BranchInst *NewBI = Builder.CreateCondBr(Cmp, ContiguousDest, OtherDest);
5521
5522  // Update weight for the newly-created conditional branch.
5523  if (hasBranchWeightMD(*SI)) {
5524    SmallVector<uint64_t, 8> Weights;
5525    GetBranchWeights(SI, Weights);
5526    if (Weights.size() == 1 + SI->getNumCases()) {
5527      uint64_t TrueWeight = 0;
5528      uint64_t FalseWeight = 0;
5529      for (size_t I = 0, E = Weights.size(); I != E; ++I) {
5530        if (SI->getSuccessor(I) == ContiguousDest)
5531          TrueWeight += Weights[I];
5532        else
5533          FalseWeight += Weights[I];
5534      }
5535      while (TrueWeight > UINT32_MAX || FalseWeight > UINT32_MAX) {
5536        TrueWeight /= 2;
5537        FalseWeight /= 2;
5538      }
5539      setBranchWeights(NewBI, TrueWeight, FalseWeight);
5540    }
5541  }
5542
5543  // Prune obsolete incoming values off the successors' PHI nodes.
5544  for (auto BBI = ContiguousDest->begin(); isa<PHINode>(BBI); ++BBI) {
5545    unsigned PreviousEdges = ContiguousCases->size();
5546    if (ContiguousDest == SI->getDefaultDest())
5547      ++PreviousEdges;
5548    for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I)
5549      cast<PHINode>(BBI)->removeIncomingValue(SI->getParent());
5550  }
5551  for (auto BBI = OtherDest->begin(); isa<PHINode>(BBI); ++BBI) {
5552    unsigned PreviousEdges = SI->getNumCases() - ContiguousCases->size();
5553    if (OtherDest == SI->getDefaultDest())
5554      ++PreviousEdges;
5555    for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I)
5556      cast<PHINode>(BBI)->removeIncomingValue(SI->getParent());
5557  }
5558
5559  // Clean up the default block - it may have phis or other instructions before
5560  // the unreachable terminator.
5561  if (!HasDefault)
5562    createUnreachableSwitchDefault(SI, DTU);
5563
5564  auto *UnreachableDefault = SI->getDefaultDest();
5565
5566  // Drop the switch.
5567  SI->eraseFromParent();
5568
5569  if (!HasDefault && DTU)
5570    DTU->applyUpdates({{DominatorTree::Delete, BB, UnreachableDefault}});
5571
5572  return true;
5573}
5574
5575/// Compute masked bits for the condition of a switch
5576/// and use it to remove dead cases.
5577static bool eliminateDeadSwitchCases(SwitchInst *SI, DomTreeUpdater *DTU,
5578                                     AssumptionCache *AC,
5579                                     const DataLayout &DL) {
5580  Value *Cond = SI->getCondition();
5581  KnownBits Known = computeKnownBits(Cond, DL, 0, AC, SI);
5582
5583  // We can also eliminate cases by determining that their values are outside of
5584  // the limited range of the condition based on how many significant (non-sign)
5585  // bits are in the condition value.
5586  unsigned MaxSignificantBitsInCond =
5587      ComputeMaxSignificantBits(Cond, DL, 0, AC, SI);
5588
5589  // Gather dead cases.
5590  SmallVector<ConstantInt *, 8> DeadCases;
5591  SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases;
5592  SmallVector<BasicBlock *, 8> UniqueSuccessors;
5593  for (const auto &Case : SI->cases()) {
5594    auto *Successor = Case.getCaseSuccessor();
5595    if (DTU) {
5596      if (!NumPerSuccessorCases.count(Successor))
5597        UniqueSuccessors.push_back(Successor);
5598      ++NumPerSuccessorCases[Successor];
5599    }
5600    const APInt &CaseVal = Case.getCaseValue()->getValue();
5601    if (Known.Zero.intersects(CaseVal) || !Known.One.isSubsetOf(CaseVal) ||
5602        (CaseVal.getSignificantBits() > MaxSignificantBitsInCond)) {
5603      DeadCases.push_back(Case.getCaseValue());
5604      if (DTU)
5605        --NumPerSuccessorCases[Successor];
5606      LLVM_DEBUG(dbgs() << "SimplifyCFG: switch case " << CaseVal
5607                        << " is dead.\n");
5608    }
5609  }
5610
5611  // If we can prove that the cases must cover all possible values, the
5612  // default destination becomes dead and we can remove it.  If we know some
5613  // of the bits in the value, we can use that to more precisely compute the
5614  // number of possible unique case values.
5615  bool HasDefault =
5616      !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg());
5617  const unsigned NumUnknownBits =
5618      Known.getBitWidth() - (Known.Zero | Known.One).popcount();
5619  assert(NumUnknownBits <= Known.getBitWidth());
5620  if (HasDefault && DeadCases.empty() &&
5621      NumUnknownBits < 64 /* avoid overflow */ &&
5622      SI->getNumCases() == (1ULL << NumUnknownBits)) {
5623    createUnreachableSwitchDefault(SI, DTU);
5624    return true;
5625  }
5626
5627  if (DeadCases.empty())
5628    return false;
5629
5630  SwitchInstProfUpdateWrapper SIW(*SI);
5631  for (ConstantInt *DeadCase : DeadCases) {
5632    SwitchInst::CaseIt CaseI = SI->findCaseValue(DeadCase);
5633    assert(CaseI != SI->case_default() &&
5634           "Case was not found. Probably mistake in DeadCases forming.");
5635    // Prune unused values from PHI nodes.
5636    CaseI->getCaseSuccessor()->removePredecessor(SI->getParent());
5637    SIW.removeCase(CaseI);
5638  }
5639
5640  if (DTU) {
5641    std::vector<DominatorTree::UpdateType> Updates;
5642    for (auto *Successor : UniqueSuccessors)
5643      if (NumPerSuccessorCases[Successor] == 0)
5644        Updates.push_back({DominatorTree::Delete, SI->getParent(), Successor});
5645    DTU->applyUpdates(Updates);
5646  }
5647
5648  return true;
5649}
5650
5651/// If BB would be eligible for simplification by
5652/// TryToSimplifyUncondBranchFromEmptyBlock (i.e. it is empty and terminated
5653/// by an unconditional branch), look at the phi node for BB in the successor
5654/// block and see if the incoming value is equal to CaseValue. If so, return
5655/// the phi node, and set PhiIndex to BB's index in the phi node.
5656static PHINode *FindPHIForConditionForwarding(ConstantInt *CaseValue,
5657                                              BasicBlock *BB, int *PhiIndex) {
5658  if (BB->getFirstNonPHIOrDbg() != BB->getTerminator())
5659    return nullptr; // BB must be empty to be a candidate for simplification.
5660  if (!BB->getSinglePredecessor())
5661    return nullptr; // BB must be dominated by the switch.
5662
5663  BranchInst *Branch = dyn_cast<BranchInst>(BB->getTerminator());
5664  if (!Branch || !Branch->isUnconditional())
5665    return nullptr; // Terminator must be unconditional branch.
5666
5667  BasicBlock *Succ = Branch->getSuccessor(0);
5668
5669  for (PHINode &PHI : Succ->phis()) {
5670    int Idx = PHI.getBasicBlockIndex(BB);
5671    assert(Idx >= 0 && "PHI has no entry for predecessor?");
5672
5673    Value *InValue = PHI.getIncomingValue(Idx);
5674    if (InValue != CaseValue)
5675      continue;
5676
5677    *PhiIndex = Idx;
5678    return &PHI;
5679  }
5680
5681  return nullptr;
5682}
5683
5684/// Try to forward the condition of a switch instruction to a phi node
5685/// dominated by the switch, if that would mean that some of the destination
5686/// blocks of the switch can be folded away. Return true if a change is made.
5687static bool ForwardSwitchConditionToPHI(SwitchInst *SI) {
5688  using ForwardingNodesMap = DenseMap<PHINode *, SmallVector<int, 4>>;
5689
5690  ForwardingNodesMap ForwardingNodes;
5691  BasicBlock *SwitchBlock = SI->getParent();
5692  bool Changed = false;
5693  for (const auto &Case : SI->cases()) {
5694    ConstantInt *CaseValue = Case.getCaseValue();
5695    BasicBlock *CaseDest = Case.getCaseSuccessor();
5696
5697    // Replace phi operands in successor blocks that are using the constant case
5698    // value rather than the switch condition variable:
5699    //   switchbb:
5700    //   switch i32 %x, label %default [
5701    //     i32 17, label %succ
5702    //   ...
5703    //   succ:
5704    //     %r = phi i32 ... [ 17, %switchbb ] ...
5705    // -->
5706    //     %r = phi i32 ... [ %x, %switchbb ] ...
5707
5708    for (PHINode &Phi : CaseDest->phis()) {
5709      // This only works if there is exactly 1 incoming edge from the switch to
5710      // a phi. If there is >1, that means multiple cases of the switch map to 1
5711      // value in the phi, and that phi value is not the switch condition. Thus,
5712      // this transform would not make sense (the phi would be invalid because
5713      // a phi can't have different incoming values from the same block).
5714      int SwitchBBIdx = Phi.getBasicBlockIndex(SwitchBlock);
5715      if (Phi.getIncomingValue(SwitchBBIdx) == CaseValue &&
5716          count(Phi.blocks(), SwitchBlock) == 1) {
5717        Phi.setIncomingValue(SwitchBBIdx, SI->getCondition());
5718        Changed = true;
5719      }
5720    }
5721
5722    // Collect phi nodes that are indirectly using this switch's case constants.
5723    int PhiIdx;
5724    if (auto *Phi = FindPHIForConditionForwarding(CaseValue, CaseDest, &PhiIdx))
5725      ForwardingNodes[Phi].push_back(PhiIdx);
5726  }
5727
5728  for (auto &ForwardingNode : ForwardingNodes) {
5729    PHINode *Phi = ForwardingNode.first;
5730    SmallVectorImpl<int> &Indexes = ForwardingNode.second;
5731    if (Indexes.size() < 2)
5732      continue;
5733
5734    for (int Index : Indexes)
5735      Phi->setIncomingValue(Index, SI->getCondition());
5736    Changed = true;
5737  }
5738
5739  return Changed;
5740}
5741
5742/// Return true if the backend will be able to handle
5743/// initializing an array of constants like C.
5744static bool ValidLookupTableConstant(Constant *C, const TargetTransformInfo &TTI) {
5745  if (C->isThreadDependent())
5746    return false;
5747  if (C->isDLLImportDependent())
5748    return false;
5749
5750  if (!isa<ConstantFP>(C) && !isa<ConstantInt>(C) &&
5751      !isa<ConstantPointerNull>(C) && !isa<GlobalValue>(C) &&
5752      !isa<UndefValue>(C) && !isa<ConstantExpr>(C))
5753    return false;
5754
5755  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
5756    // Pointer casts and in-bounds GEPs will not prohibit the backend from
5757    // materializing the array of constants.
5758    Constant *StrippedC = cast<Constant>(CE->stripInBoundsConstantOffsets());
5759    if (StrippedC == C || !ValidLookupTableConstant(StrippedC, TTI))
5760      return false;
5761  }
5762
5763  if (!TTI.shouldBuildLookupTablesForConstant(C))
5764    return false;
5765
5766  return true;
5767}
5768
5769/// If V is a Constant, return it. Otherwise, try to look up
5770/// its constant value in ConstantPool, returning 0 if it's not there.
5771static Constant *
5772LookupConstant(Value *V,
5773               const SmallDenseMap<Value *, Constant *> &ConstantPool) {
5774  if (Constant *C = dyn_cast<Constant>(V))
5775    return C;
5776  return ConstantPool.lookup(V);
5777}
5778
5779/// Try to fold instruction I into a constant. This works for
5780/// simple instructions such as binary operations where both operands are
5781/// constant or can be replaced by constants from the ConstantPool. Returns the
5782/// resulting constant on success, 0 otherwise.
5783static Constant *
5784ConstantFold(Instruction *I, const DataLayout &DL,
5785             const SmallDenseMap<Value *, Constant *> &ConstantPool) {
5786  if (SelectInst *Select = dyn_cast<SelectInst>(I)) {
5787    Constant *A = LookupConstant(Select->getCondition(), ConstantPool);
5788    if (!A)
5789      return nullptr;
5790    if (A->isAllOnesValue())
5791      return LookupConstant(Select->getTrueValue(), ConstantPool);
5792    if (A->isNullValue())
5793      return LookupConstant(Select->getFalseValue(), ConstantPool);
5794    return nullptr;
5795  }
5796
5797  SmallVector<Constant *, 4> COps;
5798  for (unsigned N = 0, E = I->getNumOperands(); N != E; ++N) {
5799    if (Constant *A = LookupConstant(I->getOperand(N), ConstantPool))
5800      COps.push_back(A);
5801    else
5802      return nullptr;
5803  }
5804
5805  return ConstantFoldInstOperands(I, COps, DL);
5806}
5807
5808/// Try to determine the resulting constant values in phi nodes
5809/// at the common destination basic block, *CommonDest, for one of the case
5810/// destionations CaseDest corresponding to value CaseVal (0 for the default
5811/// case), of a switch instruction SI.
5812static bool
5813getCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest,
5814               BasicBlock **CommonDest,
5815               SmallVectorImpl<std::pair<PHINode *, Constant *>> &Res,
5816               const DataLayout &DL, const TargetTransformInfo &TTI) {
5817  // The block from which we enter the common destination.
5818  BasicBlock *Pred = SI->getParent();
5819
5820  // If CaseDest is empty except for some side-effect free instructions through
5821  // which we can constant-propagate the CaseVal, continue to its successor.
5822  SmallDenseMap<Value *, Constant *> ConstantPool;
5823  ConstantPool.insert(std::make_pair(SI->getCondition(), CaseVal));
5824  for (Instruction &I : CaseDest->instructionsWithoutDebug(false)) {
5825    if (I.isTerminator()) {
5826      // If the terminator is a simple branch, continue to the next block.
5827      if (I.getNumSuccessors() != 1 || I.isSpecialTerminator())
5828        return false;
5829      Pred = CaseDest;
5830      CaseDest = I.getSuccessor(0);
5831    } else if (Constant *C = ConstantFold(&I, DL, ConstantPool)) {
5832      // Instruction is side-effect free and constant.
5833
5834      // If the instruction has uses outside this block or a phi node slot for
5835      // the block, it is not safe to bypass the instruction since it would then
5836      // no longer dominate all its uses.
5837      for (auto &Use : I.uses()) {
5838        User *User = Use.getUser();
5839        if (Instruction *I = dyn_cast<Instruction>(User))
5840          if (I->getParent() == CaseDest)
5841            continue;
5842        if (PHINode *Phi = dyn_cast<PHINode>(User))
5843          if (Phi->getIncomingBlock(Use) == CaseDest)
5844            continue;
5845        return false;
5846      }
5847
5848      ConstantPool.insert(std::make_pair(&I, C));
5849    } else {
5850      break;
5851    }
5852  }
5853
5854  // If we did not have a CommonDest before, use the current one.
5855  if (!*CommonDest)
5856    *CommonDest = CaseDest;
5857  // If the destination isn't the common one, abort.
5858  if (CaseDest != *CommonDest)
5859    return false;
5860
5861  // Get the values for this case from phi nodes in the destination block.
5862  for (PHINode &PHI : (*CommonDest)->phis()) {
5863    int Idx = PHI.getBasicBlockIndex(Pred);
5864    if (Idx == -1)
5865      continue;
5866
5867    Constant *ConstVal =
5868        LookupConstant(PHI.getIncomingValue(Idx), ConstantPool);
5869    if (!ConstVal)
5870      return false;
5871
5872    // Be conservative about which kinds of constants we support.
5873    if (!ValidLookupTableConstant(ConstVal, TTI))
5874      return false;
5875
5876    Res.push_back(std::make_pair(&PHI, ConstVal));
5877  }
5878
5879  return Res.size() > 0;
5880}
5881
5882// Helper function used to add CaseVal to the list of cases that generate
5883// Result. Returns the updated number of cases that generate this result.
5884static size_t mapCaseToResult(ConstantInt *CaseVal,
5885                              SwitchCaseResultVectorTy &UniqueResults,
5886                              Constant *Result) {
5887  for (auto &I : UniqueResults) {
5888    if (I.first == Result) {
5889      I.second.push_back(CaseVal);
5890      return I.second.size();
5891    }
5892  }
5893  UniqueResults.push_back(
5894      std::make_pair(Result, SmallVector<ConstantInt *, 4>(1, CaseVal)));
5895  return 1;
5896}
5897
5898// Helper function that initializes a map containing
5899// results for the PHI node of the common destination block for a switch
5900// instruction. Returns false if multiple PHI nodes have been found or if
5901// there is not a common destination block for the switch.
5902static bool initializeUniqueCases(SwitchInst *SI, PHINode *&PHI,
5903                                  BasicBlock *&CommonDest,
5904                                  SwitchCaseResultVectorTy &UniqueResults,
5905                                  Constant *&DefaultResult,
5906                                  const DataLayout &DL,
5907                                  const TargetTransformInfo &TTI,
5908                                  uintptr_t MaxUniqueResults) {
5909  for (const auto &I : SI->cases()) {
5910    ConstantInt *CaseVal = I.getCaseValue();
5911
5912    // Resulting value at phi nodes for this case value.
5913    SwitchCaseResultsTy Results;
5914    if (!getCaseResults(SI, CaseVal, I.getCaseSuccessor(), &CommonDest, Results,
5915                        DL, TTI))
5916      return false;
5917
5918    // Only one value per case is permitted.
5919    if (Results.size() > 1)
5920      return false;
5921
5922    // Add the case->result mapping to UniqueResults.
5923    const size_t NumCasesForResult =
5924        mapCaseToResult(CaseVal, UniqueResults, Results.begin()->second);
5925
5926    // Early out if there are too many cases for this result.
5927    if (NumCasesForResult > MaxSwitchCasesPerResult)
5928      return false;
5929
5930    // Early out if there are too many unique results.
5931    if (UniqueResults.size() > MaxUniqueResults)
5932      return false;
5933
5934    // Check the PHI consistency.
5935    if (!PHI)
5936      PHI = Results[0].first;
5937    else if (PHI != Results[0].first)
5938      return false;
5939  }
5940  // Find the default result value.
5941  SmallVector<std::pair<PHINode *, Constant *>, 1> DefaultResults;
5942  BasicBlock *DefaultDest = SI->getDefaultDest();
5943  getCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest, DefaultResults,
5944                 DL, TTI);
5945  // If the default value is not found abort unless the default destination
5946  // is unreachable.
5947  DefaultResult =
5948      DefaultResults.size() == 1 ? DefaultResults.begin()->second : nullptr;
5949  if ((!DefaultResult &&
5950       !isa<UnreachableInst>(DefaultDest->getFirstNonPHIOrDbg())))
5951    return false;
5952
5953  return true;
5954}
5955
5956// Helper function that checks if it is possible to transform a switch with only
5957// two cases (or two cases + default) that produces a result into a select.
5958// TODO: Handle switches with more than 2 cases that map to the same result.
5959static Value *foldSwitchToSelect(const SwitchCaseResultVectorTy &ResultVector,
5960                                 Constant *DefaultResult, Value *Condition,
5961                                 IRBuilder<> &Builder) {
5962  // If we are selecting between only two cases transform into a simple
5963  // select or a two-way select if default is possible.
5964  // Example:
5965  // switch (a) {                  %0 = icmp eq i32 %a, 10
5966  //   case 10: return 42;         %1 = select i1 %0, i32 42, i32 4
5967  //   case 20: return 2;   ---->  %2 = icmp eq i32 %a, 20
5968  //   default: return 4;          %3 = select i1 %2, i32 2, i32 %1
5969  // }
5970  if (ResultVector.size() == 2 && ResultVector[0].second.size() == 1 &&
5971      ResultVector[1].second.size() == 1) {
5972    ConstantInt *FirstCase = ResultVector[0].second[0];
5973    ConstantInt *SecondCase = ResultVector[1].second[0];
5974    Value *SelectValue = ResultVector[1].first;
5975    if (DefaultResult) {
5976      Value *ValueCompare =
5977          Builder.CreateICmpEQ(Condition, SecondCase, "switch.selectcmp");
5978      SelectValue = Builder.CreateSelect(ValueCompare, ResultVector[1].first,
5979                                         DefaultResult, "switch.select");
5980    }
5981    Value *ValueCompare =
5982        Builder.CreateICmpEQ(Condition, FirstCase, "switch.selectcmp");
5983    return Builder.CreateSelect(ValueCompare, ResultVector[0].first,
5984                                SelectValue, "switch.select");
5985  }
5986
5987  // Handle the degenerate case where two cases have the same result value.
5988  if (ResultVector.size() == 1 && DefaultResult) {
5989    ArrayRef<ConstantInt *> CaseValues = ResultVector[0].second;
5990    unsigned CaseCount = CaseValues.size();
5991    // n bits group cases map to the same result:
5992    // case 0,4      -> Cond & 0b1..1011 == 0 ? result : default
5993    // case 0,2,4,6  -> Cond & 0b1..1001 == 0 ? result : default
5994    // case 0,2,8,10 -> Cond & 0b1..0101 == 0 ? result : default
5995    if (isPowerOf2_32(CaseCount)) {
5996      ConstantInt *MinCaseVal = CaseValues[0];
5997      // Find mininal value.
5998      for (auto *Case : CaseValues)
5999        if (Case->getValue().slt(MinCaseVal->getValue()))
6000          MinCaseVal = Case;
6001
6002      // Mark the bits case number touched.
6003      APInt BitMask = APInt::getZero(MinCaseVal->getBitWidth());
6004      for (auto *Case : CaseValues)
6005        BitMask |= (Case->getValue() - MinCaseVal->getValue());
6006
6007      // Check if cases with the same result can cover all number
6008      // in touched bits.
6009      if (BitMask.popcount() == Log2_32(CaseCount)) {
6010        if (!MinCaseVal->isNullValue())
6011          Condition = Builder.CreateSub(Condition, MinCaseVal);
6012        Value *And = Builder.CreateAnd(Condition, ~BitMask, "switch.and");
6013        Value *Cmp = Builder.CreateICmpEQ(
6014            And, Constant::getNullValue(And->getType()), "switch.selectcmp");
6015        return Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
6016      }
6017    }
6018
6019    // Handle the degenerate case where two cases have the same value.
6020    if (CaseValues.size() == 2) {
6021      Value *Cmp1 = Builder.CreateICmpEQ(Condition, CaseValues[0],
6022                                         "switch.selectcmp.case1");
6023      Value *Cmp2 = Builder.CreateICmpEQ(Condition, CaseValues[1],
6024                                         "switch.selectcmp.case2");
6025      Value *Cmp = Builder.CreateOr(Cmp1, Cmp2, "switch.selectcmp");
6026      return Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
6027    }
6028  }
6029
6030  return nullptr;
6031}
6032
6033// Helper function to cleanup a switch instruction that has been converted into
6034// a select, fixing up PHI nodes and basic blocks.
6035static void removeSwitchAfterSelectFold(SwitchInst *SI, PHINode *PHI,
6036                                        Value *SelectValue,
6037                                        IRBuilder<> &Builder,
6038                                        DomTreeUpdater *DTU) {
6039  std::vector<DominatorTree::UpdateType> Updates;
6040
6041  BasicBlock *SelectBB = SI->getParent();
6042  BasicBlock *DestBB = PHI->getParent();
6043
6044  if (DTU && !is_contained(predecessors(DestBB), SelectBB))
6045    Updates.push_back({DominatorTree::Insert, SelectBB, DestBB});
6046  Builder.CreateBr(DestBB);
6047
6048  // Remove the switch.
6049
6050  PHI->removeIncomingValueIf(
6051      [&](unsigned Idx) { return PHI->getIncomingBlock(Idx) == SelectBB; });
6052  PHI->addIncoming(SelectValue, SelectBB);
6053
6054  SmallPtrSet<BasicBlock *, 4> RemovedSuccessors;
6055  for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) {
6056    BasicBlock *Succ = SI->getSuccessor(i);
6057
6058    if (Succ == DestBB)
6059      continue;
6060    Succ->removePredecessor(SelectBB);
6061    if (DTU && RemovedSuccessors.insert(Succ).second)
6062      Updates.push_back({DominatorTree::Delete, SelectBB, Succ});
6063  }
6064  SI->eraseFromParent();
6065  if (DTU)
6066    DTU->applyUpdates(Updates);
6067}
6068
6069/// If a switch is only used to initialize one or more phi nodes in a common
6070/// successor block with only two different constant values, try to replace the
6071/// switch with a select. Returns true if the fold was made.
6072static bool trySwitchToSelect(SwitchInst *SI, IRBuilder<> &Builder,
6073                              DomTreeUpdater *DTU, const DataLayout &DL,
6074                              const TargetTransformInfo &TTI) {
6075  Value *const Cond = SI->getCondition();
6076  PHINode *PHI = nullptr;
6077  BasicBlock *CommonDest = nullptr;
6078  Constant *DefaultResult;
6079  SwitchCaseResultVectorTy UniqueResults;
6080  // Collect all the cases that will deliver the same value from the switch.
6081  if (!initializeUniqueCases(SI, PHI, CommonDest, UniqueResults, DefaultResult,
6082                             DL, TTI, /*MaxUniqueResults*/ 2))
6083    return false;
6084
6085  assert(PHI != nullptr && "PHI for value select not found");
6086  Builder.SetInsertPoint(SI);
6087  Value *SelectValue =
6088      foldSwitchToSelect(UniqueResults, DefaultResult, Cond, Builder);
6089  if (!SelectValue)
6090    return false;
6091
6092  removeSwitchAfterSelectFold(SI, PHI, SelectValue, Builder, DTU);
6093  return true;
6094}
6095
6096namespace {
6097
6098/// This class represents a lookup table that can be used to replace a switch.
6099class SwitchLookupTable {
6100public:
6101  /// Create a lookup table to use as a switch replacement with the contents
6102  /// of Values, using DefaultValue to fill any holes in the table.
6103  SwitchLookupTable(
6104      Module &M, uint64_t TableSize, ConstantInt *Offset,
6105      const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
6106      Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName);
6107
6108  /// Build instructions with Builder to retrieve the value at
6109  /// the position given by Index in the lookup table.
6110  Value *BuildLookup(Value *Index, IRBuilder<> &Builder);
6111
6112  /// Return true if a table with TableSize elements of
6113  /// type ElementType would fit in a target-legal register.
6114  static bool WouldFitInRegister(const DataLayout &DL, uint64_t TableSize,
6115                                 Type *ElementType);
6116
6117private:
6118  // Depending on the contents of the table, it can be represented in
6119  // different ways.
6120  enum {
6121    // For tables where each element contains the same value, we just have to
6122    // store that single value and return it for each lookup.
6123    SingleValueKind,
6124
6125    // For tables where there is a linear relationship between table index
6126    // and values. We calculate the result with a simple multiplication
6127    // and addition instead of a table lookup.
6128    LinearMapKind,
6129
6130    // For small tables with integer elements, we can pack them into a bitmap
6131    // that fits into a target-legal register. Values are retrieved by
6132    // shift and mask operations.
6133    BitMapKind,
6134
6135    // The table is stored as an array of values. Values are retrieved by load
6136    // instructions from the table.
6137    ArrayKind
6138  } Kind;
6139
6140  // For SingleValueKind, this is the single value.
6141  Constant *SingleValue = nullptr;
6142
6143  // For BitMapKind, this is the bitmap.
6144  ConstantInt *BitMap = nullptr;
6145  IntegerType *BitMapElementTy = nullptr;
6146
6147  // For LinearMapKind, these are the constants used to derive the value.
6148  ConstantInt *LinearOffset = nullptr;
6149  ConstantInt *LinearMultiplier = nullptr;
6150  bool LinearMapValWrapped = false;
6151
6152  // For ArrayKind, this is the array.
6153  GlobalVariable *Array = nullptr;
6154};
6155
6156} // end anonymous namespace
6157
6158SwitchLookupTable::SwitchLookupTable(
6159    Module &M, uint64_t TableSize, ConstantInt *Offset,
6160    const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
6161    Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName) {
6162  assert(Values.size() && "Can't build lookup table without values!");
6163  assert(TableSize >= Values.size() && "Can't fit values in table!");
6164
6165  // If all values in the table are equal, this is that value.
6166  SingleValue = Values.begin()->second;
6167
6168  Type *ValueType = Values.begin()->second->getType();
6169
6170  // Build up the table contents.
6171  SmallVector<Constant *, 64> TableContents(TableSize);
6172  for (size_t I = 0, E = Values.size(); I != E; ++I) {
6173    ConstantInt *CaseVal = Values[I].first;
6174    Constant *CaseRes = Values[I].second;
6175    assert(CaseRes->getType() == ValueType);
6176
6177    uint64_t Idx = (CaseVal->getValue() - Offset->getValue()).getLimitedValue();
6178    TableContents[Idx] = CaseRes;
6179
6180    if (CaseRes != SingleValue)
6181      SingleValue = nullptr;
6182  }
6183
6184  // Fill in any holes in the table with the default result.
6185  if (Values.size() < TableSize) {
6186    assert(DefaultValue &&
6187           "Need a default value to fill the lookup table holes.");
6188    assert(DefaultValue->getType() == ValueType);
6189    for (uint64_t I = 0; I < TableSize; ++I) {
6190      if (!TableContents[I])
6191        TableContents[I] = DefaultValue;
6192    }
6193
6194    if (DefaultValue != SingleValue)
6195      SingleValue = nullptr;
6196  }
6197
6198  // If each element in the table contains the same value, we only need to store
6199  // that single value.
6200  if (SingleValue) {
6201    Kind = SingleValueKind;
6202    return;
6203  }
6204
6205  // Check if we can derive the value with a linear transformation from the
6206  // table index.
6207  if (isa<IntegerType>(ValueType)) {
6208    bool LinearMappingPossible = true;
6209    APInt PrevVal;
6210    APInt DistToPrev;
6211    // When linear map is monotonic and signed overflow doesn't happen on
6212    // maximum index, we can attach nsw on Add and Mul.
6213    bool NonMonotonic = false;
6214    assert(TableSize >= 2 && "Should be a SingleValue table.");
6215    // Check if there is the same distance between two consecutive values.
6216    for (uint64_t I = 0; I < TableSize; ++I) {
6217      ConstantInt *ConstVal = dyn_cast<ConstantInt>(TableContents[I]);
6218      if (!ConstVal) {
6219        // This is an undef. We could deal with it, but undefs in lookup tables
6220        // are very seldom. It's probably not worth the additional complexity.
6221        LinearMappingPossible = false;
6222        break;
6223      }
6224      const APInt &Val = ConstVal->getValue();
6225      if (I != 0) {
6226        APInt Dist = Val - PrevVal;
6227        if (I == 1) {
6228          DistToPrev = Dist;
6229        } else if (Dist != DistToPrev) {
6230          LinearMappingPossible = false;
6231          break;
6232        }
6233        NonMonotonic |=
6234            Dist.isStrictlyPositive() ? Val.sle(PrevVal) : Val.sgt(PrevVal);
6235      }
6236      PrevVal = Val;
6237    }
6238    if (LinearMappingPossible) {
6239      LinearOffset = cast<ConstantInt>(TableContents[0]);
6240      LinearMultiplier = ConstantInt::get(M.getContext(), DistToPrev);
6241      bool MayWrap = false;
6242      APInt M = LinearMultiplier->getValue();
6243      (void)M.smul_ov(APInt(M.getBitWidth(), TableSize - 1), MayWrap);
6244      LinearMapValWrapped = NonMonotonic || MayWrap;
6245      Kind = LinearMapKind;
6246      ++NumLinearMaps;
6247      return;
6248    }
6249  }
6250
6251  // If the type is integer and the table fits in a register, build a bitmap.
6252  if (WouldFitInRegister(DL, TableSize, ValueType)) {
6253    IntegerType *IT = cast<IntegerType>(ValueType);
6254    APInt TableInt(TableSize * IT->getBitWidth(), 0);
6255    for (uint64_t I = TableSize; I > 0; --I) {
6256      TableInt <<= IT->getBitWidth();
6257      // Insert values into the bitmap. Undef values are set to zero.
6258      if (!isa<UndefValue>(TableContents[I - 1])) {
6259        ConstantInt *Val = cast<ConstantInt>(TableContents[I - 1]);
6260        TableInt |= Val->getValue().zext(TableInt.getBitWidth());
6261      }
6262    }
6263    BitMap = ConstantInt::get(M.getContext(), TableInt);
6264    BitMapElementTy = IT;
6265    Kind = BitMapKind;
6266    ++NumBitMaps;
6267    return;
6268  }
6269
6270  // Store the table in an array.
6271  ArrayType *ArrayTy = ArrayType::get(ValueType, TableSize);
6272  Constant *Initializer = ConstantArray::get(ArrayTy, TableContents);
6273
6274  Array = new GlobalVariable(M, ArrayTy, /*isConstant=*/true,
6275                             GlobalVariable::PrivateLinkage, Initializer,
6276                             "switch.table." + FuncName);
6277  Array->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
6278  // Set the alignment to that of an array items. We will be only loading one
6279  // value out of it.
6280  Array->setAlignment(DL.getPrefTypeAlign(ValueType));
6281  Kind = ArrayKind;
6282}
6283
6284Value *SwitchLookupTable::BuildLookup(Value *Index, IRBuilder<> &Builder) {
6285  switch (Kind) {
6286  case SingleValueKind:
6287    return SingleValue;
6288  case LinearMapKind: {
6289    // Derive the result value from the input value.
6290    Value *Result = Builder.CreateIntCast(Index, LinearMultiplier->getType(),
6291                                          false, "switch.idx.cast");
6292    if (!LinearMultiplier->isOne())
6293      Result = Builder.CreateMul(Result, LinearMultiplier, "switch.idx.mult",
6294                                 /*HasNUW = */ false,
6295                                 /*HasNSW = */ !LinearMapValWrapped);
6296
6297    if (!LinearOffset->isZero())
6298      Result = Builder.CreateAdd(Result, LinearOffset, "switch.offset",
6299                                 /*HasNUW = */ false,
6300                                 /*HasNSW = */ !LinearMapValWrapped);
6301    return Result;
6302  }
6303  case BitMapKind: {
6304    // Type of the bitmap (e.g. i59).
6305    IntegerType *MapTy = BitMap->getIntegerType();
6306
6307    // Cast Index to the same type as the bitmap.
6308    // Note: The Index is <= the number of elements in the table, so
6309    // truncating it to the width of the bitmask is safe.
6310    Value *ShiftAmt = Builder.CreateZExtOrTrunc(Index, MapTy, "switch.cast");
6311
6312    // Multiply the shift amount by the element width. NUW/NSW can always be
6313    // set, because WouldFitInRegister guarantees Index * ShiftAmt is in
6314    // BitMap's bit width.
6315    ShiftAmt = Builder.CreateMul(
6316        ShiftAmt, ConstantInt::get(MapTy, BitMapElementTy->getBitWidth()),
6317        "switch.shiftamt",/*HasNUW =*/true,/*HasNSW =*/true);
6318
6319    // Shift down.
6320    Value *DownShifted =
6321        Builder.CreateLShr(BitMap, ShiftAmt, "switch.downshift");
6322    // Mask off.
6323    return Builder.CreateTrunc(DownShifted, BitMapElementTy, "switch.masked");
6324  }
6325  case ArrayKind: {
6326    // Make sure the table index will not overflow when treated as signed.
6327    IntegerType *IT = cast<IntegerType>(Index->getType());
6328    uint64_t TableSize =
6329        Array->getInitializer()->getType()->getArrayNumElements();
6330    if (TableSize > (1ULL << std::min(IT->getBitWidth() - 1, 63u)))
6331      Index = Builder.CreateZExt(
6332          Index, IntegerType::get(IT->getContext(), IT->getBitWidth() + 1),
6333          "switch.tableidx.zext");
6334
6335    Value *GEPIndices[] = {Builder.getInt32(0), Index};
6336    Value *GEP = Builder.CreateInBoundsGEP(Array->getValueType(), Array,
6337                                           GEPIndices, "switch.gep");
6338    return Builder.CreateLoad(
6339        cast<ArrayType>(Array->getValueType())->getElementType(), GEP,
6340        "switch.load");
6341  }
6342  }
6343  llvm_unreachable("Unknown lookup table kind!");
6344}
6345
6346bool SwitchLookupTable::WouldFitInRegister(const DataLayout &DL,
6347                                           uint64_t TableSize,
6348                                           Type *ElementType) {
6349  auto *IT = dyn_cast<IntegerType>(ElementType);
6350  if (!IT)
6351    return false;
6352  // FIXME: If the type is wider than it needs to be, e.g. i8 but all values
6353  // are <= 15, we could try to narrow the type.
6354
6355  // Avoid overflow, fitsInLegalInteger uses unsigned int for the width.
6356  if (TableSize >= UINT_MAX / IT->getBitWidth())
6357    return false;
6358  return DL.fitsInLegalInteger(TableSize * IT->getBitWidth());
6359}
6360
6361static bool isTypeLegalForLookupTable(Type *Ty, const TargetTransformInfo &TTI,
6362                                      const DataLayout &DL) {
6363  // Allow any legal type.
6364  if (TTI.isTypeLegal(Ty))
6365    return true;
6366
6367  auto *IT = dyn_cast<IntegerType>(Ty);
6368  if (!IT)
6369    return false;
6370
6371  // Also allow power of 2 integer types that have at least 8 bits and fit in
6372  // a register. These types are common in frontend languages and targets
6373  // usually support loads of these types.
6374  // TODO: We could relax this to any integer that fits in a register and rely
6375  // on ABI alignment and padding in the table to allow the load to be widened.
6376  // Or we could widen the constants and truncate the load.
6377  unsigned BitWidth = IT->getBitWidth();
6378  return BitWidth >= 8 && isPowerOf2_32(BitWidth) &&
6379         DL.fitsInLegalInteger(IT->getBitWidth());
6380}
6381
/// Return true if \p NumCases cases spread over a range of \p CaseRange
/// values reach the minimum density of 40%.
///
/// A \p CaseRange too large for the density computation is reported as not
/// dense.
static bool isSwitchDense(uint64_t NumCases, uint64_t CaseRange) {
  // 40% is the default density for building a jump table in optsize/minsize
  // mode. See also TargetLoweringBase::isSuitableForJumpTable(), which this
  // function was based on.
  const uint64_t MinDensity = 40;
  const uint64_t OverflowLimit = UINT64_MAX / 100;

  if (CaseRange >= OverflowLimit)
    return false; // Avoid multiplication overflows below.

  // NumCases * 100 would wrap around here and could compare as "sparse".
  // CaseRange is known to be below the limit at this point, so there are more
  // cases than values in the range and the density exceeds 100%.
  if (NumCases > OverflowLimit)
    return true;

  return NumCases * 100 >= CaseRange * MinDensity;
}
6393
6394static bool isSwitchDense(ArrayRef<int64_t> Values) {
6395  uint64_t Diff = (uint64_t)Values.back() - (uint64_t)Values.front();
6396  uint64_t Range = Diff + 1;
6397  if (Range < Diff)
6398    return false; // Overflow.
6399
6400  return isSwitchDense(Values.size(), Range);
6401}
6402
6403/// Determine whether a lookup table should be built for this switch, based on
6404/// the number of cases, size of the table, and the types of the results.
6405// TODO: We could support larger than legal types by limiting based on the
6406// number of loads required and/or table size. If the constants are small we
6407// could use smaller table entries and extend after the load.
6408static bool
6409ShouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize,
6410                       const TargetTransformInfo &TTI, const DataLayout &DL,
6411                       const SmallDenseMap<PHINode *, Type *> &ResultTypes) {
6412  if (SI->getNumCases() > TableSize)
6413    return false; // TableSize overflowed.
6414
6415  bool AllTablesFitInRegister = true;
6416  bool HasIllegalType = false;
6417  for (const auto &I : ResultTypes) {
6418    Type *Ty = I.second;
6419
6420    // Saturate this flag to true.
6421    HasIllegalType = HasIllegalType || !isTypeLegalForLookupTable(Ty, TTI, DL);
6422
6423    // Saturate this flag to false.
6424    AllTablesFitInRegister =
6425        AllTablesFitInRegister &&
6426        SwitchLookupTable::WouldFitInRegister(DL, TableSize, Ty);
6427
6428    // If both flags saturate, we're done. NOTE: This *only* works with
6429    // saturating flags, and all flags have to saturate first due to the
6430    // non-deterministic behavior of iterating over a dense map.
6431    if (HasIllegalType && !AllTablesFitInRegister)
6432      break;
6433  }
6434
6435  // If each table would fit in a register, we should build it anyway.
6436  if (AllTablesFitInRegister)
6437    return true;
6438
6439  // Don't build a table that doesn't fit in-register if it has illegal types.
6440  if (HasIllegalType)
6441    return false;
6442
6443  return isSwitchDense(SI->getNumCases(), TableSize);
6444}
6445
6446static bool ShouldUseSwitchConditionAsTableIndex(
6447    ConstantInt &MinCaseVal, const ConstantInt &MaxCaseVal,
6448    bool HasDefaultResults, const SmallDenseMap<PHINode *, Type *> &ResultTypes,
6449    const DataLayout &DL, const TargetTransformInfo &TTI) {
6450  if (MinCaseVal.isNullValue())
6451    return true;
6452  if (MinCaseVal.isNegative() ||
6453      MaxCaseVal.getLimitedValue() == std::numeric_limits<uint64_t>::max() ||
6454      !HasDefaultResults)
6455    return false;
6456  return all_of(ResultTypes, [&](const auto &KV) {
6457    return SwitchLookupTable::WouldFitInRegister(
6458        DL, MaxCaseVal.getLimitedValue() + 1 /* TableSize */,
6459        KV.second /* ResultType */);
6460  });
6461}
6462
6463/// Try to reuse the switch table index compare. Following pattern:
6464/// \code
6465///     if (idx < tablesize)
6466///        r = table[idx]; // table does not contain default_value
6467///     else
6468///        r = default_value;
6469///     if (r != default_value)
6470///        ...
6471/// \endcode
6472/// Is optimized to:
6473/// \code
6474///     cond = idx < tablesize;
6475///     if (cond)
6476///        r = table[idx];
6477///     else
6478///        r = default_value;
6479///     if (cond)
6480///        ...
6481/// \endcode
6482/// Jump threading will then eliminate the second if(cond).
6483static void reuseTableCompare(
6484    User *PhiUser, BasicBlock *PhiBlock, BranchInst *RangeCheckBranch,
6485    Constant *DefaultValue,
6486    const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values) {
6487  ICmpInst *CmpInst = dyn_cast<ICmpInst>(PhiUser);
6488  if (!CmpInst)
6489    return;
6490
6491  // We require that the compare is in the same block as the phi so that jump
6492  // threading can do its work afterwards.
6493  if (CmpInst->getParent() != PhiBlock)
6494    return;
6495
6496  Constant *CmpOp1 = dyn_cast<Constant>(CmpInst->getOperand(1));
6497  if (!CmpOp1)
6498    return;
6499
6500  Value *RangeCmp = RangeCheckBranch->getCondition();
6501  Constant *TrueConst = ConstantInt::getTrue(RangeCmp->getType());
6502  Constant *FalseConst = ConstantInt::getFalse(RangeCmp->getType());
6503
6504  // Check if the compare with the default value is constant true or false.
6505  Constant *DefaultConst = ConstantExpr::getICmp(CmpInst->getPredicate(),
6506                                                 DefaultValue, CmpOp1, true);
6507  if (DefaultConst != TrueConst && DefaultConst != FalseConst)
6508    return;
6509
6510  // Check if the compare with the case values is distinct from the default
6511  // compare result.
6512  for (auto ValuePair : Values) {
6513    Constant *CaseConst = ConstantExpr::getICmp(CmpInst->getPredicate(),
6514                                                ValuePair.second, CmpOp1, true);
6515    if (!CaseConst || CaseConst == DefaultConst ||
6516        (CaseConst != TrueConst && CaseConst != FalseConst))
6517      return;
6518  }
6519
6520  // Check if the branch instruction dominates the phi node. It's a simple
6521  // dominance check, but sufficient for our needs.
6522  // Although this check is invariant in the calling loops, it's better to do it
6523  // at this late stage. Practically we do it at most once for a switch.
6524  BasicBlock *BranchBlock = RangeCheckBranch->getParent();
6525  for (BasicBlock *Pred : predecessors(PhiBlock)) {
6526    if (Pred != BranchBlock && Pred->getUniquePredecessor() != BranchBlock)
6527      return;
6528  }
6529
6530  if (DefaultConst == FalseConst) {
6531    // The compare yields the same result. We can replace it.
6532    CmpInst->replaceAllUsesWith(RangeCmp);
6533    ++NumTableCmpReuses;
6534  } else {
6535    // The compare yields the same result, just inverted. We can replace it.
6536    Value *InvertedTableCmp = BinaryOperator::CreateXor(
6537        RangeCmp, ConstantInt::get(RangeCmp->getType(), 1), "inverted.cmp",
6538        RangeCheckBranch);
6539    CmpInst->replaceAllUsesWith(InvertedTableCmp);
6540    ++NumTableCmpReuses;
6541  }
6542}
6543
6544/// If the switch is only used to initialize one or more phi nodes in a common
6545/// successor block with different constant values, replace the switch with
6546/// lookup tables.
6547static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
6548                                DomTreeUpdater *DTU, const DataLayout &DL,
6549                                const TargetTransformInfo &TTI) {
6550  assert(SI->getNumCases() > 1 && "Degenerate switch?");
6551
6552  BasicBlock *BB = SI->getParent();
6553  Function *Fn = BB->getParent();
6554  // Only build lookup table when we have a target that supports it or the
6555  // attribute is not set.
6556  if (!TTI.shouldBuildLookupTables() ||
6557      (Fn->getFnAttribute("no-jump-tables").getValueAsBool()))
6558    return false;
6559
6560  // FIXME: If the switch is too sparse for a lookup table, perhaps we could
6561  // split off a dense part and build a lookup table for that.
6562
6563  // FIXME: This creates arrays of GEPs to constant strings, which means each
6564  // GEP needs a runtime relocation in PIC code. We should just build one big
6565  // string and lookup indices into that.
6566
6567  // Ignore switches with less than three cases. Lookup tables will not make
6568  // them faster, so we don't analyze them.
6569  if (SI->getNumCases() < 3)
6570    return false;
6571
6572  // Figure out the corresponding result for each case value and phi node in the
6573  // common destination, as well as the min and max case values.
6574  assert(!SI->cases().empty());
6575  SwitchInst::CaseIt CI = SI->case_begin();
6576  ConstantInt *MinCaseVal = CI->getCaseValue();
6577  ConstantInt *MaxCaseVal = CI->getCaseValue();
6578
6579  BasicBlock *CommonDest = nullptr;
6580
6581  using ResultListTy = SmallVector<std::pair<ConstantInt *, Constant *>, 4>;
6582  SmallDenseMap<PHINode *, ResultListTy> ResultLists;
6583
6584  SmallDenseMap<PHINode *, Constant *> DefaultResults;
6585  SmallDenseMap<PHINode *, Type *> ResultTypes;
6586  SmallVector<PHINode *, 4> PHIs;
6587
6588  for (SwitchInst::CaseIt E = SI->case_end(); CI != E; ++CI) {
6589    ConstantInt *CaseVal = CI->getCaseValue();
6590    if (CaseVal->getValue().slt(MinCaseVal->getValue()))
6591      MinCaseVal = CaseVal;
6592    if (CaseVal->getValue().sgt(MaxCaseVal->getValue()))
6593      MaxCaseVal = CaseVal;
6594
6595    // Resulting value at phi nodes for this case value.
6596    using ResultsTy = SmallVector<std::pair<PHINode *, Constant *>, 4>;
6597    ResultsTy Results;
6598    if (!getCaseResults(SI, CaseVal, CI->getCaseSuccessor(), &CommonDest,
6599                        Results, DL, TTI))
6600      return false;
6601
6602    // Append the result from this case to the list for each phi.
6603    for (const auto &I : Results) {
6604      PHINode *PHI = I.first;
6605      Constant *Value = I.second;
6606      if (!ResultLists.count(PHI))
6607        PHIs.push_back(PHI);
6608      ResultLists[PHI].push_back(std::make_pair(CaseVal, Value));
6609    }
6610  }
6611
6612  // Keep track of the result types.
6613  for (PHINode *PHI : PHIs) {
6614    ResultTypes[PHI] = ResultLists[PHI][0].second->getType();
6615  }
6616
6617  uint64_t NumResults = ResultLists[PHIs[0]].size();
6618
6619  // If the table has holes, we need a constant result for the default case
6620  // or a bitmask that fits in a register.
6621  SmallVector<std::pair<PHINode *, Constant *>, 4> DefaultResultsList;
6622  bool HasDefaultResults =
6623      getCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest,
6624                     DefaultResultsList, DL, TTI);
6625
6626  for (const auto &I : DefaultResultsList) {
6627    PHINode *PHI = I.first;
6628    Constant *Result = I.second;
6629    DefaultResults[PHI] = Result;
6630  }
6631
6632  bool UseSwitchConditionAsTableIndex = ShouldUseSwitchConditionAsTableIndex(
6633      *MinCaseVal, *MaxCaseVal, HasDefaultResults, ResultTypes, DL, TTI);
6634  uint64_t TableSize;
6635  if (UseSwitchConditionAsTableIndex)
6636    TableSize = MaxCaseVal->getLimitedValue() + 1;
6637  else
6638    TableSize =
6639        (MaxCaseVal->getValue() - MinCaseVal->getValue()).getLimitedValue() + 1;
6640
6641  bool TableHasHoles = (NumResults < TableSize);
6642  bool NeedMask = (TableHasHoles && !HasDefaultResults);
6643  if (NeedMask) {
6644    // As an extra penalty for the validity test we require more cases.
6645    if (SI->getNumCases() < 4) // FIXME: Find best threshold value (benchmark).
6646      return false;
6647    if (!DL.fitsInLegalInteger(TableSize))
6648      return false;
6649  }
6650
6651  if (!ShouldBuildLookupTable(SI, TableSize, TTI, DL, ResultTypes))
6652    return false;
6653
6654  std::vector<DominatorTree::UpdateType> Updates;
6655
6656  // Compute the maximum table size representable by the integer type we are
6657  // switching upon.
6658  unsigned CaseSize = MinCaseVal->getType()->getPrimitiveSizeInBits();
6659  uint64_t MaxTableSize = CaseSize > 63 ? UINT64_MAX : 1ULL << CaseSize;
6660  assert(MaxTableSize >= TableSize &&
6661         "It is impossible for a switch to have more entries than the max "
6662         "representable value of its input integer type's size.");
6663
6664  // If the default destination is unreachable, or if the lookup table covers
6665  // all values of the conditional variable, branch directly to the lookup table
6666  // BB. Otherwise, check that the condition is within the case range.
6667  bool DefaultIsReachable =
6668      !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg());
6669
6670  // Create the BB that does the lookups.
6671  Module &Mod = *CommonDest->getParent()->getParent();
6672  BasicBlock *LookupBB = BasicBlock::Create(
6673      Mod.getContext(), "switch.lookup", CommonDest->getParent(), CommonDest);
6674
6675  // Compute the table index value.
6676  Builder.SetInsertPoint(SI);
6677  Value *TableIndex;
6678  ConstantInt *TableIndexOffset;
6679  if (UseSwitchConditionAsTableIndex) {
6680    TableIndexOffset = ConstantInt::get(MaxCaseVal->getIntegerType(), 0);
6681    TableIndex = SI->getCondition();
6682  } else {
6683    TableIndexOffset = MinCaseVal;
6684    // If the default is unreachable, all case values are s>= MinCaseVal. Then
6685    // we can try to attach nsw.
6686    bool MayWrap = true;
6687    if (!DefaultIsReachable) {
6688      APInt Res = MaxCaseVal->getValue().ssub_ov(MinCaseVal->getValue(), MayWrap);
6689      (void)Res;
6690    }
6691
6692    TableIndex = Builder.CreateSub(SI->getCondition(), TableIndexOffset,
6693                                   "switch.tableidx", /*HasNUW =*/false,
6694                                   /*HasNSW =*/!MayWrap);
6695  }
6696
6697  BranchInst *RangeCheckBranch = nullptr;
6698
6699  // Grow the table to cover all possible index values to avoid the range check.
6700  // It will use the default result to fill in the table hole later, so make
6701  // sure it exist.
6702  if (UseSwitchConditionAsTableIndex && HasDefaultResults) {
6703    ConstantRange CR = computeConstantRange(TableIndex, /* ForSigned */ false);
6704    // Grow the table shouldn't have any size impact by checking
6705    // WouldFitInRegister.
6706    // TODO: Consider growing the table also when it doesn't fit in a register
6707    // if no optsize is specified.
6708    const uint64_t UpperBound = CR.getUpper().getLimitedValue();
6709    if (!CR.isUpperWrapped() && all_of(ResultTypes, [&](const auto &KV) {
6710          return SwitchLookupTable::WouldFitInRegister(
6711              DL, UpperBound, KV.second /* ResultType */);
6712        })) {
6713      // There may be some case index larger than the UpperBound (unreachable
6714      // case), so make sure the table size does not get smaller.
6715      TableSize = std::max(UpperBound, TableSize);
6716      // The default branch is unreachable after we enlarge the lookup table.
6717      // Adjust DefaultIsReachable to reuse code path.
6718      DefaultIsReachable = false;
6719    }
6720  }
6721
6722  const bool GeneratingCoveredLookupTable = (MaxTableSize == TableSize);
6723  if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
6724    Builder.CreateBr(LookupBB);
6725    if (DTU)
6726      Updates.push_back({DominatorTree::Insert, BB, LookupBB});
6727    // Note: We call removeProdecessor later since we need to be able to get the
6728    // PHI value for the default case in case we're using a bit mask.
6729  } else {
6730    Value *Cmp = Builder.CreateICmpULT(
6731        TableIndex, ConstantInt::get(MinCaseVal->getType(), TableSize));
6732    RangeCheckBranch =
6733        Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest());
6734    if (DTU)
6735      Updates.push_back({DominatorTree::Insert, BB, LookupBB});
6736  }
6737
6738  // Populate the BB that does the lookups.
6739  Builder.SetInsertPoint(LookupBB);
6740
6741  if (NeedMask) {
6742    // Before doing the lookup, we do the hole check. The LookupBB is therefore
6743    // re-purposed to do the hole check, and we create a new LookupBB.
6744    BasicBlock *MaskBB = LookupBB;
6745    MaskBB->setName("switch.hole_check");
6746    LookupBB = BasicBlock::Create(Mod.getContext(), "switch.lookup",
6747                                  CommonDest->getParent(), CommonDest);
6748
6749    // Make the mask's bitwidth at least 8-bit and a power-of-2 to avoid
6750    // unnecessary illegal types.
6751    uint64_t TableSizePowOf2 = NextPowerOf2(std::max(7ULL, TableSize - 1ULL));
6752    APInt MaskInt(TableSizePowOf2, 0);
6753    APInt One(TableSizePowOf2, 1);
6754    // Build bitmask; fill in a 1 bit for every case.
6755    const ResultListTy &ResultList = ResultLists[PHIs[0]];
6756    for (size_t I = 0, E = ResultList.size(); I != E; ++I) {
6757      uint64_t Idx = (ResultList[I].first->getValue() - TableIndexOffset->getValue())
6758                         .getLimitedValue();
6759      MaskInt |= One << Idx;
6760    }
6761    ConstantInt *TableMask = ConstantInt::get(Mod.getContext(), MaskInt);
6762
6763    // Get the TableIndex'th bit of the bitmask.
6764    // If this bit is 0 (meaning hole) jump to the default destination,
6765    // else continue with table lookup.
6766    IntegerType *MapTy = TableMask->getIntegerType();
6767    Value *MaskIndex =
6768        Builder.CreateZExtOrTrunc(TableIndex, MapTy, "switch.maskindex");
6769    Value *Shifted = Builder.CreateLShr(TableMask, MaskIndex, "switch.shifted");
6770    Value *LoBit = Builder.CreateTrunc(
6771        Shifted, Type::getInt1Ty(Mod.getContext()), "switch.lobit");
6772    Builder.CreateCondBr(LoBit, LookupBB, SI->getDefaultDest());
6773    if (DTU) {
6774      Updates.push_back({DominatorTree::Insert, MaskBB, LookupBB});
6775      Updates.push_back({DominatorTree::Insert, MaskBB, SI->getDefaultDest()});
6776    }
6777    Builder.SetInsertPoint(LookupBB);
6778    AddPredecessorToBlock(SI->getDefaultDest(), MaskBB, BB);
6779  }
6780
6781  if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
6782    // We cached PHINodes in PHIs. To avoid accessing deleted PHINodes later,
6783    // do not delete PHINodes here.
6784    SI->getDefaultDest()->removePredecessor(BB,
6785                                            /*KeepOneInputPHIs=*/true);
6786    if (DTU)
6787      Updates.push_back({DominatorTree::Delete, BB, SI->getDefaultDest()});
6788  }
6789
6790  for (PHINode *PHI : PHIs) {
6791    const ResultListTy &ResultList = ResultLists[PHI];
6792
6793    // If using a bitmask, use any value to fill the lookup table holes.
6794    Constant *DV = NeedMask ? ResultLists[PHI][0].second : DefaultResults[PHI];
6795    StringRef FuncName = Fn->getName();
6796    SwitchLookupTable Table(Mod, TableSize, TableIndexOffset, ResultList, DV,
6797                            DL, FuncName);
6798
6799    Value *Result = Table.BuildLookup(TableIndex, Builder);
6800
6801    // Do a small peephole optimization: re-use the switch table compare if
6802    // possible.
6803    if (!TableHasHoles && HasDefaultResults && RangeCheckBranch) {
6804      BasicBlock *PhiBlock = PHI->getParent();
6805      // Search for compare instructions which use the phi.
6806      for (auto *User : PHI->users()) {
6807        reuseTableCompare(User, PhiBlock, RangeCheckBranch, DV, ResultList);
6808      }
6809    }
6810
6811    PHI->addIncoming(Result, LookupBB);
6812  }
6813
6814  Builder.CreateBr(CommonDest);
6815  if (DTU)
6816    Updates.push_back({DominatorTree::Insert, LookupBB, CommonDest});
6817
6818  // Remove the switch.
6819  SmallPtrSet<BasicBlock *, 8> RemovedSuccessors;
6820  for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) {
6821    BasicBlock *Succ = SI->getSuccessor(i);
6822
6823    if (Succ == SI->getDefaultDest())
6824      continue;
6825    Succ->removePredecessor(BB);
6826    if (DTU && RemovedSuccessors.insert(Succ).second)
6827      Updates.push_back({DominatorTree::Delete, BB, Succ});
6828  }
6829  SI->eraseFromParent();
6830
6831  if (DTU)
6832    DTU->applyUpdates(Updates);
6833
6834  ++NumLookupTables;
6835  if (NeedMask)
6836    ++NumLookupTablesHoles;
6837  return true;
6838}
6839
6840/// Try to transform a switch that has "holes" in it to a contiguous sequence
6841/// of cases.
6842///
6843/// A switch such as: switch(i) {case 5: case 9: case 13: case 17:} can be
6844/// range-reduced to: switch ((i-5) / 4) {case 0: case 1: case 2: case 3:}.
6845///
6846/// This converts a sparse switch into a dense switch which allows better
6847/// lowering and could also allow transforming into a lookup table.
6848static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,
6849                              const DataLayout &DL,
6850                              const TargetTransformInfo &TTI) {
6851  auto *CondTy = cast<IntegerType>(SI->getCondition()->getType());
6852  if (CondTy->getIntegerBitWidth() > 64 ||
6853      !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
6854    return false;
6855  // Only bother with this optimization if there are more than 3 switch cases;
6856  // SDAG will only bother creating jump tables for 4 or more cases.
6857  if (SI->getNumCases() < 4)
6858    return false;
6859
6860  // This transform is agnostic to the signedness of the input or case values. We
6861  // can treat the case values as signed or unsigned. We can optimize more common
6862  // cases such as a sequence crossing zero {-4,0,4,8} if we interpret case values
6863  // as signed.
6864  SmallVector<int64_t,4> Values;
6865  for (const auto &C : SI->cases())
6866    Values.push_back(C.getCaseValue()->getValue().getSExtValue());
6867  llvm::sort(Values);
6868
6869  // If the switch is already dense, there's nothing useful to do here.
6870  if (isSwitchDense(Values))
6871    return false;
6872
6873  // First, transform the values such that they start at zero and ascend.
6874  int64_t Base = Values[0];
6875  for (auto &V : Values)
6876    V -= (uint64_t)(Base);
6877
6878  // Now we have signed numbers that have been shifted so that, given enough
6879  // precision, there are no negative values. Since the rest of the transform
6880  // is bitwise only, we switch now to an unsigned representation.
6881
6882  // This transform can be done speculatively because it is so cheap - it
6883  // results in a single rotate operation being inserted.
6884
6885  // countTrailingZeros(0) returns 64. As Values is guaranteed to have more than
6886  // one element and LLVM disallows duplicate cases, Shift is guaranteed to be
6887  // less than 64.
6888  unsigned Shift = 64;
6889  for (auto &V : Values)
6890    Shift = std::min(Shift, (unsigned)llvm::countr_zero((uint64_t)V));
6891  assert(Shift < 64);
6892  if (Shift > 0)
6893    for (auto &V : Values)
6894      V = (int64_t)((uint64_t)V >> Shift);
6895
6896  if (!isSwitchDense(Values))
6897    // Transform didn't create a dense switch.
6898    return false;
6899
6900  // The obvious transform is to shift the switch condition right and emit a
6901  // check that the condition actually cleanly divided by GCD, i.e.
6902  //   C & (1 << Shift - 1) == 0
6903  // inserting a new CFG edge to handle the case where it didn't divide cleanly.
6904  //
6905  // A cheaper way of doing this is a simple ROTR(C, Shift). This performs the
6906  // shift and puts the shifted-off bits in the uppermost bits. If any of these
6907  // are nonzero then the switch condition will be very large and will hit the
6908  // default case.
6909
6910  auto *Ty = cast<IntegerType>(SI->getCondition()->getType());
6911  Builder.SetInsertPoint(SI);
6912  Value *Sub =
6913      Builder.CreateSub(SI->getCondition(), ConstantInt::get(Ty, Base));
6914  Value *Rot = Builder.CreateIntrinsic(
6915      Ty, Intrinsic::fshl,
6916      {Sub, Sub, ConstantInt::get(Ty, Ty->getBitWidth() - Shift)});
6917  SI->replaceUsesOfWith(SI->getCondition(), Rot);
6918
6919  for (auto Case : SI->cases()) {
6920    auto *Orig = Case.getCaseValue();
6921    auto Sub = Orig->getValue() - APInt(Ty->getBitWidth(), Base);
6922    Case.setValue(cast<ConstantInt>(ConstantInt::get(Ty, Sub.lshr(Shift))));
6923  }
6924  return true;
6925}
6926
6927/// Tries to transform switch of powers of two to reduce switch range.
6928/// For example, switch like:
6929/// switch (C) { case 1: case 2: case 64: case 128: }
6930/// will be transformed to:
6931/// switch (count_trailing_zeros(C)) { case 0: case 1: case 6: case 7: }
6932///
6933/// This transformation allows better lowering and could allow transforming into
6934/// a lookup table.
6935static bool simplifySwitchOfPowersOfTwo(SwitchInst *SI, IRBuilder<> &Builder,
6936                                        const DataLayout &DL,
6937                                        const TargetTransformInfo &TTI) {
6938  Value *Condition = SI->getCondition();
6939  LLVMContext &Context = SI->getContext();
6940  auto *CondTy = cast<IntegerType>(Condition->getType());
6941
6942  if (CondTy->getIntegerBitWidth() > 64 ||
6943      !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
6944    return false;
6945
6946  const auto CttzIntrinsicCost = TTI.getIntrinsicInstrCost(
6947      IntrinsicCostAttributes(Intrinsic::cttz, CondTy,
6948                              {Condition, ConstantInt::getTrue(Context)}),
6949      TTI::TCK_SizeAndLatency);
6950
6951  if (CttzIntrinsicCost > TTI::TCC_Basic)
6952    // Inserting intrinsic is too expensive.
6953    return false;
6954
6955  // Only bother with this optimization if there are more than 3 switch cases.
6956  // SDAG will only bother creating jump tables for 4 or more cases.
6957  if (SI->getNumCases() < 4)
6958    return false;
6959
6960  // We perform this optimization only for switches with
6961  // unreachable default case.
6962  // This assumtion will save us from checking if `Condition` is a power of two.
6963  if (!isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg()))
6964    return false;
6965
6966  // Check that switch cases are powers of two.
6967  SmallVector<uint64_t, 4> Values;
6968  for (const auto &Case : SI->cases()) {
6969    uint64_t CaseValue = Case.getCaseValue()->getValue().getZExtValue();
6970    if (llvm::has_single_bit(CaseValue))
6971      Values.push_back(CaseValue);
6972    else
6973      return false;
6974  }
6975
6976  // isSwichDense requires case values to be sorted.
6977  llvm::sort(Values);
6978  if (!isSwitchDense(Values.size(), llvm::countr_zero(Values.back()) -
6979                                        llvm::countr_zero(Values.front()) + 1))
6980    // Transform is unable to generate dense switch.
6981    return false;
6982
6983  Builder.SetInsertPoint(SI);
6984
6985  // Replace each case with its trailing zeros number.
6986  for (auto &Case : SI->cases()) {
6987    auto *OrigValue = Case.getCaseValue();
6988    Case.setValue(ConstantInt::get(OrigValue->getIntegerType(),
6989                                   OrigValue->getValue().countr_zero()));
6990  }
6991
6992  // Replace condition with its trailing zeros number.
6993  auto *ConditionTrailingZeros = Builder.CreateIntrinsic(
6994      Intrinsic::cttz, {CondTy}, {Condition, ConstantInt::getTrue(Context)});
6995
6996  SI->setCondition(ConditionTrailingZeros);
6997
6998  return true;
6999}
7000
7001bool SimplifyCFGOpt::simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
7002  BasicBlock *BB = SI->getParent();
7003
7004  if (isValueEqualityComparison(SI)) {
7005    // If we only have one predecessor, and if it is a branch on this value,
7006    // see if that predecessor totally determines the outcome of this switch.
7007    if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
7008      if (SimplifyEqualityComparisonWithOnlyPredecessor(SI, OnlyPred, Builder))
7009        return requestResimplify();
7010
7011    Value *Cond = SI->getCondition();
7012    if (SelectInst *Select = dyn_cast<SelectInst>(Cond))
7013      if (SimplifySwitchOnSelect(SI, Select))
7014        return requestResimplify();
7015
7016    // If the block only contains the switch, see if we can fold the block
7017    // away into any preds.
7018    if (SI == &*BB->instructionsWithoutDebug(false).begin())
7019      if (FoldValueComparisonIntoPredecessors(SI, Builder))
7020        return requestResimplify();
7021  }
7022
7023  // Try to transform the switch into an icmp and a branch.
7024  // The conversion from switch to comparison may lose information on
7025  // impossible switch values, so disable it early in the pipeline.
7026  if (Options.ConvertSwitchRangeToICmp && TurnSwitchRangeIntoICmp(SI, Builder))
7027    return requestResimplify();
7028
7029  // Remove unreachable cases.
7030  if (eliminateDeadSwitchCases(SI, DTU, Options.AC, DL))
7031    return requestResimplify();
7032
7033  if (trySwitchToSelect(SI, Builder, DTU, DL, TTI))
7034    return requestResimplify();
7035
7036  if (Options.ForwardSwitchCondToPhi && ForwardSwitchConditionToPHI(SI))
7037    return requestResimplify();
7038
7039  // The conversion from switch to lookup tables results in difficult-to-analyze
7040  // code and makes pruning branches much harder. This is a problem if the
7041  // switch expression itself can still be restricted as a result of inlining or
7042  // CVP. Therefore, only apply this transformation during late stages of the
7043  // optimisation pipeline.
7044  if (Options.ConvertSwitchToLookupTable &&
7045      SwitchToLookupTable(SI, Builder, DTU, DL, TTI))
7046    return requestResimplify();
7047
7048  if (simplifySwitchOfPowersOfTwo(SI, Builder, DL, TTI))
7049    return requestResimplify();
7050
7051  if (ReduceSwitchRange(SI, Builder, DL, TTI))
7052    return requestResimplify();
7053
7054  if (HoistCommon &&
7055      hoistCommonCodeFromSuccessors(SI->getParent(), !Options.HoistCommonInsts))
7056    return requestResimplify();
7057
7058  return false;
7059}
7060
7061bool SimplifyCFGOpt::simplifyIndirectBr(IndirectBrInst *IBI) {
7062  BasicBlock *BB = IBI->getParent();
7063  bool Changed = false;
7064
7065  // Eliminate redundant destinations.
7066  SmallPtrSet<Value *, 8> Succs;
7067  SmallSetVector<BasicBlock *, 8> RemovedSuccs;
7068  for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) {
7069    BasicBlock *Dest = IBI->getDestination(i);
7070    if (!Dest->hasAddressTaken() || !Succs.insert(Dest).second) {
7071      if (!Dest->hasAddressTaken())
7072        RemovedSuccs.insert(Dest);
7073      Dest->removePredecessor(BB);
7074      IBI->removeDestination(i);
7075      --i;
7076      --e;
7077      Changed = true;
7078    }
7079  }
7080
7081  if (DTU) {
7082    std::vector<DominatorTree::UpdateType> Updates;
7083    Updates.reserve(RemovedSuccs.size());
7084    for (auto *RemovedSucc : RemovedSuccs)
7085      Updates.push_back({DominatorTree::Delete, BB, RemovedSucc});
7086    DTU->applyUpdates(Updates);
7087  }
7088
7089  if (IBI->getNumDestinations() == 0) {
7090    // If the indirectbr has no successors, change it to unreachable.
7091    new UnreachableInst(IBI->getContext(), IBI);
7092    EraseTerminatorAndDCECond(IBI);
7093    return true;
7094  }
7095
7096  if (IBI->getNumDestinations() == 1) {
7097    // If the indirectbr has one successor, change it to a direct branch.
7098    BranchInst::Create(IBI->getDestination(0), IBI);
7099    EraseTerminatorAndDCECond(IBI);
7100    return true;
7101  }
7102
7103  if (SelectInst *SI = dyn_cast<SelectInst>(IBI->getAddress())) {
7104    if (SimplifyIndirectBrOnSelect(IBI, SI))
7105      return requestResimplify();
7106  }
7107  return Changed;
7108}
7109
7110/// Given an block with only a single landing pad and a unconditional branch
7111/// try to find another basic block which this one can be merged with.  This
7112/// handles cases where we have multiple invokes with unique landing pads, but
7113/// a shared handler.
7114///
7115/// We specifically choose to not worry about merging non-empty blocks
7116/// here.  That is a PRE/scheduling problem and is best solved elsewhere.  In
7117/// practice, the optimizer produces empty landing pad blocks quite frequently
7118/// when dealing with exception dense code.  (see: instcombine, gvn, if-else
7119/// sinking in this file)
7120///
7121/// This is primarily a code size optimization.  We need to avoid performing
7122/// any transform which might inhibit optimization (such as our ability to
7123/// specialize a particular handler via tail commoning).  We do this by not
7124/// merging any blocks which require us to introduce a phi.  Since the same
7125/// values are flowing through both blocks, we don't lose any ability to
7126/// specialize.  If anything, we make such specialization more likely.
7127///
7128/// TODO - This transformation could remove entries from a phi in the target
7129/// block when the inputs in the phi are the same for the two blocks being
7130/// merged.  In some cases, this could result in removal of the PHI entirely.
7131static bool TryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI,
7132                                 BasicBlock *BB, DomTreeUpdater *DTU) {
7133  auto Succ = BB->getUniqueSuccessor();
7134  assert(Succ);
7135  // If there's a phi in the successor block, we'd likely have to introduce
7136  // a phi into the merged landing pad block.
7137  if (isa<PHINode>(*Succ->begin()))
7138    return false;
7139
7140  for (BasicBlock *OtherPred : predecessors(Succ)) {
7141    if (BB == OtherPred)
7142      continue;
7143    BasicBlock::iterator I = OtherPred->begin();
7144    LandingPadInst *LPad2 = dyn_cast<LandingPadInst>(I);
7145    if (!LPad2 || !LPad2->isIdenticalTo(LPad))
7146      continue;
7147    for (++I; isa<DbgInfoIntrinsic>(I); ++I)
7148      ;
7149    BranchInst *BI2 = dyn_cast<BranchInst>(I);
7150    if (!BI2 || !BI2->isIdenticalTo(BI))
7151      continue;
7152
7153    std::vector<DominatorTree::UpdateType> Updates;
7154
7155    // We've found an identical block.  Update our predecessors to take that
7156    // path instead and make ourselves dead.
7157    SmallSetVector<BasicBlock *, 16> UniquePreds(pred_begin(BB), pred_end(BB));
7158    for (BasicBlock *Pred : UniquePreds) {
7159      InvokeInst *II = cast<InvokeInst>(Pred->getTerminator());
7160      assert(II->getNormalDest() != BB && II->getUnwindDest() == BB &&
7161             "unexpected successor");
7162      II->setUnwindDest(OtherPred);
7163      if (DTU) {
7164        Updates.push_back({DominatorTree::Insert, Pred, OtherPred});
7165        Updates.push_back({DominatorTree::Delete, Pred, BB});
7166      }
7167    }
7168
7169    // The debug info in OtherPred doesn't cover the merged control flow that
7170    // used to go through BB.  We need to delete it or update it.
7171    for (Instruction &Inst : llvm::make_early_inc_range(*OtherPred))
7172      if (isa<DbgInfoIntrinsic>(Inst))
7173        Inst.eraseFromParent();
7174
7175    SmallSetVector<BasicBlock *, 16> UniqueSuccs(succ_begin(BB), succ_end(BB));
7176    for (BasicBlock *Succ : UniqueSuccs) {
7177      Succ->removePredecessor(BB);
7178      if (DTU)
7179        Updates.push_back({DominatorTree::Delete, BB, Succ});
7180    }
7181
7182    IRBuilder<> Builder(BI);
7183    Builder.CreateUnreachable();
7184    BI->eraseFromParent();
7185    if (DTU)
7186      DTU->applyUpdates(Updates);
7187    return true;
7188  }
7189  return false;
7190}
7191
7192bool SimplifyCFGOpt::simplifyBranch(BranchInst *Branch, IRBuilder<> &Builder) {
7193  return Branch->isUnconditional() ? simplifyUncondBranch(Branch, Builder)
7194                                   : simplifyCondBranch(Branch, Builder);
7195}
7196
7197bool SimplifyCFGOpt::simplifyUncondBranch(BranchInst *BI,
7198                                          IRBuilder<> &Builder) {
7199  BasicBlock *BB = BI->getParent();
7200  BasicBlock *Succ = BI->getSuccessor(0);
7201
7202  // If the Terminator is the only non-phi instruction, simplify the block.
7203  // If LoopHeader is provided, check if the block or its successor is a loop
7204  // header. (This is for early invocations before loop simplify and
7205  // vectorization to keep canonical loop forms for nested loops. These blocks
7206  // can be eliminated when the pass is invoked later in the back-end.)
7207  // Note that if BB has only one predecessor then we do not introduce new
7208  // backedge, so we can eliminate BB.
7209  bool NeedCanonicalLoop =
7210      Options.NeedCanonicalLoop &&
7211      (!LoopHeaders.empty() && BB->hasNPredecessorsOrMore(2) &&
7212       (is_contained(LoopHeaders, BB) || is_contained(LoopHeaders, Succ)));
7213  BasicBlock::iterator I = BB->getFirstNonPHIOrDbg(true)->getIterator();
7214  if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() &&
7215      !NeedCanonicalLoop && TryToSimplifyUncondBranchFromEmptyBlock(BB, DTU))
7216    return true;
7217
7218  // If the only instruction in the block is a seteq/setne comparison against a
7219  // constant, try to simplify the block.
7220  if (ICmpInst *ICI = dyn_cast<ICmpInst>(I))
7221    if (ICI->isEquality() && isa<ConstantInt>(ICI->getOperand(1))) {
7222      for (++I; isa<DbgInfoIntrinsic>(I); ++I)
7223        ;
7224      if (I->isTerminator() &&
7225          tryToSimplifyUncondBranchWithICmpInIt(ICI, Builder))
7226        return true;
7227    }
7228
7229  // See if we can merge an empty landing pad block with another which is
7230  // equivalent.
7231  if (LandingPadInst *LPad = dyn_cast<LandingPadInst>(I)) {
7232    for (++I; isa<DbgInfoIntrinsic>(I); ++I)
7233      ;
7234    if (I->isTerminator() && TryToMergeLandingPad(LPad, BI, BB, DTU))
7235      return true;
7236  }
7237
7238  // If this basic block is ONLY a compare and a branch, and if a predecessor
7239  // branches to us and our successor, fold the comparison into the
7240  // predecessor and use logical operations to update the incoming value
7241  // for PHI nodes in common successor.
7242  if (Options.SpeculateBlocks &&
7243      FoldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI,
7244                             Options.BonusInstThreshold))
7245    return requestResimplify();
7246  return false;
7247}
7248
7249static BasicBlock *allPredecessorsComeFromSameSource(BasicBlock *BB) {
7250  BasicBlock *PredPred = nullptr;
7251  for (auto *P : predecessors(BB)) {
7252    BasicBlock *PPred = P->getSinglePredecessor();
7253    if (!PPred || (PredPred && PredPred != PPred))
7254      return nullptr;
7255    PredPred = PPred;
7256  }
7257  return PredPred;
7258}
7259
7260bool SimplifyCFGOpt::simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {
7261  assert(
7262      !isa<ConstantInt>(BI->getCondition()) &&
7263      BI->getSuccessor(0) != BI->getSuccessor(1) &&
7264      "Tautological conditional branch should have been eliminated already.");
7265
7266  BasicBlock *BB = BI->getParent();
7267  if (!Options.SimplifyCondBranch ||
7268      BI->getFunction()->hasFnAttribute(Attribute::OptForFuzzing))
7269    return false;
7270
7271  // Conditional branch
7272  if (isValueEqualityComparison(BI)) {
7273    // If we only have one predecessor, and if it is a branch on this value,
7274    // see if that predecessor totally determines the outcome of this
7275    // switch.
7276    if (BasicBlock *OnlyPred = BB->getSinglePredecessor())
7277      if (SimplifyEqualityComparisonWithOnlyPredecessor(BI, OnlyPred, Builder))
7278        return requestResimplify();
7279
7280    // This block must be empty, except for the setcond inst, if it exists.
7281    // Ignore dbg and pseudo intrinsics.
7282    auto I = BB->instructionsWithoutDebug(true).begin();
7283    if (&*I == BI) {
7284      if (FoldValueComparisonIntoPredecessors(BI, Builder))
7285        return requestResimplify();
7286    } else if (&*I == cast<Instruction>(BI->getCondition())) {
7287      ++I;
7288      if (&*I == BI && FoldValueComparisonIntoPredecessors(BI, Builder))
7289        return requestResimplify();
7290    }
7291  }
7292
7293  // Try to turn "br (X == 0 | X == 1), T, F" into a switch instruction.
7294  if (SimplifyBranchOnICmpChain(BI, Builder, DL))
7295    return true;
7296
7297  // If this basic block has dominating predecessor blocks and the dominating
7298  // blocks' conditions imply BI's condition, we know the direction of BI.
7299  std::optional<bool> Imp = isImpliedByDomCondition(BI->getCondition(), BI, DL);
7300  if (Imp) {
7301    // Turn this into a branch on constant.
7302    auto *OldCond = BI->getCondition();
7303    ConstantInt *TorF = *Imp ? ConstantInt::getTrue(BB->getContext())
7304                             : ConstantInt::getFalse(BB->getContext());
7305    BI->setCondition(TorF);
7306    RecursivelyDeleteTriviallyDeadInstructions(OldCond);
7307    return requestResimplify();
7308  }
7309
7310  // If this basic block is ONLY a compare and a branch, and if a predecessor
7311  // branches to us and one of our successors, fold the comparison into the
7312  // predecessor and use logical operations to pick the right destination.
7313  if (Options.SpeculateBlocks &&
7314      FoldBranchToCommonDest(BI, DTU, /*MSSAU=*/nullptr, &TTI,
7315                             Options.BonusInstThreshold))
7316    return requestResimplify();
7317
7318  // We have a conditional branch to two blocks that are only reachable
7319  // from BI.  We know that the condbr dominates the two blocks, so see if
7320  // there is any identical code in the "then" and "else" blocks.  If so, we
7321  // can hoist it up to the branching block.
7322  if (BI->getSuccessor(0)->getSinglePredecessor()) {
7323    if (BI->getSuccessor(1)->getSinglePredecessor()) {
7324      if (HoistCommon && hoistCommonCodeFromSuccessors(
7325                             BI->getParent(), !Options.HoistCommonInsts))
7326        return requestResimplify();
7327    } else {
7328      // If Successor #1 has multiple preds, we may be able to conditionally
7329      // execute Successor #0 if it branches to Successor #1.
7330      Instruction *Succ0TI = BI->getSuccessor(0)->getTerminator();
7331      if (Succ0TI->getNumSuccessors() == 1 &&
7332          Succ0TI->getSuccessor(0) == BI->getSuccessor(1))
7333        if (SpeculativelyExecuteBB(BI, BI->getSuccessor(0)))
7334          return requestResimplify();
7335    }
7336  } else if (BI->getSuccessor(1)->getSinglePredecessor()) {
7337    // If Successor #0 has multiple preds, we may be able to conditionally
7338    // execute Successor #1 if it branches to Successor #0.
7339    Instruction *Succ1TI = BI->getSuccessor(1)->getTerminator();
7340    if (Succ1TI->getNumSuccessors() == 1 &&
7341        Succ1TI->getSuccessor(0) == BI->getSuccessor(0))
7342      if (SpeculativelyExecuteBB(BI, BI->getSuccessor(1)))
7343        return requestResimplify();
7344  }
7345
7346  // If this is a branch on something for which we know the constant value in
7347  // predecessors (e.g. a phi node in the current block), thread control
7348  // through this block.
7349  if (FoldCondBranchOnValueKnownInPredecessor(BI, DTU, DL, Options.AC))
7350    return requestResimplify();
7351
7352  // Scan predecessor blocks for conditional branches.
7353  for (BasicBlock *Pred : predecessors(BB))
7354    if (BranchInst *PBI = dyn_cast<BranchInst>(Pred->getTerminator()))
7355      if (PBI != BI && PBI->isConditional())
7356        if (SimplifyCondBranchToCondBranch(PBI, BI, DTU, DL, TTI))
7357          return requestResimplify();
7358
7359  // Look for diamond patterns.
7360  if (MergeCondStores)
7361    if (BasicBlock *PrevBB = allPredecessorsComeFromSameSource(BB))
7362      if (BranchInst *PBI = dyn_cast<BranchInst>(PrevBB->getTerminator()))
7363        if (PBI != BI && PBI->isConditional())
7364          if (mergeConditionalStores(PBI, BI, DTU, DL, TTI))
7365            return requestResimplify();
7366
7367  return false;
7368}
7369
7370/// Check if passing a value to an instruction will cause undefined behavior.
7371static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified) {
7372  Constant *C = dyn_cast<Constant>(V);
7373  if (!C)
7374    return false;
7375
7376  if (I->use_empty())
7377    return false;
7378
7379  if (C->isNullValue() || isa<UndefValue>(C)) {
7380    // Only look at the first use, avoid hurting compile time with long uselists
7381    auto *Use = cast<Instruction>(*I->user_begin());
7382    // Bail out if Use is not in the same BB as I or Use == I or Use comes
7383    // before I in the block. The latter two can be the case if Use is a PHI
7384    // node.
7385    if (Use->getParent() != I->getParent() || Use == I || Use->comesBefore(I))
7386      return false;
7387
7388    // Now make sure that there are no instructions in between that can alter
7389    // control flow (eg. calls)
7390    auto InstrRange =
7391        make_range(std::next(I->getIterator()), Use->getIterator());
7392    if (any_of(InstrRange, [](Instruction &I) {
7393          return !isGuaranteedToTransferExecutionToSuccessor(&I);
7394        }))
7395      return false;
7396
7397    // Look through GEPs. A load from a GEP derived from NULL is still undefined
7398    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Use))
7399      if (GEP->getPointerOperand() == I) {
7400        if (!GEP->isInBounds() || !GEP->hasAllZeroIndices())
7401          PtrValueMayBeModified = true;
7402        return passingValueIsAlwaysUndefined(V, GEP, PtrValueMayBeModified);
7403      }
7404
7405    // Look through bitcasts.
7406    if (BitCastInst *BC = dyn_cast<BitCastInst>(Use))
7407      return passingValueIsAlwaysUndefined(V, BC, PtrValueMayBeModified);
7408
7409    // Load from null is undefined.
7410    if (LoadInst *LI = dyn_cast<LoadInst>(Use))
7411      if (!LI->isVolatile())
7412        return !NullPointerIsDefined(LI->getFunction(),
7413                                     LI->getPointerAddressSpace());
7414
7415    // Store to null is undefined.
7416    if (StoreInst *SI = dyn_cast<StoreInst>(Use))
7417      if (!SI->isVolatile())
7418        return (!NullPointerIsDefined(SI->getFunction(),
7419                                      SI->getPointerAddressSpace())) &&
7420               SI->getPointerOperand() == I;
7421
7422    if (auto *CB = dyn_cast<CallBase>(Use)) {
7423      if (C->isNullValue() && NullPointerIsDefined(CB->getFunction()))
7424        return false;
7425      // A call to null is undefined.
7426      if (CB->getCalledOperand() == I)
7427        return true;
7428
7429      if (C->isNullValue()) {
7430        for (const llvm::Use &Arg : CB->args())
7431          if (Arg == I) {
7432            unsigned ArgIdx = CB->getArgOperandNo(&Arg);
7433            if (CB->isPassingUndefUB(ArgIdx) &&
7434                CB->paramHasAttr(ArgIdx, Attribute::NonNull)) {
7435              // Passing null to a nonnnull+noundef argument is undefined.
7436              return !PtrValueMayBeModified;
7437            }
7438          }
7439      } else if (isa<UndefValue>(C)) {
7440        // Passing undef to a noundef argument is undefined.
7441        for (const llvm::Use &Arg : CB->args())
7442          if (Arg == I) {
7443            unsigned ArgIdx = CB->getArgOperandNo(&Arg);
7444            if (CB->isPassingUndefUB(ArgIdx)) {
7445              // Passing undef to a noundef argument is undefined.
7446              return true;
7447            }
7448          }
7449      }
7450    }
7451  }
7452  return false;
7453}
7454
7455/// If BB has an incoming value that will always trigger undefined behavior
7456/// (eg. null pointer dereference), remove the branch leading here.
7457static bool removeUndefIntroducingPredecessor(BasicBlock *BB,
7458                                              DomTreeUpdater *DTU,
7459                                              AssumptionCache *AC) {
7460  for (PHINode &PHI : BB->phis())
7461    for (unsigned i = 0, e = PHI.getNumIncomingValues(); i != e; ++i)
7462      if (passingValueIsAlwaysUndefined(PHI.getIncomingValue(i), &PHI)) {
7463        BasicBlock *Predecessor = PHI.getIncomingBlock(i);
7464        Instruction *T = Predecessor->getTerminator();
7465        IRBuilder<> Builder(T);
7466        if (BranchInst *BI = dyn_cast<BranchInst>(T)) {
7467          BB->removePredecessor(Predecessor);
7468          // Turn unconditional branches into unreachables and remove the dead
7469          // destination from conditional branches.
7470          if (BI->isUnconditional())
7471            Builder.CreateUnreachable();
7472          else {
7473            // Preserve guarding condition in assume, because it might not be
7474            // inferrable from any dominating condition.
7475            Value *Cond = BI->getCondition();
7476            CallInst *Assumption;
7477            if (BI->getSuccessor(0) == BB)
7478              Assumption = Builder.CreateAssumption(Builder.CreateNot(Cond));
7479            else
7480              Assumption = Builder.CreateAssumption(Cond);
7481            if (AC)
7482              AC->registerAssumption(cast<AssumeInst>(Assumption));
7483            Builder.CreateBr(BI->getSuccessor(0) == BB ? BI->getSuccessor(1)
7484                                                       : BI->getSuccessor(0));
7485          }
7486          BI->eraseFromParent();
7487          if (DTU)
7488            DTU->applyUpdates({{DominatorTree::Delete, Predecessor, BB}});
7489          return true;
7490        } else if (SwitchInst *SI = dyn_cast<SwitchInst>(T)) {
7491          // Redirect all branches leading to UB into
7492          // a newly created unreachable block.
7493          BasicBlock *Unreachable = BasicBlock::Create(
7494              Predecessor->getContext(), "unreachable", BB->getParent(), BB);
7495          Builder.SetInsertPoint(Unreachable);
7496          // The new block contains only one instruction: Unreachable
7497          Builder.CreateUnreachable();
7498          for (const auto &Case : SI->cases())
7499            if (Case.getCaseSuccessor() == BB) {
7500              BB->removePredecessor(Predecessor);
7501              Case.setSuccessor(Unreachable);
7502            }
7503          if (SI->getDefaultDest() == BB) {
7504            BB->removePredecessor(Predecessor);
7505            SI->setDefaultDest(Unreachable);
7506          }
7507
7508          if (DTU)
7509            DTU->applyUpdates(
7510                { { DominatorTree::Insert, Predecessor, Unreachable },
7511                  { DominatorTree::Delete, Predecessor, BB } });
7512          return true;
7513        }
7514      }
7515
7516  return false;
7517}
7518
7519bool SimplifyCFGOpt::simplifyOnce(BasicBlock *BB) {
7520  bool Changed = false;
7521
7522  assert(BB && BB->getParent() && "Block not embedded in function!");
7523  assert(BB->getTerminator() && "Degenerate basic block encountered!");
7524
7525  // Remove basic blocks that have no predecessors (except the entry block)...
7526  // or that just have themself as a predecessor.  These are unreachable.
7527  if ((pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) ||
7528      BB->getSinglePredecessor() == BB) {
7529    LLVM_DEBUG(dbgs() << "Removing BB: \n" << *BB);
7530    DeleteDeadBlock(BB, DTU);
7531    return true;
7532  }
7533
7534  // Check to see if we can constant propagate this terminator instruction
7535  // away...
7536  Changed |= ConstantFoldTerminator(BB, /*DeleteDeadConditions=*/true,
7537                                    /*TLI=*/nullptr, DTU);
7538
7539  // Check for and eliminate duplicate PHI nodes in this block.
7540  Changed |= EliminateDuplicatePHINodes(BB);
7541
7542  // Check for and remove branches that will always cause undefined behavior.
7543  if (removeUndefIntroducingPredecessor(BB, DTU, Options.AC))
7544    return requestResimplify();
7545
7546  // Merge basic blocks into their predecessor if there is only one distinct
7547  // pred, and if there is only one distinct successor of the predecessor, and
7548  // if there are no PHI nodes.
7549  if (MergeBlockIntoPredecessor(BB, DTU))
7550    return true;
7551
7552  if (SinkCommon && Options.SinkCommonInsts)
7553    if (SinkCommonCodeFromPredecessors(BB, DTU) ||
7554        MergeCompatibleInvokes(BB, DTU)) {
7555      // SinkCommonCodeFromPredecessors() does not automatically CSE PHI's,
7556      // so we may now how duplicate PHI's.
7557      // Let's rerun EliminateDuplicatePHINodes() first,
7558      // before FoldTwoEntryPHINode() potentially converts them into select's,
7559      // after which we'd need a whole EarlyCSE pass run to cleanup them.
7560      return true;
7561    }
7562
7563  IRBuilder<> Builder(BB);
7564
7565  if (Options.SpeculateBlocks &&
7566      !BB->getParent()->hasFnAttribute(Attribute::OptForFuzzing)) {
7567    // If there is a trivial two-entry PHI node in this basic block, and we can
7568    // eliminate it, do so now.
7569    if (auto *PN = dyn_cast<PHINode>(BB->begin()))
7570      if (PN->getNumIncomingValues() == 2)
7571        if (FoldTwoEntryPHINode(PN, TTI, DTU, DL))
7572          return true;
7573  }
7574
7575  Instruction *Terminator = BB->getTerminator();
7576  Builder.SetInsertPoint(Terminator);
7577  switch (Terminator->getOpcode()) {
7578  case Instruction::Br:
7579    Changed |= simplifyBranch(cast<BranchInst>(Terminator), Builder);
7580    break;
7581  case Instruction::Resume:
7582    Changed |= simplifyResume(cast<ResumeInst>(Terminator), Builder);
7583    break;
7584  case Instruction::CleanupRet:
7585    Changed |= simplifyCleanupReturn(cast<CleanupReturnInst>(Terminator));
7586    break;
7587  case Instruction::Switch:
7588    Changed |= simplifySwitch(cast<SwitchInst>(Terminator), Builder);
7589    break;
7590  case Instruction::Unreachable:
7591    Changed |= simplifyUnreachable(cast<UnreachableInst>(Terminator));
7592    break;
7593  case Instruction::IndirectBr:
7594    Changed |= simplifyIndirectBr(cast<IndirectBrInst>(Terminator));
7595    break;
7596  }
7597
7598  return Changed;
7599}
7600
7601bool SimplifyCFGOpt::run(BasicBlock *BB) {
7602  bool Changed = false;
7603
7604  // Repeated simplify BB as long as resimplification is requested.
7605  do {
7606    Resimplify = false;
7607
7608    // Perform one round of simplifcation. Resimplify flag will be set if
7609    // another iteration is requested.
7610    Changed |= simplifyOnce(BB);
7611  } while (Resimplify);
7612
7613  return Changed;
7614}
7615
7616bool llvm::simplifyCFG(BasicBlock *BB, const TargetTransformInfo &TTI,
7617                       DomTreeUpdater *DTU, const SimplifyCFGOptions &Options,
7618                       ArrayRef<WeakVH> LoopHeaders) {
7619  return SimplifyCFGOpt(TTI, DTU, BB->getModule()->getDataLayout(), LoopHeaders,
7620                        Options)
7621      .run(BB);
7622}
7623