IndVarSimplify.cpp revision 363496
//===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This transformation analyzes and transforms the induction variables (and
// computations derived from them) into simpler forms suitable for subsequent
// analysis and transformation.
//
// If the trip count of a loop is computable, this pass also makes the following
// changes:
//   1. The exit condition for the loop is canonicalized to compare the
//      induction value against the exit value.  This turns loops like:
//        'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
//   2. Any use outside of the loop of an expression derived from the indvar
//      is changed to compute the derived value outside of the loop, eliminating
//      the dependence on the exit value of the induction variable.  If the only
//      purpose of the loop is to compute the exit value of some derived
//      expression, this transformation will make the loop dead.
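//      For example (illustrative):
//        'for (i = 0; i != n; ++i) {} ... use(i * 2);'
//      becomes 'use(n * 2)' after the loop; the loop, now computing nothing
//      that is observed, can then be deleted.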
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/IndVarSimplify.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/LoopPassManager.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
#include <cassert>
#include <cstdint>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "indvars"

STATISTIC(NumWidened     , "Number of indvars widened");
STATISTIC(NumReplaced    , "Number of exit values replaced");
STATISTIC(NumLFTR        , "Number of loop exit tests replaced");
STATISTIC(NumElimExt     , "Number of IV sign/zero extends eliminated");
STATISTIC(NumElimIV      , "Number of congruent IVs eliminated");

// Trip count verification can be enabled by default under NDEBUG if we
// implement a strong expression equivalence checker in SCEV. Until then, we
// use the verify-indvars flag, which may assert in some cases.
static cl::opt<bool> VerifyIndvars(
  "verify-indvars", cl::Hidden,
  cl::desc("Verify the ScalarEvolution result after running indvars"));

enum ReplaceExitVal { NeverRepl, OnlyCheapRepl, NoHardUse, AlwaysRepl };

static cl::opt<ReplaceExitVal> ReplaceExitValue(
    "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
    cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
    cl::values(clEnumValN(NeverRepl, "never", "never replace exit value"),
               clEnumValN(OnlyCheapRepl, "cheap",
                          "only replace exit value when the cost is cheap"),
               clEnumValN(NoHardUse, "noharduse",
                          "only replace exit values when loop def likely dead"),
               clEnumValN(AlwaysRepl, "always",
                          "always replace exit value whenever possible")));

static cl::opt<bool> UsePostIncrementRanges(
  "indvars-post-increment-ranges", cl::Hidden,
  cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
  cl::init(true));

static cl::opt<bool>
DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
            cl::desc("Disable Linear Function Test Replace optimization"));

static cl::opt<bool>
LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true),
                cl::desc("Predicate conditions in read only loops"));

namespace {

struct RewritePhi;

class IndVarSimplify {
  LoopInfo *LI;
  ScalarEvolution *SE;
  DominatorTree *DT;
  const DataLayout &DL;
  TargetLibraryInfo *TLI;
  const TargetTransformInfo *TTI;

  SmallVector<WeakTrackingVH, 16> DeadInsts;

  bool isValidRewrite(Value *FromVal, Value *ToVal);

  bool handleFloatingPointIV(Loop *L, PHINode *PH);
  bool rewriteNonIntegerIVs(Loop *L);

  bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
  /// Try to eliminate loop exits based on analyzable exit counts.
  bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter);
  /// Try to form loop invariant tests for loop exits by changing how many
  /// iterations of the loop run when that is unobservable.
  bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);

  bool canLoopBeDeleted(Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet);
  bool rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);
  bool rewriteFirstIterationLoopExitValues(Loop *L);
  bool hasHardUserWithinLoop(const Loop *L, const Instruction *I) const;

  bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                                 const SCEV *ExitCount,
                                 PHINode *IndVar, SCEVExpander &Rewriter);

  bool sinkUnusedInvariants(Loop *L);

public:
  IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
                 const DataLayout &DL, TargetLibraryInfo *TLI,
                 TargetTransformInfo *TTI)
      : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI) {}

  bool run(Loop *L);
};

} // end anonymous namespace

/// Return true if the SCEV expansion generated by the rewriter can replace the
/// original value. SCEV guarantees that it produces the same value, but the way
/// it is produced may be illegal IR.  Ideally, this function will only be
/// called for verification.
bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) {
  // If an SCEV expression subsumed multiple pointers, its expansion could
  // reassociate the GEP changing the base pointer. This is illegal because the
  // final address produced by a GEP chain must be inbounds relative to its
  // underlying object. Otherwise basic alias analysis, among other things,
  // could fail in a dangerous way. Ultimately, SCEV will be improved to avoid
  // producing an expression involving multiple pointers. Until then, we must
  // bail out here.
  //
  // Retrieve the pointer operand of the GEP. Don't use GetUnderlyingObject
  // because it understands lcssa phis while SCEV does not.
  Value *FromPtr = FromVal;
  Value *ToPtr = ToVal;
  if (auto *GEP = dyn_cast<GEPOperator>(FromVal)) {
    FromPtr = GEP->getPointerOperand();
  }
  if (auto *GEP = dyn_cast<GEPOperator>(ToVal)) {
    ToPtr = GEP->getPointerOperand();
  }
  if (FromPtr != FromVal || ToPtr != ToVal) {
    // Quickly check the common case
    if (FromPtr == ToPtr)
      return true;

    // SCEV may have rewritten an expression that produces the GEP's pointer
    // operand. That's ok as long as the pointer operand has the same base
    // pointer. Unlike GetUnderlyingObject(), getPointerBase() will find the
    // base of a recurrence. This handles the case in which SCEV expansion
    // converts a pointer type recurrence into a nonrecurrent pointer base
    // indexed by an integer recurrence.

    // If the GEP base pointer is a vector of pointers, abort.
    if (!FromPtr->getType()->isPointerTy() || !ToPtr->getType()->isPointerTy())
      return false;

    const SCEV *FromBase = SE->getPointerBase(SE->getSCEV(FromPtr));
    const SCEV *ToBase = SE->getPointerBase(SE->getSCEV(ToPtr));
    if (FromBase == ToBase)
      return true;

    LLVM_DEBUG(dbgs() << "INDVARS: GEP rewrite bail out " << *FromBase
                      << " != " << *ToBase << "\n");

    return false;
  }
  return true;
}

/// Determine the insertion point for this user. By default, insert immediately
/// before the user. SCEVExpander or LICM will hoist loop invariants out of the
/// loop. For PHI nodes, there may be multiple uses, so compute the nearest
/// common dominator for the incoming blocks. A nullptr can be returned if no
/// viable location is found: it may happen if User is a PHI and Def only comes
/// to this PHI from unreachable blocks.
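///
/// For example (illustrative): if Def reaches a PHI along two incoming blocks
/// B1 and B2, the insertion point is the terminator of the nearest common
/// dominator of B1 and B2, then hoisted up the dominator tree until it lands
/// in the same loop as Def.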
static Instruction *getInsertPointForUses(Instruction *User, Value *Def,
                                          DominatorTree *DT, LoopInfo *LI) {
  PHINode *PHI = dyn_cast<PHINode>(User);
  if (!PHI)
    return User;

  Instruction *InsertPt = nullptr;
  for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) {
    if (PHI->getIncomingValue(i) != Def)
      continue;

    BasicBlock *InsertBB = PHI->getIncomingBlock(i);

    if (!DT->isReachableFromEntry(InsertBB))
      continue;

    if (!InsertPt) {
      InsertPt = InsertBB->getTerminator();
      continue;
    }
    InsertBB = DT->findNearestCommonDominator(InsertPt->getParent(), InsertBB);
    InsertPt = InsertBB->getTerminator();
  }

  // If we have skipped all inputs, it means that Def only comes to Phi from
  // unreachable blocks.
  if (!InsertPt)
    return nullptr;

  auto *DefI = dyn_cast<Instruction>(Def);
  if (!DefI)
    return InsertPt;

  assert(DT->dominates(DefI, InsertPt) && "def does not dominate all uses");

  auto *L = LI->getLoopFor(DefI->getParent());
  assert(!L || L->contains(LI->getLoopFor(InsertPt->getParent())));

  for (auto *DTN = (*DT)[InsertPt->getParent()]; DTN; DTN = DTN->getIDom())
    if (LI->getLoopFor(DTN->getBlock()) == L)
      return DTN->getBlock()->getTerminator();

  llvm_unreachable("DefI dominates InsertPt!");
}

//===----------------------------------------------------------------------===//
// rewriteNonIntegerIVs and helpers. Prefer integer IVs.
//===----------------------------------------------------------------------===//

/// Convert APF to an integer, if possible.
static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
  bool isExact = false;
  // See if we can convert this to an int64_t.
  uint64_t UIntVal;
  if (APF.convertToInteger(makeMutableArrayRef(UIntVal), 64, true,
                           APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
      !isExact)
    return false;
  IntVal = UIntVal;
  return true;
}

/// If the loop has a floating-point induction variable, then insert a
/// corresponding integer induction variable if possible.
/// For example,
/// for(double i = 0; i < 10000; ++i)
///   bar(i)
/// is converted into
/// for(int i = 0; i < 10000; ++i)
///   bar((double)i);
bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
  unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
  unsigned BackEdge     = IncomingEdge^1;

  // Check incoming value.
  auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));

  int64_t InitValue;
  if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
    return false;

  // Check the IV increment. Reject this PN if the increment operation is not
  // an add or if the increment value cannot be represented by an integer.
  auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
  if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;

  // If this is not an add of the PHI with a ConstantFP, or if the constant fp
  // is not an integer, bail out.
  ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
  int64_t IncValue;
  if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
      !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
    return false;

  // Check Incr uses. One user is PN and the other user is an exit condition
  // used by the conditional terminator.
  Value::user_iterator IncrUse = Incr->user_begin();
  Instruction *U1 = cast<Instruction>(*IncrUse++);
  if (IncrUse == Incr->user_end()) return false;
  Instruction *U2 = cast<Instruction>(*IncrUse++);
  if (IncrUse != Incr->user_end()) return false;

  // Find the exit condition, which is an fcmp.  If it doesn't exist, or if it
  // isn't only used by a branch, we can't transform it.
  FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
  if (!Compare)
    Compare = dyn_cast<FCmpInst>(U2);
  if (!Compare || !Compare->hasOneUse() ||
      !isa<BranchInst>(Compare->user_back()))
    return false;

  BranchInst *TheBr = cast<BranchInst>(Compare->user_back());

  // We need to verify that the branch actually controls the iteration count
  // of the loop.  If not, the new IV can overflow and no one will notice.
  // The branch block must be in the loop and one of the successors must be out
  // of the loop.
  assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
  if (!L->contains(TheBr->getParent()) ||
      (L->contains(TheBr->getSuccessor(0)) &&
       L->contains(TheBr->getSuccessor(1))))
    return false;

  // If it isn't a comparison with an integer-as-fp (the exit value), we can't
  // transform it.
  ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
  int64_t ExitValue;
  if (ExitValueVal == nullptr ||
      !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
    return false;

  // Find the new predicate for the integer comparison.
  CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
  switch (Compare->getPredicate()) {
  default: return false;  // Unknown comparison.
  case CmpInst::FCMP_OEQ:
  case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
  case CmpInst::FCMP_ONE:
  case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
  case CmpInst::FCMP_OGE:
  case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
  case CmpInst::FCMP_OLT:
  case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
  case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
  }

  // We convert the floating point induction variable to a signed i32 value if
  // we can.  This is only safe if any overflow in the comparison would also
  // be caught by the equivalent integer operations.  Check for this now.
  // TODO: We could use i64 if it is native and the range requires it.

  // The start/stride/exit values must all fit in a signed i32.
  if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
    return false;

  // If not actually striding (add x, 0.0), avoid touching the code.
  if (IncValue == 0)
    return false;

  // Positive and negative strides have different safety conditions.
  if (IncValue > 0) {
    // If we have a positive stride, we require the init to be less than the
    // exit value.
    if (InitValue >= ExitValue)
      return false;

    uint32_t Range = uint32_t(ExitValue-InitValue);
    // Check for infinite loop, either:
    // while (i <= Exit) or until (i > Exit)
    if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
      return false;
  } else {
    // If we have a negative stride, we require the init to be greater than the
    // exit value.
    if (InitValue <= ExitValue)
      return false;

    uint32_t Range = uint32_t(InitValue-ExitValue);
    // Check for infinite loop, either:
    // while (i >= Exit) or until (i < Exit)
    if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(-IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
      return false;
  }

  IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());

  // Insert the new integer induction variable.
  PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
  NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
                      PN->getIncomingBlock(IncomingEdge));

  Value *NewAdd =
    BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
                              Incr->getName()+".int", Incr);
  NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));

  ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
                                      ConstantInt::get(Int32Ty, ExitValue),
                                      Compare->getName());

  // In the following deletions, PN may become dead and may be deleted.
  // Use a WeakTrackingVH to observe whether this happens.
  WeakTrackingVH WeakPH = PN;

  // Delete the old floating point exit comparison.  The branch starts using the
  // new comparison.
  NewCompare->takeName(Compare);
  Compare->replaceAllUsesWith(NewCompare);
  RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI);

  // Delete the old floating point increment.
  Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
  RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI);

  // If the FP induction variable still has uses, this is because something else
  // in the loop uses its value.  In order to canonicalize the induction
  // variable, we choose to eliminate the IV and rewrite it in terms of an
  // int->fp cast.
  //
  // We give preference to sitofp over uitofp because it is faster on most
  // platforms.
  if (WeakPH) {
    Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
                                 &*PN->getParent()->getFirstInsertionPt());
    PN->replaceAllUsesWith(Conv);
    RecursivelyDeleteTriviallyDeadInstructions(PN, TLI);
  }
  return true;
}

bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
  // First step.  Check to see if there are any floating-point recurrences.
  // If there are, change them into integer recurrences, permitting analysis by
  // the SCEV routines.
  BasicBlock *Header = L->getHeader();

  SmallVector<WeakTrackingVH, 8> PHIs;
  for (PHINode &PN : Header->phis())
    PHIs.push_back(&PN);

  bool Changed = false;
  for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
    if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
      Changed |= handleFloatingPointIV(L, PN);

  // If the loop previously had a floating-point IV, ScalarEvolution
  // may not have been able to compute a trip count. Now that we've done some
  // re-writing, the trip count may be computable.
  if (Changed)
    SE->forgetLoop(L);
  return Changed;
}

namespace {

// Collect information about PHI nodes which can be transformed in
// rewriteLoopExitValues.
struct RewritePhi {
  PHINode *PN;               // For which PHI node is this replacement?
  unsigned Ith;              // For which incoming value?
  const SCEV *ExpansionSCEV; // The SCEV of the incoming value we are rewriting.
  Instruction *ExpansionPoint; // Where we'd like to expand that SCEV.
  bool HighCost;               // Is this expansion high-cost?

  Value *Expansion = nullptr;
  bool ValidRewrite = false;

  RewritePhi(PHINode *P, unsigned I, const SCEV *Val, Instruction *ExpansionPt,
             bool H)
      : PN(P), Ith(I), ExpansionSCEV(Val), ExpansionPoint(ExpansionPt),
        HighCost(H) {}
};

} // end anonymous namespace

//===----------------------------------------------------------------------===//
// rewriteLoopExitValues - Optimize IV users outside the loop.
// As a side effect, reduces the amount of IV processing within the loop.
//===----------------------------------------------------------------------===//

bool IndVarSimplify::hasHardUserWithinLoop(const Loop *L,
                                           const Instruction *I) const {
  SmallPtrSet<const Instruction *, 8> Visited;
  SmallVector<const Instruction *, 8> WorkList;
  Visited.insert(I);
  WorkList.push_back(I);
  while (!WorkList.empty()) {
    const Instruction *Curr = WorkList.pop_back_val();
    // This use is outside the loop, nothing to do.
    if (!L->contains(Curr))
      continue;
    // Do we assume it is a "hard" use which will not be eliminated easily?
    if (Curr->mayHaveSideEffects())
      return true;
    // Otherwise, add all its users to the worklist.
    for (auto U : Curr->users()) {
      auto *UI = cast<Instruction>(U);
      if (Visited.insert(UI).second)
        WorkList.push_back(UI);
    }
  }
  return false;
}

/// Check to see if this loop has a computable loop-invariant execution count.
/// If so, this means that we can compute the final value of any expressions
/// that are recurrent in the loop, and substitute the exit values from the loop
/// into any instructions outside of the loop that use the final values of the
/// current expressions.
///
/// This is mostly redundant with the regular IndVarSimplify activities that
/// happen later, except that it's more powerful in some cases, because it's
/// able to brute-force evaluate arbitrary instructions as long as they have
/// constant operands at the beginning of the loop.
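///
/// For example (illustrative):
///   for (i = 0; i != 10; ++i)
///     j = i * 2;
///   use(j);
/// Here 'j' has the computable exit value 18, so the use outside the loop is
/// rewritten to consume 18 directly, removing its dependence on the loop.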
bool IndVarSimplify::rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
  // Check a pre-condition.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "Indvars did not preserve LCSSA!");

  SmallVector<BasicBlock*, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);

  SmallVector<RewritePhi, 8> RewritePhiSet;
  // Find all values that are computed inside the loop, but used outside of it.
  // Because of LCSSA, these values will only occur in LCSSA PHI Nodes.  Scan
  // the exit blocks of the loop to find them.
  for (BasicBlock *ExitBB : ExitBlocks) {
    // If there are no PHI nodes in this exit block, then no values defined
    // inside the loop are used on this path, skip it.
    PHINode *PN = dyn_cast<PHINode>(ExitBB->begin());
    if (!PN) continue;

    unsigned NumPreds = PN->getNumIncomingValues();

    // Iterate over all of the PHI nodes.
    BasicBlock::iterator BBI = ExitBB->begin();
    while ((PN = dyn_cast<PHINode>(BBI++))) {
      if (PN->use_empty())
        continue; // dead use, don't replace it

      if (!SE->isSCEVable(PN->getType()))
        continue;

      // It's necessary to tell ScalarEvolution about this explicitly so that
      // it can walk the def-use list and forget all SCEVs, as it may not be
      // watching the PHI itself. Once the new exit value is in place, there
      // may not be a def-use connection between the loop and every instruction
      // which got a SCEVAddRecExpr for that loop.
      SE->forgetValue(PN);

      // Iterate over all of the values in all the PHI nodes.
      for (unsigned i = 0; i != NumPreds; ++i) {
        // If the value being merged in is not an instruction defined in the
        // loop, skip it.
        Value *InVal = PN->getIncomingValue(i);
        if (!isa<Instruction>(InVal))
          continue;

        // If this pred is for a subloop, not L itself, skip it.
        if (LI->getLoopFor(PN->getIncomingBlock(i)) != L)
          continue; // The Block is in a subloop, skip it.

        // Check that InVal is defined in the loop.
        Instruction *Inst = cast<Instruction>(InVal);
        if (!L->contains(Inst))
          continue;

        // Okay, this instruction has a user outside of the current loop
        // and varies predictably *inside* the loop.  Evaluate the value it
        // contains when the loop exits, if possible.  We prefer to start with
        // expressions which are true for all exits (so as to maximize
        // expression reuse by the SCEVExpander), but resort to per-exit
        // evaluation if that fails.
        const SCEV *ExitValue = SE->getSCEVAtScope(Inst, L->getParentLoop());
        if (isa<SCEVCouldNotCompute>(ExitValue) ||
            !SE->isLoopInvariant(ExitValue, L) ||
            !isSafeToExpand(ExitValue, *SE)) {
          // TODO: This should probably be sunk into SCEV in some way; maybe a
          // getSCEVForExit(SCEV*, L, ExitingBB)?  It can be generalized for
          // most SCEV expressions and other recurrence types (e.g. shift
          // recurrences).  Is there existing code we can reuse?
          const SCEV *ExitCount = SE->getExitCount(L, PN->getIncomingBlock(i));
          if (isa<SCEVCouldNotCompute>(ExitCount))
            continue;
          if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Inst)))
            if (AddRec->getLoop() == L)
              ExitValue = AddRec->evaluateAtIteration(ExitCount, *SE);
          if (isa<SCEVCouldNotCompute>(ExitValue) ||
              !SE->isLoopInvariant(ExitValue, L) ||
              !isSafeToExpand(ExitValue, *SE))
            continue;
        }

        // Computing the value outside of the loop brings no benefit if it is
        // definitely used inside the loop in a way which cannot be optimized
        // away.  Avoid doing so unless we know we have a value which computes
        // the ExitValue already.  TODO: This should be merged into the SCEV
        // expander to leverage its knowledge of existing expressions.
        if (ReplaceExitValue != AlwaysRepl &&
            !isa<SCEVConstant>(ExitValue) && !isa<SCEVUnknown>(ExitValue) &&
            hasHardUserWithinLoop(L, Inst))
          continue;

        // Check if expansions of this SCEV would count as being high cost.
        bool HighCost = Rewriter.isHighCostExpansion(ExitValue, L, Inst);

        // Note that we must not perform expansions until after
        // we have queried *all* the costs: performing a temporary expansion
        // in between, one that we might not intend to keep, may affect the
        // cost calculation of the next SCEVs we query, and those SCEVs may
        // erroneously appear to be cheaper than they are.

        // Collect all the candidate PHINodes to be rewritten.
        RewritePhiSet.emplace_back(PN, i, ExitValue, Inst, HighCost);
      }
    }
  }

  // Now that we've done preliminary filtering and billed all the SCEVs,
  // we can perform the last sanity check - the expansion must be valid.
  for (RewritePhi &Phi : RewritePhiSet) {
    Phi.Expansion = Rewriter.expandCodeFor(Phi.ExpansionSCEV, Phi.PN->getType(),
                                           Phi.ExpansionPoint);

    LLVM_DEBUG(dbgs() << "rewriteLoopExitValues: AfterLoopVal = "
                      << *(Phi.Expansion) << '\n'
                      << "  LoopVal = " << *(Phi.ExpansionPoint) << "\n");

    // FIXME: isValidRewrite() is a hack. It should be an assert, eventually.
    Phi.ValidRewrite = isValidRewrite(Phi.ExpansionPoint, Phi.Expansion);
    if (!Phi.ValidRewrite) {
      DeadInsts.push_back(Phi.Expansion);
      continue;
    }

#ifndef NDEBUG
    // If we reuse an instruction from a loop which is neither L nor one of
    // its containing loops, we end up breaking LCSSA form for this loop by
    // creating a new use of its instruction.
    if (auto *ExitInsn = dyn_cast<Instruction>(Phi.Expansion))
      if (auto *EVL = LI->getLoopFor(ExitInsn->getParent()))
        if (EVL != L)
          assert(EVL->contains(L) && "LCSSA breach detected!");
#endif
  }

  // TODO: after isValidRewrite() is an assertion, evaluate whether
  // it is beneficial to change how we calculate high-cost:
  // if we have SCEV 'A' which we know we will expand, should we calculate
  // the cost of other SCEVs after expanding SCEV 'A',
  // thus potentially giving a cost bonus to those other SCEVs?

  bool LoopCanBeDel = canLoopBeDeleted(L, RewritePhiSet);

  bool Changed = false;
  // Transformation.
  for (const RewritePhi &Phi : RewritePhiSet) {
    if (!Phi.ValidRewrite)
      continue;

    PHINode *PN = Phi.PN;
    Value *ExitVal = Phi.Expansion;

    // Only do the rewrite when the ExitValue can be expanded cheaply.
    // If LoopCanBeDel is true, rewrite the exit value aggressively.
    if (ReplaceExitValue == OnlyCheapRepl && !LoopCanBeDel && Phi.HighCost) {
      DeadInsts.push_back(ExitVal);
      continue;
    }

    Changed = true;
    ++NumReplaced;
    Instruction *Inst = cast<Instruction>(PN->getIncomingValue(Phi.Ith));
    PN->setIncomingValue(Phi.Ith, ExitVal);

    // If this instruction is dead now, schedule it for deletion. Don't delete
    // it immediately, to avoid invalidating iterators.
    if (isInstructionTriviallyDead(Inst, TLI))
      DeadInsts.push_back(Inst);

    // Replace PN with ExitVal if that is legal and does not break LCSSA.
    if (PN->getNumIncomingValues() == 1 &&
        LI->replacementPreservesLCSSAForm(PN, ExitVal)) {
      PN->replaceAllUsesWith(ExitVal);
      PN->eraseFromParent();
    }
  }

  // The insertion point instruction may have been deleted; clear it out
  // so that the rewriter doesn't trip over it later.
  Rewriter.clearInsertPoint();
  return Changed;
}

//===---------------------------------------------------------------------===//
// rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
// they will exit at the first iteration.
//===---------------------------------------------------------------------===//

/// Check to see if this loop has loop invariant conditions which lead to loop
/// exits. If so, we know that if the exit path is taken, it is at the first
/// loop iteration. This lets us predict exit values of PHI nodes that live in
/// the loop header.
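///
/// For example (illustrative): if the loop header contains
///   %x = phi [ %a, %preheader ], [ %b, %latch ]
/// and an in-loop branch on a loop-invariant condition leads to an exit, the
/// exit-block PHI value merged along that edge can be rewritten to %a, since
/// that exit can only be taken on the first iteration.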
bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
  // Verify the input to the pass is already in LCSSA form.
  assert(L->isLCSSAForm(*DT));

  SmallVector<BasicBlock *, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);

  bool MadeAnyChanges = false;
  for (auto *ExitBB : ExitBlocks) {
    // If there are no PHI nodes in this exit block, then no values defined
    // inside the loop are used on this path.
    for (PHINode &PN : ExitBB->phis()) {
      for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
           IncomingValIdx != E; ++IncomingValIdx) {
        auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);

        // Can we prove that the exit must run on the first iteration if it
        // runs at all?  (i.e. early exits are fine for our purposes, but
        // traces which lead to this exit being taken on the 2nd iteration
        // aren't.)  Note that this is about whether the exit branch is
        // executed, not about whether it is taken.
        if (!L->getLoopLatch() ||
            !DT->dominates(IncomingBB, L->getLoopLatch()))
          continue;

        // Get the condition that leads to the exit path.
        auto *TermInst = IncomingBB->getTerminator();

        Value *Cond = nullptr;
        if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
          // Must be a conditional branch, otherwise the block
          // should not be in the loop.
          Cond = BI->getCondition();
        } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
          Cond = SI->getCondition();
        else
          continue;

        if (!L->isLoopInvariant(Cond))
          continue;

        auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));

        // Only deal with PHIs in the loop header.
        if (!ExitVal || ExitVal->getParent() != L->getHeader())
          continue;

        // If ExitVal is a PHI on the loop header, then we know its
        // value along this exit because the exit can only be taken
        // on the first iteration.
        auto *LoopPreheader = L->getLoopPreheader();
        assert(LoopPreheader && "Invalid loop");
        int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
        if (PreheaderIdx != -1) {
          assert(ExitVal->getParent() == L->getHeader() &&
                 "ExitVal must be in loop header");
          MadeAnyChanges = true;
          PN.setIncomingValue(IncomingValIdx,
                              ExitVal->getIncomingValue(PreheaderIdx));
        }
      }
    }
  }
  return MadeAnyChanges;
}

/// Check whether it is possible to delete the loop after rewriting the exit
/// value. If it is possible, ignore ReplaceExitValue and do the rewriting
/// aggressively.
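///
/// For example (illustrative): in
///   for (i = 0; i != n; ++i)
///     sum += 2;
///   use(sum);
/// once the exit value of 'sum' is rewritten to '2 * n', the loop computes
/// nothing observable and becomes a candidate for deletion.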
bool IndVarSimplify::canLoopBeDeleted(
    Loop *L, SmallVector<RewritePhi, 8> &RewritePhiSet) {
  BasicBlock *Preheader = L->getLoopPreheader();
  // If there is no preheader, the loop will not be deleted.
  if (!Preheader)
    return false;

  // The LoopDeletion pass can delete a loop even when ExitingBlocks.size() >
  // 1, but we forgo the multiple-ExitingBlocks case for simplicity.
  // TODO: If we see a testcase where a loop with multiple ExitingBlocks can
  // be deleted after exit value rewriting, we can enhance the logic here.
  SmallVector<BasicBlock *, 4> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  SmallVector<BasicBlock *, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);
  if (ExitBlocks.size() != 1 || ExitingBlocks.size() != 1)
    return false;

  BasicBlock *ExitBlock = ExitBlocks[0];
  BasicBlock::iterator BI = ExitBlock->begin();
  while (PHINode *P = dyn_cast<PHINode>(BI)) {
    Value *Incoming = P->getIncomingValueForBlock(ExitingBlocks[0]);

    // If the Incoming value of P is found in RewritePhiSet, we know it
    // could be rewritten to use a loop invariant value in the transformation
    // phase later. Skip it in the loop invariant check below.
    bool found = false;
    for (const RewritePhi &Phi : RewritePhiSet) {
      if (!Phi.ValidRewrite)
        continue;
      unsigned i = Phi.Ith;
      if (Phi.PN == P && (Phi.PN)->getIncomingValue(i) == Incoming) {
        found = true;
        break;
      }
    }

    Instruction *I;
    if (!found && (I = dyn_cast<Instruction>(Incoming)))
      if (!L->hasLoopInvariantOperands(I))
        return false;

    ++BI;
  }

  for (auto *BB : L->blocks())
    if (llvm::any_of(*BB, [](Instruction &I) {
          return I.mayHaveSideEffects();
        }))
      return false;

  return true;
}

//===----------------------------------------------------------------------===//
//  IV Widening - Extend the width of an IV to cover its widest uses.
//===----------------------------------------------------------------------===//

namespace {

// Collect information about induction variables that are used by sign/zero
// extend operations. This information is recorded by CollectExtend and
// provides the input to WidenIV.
struct WideIVInfo {
  PHINode *NarrowIV = nullptr;

  // Widest integer type created by a [sz]ext.
  Type *WidestNativeType = nullptr;

  // Was a sext user seen before a zext?
  bool IsSigned = false;
};

} // end anonymous namespace

/// Update information about the induction variable that is extended by this
/// sign or zero extend operation. This is used to determine the final width of
/// the IV before actually widening it.
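///
/// For example (illustrative): given
///   %iv = phi i32 ...
///   %s  = sext i32 %iv to i64
/// this records i64 as the widest native type and marks the IV as signed,
/// provided i64 is a legal integer type for the target.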
static void visitIVCast(CastInst *Cast, WideIVInfo &WI, ScalarEvolution *SE,
                        const TargetTransformInfo *TTI) {
  bool IsSigned = Cast->getOpcode() == Instruction::SExt;
  if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
    return;

  Type *Ty = Cast->getType();
  uint64_t Width = SE->getTypeSizeInBits(Ty);
  if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
    return;

  // Check that `Cast` actually extends the induction variable (we rely on this
  // later).  This takes care of cases where `Cast` is extending a truncation of
  // the narrow induction variable, and thus can end up being narrower than the
  // "narrow" induction variable.
  uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
  if (NarrowIVWidth >= Width)
    return;

  // Cast is either an sext or zext up to this point.
  // We should not widen an indvar if arithmetic on the wider indvar is more
  // expensive than that on the narrower indvar. We check only the cost of ADD
  // because at least an ADD is required to increment the induction variable.
  // We could compute the cost of all instructions on the induction variable
  // more comprehensively when necessary.
  if (TTI &&
      TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
          TTI->getArithmeticInstrCost(Instruction::Add,
                                      Cast->getOperand(0)->getType())) {
    return;
  }

  if (!WI.WidestNativeType) {
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
    WI.IsSigned = IsSigned;
    return;
  }

  // We extend the IV to satisfy the sign of its first user, arbitrarily.
  if (WI.IsSigned != IsSigned)
    return;

  if (Width > SE->getTypeSizeInBits(WI.WidestNativeType))
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
}

namespace {

/// Record a link in the Narrow IV def-use chain along with the WideIV that
/// computes the same value as the Narrow IV def.  This avoids caching Use*
/// pointers.
struct NarrowIVDefUse {
  Instruction *NarrowDef = nullptr;
  Instruction *NarrowUse = nullptr;
  Instruction *WideDef = nullptr;

  // True if the narrow def is never negative.  Tracking this information lets
  // us use a sign extension instead of a zero extension or vice versa, when
  // profitable and legal.
  bool NeverNegative = false;

  NarrowIVDefUse(Instruction *ND, Instruction *NU, Instruction *WD,
                 bool NeverNegative)
      : NarrowDef(ND), NarrowUse(NU), WideDef(WD),
        NeverNegative(NeverNegative) {}
};

/// The goal of this transform is to remove sign and zero extends without
/// creating any new induction variables. To do this, it creates a new phi of
/// the wider type and redirects all users, either removing extends or inserting
/// truncs whenever we stop propagating the type.
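///
/// For example (illustrative): given
///   %iv     = phi i32 [ 0, %preheader ], [ %iv.next, %latch ]
///   %iv.ext = sext i32 %iv to i64
/// the transform creates
///   %iv.wide = phi i64 [ 0, %preheader ], [ %iv.wide.next, %latch ]
/// and redirects users of %iv.ext to %iv.wide, making the sext dead.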
class WidenIV {
  // Parameters
  PHINode *OrigPhi;
  Type *WideType;

  // Context
  LoopInfo        *LI;
  Loop            *L;
  ScalarEvolution *SE;
  DominatorTree   *DT;

  // Does the module have any calls to the llvm.experimental.guard intrinsic
  // at all? If not, we can avoid scanning instructions looking for guards.
  bool HasGuards;

  // Result
  PHINode *WidePhi = nullptr;
  Instruction *WideInc = nullptr;
  const SCEV *WideIncExpr = nullptr;
  SmallVectorImpl<WeakTrackingVH> &DeadInsts;

  SmallPtrSet<Instruction *,16> Widened;
  SmallVector<NarrowIVDefUse, 8> NarrowIVUsers;

  enum ExtendKind { ZeroExtended, SignExtended, Unknown };

  // A map tracking the kind of extension used to widen each narrow IV
  // and narrow IV user.
  // Key: pointer to a narrow IV or IV user.
  // Value: the kind of extension used to widen this Instruction.
  DenseMap<AssertingVH<Instruction>, ExtendKind> ExtendKindMap;

  using DefUserPair = std::pair<AssertingVH<Value>, AssertingVH<Instruction>>;

  // A map with control-dependent ranges for post increment IV uses. The key is
  // a pair of IV def and a use of this def denoting the context. The value is
  // a ConstantRange representing possible values of the def at the given
  // context.
  DenseMap<DefUserPair, ConstantRange> PostIncRangeInfos;

  Optional<ConstantRange> getPostIncRangeInfo(Value *Def,
                                              Instruction *UseI) {
    DefUserPair Key(Def, UseI);
    auto It = PostIncRangeInfos.find(Key);
    return It == PostIncRangeInfos.end()
               ? Optional<ConstantRange>(None)
               : Optional<ConstantRange>(It->second);
  }

  void calculatePostIncRanges(PHINode *OrigPhi);
  void calculatePostIncRange(Instruction *NarrowDef, Instruction *NarrowUser);

  void updatePostIncRangeInfo(Value *Def, Instruction *UseI, ConstantRange R) {
    DefUserPair Key(Def, UseI);
    auto It = PostIncRangeInfos.find(Key);
    if (It == PostIncRangeInfos.end())
      PostIncRangeInfos.insert({Key, R});
    else
      It->second = R.intersectWith(It->second);
  }

public:
  WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv,
          DominatorTree *DTree, SmallVectorImpl<WeakTrackingVH> &DI,
          bool HasGuards)
      : OrigPhi(WI.NarrowIV), WideType(WI.WidestNativeType), LI(LInfo),
        L(LI->getLoopFor(OrigPhi->getParent())), SE(SEv), DT(DTree),
        HasGuards(HasGuards), DeadInsts(DI) {
    assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
    ExtendKindMap[OrigPhi] = WI.IsSigned ? SignExtended : ZeroExtended;
  }

  PHINode *createWideIV(SCEVExpander &Rewriter);

protected:
  Value *createExtendInst(Value *NarrowOper, Type *WideType, bool IsSigned,
                          Instruction *Use);

  Instruction *cloneIVUser(NarrowIVDefUse DU, const SCEVAddRecExpr *WideAR);
  Instruction *cloneArithmeticIVUser(NarrowIVDefUse DU,
                                     const SCEVAddRecExpr *WideAR);
  Instruction *cloneBitwiseIVUser(NarrowIVDefUse DU);

  ExtendKind getExtendKind(Instruction *I);

  using WidenedRecTy = std::pair<const SCEVAddRecExpr *, ExtendKind>;

  WidenedRecTy getWideRecurrence(NarrowIVDefUse DU);

  WidenedRecTy getExtendedOperandRecurrence(NarrowIVDefUse DU);

  const SCEV *getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
                              unsigned OpCode) const;

  Instruction *widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter);

  bool widenLoopCompare(NarrowIVDefUse DU);
  bool widenWithVariantLoadUse(NarrowIVDefUse DU);
  void widenWithVariantLoadUseCodegen(NarrowIVDefUse DU);

  void pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef);
};

} // end anonymous namespace

Value *WidenIV::createExtendInst(Value *NarrowOper, Type *WideType,
                                 bool IsSigned, Instruction *Use) {
  // Set the debug location and conservative insertion point.
  IRBuilder<> Builder(Use);
  // Hoist the insertion point into loop preheaders as far as possible.
  for (const Loop *L = LI->getLoopFor(Use->getParent());
       L && L->getLoopPreheader() && L->isLoopInvariant(NarrowOper);
       L = L->getParentLoop())
    Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator());

  return IsSigned ? Builder.CreateSExt(NarrowOper, WideType) :
                    Builder.CreateZExt(NarrowOper, WideType);
}

/// Instantiate a wide operation to replace a narrow operation. This only needs
/// to handle operations that can evaluate to a SCEVAddRec. It can safely
/// return nullptr for any operation we decide not to clone.
Instruction *WidenIV::cloneIVUser(NarrowIVDefUse DU,
                                  const SCEVAddRecExpr *WideAR) {
  unsigned Opcode = DU.NarrowUse->getOpcode();
  switch (Opcode) {
  default:
    return nullptr;
  case Instruction::Add:
  case Instruction::Mul:
  case Instruction::UDiv:
  case Instruction::Sub:
    return cloneArithmeticIVUser(DU, WideAR);

  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
    return cloneBitwiseIVUser(DU);
  }
}

Instruction *WidenIV::cloneBitwiseIVUser(NarrowIVDefUse DU) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  LLVM_DEBUG(dbgs() << "Cloning bitwise IVUser: " << *NarrowUse << "\n");

  // Replace NarrowDef operands with WideDef. Otherwise, we don't know anything
  // about the narrow operand yet so must insert a [sz]ext. It is probably loop
  // invariant and will be folded or hoisted. If it actually comes from a
  // widened IV, it should be removed during a future call to widenIVUse.
  bool IsSigned = getExtendKind(NarrowDef) == SignExtended;
  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(0), WideType,
                                      IsSigned, NarrowUse);
  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(1), WideType,
                                      IsSigned, NarrowUse);

  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
                                        NarrowBO->getName());
  IRBuilder<> Builder(NarrowUse);
  Builder.Insert(WideBO);
  WideBO->copyIRFlags(NarrowBO);
  return WideBO;
}

Instruction *WidenIV::cloneArithmeticIVUser(NarrowIVDefUse DU,
                                            const SCEVAddRecExpr *WideAR) {
  Instruction *NarrowUse = DU.NarrowUse;
  Instruction *NarrowDef = DU.NarrowDef;
  Instruction *WideDef = DU.WideDef;

  LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");

  unsigned IVOpIdx = (NarrowUse->getOperand(0) == NarrowDef) ? 0 : 1;

  // We're trying to find X such that
  //
  //  Widen(NarrowDef `op` NonIVNarrowDef) == WideAR == WideDef `op.wide` X
  //
  // We guess two solutions to X, sext(NonIVNarrowDef) and zext(NonIVNarrowDef),
  // and check using SCEV if any of them are correct.
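  //
  // For instance (illustrative): if NarrowUse is 'add i32 %narrow.def, %n',
  // the two guesses are 'WideDef + sext(%n)' and 'WideDef + zext(%n)';
  // whichever of them SCEV proves equal to WideAR (if either) is used below.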

  // Returns true if extending NonIVNarrowDef according to `SignExt` is a
  // correct solution to X.
  auto GuessNonIVOperand = [&](bool SignExt) {
    const SCEV *WideLHS;
    const SCEV *WideRHS;

    auto GetExtend = [this, SignExt](const SCEV *S, Type *Ty) {
      if (SignExt)
        return SE->getSignExtendExpr(S, Ty);
      return SE->getZeroExtendExpr(S, Ty);
    };

    if (IVOpIdx == 0) {
      WideLHS = SE->getSCEV(WideDef);
      const SCEV *NarrowRHS = SE->getSCEV(NarrowUse->getOperand(1));
      WideRHS = GetExtend(NarrowRHS, WideType);
    } else {
      const SCEV *NarrowLHS = SE->getSCEV(NarrowUse->getOperand(0));
      WideLHS = GetExtend(NarrowLHS, WideType);
      WideRHS = SE->getSCEV(WideDef);
    }

    // WideUse is "WideDef `op.wide` X" as described in the comment.
    const SCEV *WideUse = nullptr;

    switch (NarrowUse->getOpcode()) {
    default:
      llvm_unreachable("No other possibility!");

    case Instruction::Add:
      WideUse = SE->getAddExpr(WideLHS, WideRHS);
      break;

    case Instruction::Mul:
      WideUse = SE->getMulExpr(WideLHS, WideRHS);
      break;

    case Instruction::UDiv:
      WideUse = SE->getUDivExpr(WideLHS, WideRHS);
      break;

    case Instruction::Sub:
      WideUse = SE->getMinusSCEV(WideLHS, WideRHS);
      break;
    }

    return WideUse == WideAR;
  };

  bool SignExtend = getExtendKind(NarrowDef) == SignExtended;
  if (!GuessNonIVOperand(SignExtend)) {
    SignExtend = !SignExtend;
    if (!GuessNonIVOperand(SignExtend))
      return nullptr;
  }

  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(0), WideType,
                                      SignExtend, NarrowUse);
  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
                   ? WideDef
                   : createExtendInst(NarrowUse->getOperand(1), WideType,
                                      SignExtend, NarrowUse);

  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
                                        NarrowBO->getName());

  IRBuilder<> Builder(NarrowUse);
  Builder.Insert(WideBO);
  WideBO->copyIRFlags(NarrowBO);
  return WideBO;
}

WidenIV::ExtendKind WidenIV::getExtendKind(Instruction *I) {
  auto It = ExtendKindMap.find(I);
  assert(It != ExtendKindMap.end() && "Instruction not yet extended!");
  return It->second;
}

const SCEV *WidenIV::getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
                                     unsigned OpCode) const {
  if (OpCode == Instruction::Add)
    return SE->getAddExpr(LHS, RHS);
  if (OpCode == Instruction::Sub)
    return SE->getMinusSCEV(LHS, RHS);
  if (OpCode == Instruction::Mul)
    return SE->getMulExpr(LHS, RHS);

  llvm_unreachable("Unsupported opcode.");
}

/// No-wrap operations can transfer sign extension of their result to their
/// operands. Generate the SCEV value for the widened operation without
/// actually modifying the IR yet. If the expression after extending the
/// operands is an AddRec for this loop, return the AddRec and the kind of
/// extension used.
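///
/// For example (illustrative): for a narrow use
///   %u = add nsw i32 %narrow.iv, %step
/// where %narrow.iv was sign-extended, this forms the SCEV of
/// 'WideDef + sext(%step)' and, if it is an AddRec on this loop, returns
/// that AddRec together with SignExtended, all without emitting any IR.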
WidenIV::WidenedRecTy WidenIV::getExtendedOperandRecurrence(NarrowIVDefUse DU) {
  // Handle the common case of add<nsw/nuw>.
  const unsigned OpCode = DU.NarrowUse->getOpcode();
  // Only Add/Sub/Mul instructions are supported yet.
  if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
      OpCode != Instruction::Mul)
    return {nullptr, Unknown};

  // One operand (NarrowDef) has already been extended to WideDef. Now determine
  // if extending the other will lead to a recurrence.
  const unsigned ExtendOperIdx =
      DU.NarrowUse->getOperand(0) == DU.NarrowDef ? 1 : 0;
  assert(DU.NarrowUse->getOperand(1-ExtendOperIdx) == DU.NarrowDef && "bad DU");

  const SCEV *ExtendOperExpr = nullptr;
  const OverflowingBinaryOperator *OBO =
    cast<OverflowingBinaryOperator>(DU.NarrowUse);
  ExtendKind ExtKind = getExtendKind(DU.NarrowDef);
  if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
    ExtendOperExpr = SE->getSignExtendExpr(
      SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else if (ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
    ExtendOperExpr = SE->getZeroExtendExpr(
      SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
  else
    return {nullptr, Unknown};

  // When creating this SCEV expr, don't apply the current operation's NSW or
  // NUW flags. This instruction may be guarded by control flow that the
  // no-wrap behavior depends on. Non-control-equivalent instructions can be
  // mapped to the same SCEV expression, and it would be incorrect to transfer
  // NSW/NUW semantics to those operations.
  const SCEV *lhs = SE->getSCEV(DU.WideDef);
  const SCEV *rhs = ExtendOperExpr;

  // Let's swap operands to the initial order for the case of non-commutative
  // operations, like SUB. See PR21014.
  if (ExtendOperIdx == 0)
    std::swap(lhs, rhs);
  const SCEVAddRecExpr *AddRec =
      dyn_cast<SCEVAddRecExpr>(getSCEVByOpCode(lhs, rhs, OpCode));

  if (!AddRec || AddRec->getLoop() != L)
    return {nullptr, Unknown};

  return {AddRec, ExtKind};
}

/// Is this instruction potentially interesting for further simplification after
/// widening its type? In other words, can the extend be safely hoisted out of
/// the loop with SCEV reducing the value to a recurrence on the same loop? If
/// so, return the extended recurrence and the kind of extension used. Otherwise
/// return {nullptr, Unknown}.
WidenIV::WidenedRecTy WidenIV::getWideRecurrence(NarrowIVDefUse DU) {
  if (!SE->isSCEVable(DU.NarrowUse->getType()))
    return {nullptr, Unknown};

  const SCEV *NarrowExpr = SE->getSCEV(DU.NarrowUse);
  if (SE->getTypeSizeInBits(NarrowExpr->getType()) >=
      SE->getTypeSizeInBits(WideType)) {
    // NarrowUse implicitly widens its operand. e.g. a gep with a narrow
    // index. So don't follow this use.
    return {nullptr, Unknown};
  }

  const SCEV *WideExpr;
  ExtendKind ExtKind;
  if (DU.NeverNegative) {
    WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
    if (isa<SCEVAddRecExpr>(WideExpr))
      ExtKind = SignExtended;
    else {
      WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
      ExtKind = ZeroExtended;
    }
  } else if (getExtendKind(DU.NarrowDef) == SignExtended) {
    WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
    ExtKind = SignExtended;
  } else {
    WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
    ExtKind = ZeroExtended;
  }
  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
  if (!AddRec || AddRec->getLoop() != L)
    return {nullptr, Unknown};
  return {AddRec, ExtKind};
}

/// This IV user cannot be widened. Replace this use of the original narrow IV
/// with a truncation of the new wide IV to isolate and eliminate the narrow IV.
1371static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT, LoopInfo *LI) {
1372  auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
1373  if (!InsertPt)
1374    return;
1375  LLVM_DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef << " for user "
1376                    << *DU.NarrowUse << "\n");
1377  IRBuilder<> Builder(InsertPt);
1378  Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType());
1379  DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc);
1380}
1381
1382/// If the narrow use is a compare instruction, then widen the compare
1383/// (and possibly the other operand).  The extend operation is hoisted into
1384/// the loop preheader as far as possible.
1385bool WidenIV::widenLoopCompare(NarrowIVDefUse DU) {
1386  ICmpInst *Cmp = dyn_cast<ICmpInst>(DU.NarrowUse);
1387  if (!Cmp)
1388    return false;
1389
1390  // We can legally widen the comparison in the following two cases:
1391  //
1392  //  - The signedness of the IV extension and comparison match
1393  //
1394  //  - The narrow IV is always positive (and thus its sign extension is equal
1395  //    to its zero extension).  For instance, let's say we're zero extending
1396  //    %narrow for the following use
1397  //
1398  //      icmp slt i32 %narrow, %val   ... (A)
1399  //
1400  //    and %narrow is always positive.  Then
1401  //
1402  //      (A) == icmp slt i32 sext(%narrow), sext(%val)
1403  //          == icmp slt i32 zext(%narrow), sext(%val)
1404  bool IsSigned = getExtendKind(DU.NarrowDef) == SignExtended;
1405  if (!(DU.NeverNegative || IsSigned == Cmp->isSigned()))
1406    return false;
1407
1408  Value *Op = Cmp->getOperand(Cmp->getOperand(0) == DU.NarrowDef ? 1 : 0);
1409  unsigned CastWidth = SE->getTypeSizeInBits(Op->getType());
1410  unsigned IVWidth = SE->getTypeSizeInBits(WideType);
1411  assert(CastWidth <= IVWidth && "Unexpected width while widening compare.");
1412
1413  // Widen the compare instruction.
1414  auto *InsertPt = getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI);
1415  if (!InsertPt)
1416    return false;
1417  IRBuilder<> Builder(InsertPt);
1418  DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
1419
1420  // Widen the other operand of the compare, if necessary.
1421  if (CastWidth < IVWidth) {
1422    Value *ExtOp = createExtendInst(Op, WideType, Cmp->isSigned(), Cmp);
1423    DU.NarrowUse->replaceUsesOfWith(Op, ExtOp);
1424  }
1425  return true;
1426}
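// Illustrative before/after for widenLoopCompare (hypothetical IR, a 32-bit
// IV widened to 64 bits):
//   before: %cmp = icmp slt i32 %narrow.iv, %limit
//   after:  %limit.wide = sext i32 %limit to i64
//           %cmp = icmp slt i64 %wide.iv, %limit.wide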
1427
1428/// If the narrow use is an instruction whose two operands are the defining
1429/// instruction of DU and a load instruction, then we have the following:
1430/// if the load is hoisted outside the loop, we do not reach this function, as
1431/// scalar evolution analysis works fine in widenIVUse with variables hoisted
1432/// outside the loop, and efficient code is subsequently generated by not
1433/// emitting truncate instructions. But when the load is not hoisted (whether
1434/// due to a limitation in alias analysis or a true legality constraint),
1435/// scalar evolution cannot proceed with loop-variant values and inefficient
1436/// code is generated. This function handles the non-hoisted-load special case
1437/// by making the optimization generate the same kind of code for hoisted and
1438/// non-hoisted loads (widen the use and eliminate the sign/zero extend
1439/// instruction). This special case is important especially when the induction
1440/// variables affect the addressing mode in code generation.
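///
/// A sketch of the pattern handled here (hypothetical IR):
///   loop:
///     %iv   = phi i32 ...
///     %val  = load i32, i32* %p         ; not hoistable out of the loop
///     %add  = add nsw i32 %iv, %val
///     %sext = sext i32 %add to i64      ; widening %add makes this dead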
1441bool WidenIV::widenWithVariantLoadUse(NarrowIVDefUse DU) {
1442  Instruction *NarrowUse = DU.NarrowUse;
1443  Instruction *NarrowDef = DU.NarrowDef;
1444  Instruction *WideDef = DU.WideDef;
1445
1446  // Handle the common case of add<nsw/nuw>
1447  const unsigned OpCode = NarrowUse->getOpcode();
1448  // Only Add/Sub/Mul instructions are supported.
1449  if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
1450      OpCode != Instruction::Mul)
1451    return false;
1452
1453  // The operand that is not defined by NarrowDef of DU. Let's call it the
1454  // other operand.
1455  unsigned ExtendOperIdx = DU.NarrowUse->getOperand(0) == NarrowDef ? 1 : 0;
1456  assert(DU.NarrowUse->getOperand(1 - ExtendOperIdx) == DU.NarrowDef &&
1457         "bad DU");
1458
1459  const SCEV *ExtendOperExpr = nullptr;
1460  const OverflowingBinaryOperator *OBO =
1461    cast<OverflowingBinaryOperator>(NarrowUse);
1462  ExtendKind ExtKind = getExtendKind(NarrowDef);
1463  if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
1464    ExtendOperExpr = SE->getSignExtendExpr(
1465      SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType);
1466  else if (ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
1467    ExtendOperExpr = SE->getZeroExtendExpr(
1468      SE->getSCEV(NarrowUse->getOperand(ExtendOperIdx)), WideType);
1469  else
1470    return false;
1471
1472  // We are interested in the other operand being a load instruction.
1473  // But, we should look into relaxing this restriction later on.
1474  auto *I = dyn_cast<Instruction>(NarrowUse->getOperand(ExtendOperIdx));
1475  if (I && I->getOpcode() != Instruction::Load)
1476    return false;
1477
1478  // Verify that the defining operand is an AddRec.
1479  const SCEV *Op1 = SE->getSCEV(WideDef);
1480  const SCEVAddRecExpr *AddRecOp1 = dyn_cast<SCEVAddRecExpr>(Op1);
1481  if (!AddRecOp1 || AddRecOp1->getLoop() != L)
1482    return false;
1483  // Verify that the other operand is an extend.
1484  if (ExtKind == SignExtended) {
1485    if (!isa<SCEVSignExtendExpr>(ExtendOperExpr))
1486      return false;
1487  } else {
1488    if (!isa<SCEVZeroExtendExpr>(ExtendOperExpr))
1489      return false;
1490  }
1491
1492  if (ExtKind == SignExtended) {
1493    for (Use &U : NarrowUse->uses()) {
1494      SExtInst *User = dyn_cast<SExtInst>(U.getUser());
1495      if (!User || User->getType() != WideType)
1496        return false;
1497    }
1498  } else { // ExtKind == ZeroExtended
1499    for (Use &U : NarrowUse->uses()) {
1500      ZExtInst *User = dyn_cast<ZExtInst>(U.getUser());
1501      if (!User || User->getType() != WideType)
1502        return false;
1503    }
1504  }
1505
1506  return true;
1507}
1508
1509/// Special Case for widening with variant Loads (see
1510/// WidenIV::widenWithVariantLoadUse). This is the code generation part.
1511void WidenIV::widenWithVariantLoadUseCodegen(NarrowIVDefUse DU) {
1512  Instruction *NarrowUse = DU.NarrowUse;
1513  Instruction *NarrowDef = DU.NarrowDef;
1514  Instruction *WideDef = DU.WideDef;
1515
1516  ExtendKind ExtKind = getExtendKind(NarrowDef);
1517
1518  LLVM_DEBUG(dbgs() << "Cloning arithmetic IVUser: " << *NarrowUse << "\n");
1519
1520  // Generate the widened use instruction.
1521  Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
1522                   ? WideDef
1523                   : createExtendInst(NarrowUse->getOperand(0), WideType,
1524                                      ExtKind, NarrowUse);
1525  Value *RHS = (NarrowUse->getOperand(1) == NarrowDef)
1526                   ? WideDef
1527                   : createExtendInst(NarrowUse->getOperand(1), WideType,
1528                                      ExtKind, NarrowUse);
1529
1530  auto *NarrowBO = cast<BinaryOperator>(NarrowUse);
1531  auto *WideBO = BinaryOperator::Create(NarrowBO->getOpcode(), LHS, RHS,
1532                                        NarrowBO->getName());
1533  IRBuilder<> Builder(NarrowUse);
1534  Builder.Insert(WideBO);
1535  WideBO->copyIRFlags(NarrowBO);
1536
1537  if (ExtKind == SignExtended)
1538    ExtendKindMap[NarrowUse] = SignExtended;
1539  else
1540    ExtendKindMap[NarrowUse] = ZeroExtended;
1541
1542  // Update the Use.
1543  if (ExtKind == SignExtended) {
1544    for (Use &U : NarrowUse->uses()) {
1545      SExtInst *User = dyn_cast<SExtInst>(U.getUser());
1546      if (User && User->getType() == WideType) {
1547        LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by "
1548                          << *WideBO << "\n");
1549        ++NumElimExt;
1550        User->replaceAllUsesWith(WideBO);
1551        DeadInsts.emplace_back(User);
1552      }
1553    }
1554  } else { // ExtKind == ZeroExtended
1555    for (Use &U : NarrowUse->uses()) {
1556      ZExtInst *User = dyn_cast<ZExtInst>(U.getUser());
1557      if (User && User->getType() == WideType) {
1558        LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *User << " replaced by "
1559                          << *WideBO << "\n");
1560        ++NumElimExt;
1561        User->replaceAllUsesWith(WideBO);
1562        DeadInsts.emplace_back(User);
1563      }
1564    }
1565  }
1566}
1567
1568/// Determine whether an individual user of the narrow IV can be widened. If so,
1569/// return the wide clone of the user.
1570Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
1571  assert(ExtendKindMap.count(DU.NarrowDef) &&
1572         "Should already know the kind of extension used to widen NarrowDef");
1573
1574  // Stop traversing the def-use chain at inner-loop phis or post-loop phis.
1575  if (PHINode *UsePhi = dyn_cast<PHINode>(DU.NarrowUse)) {
1576    if (LI->getLoopFor(UsePhi->getParent()) != L) {
1577      // For LCSSA phis, sink the truncate outside the loop.
1578      // After SimplifyCFG most loop exit targets have a single predecessor.
1579      // Otherwise fall back to a truncate within the loop.
1580      if (UsePhi->getNumOperands() != 1)
1581        truncateIVUse(DU, DT, LI);
1582      else {
1583        // Widening the PHI requires us to insert a trunc.  The logical place
1584        // for this trunc is in the same BB as the PHI.  This is not possible if
1585        // the BB is terminated by a catchswitch.
1586        if (isa<CatchSwitchInst>(UsePhi->getParent()->getTerminator()))
1587          return nullptr;
1588
1589        PHINode *WidePhi =
1590          PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide",
1591                          UsePhi);
1592        WidePhi->addIncoming(DU.WideDef, UsePhi->getIncomingBlock(0));
1593        IRBuilder<> Builder(&*WidePhi->getParent()->getFirstInsertionPt());
1594        Value *Trunc = Builder.CreateTrunc(WidePhi, DU.NarrowDef->getType());
1595        UsePhi->replaceAllUsesWith(Trunc);
1596        DeadInsts.emplace_back(UsePhi);
1597        LLVM_DEBUG(dbgs() << "INDVARS: Widen lcssa phi " << *UsePhi << " to "
1598                          << *WidePhi << "\n");
1599      }
1600      return nullptr;
1601    }
1602  }
1603
1604  // This narrow use can be widened by a sext if it's non-negative or its
1605  // narrow def was widened by a sext. Same for zext.
1606  auto canWidenBySExt = [&]() {
1607    return DU.NeverNegative || getExtendKind(DU.NarrowDef) == SignExtended;
1608  };
1609  auto canWidenByZExt = [&]() {
1610    return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ZeroExtended;
1611  };
1612
1613  // Our raison d'etre! Eliminate sign and zero extension.
1614  if ((isa<SExtInst>(DU.NarrowUse) && canWidenBySExt()) ||
1615      (isa<ZExtInst>(DU.NarrowUse) && canWidenByZExt())) {
1616    Value *NewDef = DU.WideDef;
1617    if (DU.NarrowUse->getType() != WideType) {
1618      unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType());
1619      unsigned IVWidth = SE->getTypeSizeInBits(WideType);
1620      if (CastWidth < IVWidth) {
1621        // The cast isn't as wide as the IV, so insert a Trunc.
1622        IRBuilder<> Builder(DU.NarrowUse);
1623        NewDef = Builder.CreateTrunc(DU.WideDef, DU.NarrowUse->getType());
1624      }
1625      else {
1626        // A wider extend was hidden behind a narrower one. This may induce
1627        // another round of IV widening in which the intermediate IV becomes
1628        // dead. It should be very rare.
1629        LLVM_DEBUG(dbgs() << "INDVARS: New IV " << *WidePhi
1630                          << " not wide enough to subsume " << *DU.NarrowUse
1631                          << "\n");
1632        DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef);
1633        NewDef = DU.NarrowUse;
1634      }
1635    }
1636    if (NewDef != DU.NarrowUse) {
1637      LLVM_DEBUG(dbgs() << "INDVARS: eliminating " << *DU.NarrowUse
1638                        << " replaced by " << *DU.WideDef << "\n");
1639      ++NumElimExt;
1640      DU.NarrowUse->replaceAllUsesWith(NewDef);
1641      DeadInsts.emplace_back(DU.NarrowUse);
1642    }
1643    // Now that the extend is gone, we want to expose its uses for potential
1644    // further simplification. We don't need to directly inform SimplifyIVUsers
1645    // of the new users, because their parent IV will be processed later as a
1646    // new loop phi. If we preserved IVUsers analysis, we would also want to
1647    // push the uses of WideDef here.
1648
1649    // No further widening is needed. The deceased [sz]ext had done it for us.
1650    return nullptr;
1651  }
1652
1653  // Does this user itself evaluate to a recurrence after widening?
1654  WidenedRecTy WideAddRec = getExtendedOperandRecurrence(DU);
1655  if (!WideAddRec.first)
1656    WideAddRec = getWideRecurrence(DU);
1657
1658  assert((WideAddRec.first == nullptr) == (WideAddRec.second == Unknown));
1659  if (!WideAddRec.first) {
1660    // If use is a loop condition, try to promote the condition instead of
1661    // truncating the IV first.
1662    if (widenLoopCompare(DU))
1663      return nullptr;
1664
1665    // We are here about to generate a truncate instruction that may hurt
1666    // performance because the scalar evolution expression computed earlier
1667    // in WideAddRec.first does not indicate a polynomial induction expression.
1668    // In that case, look at the operands of the use instruction to determine
1669    // if we can still widen the use instead of truncating its operand.
1670    if (widenWithVariantLoadUse(DU)) {
1671      widenWithVariantLoadUseCodegen(DU);
1672      return nullptr;
1673    }
1674
1675    // This user does not evaluate to a recurrence after widening, so don't
1676    // follow it. Instead insert a Trunc to kill off the original use,
1677    // eventually isolating the original narrow IV so it can be removed.
1678    truncateIVUse(DU, DT, LI);
1679    return nullptr;
1680  }
1681  // Assume block terminators cannot evaluate to a recurrence. We can't insert
1682  // a Trunc after a terminator if there happens to be a critical edge.
1683  assert(DU.NarrowUse != DU.NarrowUse->getParent()->getTerminator() &&
1684         "SCEV is not expected to evaluate a block terminator");
1685
1686  // Reuse the IV increment that SCEVExpander created as long as it dominates
1687  // NarrowUse.
1688  Instruction *WideUse = nullptr;
1689  if (WideAddRec.first == WideIncExpr &&
1690      Rewriter.hoistIVInc(WideInc, DU.NarrowUse))
1691    WideUse = WideInc;
1692  else {
1693    WideUse = cloneIVUser(DU, WideAddRec.first);
1694    if (!WideUse)
1695      return nullptr;
1696  }
1697  // Evaluation of WideAddRec ensured that the narrow expression could be
1698  // extended outside the loop without overflow. This suggests that the wide use
1699  // evaluates to the same expression as the extended narrow use, but doesn't
1700  // absolutely guarantee it. Hence the following failsafe check. In rare cases
1701  // where it fails, we simply throw away the newly created wide use.
1702  if (WideAddRec.first != SE->getSCEV(WideUse)) {
1703    LLVM_DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse << ": "
1704                      << *SE->getSCEV(WideUse) << " != " << *WideAddRec.first
1705                      << "\n");
1706    DeadInsts.emplace_back(WideUse);
1707    return nullptr;
1708  }
1709
1710  // If we reached this point, then we are going to replace
1711  // DU.NarrowUse with WideUse, so reattach any DbgValues now.
1712  replaceAllDbgUsesWith(*DU.NarrowUse, *WideUse, *WideUse, *DT);
1713
1714  ExtendKindMap[DU.NarrowUse] = WideAddRec.second;
1715  // Returning WideUse pushes it on the worklist.
1716  return WideUse;
1717}
1718
1719/// Add eligible users of NarrowDef to NarrowIVUsers.
1720void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
1721  const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef);
1722  bool NonNegativeDef =
1723      SE->isKnownPredicate(ICmpInst::ICMP_SGE, NarrowSCEV,
1724                           SE->getConstant(NarrowSCEV->getType(), 0));
1725  for (User *U : NarrowDef->users()) {
1726    Instruction *NarrowUser = cast<Instruction>(U);
1727
1728    // Handle data flow merges and bizarre phi cycles.
1729    if (!Widened.insert(NarrowUser).second)
1730      continue;
1731
1732    bool NonNegativeUse = false;
1733    if (!NonNegativeDef) {
1734      // We might have control-dependent range information for this context.
1735      if (auto RangeInfo = getPostIncRangeInfo(NarrowDef, NarrowUser))
1736        NonNegativeUse = RangeInfo->getSignedMin().isNonNegative();
1737    }
1738
1739    NarrowIVUsers.emplace_back(NarrowDef, NarrowUser, WideDef,
1740                               NonNegativeDef || NonNegativeUse);
1741  }
1742}
1743
1744/// Process a single induction variable. First use the SCEVExpander to create a
1745/// wide induction variable that evaluates to the same recurrence as the
1746/// original narrow IV. Then use a worklist to forward traverse the narrow IV's
1747/// def-use chain. After widenIVUse has processed all interesting IV users, the
1748/// narrow IV will be isolated for removal by DeleteDeadPHIs.
1749///
1750/// It would be simpler to delete uses as they are processed, but we must avoid
1751/// invalidating SCEV expressions.
1752PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) {
1753  // Is this phi an induction variable?
1754  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(OrigPhi));
1755  if (!AddRec)
1756    return nullptr;
1757
1758  // Widen the induction variable expression.
1759  const SCEV *WideIVExpr = getExtendKind(OrigPhi) == SignExtended
1760                               ? SE->getSignExtendExpr(AddRec, WideType)
1761                               : SE->getZeroExtendExpr(AddRec, WideType);
1762
1763  assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType &&
1764         "Expect the new IV expression to preserve its type");
1765
1766  // Can the IV be extended outside the loop without overflow?
1767  AddRec = dyn_cast<SCEVAddRecExpr>(WideIVExpr);
1768  if (!AddRec || AddRec->getLoop() != L)
1769    return nullptr;
1770
1771  // An AddRec must have loop-invariant operands. Since this AddRec is
1772  // materialized by a loop header phi, the expression cannot have any post-loop
1773  // operands, so they must dominate the loop header.
1774  assert(
1775      SE->properlyDominates(AddRec->getStart(), L->getHeader()) &&
1776      SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader()) &&
1777      "Loop header phi recurrence inputs do not dominate the loop");
1778
1779  // Iterate over IV uses (including transitive ones) looking for IV increments
1780  // of the form 'add nsw %iv, <const>'. For each increment and each use of
1781  // the increment, calculate control-dependent range information based on
1782  // dominating conditions inside of the loop (e.g. a range check inside of the
1783  // loop). Calculated ranges are stored in PostIncRangeInfos map.
1784  //
1785  // Control-dependent range information is later used to prove that a narrow
1786  // definition is not negative (see pushNarrowIVUsers). It's difficult to do
1787  // this on demand because when pushNarrowIVUsers needs this information some
1788  // of the dominating conditions might be already widened.
1789  if (UsePostIncrementRanges)
1790    calculatePostIncRanges(OrigPhi);
1791
1792  // The rewriter provides a value for the desired IV expression. This may
1793  // either find an existing phi or materialize a new one. Either way, we
1794  // expect a well-formed cyclic phi-with-increments. i.e. any operand not part
1795  // of the phi-SCC dominates the loop entry.
1796  Instruction *InsertPt = &L->getHeader()->front();
1797  WidePhi = cast<PHINode>(Rewriter.expandCodeFor(AddRec, WideType, InsertPt));
1798
1799  // Remembering the WideIV increment generated by SCEVExpander allows
1800  // widenIVUse to reuse it when widening the narrow IV's increment. We don't
1801  // employ a general reuse mechanism because the call above is the only call to
1802  // SCEVExpander. Henceforth, we produce 1-to-1 narrow to wide uses.
1803  if (BasicBlock *LatchBlock = L->getLoopLatch()) {
1804    WideInc =
1805      cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock));
1806    WideIncExpr = SE->getSCEV(WideInc);
1807    // Propagate the debug location associated with the original loop increment
1808    // to the new (widened) increment.
1809    auto *OrigInc =
1810      cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock));
1811    WideInc->setDebugLoc(OrigInc->getDebugLoc());
1812  }
1813
1814  LLVM_DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n");
1815  ++NumWidened;
1816
1817  // Traverse the def-use chain using a worklist starting at the original IV.
1818  assert(Widened.empty() && NarrowIVUsers.empty() && "expect initial state");
1819
1820  Widened.insert(OrigPhi);
1821  pushNarrowIVUsers(OrigPhi, WidePhi);
1822
1823  while (!NarrowIVUsers.empty()) {
1824    NarrowIVDefUse DU = NarrowIVUsers.pop_back_val();
1825
1826    // Process a def-use edge. This may replace the use, so don't hold a
1827    // use_iterator across it.
1828    Instruction *WideUse = widenIVUse(DU, Rewriter);
1829
1830    // Follow all def-use edges from the previous narrow use.
1831    if (WideUse)
1832      pushNarrowIVUsers(DU.NarrowUse, WideUse);
1833
1834    // widenIVUse may have removed the def-use edge.
1835    if (DU.NarrowDef->use_empty())
1836      DeadInsts.emplace_back(DU.NarrowDef);
1837  }
1838
1839  // Attach any debug information to the new PHI.
1840  replaceAllDbgUsesWith(*OrigPhi, *WidePhi, *WidePhi, *DT);
1841
1842  return WidePhi;
1843}
1844
1845/// Calculates control-dependent range for the given def at the given context
1846/// by looking at dominating conditions inside the loop.
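///
/// For example (illustrative): for %inc = add nsw i32 %iv, 1, a dominating
/// branch on "icmp sgt i32 %iv, -1" proves %inc >= 1 at any use reached only
/// through the true edge.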
1847void WidenIV::calculatePostIncRange(Instruction *NarrowDef,
1848                                    Instruction *NarrowUser) {
1849  using namespace llvm::PatternMatch;
1850
1851  Value *NarrowDefLHS;
1852  const APInt *NarrowDefRHS;
1853  if (!match(NarrowDef, m_NSWAdd(m_Value(NarrowDefLHS),
1854                                 m_APInt(NarrowDefRHS))) ||
1855      !NarrowDefRHS->isNonNegative())
1856    return;
1857
1858  auto UpdateRangeFromCondition = [&] (Value *Condition,
1859                                       bool TrueDest) {
1860    CmpInst::Predicate Pred;
1861    Value *CmpRHS;
1862    if (!match(Condition, m_ICmp(Pred, m_Specific(NarrowDefLHS),
1863                                 m_Value(CmpRHS))))
1864      return;
1865
1866    CmpInst::Predicate P =
1867            TrueDest ? Pred : CmpInst::getInversePredicate(Pred);
1868
1869    auto CmpRHSRange = SE->getSignedRange(SE->getSCEV(CmpRHS));
1870    auto CmpConstrainedLHSRange =
1871            ConstantRange::makeAllowedICmpRegion(P, CmpRHSRange);
1872    auto NarrowDefRange = CmpConstrainedLHSRange.addWithNoWrap(
1873        *NarrowDefRHS, OverflowingBinaryOperator::NoSignedWrap);
1874
1875    updatePostIncRangeInfo(NarrowDef, NarrowUser, NarrowDefRange);
1876  };
1877
1878  auto UpdateRangeFromGuards = [&](Instruction *Ctx) {
1879    if (!HasGuards)
1880      return;
1881
1882    for (Instruction &I : make_range(Ctx->getIterator().getReverse(),
1883                                     Ctx->getParent()->rend())) {
1884      Value *C = nullptr;
1885      if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>(m_Value(C))))
1886        UpdateRangeFromCondition(C, /*TrueDest=*/true);
1887    }
1888  };
1889
1890  UpdateRangeFromGuards(NarrowUser);
1891
1892  BasicBlock *NarrowUserBB = NarrowUser->getParent();
1893  // If NarrowUserBB is statically unreachable asking dominator queries may
1894  // yield surprising results. (e.g. the block may not have a dom tree node)
1895  if (!DT->isReachableFromEntry(NarrowUserBB))
1896    return;
1897
1898  for (auto *DTB = (*DT)[NarrowUserBB]->getIDom();
1899       L->contains(DTB->getBlock());
1900       DTB = DTB->getIDom()) {
1901    auto *BB = DTB->getBlock();
1902    auto *TI = BB->getTerminator();
1903    UpdateRangeFromGuards(TI);
1904
1905    auto *BI = dyn_cast<BranchInst>(TI);
1906    if (!BI || !BI->isConditional())
1907      continue;
1908
1909    auto *TrueSuccessor = BI->getSuccessor(0);
1910    auto *FalseSuccessor = BI->getSuccessor(1);
1911
1912    auto DominatesNarrowUser = [this, NarrowUser] (BasicBlockEdge BBE) {
1913      return BBE.isSingleEdge() &&
1914             DT->dominates(BBE, NarrowUser->getParent());
1915    };
1916
1917    if (DominatesNarrowUser(BasicBlockEdge(BB, TrueSuccessor)))
1918      UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/true);
1919
1920    if (DominatesNarrowUser(BasicBlockEdge(BB, FalseSuccessor)))
1921      UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/false);
1922  }
1923}
1924
1925/// Calculates PostIncRangeInfos map for the given IV
1926void WidenIV::calculatePostIncRanges(PHINode *OrigPhi) {
1927  SmallPtrSet<Instruction *, 16> Visited;
1928  SmallVector<Instruction *, 6> Worklist;
1929  Worklist.push_back(OrigPhi);
1930  Visited.insert(OrigPhi);
1931
1932  while (!Worklist.empty()) {
1933    Instruction *NarrowDef = Worklist.pop_back_val();
1934
1935    for (Use &U : NarrowDef->uses()) {
1936      auto *NarrowUser = cast<Instruction>(U.getUser());
1937
1938      // Don't go looking outside the current loop.
1939      auto *NarrowUserLoop = (*LI)[NarrowUser->getParent()];
1940      if (!NarrowUserLoop || !L->contains(NarrowUserLoop))
1941        continue;
1942
1943      if (!Visited.insert(NarrowUser).second)
1944        continue;
1945
1946      Worklist.push_back(NarrowUser);
1947
1948      calculatePostIncRange(NarrowDef, NarrowUser);
1949    }
1950  }
1951}
1952
1953//===----------------------------------------------------------------------===//
1954//  Live IV Reduction - Minimize IVs live across the loop.
1955//===----------------------------------------------------------------------===//
1956
1957//===----------------------------------------------------------------------===//
1958//  Simplification of IV users based on SCEV evaluation.
1959//===----------------------------------------------------------------------===//
1960
1961namespace {
1962
1963class IndVarSimplifyVisitor : public IVVisitor {
1964  ScalarEvolution *SE;
1965  const TargetTransformInfo *TTI;
1966  PHINode *IVPhi;
1967
1968public:
1969  WideIVInfo WI;
1970
1971  IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
1972                        const TargetTransformInfo *TTI,
1973                        const DominatorTree *DTree)
1974    : SE(SCEV), TTI(TTI), IVPhi(IV) {
1975    DT = DTree;
1976    WI.NarrowIV = IVPhi;
1977  }
1978
1979  // Implement the interface used by simplifyUsersOfIV.
1980  void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
1981};
1982
1983} // end anonymous namespace
1984
1985/// Iteratively perform simplification on a worklist of IV users. Each
1986/// successive simplification may push more users which may themselves be
1987/// candidates for simplification.
1988///
1989/// Sign/Zero extend elimination is interleaved with IV simplification.
1990bool IndVarSimplify::simplifyAndExtend(Loop *L,
1991                                       SCEVExpander &Rewriter,
1992                                       LoopInfo *LI) {
1993  SmallVector<WideIVInfo, 8> WideIVs;
1994
1995  auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction(
1996          Intrinsic::getName(Intrinsic::experimental_guard));
1997  bool HasGuards = GuardDecl && !GuardDecl->use_empty();
1998
1999  SmallVector<PHINode*, 8> LoopPhis;
2000  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
2001    LoopPhis.push_back(cast<PHINode>(I));
2002  }
2003  // Each round of simplification iterates through the SimplifyIVUsers worklist
2004  // for all current phis, then determines whether any IVs can be
2005  // widened. Widening adds new phis to LoopPhis, inducing another round of
2006  // simplification on the wide IVs.
2007  bool Changed = false;
2008  while (!LoopPhis.empty()) {
2009    // Evaluate as many IV expressions as possible before widening any IVs. This
2010    // forces SCEV to set no-wrap flags before evaluating sign/zero
2011    // extension. The first time SCEV attempts to normalize sign/zero extension,
2012    // the result becomes final. So for the most predictable results, we delay
2013    // evaluation of sign/zero extension until needed, and avoid running
2014    // other SCEV-based analysis prior to simplifyAndExtend.
2015    do {
2016      PHINode *CurrIV = LoopPhis.pop_back_val();
2017
2018      // Information about sign/zero extensions of CurrIV.
2019      IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);
2020
2021      Changed |=
2022          simplifyUsersOfIV(CurrIV, SE, DT, LI, DeadInsts, Rewriter, &Visitor);
2023
2024      if (Visitor.WI.WidestNativeType) {
2025        WideIVs.push_back(Visitor.WI);
2026      }
2027    } while (!LoopPhis.empty());
2028
2029    for (; !WideIVs.empty(); WideIVs.pop_back()) {
2030      WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts, HasGuards);
2031      if (PHINode *WidePhi = Widener.createWideIV(Rewriter)) {
2032        Changed = true;
2033        LoopPhis.push_back(WidePhi);
2034      }
2035    }
2036  }
2037  return Changed;
2038}
2039
2040//===----------------------------------------------------------------------===//
2041//  linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
2042//===----------------------------------------------------------------------===//
2043
2044/// Given a Value which is hoped to be part of an add recurrence in the given
2045/// loop, return the associated Phi node if so.  Otherwise, return null.  Note
2046/// that this is less general than SCEV's AddRec checking.
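///
/// For instance (illustrative), given "%iv.next = add nuw i32 %iv, %step"
/// where %iv is a header phi of L and %step is loop invariant, this returns
/// %iv.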
2047static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) {
2048  Instruction *IncI = dyn_cast<Instruction>(IncV);
2049  if (!IncI)
2050    return nullptr;
2051
2052  switch (IncI->getOpcode()) {
2053  case Instruction::Add:
2054  case Instruction::Sub:
2055    break;
2056  case Instruction::GetElementPtr:
2057    // An IV counter must preserve its type.
2058    if (IncI->getNumOperands() == 2)
2059      break;
2060    LLVM_FALLTHROUGH;
2061  default:
2062    return nullptr;
2063  }
2064
2065  PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
2066  if (Phi && Phi->getParent() == L->getHeader()) {
2067    if (L->isLoopInvariant(IncI->getOperand(1)))
2068      return Phi;
2069    return nullptr;
2070  }
2071  if (IncI->getOpcode() == Instruction::GetElementPtr)
2072    return nullptr;
2073
2074  // Allow add/sub to be commuted.
2075  Phi = dyn_cast<PHINode>(IncI->getOperand(1));
2076  if (Phi && Phi->getParent() == L->getHeader()) {
2077    if (L->isLoopInvariant(IncI->getOperand(0)))
2078      return Phi;
2079  }
2080  return nullptr;
2081}
2082
2083/// Whether the current loop exit test is based on this value.  Currently this
2084/// is limited to a direct use in the loop condition.
2085static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) {
2086  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2087  ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
2088  // TODO: Allow non-icmp loop test.
2089  if (!ICmp)
2090    return false;
2091
2092  // TODO: Allow indirect use.
2093  return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V;
2094}
2095
2096/// linearFunctionTestReplace policy. Return true unless we can show that the
2097/// current exit test is already sufficiently canonical.
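///
/// For example (illustrative), "icmp ne i32 %iv.next, %limit" against a
/// simple counter with a loop-invariant %limit is already canonical, while an
/// "icmp slt" exit test is rewritten.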
2098static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
2099  assert(L->getLoopLatch() && "Must be in simplified form");
2100
2101  // Avoid converting a constant or loop invariant test back to a runtime
2102  // test.  This is critical when SCEV's cached ExitCount is less precise
2103  // than the current IR (such as after we've proven a particular exit is
2104  // actually dead and thus the BE count never reaches our ExitCount).
2105  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2106  if (L->isLoopInvariant(BI->getCondition()))
2107    return false;
2108
2109  // Do LFTR to simplify the exit condition to an ICMP.
2110  ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
2111  if (!Cond)
2112    return true;
2113
2114  // Do LFTR to simplify the exit ICMP to EQ/NE
2115  ICmpInst::Predicate Pred = Cond->getPredicate();
2116  if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
2117    return true;
2118
2119  // Look for a loop invariant RHS
2120  Value *LHS = Cond->getOperand(0);
2121  Value *RHS = Cond->getOperand(1);
2122  if (!L->isLoopInvariant(RHS)) {
2123    if (!L->isLoopInvariant(LHS))
2124      return true;
2125    std::swap(LHS, RHS);
2126  }
2127  // Look for a simple IV counter LHS
2128  PHINode *Phi = dyn_cast<PHINode>(LHS);
2129  if (!Phi)
2130    Phi = getLoopPhiForCounter(LHS, L);
2131
2132  if (!Phi)
2133    return true;
2134
2135  // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
2136  int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
2137  if (Idx < 0)
2138    return true;
2139
2140  // Do LFTR if the exit condition's IV is *not* a simple counter.
2141  Value *IncV = Phi->getIncomingValue(Idx);
2142  return Phi != getLoopPhiForCounter(IncV, L);
2143}
2144
2145/// Return true if undefined behavior would provably be executed on the path
2146/// to OnPathTo if Root produced a poison result.  Note that this doesn't say
2147/// anything about whether OnPathTo is actually executed or whether Root is
2148/// actually poison.  This can be used to assess whether a new use of Root can
2149/// be added at a location which is control equivalent with OnPathTo (such as
2150/// immediately before it) without introducing UB which didn't previously
2151/// exist.  Note that a false result conveys no information.
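///
/// Illustrative case: if Root feeds the pointer operand of a store that
/// dominates OnPathTo, a poison value at Root would already trigger UB on
/// that path.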
2152static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root,
2153                                          Instruction *OnPathTo,
2154                                          DominatorTree *DT) {
2155  // Basic approach is to assume Root is poison, propagate poison forward
2156  // through all users we can easily track, and then check whether any of those
2157  // users are provably UB and must execute before our exiting block might
2158  // exit.
2159
2160  // The set of all recursive users we've visited (which are assumed to all be
2161  // poison because of said visit)
2162  SmallSet<const Value *, 16> KnownPoison;
2163  SmallVector<const Instruction*, 16> Worklist;
2164  Worklist.push_back(Root);
2165  while (!Worklist.empty()) {
2166    const Instruction *I = Worklist.pop_back_val();
2167
2168    // If we know this must trigger UB on a path leading our target.
2169    if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo))
2170      return true;
2171
2172    // If we can't analyze propagation through this instruction, just skip it
2173    // and transitive users.  Safe as false is a conservative result.
2174    if (!propagatesFullPoison(I) && I != Root)
2175      continue;
2176
2177    if (KnownPoison.insert(I).second)
2178      for (const User *User : I->users())
2179        Worklist.push_back(cast<Instruction>(User));
2180  }
2181
2182  // Might be non-UB, or might have a path we couldn't prove must execute on
2183  // way to exiting bb.
2184  return false;
2185}
2186
2187/// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
2188/// down to checking that all operands are constant and listing instructions
2189/// that may hide undef.
2190static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited,
2191                               unsigned Depth) {
2192  if (isa<Constant>(V))
2193    return !isa<UndefValue>(V);
2194
2195  if (Depth >= 6)
2196    return false;
2197
2198  // Conservatively handle non-constant non-instructions. For example, Arguments
2199  // may be undef.
2200  Instruction *I = dyn_cast<Instruction>(V);
2201  if (!I)
2202    return false;
2203
2204  // Load and return values may be undef.
2205  if (I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
2206    return false;
2207
2208  // Optimistically handle other instructions.
2209  for (Value *Op : I->operands()) {
2210    if (!Visited.insert(Op).second)
2211      continue;
2212    if (!hasConcreteDefImpl(Op, Visited, Depth+1))
2213      return false;
2214  }
2215  return true;
2216}
2217
2218/// Return true if the given value is concrete. We must prove that undef can
2219/// never reach it.
2220///
2221/// TODO: If we decide that this is a good approach to checking for undef, we
2222/// may factor it into a common location.
2223static bool hasConcreteDef(Value *V) {
2224  SmallPtrSet<Value*, 8> Visited;
2225  Visited.insert(V);
2226  return hasConcreteDefImpl(V, Visited, 0);
2227}
2228
2229/// Return true if this IV has any uses other than the (soon to be rewritten)
2230/// loop exit test.
2231static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
2232  int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
2233  Value *IncV = Phi->getIncomingValue(LatchIdx);
2234
2235  for (User *U : Phi->users())
2236    if (U != Cond && U != IncV) return false;
2237
2238  for (User *U : IncV->users())
2239    if (U != Cond && U != Phi) return false;
2240  return true;
2241}
2242
2243/// Return true if the given phi is a "counter" in L.  A counter is an
2244/// add recurrence (of integer or pointer type) with an arbitrary start, and a
2245/// step of 1.  Note that L must have exactly one latch.
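///
/// e.g. (illustrative):
///   %iv      = phi i32 [ %start, %preheader ], [ %iv.next, %latch ]
///   %iv.next = add i32 %iv, 1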
2246static bool isLoopCounter(PHINode* Phi, Loop *L,
2247                          ScalarEvolution *SE) {
2248  assert(Phi->getParent() == L->getHeader());
2249  assert(L->getLoopLatch());
2250
2251  if (!SE->isSCEVable(Phi->getType()))
2252    return false;
2253
2254  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
2255  if (!AR || AR->getLoop() != L || !AR->isAffine())
2256    return false;
2257
2258  const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
2259  if (!Step || !Step->isOne())
2260    return false;
2261
2262  int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
2263  Value *IncV = Phi->getIncomingValue(LatchIdx);
2264  return (getLoopPhiForCounter(IncV, L) == Phi);
2265}
2266
2267/// Search the loop header for a loop counter (an add rec with a step of one)
2268/// suitable for use by LFTR.  If multiple counters are available, select the
2269/// "best" one based on profitability heuristics.
2270///
2271/// BECount may be an i8* pointer type. The pointer difference is already a
2272/// valid count without scaling the address stride, so it remains a pointer
2273/// expression as far as SCEV is concerned.
2274static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
2275                                const SCEV *BECount,
2276                                ScalarEvolution *SE, DominatorTree *DT) {
2277  uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());
2278
2279  Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();
2280
2281  // Loop over all of the PHI nodes, looking for a simple counter.
2282  PHINode *BestPhi = nullptr;
2283  const SCEV *BestInit = nullptr;
2284  BasicBlock *LatchBlock = L->getLoopLatch();
2285  assert(LatchBlock && "Must be in simplified form");
2286  const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
2287
2288  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
2289    PHINode *Phi = cast<PHINode>(I);
2290    if (!isLoopCounter(Phi, L, SE))
2291      continue;
2292
2293    // Avoid comparing an integer IV against a pointer Limit.
2294    if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
2295      continue;
2296
2297    const auto *AR = cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
2298
2299    // AR may be a pointer type, while BECount is an integer type.
2300    // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
2301    // AR may not be a narrower type, or we may never exit.
2302    uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
2303    if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
2304      continue;
2305
2306    // Avoid reusing a potentially undef value to compute other values that may
2307    // have originally had a concrete definition.
2308    if (!hasConcreteDef(Phi)) {
2309      // We explicitly allow unknown phis as long as they are already used by
2310      // the loop exit test.  This is legal since performing LFTR could not
2311      // increase the number of undef users.
2312      Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock);
2313      if (!isLoopExitTestBasedOn(Phi, ExitingBB) &&
2314          !isLoopExitTestBasedOn(IncPhi, ExitingBB))
2315        continue;
2316    }
2317
2318    // Avoid introducing undefined behavior due to poison which didn't exist in
2319    // the original program.  (Annoyingly, the rules for poison and undef
2320    // propagation are distinct, so this does NOT cover the undef case above.)
2321    // We have to ensure that we don't introduce UB by introducing a use on an
2322    // iteration where said IV produces poison.  Our strategy here differs for
2323    // pointers and integer IVs.  For integers, we strip and reinfer as needed,
2324    // see code in linearFunctionTestReplace.  For pointers, we restrict
2325    // transforms as there is no good way to reinfer inbounds once lost.
2326    if (!Phi->getType()->isIntegerTy() &&
2327        !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT))
2328      continue;
2329
2330    const SCEV *Init = AR->getStart();
2331
2332    if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
2333      // Don't force a live loop counter if another IV can be used.
2334      if (AlmostDeadIV(Phi, LatchBlock, Cond))
2335        continue;
2336
2337      // Prefer to count-from-zero. This is a more "canonical" counter form. It
2338      // also prefers integer to pointer IVs.
2339      if (BestInit->isZero() != Init->isZero()) {
2340        if (BestInit->isZero())
2341          continue;
2342      }
2343      // If two IVs both count from zero or both count from nonzero then the
2344      // narrower is likely a dead phi that has been widened. Use the wider phi
2345      // to allow the other to be eliminated.
2346      else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
2347        continue;
2348    }
2349    BestPhi = Phi;
2350    BestInit = Init;
2351  }
2352  return BestPhi;
2353}
2354
2355/// Insert an IR expression which computes the value held by the IV IndVar
2356/// (which must be a loop counter with unit stride) after the backedge of
2357/// loop L is taken ExitCount times.
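///
/// For a counter {%init,+,1} this is, illustratively, %init + ExitCount for a
/// pre-increment exit test, or %init + ExitCount + 1 for a post-increment one.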
2358static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
2359                           const SCEV *ExitCount, bool UsePostInc, Loop *L,
2360                           SCEVExpander &Rewriter, ScalarEvolution *SE) {
2361  assert(isLoopCounter(IndVar, L, SE));
2362  const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
2363  const SCEV *IVInit = AR->getStart();
2364
2365  // IVInit may be a pointer while ExitCount is an integer when FindLoopCounter
2366  // finds a valid pointer IV. Sign extend ExitCount in order to materialize a
2367  // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing
2368  // the existing GEPs whenever possible.
2369  if (IndVar->getType()->isPointerTy() &&
2370      !ExitCount->getType()->isPointerTy()) {
2371    // IVOffset will be the new GEP offset that is interpreted by GEP as a
2372    // signed value. ExitCount on the other hand represents the loop trip count,
2373    // which is an unsigned value. FindLoopCounter only allows induction
2374    // variables that have a positive unit stride. This means we don't
2375    // have to handle the case of negative offsets (yet) and just need to zero
2376    // extend ExitCount.
2377    Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType());
2378    const SCEV *IVOffset = SE->getTruncateOrZeroExtend(ExitCount, OfsTy);
2379    if (UsePostInc)
2380      IVOffset = SE->getAddExpr(IVOffset, SE->getOne(OfsTy));
2381
2382    // Expand the code for the iteration count.
2383    assert(SE->isLoopInvariant(IVOffset, L) &&
2384           "Computed iteration count is not loop invariant!");
2385
2386    // We could handle pointer IVs other than i8*, but we need to compensate for
2387    // gep index scaling.
2388    assert(SE->getSizeOfExpr(IntegerType::getInt64Ty(IndVar->getContext()),
2389                             cast<PointerType>(IndVar->getType())
2390                                 ->getElementType())->isOne() &&
2391           "unit stride pointer IV must be i8*");
2392
2393    const SCEV *IVLimit = SE->getAddExpr(IVInit, IVOffset);
2394    BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2395    return Rewriter.expandCodeFor(IVLimit, IndVar->getType(), BI);
2396  } else {
2397    // In any other case, convert both IVInit and ExitCount to integers before
2398    // comparing. This may result in SCEV expansion of pointers, but in practice
2399    // SCEV will fold the pointer arithmetic away as such:
2400    // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc).
2401    //
2402    // Valid Cases: (1) both integers is most common; (2) both may be pointers
2403    // for simple memset-style loops.
2404    //
2405    // IVInit integer and ExitCount pointer would only occur if a canonical IV
2406    // were generated on top of case #2, which is not expected.
2407
2408    assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
2409    // For unit stride, IVCount = Start + ExitCount with 2's complement
2410    // overflow.
2411
2412    // For integer IVs, truncate the IV before computing IVInit + BECount,
2413    // unless we know a priori that the limit must be a constant when evaluated
2414    // in the bitwidth of the IV.  We prefer (potentially) keeping a truncate
2415    // of the IV in the loop over a (potentially) expensive expansion of the
2416    // widened exit count add(zext(add)) expression.
2417    if (SE->getTypeSizeInBits(IVInit->getType())
2418        > SE->getTypeSizeInBits(ExitCount->getType())) {
2419      if (isa<SCEVConstant>(IVInit) && isa<SCEVConstant>(ExitCount))
2420        ExitCount = SE->getZeroExtendExpr(ExitCount, IVInit->getType());
2421      else
2422        IVInit = SE->getTruncateExpr(IVInit, ExitCount->getType());
2423    }
2424
2425    const SCEV *IVLimit = SE->getAddExpr(IVInit, ExitCount);
2426
2427    if (UsePostInc)
2428      IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType()));
2429
2430    // Expand the code for the iteration count.
2431    assert(SE->isLoopInvariant(IVLimit, L) &&
2432           "Computed iteration count is not loop invariant!");
2433    // Ensure that we generate the same type as IndVar, or a smaller integer
2434    // type. In the presence of null pointer values, we have an integer type
2435    // SCEV expression (IVInit) for a pointer type IV value (IndVar).
2436    Type *LimitTy = ExitCount->getType()->isPointerTy() ?
2437      IndVar->getType() : ExitCount->getType();
2438    BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2439    return Rewriter.expandCodeFor(IVLimit, LimitTy, BI);
2440  }
2441}
2442
2443/// This method rewrites the exit condition of the loop to be a canonical !=
2444/// comparison against the incremented loop induction variable.  This pass is
2445/// able to rewrite the exit tests of any loop where the SCEV analysis can
2446/// determine a loop-invariant trip count of the loop, which is actually a much
2447/// broader range than just linear tests.
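///
/// For example (illustrative), with a unit-stride counter starting at 0 and a
/// computable trip count %n:
///   before: %cmp = icmp slt i32 %iv, %n
///   after:  %exitcond = icmp ne i32 %iv.next, %n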
2448bool IndVarSimplify::
2449linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
2450                          const SCEV *ExitCount,
2451                          PHINode *IndVar, SCEVExpander &Rewriter) {
2452  assert(L->getLoopLatch() && "Loop no longer in simplified form?");
2453  assert(isLoopCounter(IndVar, L, SE));
2454  Instruction * const IncVar =
2455    cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch()));
2456
2457  // Initialize CmpIndVar to the preincremented IV.
2458  Value *CmpIndVar = IndVar;
2459  bool UsePostInc = false;
2460
2461  // If the exiting block is the same as the backedge block, we prefer to
2462  // compare against the post-incremented value, otherwise we must compare
2463  // against the preincremented value.
2464  if (ExitingBB == L->getLoopLatch()) {
2465    // For pointer IVs, we chose to not strip inbounds which requires us not
2466    // to add a potentially UB introducing use.  We need to either a) show
2467    // the loop test we're modifying is already in post-inc form, or b) show
2468    // that adding a use must not introduce UB.
2469    bool SafeToPostInc =
2470        IndVar->getType()->isIntegerTy() ||
2471        isLoopExitTestBasedOn(IncVar, ExitingBB) ||
2472        mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT);
2473    if (SafeToPostInc) {
2474      UsePostInc = true;
2475      CmpIndVar = IncVar;
2476    }
2477  }
2478
2479  // It may be necessary to drop nowrap flags on the incrementing instruction
2480  // if either LFTR moves from a pre-inc check to a post-inc check (in which
2481  // case the increment might have previously been poison on the last iteration
2482  // only) or if LFTR switches to a different IV that was previously dynamically
2483  // dead (and as such may be arbitrarily poison). We remove any nowrap flags
2484  // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc
2485  // check), because the pre-inc addrec flags may be adopted from the original
2486  // instruction, while SCEV has to explicitly prove the post-inc nowrap flags.
2487  // TODO: This handling is inaccurate for one case: If we switch to a
2488  // dynamically dead IV that wraps on the first loop iteration only, which is
2489  // not covered by the post-inc addrec. (If the new IV was not dynamically
2490  // dead, it could not be poison on the first iteration in the first place.)
2491  if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) {
2492    const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar));
2493    if (BO->hasNoUnsignedWrap())
2494      BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap());
2495    if (BO->hasNoSignedWrap())
2496      BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
2497  }
2498
2499  Value *ExitCnt = genLoopLimit(
2500      IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
2501  assert(ExitCnt->getType()->isPointerTy() ==
2502             IndVar->getType()->isPointerTy() &&
2503         "genLoopLimit missed a cast");
2504
2505  // Insert a new icmp_ne or icmp_eq instruction before the branch.
2506  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
2507  ICmpInst::Predicate P;
2508  if (L->contains(BI->getSuccessor(0)))
2509    P = ICmpInst::ICMP_NE;
2510  else
2511    P = ICmpInst::ICMP_EQ;
2512
2513  IRBuilder<> Builder(BI);
2514
2515  // The new loop exit condition should reuse the debug location of the
2516  // original loop exit condition.
2517  if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
2518    Builder.SetCurrentDebugLocation(Cond->getDebugLoc());
2519
2520  // For integer IVs, if we evaluated the limit in the narrower bitwidth to
2521  // avoid the expensive expansion of the limit expression in the wider type,
2522  // emit a truncate to narrow the IV to the ExitCount type.  This is safe
2523  // since we know (from the exit count bitwidth), that we can't self-wrap in
2524  // the narrower type.
2525  unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
2526  unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
2527  if (CmpIndVarSize > ExitCntSize) {
2528    assert(!CmpIndVar->getType()->isPointerTy() &&
2529           !ExitCnt->getType()->isPointerTy());
2530
2531    // Before resorting to actually inserting the truncate, use the same
2532    // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend
2533    // the other side of the comparison instead.  We still evaluate the limit
2534    // in the narrower bitwidth, we just prefer a zext/sext outside the loop to
2535    // a truncate within it.
2536    bool Extended = false;
2537    const SCEV *IV = SE->getSCEV(CmpIndVar);
2538    const SCEV *TruncatedIV = SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
2539                                                  ExitCnt->getType());
2540    const SCEV *ZExtTrunc =
2541      SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType());
2542
2543    if (ZExtTrunc == IV) {
2544      Extended = true;
2545      ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
2546                                   "wide.trip.count");
2547    } else {
2548      const SCEV *SExtTrunc =
2549        SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType());
2550      if (SExtTrunc == IV) {
2551        Extended = true;
2552        ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
2553                                     "wide.trip.count");
2554      }
2555    }
2556
2557    if (Extended) {
2558      bool Discard;
2559      L->makeLoopInvariant(ExitCnt, Discard);
2560    } else
2561      CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
2562                                      "lftr.wideiv");
2563  }
2564  LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
2565                    << "      LHS:" << *CmpIndVar << '\n'
2566                    << "       op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
2567                    << "\n"
2568                    << "      RHS:\t" << *ExitCnt << "\n"
2569                    << "ExitCount:\t" << *ExitCount << "\n"
2570                    << "  was: " << *BI->getCondition() << "\n");
2571
2572  Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
2573  Value *OrigCond = BI->getCondition();
2574  // It's tempting to use replaceAllUsesWith here to fully replace the old
2575  // comparison, but that's not immediately safe, since users of the old
2576  // comparison may not be dominated by the new comparison. Instead, just
2577  // update the branch to use the new comparison; in the common case this
2578  // will make old comparison dead.
2579  BI->setCondition(Cond);
2580  DeadInsts.push_back(OrigCond);
2581
2582  ++NumLFTR;
2583  return true;
2584}
2585
2586//===----------------------------------------------------------------------===//
2587//  sinkUnusedInvariants. A late subpass to clean up loop preheaders.
2588//===----------------------------------------------------------------------===//
2589
2590/// If there's a single exit block, sink any loop-invariant values that
2591/// were defined in the preheader but not used inside the loop into the
2592/// exit block to reduce register pressure in the loop.
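///
/// For example (illustrative), an address computation emitted in the
/// preheader whose only users are in the exit block is moved down into the
/// exit block.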
bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
  BasicBlock *ExitBlock = L->getExitBlock();
  if (!ExitBlock) return false;

  BasicBlock *Preheader = L->getLoopPreheader();
  if (!Preheader) return false;

  bool MadeAnyChanges = false;
  BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
  BasicBlock::iterator I(Preheader->getTerminator());
  while (I != Preheader->begin()) {
    --I;
    // New instructions were inserted at the end of the preheader.
    if (isa<PHINode>(I))
      break;

    // Don't move instructions which might have side effects, since the side
    // effects need to complete before instructions inside the loop.  Also don't
    // move instructions which might read memory, since the loop may modify
    // memory. Note that it's okay if the instruction might have undefined
    // behavior: LoopSimplify guarantees that the preheader dominates the exit
    // block.
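    // For example, a load cannot be sunk past the loop if the loop might
    // store to the same location, and a call that writes memory must complete
    // before the loop body runs.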
    if (I->mayHaveSideEffects() || I->mayReadFromMemory())
      continue;

    // Skip debug info intrinsics.
    if (isa<DbgInfoIntrinsic>(I))
      continue;

    // Skip eh pad instructions.
    if (I->isEHPad())
      continue;

    // Don't sink allocas: we never want to sink static allocas out of the
    // entry block, and correctly sinking dynamic allocas requires
    // checks for stacksave/stackrestore intrinsics.
    // FIXME: Refactor this check somehow?
    if (isa<AllocaInst>(I))
      continue;

    // Determine if there is a use in or before the loop (direct or
    // otherwise).
    bool UsedInLoop = false;
    for (Use &U : I->uses()) {
      Instruction *User = cast<Instruction>(U.getUser());
      BasicBlock *UseBB = User->getParent();
      if (PHINode *P = dyn_cast<PHINode>(User)) {
        unsigned i =
          PHINode::getIncomingValueNumForOperand(U.getOperandNo());
        UseBB = P->getIncomingBlock(i);
      }
      if (UseBB == Preheader || L->contains(UseBB)) {
        UsedInLoop = true;
        break;
      }
    }

    // If there is, the def must remain in the preheader.
    if (UsedInLoop)
      continue;

    // Otherwise, sink it to the exit block.
    Instruction *ToMove = &*I;
    bool Done = false;

    if (I != Preheader->begin()) {
      // Skip debug info intrinsics.
      do {
        --I;
      } while (isa<DbgInfoIntrinsic>(I) && I != Preheader->begin());

      if (isa<DbgInfoIntrinsic>(I) && I == Preheader->begin())
        Done = true;
    } else {
      Done = true;
    }

    MadeAnyChanges = true;
    ToMove->moveBefore(*ExitBlock, InsertPt);
    if (Done) break;
    InsertPt = ToMove->getIterator();
  }

  return MadeAnyChanges;
}

/// Return a symbolic upper bound for the backedge taken count of the loop.
/// This is more general than getConstantMaxBackedgeTakenCount as it returns
/// an arbitrary expression as opposed to only constants.
/// TODO: Move into the ScalarEvolution class.
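///
/// For example (a sketch), a loop with one exit taken after 'n' iterations
/// and another taken after at most 10 iterations yields the symbolic bound
/// 'umin(n, 10)'.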
static const SCEV* getMaxBackedgeTakenCount(ScalarEvolution &SE,
                                            DominatorTree &DT, Loop *L) {
  SmallVector<BasicBlock*, 16> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  // Form an expression for the maximum exit count possible for this loop. We
  // merge the max and exact information to approximate a version of
  // getConstantMaxBackedgeTakenCount which isn't restricted to just constants.
  SmallVector<const SCEV*, 4> ExitCounts;
  for (BasicBlock *ExitingBB : ExitingBlocks) {
    const SCEV *ExitCount = SE.getExitCount(L, ExitingBB);
    if (isa<SCEVCouldNotCompute>(ExitCount))
      ExitCount = SE.getExitCount(L, ExitingBB,
                                  ScalarEvolution::ConstantMaximum);
    if (!isa<SCEVCouldNotCompute>(ExitCount)) {
      assert(DT.dominates(ExitingBB, L->getLoopLatch()) &&
             "We should only have known counts for exiting blocks that "
             "dominate latch!");
      ExitCounts.push_back(ExitCount);
    }
  }
  if (ExitCounts.empty())
    return SE.getCouldNotCompute();
  return SE.getUMinFromMismatchedTypes(ExitCounts);
}

bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
  SmallVector<BasicBlock*, 16> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  // Remove all exits which aren't both rewritable and analyzable.
  auto NewEnd = llvm::remove_if(ExitingBlocks,
                                [&](BasicBlock *ExitingBB) {
    // If our exiting block exits multiple loops, we can only rewrite the
    // innermost one.  Otherwise, we're changing how many times the innermost
    // loop runs before it exits.
    if (LI->getLoopFor(ExitingBB) != L)
      return true;

    // Can't rewrite non-branch yet.
    BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      return true;

    // If already constant, nothing to do.
    if (isa<Constant>(BI->getCondition()))
      return true;

    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
    if (isa<SCEVCouldNotCompute>(ExitCount))
      return true;
    return false;
  });
  ExitingBlocks.erase(NewEnd, ExitingBlocks.end());

  if (ExitingBlocks.empty())
    return false;

  // Get a symbolic upper bound on the loop backedge taken count.
  const SCEV *MaxExitCount = getMaxBackedgeTakenCount(*SE, *DT, L);
  if (isa<SCEVCouldNotCompute>(MaxExitCount))
    return false;

  // Visit our exit blocks in order of dominance.  We know from the fact that
  // all remaining exits are analyzable that there must be a total dominance
  // order between them as each must dominate the latch.  The visit order only
  // matters for the provably equal case.
  llvm::sort(ExitingBlocks,
             [&](BasicBlock *A, BasicBlock *B) {
               // std::sort sorts in ascending order, so we want the inverse of
               // the normal dominance relation.
               if (DT->properlyDominates(A, B)) return true;
               if (DT->properlyDominates(B, A)) return false;
               llvm_unreachable("expected total dominance order!");
             });
#ifndef NDEBUG
  for (unsigned i = 1; i < ExitingBlocks.size(); i++) {
    assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]));
  }
#endif

  auto FoldExit = [&](BasicBlock *ExitingBB, bool IsTaken) {
    BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
    bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
    auto *OldCond = BI->getCondition();
    auto *NewCond = ConstantInt::get(OldCond->getType(),
                                     IsTaken ? ExitIfTrue : !ExitIfTrue);
    BI->setCondition(NewCond);
    if (OldCond->use_empty())
      DeadInsts.push_back(OldCond);
  };

  bool Changed = false;
  SmallSet<const SCEV*, 8> DominatingExitCounts;
  for (BasicBlock *ExitingBB : ExitingBlocks) {
    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
    assert(!isa<SCEVCouldNotCompute>(ExitCount) && "checked above");

    // If we know we'd exit on the first iteration, rewrite the exit to
    // reflect this.  This does not imply the loop must exit through this
    // exit; there may be an earlier one taken on the first iteration.
    // TODO: Given we know the backedge can't be taken, we should go ahead
    // and break it.  Or at least, kill all the header phis and simplify.
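    // For example, an exit testing 'i == 0' against an IV starting at zero
    // has an exit count of zero, so its branch can be folded to exit
    // unconditionally on the first iteration.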
    if (ExitCount->isZero()) {
      FoldExit(ExitingBB, true);
      Changed = true;
      continue;
    }

    // If we end up with a pointer exit count, bail.  Note that we can end up
    // with a pointer exit count for one exiting block, and not for another in
    // the same loop.
    if (!ExitCount->getType()->isIntegerTy() ||
        !MaxExitCount->getType()->isIntegerTy())
      continue;

    Type *WiderType =
      SE->getWiderType(MaxExitCount->getType(), ExitCount->getType());
    ExitCount = SE->getNoopOrZeroExtend(ExitCount, WiderType);
    MaxExitCount = SE->getNoopOrZeroExtend(MaxExitCount, WiderType);
    assert(MaxExitCount->getType() == ExitCount->getType());

    // Can we prove that some other exit must be taken strictly before this
    // one?
    if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT,
                                     MaxExitCount, ExitCount)) {
      FoldExit(ExitingBB, false);
      Changed = true;
      continue;
    }

    // As we run, keep track of which exit counts we've encountered.  If we
    // find a duplicate, we've found an exit which would have exited on the
    // exiting iteration, but (from the visit order) strictly follows another
    // which does the same and is thus dead.
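    // For example, given two exits which both test 'i == n', the dominating
    // one is always reached first on the exiting iteration, so the dominated
    // one can be folded to not-taken.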
    if (!DominatingExitCounts.insert(ExitCount).second) {
      FoldExit(ExitingBB, false);
      Changed = true;
      continue;
    }

    // TODO: There might be another opportunity to leverage SCEV's reasoning
    // here.  If we kept track of the min of dominating exits so far, we could
    // discharge exits with EC >= MDEC. This is less powerful than the existing
    // transform (since later exits aren't considered), but potentially more
    // powerful for any case where SCEV can prove a >=u b, but neither a == b
    // nor a >u b.  Such a case is not currently known.
  }
  return Changed;
}

bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
  SmallVector<BasicBlock*, 16> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  bool Changed = false;

  // See if we can rewrite our exit conditions into a loop invariant form.  If
  // we have a read-only loop, and we can tell that we must exit down a path
  // which does not need any of the values computed within the loop, we can
  // rewrite the loop to exit on the first iteration.  Note that this neither
  // a) tells us the loop must exit on the first iteration (unless *all* exits
  // are predicatable) nor b) tells us *which* exit might be taken.  This
  // transformation looks a lot like dead loop elimination, but restricted to
  // read-only loops and without necessarily needing to kill the loop entirely.
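  // For example (a sketch), in a read-only loop with exits 'i == n' and
  // 'a[i] == 0' whose computed values are unused after the loop, each exit
  // test can be replaced with a loop-invariant comparison of that exit's
  // count against the loop's exact backedge-taken count.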
  if (!LoopPredication)
    return Changed;

  if (!SE->hasLoopInvariantBackedgeTakenCount(L))
    return Changed;

  // Note: ExactBTC is the exact backedge taken count *iff* the loop exits
  // through *explicit* control flow.  We have to eliminate the possibility of
  // implicit exits (see below) before we know it's truly exact.
  const SCEV *ExactBTC = SE->getBackedgeTakenCount(L);
  if (isa<SCEVCouldNotCompute>(ExactBTC) ||
      !SE->isLoopInvariant(ExactBTC, L) ||
      !isSafeToExpand(ExactBTC, *SE))
    return Changed;

  // If we end up with a pointer exit count, bail.  It may be unsized.
  if (!ExactBTC->getType()->isIntegerTy())
    return Changed;

  auto BadExit = [&](BasicBlock *ExitingBB) {
    // If our exiting block exits multiple loops, we can only rewrite the
    // innermost one.  Otherwise, we're changing how many times the innermost
    // loop runs before it exits.
    if (LI->getLoopFor(ExitingBB) != L)
      return true;

    // Can't rewrite non-branch yet.
    BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      return true;

    // If already constant, nothing to do.
    if (isa<Constant>(BI->getCondition()))
      return true;

    // If the exit block has phis, we need to be able to compute the values
    // within the loop which contains them.  This assumes trivial LCSSA phis
    // have already been removed; TODO: generalize
    BasicBlock *ExitBlock =
        BI->getSuccessor(L->contains(BI->getSuccessor(0)) ? 1 : 0);
    if (!ExitBlock->phis().empty())
      return true;

    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
    assert(!isa<SCEVCouldNotCompute>(ExitCount) &&
           "implied by having exact trip count");
    if (!SE->isLoopInvariant(ExitCount, L) ||
        !isSafeToExpand(ExitCount, *SE))
      return true;

    // If we end up with a pointer exit count, bail.  It may be unsized.
    if (!ExitCount->getType()->isIntegerTy())
      return true;

    return false;
  };

  // If we have any exits which can't be predicated themselves, then we can't
  // predicate any exit which isn't guaranteed to execute before it.  Consider
  // two exits (a) and (b) which would both exit on the same iteration.  If we
  // can predicate (b), but not (a), and (a) precedes (b) along some path, then
  // we could convert a loop from exiting through (a) to one exiting through
  // (b).  Note that this problem exists only for exits with the same exit
  // count, and we could be more aggressive when exit counts are known to be
  // unequal.
  llvm::sort(ExitingBlocks,
             [&](BasicBlock *A, BasicBlock *B) {
               // std::sort sorts in ascending order, so we want the inverse of
               // the normal dominance relation, plus a tie breaker for blocks
               // unordered by dominance.
               if (DT->properlyDominates(A, B)) return true;
               if (DT->properlyDominates(B, A)) return false;
               return A->getName() < B->getName();
             });
  // Check to see if our exit blocks are a total order (i.e. a linear chain of
  // exits before the backedge).  If they aren't, reasoning about reachability
  // is complicated and we choose not to handle it for now.
  for (unsigned i = 1; i < ExitingBlocks.size(); i++)
    if (!DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]))
      return Changed;

  // Given our sorted total order, we know that exit[j] must be evaluated
  // after all exit[i] such that j > i.
  for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++)
    if (BadExit(ExitingBlocks[i])) {
      ExitingBlocks.resize(i);
      break;
    }

  if (ExitingBlocks.empty())
    return Changed;

  // We rely on not being able to reach an exiting block on a later iteration
  // than its statically computed exit count.  The implementation of
  // getExitCount currently has this invariant, but assert it here so that
  // breakage is obvious if this ever changes.
  assert(llvm::all_of(ExitingBlocks, [&](BasicBlock *ExitingBB) {
        return DT->dominates(ExitingBB, L->getLoopLatch());
      }));

  // At this point, ExitingBlocks consists of only those blocks which are
  // predicatable.  Given that, we know we have at least one exit we can
  // predicate if the loop doesn't have side effects and doesn't have any
  // implicit exits (because then our exact BTC isn't actually exact).
  // @Reviewers - As structured, this is O(I^2) for loop nests.  Any
  // suggestions on how to improve this?  I can obviously bail out for outer
  // loops, but that seems less than ideal.  MemorySSA can find memory writes,
  // is that enough for *all* side effects?
  for (BasicBlock *BB : L->blocks())
    for (auto &I : *BB)
      // TODO: use isGuaranteedToTransferExecutionToSuccessor here
      if (I.mayHaveSideEffects() || I.mayThrow())
        return Changed;

  // Finally, do the actual predication for all predicatable blocks.  A couple
  // of notes here:
  // 1) We don't bother to constant fold dominated exits with identical exit
  //    counts; that's simply a form of CSE/equality propagation and we leave
  //    it for dedicated passes.
  // 2) We insert the comparison at the branch.  Hoisting introduces additional
  //    legality constraints and we leave that to dedicated logic.  We want to
  //    predicate even if we can't insert a loop invariant expression as
  //    peeling or unrolling will likely reduce the cost of the otherwise loop
  //    varying check.
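  // For example (a sketch), an exit with count %ec in a loop whose exact
  // backedge taken count is %btc has its branch condition rewritten to
  // 'icmp eq %ec, %btc' (or 'ne', depending on which successor stays in the
  // loop), with both operands expanded in the preheader.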
  Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator());
  IRBuilder<> B(L->getLoopPreheader()->getTerminator());
  Value *ExactBTCV = nullptr; // Lazily generated if needed.
  for (BasicBlock *ExitingBB : ExitingBlocks) {
    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);

    auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
    Value *NewCond;
    if (ExitCount == ExactBTC) {
      NewCond = L->contains(BI->getSuccessor(0)) ?
        B.getFalse() : B.getTrue();
    } else {
      Value *ECV = Rewriter.expandCodeFor(ExitCount);
      if (!ExactBTCV)
        ExactBTCV = Rewriter.expandCodeFor(ExactBTC);
      Value *RHS = ExactBTCV;
      if (ECV->getType() != RHS->getType()) {
        Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType());
        ECV = B.CreateZExt(ECV, WiderTy);
        RHS = B.CreateZExt(RHS, WiderTy);
      }
      auto Pred = L->contains(BI->getSuccessor(0)) ?
        ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
      NewCond = B.CreateICmp(Pred, ECV, RHS);
    }
    Value *OldCond = BI->getCondition();
    BI->setCondition(NewCond);
    if (OldCond->use_empty())
      DeadInsts.push_back(OldCond);
    Changed = true;
  }

  return Changed;
}

//===----------------------------------------------------------------------===//
//  IndVarSimplify driver. Manage several subpasses of IV simplification.
//===----------------------------------------------------------------------===//

bool IndVarSimplify::run(Loop *L) {
  // We need (and expect!) the incoming loop to be in LCSSA.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "LCSSA required to run indvars!");
  bool Changed = false;

  // If LoopSimplify form is not available, stay out of trouble. Some notes:
  //  - LSR currently only supports LoopSimplify-form loops. Indvars'
  //    canonicalization can be a pessimization without LSR to "clean up"
  //    afterwards.
  //  - We depend on having a preheader; in particular,
  //    Loop::getCanonicalInductionVariable only supports loops with preheaders,
  //    and we're in trouble if we can't find the induction variable even when
  //    we've manually inserted one.
  //  - LFTR relies on having a single backedge.
  if (!L->isLoopSimplifyForm())
    return false;

#ifndef NDEBUG
  // Used below for a consistency check only
  const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
#endif

  // If there are any floating-point recurrences, attempt to
  // transform them to use integer recurrences.
  Changed |= rewriteNonIntegerIVs(L);

  // Create a rewriter object which we'll use to transform the code with.
  SCEVExpander Rewriter(*SE, DL, "indvars");
#ifndef NDEBUG
  Rewriter.setDebugType(DEBUG_TYPE);
#endif

  // Eliminate redundant IV users.
  //
  // Simplification works best when run before other consumers of SCEV. We
  // attempt to avoid evaluating SCEVs for sign/zero extend operations until
  // other expressions involving loop IVs have been evaluated. This helps SCEV
  // set no-wrap flags before normalizing sign/zero extension.
  Rewriter.disableCanonicalMode();
  Changed |= simplifyAndExtend(L, Rewriter, LI);

  // Check to see if we can compute the final value of any expressions
  // that are recurrent in the loop, and substitute the exit values from the
  // loop into any instructions outside of the loop that use the final values
  // of the current expressions.
  if (ReplaceExitValue != NeverRepl)
    Changed |= rewriteLoopExitValues(L, Rewriter);

  // Eliminate redundant IV cycles.
  NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts);

  // Try to eliminate loop exits based on analyzable exit counts.
  if (optimizeLoopExits(L, Rewriter)) {
    Changed = true;
    // Given we've changed exit counts, notify SCEV.
    SE->forgetLoop(L);
  }

  // Try to form loop invariant tests for loop exits by changing how many
  // iterations of the loop run when that is unobservable.
  if (predicateLoopExits(L, Rewriter)) {
    Changed = true;
    // Given we've changed exit counts, notify SCEV.
    SE->forgetLoop(L);
  }

  // If we have a trip count expression, rewrite the loop's exit condition
  // using it.
  if (!DisableLFTR) {
    SmallVector<BasicBlock*, 16> ExitingBlocks;
    L->getExitingBlocks(ExitingBlocks);
    for (BasicBlock *ExitingBB : ExitingBlocks) {
      // Can't rewrite non-branch yet.
      if (!isa<BranchInst>(ExitingBB->getTerminator()))
        continue;

      // If our exiting block exits multiple loops, we can only rewrite the
      // innermost one.  Otherwise, we're changing how many times the innermost
      // loop runs before it exits.
      if (LI->getLoopFor(ExitingBB) != L)
        continue;

      if (!needsLFTR(L, ExitingBB))
        continue;

      const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
      if (isa<SCEVCouldNotCompute>(ExitCount))
        continue;

      // This was handled above, but as we form SCEVs, we can sometimes refine
      // existing ones; this allows exit counts to be folded to zero that
      // weren't foldable when optimizeLoopExits saw them.  Arguably, we should
      // iterate until stable to handle cases like this better.
      if (ExitCount->isZero())
        continue;

      PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT);
      if (!IndVar)
        continue;

      // Avoid high cost expansions.  Note: This heuristic is questionable in
      // that our definition of "high cost" is not exactly principled.
      if (Rewriter.isHighCostExpansion(ExitCount, L))
        continue;

      // Check preconditions for proper SCEVExpander operation. SCEV does not
      // express SCEVExpander's dependencies, such as LoopSimplify. Instead
      // any pass that uses the SCEVExpander must do it. This does not work
      // well for loop passes because SCEVExpander makes assumptions about
      // all loops, while LoopPassManager only forces the current loop to be
      // simplified.
      //
      // FIXME: SCEV expansion has no way to bail out, so the caller must
      // explicitly check any assumptions made by SCEV. Brittle.
      const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ExitCount);
      if (!AR || AR->getLoop()->getLoopPreheader())
        Changed |= linearFunctionTestReplace(L, ExitingBB,
                                             ExitCount, IndVar,
                                             Rewriter);
    }
  }
  // Clear the rewriter cache, because values that are in the rewriter's cache
  // can be deleted in the loop below, causing the AssertingVH in the cache to
  // trigger.
  Rewriter.clear();

  // Now that we're done iterating through lists, clean up any instructions
  // which are now dead.
  while (!DeadInsts.empty())
    if (Instruction *Inst =
            dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val()))
      Changed |= RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI);

  // The Rewriter may not be used from this point on.

  // Loop-invariant instructions in the preheader that aren't used in the
  // loop may be sunk below the loop to reduce register pressure.
  Changed |= sinkUnusedInvariants(L);

  // rewriteFirstIterationLoopExitValues does not rely on the computation of
  // trip count and therefore can further simplify exit values in addition to
  // rewriteLoopExitValues.
  Changed |= rewriteFirstIterationLoopExitValues(L);

  // Clean up dead instructions.
  Changed |= DeleteDeadPHIs(L->getHeader(), TLI);

  // Check a post-condition.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "Indvars did not preserve LCSSA!");

  // Verify that LFTR and any other changes have not interfered with SCEV's
  // ability to compute trip count.  We may have *changed* the exit count, but
  // only by reducing it.
#ifndef NDEBUG
  if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
    SE->forgetLoop(L);
    const SCEV *NewBECount = SE->getBackedgeTakenCount(L);
    if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) <
        SE->getTypeSizeInBits(NewBECount->getType()))
      NewBECount = SE->getTruncateOrNoop(NewBECount,
                                         BackedgeTakenCount->getType());
    else
      BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount,
                                                 NewBECount->getType());
    assert(!SE->isKnownPredicate(ICmpInst::ICMP_ULT, BackedgeTakenCount,
                                 NewBECount) && "indvars must preserve SCEV");
  }
#endif

  return Changed;
}

PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
                                          LoopStandardAnalysisResults &AR,
                                          LPMUpdater &) {
  Function *F = L.getHeader()->getParent();
  const DataLayout &DL = F->getParent()->getDataLayout();

  IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI);
  if (!IVS.run(&L))
    return PreservedAnalyses::all();

  auto PA = getLoopPassPreservedAnalyses();
  PA.preserveSet<CFGAnalyses>();
  return PA;
}

namespace {

struct IndVarSimplifyLegacyPass : public LoopPass {
  static char ID; // Pass identification, replacement for typeid

  IndVarSimplifyLegacyPass() : LoopPass(ID) {
    initializeIndVarSimplifyLegacyPassPass(*PassRegistry::getPassRegistry());
  }

  bool runOnLoop(Loop *L, LPPassManager &LPM) override {
    if (skipLoop(L))
      return false;

    auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
    auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
    auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
    auto *TLI = TLIP ? &TLIP->getTLI(*L->getHeader()->getParent()) : nullptr;
    auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
    auto *TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr;
    const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();

    IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI);
    return IVS.run(L);
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    getLoopAnalysisUsage(AU);
  }
};

} // end anonymous namespace

char IndVarSimplifyLegacyPass::ID = 0;

INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars",
                      "Induction Variable Simplification", false, false)
INITIALIZE_PASS_DEPENDENCY(LoopPass)
INITIALIZE_PASS_END(IndVarSimplifyLegacyPass, "indvars",
                    "Induction Variable Simplification", false, false)

Pass *llvm::createIndVarSimplifyPass() {
  return new IndVarSimplifyLegacyPass();
}