//===- IndVarSimplify.cpp - Induction Variable Elimination ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This transformation analyzes and transforms the induction variables (and
// computations derived from them) into simpler forms suitable for subsequent
// analysis and transformation.
//
// If the trip count of a loop is computable, this pass also makes the following
// changes:
//   1. The exit condition for the loop is canonicalized to compare the
//      induction value against the exit value.  This turns loops like:
//        'for (i = 7; i*i < 1000; ++i)' into 'for (i = 0; i != 25; ++i)'
//   2. Any use outside of the loop of an expression derived from the indvar
//      is changed to compute the derived value outside of the loop, eliminating
//      the dependence on the exit value of the induction variable.  If the only
//      purpose of the loop is to compute the exit value of some derived
//      expression, this transformation will make the loop dead.
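//      For example (illustrative), if the only live-out of
//        'for (i = 0; i != n; ++i) sum += 2;'
//      is 'sum', its exit value can be computed as '2*n' after the loop,
//      leaving the loop itself dead.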
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/IndVarSimplify.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include "llvm/Transforms/Utils/SimplifyIndVar.h"
#include <cassert>
#include <cstdint>
#include <utility>

using namespace llvm;
using namespace PatternMatch;

#define DEBUG_TYPE "indvars"

STATISTIC(NumWidened     , "Number of indvars widened");
STATISTIC(NumReplaced    , "Number of exit values replaced");
STATISTIC(NumLFTR        , "Number of loop exit tests replaced");
STATISTIC(NumElimExt     , "Number of IV sign/zero extends eliminated");
STATISTIC(NumElimIV      , "Number of congruent IVs eliminated");

// Trip count verification can be enabled by default under NDEBUG if we
// implement a strong expression equivalence checker in SCEV. Until then, we
// use the verify-indvars flag, which may assert in some cases.
static cl::opt<bool> VerifyIndvars(
    "verify-indvars", cl::Hidden,
    cl::desc("Verify the ScalarEvolution result after running indvars. Has no "
             "effect in release builds. (Note: this adds additional SCEV "
             "queries potentially changing the analysis result)"));

static cl::opt<ReplaceExitVal> ReplaceExitValue(
    "replexitval", cl::Hidden, cl::init(OnlyCheapRepl),
    cl::desc("Choose the strategy to replace exit value in IndVarSimplify"),
    cl::values(
        clEnumValN(NeverRepl, "never", "never replace exit value"),
        clEnumValN(OnlyCheapRepl, "cheap",
                   "only replace exit value when the cost is cheap"),
        clEnumValN(
            UnusedIndVarInLoop, "unusedindvarinloop",
            "only replace exit value when it is an unused "
            "induction variable in the loop and has cheap replacement cost"),
        clEnumValN(NoHardUse, "noharduse",
                   "only replace exit values when loop def likely dead"),
        clEnumValN(AlwaysRepl, "always",
                   "always replace exit value whenever possible")));

static cl::opt<bool> UsePostIncrementRanges(
  "indvars-post-increment-ranges", cl::Hidden,
  cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
  cl::init(true));

static cl::opt<bool>
DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
            cl::desc("Disable Linear Function Test Replace optimization"));

static cl::opt<bool>
LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true),
                cl::desc("Predicate conditions in read only loops"));

static cl::opt<bool>
AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true),
                cl::desc("Allow widening of indvars to eliminate s/zext"));

namespace {

class IndVarSimplify {
  LoopInfo *LI;
  ScalarEvolution *SE;
  DominatorTree *DT;
  const DataLayout &DL;
  TargetLibraryInfo *TLI;
  const TargetTransformInfo *TTI;
  std::unique_ptr<MemorySSAUpdater> MSSAU;

  SmallVector<WeakTrackingVH, 16> DeadInsts;
  bool WidenIndVars;

  bool handleFloatingPointIV(Loop *L, PHINode *PH);
  bool rewriteNonIntegerIVs(Loop *L);

  bool simplifyAndExtend(Loop *L, SCEVExpander &Rewriter, LoopInfo *LI);
  /// Try to improve our exit conditions by converting the condition from
  /// signed to unsigned or by rotating computation out of the loop.
  /// (See inline comment about why this is duplicated from simplifyAndExtend)
  bool canonicalizeExitCondition(Loop *L);
  /// Try to eliminate loop exits based on analyzable exit counts.
  bool optimizeLoopExits(Loop *L, SCEVExpander &Rewriter);
  /// Try to form loop-invariant tests for loop exits by changing how many
  /// iterations of the loop run when that is unobservable.
  bool predicateLoopExits(Loop *L, SCEVExpander &Rewriter);

  bool rewriteFirstIterationLoopExitValues(Loop *L);

  bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                                 const SCEV *ExitCount,
                                 PHINode *IndVar, SCEVExpander &Rewriter);

  bool sinkUnusedInvariants(Loop *L);

public:
  IndVarSimplify(LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT,
                 const DataLayout &DL, TargetLibraryInfo *TLI,
                 TargetTransformInfo *TTI, MemorySSA *MSSA, bool WidenIndVars)
      : LI(LI), SE(SE), DT(DT), DL(DL), TLI(TLI), TTI(TTI),
        WidenIndVars(WidenIndVars) {
    if (MSSA)
      MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
  }

  bool run(Loop *L);
};

} // end anonymous namespace

//===----------------------------------------------------------------------===//
// rewriteNonIntegerIVs and helpers. Prefer integer IVs.
//===----------------------------------------------------------------------===//

/// Convert APF to an integer, if possible.
static bool ConvertToSInt(const APFloat &APF, int64_t &IntVal) {
  bool isExact = false;
  // See if we can convert this to an int64_t
  uint64_t UIntVal;
  if (APF.convertToInteger(MutableArrayRef(UIntVal), 64, true,
                           APFloat::rmTowardZero, &isExact) != APFloat::opOK ||
      !isExact)
    return false;
  IntVal = UIntVal;
  return true;
}

/// If the loop has a floating-point induction variable, insert a corresponding
/// integer induction variable if possible.
/// For example,
/// for(double i = 0; i < 10000; ++i)
///   bar(i)
/// is converted into
/// for(int i = 0; i < 10000; ++i)
///   bar((double)i);
bool IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {
  unsigned IncomingEdge = L->contains(PN->getIncomingBlock(0));
  unsigned BackEdge     = IncomingEdge^1;

  // Check incoming value.
  auto *InitValueVal = dyn_cast<ConstantFP>(PN->getIncomingValue(IncomingEdge));

  int64_t InitValue;
  if (!InitValueVal || !ConvertToSInt(InitValueVal->getValueAPF(), InitValue))
    return false;

  // Check the IV increment. Reject this PN if the increment operation is not
  // an FAdd or if the increment value cannot be represented by an integer.
  auto *Incr = dyn_cast<BinaryOperator>(PN->getIncomingValue(BackEdge));
  if (Incr == nullptr || Incr->getOpcode() != Instruction::FAdd) return false;

  // If this is not an add of the PHI with a ConstantFP, or if the constant FP
  // is not an integer, bail out.
  ConstantFP *IncValueVal = dyn_cast<ConstantFP>(Incr->getOperand(1));
  int64_t IncValue;
  if (IncValueVal == nullptr || Incr->getOperand(0) != PN ||
      !ConvertToSInt(IncValueVal->getValueAPF(), IncValue))
    return false;

  // Check Incr uses. One user is PN and the other user is an exit condition
  // used by the conditional terminator.
  Value::user_iterator IncrUse = Incr->user_begin();
  Instruction *U1 = cast<Instruction>(*IncrUse++);
  if (IncrUse == Incr->user_end()) return false;
  Instruction *U2 = cast<Instruction>(*IncrUse++);
  if (IncrUse != Incr->user_end()) return false;

  // Find exit condition, which is an fcmp.  If it doesn't exist, or if it isn't
  // only used by a branch, we can't transform it.
  FCmpInst *Compare = dyn_cast<FCmpInst>(U1);
  if (!Compare)
    Compare = dyn_cast<FCmpInst>(U2);
  if (!Compare || !Compare->hasOneUse() ||
      !isa<BranchInst>(Compare->user_back()))
    return false;

  BranchInst *TheBr = cast<BranchInst>(Compare->user_back());

  // We need to verify that the branch actually controls the iteration count
  // of the loop.  If not, the new IV can overflow and no one will notice.
  // The branch block must be in the loop and one of the successors must be out
  // of the loop.
  assert(TheBr->isConditional() && "Can't use fcmp if not conditional");
  if (!L->contains(TheBr->getParent()) ||
      (L->contains(TheBr->getSuccessor(0)) &&
       L->contains(TheBr->getSuccessor(1))))
    return false;

  // If it isn't a comparison with an integer-as-fp (the exit value), we can't
  // transform it.
  ConstantFP *ExitValueVal = dyn_cast<ConstantFP>(Compare->getOperand(1));
  int64_t ExitValue;
  if (ExitValueVal == nullptr ||
      !ConvertToSInt(ExitValueVal->getValueAPF(), ExitValue))
    return false;

  // Find new predicate for integer comparison.
  CmpInst::Predicate NewPred = CmpInst::BAD_ICMP_PREDICATE;
  switch (Compare->getPredicate()) {
  default: return false;  // Unknown comparison.
  case CmpInst::FCMP_OEQ:
  case CmpInst::FCMP_UEQ: NewPred = CmpInst::ICMP_EQ; break;
  case CmpInst::FCMP_ONE:
  case CmpInst::FCMP_UNE: NewPred = CmpInst::ICMP_NE; break;
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_UGT: NewPred = CmpInst::ICMP_SGT; break;
  case CmpInst::FCMP_OGE:
  case CmpInst::FCMP_UGE: NewPred = CmpInst::ICMP_SGE; break;
  case CmpInst::FCMP_OLT:
  case CmpInst::FCMP_ULT: NewPred = CmpInst::ICMP_SLT; break;
  case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_ULE: NewPred = CmpInst::ICMP_SLE; break;
  }

  // We convert the floating point induction variable to a signed i32 value if
  // we can.  This is only safe if the comparison will not overflow in a way
  // that won't be trapped by the integer equivalent operations.  Check for this
  // now.
  // TODO: We could use i64 if it is native and the range requires it.

  // The start/stride/exit values must all fit in signed i32.
  if (!isInt<32>(InitValue) || !isInt<32>(IncValue) || !isInt<32>(ExitValue))
    return false;

  // If not actually striding (add x, 0.0), avoid touching the code.
  if (IncValue == 0)
    return false;

  // Positive and negative strides have different safety conditions.
  if (IncValue > 0) {
    // If we have a positive stride, we require the init to be less than the
    // exit value.
    if (InitValue >= ExitValue)
      return false;

    uint32_t Range = uint32_t(ExitValue-InitValue);
    // Check for infinite loop, either:
    // while (i <= Exit) or until (i > Exit)
    if (NewPred == CmpInst::ICMP_SLE || NewPred == CmpInst::ICMP_SGT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) < ExitValue)
      return false;
  } else {
    // If we have a negative stride, we require the init to be greater than the
    // exit value.
    if (InitValue <= ExitValue)
      return false;

    uint32_t Range = uint32_t(InitValue-ExitValue);
    // Check for infinite loop, either:
    // while (i >= Exit) or until (i < Exit)
    if (NewPred == CmpInst::ICMP_SGE || NewPred == CmpInst::ICMP_SLT) {
      if (++Range == 0) return false;  // Range overflows.
    }

    unsigned Leftover = Range % uint32_t(-IncValue);

    // If this is an equality comparison, we require that the strided value
    // exactly land on the exit value, otherwise the IV condition will wrap
    // around and do things the fp IV wouldn't.
    if ((NewPred == CmpInst::ICMP_EQ || NewPred == CmpInst::ICMP_NE) &&
        Leftover != 0)
      return false;

    // If the stride would wrap around the i32 before exiting, we can't
    // transform the IV.
    if (Leftover != 0 && int32_t(ExitValue+IncValue) > ExitValue)
      return false;
  }

  IntegerType *Int32Ty = Type::getInt32Ty(PN->getContext());

  // Insert new integer induction variable.
  PHINode *NewPHI = PHINode::Create(Int32Ty, 2, PN->getName()+".int", PN);
  NewPHI->addIncoming(ConstantInt::get(Int32Ty, InitValue),
                      PN->getIncomingBlock(IncomingEdge));

  Value *NewAdd =
    BinaryOperator::CreateAdd(NewPHI, ConstantInt::get(Int32Ty, IncValue),
                              Incr->getName()+".int", Incr);
  NewPHI->addIncoming(NewAdd, PN->getIncomingBlock(BackEdge));

  ICmpInst *NewCompare = new ICmpInst(TheBr, NewPred, NewAdd,
                                      ConstantInt::get(Int32Ty, ExitValue),
                                      Compare->getName());

  // In the following deletions, PN may become dead and may be deleted.
  // Use a WeakTrackingVH to observe whether this happens.
  WeakTrackingVH WeakPH = PN;

  // Delete the old floating point exit comparison.  The branch starts using the
  // new comparison.
  NewCompare->takeName(Compare);
  Compare->replaceAllUsesWith(NewCompare);
  RecursivelyDeleteTriviallyDeadInstructions(Compare, TLI, MSSAU.get());

  // Delete the old floating point increment.
  Incr->replaceAllUsesWith(PoisonValue::get(Incr->getType()));
  RecursivelyDeleteTriviallyDeadInstructions(Incr, TLI, MSSAU.get());

  // If the FP induction variable still has uses, this is because something else
  // in the loop uses its value.  In order to canonicalize the induction
  // variable, we chose to eliminate the IV and rewrite it in terms of an
  // int->fp cast.
  //
  // We give preference to sitofp over uitofp because it is faster on most
  // platforms.
  if (WeakPH) {
    Value *Conv = new SIToFPInst(NewPHI, PN->getType(), "indvar.conv",
                                 &*PN->getParent()->getFirstInsertionPt());
    PN->replaceAllUsesWith(Conv);
    RecursivelyDeleteTriviallyDeadInstructions(PN, TLI, MSSAU.get());
  }
  return true;
}

bool IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {
  // First step.  Check to see if there are any floating-point recurrences.
  // If there are, change them into integer recurrences, permitting analysis by
  // the SCEV routines.
  BasicBlock *Header = L->getHeader();

  SmallVector<WeakTrackingVH, 8> PHIs;
  for (PHINode &PN : Header->phis())
    PHIs.push_back(&PN);

  bool Changed = false;
  for (unsigned i = 0, e = PHIs.size(); i != e; ++i)
    if (PHINode *PN = dyn_cast_or_null<PHINode>(&*PHIs[i]))
      Changed |= handleFloatingPointIV(L, PN);

  // If the loop previously had a floating-point IV, ScalarEvolution
  // may not have been able to compute a trip count. Now that we've done some
  // rewriting, the trip count may be computable.
  if (Changed)
    SE->forgetLoop(L);
  return Changed;
}

//===---------------------------------------------------------------------===//
// rewriteFirstIterationLoopExitValues: Rewrite loop exit values if we know
// they will exit at the first iteration.
//===---------------------------------------------------------------------===//

/// Check to see if this loop has loop-invariant conditions which lead to loop
/// exits. If so, we know that if the exit path is taken, it is at the first
/// loop iteration. This lets us predict exit values of PHI nodes that live in
/// the loop header.
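///
/// For example (illustrative): in
///   for (i = 0; i != n; ++i) { if (inv) break; ... }
/// with loop-invariant 'inv', the break exit can only be taken on the first
/// iteration, so a header PHI's value along that exit edge is simply its
/// preheader incoming value.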
bool IndVarSimplify::rewriteFirstIterationLoopExitValues(Loop *L) {
  // Verify the input to the pass is already in LCSSA form.
  assert(L->isLCSSAForm(*DT));

  SmallVector<BasicBlock *, 8> ExitBlocks;
  L->getUniqueExitBlocks(ExitBlocks);

  bool MadeAnyChanges = false;
  for (auto *ExitBB : ExitBlocks) {
    // If there are no more PHI nodes in this exit block, then no more
    // values defined inside the loop are used on this path.
    for (PHINode &PN : ExitBB->phis()) {
      for (unsigned IncomingValIdx = 0, E = PN.getNumIncomingValues();
           IncomingValIdx != E; ++IncomingValIdx) {
        auto *IncomingBB = PN.getIncomingBlock(IncomingValIdx);

        // Can we prove that the exit must run on the first iteration if it
        // runs at all?  (i.e. early exits are fine for our purposes, but
        // traces which lead to this exit being taken on the 2nd iteration
        // aren't.)  Note that this is about whether the exit branch is
        // executed, not about whether it is taken.
        if (!L->getLoopLatch() ||
            !DT->dominates(IncomingBB, L->getLoopLatch()))
          continue;

        // Get condition that leads to the exit path.
        auto *TermInst = IncomingBB->getTerminator();

        Value *Cond = nullptr;
        if (auto *BI = dyn_cast<BranchInst>(TermInst)) {
          // Must be a conditional branch, otherwise the block
          // should not be in the loop.
          Cond = BI->getCondition();
        } else if (auto *SI = dyn_cast<SwitchInst>(TermInst))
          Cond = SI->getCondition();
        else
          continue;

        if (!L->isLoopInvariant(Cond))
          continue;

        auto *ExitVal = dyn_cast<PHINode>(PN.getIncomingValue(IncomingValIdx));

        // Only deal with PHIs in the loop header.
        if (!ExitVal || ExitVal->getParent() != L->getHeader())
          continue;

        // If ExitVal is a PHI on the loop header, then we know its
        // value along this exit because the exit can only be taken
        // on the first iteration.
        auto *LoopPreheader = L->getLoopPreheader();
        assert(LoopPreheader && "Invalid loop");
        int PreheaderIdx = ExitVal->getBasicBlockIndex(LoopPreheader);
        if (PreheaderIdx != -1) {
          assert(ExitVal->getParent() == L->getHeader() &&
                 "ExitVal must be in loop header");
          MadeAnyChanges = true;
          PN.setIncomingValue(IncomingValIdx,
                              ExitVal->getIncomingValue(PreheaderIdx));
          SE->forgetValue(&PN);
        }
      }
    }
  }
  return MadeAnyChanges;
}

//===----------------------------------------------------------------------===//
//  IV Widening - Extend the width of an IV to cover its widest uses.
//===----------------------------------------------------------------------===//

/// Update information about the induction variable that is extended by this
/// sign or zero extend operation. This is used to determine the final width of
/// the IV before actually widening it.
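///
/// For example (illustrative IR), given
///   %iv   = phi i32 [ 0, %preheader ], [ %iv.next, %latch ]
///   %wide = sext i32 %iv to i64
/// this records i64 as a candidate widest type for %iv, with IsSigned set,
/// provided i64 is a legal integer type for the target.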
static void visitIVCast(CastInst *Cast, WideIVInfo &WI,
                        ScalarEvolution *SE,
                        const TargetTransformInfo *TTI) {
  bool IsSigned = Cast->getOpcode() == Instruction::SExt;
  if (!IsSigned && Cast->getOpcode() != Instruction::ZExt)
    return;

  Type *Ty = Cast->getType();
  uint64_t Width = SE->getTypeSizeInBits(Ty);
  if (!Cast->getModule()->getDataLayout().isLegalInteger(Width))
    return;

  // Check that `Cast` actually extends the induction variable (we rely on this
  // later).  This takes care of cases where `Cast` is extending a truncation of
  // the narrow induction variable, and thus can end up being narrower than the
  // "narrow" induction variable.
  uint64_t NarrowIVWidth = SE->getTypeSizeInBits(WI.NarrowIV->getType());
  if (NarrowIVWidth >= Width)
    return;

  // Cast is either an sext or zext up to this point.
  // We should not widen an indvar if arithmetic on the wider indvar is more
  // expensive than on the narrower indvar. We check only the cost of ADD
  // because at least an ADD is required to increment the induction variable.
  // We could, when necessary, compute the cost of all instructions on the
  // induction variable more comprehensively.
  if (TTI &&
      TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
          TTI->getArithmeticInstrCost(Instruction::Add,
                                      Cast->getOperand(0)->getType())) {
    return;
  }

  if (!WI.WidestNativeType ||
      Width > SE->getTypeSizeInBits(WI.WidestNativeType)) {
    WI.WidestNativeType = SE->getEffectiveSCEVType(Ty);
    WI.IsSigned = IsSigned;
    return;
  }

  // We extend the IV to satisfy the sign of its user(s), or 'signed'
  // if there are multiple users with both sign- and zero extensions,
  // in order not to introduce nondeterministic behaviour based on the
  // unspecified order of a PHI node's users-iterator.
  WI.IsSigned |= IsSigned;
}

//===----------------------------------------------------------------------===//
//  Live IV Reduction - Minimize IVs live across the loop.
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
//  Simplification of IV users based on SCEV evaluation.
//===----------------------------------------------------------------------===//

namespace {

class IndVarSimplifyVisitor : public IVVisitor {
  ScalarEvolution *SE;
  const TargetTransformInfo *TTI;
  PHINode *IVPhi;

public:
  WideIVInfo WI;

  IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
                        const TargetTransformInfo *TTI,
                        const DominatorTree *DTree)
    : SE(SCEV), TTI(TTI), IVPhi(IV) {
    DT = DTree;
    WI.NarrowIV = IVPhi;
  }

  // Implement the interface used by simplifyUsersOfIV.
  void visitCast(CastInst *Cast) override { visitIVCast(Cast, WI, SE, TTI); }
};

} // end anonymous namespace

/// Iteratively perform simplification on a worklist of IV users. Each
/// successive simplification may push more users which may themselves be
/// candidates for simplification.
///
/// Sign/Zero extend elimination is interleaved with IV simplification.
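///
/// For example (illustrative): an i32 IV whose in-loop uses are all
/// sign-extended to i64 may be replaced by a single i64 IV, eliminating the
/// sext instructions inside the loop.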
bool IndVarSimplify::simplifyAndExtend(Loop *L,
                                       SCEVExpander &Rewriter,
                                       LoopInfo *LI) {
  SmallVector<WideIVInfo, 8> WideIVs;

  auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction(
          Intrinsic::getName(Intrinsic::experimental_guard));
  bool HasGuards = GuardDecl && !GuardDecl->use_empty();

  SmallVector<PHINode *, 8> LoopPhis;
  for (PHINode &PN : L->getHeader()->phis())
    LoopPhis.push_back(&PN);

  // Each round of simplification iterates through the SimplifyIVUsers worklist
  // for all current phis, then determines whether any IVs can be
  // widened. Widening adds new phis to LoopPhis, inducing another round of
  // simplification on the wide IVs.
  bool Changed = false;
  while (!LoopPhis.empty()) {
    // Evaluate as many IV expressions as possible before widening any IVs. This
    // forces SCEV to set no-wrap flags before evaluating sign/zero
    // extension. The first time SCEV attempts to normalize sign/zero extension,
    // the result becomes final. So for the most predictable results, we delay
    // evaluation of sign/zero extensions until needed, and avoid running
    // other SCEV-based analyses prior to simplifyAndExtend.
    do {
      PHINode *CurrIV = LoopPhis.pop_back_val();

      // Information about sign/zero extensions of CurrIV.
      IndVarSimplifyVisitor Visitor(CurrIV, SE, TTI, DT);

      Changed |= simplifyUsersOfIV(CurrIV, SE, DT, LI, TTI, DeadInsts, Rewriter,
                                   &Visitor);

      if (Visitor.WI.WidestNativeType) {
        WideIVs.push_back(Visitor.WI);
      }
    } while (!LoopPhis.empty());

    // Continue if we disallowed widening.
    if (!WidenIndVars)
      continue;

    for (; !WideIVs.empty(); WideIVs.pop_back()) {
      unsigned ElimExt;
      unsigned Widened;
      if (PHINode *WidePhi = createWideIV(WideIVs.back(), LI, SE, Rewriter,
                                          DT, DeadInsts, ElimExt, Widened,
                                          HasGuards, UsePostIncrementRanges)) {
        NumElimExt += ElimExt;
        NumWidened += Widened;
        Changed = true;
        LoopPhis.push_back(WidePhi);
      }
    }
  }
  return Changed;
}

//===----------------------------------------------------------------------===//
//  linearFunctionTestReplace and its kin. Rewrite the loop exit condition.
//===----------------------------------------------------------------------===//

/// Given a Value which is hoped to be part of an add recurrence in the given
/// loop, return the associated PHI node if so.  Otherwise, return null.  Note
/// that this is less general than SCEV's AddRec checking.
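///
/// An illustrative match:
///   %phi = phi i32 [ %start, %preheader ], [ %inc, %latch ]
///   %inc = add i32 %phi, %step   ; %step loop-invariant
/// getLoopPhiForCounter(%inc, L) would return %phi.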
static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L) {
  Instruction *IncI = dyn_cast<Instruction>(IncV);
  if (!IncI)
    return nullptr;

  switch (IncI->getOpcode()) {
  case Instruction::Add:
  case Instruction::Sub:
    break;
  case Instruction::GetElementPtr:
    // An IV counter must preserve its type.
    if (IncI->getNumOperands() == 2)
      break;
    [[fallthrough]];
  default:
    return nullptr;
  }

  PHINode *Phi = dyn_cast<PHINode>(IncI->getOperand(0));
  if (Phi && Phi->getParent() == L->getHeader()) {
    if (L->isLoopInvariant(IncI->getOperand(1)))
      return Phi;
    return nullptr;
  }
  if (IncI->getOpcode() == Instruction::GetElementPtr)
    return nullptr;

  // Allow add/sub to be commuted.
  Phi = dyn_cast<PHINode>(IncI->getOperand(1));
  if (Phi && Phi->getParent() == L->getHeader()) {
    if (L->isLoopInvariant(IncI->getOperand(0)))
      return Phi;
  }
  return nullptr;
}

/// Whether the current loop exit test is based on this value.  Currently this
/// is limited to a direct use in the loop condition.
static bool isLoopExitTestBasedOn(Value *V, BasicBlock *ExitingBB) {
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  ICmpInst *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
  // TODO: Allow non-icmp loop test.
  if (!ICmp)
    return false;

  // TODO: Allow indirect use.
  return ICmp->getOperand(0) == V || ICmp->getOperand(1) == V;
}

/// linearFunctionTestReplace policy. Return true unless we can show that the
/// current exit test is already sufficiently canonical.
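///
/// An already-canonical exit test (illustrative) for which this returns false:
///   %exitcond = icmp ne i32 %iv.next, %n   ; %n loop-invariant, %iv a counter
///   br i1 %exitcond, label %loop, label %exit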
static bool needsLFTR(Loop *L, BasicBlock *ExitingBB) {
  assert(L->getLoopLatch() && "Must be in simplified form");

  // Avoid converting a constant or loop invariant test back to a runtime
  // test.  This is critical for when SCEV's cached ExitCount is less precise
  // than the current IR (such as after we've proven a particular exit is
  // actually dead and thus the BE count never reaches our ExitCount.)
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  if (L->isLoopInvariant(BI->getCondition()))
    return false;

  // Do LFTR to simplify the exit condition to an ICMP.
  ICmpInst *Cond = dyn_cast<ICmpInst>(BI->getCondition());
  if (!Cond)
    return true;

  // Do LFTR to simplify the exit ICMP to EQ/NE
  ICmpInst::Predicate Pred = Cond->getPredicate();
  if (Pred != ICmpInst::ICMP_NE && Pred != ICmpInst::ICMP_EQ)
    return true;

  // Look for a loop invariant RHS
  Value *LHS = Cond->getOperand(0);
  Value *RHS = Cond->getOperand(1);
  if (!L->isLoopInvariant(RHS)) {
    if (!L->isLoopInvariant(LHS))
      return true;
    std::swap(LHS, RHS);
  }
  // Look for a simple IV counter LHS
  PHINode *Phi = dyn_cast<PHINode>(LHS);
  if (!Phi)
    Phi = getLoopPhiForCounter(LHS, L);

  if (!Phi)
    return true;

  // Do LFTR if PHI node is defined in the loop, but is *not* a counter.
  int Idx = Phi->getBasicBlockIndex(L->getLoopLatch());
  if (Idx < 0)
    return true;

  // Do LFTR if the exit condition's IV is *not* a simple counter.
  Value *IncV = Phi->getIncomingValue(Idx);
  return Phi != getLoopPhiForCounter(IncV, L);
}

/// Return true if undefined behavior would provably be executed on the path to
/// OnPathTo if Root produced a poison result.  Note that this doesn't say
/// anything about whether OnPathTo is actually executed or whether Root is
/// actually poison.  This can be used to assess whether a new use of Root can
/// be added at a location which is control equivalent with OnPathTo (such as
/// immediately before it) without introducing UB which didn't previously
/// exist.  Note that a false result conveys no information.
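///
/// Illustrative case: if Root feeds the index of an inbounds GEP whose result
/// is unconditionally loaded at a point dominating OnPathTo, poison at Root
/// would be guaranteed to trigger UB, so this may return true.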
static bool mustExecuteUBIfPoisonOnPathTo(Instruction *Root,
                                          Instruction *OnPathTo,
                                          DominatorTree *DT) {
  // Basic approach is to assume Root is poison, propagate poison forward
  // through all users we can easily track, and then check whether any of those
  // users are provably UB and must execute before our exiting block might
  // exit.

  // The set of all recursive users we've visited (which are assumed to all be
  // poison because of said visit)
  SmallSet<const Value *, 16> KnownPoison;
  SmallVector<const Instruction*, 16> Worklist;
  Worklist.push_back(Root);
  while (!Worklist.empty()) {
    const Instruction *I = Worklist.pop_back_val();

    // If we know this must trigger UB on a path leading to our target.
    if (mustTriggerUB(I, KnownPoison) && DT->dominates(I, OnPathTo))
      return true;

    // If we can't analyze propagation through this instruction, just skip it
    // and transitive users.  Safe as false is a conservative result.
    if (I != Root && !any_of(I->operands(), [&KnownPoison](const Use &U) {
          return KnownPoison.contains(U) && propagatesPoison(U);
        }))
      continue;

    if (KnownPoison.insert(I).second)
      for (const User *User : I->users())
        Worklist.push_back(cast<Instruction>(User));
  }

  // Might be non-UB, or might have a path we couldn't prove must execute on
  // way to exiting bb.
  return false;
}

/// Recursive helper for hasConcreteDef(). Unfortunately, this currently boils
/// down to checking that all operands are constant and listing instructions
/// that may hide undef.
static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl<Value*> &Visited,
                               unsigned Depth) {
  if (isa<Constant>(V))
    return !isa<UndefValue>(V);

  if (Depth >= 6)
    return false;

  // Conservatively handle non-constant non-instructions. For example, Arguments
  // may be undef.
  Instruction *I = dyn_cast<Instruction>(V);
  if (!I)
    return false;

  // Load and return values may be undef.
  if (I->mayReadFromMemory() || isa<CallInst>(I) || isa<InvokeInst>(I))
    return false;

  // Optimistically handle other instructions.
  for (Value *Op : I->operands()) {
    if (!Visited.insert(Op).second)
      continue;
    if (!hasConcreteDefImpl(Op, Visited, Depth+1))
      return false;
  }
  return true;
}

/// Return true if the given value is concrete. We must prove that undef can
/// never reach it.
///
/// TODO: If we decide that this is a good approach to checking for undef, we
/// may factor it into a common location.
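///
/// Illustrative: 'add i32 %arg, 1' is not concrete when %arg is a function
/// argument (the argument may be undef), while 'add i32 7, 1' is.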
static bool hasConcreteDef(Value *V) {
  SmallPtrSet<Value*, 8> Visited;
  Visited.insert(V);
  return hasConcreteDefImpl(V, Visited, 0);
}

/// Return true if this IV has no uses other than the (soon to be rewritten)
/// loop exit test and its own increment.
static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) {
  int LatchIdx = Phi->getBasicBlockIndex(LatchBlock);
  Value *IncV = Phi->getIncomingValue(LatchIdx);

  for (User *U : Phi->users())
    if (U != Cond && U != IncV) return false;

  for (User *U : IncV->users())
    if (U != Cond && U != Phi) return false;
  return true;
}

/// Return true if the given phi is a "counter" in L.  A counter is an
/// add recurrence (of integer or pointer type) with an arbitrary start, and a
/// step of 1.  Note that L must have exactly one latch.
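///
/// An illustrative counter:
///   %iv      = phi i64 [ %start, %preheader ], [ %iv.next, %latch ]
///   %iv.next = add i64 %iv, 1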
static bool isLoopCounter(PHINode* Phi, Loop *L,
                          ScalarEvolution *SE) {
  assert(Phi->getParent() == L->getHeader());
  assert(L->getLoopLatch());

  if (!SE->isSCEVable(Phi->getType()))
    return false;

  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Phi));
  if (!AR || AR->getLoop() != L || !AR->isAffine())
    return false;

  const SCEV *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
  if (!Step || !Step->isOne())
    return false;

  int LatchIdx = Phi->getBasicBlockIndex(L->getLoopLatch());
  Value *IncV = Phi->getIncomingValue(LatchIdx);
  return (getLoopPhiForCounter(IncV, L) == Phi &&
          isa<SCEVAddRecExpr>(SE->getSCEV(IncV)));
}

/// Search the loop header for a loop counter (an add recurrence with a step of
/// one) suitable for use by LFTR.  If multiple counters are available, select
/// the "best" one based on profitability heuristics.
///
/// BECount may be an i8* pointer type. The pointer difference is already a
/// valid count without scaling the address stride, so it remains a pointer
/// expression as far as SCEV is concerned.
static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
                                const SCEV *BECount,
                                ScalarEvolution *SE, DominatorTree *DT) {
  uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType());

  Value *Cond = cast<BranchInst>(ExitingBB->getTerminator())->getCondition();

  // Loop over all of the PHI nodes, looking for a simple counter.
  PHINode *BestPhi = nullptr;
  const SCEV *BestInit = nullptr;
  BasicBlock *LatchBlock = L->getLoopLatch();
  assert(LatchBlock && "Must be in simplified form");
  const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();

  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
    PHINode *Phi = cast<PHINode>(I);
    if (!isLoopCounter(Phi, L, SE))
      continue;

    // Avoid comparing an integer IV against a pointer Limit.
    if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy())
      continue;

    const auto *AR = cast<SCEVAddRecExpr>(SE->getSCEV(Phi));

    // AR may be a pointer type, while BECount is an integer type.
    // AR may be wider than BECount. With eq/ne tests overflow is immaterial.
    // AR may not be a narrower type, or we may never exit.
    uint64_t PhiWidth = SE->getTypeSizeInBits(AR->getType());
    if (PhiWidth < BCWidth || !DL.isLegalInteger(PhiWidth))
      continue;

    // Avoid reusing a potentially undef value to compute other values that may
    // have originally had a concrete definition.
    if (!hasConcreteDef(Phi)) {
      // We explicitly allow unknown phis as long as they are already used by
      // the loop exit test.  This is legal since performing LFTR could not
      // increase the number of undef users.
      Value *IncPhi = Phi->getIncomingValueForBlock(LatchBlock);
      if (!isLoopExitTestBasedOn(Phi, ExitingBB) &&
          !isLoopExitTestBasedOn(IncPhi, ExitingBB))
        continue;
    }

    // Avoid introducing undefined behavior due to poison which didn't exist in
    // the original program.  (Annoyingly, the rules for poison and undef
    // propagation are distinct, so this does NOT cover the undef case above.)
    // We have to ensure that we don't introduce UB by introducing a use on an
    // iteration where said IV produces poison.  Our strategy here differs for
    // pointers and integer IVs.  For integers, we strip and reinfer as needed,
    // see code in linearFunctionTestReplace.  For pointers, we restrict
    // transforms as there is no good way to reinfer inbounds once lost.
    if (!Phi->getType()->isIntegerTy() &&
        !mustExecuteUBIfPoisonOnPathTo(Phi, ExitingBB->getTerminator(), DT))
      continue;

    const SCEV *Init = AR->getStart();

    if (BestPhi && !AlmostDeadIV(BestPhi, LatchBlock, Cond)) {
      // Don't force a live loop counter if another IV can be used.
      if (AlmostDeadIV(Phi, LatchBlock, Cond))
        continue;

      // Prefer to count-from-zero. This is a more "canonical" counter form. It
      // also prefers integer to pointer IVs.
      if (BestInit->isZero() != Init->isZero()) {
        if (BestInit->isZero())
          continue;
      }
      // If two IVs both count from zero or both count from nonzero then the
      // narrower is likely a dead phi that has been widened. Use the wider phi
      // to allow the other to be eliminated.
      else if (PhiWidth <= SE->getTypeSizeInBits(BestPhi->getType()))
        continue;
    }
    BestPhi = Phi;
    BestInit = Init;
  }
  return BestPhi;
}

/// Insert an IR expression which computes the value held by the IV IndVar
/// (which must be a loop counter with unit stride) after the backedge of loop
/// L is taken ExitCount times.
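///
/// Roughly (illustrative): the pre-increment limit is Start + ExitCount and
/// the post-increment limit is Start + ExitCount + 1, computed in a suitable
/// common type.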
static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
                           const SCEV *ExitCount, bool UsePostInc, Loop *L,
                           SCEVExpander &Rewriter, ScalarEvolution *SE) {
  assert(isLoopCounter(IndVar, L, SE));
  const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
  const SCEV *IVInit = AR->getStart();
  assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");

  // IVInit may be a pointer while ExitCount is an integer when FindLoopCounter
  // finds a valid pointer IV. Zero extend ExitCount in order to materialize a
  // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing
  // the existing GEPs whenever possible.
  if (IndVar->getType()->isPointerTy() &&
      !ExitCount->getType()->isPointerTy()) {
    // IVOffset will be the new GEP offset that is interpreted by GEP as a
    // signed value. ExitCount on the other hand represents the loop trip count,
    // which is an unsigned value. FindLoopCounter only allows induction
    // variables that have a positive unit stride of one. This means we don't
    // have to handle the case of negative offsets (yet) and just need to zero
    // extend ExitCount.
    Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType());
    const SCEV *IVOffset = SE->getTruncateOrZeroExtend(ExitCount, OfsTy);
    if (UsePostInc)
      IVOffset = SE->getAddExpr(IVOffset, SE->getOne(OfsTy));

    // Expand the code for the iteration count.
    assert(SE->isLoopInvariant(IVOffset, L) &&
           "Computed iteration count is not loop invariant!");

    const SCEV *IVLimit = SE->getAddExpr(IVInit, IVOffset);
    BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
    return Rewriter.expandCodeFor(IVLimit, IndVar->getType(), BI);
  } else {
    // In any other case, convert both IVInit and ExitCount to integers before
    // comparing. This may result in SCEV expansion of pointers, but in practice
    // SCEV will fold the pointer arithmetic away as such:
    // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc).
    //
    // Valid Cases: (1) both integers is most common; (2) both may be pointers
    // for simple memset-style loops.
    //
    // IVInit integer and ExitCount pointer would only occur if a canonical IV
    // were generated on top of case #2, which is not expected.

    // For unit stride, IVCount = Start + ExitCount with 2's complement
    // overflow.

    // For integer IVs, truncate the IV before computing IVInit + BECount,
    // unless we know a priori that the limit must be a constant when evaluated
    // in the bitwidth of the IV.  We prefer (potentially) keeping a truncate
    // of the IV in the loop over a (potentially) expensive expansion of the
    // widened exit count add(zext(add)) expression.
    if (SE->getTypeSizeInBits(IVInit->getType())
        > SE->getTypeSizeInBits(ExitCount->getType())) {
      if (isa<SCEVConstant>(IVInit) && isa<SCEVConstant>(ExitCount))
        ExitCount = SE->getZeroExtendExpr(ExitCount, IVInit->getType());
      else
        IVInit = SE->getTruncateExpr(IVInit, ExitCount->getType());
    }

    const SCEV *IVLimit = SE->getAddExpr(IVInit, ExitCount);

    if (UsePostInc)
      IVLimit = SE->getAddExpr(IVLimit, SE->getOne(IVLimit->getType()));

    // Expand the code for the iteration count.
    assert(SE->isLoopInvariant(IVLimit, L) &&
           "Computed iteration count is not loop invariant!");
    // Ensure that we generate the same type as IndVar, or a smaller integer
    // type. In the presence of null pointer values, we have an integer type
    // SCEV expression (IVInit) for a pointer type IV value (IndVar).
    Type *LimitTy = ExitCount->getType()->isPointerTy() ?
      IndVar->getType() : ExitCount->getType();
    BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
    return Rewriter.expandCodeFor(IVLimit, LimitTy, BI);
  }
}

/// This method rewrites the exit condition of the loop to be a canonical !=
/// comparison against the incremented loop induction variable.  This pass is
/// able to rewrite the exit tests of any loop where the SCEV analysis can
/// determine a loop-invariant trip count of the loop, which is actually a much
/// broader range than just linear tests.
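///
/// An illustrative rewrite:
///   br (icmp slt i32 %iv, %n), label %loop, label %exit
/// becomes
///   br (icmp ne i32 %iv.next, %limit), label %loop, label %exit
/// where %limit is the expanded, loop-invariant exit count expression.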
bool IndVarSimplify::
linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                          const SCEV *ExitCount,
                          PHINode *IndVar, SCEVExpander &Rewriter) {
  assert(L->getLoopLatch() && "Loop no longer in simplified form?");
  assert(isLoopCounter(IndVar, L, SE));
  Instruction * const IncVar =
    cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch()));

  // Initialize CmpIndVar to the preincremented IV.
  Value *CmpIndVar = IndVar;
  bool UsePostInc = false;

  // If the exiting block is the same as the backedge block, we prefer to
  // compare against the post-incremented value, otherwise we must compare
  // against the preincremented value.
  if (ExitingBB == L->getLoopLatch()) {
    // For pointer IVs, we chose not to strip inbounds, which requires us not
    // to add a potentially UB-introducing use.  We need to either a) show
    // the loop test we're modifying is already in post-inc form, or b) show
    // that adding a use must not introduce UB.
    bool SafeToPostInc =
        IndVar->getType()->isIntegerTy() ||
        isLoopExitTestBasedOn(IncVar, ExitingBB) ||
        mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT);
    if (SafeToPostInc) {
      UsePostInc = true;
      CmpIndVar = IncVar;
    }
  }

  // It may be necessary to drop nowrap flags on the incrementing instruction
  // if either LFTR moves from a pre-inc check to a post-inc check (in which
  // case the increment might have previously been poison on the last iteration
  // only) or if LFTR switches to a different IV that was previously dynamically
  // dead (and as such may be arbitrarily poison). We remove any nowrap flags
  // that SCEV didn't infer for the post-inc addrec (even if we use a pre-inc
  // check), because the pre-inc addrec flags may be adopted from the original
  // instruction, while SCEV has to explicitly prove the post-inc nowrap flags.
  // TODO: This handling is inaccurate for one case: If we switch to a
  // dynamically dead IV that wraps on the first loop iteration only, which is
  // not covered by the post-inc addrec. (If the new IV was not dynamically
  // dead, it could not be poison on the first iteration in the first place.)
  if (auto *BO = dyn_cast<BinaryOperator>(IncVar)) {
    const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IncVar));
    if (BO->hasNoUnsignedWrap())
      BO->setHasNoUnsignedWrap(AR->hasNoUnsignedWrap());
    if (BO->hasNoSignedWrap())
      BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
  }

  Value *ExitCnt = genLoopLimit(
      IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
  assert(ExitCnt->getType()->isPointerTy() ==
             IndVar->getType()->isPointerTy() &&
         "genLoopLimit missed a cast");

  // Insert a new icmp_ne or icmp_eq instruction before the branch.
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  ICmpInst::Predicate P;
  if (L->contains(BI->getSuccessor(0)))
    P = ICmpInst::ICMP_NE;
  else
    P = ICmpInst::ICMP_EQ;

  IRBuilder<> Builder(BI);

  // The new loop exit condition should reuse the debug location of the
  // original loop exit condition.
  if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
    Builder.SetCurrentDebugLocation(Cond->getDebugLoc());

  // For integer IVs, if we evaluated the limit in the narrower bitwidth to
  // avoid the expensive expansion of the limit expression in the wider type,
  // emit a truncate to narrow the IV to the ExitCount type.  This is safe
  // since we know (from the exit count bitwidth), that we can't self-wrap in
  // the narrower type.
  unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
  unsigned ExitCntSize = SE->getTypeSizeInBits(ExitCnt->getType());
  if (CmpIndVarSize > ExitCntSize) {
    assert(!CmpIndVar->getType()->isPointerTy() &&
           !ExitCnt->getType()->isPointerTy());

    // Before resorting to actually inserting the truncate, use the same
    // reasoning as from SimplifyIndvar::eliminateTrunc to see if we can extend
    // the other side of the comparison instead.  We still evaluate the limit
    // in the narrower bitwidth; we just prefer a zext/sext outside the loop to
    // a truncate within it.
    bool Extended = false;
    const SCEV *IV = SE->getSCEV(CmpIndVar);
    const SCEV *TruncatedIV = SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
                                                  ExitCnt->getType());
    const SCEV *ZExtTrunc =
      SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType());

    if (ZExtTrunc == IV) {
      Extended = true;
      ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
                                   "wide.trip.count");
    } else {
      const SCEV *SExtTrunc =
        SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType());
      if (SExtTrunc == IV) {
        Extended = true;
        ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
                                     "wide.trip.count");
      }
    }

    if (Extended) {
      bool Discard;
      L->makeLoopInvariant(ExitCnt, Discard);
    } else
      CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
                                      "lftr.wideiv");
  }
  LLVM_DEBUG(dbgs() << "INDVARS: Rewriting loop exit condition to:\n"
                    << "      LHS:" << *CmpIndVar << '\n'
                    << "       op:\t" << (P == ICmpInst::ICMP_NE ? "!=" : "==")
                    << "\n"
                    << "      RHS:\t" << *ExitCnt << "\n"
                    << "ExitCount:\t" << *ExitCount << "\n"
                    << "  was: " << *BI->getCondition() << "\n");

  Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
  Value *OrigCond = BI->getCondition();
  // It's tempting to use replaceAllUsesWith here to fully replace the old
  // comparison, but that's not immediately safe, since users of the old
  // comparison may not be dominated by the new comparison. Instead, just
  // update the branch to use the new comparison; in the common case this
  // will make the old comparison dead.
  BI->setCondition(Cond);
  DeadInsts.emplace_back(OrigCond);

  ++NumLFTR;
  return true;
}

//===----------------------------------------------------------------------===//
//  sinkUnusedInvariants. A late subpass to clean up loop preheaders.
//===----------------------------------------------------------------------===//

/// If there's a single exit block, sink any loop-invariant values that
/// were defined in the preheader but not used inside the loop into the
/// exit block to reduce register pressure in the loop.
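///
/// Illustrative: a loop-invariant address computed in the preheader but used
/// only after the loop can be moved into the exit block, shortening its live
/// range across the loop body.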
bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
  BasicBlock *ExitBlock = L->getExitBlock();
  if (!ExitBlock) return false;

  BasicBlock *Preheader = L->getLoopPreheader();
  if (!Preheader) return false;

  bool MadeAnyChanges = false;
  BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
  BasicBlock::iterator I(Preheader->getTerminator());
  while (I != Preheader->begin()) {
    --I;
    // New instructions were inserted at the end of the preheader.
    if (isa<PHINode>(I))
      break;

    // Don't move instructions which might have side effects, since the side
    // effects need to complete before instructions inside the loop.  Also don't
    // move instructions which might read memory, since the loop may modify
    // memory. Note that it's okay if the instruction might have undefined
    // behavior: LoopSimplify guarantees that the preheader dominates the exit
    // block.
    if (I->mayHaveSideEffects() || I->mayReadFromMemory())
      continue;

    // Skip debug info intrinsics.
    if (isa<DbgInfoIntrinsic>(I))
      continue;

    // Skip eh pad instructions.
    if (I->isEHPad())
      continue;

    // Don't sink allocas: we never want to sink static allocas out of the
    // entry block, and correctly sinking dynamic allocas requires
    // checks for stacksave/stackrestore intrinsics.
    // FIXME: Refactor this check somehow?
1243    if (isa<AllocaInst>(I))
1244      continue;
1245
1246    // Determine if there is a use in or before the loop (direct or
1247    // otherwise).
1248    bool UsedInLoop = false;
1249    for (Use &U : I->uses()) {
1250      Instruction *User = cast<Instruction>(U.getUser());
1251      BasicBlock *UseBB = User->getParent();
1252      if (PHINode *P = dyn_cast<PHINode>(User)) {
1253        unsigned i =
1254          PHINode::getIncomingValueNumForOperand(U.getOperandNo());
1255        UseBB = P->getIncomingBlock(i);
1256      }
1257      if (UseBB == Preheader || L->contains(UseBB)) {
1258        UsedInLoop = true;
1259        break;
1260      }
1261    }
1262
1263    // If there is, the def must remain in the preheader.
1264    if (UsedInLoop)
1265      continue;
1266
1267    // Otherwise, sink it to the exit block.
1268    Instruction *ToMove = &*I;
1269    bool Done = false;
1270
1271    if (I != Preheader->begin()) {
1272      // Skip debug info intrinsics.
1273      do {
1274        --I;
1275      } while (I->isDebugOrPseudoInst() && I != Preheader->begin());
1276
1277      if (I->isDebugOrPseudoInst() && I == Preheader->begin())
1278        Done = true;
1279    } else {
1280      Done = true;
1281    }
1282
1283    MadeAnyChanges = true;
1284    ToMove->moveBefore(*ExitBlock, InsertPt);
1285    SE->forgetValue(ToMove);
1286    if (Done) break;
1287    InsertPt = ToMove->getIterator();
1288  }
1289
1290  return MadeAnyChanges;
1291}

static void replaceExitCond(BranchInst *BI, Value *NewCond,
                            SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  auto *OldCond = BI->getCondition();
  LLVM_DEBUG(dbgs() << "Replacing condition of loop-exiting branch " << *BI
                    << " with " << *NewCond << "\n");
  BI->setCondition(NewCond);
  if (OldCond->use_empty())
    DeadInsts.emplace_back(OldCond);
}

static Constant *createFoldedExitCond(const Loop *L, BasicBlock *ExitingBB,
                                      bool IsTaken) {
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
  auto *OldCond = BI->getCondition();
  return ConstantInt::get(OldCond->getType(),
                          IsTaken ? ExitIfTrue : !ExitIfTrue);
}
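
// For example (hypothetical IR): given "br i1 %c, label %exit, label %latch",
// the true successor leaves the loop, so ExitIfTrue is true and the folded
// condition is:
//   IsTaken == true  -> i1 true  (the exit is always taken)
//   IsTaken == false -> i1 false (the exit is never taken)
// With the successors swapped, ExitIfTrue is false and both constants invert.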

static void foldExit(const Loop *L, BasicBlock *ExitingBB, bool IsTaken,
                     SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  auto *NewCond = createFoldedExitCond(L, ExitingBB, IsTaken);
  replaceExitCond(BI, NewCond, DeadInsts);
}

static void replaceLoopPHINodesWithPreheaderValues(
    LoopInfo *LI, Loop *L, SmallVectorImpl<WeakTrackingVH> &DeadInsts,
    ScalarEvolution &SE) {
  assert(L->isLoopSimplifyForm() && "Should only do it in simplify form!");
  auto *LoopPreheader = L->getLoopPreheader();
  auto *LoopHeader = L->getHeader();
  SmallVector<Instruction *> Worklist;
  for (auto &PN : LoopHeader->phis()) {
    auto *PreheaderIncoming = PN.getIncomingValueForBlock(LoopPreheader);
    for (User *U : PN.users())
      Worklist.push_back(cast<Instruction>(U));
    SE.forgetValue(&PN);
    PN.replaceAllUsesWith(PreheaderIncoming);
    DeadInsts.emplace_back(&PN);
  }

  // Replacing with the preheader value will often allow IV users to simplify
  // (especially if the preheader value is a constant).
  SmallPtrSet<Instruction *, 16> Visited;
  while (!Worklist.empty()) {
    auto *I = cast<Instruction>(Worklist.pop_back_val());
    if (!Visited.insert(I).second)
      continue;

    // Don't simplify instructions outside the loop.
    if (!L->contains(I))
      continue;

    Value *Res = simplifyInstruction(I, I->getModule()->getDataLayout());
    if (Res && LI->replacementPreservesLCSSAForm(I, Res)) {
      for (User *U : I->users())
        Worklist.push_back(cast<Instruction>(U));
      I->replaceAllUsesWith(Res);
      DeadInsts.emplace_back(I);
    }
  }
}
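
// For illustration only (hypothetical IR): when the backedge is known to
// never be taken, a header phi such as
//   %iv = phi i32 [ 0, %preheader ], [ %iv.next, %latch ]
// is replaced by its preheader value 0, after which users like
//   %cmp = icmp eq i32 %iv, 0
// fold via simplifyInstruction, and the simplification propagates through
// the worklist to their users in turn.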

static Value *
createInvariantCond(const Loop *L, BasicBlock *ExitingBB,
                    const ScalarEvolution::LoopInvariantPredicate &LIP,
                    SCEVExpander &Rewriter) {
  ICmpInst::Predicate InvariantPred = LIP.Pred;
  BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
  Rewriter.setInsertPoint(BI);
  auto *LHSV = Rewriter.expandCodeFor(LIP.LHS);
  auto *RHSV = Rewriter.expandCodeFor(LIP.RHS);
  bool ExitIfTrue = !L->contains(*succ_begin(ExitingBB));
  if (ExitIfTrue)
    InvariantPred = ICmpInst::getInversePredicate(InvariantPred);
  IRBuilder<> Builder(BI);
  return Builder.CreateICmp(InvariantPred, LHSV, RHSV,
                            BI->getCondition()->getName());
}
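
// Note: LIP.Pred is phrased as "stay in the loop".  If the branch exits on a
// true condition, the materialized compare must be its negation; e.g. a
// stay-in-loop predicate of "ult" becomes a branch condition of "uge".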

static std::optional<Value *>
createReplacement(ICmpInst *ICmp, const Loop *L, BasicBlock *ExitingBB,
                  const SCEV *MaxIter, bool Inverted, bool SkipLastIter,
                  ScalarEvolution *SE, SCEVExpander &Rewriter) {
  ICmpInst::Predicate Pred = ICmp->getPredicate();
  Value *LHS = ICmp->getOperand(0);
  Value *RHS = ICmp->getOperand(1);

  // 'LHS pred RHS' should now mean that we stay in loop.
  auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
  if (Inverted)
    Pred = CmpInst::getInversePredicate(Pred);

  const SCEV *LHSS = SE->getSCEVAtScope(LHS, L);
  const SCEV *RHSS = SE->getSCEVAtScope(RHS, L);
  // Can we prove it to be trivially true or false?
  if (auto EV = SE->evaluatePredicateAt(Pred, LHSS, RHSS, BI))
    return createFoldedExitCond(L, ExitingBB, /*IsTaken*/ !*EV);

  auto *ARTy = LHSS->getType();
  auto *MaxIterTy = MaxIter->getType();
  // If possible, adjust types.
  if (SE->getTypeSizeInBits(ARTy) > SE->getTypeSizeInBits(MaxIterTy))
    MaxIter = SE->getZeroExtendExpr(MaxIter, ARTy);
  else if (SE->getTypeSizeInBits(ARTy) < SE->getTypeSizeInBits(MaxIterTy)) {
    const SCEV *MinusOne = SE->getMinusOne(ARTy);
    auto *MaxAllowedIter = SE->getZeroExtendExpr(MinusOne, MaxIterTy);
    if (SE->isKnownPredicateAt(ICmpInst::ICMP_ULE, MaxIter, MaxAllowedIter, BI))
      MaxIter = SE->getTruncateExpr(MaxIter, ARTy);
  }

  if (SkipLastIter) {
    // Semantically, skipping the last iteration means "subtract 1 without
    // worrying about unsigned wrap".
    // getLoopInvariantExitCondDuringFirstIterations knows how to deal with
    // umin in a smart way, but umin(a, b) - 1 will likely not simplify.
    // So we manually construct umin(a - 1, b - 1).
    SmallVector<const SCEV *, 4> Elements;
    if (auto *UMin = dyn_cast<SCEVUMinExpr>(MaxIter)) {
      for (auto *Op : UMin->operands())
        Elements.push_back(SE->getMinusSCEV(Op, SE->getOne(Op->getType())));
      MaxIter = SE->getUMinFromMismatchedTypes(Elements);
    } else
      MaxIter = SE->getMinusSCEV(MaxIter, SE->getOne(MaxIter->getType()));
  }

  // Check if there is a loop-invariant predicate equivalent to our check.
  auto LIP = SE->getLoopInvariantExitCondDuringFirstIterations(Pred, LHSS, RHSS,
                                                               L, BI, MaxIter);
  if (!LIP)
    return std::nullopt;

  // Can we prove it to be trivially true?
  if (SE->isKnownPredicateAt(LIP->Pred, LIP->LHS, LIP->RHS, BI))
    return createFoldedExitCond(L, ExitingBB, /*IsTaken*/ false);
  else
    return createInvariantCond(L, ExitingBB, *LIP, Rewriter);
}
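
// For illustration only: a hedged sketch of the replacement above
// (hypothetical IR; names invented).  With %iv = {%start,+,1} and MaxIter
// small enough that ScalarEvolution can prove the predicate keeps the same
// outcome during the first MaxIter iterations, the in-loop check
//   %c = icmp ult i32 %iv, %len
// may be replaced by its value on the first iteration,
//   %c.first_iter = icmp ult i32 %start, %len
// which is loop invariant and can later be hoisted or folded.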

static bool optimizeLoopExitWithUnknownExitCount(
    const Loop *L, BranchInst *BI, BasicBlock *ExitingBB, const SCEV *MaxIter,
    bool SkipLastIter, ScalarEvolution *SE, SCEVExpander &Rewriter,
    SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
  assert(
      (L->contains(BI->getSuccessor(0)) != L->contains(BI->getSuccessor(1))) &&
      "Not a loop exit!");

  // For a branch that stays in the loop on a TRUE condition, look through
  // ANDs.  For a branch that stays in the loop on a FALSE condition, look
  // through ORs.  Both give the same logic: "stay in loop iff all conditions
  // are true (false)".
  bool Inverted = L->contains(BI->getSuccessor(1));
  SmallVector<ICmpInst *, 4> LeafConditions;
  SmallVector<Value *, 4> Worklist;
  SmallPtrSet<Value *, 4> Visited;
  Value *OldCond = BI->getCondition();
  Visited.insert(OldCond);
  Worklist.push_back(OldCond);

  auto GoThrough = [&](Value *V) {
    Value *LHS = nullptr, *RHS = nullptr;
    if (Inverted) {
      if (!match(V, m_LogicalOr(m_Value(LHS), m_Value(RHS))))
        return false;
    } else {
      if (!match(V, m_LogicalAnd(m_Value(LHS), m_Value(RHS))))
        return false;
    }
    if (Visited.insert(LHS).second)
      Worklist.push_back(LHS);
    if (Visited.insert(RHS).second)
      Worklist.push_back(RHS);
    return true;
  };

  do {
    Value *Curr = Worklist.pop_back_val();
    // Go through AND/OR conditions. Collect leaf ICMPs. We only care about
    // those with one use, to avoid instruction duplication.
    if (Curr->hasOneUse())
      if (!GoThrough(Curr))
        if (auto *ICmp = dyn_cast<ICmpInst>(Curr))
          LeafConditions.push_back(ICmp);
  } while (!Worklist.empty());
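
  // For example (hypothetical IR), with Inverted == false (the branch stays
  // in the loop on true):
  //   %c2 = and i1 %icmp2, %icmp3
  //   %c  = and i1 %icmp1, %c2
  //   br i1 %c, label %header, label %exit
  // the traversal above collects %icmp1, %icmp2 and %icmp3 as leaf
  // conditions, provided each intermediate value has a single use.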

  // If the current basic block has the same exit count as the whole loop, and
  // it consists of multiple icmp's, try to collect all icmp's that give the
  // exact same exit count. For all other icmp's, we could use one less
  // iteration, because their value on the last iteration doesn't really
  // matter.
  SmallPtrSet<ICmpInst *, 4> ICmpsFailingOnLastIter;
  if (!SkipLastIter && LeafConditions.size() > 1 &&
      SE->getExitCount(L, ExitingBB,
                       ScalarEvolution::ExitCountKind::SymbolicMaximum) ==
          MaxIter)
    for (auto *ICmp : LeafConditions) {
      auto EL = SE->computeExitLimitFromCond(L, ICmp, Inverted,
                                             /*ControlsExit*/ false);
      auto *ExitMax = EL.SymbolicMaxNotTaken;
      if (isa<SCEVCouldNotCompute>(ExitMax))
        continue;
      // They could be of different types (specifically this happens after
      // IV widening).
      auto *WiderType =
          SE->getWiderType(ExitMax->getType(), MaxIter->getType());
      auto *WideExitMax = SE->getNoopOrZeroExtend(ExitMax, WiderType);
      auto *WideMaxIter = SE->getNoopOrZeroExtend(MaxIter, WiderType);
      if (WideExitMax == WideMaxIter)
        ICmpsFailingOnLastIter.insert(ICmp);
    }

  bool Changed = false;
  for (auto *OldCond : LeafConditions) {
    // Skip the last iteration for this icmp under one of two conditions:
    // - We do it for all conditions;
    // - There is another ICmp that would fail on the last iter, so this one
    //   doesn't really matter.
    bool OptimisticSkipLastIter = SkipLastIter;
    if (!OptimisticSkipLastIter) {
      if (ICmpsFailingOnLastIter.size() > 1)
        OptimisticSkipLastIter = true;
      else if (ICmpsFailingOnLastIter.size() == 1)
        OptimisticSkipLastIter = !ICmpsFailingOnLastIter.count(OldCond);
    }
    if (auto Replaced =
            createReplacement(OldCond, L, ExitingBB, MaxIter, Inverted,
                              OptimisticSkipLastIter, SE, Rewriter)) {
      Changed = true;
      auto *NewCond = *Replaced;
      if (auto *NCI = dyn_cast<Instruction>(NewCond)) {
        NCI->setName(OldCond->getName() + ".first_iter");
        NCI->moveBefore(cast<Instruction>(OldCond));
      }
      LLVM_DEBUG(dbgs() << "Unknown exit count: Replacing " << *OldCond
                        << " with " << *NewCond << "\n");
      assert(OldCond->hasOneUse() && "Must be!");
      OldCond->replaceAllUsesWith(NewCond);
      DeadInsts.push_back(OldCond);
      // Make sure we no longer consider this condition as failing on the
      // last iteration.
      ICmpsFailingOnLastIter.erase(OldCond);
    }
  }
  return Changed;
}

bool IndVarSimplify::canonicalizeExitCondition(Loop *L) {
  // Note: This duplicates a particular part of SimplifyIndVars' reasoning.
  // We need to duplicate it because, given icmp zext(small-iv), C, IVUsers
  // never reaches the icmp since the zext doesn't fold to an AddRec unless
  // it already has flags.  The alternative would be to extend the set of
  // "interesting" IV users to include the icmp, but doing that regresses
  // results in practice: it queries SCEVs before the trip counts which rely
  // on them, which causes SCEV to cache sub-optimal answers.  The concern
  // about caching sub-optimal results is why we only query SCEVs of the
  // loop invariant RHS here.
  SmallVector<BasicBlock*, 16> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  bool Changed = false;
  for (auto *ExitingBB : ExitingBlocks) {
    auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      continue;
    assert(BI->isConditional() && "exit branch must be conditional");

    auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
    if (!ICmp || !ICmp->hasOneUse())
      continue;

    auto *LHS = ICmp->getOperand(0);
    auto *RHS = ICmp->getOperand(1);
    // For the range reasoning, avoid computing SCEVs in the loop to avoid
    // poisoning the cache with sub-optimal results.  For the must-execute
    // case, this is a necessary precondition for correctness.
    if (!L->isLoopInvariant(RHS)) {
      if (!L->isLoopInvariant(LHS))
        continue;
      // Same logic applies for the inverse case
      std::swap(LHS, RHS);
    }

    // Match (icmp signed-cond zext, RHS)
    Value *LHSOp = nullptr;
    if (!match(LHS, m_ZExt(m_Value(LHSOp))) || !ICmp->isSigned())
      continue;

    const DataLayout &DL = ExitingBB->getModule()->getDataLayout();
    const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
    const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
    auto FullCR = ConstantRange::getFull(InnerBitWidth);
    FullCR = FullCR.zeroExtend(OuterBitWidth);
    auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L));
    if (FullCR.contains(RHSCR)) {
      // We have now matched icmp signed-cond zext(X), zext(Y'), and can thus
      // replace the signed condition with the unsigned version.
      ICmp->setPredicate(ICmp->getUnsignedPredicate());
      Changed = true;
      // Note: No SCEV invalidation needed.  We've changed the predicate, but
      // have not changed exit counts, or the values produced by the compare.
      continue;
    }
  }
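
  // For example (hypothetical IR), with an i8 inner and i32 outer type: if
  // SCEV proves that %rhs lies in [0, 256), then both operands of
  //   %c = icmp slt i32 (zext i8 %x to i32), %rhs
  // are non-negative in i32, so signed and unsigned orderings agree and the
  // predicate can be rewritten to "icmp ult".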

  // Now that we've canonicalized the condition to match the extend,
  // see if we can rotate the extend out of the loop.
  for (auto *ExitingBB : ExitingBlocks) {
    auto *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      continue;
    assert(BI->isConditional() && "exit branch must be conditional");

    auto *ICmp = dyn_cast<ICmpInst>(BI->getCondition());
    if (!ICmp || !ICmp->hasOneUse() || !ICmp->isUnsigned())
      continue;

    bool Swapped = false;
    auto *LHS = ICmp->getOperand(0);
    auto *RHS = ICmp->getOperand(1);
    if (L->isLoopInvariant(LHS) == L->isLoopInvariant(RHS))
      // Nothing to rotate
      continue;
    if (L->isLoopInvariant(LHS)) {
      // Same logic applies for the inverse case until we actually pick
      // which operand of the compare to update.
      Swapped = true;
      std::swap(LHS, RHS);
    }
    assert(!L->isLoopInvariant(LHS) && L->isLoopInvariant(RHS));

    // Match (icmp unsigned-cond zext, RHS)
    // TODO: Extend to handle corresponding sext/signed-cmp case
    // TODO: Extend to other invertible functions
    Value *LHSOp = nullptr;
    if (!match(LHS, m_ZExt(m_Value(LHSOp))))
      continue;

    // In general, we only rotate if we can do so without increasing the number
    // of instructions.  The exception is when we have a zext(add-rec).  The
    // reason for allowing this exception is that we know we need to get rid
    // of the zext for SCEV to be able to compute a trip count for said loops;
    // we consider the new trip count valuable enough to increase instruction
    // count by one.
    if (!LHS->hasOneUse() && !isa<SCEVAddRecExpr>(SE->getSCEV(LHSOp)))
      continue;

    // Given an icmp unsigned-cond zext(Op) where zext(trunc(RHS)) == RHS,
    // replace it with an icmp of the form icmp unsigned-cond Op, trunc(RHS)
    // when the zext is loop varying and RHS is loop invariant.  This converts
    // loop varying work to loop-invariant work.
    auto doRotateTransform = [&]() {
      assert(ICmp->isUnsigned() && "must have proven unsigned already");
      auto *NewRHS =
        CastInst::Create(Instruction::Trunc, RHS, LHSOp->getType(), "",
                         L->getLoopPreheader()->getTerminator());
      ICmp->setOperand(Swapped ? 1 : 0, LHSOp);
      ICmp->setOperand(Swapped ? 0 : 1, NewRHS);
      if (LHS->use_empty())
        DeadInsts.push_back(LHS);
    };
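
    // For example (hypothetical IR): when %len zero-extends losslessly from
    // i16, i.e. zext(trunc(%len)) == %len, the loop-varying compare
    //   %c = icmp ult i64 (zext i16 %iv to i64), %len
    // is rotated to a compare on the narrow type, with the truncate placed
    // in the preheader:
    //   %len.trunc = trunc i64 %len to i16   ; loop-invariant work
    //   %c = icmp ult i16 %iv, %len.trunc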

    const DataLayout &DL = ExitingBB->getModule()->getDataLayout();
    const unsigned InnerBitWidth = DL.getTypeSizeInBits(LHSOp->getType());
    const unsigned OuterBitWidth = DL.getTypeSizeInBits(RHS->getType());
    auto FullCR = ConstantRange::getFull(InnerBitWidth);
    FullCR = FullCR.zeroExtend(OuterBitWidth);
    auto RHSCR = SE->getUnsignedRange(SE->applyLoopGuards(SE->getSCEV(RHS), L));
    if (FullCR.contains(RHSCR)) {
      doRotateTransform();
      Changed = true;
      // Note, we are leaving SCEV in an unfortunately imprecise case here
      // as rotation tends to reveal information about trip counts not
      // previously visible.
      continue;
    }
  }

  return Changed;
}

bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
  SmallVector<BasicBlock*, 16> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  // Remove all exits which aren't both rewriteable and execute on every
  // iteration.
  llvm::erase_if(ExitingBlocks, [&](BasicBlock *ExitingBB) {
    // If our exiting block exits multiple loops, we can only rewrite the
    // innermost one.  Otherwise, we're changing how many times the innermost
    // loop runs before it exits.
    if (LI->getLoopFor(ExitingBB) != L)
      return true;

    // Can't rewrite non-branch yet.
    BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      return true;

    // Likewise, the loop latch must be dominated by the exiting BB.
    if (!DT->dominates(ExitingBB, L->getLoopLatch()))
      return true;

    if (auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
      // If already constant, nothing to do. However, if this is an
      // unconditional exit, we can still replace header phis with their
      // preheader value.
      if (!L->contains(BI->getSuccessor(CI->isNullValue())))
        replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts, *SE);
      return true;
    }

    return false;
  });

  if (ExitingBlocks.empty())
    return false;

  // Get a symbolic upper bound on the loop backedge taken count.
  const SCEV *MaxBECount = SE->getSymbolicMaxBackedgeTakenCount(L);
  if (isa<SCEVCouldNotCompute>(MaxBECount))
    return false;

  // Visit our exit blocks in order of dominance. We know that all exits must
  // dominate the latch, so there is a total dominance order between them.
  llvm::sort(ExitingBlocks, [&](BasicBlock *A, BasicBlock *B) {
               // std::sort sorts in ascending order, so we want the inverse of
               // the normal dominance relation.
               if (A == B) return false;
               if (DT->properlyDominates(A, B))
                 return true;
               else {
                 assert(DT->properlyDominates(B, A) &&
                        "expected total dominance order!");
                 return false;
               }
  });
#ifndef NDEBUG
  for (unsigned i = 1; i < ExitingBlocks.size(); i++) {
    assert(DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]));
  }
#endif

  bool Changed = false;
  bool SkipLastIter = false;
  const SCEV *CurrMaxExit = SE->getCouldNotCompute();
  auto UpdateSkipLastIter = [&](const SCEV *MaxExitCount) {
    if (SkipLastIter || isa<SCEVCouldNotCompute>(MaxExitCount))
      return;
    if (isa<SCEVCouldNotCompute>(CurrMaxExit))
      CurrMaxExit = MaxExitCount;
    else
      CurrMaxExit = SE->getUMinFromMismatchedTypes(CurrMaxExit, MaxExitCount);
    // If the loop has more than 1 iteration, all further checks will be
    // executed 1 iteration less.
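    // For example (a hedged illustration; %n is hypothetical): if a
    // dominating exit has count %n and the loop's MaxBECount is also %n,
    // then any later exiting block is never reached on the final iteration,
    // so its condition only needs to hold for the first %n - 1 iterations.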
    if (CurrMaxExit == MaxBECount)
      SkipLastIter = true;
  };
  SmallSet<const SCEV *, 8> DominatingExactExitCounts;
  for (BasicBlock *ExitingBB : ExitingBlocks) {
    const SCEV *ExactExitCount = SE->getExitCount(L, ExitingBB);
    const SCEV *MaxExitCount = SE->getExitCount(
        L, ExitingBB, ScalarEvolution::ExitCountKind::SymbolicMaximum);
    if (isa<SCEVCouldNotCompute>(ExactExitCount)) {
      // Okay, we do not know the exit count here. Can we at least prove that
      // it will remain the same within the iteration space?
      auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
      auto OptimizeCond = [&](bool SkipLastIter) {
        return optimizeLoopExitWithUnknownExitCount(L, BI, ExitingBB,
                                                    MaxBECount, SkipLastIter,
                                                    SE, Rewriter, DeadInsts);
      };

      // TODO: We might have proved that we can skip the last iteration for
      // this check. In this case, we only want to check the condition on the
      // pre-last iteration (MaxBECount - 1). However, there is a nasty
      // corner case:
      //
      //   for (i = len; i != 0; i--) { ... check (i ult X) ... }
      //
      // If we could not prove that len != 0, then we also could not prove that
      // (len - 1) is not UINT_MAX. If we simply query (len - 1), then
      // OptimizeCond will likely not prove anything for it, even if it could
      // prove the same fact for len.
      //
      // As a temporary solution, we query both the last and the pre-last
      // iterations in the hope that we will be able to prove triviality for
      // at least one of them. We can stop querying MaxBECount for this case
      // once SCEV understands that (MaxBECount - 1) will not overflow here.
      if (OptimizeCond(false))
        Changed = true;
      else if (SkipLastIter && OptimizeCond(true))
        Changed = true;
      UpdateSkipLastIter(MaxExitCount);
      continue;
    }

    UpdateSkipLastIter(ExactExitCount);

    // If we know we'd exit on the first iteration, rewrite the exit to
    // reflect this.  This does not imply the loop must exit through this
    // exit; there may be an earlier one taken on the first iteration.
    // We know that the backedge can't be taken, so we replace all
    // the header PHIs with values coming from the preheader.
    if (ExactExitCount->isZero()) {
      foldExit(L, ExitingBB, true, DeadInsts);
      replaceLoopPHINodesWithPreheaderValues(LI, L, DeadInsts, *SE);
      Changed = true;
      continue;
    }

    assert(ExactExitCount->getType()->isIntegerTy() &&
           MaxBECount->getType()->isIntegerTy() &&
           "Exit counts must be integers");

    Type *WiderType =
        SE->getWiderType(MaxBECount->getType(), ExactExitCount->getType());
    ExactExitCount = SE->getNoopOrZeroExtend(ExactExitCount, WiderType);
    MaxBECount = SE->getNoopOrZeroExtend(MaxBECount, WiderType);
    assert(MaxBECount->getType() == ExactExitCount->getType());

    // Can we prove that some other exit must be taken strictly before this
    // one?
    if (SE->isLoopEntryGuardedByCond(L, CmpInst::ICMP_ULT, MaxBECount,
                                     ExactExitCount)) {
      foldExit(L, ExitingBB, false, DeadInsts);
      Changed = true;
      continue;
    }

    // As we run, keep track of which exit counts we've encountered.  If we
    // find a duplicate, we've found an exit which would have exited on the
    // exiting iteration, but (from the visit order) strictly follows another
    // which does the same and is thus dead.
    if (!DominatingExactExitCounts.insert(ExactExitCount).second) {
      foldExit(L, ExitingBB, false, DeadInsts);
      Changed = true;
      continue;
    }

    // TODO: There might be another opportunity to leverage SCEV's reasoning
    // here.  If we kept track of the min of dominating exits so far, we could
    // discharge exits with EC >= MDEC. This is less powerful than the existing
    // transform (since later exits aren't considered), but potentially more
    // powerful for any case where SCEV can prove a >=u b, but neither a == b
    // nor a >u b.  Such a case is not currently known.
  }
  return Changed;
}

bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
  SmallVector<BasicBlock*, 16> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  // Finally, see if we can rewrite our exit conditions into a loop invariant
  // form. If we have a read-only loop, and we can tell that we must exit down
  // a path which does not need any of the values computed within the loop, we
  // can rewrite the loop to exit on the first iteration.  Note that this
  // neither a) tells us the loop must exit on the first iteration (unless
  // *all* exits are predicateable) nor b) tells us *which* exit might be
  // taken.  This transformation looks a lot like a restricted form of dead
  // loop elimination, but restricted to read-only loops and without
  // necessarily needing to kill the loop entirely.
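  //
  // For illustration only: a hedged sketch of the rewrite (hypothetical IR).
  // For a read-only loop whose exact backedge-taken count is %btc, an
  // in-loop exit test such as
  //   %c = icmp eq i32 %iv, %n
  //   br i1 %c, label %exit, label %latch
  // becomes a loop-invariant test of this exit's count %ec against %btc,
  //   %c = icmp eq i32 %ec, %btc
  // which is taken on the first iteration exactly when this exit would have
  // been the one terminating the loop.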
  if (!LoopPredication)
    return false;

  // Note: ExactBTC is the exact backedge taken count *iff* the loop exits
  // through *explicit* control flow.  We have to eliminate the possibility of
  // implicit exits (see below) before we know it's truly exact.
  const SCEV *ExactBTC = SE->getBackedgeTakenCount(L);
  if (isa<SCEVCouldNotCompute>(ExactBTC) || !Rewriter.isSafeToExpand(ExactBTC))
    return false;

  assert(SE->isLoopInvariant(ExactBTC, L) && "BTC must be loop invariant");
  assert(ExactBTC->getType()->isIntegerTy() && "BTC must be integer");

  auto BadExit = [&](BasicBlock *ExitingBB) {
    // If our exiting block exits multiple loops, we can only rewrite the
    // innermost one.  Otherwise, we're changing how many times the innermost
    // loop runs before it exits.
    if (LI->getLoopFor(ExitingBB) != L)
      return true;

    // Can't rewrite non-branch yet.
    BranchInst *BI = dyn_cast<BranchInst>(ExitingBB->getTerminator());
    if (!BI)
      return true;

    // If already constant, nothing to do.
    if (isa<Constant>(BI->getCondition()))
      return true;

    // If the exit block has phis, we need to be able to compute the values
    // within the loop which contains them.  This assumes trivially lcssa phis
    // have already been removed; TODO: generalize
    BasicBlock *ExitBlock =
        BI->getSuccessor(L->contains(BI->getSuccessor(0)) ? 1 : 0);
    if (!ExitBlock->phis().empty())
      return true;

    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
    if (isa<SCEVCouldNotCompute>(ExitCount) ||
        !Rewriter.isSafeToExpand(ExitCount))
      return true;

    assert(SE->isLoopInvariant(ExitCount, L) &&
           "Exit count must be loop invariant");
    assert(ExitCount->getType()->isIntegerTy() && "Exit count must be integer");
    return false;
  };

  // If we have any exits which can't be predicated themselves, then we can't
  // predicate any exit which isn't guaranteed to execute before it.  Consider
  // two exits (a) and (b) which would both exit on the same iteration.  If we
  // can predicate (b), but not (a), and (a) precedes (b) along some path, then
  // we could convert a loop from exiting through (a) to one exiting through
  // (b).  Note that this problem exists only for exits with the same exit
  // count, and we could be more aggressive when exit counts are known inequal.
  llvm::sort(ExitingBlocks,
            [&](BasicBlock *A, BasicBlock *B) {
              // std::sort sorts in ascending order, so we want the inverse of
              // the normal dominance relation, plus a tie breaker for blocks
              // unordered by dominance.
              if (DT->properlyDominates(A, B)) return true;
              if (DT->properlyDominates(B, A)) return false;
              return A->getName() < B->getName();
            });
  // Check to see if our exit blocks are a total order (i.e. a linear chain of
  // exits before the backedge).  If they aren't, reasoning about reachability
  // is complicated and we choose not to for now.
  for (unsigned i = 1; i < ExitingBlocks.size(); i++)
    if (!DT->dominates(ExitingBlocks[i-1], ExitingBlocks[i]))
      return false;

  // Given our sorted total order, we know that exit[j] must be evaluated
  // after all exit[i] such that j > i.
  for (unsigned i = 0, e = ExitingBlocks.size(); i < e; i++)
    if (BadExit(ExitingBlocks[i])) {
      ExitingBlocks.resize(i);
      break;
    }

  if (ExitingBlocks.empty())
    return false;

  // We rely on not being able to reach an exiting block on a later iteration
  // than its statically computed exit count.  The implementation of
  // getExitCount currently has this invariant, but assert it here so that
  // breakage is obvious if this ever changes.
  assert(llvm::all_of(ExitingBlocks, [&](BasicBlock *ExitingBB) {
        return DT->dominates(ExitingBB, L->getLoopLatch());
      }));

  // At this point, ExitingBlocks consists of only those blocks which are
  // predicatable.  Given that, we know we have at least one exit we can
  // predicate if the loop doesn't have side effects and doesn't have any
  // implicit exits (because then our exact BTC isn't actually exact).
  // @Reviewers - As structured, this is O(I^2) for loop nests.  Any
  // suggestions on how to improve this?  I can obviously bail out for outer
  // loops, but that seems less than ideal.  MemorySSA can find memory writes,
  // is that enough for *all* side effects?
  for (BasicBlock *BB : L->blocks())
    for (auto &I : *BB)
      // TODO: isGuaranteedToTransfer
      if (I.mayHaveSideEffects())
        return false;

  bool Changed = false;
  // Finally, do the actual predication for all predicatable blocks.  A couple
  // of notes here:
  // 1) We don't bother to constant fold dominated exits with identical exit
  //    counts; that's simply a form of CSE/equality propagation and we leave
  //    it for dedicated passes.
  // 2) We insert the comparison at the branch.  Hoisting introduces additional
  //    legality constraints and we leave that to dedicated logic.  We want to
  //    predicate even if we can't insert a loop invariant expression as
  //    peeling or unrolling will likely reduce the cost of the otherwise loop
  //    varying check.
  Rewriter.setInsertPoint(L->getLoopPreheader()->getTerminator());
  IRBuilder<> B(L->getLoopPreheader()->getTerminator());
  Value *ExactBTCV = nullptr; // Lazily generated if needed.
  for (BasicBlock *ExitingBB : ExitingBlocks) {
    const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);

    auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
    Value *NewCond;
    if (ExitCount == ExactBTC) {
      NewCond = L->contains(BI->getSuccessor(0)) ?
        B.getFalse() : B.getTrue();
    } else {
      Value *ECV = Rewriter.expandCodeFor(ExitCount);
      if (!ExactBTCV)
        ExactBTCV = Rewriter.expandCodeFor(ExactBTC);
      Value *RHS = ExactBTCV;
      if (ECV->getType() != RHS->getType()) {
        Type *WiderTy = SE->getWiderType(ECV->getType(), RHS->getType());
        ECV = B.CreateZExt(ECV, WiderTy);
        RHS = B.CreateZExt(RHS, WiderTy);
      }
      auto Pred = L->contains(BI->getSuccessor(0)) ?
        ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
      NewCond = B.CreateICmp(Pred, ECV, RHS);
    }
    Value *OldCond = BI->getCondition();
    BI->setCondition(NewCond);
    if (OldCond->use_empty())
      DeadInsts.emplace_back(OldCond);
    Changed = true;
  }

  return Changed;
}

//===----------------------------------------------------------------------===//
//  IndVarSimplify driver. Manage several subpasses of IV simplification.
//===----------------------------------------------------------------------===//

bool IndVarSimplify::run(Loop *L) {
  // We need (and expect!) the incoming loop to be in LCSSA.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "LCSSA required to run indvars!");

  // If LoopSimplify form is not available, stay out of trouble. Some notes:
  //  - LSR currently only supports LoopSimplify-form loops. Indvars'
  //    canonicalization can be a pessimization without LSR to "clean up"
  //    afterwards.
  //  - We depend on having a preheader; in particular,
  //    Loop::getCanonicalInductionVariable only supports loops with preheaders,
  //    and we're in trouble if we can't find the induction variable even when
  //    we've manually inserted one.
  //  - LFTR relies on having a single backedge.
  if (!L->isLoopSimplifyForm())
    return false;

#ifndef NDEBUG
  // Used below for a consistency check only.
  // Note: Since the result returned by ScalarEvolution may depend on the order
  // in which previous results are added to its cache, the call to
  // getBackedgeTakenCount() may change following SCEV queries.
  const SCEV *BackedgeTakenCount;
  if (VerifyIndvars)
    BackedgeTakenCount = SE->getBackedgeTakenCount(L);
#endif

  bool Changed = false;
  // If there are any floating-point recurrences, attempt to
  // transform them to use integer recurrences.
  Changed |= rewriteNonIntegerIVs(L);

  // Create a rewriter object which we'll use to transform the code with.
  SCEVExpander Rewriter(*SE, DL, "indvars");
#ifndef NDEBUG
  Rewriter.setDebugType(DEBUG_TYPE);
#endif

  // Eliminate redundant IV users.
  //
  // Simplification works best when run before other consumers of SCEV. We
  // attempt to avoid evaluating SCEVs for sign/zero extend operations until
  // other expressions involving loop IVs have been evaluated. This helps SCEV
  // set no-wrap flags before normalizing sign/zero extension.
  Rewriter.disableCanonicalMode();
  Changed |= simplifyAndExtend(L, Rewriter, LI);

  // Check to see if we can compute the final value of any expressions
  // that are recurrent in the loop, and substitute the exit values from the
  // loop into any instructions outside of the loop that use the final values
  // of the current expressions.
  if (ReplaceExitValue != NeverRepl) {
    if (int Rewrites = rewriteLoopExitValues(L, LI, TLI, SE, TTI, Rewriter, DT,
                                             ReplaceExitValue, DeadInsts)) {
      NumReplaced += Rewrites;
      Changed = true;
    }
  }

  // Eliminate redundant IV cycles.
  NumElimIV += Rewriter.replaceCongruentIVs(L, DT, DeadInsts, TTI);

  // Try to convert exit conditions to unsigned and rotate computation
  // out of the loop.  Note: Handles invalidation internally if needed.
  Changed |= canonicalizeExitCondition(L);

  // Try to eliminate loop exits based on analyzeable exit counts.
  if (optimizeLoopExits(L, Rewriter)) {
    Changed = true;
    // Given we've changed exit counts, notify SCEV.
    // Some nested loops may share the same folded exit basic block,
    // so we need to notify the topmost loop.
    SE->forgetTopmostLoop(L);
  }

  // Try to form loop invariant tests for loop exits by changing how many
  // iterations of the loop run when that is unobservable.
  if (predicateLoopExits(L, Rewriter)) {
    Changed = true;
    // Given we've changed exit counts, notify SCEV.
    SE->forgetLoop(L);
  }

  // If we have a trip count expression, rewrite the loop's exit condition
  // using it.
  if (!DisableLFTR) {
    BasicBlock *PreHeader = L->getLoopPreheader();

    SmallVector<BasicBlock*, 16> ExitingBlocks;
    L->getExitingBlocks(ExitingBlocks);
    for (BasicBlock *ExitingBB : ExitingBlocks) {
      // Can't rewrite non-branch yet.
      if (!isa<BranchInst>(ExitingBB->getTerminator()))
        continue;

      // If our exiting block exits multiple loops, we can only rewrite the
      // innermost one.  Otherwise, we're changing how many times the innermost
      // loop runs before it exits.
      if (LI->getLoopFor(ExitingBB) != L)
        continue;

      if (!needsLFTR(L, ExitingBB))
        continue;

      const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
      if (isa<SCEVCouldNotCompute>(ExitCount))
        continue;

      // This was handled above, but as we form SCEVs, we can sometimes refine
      // existing ones; this allows exit counts to be folded to zero which
      // weren't when optimizeLoopExits saw them.  Arguably, we should iterate
      // until stable to handle cases like this better.
      if (ExitCount->isZero())
        continue;

      PHINode *IndVar = FindLoopCounter(L, ExitingBB, ExitCount, SE, DT);
      if (!IndVar)
        continue;

      // Avoid high cost expansions.  Note: This heuristic is questionable in
      // that our definition of "high cost" is not exactly principled.
      if (Rewriter.isHighCostExpansion(ExitCount, L, SCEVCheapExpansionBudget,
                                       TTI, PreHeader->getTerminator()))
        continue;

      // Check preconditions for proper SCEVExpander operation. SCEV does not
      // express SCEVExpander's dependencies, such as LoopSimplify. Instead
      // any pass that uses the SCEVExpander must do it. This does not work
      // well for loop passes because SCEVExpander makes assumptions about
      // all loops, while LoopPassManager only forces the current loop to be
      // simplified.
      //
      // FIXME: SCEV expansion has no way to bail out, so the caller must
      // explicitly check any assumptions made by SCEV. Brittle.
      const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(ExitCount);
      if (!AR || AR->getLoop()->getLoopPreheader())
        Changed |= linearFunctionTestReplace(L, ExitingBB,
                                             ExitCount, IndVar,
                                             Rewriter);
    }
  }
  // Clear the rewriter cache, because values that are in the rewriter's cache
  // can be deleted in the loop below, causing the AssertingVH in the cache to
  // trigger.
  Rewriter.clear();

  // Now that we're done iterating through lists, clean up any instructions
  // which are now dead.
  while (!DeadInsts.empty()) {
    Value *V = DeadInsts.pop_back_val();

    if (PHINode *PHI = dyn_cast_or_null<PHINode>(V))
      Changed |= RecursivelyDeleteDeadPHINode(PHI, TLI, MSSAU.get());
    else if (Instruction *Inst = dyn_cast_or_null<Instruction>(V))
      Changed |=
          RecursivelyDeleteTriviallyDeadInstructions(Inst, TLI, MSSAU.get());
  }

  // The Rewriter may not be used from this point on.

  // Loop-invariant instructions in the preheader that aren't used in the
  // loop may be sunk below the loop to reduce register pressure.
  Changed |= sinkUnusedInvariants(L);

  // rewriteFirstIterationLoopExitValues does not rely on the computation of
  // trip count and therefore can further simplify exit values in addition to
  // rewriteLoopExitValues.
  Changed |= rewriteFirstIterationLoopExitValues(L);

  // Clean up dead instructions.
  Changed |= DeleteDeadPHIs(L->getHeader(), TLI, MSSAU.get());

  // Check a post-condition.
  assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
         "Indvars did not preserve LCSSA!");

  // Verify that LFTR and any other changes have not interfered with SCEV's
  // ability to compute trip count.  We may have *changed* the exit count, but
  // only by reducing it.
#ifndef NDEBUG
  if (VerifyIndvars && !isa<SCEVCouldNotCompute>(BackedgeTakenCount)) {
    SE->forgetLoop(L);
    const SCEV *NewBECount = SE->getBackedgeTakenCount(L);
    if (SE->getTypeSizeInBits(BackedgeTakenCount->getType()) <
        SE->getTypeSizeInBits(NewBECount->getType()))
      NewBECount = SE->getTruncateOrNoop(NewBECount,
                                         BackedgeTakenCount->getType());
    else
      BackedgeTakenCount = SE->getTruncateOrNoop(BackedgeTakenCount,
                                                 NewBECount->getType());
    assert(!SE->isKnownPredicate(ICmpInst::ICMP_ULT, BackedgeTakenCount,
                                 NewBECount) && "indvars must preserve SCEV");
  }
  if (VerifyMemorySSA && MSSAU)
    MSSAU->getMemorySSA()->verifyMemorySSA();
#endif

  return Changed;
}

PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM,
                                          LoopStandardAnalysisResults &AR,
                                          LPMUpdater &) {
  Function *F = L.getHeader()->getParent();
  const DataLayout &DL = F->getParent()->getDataLayout();

  IndVarSimplify IVS(&AR.LI, &AR.SE, &AR.DT, DL, &AR.TLI, &AR.TTI, AR.MSSA,
                     WidenIndVars && AllowIVWidening);
  if (!IVS.run(&L))
    return PreservedAnalyses::all();

  auto PA = getLoopPassPreservedAnalyses();
  PA.preserveSet<CFGAnalyses>();
  if (AR.MSSA)
    PA.preserve<MemorySSAAnalysis>();
  return PA;
}

namespace {

struct IndVarSimplifyLegacyPass : public LoopPass {
  static char ID; // Pass identification, replacement for typeid

  IndVarSimplifyLegacyPass() : LoopPass(ID) {
    initializeIndVarSimplifyLegacyPassPass(*PassRegistry::getPassRegistry());
  }

  bool runOnLoop(Loop *L, LPPassManager &LPM) override {
    if (skipLoop(L))
      return false;

    auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
    auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
    auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
    auto *TLI = TLIP ? &TLIP->getTLI(*L->getHeader()->getParent()) : nullptr;
    auto *TTIP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
    auto *TTI = TTIP ? &TTIP->getTTI(*L->getHeader()->getParent()) : nullptr;
    const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
    auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
    MemorySSA *MSSA = nullptr;
    if (MSSAAnalysis)
      MSSA = &MSSAAnalysis->getMSSA();

    IndVarSimplify IVS(LI, SE, DT, DL, TLI, TTI, MSSA, AllowIVWidening);
    return IVS.run(L);
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addPreserved<MemorySSAWrapperPass>();
    getLoopAnalysisUsage(AU);
  }
};

} // end anonymous namespace

char IndVarSimplifyLegacyPass::ID = 0;

INITIALIZE_PASS_BEGIN(IndVarSimplifyLegacyPass, "indvars",
                      "Induction Variable Simplification", false, false)
INITIALIZE_PASS_DEPENDENCY(LoopPass)
INITIALIZE_PASS_END(IndVarSimplifyLegacyPass, "indvars",
                    "Induction Variable Simplification", false, false)

Pass *llvm::createIndVarSimplifyPass() {
  return new IndVarSimplifyLegacyPass();
}