1283625Sdim//===-- InductiveRangeCheckElimination.cpp - ------------------------------===// 2283625Sdim// 3283625Sdim// The LLVM Compiler Infrastructure 4283625Sdim// 5283625Sdim// This file is distributed under the University of Illinois Open Source 6283625Sdim// License. See LICENSE.TXT for details. 7283625Sdim// 8283625Sdim//===----------------------------------------------------------------------===// 9283625Sdim// The InductiveRangeCheckElimination pass splits a loop's iteration space into 10283625Sdim// three disjoint ranges. It does that in a way such that the loop running in 11283625Sdim// the middle loop provably does not need range checks. As an example, it will 12283625Sdim// convert 13283625Sdim// 14283625Sdim// len = < known positive > 15283625Sdim// for (i = 0; i < n; i++) { 16283625Sdim// if (0 <= i && i < len) { 17283625Sdim// do_something(); 18283625Sdim// } else { 19283625Sdim// throw_out_of_bounds(); 20283625Sdim// } 21283625Sdim// } 22283625Sdim// 23283625Sdim// to 24283625Sdim// 25283625Sdim// len = < known positive > 26283625Sdim// limit = smin(n, len) 27283625Sdim// // no first segment 28283625Sdim// for (i = 0; i < limit; i++) { 29283625Sdim// if (0 <= i && i < len) { // this check is fully redundant 30283625Sdim// do_something(); 31283625Sdim// } else { 32283625Sdim// throw_out_of_bounds(); 33283625Sdim// } 34283625Sdim// } 35283625Sdim// for (i = limit; i < n; i++) { 36283625Sdim// if (0 <= i && i < len) { 37283625Sdim// do_something(); 38283625Sdim// } else { 39283625Sdim// throw_out_of_bounds(); 40283625Sdim// } 41283625Sdim// } 42283625Sdim//===----------------------------------------------------------------------===// 43283625Sdim 44283625Sdim#include "llvm/ADT/Optional.h" 45283625Sdim#include "llvm/Analysis/BranchProbabilityInfo.h" 46283625Sdim#include "llvm/Analysis/InstructionSimplify.h" 47283625Sdim#include "llvm/Analysis/LoopInfo.h" 48283625Sdim#include "llvm/Analysis/LoopPass.h" 49283625Sdim#include "llvm/Analysis/ScalarEvolution.h" 50283625Sdim#include "llvm/Analysis/ScalarEvolutionExpander.h" 51283625Sdim#include "llvm/Analysis/ScalarEvolutionExpressions.h" 52283625Sdim#include "llvm/Analysis/ValueTracking.h" 53283625Sdim#include "llvm/IR/Dominators.h" 54283625Sdim#include "llvm/IR/Function.h" 55283625Sdim#include "llvm/IR/IRBuilder.h" 56283625Sdim#include "llvm/IR/Instructions.h" 57283625Sdim#include "llvm/IR/Module.h" 58283625Sdim#include "llvm/IR/PatternMatch.h" 59283625Sdim#include "llvm/IR/ValueHandle.h" 60283625Sdim#include "llvm/IR/Verifier.h" 61283625Sdim#include "llvm/Pass.h" 62283625Sdim#include "llvm/Support/Debug.h" 63283625Sdim#include "llvm/Support/raw_ostream.h" 64283625Sdim#include "llvm/Transforms/Scalar.h" 65283625Sdim#include "llvm/Transforms/Utils/BasicBlockUtils.h" 66283625Sdim#include "llvm/Transforms/Utils/Cloning.h" 67283625Sdim#include "llvm/Transforms/Utils/LoopUtils.h" 68283625Sdim#include "llvm/Transforms/Utils/SimplifyIndVar.h" 69283625Sdim#include "llvm/Transforms/Utils/UnrollLoop.h" 70283625Sdim#include <array> 71283625Sdim 72283625Sdimusing namespace llvm; 73283625Sdim 74283625Sdimstatic cl::opt<unsigned> LoopSizeCutoff("irce-loop-size-cutoff", cl::Hidden, 75283625Sdim cl::init(64)); 76283625Sdim 77283625Sdimstatic cl::opt<bool> PrintChangedLoops("irce-print-changed-loops", cl::Hidden, 78283625Sdim cl::init(false)); 79283625Sdim 80283625Sdimstatic cl::opt<bool> PrintRangeChecks("irce-print-range-checks", cl::Hidden, 81283625Sdim cl::init(false)); 82283625Sdim 83283625Sdimstatic cl::opt<int> MaxExitProbReciprocal("irce-max-exit-prob-reciprocal", 84283625Sdim cl::Hidden, cl::init(10)); 85283625Sdim 86283625Sdim#define DEBUG_TYPE "irce" 87283625Sdim 88283625Sdimnamespace { 89283625Sdim 90283625Sdim/// An inductive range check is conditional branch in a loop with 91283625Sdim/// 92283625Sdim/// 1. a very cold successor (i.e. the branch jumps to that successor very 93283625Sdim/// rarely) 94283625Sdim/// 95283625Sdim/// and 96283625Sdim/// 97283625Sdim/// 2. a condition that is provably true for some contiguous range of values 98283625Sdim/// taken by the containing loop's induction variable. 99283625Sdim/// 100283625Sdimclass InductiveRangeCheck { 101283625Sdim // Classifies a range check 102283625Sdim enum RangeCheckKind : unsigned { 103283625Sdim // Range check of the form "0 <= I". 104283625Sdim RANGE_CHECK_LOWER = 1, 105283625Sdim 106283625Sdim // Range check of the form "I < L" where L is known positive. 107283625Sdim RANGE_CHECK_UPPER = 2, 108283625Sdim 109283625Sdim // The logical and of the RANGE_CHECK_LOWER and RANGE_CHECK_UPPER 110283625Sdim // conditions. 111283625Sdim RANGE_CHECK_BOTH = RANGE_CHECK_LOWER | RANGE_CHECK_UPPER, 112283625Sdim 113283625Sdim // Unrecognized range check condition. 114283625Sdim RANGE_CHECK_UNKNOWN = (unsigned)-1 115283625Sdim }; 116283625Sdim 117283625Sdim static const char *rangeCheckKindToStr(RangeCheckKind); 118283625Sdim 119283625Sdim const SCEV *Offset; 120283625Sdim const SCEV *Scale; 121283625Sdim Value *Length; 122283625Sdim BranchInst *Branch; 123283625Sdim RangeCheckKind Kind; 124283625Sdim 125283625Sdim static RangeCheckKind parseRangeCheckICmp(Loop *L, ICmpInst *ICI, 126283625Sdim ScalarEvolution &SE, Value *&Index, 127283625Sdim Value *&Length); 128283625Sdim 129283625Sdim static InductiveRangeCheck::RangeCheckKind 130283625Sdim parseRangeCheck(Loop *L, ScalarEvolution &SE, Value *Condition, 131283625Sdim const SCEV *&Index, Value *&UpperLimit); 132283625Sdim 133283625Sdim InductiveRangeCheck() : 134283625Sdim Offset(nullptr), Scale(nullptr), Length(nullptr), Branch(nullptr) { } 135283625Sdim 136283625Sdimpublic: 137283625Sdim const SCEV *getOffset() const { return Offset; } 138283625Sdim const SCEV *getScale() const { return Scale; } 139283625Sdim Value *getLength() const { return Length; } 140283625Sdim 141283625Sdim void print(raw_ostream &OS) const { 142283625Sdim OS << "InductiveRangeCheck:\n"; 143283625Sdim OS << " Kind: " << rangeCheckKindToStr(Kind) << "\n"; 144283625Sdim OS << " Offset: "; 145283625Sdim Offset->print(OS); 146283625Sdim OS << " Scale: "; 147283625Sdim Scale->print(OS); 148283625Sdim OS << " Length: "; 149283625Sdim if (Length) 150283625Sdim Length->print(OS); 151283625Sdim else 152283625Sdim OS << "(null)"; 153283625Sdim OS << "\n Branch: "; 154283625Sdim getBranch()->print(OS); 155283625Sdim OS << "\n"; 156283625Sdim } 157283625Sdim 158283625Sdim#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 159283625Sdim void dump() { 160283625Sdim print(dbgs()); 161283625Sdim } 162283625Sdim#endif 163283625Sdim 164283625Sdim BranchInst *getBranch() const { return Branch; } 165283625Sdim 166283625Sdim /// Represents an signed integer range [Range.getBegin(), Range.getEnd()). If 167283625Sdim /// R.getEnd() sle R.getBegin(), then R denotes the empty range. 168283625Sdim 169283625Sdim class Range { 170283625Sdim const SCEV *Begin; 171283625Sdim const SCEV *End; 172283625Sdim 173283625Sdim public: 174283625Sdim Range(const SCEV *Begin, const SCEV *End) : Begin(Begin), End(End) { 175283625Sdim assert(Begin->getType() == End->getType() && "ill-typed range!"); 176283625Sdim } 177283625Sdim 178283625Sdim Type *getType() const { return Begin->getType(); } 179283625Sdim const SCEV *getBegin() const { return Begin; } 180283625Sdim const SCEV *getEnd() const { return End; } 181283625Sdim }; 182283625Sdim 183283625Sdim typedef SpecificBumpPtrAllocator<InductiveRangeCheck> AllocatorTy; 184283625Sdim 185283625Sdim /// This is the value the condition of the branch needs to evaluate to for the 186283625Sdim /// branch to take the hot successor (see (1) above). 187283625Sdim bool getPassingDirection() { return true; } 188283625Sdim 189283625Sdim /// Computes a range for the induction variable (IndVar) in which the range 190283625Sdim /// check is redundant and can be constant-folded away. The induction 191283625Sdim /// variable is not required to be the canonical {0,+,1} induction variable. 192283625Sdim Optional<Range> computeSafeIterationSpace(ScalarEvolution &SE, 193283625Sdim const SCEVAddRecExpr *IndVar, 194283625Sdim IRBuilder<> &B) const; 195283625Sdim 196283625Sdim /// Create an inductive range check out of BI if possible, else return 197283625Sdim /// nullptr. 198283625Sdim static InductiveRangeCheck *create(AllocatorTy &Alloc, BranchInst *BI, 199283625Sdim Loop *L, ScalarEvolution &SE, 200283625Sdim BranchProbabilityInfo &BPI); 201283625Sdim}; 202283625Sdim 203283625Sdimclass InductiveRangeCheckElimination : public LoopPass { 204283625Sdim InductiveRangeCheck::AllocatorTy Allocator; 205283625Sdim 206283625Sdimpublic: 207283625Sdim static char ID; 208283625Sdim InductiveRangeCheckElimination() : LoopPass(ID) { 209283625Sdim initializeInductiveRangeCheckEliminationPass( 210283625Sdim *PassRegistry::getPassRegistry()); 211283625Sdim } 212283625Sdim 213283625Sdim void getAnalysisUsage(AnalysisUsage &AU) const override { 214283625Sdim AU.addRequired<LoopInfoWrapperPass>(); 215283625Sdim AU.addRequiredID(LoopSimplifyID); 216283625Sdim AU.addRequiredID(LCSSAID); 217296417Sdim AU.addRequired<ScalarEvolutionWrapperPass>(); 218296417Sdim AU.addRequired<BranchProbabilityInfoWrapperPass>(); 219283625Sdim } 220283625Sdim 221283625Sdim bool runOnLoop(Loop *L, LPPassManager &LPM) override; 222283625Sdim}; 223283625Sdim 224283625Sdimchar InductiveRangeCheckElimination::ID = 0; 225285181Sdim} 226283625Sdim 227296417SdimINITIALIZE_PASS_BEGIN(InductiveRangeCheckElimination, "irce", 228296417Sdim "Inductive range check elimination", false, false) 229296417SdimINITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) 230296417SdimINITIALIZE_PASS_DEPENDENCY(LoopSimplify) 231296417SdimINITIALIZE_PASS_DEPENDENCY(LCSSA) 232296417SdimINITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) 233296417SdimINITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass) 234296417SdimINITIALIZE_PASS_END(InductiveRangeCheckElimination, "irce", 235296417Sdim "Inductive range check elimination", false, false) 236283625Sdim 237283625Sdimconst char *InductiveRangeCheck::rangeCheckKindToStr( 238283625Sdim InductiveRangeCheck::RangeCheckKind RCK) { 239283625Sdim switch (RCK) { 240283625Sdim case InductiveRangeCheck::RANGE_CHECK_UNKNOWN: 241283625Sdim return "RANGE_CHECK_UNKNOWN"; 242283625Sdim 243283625Sdim case InductiveRangeCheck::RANGE_CHECK_UPPER: 244283625Sdim return "RANGE_CHECK_UPPER"; 245283625Sdim 246283625Sdim case InductiveRangeCheck::RANGE_CHECK_LOWER: 247283625Sdim return "RANGE_CHECK_LOWER"; 248283625Sdim 249283625Sdim case InductiveRangeCheck::RANGE_CHECK_BOTH: 250283625Sdim return "RANGE_CHECK_BOTH"; 251283625Sdim } 252283625Sdim 253283625Sdim llvm_unreachable("unknown range check type!"); 254283625Sdim} 255283625Sdim 256283625Sdim/// Parse a single ICmp instruction, `ICI`, into a range check. If `ICI` 257283625Sdim/// cannot 258283625Sdim/// be interpreted as a range check, return `RANGE_CHECK_UNKNOWN` and set 259283625Sdim/// `Index` and `Length` to `nullptr`. Otherwise set `Index` to the value 260283625Sdim/// being 261283625Sdim/// range checked, and set `Length` to the upper limit `Index` is being range 262283625Sdim/// checked with if (and only if) the range check type is stronger or equal to 263283625Sdim/// RANGE_CHECK_UPPER. 264283625Sdim/// 265283625SdimInductiveRangeCheck::RangeCheckKind 266283625SdimInductiveRangeCheck::parseRangeCheckICmp(Loop *L, ICmpInst *ICI, 267283625Sdim ScalarEvolution &SE, Value *&Index, 268283625Sdim Value *&Length) { 269283625Sdim 270283625Sdim auto IsNonNegativeAndNotLoopVarying = [&SE, L](Value *V) { 271283625Sdim const SCEV *S = SE.getSCEV(V); 272283625Sdim if (isa<SCEVCouldNotCompute>(S)) 273283625Sdim return false; 274283625Sdim 275283625Sdim return SE.getLoopDisposition(S, L) == ScalarEvolution::LoopInvariant && 276283625Sdim SE.isKnownNonNegative(S); 277283625Sdim }; 278283625Sdim 279283625Sdim using namespace llvm::PatternMatch; 280283625Sdim 281283625Sdim ICmpInst::Predicate Pred = ICI->getPredicate(); 282283625Sdim Value *LHS = ICI->getOperand(0); 283283625Sdim Value *RHS = ICI->getOperand(1); 284283625Sdim 285283625Sdim switch (Pred) { 286283625Sdim default: 287283625Sdim return RANGE_CHECK_UNKNOWN; 288283625Sdim 289283625Sdim case ICmpInst::ICMP_SLE: 290283625Sdim std::swap(LHS, RHS); 291283625Sdim // fallthrough 292283625Sdim case ICmpInst::ICMP_SGE: 293283625Sdim if (match(RHS, m_ConstantInt<0>())) { 294283625Sdim Index = LHS; 295283625Sdim return RANGE_CHECK_LOWER; 296283625Sdim } 297283625Sdim return RANGE_CHECK_UNKNOWN; 298283625Sdim 299283625Sdim case ICmpInst::ICMP_SLT: 300283625Sdim std::swap(LHS, RHS); 301283625Sdim // fallthrough 302283625Sdim case ICmpInst::ICMP_SGT: 303283625Sdim if (match(RHS, m_ConstantInt<-1>())) { 304283625Sdim Index = LHS; 305283625Sdim return RANGE_CHECK_LOWER; 306283625Sdim } 307283625Sdim 308283625Sdim if (IsNonNegativeAndNotLoopVarying(LHS)) { 309283625Sdim Index = RHS; 310283625Sdim Length = LHS; 311283625Sdim return RANGE_CHECK_UPPER; 312283625Sdim } 313283625Sdim return RANGE_CHECK_UNKNOWN; 314283625Sdim 315283625Sdim case ICmpInst::ICMP_ULT: 316283625Sdim std::swap(LHS, RHS); 317283625Sdim // fallthrough 318283625Sdim case ICmpInst::ICMP_UGT: 319283625Sdim if (IsNonNegativeAndNotLoopVarying(LHS)) { 320283625Sdim Index = RHS; 321283625Sdim Length = LHS; 322283625Sdim return RANGE_CHECK_BOTH; 323283625Sdim } 324283625Sdim return RANGE_CHECK_UNKNOWN; 325283625Sdim } 326283625Sdim 327283625Sdim llvm_unreachable("default clause returns!"); 328283625Sdim} 329283625Sdim 330283625Sdim/// Parses an arbitrary condition into a range check. `Length` is set only if 331283625Sdim/// the range check is recognized to be `RANGE_CHECK_UPPER` or stronger. 332283625SdimInductiveRangeCheck::RangeCheckKind 333283625SdimInductiveRangeCheck::parseRangeCheck(Loop *L, ScalarEvolution &SE, 334283625Sdim Value *Condition, const SCEV *&Index, 335283625Sdim Value *&Length) { 336283625Sdim using namespace llvm::PatternMatch; 337283625Sdim 338283625Sdim Value *A = nullptr; 339283625Sdim Value *B = nullptr; 340283625Sdim 341283625Sdim if (match(Condition, m_And(m_Value(A), m_Value(B)))) { 342283625Sdim Value *IndexA = nullptr, *IndexB = nullptr; 343283625Sdim Value *LengthA = nullptr, *LengthB = nullptr; 344283625Sdim ICmpInst *ICmpA = dyn_cast<ICmpInst>(A), *ICmpB = dyn_cast<ICmpInst>(B); 345283625Sdim 346283625Sdim if (!ICmpA || !ICmpB) 347283625Sdim return InductiveRangeCheck::RANGE_CHECK_UNKNOWN; 348283625Sdim 349283625Sdim auto RCKindA = parseRangeCheckICmp(L, ICmpA, SE, IndexA, LengthA); 350283625Sdim auto RCKindB = parseRangeCheckICmp(L, ICmpB, SE, IndexB, LengthB); 351283625Sdim 352283625Sdim if (RCKindA == InductiveRangeCheck::RANGE_CHECK_UNKNOWN || 353283625Sdim RCKindB == InductiveRangeCheck::RANGE_CHECK_UNKNOWN) 354283625Sdim return InductiveRangeCheck::RANGE_CHECK_UNKNOWN; 355283625Sdim 356283625Sdim if (IndexA != IndexB) 357283625Sdim return InductiveRangeCheck::RANGE_CHECK_UNKNOWN; 358283625Sdim 359283625Sdim if (LengthA != nullptr && LengthB != nullptr && LengthA != LengthB) 360283625Sdim return InductiveRangeCheck::RANGE_CHECK_UNKNOWN; 361283625Sdim 362283625Sdim Index = SE.getSCEV(IndexA); 363283625Sdim if (isa<SCEVCouldNotCompute>(Index)) 364283625Sdim return InductiveRangeCheck::RANGE_CHECK_UNKNOWN; 365283625Sdim 366283625Sdim Length = LengthA == nullptr ? LengthB : LengthA; 367283625Sdim 368283625Sdim return (InductiveRangeCheck::RangeCheckKind)(RCKindA | RCKindB); 369283625Sdim } 370283625Sdim 371283625Sdim if (ICmpInst *ICI = dyn_cast<ICmpInst>(Condition)) { 372283625Sdim Value *IndexVal = nullptr; 373283625Sdim 374283625Sdim auto RCKind = parseRangeCheckICmp(L, ICI, SE, IndexVal, Length); 375283625Sdim 376283625Sdim if (RCKind == InductiveRangeCheck::RANGE_CHECK_UNKNOWN) 377283625Sdim return InductiveRangeCheck::RANGE_CHECK_UNKNOWN; 378283625Sdim 379283625Sdim Index = SE.getSCEV(IndexVal); 380283625Sdim if (isa<SCEVCouldNotCompute>(Index)) 381283625Sdim return InductiveRangeCheck::RANGE_CHECK_UNKNOWN; 382283625Sdim 383283625Sdim return RCKind; 384283625Sdim } 385283625Sdim 386283625Sdim return InductiveRangeCheck::RANGE_CHECK_UNKNOWN; 387283625Sdim} 388283625Sdim 389283625Sdim 390283625SdimInductiveRangeCheck * 391283625SdimInductiveRangeCheck::create(InductiveRangeCheck::AllocatorTy &A, BranchInst *BI, 392283625Sdim Loop *L, ScalarEvolution &SE, 393283625Sdim BranchProbabilityInfo &BPI) { 394283625Sdim 395283625Sdim if (BI->isUnconditional() || BI->getParent() == L->getLoopLatch()) 396283625Sdim return nullptr; 397283625Sdim 398283625Sdim BranchProbability LikelyTaken(15, 16); 399283625Sdim 400283625Sdim if (BPI.getEdgeProbability(BI->getParent(), (unsigned) 0) < LikelyTaken) 401283625Sdim return nullptr; 402283625Sdim 403283625Sdim Value *Length = nullptr; 404283625Sdim const SCEV *IndexSCEV = nullptr; 405283625Sdim 406283625Sdim auto RCKind = InductiveRangeCheck::parseRangeCheck(L, SE, BI->getCondition(), 407283625Sdim IndexSCEV, Length); 408283625Sdim 409283625Sdim if (RCKind == InductiveRangeCheck::RANGE_CHECK_UNKNOWN) 410283625Sdim return nullptr; 411283625Sdim 412283625Sdim assert(IndexSCEV && "contract with SplitRangeCheckCondition!"); 413283625Sdim assert((!(RCKind & InductiveRangeCheck::RANGE_CHECK_UPPER) || Length) && 414283625Sdim "contract with SplitRangeCheckCondition!"); 415283625Sdim 416283625Sdim const SCEVAddRecExpr *IndexAddRec = dyn_cast<SCEVAddRecExpr>(IndexSCEV); 417283625Sdim bool IsAffineIndex = 418283625Sdim IndexAddRec && (IndexAddRec->getLoop() == L) && IndexAddRec->isAffine(); 419283625Sdim 420283625Sdim if (!IsAffineIndex) 421283625Sdim return nullptr; 422283625Sdim 423283625Sdim InductiveRangeCheck *IRC = new (A.Allocate()) InductiveRangeCheck; 424283625Sdim IRC->Length = Length; 425283625Sdim IRC->Offset = IndexAddRec->getStart(); 426283625Sdim IRC->Scale = IndexAddRec->getStepRecurrence(SE); 427283625Sdim IRC->Branch = BI; 428283625Sdim IRC->Kind = RCKind; 429283625Sdim return IRC; 430283625Sdim} 431283625Sdim 432283625Sdimnamespace { 433283625Sdim 434283625Sdim// Keeps track of the structure of a loop. This is similar to llvm::Loop, 435283625Sdim// except that it is more lightweight and can track the state of a loop through 436283625Sdim// changing and potentially invalid IR. This structure also formalizes the 437283625Sdim// kinds of loops we can deal with -- ones that have a single latch that is also 438283625Sdim// an exiting block *and* have a canonical induction variable. 439283625Sdimstruct LoopStructure { 440283625Sdim const char *Tag; 441283625Sdim 442283625Sdim BasicBlock *Header; 443283625Sdim BasicBlock *Latch; 444283625Sdim 445283625Sdim // `Latch's terminator instruction is `LatchBr', and it's `LatchBrExitIdx'th 446283625Sdim // successor is `LatchExit', the exit block of the loop. 447283625Sdim BranchInst *LatchBr; 448283625Sdim BasicBlock *LatchExit; 449283625Sdim unsigned LatchBrExitIdx; 450283625Sdim 451283625Sdim Value *IndVarNext; 452283625Sdim Value *IndVarStart; 453283625Sdim Value *LoopExitAt; 454283625Sdim bool IndVarIncreasing; 455283625Sdim 456283625Sdim LoopStructure() 457283625Sdim : Tag(""), Header(nullptr), Latch(nullptr), LatchBr(nullptr), 458283625Sdim LatchExit(nullptr), LatchBrExitIdx(-1), IndVarNext(nullptr), 459283625Sdim IndVarStart(nullptr), LoopExitAt(nullptr), IndVarIncreasing(false) {} 460283625Sdim 461283625Sdim template <typename M> LoopStructure map(M Map) const { 462283625Sdim LoopStructure Result; 463283625Sdim Result.Tag = Tag; 464283625Sdim Result.Header = cast<BasicBlock>(Map(Header)); 465283625Sdim Result.Latch = cast<BasicBlock>(Map(Latch)); 466283625Sdim Result.LatchBr = cast<BranchInst>(Map(LatchBr)); 467283625Sdim Result.LatchExit = cast<BasicBlock>(Map(LatchExit)); 468283625Sdim Result.LatchBrExitIdx = LatchBrExitIdx; 469283625Sdim Result.IndVarNext = Map(IndVarNext); 470283625Sdim Result.IndVarStart = Map(IndVarStart); 471283625Sdim Result.LoopExitAt = Map(LoopExitAt); 472283625Sdim Result.IndVarIncreasing = IndVarIncreasing; 473283625Sdim return Result; 474283625Sdim } 475283625Sdim 476283625Sdim static Optional<LoopStructure> parseLoopStructure(ScalarEvolution &, 477283625Sdim BranchProbabilityInfo &BPI, 478283625Sdim Loop &, 479283625Sdim const char *&); 480283625Sdim}; 481283625Sdim 482283625Sdim/// This class is used to constrain loops to run within a given iteration space. 483283625Sdim/// The algorithm this class implements is given a Loop and a range [Begin, 484283625Sdim/// End). The algorithm then tries to break out a "main loop" out of the loop 485283625Sdim/// it is given in a way that the "main loop" runs with the induction variable 486283625Sdim/// in a subset of [Begin, End). The algorithm emits appropriate pre and post 487283625Sdim/// loops to run any remaining iterations. The pre loop runs any iterations in 488283625Sdim/// which the induction variable is < Begin, and the post loop runs any 489283625Sdim/// iterations in which the induction variable is >= End. 490283625Sdim/// 491283625Sdimclass LoopConstrainer { 492283625Sdim // The representation of a clone of the original loop we started out with. 493283625Sdim struct ClonedLoop { 494283625Sdim // The cloned blocks 495283625Sdim std::vector<BasicBlock *> Blocks; 496283625Sdim 497283625Sdim // `Map` maps values in the clonee into values in the cloned version 498283625Sdim ValueToValueMapTy Map; 499283625Sdim 500283625Sdim // An instance of `LoopStructure` for the cloned loop 501283625Sdim LoopStructure Structure; 502283625Sdim }; 503283625Sdim 504283625Sdim // Result of rewriting the range of a loop. See changeIterationSpaceEnd for 505283625Sdim // more details on what these fields mean. 506283625Sdim struct RewrittenRangeInfo { 507283625Sdim BasicBlock *PseudoExit; 508283625Sdim BasicBlock *ExitSelector; 509283625Sdim std::vector<PHINode *> PHIValuesAtPseudoExit; 510283625Sdim PHINode *IndVarEnd; 511283625Sdim 512283625Sdim RewrittenRangeInfo() 513283625Sdim : PseudoExit(nullptr), ExitSelector(nullptr), IndVarEnd(nullptr) {} 514283625Sdim }; 515283625Sdim 516283625Sdim // Calculated subranges we restrict the iteration space of the main loop to. 517283625Sdim // See the implementation of `calculateSubRanges' for more details on how 518283625Sdim // these fields are computed. `LowLimit` is None if there is no restriction 519283625Sdim // on low end of the restricted iteration space of the main loop. `HighLimit` 520283625Sdim // is None if there is no restriction on high end of the restricted iteration 521283625Sdim // space of the main loop. 522283625Sdim 523283625Sdim struct SubRanges { 524283625Sdim Optional<const SCEV *> LowLimit; 525283625Sdim Optional<const SCEV *> HighLimit; 526283625Sdim }; 527283625Sdim 528283625Sdim // A utility function that does a `replaceUsesOfWith' on the incoming block 529283625Sdim // set of a `PHINode' -- replaces instances of `Block' in the `PHINode's 530283625Sdim // incoming block list with `ReplaceBy'. 531283625Sdim static void replacePHIBlock(PHINode *PN, BasicBlock *Block, 532283625Sdim BasicBlock *ReplaceBy); 533283625Sdim 534283625Sdim // Compute a safe set of limits for the main loop to run in -- effectively the 535283625Sdim // intersection of `Range' and the iteration space of the original loop. 536283625Sdim // Return None if unable to compute the set of subranges. 537283625Sdim // 538283625Sdim Optional<SubRanges> calculateSubRanges() const; 539283625Sdim 540283625Sdim // Clone `OriginalLoop' and return the result in CLResult. The IR after 541283625Sdim // running `cloneLoop' is well formed except for the PHI nodes in CLResult -- 542283625Sdim // the PHI nodes say that there is an incoming edge from `OriginalPreheader` 543283625Sdim // but there is no such edge. 544283625Sdim // 545283625Sdim void cloneLoop(ClonedLoop &CLResult, const char *Tag) const; 546283625Sdim 547283625Sdim // Rewrite the iteration space of the loop denoted by (LS, Preheader). The 548283625Sdim // iteration space of the rewritten loop ends at ExitLoopAt. The start of the 549283625Sdim // iteration space is not changed. `ExitLoopAt' is assumed to be slt 550283625Sdim // `OriginalHeaderCount'. 551283625Sdim // 552283625Sdim // If there are iterations left to execute, control is made to jump to 553283625Sdim // `ContinuationBlock', otherwise they take the normal loop exit. The 554283625Sdim // returned `RewrittenRangeInfo' object is populated as follows: 555283625Sdim // 556283625Sdim // .PseudoExit is a basic block that unconditionally branches to 557283625Sdim // `ContinuationBlock'. 558283625Sdim // 559283625Sdim // .ExitSelector is a basic block that decides, on exit from the loop, 560283625Sdim // whether to branch to the "true" exit or to `PseudoExit'. 561283625Sdim // 562283625Sdim // .PHIValuesAtPseudoExit are PHINodes in `PseudoExit' that compute the value 563283625Sdim // for each PHINode in the loop header on taking the pseudo exit. 564283625Sdim // 565283625Sdim // After changeIterationSpaceEnd, `Preheader' is no longer a legitimate 566283625Sdim // preheader because it is made to branch to the loop header only 567283625Sdim // conditionally. 568283625Sdim // 569283625Sdim RewrittenRangeInfo 570283625Sdim changeIterationSpaceEnd(const LoopStructure &LS, BasicBlock *Preheader, 571283625Sdim Value *ExitLoopAt, 572283625Sdim BasicBlock *ContinuationBlock) const; 573283625Sdim 574283625Sdim // The loop denoted by `LS' has `OldPreheader' as its preheader. This 575283625Sdim // function creates a new preheader for `LS' and returns it. 576283625Sdim // 577283625Sdim BasicBlock *createPreheader(const LoopStructure &LS, BasicBlock *OldPreheader, 578283625Sdim const char *Tag) const; 579283625Sdim 580283625Sdim // `ContinuationBlockAndPreheader' was the continuation block for some call to 581283625Sdim // `changeIterationSpaceEnd' and is the preheader to the loop denoted by `LS'. 582283625Sdim // This function rewrites the PHI nodes in `LS.Header' to start with the 583283625Sdim // correct value. 584283625Sdim void rewriteIncomingValuesForPHIs( 585283625Sdim LoopStructure &LS, BasicBlock *ContinuationBlockAndPreheader, 586283625Sdim const LoopConstrainer::RewrittenRangeInfo &RRI) const; 587283625Sdim 588283625Sdim // Even though we do not preserve any passes at this time, we at least need to 589283625Sdim // keep the parent loop structure consistent. The `LPPassManager' seems to 590283625Sdim // verify this after running a loop pass. This function adds the list of 591283625Sdim // blocks denoted by BBs to this loops parent loop if required. 592283625Sdim void addToParentLoopIfNeeded(ArrayRef<BasicBlock *> BBs); 593283625Sdim 594283625Sdim // Some global state. 595283625Sdim Function &F; 596283625Sdim LLVMContext &Ctx; 597283625Sdim ScalarEvolution &SE; 598283625Sdim 599283625Sdim // Information about the original loop we started out with. 600283625Sdim Loop &OriginalLoop; 601283625Sdim LoopInfo &OriginalLoopInfo; 602283625Sdim const SCEV *LatchTakenCount; 603283625Sdim BasicBlock *OriginalPreheader; 604283625Sdim 605283625Sdim // The preheader of the main loop. This may or may not be different from 606283625Sdim // `OriginalPreheader'. 607283625Sdim BasicBlock *MainLoopPreheader; 608283625Sdim 609283625Sdim // The range we need to run the main loop in. 610283625Sdim InductiveRangeCheck::Range Range; 611283625Sdim 612283625Sdim // The structure of the main loop (see comment at the beginning of this class 613283625Sdim // for a definition) 614283625Sdim LoopStructure MainLoopStructure; 615283625Sdim 616283625Sdimpublic: 617283625Sdim LoopConstrainer(Loop &L, LoopInfo &LI, const LoopStructure &LS, 618283625Sdim ScalarEvolution &SE, InductiveRangeCheck::Range R) 619283625Sdim : F(*L.getHeader()->getParent()), Ctx(L.getHeader()->getContext()), 620283625Sdim SE(SE), OriginalLoop(L), OriginalLoopInfo(LI), LatchTakenCount(nullptr), 621283625Sdim OriginalPreheader(nullptr), MainLoopPreheader(nullptr), Range(R), 622283625Sdim MainLoopStructure(LS) {} 623283625Sdim 624283625Sdim // Entry point for the algorithm. Returns true on success. 625283625Sdim bool run(); 626283625Sdim}; 627283625Sdim 628285181Sdim} 629283625Sdim 630283625Sdimvoid LoopConstrainer::replacePHIBlock(PHINode *PN, BasicBlock *Block, 631283625Sdim BasicBlock *ReplaceBy) { 632283625Sdim for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) 633283625Sdim if (PN->getIncomingBlock(i) == Block) 634283625Sdim PN->setIncomingBlock(i, ReplaceBy); 635283625Sdim} 636283625Sdim 637283625Sdimstatic bool CanBeSMax(ScalarEvolution &SE, const SCEV *S) { 638283625Sdim APInt SMax = 639283625Sdim APInt::getSignedMaxValue(cast<IntegerType>(S->getType())->getBitWidth()); 640283625Sdim return SE.getSignedRange(S).contains(SMax) && 641283625Sdim SE.getUnsignedRange(S).contains(SMax); 642283625Sdim} 643283625Sdim 644283625Sdimstatic bool CanBeSMin(ScalarEvolution &SE, const SCEV *S) { 645283625Sdim APInt SMin = 646283625Sdim APInt::getSignedMinValue(cast<IntegerType>(S->getType())->getBitWidth()); 647283625Sdim return SE.getSignedRange(S).contains(SMin) && 648283625Sdim SE.getUnsignedRange(S).contains(SMin); 649283625Sdim} 650283625Sdim 651283625SdimOptional<LoopStructure> 652283625SdimLoopStructure::parseLoopStructure(ScalarEvolution &SE, BranchProbabilityInfo &BPI, 653283625Sdim Loop &L, const char *&FailureReason) { 654283625Sdim assert(L.isLoopSimplifyForm() && "should follow from addRequired<>"); 655283625Sdim 656283625Sdim BasicBlock *Latch = L.getLoopLatch(); 657283625Sdim if (!L.isLoopExiting(Latch)) { 658283625Sdim FailureReason = "no loop latch"; 659283625Sdim return None; 660283625Sdim } 661283625Sdim 662283625Sdim BasicBlock *Header = L.getHeader(); 663283625Sdim BasicBlock *Preheader = L.getLoopPreheader(); 664283625Sdim if (!Preheader) { 665283625Sdim FailureReason = "no preheader"; 666283625Sdim return None; 667283625Sdim } 668283625Sdim 669283625Sdim BranchInst *LatchBr = dyn_cast<BranchInst>(&*Latch->rbegin()); 670283625Sdim if (!LatchBr || LatchBr->isUnconditional()) { 671283625Sdim FailureReason = "latch terminator not conditional branch"; 672283625Sdim return None; 673283625Sdim } 674283625Sdim 675283625Sdim unsigned LatchBrExitIdx = LatchBr->getSuccessor(0) == Header ? 1 : 0; 676283625Sdim 677283625Sdim BranchProbability ExitProbability = 678283625Sdim BPI.getEdgeProbability(LatchBr->getParent(), LatchBrExitIdx); 679283625Sdim 680283625Sdim if (ExitProbability > BranchProbability(1, MaxExitProbReciprocal)) { 681283625Sdim FailureReason = "short running loop, not profitable"; 682283625Sdim return None; 683283625Sdim } 684283625Sdim 685283625Sdim ICmpInst *ICI = dyn_cast<ICmpInst>(LatchBr->getCondition()); 686283625Sdim if (!ICI || !isa<IntegerType>(ICI->getOperand(0)->getType())) { 687283625Sdim FailureReason = "latch terminator branch not conditional on integral icmp"; 688283625Sdim return None; 689283625Sdim } 690283625Sdim 691283625Sdim const SCEV *LatchCount = SE.getExitCount(&L, Latch); 692283625Sdim if (isa<SCEVCouldNotCompute>(LatchCount)) { 693283625Sdim FailureReason = "could not compute latch count"; 694283625Sdim return None; 695283625Sdim } 696283625Sdim 697283625Sdim ICmpInst::Predicate Pred = ICI->getPredicate(); 698283625Sdim Value *LeftValue = ICI->getOperand(0); 699283625Sdim const SCEV *LeftSCEV = SE.getSCEV(LeftValue); 700283625Sdim IntegerType *IndVarTy = cast<IntegerType>(LeftValue->getType()); 701283625Sdim 702283625Sdim Value *RightValue = ICI->getOperand(1); 703283625Sdim const SCEV *RightSCEV = SE.getSCEV(RightValue); 704283625Sdim 705283625Sdim // We canonicalize `ICI` such that `LeftSCEV` is an add recurrence. 706283625Sdim if (!isa<SCEVAddRecExpr>(LeftSCEV)) { 707283625Sdim if (isa<SCEVAddRecExpr>(RightSCEV)) { 708283625Sdim std::swap(LeftSCEV, RightSCEV); 709283625Sdim std::swap(LeftValue, RightValue); 710283625Sdim Pred = ICmpInst::getSwappedPredicate(Pred); 711283625Sdim } else { 712283625Sdim FailureReason = "no add recurrences in the icmp"; 713283625Sdim return None; 714283625Sdim } 715283625Sdim } 716283625Sdim 717283625Sdim auto HasNoSignedWrap = [&](const SCEVAddRecExpr *AR) { 718283625Sdim if (AR->getNoWrapFlags(SCEV::FlagNSW)) 719283625Sdim return true; 720283625Sdim 721283625Sdim IntegerType *Ty = cast<IntegerType>(AR->getType()); 722283625Sdim IntegerType *WideTy = 723283625Sdim IntegerType::get(Ty->getContext(), Ty->getBitWidth() * 2); 724283625Sdim 725283625Sdim const SCEVAddRecExpr *ExtendAfterOp = 726283625Sdim dyn_cast<SCEVAddRecExpr>(SE.getSignExtendExpr(AR, WideTy)); 727283625Sdim if (ExtendAfterOp) { 728283625Sdim const SCEV *ExtendedStart = SE.getSignExtendExpr(AR->getStart(), WideTy); 729283625Sdim const SCEV *ExtendedStep = 730283625Sdim SE.getSignExtendExpr(AR->getStepRecurrence(SE), WideTy); 731283625Sdim 732283625Sdim bool NoSignedWrap = ExtendAfterOp->getStart() == ExtendedStart && 733283625Sdim ExtendAfterOp->getStepRecurrence(SE) == ExtendedStep; 734283625Sdim 735283625Sdim if (NoSignedWrap) 736283625Sdim return true; 737283625Sdim } 738283625Sdim 739283625Sdim // We may have proved this when computing the sign extension above. 740283625Sdim return AR->getNoWrapFlags(SCEV::FlagNSW) != SCEV::FlagAnyWrap; 741283625Sdim }; 742283625Sdim 743283625Sdim auto IsInductionVar = [&](const SCEVAddRecExpr *AR, bool &IsIncreasing) { 744283625Sdim if (!AR->isAffine()) 745283625Sdim return false; 746283625Sdim 747283625Sdim // Currently we only work with induction variables that have been proved to 748283625Sdim // not wrap. This restriction can potentially be lifted in the future. 749283625Sdim 750283625Sdim if (!HasNoSignedWrap(AR)) 751283625Sdim return false; 752283625Sdim 753283625Sdim if (const SCEVConstant *StepExpr = 754283625Sdim dyn_cast<SCEVConstant>(AR->getStepRecurrence(SE))) { 755283625Sdim ConstantInt *StepCI = StepExpr->getValue(); 756283625Sdim if (StepCI->isOne() || StepCI->isMinusOne()) { 757283625Sdim IsIncreasing = StepCI->isOne(); 758283625Sdim return true; 759283625Sdim } 760283625Sdim } 761283625Sdim 762283625Sdim return false; 763283625Sdim }; 764283625Sdim 765283625Sdim // `ICI` is interpreted as taking the backedge if the *next* value of the 766283625Sdim // induction variable satisfies some constraint. 767283625Sdim 768283625Sdim const SCEVAddRecExpr *IndVarNext = cast<SCEVAddRecExpr>(LeftSCEV); 769283625Sdim bool IsIncreasing = false; 770283625Sdim if (!IsInductionVar(IndVarNext, IsIncreasing)) { 771283625Sdim FailureReason = "LHS in icmp not induction variable"; 772283625Sdim return None; 773283625Sdim } 774283625Sdim 775283625Sdim ConstantInt *One = ConstantInt::get(IndVarTy, 1); 776283625Sdim // TODO: generalize the predicates here to also match their unsigned variants. 777283625Sdim if (IsIncreasing) { 778283625Sdim bool FoundExpectedPred = 779283625Sdim (Pred == ICmpInst::ICMP_SLT && LatchBrExitIdx == 1) || 780283625Sdim (Pred == ICmpInst::ICMP_SGT && LatchBrExitIdx == 0); 781283625Sdim 782283625Sdim if (!FoundExpectedPred) { 783283625Sdim FailureReason = "expected icmp slt semantically, found something else"; 784283625Sdim return None; 785283625Sdim } 786283625Sdim 787283625Sdim if (LatchBrExitIdx == 0) { 788283625Sdim if (CanBeSMax(SE, RightSCEV)) { 789283625Sdim // TODO: this restriction is easily removable -- we just have to 790283625Sdim // remember that the icmp was an slt and not an sle. 791283625Sdim FailureReason = "limit may overflow when coercing sle to slt"; 792283625Sdim return None; 793283625Sdim } 794283625Sdim 795283625Sdim IRBuilder<> B(&*Preheader->rbegin()); 796283625Sdim RightValue = B.CreateAdd(RightValue, One); 797283625Sdim } 798283625Sdim 799283625Sdim } else { 800283625Sdim bool FoundExpectedPred = 801283625Sdim (Pred == ICmpInst::ICMP_SGT && LatchBrExitIdx == 1) || 802283625Sdim (Pred == ICmpInst::ICMP_SLT && LatchBrExitIdx == 0); 803283625Sdim 804283625Sdim if (!FoundExpectedPred) { 805283625Sdim FailureReason = "expected icmp sgt semantically, found something else"; 806283625Sdim return None; 807283625Sdim } 808283625Sdim 809283625Sdim if (LatchBrExitIdx == 0) { 810283625Sdim if (CanBeSMin(SE, RightSCEV)) { 811283625Sdim // TODO: this restriction is easily removable -- we just have to 812283625Sdim // remember that the icmp was an sgt and not an sge. 813283625Sdim FailureReason = "limit may overflow when coercing sge to sgt"; 814283625Sdim return None; 815283625Sdim } 816283625Sdim 817283625Sdim IRBuilder<> B(&*Preheader->rbegin()); 818283625Sdim RightValue = B.CreateSub(RightValue, One); 819283625Sdim } 820283625Sdim } 821283625Sdim 822283625Sdim const SCEV *StartNext = IndVarNext->getStart(); 823283625Sdim const SCEV *Addend = SE.getNegativeSCEV(IndVarNext->getStepRecurrence(SE)); 824283625Sdim const SCEV *IndVarStart = SE.getAddExpr(StartNext, Addend); 825283625Sdim 826283625Sdim BasicBlock *LatchExit = LatchBr->getSuccessor(LatchBrExitIdx); 827283625Sdim 828283625Sdim assert(SE.getLoopDisposition(LatchCount, &L) == 829283625Sdim ScalarEvolution::LoopInvariant && 830283625Sdim "loop variant exit count doesn't make sense!"); 831283625Sdim 832283625Sdim assert(!L.contains(LatchExit) && "expected an exit block!"); 833283625Sdim const DataLayout &DL = Preheader->getModule()->getDataLayout(); 834283625Sdim Value *IndVarStartV = 835283625Sdim SCEVExpander(SE, DL, "irce") 836283625Sdim .expandCodeFor(IndVarStart, IndVarTy, &*Preheader->rbegin()); 837283625Sdim IndVarStartV->setName("indvar.start"); 838283625Sdim 839283625Sdim LoopStructure Result; 840283625Sdim 841283625Sdim Result.Tag = "main"; 842283625Sdim Result.Header = Header; 843283625Sdim Result.Latch = Latch; 844283625Sdim Result.LatchBr = LatchBr; 845283625Sdim Result.LatchExit = LatchExit; 846283625Sdim Result.LatchBrExitIdx = LatchBrExitIdx; 847283625Sdim Result.IndVarStart = IndVarStartV; 848283625Sdim Result.IndVarNext = LeftValue; 849283625Sdim Result.IndVarIncreasing = IsIncreasing; 850283625Sdim Result.LoopExitAt = RightValue; 851283625Sdim 852283625Sdim FailureReason = nullptr; 853283625Sdim 854283625Sdim return Result; 855283625Sdim} 856283625Sdim 857283625SdimOptional<LoopConstrainer::SubRanges> 858283625SdimLoopConstrainer::calculateSubRanges() const { 859283625Sdim IntegerType *Ty = cast<IntegerType>(LatchTakenCount->getType()); 860283625Sdim 861283625Sdim if (Range.getType() != Ty) 862283625Sdim return None; 863283625Sdim 864283625Sdim LoopConstrainer::SubRanges Result; 865283625Sdim 866283625Sdim // I think we can be more aggressive here and make this nuw / nsw if the 867283625Sdim // addition that feeds into the icmp for the latch's terminating branch is nuw 868283625Sdim // / nsw. In any case, a wrapping 2's complement addition is safe. 869283625Sdim ConstantInt *One = ConstantInt::get(Ty, 1); 870283625Sdim const SCEV *Start = SE.getSCEV(MainLoopStructure.IndVarStart); 871283625Sdim const SCEV *End = SE.getSCEV(MainLoopStructure.LoopExitAt); 872283625Sdim 873283625Sdim bool Increasing = MainLoopStructure.IndVarIncreasing; 874283625Sdim 875283625Sdim // We compute `Smallest` and `Greatest` such that [Smallest, Greatest) is the 876283625Sdim // range of values the induction variable takes. 877283625Sdim 878283625Sdim const SCEV *Smallest = nullptr, *Greatest = nullptr; 879283625Sdim 880283625Sdim if (Increasing) { 881283625Sdim Smallest = Start; 882283625Sdim Greatest = End; 883283625Sdim } else { 884283625Sdim // These two computations may sign-overflow. Here is why that is okay: 885283625Sdim // 886283625Sdim // We know that the induction variable does not sign-overflow on any 887283625Sdim // iteration except the last one, and it starts at `Start` and ends at 888283625Sdim // `End`, decrementing by one every time. 889283625Sdim // 890283625Sdim // * if `Smallest` sign-overflows we know `End` is `INT_SMAX`. Since the 891283625Sdim // induction variable is decreasing we know that that the smallest value 892283625Sdim // the loop body is actually executed with is `INT_SMIN` == `Smallest`. 893283625Sdim // 894283625Sdim // * if `Greatest` sign-overflows, we know it can only be `INT_SMIN`. In 895283625Sdim // that case, `Clamp` will always return `Smallest` and 896283625Sdim // [`Result.LowLimit`, `Result.HighLimit`) = [`Smallest`, `Smallest`) 897283625Sdim // will be an empty range. Returning an empty range is always safe. 898283625Sdim // 899283625Sdim 900283625Sdim Smallest = SE.getAddExpr(End, SE.getSCEV(One)); 901283625Sdim Greatest = SE.getAddExpr(Start, SE.getSCEV(One)); 902283625Sdim } 903283625Sdim 904283625Sdim auto Clamp = [this, Smallest, Greatest](const SCEV *S) { 905283625Sdim return SE.getSMaxExpr(Smallest, SE.getSMinExpr(Greatest, S)); 906283625Sdim }; 907283625Sdim 908283625Sdim // In some cases we can prove that we don't need a pre or post loop 909283625Sdim 910283625Sdim bool ProvablyNoPreloop = 911283625Sdim SE.isKnownPredicate(ICmpInst::ICMP_SLE, Range.getBegin(), Smallest); 912283625Sdim if (!ProvablyNoPreloop) 913283625Sdim Result.LowLimit = Clamp(Range.getBegin()); 914283625Sdim 915283625Sdim bool ProvablyNoPostLoop = 916283625Sdim SE.isKnownPredicate(ICmpInst::ICMP_SLE, Greatest, Range.getEnd()); 917283625Sdim if (!ProvablyNoPostLoop) 918283625Sdim Result.HighLimit = Clamp(Range.getEnd()); 919283625Sdim 920283625Sdim return Result; 921283625Sdim} 922283625Sdim 923283625Sdimvoid LoopConstrainer::cloneLoop(LoopConstrainer::ClonedLoop &Result, 924283625Sdim const char *Tag) const { 925283625Sdim for (BasicBlock *BB : OriginalLoop.getBlocks()) { 926283625Sdim BasicBlock *Clone = CloneBasicBlock(BB, Result.Map, Twine(".") + Tag, &F); 927283625Sdim Result.Blocks.push_back(Clone); 928283625Sdim Result.Map[BB] = Clone; 929283625Sdim } 930283625Sdim 931283625Sdim auto GetClonedValue = [&Result](Value *V) { 932283625Sdim assert(V && "null values not in domain!"); 933283625Sdim auto It = Result.Map.find(V); 934283625Sdim if (It == Result.Map.end()) 935283625Sdim return V; 936283625Sdim return static_cast<Value *>(It->second); 937283625Sdim }; 938283625Sdim 939283625Sdim Result.Structure = MainLoopStructure.map(GetClonedValue); 940283625Sdim Result.Structure.Tag = Tag; 941283625Sdim 942283625Sdim for (unsigned i = 0, e = Result.Blocks.size(); i != e; ++i) { 943283625Sdim BasicBlock *ClonedBB = Result.Blocks[i]; 944283625Sdim BasicBlock *OriginalBB = OriginalLoop.getBlocks()[i]; 945283625Sdim 946283625Sdim assert(Result.Map[OriginalBB] == ClonedBB && "invariant!"); 947283625Sdim 948283625Sdim for (Instruction &I : *ClonedBB) 949283625Sdim RemapInstruction(&I, Result.Map, 950283625Sdim RF_NoModuleLevelChanges | RF_IgnoreMissingEntries); 951283625Sdim 952283625Sdim // Exit blocks will now have one more predecessor and their PHI nodes need 953283625Sdim // to be edited to reflect that. No phi nodes need to be introduced because 954283625Sdim // the loop is in LCSSA. 955283625Sdim 956283625Sdim for (auto SBBI = succ_begin(OriginalBB), SBBE = succ_end(OriginalBB); 957283625Sdim SBBI != SBBE; ++SBBI) { 958283625Sdim 959283625Sdim if (OriginalLoop.contains(*SBBI)) 960283625Sdim continue; // not an exit block 961283625Sdim 962283625Sdim for (Instruction &I : **SBBI) { 963283625Sdim if (!isa<PHINode>(&I)) 964283625Sdim break; 965283625Sdim 966283625Sdim PHINode *PN = cast<PHINode>(&I); 967283625Sdim Value *OldIncoming = PN->getIncomingValueForBlock(OriginalBB); 968283625Sdim PN->addIncoming(GetClonedValue(OldIncoming), ClonedBB); 969283625Sdim } 970283625Sdim } 971283625Sdim } 972283625Sdim} 973283625Sdim 974283625SdimLoopConstrainer::RewrittenRangeInfo LoopConstrainer::changeIterationSpaceEnd( 975283625Sdim const LoopStructure &LS, BasicBlock *Preheader, Value *ExitSubloopAt, 976283625Sdim BasicBlock *ContinuationBlock) const { 977283625Sdim 978283625Sdim // We start with a loop with a single latch: 979283625Sdim // 980283625Sdim // +--------------------+ 981283625Sdim // | | 982283625Sdim // | preheader | 983283625Sdim // | | 984283625Sdim // +--------+-----------+ 985283625Sdim // | ----------------\ 986283625Sdim // | / | 987283625Sdim // +--------v----v------+ | 988283625Sdim // | | | 989283625Sdim // | header | | 990283625Sdim // | | | 991283625Sdim // +--------------------+ | 992283625Sdim // | 993283625Sdim // ..... | 994283625Sdim // | 995283625Sdim // +--------------------+ | 996283625Sdim // | | | 997283625Sdim // | latch >----------/ 998283625Sdim // | | 999283625Sdim // +-------v------------+ 1000283625Sdim // | 1001283625Sdim // | 1002283625Sdim // | +--------------------+ 1003283625Sdim // | | | 1004283625Sdim // +---> original exit | 1005283625Sdim // | | 1006283625Sdim // +--------------------+ 1007283625Sdim // 1008283625Sdim // We change the control flow to look like 1009283625Sdim // 1010283625Sdim // 1011283625Sdim // +--------------------+ 1012283625Sdim // | | 1013283625Sdim // | preheader >-------------------------+ 1014283625Sdim // | | | 1015283625Sdim // +--------v-----------+ | 1016283625Sdim // | /-------------+ | 1017283625Sdim // | / | | 1018283625Sdim // +--------v--v--------+ | | 1019283625Sdim // | | | | 1020283625Sdim // | header | | +--------+ | 1021283625Sdim // | | | | | | 1022283625Sdim // +--------------------+ | | +-----v-----v-----------+ 1023283625Sdim // | | | | 1024283625Sdim // | | | .pseudo.exit | 1025283625Sdim // | | | | 1026283625Sdim // | | +-----------v-----------+ 1027283625Sdim // | | | 1028283625Sdim // ..... | | | 1029283625Sdim // | | +--------v-------------+ 1030283625Sdim // +--------------------+ | | | | 1031283625Sdim // | | | | | ContinuationBlock | 1032283625Sdim // | latch >------+ | | | 1033283625Sdim // | | | +----------------------+ 1034283625Sdim // +---------v----------+ | 1035283625Sdim // | | 1036283625Sdim // | | 1037283625Sdim // | +---------------^-----+ 1038283625Sdim // | | | 1039283625Sdim // +-----> .exit.selector | 1040283625Sdim // | | 1041283625Sdim // +----------v----------+ 1042283625Sdim // | 1043283625Sdim // +--------------------+ | 1044283625Sdim // | | | 1045283625Sdim // | original exit <----+ 1046283625Sdim // | | 1047283625Sdim // +--------------------+ 1048283625Sdim // 1049283625Sdim 1050283625Sdim RewrittenRangeInfo RRI; 1051283625Sdim 1052283625Sdim auto BBInsertLocation = std::next(Function::iterator(LS.Latch)); 1053283625Sdim RRI.ExitSelector = BasicBlock::Create(Ctx, Twine(LS.Tag) + ".exit.selector", 1054296417Sdim &F, &*BBInsertLocation); 1055283625Sdim RRI.PseudoExit = BasicBlock::Create(Ctx, Twine(LS.Tag) + ".pseudo.exit", &F, 1056296417Sdim &*BBInsertLocation); 1057283625Sdim 1058283625Sdim BranchInst *PreheaderJump = cast<BranchInst>(&*Preheader->rbegin()); 1059283625Sdim bool Increasing = LS.IndVarIncreasing; 1060283625Sdim 1061283625Sdim IRBuilder<> B(PreheaderJump); 1062283625Sdim 1063283625Sdim // EnterLoopCond - is it okay to start executing this `LS'? 1064283625Sdim Value *EnterLoopCond = Increasing 1065283625Sdim ? B.CreateICmpSLT(LS.IndVarStart, ExitSubloopAt) 1066283625Sdim : B.CreateICmpSGT(LS.IndVarStart, ExitSubloopAt); 1067283625Sdim 1068283625Sdim B.CreateCondBr(EnterLoopCond, LS.Header, RRI.PseudoExit); 1069283625Sdim PreheaderJump->eraseFromParent(); 1070283625Sdim 1071283625Sdim LS.LatchBr->setSuccessor(LS.LatchBrExitIdx, RRI.ExitSelector); 1072283625Sdim B.SetInsertPoint(LS.LatchBr); 1073283625Sdim Value *TakeBackedgeLoopCond = 1074283625Sdim Increasing ? B.CreateICmpSLT(LS.IndVarNext, ExitSubloopAt) 1075283625Sdim : B.CreateICmpSGT(LS.IndVarNext, ExitSubloopAt); 1076283625Sdim Value *CondForBranch = LS.LatchBrExitIdx == 1 1077283625Sdim ? TakeBackedgeLoopCond 1078283625Sdim : B.CreateNot(TakeBackedgeLoopCond); 1079283625Sdim 1080283625Sdim LS.LatchBr->setCondition(CondForBranch); 1081283625Sdim 1082283625Sdim B.SetInsertPoint(RRI.ExitSelector); 1083283625Sdim 1084283625Sdim // IterationsLeft - are there any more iterations left, given the original 1085283625Sdim // upper bound on the induction variable? If not, we branch to the "real" 1086283625Sdim // exit. 1087283625Sdim Value *IterationsLeft = Increasing 1088283625Sdim ? B.CreateICmpSLT(LS.IndVarNext, LS.LoopExitAt) 1089283625Sdim : B.CreateICmpSGT(LS.IndVarNext, LS.LoopExitAt); 1090283625Sdim B.CreateCondBr(IterationsLeft, RRI.PseudoExit, LS.LatchExit); 1091283625Sdim 1092283625Sdim BranchInst *BranchToContinuation = 1093283625Sdim BranchInst::Create(ContinuationBlock, RRI.PseudoExit); 1094283625Sdim 1095283625Sdim // We emit PHI nodes into `RRI.PseudoExit' that compute the "latest" value of 1096283625Sdim // each of the PHI nodes in the loop header. This feeds into the initial 1097283625Sdim // value of the same PHI nodes if/when we continue execution. 1098283625Sdim for (Instruction &I : *LS.Header) { 1099283625Sdim if (!isa<PHINode>(&I)) 1100283625Sdim break; 1101283625Sdim 1102283625Sdim PHINode *PN = cast<PHINode>(&I); 1103283625Sdim 1104283625Sdim PHINode *NewPHI = PHINode::Create(PN->getType(), 2, PN->getName() + ".copy", 1105283625Sdim BranchToContinuation); 1106283625Sdim 1107283625Sdim NewPHI->addIncoming(PN->getIncomingValueForBlock(Preheader), Preheader); 1108283625Sdim NewPHI->addIncoming(PN->getIncomingValueForBlock(LS.Latch), 1109283625Sdim RRI.ExitSelector); 1110283625Sdim RRI.PHIValuesAtPseudoExit.push_back(NewPHI); 1111283625Sdim } 1112283625Sdim 1113283625Sdim RRI.IndVarEnd = PHINode::Create(LS.IndVarNext->getType(), 2, "indvar.end", 1114283625Sdim BranchToContinuation); 1115283625Sdim RRI.IndVarEnd->addIncoming(LS.IndVarStart, Preheader); 1116283625Sdim RRI.IndVarEnd->addIncoming(LS.IndVarNext, RRI.ExitSelector); 1117283625Sdim 1118283625Sdim // The latch exit now has a branch from `RRI.ExitSelector' instead of 1119283625Sdim // `LS.Latch'. The PHI nodes need to be updated to reflect that. 1120283625Sdim for (Instruction &I : *LS.LatchExit) { 1121283625Sdim if (PHINode *PN = dyn_cast<PHINode>(&I)) 1122283625Sdim replacePHIBlock(PN, LS.Latch, RRI.ExitSelector); 1123283625Sdim else 1124283625Sdim break; 1125283625Sdim } 1126283625Sdim 1127283625Sdim return RRI; 1128283625Sdim} 1129283625Sdim 1130283625Sdimvoid LoopConstrainer::rewriteIncomingValuesForPHIs( 1131283625Sdim LoopStructure &LS, BasicBlock *ContinuationBlock, 1132283625Sdim const LoopConstrainer::RewrittenRangeInfo &RRI) const { 1133283625Sdim 1134283625Sdim unsigned PHIIndex = 0; 1135283625Sdim for (Instruction &I : *LS.Header) { 1136283625Sdim if (!isa<PHINode>(&I)) 1137283625Sdim break; 1138283625Sdim 1139283625Sdim PHINode *PN = cast<PHINode>(&I); 1140283625Sdim 1141283625Sdim for (unsigned i = 0, e = PN->getNumIncomingValues(); i < e; ++i) 1142283625Sdim if (PN->getIncomingBlock(i) == ContinuationBlock) 1143283625Sdim PN->setIncomingValue(i, RRI.PHIValuesAtPseudoExit[PHIIndex++]); 1144283625Sdim } 1145283625Sdim 1146283625Sdim LS.IndVarStart = RRI.IndVarEnd; 1147283625Sdim} 1148283625Sdim 1149283625SdimBasicBlock *LoopConstrainer::createPreheader(const LoopStructure &LS, 1150283625Sdim BasicBlock *OldPreheader, 1151283625Sdim const char *Tag) const { 1152283625Sdim 1153283625Sdim BasicBlock *Preheader = BasicBlock::Create(Ctx, Tag, &F, LS.Header); 1154283625Sdim BranchInst::Create(LS.Header, Preheader); 1155283625Sdim 1156283625Sdim for (Instruction &I : *LS.Header) { 1157283625Sdim if (!isa<PHINode>(&I)) 1158283625Sdim break; 1159283625Sdim 1160283625Sdim PHINode *PN = cast<PHINode>(&I); 1161283625Sdim for (unsigned i = 0, e = PN->getNumIncomingValues(); i < e; ++i) 1162283625Sdim replacePHIBlock(PN, OldPreheader, Preheader); 1163283625Sdim } 1164283625Sdim 1165283625Sdim return Preheader; 1166283625Sdim} 1167283625Sdim 1168283625Sdimvoid LoopConstrainer::addToParentLoopIfNeeded(ArrayRef<BasicBlock *> BBs) { 1169283625Sdim Loop *ParentLoop = OriginalLoop.getParentLoop(); 1170283625Sdim if (!ParentLoop) 1171283625Sdim return; 1172283625Sdim 1173283625Sdim for (BasicBlock *BB : BBs) 1174283625Sdim ParentLoop->addBasicBlockToLoop(BB, OriginalLoopInfo); 1175283625Sdim} 1176283625Sdim 1177283625Sdimbool LoopConstrainer::run() { 1178283625Sdim BasicBlock *Preheader = nullptr; 1179283625Sdim LatchTakenCount = SE.getExitCount(&OriginalLoop, MainLoopStructure.Latch); 1180283625Sdim Preheader = OriginalLoop.getLoopPreheader(); 1181283625Sdim assert(!isa<SCEVCouldNotCompute>(LatchTakenCount) && Preheader != nullptr && 1182283625Sdim "preconditions!"); 1183283625Sdim 1184283625Sdim OriginalPreheader = Preheader; 1185283625Sdim MainLoopPreheader = Preheader; 1186283625Sdim 1187283625Sdim Optional<SubRanges> MaybeSR = calculateSubRanges(); 1188283625Sdim if (!MaybeSR.hasValue()) { 1189283625Sdim DEBUG(dbgs() << "irce: could not compute subranges\n"); 1190283625Sdim return false; 1191283625Sdim } 1192283625Sdim 1193283625Sdim SubRanges SR = MaybeSR.getValue(); 1194283625Sdim bool Increasing = MainLoopStructure.IndVarIncreasing; 1195283625Sdim IntegerType *IVTy = 1196283625Sdim cast<IntegerType>(MainLoopStructure.IndVarNext->getType()); 1197283625Sdim 1198283625Sdim SCEVExpander Expander(SE, F.getParent()->getDataLayout(), "irce"); 1199283625Sdim Instruction *InsertPt = OriginalPreheader->getTerminator(); 1200283625Sdim 1201283625Sdim // It would have been better to make `PreLoop' and `PostLoop' 1202283625Sdim // `Optional<ClonedLoop>'s, but `ValueToValueMapTy' does not have a copy 1203283625Sdim // constructor. 1204283625Sdim ClonedLoop PreLoop, PostLoop; 1205283625Sdim bool NeedsPreLoop = 1206283625Sdim Increasing ? SR.LowLimit.hasValue() : SR.HighLimit.hasValue(); 1207283625Sdim bool NeedsPostLoop = 1208283625Sdim Increasing ? SR.HighLimit.hasValue() : SR.LowLimit.hasValue(); 1209283625Sdim 1210283625Sdim Value *ExitPreLoopAt = nullptr; 1211283625Sdim Value *ExitMainLoopAt = nullptr; 1212283625Sdim const SCEVConstant *MinusOneS = 1213283625Sdim cast<SCEVConstant>(SE.getConstant(IVTy, -1, true /* isSigned */)); 1214283625Sdim 1215283625Sdim if (NeedsPreLoop) { 1216283625Sdim const SCEV *ExitPreLoopAtSCEV = nullptr; 1217283625Sdim 1218283625Sdim if (Increasing) 1219283625Sdim ExitPreLoopAtSCEV = *SR.LowLimit; 1220283625Sdim else { 1221283625Sdim if (CanBeSMin(SE, *SR.HighLimit)) { 1222283625Sdim DEBUG(dbgs() << "irce: could not prove no-overflow when computing " 1223283625Sdim << "preloop exit limit. HighLimit = " << *(*SR.HighLimit) 1224283625Sdim << "\n"); 1225283625Sdim return false; 1226283625Sdim } 1227283625Sdim ExitPreLoopAtSCEV = SE.getAddExpr(*SR.HighLimit, MinusOneS); 1228283625Sdim } 1229283625Sdim 1230283625Sdim ExitPreLoopAt = Expander.expandCodeFor(ExitPreLoopAtSCEV, IVTy, InsertPt); 1231283625Sdim ExitPreLoopAt->setName("exit.preloop.at"); 1232283625Sdim } 1233283625Sdim 1234283625Sdim if (NeedsPostLoop) { 1235283625Sdim const SCEV *ExitMainLoopAtSCEV = nullptr; 1236283625Sdim 1237283625Sdim if (Increasing) 1238283625Sdim ExitMainLoopAtSCEV = *SR.HighLimit; 1239283625Sdim else { 1240283625Sdim if (CanBeSMin(SE, *SR.LowLimit)) { 1241283625Sdim DEBUG(dbgs() << "irce: could not prove no-overflow when computing " 1242283625Sdim << "mainloop exit limit. LowLimit = " << *(*SR.LowLimit) 1243283625Sdim << "\n"); 1244283625Sdim return false; 1245283625Sdim } 1246283625Sdim ExitMainLoopAtSCEV = SE.getAddExpr(*SR.LowLimit, MinusOneS); 1247283625Sdim } 1248283625Sdim 1249283625Sdim ExitMainLoopAt = Expander.expandCodeFor(ExitMainLoopAtSCEV, IVTy, InsertPt); 1250283625Sdim ExitMainLoopAt->setName("exit.mainloop.at"); 1251283625Sdim } 1252283625Sdim 1253283625Sdim // We clone these ahead of time so that we don't have to deal with changing 1254283625Sdim // and temporarily invalid IR as we transform the loops. 1255283625Sdim if (NeedsPreLoop) 1256283625Sdim cloneLoop(PreLoop, "preloop"); 1257283625Sdim if (NeedsPostLoop) 1258283625Sdim cloneLoop(PostLoop, "postloop"); 1259283625Sdim 1260283625Sdim RewrittenRangeInfo PreLoopRRI; 1261283625Sdim 1262283625Sdim if (NeedsPreLoop) { 1263283625Sdim Preheader->getTerminator()->replaceUsesOfWith(MainLoopStructure.Header, 1264283625Sdim PreLoop.Structure.Header); 1265283625Sdim 1266283625Sdim MainLoopPreheader = 1267283625Sdim createPreheader(MainLoopStructure, Preheader, "mainloop"); 1268283625Sdim PreLoopRRI = changeIterationSpaceEnd(PreLoop.Structure, Preheader, 1269283625Sdim ExitPreLoopAt, MainLoopPreheader); 1270283625Sdim rewriteIncomingValuesForPHIs(MainLoopStructure, MainLoopPreheader, 1271283625Sdim PreLoopRRI); 1272283625Sdim } 1273283625Sdim 1274283625Sdim BasicBlock *PostLoopPreheader = nullptr; 1275283625Sdim RewrittenRangeInfo PostLoopRRI; 1276283625Sdim 1277283625Sdim if (NeedsPostLoop) { 1278283625Sdim PostLoopPreheader = 1279283625Sdim createPreheader(PostLoop.Structure, Preheader, "postloop"); 1280283625Sdim PostLoopRRI = changeIterationSpaceEnd(MainLoopStructure, MainLoopPreheader, 1281283625Sdim ExitMainLoopAt, PostLoopPreheader); 1282283625Sdim rewriteIncomingValuesForPHIs(PostLoop.Structure, PostLoopPreheader, 1283283625Sdim PostLoopRRI); 1284283625Sdim } 1285283625Sdim 1286283625Sdim BasicBlock *NewMainLoopPreheader = 1287283625Sdim MainLoopPreheader != Preheader ? MainLoopPreheader : nullptr; 1288283625Sdim BasicBlock *NewBlocks[] = {PostLoopPreheader, PreLoopRRI.PseudoExit, 1289283625Sdim PreLoopRRI.ExitSelector, PostLoopRRI.PseudoExit, 1290283625Sdim PostLoopRRI.ExitSelector, NewMainLoopPreheader}; 1291283625Sdim 1292283625Sdim // Some of the above may be nullptr, filter them out before passing to 1293283625Sdim // addToParentLoopIfNeeded. 1294283625Sdim auto NewBlocksEnd = 1295283625Sdim std::remove(std::begin(NewBlocks), std::end(NewBlocks), nullptr); 1296283625Sdim 1297283625Sdim addToParentLoopIfNeeded(makeArrayRef(std::begin(NewBlocks), NewBlocksEnd)); 1298283625Sdim addToParentLoopIfNeeded(PreLoop.Blocks); 1299283625Sdim addToParentLoopIfNeeded(PostLoop.Blocks); 1300283625Sdim 1301283625Sdim return true; 1302283625Sdim} 1303283625Sdim 1304283625Sdim/// Computes and returns a range of values for the induction variable (IndVar) 1305283625Sdim/// in which the range check can be safely elided. If it cannot compute such a 1306283625Sdim/// range, returns None. 1307283625SdimOptional<InductiveRangeCheck::Range> 1308283625SdimInductiveRangeCheck::computeSafeIterationSpace(ScalarEvolution &SE, 1309283625Sdim const SCEVAddRecExpr *IndVar, 1310283625Sdim IRBuilder<> &) const { 1311283625Sdim // IndVar is of the form "A + B * I" (where "I" is the canonical induction 1312283625Sdim // variable, that may or may not exist as a real llvm::Value in the loop) and 1313283625Sdim // this inductive range check is a range check on the "C + D * I" ("C" is 1314283625Sdim // getOffset() and "D" is getScale()). We rewrite the value being range 1315283625Sdim // checked to "M + N * IndVar" where "N" = "D * B^(-1)" and "M" = "C - NA". 1316283625Sdim // Currently we support this only for "B" = "D" = { 1 or -1 }, but the code 1317283625Sdim // can be generalized as needed. 1318283625Sdim // 1319283625Sdim // The actual inequalities we solve are of the form 1320283625Sdim // 1321283625Sdim // 0 <= M + 1 * IndVar < L given L >= 0 (i.e. N == 1) 1322283625Sdim // 1323283625Sdim // The inequality is satisfied by -M <= IndVar < (L - M) [^1]. All additions 1324283625Sdim // and subtractions are twos-complement wrapping and comparisons are signed. 1325283625Sdim // 1326283625Sdim // Proof: 1327283625Sdim // 1328283625Sdim // If there exists IndVar such that -M <= IndVar < (L - M) then it follows 1329283625Sdim // that -M <= (-M + L) [== Eq. 1]. Since L >= 0, if (-M + L) sign-overflows 1330283625Sdim // then (-M + L) < (-M). Hence by [Eq. 1], (-M + L) could not have 1331283625Sdim // overflown. 1332283625Sdim // 1333283625Sdim // This means IndVar = t + (-M) for t in [0, L). Hence (IndVar + M) = t. 1334283625Sdim // Hence 0 <= (IndVar + M) < L 1335283625Sdim 1336283625Sdim // [^1]: Note that the solution does _not_ apply if L < 0; consider values M = 1337283625Sdim // 127, IndVar = 126 and L = -2 in an i8 world. 1338283625Sdim 1339283625Sdim if (!IndVar->isAffine()) 1340283625Sdim return None; 1341283625Sdim 1342283625Sdim const SCEV *A = IndVar->getStart(); 1343283625Sdim const SCEVConstant *B = dyn_cast<SCEVConstant>(IndVar->getStepRecurrence(SE)); 1344283625Sdim if (!B) 1345283625Sdim return None; 1346283625Sdim 1347283625Sdim const SCEV *C = getOffset(); 1348283625Sdim const SCEVConstant *D = dyn_cast<SCEVConstant>(getScale()); 1349283625Sdim if (D != B) 1350283625Sdim return None; 1351283625Sdim 1352283625Sdim ConstantInt *ConstD = D->getValue(); 1353283625Sdim if (!(ConstD->isMinusOne() || ConstD->isOne())) 1354283625Sdim return None; 1355283625Sdim 1356283625Sdim const SCEV *M = SE.getMinusSCEV(C, A); 1357283625Sdim 1358283625Sdim const SCEV *Begin = SE.getNegativeSCEV(M); 1359283625Sdim const SCEV *UpperLimit = nullptr; 1360283625Sdim 1361283625Sdim // We strengthen "0 <= I" to "0 <= I < INT_SMAX" and "I < L" to "0 <= I < L". 1362283625Sdim // We can potentially do much better here. 1363283625Sdim if (Value *V = getLength()) { 1364283625Sdim UpperLimit = SE.getSCEV(V); 1365283625Sdim } else { 1366283625Sdim assert(Kind == InductiveRangeCheck::RANGE_CHECK_LOWER && "invariant!"); 1367283625Sdim unsigned BitWidth = cast<IntegerType>(IndVar->getType())->getBitWidth(); 1368283625Sdim UpperLimit = SE.getConstant(APInt::getSignedMaxValue(BitWidth)); 1369283625Sdim } 1370283625Sdim 1371283625Sdim const SCEV *End = SE.getMinusSCEV(UpperLimit, M); 1372283625Sdim return InductiveRangeCheck::Range(Begin, End); 1373283625Sdim} 1374283625Sdim 1375283625Sdimstatic Optional<InductiveRangeCheck::Range> 1376283625SdimIntersectRange(ScalarEvolution &SE, 1377283625Sdim const Optional<InductiveRangeCheck::Range> &R1, 1378283625Sdim const InductiveRangeCheck::Range &R2, IRBuilder<> &B) { 1379283625Sdim if (!R1.hasValue()) 1380283625Sdim return R2; 1381283625Sdim auto &R1Value = R1.getValue(); 1382283625Sdim 1383283625Sdim // TODO: we could widen the smaller range and have this work; but for now we 1384283625Sdim // bail out to keep things simple. 1385283625Sdim if (R1Value.getType() != R2.getType()) 1386283625Sdim return None; 1387283625Sdim 1388283625Sdim const SCEV *NewBegin = SE.getSMaxExpr(R1Value.getBegin(), R2.getBegin()); 1389283625Sdim const SCEV *NewEnd = SE.getSMinExpr(R1Value.getEnd(), R2.getEnd()); 1390283625Sdim 1391283625Sdim return InductiveRangeCheck::Range(NewBegin, NewEnd); 1392283625Sdim} 1393283625Sdim 1394283625Sdimbool InductiveRangeCheckElimination::runOnLoop(Loop *L, LPPassManager &LPM) { 1395283625Sdim if (L->getBlocks().size() >= LoopSizeCutoff) { 1396283625Sdim DEBUG(dbgs() << "irce: giving up constraining loop, too large\n";); 1397283625Sdim return false; 1398283625Sdim } 1399283625Sdim 1400283625Sdim BasicBlock *Preheader = L->getLoopPreheader(); 1401283625Sdim if (!Preheader) { 1402283625Sdim DEBUG(dbgs() << "irce: loop has no preheader, leaving\n"); 1403283625Sdim return false; 1404283625Sdim } 1405283625Sdim 1406283625Sdim LLVMContext &Context = Preheader->getContext(); 1407283625Sdim InductiveRangeCheck::AllocatorTy IRCAlloc; 1408283625Sdim SmallVector<InductiveRangeCheck *, 16> RangeChecks; 1409296417Sdim ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE(); 1410296417Sdim BranchProbabilityInfo &BPI = 1411296417Sdim getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI(); 1412283625Sdim 1413283625Sdim for (auto BBI : L->getBlocks()) 1414283625Sdim if (BranchInst *TBI = dyn_cast<BranchInst>(BBI->getTerminator())) 1415283625Sdim if (InductiveRangeCheck *IRC = 1416283625Sdim InductiveRangeCheck::create(IRCAlloc, TBI, L, SE, BPI)) 1417283625Sdim RangeChecks.push_back(IRC); 1418283625Sdim 1419283625Sdim if (RangeChecks.empty()) 1420283625Sdim return false; 1421283625Sdim 1422283625Sdim auto PrintRecognizedRangeChecks = [&](raw_ostream &OS) { 1423283625Sdim OS << "irce: looking at loop "; L->print(OS); 1424283625Sdim OS << "irce: loop has " << RangeChecks.size() 1425283625Sdim << " inductive range checks: \n"; 1426283625Sdim for (InductiveRangeCheck *IRC : RangeChecks) 1427283625Sdim IRC->print(OS); 1428283625Sdim }; 1429283625Sdim 1430283625Sdim DEBUG(PrintRecognizedRangeChecks(dbgs())); 1431283625Sdim 1432283625Sdim if (PrintRangeChecks) 1433283625Sdim PrintRecognizedRangeChecks(errs()); 1434283625Sdim 1435283625Sdim const char *FailureReason = nullptr; 1436283625Sdim Optional<LoopStructure> MaybeLoopStructure = 1437283625Sdim LoopStructure::parseLoopStructure(SE, BPI, *L, FailureReason); 1438283625Sdim if (!MaybeLoopStructure.hasValue()) { 1439283625Sdim DEBUG(dbgs() << "irce: could not parse loop structure: " << FailureReason 1440283625Sdim << "\n";); 1441283625Sdim return false; 1442283625Sdim } 1443283625Sdim LoopStructure LS = MaybeLoopStructure.getValue(); 1444283625Sdim bool Increasing = LS.IndVarIncreasing; 1445283625Sdim const SCEV *MinusOne = 1446283625Sdim SE.getConstant(LS.IndVarNext->getType(), Increasing ? -1 : 1, true); 1447283625Sdim const SCEVAddRecExpr *IndVar = 1448283625Sdim cast<SCEVAddRecExpr>(SE.getAddExpr(SE.getSCEV(LS.IndVarNext), MinusOne)); 1449283625Sdim 1450283625Sdim Optional<InductiveRangeCheck::Range> SafeIterRange; 1451283625Sdim Instruction *ExprInsertPt = Preheader->getTerminator(); 1452283625Sdim 1453283625Sdim SmallVector<InductiveRangeCheck *, 4> RangeChecksToEliminate; 1454283625Sdim 1455283625Sdim IRBuilder<> B(ExprInsertPt); 1456283625Sdim for (InductiveRangeCheck *IRC : RangeChecks) { 1457283625Sdim auto Result = IRC->computeSafeIterationSpace(SE, IndVar, B); 1458283625Sdim if (Result.hasValue()) { 1459283625Sdim auto MaybeSafeIterRange = 1460283625Sdim IntersectRange(SE, SafeIterRange, Result.getValue(), B); 1461283625Sdim if (MaybeSafeIterRange.hasValue()) { 1462283625Sdim RangeChecksToEliminate.push_back(IRC); 1463283625Sdim SafeIterRange = MaybeSafeIterRange.getValue(); 1464283625Sdim } 1465283625Sdim } 1466283625Sdim } 1467283625Sdim 1468283625Sdim if (!SafeIterRange.hasValue()) 1469283625Sdim return false; 1470283625Sdim 1471283625Sdim LoopConstrainer LC(*L, getAnalysis<LoopInfoWrapperPass>().getLoopInfo(), LS, 1472283625Sdim SE, SafeIterRange.getValue()); 1473283625Sdim bool Changed = LC.run(); 1474283625Sdim 1475283625Sdim if (Changed) { 1476283625Sdim auto PrintConstrainedLoopInfo = [L]() { 1477283625Sdim dbgs() << "irce: in function "; 1478283625Sdim dbgs() << L->getHeader()->getParent()->getName() << ": "; 1479283625Sdim dbgs() << "constrained "; 1480283625Sdim L->print(dbgs()); 1481283625Sdim }; 1482283625Sdim 1483283625Sdim DEBUG(PrintConstrainedLoopInfo()); 1484283625Sdim 1485283625Sdim if (PrintChangedLoops) 1486283625Sdim PrintConstrainedLoopInfo(); 1487283625Sdim 1488283625Sdim // Optimize away the now-redundant range checks. 1489283625Sdim 1490283625Sdim for (InductiveRangeCheck *IRC : RangeChecksToEliminate) { 1491283625Sdim ConstantInt *FoldedRangeCheck = IRC->getPassingDirection() 1492283625Sdim ? ConstantInt::getTrue(Context) 1493283625Sdim : ConstantInt::getFalse(Context); 1494283625Sdim IRC->getBranch()->setCondition(FoldedRangeCheck); 1495283625Sdim } 1496283625Sdim } 1497283625Sdim 1498283625Sdim return Changed; 1499283625Sdim} 1500283625Sdim 1501283625SdimPass *llvm::createInductiveRangeCheckEliminationPass() { 1502283625Sdim return new InductiveRangeCheckElimination; 1503283625Sdim} 1504