1280031Sdim//===- ScalarEvolution.cpp - Scalar Evolution Analysis --------------------===// 2193323Sed// 3353358Sdim// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4353358Sdim// See https://llvm.org/LICENSE.txt for license information. 5353358Sdim// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6193323Sed// 7193323Sed//===----------------------------------------------------------------------===// 8193323Sed// 9193323Sed// This file contains the implementation of the scalar evolution analysis 10193323Sed// engine, which is used primarily to analyze expressions involving induction 11193323Sed// variables in loops. 12193323Sed// 13193323Sed// There are several aspects to this library. First is the representation of 14193323Sed// scalar expressions, which are represented as subclasses of the SCEV class. 15193323Sed// These classes are used to represent certain types of subexpressions that we 16198090Srdivacky// can handle. We only create one SCEV of a particular shape, so 17198090Srdivacky// pointer-comparisons for equality are legal. 18193323Sed// 19193323Sed// One important aspect of the SCEV objects is that they are never cyclic, even 20193323Sed// if there is a cycle in the dataflow for an expression (ie, a PHI node). If 21193323Sed// the PHI node is one of the idioms that we can represent (e.g., a polynomial 22193323Sed// recurrence) then we represent it directly as a recurrence node, otherwise we 23193323Sed// represent it as a SCEVUnknown node. 24193323Sed// 25193323Sed// In addition to being able to represent expressions of various types, we also 26193323Sed// have folders that are used to build the *canonical* representation for a 27193323Sed// particular expression. These folders are capable of using a variety of 28193323Sed// rewrite rules to simplify the expressions. 
29193323Sed// 30193323Sed// Once the folders are defined, we can implement the more interesting 31193323Sed// higher-level code, such as the code that recognizes PHI nodes of various 32193323Sed// types, computes the execution count of a loop, etc. 33193323Sed// 34193323Sed// TODO: We should use these routines and value representations to implement 35193323Sed// dependence analysis! 36193323Sed// 37193323Sed//===----------------------------------------------------------------------===// 38193323Sed// 39193323Sed// There are several good references for the techniques used in this analysis. 40193323Sed// 41193323Sed// Chains of recurrences -- a method to expedite the evaluation 42193323Sed// of closed-form functions 43193323Sed// Olaf Bachmann, Paul S. Wang, Eugene V. Zima 44193323Sed// 45193323Sed// On computational properties of chains of recurrences 46193323Sed// Eugene V. Zima 47193323Sed// 48193323Sed// Symbolic Evaluation of Chains of Recurrences for Loop Optimization 49193323Sed// Robert A. van Engelen 50193323Sed// 51193323Sed// Efficient Symbolic Analysis for Optimizing Compilers 52193323Sed// Robert A. 
van Engelen 53193323Sed// 54193323Sed// Using the chains of recurrences algebra for data dependence testing and 55193323Sed// induction variable substitution 56193323Sed// MS Thesis, Johnie Birch 57193323Sed// 58193323Sed//===----------------------------------------------------------------------===// 59193323Sed 60249423Sdim#include "llvm/Analysis/ScalarEvolution.h" 61327952Sdim#include "llvm/ADT/APInt.h" 62327952Sdim#include "llvm/ADT/ArrayRef.h" 63327952Sdim#include "llvm/ADT/DenseMap.h" 64327952Sdim#include "llvm/ADT/DepthFirstIterator.h" 65327952Sdim#include "llvm/ADT/EquivalenceClasses.h" 66327952Sdim#include "llvm/ADT/FoldingSet.h" 67327952Sdim#include "llvm/ADT/None.h" 68280031Sdim#include "llvm/ADT/Optional.h" 69249423Sdim#include "llvm/ADT/STLExtras.h" 70314564Sdim#include "llvm/ADT/ScopeExit.h" 71314564Sdim#include "llvm/ADT/Sequence.h" 72327952Sdim#include "llvm/ADT/SetVector.h" 73249423Sdim#include "llvm/ADT/SmallPtrSet.h" 74327952Sdim#include "llvm/ADT/SmallSet.h" 75327952Sdim#include "llvm/ADT/SmallVector.h" 76249423Sdim#include "llvm/ADT/Statistic.h" 77327952Sdim#include "llvm/ADT/StringRef.h" 78280031Sdim#include "llvm/Analysis/AssumptionCache.h" 79193323Sed#include "llvm/Analysis/ConstantFolding.h" 80218893Sdim#include "llvm/Analysis/InstructionSimplify.h" 81193323Sed#include "llvm/Analysis/LoopInfo.h" 82249423Sdim#include "llvm/Analysis/ScalarEvolutionExpressions.h" 83288943Sdim#include "llvm/Analysis/TargetLibraryInfo.h" 84194612Sed#include "llvm/Analysis/ValueTracking.h" 85341825Sdim#include "llvm/Config/llvm-config.h" 86327952Sdim#include "llvm/IR/Argument.h" 87327952Sdim#include "llvm/IR/BasicBlock.h" 88327952Sdim#include "llvm/IR/CFG.h" 89327952Sdim#include "llvm/IR/CallSite.h" 90327952Sdim#include "llvm/IR/Constant.h" 91276479Sdim#include "llvm/IR/ConstantRange.h" 92249423Sdim#include "llvm/IR/Constants.h" 93249423Sdim#include "llvm/IR/DataLayout.h" 94249423Sdim#include "llvm/IR/DerivedTypes.h" 95276479Sdim#include "llvm/IR/Dominators.h" 
96327952Sdim#include "llvm/IR/Function.h" 97249423Sdim#include "llvm/IR/GlobalAlias.h" 98327952Sdim#include "llvm/IR/GlobalValue.h" 99249423Sdim#include "llvm/IR/GlobalVariable.h" 100276479Sdim#include "llvm/IR/InstIterator.h" 101327952Sdim#include "llvm/IR/InstrTypes.h" 102327952Sdim#include "llvm/IR/Instruction.h" 103249423Sdim#include "llvm/IR/Instructions.h" 104327952Sdim#include "llvm/IR/IntrinsicInst.h" 105327952Sdim#include "llvm/IR/Intrinsics.h" 106249423Sdim#include "llvm/IR/LLVMContext.h" 107280031Sdim#include "llvm/IR/Metadata.h" 108249423Sdim#include "llvm/IR/Operator.h" 109296417Sdim#include "llvm/IR/PatternMatch.h" 110327952Sdim#include "llvm/IR/Type.h" 111327952Sdim#include "llvm/IR/Use.h" 112327952Sdim#include "llvm/IR/User.h" 113327952Sdim#include "llvm/IR/Value.h" 114344779Sdim#include "llvm/IR/Verifier.h" 115360784Sdim#include "llvm/InitializePasses.h" 116327952Sdim#include "llvm/Pass.h" 117327952Sdim#include "llvm/Support/Casting.h" 118193323Sed#include "llvm/Support/CommandLine.h" 119327952Sdim#include "llvm/Support/Compiler.h" 120201360Srdivacky#include "llvm/Support/Debug.h" 121198090Srdivacky#include "llvm/Support/ErrorHandling.h" 122321369Sdim#include "llvm/Support/KnownBits.h" 123321369Sdim#include "llvm/Support/SaveAndRestore.h" 124193323Sed#include "llvm/Support/raw_ostream.h" 125193323Sed#include <algorithm> 126327952Sdim#include <cassert> 127327952Sdim#include <climits> 128327952Sdim#include <cstddef> 129327952Sdim#include <cstdint> 130327952Sdim#include <cstdlib> 131327952Sdim#include <map> 132327952Sdim#include <memory> 133327952Sdim#include <tuple> 134327952Sdim#include <utility> 135327952Sdim#include <vector> 136327952Sdim 137193323Sedusing namespace llvm; 138193323Sed 139276479Sdim#define DEBUG_TYPE "scalar-evolution" 140276479Sdim 141193323SedSTATISTIC(NumArrayLenItCounts, 142193323Sed "Number of trip counts computed with array length"); 143193323SedSTATISTIC(NumTripCountsComputed, 144193323Sed "Number of loops with predictable 
loop counts"); 145193323SedSTATISTIC(NumTripCountsNotComputed, 146193323Sed "Number of loops without predictable loop counts"); 147193323SedSTATISTIC(NumBruteForceTripCountsComputed, 148193323Sed "Number of loops with trip counts computed by force"); 149193323Sed 150193323Sedstatic cl::opt<unsigned> 151193323SedMaxBruteForceIterations("scalar-evolution-max-iterations", cl::ReallyHidden, 152360784Sdim cl::ZeroOrMore, 153193323Sed cl::desc("Maximum number of iterations SCEV will " 154195098Sed "symbolically execute a constant " 155195098Sed "derived loop"), 156193323Sed cl::init(100)); 157193323Sed 158309124Sdim// FIXME: Enable this with EXPENSIVE_CHECKS when the test suite is clean. 159327952Sdimstatic cl::opt<bool> VerifySCEV( 160327952Sdim "verify-scev", cl::Hidden, 161327952Sdim cl::desc("Verify ScalarEvolution's backedge taken counts (slow)")); 162360784Sdimstatic cl::opt<bool> VerifySCEVStrict( 163360784Sdim "verify-scev-strict", cl::Hidden, 164360784Sdim cl::desc("Enable stricter verification with -verify-scev is passed")); 165243830Sdimstatic cl::opt<bool> 166327952Sdim VerifySCEVMap("verify-scev-maps", cl::Hidden, 167309124Sdim cl::desc("Verify no dangling value in ScalarEvolution's " 168309124Sdim "ExprValueMap (slow)")); 169243830Sdim 170344779Sdimstatic cl::opt<bool> VerifyIR( 171344779Sdim "scev-verify-ir", cl::Hidden, 172344779Sdim cl::desc("Verify IR correctness when making sensitive SCEV queries (slow)"), 173344779Sdim cl::init(false)); 174344779Sdim 175314564Sdimstatic cl::opt<unsigned> MulOpsInlineThreshold( 176314564Sdim "scev-mulops-inline-threshold", cl::Hidden, 177314564Sdim cl::desc("Threshold for inlining multiplication operands into a SCEV"), 178321369Sdim cl::init(32)); 179314564Sdim 180321369Sdimstatic cl::opt<unsigned> AddOpsInlineThreshold( 181321369Sdim "scev-addops-inline-threshold", cl::Hidden, 182321369Sdim cl::desc("Threshold for inlining addition operands into a SCEV"), 183321369Sdim cl::init(500)); 184321369Sdim 185314795Sdimstatic 
cl::opt<unsigned> MaxSCEVCompareDepth( 186314795Sdim "scalar-evolution-max-scev-compare-depth", cl::Hidden, 187314795Sdim cl::desc("Maximum depth of recursive SCEV complexity comparisons"), 188314795Sdim cl::init(32)); 189314795Sdim 190321369Sdimstatic cl::opt<unsigned> MaxSCEVOperationsImplicationDepth( 191321369Sdim "scalar-evolution-max-scev-operations-implication-depth", cl::Hidden, 192321369Sdim cl::desc("Maximum depth of recursive SCEV operations implication analysis"), 193321369Sdim cl::init(2)); 194321369Sdim 195314795Sdimstatic cl::opt<unsigned> MaxValueCompareDepth( 196314795Sdim "scalar-evolution-max-value-compare-depth", cl::Hidden, 197314795Sdim cl::desc("Maximum depth of recursive value complexity comparisons"), 198314795Sdim cl::init(2)); 199314795Sdim 200321369Sdimstatic cl::opt<unsigned> 201321369Sdim MaxArithDepth("scalar-evolution-max-arith-depth", cl::Hidden, 202321369Sdim cl::desc("Maximum depth of recursive arithmetics"), 203321369Sdim cl::init(32)); 204321369Sdim 205321369Sdimstatic cl::opt<unsigned> MaxConstantEvolvingDepth( 206321369Sdim "scalar-evolution-max-constant-evolving-depth", cl::Hidden, 207321369Sdim cl::desc("Maximum depth of recursive constant evolving"), cl::init(32)); 208321369Sdim 209321369Sdimstatic cl::opt<unsigned> 210353358Sdim MaxCastDepth("scalar-evolution-max-cast-depth", cl::Hidden, 211353358Sdim cl::desc("Maximum depth of recursive SExt/ZExt/Trunc"), 212353358Sdim cl::init(8)); 213321369Sdim 214322740Sdimstatic cl::opt<unsigned> 215322740Sdim MaxAddRecSize("scalar-evolution-max-add-rec-size", cl::Hidden, 216322740Sdim cl::desc("Max coefficients in AddRec during evolving"), 217344779Sdim cl::init(8)); 218322740Sdim 219353358Sdimstatic cl::opt<unsigned> 220353358Sdim HugeExprThreshold("scalar-evolution-huge-expr-threshold", cl::Hidden, 221353358Sdim cl::desc("Size of the expression which is considered huge"), 222353358Sdim cl::init(4096)); 223353358Sdim 224360784Sdimstatic cl::opt<bool> 
225360784SdimClassifyExpressions("scalar-evolution-classify-expressions", 226360784Sdim cl::Hidden, cl::init(true), 227360784Sdim cl::desc("When printing analysis, include information on every instruction")); 228360784Sdim 229360784Sdim 230193323Sed//===----------------------------------------------------------------------===// 231193323Sed// SCEV class definitions 232193323Sed//===----------------------------------------------------------------------===// 233193323Sed 234193323Sed//===----------------------------------------------------------------------===// 235193323Sed// Implementation of the SCEV class. 236193323Sed// 237195340Sed 238321369Sdim#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 239321369SdimLLVM_DUMP_METHOD void SCEV::dump() const { 240201360Srdivacky print(dbgs()); 241201360Srdivacky dbgs() << '\n'; 242193323Sed} 243321369Sdim#endif 244193323Sed 245218893Sdimvoid SCEV::print(raw_ostream &OS) const { 246276479Sdim switch (static_cast<SCEVTypes>(getSCEVType())) { 247218893Sdim case scConstant: 248276479Sdim cast<SCEVConstant>(this)->getValue()->printAsOperand(OS, false); 249218893Sdim return; 250218893Sdim case scTruncate: { 251218893Sdim const SCEVTruncateExpr *Trunc = cast<SCEVTruncateExpr>(this); 252218893Sdim const SCEV *Op = Trunc->getOperand(); 253218893Sdim OS << "(trunc " << *Op->getType() << " " << *Op << " to " 254218893Sdim << *Trunc->getType() << ")"; 255218893Sdim return; 256218893Sdim } 257218893Sdim case scZeroExtend: { 258218893Sdim const SCEVZeroExtendExpr *ZExt = cast<SCEVZeroExtendExpr>(this); 259218893Sdim const SCEV *Op = ZExt->getOperand(); 260218893Sdim OS << "(zext " << *Op->getType() << " " << *Op << " to " 261218893Sdim << *ZExt->getType() << ")"; 262218893Sdim return; 263218893Sdim } 264218893Sdim case scSignExtend: { 265218893Sdim const SCEVSignExtendExpr *SExt = cast<SCEVSignExtendExpr>(this); 266218893Sdim const SCEV *Op = SExt->getOperand(); 267218893Sdim OS << "(sext " << *Op->getType() << " " << *Op << " to " 
268218893Sdim << *SExt->getType() << ")"; 269218893Sdim return; 270218893Sdim } 271218893Sdim case scAddRecExpr: { 272218893Sdim const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(this); 273218893Sdim OS << "{" << *AR->getOperand(0); 274218893Sdim for (unsigned i = 1, e = AR->getNumOperands(); i != e; ++i) 275218893Sdim OS << ",+," << *AR->getOperand(i); 276218893Sdim OS << "}<"; 277309124Sdim if (AR->hasNoUnsignedWrap()) 278218893Sdim OS << "nuw><"; 279309124Sdim if (AR->hasNoSignedWrap()) 280218893Sdim OS << "nsw><"; 281309124Sdim if (AR->hasNoSelfWrap() && 282221345Sdim !AR->getNoWrapFlags((NoWrapFlags)(FlagNUW | FlagNSW))) 283221345Sdim OS << "nw><"; 284276479Sdim AR->getLoop()->getHeader()->printAsOperand(OS, /*PrintType=*/false); 285218893Sdim OS << ">"; 286218893Sdim return; 287218893Sdim } 288218893Sdim case scAddExpr: 289218893Sdim case scMulExpr: 290218893Sdim case scUMaxExpr: 291353358Sdim case scSMaxExpr: 292353358Sdim case scUMinExpr: 293353358Sdim case scSMinExpr: { 294218893Sdim const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(this); 295276479Sdim const char *OpStr = nullptr; 296218893Sdim switch (NAry->getSCEVType()) { 297218893Sdim case scAddExpr: OpStr = " + "; break; 298218893Sdim case scMulExpr: OpStr = " * "; break; 299218893Sdim case scUMaxExpr: OpStr = " umax "; break; 300218893Sdim case scSMaxExpr: OpStr = " smax "; break; 301353358Sdim case scUMinExpr: 302353358Sdim OpStr = " umin "; 303353358Sdim break; 304353358Sdim case scSMinExpr: 305353358Sdim OpStr = " smin "; 306353358Sdim break; 307218893Sdim } 308218893Sdim OS << "("; 309218893Sdim for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end(); 310218893Sdim I != E; ++I) { 311218893Sdim OS << **I; 312276479Sdim if (std::next(I) != E) 313218893Sdim OS << OpStr; 314218893Sdim } 315218893Sdim OS << ")"; 316234353Sdim switch (NAry->getSCEVType()) { 317234353Sdim case scAddExpr: 318234353Sdim case scMulExpr: 319309124Sdim if (NAry->hasNoUnsignedWrap()) 320234353Sdim OS << "<nuw>"; 
321309124Sdim if (NAry->hasNoSignedWrap()) 322234353Sdim OS << "<nsw>"; 323234353Sdim } 324218893Sdim return; 325218893Sdim } 326218893Sdim case scUDivExpr: { 327218893Sdim const SCEVUDivExpr *UDiv = cast<SCEVUDivExpr>(this); 328218893Sdim OS << "(" << *UDiv->getLHS() << " /u " << *UDiv->getRHS() << ")"; 329218893Sdim return; 330218893Sdim } 331218893Sdim case scUnknown: { 332218893Sdim const SCEVUnknown *U = cast<SCEVUnknown>(this); 333226633Sdim Type *AllocTy; 334218893Sdim if (U->isSizeOf(AllocTy)) { 335218893Sdim OS << "sizeof(" << *AllocTy << ")"; 336218893Sdim return; 337218893Sdim } 338218893Sdim if (U->isAlignOf(AllocTy)) { 339218893Sdim OS << "alignof(" << *AllocTy << ")"; 340218893Sdim return; 341218893Sdim } 342221345Sdim 343226633Sdim Type *CTy; 344218893Sdim Constant *FieldNo; 345218893Sdim if (U->isOffsetOf(CTy, FieldNo)) { 346218893Sdim OS << "offsetof(" << *CTy << ", "; 347276479Sdim FieldNo->printAsOperand(OS, false); 348218893Sdim OS << ")"; 349218893Sdim return; 350218893Sdim } 351221345Sdim 352218893Sdim // Otherwise just print it normally. 
353276479Sdim U->getValue()->printAsOperand(OS, false); 354218893Sdim return; 355218893Sdim } 356218893Sdim case scCouldNotCompute: 357218893Sdim OS << "***COULDNOTCOMPUTE***"; 358218893Sdim return; 359218893Sdim } 360218893Sdim llvm_unreachable("Unknown SCEV kind!"); 361218893Sdim} 362218893Sdim 363226633SdimType *SCEV::getType() const { 364276479Sdim switch (static_cast<SCEVTypes>(getSCEVType())) { 365218893Sdim case scConstant: 366218893Sdim return cast<SCEVConstant>(this)->getType(); 367218893Sdim case scTruncate: 368218893Sdim case scZeroExtend: 369218893Sdim case scSignExtend: 370218893Sdim return cast<SCEVCastExpr>(this)->getType(); 371218893Sdim case scAddRecExpr: 372218893Sdim case scMulExpr: 373218893Sdim case scUMaxExpr: 374218893Sdim case scSMaxExpr: 375353358Sdim case scUMinExpr: 376353358Sdim case scSMinExpr: 377218893Sdim return cast<SCEVNAryExpr>(this)->getType(); 378218893Sdim case scAddExpr: 379218893Sdim return cast<SCEVAddExpr>(this)->getType(); 380218893Sdim case scUDivExpr: 381218893Sdim return cast<SCEVUDivExpr>(this)->getType(); 382218893Sdim case scUnknown: 383218893Sdim return cast<SCEVUnknown>(this)->getType(); 384218893Sdim case scCouldNotCompute: 385218893Sdim llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); 386218893Sdim } 387276479Sdim llvm_unreachable("Unknown SCEV kind!"); 388218893Sdim} 389218893Sdim 390193323Sedbool SCEV::isZero() const { 391193323Sed if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(this)) 392193323Sed return SC->getValue()->isZero(); 393193323Sed return false; 394193323Sed} 395193323Sed 396193323Sedbool SCEV::isOne() const { 397193323Sed if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(this)) 398193323Sed return SC->getValue()->isOne(); 399193323Sed return false; 400193323Sed} 401193323Sed 402195098Sedbool SCEV::isAllOnesValue() const { 403195098Sed if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(this)) 404321369Sdim return SC->getValue()->isMinusOne(); 405195098Sed return false; 
406195098Sed} 407195098Sed 408234353Sdimbool SCEV::isNonConstantNegative() const { 409234353Sdim const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(this); 410234353Sdim if (!Mul) return false; 411234353Sdim 412234353Sdim // If there is a constant factor, it will be first. 413234353Sdim const SCEVConstant *SC = dyn_cast<SCEVConstant>(Mul->getOperand(0)); 414234353Sdim if (!SC) return false; 415234353Sdim 416234353Sdim // Return true if the value is negative, this matches things like (-42 * V). 417296417Sdim return SC->getAPInt().isNegative(); 418234353Sdim} 419234353Sdim 420194710SedSCEVCouldNotCompute::SCEVCouldNotCompute() : 421353358Sdim SCEV(FoldingSetNodeIDRef(), scCouldNotCompute, 0) {} 422193323Sed 423193323Sedbool SCEVCouldNotCompute::classof(const SCEV *S) { 424193323Sed return S->getSCEVType() == scCouldNotCompute; 425193323Sed} 426193323Sed 427198090Srdivackyconst SCEV *ScalarEvolution::getConstant(ConstantInt *V) { 428195340Sed FoldingSetNodeID ID; 429195340Sed ID.AddInteger(scConstant); 430195340Sed ID.AddPointer(V); 431276479Sdim void *IP = nullptr; 432195340Sed if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; 433205407Srdivacky SCEV *S = new (SCEVAllocator) SCEVConstant(ID.Intern(SCEVAllocator), V); 434195340Sed UniqueSCEVs.InsertNode(S, IP); 435195340Sed return S; 436193323Sed} 437193323Sed 438276479Sdimconst SCEV *ScalarEvolution::getConstant(const APInt &Val) { 439198090Srdivacky return getConstant(ConstantInt::get(getContext(), Val)); 440193323Sed} 441193323Sed 442198090Srdivackyconst SCEV * 443226633SdimScalarEvolution::getConstant(Type *Ty, uint64_t V, bool isSigned) { 444226633Sdim IntegerType *ITy = cast<IntegerType>(getEffectiveSCEVType(Ty)); 445207618Srdivacky return getConstant(ConstantInt::get(ITy, V, isSigned)); 446194612Sed} 447194612Sed 448205407SrdivackySCEVCastExpr::SCEVCastExpr(const FoldingSetNodeIDRef ID, 449226633Sdim unsigned SCEVTy, const SCEV *op, Type *ty) 450353358Sdim : SCEV(ID, SCEVTy, 
computeExpressionSize(op)), Op(op), Ty(ty) {} 451193323Sed 452205407SrdivackySCEVTruncateExpr::SCEVTruncateExpr(const FoldingSetNodeIDRef ID, 453226633Sdim const SCEV *op, Type *ty) 454198090Srdivacky : SCEVCastExpr(ID, scTruncate, op, ty) { 455341825Sdim assert(Op->getType()->isIntOrPtrTy() && Ty->isIntOrPtrTy() && 456193323Sed "Cannot truncate non-integer value!"); 457193323Sed} 458193323Sed 459205407SrdivackySCEVZeroExtendExpr::SCEVZeroExtendExpr(const FoldingSetNodeIDRef ID, 460226633Sdim const SCEV *op, Type *ty) 461198090Srdivacky : SCEVCastExpr(ID, scZeroExtend, op, ty) { 462341825Sdim assert(Op->getType()->isIntOrPtrTy() && Ty->isIntOrPtrTy() && 463193323Sed "Cannot zero extend non-integer value!"); 464193323Sed} 465193323Sed 466205407SrdivackySCEVSignExtendExpr::SCEVSignExtendExpr(const FoldingSetNodeIDRef ID, 467226633Sdim const SCEV *op, Type *ty) 468198090Srdivacky : SCEVCastExpr(ID, scSignExtend, op, ty) { 469341825Sdim assert(Op->getType()->isIntOrPtrTy() && Ty->isIntOrPtrTy() && 470193323Sed "Cannot sign extend non-integer value!"); 471193323Sed} 472193323Sed 473212904Sdimvoid SCEVUnknown::deleted() { 474218893Sdim // Clear this SCEVUnknown from various maps. 475218893Sdim SE->forgetMemoizedResults(this); 476212904Sdim 477212904Sdim // Remove this SCEVUnknown from the uniquing map. 478212904Sdim SE->UniqueSCEVs.RemoveNode(this); 479212904Sdim 480212904Sdim // Release the value. 481276479Sdim setValPtr(nullptr); 482212904Sdim} 483212904Sdim 484212904Sdimvoid SCEVUnknown::allUsesReplacedWith(Value *New) { 485212904Sdim // Remove this SCEVUnknown from the uniquing map. 486212904Sdim SE->UniqueSCEVs.RemoveNode(this); 487212904Sdim 488212904Sdim // Update this SCEVUnknown to point to the new value. This is needed 489212904Sdim // because there may still be outstanding SCEVs which still point to 490212904Sdim // this SCEVUnknown. 
491212904Sdim setValPtr(New); 492212904Sdim} 493212904Sdim 494226633Sdimbool SCEVUnknown::isSizeOf(Type *&AllocTy) const { 495212904Sdim if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue())) 496203954Srdivacky if (VCE->getOpcode() == Instruction::PtrToInt) 497203954Srdivacky if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0))) 498203954Srdivacky if (CE->getOpcode() == Instruction::GetElementPtr && 499203954Srdivacky CE->getOperand(0)->isNullValue() && 500203954Srdivacky CE->getNumOperands() == 2) 501203954Srdivacky if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(1))) 502203954Srdivacky if (CI->isOne()) { 503203954Srdivacky AllocTy = cast<PointerType>(CE->getOperand(0)->getType()) 504203954Srdivacky ->getElementType(); 505203954Srdivacky return true; 506203954Srdivacky } 507203954Srdivacky 508203954Srdivacky return false; 509203954Srdivacky} 510203954Srdivacky 511226633Sdimbool SCEVUnknown::isAlignOf(Type *&AllocTy) const { 512212904Sdim if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue())) 513203954Srdivacky if (VCE->getOpcode() == Instruction::PtrToInt) 514203954Srdivacky if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0))) 515203954Srdivacky if (CE->getOpcode() == Instruction::GetElementPtr && 516203954Srdivacky CE->getOperand(0)->isNullValue()) { 517226633Sdim Type *Ty = 518203954Srdivacky cast<PointerType>(CE->getOperand(0)->getType())->getElementType(); 519226633Sdim if (StructType *STy = dyn_cast<StructType>(Ty)) 520203954Srdivacky if (!STy->isPacked() && 521203954Srdivacky CE->getNumOperands() == 3 && 522203954Srdivacky CE->getOperand(1)->isNullValue()) { 523203954Srdivacky if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(2))) 524203954Srdivacky if (CI->isOne() && 525203954Srdivacky STy->getNumElements() == 2 && 526203954Srdivacky STy->getElementType(0)->isIntegerTy(1)) { 527203954Srdivacky AllocTy = STy->getElementType(1); 528203954Srdivacky return true; 529203954Srdivacky } 530203954Srdivacky } 
531203954Srdivacky } 532203954Srdivacky 533203954Srdivacky return false; 534203954Srdivacky} 535203954Srdivacky 536226633Sdimbool SCEVUnknown::isOffsetOf(Type *&CTy, Constant *&FieldNo) const { 537212904Sdim if (ConstantExpr *VCE = dyn_cast<ConstantExpr>(getValue())) 538203954Srdivacky if (VCE->getOpcode() == Instruction::PtrToInt) 539203954Srdivacky if (ConstantExpr *CE = dyn_cast<ConstantExpr>(VCE->getOperand(0))) 540203954Srdivacky if (CE->getOpcode() == Instruction::GetElementPtr && 541203954Srdivacky CE->getNumOperands() == 3 && 542203954Srdivacky CE->getOperand(0)->isNullValue() && 543203954Srdivacky CE->getOperand(1)->isNullValue()) { 544226633Sdim Type *Ty = 545203954Srdivacky cast<PointerType>(CE->getOperand(0)->getType())->getElementType(); 546203954Srdivacky // Ignore vector types here so that ScalarEvolutionExpander doesn't 547203954Srdivacky // emit getelementptrs that index into vectors. 548204642Srdivacky if (Ty->isStructTy() || Ty->isArrayTy()) { 549203954Srdivacky CTy = Ty; 550203954Srdivacky FieldNo = CE->getOperand(2); 551203954Srdivacky return true; 552203954Srdivacky } 553203954Srdivacky } 554203954Srdivacky 555203954Srdivacky return false; 556203954Srdivacky} 557203954Srdivacky 558193323Sed//===----------------------------------------------------------------------===// 559193323Sed// SCEV Utilities 560193323Sed//===----------------------------------------------------------------------===// 561193323Sed 562314564Sdim/// Compare the two values \p LV and \p RV in terms of their "complexity" where 563314564Sdim/// "complexity" is a partial (and somewhat ad-hoc) relation used to order 564314564Sdim/// operands in SCEV expressions. \p EqCache is a set of pairs of values that 565314564Sdim/// have been previously deemed to be "equally complex" by this routine. 
It is 566314564Sdim/// intended to avoid exponential time complexity in cases like: 567314564Sdim/// 568314564Sdim/// %a = f(%x, %y) 569314564Sdim/// %b = f(%a, %a) 570314564Sdim/// %c = f(%b, %b) 571314564Sdim/// 572314564Sdim/// %d = f(%x, %y) 573314564Sdim/// %e = f(%d, %d) 574314564Sdim/// %f = f(%e, %e) 575314564Sdim/// 576314564Sdim/// CompareValueComplexity(%f, %c) 577314564Sdim/// 578314564Sdim/// Since we do not continue running this routine on expression trees once we 579314564Sdim/// have seen unequal values, there is no need to track them in the cache. 580314564Sdimstatic int 581327952SdimCompareValueComplexity(EquivalenceClasses<const Value *> &EqCacheValue, 582314564Sdim const LoopInfo *const LI, Value *LV, Value *RV, 583314795Sdim unsigned Depth) { 584327952Sdim if (Depth > MaxValueCompareDepth || EqCacheValue.isEquivalent(LV, RV)) 585314564Sdim return 0; 586193323Sed 587314564Sdim // Order pointer values after integer values. This helps SCEVExpander form 588314564Sdim // GEPs. 589314564Sdim bool LIsPointer = LV->getType()->isPointerTy(), 590314564Sdim RIsPointer = RV->getType()->isPointerTy(); 591314564Sdim if (LIsPointer != RIsPointer) 592314564Sdim return (int)LIsPointer - (int)RIsPointer; 593314564Sdim 594314564Sdim // Compare getValueID values. 595314564Sdim unsigned LID = LV->getValueID(), RID = RV->getValueID(); 596314564Sdim if (LID != RID) 597314564Sdim return (int)LID - (int)RID; 598314564Sdim 599314564Sdim // Sort arguments by their position. 
600314564Sdim if (const auto *LA = dyn_cast<Argument>(LV)) { 601314564Sdim const auto *RA = cast<Argument>(RV); 602314564Sdim unsigned LArgNo = LA->getArgNo(), RArgNo = RA->getArgNo(); 603314564Sdim return (int)LArgNo - (int)RArgNo; 604296417Sdim } 605212904Sdim 606314564Sdim if (const auto *LGV = dyn_cast<GlobalValue>(LV)) { 607314564Sdim const auto *RGV = cast<GlobalValue>(RV); 608198090Srdivacky 609314564Sdim const auto IsGVNameSemantic = [&](const GlobalValue *GV) { 610314564Sdim auto LT = GV->getLinkage(); 611314564Sdim return !(GlobalValue::isPrivateLinkage(LT) || 612314564Sdim GlobalValue::isInternalLinkage(LT)); 613314564Sdim }; 614193323Sed 615314564Sdim // Use the names to distinguish the two values, but only if the 616314564Sdim // names are semantically important. 617314564Sdim if (IsGVNameSemantic(LGV) && IsGVNameSemantic(RGV)) 618314564Sdim return LGV->getName().compare(RGV->getName()); 619314564Sdim } 620193323Sed 621314564Sdim // For instructions, compare their loop depth, and their operand count. This 622314564Sdim // is pretty loose. 623314564Sdim if (const auto *LInst = dyn_cast<Instruction>(LV)) { 624314564Sdim const auto *RInst = cast<Instruction>(RV); 625212904Sdim 626314564Sdim // Compare loop depths. 627314564Sdim const BasicBlock *LParent = LInst->getParent(), 628314564Sdim *RParent = RInst->getParent(); 629314564Sdim if (LParent != RParent) { 630314564Sdim unsigned LDepth = LI->getLoopDepth(LParent), 631314564Sdim RDepth = LI->getLoopDepth(RParent); 632314564Sdim if (LDepth != RDepth) 633314564Sdim return (int)LDepth - (int)RDepth; 634314564Sdim } 635193323Sed 636314564Sdim // Compare the number of operands. 
637314564Sdim unsigned LNumOps = LInst->getNumOperands(), 638314564Sdim RNumOps = RInst->getNumOperands(); 639314564Sdim if (LNumOps != RNumOps) 640314564Sdim return (int)LNumOps - (int)RNumOps; 641193323Sed 642314564Sdim for (unsigned Idx : seq(0u, LNumOps)) { 643314564Sdim int Result = 644327952Sdim CompareValueComplexity(EqCacheValue, LI, LInst->getOperand(Idx), 645314795Sdim RInst->getOperand(Idx), Depth + 1); 646314564Sdim if (Result != 0) 647314564Sdim return Result; 648314564Sdim } 649314564Sdim } 650193323Sed 651327952Sdim EqCacheValue.unionSets(LV, RV); 652314564Sdim return 0; 653314564Sdim} 654193323Sed 655314564Sdim// Return negative, zero, or positive, if LHS is less than, equal to, or greater 656314564Sdim// than RHS, respectively. A three-way result allows recursive comparisons to be 657314564Sdim// more efficient. 658314795Sdimstatic int CompareSCEVComplexity( 659327952Sdim EquivalenceClasses<const SCEV *> &EqCacheSCEV, 660327952Sdim EquivalenceClasses<const Value *> &EqCacheValue, 661314795Sdim const LoopInfo *const LI, const SCEV *LHS, const SCEV *RHS, 662321369Sdim DominatorTree &DT, unsigned Depth = 0) { 663314564Sdim // Fast-path: SCEVs are uniqued so we can do a quick equality check. 664314564Sdim if (LHS == RHS) 665314564Sdim return 0; 666193323Sed 667314564Sdim // Primarily, sort the SCEVs by their getSCEVType(). 668314564Sdim unsigned LType = LHS->getSCEVType(), RType = RHS->getSCEVType(); 669314564Sdim if (LType != RType) 670314564Sdim return (int)LType - (int)RType; 671193323Sed 672327952Sdim if (Depth > MaxSCEVCompareDepth || EqCacheSCEV.isEquivalent(LHS, RHS)) 673314795Sdim return 0; 674314564Sdim // Aside from the getSCEVType() ordering, the particular ordering 675314564Sdim // isn't very important except that it's beneficial to be consistent, 676314564Sdim // so that (a + b) and (b + a) don't end up as different expressions. 
677314564Sdim switch (static_cast<SCEVTypes>(LType)) { 678314564Sdim case scUnknown: { 679314564Sdim const SCEVUnknown *LU = cast<SCEVUnknown>(LHS); 680314564Sdim const SCEVUnknown *RU = cast<SCEVUnknown>(RHS); 681212904Sdim 682327952Sdim int X = CompareValueComplexity(EqCacheValue, LI, LU->getValue(), 683327952Sdim RU->getValue(), Depth + 1); 684314795Sdim if (X == 0) 685327952Sdim EqCacheSCEV.unionSets(LHS, RHS); 686314795Sdim return X; 687314564Sdim } 688193323Sed 689314564Sdim case scConstant: { 690314564Sdim const SCEVConstant *LC = cast<SCEVConstant>(LHS); 691314564Sdim const SCEVConstant *RC = cast<SCEVConstant>(RHS); 692212904Sdim 693314564Sdim // Compare constant values. 694314564Sdim const APInt &LA = LC->getAPInt(); 695314564Sdim const APInt &RA = RC->getAPInt(); 696314564Sdim unsigned LBitWidth = LA.getBitWidth(), RBitWidth = RA.getBitWidth(); 697314564Sdim if (LBitWidth != RBitWidth) 698314564Sdim return (int)LBitWidth - (int)RBitWidth; 699314564Sdim return LA.ult(RA) ? -1 : 1; 700314564Sdim } 701212904Sdim 702314564Sdim case scAddRecExpr: { 703314564Sdim const SCEVAddRecExpr *LA = cast<SCEVAddRecExpr>(LHS); 704314564Sdim const SCEVAddRecExpr *RA = cast<SCEVAddRecExpr>(RHS); 705212904Sdim 706321369Sdim // There is always a dominance between two recs that are used by one SCEV, 707321369Sdim // so we can safely sort recs by loop header dominance. We require such 708321369Sdim // order in getAddExpr. 
709314564Sdim const Loop *LLoop = LA->getLoop(), *RLoop = RA->getLoop(); 710314564Sdim if (LLoop != RLoop) { 711321369Sdim const BasicBlock *LHead = LLoop->getHeader(), *RHead = RLoop->getHeader(); 712321369Sdim assert(LHead != RHead && "Two loops share the same header?"); 713321369Sdim if (DT.dominates(LHead, RHead)) 714321369Sdim return 1; 715321369Sdim else 716321369Sdim assert(DT.dominates(RHead, LHead) && 717321369Sdim "No dominance between recurrences used by one SCEV?"); 718321369Sdim return -1; 719314564Sdim } 720212904Sdim 721314564Sdim // Addrec complexity grows with operand count. 722314564Sdim unsigned LNumOps = LA->getNumOperands(), RNumOps = RA->getNumOperands(); 723314564Sdim if (LNumOps != RNumOps) 724314564Sdim return (int)LNumOps - (int)RNumOps; 725194612Sed 726314564Sdim // Lexicographically compare. 727314564Sdim for (unsigned i = 0; i != LNumOps; ++i) { 728327952Sdim int X = CompareSCEVComplexity(EqCacheSCEV, EqCacheValue, LI, 729327952Sdim LA->getOperand(i), RA->getOperand(i), DT, 730327952Sdim Depth + 1); 731314564Sdim if (X != 0) 732314564Sdim return X; 733296417Sdim } 734327952Sdim EqCacheSCEV.unionSets(LHS, RHS); 735314564Sdim return 0; 736314564Sdim } 737212904Sdim 738314564Sdim case scAddExpr: 739314564Sdim case scMulExpr: 740314564Sdim case scSMaxExpr: 741353358Sdim case scUMaxExpr: 742353358Sdim case scSMinExpr: 743353358Sdim case scUMinExpr: { 744314564Sdim const SCEVNAryExpr *LC = cast<SCEVNAryExpr>(LHS); 745314564Sdim const SCEVNAryExpr *RC = cast<SCEVNAryExpr>(RHS); 746261991Sdim 747314564Sdim // Lexicographically compare n-ary expressions. 
748314564Sdim unsigned LNumOps = LC->getNumOperands(), RNumOps = RC->getNumOperands(); 749314564Sdim if (LNumOps != RNumOps) 750296417Sdim return (int)LNumOps - (int)RNumOps; 751193323Sed 752314564Sdim for (unsigned i = 0; i != LNumOps; ++i) { 753327952Sdim int X = CompareSCEVComplexity(EqCacheSCEV, EqCacheValue, LI, 754327952Sdim LC->getOperand(i), RC->getOperand(i), DT, 755327952Sdim Depth + 1); 756296417Sdim if (X != 0) 757296417Sdim return X; 758296417Sdim } 759327952Sdim EqCacheSCEV.unionSets(LHS, RHS); 760314795Sdim return 0; 761314564Sdim } 762193323Sed 763314564Sdim case scUDivExpr: { 764314564Sdim const SCEVUDivExpr *LC = cast<SCEVUDivExpr>(LHS); 765314564Sdim const SCEVUDivExpr *RC = cast<SCEVUDivExpr>(RHS); 766296417Sdim 767314564Sdim // Lexicographically compare udiv expressions. 768327952Sdim int X = CompareSCEVComplexity(EqCacheSCEV, EqCacheValue, LI, LC->getLHS(), 769327952Sdim RC->getLHS(), DT, Depth + 1); 770314564Sdim if (X != 0) 771314564Sdim return X; 772327952Sdim X = CompareSCEVComplexity(EqCacheSCEV, EqCacheValue, LI, LC->getRHS(), 773327952Sdim RC->getRHS(), DT, Depth + 1); 774314795Sdim if (X == 0) 775327952Sdim EqCacheSCEV.unionSets(LHS, RHS); 776314795Sdim return X; 777314564Sdim } 778193323Sed 779314564Sdim case scTruncate: 780314564Sdim case scZeroExtend: 781314564Sdim case scSignExtend: { 782314564Sdim const SCEVCastExpr *LC = cast<SCEVCastExpr>(LHS); 783314564Sdim const SCEVCastExpr *RC = cast<SCEVCastExpr>(RHS); 784314564Sdim 785314564Sdim // Compare cast expressions by operand. 
786327952Sdim int X = CompareSCEVComplexity(EqCacheSCEV, EqCacheValue, LI, 787327952Sdim LC->getOperand(), RC->getOperand(), DT, 788327952Sdim Depth + 1); 789314795Sdim if (X == 0) 790327952Sdim EqCacheSCEV.unionSets(LHS, RHS); 791314795Sdim return X; 792296417Sdim } 793296417Sdim 794314564Sdim case scCouldNotCompute: 795314564Sdim llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); 796314564Sdim } 797314564Sdim llvm_unreachable("Unknown SCEV kind!"); 798314564Sdim} 799314564Sdim 800309124Sdim/// Given a list of SCEV objects, order them by their complexity, and group 801309124Sdim/// objects of the same complexity together by value. When this routine is 802309124Sdim/// finished, we know that any duplicates in the vector are consecutive and that 803309124Sdim/// complexity is monotonically increasing. 804193323Sed/// 805204642Srdivacky/// Note that we go take special precautions to ensure that we get deterministic 806193323Sed/// results from this routine. In other words, we don't want the results of 807193323Sed/// this to depend on where the addresses of various SCEV objects happened to 808193323Sed/// land in memory. 809198090Srdivackystatic void GroupByComplexity(SmallVectorImpl<const SCEV *> &Ops, 810321369Sdim LoopInfo *LI, DominatorTree &DT) { 811193323Sed if (Ops.size() < 2) return; // Noop 812314795Sdim 813327952Sdim EquivalenceClasses<const SCEV *> EqCacheSCEV; 814327952Sdim EquivalenceClasses<const Value *> EqCacheValue; 815193323Sed if (Ops.size() == 2) { 816193323Sed // This is the common case, which also happens to be trivially simple. 817193323Sed // Special case it. 818212904Sdim const SCEV *&LHS = Ops[0], *&RHS = Ops[1]; 819327952Sdim if (CompareSCEVComplexity(EqCacheSCEV, EqCacheValue, LI, RHS, LHS, DT) < 0) 820212904Sdim std::swap(LHS, RHS); 821193323Sed return; 822193323Sed } 823193323Sed 824193323Sed // Do the rough sort by complexity. 
825353358Sdim llvm::stable_sort(Ops, [&](const SCEV *LHS, const SCEV *RHS) { 826353358Sdim return CompareSCEVComplexity(EqCacheSCEV, EqCacheValue, LI, LHS, RHS, DT) < 827353358Sdim 0; 828353358Sdim }); 829193323Sed 830193323Sed // Now that we are sorted by complexity, group elements of the same 831193323Sed // complexity. Note that this is, at worst, N^2, but the vector is likely to 832193323Sed // be extremely short in practice. Note that we take this approach because we 833193323Sed // do not want to depend on the addresses of the objects we are grouping. 834193323Sed for (unsigned i = 0, e = Ops.size(); i != e-2; ++i) { 835193323Sed const SCEV *S = Ops[i]; 836193323Sed unsigned Complexity = S->getSCEVType(); 837193323Sed 838193323Sed // If there are any objects of the same complexity and same value as this 839193323Sed // one, group them. 840193323Sed for (unsigned j = i+1; j != e && Ops[j]->getSCEVType() == Complexity; ++j) { 841193323Sed if (Ops[j] == S) { // Found a duplicate. 842193323Sed // Move it to immediately after i'th element. 843193323Sed std::swap(Ops[i+1], Ops[j]); 844193323Sed ++i; // no need to rescan it. 845193323Sed if (i == e-2) return; // Done! 846193323Sed } 847193323Sed } 848193323Sed } 849193323Sed} 850193323Sed 851296417Sdim// Returns the size of the SCEV S. 852296417Sdimstatic inline int sizeOfSCEV(const SCEV *S) { 853296417Sdim struct FindSCEVSize { 854327952Sdim int Size = 0; 855193323Sed 856327952Sdim FindSCEVSize() = default; 857327952Sdim 858296417Sdim bool follow(const SCEV *S) { 859296417Sdim ++Size; 860296417Sdim // Keep looking at all operands of S. 
861296417Sdim return true; 862296417Sdim } 863327952Sdim 864296417Sdim bool isDone() const { 865296417Sdim return false; 866296417Sdim } 867296417Sdim }; 868193323Sed 869280031Sdim FindSCEVSize F; 870280031Sdim SCEVTraversal<FindSCEVSize> ST(F); 871280031Sdim ST.visitAll(S); 872280031Sdim return F.Size; 873280031Sdim} 874280031Sdim 875353358Sdim/// Returns true if the subtree of \p S contains at least HugeExprThreshold 876353358Sdim/// nodes. 877353358Sdimstatic bool isHugeExpression(const SCEV *S) { 878353358Sdim return S->getExpressionSize() >= HugeExprThreshold; 879353358Sdim} 880353358Sdim 881353358Sdim/// Returns true of \p Ops contains a huge SCEV (see definition above). 882353358Sdimstatic bool hasHugeExpression(ArrayRef<const SCEV *> Ops) { 883353358Sdim return any_of(Ops, isHugeExpression); 884353358Sdim} 885353358Sdim 886280031Sdimnamespace { 887280031Sdim 888280031Sdimstruct SCEVDivision : public SCEVVisitor<SCEVDivision, void> { 889280031Sdimpublic: 890280031Sdim // Computes the Quotient and Remainder of the division of Numerator by 891280031Sdim // Denominator. 892280031Sdim static void divide(ScalarEvolution &SE, const SCEV *Numerator, 893280031Sdim const SCEV *Denominator, const SCEV **Quotient, 894280031Sdim const SCEV **Remainder) { 895280031Sdim assert(Numerator && Denominator && "Uninitialized SCEV"); 896280031Sdim 897280031Sdim SCEVDivision D(SE, Numerator, Denominator); 898280031Sdim 899280031Sdim // Check for the trivial case here to avoid having to check for it in the 900280031Sdim // rest of the code. 901280031Sdim if (Numerator == Denominator) { 902280031Sdim *Quotient = D.One; 903280031Sdim *Remainder = D.Zero; 904280031Sdim return; 905280031Sdim } 906280031Sdim 907280031Sdim if (Numerator->isZero()) { 908280031Sdim *Quotient = D.Zero; 909280031Sdim *Remainder = D.Zero; 910280031Sdim return; 911280031Sdim } 912280031Sdim 913288943Sdim // A simple case when N/1. The quotient is N. 
914288943Sdim if (Denominator->isOne()) { 915288943Sdim *Quotient = Numerator; 916288943Sdim *Remainder = D.Zero; 917288943Sdim return; 918288943Sdim } 919288943Sdim 920280031Sdim // Split the Denominator when it is a product. 921309124Sdim if (const SCEVMulExpr *T = dyn_cast<SCEVMulExpr>(Denominator)) { 922280031Sdim const SCEV *Q, *R; 923280031Sdim *Quotient = Numerator; 924280031Sdim for (const SCEV *Op : T->operands()) { 925280031Sdim divide(SE, *Quotient, Op, &Q, &R); 926280031Sdim *Quotient = Q; 927280031Sdim 928280031Sdim // Bail out when the Numerator is not divisible by one of the terms of 929280031Sdim // the Denominator. 930280031Sdim if (!R->isZero()) { 931280031Sdim *Quotient = D.Zero; 932280031Sdim *Remainder = Numerator; 933280031Sdim return; 934280031Sdim } 935280031Sdim } 936280031Sdim *Remainder = D.Zero; 937280031Sdim return; 938280031Sdim } 939280031Sdim 940280031Sdim D.visit(Numerator); 941280031Sdim *Quotient = D.Quotient; 942280031Sdim *Remainder = D.Remainder; 943280031Sdim } 944280031Sdim 945280031Sdim // Except in the trivial case described above, we do not know how to divide 946280031Sdim // Expr by Denominator for the following functions with empty implementation. 
947280031Sdim void visitTruncateExpr(const SCEVTruncateExpr *Numerator) {} 948280031Sdim void visitZeroExtendExpr(const SCEVZeroExtendExpr *Numerator) {} 949280031Sdim void visitSignExtendExpr(const SCEVSignExtendExpr *Numerator) {} 950280031Sdim void visitUDivExpr(const SCEVUDivExpr *Numerator) {} 951280031Sdim void visitSMaxExpr(const SCEVSMaxExpr *Numerator) {} 952280031Sdim void visitUMaxExpr(const SCEVUMaxExpr *Numerator) {} 953353358Sdim void visitSMinExpr(const SCEVSMinExpr *Numerator) {} 954353358Sdim void visitUMinExpr(const SCEVUMinExpr *Numerator) {} 955280031Sdim void visitUnknown(const SCEVUnknown *Numerator) {} 956280031Sdim void visitCouldNotCompute(const SCEVCouldNotCompute *Numerator) {} 957280031Sdim 958280031Sdim void visitConstant(const SCEVConstant *Numerator) { 959280031Sdim if (const SCEVConstant *D = dyn_cast<SCEVConstant>(Denominator)) { 960296417Sdim APInt NumeratorVal = Numerator->getAPInt(); 961296417Sdim APInt DenominatorVal = D->getAPInt(); 962280031Sdim uint32_t NumeratorBW = NumeratorVal.getBitWidth(); 963280031Sdim uint32_t DenominatorBW = DenominatorVal.getBitWidth(); 964280031Sdim 965280031Sdim if (NumeratorBW > DenominatorBW) 966280031Sdim DenominatorVal = DenominatorVal.sext(NumeratorBW); 967280031Sdim else if (NumeratorBW < DenominatorBW) 968280031Sdim NumeratorVal = NumeratorVal.sext(DenominatorBW); 969280031Sdim 970280031Sdim APInt QuotientVal(NumeratorVal.getBitWidth(), 0); 971280031Sdim APInt RemainderVal(NumeratorVal.getBitWidth(), 0); 972280031Sdim APInt::sdivrem(NumeratorVal, DenominatorVal, QuotientVal, RemainderVal); 973280031Sdim Quotient = SE.getConstant(QuotientVal); 974280031Sdim Remainder = SE.getConstant(RemainderVal); 975280031Sdim return; 976280031Sdim } 977280031Sdim } 978280031Sdim 979280031Sdim void visitAddRecExpr(const SCEVAddRecExpr *Numerator) { 980280031Sdim const SCEV *StartQ, *StartR, *StepQ, *StepR; 981296417Sdim if (!Numerator->isAffine()) 982296417Sdim return cannotDivide(Numerator); 983280031Sdim 
divide(SE, Numerator->getStart(), Denominator, &StartQ, &StartR); 984280031Sdim divide(SE, Numerator->getStepRecurrence(SE), Denominator, &StepQ, &StepR); 985288943Sdim // Bail out if the types do not match. 986288943Sdim Type *Ty = Denominator->getType(); 987288943Sdim if (Ty != StartQ->getType() || Ty != StartR->getType() || 988296417Sdim Ty != StepQ->getType() || Ty != StepR->getType()) 989296417Sdim return cannotDivide(Numerator); 990280031Sdim Quotient = SE.getAddRecExpr(StartQ, StepQ, Numerator->getLoop(), 991280031Sdim Numerator->getNoWrapFlags()); 992280031Sdim Remainder = SE.getAddRecExpr(StartR, StepR, Numerator->getLoop(), 993280031Sdim Numerator->getNoWrapFlags()); 994280031Sdim } 995280031Sdim 996280031Sdim void visitAddExpr(const SCEVAddExpr *Numerator) { 997280031Sdim SmallVector<const SCEV *, 2> Qs, Rs; 998280031Sdim Type *Ty = Denominator->getType(); 999280031Sdim 1000280031Sdim for (const SCEV *Op : Numerator->operands()) { 1001280031Sdim const SCEV *Q, *R; 1002280031Sdim divide(SE, Op, Denominator, &Q, &R); 1003280031Sdim 1004280031Sdim // Bail out if types do not match. 1005296417Sdim if (Ty != Q->getType() || Ty != R->getType()) 1006296417Sdim return cannotDivide(Numerator); 1007280031Sdim 1008280031Sdim Qs.push_back(Q); 1009280031Sdim Rs.push_back(R); 1010280031Sdim } 1011280031Sdim 1012280031Sdim if (Qs.size() == 1) { 1013280031Sdim Quotient = Qs[0]; 1014280031Sdim Remainder = Rs[0]; 1015280031Sdim return; 1016280031Sdim } 1017280031Sdim 1018280031Sdim Quotient = SE.getAddExpr(Qs); 1019280031Sdim Remainder = SE.getAddExpr(Rs); 1020280031Sdim } 1021280031Sdim 1022280031Sdim void visitMulExpr(const SCEVMulExpr *Numerator) { 1023280031Sdim SmallVector<const SCEV *, 2> Qs; 1024280031Sdim Type *Ty = Denominator->getType(); 1025280031Sdim 1026280031Sdim bool FoundDenominatorTerm = false; 1027280031Sdim for (const SCEV *Op : Numerator->operands()) { 1028280031Sdim // Bail out if types do not match. 
1029296417Sdim if (Ty != Op->getType()) 1030296417Sdim return cannotDivide(Numerator); 1031280031Sdim 1032280031Sdim if (FoundDenominatorTerm) { 1033280031Sdim Qs.push_back(Op); 1034280031Sdim continue; 1035280031Sdim } 1036280031Sdim 1037280031Sdim // Check whether Denominator divides one of the product operands. 1038280031Sdim const SCEV *Q, *R; 1039280031Sdim divide(SE, Op, Denominator, &Q, &R); 1040280031Sdim if (!R->isZero()) { 1041280031Sdim Qs.push_back(Op); 1042280031Sdim continue; 1043280031Sdim } 1044280031Sdim 1045280031Sdim // Bail out if types do not match. 1046296417Sdim if (Ty != Q->getType()) 1047296417Sdim return cannotDivide(Numerator); 1048280031Sdim 1049280031Sdim FoundDenominatorTerm = true; 1050280031Sdim Qs.push_back(Q); 1051280031Sdim } 1052280031Sdim 1053280031Sdim if (FoundDenominatorTerm) { 1054280031Sdim Remainder = Zero; 1055280031Sdim if (Qs.size() == 1) 1056280031Sdim Quotient = Qs[0]; 1057280031Sdim else 1058280031Sdim Quotient = SE.getMulExpr(Qs); 1059280031Sdim return; 1060280031Sdim } 1061280031Sdim 1062296417Sdim if (!isa<SCEVUnknown>(Denominator)) 1063296417Sdim return cannotDivide(Numerator); 1064280031Sdim 1065280031Sdim // The Remainder is obtained by replacing Denominator by 0 in Numerator. 1066280031Sdim ValueToValueMap RewriteMap; 1067280031Sdim RewriteMap[cast<SCEVUnknown>(Denominator)->getValue()] = 1068280031Sdim cast<SCEVConstant>(Zero)->getValue(); 1069280031Sdim Remainder = SCEVParameterRewriter::rewrite(Numerator, SE, RewriteMap, true); 1070280031Sdim 1071280031Sdim if (Remainder->isZero()) { 1072280031Sdim // The Quotient is obtained by replacing Denominator by 1 in Numerator. 
1073280031Sdim RewriteMap[cast<SCEVUnknown>(Denominator)->getValue()] = 1074280031Sdim cast<SCEVConstant>(One)->getValue(); 1075280031Sdim Quotient = 1076280031Sdim SCEVParameterRewriter::rewrite(Numerator, SE, RewriteMap, true); 1077280031Sdim return; 1078280031Sdim } 1079280031Sdim 1080280031Sdim // Quotient is (Numerator - Remainder) divided by Denominator. 1081280031Sdim const SCEV *Q, *R; 1082280031Sdim const SCEV *Diff = SE.getMinusSCEV(Numerator, Remainder); 1083296417Sdim // This SCEV does not seem to simplify: fail the division here. 1084296417Sdim if (sizeOfSCEV(Diff) > sizeOfSCEV(Numerator)) 1085296417Sdim return cannotDivide(Numerator); 1086280031Sdim divide(SE, Diff, Denominator, &Q, &R); 1087296417Sdim if (R != Zero) 1088296417Sdim return cannotDivide(Numerator); 1089280031Sdim Quotient = Q; 1090280031Sdim } 1091280031Sdim 1092280031Sdimprivate: 1093280031Sdim SCEVDivision(ScalarEvolution &S, const SCEV *Numerator, 1094280031Sdim const SCEV *Denominator) 1095280031Sdim : SE(S), Denominator(Denominator) { 1096296417Sdim Zero = SE.getZero(Denominator->getType()); 1097296417Sdim One = SE.getOne(Denominator->getType()); 1098280031Sdim 1099296417Sdim // We generally do not know how to divide Expr by Denominator. We 1100296417Sdim // initialize the division to a "cannot divide" state to simplify the rest 1101296417Sdim // of the code. 1102296417Sdim cannotDivide(Numerator); 1103296417Sdim } 1104296417Sdim 1105296417Sdim // Convenience function for giving up on the division. We set the quotient to 1106296417Sdim // be equal to zero and the remainder to be equal to the numerator. 
1107296417Sdim void cannotDivide(const SCEV *Numerator) { 1108280031Sdim Quotient = Zero; 1109280031Sdim Remainder = Numerator; 1110280031Sdim } 1111280031Sdim 1112280031Sdim ScalarEvolution &SE; 1113280031Sdim const SCEV *Denominator, *Quotient, *Remainder, *Zero, *One; 1114280031Sdim}; 1115280031Sdim 1116327952Sdim} // end anonymous namespace 1117280031Sdim 1118193323Sed//===----------------------------------------------------------------------===// 1119193323Sed// Simple SCEV method implementations 1120193323Sed//===----------------------------------------------------------------------===// 1121193323Sed 1122309124Sdim/// Compute BC(It, K). The result has width W. Assume, K > 0. 1123198090Srdivackystatic const SCEV *BinomialCoefficient(const SCEV *It, unsigned K, 1124198090Srdivacky ScalarEvolution &SE, 1125226633Sdim Type *ResultTy) { 1126193323Sed // Handle the simplest case efficiently. 1127193323Sed if (K == 1) 1128193323Sed return SE.getTruncateOrZeroExtend(It, ResultTy); 1129193323Sed 1130193323Sed // We are using the following formula for BC(It, K): 1131193323Sed // 1132193323Sed // BC(It, K) = (It * (It - 1) * ... * (It - K + 1)) / K! 1133193323Sed // 1134193323Sed // Suppose, W is the bitwidth of the return value. We must be prepared for 1135193323Sed // overflow. Hence, we must assure that the result of our computation is 1136193323Sed // equal to the accurate one modulo 2^W. Unfortunately, division isn't 1137193323Sed // safe in modular arithmetic. 1138193323Sed // 1139193323Sed // However, this code doesn't use exactly that formula; the formula it uses 1140195098Sed // is something like the following, where T is the number of factors of 2 in 1141193323Sed // K! (i.e. trailing zeros in the binary representation of K!), and ^ is 1142193323Sed // exponentiation: 1143193323Sed // 1144193323Sed // BC(It, K) = (It * (It - 1) * ... * (It - K + 1)) / 2^T / (K! 
/ 2^T) 1145193323Sed // 1146193323Sed // This formula is trivially equivalent to the previous formula. However, 1147193323Sed // this formula can be implemented much more efficiently. The trick is that 1148193323Sed // K! / 2^T is odd, and exact division by an odd number *is* safe in modular 1149193323Sed // arithmetic. To do exact division in modular arithmetic, all we have 1150193323Sed // to do is multiply by the inverse. Therefore, this step can be done at 1151193323Sed // width W. 1152195098Sed // 1153193323Sed // The next issue is how to safely do the division by 2^T. The way this 1154193323Sed // is done is by doing the multiplication step at a width of at least W + T 1155193323Sed // bits. This way, the bottom W+T bits of the product are accurate. Then, 1156193323Sed // when we perform the division by 2^T (which is equivalent to a right shift 1157193323Sed // by T), the bottom W bits are accurate. Extra bits are okay; they'll get 1158193323Sed // truncated out after the division by 2^T. 1159193323Sed // 1160193323Sed // In comparison to just directly using the first formula, this technique 1161193323Sed // is much more efficient; using the first formula requires W * K bits, 1162193323Sed // but this formula less than W + K bits. Also, the first formula requires 1163193323Sed // a division step, whereas this formula only requires multiplies and shifts. 1164193323Sed // 1165193323Sed // It doesn't matter whether the subtraction step is done in the calculation 1166193323Sed // width or the input iteration count's width; if the subtraction overflows, 1167193323Sed // the result must be zero anyway. We prefer here to do it in the width of 1168193323Sed // the induction variable because it helps a lot for certain cases; CodeGen 1169193323Sed // isn't smart enough to ignore the overflow, which leads to much less 1170193323Sed // efficient code if the width of the subtraction is wider than the native 1171193323Sed // register width. 
1172193323Sed // 1173193323Sed // (It's possible to not widen at all by pulling out factors of 2 before 1174193323Sed // the multiplication; for example, K=2 can be calculated as 1175193323Sed // It/2*(It+(It*INT_MIN/INT_MIN)+-1). However, it requires 1176193323Sed // extra arithmetic, so it's not an obvious win, and it gets 1177193323Sed // much more complicated for K > 3.) 1178193323Sed 1179193323Sed // Protection from insane SCEVs; this bound is conservative, 1180193323Sed // but it probably doesn't matter. 1181193323Sed if (K > 1000) 1182193323Sed return SE.getCouldNotCompute(); 1183193323Sed 1184193323Sed unsigned W = SE.getTypeSizeInBits(ResultTy); 1185193323Sed 1186193323Sed // Calculate K! / 2^T and T; we divide out the factors of two before 1187193323Sed // multiplying for calculating K! / 2^T to avoid overflow. 1188193323Sed // Other overflow doesn't matter because we only care about the bottom 1189193323Sed // W bits of the result. 1190193323Sed APInt OddFactorial(W, 1); 1191193323Sed unsigned T = 1; 1192193323Sed for (unsigned i = 3; i <= K; ++i) { 1193193323Sed APInt Mult(W, i); 1194193323Sed unsigned TwoFactors = Mult.countTrailingZeros(); 1195193323Sed T += TwoFactors; 1196321369Sdim Mult.lshrInPlace(TwoFactors); 1197193323Sed OddFactorial *= Mult; 1198193323Sed } 1199193323Sed 1200193323Sed // We need at least W + T bits for the multiplication step 1201193323Sed unsigned CalculationBits = W + T; 1202193323Sed 1203204642Srdivacky // Calculate 2^T, at width T+W. 1204261991Sdim APInt DivFactor = APInt::getOneBitSet(CalculationBits, T); 1205193323Sed 1206193323Sed // Calculate the multiplicative inverse of K! / 2^T; 1207193323Sed // this multiplication factor will perform the exact division by 1208193323Sed // K! / 2^T. 
1209193323Sed APInt Mod = APInt::getSignedMinValue(W+1); 1210193323Sed APInt MultiplyFactor = OddFactorial.zext(W+1); 1211193323Sed MultiplyFactor = MultiplyFactor.multiplicativeInverse(Mod); 1212193323Sed MultiplyFactor = MultiplyFactor.trunc(W); 1213193323Sed 1214193323Sed // Calculate the product, at width T+W 1215226633Sdim IntegerType *CalculationTy = IntegerType::get(SE.getContext(), 1216198090Srdivacky CalculationBits); 1217198090Srdivacky const SCEV *Dividend = SE.getTruncateOrZeroExtend(It, CalculationTy); 1218193323Sed for (unsigned i = 1; i != K; ++i) { 1219207618Srdivacky const SCEV *S = SE.getMinusSCEV(It, SE.getConstant(It->getType(), i)); 1220193323Sed Dividend = SE.getMulExpr(Dividend, 1221193323Sed SE.getTruncateOrZeroExtend(S, CalculationTy)); 1222193323Sed } 1223193323Sed 1224193323Sed // Divide by 2^T 1225198090Srdivacky const SCEV *DivResult = SE.getUDivExpr(Dividend, SE.getConstant(DivFactor)); 1226193323Sed 1227193323Sed // Truncate the result, and divide by K! / 2^T. 1228193323Sed 1229193323Sed return SE.getMulExpr(SE.getConstant(MultiplyFactor), 1230193323Sed SE.getTruncateOrZeroExtend(DivResult, ResultTy)); 1231193323Sed} 1232193323Sed 1233309124Sdim/// Return the value of this chain of recurrences at the specified iteration 1234309124Sdim/// number. We can evaluate this recurrence by multiplying each element in the 1235309124Sdim/// chain by the binomial coefficient corresponding to it. In other words, we 1236309124Sdim/// can evaluate {A,+,B,+,C,+,D} as: 1237193323Sed/// 1238193323Sed/// A*BC(It, 0) + B*BC(It, 1) + C*BC(It, 2) + D*BC(It, 3) 1239193323Sed/// 1240193323Sed/// where BC(It, k) stands for binomial coefficient. 
1241198090Srdivackyconst SCEV *SCEVAddRecExpr::evaluateAtIteration(const SCEV *It, 1242198090Srdivacky ScalarEvolution &SE) const { 1243198090Srdivacky const SCEV *Result = getStart(); 1244193323Sed for (unsigned i = 1, e = getNumOperands(); i != e; ++i) { 1245193323Sed // The computation is correct in the face of overflow provided that the 1246193323Sed // multiplication is performed _after_ the evaluation of the binomial 1247193323Sed // coefficient. 1248198090Srdivacky const SCEV *Coeff = BinomialCoefficient(It, i, SE, getType()); 1249193323Sed if (isa<SCEVCouldNotCompute>(Coeff)) 1250193323Sed return Coeff; 1251193323Sed 1252193323Sed Result = SE.getAddExpr(Result, SE.getMulExpr(getOperand(i), Coeff)); 1253193323Sed } 1254193323Sed return Result; 1255193323Sed} 1256193323Sed 1257193323Sed//===----------------------------------------------------------------------===// 1258193323Sed// SCEV Expression folder implementations 1259193323Sed//===----------------------------------------------------------------------===// 1260193323Sed 1261353358Sdimconst SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op, Type *Ty, 1262353358Sdim unsigned Depth) { 1263193323Sed assert(getTypeSizeInBits(Op->getType()) > getTypeSizeInBits(Ty) && 1264193323Sed "This is not a truncating conversion!"); 1265193323Sed assert(isSCEVable(Ty) && 1266193323Sed "This is not a conversion to a SCEVable type!"); 1267193323Sed Ty = getEffectiveSCEVType(Ty); 1268193323Sed 1269198090Srdivacky FoldingSetNodeID ID; 1270198090Srdivacky ID.AddInteger(scTruncate); 1271198090Srdivacky ID.AddPointer(Op); 1272198090Srdivacky ID.AddPointer(Ty); 1273276479Sdim void *IP = nullptr; 1274198090Srdivacky if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; 1275198090Srdivacky 1276195340Sed // Fold if the operand is constant. 
1277193323Sed if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op)) 1278195098Sed return getConstant( 1279239462Sdim cast<ConstantInt>(ConstantExpr::getTrunc(SC->getValue(), Ty))); 1280193323Sed 1281193323Sed // trunc(trunc(x)) --> trunc(x) 1282193323Sed if (const SCEVTruncateExpr *ST = dyn_cast<SCEVTruncateExpr>(Op)) 1283353358Sdim return getTruncateExpr(ST->getOperand(), Ty, Depth + 1); 1284193323Sed 1285193323Sed // trunc(sext(x)) --> sext(x) if widening or trunc(x) if narrowing 1286193323Sed if (const SCEVSignExtendExpr *SS = dyn_cast<SCEVSignExtendExpr>(Op)) 1287353358Sdim return getTruncateOrSignExtend(SS->getOperand(), Ty, Depth + 1); 1288193323Sed 1289193323Sed // trunc(zext(x)) --> zext(x) if widening or trunc(x) if narrowing 1290193323Sed if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op)) 1291353358Sdim return getTruncateOrZeroExtend(SZ->getOperand(), Ty, Depth + 1); 1292193323Sed 1293353358Sdim if (Depth > MaxCastDepth) { 1294353358Sdim SCEV *S = 1295353358Sdim new (SCEVAllocator) SCEVTruncateExpr(ID.Intern(SCEVAllocator), Op, Ty); 1296353358Sdim UniqueSCEVs.InsertNode(S, IP); 1297353358Sdim addToLoopUseLists(S); 1298353358Sdim return S; 1299353358Sdim } 1300353358Sdim 1301341825Sdim // trunc(x1 + ... + xN) --> trunc(x1) + ... + trunc(xN) and 1302341825Sdim // trunc(x1 * ... * xN) --> trunc(x1) * ... * trunc(xN), 1303341825Sdim // if after transforming we have at most one truncate, not counting truncates 1304341825Sdim // that replace other casts. 
1305341825Sdim if (isa<SCEVAddExpr>(Op) || isa<SCEVMulExpr>(Op)) { 1306341825Sdim auto *CommOp = cast<SCEVCommutativeExpr>(Op); 1307218893Sdim SmallVector<const SCEV *, 4> Operands; 1308341825Sdim unsigned numTruncs = 0; 1309341825Sdim for (unsigned i = 0, e = CommOp->getNumOperands(); i != e && numTruncs < 2; 1310341825Sdim ++i) { 1311353358Sdim const SCEV *S = getTruncateExpr(CommOp->getOperand(i), Ty, Depth + 1); 1312341825Sdim if (!isa<SCEVCastExpr>(CommOp->getOperand(i)) && isa<SCEVTruncateExpr>(S)) 1313341825Sdim numTruncs++; 1314218893Sdim Operands.push_back(S); 1315218893Sdim } 1316341825Sdim if (numTruncs < 2) { 1317341825Sdim if (isa<SCEVAddExpr>(Op)) 1318341825Sdim return getAddExpr(Operands); 1319341825Sdim else if (isa<SCEVMulExpr>(Op)) 1320341825Sdim return getMulExpr(Operands); 1321341825Sdim else 1322341825Sdim llvm_unreachable("Unexpected SCEV type for Op."); 1323218893Sdim } 1324341825Sdim // Although we checked in the beginning that ID is not in the cache, it is 1325341825Sdim // possible that during recursion and different modification ID was inserted 1326341825Sdim // into the cache. So if we find it, just return it. 1327327952Sdim if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) 1328327952Sdim return S; 1329218893Sdim } 1330218893Sdim 1331194612Sed // If the input value is a chrec scev, truncate the chrec's operands. 1332193323Sed if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Op)) { 1333198090Srdivacky SmallVector<const SCEV *, 4> Operands; 1334296417Sdim for (const SCEV *Op : AddRec->operands()) 1335353358Sdim Operands.push_back(getTruncateExpr(Op, Ty, Depth + 1)); 1336221345Sdim return getAddRecExpr(Operands, AddRec->getLoop(), SCEV::FlagAnyWrap); 1337193323Sed } 1338193323Sed 1339210299Sed // The cast wasn't folded; create an explicit cast node. We can reuse 1340210299Sed // the existing insert position since if we get here, we won't have 1341210299Sed // made any changes which would invalidate it. 
1342205407Srdivacky SCEV *S = new (SCEVAllocator) SCEVTruncateExpr(ID.Intern(SCEVAllocator), 1343205407Srdivacky Op, Ty); 1344195340Sed UniqueSCEVs.InsertNode(S, IP); 1345327952Sdim addToLoopUseLists(S); 1346195340Sed return S; 1347193323Sed} 1348193323Sed 1349288943Sdim// Get the limit of a recurrence such that incrementing by Step cannot cause 1350288943Sdim// signed overflow as long as the value of the recurrence within the 1351288943Sdim// loop does not exceed this limit before incrementing. 1352288943Sdimstatic const SCEV *getSignedOverflowLimitForStep(const SCEV *Step, 1353288943Sdim ICmpInst::Predicate *Pred, 1354288943Sdim ScalarEvolution *SE) { 1355288943Sdim unsigned BitWidth = SE->getTypeSizeInBits(Step->getType()); 1356288943Sdim if (SE->isKnownPositive(Step)) { 1357288943Sdim *Pred = ICmpInst::ICMP_SLT; 1358288943Sdim return SE->getConstant(APInt::getSignedMinValue(BitWidth) - 1359321369Sdim SE->getSignedRangeMax(Step)); 1360288943Sdim } 1361288943Sdim if (SE->isKnownNegative(Step)) { 1362288943Sdim *Pred = ICmpInst::ICMP_SGT; 1363288943Sdim return SE->getConstant(APInt::getSignedMaxValue(BitWidth) - 1364321369Sdim SE->getSignedRangeMin(Step)); 1365288943Sdim } 1366288943Sdim return nullptr; 1367288943Sdim} 1368288943Sdim 1369288943Sdim// Get the limit of a recurrence such that incrementing by Step cannot cause 1370288943Sdim// unsigned overflow as long as the value of the recurrence within the loop does 1371288943Sdim// not exceed this limit before incrementing. 
static const SCEV *getUnsignedOverflowLimitForStep(const SCEV *Step,
                                                   ICmpInst::Predicate *Pred,
                                                   ScalarEvolution *SE) {
  unsigned BitWidth = SE->getTypeSizeInBits(Step->getType());
  // Unlike the signed case, a single unsigned comparison suffices: the
  // recurrence must stay (unsigned) below 0 - umax(Step), computed here
  // modulo 2^BitWidth (APInt::getMinValue(BitWidth) is zero).
  *Pred = ICmpInst::ICMP_ULT;

  return SE->getConstant(APInt::getMinValue(BitWidth) -
                         SE->getUnsignedRangeMax(Step));
}

namespace {

// Base class providing the member-function-pointer type used to select
// between ScalarEvolution::getSignExtendExpr and ::getZeroExtendExpr.
struct ExtendOpTraitsBase {
  typedef const SCEV *(ScalarEvolution::*GetExtendExprTy)(const SCEV *, Type *,
                                                          unsigned);
};

// Used to make code generic over signed and unsigned overflow.
template <typename ExtendOp> struct ExtendOpTraits {
  // Members present:
  //
  // static const SCEV::NoWrapFlags WrapType;
  //
  // static const ExtendOpTraitsBase::GetExtendExprTy GetExtendExpr;
  //
  // static const SCEV *getOverflowLimitForStep(const SCEV *Step,
  //                                            ICmpInst::Predicate *Pred,
  //                                            ScalarEvolution *SE);
};

// Signed flavor: wrap flag is NSW, extension is sign-extension, and the
// overflow limit comes from the signed helper above.
template <>
struct ExtendOpTraits<SCEVSignExtendExpr> : public ExtendOpTraitsBase {
  static const SCEV::NoWrapFlags WrapType = SCEV::FlagNSW;

  static const GetExtendExprTy GetExtendExpr;

  static const SCEV *getOverflowLimitForStep(const SCEV *Step,
                                             ICmpInst::Predicate *Pred,
                                             ScalarEvolution *SE) {
    return getSignedOverflowLimitForStep(Step, Pred, SE);
  }
};

// Out-of-line definition of the static member-function pointer.
const ExtendOpTraitsBase::GetExtendExprTy ExtendOpTraits<
    SCEVSignExtendExpr>::GetExtendExpr = &ScalarEvolution::getSignExtendExpr;

// Unsigned flavor: wrap flag is NUW, extension is zero-extension.
template <>
struct ExtendOpTraits<SCEVZeroExtendExpr> : public ExtendOpTraitsBase {
  static const SCEV::NoWrapFlags WrapType = SCEV::FlagNUW;

  static const GetExtendExprTy GetExtendExpr;

  static const SCEV *getOverflowLimitForStep(const SCEV *Step,
                                             ICmpInst::Predicate *Pred,
                                             ScalarEvolution *SE) {
    return getUnsignedOverflowLimitForStep(Step, Pred, SE);
  }
};

const ExtendOpTraitsBase::GetExtendExprTy ExtendOpTraits<
    SCEVZeroExtendExpr>::GetExtendExpr = &ScalarEvolution::getZeroExtendExpr;

} // end anonymous namespace

// The recurrence AR has been shown to have no signed/unsigned wrap or something
// close to it. Typically, if we can prove NSW/NUW for AR, then we can just as
// easily prove NSW/NUW for its preincrement or postincrement sibling. This
// allows normalizing a sign/zero extended AddRec as such: {sext/zext(Step +
// Start),+,Step} => {(Step + sext/zext(Start),+,Step} As a result, the
// expression "Step + sext/zext(PreIncAR)" is congruent with
// "sext/zext(PostIncAR)".
//
// Returns the "PreStart" expression (Start - Step) when the no-wrap proof
// succeeds, or nullptr when no such sibling can be justified.
template <typename ExtendOpTy>
static const SCEV *getPreStartForExtend(const SCEVAddRecExpr *AR, Type *Ty,
                                        ScalarEvolution *SE, unsigned Depth) {
  auto WrapType = ExtendOpTraits<ExtendOpTy>::WrapType;
  auto GetExtendExpr = ExtendOpTraits<ExtendOpTy>::GetExtendExpr;

  const Loop *L = AR->getLoop();
  const SCEV *Start = AR->getStart();
  const SCEV *Step = AR->getStepRecurrence(*SE);

  // Check for a simple looking step prior to loop entry.
  const SCEVAddExpr *SA = dyn_cast<SCEVAddExpr>(Start);
  if (!SA)
    return nullptr;

  // Create an AddExpr for "PreStart" after subtracting Step. Full SCEV
  // subtraction is expensive. For this purpose, perform a quick and dirty
  // difference, by checking for Step in the operand list.
  SmallVector<const SCEV *, 4> DiffOps;
  for (const SCEV *Op : SA->operands())
    if (Op != Step)
      DiffOps.push_back(Op);

  // Step was not an operand of the add, so the cheap subtraction failed;
  // give up rather than do a full SCEV subtraction.
  if (DiffOps.size() == SA->getNumOperands())
    return nullptr;

  // Try to prove `WrapType` (SCEV::FlagNSW or SCEV::FlagNUW) on `PreStart` +
  // `Step`:

  // 1. NSW/NUW flags on the step increment.
  auto PreStartFlags =
    ScalarEvolution::maskFlags(SA->getNoWrapFlags(), SCEV::FlagNUW);
  const SCEV *PreStart = SE->getAddExpr(DiffOps, PreStartFlags);
  const SCEVAddRecExpr *PreAR = dyn_cast<SCEVAddRecExpr>(
      SE->getAddRecExpr(PreStart, Step, L, SCEV::FlagAnyWrap));

  // "{S,+,X} is <nsw>/<nuw>" and "the backedge is taken at least once" implies
  // "S+X does not sign/unsign-overflow".
  //

  const SCEV *BECount = SE->getBackedgeTakenCount(L);
  if (PreAR && PreAR->getNoWrapFlags(WrapType) &&
      !isa<SCEVCouldNotCompute>(BECount) && SE->isKnownPositive(BECount))
    return PreStart;

  // 2. Direct overflow check on the step operation's expression.
  // Compare sext/zext(Start) against sext/zext(PreStart) + sext/zext(Step)
  // in a type twice as wide; equality shows the increment cannot overflow.
  unsigned BitWidth = SE->getTypeSizeInBits(AR->getType());
  Type *WideTy = IntegerType::get(SE->getContext(), BitWidth * 2);
  const SCEV *OperandExtendedStart =
      SE->getAddExpr((SE->*GetExtendExpr)(PreStart, WideTy, Depth),
                     (SE->*GetExtendExpr)(Step, WideTy, Depth));
  if ((SE->*GetExtendExpr)(Start, WideTy, Depth) == OperandExtendedStart) {
    if (PreAR && AR->getNoWrapFlags(WrapType)) {
      // If we know `AR` == {`PreStart`+`Step`,+,`Step`} is `WrapType` (FlagNSW
      // or FlagNUW) and that `PreStart` + `Step` is `WrapType` too, then
      // `PreAR` == {`PreStart`,+,`Step`} is also `WrapType`. Cache this fact.
      const_cast<SCEVAddRecExpr *>(PreAR)->setNoWrapFlags(WrapType);
    }
    return PreStart;
  }

  // 3. Loop precondition: a guard on loop entry bounding PreStart below the
  // overflow limit for Step proves the increment cannot wrap.
  ICmpInst::Predicate Pred;
  const SCEV *OverflowLimit =
      ExtendOpTraits<ExtendOpTy>::getOverflowLimitForStep(Step, &Pred, SE);

  if (OverflowLimit &&
      SE->isLoopEntryGuardedByCond(L, Pred, PreStart, OverflowLimit))
    return PreStart;

  return nullptr;
}

// Get the normalized zero or sign extended expression for this AddRec's Start.
// Normalizes the extension of an AddRec's start value: prefer extending
// "PreStart" (Start - Step) and re-adding the extended Step, when
// getPreStartForExtend can prove that form does not wrap; otherwise just
// extend Start directly.
template <typename ExtendOpTy>
static const SCEV *getExtendAddRecStart(const SCEVAddRecExpr *AR, Type *Ty,
                                        ScalarEvolution *SE,
                                        unsigned Depth) {
  auto GetExtendExpr = ExtendOpTraits<ExtendOpTy>::GetExtendExpr;

  const SCEV *PreStart = getPreStartForExtend<ExtendOpTy>(AR, Ty, SE, Depth);
  if (!PreStart)
    return (SE->*GetExtendExpr)(AR->getStart(), Ty, Depth);

  // ext(Start) == ext(PreStart + Step) == ext(Step) + ext(PreStart), which is
  // valid because getPreStartForExtend proved the sum does not wrap.
  return SE->getAddExpr((SE->*GetExtendExpr)(AR->getStepRecurrence(*SE), Ty,
                                             Depth),
                        (SE->*GetExtendExpr)(PreStart, Ty, Depth));
}

// Try to prove away overflow by looking at "nearby" add recurrences. A
// motivating example for this rule: if we know `{0,+,4}` is `ult` `-1` and it
// does not itself wrap then we can conclude that `{1,+,4}` is `nuw`.
//
// Formally:
//
//     {S,+,X} == {S-T,+,X} + T
//  => Ext({S,+,X}) == Ext({S-T,+,X} + T)
//
//  If ({S-T,+,X} + T) does not overflow  ... (1)
//
//  RHS == Ext({S-T,+,X} + T) == Ext({S-T,+,X}) + Ext(T)
//
//  If {S-T,+,X} does not overflow  ... (2)
//
//  RHS == Ext({S-T,+,X}) + Ext(T) == {Ext(S-T),+,Ext(X)} + Ext(T)
//      == {Ext(S-T)+Ext(T),+,Ext(X)}
//
//  If (S-T)+T does not overflow  ... (3)
//
//  RHS == {Ext(S-T)+Ext(T),+,Ext(X)} == {Ext(S-T+T),+,Ext(X)}
//      == {Ext(S),+,Ext(X)} == LHS
//
// Thus, if (1), (2) and (3) are true for some T, then
//   Ext({S,+,X}) == {Ext(S),+,Ext(X)}
//
// (3) is implied by (1) -- "(S-T)+T does not overflow" is simply "({S-T,+,X}+T)
// does not overflow" restricted to the 0th iteration.  Therefore we only need
// to check for (1) and (2).
//
// In the current context, S is `Start`, X is `Step`, Ext is `ExtendOpTy` and T
// is `Delta` (defined below).
template <typename ExtendOpTy>
bool ScalarEvolution::proveNoWrapByVaryingStart(const SCEV *Start,
                                                const SCEV *Step,
                                                const Loop *L) {
  auto WrapType = ExtendOpTraits<ExtendOpTy>::WrapType;

  // We restrict `Start` to a constant to prevent SCEV from spending too much
  // time here.  It is correct (but more expensive) to continue with a
  // non-constant `Start` and do a general SCEV subtraction to compute
  // `PreStart` below.
  const SCEVConstant *StartC = dyn_cast<SCEVConstant>(Start);
  if (!StartC)
    return false;

  APInt StartAI = StartC->getAPInt();

  // NOTE(review): Delta is declared unsigned, so -2 and -1 become large
  // unsigned values; `StartAI - Delta` appears to rely on modular (wrap-around)
  // APInt arithmetic to yield Start+2 and Start+1 -- confirm this is intended.
  for (unsigned Delta : {-2, -1, 1, 2}) {
    const SCEV *PreStart = getConstant(StartAI - Delta);

    // Hand-build the FoldingSet profile of {PreStart,+,Step}<L> to look up an
    // existing node. This must mirror how SCEVAddRecExpr nodes are profiled
    // into UniqueSCEVs; if that scheme changes, this lookup silently misses.
    FoldingSetNodeID ID;
    ID.AddInteger(scAddRecExpr);
    ID.AddPointer(PreStart);
    ID.AddPointer(Step);
    ID.AddPointer(L);
    void *IP = nullptr;
    const auto *PreAR =
      static_cast<SCEVAddRecExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP));

    // Give up if we don't already have the add recurrence we need because
    // actually constructing an add recurrence is relatively expensive.
    if (PreAR && PreAR->getNoWrapFlags(WrapType)) {  // proves (2)
      const SCEV *DeltaS = getConstant(StartC->getType(), Delta);
      ICmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE;
      const SCEV *Limit = ExtendOpTraits<ExtendOpTy>::getOverflowLimitForStep(
          DeltaS, &Pred, this);
      if (Limit && isKnownPredicate(Pred, PreAR, Limit))  // proves (1)
        return true;
    }
  }

  return false;
}

// Finds an integer D for an expression (C + x + y + ...) such that the top
// level addition in (D + (C - D + x + y + ...)) would not wrap (signed or
// unsigned) and the number of trailing zeros of (C - D + x + y + ...) is
// maximized, where C is the \p ConstantTerm, x, y, ... are arbitrary SCEVs, and
// the (C + x + y + ...) expression is \p WholeAddExpr.
1612341825Sdimstatic APInt extractConstantWithoutWrapping(ScalarEvolution &SE, 1613341825Sdim const SCEVConstant *ConstantTerm, 1614341825Sdim const SCEVAddExpr *WholeAddExpr) { 1615341825Sdim const APInt C = ConstantTerm->getAPInt(); 1616341825Sdim const unsigned BitWidth = C.getBitWidth(); 1617341825Sdim // Find number of trailing zeros of (x + y + ...) w/o the C first: 1618341825Sdim uint32_t TZ = BitWidth; 1619341825Sdim for (unsigned I = 1, E = WholeAddExpr->getNumOperands(); I < E && TZ; ++I) 1620341825Sdim TZ = std::min(TZ, SE.GetMinTrailingZeros(WholeAddExpr->getOperand(I))); 1621341825Sdim if (TZ) { 1622341825Sdim // Set D to be as many least significant bits of C as possible while still 1623341825Sdim // guaranteeing that adding D to (C - D + x + y + ...) won't cause a wrap: 1624341825Sdim return TZ < BitWidth ? C.trunc(TZ).zext(BitWidth) : C; 1625341825Sdim } 1626341825Sdim return APInt(BitWidth, 0); 1627341825Sdim} 1628341825Sdim 1629341825Sdim// Finds an integer D for an affine AddRec expression {C,+,x} such that the top 1630341825Sdim// level addition in (D + {C-D,+,x}) would not wrap (signed or unsigned) and the 1631341825Sdim// number of trailing zeros of (C - D + x * n) is maximized, where C is the \p 1632341825Sdim// ConstantStart, x is an arbitrary \p Step, and n is the loop trip count. 1633341825Sdimstatic APInt extractConstantWithoutWrapping(ScalarEvolution &SE, 1634341825Sdim const APInt &ConstantStart, 1635341825Sdim const SCEV *Step) { 1636341825Sdim const unsigned BitWidth = ConstantStart.getBitWidth(); 1637341825Sdim const uint32_t TZ = SE.GetMinTrailingZeros(Step); 1638341825Sdim if (TZ) 1639341825Sdim return TZ < BitWidth ? 
ConstantStart.trunc(TZ).zext(BitWidth) 1640341825Sdim : ConstantStart; 1641341825Sdim return APInt(BitWidth, 0); 1642341825Sdim} 1643341825Sdim 1644321369Sdimconst SCEV * 1645321369SdimScalarEvolution::getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) { 1646193323Sed assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) && 1647193323Sed "This is not an extending conversion!"); 1648193323Sed assert(isSCEVable(Ty) && 1649193323Sed "This is not a conversion to a SCEVable type!"); 1650193323Sed Ty = getEffectiveSCEVType(Ty); 1651193323Sed 1652195340Sed // Fold if the operand is constant. 1653210299Sed if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op)) 1654210299Sed return getConstant( 1655239462Sdim cast<ConstantInt>(ConstantExpr::getZExt(SC->getValue(), Ty))); 1656193323Sed 1657193323Sed // zext(zext(x)) --> zext(x) 1658193323Sed if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op)) 1659321369Sdim return getZeroExtendExpr(SZ->getOperand(), Ty, Depth + 1); 1660193323Sed 1661198090Srdivacky // Before doing any expensive analysis, check to see if we've already 1662198090Srdivacky // computed a SCEV for this Op and Ty. 1663198090Srdivacky FoldingSetNodeID ID; 1664198090Srdivacky ID.AddInteger(scZeroExtend); 1665198090Srdivacky ID.AddPointer(Op); 1666198090Srdivacky ID.AddPointer(Ty); 1667276479Sdim void *IP = nullptr; 1668198090Srdivacky if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; 1669353358Sdim if (Depth > MaxCastDepth) { 1670321369Sdim SCEV *S = new (SCEVAllocator) SCEVZeroExtendExpr(ID.Intern(SCEVAllocator), 1671321369Sdim Op, Ty); 1672321369Sdim UniqueSCEVs.InsertNode(S, IP); 1673327952Sdim addToLoopUseLists(S); 1674321369Sdim return S; 1675321369Sdim } 1676198090Srdivacky 1677218893Sdim // zext(trunc(x)) --> zext(x) or x or trunc(x) 1678218893Sdim if (const SCEVTruncateExpr *ST = dyn_cast<SCEVTruncateExpr>(Op)) { 1679218893Sdim // It's possible the bits taken off by the truncate were all zero bits. 
If 1680218893Sdim // so, we should be able to simplify this further. 1681218893Sdim const SCEV *X = ST->getOperand(); 1682218893Sdim ConstantRange CR = getUnsignedRange(X); 1683218893Sdim unsigned TruncBits = getTypeSizeInBits(ST->getType()); 1684218893Sdim unsigned NewBits = getTypeSizeInBits(Ty); 1685218893Sdim if (CR.truncate(TruncBits).zeroExtend(NewBits).contains( 1686218893Sdim CR.zextOrTrunc(NewBits))) 1687353358Sdim return getTruncateOrZeroExtend(X, Ty, Depth); 1688218893Sdim } 1689218893Sdim 1690193323Sed // If the input value is a chrec scev, and we can prove that the value 1691193323Sed // did not overflow the old, smaller, value, we can zero extend all of the 1692193323Sed // operands (often constants). This allows analysis of something like 1693193323Sed // this: for (unsigned char X = 0; X < 100; ++X) { int Y = X; } 1694193323Sed if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op)) 1695193323Sed if (AR->isAffine()) { 1696198090Srdivacky const SCEV *Start = AR->getStart(); 1697198090Srdivacky const SCEV *Step = AR->getStepRecurrence(*this); 1698198090Srdivacky unsigned BitWidth = getTypeSizeInBits(AR->getType()); 1699198090Srdivacky const Loop *L = AR->getLoop(); 1700198090Srdivacky 1701309124Sdim if (!AR->hasNoUnsignedWrap()) { 1702309124Sdim auto NewFlags = proveNoWrapViaConstantRanges(AR); 1703309124Sdim const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(NewFlags); 1704309124Sdim } 1705309124Sdim 1706198090Srdivacky // If we have special knowledge that this addrec won't overflow, 1707198090Srdivacky // we don't need to do any further analysis. 1708309124Sdim if (AR->hasNoUnsignedWrap()) 1709288943Sdim return getAddRecExpr( 1710321369Sdim getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this, Depth + 1), 1711321369Sdim getZeroExtendExpr(Step, Ty, Depth + 1), L, AR->getNoWrapFlags()); 1712198090Srdivacky 1713193323Sed // Check whether the backedge-taken count is SCEVCouldNotCompute. 
1714193323Sed // Note that this serves two purposes: It filters out loops that are 1715193323Sed // simply not analyzable, and it covers the case where this code is 1716193323Sed // being called from within backedge-taken count analysis, such that 1717193323Sed // attempting to ask for the backedge-taken count would likely result 1718193323Sed // in infinite recursion. In the later case, the analysis code will 1719193323Sed // cope with a conservative value, and it will take care to purge 1720193323Sed // that value once it has finished. 1721360784Sdim const SCEV *MaxBECount = getConstantMaxBackedgeTakenCount(L); 1722193323Sed if (!isa<SCEVCouldNotCompute>(MaxBECount)) { 1723193323Sed // Manually compute the final value for AR, checking for 1724193323Sed // overflow. 1725193323Sed 1726193323Sed // Check whether the backedge-taken count can be losslessly casted to 1727193323Sed // the addrec's type. The count is always unsigned. 1728198090Srdivacky const SCEV *CastedMaxBECount = 1729353358Sdim getTruncateOrZeroExtend(MaxBECount, Start->getType(), Depth); 1730353358Sdim const SCEV *RecastedMaxBECount = getTruncateOrZeroExtend( 1731353358Sdim CastedMaxBECount, MaxBECount->getType(), Depth); 1732193323Sed if (MaxBECount == RecastedMaxBECount) { 1733226633Sdim Type *WideTy = IntegerType::get(getContext(), BitWidth * 2); 1734193323Sed // Check whether Start+Step*MaxBECount has no unsigned overflow. 
1735321369Sdim const SCEV *ZMul = getMulExpr(CastedMaxBECount, Step, 1736321369Sdim SCEV::FlagAnyWrap, Depth + 1); 1737321369Sdim const SCEV *ZAdd = getZeroExtendExpr(getAddExpr(Start, ZMul, 1738321369Sdim SCEV::FlagAnyWrap, 1739321369Sdim Depth + 1), 1740321369Sdim WideTy, Depth + 1); 1741321369Sdim const SCEV *WideStart = getZeroExtendExpr(Start, WideTy, Depth + 1); 1742239462Sdim const SCEV *WideMaxBECount = 1743321369Sdim getZeroExtendExpr(CastedMaxBECount, WideTy, Depth + 1); 1744198090Srdivacky const SCEV *OperandExtendedAdd = 1745239462Sdim getAddExpr(WideStart, 1746239462Sdim getMulExpr(WideMaxBECount, 1747321369Sdim getZeroExtendExpr(Step, WideTy, Depth + 1), 1748321369Sdim SCEV::FlagAnyWrap, Depth + 1), 1749321369Sdim SCEV::FlagAnyWrap, Depth + 1); 1750239462Sdim if (ZAdd == OperandExtendedAdd) { 1751221345Sdim // Cache knowledge of AR NUW, which is propagated to this AddRec. 1752221345Sdim const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW); 1753193323Sed // Return the expression with the addrec on the outside. 1754288943Sdim return getAddRecExpr( 1755321369Sdim getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this, 1756321369Sdim Depth + 1), 1757321369Sdim getZeroExtendExpr(Step, Ty, Depth + 1), L, 1758321369Sdim AR->getNoWrapFlags()); 1759221345Sdim } 1760193323Sed // Similar to above, only this time treat the step value as signed. 1761193323Sed // This covers loops that count down. 1762193323Sed OperandExtendedAdd = 1763239462Sdim getAddExpr(WideStart, 1764239462Sdim getMulExpr(WideMaxBECount, 1765321369Sdim getSignExtendExpr(Step, WideTy, Depth + 1), 1766321369Sdim SCEV::FlagAnyWrap, Depth + 1), 1767321369Sdim SCEV::FlagAnyWrap, Depth + 1); 1768239462Sdim if (ZAdd == OperandExtendedAdd) { 1769221345Sdim // Cache knowledge of AR NW, which is propagated to this AddRec. 1770221345Sdim // Negative step causes unsigned wrap, but it still can't self-wrap. 
1771221345Sdim const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNW); 1772193323Sed // Return the expression with the addrec on the outside. 1773288943Sdim return getAddRecExpr( 1774321369Sdim getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this, 1775321369Sdim Depth + 1), 1776321369Sdim getSignExtendExpr(Step, Ty, Depth + 1), L, 1777321369Sdim AR->getNoWrapFlags()); 1778221345Sdim } 1779193323Sed } 1780309124Sdim } 1781198090Srdivacky 1782309124Sdim // Normally, in the cases we can prove no-overflow via a 1783309124Sdim // backedge guarding condition, we can also compute a backedge 1784309124Sdim // taken count for the loop. The exceptions are assumptions and 1785309124Sdim // guards present in the loop -- SCEV is not great at exploiting 1786309124Sdim // these to compute max backedge taken counts, but can still use 1787309124Sdim // these to prove lack of overflow. Use this fact to avoid 1788309124Sdim // doing extra work that may not pay off. 1789309124Sdim if (!isa<SCEVCouldNotCompute>(MaxBECount) || HasGuards || 1790309124Sdim !AC.assumptions().empty()) { 1791309124Sdim // If the backedge is guarded by a comparison with the pre-inc 1792309124Sdim // value the addrec is safe. Also, if the entry is guarded by 1793309124Sdim // a comparison with the start value and the backedge is 1794309124Sdim // guarded by a comparison with the post-inc value, the addrec 1795309124Sdim // is safe. 1796198090Srdivacky if (isKnownPositive(Step)) { 1797198090Srdivacky const SCEV *N = getConstant(APInt::getMinValue(BitWidth) - 1798321369Sdim getUnsignedRangeMax(Step)); 1799198090Srdivacky if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_ULT, AR, N) || 1800341825Sdim isKnownOnEveryIteration(ICmpInst::ICMP_ULT, AR, N)) { 1801309124Sdim // Cache knowledge of AR NUW, which is propagated to this 1802309124Sdim // AddRec. 1803221345Sdim const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW); 1804198090Srdivacky // Return the expression with the addrec on the outside. 
1805288943Sdim return getAddRecExpr( 1806321369Sdim getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this, 1807321369Sdim Depth + 1), 1808321369Sdim getZeroExtendExpr(Step, Ty, Depth + 1), L, 1809321369Sdim AR->getNoWrapFlags()); 1810221345Sdim } 1811198090Srdivacky } else if (isKnownNegative(Step)) { 1812198090Srdivacky const SCEV *N = getConstant(APInt::getMaxValue(BitWidth) - 1813321369Sdim getSignedRangeMin(Step)); 1814207618Srdivacky if (isLoopBackedgeGuardedByCond(L, ICmpInst::ICMP_UGT, AR, N) || 1815341825Sdim isKnownOnEveryIteration(ICmpInst::ICMP_UGT, AR, N)) { 1816309124Sdim // Cache knowledge of AR NW, which is propagated to this 1817309124Sdim // AddRec. Negative step causes unsigned wrap, but it 1818309124Sdim // still can't self-wrap. 1819221345Sdim const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNW); 1820198090Srdivacky // Return the expression with the addrec on the outside. 1821288943Sdim return getAddRecExpr( 1822321369Sdim getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this, 1823321369Sdim Depth + 1), 1824321369Sdim getSignExtendExpr(Step, Ty, Depth + 1), L, 1825321369Sdim AR->getNoWrapFlags()); 1826221345Sdim } 1827198090Srdivacky } 1828193323Sed } 1829288943Sdim 1830341825Sdim // zext({C,+,Step}) --> (zext(D) + zext({C-D,+,Step}))<nuw><nsw> 1831341825Sdim // if D + (C - D + Step * n) could be proven to not unsigned wrap 1832341825Sdim // where D maximizes the number of trailing zeros of (C - D + Step * n) 1833341825Sdim if (const auto *SC = dyn_cast<SCEVConstant>(Start)) { 1834341825Sdim const APInt &C = SC->getAPInt(); 1835341825Sdim const APInt &D = extractConstantWithoutWrapping(*this, C, Step); 1836341825Sdim if (D != 0) { 1837341825Sdim const SCEV *SZExtD = getZeroExtendExpr(getConstant(D), Ty, Depth); 1838341825Sdim const SCEV *SResidual = 1839341825Sdim getAddRecExpr(getConstant(C - D), Step, L, AR->getNoWrapFlags()); 1840341825Sdim const SCEV *SZExtR = getZeroExtendExpr(SResidual, Ty, Depth + 1); 1841341825Sdim return 
getAddExpr(SZExtD, SZExtR, 1842341825Sdim (SCEV::NoWrapFlags)(SCEV::FlagNSW | SCEV::FlagNUW), 1843341825Sdim Depth + 1); 1844341825Sdim } 1845341825Sdim } 1846341825Sdim 1847288943Sdim if (proveNoWrapByVaryingStart<SCEVZeroExtendExpr>(Start, Step, L)) { 1848288943Sdim const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNUW); 1849288943Sdim return getAddRecExpr( 1850321369Sdim getExtendAddRecStart<SCEVZeroExtendExpr>(AR, Ty, this, Depth + 1), 1851321369Sdim getZeroExtendExpr(Step, Ty, Depth + 1), L, AR->getNoWrapFlags()); 1852288943Sdim } 1853193323Sed } 1854193323Sed 1855341825Sdim // zext(A % B) --> zext(A) % zext(B) 1856341825Sdim { 1857341825Sdim const SCEV *LHS; 1858341825Sdim const SCEV *RHS; 1859341825Sdim if (matchURem(Op, LHS, RHS)) 1860341825Sdim return getURemExpr(getZeroExtendExpr(LHS, Ty, Depth + 1), 1861341825Sdim getZeroExtendExpr(RHS, Ty, Depth + 1)); 1862341825Sdim } 1863341825Sdim 1864341825Sdim // zext(A / B) --> zext(A) / zext(B). 1865341825Sdim if (auto *Div = dyn_cast<SCEVUDivExpr>(Op)) 1866341825Sdim return getUDivExpr(getZeroExtendExpr(Div->getLHS(), Ty, Depth + 1), 1867341825Sdim getZeroExtendExpr(Div->getRHS(), Ty, Depth + 1)); 1868341825Sdim 1869296417Sdim if (auto *SA = dyn_cast<SCEVAddExpr>(Op)) { 1870296417Sdim // zext((A + B + ...)<nuw>) --> (zext(A) + zext(B) + ...)<nuw> 1871309124Sdim if (SA->hasNoUnsignedWrap()) { 1872296417Sdim // If the addition does not unsign overflow then we can, by definition, 1873296417Sdim // commute the zero extension with the addition operation. 1874296417Sdim SmallVector<const SCEV *, 4> Ops; 1875296417Sdim for (const auto *Op : SA->operands()) 1876321369Sdim Ops.push_back(getZeroExtendExpr(Op, Ty, Depth + 1)); 1877321369Sdim return getAddExpr(Ops, SCEV::FlagNUW, Depth + 1); 1878296417Sdim } 1879341825Sdim 1880341825Sdim // zext(C + x + y + ...) --> (zext(D) + zext((C - D) + x + y + ...)) 1881341825Sdim // if D + (C - D + x + y + ...) 
could be proven to not unsigned wrap 1882341825Sdim // where D maximizes the number of trailing zeros of (C - D + x + y + ...) 1883341825Sdim // 1884341825Sdim // Often address arithmetics contain expressions like 1885341825Sdim // (zext (add (shl X, C1), C2)), for instance, (zext (5 + (4 * X))). 1886341825Sdim // This transformation is useful while proving that such expressions are 1887341825Sdim // equal or differ by a small constant amount, see LoadStoreVectorizer pass. 1888341825Sdim if (const auto *SC = dyn_cast<SCEVConstant>(SA->getOperand(0))) { 1889341825Sdim const APInt &D = extractConstantWithoutWrapping(*this, SC, SA); 1890341825Sdim if (D != 0) { 1891341825Sdim const SCEV *SZExtD = getZeroExtendExpr(getConstant(D), Ty, Depth); 1892341825Sdim const SCEV *SResidual = 1893341825Sdim getAddExpr(getConstant(-D), SA, SCEV::FlagAnyWrap, Depth); 1894341825Sdim const SCEV *SZExtR = getZeroExtendExpr(SResidual, Ty, Depth + 1); 1895341825Sdim return getAddExpr(SZExtD, SZExtR, 1896341825Sdim (SCEV::NoWrapFlags)(SCEV::FlagNSW | SCEV::FlagNUW), 1897341825Sdim Depth + 1); 1898341825Sdim } 1899341825Sdim } 1900296417Sdim } 1901296417Sdim 1902341825Sdim if (auto *SM = dyn_cast<SCEVMulExpr>(Op)) { 1903341825Sdim // zext((A * B * ...)<nuw>) --> (zext(A) * zext(B) * ...)<nuw> 1904341825Sdim if (SM->hasNoUnsignedWrap()) { 1905341825Sdim // If the multiply does not unsign overflow then we can, by definition, 1906341825Sdim // commute the zero extension with the multiply operation. 
1907341825Sdim SmallVector<const SCEV *, 4> Ops; 1908341825Sdim for (const auto *Op : SM->operands()) 1909341825Sdim Ops.push_back(getZeroExtendExpr(Op, Ty, Depth + 1)); 1910341825Sdim return getMulExpr(Ops, SCEV::FlagNUW, Depth + 1); 1911341825Sdim } 1912341825Sdim 1913341825Sdim // zext(2^K * (trunc X to iN)) to iM -> 1914341825Sdim // 2^K * (zext(trunc X to i{N-K}) to iM)<nuw> 1915341825Sdim // 1916341825Sdim // Proof: 1917341825Sdim // 1918341825Sdim // zext(2^K * (trunc X to iN)) to iM 1919341825Sdim // = zext((trunc X to iN) << K) to iM 1920341825Sdim // = zext((trunc X to i{N-K}) << K)<nuw> to iM 1921341825Sdim // (because shl removes the top K bits) 1922341825Sdim // = zext((2^K * (trunc X to i{N-K}))<nuw>) to iM 1923341825Sdim // = (2^K * (zext(trunc X to i{N-K}) to iM))<nuw>. 1924341825Sdim // 1925341825Sdim if (SM->getNumOperands() == 2) 1926341825Sdim if (auto *MulLHS = dyn_cast<SCEVConstant>(SM->getOperand(0))) 1927341825Sdim if (MulLHS->getAPInt().isPowerOf2()) 1928341825Sdim if (auto *TruncRHS = dyn_cast<SCEVTruncateExpr>(SM->getOperand(1))) { 1929341825Sdim int NewTruncBits = getTypeSizeInBits(TruncRHS->getType()) - 1930341825Sdim MulLHS->getAPInt().logBase2(); 1931341825Sdim Type *NewTruncTy = IntegerType::get(getContext(), NewTruncBits); 1932341825Sdim return getMulExpr( 1933341825Sdim getZeroExtendExpr(MulLHS, Ty), 1934341825Sdim getZeroExtendExpr( 1935341825Sdim getTruncateExpr(TruncRHS->getOperand(), NewTruncTy), Ty), 1936341825Sdim SCEV::FlagNUW, Depth + 1); 1937341825Sdim } 1938341825Sdim } 1939341825Sdim 1940198090Srdivacky // The cast wasn't folded; create an explicit cast node. 1941198090Srdivacky // Recompute the insert position, as it may have been invalidated. 
1942195340Sed if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; 1943205407Srdivacky SCEV *S = new (SCEVAllocator) SCEVZeroExtendExpr(ID.Intern(SCEVAllocator), 1944205407Srdivacky Op, Ty); 1945195340Sed UniqueSCEVs.InsertNode(S, IP); 1946327952Sdim addToLoopUseLists(S); 1947195340Sed return S; 1948193323Sed} 1949193323Sed 1950321369Sdimconst SCEV * 1951321369SdimScalarEvolution::getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth) { 1952193323Sed assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) && 1953193323Sed "This is not an extending conversion!"); 1954193323Sed assert(isSCEVable(Ty) && 1955193323Sed "This is not a conversion to a SCEVable type!"); 1956193323Sed Ty = getEffectiveSCEVType(Ty); 1957193323Sed 1958195340Sed // Fold if the operand is constant. 1959210299Sed if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op)) 1960210299Sed return getConstant( 1961239462Sdim cast<ConstantInt>(ConstantExpr::getSExt(SC->getValue(), Ty))); 1962193323Sed 1963193323Sed // sext(sext(x)) --> sext(x) 1964193323Sed if (const SCEVSignExtendExpr *SS = dyn_cast<SCEVSignExtendExpr>(Op)) 1965321369Sdim return getSignExtendExpr(SS->getOperand(), Ty, Depth + 1); 1966193323Sed 1967218893Sdim // sext(zext(x)) --> zext(x) 1968218893Sdim if (const SCEVZeroExtendExpr *SZ = dyn_cast<SCEVZeroExtendExpr>(Op)) 1969321369Sdim return getZeroExtendExpr(SZ->getOperand(), Ty, Depth + 1); 1970218893Sdim 1971198090Srdivacky // Before doing any expensive analysis, check to see if we've already 1972198090Srdivacky // computed a SCEV for this Op and Ty. 1973198090Srdivacky FoldingSetNodeID ID; 1974198090Srdivacky ID.AddInteger(scSignExtend); 1975198090Srdivacky ID.AddPointer(Op); 1976198090Srdivacky ID.AddPointer(Ty); 1977276479Sdim void *IP = nullptr; 1978198090Srdivacky if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; 1979321369Sdim // Limit recursion depth. 
1980353358Sdim if (Depth > MaxCastDepth) { 1981321369Sdim SCEV *S = new (SCEVAllocator) SCEVSignExtendExpr(ID.Intern(SCEVAllocator), 1982321369Sdim Op, Ty); 1983321369Sdim UniqueSCEVs.InsertNode(S, IP); 1984327952Sdim addToLoopUseLists(S); 1985321369Sdim return S; 1986321369Sdim } 1987198090Srdivacky 1988218893Sdim // sext(trunc(x)) --> sext(x) or x or trunc(x) 1989218893Sdim if (const SCEVTruncateExpr *ST = dyn_cast<SCEVTruncateExpr>(Op)) { 1990218893Sdim // It's possible the bits taken off by the truncate were all sign bits. If 1991218893Sdim // so, we should be able to simplify this further. 1992218893Sdim const SCEV *X = ST->getOperand(); 1993218893Sdim ConstantRange CR = getSignedRange(X); 1994218893Sdim unsigned TruncBits = getTypeSizeInBits(ST->getType()); 1995218893Sdim unsigned NewBits = getTypeSizeInBits(Ty); 1996218893Sdim if (CR.truncate(TruncBits).signExtend(NewBits).contains( 1997218893Sdim CR.sextOrTrunc(NewBits))) 1998353358Sdim return getTruncateOrSignExtend(X, Ty, Depth); 1999218893Sdim } 2000218893Sdim 2001296417Sdim if (auto *SA = dyn_cast<SCEVAddExpr>(Op)) { 2002296417Sdim // sext((A + B + ...)<nsw>) --> (sext(A) + sext(B) + ...)<nsw> 2003309124Sdim if (SA->hasNoSignedWrap()) { 2004296417Sdim // If the addition does not sign overflow then we can, by definition, 2005296417Sdim // commute the sign extension with the addition operation. 2006296417Sdim SmallVector<const SCEV *, 4> Ops; 2007296417Sdim for (const auto *Op : SA->operands()) 2008321369Sdim Ops.push_back(getSignExtendExpr(Op, Ty, Depth + 1)); 2009321369Sdim return getAddExpr(Ops, SCEV::FlagNSW, Depth + 1); 2010296417Sdim } 2011341825Sdim 2012341825Sdim // sext(C + x + y + ...) --> (sext(D) + sext((C - D) + x + y + ...)) 2013341825Sdim // if D + (C - D + x + y + ...) could be proven to not signed wrap 2014341825Sdim // where D maximizes the number of trailing zeros of (C - D + x + y + ...) 
2015341825Sdim // 2016341825Sdim // For instance, this will bring two seemingly different expressions: 2017341825Sdim // 1 + sext(5 + 20 * %x + 24 * %y) and 2018341825Sdim // sext(6 + 20 * %x + 24 * %y) 2019341825Sdim // to the same form: 2020341825Sdim // 2 + sext(4 + 20 * %x + 24 * %y) 2021341825Sdim if (const auto *SC = dyn_cast<SCEVConstant>(SA->getOperand(0))) { 2022341825Sdim const APInt &D = extractConstantWithoutWrapping(*this, SC, SA); 2023341825Sdim if (D != 0) { 2024341825Sdim const SCEV *SSExtD = getSignExtendExpr(getConstant(D), Ty, Depth); 2025341825Sdim const SCEV *SResidual = 2026341825Sdim getAddExpr(getConstant(-D), SA, SCEV::FlagAnyWrap, Depth); 2027341825Sdim const SCEV *SSExtR = getSignExtendExpr(SResidual, Ty, Depth + 1); 2028341825Sdim return getAddExpr(SSExtD, SSExtR, 2029341825Sdim (SCEV::NoWrapFlags)(SCEV::FlagNSW | SCEV::FlagNUW), 2030341825Sdim Depth + 1); 2031341825Sdim } 2032341825Sdim } 2033276479Sdim } 2034193323Sed // If the input value is a chrec scev, and we can prove that the value 2035193323Sed // did not overflow the old, smaller, value, we can sign extend all of the 2036193323Sed // operands (often constants). 
This allows analysis of something like 2037193323Sed // this: for (signed char X = 0; X < 100; ++X) { int Y = X; } 2038193323Sed if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op)) 2039193323Sed if (AR->isAffine()) { 2040198090Srdivacky const SCEV *Start = AR->getStart(); 2041198090Srdivacky const SCEV *Step = AR->getStepRecurrence(*this); 2042198090Srdivacky unsigned BitWidth = getTypeSizeInBits(AR->getType()); 2043198090Srdivacky const Loop *L = AR->getLoop(); 2044198090Srdivacky 2045309124Sdim if (!AR->hasNoSignedWrap()) { 2046309124Sdim auto NewFlags = proveNoWrapViaConstantRanges(AR); 2047309124Sdim const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(NewFlags); 2048309124Sdim } 2049309124Sdim 2050198090Srdivacky // If we have special knowledge that this addrec won't overflow, 2051198090Srdivacky // we don't need to do any further analysis. 2052309124Sdim if (AR->hasNoSignedWrap()) 2053288943Sdim return getAddRecExpr( 2054321369Sdim getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this, Depth + 1), 2055321369Sdim getSignExtendExpr(Step, Ty, Depth + 1), L, SCEV::FlagNSW); 2056198090Srdivacky 2057193323Sed // Check whether the backedge-taken count is SCEVCouldNotCompute. 2058193323Sed // Note that this serves two purposes: It filters out loops that are 2059193323Sed // simply not analyzable, and it covers the case where this code is 2060193323Sed // being called from within backedge-taken count analysis, such that 2061193323Sed // attempting to ask for the backedge-taken count would likely result 2062193323Sed // in infinite recursion. In the later case, the analysis code will 2063193323Sed // cope with a conservative value, and it will take care to purge 2064193323Sed // that value once it has finished. 2065360784Sdim const SCEV *MaxBECount = getConstantMaxBackedgeTakenCount(L); 2066193323Sed if (!isa<SCEVCouldNotCompute>(MaxBECount)) { 2067193323Sed // Manually compute the final value for AR, checking for 2068193323Sed // overflow. 
2069193323Sed 2070193323Sed // Check whether the backedge-taken count can be losslessly casted to 2071193323Sed // the addrec's type. The count is always unsigned. 2072198090Srdivacky const SCEV *CastedMaxBECount = 2073353358Sdim getTruncateOrZeroExtend(MaxBECount, Start->getType(), Depth); 2074353358Sdim const SCEV *RecastedMaxBECount = getTruncateOrZeroExtend( 2075353358Sdim CastedMaxBECount, MaxBECount->getType(), Depth); 2076193323Sed if (MaxBECount == RecastedMaxBECount) { 2077226633Sdim Type *WideTy = IntegerType::get(getContext(), BitWidth * 2); 2078193323Sed // Check whether Start+Step*MaxBECount has no signed overflow. 2079321369Sdim const SCEV *SMul = getMulExpr(CastedMaxBECount, Step, 2080321369Sdim SCEV::FlagAnyWrap, Depth + 1); 2081321369Sdim const SCEV *SAdd = getSignExtendExpr(getAddExpr(Start, SMul, 2082321369Sdim SCEV::FlagAnyWrap, 2083321369Sdim Depth + 1), 2084321369Sdim WideTy, Depth + 1); 2085321369Sdim const SCEV *WideStart = getSignExtendExpr(Start, WideTy, Depth + 1); 2086239462Sdim const SCEV *WideMaxBECount = 2087321369Sdim getZeroExtendExpr(CastedMaxBECount, WideTy, Depth + 1); 2088198090Srdivacky const SCEV *OperandExtendedAdd = 2089239462Sdim getAddExpr(WideStart, 2090239462Sdim getMulExpr(WideMaxBECount, 2091321369Sdim getSignExtendExpr(Step, WideTy, Depth + 1), 2092321369Sdim SCEV::FlagAnyWrap, Depth + 1), 2093321369Sdim SCEV::FlagAnyWrap, Depth + 1); 2094239462Sdim if (SAdd == OperandExtendedAdd) { 2095221345Sdim // Cache knowledge of AR NSW, which is propagated to this AddRec. 2096221345Sdim const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW); 2097193323Sed // Return the expression with the addrec on the outside. 
2098288943Sdim return getAddRecExpr( 2099321369Sdim getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this, 2100321369Sdim Depth + 1), 2101321369Sdim getSignExtendExpr(Step, Ty, Depth + 1), L, 2102321369Sdim AR->getNoWrapFlags()); 2103221345Sdim } 2104198090Srdivacky // Similar to above, only this time treat the step value as unsigned. 2105198090Srdivacky // This covers loops that count up with an unsigned step. 2106198090Srdivacky OperandExtendedAdd = 2107239462Sdim getAddExpr(WideStart, 2108239462Sdim getMulExpr(WideMaxBECount, 2109321369Sdim getZeroExtendExpr(Step, WideTy, Depth + 1), 2110321369Sdim SCEV::FlagAnyWrap, Depth + 1), 2111321369Sdim SCEV::FlagAnyWrap, Depth + 1); 2112239462Sdim if (SAdd == OperandExtendedAdd) { 2113288943Sdim // If AR wraps around then 2114288943Sdim // 2115288943Sdim // abs(Step) * MaxBECount > unsigned-max(AR->getType()) 2116288943Sdim // => SAdd != OperandExtendedAdd 2117288943Sdim // 2118288943Sdim // Thus (AR is not NW => SAdd != OperandExtendedAdd) <=> 2119288943Sdim // (SAdd == OperandExtendedAdd => AR is NW) 2120288943Sdim 2121288943Sdim const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNW); 2122288943Sdim 2123198090Srdivacky // Return the expression with the addrec on the outside. 2124288943Sdim return getAddRecExpr( 2125321369Sdim getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this, 2126321369Sdim Depth + 1), 2127321369Sdim getZeroExtendExpr(Step, Ty, Depth + 1), L, 2128321369Sdim AR->getNoWrapFlags()); 2129221345Sdim } 2130193323Sed } 2131309124Sdim } 2132198090Srdivacky 2133309124Sdim // Normally, in the cases we can prove no-overflow via a 2134309124Sdim // backedge guarding condition, we can also compute a backedge 2135309124Sdim // taken count for the loop. The exceptions are assumptions and 2136309124Sdim // guards present in the loop -- SCEV is not great at exploiting 2137309124Sdim // these to compute max backedge taken counts, but can still use 2138309124Sdim // these to prove lack of overflow. 
Use this fact to avoid 2139309124Sdim // doing extra work that may not pay off. 2140309124Sdim 2141309124Sdim if (!isa<SCEVCouldNotCompute>(MaxBECount) || HasGuards || 2142309124Sdim !AC.assumptions().empty()) { 2143309124Sdim // If the backedge is guarded by a comparison with the pre-inc 2144309124Sdim // value the addrec is safe. Also, if the entry is guarded by 2145309124Sdim // a comparison with the start value and the backedge is 2146309124Sdim // guarded by a comparison with the post-inc value, the addrec 2147309124Sdim // is safe. 2148223017Sdim ICmpInst::Predicate Pred; 2149288943Sdim const SCEV *OverflowLimit = 2150288943Sdim getSignedOverflowLimitForStep(Step, &Pred, this); 2151223017Sdim if (OverflowLimit && 2152223017Sdim (isLoopBackedgeGuardedByCond(L, Pred, AR, OverflowLimit) || 2153341825Sdim isKnownOnEveryIteration(Pred, AR, OverflowLimit))) { 2154223017Sdim // Cache knowledge of AR NSW, then propagate NSW to the wide AddRec. 2155223017Sdim const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW); 2156288943Sdim return getAddRecExpr( 2157321369Sdim getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this, Depth + 1), 2158321369Sdim getSignExtendExpr(Step, Ty, Depth + 1), L, AR->getNoWrapFlags()); 2159198090Srdivacky } 2160193323Sed } 2161309124Sdim 2162341825Sdim // sext({C,+,Step}) --> (sext(D) + sext({C-D,+,Step}))<nuw><nsw> 2163341825Sdim // if D + (C - D + Step * n) could be proven to not signed wrap 2164341825Sdim // where D maximizes the number of trailing zeros of (C - D + Step * n) 2165341825Sdim if (const auto *SC = dyn_cast<SCEVConstant>(Start)) { 2166341825Sdim const APInt &C = SC->getAPInt(); 2167341825Sdim const APInt &D = extractConstantWithoutWrapping(*this, C, Step); 2168341825Sdim if (D != 0) { 2169341825Sdim const SCEV *SSExtD = getSignExtendExpr(getConstant(D), Ty, Depth); 2170341825Sdim const SCEV *SResidual = 2171341825Sdim getAddRecExpr(getConstant(C - D), Step, L, AR->getNoWrapFlags()); 2172341825Sdim const SCEV *SSExtR = 
getSignExtendExpr(SResidual, Ty, Depth + 1); 2173341825Sdim return getAddExpr(SSExtD, SSExtR, 2174341825Sdim (SCEV::NoWrapFlags)(SCEV::FlagNSW | SCEV::FlagNUW), 2175341825Sdim Depth + 1); 2176276479Sdim } 2177276479Sdim } 2178288943Sdim 2179288943Sdim if (proveNoWrapByVaryingStart<SCEVSignExtendExpr>(Start, Step, L)) { 2180288943Sdim const_cast<SCEVAddRecExpr *>(AR)->setNoWrapFlags(SCEV::FlagNSW); 2181288943Sdim return getAddRecExpr( 2182321369Sdim getExtendAddRecStart<SCEVSignExtendExpr>(AR, Ty, this, Depth + 1), 2183321369Sdim getSignExtendExpr(Step, Ty, Depth + 1), L, AR->getNoWrapFlags()); 2184288943Sdim } 2185193323Sed } 2186193323Sed 2187309124Sdim // If the input value is provably positive and we could not simplify 2188309124Sdim // away the sext build a zext instead. 2189309124Sdim if (isKnownNonNegative(Op)) 2190321369Sdim return getZeroExtendExpr(Op, Ty, Depth + 1); 2191309124Sdim 2192198090Srdivacky // The cast wasn't folded; create an explicit cast node. 2193198090Srdivacky // Recompute the insert position, as it may have been invalidated. 2194195340Sed if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; 2195205407Srdivacky SCEV *S = new (SCEVAllocator) SCEVSignExtendExpr(ID.Intern(SCEVAllocator), 2196205407Srdivacky Op, Ty); 2197195340Sed UniqueSCEVs.InsertNode(S, IP); 2198327952Sdim addToLoopUseLists(S); 2199195340Sed return S; 2200193323Sed} 2201193323Sed 2202194178Sed/// getAnyExtendExpr - Return a SCEV for the given operand extended with 2203194178Sed/// unspecified bits out to the given type. 
2204198090Srdivackyconst SCEV *ScalarEvolution::getAnyExtendExpr(const SCEV *Op, 2205226633Sdim Type *Ty) { 2206194178Sed assert(getTypeSizeInBits(Op->getType()) < getTypeSizeInBits(Ty) && 2207194178Sed "This is not an extending conversion!"); 2208194178Sed assert(isSCEVable(Ty) && 2209194178Sed "This is not a conversion to a SCEVable type!"); 2210194178Sed Ty = getEffectiveSCEVType(Ty); 2211194178Sed 2212194178Sed // Sign-extend negative constants. 2213194178Sed if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op)) 2214296417Sdim if (SC->getAPInt().isNegative()) 2215194178Sed return getSignExtendExpr(Op, Ty); 2216194178Sed 2217194178Sed // Peel off a truncate cast. 2218194178Sed if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(Op)) { 2219198090Srdivacky const SCEV *NewOp = T->getOperand(); 2220194178Sed if (getTypeSizeInBits(NewOp->getType()) < getTypeSizeInBits(Ty)) 2221194178Sed return getAnyExtendExpr(NewOp, Ty); 2222194178Sed return getTruncateOrNoop(NewOp, Ty); 2223194178Sed } 2224194178Sed 2225194178Sed // Next try a zext cast. If the cast is folded, use it. 2226198090Srdivacky const SCEV *ZExt = getZeroExtendExpr(Op, Ty); 2227194178Sed if (!isa<SCEVZeroExtendExpr>(ZExt)) 2228194178Sed return ZExt; 2229194178Sed 2230194178Sed // Next try a sext cast. If the cast is folded, use it. 2231198090Srdivacky const SCEV *SExt = getSignExtendExpr(Op, Ty); 2232194178Sed if (!isa<SCEVSignExtendExpr>(SExt)) 2233194178Sed return SExt; 2234194178Sed 2235202878Srdivacky // Force the cast to be folded into the operands of an addrec. 
2236202878Srdivacky if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Op)) { 2237202878Srdivacky SmallVector<const SCEV *, 4> Ops; 2238276479Sdim for (const SCEV *Op : AR->operands()) 2239276479Sdim Ops.push_back(getAnyExtendExpr(Op, Ty)); 2240221345Sdim return getAddRecExpr(Ops, AR->getLoop(), SCEV::FlagNW); 2241202878Srdivacky } 2242202878Srdivacky 2243194178Sed // If the expression is obviously signed, use the sext cast value. 2244194178Sed if (isa<SCEVSMaxExpr>(Op)) 2245194178Sed return SExt; 2246194178Sed 2247194178Sed // Absent any other information, use the zext cast value. 2248194178Sed return ZExt; 2249194178Sed} 2250194178Sed 2251309124Sdim/// Process the given Ops list, which is a list of operands to be added under 2252309124Sdim/// the given scale, update the given map. This is a helper function for 2253309124Sdim/// getAddRecExpr. As an example of what it does, given a sequence of operands 2254309124Sdim/// that would form an add expression like this: 2255194612Sed/// 2256276479Sdim/// m + n + 13 + (A * (o + p + (B * (q + m + 29)))) + r + (-1 * r) 2257194612Sed/// 2258194612Sed/// where A and B are constants, update the map with these values: 2259194612Sed/// 2260194612Sed/// (m, 1+A*B), (n, 1), (o, A), (p, A), (q, A*B), (r, 0) 2261194612Sed/// 2262194612Sed/// and add 13 + A*B*29 to AccumulatedConstant. 2263194612Sed/// This will allow getAddRecExpr to produce this: 2264194612Sed/// 2265194612Sed/// 13+A*B*29 + n + (m * (1+A*B)) + ((o + p) * A) + (q * A*B) 2266194612Sed/// 2267194612Sed/// This form often exposes folding opportunities that are hidden in 2268194612Sed/// the original operand list. 2269194612Sed/// 2270194612Sed/// Return true iff it appears that any interesting folding opportunities 2271194612Sed/// may be exposed. This helps getAddRecExpr short-circuit extra work in 2272194612Sed/// the common case where no interesting opportunities are present, and 2273194612Sed/// is also used as a check to avoid infinite recursion. 
2274194612Sedstatic bool 2275198090SrdivackyCollectAddOperandsWithScales(DenseMap<const SCEV *, APInt> &M, 2276261991Sdim SmallVectorImpl<const SCEV *> &NewOps, 2277194612Sed APInt &AccumulatedConstant, 2278205407Srdivacky const SCEV *const *Ops, size_t NumOperands, 2279194612Sed const APInt &Scale, 2280194612Sed ScalarEvolution &SE) { 2281194612Sed bool Interesting = false; 2282194612Sed 2283210299Sed // Iterate over the add operands. They are sorted, with constants first. 2284210299Sed unsigned i = 0; 2285210299Sed while (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[i])) { 2286210299Sed ++i; 2287210299Sed // Pull a buried constant out to the outside. 2288210299Sed if (Scale != 1 || AccumulatedConstant != 0 || C->getValue()->isZero()) 2289210299Sed Interesting = true; 2290296417Sdim AccumulatedConstant += Scale * C->getAPInt(); 2291210299Sed } 2292210299Sed 2293210299Sed // Next comes everything else. We're especially interested in multiplies 2294210299Sed // here, but they're in the middle, so just visit the rest with one loop. 2295210299Sed for (; i != NumOperands; ++i) { 2296194612Sed const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Ops[i]); 2297194612Sed if (Mul && isa<SCEVConstant>(Mul->getOperand(0))) { 2298194612Sed APInt NewScale = 2299296417Sdim Scale * cast<SCEVConstant>(Mul->getOperand(0))->getAPInt(); 2300194612Sed if (Mul->getNumOperands() == 2 && isa<SCEVAddExpr>(Mul->getOperand(1))) { 2301194612Sed // A multiplication of a constant with another add; recurse. 2302205407Srdivacky const SCEVAddExpr *Add = cast<SCEVAddExpr>(Mul->getOperand(1)); 2303194612Sed Interesting |= 2304194612Sed CollectAddOperandsWithScales(M, NewOps, AccumulatedConstant, 2305205407Srdivacky Add->op_begin(), Add->getNumOperands(), 2306194612Sed NewScale, SE); 2307194612Sed } else { 2308194612Sed // A multiplication of a constant with some other value. Update 2309194612Sed // the map. 
2310198090Srdivacky SmallVector<const SCEV *, 4> MulOps(Mul->op_begin()+1, Mul->op_end()); 2311198090Srdivacky const SCEV *Key = SE.getMulExpr(MulOps); 2312309124Sdim auto Pair = M.insert({Key, NewScale}); 2313194612Sed if (Pair.second) { 2314194612Sed NewOps.push_back(Pair.first->first); 2315194612Sed } else { 2316194612Sed Pair.first->second += NewScale; 2317194612Sed // The map already had an entry for this value, which may indicate 2318194612Sed // a folding opportunity. 2319194612Sed Interesting = true; 2320194612Sed } 2321194612Sed } 2322194612Sed } else { 2323194612Sed // An ordinary operand. Update the map. 2324198090Srdivacky std::pair<DenseMap<const SCEV *, APInt>::iterator, bool> Pair = 2325309124Sdim M.insert({Ops[i], Scale}); 2326194612Sed if (Pair.second) { 2327194612Sed NewOps.push_back(Pair.first->first); 2328194612Sed } else { 2329194612Sed Pair.first->second += Scale; 2330194612Sed // The map already had an entry for this value, which may indicate 2331194612Sed // a folding opportunity. 2332194612Sed Interesting = true; 2333194612Sed } 2334194612Sed } 2335194612Sed } 2336194612Sed 2337194612Sed return Interesting; 2338194612Sed} 2339194612Sed 2340280031Sdim// We're trying to construct a SCEV of type `Type' with `Ops' as operands and 2341280031Sdim// `OldFlags' as can't-wrap behavior. Infer a more aggressive set of 2342280031Sdim// can't-overflow flags for the operation if possible. 
2343280031Sdimstatic SCEV::NoWrapFlags 2344280031SdimStrengthenNoWrapFlags(ScalarEvolution *SE, SCEVTypes Type, 2345353358Sdim const ArrayRef<const SCEV *> Ops, 2346296417Sdim SCEV::NoWrapFlags Flags) { 2347280031Sdim using namespace std::placeholders; 2348280031Sdim 2349327952Sdim using OBO = OverflowingBinaryOperator; 2350327952Sdim 2351280031Sdim bool CanAnalyze = 2352280031Sdim Type == scAddExpr || Type == scAddRecExpr || Type == scMulExpr; 2353280031Sdim (void)CanAnalyze; 2354280031Sdim assert(CanAnalyze && "don't call from other places!"); 2355280031Sdim 2356280031Sdim int SignOrUnsignMask = SCEV::FlagNUW | SCEV::FlagNSW; 2357280031Sdim SCEV::NoWrapFlags SignOrUnsignWrap = 2358296417Sdim ScalarEvolution::maskFlags(Flags, SignOrUnsignMask); 2359280031Sdim 2360280031Sdim // If FlagNSW is true and all the operands are non-negative, infer FlagNUW. 2361296417Sdim auto IsKnownNonNegative = [&](const SCEV *S) { 2362296417Sdim return SE->isKnownNonNegative(S); 2363296417Sdim }; 2364280031Sdim 2365296417Sdim if (SignOrUnsignWrap == SCEV::FlagNSW && all_of(Ops, IsKnownNonNegative)) 2366296417Sdim Flags = 2367296417Sdim ScalarEvolution::setFlags(Flags, (SCEV::NoWrapFlags)SignOrUnsignMask); 2368280031Sdim 2369296417Sdim SignOrUnsignWrap = ScalarEvolution::maskFlags(Flags, SignOrUnsignMask); 2370296417Sdim 2371341825Sdim if (SignOrUnsignWrap != SignOrUnsignMask && 2372341825Sdim (Type == scAddExpr || Type == scMulExpr) && Ops.size() == 2 && 2373341825Sdim isa<SCEVConstant>(Ops[0])) { 2374296417Sdim 2375341825Sdim auto Opcode = [&] { 2376341825Sdim switch (Type) { 2377341825Sdim case scAddExpr: 2378341825Sdim return Instruction::Add; 2379341825Sdim case scMulExpr: 2380341825Sdim return Instruction::Mul; 2381341825Sdim default: 2382341825Sdim llvm_unreachable("Unexpected SCEV op."); 2383341825Sdim } 2384341825Sdim }(); 2385296417Sdim 2386296417Sdim const APInt &C = cast<SCEVConstant>(Ops[0])->getAPInt(); 2387341825Sdim 2388341825Sdim // (A <opcode> C) --> (A <opcode> 
C)<nsw> if the op doesn't sign overflow. 2389296417Sdim if (!(SignOrUnsignWrap & SCEV::FlagNSW)) { 2390309124Sdim auto NSWRegion = ConstantRange::makeGuaranteedNoWrapRegion( 2391341825Sdim Opcode, C, OBO::NoSignedWrap); 2392296417Sdim if (NSWRegion.contains(SE->getSignedRange(Ops[1]))) 2393296417Sdim Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNSW); 2394296417Sdim } 2395341825Sdim 2396341825Sdim // (A <opcode> C) --> (A <opcode> C)<nuw> if the op doesn't unsign overflow. 2397296417Sdim if (!(SignOrUnsignWrap & SCEV::FlagNUW)) { 2398309124Sdim auto NUWRegion = ConstantRange::makeGuaranteedNoWrapRegion( 2399341825Sdim Opcode, C, OBO::NoUnsignedWrap); 2400296417Sdim if (NUWRegion.contains(SE->getUnsignedRange(Ops[1]))) 2401296417Sdim Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNUW); 2402296417Sdim } 2403296417Sdim } 2404296417Sdim 2405296417Sdim return Flags; 2406280031Sdim} 2407280031Sdim 2408321369Sdimbool ScalarEvolution::isAvailableAtLoopEntry(const SCEV *S, const Loop *L) { 2409341825Sdim return isLoopInvariant(S, L) && properlyDominates(S, L->getHeader()); 2410321369Sdim} 2411321369Sdim 2412309124Sdim/// Get a canonical add expression, or something simpler if possible. 
2413198090Srdivackyconst SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops, 2414321369Sdim SCEV::NoWrapFlags Flags, 2415321369Sdim unsigned Depth) { 2416221345Sdim assert(!(Flags & ~(SCEV::FlagNUW | SCEV::FlagNSW)) && 2417221345Sdim "only nuw or nsw allowed"); 2418193323Sed assert(!Ops.empty() && "Cannot get empty add!"); 2419193323Sed if (Ops.size() == 1) return Ops[0]; 2420193323Sed#ifndef NDEBUG 2421226633Sdim Type *ETy = getEffectiveSCEVType(Ops[0]->getType()); 2422193323Sed for (unsigned i = 1, e = Ops.size(); i != e; ++i) 2423210299Sed assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy && 2424193323Sed "SCEVAddExpr operand types don't match!"); 2425193323Sed#endif 2426193323Sed 2427296417Sdim // Sort by complexity, this groups all similar expression types together. 2428321369Sdim GroupByComplexity(Ops, &LI, DT); 2429296417Sdim 2430280031Sdim Flags = StrengthenNoWrapFlags(this, scAddExpr, Ops, Flags); 2431202878Srdivacky 2432193323Sed // If there are any constants, fold them together. 2433193323Sed unsigned Idx = 0; 2434193323Sed if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) { 2435193323Sed ++Idx; 2436193323Sed assert(Idx < Ops.size()); 2437193323Sed while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) { 2438193323Sed // We found two constants, fold them together! 2439296417Sdim Ops[0] = getConstant(LHSC->getAPInt() + RHSC->getAPInt()); 2440194612Sed if (Ops.size() == 2) return Ops[0]; 2441193323Sed Ops.erase(Ops.begin()+1); // Erase the folded element 2442193323Sed LHSC = cast<SCEVConstant>(Ops[0]); 2443193323Sed } 2444193323Sed 2445193323Sed // If we are left with a constant zero being added, strip it off. 2446207618Srdivacky if (LHSC->getValue()->isZero()) { 2447193323Sed Ops.erase(Ops.begin()); 2448193323Sed --Idx; 2449193323Sed } 2450207618Srdivacky 2451207618Srdivacky if (Ops.size() == 1) return Ops[0]; 2452193323Sed } 2453193323Sed 2454321369Sdim // Limit recursion calls depth. 
2455353358Sdim if (Depth > MaxArithDepth || hasHugeExpression(Ops)) 2456321369Sdim return getOrCreateAddExpr(Ops, Flags); 2457321369Sdim 2458212904Sdim // Okay, check to see if the same value occurs in the operand list more than 2459212904Sdim // once. If so, merge them together into an multiply expression. Since we 2460212904Sdim // sorted the list, these values are required to be adjacent. 2461226633Sdim Type *Ty = Ops[0]->getType(); 2462212904Sdim bool FoundMatch = false; 2463212904Sdim for (unsigned i = 0, e = Ops.size(); i != e-1; ++i) 2464193323Sed if (Ops[i] == Ops[i+1]) { // X + Y + Y --> X + Y*2 2465212904Sdim // Scan ahead to count how many equal operands there are. 2466212904Sdim unsigned Count = 2; 2467212904Sdim while (i+Count != e && Ops[i+Count] == Ops[i]) 2468212904Sdim ++Count; 2469212904Sdim // Merge the values into a multiply. 2470212904Sdim const SCEV *Scale = getConstant(Ty, Count); 2471321369Sdim const SCEV *Mul = getMulExpr(Scale, Ops[i], SCEV::FlagAnyWrap, Depth + 1); 2472212904Sdim if (Ops.size() == Count) 2473193323Sed return Mul; 2474212904Sdim Ops[i] = Mul; 2475212904Sdim Ops.erase(Ops.begin()+i+1, Ops.begin()+i+Count); 2476212904Sdim --i; e -= Count - 1; 2477212904Sdim FoundMatch = true; 2478193323Sed } 2479212904Sdim if (FoundMatch) 2480327952Sdim return getAddExpr(Ops, Flags, Depth + 1); 2481193323Sed 2482193323Sed // Check for truncates. If all the operands are truncated from the same 2483193323Sed // type, see if factoring out the truncate would permit the result to be 2484327952Sdim // folded. eg., n*trunc(x) + m*trunc(y) --> trunc(trunc(m)*x + trunc(n)*y) 2485193323Sed // if the contents of the resulting outer trunc fold to something simple. 
2486327952Sdim auto FindTruncSrcType = [&]() -> Type * { 2487327952Sdim // We're ultimately looking to fold an addrec of truncs and muls of only 2488327952Sdim // constants and truncs, so if we find any other types of SCEV 2489327952Sdim // as operands of the addrec then we bail and return nullptr here. 2490327952Sdim // Otherwise, we return the type of the operand of a trunc that we find. 2491327952Sdim if (auto *T = dyn_cast<SCEVTruncateExpr>(Ops[Idx])) 2492327952Sdim return T->getOperand()->getType(); 2493327952Sdim if (const auto *Mul = dyn_cast<SCEVMulExpr>(Ops[Idx])) { 2494327952Sdim const auto *LastOp = Mul->getOperand(Mul->getNumOperands() - 1); 2495327952Sdim if (const auto *T = dyn_cast<SCEVTruncateExpr>(LastOp)) 2496327952Sdim return T->getOperand()->getType(); 2497327952Sdim } 2498327952Sdim return nullptr; 2499327952Sdim }; 2500327952Sdim if (auto *SrcType = FindTruncSrcType()) { 2501198090Srdivacky SmallVector<const SCEV *, 8> LargeOps; 2502193323Sed bool Ok = true; 2503193323Sed // Check all the operands to see if they can be represented in the 2504193323Sed // source type of the truncate. 
2505193323Sed for (unsigned i = 0, e = Ops.size(); i != e; ++i) { 2506193323Sed if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(Ops[i])) { 2507193323Sed if (T->getOperand()->getType() != SrcType) { 2508193323Sed Ok = false; 2509193323Sed break; 2510193323Sed } 2511193323Sed LargeOps.push_back(T->getOperand()); 2512193323Sed } else if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[i])) { 2513207618Srdivacky LargeOps.push_back(getAnyExtendExpr(C, SrcType)); 2514193323Sed } else if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(Ops[i])) { 2515198090Srdivacky SmallVector<const SCEV *, 8> LargeMulOps; 2516193323Sed for (unsigned j = 0, f = M->getNumOperands(); j != f && Ok; ++j) { 2517193323Sed if (const SCEVTruncateExpr *T = 2518193323Sed dyn_cast<SCEVTruncateExpr>(M->getOperand(j))) { 2519193323Sed if (T->getOperand()->getType() != SrcType) { 2520193323Sed Ok = false; 2521193323Sed break; 2522193323Sed } 2523193323Sed LargeMulOps.push_back(T->getOperand()); 2524296417Sdim } else if (const auto *C = dyn_cast<SCEVConstant>(M->getOperand(j))) { 2525207618Srdivacky LargeMulOps.push_back(getAnyExtendExpr(C, SrcType)); 2526193323Sed } else { 2527193323Sed Ok = false; 2528193323Sed break; 2529193323Sed } 2530193323Sed } 2531193323Sed if (Ok) 2532321369Sdim LargeOps.push_back(getMulExpr(LargeMulOps, SCEV::FlagAnyWrap, Depth + 1)); 2533193323Sed } else { 2534193323Sed Ok = false; 2535193323Sed break; 2536193323Sed } 2537193323Sed } 2538193323Sed if (Ok) { 2539193323Sed // Evaluate the expression in the larger type. 2540341825Sdim const SCEV *Fold = getAddExpr(LargeOps, SCEV::FlagAnyWrap, Depth + 1); 2541193323Sed // If it folds to something simple, use it. Otherwise, don't. 2542193323Sed if (isa<SCEVConstant>(Fold) || isa<SCEVUnknown>(Fold)) 2543327952Sdim return getTruncateExpr(Fold, Ty); 2544193323Sed } 2545193323Sed } 2546193323Sed 2547193323Sed // Skip past any other cast SCEVs. 
2548193323Sed while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddExpr) 2549193323Sed ++Idx; 2550193323Sed 2551193323Sed // If there are add operands they would be next. 2552193323Sed if (Idx < Ops.size()) { 2553193323Sed bool DeletedAdd = false; 2554193323Sed while (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[Idx])) { 2555321369Sdim if (Ops.size() > AddOpsInlineThreshold || 2556321369Sdim Add->getNumOperands() > AddOpsInlineThreshold) 2557321369Sdim break; 2558193323Sed // If we have an add, expand the add operands onto the end of the operands 2559193323Sed // list. 2560193323Sed Ops.erase(Ops.begin()+Idx); 2561210299Sed Ops.append(Add->op_begin(), Add->op_end()); 2562193323Sed DeletedAdd = true; 2563193323Sed } 2564193323Sed 2565193323Sed // If we deleted at least one add, we added operands to the end of the list, 2566193323Sed // and they are not necessarily sorted. Recurse to resort and resimplify 2567204642Srdivacky // any operands we just acquired. 2568193323Sed if (DeletedAdd) 2569321369Sdim return getAddExpr(Ops, SCEV::FlagAnyWrap, Depth + 1); 2570193323Sed } 2571193323Sed 2572193323Sed // Skip over the add expression until we get to a multiply. 2573193323Sed while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scMulExpr) 2574193323Sed ++Idx; 2575193323Sed 2576194612Sed // Check to see if there are any folding opportunities present with 2577194612Sed // operands multiplied by constant values. 
2578194612Sed if (Idx < Ops.size() && isa<SCEVMulExpr>(Ops[Idx])) { 2579194612Sed uint64_t BitWidth = getTypeSizeInBits(Ty); 2580198090Srdivacky DenseMap<const SCEV *, APInt> M; 2581198090Srdivacky SmallVector<const SCEV *, 8> NewOps; 2582194612Sed APInt AccumulatedConstant(BitWidth, 0); 2583194612Sed if (CollectAddOperandsWithScales(M, NewOps, AccumulatedConstant, 2584205407Srdivacky Ops.data(), Ops.size(), 2585205407Srdivacky APInt(BitWidth, 1), *this)) { 2586296417Sdim struct APIntCompare { 2587296417Sdim bool operator()(const APInt &LHS, const APInt &RHS) const { 2588296417Sdim return LHS.ult(RHS); 2589296417Sdim } 2590296417Sdim }; 2591296417Sdim 2592194612Sed // Some interesting folding opportunity is present, so its worthwhile to 2593194612Sed // re-generate the operands list. Group the operands by constant scale, 2594194612Sed // to avoid multiplying by the same constant scale multiple times. 2595198090Srdivacky std::map<APInt, SmallVector<const SCEV *, 4>, APIntCompare> MulOpLists; 2596296417Sdim for (const SCEV *NewOp : NewOps) 2597296417Sdim MulOpLists[M.find(NewOp)->second].push_back(NewOp); 2598194612Sed // Re-generate the operands list. 2599194612Sed Ops.clear(); 2600194612Sed if (AccumulatedConstant != 0) 2601194612Sed Ops.push_back(getConstant(AccumulatedConstant)); 2602296417Sdim for (auto &MulOp : MulOpLists) 2603296417Sdim if (MulOp.first != 0) 2604321369Sdim Ops.push_back(getMulExpr( 2605321369Sdim getConstant(MulOp.first), 2606321369Sdim getAddExpr(MulOp.second, SCEV::FlagAnyWrap, Depth + 1), 2607321369Sdim SCEV::FlagAnyWrap, Depth + 1)); 2608194612Sed if (Ops.empty()) 2609296417Sdim return getZero(Ty); 2610194612Sed if (Ops.size() == 1) 2611194612Sed return Ops[0]; 2612321369Sdim return getAddExpr(Ops, SCEV::FlagAnyWrap, Depth + 1); 2613194612Sed } 2614194612Sed } 2615194612Sed 2616193323Sed // If we are adding something to a multiply expression, make sure the 2617193323Sed // something is not already an operand of the multiply. 
If so, merge it into 2618193323Sed // the multiply. 2619193323Sed for (; Idx < Ops.size() && isa<SCEVMulExpr>(Ops[Idx]); ++Idx) { 2620193323Sed const SCEVMulExpr *Mul = cast<SCEVMulExpr>(Ops[Idx]); 2621193323Sed for (unsigned MulOp = 0, e = Mul->getNumOperands(); MulOp != e; ++MulOp) { 2622193323Sed const SCEV *MulOpSCEV = Mul->getOperand(MulOp); 2623212904Sdim if (isa<SCEVConstant>(MulOpSCEV)) 2624212904Sdim continue; 2625193323Sed for (unsigned AddOp = 0, e = Ops.size(); AddOp != e; ++AddOp) 2626212904Sdim if (MulOpSCEV == Ops[AddOp]) { 2627193323Sed // Fold W + X + (X * Y * Z) --> W + (X * ((Y*Z)+1)) 2628198090Srdivacky const SCEV *InnerMul = Mul->getOperand(MulOp == 0); 2629193323Sed if (Mul->getNumOperands() != 2) { 2630193323Sed // If the multiply has more than two operands, we must get the 2631193323Sed // Y*Z term. 2632212904Sdim SmallVector<const SCEV *, 4> MulOps(Mul->op_begin(), 2633212904Sdim Mul->op_begin()+MulOp); 2634212904Sdim MulOps.append(Mul->op_begin()+MulOp+1, Mul->op_end()); 2635321369Sdim InnerMul = getMulExpr(MulOps, SCEV::FlagAnyWrap, Depth + 1); 2636193323Sed } 2637321369Sdim SmallVector<const SCEV *, 2> TwoOps = {getOne(Ty), InnerMul}; 2638321369Sdim const SCEV *AddOne = getAddExpr(TwoOps, SCEV::FlagAnyWrap, Depth + 1); 2639321369Sdim const SCEV *OuterMul = getMulExpr(AddOne, MulOpSCEV, 2640321369Sdim SCEV::FlagAnyWrap, Depth + 1); 2641193323Sed if (Ops.size() == 2) return OuterMul; 2642193323Sed if (AddOp < Idx) { 2643193323Sed Ops.erase(Ops.begin()+AddOp); 2644193323Sed Ops.erase(Ops.begin()+Idx-1); 2645193323Sed } else { 2646193323Sed Ops.erase(Ops.begin()+Idx); 2647193323Sed Ops.erase(Ops.begin()+AddOp-1); 2648193323Sed } 2649193323Sed Ops.push_back(OuterMul); 2650321369Sdim return getAddExpr(Ops, SCEV::FlagAnyWrap, Depth + 1); 2651193323Sed } 2652193323Sed 2653193323Sed // Check this multiply against other multiplies being added together. 
2654193323Sed for (unsigned OtherMulIdx = Idx+1; 2655193323Sed OtherMulIdx < Ops.size() && isa<SCEVMulExpr>(Ops[OtherMulIdx]); 2656193323Sed ++OtherMulIdx) { 2657193323Sed const SCEVMulExpr *OtherMul = cast<SCEVMulExpr>(Ops[OtherMulIdx]); 2658193323Sed // If MulOp occurs in OtherMul, we can fold the two multiplies 2659193323Sed // together. 2660193323Sed for (unsigned OMulOp = 0, e = OtherMul->getNumOperands(); 2661193323Sed OMulOp != e; ++OMulOp) 2662193323Sed if (OtherMul->getOperand(OMulOp) == MulOpSCEV) { 2663193323Sed // Fold X + (A*B*C) + (A*D*E) --> X + (A*(B*C+D*E)) 2664198090Srdivacky const SCEV *InnerMul1 = Mul->getOperand(MulOp == 0); 2665193323Sed if (Mul->getNumOperands() != 2) { 2666195098Sed SmallVector<const SCEV *, 4> MulOps(Mul->op_begin(), 2667212904Sdim Mul->op_begin()+MulOp); 2668212904Sdim MulOps.append(Mul->op_begin()+MulOp+1, Mul->op_end()); 2669321369Sdim InnerMul1 = getMulExpr(MulOps, SCEV::FlagAnyWrap, Depth + 1); 2670193323Sed } 2671198090Srdivacky const SCEV *InnerMul2 = OtherMul->getOperand(OMulOp == 0); 2672193323Sed if (OtherMul->getNumOperands() != 2) { 2673195098Sed SmallVector<const SCEV *, 4> MulOps(OtherMul->op_begin(), 2674212904Sdim OtherMul->op_begin()+OMulOp); 2675212904Sdim MulOps.append(OtherMul->op_begin()+OMulOp+1, OtherMul->op_end()); 2676321369Sdim InnerMul2 = getMulExpr(MulOps, SCEV::FlagAnyWrap, Depth + 1); 2677193323Sed } 2678321369Sdim SmallVector<const SCEV *, 2> TwoOps = {InnerMul1, InnerMul2}; 2679321369Sdim const SCEV *InnerMulSum = 2680321369Sdim getAddExpr(TwoOps, SCEV::FlagAnyWrap, Depth + 1); 2681321369Sdim const SCEV *OuterMul = getMulExpr(MulOpSCEV, InnerMulSum, 2682321369Sdim SCEV::FlagAnyWrap, Depth + 1); 2683193323Sed if (Ops.size() == 2) return OuterMul; 2684193323Sed Ops.erase(Ops.begin()+Idx); 2685193323Sed Ops.erase(Ops.begin()+OtherMulIdx-1); 2686193323Sed Ops.push_back(OuterMul); 2687321369Sdim return getAddExpr(Ops, SCEV::FlagAnyWrap, Depth + 1); 2688193323Sed } 2689193323Sed } 2690193323Sed } 
2691193323Sed } 2692193323Sed 2693193323Sed // If there are any add recurrences in the operands list, see if any other 2694193323Sed // added values are loop invariant. If so, we can fold them into the 2695193323Sed // recurrence. 2696193323Sed while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddRecExpr) 2697193323Sed ++Idx; 2698193323Sed 2699193323Sed // Scan over all recurrences, trying to fold loop invariants into them. 2700193323Sed for (; Idx < Ops.size() && isa<SCEVAddRecExpr>(Ops[Idx]); ++Idx) { 2701193323Sed // Scan all of the other operands to this add and add them to the vector if 2702193323Sed // they are loop invariant w.r.t. the recurrence. 2703198090Srdivacky SmallVector<const SCEV *, 8> LIOps; 2704193323Sed const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]); 2705207618Srdivacky const Loop *AddRecLoop = AddRec->getLoop(); 2706193323Sed for (unsigned i = 0, e = Ops.size(); i != e; ++i) 2707321369Sdim if (isAvailableAtLoopEntry(Ops[i], AddRecLoop)) { 2708193323Sed LIOps.push_back(Ops[i]); 2709193323Sed Ops.erase(Ops.begin()+i); 2710193323Sed --i; --e; 2711193323Sed } 2712193323Sed 2713193323Sed // If we found some loop invariants, fold them into the recurrence. 2714193323Sed if (!LIOps.empty()) { 2715193323Sed // NLI + LI + {Start,+,Step} --> NLI + {LI+Start,+,Step} 2716193323Sed LIOps.push_back(AddRec->getStart()); 2717193323Sed 2718198090Srdivacky SmallVector<const SCEV *, 4> AddRecOps(AddRec->op_begin(), 2719201360Srdivacky AddRec->op_end()); 2720309124Sdim // This follows from the fact that the no-wrap flags on the outer add 2721309124Sdim // expression are applicable on the 0th iteration, when the add recurrence 2722309124Sdim // will be equal to its start value. 2723321369Sdim AddRecOps[0] = getAddExpr(LIOps, Flags, Depth + 1); 2724193323Sed 2725210299Sed // Build the new addrec. Propagate the NUW and NSW flags if both the 2726210299Sed // outer add and the inner addrec are guaranteed to have no overflow. 
2727221345Sdim // Always propagate NW. 2728221345Sdim Flags = AddRec->getNoWrapFlags(setFlags(Flags, SCEV::FlagNW)); 2729221345Sdim const SCEV *NewRec = getAddRecExpr(AddRecOps, AddRecLoop, Flags); 2730201360Srdivacky 2731193323Sed // If all of the other operands were loop invariant, we are done. 2732193323Sed if (Ops.size() == 1) return NewRec; 2733193323Sed 2734226633Sdim // Otherwise, add the folded AddRec by the non-invariant parts. 2735193323Sed for (unsigned i = 0;; ++i) 2736193323Sed if (Ops[i] == AddRec) { 2737193323Sed Ops[i] = NewRec; 2738193323Sed break; 2739193323Sed } 2740321369Sdim return getAddExpr(Ops, SCEV::FlagAnyWrap, Depth + 1); 2741193323Sed } 2742193323Sed 2743193323Sed // Okay, if there weren't any loop invariants to be folded, check to see if 2744193323Sed // there are multiple AddRec's with the same loop induction variable being 2745193323Sed // added together. If so, we can fold them. 2746193323Sed for (unsigned OtherIdx = Idx+1; 2747212904Sdim OtherIdx < Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]); 2748321369Sdim ++OtherIdx) { 2749321369Sdim // We expect the AddRecExpr's to be sorted in reverse dominance order, 2750321369Sdim // so that the 1st found AddRecExpr is dominated by all others. 
2751321369Sdim assert(DT.dominates( 2752321369Sdim cast<SCEVAddRecExpr>(Ops[OtherIdx])->getLoop()->getHeader(), 2753321369Sdim AddRec->getLoop()->getHeader()) && 2754321369Sdim "AddRecExprs are not sorted in reverse dominance order?"); 2755212904Sdim if (AddRecLoop == cast<SCEVAddRecExpr>(Ops[OtherIdx])->getLoop()) { 2756212904Sdim // Other + {A,+,B}<L> + {C,+,D}<L> --> Other + {A+C,+,B+D}<L> 2757212904Sdim SmallVector<const SCEV *, 4> AddRecOps(AddRec->op_begin(), 2758212904Sdim AddRec->op_end()); 2759212904Sdim for (; OtherIdx != Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]); 2760321369Sdim ++OtherIdx) { 2761321369Sdim const auto *OtherAddRec = cast<SCEVAddRecExpr>(Ops[OtherIdx]); 2762321369Sdim if (OtherAddRec->getLoop() == AddRecLoop) { 2763321369Sdim for (unsigned i = 0, e = OtherAddRec->getNumOperands(); 2764321369Sdim i != e; ++i) { 2765321369Sdim if (i >= AddRecOps.size()) { 2766321369Sdim AddRecOps.append(OtherAddRec->op_begin()+i, 2767321369Sdim OtherAddRec->op_end()); 2768321369Sdim break; 2769212904Sdim } 2770321369Sdim SmallVector<const SCEV *, 2> TwoOps = { 2771321369Sdim AddRecOps[i], OtherAddRec->getOperand(i)}; 2772321369Sdim AddRecOps[i] = getAddExpr(TwoOps, SCEV::FlagAnyWrap, Depth + 1); 2773193323Sed } 2774321369Sdim Ops.erase(Ops.begin() + OtherIdx); --OtherIdx; 2775321369Sdim } 2776321369Sdim } 2777221345Sdim // Step size has changed, so we cannot guarantee no self-wraparound. 2778221345Sdim Ops[Idx] = getAddRecExpr(AddRecOps, AddRecLoop, SCEV::FlagAnyWrap); 2779321369Sdim return getAddExpr(Ops, SCEV::FlagAnyWrap, Depth + 1); 2780193323Sed } 2781321369Sdim } 2782193323Sed 2783193323Sed // Otherwise couldn't fold anything into this recurrence. Move onto the 2784193323Sed // next one. 2785193323Sed } 2786193323Sed 2787193323Sed // Okay, it looks like we really DO need an add expr. Check to see if we 2788193323Sed // already have one, otherwise create a new one. 
2789321369Sdim return getOrCreateAddExpr(Ops, Flags); 2790321369Sdim} 2791321369Sdim 2792321369Sdimconst SCEV * 2793353358SdimScalarEvolution::getOrCreateAddExpr(ArrayRef<const SCEV *> Ops, 2794321369Sdim SCEV::NoWrapFlags Flags) { 2795195340Sed FoldingSetNodeID ID; 2796195340Sed ID.AddInteger(scAddExpr); 2797327952Sdim for (const SCEV *Op : Ops) 2798327952Sdim ID.AddPointer(Op); 2799276479Sdim void *IP = nullptr; 2800202878Srdivacky SCEVAddExpr *S = 2801321369Sdim static_cast<SCEVAddExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP)); 2802202878Srdivacky if (!S) { 2803205407Srdivacky const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size()); 2804205407Srdivacky std::uninitialized_copy(Ops.begin(), Ops.end(), O); 2805321369Sdim S = new (SCEVAllocator) 2806321369Sdim SCEVAddExpr(ID.Intern(SCEVAllocator), O, Ops.size()); 2807321369Sdim UniqueSCEVs.InsertNode(S, IP); 2808327952Sdim addToLoopUseLists(S); 2809321369Sdim } 2810321369Sdim S->setNoWrapFlags(Flags); 2811321369Sdim return S; 2812321369Sdim} 2813321369Sdim 2814321369Sdimconst SCEV * 2815353358SdimScalarEvolution::getOrCreateAddRecExpr(ArrayRef<const SCEV *> Ops, 2816344779Sdim const Loop *L, SCEV::NoWrapFlags Flags) { 2817344779Sdim FoldingSetNodeID ID; 2818344779Sdim ID.AddInteger(scAddRecExpr); 2819344779Sdim for (unsigned i = 0, e = Ops.size(); i != e; ++i) 2820344779Sdim ID.AddPointer(Ops[i]); 2821344779Sdim ID.AddPointer(L); 2822344779Sdim void *IP = nullptr; 2823344779Sdim SCEVAddRecExpr *S = 2824344779Sdim static_cast<SCEVAddRecExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP)); 2825344779Sdim if (!S) { 2826344779Sdim const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size()); 2827344779Sdim std::uninitialized_copy(Ops.begin(), Ops.end(), O); 2828344779Sdim S = new (SCEVAllocator) 2829344779Sdim SCEVAddRecExpr(ID.Intern(SCEVAllocator), O, Ops.size(), L); 2830344779Sdim UniqueSCEVs.InsertNode(S, IP); 2831344779Sdim addToLoopUseLists(S); 2832344779Sdim } 2833344779Sdim S->setNoWrapFlags(Flags); 
2834344779Sdim return S; 2835344779Sdim} 2836344779Sdim 2837344779Sdimconst SCEV * 2838353358SdimScalarEvolution::getOrCreateMulExpr(ArrayRef<const SCEV *> Ops, 2839321369Sdim SCEV::NoWrapFlags Flags) { 2840321369Sdim FoldingSetNodeID ID; 2841321369Sdim ID.AddInteger(scMulExpr); 2842321369Sdim for (unsigned i = 0, e = Ops.size(); i != e; ++i) 2843321369Sdim ID.AddPointer(Ops[i]); 2844321369Sdim void *IP = nullptr; 2845321369Sdim SCEVMulExpr *S = 2846321369Sdim static_cast<SCEVMulExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP)); 2847321369Sdim if (!S) { 2848321369Sdim const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size()); 2849321369Sdim std::uninitialized_copy(Ops.begin(), Ops.end(), O); 2850321369Sdim S = new (SCEVAllocator) SCEVMulExpr(ID.Intern(SCEVAllocator), 2851205407Srdivacky O, Ops.size()); 2852202878Srdivacky UniqueSCEVs.InsertNode(S, IP); 2853327952Sdim addToLoopUseLists(S); 2854202878Srdivacky } 2855221345Sdim S->setNoWrapFlags(Flags); 2856195340Sed return S; 2857193323Sed} 2858193323Sed 2859226633Sdimstatic uint64_t umul_ov(uint64_t i, uint64_t j, bool &Overflow) { 2860226633Sdim uint64_t k = i*j; 2861226633Sdim if (j > 1 && k / j != i) Overflow = true; 2862226633Sdim return k; 2863226633Sdim} 2864226633Sdim 2865226633Sdim/// Compute the result of "n choose k", the binomial coefficient. If an 2866226633Sdim/// intermediate computation overflows, Overflow will be set and the return will 2867239462Sdim/// be garbage. Overflow is not cleared on absence of overflow. 2868226633Sdimstatic uint64_t Choose(uint64_t n, uint64_t k, bool &Overflow) { 2869226633Sdim // We use the multiplicative formula: 2870226633Sdim // n(n-1)(n-2)...(n-(k-1)) / k(k-1)(k-2)...1 . 2871226633Sdim // At each iteration, we take the n-th term of the numeral and divide by the 2872226633Sdim // (k-n)th term of the denominator. 
This division will always produce an 2873226633Sdim // integral result, and helps reduce the chance of overflow in the 2874226633Sdim // intermediate computations. However, we can still overflow even when the 2875226633Sdim // final result would fit. 2876226633Sdim 2877226633Sdim if (n == 0 || n == k) return 1; 2878226633Sdim if (k > n) return 0; 2879226633Sdim 2880226633Sdim if (k > n/2) 2881226633Sdim k = n-k; 2882226633Sdim 2883226633Sdim uint64_t r = 1; 2884226633Sdim for (uint64_t i = 1; i <= k; ++i) { 2885226633Sdim r = umul_ov(r, n-(i-1), Overflow); 2886226633Sdim r /= i; 2887226633Sdim } 2888226633Sdim return r; 2889226633Sdim} 2890226633Sdim 2891280031Sdim/// Determine if any of the operands in this SCEV are a constant or if 2892280031Sdim/// any of the add or multiply expressions in this SCEV contain a constant. 2893327952Sdimstatic bool containsConstantInAddMulChain(const SCEV *StartExpr) { 2894327952Sdim struct FindConstantInAddMulChain { 2895327952Sdim bool FoundConstant = false; 2896280031Sdim 2897327952Sdim bool follow(const SCEV *S) { 2898327952Sdim FoundConstant |= isa<SCEVConstant>(S); 2899327952Sdim return isa<SCEVAddExpr>(S) || isa<SCEVMulExpr>(S); 2900280031Sdim } 2901327952Sdim 2902327952Sdim bool isDone() const { 2903327952Sdim return FoundConstant; 2904327952Sdim } 2905327952Sdim }; 2906327952Sdim 2907327952Sdim FindConstantInAddMulChain F; 2908327952Sdim SCEVTraversal<FindConstantInAddMulChain> ST(F); 2909327952Sdim ST.visitAll(StartExpr); 2910327952Sdim return F.FoundConstant; 2911280031Sdim} 2912280031Sdim 2913309124Sdim/// Get a canonical multiply expression, or something simpler if possible. 
2914198090Srdivackyconst SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops, 2915321369Sdim SCEV::NoWrapFlags Flags, 2916321369Sdim unsigned Depth) { 2917221345Sdim assert(Flags == maskFlags(Flags, SCEV::FlagNUW | SCEV::FlagNSW) && 2918221345Sdim "only nuw or nsw allowed"); 2919193323Sed assert(!Ops.empty() && "Cannot get empty mul!"); 2920202878Srdivacky if (Ops.size() == 1) return Ops[0]; 2921193323Sed#ifndef NDEBUG 2922226633Sdim Type *ETy = getEffectiveSCEVType(Ops[0]->getType()); 2923193323Sed for (unsigned i = 1, e = Ops.size(); i != e; ++i) 2924212904Sdim assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy && 2925193323Sed "SCEVMulExpr operand types don't match!"); 2926193323Sed#endif 2927193323Sed 2928296417Sdim // Sort by complexity, this groups all similar expression types together. 2929321369Sdim GroupByComplexity(Ops, &LI, DT); 2930296417Sdim 2931280031Sdim Flags = StrengthenNoWrapFlags(this, scMulExpr, Ops, Flags); 2932202878Srdivacky 2933321369Sdim // Limit recursion calls depth. 2934353358Sdim if (Depth > MaxArithDepth || hasHugeExpression(Ops)) 2935321369Sdim return getOrCreateMulExpr(Ops, Flags); 2936321369Sdim 2937193323Sed // If there are any constants, fold them together. 2938193323Sed unsigned Idx = 0; 2939193323Sed if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) { 2940193323Sed 2941193323Sed if (Ops.size() == 2) 2942341825Sdim // C1*(C2+V) -> C1*C2 + C1*V 2943341825Sdim if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[1])) 2944341825Sdim // If any of Add's ops are Adds or Muls with a constant, apply this 2945341825Sdim // transformation as well. 2946341825Sdim // 2947341825Sdim // TODO: There are some cases where this transformation is not 2948341825Sdim // profitable; for example, Add = (C0 + X) * Y + Z. Maybe the scope of 2949341825Sdim // this transformation should be narrowed down. 
2950341825Sdim if (Add->getNumOperands() == 2 && containsConstantInAddMulChain(Add)) 2951341825Sdim return getAddExpr(getMulExpr(LHSC, Add->getOperand(0), 2952341825Sdim SCEV::FlagAnyWrap, Depth + 1), 2953341825Sdim getMulExpr(LHSC, Add->getOperand(1), 2954341825Sdim SCEV::FlagAnyWrap, Depth + 1), 2955341825Sdim SCEV::FlagAnyWrap, Depth + 1); 2956193323Sed 2957193323Sed ++Idx; 2958193323Sed while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) { 2959193323Sed // We found two constants, fold them together! 2960296417Sdim ConstantInt *Fold = 2961296417Sdim ConstantInt::get(getContext(), LHSC->getAPInt() * RHSC->getAPInt()); 2962193323Sed Ops[0] = getConstant(Fold); 2963193323Sed Ops.erase(Ops.begin()+1); // Erase the folded element 2964193323Sed if (Ops.size() == 1) return Ops[0]; 2965193323Sed LHSC = cast<SCEVConstant>(Ops[0]); 2966193323Sed } 2967193323Sed 2968193323Sed // If we are left with a constant one being multiplied, strip it off. 2969321369Sdim if (cast<SCEVConstant>(Ops[0])->getValue()->isOne()) { 2970193323Sed Ops.erase(Ops.begin()); 2971193323Sed --Idx; 2972193323Sed } else if (cast<SCEVConstant>(Ops[0])->getValue()->isZero()) { 2973193323Sed // If we have a multiply of zero, it will always be zero. 2974193323Sed return Ops[0]; 2975202878Srdivacky } else if (Ops[0]->isAllOnesValue()) { 2976202878Srdivacky // If we have a mul by -1 of an add, try distributing the -1 among the 2977202878Srdivacky // add operands. 
2978221345Sdim if (Ops.size() == 2) { 2979202878Srdivacky if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[1])) { 2980202878Srdivacky SmallVector<const SCEV *, 4> NewOps; 2981202878Srdivacky bool AnyFolded = false; 2982296417Sdim for (const SCEV *AddOp : Add->operands()) { 2983321369Sdim const SCEV *Mul = getMulExpr(Ops[0], AddOp, SCEV::FlagAnyWrap, 2984321369Sdim Depth + 1); 2985202878Srdivacky if (!isa<SCEVMulExpr>(Mul)) AnyFolded = true; 2986202878Srdivacky NewOps.push_back(Mul); 2987202878Srdivacky } 2988202878Srdivacky if (AnyFolded) 2989321369Sdim return getAddExpr(NewOps, SCEV::FlagAnyWrap, Depth + 1); 2990296417Sdim } else if (const auto *AddRec = dyn_cast<SCEVAddRecExpr>(Ops[1])) { 2991221345Sdim // Negation preserves a recurrence's no self-wrap property. 2992221345Sdim SmallVector<const SCEV *, 4> Operands; 2993296417Sdim for (const SCEV *AddRecOp : AddRec->operands()) 2994321369Sdim Operands.push_back(getMulExpr(Ops[0], AddRecOp, SCEV::FlagAnyWrap, 2995321369Sdim Depth + 1)); 2996296417Sdim 2997221345Sdim return getAddRecExpr(Operands, AddRec->getLoop(), 2998221345Sdim AddRec->getNoWrapFlags(SCEV::FlagNW)); 2999221345Sdim } 3000221345Sdim } 3001193323Sed } 3002207618Srdivacky 3003207618Srdivacky if (Ops.size() == 1) 3004207618Srdivacky return Ops[0]; 3005193323Sed } 3006193323Sed 3007193323Sed // Skip over the add expression until we get to a multiply. 3008193323Sed while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scMulExpr) 3009193323Sed ++Idx; 3010193323Sed 3011193323Sed // If there are mul operands inline them all into this expression. 3012193323Sed if (Idx < Ops.size()) { 3013193323Sed bool DeletedMul = false; 3014193323Sed while (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Ops[Idx])) { 3015314564Sdim if (Ops.size() > MulOpsInlineThreshold) 3016314564Sdim break; 3017321369Sdim // If we have an mul, expand the mul operands onto the end of the 3018321369Sdim // operands list. 
3019193323Sed Ops.erase(Ops.begin()+Idx); 3020210299Sed Ops.append(Mul->op_begin(), Mul->op_end()); 3021193323Sed DeletedMul = true; 3022193323Sed } 3023193323Sed 3024321369Sdim // If we deleted at least one mul, we added operands to the end of the 3025321369Sdim // list, and they are not necessarily sorted. Recurse to resort and 3026321369Sdim // resimplify any operands we just acquired. 3027193323Sed if (DeletedMul) 3028321369Sdim return getMulExpr(Ops, SCEV::FlagAnyWrap, Depth + 1); 3029193323Sed } 3030193323Sed 3031193323Sed // If there are any add recurrences in the operands list, see if any other 3032193323Sed // added values are loop invariant. If so, we can fold them into the 3033193323Sed // recurrence. 3034193323Sed while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scAddRecExpr) 3035193323Sed ++Idx; 3036193323Sed 3037193323Sed // Scan over all recurrences, trying to fold loop invariants into them. 3038193323Sed for (; Idx < Ops.size() && isa<SCEVAddRecExpr>(Ops[Idx]); ++Idx) { 3039321369Sdim // Scan all of the other operands to this mul and add them to the vector 3040321369Sdim // if they are loop invariant w.r.t. the recurrence. 3041198090Srdivacky SmallVector<const SCEV *, 8> LIOps; 3042193323Sed const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]); 3043212904Sdim const Loop *AddRecLoop = AddRec->getLoop(); 3044193323Sed for (unsigned i = 0, e = Ops.size(); i != e; ++i) 3045321369Sdim if (isAvailableAtLoopEntry(Ops[i], AddRecLoop)) { 3046193323Sed LIOps.push_back(Ops[i]); 3047193323Sed Ops.erase(Ops.begin()+i); 3048193323Sed --i; --e; 3049193323Sed } 3050193323Sed 3051193323Sed // If we found some loop invariants, fold them into the recurrence. 
3052193323Sed if (!LIOps.empty()) { 3053193323Sed // NLI * LI * {Start,+,Step} --> NLI * {LI*Start,+,LI*Step} 3054198090Srdivacky SmallVector<const SCEV *, 4> NewOps; 3055193323Sed NewOps.reserve(AddRec->getNumOperands()); 3056321369Sdim const SCEV *Scale = getMulExpr(LIOps, SCEV::FlagAnyWrap, Depth + 1); 3057210299Sed for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) 3058321369Sdim NewOps.push_back(getMulExpr(Scale, AddRec->getOperand(i), 3059321369Sdim SCEV::FlagAnyWrap, Depth + 1)); 3060193323Sed 3061210299Sed // Build the new addrec. Propagate the NUW and NSW flags if both the 3062210299Sed // outer mul and the inner addrec are guaranteed to have no overflow. 3063221345Sdim // 3064221345Sdim // No self-wrap cannot be guaranteed after changing the step size, but 3065221345Sdim // will be inferred if either NUW or NSW is true. 3066221345Sdim Flags = AddRec->getNoWrapFlags(clearFlags(Flags, SCEV::FlagNW)); 3067221345Sdim const SCEV *NewRec = getAddRecExpr(NewOps, AddRecLoop, Flags); 3068193323Sed 3069193323Sed // If all of the other operands were loop invariant, we are done. 3070193323Sed if (Ops.size() == 1) return NewRec; 3071193323Sed 3072226633Sdim // Otherwise, multiply the folded AddRec by the non-invariant parts. 3073193323Sed for (unsigned i = 0;; ++i) 3074193323Sed if (Ops[i] == AddRec) { 3075193323Sed Ops[i] = NewRec; 3076193323Sed break; 3077193323Sed } 3078321369Sdim return getMulExpr(Ops, SCEV::FlagAnyWrap, Depth + 1); 3079193323Sed } 3080193323Sed 3081321369Sdim // Okay, if there weren't any loop invariants to be folded, check to see 3082321369Sdim // if there are multiple AddRec's with the same loop induction variable 3083321369Sdim // being multiplied together. If so, we can fold them. 
3084280031Sdim 3085280031Sdim // {A1,+,A2,+,...,+,An}<L> * {B1,+,B2,+,...,+,Bn}<L> 3086280031Sdim // = {x=1 in [ sum y=x..2x [ sum z=max(y-x, y-n)..min(x,n) [ 3087280031Sdim // choose(x, 2x)*choose(2x-y, x-z)*A_{y-z}*B_z 3088280031Sdim // ]]],+,...up to x=2n}. 3089280031Sdim // Note that the arguments to choose() are always integers with values 3090280031Sdim // known at compile time, never SCEV objects. 3091280031Sdim // 3092280031Sdim // The implementation avoids pointless extra computations when the two 3093280031Sdim // addrec's are of different length (mathematically, it's equivalent to 3094280031Sdim // an infinite stream of zeros on the right). 3095280031Sdim bool OpsModified = false; 3096193323Sed for (unsigned OtherIdx = Idx+1; 3097280031Sdim OtherIdx != Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]); 3098226633Sdim ++OtherIdx) { 3099280031Sdim const SCEVAddRecExpr *OtherAddRec = 3100280031Sdim dyn_cast<SCEVAddRecExpr>(Ops[OtherIdx]); 3101280031Sdim if (!OtherAddRec || OtherAddRec->getLoop() != AddRecLoop) 3102239462Sdim continue; 3103239462Sdim 3104322740Sdim // Limit max number of arguments to avoid creation of unreasonably big 3105322740Sdim // SCEVAddRecs with very complex operands. 
3106322740Sdim if (AddRec->getNumOperands() + OtherAddRec->getNumOperands() - 1 > 3107353358Sdim MaxAddRecSize || isHugeExpression(AddRec) || 3108353358Sdim isHugeExpression(OtherAddRec)) 3109322740Sdim continue; 3110322740Sdim 3111280031Sdim bool Overflow = false; 3112280031Sdim Type *Ty = AddRec->getType(); 3113280031Sdim bool LargerThan64Bits = getTypeSizeInBits(Ty) > 64; 3114280031Sdim SmallVector<const SCEV*, 7> AddRecOps; 3115280031Sdim for (int x = 0, xe = AddRec->getNumOperands() + 3116280031Sdim OtherAddRec->getNumOperands() - 1; x != xe && !Overflow; ++x) { 3117344779Sdim SmallVector <const SCEV *, 7> SumOps; 3118280031Sdim for (int y = x, ye = 2*x+1; y != ye && !Overflow; ++y) { 3119280031Sdim uint64_t Coeff1 = Choose(x, 2*x - y, Overflow); 3120280031Sdim for (int z = std::max(y-x, y-(int)AddRec->getNumOperands()+1), 3121280031Sdim ze = std::min(x+1, (int)OtherAddRec->getNumOperands()); 3122280031Sdim z < ze && !Overflow; ++z) { 3123280031Sdim uint64_t Coeff2 = Choose(2*x - y, x-z, Overflow); 3124280031Sdim uint64_t Coeff; 3125280031Sdim if (LargerThan64Bits) 3126280031Sdim Coeff = umul_ov(Coeff1, Coeff2, Overflow); 3127280031Sdim else 3128280031Sdim Coeff = Coeff1*Coeff2; 3129280031Sdim const SCEV *CoeffTerm = getConstant(Ty, Coeff); 3130280031Sdim const SCEV *Term1 = AddRec->getOperand(y-z); 3131280031Sdim const SCEV *Term2 = OtherAddRec->getOperand(z); 3132344779Sdim SumOps.push_back(getMulExpr(CoeffTerm, Term1, Term2, 3133344779Sdim SCEV::FlagAnyWrap, Depth + 1)); 3134239462Sdim } 3135239462Sdim } 3136344779Sdim if (SumOps.empty()) 3137344779Sdim SumOps.push_back(getZero(Ty)); 3138344779Sdim AddRecOps.push_back(getAddExpr(SumOps, SCEV::FlagAnyWrap, Depth + 1)); 3139193323Sed } 3140280031Sdim if (!Overflow) { 3141353358Sdim const SCEV *NewAddRec = getAddRecExpr(AddRecOps, AddRecLoop, 3142280031Sdim SCEV::FlagAnyWrap); 3143280031Sdim if (Ops.size() == 2) return NewAddRec; 3144280031Sdim Ops[Idx] = NewAddRec; 3145280031Sdim Ops.erase(Ops.begin() + 
OtherIdx); --OtherIdx; 3146280031Sdim OpsModified = true; 3147280031Sdim AddRec = dyn_cast<SCEVAddRecExpr>(NewAddRec); 3148280031Sdim if (!AddRec) 3149280031Sdim break; 3150280031Sdim } 3151226633Sdim } 3152280031Sdim if (OpsModified) 3153321369Sdim return getMulExpr(Ops, SCEV::FlagAnyWrap, Depth + 1); 3154193323Sed 3155193323Sed // Otherwise couldn't fold anything into this recurrence. Move onto the 3156193323Sed // next one. 3157193323Sed } 3158193323Sed 3159193323Sed // Okay, it looks like we really DO need an mul expr. Check to see if we 3160193323Sed // already have one, otherwise create a new one. 3161321369Sdim return getOrCreateMulExpr(Ops, Flags); 3162193323Sed} 3163193323Sed 3164327952Sdim/// Represents an unsigned remainder expression based on unsigned division. 3165327952Sdimconst SCEV *ScalarEvolution::getURemExpr(const SCEV *LHS, 3166327952Sdim const SCEV *RHS) { 3167327952Sdim assert(getEffectiveSCEVType(LHS->getType()) == 3168327952Sdim getEffectiveSCEVType(RHS->getType()) && 3169327952Sdim "SCEVURemExpr operand types don't match!"); 3170327952Sdim 3171327952Sdim // Short-circuit easy cases 3172327952Sdim if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS)) { 3173327952Sdim // If constant is one, the result is trivial 3174327952Sdim if (RHSC->getValue()->isOne()) 3175327952Sdim return getZero(LHS->getType()); // X urem 1 --> 0 3176327952Sdim 3177327952Sdim // If constant is a power of two, fold into a zext(trunc(LHS)). 
3178327952Sdim if (RHSC->getAPInt().isPowerOf2()) { 3179327952Sdim Type *FullTy = LHS->getType(); 3180327952Sdim Type *TruncTy = 3181327952Sdim IntegerType::get(getContext(), RHSC->getAPInt().logBase2()); 3182327952Sdim return getZeroExtendExpr(getTruncateExpr(LHS, TruncTy), FullTy); 3183327952Sdim } 3184327952Sdim } 3185327952Sdim 3186327952Sdim // Fallback to %a == %x urem %y == %x -<nuw> ((%x udiv %y) *<nuw> %y) 3187327952Sdim const SCEV *UDiv = getUDivExpr(LHS, RHS); 3188327952Sdim const SCEV *Mult = getMulExpr(UDiv, RHS, SCEV::FlagNUW); 3189327952Sdim return getMinusSCEV(LHS, Mult, SCEV::FlagNUW); 3190327952Sdim} 3191327952Sdim 3192309124Sdim/// Get a canonical unsigned division expression, or something simpler if 3193309124Sdim/// possible. 3194195098Sedconst SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS, 3195195098Sed const SCEV *RHS) { 3196193323Sed assert(getEffectiveSCEVType(LHS->getType()) == 3197193323Sed getEffectiveSCEVType(RHS->getType()) && 3198193323Sed "SCEVUDivExpr operand types don't match!"); 3199193323Sed 3200193323Sed if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS)) { 3201321369Sdim if (RHSC->getValue()->isOne()) 3202198090Srdivacky return LHS; // X udiv 1 --> x 3203207618Srdivacky // If the denominator is zero, the result of the udiv is undefined. Don't 3204207618Srdivacky // try to analyze it, because the resolution chosen here may differ from 3205207618Srdivacky // the resolution chosen in other parts of the compiler. 3206207618Srdivacky if (!RHSC->getValue()->isZero()) { 3207207618Srdivacky // Determine if the division can be folded into the operands of 3208207618Srdivacky // its operands. 3209207618Srdivacky // TODO: Generalize this to non-constants by using known-bits information. 
3210226633Sdim Type *Ty = LHS->getType(); 3211296417Sdim unsigned LZ = RHSC->getAPInt().countLeadingZeros(); 3212212904Sdim unsigned MaxShiftAmt = getTypeSizeInBits(Ty) - LZ - 1; 3213207618Srdivacky // For non-power-of-two values, effectively round the value up to the 3214207618Srdivacky // nearest power of two. 3215296417Sdim if (!RHSC->getAPInt().isPowerOf2()) 3216207618Srdivacky ++MaxShiftAmt; 3217226633Sdim IntegerType *ExtTy = 3218207618Srdivacky IntegerType::get(getContext(), getTypeSizeInBits(Ty) + MaxShiftAmt); 3219207618Srdivacky if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS)) 3220207618Srdivacky if (const SCEVConstant *Step = 3221226633Sdim dyn_cast<SCEVConstant>(AR->getStepRecurrence(*this))) { 3222226633Sdim // {X,+,N}/C --> {X/C,+,N/C} if safe and N/C can be folded. 3223296417Sdim const APInt &StepInt = Step->getAPInt(); 3224296417Sdim const APInt &DivInt = RHSC->getAPInt(); 3225226633Sdim if (!StepInt.urem(DivInt) && 3226207618Srdivacky getZeroExtendExpr(AR, ExtTy) == 3227207618Srdivacky getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy), 3228207618Srdivacky getZeroExtendExpr(Step, ExtTy), 3229221345Sdim AR->getLoop(), SCEV::FlagAnyWrap)) { 3230207618Srdivacky SmallVector<const SCEV *, 4> Operands; 3231296417Sdim for (const SCEV *Op : AR->operands()) 3232296417Sdim Operands.push_back(getUDivExpr(Op, RHS)); 3233296417Sdim return getAddRecExpr(Operands, AR->getLoop(), SCEV::FlagNW); 3234193323Sed } 3235226633Sdim /// Get a canonical UDivExpr for a recurrence. 3236226633Sdim /// {X,+,N}/C => {Y,+,N}/C where Y=X-(X%N). Safe when C%N=0. 3237226633Sdim // We can currently only fold X%N if X is constant. 
3238226633Sdim const SCEVConstant *StartC = dyn_cast<SCEVConstant>(AR->getStart()); 3239226633Sdim if (StartC && !DivInt.urem(StepInt) && 3240226633Sdim getZeroExtendExpr(AR, ExtTy) == 3241226633Sdim getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy), 3242226633Sdim getZeroExtendExpr(Step, ExtTy), 3243226633Sdim AR->getLoop(), SCEV::FlagAnyWrap)) { 3244296417Sdim const APInt &StartInt = StartC->getAPInt(); 3245226633Sdim const APInt &StartRem = StartInt.urem(StepInt); 3246226633Sdim if (StartRem != 0) 3247226633Sdim LHS = getAddRecExpr(getConstant(StartInt - StartRem), Step, 3248226633Sdim AR->getLoop(), SCEV::FlagNW); 3249226633Sdim } 3250226633Sdim } 3251207618Srdivacky // (A*B)/C --> A*(B/C) if safe and B/C can be folded. 3252207618Srdivacky if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(LHS)) { 3253207618Srdivacky SmallVector<const SCEV *, 4> Operands; 3254296417Sdim for (const SCEV *Op : M->operands()) 3255296417Sdim Operands.push_back(getZeroExtendExpr(Op, ExtTy)); 3256207618Srdivacky if (getZeroExtendExpr(M, ExtTy) == getMulExpr(Operands)) 3257207618Srdivacky // Find an operand that's safely divisible. 3258207618Srdivacky for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) { 3259207618Srdivacky const SCEV *Op = M->getOperand(i); 3260207618Srdivacky const SCEV *Div = getUDivExpr(Op, RHSC); 3261207618Srdivacky if (!isa<SCEVUDivExpr>(Div) && getMulExpr(Div, RHSC) == Op) { 3262207618Srdivacky Operands = SmallVector<const SCEV *, 4>(M->op_begin(), 3263207618Srdivacky M->op_end()); 3264207618Srdivacky Operands[i] = Div; 3265207618Srdivacky return getMulExpr(Operands); 3266207618Srdivacky } 3267207618Srdivacky } 3268207618Srdivacky } 3269341825Sdim 3270341825Sdim // (A/B)/C --> A/(B*C) if safe and B*C can be folded. 
3271341825Sdim if (const SCEVUDivExpr *OtherDiv = dyn_cast<SCEVUDivExpr>(LHS)) { 3272341825Sdim if (auto *DivisorConstant = 3273341825Sdim dyn_cast<SCEVConstant>(OtherDiv->getRHS())) { 3274341825Sdim bool Overflow = false; 3275341825Sdim APInt NewRHS = 3276341825Sdim DivisorConstant->getAPInt().umul_ov(RHSC->getAPInt(), Overflow); 3277341825Sdim if (Overflow) { 3278341825Sdim return getConstant(RHSC->getType(), 0, false); 3279341825Sdim } 3280341825Sdim return getUDivExpr(OtherDiv->getLHS(), getConstant(NewRHS)); 3281341825Sdim } 3282341825Sdim } 3283341825Sdim 3284207618Srdivacky // (A+B)/C --> (A/C + B/C) if safe and A/C and B/C can be folded. 3285221345Sdim if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(LHS)) { 3286207618Srdivacky SmallVector<const SCEV *, 4> Operands; 3287296417Sdim for (const SCEV *Op : A->operands()) 3288296417Sdim Operands.push_back(getZeroExtendExpr(Op, ExtTy)); 3289207618Srdivacky if (getZeroExtendExpr(A, ExtTy) == getAddExpr(Operands)) { 3290207618Srdivacky Operands.clear(); 3291207618Srdivacky for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i) { 3292207618Srdivacky const SCEV *Op = getUDivExpr(A->getOperand(i), RHS); 3293207618Srdivacky if (isa<SCEVUDivExpr>(Op) || 3294207618Srdivacky getMulExpr(Op, RHS) != A->getOperand(i)) 3295207618Srdivacky break; 3296207618Srdivacky Operands.push_back(Op); 3297207618Srdivacky } 3298207618Srdivacky if (Operands.size() == A->getNumOperands()) 3299207618Srdivacky return getAddExpr(Operands); 3300193323Sed } 3301193323Sed } 3302193323Sed 3303207618Srdivacky // Fold if both operands are constant. 
3304207618Srdivacky if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS)) { 3305207618Srdivacky Constant *LHSCV = LHSC->getValue(); 3306207618Srdivacky Constant *RHSCV = RHSC->getValue(); 3307207618Srdivacky return getConstant(cast<ConstantInt>(ConstantExpr::getUDiv(LHSCV, 3308207618Srdivacky RHSCV))); 3309207618Srdivacky } 3310193323Sed } 3311193323Sed } 3312193323Sed 3313195340Sed FoldingSetNodeID ID; 3314195340Sed ID.AddInteger(scUDivExpr); 3315195340Sed ID.AddPointer(LHS); 3316195340Sed ID.AddPointer(RHS); 3317276479Sdim void *IP = nullptr; 3318195340Sed if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; 3319205407Srdivacky SCEV *S = new (SCEVAllocator) SCEVUDivExpr(ID.Intern(SCEVAllocator), 3320205407Srdivacky LHS, RHS); 3321195340Sed UniqueSCEVs.InsertNode(S, IP); 3322327952Sdim addToLoopUseLists(S); 3323195340Sed return S; 3324193323Sed} 3325193323Sed 3326276479Sdimstatic const APInt gcd(const SCEVConstant *C1, const SCEVConstant *C2) { 3327296417Sdim APInt A = C1->getAPInt().abs(); 3328296417Sdim APInt B = C2->getAPInt().abs(); 3329276479Sdim uint32_t ABW = A.getBitWidth(); 3330276479Sdim uint32_t BBW = B.getBitWidth(); 3331193323Sed 3332276479Sdim if (ABW > BBW) 3333276479Sdim B = B.zext(ABW); 3334276479Sdim else if (ABW < BBW) 3335276479Sdim A = A.zext(BBW); 3336276479Sdim 3337321369Sdim return APIntOps::GreatestCommonDivisor(std::move(A), std::move(B)); 3338276479Sdim} 3339276479Sdim 3340309124Sdim/// Get a canonical unsigned division expression, or something simpler if 3341309124Sdim/// possible. There is no representation for an exact udiv in SCEV IR, but we 3342309124Sdim/// can attempt to remove factors from the LHS and RHS. We can't do this when 3343309124Sdim/// it's not exact because the udiv may be clearing bits. 
3344276479Sdimconst SCEV *ScalarEvolution::getUDivExactExpr(const SCEV *LHS, 3345276479Sdim const SCEV *RHS) { 3346276479Sdim // TODO: we could try to find factors in all sorts of things, but for now we 3347276479Sdim // just deal with u/exact (multiply, constant). See SCEVDivision towards the 3348276479Sdim // end of this file for inspiration. 3349276479Sdim 3350276479Sdim const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(LHS); 3351321369Sdim if (!Mul || !Mul->hasNoUnsignedWrap()) 3352276479Sdim return getUDivExpr(LHS, RHS); 3353276479Sdim 3354276479Sdim if (const SCEVConstant *RHSCst = dyn_cast<SCEVConstant>(RHS)) { 3355276479Sdim // If the mulexpr multiplies by a constant, then that constant must be the 3356276479Sdim // first element of the mulexpr. 3357296417Sdim if (const auto *LHSCst = dyn_cast<SCEVConstant>(Mul->getOperand(0))) { 3358276479Sdim if (LHSCst == RHSCst) { 3359276479Sdim SmallVector<const SCEV *, 2> Operands; 3360276479Sdim Operands.append(Mul->op_begin() + 1, Mul->op_end()); 3361276479Sdim return getMulExpr(Operands); 3362276479Sdim } 3363276479Sdim 3364276479Sdim // We can't just assume that LHSCst divides RHSCst cleanly, it could be 3365276479Sdim // that there's a factor provided by one of the other terms. We need to 3366276479Sdim // check. 
3367276479Sdim APInt Factor = gcd(LHSCst, RHSCst); 3368276479Sdim if (!Factor.isIntN(1)) { 3369296417Sdim LHSCst = 3370296417Sdim cast<SCEVConstant>(getConstant(LHSCst->getAPInt().udiv(Factor))); 3371296417Sdim RHSCst = 3372296417Sdim cast<SCEVConstant>(getConstant(RHSCst->getAPInt().udiv(Factor))); 3373276479Sdim SmallVector<const SCEV *, 2> Operands; 3374276479Sdim Operands.push_back(LHSCst); 3375276479Sdim Operands.append(Mul->op_begin() + 1, Mul->op_end()); 3376276479Sdim LHS = getMulExpr(Operands); 3377276479Sdim RHS = RHSCst; 3378276479Sdim Mul = dyn_cast<SCEVMulExpr>(LHS); 3379276479Sdim if (!Mul) 3380276479Sdim return getUDivExactExpr(LHS, RHS); 3381276479Sdim } 3382276479Sdim } 3383276479Sdim } 3384276479Sdim 3385276479Sdim for (int i = 0, e = Mul->getNumOperands(); i != e; ++i) { 3386276479Sdim if (Mul->getOperand(i) == RHS) { 3387276479Sdim SmallVector<const SCEV *, 2> Operands; 3388276479Sdim Operands.append(Mul->op_begin(), Mul->op_begin() + i); 3389276479Sdim Operands.append(Mul->op_begin() + i + 1, Mul->op_end()); 3390276479Sdim return getMulExpr(Operands); 3391276479Sdim } 3392276479Sdim } 3393276479Sdim 3394276479Sdim return getUDivExpr(LHS, RHS); 3395276479Sdim} 3396276479Sdim 3397309124Sdim/// Get an add recurrence expression for the specified loop. Simplify the 3398309124Sdim/// expression as much as possible. 
3399221345Sdimconst SCEV *ScalarEvolution::getAddRecExpr(const SCEV *Start, const SCEV *Step, 3400221345Sdim const Loop *L, 3401221345Sdim SCEV::NoWrapFlags Flags) { 3402198090Srdivacky SmallVector<const SCEV *, 4> Operands; 3403193323Sed Operands.push_back(Start); 3404193323Sed if (const SCEVAddRecExpr *StepChrec = dyn_cast<SCEVAddRecExpr>(Step)) 3405193323Sed if (StepChrec->getLoop() == L) { 3406210299Sed Operands.append(StepChrec->op_begin(), StepChrec->op_end()); 3407221345Sdim return getAddRecExpr(Operands, L, maskFlags(Flags, SCEV::FlagNW)); 3408193323Sed } 3409193323Sed 3410193323Sed Operands.push_back(Step); 3411221345Sdim return getAddRecExpr(Operands, L, Flags); 3412193323Sed} 3413193323Sed 3414309124Sdim/// Get an add recurrence expression for the specified loop. Simplify the 3415309124Sdim/// expression as much as possible. 3416195098Sedconst SCEV * 3417198090SrdivackyScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands, 3418221345Sdim const Loop *L, SCEV::NoWrapFlags Flags) { 3419193323Sed if (Operands.size() == 1) return Operands[0]; 3420193323Sed#ifndef NDEBUG 3421226633Sdim Type *ETy = getEffectiveSCEVType(Operands[0]->getType()); 3422193323Sed for (unsigned i = 1, e = Operands.size(); i != e; ++i) 3423212904Sdim assert(getEffectiveSCEVType(Operands[i]->getType()) == ETy && 3424193323Sed "SCEVAddRecExpr operand types don't match!"); 3425218893Sdim for (unsigned i = 0, e = Operands.size(); i != e; ++i) 3426218893Sdim assert(isLoopInvariant(Operands[i], L) && 3427218893Sdim "SCEVAddRecExpr operand is not loop-invariant!"); 3428193323Sed#endif 3429193323Sed 3430193323Sed if (Operands.back()->isZero()) { 3431193323Sed Operands.pop_back(); 3432221345Sdim return getAddRecExpr(Operands, L, SCEV::FlagAnyWrap); // {X,+,0} --> X 3433193323Sed } 3434193323Sed 3435360784Sdim // It's tempting to want to call getConstantMaxBackedgeTakenCount count here and 3436204642Srdivacky // use that information to infer NUW and NSW flags. 
However, computing a 3437204642Srdivacky // BE count requires calling getAddRecExpr, so we may not yet have a 3438204642Srdivacky // meaningful BE count at this point (and if we don't, we'd be stuck 3439204642Srdivacky // with a SCEVCouldNotCompute as the cached BE count). 3440204642Srdivacky 3441280031Sdim Flags = StrengthenNoWrapFlags(this, scAddRecExpr, Operands, Flags); 3442202878Srdivacky 3443193323Sed // Canonicalize nested AddRecs in by nesting them in order of loop depth. 3444193323Sed if (const SCEVAddRecExpr *NestedAR = dyn_cast<SCEVAddRecExpr>(Operands[0])) { 3445201360Srdivacky const Loop *NestedLoop = NestedAR->getLoop(); 3446296417Sdim if (L->contains(NestedLoop) 3447296417Sdim ? (L->getLoopDepth() < NestedLoop->getLoopDepth()) 3448296417Sdim : (!NestedLoop->contains(L) && 3449296417Sdim DT.dominates(L->getHeader(), NestedLoop->getHeader()))) { 3450198090Srdivacky SmallVector<const SCEV *, 4> NestedOperands(NestedAR->op_begin(), 3451201360Srdivacky NestedAR->op_end()); 3452193323Sed Operands[0] = NestedAR->getStart(); 3453195098Sed // AddRecs require their operands be loop-invariant with respect to their 3454195098Sed // loops. Don't perform this transformation if it would break this 3455195098Sed // requirement. 3456296417Sdim bool AllInvariant = all_of( 3457296417Sdim Operands, [&](const SCEV *Op) { return isLoopInvariant(Op, L); }); 3458296417Sdim 3459195098Sed if (AllInvariant) { 3460221345Sdim // Create a recurrence for the outer loop with the same step size. 3461221345Sdim // 3462221345Sdim // The outer recurrence keeps its NW flag but only keeps NUW/NSW if the 3463221345Sdim // inner recurrence has the same property. 
3464221345Sdim SCEV::NoWrapFlags OuterFlags = 3465221345Sdim maskFlags(Flags, SCEV::FlagNW | NestedAR->getNoWrapFlags()); 3466221345Sdim 3467221345Sdim NestedOperands[0] = getAddRecExpr(Operands, L, OuterFlags); 3468296417Sdim AllInvariant = all_of(NestedOperands, [&](const SCEV *Op) { 3469296417Sdim return isLoopInvariant(Op, NestedLoop); 3470296417Sdim }); 3471296417Sdim 3472221345Sdim if (AllInvariant) { 3473195098Sed // Ok, both add recurrences are valid after the transformation. 3474221345Sdim // 3475221345Sdim // The inner recurrence keeps its NW flag but only keeps NUW/NSW if 3476221345Sdim // the outer recurrence has the same property. 3477221345Sdim SCEV::NoWrapFlags InnerFlags = 3478221345Sdim maskFlags(NestedAR->getNoWrapFlags(), SCEV::FlagNW | Flags); 3479221345Sdim return getAddRecExpr(NestedOperands, NestedLoop, InnerFlags); 3480221345Sdim } 3481195098Sed } 3482195098Sed // Reset Operands to its original state. 3483195098Sed Operands[0] = NestedAR; 3484193323Sed } 3485193323Sed } 3486193323Sed 3487202878Srdivacky // Okay, it looks like we really DO need an addrec expr. Check to see if we 3488202878Srdivacky // already have one, otherwise create a new one. 3489344779Sdim return getOrCreateAddRecExpr(Operands, L, Flags); 3490193323Sed} 3491193323Sed 3492288943Sdimconst SCEV * 3493314564SdimScalarEvolution::getGEPExpr(GEPOperator *GEP, 3494314564Sdim const SmallVectorImpl<const SCEV *> &IndexExprs) { 3495314564Sdim const SCEV *BaseExpr = getSCEV(GEP->getPointerOperand()); 3496288943Sdim // getSCEV(Base)->getType() has the same address space as Base->getType() 3497288943Sdim // because SCEV::getType() preserves the address space. 
3498360784Sdim Type *IntIdxTy = getEffectiveSCEVType(BaseExpr->getType()); 3499288943Sdim // FIXME(PR23527): Don't blindly transfer the inbounds flag from the GEP 3500288943Sdim // instruction to its SCEV, because the Instruction may be guarded by control 3501288943Sdim // flow and the no-overflow bits may not be valid for the expression in any 3502296417Sdim // context. This can be fixed similarly to how these flags are handled for 3503296417Sdim // adds. 3504314564Sdim SCEV::NoWrapFlags Wrap = GEP->isInBounds() ? SCEV::FlagNSW 3505314564Sdim : SCEV::FlagAnyWrap; 3506288943Sdim 3507360784Sdim const SCEV *TotalOffset = getZero(IntIdxTy); 3508314564Sdim // The array size is unimportant. The first thing we do on CurTy is getting 3509288943Sdim // its element type. 3510314564Sdim Type *CurTy = ArrayType::get(GEP->getSourceElementType(), 0); 3511288943Sdim for (const SCEV *IndexExpr : IndexExprs) { 3512288943Sdim // Compute the (potentially symbolic) offset in bytes for this index. 3513288943Sdim if (StructType *STy = dyn_cast<StructType>(CurTy)) { 3514288943Sdim // For a struct, add the member offset. 3515288943Sdim ConstantInt *Index = cast<SCEVConstant>(IndexExpr)->getValue(); 3516288943Sdim unsigned FieldNo = Index->getZExtValue(); 3517360784Sdim const SCEV *FieldOffset = getOffsetOfExpr(IntIdxTy, STy, FieldNo); 3518288943Sdim 3519288943Sdim // Add the field offset to the running total offset. 3520288943Sdim TotalOffset = getAddExpr(TotalOffset, FieldOffset); 3521288943Sdim 3522288943Sdim // Update CurTy to the type of the field at Index. 3523288943Sdim CurTy = STy->getTypeAtIndex(Index); 3524288943Sdim } else { 3525288943Sdim // Update CurTy to its element type. 3526288943Sdim CurTy = cast<SequentialType>(CurTy)->getElementType(); 3527288943Sdim // For an array, add the element offset, explicitly scaled. 3528360784Sdim const SCEV *ElementSize = getSizeOfExpr(IntIdxTy, CurTy); 3529288943Sdim // Getelementptr indices are signed. 
3530360784Sdim IndexExpr = getTruncateOrSignExtend(IndexExpr, IntIdxTy); 3531288943Sdim 3532288943Sdim // Multiply the index by the element size to compute the element offset. 3533288943Sdim const SCEV *LocalOffset = getMulExpr(IndexExpr, ElementSize, Wrap); 3534288943Sdim 3535288943Sdim // Add the element offset to the running total offset. 3536288943Sdim TotalOffset = getAddExpr(TotalOffset, LocalOffset); 3537288943Sdim } 3538288943Sdim } 3539288943Sdim 3540288943Sdim // Add the total offset from all the GEP indices to the base. 3541288943Sdim return getAddExpr(BaseExpr, TotalOffset, Wrap); 3542288943Sdim} 3543288943Sdim 3544353358Sdimstd::tuple<const SCEV *, FoldingSetNodeID, void *> 3545353358SdimScalarEvolution::findExistingSCEVInCache(int SCEVType, 3546353358Sdim ArrayRef<const SCEV *> Ops) { 3547353358Sdim FoldingSetNodeID ID; 3548353358Sdim void *IP = nullptr; 3549353358Sdim ID.AddInteger(SCEVType); 3550353358Sdim for (unsigned i = 0, e = Ops.size(); i != e; ++i) 3551353358Sdim ID.AddPointer(Ops[i]); 3552353358Sdim return std::tuple<const SCEV *, FoldingSetNodeID, void *>( 3553353358Sdim UniqueSCEVs.FindNodeOrInsertPos(ID, IP), std::move(ID), IP); 3554193323Sed} 3555193323Sed 3556353358Sdimconst SCEV *ScalarEvolution::getMinMaxExpr(unsigned Kind, 3557353358Sdim SmallVectorImpl<const SCEV *> &Ops) { 3558353358Sdim assert(!Ops.empty() && "Cannot get empty (u|s)(min|max)!"); 3559193323Sed if (Ops.size() == 1) return Ops[0]; 3560193323Sed#ifndef NDEBUG 3561226633Sdim Type *ETy = getEffectiveSCEVType(Ops[0]->getType()); 3562193323Sed for (unsigned i = 1, e = Ops.size(); i != e; ++i) 3563212904Sdim assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy && 3564353358Sdim "Operand types don't match!"); 3565193323Sed#endif 3566193323Sed 3567353358Sdim bool IsSigned = Kind == scSMaxExpr || Kind == scSMinExpr; 3568353358Sdim bool IsMax = Kind == scSMaxExpr || Kind == scUMaxExpr; 3569353358Sdim 3570193323Sed // Sort by complexity, this groups all similar expression 
types together. 3571321369Sdim GroupByComplexity(Ops, &LI, DT); 3572193323Sed 3573353358Sdim // Check if we have created the same expression before. 3574353358Sdim if (const SCEV *S = std::get<0>(findExistingSCEVInCache(Kind, Ops))) { 3575353358Sdim return S; 3576353358Sdim } 3577353358Sdim 3578193323Sed // If there are any constants, fold them together. 3579193323Sed unsigned Idx = 0; 3580193323Sed if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) { 3581193323Sed ++Idx; 3582193323Sed assert(Idx < Ops.size()); 3583353358Sdim auto FoldOp = [&](const APInt &LHS, const APInt &RHS) { 3584353358Sdim if (Kind == scSMaxExpr) 3585353358Sdim return APIntOps::smax(LHS, RHS); 3586353358Sdim else if (Kind == scSMinExpr) 3587353358Sdim return APIntOps::smin(LHS, RHS); 3588353358Sdim else if (Kind == scUMaxExpr) 3589353358Sdim return APIntOps::umax(LHS, RHS); 3590353358Sdim else if (Kind == scUMinExpr) 3591353358Sdim return APIntOps::umin(LHS, RHS); 3592353358Sdim llvm_unreachable("Unknown SCEV min/max opcode"); 3593353358Sdim }; 3594353358Sdim 3595193323Sed while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) { 3596193323Sed // We found two constants, fold them together! 3597296417Sdim ConstantInt *Fold = ConstantInt::get( 3598353358Sdim getContext(), FoldOp(LHSC->getAPInt(), RHSC->getAPInt())); 3599193323Sed Ops[0] = getConstant(Fold); 3600193323Sed Ops.erase(Ops.begin()+1); // Erase the folded element 3601193323Sed if (Ops.size() == 1) return Ops[0]; 3602193323Sed LHSC = cast<SCEVConstant>(Ops[0]); 3603193323Sed } 3604193323Sed 3605353358Sdim bool IsMinV = LHSC->getValue()->isMinValue(IsSigned); 3606353358Sdim bool IsMaxV = LHSC->getValue()->isMaxValue(IsSigned); 3607353358Sdim 3608353358Sdim if (IsMax ? IsMinV : IsMaxV) { 3609353358Sdim // If we are left with a constant minimum(/maximum)-int, strip it off. 3610193323Sed Ops.erase(Ops.begin()); 3611193323Sed --Idx; 3612353358Sdim } else if (IsMax ? 
IsMaxV : IsMinV) { 3613353358Sdim // If we have a max(/min) with a constant maximum(/minimum)-int, 3614353358Sdim // it will always be the extremum. 3615353358Sdim return LHSC; 3616193323Sed } 3617207618Srdivacky 3618207618Srdivacky if (Ops.size() == 1) return Ops[0]; 3619193323Sed } 3620193323Sed 3621353358Sdim // Find the first operation of the same kind 3622353358Sdim while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < Kind) 3623193323Sed ++Idx; 3624193323Sed 3625353358Sdim // Check to see if one of the operands is of the same kind. If so, expand its 3626353358Sdim // operands onto our operand list, and recurse to simplify. 3627193323Sed if (Idx < Ops.size()) { 3628353358Sdim bool DeletedAny = false; 3629353358Sdim while (Ops[Idx]->getSCEVType() == Kind) { 3630353358Sdim const SCEVMinMaxExpr *SMME = cast<SCEVMinMaxExpr>(Ops[Idx]); 3631193323Sed Ops.erase(Ops.begin()+Idx); 3632353358Sdim Ops.append(SMME->op_begin(), SMME->op_end()); 3633353358Sdim DeletedAny = true; 3634193323Sed } 3635193323Sed 3636353358Sdim if (DeletedAny) 3637353358Sdim return getMinMaxExpr(Kind, Ops); 3638193323Sed } 3639193323Sed 3640193323Sed // Okay, check to see if the same value occurs in the operand list twice. If 3641193323Sed // so, delete one. Since we sorted the list, these values are required to 3642193323Sed // be adjacent. 3643353358Sdim llvm::CmpInst::Predicate GEPred = 3644353358Sdim IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; 3645353358Sdim llvm::CmpInst::Predicate LEPred = 3646353358Sdim IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; 3647353358Sdim llvm::CmpInst::Predicate FirstPred = IsMax ? GEPred : LEPred; 3648353358Sdim llvm::CmpInst::Predicate SecondPred = IsMax ? 
LEPred : GEPred; 3649353358Sdim for (unsigned i = 0, e = Ops.size() - 1; i != e; ++i) { 3650353358Sdim if (Ops[i] == Ops[i + 1] || 3651353358Sdim isKnownViaNonRecursiveReasoning(FirstPred, Ops[i], Ops[i + 1])) { 3652353358Sdim // X op Y op Y --> X op Y 3653353358Sdim // X op Y --> X, if we know X, Y are ordered appropriately 3654353358Sdim Ops.erase(Ops.begin() + i + 1, Ops.begin() + i + 2); 3655353358Sdim --i; 3656353358Sdim --e; 3657353358Sdim } else if (isKnownViaNonRecursiveReasoning(SecondPred, Ops[i], 3658353358Sdim Ops[i + 1])) { 3659353358Sdim // X op Y --> Y, if we know X, Y are ordered appropriately 3660353358Sdim Ops.erase(Ops.begin() + i, Ops.begin() + i + 1); 3661353358Sdim --i; 3662353358Sdim --e; 3663193323Sed } 3664353358Sdim } 3665193323Sed 3666193323Sed if (Ops.size() == 1) return Ops[0]; 3667193323Sed 3668193323Sed assert(!Ops.empty() && "Reduced smax down to nothing!"); 3669193323Sed 3670353358Sdim // Okay, it looks like we really DO need an expr. Check to see if we 3671193323Sed // already have one, otherwise create a new one. 
3672353358Sdim const SCEV *ExistingSCEV; 3673195340Sed FoldingSetNodeID ID; 3674353358Sdim void *IP; 3675353358Sdim std::tie(ExistingSCEV, ID, IP) = findExistingSCEVInCache(Kind, Ops); 3676353358Sdim if (ExistingSCEV) 3677353358Sdim return ExistingSCEV; 3678205407Srdivacky const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size()); 3679205407Srdivacky std::uninitialized_copy(Ops.begin(), Ops.end(), O); 3680353358Sdim SCEV *S = new (SCEVAllocator) SCEVMinMaxExpr( 3681353358Sdim ID.Intern(SCEVAllocator), static_cast<SCEVTypes>(Kind), O, Ops.size()); 3682353358Sdim 3683195340Sed UniqueSCEVs.InsertNode(S, IP); 3684327952Sdim addToLoopUseLists(S); 3685195340Sed return S; 3686193323Sed} 3687193323Sed 3688353358Sdimconst SCEV *ScalarEvolution::getSMaxExpr(const SCEV *LHS, const SCEV *RHS) { 3689309124Sdim SmallVector<const SCEV *, 2> Ops = {LHS, RHS}; 3690353358Sdim return getSMaxExpr(Ops); 3691193323Sed} 3692193323Sed 3693353358Sdimconst SCEV *ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV *> &Ops) { 3694353358Sdim return getMinMaxExpr(scSMaxExpr, Ops); 3695353358Sdim} 3696193323Sed 3697353358Sdimconst SCEV *ScalarEvolution::getUMaxExpr(const SCEV *LHS, const SCEV *RHS) { 3698353358Sdim SmallVector<const SCEV *, 2> Ops = {LHS, RHS}; 3699353358Sdim return getUMaxExpr(Ops); 3700353358Sdim} 3701193323Sed 3702353358Sdimconst SCEV *ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV *> &Ops) { 3703353358Sdim return getMinMaxExpr(scUMaxExpr, Ops); 3704193323Sed} 3705193323Sed 3706195098Sedconst SCEV *ScalarEvolution::getSMinExpr(const SCEV *LHS, 3707195098Sed const SCEV *RHS) { 3708341825Sdim SmallVector<const SCEV *, 2> Ops = { LHS, RHS }; 3709341825Sdim return getSMinExpr(Ops); 3710194612Sed} 3711194612Sed 3712341825Sdimconst SCEV *ScalarEvolution::getSMinExpr(SmallVectorImpl<const SCEV *> &Ops) { 3713353358Sdim return getMinMaxExpr(scSMinExpr, Ops); 3714341825Sdim} 3715341825Sdim 3716195098Sedconst SCEV *ScalarEvolution::getUMinExpr(const SCEV *LHS, 
3717195098Sed const SCEV *RHS) { 3718341825Sdim SmallVector<const SCEV *, 2> Ops = { LHS, RHS }; 3719341825Sdim return getUMinExpr(Ops); 3720194612Sed} 3721194612Sed 3722341825Sdimconst SCEV *ScalarEvolution::getUMinExpr(SmallVectorImpl<const SCEV *> &Ops) { 3723353358Sdim return getMinMaxExpr(scUMinExpr, Ops); 3724341825Sdim} 3725341825Sdim 3726261991Sdimconst SCEV *ScalarEvolution::getSizeOfExpr(Type *IntTy, Type *AllocTy) { 3727288943Sdim // We can bypass creating a target-independent 3728207618Srdivacky // constant expression and then folding it back into a ConstantInt. 3729207618Srdivacky // This is just a compile-time optimization. 3730296417Sdim return getConstant(IntTy, getDataLayout().getTypeAllocSize(AllocTy)); 3731203954Srdivacky} 3732198090Srdivacky 3733261991Sdimconst SCEV *ScalarEvolution::getOffsetOfExpr(Type *IntTy, 3734261991Sdim StructType *STy, 3735203954Srdivacky unsigned FieldNo) { 3736288943Sdim // We can bypass creating a target-independent 3737207618Srdivacky // constant expression and then folding it back into a ConstantInt. 3738207618Srdivacky // This is just a compile-time optimization. 3739288943Sdim return getConstant( 3740296417Sdim IntTy, getDataLayout().getStructLayout(STy)->getElementOffset(FieldNo)); 3741198090Srdivacky} 3742198090Srdivacky 3743198090Srdivackyconst SCEV *ScalarEvolution::getUnknown(Value *V) { 3744195098Sed // Don't attempt to do anything other than create a SCEVUnknown object 3745195098Sed // here. createSCEV only calls getUnknown after checking for all other 3746195098Sed // interesting possibilities, and any other code that calls getUnknown 3747195098Sed // is doing so in order to hide a value from SCEV canonicalization. 
3748195098Sed 3749195340Sed FoldingSetNodeID ID; 3750195340Sed ID.AddInteger(scUnknown); 3751195340Sed ID.AddPointer(V); 3752276479Sdim void *IP = nullptr; 3753212904Sdim if (SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) { 3754212904Sdim assert(cast<SCEVUnknown>(S)->getValue() == V && 3755212904Sdim "Stale SCEVUnknown in uniquing map!"); 3756212904Sdim return S; 3757212904Sdim } 3758212904Sdim SCEV *S = new (SCEVAllocator) SCEVUnknown(ID.Intern(SCEVAllocator), V, this, 3759212904Sdim FirstUnknown); 3760212904Sdim FirstUnknown = cast<SCEVUnknown>(S); 3761195340Sed UniqueSCEVs.InsertNode(S, IP); 3762195340Sed return S; 3763193323Sed} 3764193323Sed 3765193323Sed//===----------------------------------------------------------------------===// 3766193323Sed// Basic SCEV Analysis and PHI Idiom Recognition Code 3767193323Sed// 3768193323Sed 3769309124Sdim/// Test if values of the given type are analyzable within the SCEV 3770309124Sdim/// framework. This primarily includes integer types, and it can optionally 3771309124Sdim/// include pointer types if the ScalarEvolution class has access to 3772309124Sdim/// target-specific information. 3773226633Sdimbool ScalarEvolution::isSCEVable(Type *Ty) const { 3774198090Srdivacky // Integers and pointers are always SCEVable. 3775341825Sdim return Ty->isIntOrPtrTy(); 3776193323Sed} 3777193323Sed 3778309124Sdim/// Return the size in bits of the specified type, for which isSCEVable must 3779309124Sdim/// return true. 
3780226633Sdimuint64_t ScalarEvolution::getTypeSizeInBits(Type *Ty) const { 3781193323Sed assert(isSCEVable(Ty) && "Type is not SCEVable!"); 3782341825Sdim if (Ty->isPointerTy()) 3783341825Sdim return getDataLayout().getIndexTypeSizeInBits(Ty); 3784296417Sdim return getDataLayout().getTypeSizeInBits(Ty); 3785193323Sed} 3786193323Sed 3787309124Sdim/// Return a type with the same bitwidth as the given type and which represents 3788309124Sdim/// how SCEV will treat the given type, for which isSCEVable must return 3789360784Sdim/// true. For pointer types, this is the pointer index sized integer type. 3790226633SdimType *ScalarEvolution::getEffectiveSCEVType(Type *Ty) const { 3791193323Sed assert(isSCEVable(Ty) && "Type is not SCEVable!"); 3792193323Sed 3793296417Sdim if (Ty->isIntegerTy()) 3794193323Sed return Ty; 3795193323Sed 3796198090Srdivacky // The only other support type is pointer. 3797204642Srdivacky assert(Ty->isPointerTy() && "Unexpected non-pointer non-integer type!"); 3798360784Sdim return getDataLayout().getIndexType(Ty); 3799193323Sed} 3800193323Sed 3801321369SdimType *ScalarEvolution::getWiderType(Type *T1, Type *T2) const { 3802321369Sdim return getTypeSizeInBits(T1) >= getTypeSizeInBits(T2) ? 
T1 : T2; 3803321369Sdim} 3804321369Sdim 3805198090Srdivackyconst SCEV *ScalarEvolution::getCouldNotCompute() { 3806296417Sdim return CouldNotCompute.get(); 3807193323Sed} 3808193323Sed 3809296417Sdimbool ScalarEvolution::checkValidity(const SCEV *S) const { 3810314564Sdim bool ContainsNulls = SCEVExprContains(S, [](const SCEV *S) { 3811314564Sdim auto *SU = dyn_cast<SCEVUnknown>(S); 3812314564Sdim return SU && SU->getValue() == nullptr; 3813314564Sdim }); 3814261991Sdim 3815314564Sdim return !ContainsNulls; 3816261991Sdim} 3817261991Sdim 3818309124Sdimbool ScalarEvolution::containsAddRecurrence(const SCEV *S) { 3819314564Sdim HasRecMapType::iterator I = HasRecMap.find(S); 3820309124Sdim if (I != HasRecMap.end()) 3821309124Sdim return I->second; 3822309124Sdim 3823314564Sdim bool FoundAddRec = SCEVExprContains(S, isa<SCEVAddRecExpr, const SCEV *>); 3824314564Sdim HasRecMap.insert({S, FoundAddRec}); 3825314564Sdim return FoundAddRec; 3826309124Sdim} 3827309124Sdim 3828312832Sdim/// Try to split a SCEVAddExpr into a pair of {SCEV, ConstantInt}. 3829312832Sdim/// If \p S is a SCEVAddExpr and is composed of a sub SCEV S' and an 3830312832Sdim/// offset I, then return {S', I}, else return {\p S, nullptr}. 3831312832Sdimstatic std::pair<const SCEV *, ConstantInt *> splitAddExpr(const SCEV *S) { 3832312832Sdim const auto *Add = dyn_cast<SCEVAddExpr>(S); 3833312832Sdim if (!Add) 3834312832Sdim return {S, nullptr}; 3835312832Sdim 3836312832Sdim if (Add->getNumOperands() != 2) 3837312832Sdim return {S, nullptr}; 3838312832Sdim 3839312832Sdim auto *ConstOp = dyn_cast<SCEVConstant>(Add->getOperand(0)); 3840312832Sdim if (!ConstOp) 3841312832Sdim return {S, nullptr}; 3842312832Sdim 3843312832Sdim return {Add->getOperand(1), ConstOp->getValue()}; 3844312832Sdim} 3845312832Sdim 3846312832Sdim/// Return the ValueOffsetPair set for \p S. \p S can be represented 3847312832Sdim/// by the value and offset from any ValueOffsetPair in the set. 
3848312832SdimSetVector<ScalarEvolution::ValueOffsetPair> * 3849312832SdimScalarEvolution::getSCEVValues(const SCEV *S) { 3850309124Sdim ExprValueMapType::iterator SI = ExprValueMap.find_as(S); 3851309124Sdim if (SI == ExprValueMap.end()) 3852309124Sdim return nullptr; 3853309124Sdim#ifndef NDEBUG 3854309124Sdim if (VerifySCEVMap) { 3855309124Sdim // Check there is no dangling Value in the set returned. 3856309124Sdim for (const auto &VE : SI->second) 3857312832Sdim assert(ValueExprMap.count(VE.first)); 3858309124Sdim } 3859309124Sdim#endif 3860309124Sdim return &SI->second; 3861309124Sdim} 3862309124Sdim 3863312832Sdim/// Erase Value from ValueExprMap and ExprValueMap. ValueExprMap.erase(V) 3864312832Sdim/// cannot be used separately. eraseValueFromMap should be used to remove 3865312832Sdim/// V from ValueExprMap and ExprValueMap at the same time. 3866309124Sdimvoid ScalarEvolution::eraseValueFromMap(Value *V) { 3867309124Sdim ValueExprMapType::iterator I = ValueExprMap.find_as(V); 3868309124Sdim if (I != ValueExprMap.end()) { 3869309124Sdim const SCEV *S = I->second; 3870312832Sdim // Remove {V, 0} from the set of ExprValueMap[S] 3871312832Sdim if (SetVector<ValueOffsetPair> *SV = getSCEVValues(S)) 3872312832Sdim SV->remove({V, nullptr}); 3873312832Sdim 3874312832Sdim // Remove {V, Offset} from the set of ExprValueMap[Stripped] 3875312832Sdim const SCEV *Stripped; 3876312832Sdim ConstantInt *Offset; 3877312832Sdim std::tie(Stripped, Offset) = splitAddExpr(S); 3878312832Sdim if (Offset != nullptr) { 3879312832Sdim if (SetVector<ValueOffsetPair> *SV = getSCEVValues(Stripped)) 3880312832Sdim SV->remove({V, Offset}); 3881312832Sdim } 3882309124Sdim ValueExprMap.erase(V); 3883309124Sdim } 3884309124Sdim} 3885309124Sdim 3886341825Sdim/// Check whether value has nuw/nsw/exact set but SCEV does not. 3887353358Sdim/// TODO: In reality it is better to check the poison recursively 3888341825Sdim/// but this is better than nothing. 
3889341825Sdimstatic bool SCEVLostPoisonFlags(const SCEV *S, const Value *V) { 3890341825Sdim if (auto *I = dyn_cast<Instruction>(V)) { 3891341825Sdim if (isa<OverflowingBinaryOperator>(I)) { 3892341825Sdim if (auto *NS = dyn_cast<SCEVNAryExpr>(S)) { 3893341825Sdim if (I->hasNoSignedWrap() && !NS->hasNoSignedWrap()) 3894341825Sdim return true; 3895341825Sdim if (I->hasNoUnsignedWrap() && !NS->hasNoUnsignedWrap()) 3896341825Sdim return true; 3897341825Sdim } 3898341825Sdim } else if (isa<PossiblyExactOperator>(I) && I->isExact()) 3899341825Sdim return true; 3900341825Sdim } 3901341825Sdim return false; 3902341825Sdim} 3903341825Sdim 3904309124Sdim/// Return an existing SCEV if it exists, otherwise analyze the expression and 3905309124Sdim/// create a new one. 3906198090Srdivackyconst SCEV *ScalarEvolution::getSCEV(Value *V) { 3907193323Sed assert(isSCEVable(V->getType()) && "Value is not SCEVable!"); 3908193323Sed 3909296417Sdim const SCEV *S = getExistingSCEV(V); 3910296417Sdim if (S == nullptr) { 3911296417Sdim S = createSCEV(V); 3912309124Sdim // During PHI resolution, it is possible to create two SCEVs for the same 3913309124Sdim // V, so it is needed to double check whether V->S is inserted into 3914312832Sdim // ValueExprMap before insert S->{V, 0} into ExprValueMap. 3915309124Sdim std::pair<ValueExprMapType::iterator, bool> Pair = 3916309124Sdim ValueExprMap.insert({SCEVCallbackVH(V, this), S}); 3917341825Sdim if (Pair.second && !SCEVLostPoisonFlags(S, V)) { 3918312832Sdim ExprValueMap[S].insert({V, nullptr}); 3919312832Sdim 3920312832Sdim // If S == Stripped + Offset, add Stripped -> {V, Offset} into 3921312832Sdim // ExprValueMap. 3922312832Sdim const SCEV *Stripped = S; 3923312832Sdim ConstantInt *Offset = nullptr; 3924312832Sdim std::tie(Stripped, Offset) = splitAddExpr(S); 3925312832Sdim // If stripped is SCEVUnknown, don't bother to save 3926312832Sdim // Stripped -> {V, offset}. 
It doesn't simplify and sometimes even 3927312832Sdim // increase the complexity of the expansion code. 3928312832Sdim // If V is GetElementPtrInst, don't save Stripped -> {V, offset} 3929312832Sdim // because it may generate add/sub instead of GEP in SCEV expansion. 3930312832Sdim if (Offset != nullptr && !isa<SCEVUnknown>(Stripped) && 3931312832Sdim !isa<GetElementPtrInst>(V)) 3932312832Sdim ExprValueMap[Stripped].insert({V, Offset}); 3933312832Sdim } 3934296417Sdim } 3935296417Sdim return S; 3936296417Sdim} 3937296417Sdim 3938296417Sdimconst SCEV *ScalarEvolution::getExistingSCEV(Value *V) { 3939296417Sdim assert(isSCEVable(V->getType()) && "Value is not SCEVable!"); 3940296417Sdim 3941261991Sdim ValueExprMapType::iterator I = ValueExprMap.find_as(V); 3942261991Sdim if (I != ValueExprMap.end()) { 3943261991Sdim const SCEV *S = I->second; 3944261991Sdim if (checkValidity(S)) 3945261991Sdim return S; 3946312832Sdim eraseValueFromMap(V); 3947309124Sdim forgetMemoizedResults(S); 3948261991Sdim } 3949296417Sdim return nullptr; 3950193323Sed} 3951193323Sed 3952309124Sdim/// Return a SCEV corresponding to -V = -1*V 3953296417Sdimconst SCEV *ScalarEvolution::getNegativeSCEV(const SCEV *V, 3954296417Sdim SCEV::NoWrapFlags Flags) { 3955193323Sed if (const SCEVConstant *VC = dyn_cast<SCEVConstant>(V)) 3956198090Srdivacky return getConstant( 3957198090Srdivacky cast<ConstantInt>(ConstantExpr::getNeg(VC->getValue()))); 3958193323Sed 3959226633Sdim Type *Ty = V->getType(); 3960193323Sed Ty = getEffectiveSCEVType(Ty); 3961296417Sdim return getMulExpr( 3962296417Sdim V, getConstant(cast<ConstantInt>(Constant::getAllOnesValue(Ty))), Flags); 3963193323Sed} 3964193323Sed 3965353358Sdim/// If Expr computes ~A, return A else return nullptr 3966353358Sdimstatic const SCEV *MatchNotExpr(const SCEV *Expr) { 3967353358Sdim const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Expr); 3968353358Sdim if (!Add || Add->getNumOperands() != 2 || 3969353358Sdim !Add->getOperand(0)->isAllOnesValue()) 
3970353358Sdim return nullptr; 3971353358Sdim 3972353358Sdim const SCEVMulExpr *AddRHS = dyn_cast<SCEVMulExpr>(Add->getOperand(1)); 3973353358Sdim if (!AddRHS || AddRHS->getNumOperands() != 2 || 3974353358Sdim !AddRHS->getOperand(0)->isAllOnesValue()) 3975353358Sdim return nullptr; 3976353358Sdim 3977353358Sdim return AddRHS->getOperand(1); 3978353358Sdim} 3979353358Sdim 3980309124Sdim/// Return a SCEV corresponding to ~V = -1-V 3981198090Srdivackyconst SCEV *ScalarEvolution::getNotSCEV(const SCEV *V) { 3982193323Sed if (const SCEVConstant *VC = dyn_cast<SCEVConstant>(V)) 3983198090Srdivacky return getConstant( 3984198090Srdivacky cast<ConstantInt>(ConstantExpr::getNot(VC->getValue()))); 3985193323Sed 3986353358Sdim // Fold ~(u|s)(min|max)(~x, ~y) to (u|s)(max|min)(x, y) 3987353358Sdim if (const SCEVMinMaxExpr *MME = dyn_cast<SCEVMinMaxExpr>(V)) { 3988353358Sdim auto MatchMinMaxNegation = [&](const SCEVMinMaxExpr *MME) { 3989353358Sdim SmallVector<const SCEV *, 2> MatchedOperands; 3990353358Sdim for (const SCEV *Operand : MME->operands()) { 3991353358Sdim const SCEV *Matched = MatchNotExpr(Operand); 3992353358Sdim if (!Matched) 3993353358Sdim return (const SCEV *)nullptr; 3994353358Sdim MatchedOperands.push_back(Matched); 3995353358Sdim } 3996353358Sdim return getMinMaxExpr( 3997353358Sdim SCEVMinMaxExpr::negate(static_cast<SCEVTypes>(MME->getSCEVType())), 3998353358Sdim MatchedOperands); 3999353358Sdim }; 4000353358Sdim if (const SCEV *Replaced = MatchMinMaxNegation(MME)) 4001353358Sdim return Replaced; 4002353358Sdim } 4003353358Sdim 4004226633Sdim Type *Ty = V->getType(); 4005193323Sed Ty = getEffectiveSCEVType(Ty); 4006198090Srdivacky const SCEV *AllOnes = 4007198090Srdivacky getConstant(cast<ConstantInt>(Constant::getAllOnesValue(Ty))); 4008193323Sed return getMinusSCEV(AllOnes, V); 4009193323Sed} 4010193323Sed 4011218893Sdimconst SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS, const SCEV *RHS, 4012321369Sdim SCEV::NoWrapFlags Flags, 4013321369Sdim 
unsigned Depth) { 4014212904Sdim // Fast path: X - X --> 0. 4015212904Sdim if (LHS == RHS) 4016296417Sdim return getZero(LHS->getType()); 4017212904Sdim 4018296417Sdim // We represent LHS - RHS as LHS + (-1)*RHS. This transformation 4019296417Sdim // makes it so that we cannot make much use of NUW. 4020296417Sdim auto AddFlags = SCEV::FlagAnyWrap; 4021296417Sdim const bool RHSIsNotMinSigned = 4022321369Sdim !getSignedRangeMin(RHS).isMinSignedValue(); 4023296417Sdim if (maskFlags(Flags, SCEV::FlagNSW) == SCEV::FlagNSW) { 4024296417Sdim // Let M be the minimum representable signed value. Then (-1)*RHS 4025296417Sdim // signed-wraps if and only if RHS is M. That can happen even for 4026296417Sdim // a NSW subtraction because e.g. (-1)*M signed-wraps even though 4027296417Sdim // -1 - M does not. So to transfer NSW from LHS - RHS to LHS + 4028296417Sdim // (-1)*RHS, we need to prove that RHS != M. 4029296417Sdim // 4030296417Sdim // If LHS is non-negative and we know that LHS - RHS does not 4031296417Sdim // signed-wrap, then RHS cannot be M. So we can rule out signed-wrap 4032296417Sdim // either by proving that RHS > M or that LHS >= 0. 4033296417Sdim if (RHSIsNotMinSigned || isKnownNonNegative(LHS)) { 4034296417Sdim AddFlags = SCEV::FlagNSW; 4035296417Sdim } 4036296417Sdim } 4037296417Sdim 4038296417Sdim // FIXME: Find a correct way to transfer NSW to (-1)*M when LHS - 4039296417Sdim // RHS is NSW and LHS >= 0. 4040296417Sdim // 4041296417Sdim // The difficulty here is that the NSW flag may have been proven 4042296417Sdim // relative to a loop that is to be found in a recurrence in LHS and 4043296417Sdim // not in RHS. Applying NSW to (-1)*M may then let the NSW have a 4044296417Sdim // larger scope than intended. 4045296417Sdim auto NegFlags = RHSIsNotMinSigned ? 
SCEV::FlagNSW : SCEV::FlagAnyWrap; 4046296417Sdim 4047321369Sdim return getAddExpr(LHS, getNegativeSCEV(RHS, NegFlags), AddFlags, Depth); 4048193323Sed} 4049193323Sed 4050353358Sdimconst SCEV *ScalarEvolution::getTruncateOrZeroExtend(const SCEV *V, Type *Ty, 4051353358Sdim unsigned Depth) { 4052226633Sdim Type *SrcTy = V->getType(); 4053341825Sdim assert(SrcTy->isIntOrPtrTy() && Ty->isIntOrPtrTy() && 4054193323Sed "Cannot truncate or zero extend with non-integer arguments!"); 4055193323Sed if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty)) 4056193323Sed return V; // No conversion 4057193323Sed if (getTypeSizeInBits(SrcTy) > getTypeSizeInBits(Ty)) 4058353358Sdim return getTruncateExpr(V, Ty, Depth); 4059353358Sdim return getZeroExtendExpr(V, Ty, Depth); 4060193323Sed} 4061193323Sed 4062353358Sdimconst SCEV *ScalarEvolution::getTruncateOrSignExtend(const SCEV *V, Type *Ty, 4063353358Sdim unsigned Depth) { 4064226633Sdim Type *SrcTy = V->getType(); 4065341825Sdim assert(SrcTy->isIntOrPtrTy() && Ty->isIntOrPtrTy() && 4066193323Sed "Cannot truncate or zero extend with non-integer arguments!"); 4067193323Sed if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty)) 4068193323Sed return V; // No conversion 4069193323Sed if (getTypeSizeInBits(SrcTy) > getTypeSizeInBits(Ty)) 4070353358Sdim return getTruncateExpr(V, Ty, Depth); 4071353358Sdim return getSignExtendExpr(V, Ty, Depth); 4072193323Sed} 4073193323Sed 4074198090Srdivackyconst SCEV * 4075226633SdimScalarEvolution::getNoopOrZeroExtend(const SCEV *V, Type *Ty) { 4076226633Sdim Type *SrcTy = V->getType(); 4077341825Sdim assert(SrcTy->isIntOrPtrTy() && Ty->isIntOrPtrTy() && 4078193323Sed "Cannot noop or zero extend with non-integer arguments!"); 4079193323Sed assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) && 4080193323Sed "getNoopOrZeroExtend cannot truncate!"); 4081193323Sed if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty)) 4082193323Sed return V; // No conversion 4083193323Sed return 
getZeroExtendExpr(V, Ty); 4084193323Sed} 4085193323Sed 4086198090Srdivackyconst SCEV * 4087226633SdimScalarEvolution::getNoopOrSignExtend(const SCEV *V, Type *Ty) { 4088226633Sdim Type *SrcTy = V->getType(); 4089341825Sdim assert(SrcTy->isIntOrPtrTy() && Ty->isIntOrPtrTy() && 4090193323Sed "Cannot noop or sign extend with non-integer arguments!"); 4091193323Sed assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) && 4092193323Sed "getNoopOrSignExtend cannot truncate!"); 4093193323Sed if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty)) 4094193323Sed return V; // No conversion 4095193323Sed return getSignExtendExpr(V, Ty); 4096193323Sed} 4097193323Sed 4098198090Srdivackyconst SCEV * 4099226633SdimScalarEvolution::getNoopOrAnyExtend(const SCEV *V, Type *Ty) { 4100226633Sdim Type *SrcTy = V->getType(); 4101341825Sdim assert(SrcTy->isIntOrPtrTy() && Ty->isIntOrPtrTy() && 4102194178Sed "Cannot noop or any extend with non-integer arguments!"); 4103194178Sed assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) && 4104194178Sed "getNoopOrAnyExtend cannot truncate!"); 4105194178Sed if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty)) 4106194178Sed return V; // No conversion 4107194178Sed return getAnyExtendExpr(V, Ty); 4108194178Sed} 4109194178Sed 4110198090Srdivackyconst SCEV * 4111226633SdimScalarEvolution::getTruncateOrNoop(const SCEV *V, Type *Ty) { 4112226633Sdim Type *SrcTy = V->getType(); 4113341825Sdim assert(SrcTy->isIntOrPtrTy() && Ty->isIntOrPtrTy() && 4114193323Sed "Cannot truncate or noop with non-integer arguments!"); 4115193323Sed assert(getTypeSizeInBits(SrcTy) >= getTypeSizeInBits(Ty) && 4116193323Sed "getTruncateOrNoop cannot extend!"); 4117193323Sed if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty)) 4118193323Sed return V; // No conversion 4119193323Sed return getTruncateExpr(V, Ty); 4120193323Sed} 4121193323Sed 4122195098Sedconst SCEV *ScalarEvolution::getUMaxFromMismatchedTypes(const SCEV *LHS, 4123195098Sed const SCEV *RHS) { 
  const SCEV *PromotedLHS = LHS;
  const SCEV *PromotedRHS = RHS;

  // Zero-extend the narrower operand to the wider operand's type.
  if (getTypeSizeInBits(LHS->getType()) > getTypeSizeInBits(RHS->getType()))
    PromotedRHS = getZeroExtendExpr(RHS, LHS->getType());
  else
    PromotedLHS = getNoopOrZeroExtend(LHS, RHS->getType());

  return getUMaxExpr(PromotedLHS, PromotedRHS);
}

/// Two-operand convenience wrapper around the vector form below.
const SCEV *ScalarEvolution::getUMinFromMismatchedTypes(const SCEV *LHS,
                                                        const SCEV *RHS) {
  SmallVector<const SCEV *, 2> Ops = { LHS, RHS };
  return getUMinFromMismatchedTypes(Ops);
}

/// Promote all operands to the widest operand type (via zero-extension),
/// then build a umin of the promoted operands.
const SCEV *ScalarEvolution::getUMinFromMismatchedTypes(
    SmallVectorImpl<const SCEV *> &Ops) {
  assert(!Ops.empty() && "At least one operand must be!");
  // Trivial case.
  if (Ops.size() == 1)
    return Ops[0];

  // Find the max type first.
  Type *MaxType = nullptr;
  for (auto *S : Ops)
    if (MaxType)
      MaxType = getWiderType(MaxType, S->getType());
    else
      MaxType = S->getType();

  // Extend all ops to max type.
  SmallVector<const SCEV *, 2> PromotedOps;
  for (auto *S : Ops)
    PromotedOps.push_back(getNoopOrZeroExtend(S, MaxType));

  // Generate umin.
  return getUMinExpr(PromotedOps);
}

/// Strip casts and walk through n-ary expressions to find the unique pointer
/// operand underlying \p V; returns \p V itself when no unique base exists.
const SCEV *ScalarEvolution::getPointerBase(const SCEV *V) {
  // A pointer operand may evaluate to a nonpointer expression, such as null.
  if (!V->getType()->isPointerTy())
    return V;

  if (const SCEVCastExpr *Cast = dyn_cast<SCEVCastExpr>(V)) {
    // Look through the cast.
    return getPointerBase(Cast->getOperand());
  } else if (const SCEVNAryExpr *NAry = dyn_cast<SCEVNAryExpr>(V)) {
    const SCEV *PtrOp = nullptr;
    for (const SCEV *NAryOp : NAry->operands()) {
      if (NAryOp->getType()->isPointerTy()) {
        // Cannot find the base of an expression with multiple pointer operands.
        if (PtrOp)
          return V;
        PtrOp = NAryOp;
      }
    }
    if (!PtrOp)
      return V;
    // Recurse into the single pointer operand.
    return getPointerBase(PtrOp);
  }
  return V;
}

/// Push users of the given Instruction onto the given Worklist.
static void
PushDefUseChildren(Instruction *I,
                   SmallVectorImpl<Instruction *> &Worklist) {
  // Push the def-use children onto the Worklist stack.
  for (User *U : I->users())
    Worklist.push_back(cast<Instruction>(U));
}

/// Walk the def-use chain from \p PN and drop cached SCEVs that still
/// reference the placeholder \p SymName (used while a PHI's SCEV is being
/// computed).
void ScalarEvolution::forgetSymbolicName(Instruction *PN, const SCEV *SymName) {
  SmallVector<Instruction *, 16> Worklist;
  PushDefUseChildren(PN, Worklist);

  SmallPtrSet<Instruction *, 8> Visited;
  Visited.insert(PN);
  while (!Worklist.empty()) {
    Instruction *I = Worklist.pop_back_val();
    if (!Visited.insert(I).second)
      continue;

    auto It = ValueExprMap.find_as(static_cast<Value *>(I));
    if (It != ValueExprMap.end()) {
      const SCEV *Old = It->second;

      // Short-circuit the def-use traversal if the symbolic name
      // ceases to appear in expressions.
      if (Old != SymName && !hasOperand(Old, SymName))
        continue;

      // SCEVUnknown for a PHI either means that it has an unrecognized
      // structure, it's a PHI that's in the progress of being computed
      // by createNodeForPHI, or it's a single-value PHI. In the first case,
      // additional loop trip count information isn't going to change anything.
      // In the second case, createNodeForPHI will perform the necessary
      // updates on its own when it gets to that point. In the third, we do
      // want to forget the SCEVUnknown.
      if (!isa<PHINode>(I) ||
          !isa<SCEVUnknown>(Old) ||
          (I != PN && Old == SymName)) {
        eraseValueFromMap(It->first);
        forgetMemoizedResults(Old);
      }
    }

    // Continue the traversal through this instruction's users.
    PushDefUseChildren(I, Worklist);
  }
}

namespace {

/// Takes SCEV S and Loop L. For each AddRec sub-expression, use its start
/// expression in case its Loop is L. If it is not L then
/// if IgnoreOtherLoops is true then use AddRec itself
/// otherwise rewrite cannot be done.
/// If SCEV contains non-invariant unknown SCEV rewrite cannot be done.
class SCEVInitRewriter : public SCEVRewriteVisitor<SCEVInitRewriter> {
public:
  static const SCEV *rewrite(const SCEV *S, const Loop *L, ScalarEvolution &SE,
                             bool IgnoreOtherLoops = true) {
    SCEVInitRewriter Rewriter(L, SE);
    const SCEV *Result = Rewriter.visit(S);
    if (Rewriter.hasSeenLoopVariantSCEVUnknown())
      return SE.getCouldNotCompute();
    return Rewriter.hasSeenOtherLoops() && !IgnoreOtherLoops
               ? SE.getCouldNotCompute()
               : Result;
  }

  const SCEV *visitUnknown(const SCEVUnknown *Expr) {
    // A loop-variant unknown makes the whole rewrite invalid.
    if (!SE.isLoopInvariant(Expr, L))
      SeenLoopVariantSCEVUnknown = true;
    return Expr;
  }

  const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
    // Only re-write AddRecExprs for this loop.
    if (Expr->getLoop() == L)
      return Expr->getStart();
    SeenOtherLoops = true;
    return Expr;
  }

  bool hasSeenLoopVariantSCEVUnknown() { return SeenLoopVariantSCEVUnknown; }

  bool hasSeenOtherLoops() { return SeenOtherLoops; }

private:
  explicit SCEVInitRewriter(const Loop *L, ScalarEvolution &SE)
      : SCEVRewriteVisitor(SE), L(L) {}

  const Loop *L;
  bool SeenLoopVariantSCEVUnknown = false;
  bool SeenOtherLoops = false;
};

/// Takes SCEV S and Loop L. For each AddRec sub-expression, use its post
/// increment expression in case its Loop is L. If it is not L then
/// use AddRec itself.
/// If SCEV contains non-invariant unknown SCEV rewrite cannot be done.
class SCEVPostIncRewriter : public SCEVRewriteVisitor<SCEVPostIncRewriter> {
public:
  static const SCEV *rewrite(const SCEV *S, const Loop *L, ScalarEvolution &SE) {
    SCEVPostIncRewriter Rewriter(L, SE);
    const SCEV *Result = Rewriter.visit(S);
    return Rewriter.hasSeenLoopVariantSCEVUnknown()
               ? SE.getCouldNotCompute()
               : Result;
  }

  const SCEV *visitUnknown(const SCEVUnknown *Expr) {
    // A loop-variant unknown makes the whole rewrite invalid.
    if (!SE.isLoopInvariant(Expr, L))
      SeenLoopVariantSCEVUnknown = true;
    return Expr;
  }

  const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
    // Only re-write AddRecExprs for this loop.
    if (Expr->getLoop() == L)
      return Expr->getPostIncExpr(SE);
    SeenOtherLoops = true;
    return Expr;
  }

  bool hasSeenLoopVariantSCEVUnknown() { return SeenLoopVariantSCEVUnknown; }

  bool hasSeenOtherLoops() { return SeenOtherLoops; }

private:
  explicit SCEVPostIncRewriter(const Loop *L, ScalarEvolution &SE)
      : SCEVRewriteVisitor(SE), L(L) {}

  const Loop *L;
  bool SeenLoopVariantSCEVUnknown = false;
  bool SeenOtherLoops = false;
};

/// This class evaluates the compare condition by matching it against the
/// condition of loop latch. If there is a match we assume a true value
/// for the condition while building SCEV nodes.
class SCEVBackedgeConditionFolder
    : public SCEVRewriteVisitor<SCEVBackedgeConditionFolder> {
public:
  static const SCEV *rewrite(const SCEV *S, const Loop *L,
                             ScalarEvolution &SE) {
    bool IsPosBECond = false;
    Value *BECond = nullptr;
    if (BasicBlock *Latch = L->getLoopLatch()) {
      BranchInst *BI = dyn_cast<BranchInst>(Latch->getTerminator());
      if (BI && BI->isConditional()) {
        assert(BI->getSuccessor(0) != BI->getSuccessor(1) &&
               "Both outgoing branches should not target same header!");
        BECond = BI->getCondition();
        IsPosBECond = BI->getSuccessor(0) == L->getHeader();
      } else {
        // Latch terminator is not a conditional branch; nothing to fold.
        return S;
      }
    }
    SCEVBackedgeConditionFolder Rewriter(L, BECond, IsPosBECond, SE);
    return Rewriter.visit(S);
  }

  const SCEV *visitUnknown(const SCEVUnknown *Expr) {
    const SCEV *Result = Expr;
    bool InvariantF = SE.isLoopInvariant(Expr, L);

    if (!InvariantF) {
      Instruction *I = cast<Instruction>(Expr->getValue());
      switch (I->getOpcode()) {
      case Instruction::Select: {
        // If the select's condition is the backedge condition, fold the
        // select to the arm implied by the branch that stays in the loop.
        SelectInst *SI = cast<SelectInst>(I);
        Optional<const SCEV *> Res =
            compareWithBackedgeCondition(SI->getCondition());
        if (Res.hasValue()) {
          bool IsOne = cast<SCEVConstant>(Res.getValue())->getValue()->isOne();
          Result = SE.getSCEV(IsOne ? SI->getTrueValue() : SI->getFalseValue());
        }
        break;
      }
      default: {
        // The unknown itself may be the backedge condition value.
        Optional<const SCEV *> Res = compareWithBackedgeCondition(I);
        if (Res.hasValue())
          Result = Res.getValue();
        break;
      }
      }
    }
    return Result;
  }

private:
  explicit SCEVBackedgeConditionFolder(const Loop *L, Value *BECond,
                                       bool IsPosBECond, ScalarEvolution &SE)
      : SCEVRewriteVisitor(SE), L(L), BackedgeCond(BECond),
        IsPositiveBECond(IsPosBECond) {}

  Optional<const SCEV *> compareWithBackedgeCondition(Value *IC);

  const Loop *L;
  /// Loop back condition.
  Value *BackedgeCond = nullptr;
  /// Set to true if loop back is on positive branch condition.
  bool IsPositiveBECond;
};

Optional<const SCEV *>
SCEVBackedgeConditionFolder::compareWithBackedgeCondition(Value *IC) {

  // If value matches the backedge condition for loop latch,
  // then return a constant evolution node based on loopback
  // branch taken.
  if (BackedgeCond == IC)
    return IsPositiveBECond ? SE.getOne(Type::getInt1Ty(SE.getContext()))
                            : SE.getZero(Type::getInt1Ty(SE.getContext()));
  return None;
}

/// Rewrites each affine AddRec of loop L to the "shifted" recurrence
/// (AddRec - step); marks the rewrite invalid on any other AddRec or any
/// loop-variant unknown.
class SCEVShiftRewriter : public SCEVRewriteVisitor<SCEVShiftRewriter> {
public:
  static const SCEV *rewrite(const SCEV *S, const Loop *L,
                             ScalarEvolution &SE) {
    SCEVShiftRewriter Rewriter(L, SE);
    const SCEV *Result = Rewriter.visit(S);
    return Rewriter.isValid() ? Result : SE.getCouldNotCompute();
  }

  const SCEV *visitUnknown(const SCEVUnknown *Expr) {
    // Only allow AddRecExprs for this loop.
    if (!SE.isLoopInvariant(Expr, L))
      Valid = false;
    return Expr;
  }

  const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
    // Shift the recurrence back by one step; only valid for affine
    // recurrences of this loop.
    if (Expr->getLoop() == L && Expr->isAffine())
      return SE.getMinusSCEV(Expr, Expr->getStepRecurrence(SE));
    Valid = false;
    return Expr;
  }

  bool isValid() { return Valid; }

private:
  explicit SCEVShiftRewriter(const Loop *L, ScalarEvolution &SE)
      : SCEVRewriteVisitor(SE), L(L) {}

  const Loop *L;
  bool Valid = true;
};

} // end anonymous namespace

/// Try to prove NSW/NUW for the affine AddRec \p AR from the constant ranges
/// of the recurrence and its step.
SCEV::NoWrapFlags
ScalarEvolution::proveNoWrapViaConstantRanges(const SCEVAddRecExpr *AR) {
  if (!AR->isAffine())
    return SCEV::FlagAnyWrap;

  using OBO = OverflowingBinaryOperator;

  SCEV::NoWrapFlags Result = SCEV::FlagAnyWrap;

  if (!AR->hasNoSignedWrap()) {
    // If the whole recurrence stays inside the region where adding the step
    // cannot signed-overflow, the recurrence is NSW.
    ConstantRange AddRecRange = getSignedRange(AR);
    ConstantRange IncRange = getSignedRange(AR->getStepRecurrence(*this));

    auto NSWRegion = ConstantRange::makeGuaranteedNoWrapRegion(
        Instruction::Add, IncRange, OBO::NoSignedWrap);
    if (NSWRegion.contains(AddRecRange))
      Result = ScalarEvolution::setFlags(Result, SCEV::FlagNSW);
  }

  if (!AR->hasNoUnsignedWrap()) {
    // Same reasoning with unsigned ranges for NUW.
    ConstantRange AddRecRange = getUnsignedRange(AR);
    ConstantRange IncRange = getUnsignedRange(AR->getStepRecurrence(*this));

    auto NUWRegion = ConstantRange::makeGuaranteedNoWrapRegion(
        Instruction::Add, IncRange, OBO::NoUnsignedWrap);
    if (NUWRegion.contains(AddRecRange))
      Result = ScalarEvolution::setFlags(Result, SCEV::FlagNUW);
  }

  return Result;
}

namespace {

/// Represents an abstract binary operation.  This may exist as a
/// normal instruction or constant expression, or may have been
/// derived from an expression tree.
struct BinaryOp {
  unsigned Opcode;
  Value *LHS;
  Value *RHS;
  bool IsNSW = false;
  bool IsNUW = false;

  /// Op is set if this BinaryOp corresponds to a concrete LLVM instruction or
  /// constant expression.
  Operator *Op = nullptr;

  explicit BinaryOp(Operator *Op)
      : Opcode(Op->getOpcode()), LHS(Op->getOperand(0)), RHS(Op->getOperand(1)),
        Op(Op) {
    if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(Op)) {
      IsNSW = OBO->hasNoSignedWrap();
      IsNUW = OBO->hasNoUnsignedWrap();
    }
  }

  explicit BinaryOp(unsigned Opcode, Value *LHS, Value *RHS, bool IsNSW = false,
                    bool IsNUW = false)
      : Opcode(Opcode), LHS(LHS), RHS(RHS), IsNSW(IsNSW), IsNUW(IsNUW) {}
};

} // end anonymous namespace

/// Try to map \p V into a BinaryOp, and return \c None on failure.
static Optional<BinaryOp> MatchBinaryOp(Value *V, DominatorTree &DT) {
  auto *Op = dyn_cast<Operator>(V);
  if (!Op)
    return None;

  // Implementation detail: all the cleverness here should happen without
  // creating new SCEV expressions -- our caller knowns tricks to avoid creating
  // SCEV expressions when possible, and we should not break that.

  switch (Op->getOpcode()) {
  case Instruction::Add:
  case Instruction::Sub:
  case Instruction::Mul:
  case Instruction::UDiv:
  case Instruction::URem:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::AShr:
  case Instruction::Shl:
    // Directly representable binary operators.
    return BinaryOp(Op);

  case Instruction::Xor:
    if (auto *RHSC = dyn_cast<ConstantInt>(Op->getOperand(1)))
      // If the RHS of the xor is a signmask, then this is just an add.
      // Instcombine turns add of signmask into xor as a strength reduction step.
      if (RHSC->getValue().isSignMask())
        return BinaryOp(Instruction::Add, Op->getOperand(0), Op->getOperand(1));
    return BinaryOp(Op);

  case Instruction::LShr:
    // Turn logical shift right of a constant into a unsigned divide.
    if (ConstantInt *SA = dyn_cast<ConstantInt>(Op->getOperand(1))) {
      uint32_t BitWidth = cast<IntegerType>(Op->getType())->getBitWidth();

      // If the shift count is not less than the bitwidth, the result of
      // the shift is undefined. Don't try to analyze it, because the
      // resolution chosen here may differ from the resolution chosen in
      // other parts of the compiler.
      if (SA->getValue().ult(BitWidth)) {
        // lshr X, C  ==>  udiv X, (1 << C)
        Constant *X =
            ConstantInt::get(SA->getContext(),
                             APInt::getOneBitSet(BitWidth, SA->getZExtValue()));
        return BinaryOp(Instruction::UDiv, Op->getOperand(0), X);
      }
    }
    return BinaryOp(Op);

  case Instruction::ExtractValue: {
    // Recognize extraction of the arithmetic result (index 0) of an
    // overflow intrinsic.
    auto *EVI = cast<ExtractValueInst>(Op);
    if (EVI->getNumIndices() != 1 || EVI->getIndices()[0] != 0)
      break;

    auto *WO = dyn_cast<WithOverflowInst>(EVI->getAggregateOperand());
    if (!WO)
      break;

    Instruction::BinaryOps BinOp = WO->getBinaryOp();
    bool Signed = WO->isSigned();
    // TODO: Should add nuw/nsw flags for mul as well.
    if (BinOp == Instruction::Mul || !isOverflowIntrinsicNoWrap(WO, DT))
      return BinaryOp(BinOp, WO->getLHS(), WO->getRHS());

    // Now that we know that all uses of the arithmetic-result component of
    // CI are guarded by the overflow check, we can go ahead and pretend
    // that the arithmetic is non-overflowing.
    return BinaryOp(BinOp, WO->getLHS(), WO->getRHS(),
                    /* IsNSW = */ Signed, /* IsNUW = */ !Signed);
  }

  default:
    break;
  }

  // Recognise intrinsic loop.decrement.reg, and as this has exactly the same
  // semantics as a Sub, return a binary sub expression.
  if (auto *II = dyn_cast<IntrinsicInst>(V))
    if (II->getIntrinsicID() == Intrinsic::loop_decrement_reg)
      return BinaryOp(Instruction::Sub, II->getOperand(0), II->getOperand(1));

  return None;
}

/// Helper function to createAddRecFromPHIWithCasts. We have a phi
/// node whose symbolic (unknown) SCEV is \p SymbolicPHI, which is updated via
/// the loop backedge by a SCEVAddExpr, possibly also with a few casts on the
/// way. This function checks if \p Op, an operand of this SCEVAddExpr,
/// follows one of the following patterns:
/// Op == (SExt ix (Trunc iy (%SymbolicPHI) to ix) to iy)
/// Op == (ZExt ix (Trunc iy (%SymbolicPHI) to ix) to iy)
/// If the SCEV expression of \p Op conforms with one of the expected patterns
/// we return the type of the truncation operation, and indicate whether the
/// truncated type should be treated as signed/unsigned by setting
/// \p Signed to true/false, respectively.
static Type *isSimpleCastedPHI(const SCEV *Op, const SCEVUnknown *SymbolicPHI,
                               bool &Signed, ScalarEvolution &SE) {
  // The case where Op == SymbolicPHI (that is, with no type conversions on
  // the way) is handled by the regular add recurrence creating logic and
  // would have already been triggered in createAddRecForPHI. Reaching it here
  // means that createAddRecFromPHI had failed for this PHI before (e.g.,
  // because one of the other operands of the SCEVAddExpr updating this PHI is
  // not invariant).
4606321369Sdim // 4607327952Sdim // Here we look for the case where Op = (ext(trunc(SymbolicPHI))), and in 4608321369Sdim // this case predicates that allow us to prove that Op == SymbolicPHI will 4609321369Sdim // be added. 4610321369Sdim if (Op == SymbolicPHI) 4611321369Sdim return nullptr; 4612321369Sdim 4613321369Sdim unsigned SourceBits = SE.getTypeSizeInBits(SymbolicPHI->getType()); 4614321369Sdim unsigned NewBits = SE.getTypeSizeInBits(Op->getType()); 4615321369Sdim if (SourceBits != NewBits) 4616321369Sdim return nullptr; 4617321369Sdim 4618321369Sdim const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(Op); 4619321369Sdim const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(Op); 4620321369Sdim if (!SExt && !ZExt) 4621321369Sdim return nullptr; 4622321369Sdim const SCEVTruncateExpr *Trunc = 4623321369Sdim SExt ? dyn_cast<SCEVTruncateExpr>(SExt->getOperand()) 4624321369Sdim : dyn_cast<SCEVTruncateExpr>(ZExt->getOperand()); 4625321369Sdim if (!Trunc) 4626321369Sdim return nullptr; 4627321369Sdim const SCEV *X = Trunc->getOperand(); 4628321369Sdim if (X != SymbolicPHI) 4629321369Sdim return nullptr; 4630327952Sdim Signed = SExt != nullptr; 4631321369Sdim return Trunc->getType(); 4632321369Sdim} 4633321369Sdim 4634321369Sdimstatic const Loop *isIntegerLoopHeaderPHI(const PHINode *PN, LoopInfo &LI) { 4635321369Sdim if (!PN->getType()->isIntegerTy()) 4636321369Sdim return nullptr; 4637321369Sdim const Loop *L = LI.getLoopFor(PN->getParent()); 4638321369Sdim if (!L || L->getHeader() != PN->getParent()) 4639321369Sdim return nullptr; 4640321369Sdim return L; 4641321369Sdim} 4642321369Sdim 4643321369Sdim// Analyze \p SymbolicPHI, a SCEV expression of a phi node, and check if the 4644321369Sdim// computation that updates the phi follows the following pattern: 4645321369Sdim// (SExt/ZExt ix (Trunc iy (%SymbolicPHI) to ix) to iy) + InvariantAccum 4646321369Sdim// which correspond to a phi->trunc->sext/zext->add->phi update chain. 
4647321369Sdim// If so, try to see if it can be rewritten as an AddRecExpr under some 4648321369Sdim// Predicates. If successful, return them as a pair. Also cache the results 4649321369Sdim// of the analysis. 4650321369Sdim// 4651321369Sdim// Example usage scenario: 4652321369Sdim// Say the Rewriter is called for the following SCEV: 4653321369Sdim// 8 * ((sext i32 (trunc i64 %X to i32) to i64) + %Step) 4654321369Sdim// where: 4655321369Sdim// %X = phi i64 (%Start, %BEValue) 4656321369Sdim// It will visitMul->visitAdd->visitSExt->visitTrunc->visitUnknown(%X), 4657321369Sdim// and call this function with %SymbolicPHI = %X. 4658321369Sdim// 4659327952Sdim// The analysis will find that the value coming around the backedge has 4660321369Sdim// the following SCEV: 4661321369Sdim// BEValue = ((sext i32 (trunc i64 %X to i32) to i64) + %Step) 4662321369Sdim// Upon concluding that this matches the desired pattern, the function 4663321369Sdim// will return the pair {NewAddRec, SmallPredsVec} where: 4664321369Sdim// NewAddRec = {%Start,+,%Step} 4665321369Sdim// SmallPredsVec = {P1, P2, P3} as follows: 4666321369Sdim// P1(WrapPred): AR: {trunc(%Start),+,(trunc %Step)}<nsw> Flags: <nssw> 4667321369Sdim// P2(EqualPred): %Start == (sext i32 (trunc i64 %Start to i32) to i64) 4668321369Sdim// P3(EqualPred): %Step == (sext i32 (trunc i64 %Step to i32) to i64) 4669321369Sdim// The returned pair means that SymbolicPHI can be rewritten into NewAddRec 4670321369Sdim// under the predicates {P1,P2,P3}. 
4671321369Sdim// This predicated rewrite will be cached in PredicatedSCEVRewrites: 4672327952Sdim// PredicatedSCEVRewrites[{%X,L}] = {NewAddRec, {P1,P2,P3)} 4673321369Sdim// 4674321369Sdim// TODO's: 4675321369Sdim// 4676321369Sdim// 1) Extend the Induction descriptor to also support inductions that involve 4677327952Sdim// casts: When needed (namely, when we are called in the context of the 4678327952Sdim// vectorizer induction analysis), a Set of cast instructions will be 4679321369Sdim// populated by this method, and provided back to isInductionPHI. This is 4680321369Sdim// needed to allow the vectorizer to properly record them to be ignored by 4681321369Sdim// the cost model and to avoid vectorizing them (otherwise these casts, 4682327952Sdim// which are redundant under the runtime overflow checks, will be 4683327952Sdim// vectorized, which can be costly). 4684321369Sdim// 4685321369Sdim// 2) Support additional induction/PHISCEV patterns: We also want to support 4686327952Sdim// inductions where the sext-trunc / zext-trunc operations (partly) occur 4687321369Sdim// after the induction update operation (the induction increment): 4688321369Sdim// 4689321369Sdim// (Trunc iy (SExt/ZExt ix (%SymbolicPHI + InvariantAccum) to iy) to ix) 4690321369Sdim// which correspond to a phi->add->trunc->sext/zext->phi update chain. 4691321369Sdim// 4692321369Sdim// (Trunc iy ((SExt/ZExt ix (%SymbolicPhi) to iy) + InvariantAccum) to ix) 4693321369Sdim// which correspond to a phi->trunc->add->sext/zext->phi update chain. 4694321369Sdim// 4695321369Sdim// 3) Outline common code with createAddRecFromPHI to avoid duplication. 
Optional<std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>>>
ScalarEvolution::createAddRecFromPHIWithCastsImpl(const SCEVUnknown *SymbolicPHI) {
  SmallVector<const SCEVPredicate *, 3> Predicates;

  // *** Part1: Analyze if we have a phi-with-cast pattern for which we can
  // return an AddRec expression under some predicate.

  auto *PN = cast<PHINode>(SymbolicPHI->getValue());
  const Loop *L = isIntegerLoopHeaderPHI(PN, LI);
  assert(L && "Expecting an integer loop header phi");

  // The loop may have multiple entrances or multiple exits; we can analyze
  // this phi as an addrec if it has a unique entry value and a unique
  // backedge value.
  Value *BEValueV = nullptr, *StartValueV = nullptr;
  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
    Value *V = PN->getIncomingValue(i);
    if (L->contains(PN->getIncomingBlock(i))) {
      if (!BEValueV) {
        BEValueV = V;
      } else if (BEValueV != V) {
        // More than one distinct backedge value: give up.
        BEValueV = nullptr;
        break;
      }
    } else if (!StartValueV) {
      StartValueV = V;
    } else if (StartValueV != V) {
      // More than one distinct entry value: give up.
      StartValueV = nullptr;
      break;
    }
  }
  if (!BEValueV || !StartValueV)
    return None;

  const SCEV *BEValue = getSCEV(BEValueV);

  // If the value coming around the backedge is an add with the symbolic
  // value we just inserted, possibly with casts that we can ignore under
  // an appropriate runtime guard, then we found a simple induction variable!
  const auto *Add = dyn_cast<SCEVAddExpr>(BEValue);
  if (!Add)
    return None;

  // If there is a single occurrence of the symbolic value, possibly
  // casted, replace it with a recurrence.
  unsigned FoundIndex = Add->getNumOperands();
  Type *TruncTy = nullptr;
  // Signed is only written by isSimpleCastedPHI when it returns a non-null
  // TruncTy, which is the only case in which Signed is read below.
  bool Signed;
  for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
    if ((TruncTy =
             isSimpleCastedPHI(Add->getOperand(i), SymbolicPHI, Signed, *this)))
      if (FoundIndex == e) {
        FoundIndex = i;
        break;
      }

  if (FoundIndex == Add->getNumOperands())
    return None;

  // Create an add with everything but the specified operand.
  SmallVector<const SCEV *, 8> Ops;
  for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
    if (i != FoundIndex)
      Ops.push_back(Add->getOperand(i));
  const SCEV *Accum = getAddExpr(Ops);

  // The runtime checks will not be valid if the step amount is
  // varying inside the loop.
  if (!isLoopInvariant(Accum, L))
    return None;

  // *** Part2: Create the predicates

  // Analysis was successful: we have a phi-with-cast pattern for which we
  // can return an AddRec expression under the following predicates:
  //
  // P1: A Wrap predicate that guarantees that Trunc(Start) + i*Trunc(Accum)
  //     fits within the truncated type (does not overflow) for i = 0 to n-1.
  // P2: An Equal predicate that guarantees that
  //     Start = (Ext ix (Trunc iy (Start) to ix) to iy)
  // P3: An Equal predicate that guarantees that
  //     Accum = (Ext ix (Trunc iy (Accum) to ix) to iy)
  //
  // As we next prove, the above predicates guarantee that:
  //     Start + i*Accum = (Ext ix (Trunc iy ( Start + i*Accum ) to ix) to iy)
  //
  //
  // More formally, we want to prove that:
  //     Expr(i+1) = Start + (i+1) * Accum
  //               = (Ext ix (Trunc iy (Expr(i)) to ix) to iy) + Accum
  //
  // Given that:
  // 1) Expr(0) = Start
  // 2) Expr(1) = Start + Accum
  //            = (Ext ix (Trunc iy (Start) to ix) to iy) + Accum :: from P2
  // 3) Induction hypothesis (step i):
  //    Expr(i) = (Ext ix (Trunc iy (Expr(i-1)) to ix) to iy) + Accum
  //
  // Proof:
  //  Expr(i+1) =
  //   = Start + (i+1)*Accum
  //   = (Start + i*Accum) + Accum
  //   = Expr(i) + Accum
  //   = (Ext ix (Trunc iy (Expr(i-1)) to ix) to iy) + Accum + Accum
  //                                                             :: from step i
  //
  //   = (Ext ix (Trunc iy (Start + (i-1)*Accum) to ix) to iy) + Accum + Accum
  //
  //   = (Ext ix (Trunc iy (Start + (i-1)*Accum) to ix) to iy)
  //     + (Ext ix (Trunc iy (Accum) to ix) to iy)
  //     + Accum                                                 :: from P3
  //
  //   = (Ext ix (Trunc iy ((Start + (i-1)*Accum) + Accum) to ix) to iy)
  //     + Accum                            :: from P1: Ext(x)+Ext(y)=>Ext(x+y)
  //
  //   = (Ext ix (Trunc iy (Start + i*Accum) to ix) to iy) + Accum
  //   = (Ext ix (Trunc iy (Expr(i)) to ix) to iy) + Accum
  //
  // By induction, the same applies to all iterations 1<=i<n:
  //

  // Create a truncated addrec for which we will add a no overflow check (P1).
  const SCEV *StartVal = getSCEV(StartValueV);
  const SCEV *PHISCEV =
      getAddRecExpr(getTruncateExpr(StartVal, TruncTy),
                    getTruncateExpr(Accum, TruncTy), L, SCEV::FlagAnyWrap);

  // PHISCEV can be either a SCEVConstant or a SCEVAddRecExpr.
  // ex: If truncated Accum is 0 and StartVal is a constant, then PHISCEV
  // will be constant.
  //
  // If PHISCEV is a constant, then P1 degenerates into P2 or P3, so we don't
  // add P1.
  if (const auto *AR = dyn_cast<SCEVAddRecExpr>(PHISCEV)) {
    SCEVWrapPredicate::IncrementWrapFlags AddedFlags =
        Signed ? SCEVWrapPredicate::IncrementNSSW
               : SCEVWrapPredicate::IncrementNUSW;
    const SCEVPredicate *AddRecPred = getWrapPredicate(AR, AddedFlags);
    Predicates.push_back(AddRecPred);
  }

  // Create the Equal Predicates P2,P3:

  // It is possible that the predicates P2 and/or P3 are computable at
  // compile time due to StartVal and/or Accum being constants.
  // If either one is, then we can check that now and escape if either P2
  // or P3 is false.

  // Construct the extended SCEV: (Ext ix (Trunc iy (Expr) to ix) to iy)
  // for each of StartVal and Accum
  auto getExtendedExpr = [&](const SCEV *Expr,
                             bool CreateSignExtend) -> const SCEV * {
    assert(isLoopInvariant(Expr, L) && "Expr is expected to be invariant");
    const SCEV *TruncatedExpr = getTruncateExpr(Expr, TruncTy);
    const SCEV *ExtendedExpr =
        CreateSignExtend ? getSignExtendExpr(TruncatedExpr, Expr->getType())
                         : getZeroExtendExpr(TruncatedExpr, Expr->getType());
    return ExtendedExpr;
  };

  // Given:
  //   ExtendedExpr = (Ext ix (Trunc iy (Expr) to ix) to iy)
  //                = getExtendedExpr(Expr)
  // Determine whether the predicate P: Expr == ExtendedExpr
  // is known to be false at compile time
  auto PredIsKnownFalse = [&](const SCEV *Expr,
                              const SCEV *ExtendedExpr) -> bool {
    return Expr != ExtendedExpr &&
           isKnownPredicate(ICmpInst::ICMP_NE, Expr, ExtendedExpr);
  };

  const SCEV *StartExtended = getExtendedExpr(StartVal, Signed);
  if (PredIsKnownFalse(StartVal, StartExtended)) {
    LLVM_DEBUG(dbgs() << "P2 is compile-time false\n";);
    return None;
  }

  // The Step is always Signed (because the overflow checks are either
  // NSSW or NUSW)
  const SCEV *AccumExtended = getExtendedExpr(Accum, /*CreateSignExtend=*/true);
  if (PredIsKnownFalse(Accum, AccumExtended)) {
    LLVM_DEBUG(dbgs() << "P3 is compile-time false\n";);
    return None;
  }

  // Record an Equal predicate for Expr == ExtendedExpr unless it is already
  // known to hold at compile time.
  auto AppendPredicate = [&](const SCEV *Expr,
                             const SCEV *ExtendedExpr) -> void {
    if (Expr != ExtendedExpr &&
        !isKnownPredicate(ICmpInst::ICMP_EQ, Expr, ExtendedExpr)) {
      const SCEVPredicate *Pred = getEqualPredicate(Expr, ExtendedExpr);
      LLVM_DEBUG(dbgs() << "Added Predicate: " << *Pred);
      Predicates.push_back(Pred);
    }
  };

  AppendPredicate(StartVal, StartExtended);
  AppendPredicate(Accum, AccumExtended);

  // *** Part3: Predicates are ready. Now go ahead and create the new addrec in
  // which the casts had been folded away. The caller can rewrite SymbolicPHI
  // into NewAR if it will also add the runtime overflow checks specified in
  // Predicates.
  auto *NewAR = getAddRecExpr(StartVal, Accum, L, SCEV::FlagAnyWrap);

  std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>> PredRewrite =
      std::make_pair(NewAR, Predicates);
  // Remember the result of the analysis for this SCEV at this location.
  PredicatedSCEVRewrites[{SymbolicPHI, L}] = PredRewrite;
  return PredRewrite;
}

// Caching wrapper around createAddRecFromPHIWithCastsImpl: consults
// PredicatedSCEVRewrites first and records failures so the (expensive)
// analysis runs at most once per {PHI, Loop} pair.
Optional<std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>>>
ScalarEvolution::createAddRecFromPHIWithCasts(const SCEVUnknown *SymbolicPHI) {
  auto *PN = cast<PHINode>(SymbolicPHI->getValue());
  const Loop *L = isIntegerLoopHeaderPHI(PN, LI);
  if (!L)
    return None;

  // Check to see if we already analyzed this PHI.
  auto I = PredicatedSCEVRewrites.find({SymbolicPHI, L});
  if (I != PredicatedSCEVRewrites.end()) {
    std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>> Rewrite =
        I->second;
    // Analysis was done before and failed to create an AddRec:
    // (failure is cached as a self-mapping, see below).
    if (Rewrite.first == SymbolicPHI)
      return None;
    // Analysis was done before and succeeded to create an AddRec under
    // a predicate:
    assert(isa<SCEVAddRecExpr>(Rewrite.first) && "Expected an AddRec");
    assert(!(Rewrite.second).empty() && "Expected to find Predicates");
    return Rewrite;
  }

  Optional<std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>>>
      Rewrite = createAddRecFromPHIWithCastsImpl(SymbolicPHI);

  // Record in the cache that the analysis failed
  if (!Rewrite) {
    SmallVector<const SCEVPredicate *, 3> Predicates;
    PredicatedSCEVRewrites[{SymbolicPHI, L}] = {SymbolicPHI, Predicates};
    return None;
  }

  return Rewrite;
}

// FIXME: This utility is currently required because the Rewriter currently
// does not rewrite this expression:
// {0, +, (sext ix (trunc iy to ix) to iy)}
// into {0, +, %step},
// even when the following Equal predicate exists:
// "%step == (sext ix (trunc iy to ix) to iy)".
4948327952Sdimbool PredicatedScalarEvolution::areAddRecsEqualWithPreds( 4949327952Sdim const SCEVAddRecExpr *AR1, const SCEVAddRecExpr *AR2) const { 4950327952Sdim if (AR1 == AR2) 4951327952Sdim return true; 4952327952Sdim 4953327952Sdim auto areExprsEqual = [&](const SCEV *Expr1, const SCEV *Expr2) -> bool { 4954327952Sdim if (Expr1 != Expr2 && !Preds.implies(SE.getEqualPredicate(Expr1, Expr2)) && 4955327952Sdim !Preds.implies(SE.getEqualPredicate(Expr2, Expr1))) 4956327952Sdim return false; 4957327952Sdim return true; 4958327952Sdim }; 4959327952Sdim 4960327952Sdim if (!areExprsEqual(AR1->getStart(), AR2->getStart()) || 4961327952Sdim !areExprsEqual(AR1->getStepRecurrence(SE), AR2->getStepRecurrence(SE))) 4962327952Sdim return false; 4963327952Sdim return true; 4964327952Sdim} 4965327952Sdim 4966321369Sdim/// A helper function for createAddRecFromPHI to handle simple cases. 4967321369Sdim/// 4968321369Sdim/// This function tries to find an AddRec expression for the simplest (yet most 4969321369Sdim/// common) cases: PN = PHI(Start, OP(Self, LoopInvariant)). 4970321369Sdim/// If it fails, createAddRecFromPHI will use a more general, but slow, 4971321369Sdim/// technique for finding the AddRec expression. 
const SCEV *ScalarEvolution::createSimpleAffineAddRec(PHINode *PN,
                                                      Value *BEValueV,
                                                      Value *StartValueV) {
  const Loop *L = LI.getLoopFor(PN->getParent());
  assert(L && L->getHeader() == PN->getParent());
  assert(BEValueV && StartValueV);

  auto BO = MatchBinaryOp(BEValueV, DT);
  if (!BO)
    return nullptr;

  // Only the PN = PHI(Start, PN + Invariant) shape is handled here.
  if (BO->Opcode != Instruction::Add)
    return nullptr;

  const SCEV *Accum = nullptr;
  if (BO->LHS == PN && L->isLoopInvariant(BO->RHS))
    Accum = getSCEV(BO->RHS);
  else if (BO->RHS == PN && L->isLoopInvariant(BO->LHS))
    Accum = getSCEV(BO->LHS);

  if (!Accum)
    return nullptr;

  // Transfer the IR-level nuw/nsw flags of the increment onto the AddRec.
  SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap;
  if (BO->IsNUW)
    Flags = setFlags(Flags, SCEV::FlagNUW);
  if (BO->IsNSW)
    Flags = setFlags(Flags, SCEV::FlagNSW);

  const SCEV *StartVal = getSCEV(StartValueV);
  const SCEV *PHISCEV = getAddRecExpr(StartVal, Accum, L, Flags);

  ValueExprMap[SCEVCallbackVH(PN, this)] = PHISCEV;

  // We can add Flags to the post-inc expression only if we
  // know that it is *undefined behavior* for BEValueV to
  // overflow.
  if (auto *BEInst = dyn_cast<Instruction>(BEValueV))
    if (isLoopInvariant(Accum, L) && isAddRecNeverPoison(BEInst, L))
      (void)getAddRecExpr(getAddExpr(StartVal, Accum), Accum, L, Flags);

  return PHISCEV;
}

/// Try to express a loop-header PHI node as an AddRec; returns nullptr when
/// no recurrence can be found.
const SCEV *ScalarEvolution::createAddRecFromPHI(PHINode *PN) {
  const Loop *L = LI.getLoopFor(PN->getParent());
  if (!L || L->getHeader() != PN->getParent())
    return nullptr;

  // The loop may have multiple entrances or multiple exits; we can analyze
  // this phi as an addrec if it has a unique entry value and a unique
  // backedge value.
  Value *BEValueV = nullptr, *StartValueV = nullptr;
  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
    Value *V = PN->getIncomingValue(i);
    if (L->contains(PN->getIncomingBlock(i))) {
      if (!BEValueV) {
        BEValueV = V;
      } else if (BEValueV != V) {
        BEValueV = nullptr;
        break;
      }
    } else if (!StartValueV) {
      StartValueV = V;
    } else if (StartValueV != V) {
      StartValueV = nullptr;
      break;
    }
  }
  if (!BEValueV || !StartValueV)
    return nullptr;

  assert(ValueExprMap.find_as(PN) == ValueExprMap.end() &&
         "PHI node already processed?");

  // First, try to find AddRec expression without creating a fictitious symbolic
  // value for PN.
  if (auto *S = createSimpleAffineAddRec(PN, BEValueV, StartValueV))
    return S;

  // Handle PHI node value symbolically.
  const SCEV *SymbolicName = getUnknown(PN);
  ValueExprMap.insert({SCEVCallbackVH(PN, this), SymbolicName});

  // Using this symbolic name for the PHI, analyze the value coming around
  // the back-edge.
  const SCEV *BEValue = getSCEV(BEValueV);

  // NOTE: If BEValue is loop invariant, we know that the PHI node just
  // has a special value for the first iteration of the loop.

  // If the value coming around the backedge is an add with the symbolic
  // value we just inserted, then we found a simple induction variable!
  if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(BEValue)) {
    // If there is a single occurrence of the symbolic value, replace it
    // with a recurrence.
    unsigned FoundIndex = Add->getNumOperands();
    for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
      if (Add->getOperand(i) == SymbolicName)
        if (FoundIndex == e) {
          FoundIndex = i;
          break;
        }

    if (FoundIndex != Add->getNumOperands()) {
      // Create an add with everything but the specified operand.
      SmallVector<const SCEV *, 8> Ops;
      for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i)
        if (i != FoundIndex)
          Ops.push_back(SCEVBackedgeConditionFolder::rewrite(Add->getOperand(i),
                                                             L, *this));
      const SCEV *Accum = getAddExpr(Ops);

      // This is not a valid addrec if the step amount is varying each
      // loop iteration, but is not itself an addrec in this loop.
      if (isLoopInvariant(Accum, L) ||
          (isa<SCEVAddRecExpr>(Accum) &&
           cast<SCEVAddRecExpr>(Accum)->getLoop() == L)) {
        SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap;

        if (auto BO = MatchBinaryOp(BEValueV, DT)) {
          if (BO->Opcode == Instruction::Add && BO->LHS == PN) {
            if (BO->IsNUW)
              Flags = setFlags(Flags, SCEV::FlagNUW);
            if (BO->IsNSW)
              Flags = setFlags(Flags, SCEV::FlagNSW);
          }
        } else if (GEPOperator *GEP = dyn_cast<GEPOperator>(BEValueV)) {
          // If the increment is an inbounds GEP, then we know the address
          // space cannot be wrapped around. We cannot make any guarantee
          // about signed or unsigned overflow because pointers are
          // unsigned but we may have a negative index from the base
          // pointer. We can guarantee that no unsigned wrap occurs if the
          // indices form a positive value.
          if (GEP->isInBounds() && GEP->getOperand(0) == PN) {
            Flags = setFlags(Flags, SCEV::FlagNW);

            const SCEV *Ptr = getSCEV(GEP->getPointerOperand());
            if (isKnownPositive(getMinusSCEV(getSCEV(GEP), Ptr)))
              Flags = setFlags(Flags, SCEV::FlagNUW);
          }

          // We cannot transfer nuw and nsw flags from subtraction
          // operations -- sub nuw X, Y is not the same as add nuw X, -Y
          // for instance.
        }

        const SCEV *StartVal = getSCEV(StartValueV);
        const SCEV *PHISCEV = getAddRecExpr(StartVal, Accum, L, Flags);

        // Okay, for the entire analysis of this edge we assumed the PHI
        // to be symbolic. We now need to go back and purge all of the
        // entries for the scalars that use the symbolic expression.
        forgetSymbolicName(PN, SymbolicName);
        ValueExprMap[SCEVCallbackVH(PN, this)] = PHISCEV;

        // We can add Flags to the post-inc expression only if we
        // know that it is *undefined behavior* for BEValueV to
        // overflow.
        if (auto *BEInst = dyn_cast<Instruction>(BEValueV))
          if (isLoopInvariant(Accum, L) && isAddRecNeverPoison(BEInst, L))
            (void)getAddRecExpr(getAddExpr(StartVal, Accum), Accum, L, Flags);

        return PHISCEV;
      }
    }
  } else {
    // Otherwise, this could be a loop like this:
    //   i = 0;  for (j = 1; ..; ++j) { ....  i = j; }
    // In this case, j = {1,+,1}  and BEValue is j.
    // Because the other in-value of i (0) fits the evolution of BEValue
    // i really is an addrec evolution.
    //
    // We can generalize this saying that i is the shifted value of BEValue
    // by one iteration:
    //   PHI(f(0), f({1,+,1})) --> f({0,+,1})
    const SCEV *Shifted = SCEVShiftRewriter::rewrite(BEValue, L, *this);
    const SCEV *Start = SCEVInitRewriter::rewrite(Shifted, L, *this, false);
    if (Shifted != getCouldNotCompute() &&
        Start != getCouldNotCompute()) {
      const SCEV *StartVal = getSCEV(StartValueV);
      if (Start == StartVal) {
        // Okay, for the entire analysis of this edge we assumed the PHI
        // to be symbolic. We now need to go back and purge all of the
        // entries for the scalars that use the symbolic expression.
        forgetSymbolicName(PN, SymbolicName);
        ValueExprMap[SCEVCallbackVH(PN, this)] = Shifted;
        return Shifted;
      }
    }
  }

  // Remove the temporary PHI node SCEV that has been inserted while intending
  // to create an AddRecExpr for this PHI node. We cannot keep this temporary
  // as it will prevent later (possibly simpler) SCEV expressions to be added
  // to the ValueExprMap.
  eraseValueFromMap(PN);

  return nullptr;
}

// Checks if the SCEV S is available at BB. S is considered available at BB
// if S can be materialized at BB without introducing a fault.
static bool IsAvailableOnEntry(const Loop *L, DominatorTree &DT, const SCEV *S,
                               BasicBlock *BB) {
  // Visitor for SCEVTraversal that flips Available to false as soon as a
  // sub-expression that cannot be materialized at BB is encountered.
  struct CheckAvailable {
    bool TraversalDone = false;
    bool Available = true;

    const Loop *L = nullptr;  // The loop BB is in (can be nullptr)
    BasicBlock *BB = nullptr;
    DominatorTree &DT;

    CheckAvailable(const Loop *L, BasicBlock *BB, DominatorTree &DT)
      : L(L), BB(BB), DT(DT) {}

    // Record failure and stop the traversal early.
    bool setUnavailable() {
      TraversalDone = true;
      Available = false;
      return false;
    }

    bool follow(const SCEV *S) {
      switch (S->getSCEVType()) {
      case scConstant: case scTruncate: case scZeroExtend: case scSignExtend:
      case scAddExpr: case scMulExpr: case scUMaxExpr: case scSMaxExpr:
      case scUMinExpr:
      case scSMinExpr:
        // These expressions are available if their operand(s) is/are.
        return true;

      case scAddRecExpr: {
        // We allow add recurrences that are on the loop BB is in, or some
        // outer loop.  This guarantees availability because the value of the
        // add recurrence at BB is simply the "current" value of the induction
        // variable.  We can relax this in the future; for instance an add
        // recurrence on a sibling dominating loop is also available at BB.
        const auto *ARLoop = cast<SCEVAddRecExpr>(S)->getLoop();
        if (L && (ARLoop == L || ARLoop->contains(L)))
          return true;

        return setUnavailable();
      }

      case scUnknown: {
        // For SCEVUnknown, we check for simple dominance.
        const auto *SU = cast<SCEVUnknown>(S);
        Value *V = SU->getValue();

        if (isa<Argument>(V))
          return false;

        if (isa<Instruction>(V) && DT.dominates(cast<Instruction>(V), BB))
          return false;

        return setUnavailable();
      }

      case scUDivExpr:
      case scCouldNotCompute:
        // We do not try to be smart about these at all.
        return setUnavailable();
      }
      llvm_unreachable("switch should be fully covered!");
    }

    bool isDone() { return TraversalDone; }
  };

  CheckAvailable CA(L, BB, DT);
  SCEVTraversal<CheckAvailable> ST(CA);

  ST.visitAll(S);
  return CA.Available;
}

// Try to match a control flow sequence that branches out at BI and merges back
// at Merge into a "C ? LHS : RHS" select pattern.  Return true on a successful
// match.
static bool BrPHIToSelect(DominatorTree &DT, BranchInst *BI, PHINode *Merge,
                          Value *&C, Value *&LHS, Value *&RHS) {
  C = BI->getCondition();

  BasicBlockEdge LeftEdge(BI->getParent(), BI->getSuccessor(0));
  BasicBlockEdge RightEdge(BI->getParent(), BI->getSuccessor(1));

  if (!LeftEdge.isSingleEdge())
    return false;

  assert(RightEdge.isSingleEdge() && "Follows from LeftEdge.isSingleEdge()");

  Use &LeftUse = Merge->getOperandUse(0);
  Use &RightUse = Merge->getOperandUse(1);

  if (DT.dominates(LeftEdge, LeftUse) && DT.dominates(RightEdge, RightUse)) {
    LHS = LeftUse;
    RHS = RightUse;
    return true;
  }

  if (DT.dominates(LeftEdge, RightUse) && DT.dominates(RightEdge, LeftUse)) {
    LHS = RightUse;
    RHS = LeftUse;
    return true;
  }

  return false;
}

/// Try to model a two-entry PHI whose incoming edges form a diamond as an
/// equivalent select, so it can be handled by createNodeForSelectOrPHI.
const SCEV *ScalarEvolution::createNodeFromSelectLikePHI(PHINode *PN) {
  auto IsReachable =
      [&](BasicBlock *BB) { return DT.isReachableFromEntry(BB); };
  if (PN->getNumIncomingValues() == 2 && all_of(PN->blocks(), IsReachable)) {
    const Loop *L = LI.getLoopFor(PN->getParent());

    // We don't want to break LCSSA, even in a SCEV expression tree.
    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
      if (LI.getLoopFor(PN->getIncomingBlock(i)) != L)
        return nullptr;

    // Try to match
    //
    //  br %cond, label %left, label %right
    // left:
    //  br label %merge
    // right:
    //  br label %merge
    // merge:
    //  V = phi [ %x, %left ], [ %y, %right ]
    //
    // as "select %cond, %x, %y"

    BasicBlock *IDom = DT[PN->getParent()]->getIDom()->getBlock();
    assert(IDom && "At least the entry block should dominate PN");

    auto *BI = dyn_cast<BranchInst>(IDom->getTerminator());
    Value *Cond = nullptr, *LHS = nullptr, *RHS = nullptr;

    if (BI && BI->isConditional() &&
        BrPHIToSelect(DT, BI, PN, Cond, LHS, RHS) &&
        IsAvailableOnEntry(L, DT, getSCEV(LHS), PN->getParent()) &&
        IsAvailableOnEntry(L, DT, getSCEV(RHS), PN->getParent()))
      return createNodeForSelectOrPHI(PN, Cond, LHS, RHS);
  }

  return nullptr;
}

const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
  if (const SCEV *S = createAddRecFromPHI(PN))
    return S;

  if (const SCEV *S = createNodeFromSelectLikePHI(PN))
    return S;

  // If the PHI has a single incoming value, follow that value, unless the
  // PHI's incoming blocks are in a different loop, in which case doing so
  // risks breaking LCSSA form. Instcombine would normally zap these, but
  // it doesn't have DominatorTree information, so it may miss cases.
  if (Value *V = SimplifyInstruction(PN, {getDataLayout(), &TLI, &DT, &AC}))
    if (LI.replacementPreservesLCSSAForm(PN, V))
      return getSCEV(V);

  // If it's not a loop phi, we can't handle it yet.
  return getUnknown(PN);
}

/// Build a SCEV for a select (or a PHI already proven select-like): fold
/// constant conditions, then try to recognize smax/smin/umax/umin patterns;
/// falls back to SCEVUnknown.
const SCEV *ScalarEvolution::createNodeForSelectOrPHI(Instruction *I,
                                                      Value *Cond,
                                                      Value *TrueVal,
                                                      Value *FalseVal) {
  // Handle "constant" branch or select. This can occur for instance when a
  // loop pass transforms an inner loop and moves on to process the outer loop.
  if (auto *CI = dyn_cast<ConstantInt>(Cond))
    return getSCEV(CI->isOne() ? TrueVal : FalseVal);

  // Try to match some simple smax or umax patterns.
  auto *ICI = dyn_cast<ICmpInst>(Cond);
  if (!ICI)
    return getUnknown(I);

  Value *LHS = ICI->getOperand(0);
  Value *RHS = ICI->getOperand(1);

  switch (ICI->getPredicate()) {
  case ICmpInst::ICMP_SLT:
  case ICmpInst::ICMP_SLE:
    // Normalize "<" to ">" by swapping the compared operands, then share the
    // SGT/SGE handling below.
    std::swap(LHS, RHS);
    LLVM_FALLTHROUGH;
  case ICmpInst::ICMP_SGT:
  case ICmpInst::ICMP_SGE:
    // a >s b ? a+x : b+x  ->  smax(a, b)+x
    // a >s b ? b+x : a+x  ->  smin(a, b)+x
    if (getTypeSizeInBits(LHS->getType()) <= getTypeSizeInBits(I->getType())) {
      const SCEV *LS = getNoopOrSignExtend(getSCEV(LHS), I->getType());
      const SCEV *RS = getNoopOrSignExtend(getSCEV(RHS), I->getType());
      const SCEV *LA = getSCEV(TrueVal);
      const SCEV *RA = getSCEV(FalseVal);
      const SCEV *LDiff = getMinusSCEV(LA, LS);
      const SCEV *RDiff = getMinusSCEV(RA, RS);
      if (LDiff == RDiff)
        return getAddExpr(getSMaxExpr(LS, RS), LDiff);
      LDiff = getMinusSCEV(LA, RS);
      RDiff = getMinusSCEV(RA, LS);
      if (LDiff == RDiff)
        return getAddExpr(getSMinExpr(LS, RS), LDiff);
    }
    break;
  case ICmpInst::ICMP_ULT:
  case ICmpInst::ICMP_ULE:
    // Same normalization as the signed case above.
    std::swap(LHS, RHS);
    LLVM_FALLTHROUGH;
  case ICmpInst::ICMP_UGT:
  case ICmpInst::ICMP_UGE:
    // a >u b ? a+x : b+x  ->  umax(a, b)+x
    // a >u b ? b+x : a+x  ->  umin(a, b)+x
    if (getTypeSizeInBits(LHS->getType()) <= getTypeSizeInBits(I->getType())) {
      const SCEV *LS = getNoopOrZeroExtend(getSCEV(LHS), I->getType());
      const SCEV *RS = getNoopOrZeroExtend(getSCEV(RHS), I->getType());
      const SCEV *LA = getSCEV(TrueVal);
      const SCEV *RA = getSCEV(FalseVal);
      const SCEV *LDiff = getMinusSCEV(LA, LS);
      const SCEV *RDiff = getMinusSCEV(RA, RS);
      if (LDiff == RDiff)
        return getAddExpr(getUMaxExpr(LS, RS), LDiff);
      LDiff = getMinusSCEV(LA, RS);
      RDiff = getMinusSCEV(RA, LS);
      if (LDiff == RDiff)
        return getAddExpr(getUMinExpr(LS, RS), LDiff);
    }
    break;
  case ICmpInst::ICMP_NE:
    // n != 0 ? n+x : 1+x  ->  umax(n, 1)+x
    if (getTypeSizeInBits(LHS->getType()) <= getTypeSizeInBits(I->getType()) &&
        isa<ConstantInt>(RHS) && cast<ConstantInt>(RHS)->isZero()) {
      const SCEV *One = getOne(I->getType());
      const SCEV *LS = getNoopOrZeroExtend(getSCEV(LHS), I->getType());
      const SCEV *LA = getSCEV(TrueVal);
      const SCEV *RA = getSCEV(FalseVal);
      const SCEV *LDiff = getMinusSCEV(LA, LS);
      const SCEV *RDiff = getMinusSCEV(RA, One);
      if (LDiff == RDiff)
        return getAddExpr(getUMaxExpr(One, LS), LDiff);
    }
    break;
  case ICmpInst::ICMP_EQ:
    // n == 0 ? 1+x : n+x  ->  umax(n, 1)+x
    if (getTypeSizeInBits(LHS->getType()) <= getTypeSizeInBits(I->getType()) &&
        isa<ConstantInt>(RHS) && cast<ConstantInt>(RHS)->isZero()) {
      const SCEV *One = getOne(I->getType());
      const SCEV *LS = getNoopOrZeroExtend(getSCEV(LHS), I->getType());
      const SCEV *LA = getSCEV(TrueVal);
      const SCEV *RA = getSCEV(FalseVal);
      const SCEV *LDiff = getMinusSCEV(LA, One);
      const SCEV *RDiff = getMinusSCEV(RA, LS);
      if (LDiff == RDiff)
        return getAddExpr(getUMaxExpr(One, LS), LDiff);
    }
    break;
  default:
    break;
  }

  return getUnknown(I);
}

/// Expand GEP instructions into add and multiply operations. This allows them
/// to be analyzed by regular SCEV code.
const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) {
  // Don't attempt to analyze GEPs over unsized objects.
5442309124Sdim if (!GEP->getSourceElementType()->isSized()) 5443261991Sdim return getUnknown(GEP); 5444193323Sed 5445288943Sdim SmallVector<const SCEV *, 4> IndexExprs; 5446288943Sdim for (auto Index = GEP->idx_begin(); Index != GEP->idx_end(); ++Index) 5447288943Sdim IndexExprs.push_back(getSCEV(*Index)); 5448314564Sdim return getGEPExpr(GEP, IndexExprs); 5449193323Sed} 5450193323Sed 5451321369Sdimuint32_t ScalarEvolution::GetMinTrailingZerosImpl(const SCEV *S) { 5452193323Sed if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) 5453296417Sdim return C->getAPInt().countTrailingZeros(); 5454193323Sed 5455193323Sed if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(S)) 5456194612Sed return std::min(GetMinTrailingZeros(T->getOperand()), 5457194612Sed (uint32_t)getTypeSizeInBits(T->getType())); 5458193323Sed 5459193323Sed if (const SCEVZeroExtendExpr *E = dyn_cast<SCEVZeroExtendExpr>(S)) { 5460194612Sed uint32_t OpRes = GetMinTrailingZeros(E->getOperand()); 5461321369Sdim return OpRes == getTypeSizeInBits(E->getOperand()->getType()) 5462321369Sdim ? getTypeSizeInBits(E->getType()) 5463321369Sdim : OpRes; 5464193323Sed } 5465193323Sed 5466193323Sed if (const SCEVSignExtendExpr *E = dyn_cast<SCEVSignExtendExpr>(S)) { 5467194612Sed uint32_t OpRes = GetMinTrailingZeros(E->getOperand()); 5468321369Sdim return OpRes == getTypeSizeInBits(E->getOperand()->getType()) 5469321369Sdim ? getTypeSizeInBits(E->getType()) 5470321369Sdim : OpRes; 5471193323Sed } 5472193323Sed 5473193323Sed if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(S)) { 5474193323Sed // The result is the min of all operands results. 
5475194612Sed uint32_t MinOpRes = GetMinTrailingZeros(A->getOperand(0)); 5476193323Sed for (unsigned i = 1, e = A->getNumOperands(); MinOpRes && i != e; ++i) 5477194612Sed MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(A->getOperand(i))); 5478193323Sed return MinOpRes; 5479193323Sed } 5480193323Sed 5481193323Sed if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(S)) { 5482193323Sed // The result is the sum of all operands results. 5483194612Sed uint32_t SumOpRes = GetMinTrailingZeros(M->getOperand(0)); 5484194612Sed uint32_t BitWidth = getTypeSizeInBits(M->getType()); 5485193323Sed for (unsigned i = 1, e = M->getNumOperands(); 5486193323Sed SumOpRes != BitWidth && i != e; ++i) 5487321369Sdim SumOpRes = 5488321369Sdim std::min(SumOpRes + GetMinTrailingZeros(M->getOperand(i)), BitWidth); 5489193323Sed return SumOpRes; 5490193323Sed } 5491193323Sed 5492193323Sed if (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(S)) { 5493193323Sed // The result is the min of all operands results. 5494194612Sed uint32_t MinOpRes = GetMinTrailingZeros(A->getOperand(0)); 5495193323Sed for (unsigned i = 1, e = A->getNumOperands(); MinOpRes && i != e; ++i) 5496194612Sed MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(A->getOperand(i))); 5497193323Sed return MinOpRes; 5498193323Sed } 5499193323Sed 5500193323Sed if (const SCEVSMaxExpr *M = dyn_cast<SCEVSMaxExpr>(S)) { 5501193323Sed // The result is the min of all operands results. 5502194612Sed uint32_t MinOpRes = GetMinTrailingZeros(M->getOperand(0)); 5503193323Sed for (unsigned i = 1, e = M->getNumOperands(); MinOpRes && i != e; ++i) 5504194612Sed MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(M->getOperand(i))); 5505193323Sed return MinOpRes; 5506193323Sed } 5507193323Sed 5508193323Sed if (const SCEVUMaxExpr *M = dyn_cast<SCEVUMaxExpr>(S)) { 5509193323Sed // The result is the min of all operands results. 
5510194612Sed uint32_t MinOpRes = GetMinTrailingZeros(M->getOperand(0)); 5511193323Sed for (unsigned i = 1, e = M->getNumOperands(); MinOpRes && i != e; ++i) 5512194612Sed MinOpRes = std::min(MinOpRes, GetMinTrailingZeros(M->getOperand(i))); 5513193323Sed return MinOpRes; 5514193323Sed } 5515193323Sed 5516194612Sed if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) { 5517194612Sed // For a SCEVUnknown, ask ValueTracking. 5518321369Sdim KnownBits Known = computeKnownBits(U->getValue(), getDataLayout(), 0, &AC, nullptr, &DT); 5519321369Sdim return Known.countMinTrailingZeros(); 5520194612Sed } 5521194612Sed 5522194612Sed // SCEVUDivExpr 5523193323Sed return 0; 5524193323Sed} 5525193323Sed 5526321369Sdimuint32_t ScalarEvolution::GetMinTrailingZeros(const SCEV *S) { 5527321369Sdim auto I = MinTrailingZerosCache.find(S); 5528321369Sdim if (I != MinTrailingZerosCache.end()) 5529321369Sdim return I->second; 5530321369Sdim 5531321369Sdim uint32_t Result = GetMinTrailingZerosImpl(S); 5532321369Sdim auto InsertPair = MinTrailingZerosCache.insert({S, Result}); 5533321369Sdim assert(InsertPair.second && "Should insert a new key"); 5534321369Sdim return InsertPair.first->second; 5535321369Sdim} 5536321369Sdim 5537309124Sdim/// Helper method to assign a range to V from metadata present in the IR. 5538280031Sdimstatic Optional<ConstantRange> GetRangeFromMetadata(Value *V) { 5539296417Sdim if (Instruction *I = dyn_cast<Instruction>(V)) 5540296417Sdim if (MDNode *MD = I->getMetadata(LLVMContext::MD_range)) 5541296417Sdim return getConstantRangeFromMetadata(*MD); 5542280031Sdim 5543280031Sdim return None; 5544280031Sdim} 5545280031Sdim 5546309124Sdim/// Determine the range for a particular SCEV. If SignHint is 5547288943Sdim/// HINT_RANGE_UNSIGNED (resp. HINT_RANGE_SIGNED) then getRange prefers ranges 5548288943Sdim/// with a "cleaner" unsigned (resp. signed) representation. 
5549321369Sdimconst ConstantRange & 5550321369SdimScalarEvolution::getRangeRef(const SCEV *S, 5551321369Sdim ScalarEvolution::RangeSignHint SignHint) { 5552288943Sdim DenseMap<const SCEV *, ConstantRange> &Cache = 5553288943Sdim SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED ? UnsignedRanges 5554288943Sdim : SignedRanges; 5555353358Sdim ConstantRange::PreferredRangeType RangeType = 5556353358Sdim SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED 5557353358Sdim ? ConstantRange::Unsigned : ConstantRange::Signed; 5558288943Sdim 5559218893Sdim // See if we've computed this range already. 5560288943Sdim DenseMap<const SCEV *, ConstantRange>::iterator I = Cache.find(S); 5561288943Sdim if (I != Cache.end()) 5562218893Sdim return I->second; 5563194612Sed 5564194612Sed if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) 5565296417Sdim return setRange(C, SignHint, ConstantRange(C->getAPInt())); 5566194612Sed 5567203954Srdivacky unsigned BitWidth = getTypeSizeInBits(S->getType()); 5568203954Srdivacky ConstantRange ConservativeResult(BitWidth, /*isFullSet=*/true); 5569360784Sdim using OBO = OverflowingBinaryOperator; 5570203954Srdivacky 5571288943Sdim // If the value has known zeros, the maximum value will have those known zeros 5572288943Sdim // as well. 
5573203954Srdivacky uint32_t TZ = GetMinTrailingZeros(S); 5574288943Sdim if (TZ != 0) { 5575288943Sdim if (SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED) 5576288943Sdim ConservativeResult = 5577288943Sdim ConstantRange(APInt::getMinValue(BitWidth), 5578288943Sdim APInt::getMaxValue(BitWidth).lshr(TZ).shl(TZ) + 1); 5579288943Sdim else 5580288943Sdim ConservativeResult = ConstantRange( 5581288943Sdim APInt::getSignedMinValue(BitWidth), 5582288943Sdim APInt::getSignedMaxValue(BitWidth).ashr(TZ).shl(TZ) + 1); 5583288943Sdim } 5584203954Srdivacky 5585198090Srdivacky if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { 5586321369Sdim ConstantRange X = getRangeRef(Add->getOperand(0), SignHint); 5587360784Sdim unsigned WrapType = OBO::AnyWrap; 5588360784Sdim if (Add->hasNoSignedWrap()) 5589360784Sdim WrapType |= OBO::NoSignedWrap; 5590360784Sdim if (Add->hasNoUnsignedWrap()) 5591360784Sdim WrapType |= OBO::NoUnsignedWrap; 5592198090Srdivacky for (unsigned i = 1, e = Add->getNumOperands(); i != e; ++i) 5593360784Sdim X = X.addWithNoWrap(getRangeRef(Add->getOperand(i), SignHint), 5594360784Sdim WrapType, RangeType); 5595353358Sdim return setRange(Add, SignHint, 5596353358Sdim ConservativeResult.intersectWith(X, RangeType)); 5597194612Sed } 5598194612Sed 5599198090Srdivacky if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) { 5600321369Sdim ConstantRange X = getRangeRef(Mul->getOperand(0), SignHint); 5601198090Srdivacky for (unsigned i = 1, e = Mul->getNumOperands(); i != e; ++i) 5602321369Sdim X = X.multiply(getRangeRef(Mul->getOperand(i), SignHint)); 5603353358Sdim return setRange(Mul, SignHint, 5604353358Sdim ConservativeResult.intersectWith(X, RangeType)); 5605198090Srdivacky } 5606198090Srdivacky 5607198090Srdivacky if (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(S)) { 5608321369Sdim ConstantRange X = getRangeRef(SMax->getOperand(0), SignHint); 5609198090Srdivacky for (unsigned i = 1, e = SMax->getNumOperands(); i != e; ++i) 5610321369Sdim X = 
X.smax(getRangeRef(SMax->getOperand(i), SignHint)); 5611353358Sdim return setRange(SMax, SignHint, 5612353358Sdim ConservativeResult.intersectWith(X, RangeType)); 5613198090Srdivacky } 5614198090Srdivacky 5615198090Srdivacky if (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(S)) { 5616321369Sdim ConstantRange X = getRangeRef(UMax->getOperand(0), SignHint); 5617198090Srdivacky for (unsigned i = 1, e = UMax->getNumOperands(); i != e; ++i) 5618321369Sdim X = X.umax(getRangeRef(UMax->getOperand(i), SignHint)); 5619353358Sdim return setRange(UMax, SignHint, 5620353358Sdim ConservativeResult.intersectWith(X, RangeType)); 5621198090Srdivacky } 5622198090Srdivacky 5623360784Sdim if (const SCEVSMinExpr *SMin = dyn_cast<SCEVSMinExpr>(S)) { 5624360784Sdim ConstantRange X = getRangeRef(SMin->getOperand(0), SignHint); 5625360784Sdim for (unsigned i = 1, e = SMin->getNumOperands(); i != e; ++i) 5626360784Sdim X = X.smin(getRangeRef(SMin->getOperand(i), SignHint)); 5627360784Sdim return setRange(SMin, SignHint, 5628360784Sdim ConservativeResult.intersectWith(X, RangeType)); 5629360784Sdim } 5630360784Sdim 5631360784Sdim if (const SCEVUMinExpr *UMin = dyn_cast<SCEVUMinExpr>(S)) { 5632360784Sdim ConstantRange X = getRangeRef(UMin->getOperand(0), SignHint); 5633360784Sdim for (unsigned i = 1, e = UMin->getNumOperands(); i != e; ++i) 5634360784Sdim X = X.umin(getRangeRef(UMin->getOperand(i), SignHint)); 5635360784Sdim return setRange(UMin, SignHint, 5636360784Sdim ConservativeResult.intersectWith(X, RangeType)); 5637360784Sdim } 5638360784Sdim 5639198090Srdivacky if (const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) { 5640321369Sdim ConstantRange X = getRangeRef(UDiv->getLHS(), SignHint); 5641321369Sdim ConstantRange Y = getRangeRef(UDiv->getRHS(), SignHint); 5642288943Sdim return setRange(UDiv, SignHint, 5643353358Sdim ConservativeResult.intersectWith(X.udiv(Y), RangeType)); 5644198090Srdivacky } 5645198090Srdivacky 5646198090Srdivacky if (const SCEVZeroExtendExpr *ZExt = 
dyn_cast<SCEVZeroExtendExpr>(S)) { 5647321369Sdim ConstantRange X = getRangeRef(ZExt->getOperand(), SignHint); 5648288943Sdim return setRange(ZExt, SignHint, 5649353358Sdim ConservativeResult.intersectWith(X.zeroExtend(BitWidth), 5650353358Sdim RangeType)); 5651198090Srdivacky } 5652198090Srdivacky 5653198090Srdivacky if (const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(S)) { 5654321369Sdim ConstantRange X = getRangeRef(SExt->getOperand(), SignHint); 5655288943Sdim return setRange(SExt, SignHint, 5656353358Sdim ConservativeResult.intersectWith(X.signExtend(BitWidth), 5657353358Sdim RangeType)); 5658198090Srdivacky } 5659198090Srdivacky 5660198090Srdivacky if (const SCEVTruncateExpr *Trunc = dyn_cast<SCEVTruncateExpr>(S)) { 5661321369Sdim ConstantRange X = getRangeRef(Trunc->getOperand(), SignHint); 5662288943Sdim return setRange(Trunc, SignHint, 5663353358Sdim ConservativeResult.intersectWith(X.truncate(BitWidth), 5664353358Sdim RangeType)); 5665198090Srdivacky } 5666198090Srdivacky 5667198090Srdivacky if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(S)) { 5668202878Srdivacky // If there's no unsigned wrap, the value will never be less than its 5669202878Srdivacky // initial value. 5670360784Sdim if (AddRec->hasNoUnsignedWrap()) { 5671360784Sdim APInt UnsignedMinValue = getUnsignedRangeMin(AddRec->getStart()); 5672360784Sdim if (!UnsignedMinValue.isNullValue()) 5673360784Sdim ConservativeResult = ConservativeResult.intersectWith( 5674360784Sdim ConstantRange(UnsignedMinValue, APInt(BitWidth, 0)), RangeType); 5675360784Sdim } 5676202878Srdivacky 5677360784Sdim // If there's no signed wrap, and all the operands except initial value have 5678360784Sdim // the same sign or zero, the value won't ever be: 5679360784Sdim // 1: smaller than initial value if operands are non negative, 5680360784Sdim // 2: bigger than initial value if operands are non positive. 5681360784Sdim // For both cases, value can not cross signed min/max boundary. 
5682309124Sdim if (AddRec->hasNoSignedWrap()) { 5683202878Srdivacky bool AllNonNeg = true; 5684202878Srdivacky bool AllNonPos = true; 5685360784Sdim for (unsigned i = 1, e = AddRec->getNumOperands(); i != e; ++i) { 5686360784Sdim if (!isKnownNonNegative(AddRec->getOperand(i))) 5687360784Sdim AllNonNeg = false; 5688360784Sdim if (!isKnownNonPositive(AddRec->getOperand(i))) 5689360784Sdim AllNonPos = false; 5690202878Srdivacky } 5691202878Srdivacky if (AllNonNeg) 5692203954Srdivacky ConservativeResult = ConservativeResult.intersectWith( 5693360784Sdim ConstantRange::getNonEmpty(getSignedRangeMin(AddRec->getStart()), 5694360784Sdim APInt::getSignedMinValue(BitWidth)), 5695360784Sdim RangeType); 5696202878Srdivacky else if (AllNonPos) 5697203954Srdivacky ConservativeResult = ConservativeResult.intersectWith( 5698360784Sdim ConstantRange::getNonEmpty( 5699360784Sdim APInt::getSignedMinValue(BitWidth), 5700360784Sdim getSignedRangeMax(AddRec->getStart()) + 1), 5701360784Sdim RangeType); 5702202878Srdivacky } 5703202878Srdivacky 5704198090Srdivacky // TODO: non-affine addrec 5705203954Srdivacky if (AddRec->isAffine()) { 5706360784Sdim const SCEV *MaxBECount = getConstantMaxBackedgeTakenCount(AddRec->getLoop()); 5707203954Srdivacky if (!isa<SCEVCouldNotCompute>(MaxBECount) && 5708203954Srdivacky getTypeSizeInBits(MaxBECount->getType()) <= BitWidth) { 5709309124Sdim auto RangeFromAffine = getRangeForAffineAR( 5710309124Sdim AddRec->getStart(), AddRec->getStepRecurrence(*this), MaxBECount, 5711309124Sdim BitWidth); 5712309124Sdim if (!RangeFromAffine.isFullSet()) 5713309124Sdim ConservativeResult = 5714353358Sdim ConservativeResult.intersectWith(RangeFromAffine, RangeType); 5715288943Sdim 5716309124Sdim auto RangeFromFactoring = getRangeViaFactoring( 5717309124Sdim AddRec->getStart(), AddRec->getStepRecurrence(*this), MaxBECount, 5718309124Sdim BitWidth); 5719309124Sdim if (!RangeFromFactoring.isFullSet()) 5720309124Sdim ConservativeResult = 5721353358Sdim 
ConservativeResult.intersectWith(RangeFromFactoring, RangeType); 5722195098Sed } 5723195098Sed } 5724202878Srdivacky 5725321369Sdim return setRange(AddRec, SignHint, std::move(ConservativeResult)); 5726195098Sed } 5727195098Sed 5728194612Sed if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) { 5729280031Sdim // Check if the IR explicitly contains !range metadata. 5730280031Sdim Optional<ConstantRange> MDRange = GetRangeFromMetadata(U->getValue()); 5731280031Sdim if (MDRange.hasValue()) 5732353358Sdim ConservativeResult = ConservativeResult.intersectWith(MDRange.getValue(), 5733353358Sdim RangeType); 5734280031Sdim 5735288943Sdim // Split here to avoid paying the compile-time cost of calling both 5736288943Sdim // computeKnownBits and ComputeNumSignBits. This restriction can be lifted 5737288943Sdim // if needed. 5738296417Sdim const DataLayout &DL = getDataLayout(); 5739288943Sdim if (SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED) { 5740288943Sdim // For a SCEVUnknown, ask ValueTracking. 5741321369Sdim KnownBits Known = computeKnownBits(U->getValue(), DL, 0, &AC, nullptr, &DT); 5742360784Sdim if (Known.getBitWidth() != BitWidth) 5743360784Sdim Known = Known.zextOrTrunc(BitWidth, true); 5744360784Sdim // If Known does not result in full-set, intersect with it. 5745360784Sdim if (Known.getMinValue() != Known.getMaxValue() + 1) 5746360784Sdim ConservativeResult = ConservativeResult.intersectWith( 5747360784Sdim ConstantRange(Known.getMinValue(), Known.getMaxValue() + 1), 5748360784Sdim RangeType); 5749288943Sdim } else { 5750288943Sdim assert(SignHint == ScalarEvolution::HINT_RANGE_SIGNED && 5751288943Sdim "generalize as needed!"); 5752296417Sdim unsigned NS = ComputeNumSignBits(U->getValue(), DL, 0, &AC, nullptr, &DT); 5753360784Sdim // If the pointer size is larger than the index size type, this can cause 5754360784Sdim // NS to be larger than BitWidth. So compensate for this. 
5755360784Sdim if (U->getType()->isPointerTy()) { 5756360784Sdim unsigned ptrSize = DL.getPointerTypeSizeInBits(U->getType()); 5757360784Sdim int ptrIdxDiff = ptrSize - BitWidth; 5758360784Sdim if (ptrIdxDiff > 0 && ptrSize > BitWidth && NS > (unsigned)ptrIdxDiff) 5759360784Sdim NS -= ptrIdxDiff; 5760360784Sdim } 5761360784Sdim 5762288943Sdim if (NS > 1) 5763288943Sdim ConservativeResult = ConservativeResult.intersectWith( 5764288943Sdim ConstantRange(APInt::getSignedMinValue(BitWidth).ashr(NS - 1), 5765353358Sdim APInt::getSignedMaxValue(BitWidth).ashr(NS - 1) + 1), 5766353358Sdim RangeType); 5767288943Sdim } 5768288943Sdim 5769341825Sdim // A range of Phi is a subset of union of all ranges of its input. 5770341825Sdim if (const PHINode *Phi = dyn_cast<PHINode>(U->getValue())) { 5771341825Sdim // Make sure that we do not run over cycled Phis. 5772341825Sdim if (PendingPhiRanges.insert(Phi).second) { 5773341825Sdim ConstantRange RangeFromOps(BitWidth, /*isFullSet=*/false); 5774341825Sdim for (auto &Op : Phi->operands()) { 5775341825Sdim auto OpRange = getRangeRef(getSCEV(Op), SignHint); 5776341825Sdim RangeFromOps = RangeFromOps.unionWith(OpRange); 5777341825Sdim // No point to continue if we already have a full set. 5778341825Sdim if (RangeFromOps.isFullSet()) 5779341825Sdim break; 5780341825Sdim } 5781353358Sdim ConservativeResult = 5782353358Sdim ConservativeResult.intersectWith(RangeFromOps, RangeType); 5783341825Sdim bool Erased = PendingPhiRanges.erase(Phi); 5784341825Sdim assert(Erased && "Failed to erase Phi properly?"); 5785341825Sdim (void) Erased; 5786341825Sdim } 5787341825Sdim } 5788341825Sdim 5789321369Sdim return setRange(U, SignHint, std::move(ConservativeResult)); 5790194612Sed } 5791194612Sed 5792321369Sdim return setRange(S, SignHint, std::move(ConservativeResult)); 5793194612Sed} 5794194612Sed 5795321369Sdim// Given a StartRange, Step and MaxBECount for an expression compute a range of 5796321369Sdim// values that the expression can take. 
Initially, the expression has a value 5797321369Sdim// from StartRange and then is changed by Step up to MaxBECount times. Signed 5798321369Sdim// argument defines if we treat Step as signed or unsigned. 5799321369Sdimstatic ConstantRange getRangeForAffineARHelper(APInt Step, 5800321369Sdim const ConstantRange &StartRange, 5801321369Sdim const APInt &MaxBECount, 5802321369Sdim unsigned BitWidth, bool Signed) { 5803321369Sdim // If either Step or MaxBECount is 0, then the expression won't change, and we 5804321369Sdim // just need to return the initial range. 5805321369Sdim if (Step == 0 || MaxBECount == 0) 5806321369Sdim return StartRange; 5807321369Sdim 5808321369Sdim // If we don't know anything about the initial value (i.e. StartRange is 5809321369Sdim // FullRange), then we don't know anything about the final range either. 5810321369Sdim // Return FullRange. 5811321369Sdim if (StartRange.isFullSet()) 5812353358Sdim return ConstantRange::getFull(BitWidth); 5813321369Sdim 5814321369Sdim // If Step is signed and negative, then we use its absolute value, but we also 5815321369Sdim // note that we're moving in the opposite direction. 5816321369Sdim bool Descending = Signed && Step.isNegative(); 5817321369Sdim 5818321369Sdim if (Signed) 5819321369Sdim // This is correct even for INT_SMIN. Let's look at i8 to illustrate this: 5820321369Sdim // abs(INT_SMIN) = abs(-128) = abs(0x80) = -0x80 = 0x80 = 128. 5821321369Sdim // This equations hold true due to the well-defined wrap-around behavior of 5822321369Sdim // APInt. 5823321369Sdim Step = Step.abs(); 5824321369Sdim 5825321369Sdim // Check if Offset is more than full span of BitWidth. If it is, the 5826321369Sdim // expression is guaranteed to overflow. 5827321369Sdim if (APInt::getMaxValue(StartRange.getBitWidth()).udiv(Step).ult(MaxBECount)) 5828353358Sdim return ConstantRange::getFull(BitWidth); 5829321369Sdim 5830321369Sdim // Offset is by how much the expression can change. 
Checks above guarantee no 5831321369Sdim // overflow here. 5832321369Sdim APInt Offset = Step * MaxBECount; 5833321369Sdim 5834321369Sdim // Minimum value of the final range will match the minimal value of StartRange 5835321369Sdim // if the expression is increasing and will be decreased by Offset otherwise. 5836321369Sdim // Maximum value of the final range will match the maximal value of StartRange 5837321369Sdim // if the expression is decreasing and will be increased by Offset otherwise. 5838321369Sdim APInt StartLower = StartRange.getLower(); 5839321369Sdim APInt StartUpper = StartRange.getUpper() - 1; 5840321369Sdim APInt MovedBoundary = Descending ? (StartLower - std::move(Offset)) 5841321369Sdim : (StartUpper + std::move(Offset)); 5842321369Sdim 5843321369Sdim // It's possible that the new minimum/maximum value will fall into the initial 5844321369Sdim // range (due to wrap around). This means that the expression can take any 5845321369Sdim // value in this bitwidth, and we have to return full range. 5846321369Sdim if (StartRange.contains(MovedBoundary)) 5847353358Sdim return ConstantRange::getFull(BitWidth); 5848321369Sdim 5849321369Sdim APInt NewLower = 5850321369Sdim Descending ? std::move(MovedBoundary) : std::move(StartLower); 5851321369Sdim APInt NewUpper = 5852321369Sdim Descending ? std::move(StartUpper) : std::move(MovedBoundary); 5853321369Sdim NewUpper += 1; 5854321369Sdim 5855321369Sdim // No overflow detected, return [StartLower, StartUpper + Offset + 1) range. 
5856353358Sdim return ConstantRange::getNonEmpty(std::move(NewLower), std::move(NewUpper)); 5857321369Sdim} 5858321369Sdim 5859309124SdimConstantRange ScalarEvolution::getRangeForAffineAR(const SCEV *Start, 5860309124Sdim const SCEV *Step, 5861309124Sdim const SCEV *MaxBECount, 5862309124Sdim unsigned BitWidth) { 5863309124Sdim assert(!isa<SCEVCouldNotCompute>(MaxBECount) && 5864309124Sdim getTypeSizeInBits(MaxBECount->getType()) <= BitWidth && 5865309124Sdim "Precondition!"); 5866309124Sdim 5867309124Sdim MaxBECount = getNoopOrZeroExtend(MaxBECount, Start->getType()); 5868321369Sdim APInt MaxBECountValue = getUnsignedRangeMax(MaxBECount); 5869309124Sdim 5870321369Sdim // First, consider step signed. 5871321369Sdim ConstantRange StartSRange = getSignedRange(Start); 5872309124Sdim ConstantRange StepSRange = getSignedRange(Step); 5873309124Sdim 5874321369Sdim // If Step can be both positive and negative, we need to find ranges for the 5875321369Sdim // maximum absolute step values in both directions and union them. 5876321369Sdim ConstantRange SR = 5877321369Sdim getRangeForAffineARHelper(StepSRange.getSignedMin(), StartSRange, 5878321369Sdim MaxBECountValue, BitWidth, /* Signed = */ true); 5879321369Sdim SR = SR.unionWith(getRangeForAffineARHelper(StepSRange.getSignedMax(), 5880321369Sdim StartSRange, MaxBECountValue, 5881321369Sdim BitWidth, /* Signed = */ true)); 5882309124Sdim 5883321369Sdim // Next, consider step unsigned. 5884321369Sdim ConstantRange UR = getRangeForAffineARHelper( 5885321369Sdim getUnsignedRangeMax(Step), getUnsignedRange(Start), 5886321369Sdim MaxBECountValue, BitWidth, /* Signed = */ false); 5887309124Sdim 5888321369Sdim // Finally, intersect signed and unsigned ranges. 
5889353358Sdim return SR.intersectWith(UR, ConstantRange::Smallest); 5890309124Sdim} 5891309124Sdim 5892309124SdimConstantRange ScalarEvolution::getRangeViaFactoring(const SCEV *Start, 5893309124Sdim const SCEV *Step, 5894309124Sdim const SCEV *MaxBECount, 5895309124Sdim unsigned BitWidth) { 5896309124Sdim // RangeOf({C?A:B,+,C?P:Q}) == RangeOf(C?{A,+,P}:{B,+,Q}) 5897309124Sdim // == RangeOf({A,+,P}) union RangeOf({B,+,Q}) 5898309124Sdim 5899309124Sdim struct SelectPattern { 5900309124Sdim Value *Condition = nullptr; 5901309124Sdim APInt TrueValue; 5902309124Sdim APInt FalseValue; 5903309124Sdim 5904309124Sdim explicit SelectPattern(ScalarEvolution &SE, unsigned BitWidth, 5905309124Sdim const SCEV *S) { 5906309124Sdim Optional<unsigned> CastOp; 5907309124Sdim APInt Offset(BitWidth, 0); 5908309124Sdim 5909309124Sdim assert(SE.getTypeSizeInBits(S->getType()) == BitWidth && 5910309124Sdim "Should be!"); 5911309124Sdim 5912309124Sdim // Peel off a constant offset: 5913309124Sdim if (auto *SA = dyn_cast<SCEVAddExpr>(S)) { 5914309124Sdim // In the future we could consider being smarter here and handle 5915309124Sdim // {Start+Step,+,Step} too. 
5916309124Sdim if (SA->getNumOperands() != 2 || !isa<SCEVConstant>(SA->getOperand(0))) 5917309124Sdim return; 5918309124Sdim 5919309124Sdim Offset = cast<SCEVConstant>(SA->getOperand(0))->getAPInt(); 5920309124Sdim S = SA->getOperand(1); 5921309124Sdim } 5922309124Sdim 5923309124Sdim // Peel off a cast operation 5924309124Sdim if (auto *SCast = dyn_cast<SCEVCastExpr>(S)) { 5925309124Sdim CastOp = SCast->getSCEVType(); 5926309124Sdim S = SCast->getOperand(); 5927309124Sdim } 5928309124Sdim 5929309124Sdim using namespace llvm::PatternMatch; 5930309124Sdim 5931309124Sdim auto *SU = dyn_cast<SCEVUnknown>(S); 5932309124Sdim const APInt *TrueVal, *FalseVal; 5933309124Sdim if (!SU || 5934309124Sdim !match(SU->getValue(), m_Select(m_Value(Condition), m_APInt(TrueVal), 5935309124Sdim m_APInt(FalseVal)))) { 5936309124Sdim Condition = nullptr; 5937309124Sdim return; 5938309124Sdim } 5939309124Sdim 5940309124Sdim TrueValue = *TrueVal; 5941309124Sdim FalseValue = *FalseVal; 5942309124Sdim 5943309124Sdim // Re-apply the cast we peeled off earlier 5944309124Sdim if (CastOp.hasValue()) 5945309124Sdim switch (*CastOp) { 5946309124Sdim default: 5947309124Sdim llvm_unreachable("Unknown SCEV cast type!"); 5948309124Sdim 5949309124Sdim case scTruncate: 5950309124Sdim TrueValue = TrueValue.trunc(BitWidth); 5951309124Sdim FalseValue = FalseValue.trunc(BitWidth); 5952309124Sdim break; 5953309124Sdim case scZeroExtend: 5954309124Sdim TrueValue = TrueValue.zext(BitWidth); 5955309124Sdim FalseValue = FalseValue.zext(BitWidth); 5956309124Sdim break; 5957309124Sdim case scSignExtend: 5958309124Sdim TrueValue = TrueValue.sext(BitWidth); 5959309124Sdim FalseValue = FalseValue.sext(BitWidth); 5960309124Sdim break; 5961309124Sdim } 5962309124Sdim 5963309124Sdim // Re-apply the constant offset we peeled off earlier 5964309124Sdim TrueValue += Offset; 5965309124Sdim FalseValue += Offset; 5966309124Sdim } 5967309124Sdim 5968309124Sdim bool isRecognized() { return Condition != nullptr; } 
5969309124Sdim }; 5970309124Sdim 5971309124Sdim SelectPattern StartPattern(*this, BitWidth, Start); 5972309124Sdim if (!StartPattern.isRecognized()) 5973353358Sdim return ConstantRange::getFull(BitWidth); 5974309124Sdim 5975309124Sdim SelectPattern StepPattern(*this, BitWidth, Step); 5976309124Sdim if (!StepPattern.isRecognized()) 5977353358Sdim return ConstantRange::getFull(BitWidth); 5978309124Sdim 5979309124Sdim if (StartPattern.Condition != StepPattern.Condition) { 5980309124Sdim // We don't handle this case today; but we could, by considering four 5981309124Sdim // possibilities below instead of two. I'm not sure if there are cases where 5982309124Sdim // that will help over what getRange already does, though. 5983353358Sdim return ConstantRange::getFull(BitWidth); 5984309124Sdim } 5985309124Sdim 5986309124Sdim // NB! Calling ScalarEvolution::getConstant is fine, but we should not try to 5987309124Sdim // construct arbitrary general SCEV expressions here. This function is called 5988309124Sdim // from deep in the call stack, and calling getSCEV (on a sext instruction, 5989309124Sdim // say) can end up caching a suboptimal value. 5990309124Sdim 5991309124Sdim // FIXME: without the explicit `this` receiver below, MSVC errors out with 5992309124Sdim // C2352 and C2512 (otherwise it isn't needed). 
5993309124Sdim 5994309124Sdim const SCEV *TrueStart = this->getConstant(StartPattern.TrueValue); 5995309124Sdim const SCEV *TrueStep = this->getConstant(StepPattern.TrueValue); 5996309124Sdim const SCEV *FalseStart = this->getConstant(StartPattern.FalseValue); 5997309124Sdim const SCEV *FalseStep = this->getConstant(StepPattern.FalseValue); 5998309124Sdim 5999309124Sdim ConstantRange TrueRange = 6000309124Sdim this->getRangeForAffineAR(TrueStart, TrueStep, MaxBECount, BitWidth); 6001309124Sdim ConstantRange FalseRange = 6002309124Sdim this->getRangeForAffineAR(FalseStart, FalseStep, MaxBECount, BitWidth); 6003309124Sdim 6004309124Sdim return TrueRange.unionWith(FalseRange); 6005309124Sdim} 6006309124Sdim 6007296417SdimSCEV::NoWrapFlags ScalarEvolution::getNoWrapFlagsFromUB(const Value *V) { 6008296417Sdim if (isa<ConstantExpr>(V)) return SCEV::FlagAnyWrap; 6009296417Sdim const BinaryOperator *BinOp = cast<BinaryOperator>(V); 6010296417Sdim 6011296417Sdim // Return early if there are no flags to propagate to the SCEV. 6012296417Sdim SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap; 6013296417Sdim if (BinOp->hasNoUnsignedWrap()) 6014296417Sdim Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNUW); 6015296417Sdim if (BinOp->hasNoSignedWrap()) 6016296417Sdim Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNSW); 6017309124Sdim if (Flags == SCEV::FlagAnyWrap) 6018296417Sdim return SCEV::FlagAnyWrap; 6019296417Sdim 6020309124Sdim return isSCEVExprNeverPoison(BinOp) ? Flags : SCEV::FlagAnyWrap; 6021309124Sdim} 6022296417Sdim 6023309124Sdimbool ScalarEvolution::isSCEVExprNeverPoison(const Instruction *I) { 6024309124Sdim // Here we check that I is in the header of the innermost loop containing I, 6025309124Sdim // since we only deal with instructions in the loop header. The actual loop we 6026309124Sdim // need to check later will come from an add recurrence, but getting that 6027309124Sdim // requires computing the SCEV of the operands, which can be expensive. 
This 6028309124Sdim // check we can do cheaply to rule out some cases early. 6029309124Sdim Loop *InnermostContainingLoop = LI.getLoopFor(I->getParent()); 6030309124Sdim if (InnermostContainingLoop == nullptr || 6031309124Sdim InnermostContainingLoop->getHeader() != I->getParent()) 6032309124Sdim return false; 6033296417Sdim 6034309124Sdim // Only proceed if we can prove that I does not yield poison. 6035321369Sdim if (!programUndefinedIfFullPoison(I)) 6036321369Sdim return false; 6037309124Sdim 6038309124Sdim // At this point we know that if I is executed, then it does not wrap 6039309124Sdim // according to at least one of NSW or NUW. If I is not executed, then we do 6040309124Sdim // not know if the calculation that I represents would wrap. Multiple 6041309124Sdim // instructions can map to the same SCEV. If we apply NSW or NUW from I to 6042296417Sdim // the SCEV, we must guarantee no wrapping for that SCEV also when it is 6043296417Sdim // derived from other instructions that map to the same SCEV. We cannot make 6044309124Sdim // that guarantee for cases where I is not executed. So we need to find the 6045309124Sdim // loop that I is considered in relation to and prove that I is executed for 6046309124Sdim // every iteration of that loop. That implies that the value that I 6047296417Sdim // calculates does not wrap anywhere in the loop, so then we can apply the 6048296417Sdim // flags to the SCEV. 6049296417Sdim // 6050309124Sdim // We check isLoopInvariant to disambiguate in case we are adding recurrences 6051309124Sdim // from different loops, so that we know which loop to prove that I is 6052309124Sdim // executed in. 6053309124Sdim for (unsigned OpIndex = 0; OpIndex < I->getNumOperands(); ++OpIndex) { 6054309124Sdim // I could be an extractvalue from a call to an overflow intrinsic. 6055309124Sdim // TODO: We can do better here in some cases. 
6056309124Sdim if (!isSCEVable(I->getOperand(OpIndex)->getType())) 6057309124Sdim return false; 6058309124Sdim const SCEV *Op = getSCEV(I->getOperand(OpIndex)); 6059296417Sdim if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(Op)) { 6060309124Sdim bool AllOtherOpsLoopInvariant = true; 6061309124Sdim for (unsigned OtherOpIndex = 0; OtherOpIndex < I->getNumOperands(); 6062309124Sdim ++OtherOpIndex) { 6063309124Sdim if (OtherOpIndex != OpIndex) { 6064309124Sdim const SCEV *OtherOp = getSCEV(I->getOperand(OtherOpIndex)); 6065309124Sdim if (!isLoopInvariant(OtherOp, AddRec->getLoop())) { 6066309124Sdim AllOtherOpsLoopInvariant = false; 6067309124Sdim break; 6068309124Sdim } 6069309124Sdim } 6070309124Sdim } 6071309124Sdim if (AllOtherOpsLoopInvariant && 6072309124Sdim isGuaranteedToExecuteForEveryIteration(I, AddRec->getLoop())) 6073309124Sdim return true; 6074296417Sdim } 6075296417Sdim } 6076309124Sdim return false; 6077296417Sdim} 6078296417Sdim 6079309124Sdimbool ScalarEvolution::isAddRecNeverPoison(const Instruction *I, const Loop *L) { 6080309124Sdim // If we know that \c I can never be poison period, then that's enough. 6081309124Sdim if (isSCEVExprNeverPoison(I)) 6082309124Sdim return true; 6083309124Sdim 6084309124Sdim // For an add recurrence specifically, we assume that infinite loops without 6085309124Sdim // side effects are undefined behavior, and then reason as follows: 6086309124Sdim // 6087309124Sdim // If the add recurrence is poison in any iteration, it is poison on all 6088309124Sdim // future iterations (since incrementing poison yields poison). 
If the result 6089309124Sdim // of the add recurrence is fed into the loop latch condition and the loop 6090309124Sdim // does not contain any throws or exiting blocks other than the latch, we now 6091309124Sdim // have the ability to "choose" whether the backedge is taken or not (by 6092309124Sdim // choosing a sufficiently evil value for the poison feeding into the branch) 6093309124Sdim // for every iteration including and after the one in which \p I first became 6094309124Sdim // poison. There are two possibilities (let's call the iteration in which \p 6095309124Sdim // I first became poison as K): 6096309124Sdim // 6097309124Sdim // 1. In the set of iterations including and after K, the loop body executes 6098309124Sdim // no side effects. In this case executing the backege an infinte number 6099309124Sdim // of times will yield undefined behavior. 6100309124Sdim // 6101309124Sdim // 2. In the set of iterations including and after K, the loop body executes 6102309124Sdim // at least one side effect. In this case, that specific instance of side 6103309124Sdim // effect is control dependent on poison, which also yields undefined 6104309124Sdim // behavior. 6105309124Sdim 6106309124Sdim auto *ExitingBB = L->getExitingBlock(); 6107309124Sdim auto *LatchBB = L->getLoopLatch(); 6108309124Sdim if (!ExitingBB || !LatchBB || ExitingBB != LatchBB) 6109309124Sdim return false; 6110309124Sdim 6111309124Sdim SmallPtrSet<const Instruction *, 16> Pushed; 6112309124Sdim SmallVector<const Instruction *, 8> PoisonStack; 6113309124Sdim 6114309124Sdim // We start by assuming \c I, the post-inc add recurrence, is poison. Only 6115309124Sdim // things that are known to be fully poison under that assumption go on the 6116309124Sdim // PoisonStack. 
6117309124Sdim Pushed.insert(I); 6118309124Sdim PoisonStack.push_back(I); 6119309124Sdim 6120309124Sdim bool LatchControlDependentOnPoison = false; 6121309124Sdim while (!PoisonStack.empty() && !LatchControlDependentOnPoison) { 6122309124Sdim const Instruction *Poison = PoisonStack.pop_back_val(); 6123309124Sdim 6124309124Sdim for (auto *PoisonUser : Poison->users()) { 6125309124Sdim if (propagatesFullPoison(cast<Instruction>(PoisonUser))) { 6126309124Sdim if (Pushed.insert(cast<Instruction>(PoisonUser)).second) 6127309124Sdim PoisonStack.push_back(cast<Instruction>(PoisonUser)); 6128309124Sdim } else if (auto *BI = dyn_cast<BranchInst>(PoisonUser)) { 6129309124Sdim assert(BI->isConditional() && "Only possibility!"); 6130309124Sdim if (BI->getParent() == LatchBB) { 6131309124Sdim LatchControlDependentOnPoison = true; 6132309124Sdim break; 6133309124Sdim } 6134309124Sdim } 6135309124Sdim } 6136309124Sdim } 6137309124Sdim 6138309124Sdim return LatchControlDependentOnPoison && loopHasNoAbnormalExits(L); 6139309124Sdim} 6140309124Sdim 6141314564SdimScalarEvolution::LoopProperties 6142314564SdimScalarEvolution::getLoopProperties(const Loop *L) { 6143327952Sdim using LoopProperties = ScalarEvolution::LoopProperties; 6144314564Sdim 6145314564Sdim auto Itr = LoopPropertiesCache.find(L); 6146314564Sdim if (Itr == LoopPropertiesCache.end()) { 6147314564Sdim auto HasSideEffects = [](Instruction *I) { 6148314564Sdim if (auto *SI = dyn_cast<StoreInst>(I)) 6149314564Sdim return !SI->isSimple(); 6150314564Sdim 6151314564Sdim return I->mayHaveSideEffects(); 6152309124Sdim }; 6153309124Sdim 6154314564Sdim LoopProperties LP = {/* HasNoAbnormalExits */ true, 6155314564Sdim /*HasNoSideEffects*/ true}; 6156314564Sdim 6157314564Sdim for (auto *BB : L->getBlocks()) 6158314564Sdim for (auto &I : *BB) { 6159314564Sdim if (!isGuaranteedToTransferExecutionToSuccessor(&I)) 6160314564Sdim LP.HasNoAbnormalExits = false; 6161314564Sdim if (HasSideEffects(&I)) 6162314564Sdim LP.HasNoSideEffects = 
false; 6163314564Sdim if (!LP.HasNoAbnormalExits && !LP.HasNoSideEffects) 6164314564Sdim break; // We're already as pessimistic as we can get. 6165314564Sdim } 6166314564Sdim 6167314564Sdim auto InsertPair = LoopPropertiesCache.insert({L, LP}); 6168309124Sdim assert(InsertPair.second && "We just checked!"); 6169309124Sdim Itr = InsertPair.first; 6170309124Sdim } 6171309124Sdim 6172309124Sdim return Itr->second; 6173309124Sdim} 6174309124Sdim 6175198090Srdivackyconst SCEV *ScalarEvolution::createSCEV(Value *V) { 6176193323Sed if (!isSCEVable(V->getType())) 6177193323Sed return getUnknown(V); 6178193323Sed 6179204961Srdivacky if (Instruction *I = dyn_cast<Instruction>(V)) { 6180204961Srdivacky // Don't attempt to analyze instructions in blocks that aren't 6181204961Srdivacky // reachable. Such instructions don't matter, and they aren't required 6182204961Srdivacky // to obey basic rules for definitions dominating uses which this 6183204961Srdivacky // analysis depends on. 6184296417Sdim if (!DT.isReachableFromEntry(I->getParent())) 6185353358Sdim return getUnknown(UndefValue::get(V->getType())); 6186309124Sdim } else if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) 6187195098Sed return getConstant(CI); 6188195098Sed else if (isa<ConstantPointerNull>(V)) 6189296417Sdim return getZero(V->getType()); 6190198090Srdivacky else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) 6191309124Sdim return GA->isInterposable() ? getUnknown(V) : getSCEV(GA->getAliasee()); 6192309124Sdim else if (!isa<ConstantExpr>(V)) 6193193323Sed return getUnknown(V); 6194193323Sed 6195198090Srdivacky Operator *U = cast<Operator>(V); 6196309124Sdim if (auto BO = MatchBinaryOp(U, DT)) { 6197309124Sdim switch (BO->Opcode) { 6198309124Sdim case Instruction::Add: { 6199309124Sdim // The simple thing to do would be to just call getSCEV on both operands 6200309124Sdim // and call getAddExpr with the result. 
However if we're looking at a 6201309124Sdim // bunch of things all added together, this can be quite inefficient, 6202309124Sdim // because it leads to N-1 getAddExpr calls for N ultimate operands. 6203309124Sdim // Instead, gather up all the operands and make a single getAddExpr call. 6204309124Sdim // LLVM IR canonical form means we need only traverse the left operands. 6205309124Sdim SmallVector<const SCEV *, 4> AddOps; 6206309124Sdim do { 6207309124Sdim if (BO->Op) { 6208309124Sdim if (auto *OpSCEV = getExistingSCEV(BO->Op)) { 6209309124Sdim AddOps.push_back(OpSCEV); 6210309124Sdim break; 6211309124Sdim } 6212296417Sdim 6213309124Sdim // If a NUW or NSW flag can be applied to the SCEV for this 6214309124Sdim // addition, then compute the SCEV for this addition by itself 6215309124Sdim // with a separate call to getAddExpr. We need to do that 6216309124Sdim // instead of pushing the operands of the addition onto AddOps, 6217309124Sdim // since the flags are only known to apply to this particular 6218309124Sdim // addition - they may not apply to other additions that can be 6219309124Sdim // formed with operands from AddOps. 
6220309124Sdim const SCEV *RHS = getSCEV(BO->RHS); 6221309124Sdim SCEV::NoWrapFlags Flags = getNoWrapFlagsFromUB(BO->Op); 6222309124Sdim if (Flags != SCEV::FlagAnyWrap) { 6223309124Sdim const SCEV *LHS = getSCEV(BO->LHS); 6224309124Sdim if (BO->Opcode == Instruction::Sub) 6225309124Sdim AddOps.push_back(getMinusSCEV(LHS, RHS, Flags)); 6226309124Sdim else 6227309124Sdim AddOps.push_back(getAddExpr(LHS, RHS, Flags)); 6228309124Sdim break; 6229309124Sdim } 6230309124Sdim } 6231296417Sdim 6232309124Sdim if (BO->Opcode == Instruction::Sub) 6233309124Sdim AddOps.push_back(getNegativeSCEV(getSCEV(BO->RHS))); 6234296417Sdim else 6235309124Sdim AddOps.push_back(getSCEV(BO->RHS)); 6236296417Sdim 6237309124Sdim auto NewBO = MatchBinaryOp(BO->LHS, DT); 6238309124Sdim if (!NewBO || (NewBO->Opcode != Instruction::Add && 6239309124Sdim NewBO->Opcode != Instruction::Sub)) { 6240309124Sdim AddOps.push_back(getSCEV(BO->LHS)); 6241309124Sdim break; 6242309124Sdim } 6243309124Sdim BO = NewBO; 6244309124Sdim } while (true); 6245309124Sdim 6246309124Sdim return getAddExpr(AddOps); 6247212904Sdim } 6248296417Sdim 6249309124Sdim case Instruction::Mul: { 6250309124Sdim SmallVector<const SCEV *, 4> MulOps; 6251309124Sdim do { 6252309124Sdim if (BO->Op) { 6253309124Sdim if (auto *OpSCEV = getExistingSCEV(BO->Op)) { 6254309124Sdim MulOps.push_back(OpSCEV); 6255309124Sdim break; 6256309124Sdim } 6257296417Sdim 6258309124Sdim SCEV::NoWrapFlags Flags = getNoWrapFlagsFromUB(BO->Op); 6259309124Sdim if (Flags != SCEV::FlagAnyWrap) { 6260309124Sdim MulOps.push_back( 6261309124Sdim getMulExpr(getSCEV(BO->LHS), getSCEV(BO->RHS), Flags)); 6262309124Sdim break; 6263309124Sdim } 6264309124Sdim } 6265296417Sdim 6266309124Sdim MulOps.push_back(getSCEV(BO->RHS)); 6267309124Sdim auto NewBO = MatchBinaryOp(BO->LHS, DT); 6268309124Sdim if (!NewBO || NewBO->Opcode != Instruction::Mul) { 6269309124Sdim MulOps.push_back(getSCEV(BO->LHS)); 6270309124Sdim break; 6271309124Sdim } 6272309124Sdim BO = NewBO; 
6273309124Sdim } while (true); 6274296417Sdim 6275309124Sdim return getMulExpr(MulOps); 6276212904Sdim } 6277309124Sdim case Instruction::UDiv: 6278309124Sdim return getUDivExpr(getSCEV(BO->LHS), getSCEV(BO->RHS)); 6279327952Sdim case Instruction::URem: 6280327952Sdim return getURemExpr(getSCEV(BO->LHS), getSCEV(BO->RHS)); 6281309124Sdim case Instruction::Sub: { 6282309124Sdim SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap; 6283309124Sdim if (BO->Op) 6284309124Sdim Flags = getNoWrapFlagsFromUB(BO->Op); 6285309124Sdim return getMinusSCEV(getSCEV(BO->LHS), getSCEV(BO->RHS), Flags); 6286309124Sdim } 6287309124Sdim case Instruction::And: 6288309124Sdim // For an expression like x&255 that merely masks off the high bits, 6289309124Sdim // use zext(trunc(x)) as the SCEV expression. 6290309124Sdim if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->RHS)) { 6291321369Sdim if (CI->isZero()) 6292309124Sdim return getSCEV(BO->RHS); 6293321369Sdim if (CI->isMinusOne()) 6294309124Sdim return getSCEV(BO->LHS); 6295309124Sdim const APInt &A = CI->getValue(); 6296194612Sed 6297309124Sdim // Instcombine's ShrinkDemandedConstant may strip bits out of 6298309124Sdim // constants, obscuring what would otherwise be a low-bits mask. 6299309124Sdim // Use computeKnownBits to compute what ShrinkDemandedConstant 6300309124Sdim // knew about to reconstruct a low-bits mask value. 
6301309124Sdim unsigned LZ = A.countLeadingZeros(); 6302309124Sdim unsigned TZ = A.countTrailingZeros(); 6303309124Sdim unsigned BitWidth = A.getBitWidth(); 6304321369Sdim KnownBits Known(BitWidth); 6305321369Sdim computeKnownBits(BO->LHS, Known, getDataLayout(), 6306309124Sdim 0, &AC, nullptr, &DT); 6307194612Sed 6308309124Sdim APInt EffectiveMask = 6309309124Sdim APInt::getLowBitsSet(BitWidth, BitWidth - LZ - TZ).shl(TZ); 6310321369Sdim if ((LZ != 0 || TZ != 0) && !((~A & ~Known.Zero) & EffectiveMask)) { 6311321369Sdim const SCEV *MulCount = getConstant(APInt::getOneBitSet(BitWidth, TZ)); 6312321369Sdim const SCEV *LHS = getSCEV(BO->LHS); 6313321369Sdim const SCEV *ShiftedLHS = nullptr; 6314321369Sdim if (auto *LHSMul = dyn_cast<SCEVMulExpr>(LHS)) { 6315321369Sdim if (auto *OpC = dyn_cast<SCEVConstant>(LHSMul->getOperand(0))) { 6316321369Sdim // For an expression like (x * 8) & 8, simplify the multiply. 6317321369Sdim unsigned MulZeros = OpC->getAPInt().countTrailingZeros(); 6318321369Sdim unsigned GCD = std::min(MulZeros, TZ); 6319321369Sdim APInt DivAmt = APInt::getOneBitSet(BitWidth, TZ - GCD); 6320321369Sdim SmallVector<const SCEV*, 4> MulOps; 6321321369Sdim MulOps.push_back(getConstant(OpC->getAPInt().lshr(GCD))); 6322321369Sdim MulOps.append(LHSMul->op_begin() + 1, LHSMul->op_end()); 6323321369Sdim auto *NewMul = getMulExpr(MulOps, LHSMul->getNoWrapFlags()); 6324321369Sdim ShiftedLHS = getUDivExpr(NewMul, getConstant(DivAmt)); 6325321369Sdim } 6326321369Sdim } 6327321369Sdim if (!ShiftedLHS) 6328321369Sdim ShiftedLHS = getUDivExpr(LHS, MulCount); 6329309124Sdim return getMulExpr( 6330309124Sdim getZeroExtendExpr( 6331321369Sdim getTruncateExpr(ShiftedLHS, 6332309124Sdim IntegerType::get(getContext(), BitWidth - LZ - TZ)), 6333309124Sdim BO->LHS->getType()), 6334309124Sdim MulCount); 6335309124Sdim } 6336276479Sdim } 6337309124Sdim break; 6338194612Sed 6339309124Sdim case Instruction::Or: 6340309124Sdim // If the RHS of the Or is a constant, we may have 
something like: 6341309124Sdim // X*4+1 which got turned into X*4|1. Handle this as an Add so loop 6342309124Sdim // optimizations will transparently handle this case. 6343309124Sdim // 6344309124Sdim // In order for this transformation to be safe, the LHS must be of the 6345309124Sdim // form X*(2^n) and the Or constant must be less than 2^n. 6346309124Sdim if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->RHS)) { 6347309124Sdim const SCEV *LHS = getSCEV(BO->LHS); 6348309124Sdim const APInt &CIVal = CI->getValue(); 6349309124Sdim if (GetMinTrailingZeros(LHS) >= 6350309124Sdim (CIVal.getBitWidth() - CIVal.countLeadingZeros())) { 6351309124Sdim // Build a plain add SCEV. 6352309124Sdim const SCEV *S = getAddExpr(LHS, getSCEV(CI)); 6353309124Sdim // If the LHS of the add was an addrec and it has no-wrap flags, 6354309124Sdim // transfer the no-wrap flags, since an or won't introduce a wrap. 6355309124Sdim if (const SCEVAddRecExpr *NewAR = dyn_cast<SCEVAddRecExpr>(S)) { 6356309124Sdim const SCEVAddRecExpr *OldAR = cast<SCEVAddRecExpr>(LHS); 6357309124Sdim const_cast<SCEVAddRecExpr *>(NewAR)->setNoWrapFlags( 6358309124Sdim OldAR->getNoWrapFlags()); 6359309124Sdim } 6360309124Sdim return S; 6361198090Srdivacky } 6362198090Srdivacky } 6363309124Sdim break; 6364193323Sed 6365309124Sdim case Instruction::Xor: 6366309124Sdim if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->RHS)) { 6367309124Sdim // If the RHS of xor is -1, then this is a not operation. 6368321369Sdim if (CI->isMinusOne()) 6369309124Sdim return getNotSCEV(getSCEV(BO->LHS)); 6370193323Sed 6371309124Sdim // Model xor(and(x, C), C) as and(~x, C), if C is a low-bits mask. 6372309124Sdim // This is a variant of the check for xor with -1, and it handles 6373309124Sdim // the case where instcombine has trimmed non-demanded bits out 6374309124Sdim // of an xor with -1. 
6375309124Sdim if (auto *LBO = dyn_cast<BinaryOperator>(BO->LHS)) 6376309124Sdim if (ConstantInt *LCI = dyn_cast<ConstantInt>(LBO->getOperand(1))) 6377309124Sdim if (LBO->getOpcode() == Instruction::And && 6378309124Sdim LCI->getValue() == CI->getValue()) 6379309124Sdim if (const SCEVZeroExtendExpr *Z = 6380309124Sdim dyn_cast<SCEVZeroExtendExpr>(getSCEV(BO->LHS))) { 6381309124Sdim Type *UTy = BO->LHS->getType(); 6382309124Sdim const SCEV *Z0 = Z->getOperand(); 6383309124Sdim Type *Z0Ty = Z0->getType(); 6384309124Sdim unsigned Z0TySize = getTypeSizeInBits(Z0Ty); 6385194612Sed 6386309124Sdim // If C is a low-bits mask, the zero extend is serving to 6387309124Sdim // mask off the high bits. Complement the operand and 6388309124Sdim // re-apply the zext. 6389321369Sdim if (CI->getValue().isMask(Z0TySize)) 6390309124Sdim return getZeroExtendExpr(getNotSCEV(Z0), UTy); 6391194612Sed 6392309124Sdim // If C is a single bit, it may be in the sign-bit position 6393309124Sdim // before the zero-extend. In this case, represent the xor 6394309124Sdim // using an add, which is equivalent, and re-apply the zext. 6395309124Sdim APInt Trunc = CI->getValue().trunc(Z0TySize); 6396309124Sdim if (Trunc.zext(getTypeSizeInBits(UTy)) == CI->getValue() && 6397321369Sdim Trunc.isSignMask()) 6398309124Sdim return getZeroExtendExpr(getAddExpr(Z0, getConstant(Trunc)), 6399309124Sdim UTy); 6400309124Sdim } 6401309124Sdim } 6402309124Sdim break; 6403193323Sed 6404341825Sdim case Instruction::Shl: 6405341825Sdim // Turn shift left of a constant amount into a multiply. 6406341825Sdim if (ConstantInt *SA = dyn_cast<ConstantInt>(BO->RHS)) { 6407341825Sdim uint32_t BitWidth = cast<IntegerType>(SA->getType())->getBitWidth(); 6408207618Srdivacky 6409341825Sdim // If the shift count is not less than the bitwidth, the result of 6410341825Sdim // the shift is undefined. 
Don't try to analyze it, because the 6411341825Sdim // resolution chosen here may differ from the resolution chosen in 6412341825Sdim // other parts of the compiler. 6413341825Sdim if (SA->getValue().uge(BitWidth)) 6414341825Sdim break; 6415207618Srdivacky 6416341825Sdim // It is currently not resolved how to interpret NSW for left 6417341825Sdim // shift by BitWidth - 1, so we avoid applying flags in that 6418341825Sdim // case. Remove this check (or this comment) once the situation 6419341825Sdim // is resolved. See 6420341825Sdim // http://lists.llvm.org/pipermail/llvm-dev/2015-April/084195.html 6421341825Sdim // and http://reviews.llvm.org/D8890 . 6422341825Sdim auto Flags = SCEV::FlagAnyWrap; 6423341825Sdim if (BO->Op && SA->getValue().ult(BitWidth - 1)) 6424341825Sdim Flags = getNoWrapFlagsFromUB(BO->Op); 6425296417Sdim 6426341825Sdim Constant *X = ConstantInt::get( 6427341825Sdim getContext(), APInt::getOneBitSet(BitWidth, SA->getZExtValue())); 6428341825Sdim return getMulExpr(getSCEV(BO->LHS), getSCEV(X), Flags); 6429341825Sdim } 6430341825Sdim break; 6431193323Sed 6432327952Sdim case Instruction::AShr: { 6433321369Sdim // AShr X, C, where C is a constant. 6434321369Sdim ConstantInt *CI = dyn_cast<ConstantInt>(BO->RHS); 6435321369Sdim if (!CI) 6436321369Sdim break; 6437207618Srdivacky 6438321369Sdim Type *OuterTy = BO->LHS->getType(); 6439321369Sdim uint64_t BitWidth = getTypeSizeInBits(OuterTy); 6440321369Sdim // If the shift count is not less than the bitwidth, the result of 6441321369Sdim // the shift is undefined. Don't try to analyze it, because the 6442321369Sdim // resolution chosen here may differ from the resolution chosen in 6443321369Sdim // other parts of the compiler. 
6444321369Sdim if (CI->getValue().uge(BitWidth)) 6445321369Sdim break; 6446207618Srdivacky 6447321369Sdim if (CI->isZero()) 6448321369Sdim return getSCEV(BO->LHS); // shift by zero --> noop 6449321369Sdim 6450321369Sdim uint64_t AShrAmt = CI->getZExtValue(); 6451321369Sdim Type *TruncTy = IntegerType::get(getContext(), BitWidth - AShrAmt); 6452321369Sdim 6453321369Sdim Operator *L = dyn_cast<Operator>(BO->LHS); 6454321369Sdim if (L && L->getOpcode() == Instruction::Shl) { 6455321369Sdim // X = Shl A, n 6456321369Sdim // Y = AShr X, m 6457321369Sdim // Both n and m are constant. 6458321369Sdim 6459321369Sdim const SCEV *ShlOp0SCEV = getSCEV(L->getOperand(0)); 6460321369Sdim if (L->getOperand(1) == BO->RHS) 6461321369Sdim // For a two-shift sext-inreg, i.e. n = m, 6462321369Sdim // use sext(trunc(x)) as the SCEV expression. 6463321369Sdim return getSignExtendExpr( 6464321369Sdim getTruncateExpr(ShlOp0SCEV, TruncTy), OuterTy); 6465321369Sdim 6466321369Sdim ConstantInt *ShlAmtCI = dyn_cast<ConstantInt>(L->getOperand(1)); 6467321369Sdim if (ShlAmtCI && ShlAmtCI->getValue().ult(BitWidth)) { 6468321369Sdim uint64_t ShlAmt = ShlAmtCI->getZExtValue(); 6469321369Sdim if (ShlAmt > AShrAmt) { 6470321369Sdim // When n > m, use sext(mul(trunc(x), 2^(n-m)))) as the SCEV 6471321369Sdim // expression. We already checked that ShlAmt < BitWidth, so 6472321369Sdim // the multiplier, 1 << (ShlAmt - AShrAmt), fits into TruncTy as 6473321369Sdim // ShlAmt - AShrAmt < Amt. 
6474321369Sdim APInt Mul = APInt::getOneBitSet(BitWidth - AShrAmt, 6475321369Sdim ShlAmt - AShrAmt); 6476309124Sdim return getSignExtendExpr( 6477321369Sdim getMulExpr(getTruncateExpr(ShlOp0SCEV, TruncTy), 6478321369Sdim getConstant(Mul)), OuterTy); 6479309124Sdim } 6480321369Sdim } 6481321369Sdim } 6482309124Sdim break; 6483193323Sed } 6484327952Sdim } 6485309124Sdim } 6486193323Sed 6487309124Sdim switch (U->getOpcode()) { 6488193323Sed case Instruction::Trunc: 6489193323Sed return getTruncateExpr(getSCEV(U->getOperand(0)), U->getType()); 6490193323Sed 6491193323Sed case Instruction::ZExt: 6492193323Sed return getZeroExtendExpr(getSCEV(U->getOperand(0)), U->getType()); 6493193323Sed 6494193323Sed case Instruction::SExt: 6495327952Sdim if (auto BO = MatchBinaryOp(U->getOperand(0), DT)) { 6496327952Sdim // The NSW flag of a subtract does not always survive the conversion to 6497327952Sdim // A + (-1)*B. By pushing sign extension onto its operands we are much 6498327952Sdim // more likely to preserve NSW and allow later AddRec optimisations. 6499327952Sdim // 6500327952Sdim // NOTE: This is effectively duplicating this logic from getSignExtend: 6501327952Sdim // sext((A + B + ...)<nsw>) --> (sext(A) + sext(B) + ...)<nsw> 6502327952Sdim // but by that point the NSW information has potentially been lost. 6503327952Sdim if (BO->Opcode == Instruction::Sub && BO->IsNSW) { 6504327952Sdim Type *Ty = U->getType(); 6505327952Sdim auto *V1 = getSignExtendExpr(getSCEV(BO->LHS), Ty); 6506327952Sdim auto *V2 = getSignExtendExpr(getSCEV(BO->RHS), Ty); 6507327952Sdim return getMinusSCEV(V1, V2, SCEV::FlagNSW); 6508327952Sdim } 6509327952Sdim } 6510193323Sed return getSignExtendExpr(getSCEV(U->getOperand(0)), U->getType()); 6511193323Sed 6512193323Sed case Instruction::BitCast: 6513193323Sed // BitCasts are no-op casts so we just eliminate the cast. 
6514193323Sed if (isSCEVable(U->getType()) && isSCEVable(U->getOperand(0)->getType())) 6515193323Sed return getSCEV(U->getOperand(0)); 6516193323Sed break; 6517193323Sed 6518203954Srdivacky // It's tempting to handle inttoptr and ptrtoint as no-ops, however this can 6519203954Srdivacky // lead to pointer expressions which cannot safely be expanded to GEPs, 6520203954Srdivacky // because ScalarEvolution doesn't respect the GEP aliasing rules when 6521203954Srdivacky // simplifying integer expressions. 6522193323Sed 6523193323Sed case Instruction::GetElementPtr: 6524201360Srdivacky return createNodeForGEP(cast<GEPOperator>(U)); 6525193323Sed 6526193323Sed case Instruction::PHI: 6527193323Sed return createNodeForPHI(cast<PHINode>(U)); 6528193323Sed 6529193323Sed case Instruction::Select: 6530296417Sdim // U can also be a select constant expr, which let fall through. Since 6531296417Sdim // createNodeForSelect only works for a condition that is an `ICmpInst`, and 6532296417Sdim // constant expressions cannot have instructions as operands, we'd have 6533296417Sdim // returned getUnknown for a select constant expressions anyway. 
6534296417Sdim if (isa<Instruction>(U)) 6535296417Sdim return createNodeForSelectOrPHI(cast<Instruction>(U), U->getOperand(0), 6536296417Sdim U->getOperand(1), U->getOperand(2)); 6537309124Sdim break; 6538193323Sed 6539309124Sdim case Instruction::Call: 6540309124Sdim case Instruction::Invoke: 6541309124Sdim if (Value *RV = CallSite(U).getReturnedArgOperand()) 6542309124Sdim return getSCEV(RV); 6543193323Sed break; 6544193323Sed } 6545193323Sed 6546193323Sed return getUnknown(V); 6547193323Sed} 6548193323Sed 6549193323Sed//===----------------------------------------------------------------------===// 6550193323Sed// Iteration Count Computation Code 6551193323Sed// 6552193323Sed 6553314564Sdimstatic unsigned getConstantTripCount(const SCEVConstant *ExitCount) { 6554314564Sdim if (!ExitCount) 6555314564Sdim return 0; 6556314564Sdim 6557314564Sdim ConstantInt *ExitConst = ExitCount->getValue(); 6558314564Sdim 6559314564Sdim // Guard against huge trip counts. 6560314564Sdim if (ExitConst->getValue().getActiveBits() > 32) 6561314564Sdim return 0; 6562314564Sdim 6563314564Sdim // In case of integer overflow, this returns 0, which is correct. 6564314564Sdim return ((unsigned)ExitConst->getZExtValue()) + 1; 6565314564Sdim} 6566314564Sdim 6567321369Sdimunsigned ScalarEvolution::getSmallConstantTripCount(const Loop *L) { 6568280031Sdim if (BasicBlock *ExitingBB = L->getExitingBlock()) 6569280031Sdim return getSmallConstantTripCount(L, ExitingBB); 6570280031Sdim 6571280031Sdim // No trip count information for multiple exits. 
6572280031Sdim return 0; 6573280031Sdim} 6574280031Sdim 6575321369Sdimunsigned ScalarEvolution::getSmallConstantTripCount(const Loop *L, 6576280031Sdim BasicBlock *ExitingBlock) { 6577280031Sdim assert(ExitingBlock && "Must pass a non-null exiting block!"); 6578280031Sdim assert(L->isLoopExiting(ExitingBlock) && 6579280031Sdim "Exiting block must actually branch out of the loop!"); 6580226633Sdim const SCEVConstant *ExitCount = 6581280031Sdim dyn_cast<SCEVConstant>(getExitCount(L, ExitingBlock)); 6582314564Sdim return getConstantTripCount(ExitCount); 6583314564Sdim} 6584226633Sdim 6585321369Sdimunsigned ScalarEvolution::getSmallConstantMaxTripCount(const Loop *L) { 6586314564Sdim const auto *MaxExitCount = 6587360784Sdim dyn_cast<SCEVConstant>(getConstantMaxBackedgeTakenCount(L)); 6588314564Sdim return getConstantTripCount(MaxExitCount); 6589226633Sdim} 6590226633Sdim 6591321369Sdimunsigned ScalarEvolution::getSmallConstantTripMultiple(const Loop *L) { 6592280031Sdim if (BasicBlock *ExitingBB = L->getExitingBlock()) 6593280031Sdim return getSmallConstantTripMultiple(L, ExitingBB); 6594280031Sdim 6595280031Sdim // No trip multiple information for multiple exits. 6596280031Sdim return 0; 6597280031Sdim} 6598280031Sdim 6599309124Sdim/// Returns the largest constant divisor of the trip count of this loop as a 6600309124Sdim/// normal unsigned value, if possible. This means that the actual trip count is 6601309124Sdim/// always a multiple of the returned value (don't forget the trip count could 6602309124Sdim/// very well be zero as well!). 6603226633Sdim/// 6604226633Sdim/// Returns 1 if the trip count is unknown or not guaranteed to be the 6605226633Sdim/// multiple of a constant (which is also the case if the trip count is simply 6606226633Sdim/// constant, use getSmallConstantTripCount for that case), Will also return 1 6607226633Sdim/// if the trip count is very large (>= 2^32). 
6608234353Sdim/// 6609234353Sdim/// As explained in the comments for getSmallConstantTripCount, this assumes 6610234353Sdim/// that control exits the loop via ExitingBlock. 6611280031Sdimunsigned 6612321369SdimScalarEvolution::getSmallConstantTripMultiple(const Loop *L, 6613280031Sdim BasicBlock *ExitingBlock) { 6614280031Sdim assert(ExitingBlock && "Must pass a non-null exiting block!"); 6615280031Sdim assert(L->isLoopExiting(ExitingBlock) && 6616280031Sdim "Exiting block must actually branch out of the loop!"); 6617280031Sdim const SCEV *ExitCount = getExitCount(L, ExitingBlock); 6618226633Sdim if (ExitCount == getCouldNotCompute()) 6619226633Sdim return 1; 6620226633Sdim 6621226633Sdim // Get the trip count from the BE count by adding 1. 6622321369Sdim const SCEV *TCExpr = getAddExpr(ExitCount, getOne(ExitCount->getType())); 6623226633Sdim 6624321369Sdim const SCEVConstant *TC = dyn_cast<SCEVConstant>(TCExpr); 6625321369Sdim if (!TC) 6626321369Sdim // Attempt to factor more general cases. Returns the greatest power of 6627321369Sdim // two divisor. If overflow happens, the trip count expression is still 6628321369Sdim // divisible by the greatest power of 2 divisor returned. 6629321369Sdim return 1U << std::min((uint32_t)31, GetMinTrailingZeros(TCExpr)); 6630226633Sdim 6631321369Sdim ConstantInt *Result = TC->getValue(); 6632226633Sdim 6633243830Sdim // Guard against huge trip counts (this requires checking 6634243830Sdim // for zero to handle the case where the trip count == -1 and the 6635243830Sdim // addition wraps). 
6636243830Sdim if (!Result || Result->getValue().getActiveBits() > 32 || 6637243830Sdim Result->getValue().getActiveBits() == 0) 6638226633Sdim return 1; 6639226633Sdim 6640226633Sdim return (unsigned)Result->getZExtValue(); 6641226633Sdim} 6642226633Sdim 6643321369Sdimconst SCEV *ScalarEvolution::getExitCount(const Loop *L, 6644360784Sdim BasicBlock *ExitingBlock, 6645360784Sdim ExitCountKind Kind) { 6646360784Sdim switch (Kind) { 6647360784Sdim case Exact: 6648360784Sdim return getBackedgeTakenInfo(L).getExact(ExitingBlock, this); 6649360784Sdim case ConstantMaximum: 6650360784Sdim return getBackedgeTakenInfo(L).getMax(ExitingBlock, this); 6651360784Sdim }; 6652360784Sdim llvm_unreachable("Invalid ExitCountKind!"); 6653226633Sdim} 6654226633Sdim 6655309124Sdimconst SCEV * 6656309124SdimScalarEvolution::getPredicatedBackedgeTakenCount(const Loop *L, 6657309124Sdim SCEVUnionPredicate &Preds) { 6658341825Sdim return getPredicatedBackedgeTakenInfo(L).getExact(L, this, &Preds); 6659309124Sdim} 6660309124Sdim 6661360784Sdimconst SCEV *ScalarEvolution::getBackedgeTakenCount(const Loop *L, 6662360784Sdim ExitCountKind Kind) { 6663360784Sdim switch (Kind) { 6664360784Sdim case Exact: 6665360784Sdim return getBackedgeTakenInfo(L).getExact(L, this); 6666360784Sdim case ConstantMaximum: 6667360784Sdim return getBackedgeTakenInfo(L).getMax(this); 6668360784Sdim }; 6669360784Sdim llvm_unreachable("Invalid ExitCountKind!"); 6670193323Sed} 6671193323Sed 6672314564Sdimbool ScalarEvolution::isBackedgeTakenCountMaxOrZero(const Loop *L) { 6673314564Sdim return getBackedgeTakenInfo(L).isMaxOrZero(this); 6674314564Sdim} 6675314564Sdim 6676309124Sdim/// Push PHI nodes in the header of the given loop onto the given Worklist. 
static void
PushLoopPHIs(const Loop *L, SmallVectorImpl<Instruction *> &Worklist) {
  BasicBlock *Header = L->getHeader();

  // Push all Loop-header PHIs onto the Worklist stack.
  for (PHINode &PN : Header->phis())
    Worklist.push_back(&PN);
}

/// Return the predicated BackedgeTakenInfo for \p L, computing it on first
/// request. Falls back to the unpredicated info when that is already complete.
const ScalarEvolution::BackedgeTakenInfo &
ScalarEvolution::getPredicatedBackedgeTakenInfo(const Loop *L) {
  // If the unpredicated info is already complete, no predicates are needed.
  auto &BTI = getBackedgeTakenInfo(L);
  if (BTI.hasFullInfo())
    return BTI;

  // Insert a placeholder first; if one was already present, reuse it.
  auto Pair = PredicatedBackedgeTakenCounts.insert({L, BackedgeTakenInfo()});

  if (!Pair.second)
    return Pair.first->second;

  BackedgeTakenInfo Result =
      computeBackedgeTakenCount(L, /*AllowPredicates=*/true);

  // Re-find the entry: the computation above may have modified the map.
  return PredicatedBackedgeTakenCounts.find(L)->second = std::move(Result);
}

/// Return the cached BackedgeTakenInfo for \p L, computing it on first
/// request and invalidating now-stale SCEVs for the loop's PHI users.
const ScalarEvolution::BackedgeTakenInfo &
ScalarEvolution::getBackedgeTakenInfo(const Loop *L) {
  // Initially insert an invalid entry for this loop. If the insertion
  // succeeds, proceed to actually compute a backedge-taken count and
  // update the value. The temporary CouldNotCompute value tells SCEV
  // code elsewhere that it shouldn't attempt to request a new
  // backedge-taken count, which could result in infinite recursion.
  std::pair<DenseMap<const Loop *, BackedgeTakenInfo>::iterator, bool> Pair =
      BackedgeTakenCounts.insert({L, BackedgeTakenInfo()});
  if (!Pair.second)
    return Pair.first->second;

  // computeBackedgeTakenCount may allocate memory for its result. Inserting it
  // into the BackedgeTakenCounts map transfers ownership. Otherwise, the result
  // must be cleared in this scope.
  BackedgeTakenInfo Result = computeBackedgeTakenCount(L);

  // In a release (non-stats) build the statistics counters are unused;
  // reference them to avoid unused-variable warnings.
  (void)NumTripCountsComputed;
  (void)NumTripCountsNotComputed;
#if LLVM_ENABLE_STATS || !defined(NDEBUG)
  const SCEV *BEExact = Result.getExact(L, this);
  if (BEExact != getCouldNotCompute()) {
    assert(isLoopInvariant(BEExact, L) &&
           isLoopInvariant(Result.getMax(this), L) &&
           "Computed backedge-taken count isn't loop invariant for loop!");
    ++NumTripCountsComputed;
  }
  else if (Result.getMax(this) == getCouldNotCompute() &&
           isa<PHINode>(L->getHeader()->begin())) {
    // Only count loops that have phi nodes as not being computable.
    ++NumTripCountsNotComputed;
  }
#endif // LLVM_ENABLE_STATS || !defined(NDEBUG)

  // Now that we know more about the trip count for this loop, forget any
  // existing SCEV values for PHI nodes in this loop since they are only
  // conservative estimates made without the benefit of trip count
  // information. This is similar to the code in forgetLoop, except that
  // it handles SCEVUnknown PHI nodes specially.
  if (Result.hasAnyInfo()) {
    SmallVector<Instruction *, 16> Worklist;
    PushLoopPHIs(L, Worklist);

    SmallPtrSet<Instruction *, 8> Discovered;
    while (!Worklist.empty()) {
      Instruction *I = Worklist.pop_back_val();

      ValueExprMapType::iterator It =
          ValueExprMap.find_as(static_cast<Value *>(I));
      if (It != ValueExprMap.end()) {
        const SCEV *Old = It->second;

        // SCEVUnknown for a PHI either means that it has an unrecognized
        // structure, or it's a PHI that's in the progress of being computed
        // by createNodeForPHI. In the former case, additional loop trip
        // count information isn't going to change anything. In the later
        // case, createNodeForPHI will perform the necessary updates on its
        // own when it gets to that point.
        if (!isa<PHINode>(I) || !isa<SCEVUnknown>(Old)) {
          eraseValueFromMap(It->first);
          forgetMemoizedResults(Old);
        }
        if (PHINode *PN = dyn_cast<PHINode>(I))
          ConstantEvolutionLoopExitValue.erase(PN);
      }

      // Since we don't need to invalidate anything for correctness and we're
      // only invalidating to make SCEV's results more precise, we get to stop
      // early to avoid invalidating too much. This is especially important in
      // cases like:
      //
      //   %v = f(pn0, pn1) // pn0 and pn1 used through some other phi node
      //   loop0:
      //     %pn0 = phi
      //     ...
      //   loop1:
      //     %pn1 = phi
      //     ...
      //
      // where both loop0 and loop1's backedge taken count uses the SCEV
      // expression for %v. If we don't have the early stop below then in cases
      // like the above, getBackedgeTakenInfo(loop1) will clear out the trip
      // count for loop0 and getBackedgeTakenInfo(loop0) will clear out the trip
      // count for loop1, effectively nullifying SCEV's trip count cache.
      for (auto *U : I->users())
        if (auto *I = dyn_cast<Instruction>(U)) {
          auto *LoopForUser = LI.getLoopFor(I->getParent());
          if (LoopForUser && L->contains(LoopForUser) &&
              Discovered.insert(I).second)
            Worklist.push_back(I);
        }
    }
  }

  // Re-lookup the insert position, since the call to
  // computeBackedgeTakenCount above could result in a
  // recursive call to getBackedgeTakenInfo (on a different
  // loop), which would invalidate the iterator computed
  // earlier.
  return BackedgeTakenCounts.find(L)->second = std::move(Result);
}

void ScalarEvolution::forgetAllLoops() {
  // This method is intended to forget all info about loops. It should
  // invalidate caches as if the following happened:
  // - The trip counts of all loops have changed arbitrarily
  // - Every llvm::Value has been updated in place to produce a different
  //   result.
  BackedgeTakenCounts.clear();
  PredicatedBackedgeTakenCounts.clear();
  LoopPropertiesCache.clear();
  ConstantEvolutionLoopExitValue.clear();
  ValueExprMap.clear();
  ValuesAtScopes.clear();
  LoopDispositions.clear();
  BlockDispositions.clear();
  UnsignedRanges.clear();
  SignedRanges.clear();
  ExprValueMap.clear();
  HasRecMap.clear();
  MinTrailingZerosCache.clear();
  PredicatedSCEVRewrites.clear();
}

/// Drop all cached information about \p L and every loop nested inside it.
void ScalarEvolution::forgetLoop(const Loop *L) {
  // Drop any stored trip count value.
  auto RemoveLoopFromBackedgeMap =
      [](DenseMap<const Loop *, BackedgeTakenInfo> &Map, const Loop *L) {
        auto BTCPos = Map.find(L);
        if (BTCPos != Map.end()) {
          BTCPos->second.clear();
          Map.erase(BTCPos);
        }
      };

  SmallVector<const Loop *, 16> LoopWorklist(1, L);
  SmallVector<Instruction *, 32> Worklist;
  SmallPtrSet<Instruction *, 16> Visited;

  // Iterate over all the loops and sub-loops to drop SCEV information.
  while (!LoopWorklist.empty()) {
    auto *CurrL = LoopWorklist.pop_back_val();

    // Drop any stored trip count value.
    RemoveLoopFromBackedgeMap(BackedgeTakenCounts, CurrL);
    RemoveLoopFromBackedgeMap(PredicatedBackedgeTakenCounts, CurrL);

    // Drop information about predicated SCEV rewrites for this loop.
    for (auto I = PredicatedSCEVRewrites.begin();
         I != PredicatedSCEVRewrites.end();) {
      std::pair<const SCEV *, const Loop *> Entry = I->first;
      if (Entry.second == CurrL)
        PredicatedSCEVRewrites.erase(I++);
      else
        ++I;
    }

    // Forget memoized results for every SCEV known to depend on this loop.
    auto LoopUsersItr = LoopUsers.find(CurrL);
    if (LoopUsersItr != LoopUsers.end()) {
      for (auto *S : LoopUsersItr->second)
        forgetMemoizedResults(S);
      LoopUsers.erase(LoopUsersItr);
    }

    // Drop information about expressions based on loop-header PHIs.
    PushLoopPHIs(CurrL, Worklist);

    while (!Worklist.empty()) {
      Instruction *I = Worklist.pop_back_val();
      if (!Visited.insert(I).second)
        continue;

      ValueExprMapType::iterator It =
          ValueExprMap.find_as(static_cast<Value *>(I));
      if (It != ValueExprMap.end()) {
        eraseValueFromMap(It->first);
        forgetMemoizedResults(It->second);
        if (PHINode *PN = dyn_cast<PHINode>(I))
          ConstantEvolutionLoopExitValue.erase(PN);
      }

      PushDefUseChildren(I, Worklist);
    }

    LoopPropertiesCache.erase(CurrL);
    // Forget all contained loops too, to avoid dangling entries in the
    // ValuesAtScopes map.
    LoopWorklist.append(CurrL->begin(), CurrL->end());
  }
}

/// Forget the outermost loop enclosing \p L (and therefore, via forgetLoop,
/// every loop in that nest).
void ScalarEvolution::forgetTopmostLoop(const Loop *L) {
  while (Loop *Parent = L->getParentLoop())
    L = Parent;
  forgetLoop(L);
}

/// Drop all cached SCEV information derived from \p V and its transitive
/// def-use children.
void ScalarEvolution::forgetValue(Value *V) {
  Instruction *I = dyn_cast<Instruction>(V);
  if (!I) return;

  // Drop information about expressions based on loop-header PHIs.
  SmallVector<Instruction *, 16> Worklist;
  Worklist.push_back(I);

  SmallPtrSet<Instruction *, 8> Visited;
  while (!Worklist.empty()) {
    I = Worklist.pop_back_val();
    if (!Visited.insert(I).second)
      continue;

    ValueExprMapType::iterator It =
        ValueExprMap.find_as(static_cast<Value *>(I));
    if (It != ValueExprMap.end()) {
      eraseValueFromMap(It->first);
      forgetMemoizedResults(It->second);
      if (PHINode *PN = dyn_cast<PHINode>(I))
        ConstantEvolutionLoopExitValue.erase(PN);
    }

    PushDefUseChildren(I, Worklist);
  }
}

/// Get the exact loop backedge taken count considering all loop exits. A
/// computable result can only be returned for loops with all exiting blocks
/// dominating the latch. howFarToZero assumes that the limit of each loop test
/// is never skipped. This is a valid assumption as long as the loop exits via
/// that test.
For precise results, it is the caller's responsibility to specify 6932341825Sdim/// the relevant loop exiting block using getExact(ExitingBlock, SE). 6933226633Sdimconst SCEV * 6934341825SdimScalarEvolution::BackedgeTakenInfo::getExact(const Loop *L, ScalarEvolution *SE, 6935314564Sdim SCEVUnionPredicate *Preds) const { 6936226633Sdim // If any exits were not computable, the loop is not computable. 6937314564Sdim if (!isComplete() || ExitNotTaken.empty()) 6938314564Sdim return SE->getCouldNotCompute(); 6939226633Sdim 6940341825Sdim const BasicBlock *Latch = L->getLoopLatch(); 6941341825Sdim // All exiting blocks we have collected must dominate the only backedge. 6942341825Sdim if (!Latch) 6943341825Sdim return SE->getCouldNotCompute(); 6944341825Sdim 6945341825Sdim // All exiting blocks we have gathered dominate loop's latch, so exact trip 6946341825Sdim // count is simply a minimum out of all these calculated exit counts. 6947341825Sdim SmallVector<const SCEV *, 2> Ops; 6948309124Sdim for (auto &ENT : ExitNotTaken) { 6949341825Sdim const SCEV *BECount = ENT.ExactNotTaken; 6950341825Sdim assert(BECount != SE->getCouldNotCompute() && "Bad exit SCEV!"); 6951341825Sdim assert(SE->DT.dominates(ENT.ExitingBlock, Latch) && 6952341825Sdim "We should only have known counts for exiting blocks that dominate " 6953341825Sdim "latch!"); 6954226633Sdim 6955341825Sdim Ops.push_back(BECount); 6956341825Sdim 6957314564Sdim if (Preds && !ENT.hasAlwaysTruePredicate()) 6958314564Sdim Preds->add(ENT.Predicate.get()); 6959309124Sdim 6960314564Sdim assert((Preds || ENT.hasAlwaysTruePredicate()) && 6961309124Sdim "Predicate should be always true!"); 6962226633Sdim } 6963309124Sdim 6964341825Sdim return SE->getUMinFromMismatchedTypes(Ops); 6965226633Sdim} 6966226633Sdim 6967309124Sdim/// Get the exact not taken count for this loop exit. 
const SCEV *
ScalarEvolution::BackedgeTakenInfo::getExact(BasicBlock *ExitingBlock,
                                             ScalarEvolution *SE) const {
  // Only an unconditionally-valid (predicate-free) count qualifies.
  for (auto &ENT : ExitNotTaken)
    if (ENT.ExitingBlock == ExitingBlock && ENT.hasAlwaysTruePredicate())
      return ENT.ExactNotTaken;

  return SE->getCouldNotCompute();
}

/// Get the max not-taken count for this loop exit.
const SCEV *
ScalarEvolution::BackedgeTakenInfo::getMax(BasicBlock *ExitingBlock,
                                           ScalarEvolution *SE) const {
  for (auto &ENT : ExitNotTaken)
    if (ENT.ExitingBlock == ExitingBlock && ENT.hasAlwaysTruePredicate())
      return ENT.MaxNotTaken;

  return SE->getCouldNotCompute();
}

/// getMax - Get the max backedge taken count for the loop.
const SCEV *
ScalarEvolution::BackedgeTakenInfo::getMax(ScalarEvolution *SE) const {
  auto PredicateNotAlwaysTrue = [](const ExitNotTakenInfo &ENT) {
    return !ENT.hasAlwaysTruePredicate();
  };

  // The max is only usable if no exit count is guarded by a predicate.
  if (any_of(ExitNotTaken, PredicateNotAlwaysTrue) || !getMax())
    return SE->getCouldNotCompute();

  assert((isa<SCEVCouldNotCompute>(getMax()) || isa<SCEVConstant>(getMax())) &&
         "No point in having a non-constant max backedge taken count!");
  return getMax();
}

/// Return true if the stored max count is only reached exactly, or not at all
/// (zero iterations), and no exit depends on a predicate.
bool ScalarEvolution::BackedgeTakenInfo::isMaxOrZero(ScalarEvolution *SE) const {
  auto PredicateNotAlwaysTrue = [](const ExitNotTakenInfo &ENT) {
    return !ENT.hasAlwaysTruePredicate();
  };
  return MaxOrZero && !any_of(ExitNotTaken, PredicateNotAlwaysTrue);
}

/// Return true if any stored count (max or per-exit exact) uses \p S as an
/// operand.
bool ScalarEvolution::BackedgeTakenInfo::hasOperand(const SCEV *S,
                                                    ScalarEvolution *SE) const {
  if (getMax() && getMax() != SE->getCouldNotCompute() &&
      SE->hasOperand(getMax(), S))
    return true;

  for (auto &ENT : ExitNotTaken)
    if (ENT.ExactNotTaken != SE->getCouldNotCompute() &&
        SE->hasOperand(ENT.ExactNotTaken, S))
      return true;

  return false;
}

/// Construct an ExitLimit whose exact and max counts are the same expression.
ScalarEvolution::ExitLimit::ExitLimit(const SCEV *E)
    : ExactNotTaken(E), MaxNotTaken(E) {
  assert((isa<SCEVCouldNotCompute>(MaxNotTaken) ||
          isa<SCEVConstant>(MaxNotTaken)) &&
         "No point in having a non-constant max backedge taken count!");
}

/// Construct an ExitLimit from separate exact/max counts, adopting every
/// predicate from each set in \p PredSetList.
ScalarEvolution::ExitLimit::ExitLimit(
    const SCEV *E, const SCEV *M, bool MaxOrZero,
    ArrayRef<const SmallPtrSetImpl<const SCEVPredicate *> *> PredSetList)
    : ExactNotTaken(E), MaxNotTaken(M), MaxOrZero(MaxOrZero) {
  assert((isa<SCEVCouldNotCompute>(ExactNotTaken) ||
          !isa<SCEVCouldNotCompute>(MaxNotTaken)) &&
         "Exact is not allowed to be less precise than Max");
  assert((isa<SCEVCouldNotCompute>(MaxNotTaken) ||
          isa<SCEVConstant>(MaxNotTaken)) &&
         "No point in having a non-constant max backedge taken count!");
  for (auto *PredSet : PredSetList)
    for (auto *P : *PredSet)
      addPredicate(P);
}

/// Convenience overload for a single predicate set.
ScalarEvolution::ExitLimit::ExitLimit(
    const SCEV *E, const SCEV *M, bool MaxOrZero,
    const SmallPtrSetImpl<const SCEVPredicate *> &PredSet)
    : ExitLimit(E, M, MaxOrZero, {&PredSet}) {
  assert((isa<SCEVCouldNotCompute>(MaxNotTaken) ||
          isa<SCEVConstant>(MaxNotTaken)) &&
         "No point in having a non-constant max backedge taken count!");
}

/// Convenience overload with no predicates.
ScalarEvolution::ExitLimit::ExitLimit(const SCEV *E, const SCEV *M,
                                      bool MaxOrZero)
    : ExitLimit(E, M, MaxOrZero, None) {
  assert((isa<SCEVCouldNotCompute>(MaxNotTaken) ||
          isa<SCEVConstant>(MaxNotTaken)) &&
         "No point in having a non-constant max backedge taken count!");
}

/// Allocate memory for BackedgeTakenInfo and copy the not-taken count of each
/// computable exit into a persistent ExitNotTakenInfo array.
ScalarEvolution::BackedgeTakenInfo::BackedgeTakenInfo(
    ArrayRef<ScalarEvolution::BackedgeTakenInfo::EdgeExitInfo>
        ExitCounts,
    bool Complete, const SCEV *MaxCount, bool MaxOrZero)
    : MaxAndComplete(MaxCount, Complete), MaxOrZero(MaxOrZero) {
  using EdgeExitInfo = ScalarEvolution::BackedgeTakenInfo::EdgeExitInfo;

  ExitNotTaken.reserve(ExitCounts.size());
  std::transform(
      ExitCounts.begin(), ExitCounts.end(), std::back_inserter(ExitNotTaken),
      [&](const EdgeExitInfo &EEI) {
        BasicBlock *ExitBB = EEI.first;
        const ExitLimit &EL = EEI.second;
        // Predicate-free exits carry a null predicate pointer.
        if (EL.Predicates.empty())
          return ExitNotTakenInfo(ExitBB, EL.ExactNotTaken, EL.MaxNotTaken,
                                  nullptr);

        std::unique_ptr<SCEVUnionPredicate> Predicate(new SCEVUnionPredicate);
        for (auto *Pred : EL.Predicates)
          Predicate->add(Pred);

        return ExitNotTakenInfo(ExitBB, EL.ExactNotTaken, EL.MaxNotTaken,
                                std::move(Predicate));
      });
  assert((isa<SCEVCouldNotCompute>(MaxCount) || isa<SCEVConstant>(MaxCount)) &&
         "No point in having a non-constant max backedge taken count!");
}
/// Invalidate this result and free the ExitNotTakenInfo array.
void ScalarEvolution::BackedgeTakenInfo::clear() {
  ExitNotTaken.clear();
}

/// Compute the number of times the backedge of the specified loop will execute.
ScalarEvolution::BackedgeTakenInfo
ScalarEvolution::computeBackedgeTakenCount(const Loop *L,
                                           bool AllowPredicates) {
  SmallVector<BasicBlock *, 8> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  using EdgeExitInfo = ScalarEvolution::BackedgeTakenInfo::EdgeExitInfo;

  SmallVector<EdgeExitInfo, 4> ExitCounts;
  bool CouldComputeBECount = true;
  BasicBlock *Latch = L->getLoopLatch(); // may be NULL.
  const SCEV *MustExitMaxBECount = nullptr;
  const SCEV *MayExitMaxBECount = nullptr;
  bool MustExitMaxOrZero = false;

  // Compute the ExitLimit for each loop exit. Use this to populate ExitCounts
  // and compute maxBECount.
  // Do a union of all the predicates here.
  for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
    BasicBlock *ExitBB = ExitingBlocks[i];

    // We canonicalize untaken exits to br (constant), ignore them so that
    // proving an exit untaken doesn't negatively impact our ability to reason
    // about the loop as whole.
    if (auto *BI = dyn_cast<BranchInst>(ExitBB->getTerminator()))
      if (auto *CI = dyn_cast<ConstantInt>(BI->getCondition())) {
        bool ExitIfTrue = !L->contains(BI->getSuccessor(0));
        if ((ExitIfTrue && CI->isZero()) || (!ExitIfTrue && CI->isOne()))
          continue;
      }

    ExitLimit EL = computeExitLimit(L, ExitBB, AllowPredicates);

    assert((AllowPredicates || EL.Predicates.empty()) &&
           "Predicated exit limit when predicates are not allowed!");

    // 1. For each exit that can be computed, add an entry to ExitCounts.
    // CouldComputeBECount is true only if all exits can be computed.
    if (EL.ExactNotTaken == getCouldNotCompute())
      // We couldn't compute an exact value for this exit, so
      // we won't be able to compute an exact value for the loop.
      CouldComputeBECount = false;
    else
      ExitCounts.emplace_back(ExitBB, EL);

    // 2. Derive the loop's MaxBECount from each exit's max number of
    // non-exiting iterations. Partition the loop exits into two kinds:
    // LoopMustExits and LoopMayExits.
    //
    // If the exit dominates the loop latch, it is a LoopMustExit otherwise it
    // is a LoopMayExit. If any computable LoopMustExit is found, then
    // MaxBECount is the minimum EL.MaxNotTaken of computable
    // LoopMustExits. Otherwise, MaxBECount is conservatively the maximum
    // EL.MaxNotTaken, where CouldNotCompute is considered greater than any
    // computable EL.MaxNotTaken.
    if (EL.MaxNotTaken != getCouldNotCompute() && Latch &&
        DT.dominates(ExitBB, Latch)) {
      if (!MustExitMaxBECount) {
        MustExitMaxBECount = EL.MaxNotTaken;
        MustExitMaxOrZero = EL.MaxOrZero;
      } else {
        MustExitMaxBECount =
            getUMinFromMismatchedTypes(MustExitMaxBECount, EL.MaxNotTaken);
      }
    } else if (MayExitMaxBECount != getCouldNotCompute()) {
      if (!MayExitMaxBECount || EL.MaxNotTaken == getCouldNotCompute())
        MayExitMaxBECount = EL.MaxNotTaken;
      else {
        MayExitMaxBECount =
            getUMaxFromMismatchedTypes(MayExitMaxBECount, EL.MaxNotTaken);
      }
    }
  }
  // Prefer the must-exit bound; otherwise fall back to the may-exit bound.
  const SCEV *MaxBECount = MustExitMaxBECount ? MustExitMaxBECount :
    (MayExitMaxBECount ? MayExitMaxBECount : getCouldNotCompute());
  // The loop backedge will be taken the maximum or zero times if there's
  // a single exit that must be taken the maximum or zero times.
  bool MaxOrZero = (MustExitMaxOrZero && ExitingBlocks.size() == 1);
  return BackedgeTakenInfo(std::move(ExitCounts), CouldComputeBECount,
                           MaxBECount, MaxOrZero);
}

/// Compute the ExitLimit for a single exiting block of \p L.
ScalarEvolution::ExitLimit
ScalarEvolution::computeExitLimit(const Loop *L, BasicBlock *ExitingBlock,
                                  bool AllowPredicates) {
  assert(L->contains(ExitingBlock) && "Exit count for non-loop block?");
  // If our exiting block does not dominate the latch, then its connection with
  // loop's exit limit may be far from trivial.
  const BasicBlock *Latch = L->getLoopLatch();
  if (!Latch || !DT.dominates(ExitingBlock, Latch))
    return getCouldNotCompute();

  // The exit condition fully controls the exit only when it is the sole exit.
  bool IsOnlyExit = (L->getExitingBlock() != nullptr);
  Instruction *Term = ExitingBlock->getTerminator();
  if (BranchInst *BI = dyn_cast<BranchInst>(Term)) {
    assert(BI->isConditional() && "If unconditional, it can't be in loop!");
    bool ExitIfTrue = !L->contains(BI->getSuccessor(0));
    assert(ExitIfTrue == L->contains(BI->getSuccessor(1)) &&
           "It should have one successor in loop and one exit block!");
    // Proceed to the next level to examine the exit condition expression.
    return computeExitLimitFromCond(
        L, BI->getCondition(), ExitIfTrue,
        /*ControlsExit=*/IsOnlyExit, AllowPredicates);
  }

  if (SwitchInst *SI = dyn_cast<SwitchInst>(Term)) {
    // For switch, make sure that there is a single exit from the loop.
    BasicBlock *Exit = nullptr;
    for (auto *SBB : successors(ExitingBlock))
      if (!L->contains(SBB)) {
        if (Exit) // Multiple exit successors.
          return getCouldNotCompute();
        Exit = SBB;
      }
    assert(Exit && "Exiting block must have at least one exit");
    return computeExitLimitFromSingleExitSwitch(L, SI, Exit,
                                                /*ControlsExit=*/IsOnlyExit);
  }

  // Any other terminator kind is not analyzable.
  return getCouldNotCompute();
}

/// Compute the ExitLimit implied by the branch condition \p ExitCond,
/// memoizing intermediate results in a fresh per-query cache.
ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromCond(
    const Loop *L, Value *ExitCond, bool ExitIfTrue,
    bool ControlsExit, bool AllowPredicates) {
  ScalarEvolution::ExitLimitCacheTy Cache(L, ExitIfTrue, AllowPredicates);
  return computeExitLimitFromCondCached(Cache, L, ExitCond, ExitIfTrue,
                                        ControlsExit, AllowPredicates);
}

/// Look up a previously computed ExitLimit for (ExitCond, ControlsExit);
/// the other arguments are invariant for the lifetime of the cache.
Optional<ScalarEvolution::ExitLimit>
ScalarEvolution::ExitLimitCache::find(const Loop *L, Value *ExitCond,
                                      bool ExitIfTrue, bool ControlsExit,
                                      bool AllowPredicates) {
  // Silence unused-member warnings in release builds (used by assert only).
  (void)this->L;
  (void)this->ExitIfTrue;
  (void)this->AllowPredicates;

  assert(this->L == L && this->ExitIfTrue == ExitIfTrue &&
         this->AllowPredicates == AllowPredicates &&
         "Variance in assumed invariant key components!");
  auto Itr = TripCountMap.find({ExitCond, ControlsExit});
  if (Itr == TripCountMap.end())
    return None;
  return Itr->second;
}

/// Record the ExitLimit for (ExitCond, ControlsExit); the entry must not
/// already exist.
void ScalarEvolution::ExitLimitCache::insert(const Loop *L, Value *ExitCond,
                                             bool ExitIfTrue,
                                             bool ControlsExit,
                                             bool AllowPredicates,
                                             const ExitLimit &EL) {
  assert(this->L == L && this->ExitIfTrue == ExitIfTrue &&
         this->AllowPredicates == AllowPredicates &&
         "Variance in assumed invariant key components!");
  auto InsertResult = TripCountMap.insert({{ExitCond, ControlsExit}, EL});
  assert(InsertResult.second && "Expected successful insertion!");
  (void)InsertResult;
  (void)ExitIfTrue;
}

/// Cache-wrapping layer over computeExitLimitFromCondImpl.
ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromCondCached(
    ExitLimitCacheTy &Cache, const Loop *L, Value *ExitCond, bool ExitIfTrue,
    bool ControlsExit, bool AllowPredicates) {

  if (auto MaybeEL =
          Cache.find(L, ExitCond, ExitIfTrue, ControlsExit, AllowPredicates))
    return *MaybeEL;

  ExitLimit EL = computeExitLimitFromCondImpl(Cache, L, ExitCond, ExitIfTrue,
                                              ControlsExit, AllowPredicates);
  Cache.insert(L, ExitCond, ExitIfTrue, ControlsExit, AllowPredicates, EL);
  return EL;
}

ScalarEvolution::ExitLimit ScalarEvolution::computeExitLimitFromCondImpl(
    ExitLimitCacheTy &Cache, const Loop *L, Value *ExitCond, bool ExitIfTrue,
    bool ControlsExit, bool AllowPredicates) {
  // Check if the controlling expression for this loop is an And or Or.
  if (BinaryOperator *BO = dyn_cast<BinaryOperator>(ExitCond)) {
    if (BO->getOpcode() == Instruction::And) {
      // Recurse on the operands of the and.
      bool EitherMayExit = !ExitIfTrue;
      ExitLimit EL0 = computeExitLimitFromCondCached(
          Cache, L, BO->getOperand(0), ExitIfTrue,
          ControlsExit && !EitherMayExit, AllowPredicates);
      ExitLimit EL1 = computeExitLimitFromCondCached(
          Cache, L, BO->getOperand(1), ExitIfTrue,
          ControlsExit && !EitherMayExit, AllowPredicates);
      // Be robust against unsimplified IR for the form "and i1 X, true"
      if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1)))
        return CI->isOne() ? EL0 : EL1;
      if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(0)))
        return CI->isOne() ? EL1 : EL0;
      const SCEV *BECount = getCouldNotCompute();
      const SCEV *MaxBECount = getCouldNotCompute();
      if (EitherMayExit) {
        // Both conditions must be true for the loop to continue executing.
        // Choose the less conservative count.
        if (EL0.ExactNotTaken == getCouldNotCompute() ||
            EL1.ExactNotTaken == getCouldNotCompute())
          BECount = getCouldNotCompute();
        else
          BECount =
              getUMinFromMismatchedTypes(EL0.ExactNotTaken, EL1.ExactNotTaken);
        if (EL0.MaxNotTaken == getCouldNotCompute())
          MaxBECount = EL1.MaxNotTaken;
        else if (EL1.MaxNotTaken == getCouldNotCompute())
          MaxBECount = EL0.MaxNotTaken;
        else
          MaxBECount =
              getUMinFromMismatchedTypes(EL0.MaxNotTaken, EL1.MaxNotTaken);
      } else {
        // Both conditions must be true at the same time for the loop to exit.
        // For now, be conservative.
        if (EL0.MaxNotTaken == EL1.MaxNotTaken)
          MaxBECount = EL0.MaxNotTaken;
        if (EL0.ExactNotTaken == EL1.ExactNotTaken)
          BECount = EL0.ExactNotTaken;
      }

      // There are cases (e.g. PR26207) where computeExitLimitFromCond is able
      // to be more aggressive when computing BECount than when computing
      // MaxBECount. In these cases it is possible for EL0.ExactNotTaken and
      // EL1.ExactNotTaken to match, but for EL0.MaxNotTaken and EL1.MaxNotTaken
      // to not.
      if (isa<SCEVCouldNotCompute>(MaxBECount) &&
          !isa<SCEVCouldNotCompute>(BECount))
        MaxBECount = getConstant(getUnsignedRangeMax(BECount));

      return ExitLimit(BECount, MaxBECount, false,
                       {&EL0.Predicates, &EL1.Predicates});
    }
    if (BO->getOpcode() == Instruction::Or) {
      // Recurse on the operands of the or.
      bool EitherMayExit = ExitIfTrue;
      ExitLimit EL0 = computeExitLimitFromCondCached(
          Cache, L, BO->getOperand(0), ExitIfTrue,
          ControlsExit && !EitherMayExit, AllowPredicates);
      ExitLimit EL1 = computeExitLimitFromCondCached(
          Cache, L, BO->getOperand(1), ExitIfTrue,
          ControlsExit && !EitherMayExit, AllowPredicates);
      // Be robust against unsimplified IR for the form "or i1 X, true"
      if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(1)))
        return CI->isZero() ? EL0 : EL1;
      if (ConstantInt *CI = dyn_cast<ConstantInt>(BO->getOperand(0)))
        return CI->isZero() ? EL1 : EL0;
      const SCEV *BECount = getCouldNotCompute();
      const SCEV *MaxBECount = getCouldNotCompute();
      if (EitherMayExit) {
        // Both conditions must be false for the loop to continue executing.
        // Choose the less conservative count.
        if (EL0.ExactNotTaken == getCouldNotCompute() ||
            EL1.ExactNotTaken == getCouldNotCompute())
          BECount = getCouldNotCompute();
        else
          BECount =
              getUMinFromMismatchedTypes(EL0.ExactNotTaken, EL1.ExactNotTaken);
        if (EL0.MaxNotTaken == getCouldNotCompute())
          MaxBECount = EL1.MaxNotTaken;
        else if (EL1.MaxNotTaken == getCouldNotCompute())
          MaxBECount = EL0.MaxNotTaken;
        else
          // NOTE(review): the symmetric And branch above uses
          // getUMinFromMismatchedTypes here, and the comment says "choose the
          // less conservative count" (a minimum); getUMax looks suspicious —
          // confirm against upstream before relying on this bound.
          MaxBECount =
              getUMaxFromMismatchedTypes(EL0.MaxNotTaken, EL1.MaxNotTaken);
      } else {
        // Both conditions must be false at the same time for the loop to exit.
        // For now, be conservative.
        if (EL0.MaxNotTaken == EL1.MaxNotTaken)
          MaxBECount = EL0.MaxNotTaken;
        if (EL0.ExactNotTaken == EL1.ExactNotTaken)
          BECount = EL0.ExactNotTaken;
      }
      // There are cases (e.g. PR26207) where computeExitLimitFromCond is able
      // to be more aggressive when computing BECount than when computing
      // MaxBECount. In these cases it is possible for EL0.ExactNotTaken and
      // EL1.ExactNotTaken to match, but for EL0.MaxNotTaken and EL1.MaxNotTaken
      // to not.
7378353358Sdim if (isa<SCEVCouldNotCompute>(MaxBECount) && 7379353358Sdim !isa<SCEVCouldNotCompute>(BECount)) 7380353358Sdim MaxBECount = getConstant(getUnsignedRangeMax(BECount)); 7381194612Sed 7382314564Sdim return ExitLimit(BECount, MaxBECount, false, 7383314564Sdim {&EL0.Predicates, &EL1.Predicates}); 7384194612Sed } 7385194612Sed } 7386194612Sed 7387194612Sed // With an icmp, it may be feasible to compute an exact backedge-taken count. 7388204642Srdivacky // Proceed to the next level to examine the icmp. 7389309124Sdim if (ICmpInst *ExitCondICmp = dyn_cast<ICmpInst>(ExitCond)) { 7390309124Sdim ExitLimit EL = 7391341825Sdim computeExitLimitFromICmp(L, ExitCondICmp, ExitIfTrue, ControlsExit); 7392309124Sdim if (EL.hasFullInfo() || !AllowPredicates) 7393309124Sdim return EL; 7394194612Sed 7395309124Sdim // Try again, but use SCEV predicates this time. 7396341825Sdim return computeExitLimitFromICmp(L, ExitCondICmp, ExitIfTrue, ControlsExit, 7397309124Sdim /*AllowPredicates=*/true); 7398309124Sdim } 7399309124Sdim 7400204642Srdivacky // Check for a constant condition. These are normally stripped out by 7401204642Srdivacky // SimplifyCFG, but ScalarEvolution may be used by a pass which wishes to 7402204642Srdivacky // preserve the CFG and is temporarily leaving constant conditions 7403204642Srdivacky // in place. 7404204642Srdivacky if (ConstantInt *CI = dyn_cast<ConstantInt>(ExitCond)) { 7405341825Sdim if (ExitIfTrue == !CI->getZExtValue()) 7406204642Srdivacky // The backedge is always taken. 7407204642Srdivacky return getCouldNotCompute(); 7408204642Srdivacky else 7409204642Srdivacky // The backedge is never taken. 7410296417Sdim return getZero(CI->getType()); 7411204642Srdivacky } 7412204642Srdivacky 7413193323Sed // If it's not an integer or pointer comparison then compute it the hard way. 
7414341825Sdim return computeExitCountExhaustively(L, ExitCond, ExitIfTrue); 7415194612Sed} 7416193323Sed 7417226633SdimScalarEvolution::ExitLimit 7418296417SdimScalarEvolution::computeExitLimitFromICmp(const Loop *L, 7419226633Sdim ICmpInst *ExitCond, 7420341825Sdim bool ExitIfTrue, 7421309124Sdim bool ControlsExit, 7422309124Sdim bool AllowPredicates) { 7423193323Sed // If the condition was exit on true, convert the condition to exit on false 7424327952Sdim ICmpInst::Predicate Pred; 7425341825Sdim if (!ExitIfTrue) 7426327952Sdim Pred = ExitCond->getPredicate(); 7427193323Sed else 7428327952Sdim Pred = ExitCond->getInversePredicate(); 7429327952Sdim const ICmpInst::Predicate OriginalPred = Pred; 7430193323Sed 7431193323Sed // Handle common loops like: for (X = "string"; *X; ++X) 7432193323Sed if (LoadInst *LI = dyn_cast<LoadInst>(ExitCond->getOperand(0))) 7433193323Sed if (Constant *RHS = dyn_cast<Constant>(ExitCond->getOperand(1))) { 7434226633Sdim ExitLimit ItCnt = 7435327952Sdim computeLoadConstantCompareExitLimit(LI, RHS, L, Pred); 7436204642Srdivacky if (ItCnt.hasAnyInfo()) 7437204642Srdivacky return ItCnt; 7438193323Sed } 7439193323Sed 7440198090Srdivacky const SCEV *LHS = getSCEV(ExitCond->getOperand(0)); 7441198090Srdivacky const SCEV *RHS = getSCEV(ExitCond->getOperand(1)); 7442193323Sed 7443193323Sed // Try to evaluate any dependencies out of the loop. 7444193323Sed LHS = getSCEVAtScope(LHS, L); 7445193323Sed RHS = getSCEVAtScope(RHS, L); 7446193323Sed 7447195098Sed // At this point, we would like to compute how many iterations of the 7448193323Sed // loop the predicate will return true for these inputs. 7449218893Sdim if (isLoopInvariant(LHS, L) && !isLoopInvariant(RHS, L)) { 7450193323Sed // If there is a loop-invariant, force it into the RHS. 7451193323Sed std::swap(LHS, RHS); 7452327952Sdim Pred = ICmpInst::getSwappedPredicate(Pred); 7453193323Sed } 7454193323Sed 7455207618Srdivacky // Simplify the operands before analyzing them. 
7456327952Sdim (void)SimplifyICmpOperands(Pred, LHS, RHS); 7457207618Srdivacky 7458193323Sed // If we have a comparison of a chrec against a constant, try to use value 7459193323Sed // ranges to answer this query. 7460193323Sed if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS)) 7461193323Sed if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(LHS)) 7462193323Sed if (AddRec->getLoop() == L) { 7463193323Sed // Form the constant range. 7464314564Sdim ConstantRange CompRange = 7465327952Sdim ConstantRange::makeExactICmpRegion(Pred, RHSC->getAPInt()); 7466193323Sed 7467198090Srdivacky const SCEV *Ret = AddRec->getNumIterationsInRange(CompRange, *this); 7468193323Sed if (!isa<SCEVCouldNotCompute>(Ret)) return Ret; 7469193323Sed } 7470193323Sed 7471327952Sdim switch (Pred) { 7472193323Sed case ICmpInst::ICMP_NE: { // while (X != Y) 7473193323Sed // Convert to: while (X-Y != 0) 7474309124Sdim ExitLimit EL = howFarToZero(getMinusSCEV(LHS, RHS), L, ControlsExit, 7475309124Sdim AllowPredicates); 7476226633Sdim if (EL.hasAnyInfo()) return EL; 7477193323Sed break; 7478193323Sed } 7479198090Srdivacky case ICmpInst::ICMP_EQ: { // while (X == Y) 7480198090Srdivacky // Convert to: while (X-Y == 0) 7481309124Sdim ExitLimit EL = howFarToNonZero(getMinusSCEV(LHS, RHS), L); 7482226633Sdim if (EL.hasAnyInfo()) return EL; 7483193323Sed break; 7484193323Sed } 7485261991Sdim case ICmpInst::ICMP_SLT: 7486261991Sdim case ICmpInst::ICMP_ULT: { // while (X < Y) 7487327952Sdim bool IsSigned = Pred == ICmpInst::ICMP_SLT; 7488309124Sdim ExitLimit EL = howManyLessThans(LHS, RHS, L, IsSigned, ControlsExit, 7489309124Sdim AllowPredicates); 7490226633Sdim if (EL.hasAnyInfo()) return EL; 7491193323Sed break; 7492193323Sed } 7493261991Sdim case ICmpInst::ICMP_SGT: 7494261991Sdim case ICmpInst::ICMP_UGT: { // while (X > Y) 7495327952Sdim bool IsSigned = Pred == ICmpInst::ICMP_SGT; 7496309124Sdim ExitLimit EL = 7497309124Sdim howManyGreaterThans(LHS, RHS, L, IsSigned, ControlsExit, 
7498309124Sdim AllowPredicates); 7499226633Sdim if (EL.hasAnyInfo()) return EL; 7500193323Sed break; 7501193323Sed } 7502193323Sed default: 7503193323Sed break; 7504193323Sed } 7505309124Sdim 7506309124Sdim auto *ExhaustiveCount = 7507341825Sdim computeExitCountExhaustively(L, ExitCond, ExitIfTrue); 7508309124Sdim 7509309124Sdim if (!isa<SCEVCouldNotCompute>(ExhaustiveCount)) 7510309124Sdim return ExhaustiveCount; 7511309124Sdim 7512309124Sdim return computeShiftCompareExitLimit(ExitCond->getOperand(0), 7513327952Sdim ExitCond->getOperand(1), L, OriginalPred); 7514193323Sed} 7515193323Sed 7516276479SdimScalarEvolution::ExitLimit 7517296417SdimScalarEvolution::computeExitLimitFromSingleExitSwitch(const Loop *L, 7518276479Sdim SwitchInst *Switch, 7519276479Sdim BasicBlock *ExitingBlock, 7520280031Sdim bool ControlsExit) { 7521276479Sdim assert(!L->contains(ExitingBlock) && "Not an exiting block!"); 7522276479Sdim 7523276479Sdim // Give up if the exit is the default dest of a switch. 7524276479Sdim if (Switch->getDefaultDest() == ExitingBlock) 7525276479Sdim return getCouldNotCompute(); 7526276479Sdim 7527276479Sdim assert(L->contains(Switch->getDefaultDest()) && 7528276479Sdim "Default case must not exit the loop!"); 7529276479Sdim const SCEV *LHS = getSCEVAtScope(Switch->getCondition(), L); 7530276479Sdim const SCEV *RHS = getConstant(Switch->findCaseDest(ExitingBlock)); 7531276479Sdim 7532276479Sdim // while (X != Y) --> while (X-Y != 0) 7533309124Sdim ExitLimit EL = howFarToZero(getMinusSCEV(LHS, RHS), L, ControlsExit); 7534276479Sdim if (EL.hasAnyInfo()) 7535276479Sdim return EL; 7536276479Sdim 7537276479Sdim return getCouldNotCompute(); 7538276479Sdim} 7539276479Sdim 7540193323Sedstatic ConstantInt * 7541193323SedEvaluateConstantChrecAtConstant(const SCEVAddRecExpr *AddRec, ConstantInt *C, 7542193323Sed ScalarEvolution &SE) { 7543198090Srdivacky const SCEV *InVal = SE.getConstant(C); 7544198090Srdivacky const SCEV *Val = AddRec->evaluateAtIteration(InVal, SE); 
7545193323Sed assert(isa<SCEVConstant>(Val) && 7546193323Sed "Evaluation of SCEV at constant didn't fold correctly?"); 7547193323Sed return cast<SCEVConstant>(Val)->getValue(); 7548193323Sed} 7549193323Sed 7550309124Sdim/// Given an exit condition of 'icmp op load X, cst', try to see if we can 7551309124Sdim/// compute the backedge execution count. 7552226633SdimScalarEvolution::ExitLimit 7553296417SdimScalarEvolution::computeLoadConstantCompareExitLimit( 7554226633Sdim LoadInst *LI, 7555226633Sdim Constant *RHS, 7556226633Sdim const Loop *L, 7557226633Sdim ICmpInst::Predicate predicate) { 7558195340Sed if (LI->isVolatile()) return getCouldNotCompute(); 7559193323Sed 7560193323Sed // Check to see if the loaded pointer is a getelementptr of a global. 7561204642Srdivacky // TODO: Use SCEV instead of manually grubbing with GEPs. 7562193323Sed GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(LI->getOperand(0)); 7563195340Sed if (!GEP) return getCouldNotCompute(); 7564193323Sed 7565193323Sed // Make sure that it is really a constant global we are gepping, with an 7566193323Sed // initializer, and make sure the first IDX is really 0. 7567193323Sed GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)); 7568198090Srdivacky if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer() || 7569193323Sed GEP->getNumOperands() < 3 || !isa<Constant>(GEP->getOperand(1)) || 7570193323Sed !cast<Constant>(GEP->getOperand(1))->isNullValue()) 7571195340Sed return getCouldNotCompute(); 7572193323Sed 7573193323Sed // Okay, we allow one non-constant index into the GEP instruction. 
7574276479Sdim Value *VarIdx = nullptr; 7575234353Sdim std::vector<Constant*> Indexes; 7576193323Sed unsigned VarIdxNum = 0; 7577193323Sed for (unsigned i = 2, e = GEP->getNumOperands(); i != e; ++i) 7578193323Sed if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i))) { 7579193323Sed Indexes.push_back(CI); 7580193323Sed } else if (!isa<ConstantInt>(GEP->getOperand(i))) { 7581195340Sed if (VarIdx) return getCouldNotCompute(); // Multiple non-constant idx's. 7582193323Sed VarIdx = GEP->getOperand(i); 7583193323Sed VarIdxNum = i-2; 7584276479Sdim Indexes.push_back(nullptr); 7585193323Sed } 7586193323Sed 7587234353Sdim // Loop-invariant loads may be a byproduct of loop optimization. Skip them. 7588234353Sdim if (!VarIdx) 7589234353Sdim return getCouldNotCompute(); 7590234353Sdim 7591193323Sed // Okay, we know we have a (load (gep GV, 0, X)) comparison with a constant. 7592193323Sed // Check to see if X is a loop variant variable value now. 7593198090Srdivacky const SCEV *Idx = getSCEV(VarIdx); 7594193323Sed Idx = getSCEVAtScope(Idx, L); 7595193323Sed 7596193323Sed // We can only recognize very limited forms of loop index expressions, in 7597193323Sed // particular, only affine AddRec's like {C1,+,C2}. 7598193323Sed const SCEVAddRecExpr *IdxExpr = dyn_cast<SCEVAddRecExpr>(Idx); 7599218893Sdim if (!IdxExpr || !IdxExpr->isAffine() || isLoopInvariant(IdxExpr, L) || 7600193323Sed !isa<SCEVConstant>(IdxExpr->getOperand(0)) || 7601193323Sed !isa<SCEVConstant>(IdxExpr->getOperand(1))) 7602195340Sed return getCouldNotCompute(); 7603193323Sed 7604193323Sed unsigned MaxSteps = MaxBruteForceIterations; 7605193323Sed for (unsigned IterationNum = 0; IterationNum != MaxSteps; ++IterationNum) { 7606198090Srdivacky ConstantInt *ItCst = ConstantInt::get( 7607198090Srdivacky cast<IntegerType>(IdxExpr->getType()), IterationNum); 7608193323Sed ConstantInt *Val = EvaluateConstantChrecAtConstant(IdxExpr, ItCst, *this); 7609193323Sed 7610193323Sed // Form the GEP offset. 
7611193323Sed Indexes[VarIdxNum] = Val; 7612193323Sed 7613234353Sdim Constant *Result = ConstantFoldLoadThroughGEPIndices(GV->getInitializer(), 7614234353Sdim Indexes); 7615276479Sdim if (!Result) break; // Cannot compute! 7616193323Sed 7617193323Sed // Evaluate the condition for this iteration. 7618193323Sed Result = ConstantExpr::getICmp(predicate, Result, RHS); 7619193323Sed if (!isa<ConstantInt>(Result)) break; // Couldn't decide for sure 7620193323Sed if (cast<ConstantInt>(Result)->getValue().isMinValue()) { 7621193323Sed ++NumArrayLenItCounts; 7622193323Sed return getConstant(ItCst); // Found terminating iteration! 7623193323Sed } 7624193323Sed } 7625195340Sed return getCouldNotCompute(); 7626193323Sed} 7627193323Sed 7628296417SdimScalarEvolution::ExitLimit ScalarEvolution::computeShiftCompareExitLimit( 7629296417Sdim Value *LHS, Value *RHSV, const Loop *L, ICmpInst::Predicate Pred) { 7630296417Sdim ConstantInt *RHS = dyn_cast<ConstantInt>(RHSV); 7631296417Sdim if (!RHS) 7632296417Sdim return getCouldNotCompute(); 7633193323Sed 7634296417Sdim const BasicBlock *Latch = L->getLoopLatch(); 7635296417Sdim if (!Latch) 7636296417Sdim return getCouldNotCompute(); 7637296417Sdim 7638296417Sdim const BasicBlock *Predecessor = L->getLoopPredecessor(); 7639296417Sdim if (!Predecessor) 7640296417Sdim return getCouldNotCompute(); 7641296417Sdim 7642296417Sdim // Return true if V is of the form "LHS `shift_op` <positive constant>". 7643296417Sdim // Return LHS in OutLHS and shift_opt in OutOpCode. 
7644296417Sdim auto MatchPositiveShift = 7645296417Sdim [](Value *V, Value *&OutLHS, Instruction::BinaryOps &OutOpCode) { 7646296417Sdim 7647296417Sdim using namespace PatternMatch; 7648296417Sdim 7649296417Sdim ConstantInt *ShiftAmt; 7650296417Sdim if (match(V, m_LShr(m_Value(OutLHS), m_ConstantInt(ShiftAmt)))) 7651296417Sdim OutOpCode = Instruction::LShr; 7652296417Sdim else if (match(V, m_AShr(m_Value(OutLHS), m_ConstantInt(ShiftAmt)))) 7653296417Sdim OutOpCode = Instruction::AShr; 7654296417Sdim else if (match(V, m_Shl(m_Value(OutLHS), m_ConstantInt(ShiftAmt)))) 7655296417Sdim OutOpCode = Instruction::Shl; 7656296417Sdim else 7657296417Sdim return false; 7658296417Sdim 7659296417Sdim return ShiftAmt->getValue().isStrictlyPositive(); 7660296417Sdim }; 7661296417Sdim 7662296417Sdim // Recognize a "shift recurrence" either of the form %iv or of %iv.shifted in 7663296417Sdim // 7664296417Sdim // loop: 7665296417Sdim // %iv = phi i32 [ %iv.shifted, %loop ], [ %val, %preheader ] 7666296417Sdim // %iv.shifted = lshr i32 %iv, <positive constant> 7667296417Sdim // 7668314564Sdim // Return true on a successful match. Return the corresponding PHI node (%iv 7669296417Sdim // above) in PNOut and the opcode of the shift operation in OpCodeOut. 7670296417Sdim auto MatchShiftRecurrence = 7671296417Sdim [&](Value *V, PHINode *&PNOut, Instruction::BinaryOps &OpCodeOut) { 7672296417Sdim Optional<Instruction::BinaryOps> PostShiftOpCode; 7673296417Sdim 7674296417Sdim { 7675296417Sdim Instruction::BinaryOps OpC; 7676296417Sdim Value *V; 7677296417Sdim 7678296417Sdim // If we encounter a shift instruction, "peel off" the shift operation, 7679296417Sdim // and remember that we did so. Later when we inspect %iv's backedge 7680296417Sdim // value, we will make sure that the backedge value uses the same 7681296417Sdim // operation. 
7682296417Sdim // 7683296417Sdim // Note: the peeled shift operation does not have to be the same 7684296417Sdim // instruction as the one feeding into the PHI's backedge value. We only 7685296417Sdim // really care about it being the same *kind* of shift instruction -- 7686296417Sdim // that's all that is required for our later inferences to hold. 7687296417Sdim if (MatchPositiveShift(LHS, V, OpC)) { 7688296417Sdim PostShiftOpCode = OpC; 7689296417Sdim LHS = V; 7690296417Sdim } 7691296417Sdim } 7692296417Sdim 7693296417Sdim PNOut = dyn_cast<PHINode>(LHS); 7694296417Sdim if (!PNOut || PNOut->getParent() != L->getHeader()) 7695296417Sdim return false; 7696296417Sdim 7697296417Sdim Value *BEValue = PNOut->getIncomingValueForBlock(Latch); 7698296417Sdim Value *OpLHS; 7699296417Sdim 7700296417Sdim return 7701296417Sdim // The backedge value for the PHI node must be a shift by a positive 7702296417Sdim // amount 7703296417Sdim MatchPositiveShift(BEValue, OpLHS, OpCodeOut) && 7704296417Sdim 7705296417Sdim // of the PHI node itself 7706296417Sdim OpLHS == PNOut && 7707296417Sdim 7708296417Sdim // and the kind of shift should be match the kind of shift we peeled 7709296417Sdim // off, if any. 7710296417Sdim (!PostShiftOpCode.hasValue() || *PostShiftOpCode == OpCodeOut); 7711296417Sdim }; 7712296417Sdim 7713296417Sdim PHINode *PN; 7714296417Sdim Instruction::BinaryOps OpCode; 7715296417Sdim if (!MatchShiftRecurrence(LHS, PN, OpCode)) 7716296417Sdim return getCouldNotCompute(); 7717296417Sdim 7718296417Sdim const DataLayout &DL = getDataLayout(); 7719296417Sdim 7720296417Sdim // The key rationale for this optimization is that for some kinds of shift 7721296417Sdim // recurrences, the value of the recurrence "stabilizes" to either 0 or -1 7722296417Sdim // within a finite number of iterations. 
If the condition guarding the 7723296417Sdim // backedge (in the sense that the backedge is taken if the condition is true) 7724296417Sdim // is false for the value the shift recurrence stabilizes to, then we know 7725296417Sdim // that the backedge is taken only a finite number of times. 7726296417Sdim 7727296417Sdim ConstantInt *StableValue = nullptr; 7728296417Sdim switch (OpCode) { 7729296417Sdim default: 7730296417Sdim llvm_unreachable("Impossible case!"); 7731296417Sdim 7732296417Sdim case Instruction::AShr: { 7733296417Sdim // {K,ashr,<positive-constant>} stabilizes to signum(K) in at most 7734296417Sdim // bitwidth(K) iterations. 7735296417Sdim Value *FirstValue = PN->getIncomingValueForBlock(Predecessor); 7736321369Sdim KnownBits Known = computeKnownBits(FirstValue, DL, 0, nullptr, 7737321369Sdim Predecessor->getTerminator(), &DT); 7738296417Sdim auto *Ty = cast<IntegerType>(RHS->getType()); 7739321369Sdim if (Known.isNonNegative()) 7740296417Sdim StableValue = ConstantInt::get(Ty, 0); 7741321369Sdim else if (Known.isNegative()) 7742296417Sdim StableValue = ConstantInt::get(Ty, -1, true); 7743296417Sdim else 7744296417Sdim return getCouldNotCompute(); 7745296417Sdim 7746296417Sdim break; 7747296417Sdim } 7748296417Sdim case Instruction::LShr: 7749296417Sdim case Instruction::Shl: 7750296417Sdim // Both {K,lshr,<positive-constant>} and {K,shl,<positive-constant>} 7751296417Sdim // stabilize to 0 in at most bitwidth(K) iterations. 
7752296417Sdim StableValue = ConstantInt::get(cast<IntegerType>(RHS->getType()), 0); 7753296417Sdim break; 7754296417Sdim } 7755296417Sdim 7756296417Sdim auto *Result = 7757296417Sdim ConstantFoldCompareInstOperands(Pred, StableValue, RHS, DL, &TLI); 7758296417Sdim assert(Result->getType()->isIntegerTy(1) && 7759296417Sdim "Otherwise cannot be an operand to a branch instruction"); 7760296417Sdim 7761296417Sdim if (Result->isZeroValue()) { 7762296417Sdim unsigned BitWidth = getTypeSizeInBits(RHS->getType()); 7763296417Sdim const SCEV *UpperBound = 7764296417Sdim getConstant(getEffectiveSCEVType(RHS->getType()), BitWidth); 7765314564Sdim return ExitLimit(getCouldNotCompute(), UpperBound, false); 7766296417Sdim } 7767296417Sdim 7768296417Sdim return getCouldNotCompute(); 7769296417Sdim} 7770296417Sdim 7771309124Sdim/// Return true if we can constant fold an instruction of the specified type, 7772309124Sdim/// assuming that all operands were constants. 7773193323Sedstatic bool CanConstantFold(const Instruction *I) { 7774193323Sed if (isa<BinaryOperator>(I) || isa<CmpInst>(I) || 7775234353Sdim isa<SelectInst>(I) || isa<CastInst>(I) || isa<GetElementPtrInst>(I) || 7776353358Sdim isa<LoadInst>(I) || isa<ExtractValueInst>(I)) 7777193323Sed return true; 7778193323Sed 7779193323Sed if (const CallInst *CI = dyn_cast<CallInst>(I)) 7780193323Sed if (const Function *F = CI->getCalledFunction()) 7781321369Sdim return canConstantFoldCallTo(CI, F); 7782193323Sed return false; 7783193323Sed} 7784193323Sed 7785226633Sdim/// Determine whether this instruction can constant evolve within this loop 7786226633Sdim/// assuming its operands can all constant evolve. 7787226633Sdimstatic bool canConstantEvolve(Instruction *I, const Loop *L) { 7788226633Sdim // An instruction outside of the loop can't be derived from a loop PHI. 
7789226633Sdim if (!L->contains(I)) return false; 7790193323Sed 7791226633Sdim if (isa<PHINode>(I)) { 7792288943Sdim // We don't currently keep track of the control flow needed to evaluate 7793288943Sdim // PHIs, so we cannot handle PHIs inside of loops. 7794288943Sdim return L->getHeader() == I->getParent(); 7795193323Sed } 7796193323Sed 7797193323Sed // If we won't be able to constant fold this expression even if the operands 7798226633Sdim // are constants, bail early. 7799226633Sdim return CanConstantFold(I); 7800226633Sdim} 7801193323Sed 7802226633Sdim/// getConstantEvolvingPHIOperands - Implement getConstantEvolvingPHI by 7803226633Sdim/// recursing through each instruction operand until reaching a loop header phi. 7804226633Sdimstatic PHINode * 7805226633SdimgetConstantEvolvingPHIOperands(Instruction *UseInst, const Loop *L, 7806321369Sdim DenseMap<Instruction *, PHINode *> &PHIMap, 7807321369Sdim unsigned Depth) { 7808321369Sdim if (Depth > MaxConstantEvolvingDepth) 7809321369Sdim return nullptr; 7810226633Sdim 7811193323Sed // Otherwise, we can evaluate this instruction if all of its operands are 7812193323Sed // constant or derived from a PHI node themselves. 7813276479Sdim PHINode *PHI = nullptr; 7814296417Sdim for (Value *Op : UseInst->operands()) { 7815296417Sdim if (isa<Constant>(Op)) continue; 7816226633Sdim 7817296417Sdim Instruction *OpInst = dyn_cast<Instruction>(Op); 7818276479Sdim if (!OpInst || !canConstantEvolve(OpInst, L)) return nullptr; 7819226633Sdim 7820226633Sdim PHINode *P = dyn_cast<PHINode>(OpInst); 7821226633Sdim if (!P) 7822226633Sdim // If this operand is already visited, reuse the prior result. 7823226633Sdim // We may have P != PHI if this is the deepest point at which the 7824226633Sdim // inconsistent paths meet. 7825226633Sdim P = PHIMap.lookup(OpInst); 7826226633Sdim if (!P) { 7827226633Sdim // Recurse and memoize the results, whether a phi is found or not. 
7828226633Sdim // This recursive call invalidates pointers into PHIMap. 7829321369Sdim P = getConstantEvolvingPHIOperands(OpInst, L, PHIMap, Depth + 1); 7830226633Sdim PHIMap[OpInst] = P; 7831193323Sed } 7832276479Sdim if (!P) 7833276479Sdim return nullptr; // Not evolving from PHI 7834276479Sdim if (PHI && PHI != P) 7835276479Sdim return nullptr; // Evolving from multiple different PHIs. 7836226633Sdim PHI = P; 7837226633Sdim } 7838193323Sed // This is a expression evolving from a constant PHI! 7839193323Sed return PHI; 7840193323Sed} 7841193323Sed 7842226633Sdim/// getConstantEvolvingPHI - Given an LLVM value and a loop, return a PHI node 7843226633Sdim/// in the loop that V is derived from. We allow arbitrary operations along the 7844226633Sdim/// way, but the operands of an operation must either be constants or a value 7845226633Sdim/// derived from a constant PHI. If this expression does not fit with these 7846226633Sdim/// constraints, return null. 7847226633Sdimstatic PHINode *getConstantEvolvingPHI(Value *V, const Loop *L) { 7848226633Sdim Instruction *I = dyn_cast<Instruction>(V); 7849276479Sdim if (!I || !canConstantEvolve(I, L)) return nullptr; 7850226633Sdim 7851296417Sdim if (PHINode *PN = dyn_cast<PHINode>(I)) 7852226633Sdim return PN; 7853226633Sdim 7854226633Sdim // Record non-constant instructions contained by the loop. 7855226633Sdim DenseMap<Instruction *, PHINode *> PHIMap; 7856321369Sdim return getConstantEvolvingPHIOperands(I, L, PHIMap, 0); 7857226633Sdim} 7858226633Sdim 7859193323Sed/// EvaluateExpression - Given an expression that passes the 7860193323Sed/// getConstantEvolvingPHI predicate, evaluate its value assuming the PHI node 7861193323Sed/// in the loop has the value PHIVal. If we can't fold this expression for some 7862193323Sed/// reason, return null. 
7863226633Sdimstatic Constant *EvaluateExpression(Value *V, const Loop *L, 7864226633Sdim DenseMap<Instruction *, Constant *> &Vals, 7865288943Sdim const DataLayout &DL, 7866234353Sdim const TargetLibraryInfo *TLI) { 7867226633Sdim // Convenient constant check, but redundant for recursive calls. 7868193323Sed if (Constant *C = dyn_cast<Constant>(V)) return C; 7869234353Sdim Instruction *I = dyn_cast<Instruction>(V); 7870276479Sdim if (!I) return nullptr; 7871226633Sdim 7872226633Sdim if (Constant *C = Vals.lookup(I)) return C; 7873193323Sed 7874234353Sdim // An instruction inside the loop depends on a value outside the loop that we 7875234353Sdim // weren't given a mapping for, or a value such as a call inside the loop. 7876276479Sdim if (!canConstantEvolve(I, L)) return nullptr; 7877226633Sdim 7878234353Sdim // An unmapped PHI can be due to a branch or another loop inside this loop, 7879234353Sdim // or due to this not being the initial iteration through a loop where we 7880234353Sdim // couldn't compute the evolution of this particular PHI last time. 
7881276479Sdim if (isa<PHINode>(I)) return nullptr; 7882234353Sdim 7883210299Sed std::vector<Constant*> Operands(I->getNumOperands()); 7884193323Sed 7885193323Sed for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { 7886226633Sdim Instruction *Operand = dyn_cast<Instruction>(I->getOperand(i)); 7887226633Sdim if (!Operand) { 7888226633Sdim Operands[i] = dyn_cast<Constant>(I->getOperand(i)); 7889276479Sdim if (!Operands[i]) return nullptr; 7890226633Sdim continue; 7891226633Sdim } 7892276479Sdim Constant *C = EvaluateExpression(Operand, L, Vals, DL, TLI); 7893226633Sdim Vals[Operand] = C; 7894276479Sdim if (!C) return nullptr; 7895226633Sdim Operands[i] = C; 7896193323Sed } 7897193323Sed 7898234353Sdim if (CmpInst *CI = dyn_cast<CmpInst>(I)) 7899199481Srdivacky return ConstantFoldCompareInstOperands(CI->getPredicate(), Operands[0], 7900276479Sdim Operands[1], DL, TLI); 7901234353Sdim if (LoadInst *LI = dyn_cast<LoadInst>(I)) { 7902234353Sdim if (!LI->isVolatile()) 7903309124Sdim return ConstantFoldLoadFromConstPtr(Operands[0], LI->getType(), DL); 7904234353Sdim } 7905309124Sdim return ConstantFoldInstOperands(I, Operands, DL, TLI); 7906193323Sed} 7907193323Sed 7908296417Sdim 7909296417Sdim// If every incoming value to PN except the one for BB is a specific Constant, 7910296417Sdim// return that, else return nullptr. 
/// If every incoming value of PN other than the one from BB is the same
/// Constant, return that constant; otherwise return null (some value is
/// non-constant, or two distinct constants flow in).
static Constant *getOtherIncomingValue(PHINode *PN, BasicBlock *BB) {
  Constant *IncomingVal = nullptr;

  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
    if (PN->getIncomingBlock(i) == BB)
      continue;

    auto *CurrentVal = dyn_cast<Constant>(PN->getIncomingValue(i));
    if (!CurrentVal)
      return nullptr;

    if (IncomingVal != CurrentVal) {
      if (IncomingVal)
        return nullptr;
      IncomingVal = CurrentVal;
    }
  }

  return IncomingVal;
}

/// getConstantEvolutionLoopExitValue - If we know that the specified Phi is
/// in the header of its containing loop, we know the loop executes a
/// constant number of times, and the PHI node is just a recurrence
/// involving constants, fold it.
Constant *
ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
                                                   const APInt &BEs,
                                                   const Loop *L) {
  // Results (including failures, stored as null) are memoized per PHI.
  auto I = ConstantEvolutionLoopExitValue.find(PN);
  if (I != ConstantEvolutionLoopExitValue.end())
    return I->second;

  if (BEs.ugt(MaxBruteForceIterations))
    return ConstantEvolutionLoopExitValue[PN] = nullptr;  // Not going to evaluate it.

  Constant *&RetVal = ConstantEvolutionLoopExitValue[PN];

  DenseMap<Instruction *, Constant *> CurrentIterVals;
  BasicBlock *Header = L->getHeader();
  assert(PN->getParent() == Header && "Can't evaluate PHI not in loop header!");

  BasicBlock *Latch = L->getLoopLatch();
  if (!Latch)
    return nullptr;

  // Seed the first-iteration values: each header PHI whose non-latch
  // incoming values agree on a single constant starts at that constant.
  for (PHINode &PHI : Header->phis()) {
    if (auto *StartCST = getOtherIncomingValue(&PHI, Latch))
      CurrentIterVals[&PHI] = StartCST;
  }
  if (!CurrentIterVals.count(PN))
    return RetVal = nullptr;

  Value *BEValue = PN->getIncomingValueForBlock(Latch);

  // Execute the loop symbolically to determine the exit value.
  assert(BEs.getActiveBits() < CHAR_BIT * sizeof(unsigned) &&
         "BEs is <= MaxBruteForceIterations which is an 'unsigned'!");

  unsigned NumIterations = BEs.getZExtValue(); // must be in range
  unsigned IterationNum = 0;
  const DataLayout &DL = getDataLayout();
  for (; ; ++IterationNum) {
    if (IterationNum == NumIterations)
      return RetVal = CurrentIterVals[PN];  // Got exit value!

    // Compute the value of the PHIs for the next iteration.
    // EvaluateExpression adds non-phi values to the CurrentIterVals map.
    DenseMap<Instruction *, Constant *> NextIterVals;
    Constant *NextPHI =
        EvaluateExpression(BEValue, L, CurrentIterVals, DL, &TLI);
    if (!NextPHI)
      return nullptr;        // Couldn't evaluate!
    NextIterVals[PN] = NextPHI;

    bool StoppedEvolving = NextPHI == CurrentIterVals[PN];

    // Also evaluate the other PHI nodes.  However, we don't get to stop if we
    // cease to be able to evaluate one of them or if they stop evolving,
    // because that doesn't necessarily prevent us from computing PN.
    SmallVector<std::pair<PHINode *, Constant *>, 8> PHIsToCompute;
    for (const auto &I : CurrentIterVals) {
      PHINode *PHI = dyn_cast<PHINode>(I.first);
      if (!PHI || PHI == PN || PHI->getParent() != Header) continue;
      PHIsToCompute.emplace_back(PHI, I.second);
    }
    // We use two distinct loops because EvaluateExpression may invalidate any
    // iterators into CurrentIterVals.
    for (const auto &I : PHIsToCompute) {
      PHINode *PHI = I.first;
      Constant *&NextPHI = NextIterVals[PHI];
      if (!NextPHI) { // Not already computed.
        Value *BEValue = PHI->getIncomingValueForBlock(Latch);
        NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, DL, &TLI);
      }
      if (NextPHI != I.second)
        StoppedEvolving = false;
    }

    // If all entries in CurrentIterVals == NextIterVals then we can stop
    // iterating, the loop can't continue to change.
    if (StoppedEvolving)
      return RetVal = CurrentIterVals[PN];

    CurrentIterVals.swap(NextIterVals);
  }
}

/// Brute-force evaluation of the loop: simulate up to MaxBruteForceIterations
/// iterations of the header PHIs and return the (i32) iteration number at
/// which Cond first evaluates to ExitWhen, or CouldNotCompute on failure.
const SCEV *ScalarEvolution::computeExitCountExhaustively(const Loop *L,
                                                          Value *Cond,
                                                          bool ExitWhen) {
  PHINode *PN = getConstantEvolvingPHI(Cond, L);
  if (!PN) return getCouldNotCompute();

  // If the loop is canonicalized, the PHI will have exactly two entries.
  // That's the only form we support here.
  if (PN->getNumIncomingValues() != 2) return getCouldNotCompute();

  DenseMap<Instruction *, Constant *> CurrentIterVals;
  BasicBlock *Header = L->getHeader();
  assert(PN->getParent() == Header && "Can't evaluate PHI not in loop header!");

  BasicBlock *Latch = L->getLoopLatch();
  assert(Latch && "Should follow from NumIncomingValues == 2!");

  // Seed starting values for all header PHIs with constant incoming values.
  for (PHINode &PHI : Header->phis()) {
    if (auto *StartCST = getOtherIncomingValue(&PHI, Latch))
      CurrentIterVals[&PHI] = StartCST;
  }
  if (!CurrentIterVals.count(PN))
    return getCouldNotCompute();

  // Okay, we find a PHI node that defines the trip count of this loop.  Execute
  // the loop symbolically to determine when the condition gets a value of
  // "ExitWhen".
  unsigned MaxIterations = MaxBruteForceIterations;   // Limit analysis.
  const DataLayout &DL = getDataLayout();
  for (unsigned IterationNum = 0; IterationNum != MaxIterations;++IterationNum){
    auto *CondVal = dyn_cast_or_null<ConstantInt>(
        EvaluateExpression(Cond, L, CurrentIterVals, DL, &TLI));

    // Couldn't symbolically evaluate.
    if (!CondVal) return getCouldNotCompute();

    if (CondVal->getValue() == uint64_t(ExitWhen)) {
      ++NumBruteForceTripCountsComputed;
      return getConstant(Type::getInt32Ty(getContext()), IterationNum);
    }

    // Update all the PHI nodes for the next iteration.
    DenseMap<Instruction *, Constant *> NextIterVals;

    // Create a list of which PHIs we need to compute. We want to do this before
    // calling EvaluateExpression on them because that may invalidate iterators
    // into CurrentIterVals.
    SmallVector<PHINode *, 8> PHIsToCompute;
    for (const auto &I : CurrentIterVals) {
      PHINode *PHI = dyn_cast<PHINode>(I.first);
      if (!PHI || PHI->getParent() != Header) continue;
      PHIsToCompute.push_back(PHI);
    }
    for (PHINode *PHI : PHIsToCompute) {
      Constant *&NextPHI = NextIterVals[PHI];
      if (NextPHI) continue;    // Already computed!

      Value *BEValue = PHI->getIncomingValueForBlock(Latch);
      NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, DL, &TLI);
    }
    CurrentIterVals.swap(NextIterVals);
  }

  // Too many iterations were needed to evaluate.
  return getCouldNotCompute();
}

/// Memoizing wrapper around computeSCEVAtScope. A null cached value marks an
/// in-progress/identity computation and means "V simplifies to itself".
const SCEV *ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) {
  SmallVector<std::pair<const Loop *, const SCEV *>, 2> &Values =
      ValuesAtScopes[V];
  // Check to see if we've folded this expression at this loop before.
  for (auto &LS : Values)
    if (LS.first == L)
      return LS.second ? LS.second : V;

  // Insert a placeholder before recursing so re-entrant queries for (V, L)
  // terminate (they will see the null entry and return V).
  Values.emplace_back(L, nullptr);

  // Otherwise compute it.
  const SCEV *C = computeSCEVAtScope(V, L);
  // Re-look-up ValuesAtScopes[V] here: the recursive computation above may
  // have inserted into the map/vector and invalidated `Values`.
  for (auto &LS : reverse(ValuesAtScopes[V]))
    if (LS.first == L) {
      LS.second = C;
      break;
    }
  return C;
}

/// This builds up a Constant using the ConstantExpr interface.  That way, we
/// will return Constants for objects which aren't represented by a
/// SCEVConstant, because SCEVConstant is restricted to ConstantInt.
/// Returns NULL if the SCEV isn't representable as a Constant.
static Constant *BuildConstantFromSCEV(const SCEV *V) {
  switch (static_cast<SCEVTypes>(V->getSCEVType())) {
  case scCouldNotCompute:
  case scAddRecExpr:
    break;
  case scConstant:
    return cast<SCEVConstant>(V)->getValue();
  case scUnknown:
    // Only SCEVUnknowns wrapping a Constant are representable.
    return dyn_cast<Constant>(cast<SCEVUnknown>(V)->getValue());
  case scSignExtend: {
    const SCEVSignExtendExpr *SS = cast<SCEVSignExtendExpr>(V);
    if (Constant *CastOp = BuildConstantFromSCEV(SS->getOperand()))
      return ConstantExpr::getSExt(CastOp, SS->getType());
    break;
  }
  case scZeroExtend: {
    const SCEVZeroExtendExpr *SZ = cast<SCEVZeroExtendExpr>(V);
    if (Constant *CastOp = BuildConstantFromSCEV(SZ->getOperand()))
      return ConstantExpr::getZExt(CastOp, SZ->getType());
    break;
  }
  case scTruncate: {
    const SCEVTruncateExpr *ST = cast<SCEVTruncateExpr>(V);
    if (Constant *CastOp = BuildConstantFromSCEV(ST->getOperand()))
      return ConstantExpr::getTrunc(CastOp, ST->getType());
    break;
  }
  case scAddExpr: {
    const SCEVAddExpr *SA = cast<SCEVAddExpr>(V);
    if (Constant *C = BuildConstantFromSCEV(SA->getOperand(0))) {
      // Normalize a leading pointer operand to i8* so byte offsets can be
      // added via GEP below.
      if (PointerType *PTy = dyn_cast<PointerType>(C->getType())) {
        unsigned AS = PTy->getAddressSpace();
        Type *DestPtrTy = Type::getInt8PtrTy(C->getContext(), AS);
        C = ConstantExpr::getBitCast(C, DestPtrTy);
      }
      for (unsigned i = 1, e = SA->getNumOperands(); i != e; ++i) {
        Constant *C2 = BuildConstantFromSCEV(SA->getOperand(i));
        if (!C2) return nullptr;

        // First pointer!
        if (!C->getType()->isPointerTy() && C2->getType()->isPointerTy()) {
          unsigned AS = C2->getType()->getPointerAddressSpace();
          std::swap(C, C2);
          Type *DestPtrTy = Type::getInt8PtrTy(C->getContext(), AS);
          // The offsets have been converted to bytes.  We can add bytes to an
          // i8* by GEP with the byte count in the first index.
          C = ConstantExpr::getBitCast(C, DestPtrTy);
        }

        // Don't bother trying to sum two pointers. We probably can't
        // statically compute a load that results from it anyway.
        if (C2->getType()->isPointerTy())
          return nullptr;

        if (PointerType *PTy = dyn_cast<PointerType>(C->getType())) {
          if (PTy->getElementType()->isStructTy())
            C2 = ConstantExpr::getIntegerCast(
                C2, Type::getInt32Ty(C->getContext()), true);
          C = ConstantExpr::getGetElementPtr(PTy->getElementType(), C, C2);
        } else
          C = ConstantExpr::getAdd(C, C2);
      }
      return C;
    }
    break;
  }
  case scMulExpr: {
    const SCEVMulExpr *SM = cast<SCEVMulExpr>(V);
    if (Constant *C = BuildConstantFromSCEV(SM->getOperand(0))) {
      // Don't bother with pointers at all.
      if (C->getType()->isPointerTy()) return nullptr;
      for (unsigned i = 1, e = SM->getNumOperands(); i != e; ++i) {
        Constant *C2 = BuildConstantFromSCEV(SM->getOperand(i));
        if (!C2 || C2->getType()->isPointerTy()) return nullptr;
        C = ConstantExpr::getMul(C, C2);
      }
      return C;
    }
    break;
  }
  case scUDivExpr: {
    const SCEVUDivExpr *SU = cast<SCEVUDivExpr>(V);
    if (Constant *LHS = BuildConstantFromSCEV(SU->getLHS()))
      if (Constant *RHS = BuildConstantFromSCEV(SU->getRHS()))
        if (LHS->getType() == RHS->getType())
          return ConstantExpr::getUDiv(LHS, RHS);
    break;
  }
  case scSMaxExpr:
  case scUMaxExpr:
  case scSMinExpr:
  case scUMinExpr:
    break; // TODO: smax, umax, smin, umax.
  }
  return nullptr;
}

/// Compute the value of V at the scope of loop L (i.e. after L has run to
/// completion, from L's perspective), returning V itself when no
/// simplification is possible.
const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
  if (isa<SCEVConstant>(V)) return V;

  // If this instruction is evolved from a constant-evolving PHI, compute the
  // exit value from the loop without using SCEVs.
  if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(V)) {
    if (Instruction *I = dyn_cast<Instruction>(SU->getValue())) {
      if (PHINode *PN = dyn_cast<PHINode>(I)) {
        // NOTE: local `LI` (the Loop containing I) shadows the LoopInfo
        // member of the same name here.
        const Loop *LI = this->LI[I->getParent()];
        // Looking for loop exit value.
        if (LI && LI->getParentLoop() == L &&
            PN->getParent() == LI->getHeader()) {
          // Okay, there is no closed form solution for the PHI node.  Check
          // to see if the loop that contains it has a known backedge-taken
          // count.  If so, we may be able to force computation of the exit
          // value.
          const SCEV *BackedgeTakenCount = getBackedgeTakenCount(LI);
          // This trivial case can show up in some degenerate cases where
          // the incoming IR has not yet been fully simplified.
          if (BackedgeTakenCount->isZero()) {
            // The backedge never executes: the exit value is the (unique)
            // initial value flowing in from outside the loop, if any.
            Value *InitValue = nullptr;
            bool MultipleInitValues = false;
            for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
              if (!LI->contains(PN->getIncomingBlock(i))) {
                if (!InitValue)
                  InitValue = PN->getIncomingValue(i);
                else if (InitValue != PN->getIncomingValue(i)) {
                  MultipleInitValues = true;
                  break;
                }
              }
            }
            if (!MultipleInitValues && InitValue)
              return getSCEV(InitValue);
          }
          // Do we have a loop invariant value flowing around the backedge
          // for a loop which must execute the backedge?
          if (!isa<SCEVCouldNotCompute>(BackedgeTakenCount) &&
              isKnownPositive(BackedgeTakenCount) &&
              PN->getNumIncomingValues() == 2) {
            unsigned InLoopPred = LI->contains(PN->getIncomingBlock(0)) ? 0 : 1;
            const SCEV *OnBackedge = getSCEV(PN->getIncomingValue(InLoopPred));
            if (IsAvailableOnEntry(LI, DT, OnBackedge, PN->getParent()))
              return OnBackedge;
          }
          if (auto *BTCC = dyn_cast<SCEVConstant>(BackedgeTakenCount)) {
            // Okay, we know how many times the containing loop executes.  If
            // this is a constant evolving PHI node, get the final value at
            // the specified iteration number.
            Constant *RV =
                getConstantEvolutionLoopExitValue(PN, BTCC->getAPInt(), LI);
            if (RV) return getSCEV(RV);
          }
        }

        // If there is a single-input Phi, evaluate it at our scope. If we can
        // prove that this replacement does not break LCSSA form, use new value.
        if (PN->getNumOperands() == 1) {
          const SCEV *Input = getSCEV(PN->getOperand(0));
          const SCEV *InputAtScope = getSCEVAtScope(Input, L);
          // TODO: We can generalize it using LI.replacementPreservesLCSSAForm,
          // for the simplest case just support constants.
          if (isa<SCEVConstant>(InputAtScope)) return InputAtScope;
        }
      }

      // Okay, this is an expression that we cannot symbolically evaluate
      // into a SCEV.  Check to see if it's possible to symbolically evaluate
      // the arguments into constants, and if so, try to constant propagate the
      // result.  This is particularly useful for computing loop exit values.
      if (CanConstantFold(I)) {
        SmallVector<Constant *, 4> Operands;
        bool MadeImprovement = false;
        for (Value *Op : I->operands()) {
          if (Constant *C = dyn_cast<Constant>(Op)) {
            Operands.push_back(C);
            continue;
          }

          // If any of the operands is non-constant and if they are
          // non-integer and non-pointer, don't even try to analyze them
          // with scev techniques.
          if (!isSCEVable(Op->getType()))
            return V;

          const SCEV *OrigV = getSCEV(Op);
          const SCEV *OpV = getSCEVAtScope(OrigV, L);
          MadeImprovement |= OrigV != OpV;

          Constant *C = BuildConstantFromSCEV(OpV);
          if (!C) return V;
          if (C->getType() != Op->getType())
            C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false,
                                                              Op->getType(),
                                                              false),
                                      C, Op->getType());
          Operands.push_back(C);
        }

        // Check to see if getSCEVAtScope actually made an improvement.
        if (MadeImprovement) {
          Constant *C = nullptr;
          const DataLayout &DL = getDataLayout();
          if (const CmpInst *CI = dyn_cast<CmpInst>(I))
            C = ConstantFoldCompareInstOperands(CI->getPredicate(), Operands[0],
                                                Operands[1], DL, &TLI);
          else if (const LoadInst *LI = dyn_cast<LoadInst>(I)) {
            if (!LI->isVolatile())
              C = ConstantFoldLoadFromConstPtr(Operands[0], LI->getType(), DL);
          } else
            C = ConstantFoldInstOperands(I, Operands, DL, &TLI);
          if (!C) return V;
          return getSCEV(C);
        }
      }
    }

    // This is some other type of SCEVUnknown, just return it.
    return V;
  }

  if (const SCEVCommutativeExpr *Comm = dyn_cast<SCEVCommutativeExpr>(V)) {
    // Avoid performing the look-up in the common case where the specified
    // expression has no loop-variant portions.
    for (unsigned i = 0, e = Comm->getNumOperands(); i != e; ++i) {
      const SCEV *OpAtScope = getSCEVAtScope(Comm->getOperand(i), L);
      if (OpAtScope != Comm->getOperand(i)) {
        // Okay, at least one of these operands is loop variant but might be
        // foldable.  Build a new instance of the folded commutative expression.
        SmallVector<const SCEV *, 8> NewOps(Comm->op_begin(),
                                            Comm->op_begin()+i);
        NewOps.push_back(OpAtScope);

        for (++i; i != e; ++i) {
          OpAtScope = getSCEVAtScope(Comm->getOperand(i), L);
          NewOps.push_back(OpAtScope);
        }
        if (isa<SCEVAddExpr>(Comm))
          return getAddExpr(NewOps, Comm->getNoWrapFlags());
        if (isa<SCEVMulExpr>(Comm))
          return getMulExpr(NewOps, Comm->getNoWrapFlags());
        if (isa<SCEVMinMaxExpr>(Comm))
          return getMinMaxExpr(Comm->getSCEVType(), NewOps);
        llvm_unreachable("Unknown commutative SCEV type!");
      }
    }
    // If we got here, all operands are loop invariant.
    return Comm;
  }

  if (const SCEVUDivExpr *Div = dyn_cast<SCEVUDivExpr>(V)) {
    const SCEV *LHS = getSCEVAtScope(Div->getLHS(), L);
    const SCEV *RHS = getSCEVAtScope(Div->getRHS(), L);
    if (LHS == Div->getLHS() && RHS == Div->getRHS())
      return Div;   // must be loop invariant
    return getUDivExpr(LHS, RHS);
  }

  // If this is a loop recurrence for a loop that does not contain L, then we
  // are dealing with the final value computed by the loop.
  if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(V)) {
    // First, attempt to evaluate each operand.
    // Avoid performing the look-up in the common case where the specified
    // expression has no loop-variant portions.
    for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) {
      const SCEV *OpAtScope = getSCEVAtScope(AddRec->getOperand(i), L);
      if (OpAtScope == AddRec->getOperand(i))
        continue;

      // Okay, at least one of these operands is loop variant but might be
      // foldable.  Build a new instance of the folded commutative expression.
      SmallVector<const SCEV *, 8> NewOps(AddRec->op_begin(),
                                          AddRec->op_begin()+i);
      NewOps.push_back(OpAtScope);
      for (++i; i != e; ++i)
        NewOps.push_back(getSCEVAtScope(AddRec->getOperand(i), L));

      const SCEV *FoldedRec =
        getAddRecExpr(NewOps, AddRec->getLoop(),
                      AddRec->getNoWrapFlags(SCEV::FlagNW));
      AddRec = dyn_cast<SCEVAddRecExpr>(FoldedRec);
      // The addrec may be folded to a nonrecurrence, for example, if the
      // induction variable is multiplied by zero after constant folding. Go
      // ahead and return the folded value.
      if (!AddRec)
        return FoldedRec;
      break;
    }

    // If the scope is outside the addrec's loop, evaluate it by using the
    // loop exit value of the addrec.
    if (!AddRec->getLoop()->contains(L)) {
      // To evaluate this recurrence, we need to know how many times the AddRec
      // loop iterates.  Compute this now.
      const SCEV *BackedgeTakenCount = getBackedgeTakenCount(AddRec->getLoop());
      if (BackedgeTakenCount == getCouldNotCompute()) return AddRec;

      // Then, evaluate the AddRec.
      return AddRec->evaluateAtIteration(BackedgeTakenCount, *this);
    }

    return AddRec;
  }

  if (const SCEVZeroExtendExpr *Cast = dyn_cast<SCEVZeroExtendExpr>(V)) {
    const SCEV *Op = getSCEVAtScope(Cast->getOperand(), L);
    if (Op == Cast->getOperand())
      return Cast;  // must be loop invariant
    return getZeroExtendExpr(Op, Cast->getType());
  }

  if (const SCEVSignExtendExpr *Cast = dyn_cast<SCEVSignExtendExpr>(V)) {
    const SCEV *Op = getSCEVAtScope(Cast->getOperand(), L);
    if (Op == Cast->getOperand())
      return Cast;  // must be loop invariant
    return getSignExtendExpr(Op, Cast->getType());
  }

  if (const SCEVTruncateExpr *Cast = dyn_cast<SCEVTruncateExpr>(V)) {
    const SCEV *Op = getSCEVAtScope(Cast->getOperand(), L);
    if (Op == Cast->getOperand())
      return Cast;  // must be loop invariant
    return getTruncateExpr(Op, Cast->getType());
  }

  llvm_unreachable("Unknown SCEV type!");
}

/// Convenience overload: translate the IR value to a SCEV first.
const SCEV *ScalarEvolution::getSCEVAtScope(Value *V, const Loop *L) {
  return getSCEVAtScope(getSCEV(V), L);
}

/// Strip any zero-extend or sign-extend wrappers from S. These operations
/// are injective, so e.g. a root of the stripped expression is also a root
/// of the original.
const SCEV *ScalarEvolution::stripInjectiveFunctions(const SCEV *S) const {
  if (const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(S))
    return stripInjectiveFunctions(ZExt->getOperand());
  if (const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(S))
    return stripInjectiveFunctions(SExt->getOperand());
  return S;
}

/// Finds the minimum unsigned root of the following equation:
///
/// A * X = B (mod N)
///
/// where N = 2^BW and BW is the common bit width of A and B. The signedness of
/// A and B isn't important.
///
/// If the equation does not have a solution, SCEVCouldNotCompute is returned.
static const SCEV *SolveLinEquationWithOverflow(const APInt &A, const SCEV *B,
                                                ScalarEvolution &SE) {
  uint32_t BW = A.getBitWidth();
  assert(BW == SE.getTypeSizeInBits(B->getType()));
  assert(A != 0 && "A must be non-zero.");

  // 1. D = gcd(A, N)
  //
  // The gcd of A and N may have only one prime factor: 2. The number of
  // trailing zeros in A is its multiplicity
  uint32_t Mult2 = A.countTrailingZeros();
  // D = 2^Mult2

  // 2. Check if B is divisible by D.
  //
  // B is divisible by D if and only if the multiplicity of prime factor 2 for B
  // is not less than multiplicity of this prime factor for D.
  if (SE.GetMinTrailingZeros(B) < Mult2)
    return SE.getCouldNotCompute();

  // 3. Compute I: the multiplicative inverse of (A / D) in arithmetic
  // modulo (N / D).
  //
  // If D == 1, (N / D) == N == 2^BW, so we need one extra bit to represent
  // (N / D) in general. The inverse itself always fits into BW bits, though,
  // so we immediately truncate it.
  APInt AD = A.lshr(Mult2).zext(BW + 1);  // AD = A / D
  APInt Mod(BW + 1, 0);
  Mod.setBit(BW - Mult2);  // Mod = N / D
  APInt I = AD.multiplicativeInverse(Mod).trunc(BW);

  // 4. Compute the minimum unsigned root of the equation:
  // I * (B / D) mod (N / D)
  // To simplify the computation, we factor out the divide by D:
  // (I * B mod N) / D
  const SCEV *D = SE.getConstant(APInt::getOneBitSet(BW, Mult2));
  return SE.getUDivExactExpr(SE.getMulExpr(B, SE.getConstant(I)), D);
}

/// For a given quadratic addrec, generate coefficients of the corresponding
/// quadratic equation, multiplied by a common value to ensure that they are
/// integers.
/// The returned value is a tuple { A, B, C, M, BitWidth }, where
/// Ax^2 + Bx + C is the quadratic function, M is the value that A, B and C
/// were multiplied by, and BitWidth is the bit width of the original addrec
/// coefficients.
/// This function returns None if the addrec coefficients are not compile-
/// time constants.
static Optional<std::tuple<APInt, APInt, APInt, APInt, unsigned>>
GetQuadraticEquation(const SCEVAddRecExpr *AddRec) {
  assert(AddRec->getNumOperands() == 3 && "This is not a quadratic chrec!");
  const SCEVConstant *LC = dyn_cast<SCEVConstant>(AddRec->getOperand(0));
  const SCEVConstant *MC = dyn_cast<SCEVConstant>(AddRec->getOperand(1));
  const SCEVConstant *NC = dyn_cast<SCEVConstant>(AddRec->getOperand(2));
  LLVM_DEBUG(dbgs() << __func__ << ": analyzing quadratic addrec: "
                    << *AddRec << '\n');

  // We currently can only solve this if the coefficients are constants.
  if (!LC || !MC || !NC) {
    LLVM_DEBUG(dbgs() << __func__ << ": coefficients are not constant\n");
    return None;
  }

  APInt L = LC->getAPInt();
  APInt M = MC->getAPInt();
  APInt N = NC->getAPInt();
  assert(!N.isNullValue() && "This is not a quadratic addrec");

  unsigned BitWidth = LC->getAPInt().getBitWidth();
  unsigned NewWidth = BitWidth + 1;
  LLVM_DEBUG(dbgs() << __func__ << ": addrec coeff bw: "
                    << BitWidth << '\n');
  // The sign-extension (as opposed to a zero-extension) here matches the
  // extension used in SolveQuadraticEquationWrap (with the same motivation).
  N = N.sext(NewWidth);
  M = M.sext(NewWidth);
  L = L.sext(NewWidth);

  // The increments are M, M+N, M+2N, ..., so the accumulated values are
  //   L+M, (L+M)+(M+N), (L+M)+(M+N)+(M+2N), ..., that is,
  //   L+M, L+2M+N, L+3M+3N, ...
  // After n iterations the accumulated value Acc is L + nM + n(n-1)/2 N.
  //
  // The equation Acc = 0 is then
  //   L + nM + n(n-1)/2 N = 0,  or  2L + 2M n + n(n-1) N = 0.
  // In a quadratic form it becomes:
  //   N n^2 + (2M-N) n + 2L = 0.

  APInt A = N;
  APInt B = 2 * M - A;
  APInt C = 2 * L;
  APInt T = APInt(NewWidth, 2);
  LLVM_DEBUG(dbgs() << __func__ << ": equation " << A << "x^2 + " << B
                    << "x + " << C << ", coeff bw: " << NewWidth
                    << ", multiplied by " << T << '\n');
  return std::make_tuple(A, B, C, T, BitWidth);
}

/// Helper function to compare optional APInts:
/// (a) if X and Y both exist, return min(X, Y),
/// (b) if neither X nor Y exist, return None,
/// (c) if exactly one of X and Y exists, return that value.
static Optional<APInt> MinOptional(Optional<APInt> X, Optional<APInt> Y) {
  if (X.hasValue() && Y.hasValue()) {
    // Compare at a common width, but return the original (unextended) value.
    unsigned W = std::max(X->getBitWidth(), Y->getBitWidth());
    APInt XW = X->sextOrSelf(W);
    APInt YW = Y->sextOrSelf(W);
    return XW.slt(YW) ? *X : *Y;
  }
  if (!X.hasValue() && !Y.hasValue())
    return None;
  return X.hasValue() ? *X : *Y;
}

/// Helper function to truncate an optional APInt to a given BitWidth.
/// When solving addrec-related equations, it is preferable to return a value
/// that has the same bit width as the original addrec's coefficients. If the
/// solution fits in the original bit width, truncate it (except for i1).
/// Returning a value of a different bit width may inhibit some optimizations.
///
/// In general, a solution to a quadratic equation generated from an addrec
/// may require BW+1 bits, where BW is the bit width of the addrec's
/// coefficients. The reason is that the coefficients of the quadratic
The reason is that the coefficients of the quadratic 8578344779Sdim/// equation are BW+1 bits wide (to avoid truncation when converting from 8579344779Sdim/// the addrec to the equation). 8580344779Sdimstatic Optional<APInt> TruncIfPossible(Optional<APInt> X, unsigned BitWidth) { 8581344779Sdim if (!X.hasValue()) 8582344779Sdim return None; 8583344779Sdim unsigned W = X->getBitWidth(); 8584344779Sdim if (BitWidth > 1 && BitWidth < W && X->isIntN(BitWidth)) 8585344779Sdim return X->trunc(BitWidth); 8586344779Sdim return X; 8587344779Sdim} 8588193323Sed 8589344779Sdim/// Let c(n) be the value of the quadratic chrec {L,+,M,+,N} after n 8590344779Sdim/// iterations. The values L, M, N are assumed to be signed, and they 8591344779Sdim/// should all have the same bit widths. 8592344779Sdim/// Find the least n >= 0 such that c(n) = 0 in the arithmetic modulo 2^BW, 8593344779Sdim/// where BW is the bit width of the addrec's coefficients. 8594344779Sdim/// If the calculated value is a BW-bit integer (for BW > 1), it will be 8595344779Sdim/// returned as such, otherwise the bit width of the returned value may 8596344779Sdim/// be greater than BW. 8597344779Sdim/// 8598344779Sdim/// This function returns None if 8599344779Sdim/// (a) the addrec coefficients are not constant, or 8600344779Sdim/// (b) SolveQuadraticEquationWrap was unable to find a solution. For cases 8601344779Sdim/// like x^2 = 5, no integer solutions exist, in other cases an integer 8602344779Sdim/// solution may exist, but SolveQuadraticEquationWrap may fail to find it. 
8603344779Sdimstatic Optional<APInt> 8604344779SdimSolveQuadraticAddRecExact(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) { 8605344779Sdim APInt A, B, C, M; 8606344779Sdim unsigned BitWidth; 8607344779Sdim auto T = GetQuadraticEquation(AddRec); 8608344779Sdim if (!T.hasValue()) 8609321369Sdim return None; 8610193323Sed 8611344779Sdim std::tie(A, B, C, M, BitWidth) = *T; 8612344779Sdim LLVM_DEBUG(dbgs() << __func__ << ": solving for unsigned overflow\n"); 8613344779Sdim Optional<APInt> X = APIntOps::SolveQuadraticEquationWrap(A, B, C, BitWidth+1); 8614344779Sdim if (!X.hasValue()) 8615344779Sdim return None; 8616193323Sed 8617344779Sdim ConstantInt *CX = ConstantInt::get(SE.getContext(), *X); 8618344779Sdim ConstantInt *V = EvaluateConstantChrecAtConstant(AddRec, CX, SE); 8619344779Sdim if (!V->isZero()) 8620321369Sdim return None; 8621198090Srdivacky 8622344779Sdim return TruncIfPossible(X, BitWidth); 8623344779Sdim} 8624321369Sdim 8625344779Sdim/// Let c(n) be the value of the quadratic chrec {0,+,M,+,N} after n 8626344779Sdim/// iterations. The values M, N are assumed to be signed, and they 8627344779Sdim/// should all have the same bit widths. 8628344779Sdim/// Find the least n such that c(n) does not belong to the given range, 8629344779Sdim/// while c(n-1) does. 8630344779Sdim/// 8631344779Sdim/// This function returns None if 8632344779Sdim/// (a) the addrec coefficients are not constant, or 8633344779Sdim/// (b) SolveQuadraticEquationWrap was unable to find a solution for the 8634344779Sdim/// bounds of the range. 
static Optional<APInt>
SolveQuadraticAddRecRange(const SCEVAddRecExpr *AddRec,
                          const ConstantRange &Range, ScalarEvolution &SE) {
  assert(AddRec->getOperand(0)->isZero() &&
         "Starting value of addrec should be 0");
  LLVM_DEBUG(dbgs() << __func__ << ": solving boundary crossing for range "
                    << Range << ", addrec " << *AddRec << '\n');
  // This case is handled in getNumIterationsInRange. Here we can assume that
  // we start in the range.
  assert(Range.contains(APInt(SE.getTypeSizeInBits(AddRec->getType()), 0)) &&
         "Addrec's initial value should be in range");

  APInt A, B, C, M;
  unsigned BitWidth;
  auto T = GetQuadraticEquation(AddRec);
  if (!T.hasValue())
    return None;

  // Be careful about the return value: there can be two reasons for not
  // returning an actual number. First, if no solutions to the equations
  // were found, and second, if the solutions don't leave the given range.
  // The first case means that the actual solution is "unknown", the second
  // means that it's known, but not valid. If the solution is unknown, we
  // cannot make any conclusions.
  // Return a pair: the optional solution and a flag indicating if the
  // solution was found.
  auto SolveForBoundary = [&](APInt Bound) -> std::pair<Optional<APInt>,bool> {
    // Solve for signed overflow and unsigned overflow, pick the lower
    // solution.
    LLVM_DEBUG(dbgs() << "SolveQuadraticAddRecRange: checking boundary "
                      << Bound << " (before multiplying by " << M << ")\n");
    Bound *= M; // The quadratic equation multiplier.

    Optional<APInt> SO = None;
    if (BitWidth > 1) {
      LLVM_DEBUG(dbgs() << "SolveQuadraticAddRecRange: solving for "
                           "signed overflow\n");
      SO = APIntOps::SolveQuadraticEquationWrap(A, B, -Bound, BitWidth);
    }
    LLVM_DEBUG(dbgs() << "SolveQuadraticAddRecRange: solving for "
                         "unsigned overflow\n");
    Optional<APInt> UO = APIntOps::SolveQuadraticEquationWrap(A, B, -Bound,
                                                              BitWidth+1);

    auto LeavesRange = [&] (const APInt &X) {
      ConstantInt *C0 = ConstantInt::get(SE.getContext(), X);
      ConstantInt *V0 = EvaluateConstantChrecAtConstant(AddRec, C0, SE);
      if (Range.contains(V0->getValue()))
        return false;
      // X should be at least 1, so X-1 is non-negative.
      ConstantInt *C1 = ConstantInt::get(SE.getContext(), X-1);
      ConstantInt *V1 = EvaluateConstantChrecAtConstant(AddRec, C1, SE);
      if (Range.contains(V1->getValue()))
        return true;
      return false;
    };

    // If SolveQuadraticEquationWrap returns None, it means that there can
    // be a solution, but the function failed to find it. We cannot treat it
    // as "no solution".
    if (!SO.hasValue() || !UO.hasValue())
      return { None, false };

    // Check the smaller value first to see if it leaves the range.
    // At this point, both SO and UO must have values.
    Optional<APInt> Min = MinOptional(SO, UO);
    if (LeavesRange(*Min))
      return { Min, true };
    Optional<APInt> Max = Min == SO ? UO : SO;
    if (LeavesRange(*Max))
      return { Max, true };

    // Solutions were found, but were eliminated, hence the "true".
    return { None, true };
  };

  std::tie(A, B, C, M, BitWidth) = *T;
  // Lower bound is inclusive, subtract 1 to represent the exiting value.
  APInt Lower = Range.getLower().sextOrSelf(A.getBitWidth()) - 1;
  APInt Upper = Range.getUpper().sextOrSelf(A.getBitWidth());
  auto SL = SolveForBoundary(Lower);
  auto SU = SolveForBoundary(Upper);
  // If any of the solutions was unknown, no meaningful conclusions can
  // be made.
  if (!SL.second || !SU.second)
    return None;

  // Claim: The correct solution is not some value between Min and Max.
  //
  // Justification: Assuming that Min and Max are different values, one of
  // them is when the first signed overflow happens, the other is when the
  // first unsigned overflow happens. Crossing the range boundary is only
  // possible via an overflow (treating 0 as a special case of it, modeling
  // an overflow as crossing k*2^W for some k).
  //
  // The interesting case here is when Min was eliminated as an invalid
  // solution, but Max was not. The argument is that if there was another
  // overflow between Min and Max, it would also have been eliminated if
  // it was considered.
  //
  // For a given boundary, it is possible to have two overflows of the same
  // type (signed/unsigned) without having the other type in between: this
  // can happen when the vertex of the parabola is between the iterations
  // corresponding to the overflows. This is only possible when the two
  // overflows cross k*2^W for the same k. In such case, if the second one
  // left the range (and was the first one to do so), the first overflow
  // would have to enter the range, which would mean that either we had left
  // the range before or that we started outside of it. Both of these cases
  // are contradictions.
  //
  // Claim: In the case where SolveForBoundary returns None, the correct
  // solution is not some value between the Max for this boundary and the
  // Min of the other boundary.
  //
  // Justification: Assume that we had such Max_A and Min_B corresponding
  // to range boundaries A and B and such that Max_A < Min_B. If there was
  // a solution between Max_A and Min_B, it would have to be caused by an
  // overflow corresponding to either A or B. It cannot correspond to B,
  // since Min_B is the first occurrence of such an overflow. If it
  // corresponded to A, it would have to be either a signed or an unsigned
  // overflow that is larger than both eliminated overflows for A. But
  // between the eliminated overflows and this overflow, the values would
  // cover the entire value space, thus crossing the other boundary, which
  // is a contradiction.

  return TruncIfPossible(MinOptional(SL.first, SU.first), BitWidth);
}

/// Compute the exit limit of a loop whose exit test is "V != 0": the number
/// of backedges taken before V (which encodes x-y for an "x != y" test)
/// becomes zero. Handles constant V, quadratic addrecs (via the exact
/// quadratic solver), and affine addrecs; returns CouldNotCompute otherwise.
ScalarEvolution::ExitLimit
ScalarEvolution::howFarToZero(const SCEV *V, const Loop *L, bool ControlsExit,
                              bool AllowPredicates) {

  // This is only used for loops with a "x != y" exit test. The exit condition
  // is now expressed as a single expression, V = x-y. So the exit test is
  // effectively V != 0.  We know and take advantage of the fact that this
  // expression only being used in a comparison by zero context.

  SmallPtrSet<const SCEVPredicate *, 4> Predicates;
  // If the value is a constant
  if (const SCEVConstant *C = dyn_cast<SCEVConstant>(V)) {
    // If the value is already zero, the branch will execute zero times.
    if (C->getValue()->isZero()) return C;
    return getCouldNotCompute();  // Otherwise it will loop infinitely.
  }

  const SCEVAddRecExpr *AddRec =
      dyn_cast<SCEVAddRecExpr>(stripInjectiveFunctions(V));

  if (!AddRec && AllowPredicates)
    // Try to make this an AddRec using runtime tests, in the first X
    // iterations of this loop, where X is the SCEV expression found by the
    // algorithm below.
    AddRec = convertSCEVToAddRecWithPredicates(V, L, Predicates);

  if (!AddRec || AddRec->getLoop() != L)
    return getCouldNotCompute();

  // If this is a quadratic (3-term) AddRec {L,+,M,+,N}, find the roots of
  // the quadratic equation to solve it.
  if (AddRec->isQuadratic() && AddRec->getType()->isIntegerTy()) {
    // We can only use this value if the chrec ends up with an exact zero
    // value at this index.  When solving for "X*X != 5", for example, we
    // should not accept a root of 2.
    if (auto S = SolveQuadraticAddRecExact(AddRec, *this)) {
      const auto *R = cast<SCEVConstant>(getConstant(S.getValue()));
      return ExitLimit(R, R, false, Predicates);
    }
    return getCouldNotCompute();
  }

  // Otherwise we can only handle this if it is affine.
  if (!AddRec->isAffine())
    return getCouldNotCompute();

  // If this is an affine expression, the execution count of this branch is
  // the minimum unsigned root of the following equation:
  //
  //     Start + Step*N = 0 (mod 2^BW)
  //
  // equivalent to:
  //
  //             Step*N = -Start (mod 2^BW)
  //
  // where BW is the common bit width of Start and Step.

  // Get the initial value for the loop.
  const SCEV *Start = getSCEVAtScope(AddRec->getStart(), L->getParentLoop());
  const SCEV *Step = getSCEVAtScope(AddRec->getOperand(1), L->getParentLoop());

  // For now we handle only constant steps.
  //
  // TODO: Handle a nonconstant Step given AddRec<NUW>. If the
  // AddRec is NUW, then (in an unsigned sense) it cannot be counting up to wrap
  // to 0, it must be counting down to equal 0. Consequently, N = Start / -Step.
  // We have not yet seen any such cases.
  const SCEVConstant *StepC = dyn_cast<SCEVConstant>(Step);
  if (!StepC || StepC->getValue()->isZero())
    return getCouldNotCompute();

  // For positive steps (counting up until unsigned overflow):
  //   N = -Start/Step (as unsigned)
  // For negative steps (counting down to zero):
  //   N = Start/-Step
  // First compute the unsigned distance from zero in the direction of Step.
  bool CountDown = StepC->getAPInt().isNegative();
  const SCEV *Distance = CountDown ? Start : getNegativeSCEV(Start);

  // Handle unitary steps, which cannot wraparound.
  // 1*N = -Start; -1*N = Start (mod 2^BW), so:
  //   N = Distance (as unsigned)
  if (StepC->getValue()->isOne() || StepC->getValue()->isMinusOne()) {
    APInt MaxBECount = getUnsignedRangeMax(Distance);

    // When a loop like "for (int i = 0; i != n; ++i) { /* body */ }" is
    // rotated, we end up with a loop whose backedge-taken count is n - 1.
    // Detect this case, and see if we can improve the bound.
    //
    // Explicitly handling this here is necessary because getUnsignedRange
    // isn't context-sensitive; it doesn't know that we only care about the
    // range inside the loop.
    const SCEV *Zero = getZero(Distance->getType());
    const SCEV *One = getOne(Distance->getType());
    const SCEV *DistancePlusOne = getAddExpr(Distance, One);
    if (isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, DistancePlusOne, Zero)) {
      // If Distance + 1 doesn't overflow, we can compute the maximum distance
      // as "unsigned_max(Distance + 1) - 1".
      ConstantRange CR = getUnsignedRange(DistancePlusOne);
      MaxBECount = APIntOps::umin(MaxBECount, CR.getUnsignedMax() - 1);
    }
    return ExitLimit(Distance, getConstant(MaxBECount), false, Predicates);
  }

  // If the condition controls loop exit (the loop exits only if the expression
  // is true) and the addition is no-wrap we can use unsigned divide to
  // compute the backedge count.  In this case, the step may not divide the
  // distance, but we don't care because if the condition is "missed" the loop
  // will have undefined behavior due to wrapping.
  if (ControlsExit && AddRec->hasNoSelfWrap() &&
      loopHasNoAbnormalExits(AddRec->getLoop())) {
    const SCEV *Exact =
        getUDivExpr(Distance, CountDown ? getNegativeSCEV(Step) : Step);
    const SCEV *Max =
        Exact == getCouldNotCompute()
            ? Exact
            : getConstant(getUnsignedRangeMax(Exact));
    return ExitLimit(Exact, Max, false, Predicates);
  }

  // Solve the general equation.
  const SCEV *E = SolveLinEquationWithOverflow(StepC->getAPInt(),
                                               getNegativeSCEV(Start), *this);
  const SCEV *M = E == getCouldNotCompute()
                      ? E
                      : getConstant(getUnsignedRangeMax(E));
  return ExitLimit(E, M, false, Predicates);
}

/// Compute the exit limit of a loop whose exit test is "V == 0". Only the
/// trivial constant case is handled; anything else returns CouldNotCompute.
ScalarEvolution::ExitLimit
ScalarEvolution::howFarToNonZero(const SCEV *V, const Loop *L) {
  // Loops that look like: while (X == 0) are very strange indeed.  We don't
  // handle them yet except for the trivial case.  This could be expanded in the
  // future as needed.

  // If the value is a constant, check to see if it is known to be non-zero
  // already.  If so, the backedge will execute zero times.
  if (const SCEVConstant *C = dyn_cast<SCEVConstant>(V)) {
    if (!C->getValue()->isZero())
      return getZero(C->getType());
    return getCouldNotCompute();  // Otherwise it will loop infinitely.
  }

  // We could implement others, but I really doubt anyone writes loops like
  // this, and if they did, they would already be constant folded.
  return getCouldNotCompute();
}

/// Return a {predecessor, block} pair such that the predecessor is the only
/// way into the block: either BB's single predecessor paired with BB itself,
/// or a loop's out-of-loop predecessor paired with the loop header. Returns
/// {nullptr, nullptr} if no such pair can be found.
std::pair<BasicBlock *, BasicBlock *>
ScalarEvolution::getPredecessorWithUniqueSuccessorForBB(BasicBlock *BB) {
  // If the block has a unique predecessor, then there is no path from the
  // predecessor to the block that does not go through the direct edge
  // from the predecessor to the block.
  if (BasicBlock *Pred = BB->getSinglePredecessor())
    return {Pred, BB};

  // A loop's header is defined to be a block that dominates the loop.
  // If the header has a unique predecessor outside the loop, it must be
  // a block that has exactly one successor that can reach the loop.
  if (Loop *L = LI.getLoopFor(BB))
    return {L->getLoopPredecessor(), L->getHeader()};

  return {nullptr, nullptr};
}

/// SCEV structural equivalence is usually sufficient for testing whether two
/// expressions are equal, however for the purposes of looking for a condition
/// guarding a loop, it can be useful to be a little more general, since a
/// front-end may have replicated the controlling expression.
8932198090Srdivackystatic bool HasSameValue(const SCEV *A, const SCEV *B) { 8933194612Sed // Quick check to see if they are the same SCEV. 8934194612Sed if (A == B) return true; 8935194612Sed 8936296417Sdim auto ComputesEqualValues = [](const Instruction *A, const Instruction *B) { 8937296417Sdim // Not all instructions that are "identical" compute the same value. For 8938296417Sdim // instance, two distinct alloca instructions allocating the same type are 8939296417Sdim // identical and do not read memory; but compute distinct values. 8940296417Sdim return A->isIdenticalTo(B) && (isa<BinaryOperator>(A) || isa<GetElementPtrInst>(A)); 8941296417Sdim }; 8942296417Sdim 8943194612Sed // Otherwise, if they're both SCEVUnknown, it's possible that they hold 8944194612Sed // two different instructions with the same value. Check for this case. 8945194612Sed if (const SCEVUnknown *AU = dyn_cast<SCEVUnknown>(A)) 8946194612Sed if (const SCEVUnknown *BU = dyn_cast<SCEVUnknown>(B)) 8947194612Sed if (const Instruction *AI = dyn_cast<Instruction>(AU->getValue())) 8948194612Sed if (const Instruction *BI = dyn_cast<Instruction>(BU->getValue())) 8949296417Sdim if (ComputesEqualValues(AI, BI)) 8950194612Sed return true; 8951194612Sed 8952194612Sed // Otherwise assume they may have a different value. 8953194612Sed return false; 8954194612Sed} 8955194612Sed 8956207618Srdivackybool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred, 8957239462Sdim const SCEV *&LHS, const SCEV *&RHS, 8958239462Sdim unsigned Depth) { 8959207618Srdivacky bool Changed = false; 8960344779Sdim // Simplifies ICMP to trivial true or false by turning it into '0 == 0' or 8961344779Sdim // '0 != 0'. 8962344779Sdim auto TrivialCase = [&](bool TriviallyTrue) { 8963344779Sdim LHS = RHS = getConstant(ConstantInt::getFalse(getContext())); 8964344779Sdim Pred = TriviallyTrue ? 
ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE; 8965344779Sdim return true; 8966344779Sdim }; 8967239462Sdim // If we hit the max recursion limit bail out. 8968239462Sdim if (Depth >= 3) 8969239462Sdim return false; 8970239462Sdim 8971207618Srdivacky // Canonicalize a constant to the right side. 8972207618Srdivacky if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(LHS)) { 8973207618Srdivacky // Check for both operands constant. 8974207618Srdivacky if (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(RHS)) { 8975207618Srdivacky if (ConstantExpr::getICmp(Pred, 8976207618Srdivacky LHSC->getValue(), 8977207618Srdivacky RHSC->getValue())->isNullValue()) 8978344779Sdim return TrivialCase(false); 8979207618Srdivacky else 8980344779Sdim return TrivialCase(true); 8981207618Srdivacky } 8982207618Srdivacky // Otherwise swap the operands to put the constant on the right. 8983207618Srdivacky std::swap(LHS, RHS); 8984207618Srdivacky Pred = ICmpInst::getSwappedPredicate(Pred); 8985207618Srdivacky Changed = true; 8986207618Srdivacky } 8987207618Srdivacky 8988207618Srdivacky // If we're comparing an addrec with a value which is loop-invariant in the 8989207618Srdivacky // addrec's loop, put the addrec on the left. Also make a dominance check, 8990207618Srdivacky // as both operands could be addrecs loop-invariant in each other's loop. 8991207618Srdivacky if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(RHS)) { 8992207618Srdivacky const Loop *L = AR->getLoop(); 8993218893Sdim if (isLoopInvariant(LHS, L) && properlyDominates(LHS, L->getHeader())) { 8994207618Srdivacky std::swap(LHS, RHS); 8995207618Srdivacky Pred = ICmpInst::getSwappedPredicate(Pred); 8996207618Srdivacky Changed = true; 8997207618Srdivacky } 8998207618Srdivacky } 8999207618Srdivacky 9000207618Srdivacky // If there's a constant operand, canonicalize comparisons with boundary 9001207618Srdivacky // cases, and canonicalize *-or-equal comparisons to regular comparisons. 
9002207618Srdivacky if (const SCEVConstant *RC = dyn_cast<SCEVConstant>(RHS)) { 9003296417Sdim const APInt &RA = RC->getAPInt(); 9004207618Srdivacky 9005314564Sdim bool SimplifiedByConstantRange = false; 9006207618Srdivacky 9007314564Sdim if (!ICmpInst::isEquality(Pred)) { 9008314564Sdim ConstantRange ExactCR = ConstantRange::makeExactICmpRegion(Pred, RA); 9009314564Sdim if (ExactCR.isFullSet()) 9010344779Sdim return TrivialCase(true); 9011314564Sdim else if (ExactCR.isEmptySet()) 9012344779Sdim return TrivialCase(false); 9013314564Sdim 9014314564Sdim APInt NewRHS; 9015314564Sdim CmpInst::Predicate NewPred; 9016314564Sdim if (ExactCR.getEquivalentICmp(NewPred, NewRHS) && 9017314564Sdim ICmpInst::isEquality(NewPred)) { 9018314564Sdim // We were able to convert an inequality to an equality. 9019314564Sdim Pred = NewPred; 9020314564Sdim RHS = getConstant(NewRHS); 9021314564Sdim Changed = SimplifiedByConstantRange = true; 9022207618Srdivacky } 9023314564Sdim } 9024207618Srdivacky 9025314564Sdim if (!SimplifiedByConstantRange) { 9026314564Sdim switch (Pred) { 9027314564Sdim default: 9028207618Srdivacky break; 9029314564Sdim case ICmpInst::ICMP_EQ: 9030314564Sdim case ICmpInst::ICMP_NE: 9031314564Sdim // Fold ((-1) * %a) + %b == 0 (equivalent to %b-%a == 0) into %a == %b. 9032314564Sdim if (!RA) 9033314564Sdim if (const SCEVAddExpr *AE = dyn_cast<SCEVAddExpr>(LHS)) 9034314564Sdim if (const SCEVMulExpr *ME = 9035314564Sdim dyn_cast<SCEVMulExpr>(AE->getOperand(0))) 9036314564Sdim if (AE->getNumOperands() == 2 && ME->getNumOperands() == 2 && 9037314564Sdim ME->getOperand(0)->isAllOnesValue()) { 9038314564Sdim RHS = AE->getOperand(1); 9039314564Sdim LHS = ME->getOperand(1); 9040314564Sdim Changed = true; 9041314564Sdim } 9042207618Srdivacky break; 9043207618Srdivacky 9044314564Sdim 9045314564Sdim // The "Should have been caught earlier!" 
messages refer to the fact 9046314564Sdim // that the ExactCR.isFullSet() or ExactCR.isEmptySet() check above 9047314564Sdim // should have fired on the corresponding cases, and canonicalized the 9048344779Sdim // check to trivial case. 9049314564Sdim 9050314564Sdim case ICmpInst::ICMP_UGE: 9051314564Sdim assert(!RA.isMinValue() && "Should have been caught earlier!"); 9052314564Sdim Pred = ICmpInst::ICMP_UGT; 9053314564Sdim RHS = getConstant(RA - 1); 9054207618Srdivacky Changed = true; 9055207618Srdivacky break; 9056314564Sdim case ICmpInst::ICMP_ULE: 9057314564Sdim assert(!RA.isMaxValue() && "Should have been caught earlier!"); 9058314564Sdim Pred = ICmpInst::ICMP_ULT; 9059207618Srdivacky RHS = getConstant(RA + 1); 9060207618Srdivacky Changed = true; 9061207618Srdivacky break; 9062314564Sdim case ICmpInst::ICMP_SGE: 9063314564Sdim assert(!RA.isMinSignedValue() && "Should have been caught earlier!"); 9064314564Sdim Pred = ICmpInst::ICMP_SGT; 9065207618Srdivacky RHS = getConstant(RA - 1); 9066207618Srdivacky Changed = true; 9067207618Srdivacky break; 9068314564Sdim case ICmpInst::ICMP_SLE: 9069314564Sdim assert(!RA.isMaxSignedValue() && "Should have been caught earlier!"); 9070314564Sdim Pred = ICmpInst::ICMP_SLT; 9071207618Srdivacky RHS = getConstant(RA + 1); 9072207618Srdivacky Changed = true; 9073207618Srdivacky break; 9074207618Srdivacky } 9075207618Srdivacky } 9076207618Srdivacky } 9077207618Srdivacky 9078207618Srdivacky // Check for obvious equality. 9079207618Srdivacky if (HasSameValue(LHS, RHS)) { 9080207618Srdivacky if (ICmpInst::isTrueWhenEqual(Pred)) 9081344779Sdim return TrivialCase(true); 9082207618Srdivacky if (ICmpInst::isFalseWhenEqual(Pred)) 9083344779Sdim return TrivialCase(false); 9084207618Srdivacky } 9085207618Srdivacky 9086207618Srdivacky // If possible, canonicalize GE/LE comparisons to GT/LT comparisons, by 9087207618Srdivacky // adding or subtracting 1 from one of the operands. 
9088207618Srdivacky switch (Pred) { 9089207618Srdivacky case ICmpInst::ICMP_SLE: 9090321369Sdim if (!getSignedRangeMax(RHS).isMaxSignedValue()) { 9091207618Srdivacky RHS = getAddExpr(getConstant(RHS->getType(), 1, true), RHS, 9092221345Sdim SCEV::FlagNSW); 9093207618Srdivacky Pred = ICmpInst::ICMP_SLT; 9094207618Srdivacky Changed = true; 9095321369Sdim } else if (!getSignedRangeMin(LHS).isMinSignedValue()) { 9096207618Srdivacky LHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), LHS, 9097221345Sdim SCEV::FlagNSW); 9098207618Srdivacky Pred = ICmpInst::ICMP_SLT; 9099207618Srdivacky Changed = true; 9100207618Srdivacky } 9101207618Srdivacky break; 9102207618Srdivacky case ICmpInst::ICMP_SGE: 9103321369Sdim if (!getSignedRangeMin(RHS).isMinSignedValue()) { 9104207618Srdivacky RHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), RHS, 9105221345Sdim SCEV::FlagNSW); 9106207618Srdivacky Pred = ICmpInst::ICMP_SGT; 9107207618Srdivacky Changed = true; 9108321369Sdim } else if (!getSignedRangeMax(LHS).isMaxSignedValue()) { 9109207618Srdivacky LHS = getAddExpr(getConstant(RHS->getType(), 1, true), LHS, 9110221345Sdim SCEV::FlagNSW); 9111207618Srdivacky Pred = ICmpInst::ICMP_SGT; 9112207618Srdivacky Changed = true; 9113207618Srdivacky } 9114207618Srdivacky break; 9115207618Srdivacky case ICmpInst::ICMP_ULE: 9116321369Sdim if (!getUnsignedRangeMax(RHS).isMaxValue()) { 9117207618Srdivacky RHS = getAddExpr(getConstant(RHS->getType(), 1, true), RHS, 9118221345Sdim SCEV::FlagNUW); 9119207618Srdivacky Pred = ICmpInst::ICMP_ULT; 9120207618Srdivacky Changed = true; 9121321369Sdim } else if (!getUnsignedRangeMin(LHS).isMinValue()) { 9122296417Sdim LHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), LHS); 9123207618Srdivacky Pred = ICmpInst::ICMP_ULT; 9124207618Srdivacky Changed = true; 9125207618Srdivacky } 9126207618Srdivacky break; 9127207618Srdivacky case ICmpInst::ICMP_UGE: 9128321369Sdim if (!getUnsignedRangeMin(RHS).isMinValue()) { 
9129296417Sdim RHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), RHS); 9130207618Srdivacky Pred = ICmpInst::ICMP_UGT; 9131207618Srdivacky Changed = true; 9132321369Sdim } else if (!getUnsignedRangeMax(LHS).isMaxValue()) { 9133207618Srdivacky LHS = getAddExpr(getConstant(RHS->getType(), 1, true), LHS, 9134221345Sdim SCEV::FlagNUW); 9135207618Srdivacky Pred = ICmpInst::ICMP_UGT; 9136207618Srdivacky Changed = true; 9137207618Srdivacky } 9138207618Srdivacky break; 9139207618Srdivacky default: 9140207618Srdivacky break; 9141207618Srdivacky } 9142207618Srdivacky 9143207618Srdivacky // TODO: More simplifications are possible here. 9144207618Srdivacky 9145239462Sdim // Recursively simplify until we either hit a recursion limit or nothing 9146239462Sdim // changes. 9147239462Sdim if (Changed) 9148239462Sdim return SimplifyICmpOperands(Pred, LHS, RHS, Depth+1); 9149239462Sdim 9150207618Srdivacky return Changed; 9151207618Srdivacky} 9152207618Srdivacky 9153198090Srdivackybool ScalarEvolution::isKnownNegative(const SCEV *S) { 9154321369Sdim return getSignedRangeMax(S).isNegative(); 9155198090Srdivacky} 9156198090Srdivacky 9157198090Srdivackybool ScalarEvolution::isKnownPositive(const SCEV *S) { 9158321369Sdim return getSignedRangeMin(S).isStrictlyPositive(); 9159198090Srdivacky} 9160198090Srdivacky 9161198090Srdivackybool ScalarEvolution::isKnownNonNegative(const SCEV *S) { 9162321369Sdim return !getSignedRangeMin(S).isNegative(); 9163198090Srdivacky} 9164198090Srdivacky 9165198090Srdivackybool ScalarEvolution::isKnownNonPositive(const SCEV *S) { 9166321369Sdim return !getSignedRangeMax(S).isStrictlyPositive(); 9167198090Srdivacky} 9168198090Srdivacky 9169198090Srdivackybool ScalarEvolution::isKnownNonZero(const SCEV *S) { 9170198090Srdivacky return isKnownNegative(S) || isKnownPositive(S); 9171198090Srdivacky} 9172198090Srdivacky 9173341825Sdimstd::pair<const SCEV *, const SCEV *> 9174341825SdimScalarEvolution::SplitIntoInitAndPostInc(const Loop *L, const SCEV 
*S) { 9175341825Sdim // Compute SCEV on entry of loop L. 9176341825Sdim const SCEV *Start = SCEVInitRewriter::rewrite(S, L, *this); 9177341825Sdim if (Start == getCouldNotCompute()) 9178341825Sdim return { Start, Start }; 9179341825Sdim // Compute post increment SCEV for loop L. 9180341825Sdim const SCEV *PostInc = SCEVPostIncRewriter::rewrite(S, L, *this); 9181341825Sdim assert(PostInc != getCouldNotCompute() && "Unexpected could not compute"); 9182341825Sdim return { Start, PostInc }; 9183341825Sdim} 9184341825Sdim 9185341825Sdimbool ScalarEvolution::isKnownViaInduction(ICmpInst::Predicate Pred, 9186341825Sdim const SCEV *LHS, const SCEV *RHS) { 9187341825Sdim // First collect all loops. 9188341825Sdim SmallPtrSet<const Loop *, 8> LoopsUsed; 9189341825Sdim getUsedLoops(LHS, LoopsUsed); 9190341825Sdim getUsedLoops(RHS, LoopsUsed); 9191341825Sdim 9192341825Sdim if (LoopsUsed.empty()) 9193341825Sdim return false; 9194341825Sdim 9195341825Sdim // Domination relationship must be a linear order on collected loops. 9196341825Sdim#ifndef NDEBUG 9197341825Sdim for (auto *L1 : LoopsUsed) 9198341825Sdim for (auto *L2 : LoopsUsed) 9199341825Sdim assert((DT.dominates(L1->getHeader(), L2->getHeader()) || 9200341825Sdim DT.dominates(L2->getHeader(), L1->getHeader())) && 9201341825Sdim "Domination relationship is not a linear order"); 9202341825Sdim#endif 9203341825Sdim 9204341825Sdim const Loop *MDL = 9205341825Sdim *std::max_element(LoopsUsed.begin(), LoopsUsed.end(), 9206341825Sdim [&](const Loop *L1, const Loop *L2) { 9207341825Sdim return DT.properlyDominates(L1->getHeader(), L2->getHeader()); 9208341825Sdim }); 9209341825Sdim 9210341825Sdim // Get init and post increment value for LHS. 9211341825Sdim auto SplitLHS = SplitIntoInitAndPostInc(MDL, LHS); 9212341825Sdim // if LHS contains unknown non-invariant SCEV then bail out. 
9213341825Sdim if (SplitLHS.first == getCouldNotCompute()) 9214341825Sdim return false; 9215341825Sdim assert (SplitLHS.second != getCouldNotCompute() && "Unexpected CNC"); 9216341825Sdim // Get init and post increment value for RHS. 9217341825Sdim auto SplitRHS = SplitIntoInitAndPostInc(MDL, RHS); 9218341825Sdim // if RHS contains unknown non-invariant SCEV then bail out. 9219341825Sdim if (SplitRHS.first == getCouldNotCompute()) 9220341825Sdim return false; 9221341825Sdim assert (SplitRHS.second != getCouldNotCompute() && "Unexpected CNC"); 9222341825Sdim // It is possible that init SCEV contains an invariant load but it does 9223341825Sdim // not dominate MDL and is not available at MDL loop entry, so we should 9224341825Sdim // check it here. 9225341825Sdim if (!isAvailableAtLoopEntry(SplitLHS.first, MDL) || 9226341825Sdim !isAvailableAtLoopEntry(SplitRHS.first, MDL)) 9227341825Sdim return false; 9228341825Sdim 9229341825Sdim return isLoopEntryGuardedByCond(MDL, Pred, SplitLHS.first, SplitRHS.first) && 9230341825Sdim isLoopBackedgeGuardedByCond(MDL, Pred, SplitLHS.second, 9231341825Sdim SplitRHS.second); 9232341825Sdim} 9233341825Sdim 9234198090Srdivackybool ScalarEvolution::isKnownPredicate(ICmpInst::Predicate Pred, 9235198090Srdivacky const SCEV *LHS, const SCEV *RHS) { 9236207618Srdivacky // Canonicalize the inputs first. 9237207618Srdivacky (void)SimplifyICmpOperands(Pred, LHS, RHS); 9238198090Srdivacky 9239341825Sdim if (isKnownViaInduction(Pred, LHS, RHS)) 9240276479Sdim return true; 9241207618Srdivacky 9242296417Sdim if (isKnownPredicateViaSplitting(Pred, LHS, RHS)) 9243296417Sdim return true; 9244296417Sdim 9245341825Sdim // Otherwise see what can be done with some simple reasoning. 
9246341825Sdim return isKnownViaNonRecursiveReasoning(Pred, LHS, RHS); 9247207618Srdivacky} 9248207618Srdivacky 9249341825Sdimbool ScalarEvolution::isKnownOnEveryIteration(ICmpInst::Predicate Pred, 9250341825Sdim const SCEVAddRecExpr *LHS, 9251341825Sdim const SCEV *RHS) { 9252341825Sdim const Loop *L = LHS->getLoop(); 9253341825Sdim return isLoopEntryGuardedByCond(L, Pred, LHS->getStart(), RHS) && 9254341825Sdim isLoopBackedgeGuardedByCond(L, Pred, LHS->getPostIncExpr(*this), RHS); 9255341825Sdim} 9256341825Sdim 9257296417Sdimbool ScalarEvolution::isMonotonicPredicate(const SCEVAddRecExpr *LHS, 9258296417Sdim ICmpInst::Predicate Pred, 9259296417Sdim bool &Increasing) { 9260296417Sdim bool Result = isMonotonicPredicateImpl(LHS, Pred, Increasing); 9261296417Sdim 9262296417Sdim#ifndef NDEBUG 9263296417Sdim // Verify an invariant: inverting the predicate should turn a monotonically 9264296417Sdim // increasing change to a monotonically decreasing one, and vice versa. 9265296417Sdim bool IncreasingSwapped; 9266296417Sdim bool ResultSwapped = isMonotonicPredicateImpl( 9267296417Sdim LHS, ICmpInst::getSwappedPredicate(Pred), IncreasingSwapped); 9268296417Sdim 9269296417Sdim assert(Result == ResultSwapped && "should be able to analyze both!"); 9270296417Sdim if (ResultSwapped) 9271296417Sdim assert(Increasing == !IncreasingSwapped && 9272296417Sdim "monotonicity should flip as we flip the predicate"); 9273296417Sdim#endif 9274296417Sdim 9275296417Sdim return Result; 9276296417Sdim} 9277296417Sdim 9278296417Sdimbool ScalarEvolution::isMonotonicPredicateImpl(const SCEVAddRecExpr *LHS, 9279296417Sdim ICmpInst::Predicate Pred, 9280296417Sdim bool &Increasing) { 9281296417Sdim 9282296417Sdim // A zero step value for LHS means the induction variable is essentially a 9283296417Sdim // loop invariant value. 
  // (continuation of the comment begun on the previous chunk)
  // We don't really depend on the predicate actually
  // flipping from false to true (for increasing predicates, and the other way
  // around for decreasing predicates), all we care about is that *if* the
  // predicate changes then it only changes from false to true.
  //
  // A zero step value in itself is not very useful, but there may be places
  // where SCEV can prove X >= 0 but not prove X > 0, so it is helpful to be
  // as general as possible.

  switch (Pred) {
  default:
    return false; // Conservative answer

  case ICmpInst::ICMP_UGT:
  case ICmpInst::ICMP_UGE:
  case ICmpInst::ICMP_ULT:
  case ICmpInst::ICMP_ULE:
    // Unsigned predicates: require the recurrence not to wrap in the
    // unsigned sense; then the IV only ever grows unsigned-wise.
    if (!LHS->hasNoUnsignedWrap())
      return false;

    Increasing = Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE;
    return true;

  case ICmpInst::ICMP_SGT:
  case ICmpInst::ICMP_SGE:
  case ICmpInst::ICMP_SLT:
  case ICmpInst::ICMP_SLE: {
    // Signed predicates additionally need the sign of the step to decide
    // the direction of monotonicity.
    if (!LHS->hasNoSignedWrap())
      return false;

    const SCEV *Step = LHS->getStepRecurrence(*this);

    if (isKnownNonNegative(Step)) {
      Increasing = Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE;
      return true;
    }

    if (isKnownNonPositive(Step)) {
      Increasing = Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE;
      return true;
    }

    // Step sign unknown: cannot conclude monotonicity.
    return false;
  }

  }

  llvm_unreachable("switch has default clause!");
}

bool
// Tries to replace the loop-varying condition `LHS Pred RHS` with an
// equivalent loop-invariant one (returned via the Invariant* out-params),
// exploiting monotonicity of the add recurrence and backedge guarding.
// (The leading `bool` of this signature sits at the end of the previous
// chunk.)
ScalarEvolution::isLoopInvariantPredicate(
    ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const Loop *L,
    ICmpInst::Predicate &InvariantPred, const SCEV *&InvariantLHS,
    const SCEV *&InvariantRHS) {

  // If there is a loop-invariant, force it into the RHS, otherwise bail out.
  if (!isLoopInvariant(RHS, L)) {
    if (!isLoopInvariant(LHS, L))
      return false;

    std::swap(LHS, RHS);
    Pred = ICmpInst::getSwappedPredicate(Pred);
  }

  const SCEVAddRecExpr *ArLHS = dyn_cast<SCEVAddRecExpr>(LHS);
  if (!ArLHS || ArLHS->getLoop() != L)
    return false;

  bool Increasing;
  if (!isMonotonicPredicate(ArLHS, Pred, Increasing))
    return false;

  // If the predicate "ArLHS `Pred` RHS" monotonically increases from false to
  // true as the loop iterates, and the backedge is control dependent on
  // "ArLHS `Pred` RHS" == true then we can reason as follows:
  //
  //   * if the predicate was false in the first iteration then the predicate
  //     is never evaluated again, since the loop exits without taking the
  //     backedge.
  //   * if the predicate was true in the first iteration then it will
  //     continue to be true for all future iterations since it is
  //     monotonically increasing.
  //
  // For both the above possibilities, we can replace the loop varying
  // predicate with its value on the first iteration of the loop (which is
  // loop invariant).
  //
  // A similar reasoning applies for a monotonically decreasing predicate, by
  // replacing true with false and false with true in the above two bullets.

  auto P = Increasing ? Pred : ICmpInst::getInversePredicate(Pred);

  if (!isLoopBackedgeGuardedByCond(L, P, LHS, RHS))
    return false;

  // Rephrase the condition in terms of the recurrence's (loop-invariant)
  // start value.
  InvariantPred = Pred;
  InvariantLHS = ArLHS->getStart();
  InvariantRHS = RHS;
  return true;
}

/// Proves `LHS Pred RHS` purely from the known constant ranges of the two
/// expressions (plus a trivial same-value check).
bool ScalarEvolution::isKnownPredicateViaConstantRanges(
    ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS) {
  if (HasSameValue(LHS, RHS))
    return ICmpInst::isTrueWhenEqual(Pred);

  // This code is split out from isKnownPredicate because it is called from
  // within isLoopEntryGuardedByCond.

  // True iff every value in RangeLHS satisfies `Pred` against every value
  // in RangeRHS.
  auto CheckRanges =
      [&](const ConstantRange &RangeLHS, const ConstantRange &RangeRHS) {
    return ConstantRange::makeSatisfyingICmpRegion(Pred, RangeRHS)
        .contains(RangeLHS);
  };

  // The check at the top of the function catches the case where the values are
  // known to be equal.
  // Ranges alone can never prove equality of two non-identical expressions.
  if (Pred == CmpInst::ICMP_EQ)
    return false;

  // For inequality, try both signed and unsigned range views, plus a
  // non-zero difference check.
  if (Pred == CmpInst::ICMP_NE)
    return CheckRanges(getSignedRange(LHS), getSignedRange(RHS)) ||
           CheckRanges(getUnsignedRange(LHS), getUnsignedRange(RHS)) ||
           isKnownNonZero(getMinusSCEV(LHS, RHS));

  if (CmpInst::isSigned(Pred))
    return CheckRanges(getSignedRange(LHS), getSignedRange(RHS));

  return CheckRanges(getUnsignedRange(LHS), getUnsignedRange(RHS));
}

/// Proves `LHS Pred RHS` by recognizing one side as the other side plus a
/// constant under no-signed-wrap flags, e.g. X s< (X + C)<nsw> when C > 0.
bool ScalarEvolution::isKnownPredicateViaNoOverflow(ICmpInst::Predicate Pred,
                                                    const SCEV *LHS,
                                                    const SCEV *RHS) {
  // Match Result to (X + Y)<ExpectedFlags> where Y is a constant integer.
  // Return Y via OutY.
  auto MatchBinaryAddToConst =
      [this](const SCEV *Result, const SCEV *X, APInt &OutY,
             SCEV::NoWrapFlags ExpectedFlags) {
    const SCEV *NonConstOp, *ConstOp;
    SCEV::NoWrapFlags FlagsPresent;

    if (!splitBinaryAdd(Result, ConstOp, NonConstOp, FlagsPresent) ||
        !isa<SCEVConstant>(ConstOp) || NonConstOp != X)
      return false;

    OutY = cast<SCEVConstant>(ConstOp)->getAPInt();
    // All the flags we expect (e.g. nsw) must actually be present.
    return (FlagsPresent & ExpectedFlags) == ExpectedFlags;
  };

  APInt C;

  switch (Pred) {
  default:
    break;

  case ICmpInst::ICMP_SGE:
    // Reduce SGE to the SLE case by swapping the operands.
    std::swap(LHS, RHS);
    LLVM_FALLTHROUGH;
  case ICmpInst::ICMP_SLE:
    // X s<= (X + C)<nsw> if C >= 0
    if (MatchBinaryAddToConst(RHS, LHS, C, SCEV::FlagNSW) && C.isNonNegative())
      return true;

    // (X + C)<nsw> s<= X if C <= 0
    if (MatchBinaryAddToConst(LHS, RHS, C, SCEV::FlagNSW) &&
        !C.isStrictlyPositive())
      return true;
    break;

  case ICmpInst::ICMP_SGT:
    // Reduce SGT to the SLT case by swapping the operands.
    std::swap(LHS, RHS);
    LLVM_FALLTHROUGH;
  case ICmpInst::ICMP_SLT:
    // X s< (X + C)<nsw> if C > 0
    if (MatchBinaryAddToConst(RHS, LHS, C, SCEV::FlagNSW) &&
        C.isStrictlyPositive())
      return true;

    // (X + C)<nsw> s< X if C < 0
    if (MatchBinaryAddToConst(LHS, RHS, C, SCEV::FlagNSW) && C.isNegative())
      return true;
    break;
  }

  return false;
}

/// Proves an unsigned-less-than by splitting it into a non-negativity fact
/// plus a signed comparison (see the in-body comment for the identity used).
bool ScalarEvolution::isKnownPredicateViaSplitting(ICmpInst::Predicate Pred,
                                                   const SCEV *LHS,
                                                   const SCEV *RHS) {
  if (Pred != ICmpInst::ICMP_ULT || ProvingSplitPredicate)
    return false;

  // Allowing arbitrary number of activations of isKnownPredicateViaSplitting on
  // the stack can result in exponential time complexity.
  SaveAndRestore<bool> Restore(ProvingSplitPredicate, true);

  // If L >= 0 then I `ult` L <=> I >= 0 && I `slt` L
  //
  // To prove L >= 0 we use isKnownNonNegative whereas to prove I >= 0 we use
  // isKnownPredicate.  isKnownPredicate is more powerful, but also more
  // expensive; and using isKnownNonNegative(RHS) is sufficient for most of the
  // interesting cases seen in practice.  We can consider "upgrading" L >= 0 to
  // use isKnownPredicate later if needed.
  return isKnownNonNegative(RHS) &&
         isKnownPredicate(CmpInst::ICMP_SGE, LHS, getZero(LHS->getType())) &&
         isKnownPredicate(CmpInst::ICMP_SLT, LHS, RHS);
}

/// Scans block BB for @llvm.experimental.guard calls whose condition implies
/// `LHS Pred RHS`.
bool ScalarEvolution::isImpliedViaGuard(BasicBlock *BB,
                                        ICmpInst::Predicate Pred,
                                        const SCEV *LHS, const SCEV *RHS) {
  // No need to even try if we know the module has no guards.
  if (!HasGuards)
    return false;

  return any_of(*BB, [&](Instruction &I) {
    using namespace llvm::PatternMatch;

    Value *Condition;
    return match(&I, m_Intrinsic<Intrinsic::experimental_guard>(
                         m_Value(Condition))) &&
           isImpliedCond(Pred, LHS, RHS, Condition, false);
  });
}

/// isLoopBackedgeGuardedByCond - Test whether the backedge of the loop is
/// protected by a conditional between LHS and RHS.  This is used to
/// eliminate casts.
bool
ScalarEvolution::isLoopBackedgeGuardedByCond(const Loop *L,
                                             ICmpInst::Predicate Pred,
                                             const SCEV *LHS, const SCEV *RHS) {
  // Interpret a null as meaning no loop, where there is obviously no guard
  // (interprocedural conditions notwithstanding).
  if (!L) return true;

  if (VerifyIR)
    assert(!verifyFunction(*L->getHeader()->getParent(), &dbgs()) &&
           "This cannot be done on broken IR!");


  // Cheap context-free reasoning first.
  if (isKnownViaNonRecursiveReasoning(Pred, LHS, RHS))
    return true;

  BasicBlock *Latch = L->getLoopLatch();
  if (!Latch)
    return false;

  // Check whether the latch's own conditional branch implies the predicate.
  // The Inverse flag is set when the false edge is the one taking the
  // backedge to the header.
  BranchInst *LoopContinuePredicate =
      dyn_cast<BranchInst>(Latch->getTerminator());
  if (LoopContinuePredicate && LoopContinuePredicate->isConditional() &&
      isImpliedCond(Pred, LHS, RHS,
                    LoopContinuePredicate->getCondition(),
                    LoopContinuePredicate->getSuccessor(0) != L->getHeader()))
    return true;

  // We don't want more than one activation of the following loops on the stack
  // -- that can lead to O(n!) time complexity.
  if (WalkingBEDominatingConds)
    return false;

  SaveAndRestore<bool> ClearOnExit(WalkingBEDominatingConds, true);

  // See if we can exploit a trip count to prove the predicate.
  const auto &BETakenInfo = getBackedgeTakenInfo(L);
  const SCEV *LatchBECount = BETakenInfo.getExact(Latch, this);
  if (LatchBECount != getCouldNotCompute()) {
    // We know that Latch branches back to the loop header exactly
    // LatchBECount times.  This means the backedge condition at Latch is
    // equivalent to "{0,+,1} u< LatchBECount".
    Type *Ty = LatchBECount->getType();
    auto NoWrapFlags = SCEV::NoWrapFlags(SCEV::FlagNUW | SCEV::FlagNW);
    const SCEV *LoopCounter =
        getAddRecExpr(getZero(Ty), getOne(Ty), L, NoWrapFlags);
    if (isImpliedCond(Pred, LHS, RHS, ICmpInst::ICMP_ULT, LoopCounter,
                      LatchBECount))
      return true;
  }

  // Check conditions due to any @llvm.assume intrinsics.
  for (auto &AssumeVH : AC.assumptions()) {
    if (!AssumeVH)
      continue;
    auto *CI = cast<CallInst>(AssumeVH);
    // The assume must dominate the latch for its condition to be usable here.
    if (!DT.dominates(CI, Latch->getTerminator()))
      continue;

    if (isImpliedCond(Pred, LHS, RHS, CI->getArgOperand(0), false))
      return true;
  }

  // If the loop is not reachable from the entry block, we risk running into an
  // infinite loop as we walk up into the dom tree.  These loops do not matter
  // anyway, so we just return a conservative answer when we see them.
  if (!DT.isReachableFromEntry(L->getHeader()))
    return false;

  if (isImpliedViaGuard(Latch, Pred, LHS, RHS))
    return true;

  // Walk the dominator tree upwards from the latch to the header, trying
  // guards and dominating conditional branches along the way.
  for (DomTreeNode *DTN = DT[Latch], *HeaderDTN = DT[L->getHeader()];
       DTN != HeaderDTN; DTN = DTN->getIDom()) {
    assert(DTN && "should reach the loop header before reaching the root!");

    BasicBlock *BB = DTN->getBlock();
    if (isImpliedViaGuard(BB, Pred, LHS, RHS))
      return true;

    BasicBlock *PBB = BB->getSinglePredecessor();
    if (!PBB)
      continue;

    BranchInst *ContinuePredicate = dyn_cast<BranchInst>(PBB->getTerminator());
    if (!ContinuePredicate || !ContinuePredicate->isConditional())
      continue;

    Value *Condition = ContinuePredicate->getCondition();

    // If we have an edge `E` within the loop body that dominates the only
    // latch, the condition guarding `E` also guards the backedge.  This
    // reasoning works only for loops with a single latch.

    BasicBlockEdge DominatingEdge(PBB, BB);
    if (DominatingEdge.isSingleEdge()) {
      // We're constructively (and conservatively) enumerating edges within the
      // loop body that dominate the latch.  The dominator tree better agree
      // with us on this:
      assert(DT.dominates(DominatingEdge, Latch) && "should be!");

      if (isImpliedCond(Pred, LHS, RHS, Condition,
                        BB != ContinuePredicate->getSuccessor(0)))
        return true;
    }
  }

  return false;
}

/// Tests whether entry to the loop is protected by a conditional between
/// LHS and RHS, walking guards, entry-dominating branches, and assumes.
bool
ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L,
                                          ICmpInst::Predicate Pred,
                                          const SCEV *LHS, const SCEV *RHS) {
  // Interpret a null as meaning no loop, where there is obviously no guard
  // (interprocedural conditions notwithstanding).
  if (!L) return false;

  if (VerifyIR)
    assert(!verifyFunction(*L->getHeader()->getParent(), &dbgs()) &&
           "This cannot be done on broken IR!");

  // Both LHS and RHS must be available at loop entry.
  assert(isAvailableAtLoopEntry(LHS, L) &&
         "LHS is not available at Loop Entry");
  assert(isAvailableAtLoopEntry(RHS, L) &&
         "RHS is not available at Loop Entry");

  if (isKnownViaNonRecursiveReasoning(Pred, LHS, RHS))
    return true;

  // If we cannot prove strict comparison (e.g. a > b), maybe we can prove
  // the facts (a >= b && a != b) separately. A typical situation is when the
  // non-strict comparison is known from ranges and non-equality is known from
  // dominating predicates. If we are proving strict comparison, we always try
  // to prove non-equality and non-strict comparison separately.
  auto NonStrictPredicate = ICmpInst::getNonStrictPredicate(Pred);
  const bool ProvingStrictComparison = (Pred != NonStrictPredicate);
  // Accumulated partial results: once both halves (non-strict and
  // non-equality) are proved, the strict predicate follows.
  bool ProvedNonStrictComparison = false;
  bool ProvedNonEquality = false;

  if (ProvingStrictComparison) {
    ProvedNonStrictComparison =
        isKnownViaNonRecursiveReasoning(NonStrictPredicate, LHS, RHS);
    ProvedNonEquality =
        isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_NE, LHS, RHS);
    if (ProvedNonStrictComparison && ProvedNonEquality)
      return true;
  }

  // Try to prove (Pred, LHS, RHS) using isImpliedViaGuard.
  auto ProveViaGuard = [&](BasicBlock *Block) {
    if (isImpliedViaGuard(Block, Pred, LHS, RHS))
      return true;
    if (ProvingStrictComparison) {
      if (!ProvedNonStrictComparison)
        ProvedNonStrictComparison =
            isImpliedViaGuard(Block, NonStrictPredicate, LHS, RHS);
      if (!ProvedNonEquality)
        ProvedNonEquality =
            isImpliedViaGuard(Block, ICmpInst::ICMP_NE, LHS, RHS);
      if (ProvedNonStrictComparison && ProvedNonEquality)
        return true;
    }
    return false;
  };

  // Try to prove (Pred, LHS, RHS) using isImpliedCond.
  auto ProveViaCond = [&](Value *Condition, bool Inverse) {
    if (isImpliedCond(Pred, LHS, RHS, Condition, Inverse))
      return true;
    if (ProvingStrictComparison) {
      if (!ProvedNonStrictComparison)
        ProvedNonStrictComparison =
            isImpliedCond(NonStrictPredicate, LHS, RHS, Condition, Inverse);
      if (!ProvedNonEquality)
        ProvedNonEquality =
            isImpliedCond(ICmpInst::ICMP_NE, LHS, RHS, Condition, Inverse);
      if (ProvedNonStrictComparison && ProvedNonEquality)
        return true;
    }
    return false;
  };

  // Starting at the loop predecessor, climb up the predecessor chain, as long
  // as there are predecessors that can be found that have unique successors
  // leading to the original header.
  for (std::pair<BasicBlock *, BasicBlock *>
         Pair(L->getLoopPredecessor(), L->getHeader());
       Pair.first;
       Pair = getPredecessorWithUniqueSuccessorForBB(Pair.first)) {

    if (ProveViaGuard(Pair.first))
      return true;

    BranchInst *LoopEntryPredicate =
        dyn_cast<BranchInst>(Pair.first->getTerminator());
    if (!LoopEntryPredicate ||
        LoopEntryPredicate->isUnconditional())
      continue;

    // Inverse is set when the path to the header leaves through the branch's
    // false successor.
    if (ProveViaCond(LoopEntryPredicate->getCondition(),
                     LoopEntryPredicate->getSuccessor(0) != Pair.second))
      return true;
  }

  // Check conditions due to any @llvm.assume intrinsics.
  for (auto &AssumeVH : AC.assumptions()) {
    if (!AssumeVH)
      continue;
    auto *CI = cast<CallInst>(AssumeVH);
    if (!DT.dominates(CI, L->getHeader()))
      continue;

    if (ProveViaCond(CI->getArgOperand(0), false))
      return true;
  }

  return false;
}

/// Tests whether the IR condition FoundCondValue (inverted if Inverse is
/// set) implies `LHS Pred RHS`.  Recurses into and/or conditions and then
/// hands icmp operands to the SCEV-level overload.
bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred,
                                    const SCEV *LHS, const SCEV *RHS,
                                    Value *FoundCondValue,
                                    bool Inverse) {
  // Guard against infinite recursion through mutually-implying conditions.
  if (!PendingLoopPredicates.insert(FoundCondValue).second)
    return false;

  auto ClearOnExit =
      make_scope_exit([&]() { PendingLoopPredicates.erase(FoundCondValue); });

  // Recursively handle And and Or conditions.  A non-inverted And (or an
  // inverted Or) lets either operand prove the implication on its own.
  if (BinaryOperator *BO = dyn_cast<BinaryOperator>(FoundCondValue)) {
    if (BO->getOpcode() == Instruction::And) {
      if (!Inverse)
        return isImpliedCond(Pred, LHS, RHS, BO->getOperand(0), Inverse) ||
               isImpliedCond(Pred, LHS, RHS, BO->getOperand(1), Inverse);
    } else if (BO->getOpcode() == Instruction::Or) {
      if (Inverse)
        return isImpliedCond(Pred, LHS, RHS, BO->getOperand(0), Inverse) ||
               isImpliedCond(Pred, LHS, RHS, BO->getOperand(1), Inverse);
    }
  }

  ICmpInst *ICI = dyn_cast<ICmpInst>(FoundCondValue);
  if (!ICI) return false;

  // Now that we found a conditional branch that dominates the loop or controls
  // the loop latch. Check to see if it is the comparison we are looking for.
  // Use the inverted predicate when the condition is known false on the
  // path of interest.
  ICmpInst::Predicate FoundPred;
  if (Inverse)
    FoundPred = ICI->getInversePredicate();
  else
    FoundPred = ICI->getPredicate();

  const SCEV *FoundLHS = getSCEV(ICI->getOperand(0));
  const SCEV *FoundRHS = getSCEV(ICI->getOperand(1));

  return isImpliedCond(Pred, LHS, RHS, FoundPred, FoundLHS, FoundRHS);
}

/// Tests whether the known fact `FoundLHS FoundPred FoundRHS` implies the
/// queried fact `LHS Pred RHS`, entirely at the SCEV level.
bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS,
                                    const SCEV *RHS,
                                    ICmpInst::Predicate FoundPred,
                                    const SCEV *FoundLHS,
                                    const SCEV *FoundRHS) {
  // Balance the types: extend the narrower pair to the wider type, choosing
  // sign- vs zero-extension from the signedness of that pair's predicate.
  if (getTypeSizeInBits(LHS->getType()) <
      getTypeSizeInBits(FoundLHS->getType())) {
    if (CmpInst::isSigned(Pred)) {
      LHS = getSignExtendExpr(LHS, FoundLHS->getType());
      RHS = getSignExtendExpr(RHS, FoundLHS->getType());
    } else {
      LHS = getZeroExtendExpr(LHS, FoundLHS->getType());
      RHS = getZeroExtendExpr(RHS, FoundLHS->getType());
    }
  } else if (getTypeSizeInBits(LHS->getType()) >
      getTypeSizeInBits(FoundLHS->getType())) {
    if (CmpInst::isSigned(FoundPred)) {
      FoundLHS = getSignExtendExpr(FoundLHS, LHS->getType());
      FoundRHS = getSignExtendExpr(FoundRHS, LHS->getType());
    } else {
      FoundLHS = getZeroExtendExpr(FoundLHS, LHS->getType());
      FoundRHS = getZeroExtendExpr(FoundRHS, LHS->getType());
    }
  }

  // Canonicalize the query to match the way instcombine will have
  // canonicalized the comparison.
  // If simplification collapses either comparison to identical operands, the
  // answer follows directly from the predicate's behavior on equal values.
  if (SimplifyICmpOperands(Pred, LHS, RHS))
    if (LHS == RHS)
      return CmpInst::isTrueWhenEqual(Pred);
  if (SimplifyICmpOperands(FoundPred, FoundLHS, FoundRHS))
    if (FoundLHS == FoundRHS)
      return CmpInst::isFalseWhenEqual(FoundPred);

  // Check to see if we can make the LHS or RHS match.
  if (LHS == FoundRHS || RHS == FoundLHS) {
    if (isa<SCEVConstant>(RHS)) {
      std::swap(FoundLHS, FoundRHS);
      FoundPred = ICmpInst::getSwappedPredicate(FoundPred);
    } else {
      std::swap(LHS, RHS);
      Pred = ICmpInst::getSwappedPredicate(Pred);
    }
  }

  // Check whether the found predicate is the same as the desired predicate.
  if (FoundPred == Pred)
    return isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS);

  // Check whether swapping the found predicate makes it the same as the
  // desired predicate.
  if (ICmpInst::getSwappedPredicate(FoundPred) == Pred) {
    if (isa<SCEVConstant>(RHS))
      return isImpliedCondOperands(Pred, LHS, RHS, FoundRHS, FoundLHS);
    else
      return isImpliedCondOperands(ICmpInst::getSwappedPredicate(Pred),
                                   RHS, LHS, FoundLHS, FoundRHS);
  }

  // Unsigned comparison is the same as signed comparison when both the operands
  // are non-negative.
  if (CmpInst::isUnsigned(FoundPred) &&
      CmpInst::getSignedPredicate(FoundPred) == Pred &&
      isKnownNonNegative(FoundLHS) && isKnownNonNegative(FoundRHS))
    return isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS);

  // Check if we can make progress by sharpening ranges.
  if (FoundPred == ICmpInst::ICMP_NE &&
      (isa<SCEVConstant>(FoundLHS) || isa<SCEVConstant>(FoundRHS))) {

    const SCEVConstant *C = nullptr;
    const SCEV *V = nullptr;

    if (isa<SCEVConstant>(FoundLHS)) {
      C = cast<SCEVConstant>(FoundLHS);
      V = FoundRHS;
    } else {
      C = cast<SCEVConstant>(FoundRHS);
      V = FoundLHS;
    }

    // The guarding predicate tells us that C != V. If the known range
    // of V is [C, t), we can sharpen the range to [C + 1, t).  The
    // range we consider has to correspond to same signedness as the
    // predicate we're interested in folding.

    APInt Min = ICmpInst::isSigned(Pred) ?
        getSignedRangeMin(V) : getUnsignedRangeMin(V);

    if (Min == C->getAPInt()) {
      // Given (V >= Min && V != Min) we conclude V >= (Min + 1).
      // This is true even if (Min + 1) wraps around -- in case of
      // wraparound, (Min + 1) < Min, so (V >= Min => V >= (Min + 1)).

      APInt SharperMin = Min + 1;

      switch (Pred) {
      case ICmpInst::ICMP_SGE:
      case ICmpInst::ICMP_UGE:
        // We know V `Pred` SharperMin.  If this implies LHS `Pred`
        // RHS, we're done.
        if (isImpliedCondOperands(Pred, LHS, RHS, V,
                                  getConstant(SharperMin)))
          return true;
        LLVM_FALLTHROUGH;

      case ICmpInst::ICMP_SGT:
      case ICmpInst::ICMP_UGT:
        // We know from the range information that (V `Pred` Min ||
        // V == Min).  We know from the guarding condition that !(V
        // == Min).  This gives us
        //
        //       V `Pred` Min || V == Min && !(V == Min)
        //   =>  V `Pred` Min
        //
        // If V `Pred` Min implies LHS `Pred` RHS, we're done.

        if (isImpliedCondOperands(Pred, LHS, RHS, V, getConstant(Min)))
          return true;
        LLVM_FALLTHROUGH;

      default:
        // No change
        break;
      }
    }
  }

  // Check whether the actual condition is beyond sufficient.
  if (FoundPred == ICmpInst::ICMP_EQ)
    if (ICmpInst::isTrueWhenEqual(Pred))
      if (isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS))
        return true;
  if (Pred == ICmpInst::ICMP_NE)
    if (!ICmpInst::isTrueWhenEqual(FoundPred))
      if (isImpliedCondOperands(FoundPred, LHS, RHS, FoundLHS, FoundRHS))
        return true;

  // Otherwise assume the worst.
  return false;
}

/// Decomposes Expr into its two operands if it is a two-operand SCEVAddExpr,
/// returning them via L/R along with the expression's no-wrap flags.
bool ScalarEvolution::splitBinaryAdd(const SCEV *Expr,
                                     const SCEV *&L, const SCEV *&R,
                                     SCEV::NoWrapFlags &Flags) {
  const auto *AE = dyn_cast<SCEVAddExpr>(Expr);
  if (!AE || AE->getNumOperands() != 2)
    return false;

  L = AE->getOperand(0);
  R = AE->getOperand(1);
  Flags = AE->getNoWrapFlags();
  return true;
}

/// Computes `More - Less` when it is a known constant, using cheap
/// structural matching rather than building a subtraction expression.
/// Returns None when no constant difference can be determined this way.
Optional<APInt> ScalarEvolution::computeConstantDifference(const SCEV *More,
                                                           const SCEV *Less) {
  // We avoid subtracting expressions here because this function is usually
  // fairly deep in the call stack (i.e. is called many times).

  // X - X = 0.
  if (More == Less)
    return APInt(getTypeSizeInBits(More->getType()), 0);

  if (isa<SCEVAddRecExpr>(Less) && isa<SCEVAddRecExpr>(More)) {
    const auto *LAR = cast<SCEVAddRecExpr>(Less);
    const auto *MAR = cast<SCEVAddRecExpr>(More);

    if (LAR->getLoop() != MAR->getLoop())
      return None;

    // We look at affine expressions only; not for correctness but to keep
    // getStepRecurrence cheap.
    if (!LAR->isAffine() || !MAR->isAffine())
      return None;

    if (LAR->getStepRecurrence(*this) != MAR->getStepRecurrence(*this))
      return None;

    // Same loop, same step: the difference reduces to the difference of the
    // start values.
    Less = LAR->getStart();
    More = MAR->getStart();

    // fall through
  }

  if (isa<SCEVConstant>(Less) && isa<SCEVConstant>(More)) {
    const auto &M = cast<SCEVConstant>(More)->getAPInt();
    const auto &L = cast<SCEVConstant>(Less)->getAPInt();
    return M - L;
  }

  SCEV::NoWrapFlags Flags;
  const SCEV *LLess = nullptr, *RLess = nullptr;
  const SCEV *LMore = nullptr, *RMore = nullptr;
  const SCEVConstant *C1 = nullptr, *C2 = nullptr;
  // Compare (X + C1) vs X.
  if (splitBinaryAdd(Less, LLess, RLess, Flags))
    if ((C1 = dyn_cast<SCEVConstant>(LLess)))
      if (RLess == More)
        return -(C1->getAPInt());

  // Compare X vs (X + C2).
  if (splitBinaryAdd(More, LMore, RMore, Flags))
    if ((C2 = dyn_cast<SCEVConstant>(LMore)))
      if (RMore == Less)
        return C2->getAPInt();

  // Compare (X + C1) vs (X + C2).
  if (C1 && C2 && RLess == RMore)
    return C2->getAPInt() - C1->getAPInt();

  return None;
}

// NOTE(review): this function continues past the end of this chunk; only its
// head is visible here.
// Proves LHS u</s< RHS from FoundLHS u</s< FoundRHS when both sides are add
// recurrences on the same loop differing by a constant (see the proof
// sketch in the comments below).
bool ScalarEvolution::isImpliedCondOperandsViaNoOverflow(
    ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS,
    const SCEV *FoundLHS, const SCEV *FoundRHS) {
  if (Pred != CmpInst::ICMP_SLT && Pred != CmpInst::ICMP_ULT)
    return false;

  const auto *AddRecLHS = dyn_cast<SCEVAddRecExpr>(LHS);
  if (!AddRecLHS)
    return false;

  const auto *AddRecFoundLHS = dyn_cast<SCEVAddRecExpr>(FoundLHS);
  if (!AddRecFoundLHS)
    return false;

  // We'd like to let SCEV reason about control dependencies, so we constrain
  // both the inequalities to be about add recurrences on the same loop.  This
  // way we can use isLoopEntryGuardedByCond later.

  const Loop *L = AddRecFoundLHS->getLoop();
  if (L != AddRecLHS->getLoop())
    return false;

  // FoundLHS u< FoundRHS u< -C => (FoundLHS + C) u< (FoundRHS + C) ... (1)
  //
  // FoundLHS s< FoundRHS s< INT_MIN - C => (FoundLHS + C) s< (FoundRHS + C)
  //                                                             ... (2)
  //
  // Informal proof for (2), assuming (1) [*]:
  //
  // We'll also assume (A s< B) <=> ((A + INT_MIN) u< (B + INT_MIN)) ...
(3)[**] 10019296417Sdim // 10020296417Sdim // Then 10021296417Sdim // 10022296417Sdim // FoundLHS s< FoundRHS s< INT_MIN - C 10023296417Sdim // <=> (FoundLHS + INT_MIN) u< (FoundRHS + INT_MIN) u< -C [ using (3) ] 10024296417Sdim // <=> (FoundLHS + INT_MIN + C) u< (FoundRHS + INT_MIN + C) [ using (1) ] 10025296417Sdim // <=> (FoundLHS + INT_MIN + C + INT_MIN) s< 10026296417Sdim // (FoundRHS + INT_MIN + C + INT_MIN) [ using (3) ] 10027296417Sdim // <=> FoundLHS + C s< FoundRHS + C 10028296417Sdim // 10029296417Sdim // [*]: (1) can be proved by ruling out overflow. 10030296417Sdim // 10031296417Sdim // [**]: This can be proved by analyzing all the four possibilities: 10032296417Sdim // (A s< 0, B s< 0), (A s< 0, B s>= 0), (A s>= 0, B s< 0) and 10033296417Sdim // (A s>= 0, B s>= 0). 10034296417Sdim // 10035296417Sdim // Note: 10036296417Sdim // Despite (2), "FoundRHS s< INT_MIN - C" does not mean that "FoundRHS + C" 10037296417Sdim // will not sign underflow. For instance, say FoundLHS = (i8 -128), FoundRHS 10038296417Sdim // = (i8 -127) and C = (i8 -100). Then INT_MIN - C = (i8 -28), and FoundRHS 10039296417Sdim // s< (INT_MIN - C). Lack of sign overflow / underflow in "FoundRHS + C" is 10040296417Sdim // neither necessary nor sufficient to prove "(FoundLHS + C) s< (FoundRHS + 10041296417Sdim // C)". 
10042296417Sdim 10043314564Sdim Optional<APInt> LDiff = computeConstantDifference(LHS, FoundLHS); 10044314564Sdim Optional<APInt> RDiff = computeConstantDifference(RHS, FoundRHS); 10045314564Sdim if (!LDiff || !RDiff || *LDiff != *RDiff) 10046296417Sdim return false; 10047296417Sdim 10048314564Sdim if (LDiff->isMinValue()) 10049296417Sdim return true; 10050296417Sdim 10051296417Sdim APInt FoundRHSLimit; 10052296417Sdim 10053296417Sdim if (Pred == CmpInst::ICMP_ULT) { 10054314564Sdim FoundRHSLimit = -(*RDiff); 10055296417Sdim } else { 10056296417Sdim assert(Pred == CmpInst::ICMP_SLT && "Checked above!"); 10057314564Sdim FoundRHSLimit = APInt::getSignedMinValue(getTypeSizeInBits(RHS->getType())) - *RDiff; 10058296417Sdim } 10059296417Sdim 10060296417Sdim // Try to prove (1) or (2), as needed. 10061341825Sdim return isAvailableAtLoopEntry(FoundRHS, L) && 10062341825Sdim isLoopEntryGuardedByCond(L, Pred, FoundRHS, 10063296417Sdim getConstant(FoundRHSLimit)); 10064296417Sdim} 10065296417Sdim 10066341825Sdimbool ScalarEvolution::isImpliedViaMerge(ICmpInst::Predicate Pred, 10067341825Sdim const SCEV *LHS, const SCEV *RHS, 10068341825Sdim const SCEV *FoundLHS, 10069341825Sdim const SCEV *FoundRHS, unsigned Depth) { 10070341825Sdim const PHINode *LPhi = nullptr, *RPhi = nullptr; 10071341825Sdim 10072341825Sdim auto ClearOnExit = make_scope_exit([&]() { 10073341825Sdim if (LPhi) { 10074341825Sdim bool Erased = PendingMerges.erase(LPhi); 10075341825Sdim assert(Erased && "Failed to erase LPhi!"); 10076341825Sdim (void)Erased; 10077341825Sdim } 10078341825Sdim if (RPhi) { 10079341825Sdim bool Erased = PendingMerges.erase(RPhi); 10080341825Sdim assert(Erased && "Failed to erase RPhi!"); 10081341825Sdim (void)Erased; 10082341825Sdim } 10083341825Sdim }); 10084341825Sdim 10085341825Sdim // Find respective Phis and check that they are not being pending. 
10086341825Sdim if (const SCEVUnknown *LU = dyn_cast<SCEVUnknown>(LHS)) 10087341825Sdim if (auto *Phi = dyn_cast<PHINode>(LU->getValue())) { 10088341825Sdim if (!PendingMerges.insert(Phi).second) 10089341825Sdim return false; 10090341825Sdim LPhi = Phi; 10091341825Sdim } 10092341825Sdim if (const SCEVUnknown *RU = dyn_cast<SCEVUnknown>(RHS)) 10093341825Sdim if (auto *Phi = dyn_cast<PHINode>(RU->getValue())) { 10094341825Sdim // If we detect a loop of Phi nodes being processed by this method, for 10095341825Sdim // example: 10096341825Sdim // 10097341825Sdim // %a = phi i32 [ %some1, %preheader ], [ %b, %latch ] 10098341825Sdim // %b = phi i32 [ %some2, %preheader ], [ %a, %latch ] 10099341825Sdim // 10100341825Sdim // we don't want to deal with a case that complex, so return conservative 10101341825Sdim // answer false. 10102341825Sdim if (!PendingMerges.insert(Phi).second) 10103341825Sdim return false; 10104341825Sdim RPhi = Phi; 10105341825Sdim } 10106341825Sdim 10107341825Sdim // If none of LHS, RHS is a Phi, nothing to do here. 10108341825Sdim if (!LPhi && !RPhi) 10109341825Sdim return false; 10110341825Sdim 10111341825Sdim // If there is a SCEVUnknown Phi we are interested in, make it left. 
10112341825Sdim if (!LPhi) { 10113341825Sdim std::swap(LHS, RHS); 10114341825Sdim std::swap(FoundLHS, FoundRHS); 10115341825Sdim std::swap(LPhi, RPhi); 10116341825Sdim Pred = ICmpInst::getSwappedPredicate(Pred); 10117341825Sdim } 10118341825Sdim 10119341825Sdim assert(LPhi && "LPhi should definitely be a SCEVUnknown Phi!"); 10120341825Sdim const BasicBlock *LBB = LPhi->getParent(); 10121341825Sdim const SCEVAddRecExpr *RAR = dyn_cast<SCEVAddRecExpr>(RHS); 10122341825Sdim 10123341825Sdim auto ProvedEasily = [&](const SCEV *S1, const SCEV *S2) { 10124341825Sdim return isKnownViaNonRecursiveReasoning(Pred, S1, S2) || 10125341825Sdim isImpliedCondOperandsViaRanges(Pred, S1, S2, FoundLHS, FoundRHS) || 10126341825Sdim isImpliedViaOperations(Pred, S1, S2, FoundLHS, FoundRHS, Depth); 10127341825Sdim }; 10128341825Sdim 10129341825Sdim if (RPhi && RPhi->getParent() == LBB) { 10130341825Sdim // Case one: RHS is also a SCEVUnknown Phi from the same basic block. 10131341825Sdim // If we compare two Phis from the same block, and for each entry block 10132341825Sdim // the predicate is true for incoming values from this block, then the 10133341825Sdim // predicate is also true for the Phis. 10134341825Sdim for (const BasicBlock *IncBB : predecessors(LBB)) { 10135341825Sdim const SCEV *L = getSCEV(LPhi->getIncomingValueForBlock(IncBB)); 10136341825Sdim const SCEV *R = getSCEV(RPhi->getIncomingValueForBlock(IncBB)); 10137341825Sdim if (!ProvedEasily(L, R)) 10138341825Sdim return false; 10139341825Sdim } 10140341825Sdim } else if (RAR && RAR->getLoop()->getHeader() == LBB) { 10141341825Sdim // Case two: RHS is also a Phi from the same basic block, and it is an 10142341825Sdim // AddRec. It means that there is a loop which has both AddRec and Unknown 10143341825Sdim // PHIs, for it we can compare incoming values of AddRec from above the loop 10144341825Sdim // and latch with their respective incoming values of LPhi. 
10145341825Sdim // TODO: Generalize to handle loops with many inputs in a header. 10146341825Sdim if (LPhi->getNumIncomingValues() != 2) return false; 10147341825Sdim 10148341825Sdim auto *RLoop = RAR->getLoop(); 10149341825Sdim auto *Predecessor = RLoop->getLoopPredecessor(); 10150341825Sdim assert(Predecessor && "Loop with AddRec with no predecessor?"); 10151341825Sdim const SCEV *L1 = getSCEV(LPhi->getIncomingValueForBlock(Predecessor)); 10152341825Sdim if (!ProvedEasily(L1, RAR->getStart())) 10153341825Sdim return false; 10154341825Sdim auto *Latch = RLoop->getLoopLatch(); 10155341825Sdim assert(Latch && "Loop with AddRec with no latch?"); 10156341825Sdim const SCEV *L2 = getSCEV(LPhi->getIncomingValueForBlock(Latch)); 10157341825Sdim if (!ProvedEasily(L2, RAR->getPostIncExpr(*this))) 10158341825Sdim return false; 10159341825Sdim } else { 10160341825Sdim // In all other cases go over inputs of LHS and compare each of them to RHS, 10161341825Sdim // the predicate is true for (LHS, RHS) if it is true for all such pairs. 10162341825Sdim // At this point RHS is either a non-Phi, or it is a Phi from some block 10163341825Sdim // different from LBB. 10164341825Sdim for (const BasicBlock *IncBB : predecessors(LBB)) { 10165341825Sdim // Check that RHS is available in this block. 
10166341825Sdim if (!dominates(RHS, IncBB)) 10167341825Sdim return false; 10168341825Sdim const SCEV *L = getSCEV(LPhi->getIncomingValueForBlock(IncBB)); 10169341825Sdim if (!ProvedEasily(L, RHS)) 10170341825Sdim return false; 10171341825Sdim } 10172341825Sdim } 10173341825Sdim return true; 10174341825Sdim} 10175341825Sdim 10176198090Srdivackybool ScalarEvolution::isImpliedCondOperands(ICmpInst::Predicate Pred, 10177198090Srdivacky const SCEV *LHS, const SCEV *RHS, 10178198090Srdivacky const SCEV *FoundLHS, 10179198090Srdivacky const SCEV *FoundRHS) { 10180288943Sdim if (isImpliedCondOperandsViaRanges(Pred, LHS, RHS, FoundLHS, FoundRHS)) 10181288943Sdim return true; 10182288943Sdim 10183296417Sdim if (isImpliedCondOperandsViaNoOverflow(Pred, LHS, RHS, FoundLHS, FoundRHS)) 10184296417Sdim return true; 10185296417Sdim 10186198090Srdivacky return isImpliedCondOperandsHelper(Pred, LHS, RHS, 10187198090Srdivacky FoundLHS, FoundRHS) || 10188198090Srdivacky // ~x < ~y --> x > y 10189198090Srdivacky isImpliedCondOperandsHelper(Pred, LHS, RHS, 10190198090Srdivacky getNotSCEV(FoundRHS), 10191198090Srdivacky getNotSCEV(FoundLHS)); 10192198090Srdivacky} 10193198090Srdivacky 10194353358Sdim/// Is MaybeMinMaxExpr an (U|S)(Min|Max) of Candidate and some other values? 
10195353358Sdimtemplate <typename MinMaxExprType> 10196353358Sdimstatic bool IsMinMaxConsistingOf(const SCEV *MaybeMinMaxExpr, 10197353358Sdim const SCEV *Candidate) { 10198353358Sdim const MinMaxExprType *MinMaxExpr = dyn_cast<MinMaxExprType>(MaybeMinMaxExpr); 10199353358Sdim if (!MinMaxExpr) 10200280031Sdim return false; 10201280031Sdim 10202353358Sdim return find(MinMaxExpr->operands(), Candidate) != MinMaxExpr->op_end(); 10203280031Sdim} 10204280031Sdim 10205296417Sdimstatic bool IsKnownPredicateViaAddRecStart(ScalarEvolution &SE, 10206296417Sdim ICmpInst::Predicate Pred, 10207296417Sdim const SCEV *LHS, const SCEV *RHS) { 10208296417Sdim // If both sides are affine addrecs for the same loop, with equal 10209296417Sdim // steps, and we know the recurrences don't wrap, then we only 10210296417Sdim // need to check the predicate on the starting values. 10211296417Sdim 10212296417Sdim if (!ICmpInst::isRelational(Pred)) 10213296417Sdim return false; 10214296417Sdim 10215296417Sdim const SCEVAddRecExpr *LAR = dyn_cast<SCEVAddRecExpr>(LHS); 10216296417Sdim if (!LAR) 10217296417Sdim return false; 10218296417Sdim const SCEVAddRecExpr *RAR = dyn_cast<SCEVAddRecExpr>(RHS); 10219296417Sdim if (!RAR) 10220296417Sdim return false; 10221296417Sdim if (LAR->getLoop() != RAR->getLoop()) 10222296417Sdim return false; 10223296417Sdim if (!LAR->isAffine() || !RAR->isAffine()) 10224296417Sdim return false; 10225296417Sdim 10226296417Sdim if (LAR->getStepRecurrence(SE) != RAR->getStepRecurrence(SE)) 10227296417Sdim return false; 10228296417Sdim 10229296417Sdim SCEV::NoWrapFlags NW = ICmpInst::isSigned(Pred) ? 
10230296417Sdim SCEV::FlagNSW : SCEV::FlagNUW; 10231296417Sdim if (!LAR->getNoWrapFlags(NW) || !RAR->getNoWrapFlags(NW)) 10232296417Sdim return false; 10233296417Sdim 10234296417Sdim return SE.isKnownPredicate(Pred, LAR->getStart(), RAR->getStart()); 10235296417Sdim} 10236296417Sdim 10237280031Sdim/// Is LHS `Pred` RHS true on the virtue of LHS or RHS being a Min or Max 10238280031Sdim/// expression? 10239280031Sdimstatic bool IsKnownPredicateViaMinOrMax(ScalarEvolution &SE, 10240280031Sdim ICmpInst::Predicate Pred, 10241280031Sdim const SCEV *LHS, const SCEV *RHS) { 10242280031Sdim switch (Pred) { 10243280031Sdim default: 10244280031Sdim return false; 10245280031Sdim 10246280031Sdim case ICmpInst::ICMP_SGE: 10247280031Sdim std::swap(LHS, RHS); 10248314564Sdim LLVM_FALLTHROUGH; 10249280031Sdim case ICmpInst::ICMP_SLE: 10250280031Sdim return 10251353358Sdim // min(A, ...) <= A 10252353358Sdim IsMinMaxConsistingOf<SCEVSMinExpr>(LHS, RHS) || 10253353358Sdim // A <= max(A, ...) 10254353358Sdim IsMinMaxConsistingOf<SCEVSMaxExpr>(RHS, LHS); 10255280031Sdim 10256280031Sdim case ICmpInst::ICMP_UGE: 10257280031Sdim std::swap(LHS, RHS); 10258314564Sdim LLVM_FALLTHROUGH; 10259280031Sdim case ICmpInst::ICMP_ULE: 10260280031Sdim return 10261353358Sdim // min(A, ...) <= A 10262353358Sdim IsMinMaxConsistingOf<SCEVUMinExpr>(LHS, RHS) || 10263353358Sdim // A <= max(A, ...) 
10264353358Sdim IsMinMaxConsistingOf<SCEVUMaxExpr>(RHS, LHS); 10265280031Sdim } 10266280031Sdim 10267280031Sdim llvm_unreachable("covered switch fell through?!"); 10268280031Sdim} 10269280031Sdim 10270321369Sdimbool ScalarEvolution::isImpliedViaOperations(ICmpInst::Predicate Pred, 10271321369Sdim const SCEV *LHS, const SCEV *RHS, 10272321369Sdim const SCEV *FoundLHS, 10273321369Sdim const SCEV *FoundRHS, 10274321369Sdim unsigned Depth) { 10275321369Sdim assert(getTypeSizeInBits(LHS->getType()) == 10276321369Sdim getTypeSizeInBits(RHS->getType()) && 10277321369Sdim "LHS and RHS have different sizes?"); 10278321369Sdim assert(getTypeSizeInBits(FoundLHS->getType()) == 10279321369Sdim getTypeSizeInBits(FoundRHS->getType()) && 10280321369Sdim "FoundLHS and FoundRHS have different sizes?"); 10281321369Sdim // We want to avoid hurting the compile time with analysis of too big trees. 10282321369Sdim if (Depth > MaxSCEVOperationsImplicationDepth) 10283321369Sdim return false; 10284321369Sdim // We only want to work with ICMP_SGT comparison so far. 10285321369Sdim // TODO: Extend to ICMP_UGT? 10286321369Sdim if (Pred == ICmpInst::ICMP_SLT) { 10287321369Sdim Pred = ICmpInst::ICMP_SGT; 10288321369Sdim std::swap(LHS, RHS); 10289321369Sdim std::swap(FoundLHS, FoundRHS); 10290321369Sdim } 10291321369Sdim if (Pred != ICmpInst::ICMP_SGT) 10292321369Sdim return false; 10293321369Sdim 10294321369Sdim auto GetOpFromSExt = [&](const SCEV *S) { 10295321369Sdim if (auto *Ext = dyn_cast<SCEVSignExtendExpr>(S)) 10296321369Sdim return Ext->getOperand(); 10297321369Sdim // TODO: If S is a SCEVConstant then you can cheaply "strip" the sext off 10298321369Sdim // the constant in some cases. 10299321369Sdim return S; 10300321369Sdim }; 10301321369Sdim 10302321369Sdim // Acquire values from extensions. 
10303341825Sdim auto *OrigLHS = LHS; 10304321369Sdim auto *OrigFoundLHS = FoundLHS; 10305321369Sdim LHS = GetOpFromSExt(LHS); 10306321369Sdim FoundLHS = GetOpFromSExt(FoundLHS); 10307321369Sdim 10308321369Sdim // Is the SGT predicate can be proved trivially or using the found context. 10309321369Sdim auto IsSGTViaContext = [&](const SCEV *S1, const SCEV *S2) { 10310341825Sdim return isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_SGT, S1, S2) || 10311321369Sdim isImpliedViaOperations(ICmpInst::ICMP_SGT, S1, S2, OrigFoundLHS, 10312321369Sdim FoundRHS, Depth + 1); 10313321369Sdim }; 10314321369Sdim 10315321369Sdim if (auto *LHSAddExpr = dyn_cast<SCEVAddExpr>(LHS)) { 10316321369Sdim // We want to avoid creation of any new non-constant SCEV. Since we are 10317321369Sdim // going to compare the operands to RHS, we should be certain that we don't 10318321369Sdim // need any size extensions for this. So let's decline all cases when the 10319321369Sdim // sizes of types of LHS and RHS do not match. 10320321369Sdim // TODO: Maybe try to get RHS from sext to catch more cases? 10321321369Sdim if (getTypeSizeInBits(LHS->getType()) != getTypeSizeInBits(RHS->getType())) 10322321369Sdim return false; 10323321369Sdim 10324321369Sdim // Should not overflow. 10325321369Sdim if (!LHSAddExpr->hasNoSignedWrap()) 10326321369Sdim return false; 10327321369Sdim 10328321369Sdim auto *LL = LHSAddExpr->getOperand(0); 10329321369Sdim auto *LR = LHSAddExpr->getOperand(1); 10330321369Sdim auto *MinusOne = getNegativeSCEV(getOne(RHS->getType())); 10331321369Sdim 10332321369Sdim // Checks that S1 >= 0 && S2 > RHS, trivially or using the found context. 10333321369Sdim auto IsSumGreaterThanRHS = [&](const SCEV *S1, const SCEV *S2) { 10334321369Sdim return IsSGTViaContext(S1, MinusOne) && IsSGTViaContext(S2, RHS); 10335321369Sdim }; 10336321369Sdim // Try to prove the following rule: 10337321369Sdim // (LHS = LL + LR) && (LL >= 0) && (LR > RHS) => (LHS > RHS). 
10338321369Sdim // (LHS = LL + LR) && (LR >= 0) && (LL > RHS) => (LHS > RHS). 10339321369Sdim if (IsSumGreaterThanRHS(LL, LR) || IsSumGreaterThanRHS(LR, LL)) 10340321369Sdim return true; 10341321369Sdim } else if (auto *LHSUnknownExpr = dyn_cast<SCEVUnknown>(LHS)) { 10342321369Sdim Value *LL, *LR; 10343321369Sdim // FIXME: Once we have SDiv implemented, we can get rid of this matching. 10344327952Sdim 10345321369Sdim using namespace llvm::PatternMatch; 10346327952Sdim 10347321369Sdim if (match(LHSUnknownExpr->getValue(), m_SDiv(m_Value(LL), m_Value(LR)))) { 10348321369Sdim // Rules for division. 10349321369Sdim // We are going to perform some comparisons with Denominator and its 10350321369Sdim // derivative expressions. In general case, creating a SCEV for it may 10351321369Sdim // lead to a complex analysis of the entire graph, and in particular it 10352321369Sdim // can request trip count recalculation for the same loop. This would 10353321369Sdim // cache as SCEVCouldNotCompute to avoid the infinite recursion. To avoid 10354321369Sdim // this, we only want to create SCEVs that are constants in this section. 10355321369Sdim // So we bail if Denominator is not a constant. 10356321369Sdim if (!isa<ConstantInt>(LR)) 10357321369Sdim return false; 10358321369Sdim 10359321369Sdim auto *Denominator = cast<SCEVConstant>(getSCEV(LR)); 10360321369Sdim 10361321369Sdim // We want to make sure that LHS = FoundLHS / Denominator. If it is so, 10362321369Sdim // then a SCEV for the numerator already exists and matches with FoundLHS. 10363321369Sdim auto *Numerator = getExistingSCEV(LL); 10364321369Sdim if (!Numerator || Numerator->getType() != FoundLHS->getType()) 10365321369Sdim return false; 10366321369Sdim 10367321369Sdim // Make sure that the numerator matches with FoundLHS and the denominator 10368321369Sdim // is positive. 
10369321369Sdim if (!HasSameValue(Numerator, FoundLHS) || !isKnownPositive(Denominator)) 10370321369Sdim return false; 10371321369Sdim 10372321369Sdim auto *DTy = Denominator->getType(); 10373321369Sdim auto *FRHSTy = FoundRHS->getType(); 10374321369Sdim if (DTy->isPointerTy() != FRHSTy->isPointerTy()) 10375321369Sdim // One of types is a pointer and another one is not. We cannot extend 10376321369Sdim // them properly to a wider type, so let us just reject this case. 10377321369Sdim // TODO: Usage of getEffectiveSCEVType for DTy, FRHSTy etc should help 10378321369Sdim // to avoid this check. 10379321369Sdim return false; 10380321369Sdim 10381321369Sdim // Given that: 10382321369Sdim // FoundLHS > FoundRHS, LHS = FoundLHS / Denominator, Denominator > 0. 10383321369Sdim auto *WTy = getWiderType(DTy, FRHSTy); 10384321369Sdim auto *DenominatorExt = getNoopOrSignExtend(Denominator, WTy); 10385321369Sdim auto *FoundRHSExt = getNoopOrSignExtend(FoundRHS, WTy); 10386321369Sdim 10387321369Sdim // Try to prove the following rule: 10388321369Sdim // (FoundRHS > Denominator - 2) && (RHS <= 0) => (LHS > RHS). 10389321369Sdim // For example, given that FoundLHS > 2. It means that FoundLHS is at 10390321369Sdim // least 3. If we divide it by Denominator < 4, we will have at least 1. 10391321369Sdim auto *DenomMinusTwo = getMinusSCEV(DenominatorExt, getConstant(WTy, 2)); 10392321369Sdim if (isKnownNonPositive(RHS) && 10393321369Sdim IsSGTViaContext(FoundRHSExt, DenomMinusTwo)) 10394321369Sdim return true; 10395321369Sdim 10396321369Sdim // Try to prove the following rule: 10397321369Sdim // (FoundRHS > -1 - Denominator) && (RHS < 0) => (LHS > RHS). 10398321369Sdim // For example, given that FoundLHS > -3. Then FoundLHS is at least -2. 10399321369Sdim // If we divide it by Denominator > 2, then: 10400321369Sdim // 1. If FoundLHS is negative, then the result is 0. 10401321369Sdim // 2. If FoundLHS is non-negative, then the result is non-negative. 
10402321369Sdim // Anyways, the result is non-negative. 10403321369Sdim auto *MinusOne = getNegativeSCEV(getOne(WTy)); 10404321369Sdim auto *NegDenomMinusOne = getMinusSCEV(MinusOne, DenominatorExt); 10405321369Sdim if (isKnownNegative(RHS) && 10406321369Sdim IsSGTViaContext(FoundRHSExt, NegDenomMinusOne)) 10407321369Sdim return true; 10408321369Sdim } 10409321369Sdim } 10410321369Sdim 10411341825Sdim // If our expression contained SCEVUnknown Phis, and we split it down and now 10412341825Sdim // need to prove something for them, try to prove the predicate for every 10413341825Sdim // possible incoming values of those Phis. 10414341825Sdim if (isImpliedViaMerge(Pred, OrigLHS, RHS, OrigFoundLHS, FoundRHS, Depth + 1)) 10415341825Sdim return true; 10416341825Sdim 10417321369Sdim return false; 10418321369Sdim} 10419321369Sdim 10420360784Sdimstatic bool isKnownPredicateExtendIdiom(ICmpInst::Predicate Pred, 10421360784Sdim const SCEV *LHS, const SCEV *RHS) { 10422360784Sdim // zext x u<= sext x, sext x s<= zext x 10423360784Sdim switch (Pred) { 10424360784Sdim case ICmpInst::ICMP_SGE: 10425360784Sdim std::swap(LHS, RHS); 10426360784Sdim LLVM_FALLTHROUGH; 10427360784Sdim case ICmpInst::ICMP_SLE: { 10428360784Sdim // If operand >=s 0 then ZExt == SExt. If operand <s 0 then SExt <s ZExt. 10429360784Sdim const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(LHS); 10430360784Sdim const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(RHS); 10431360784Sdim if (SExt && ZExt && SExt->getOperand() == ZExt->getOperand()) 10432360784Sdim return true; 10433360784Sdim break; 10434360784Sdim } 10435360784Sdim case ICmpInst::ICMP_UGE: 10436360784Sdim std::swap(LHS, RHS); 10437360784Sdim LLVM_FALLTHROUGH; 10438360784Sdim case ICmpInst::ICMP_ULE: { 10439360784Sdim // If operand >=s 0 then ZExt == SExt. If operand <s 0 then ZExt <u SExt. 
10440360784Sdim const SCEVZeroExtendExpr *ZExt = dyn_cast<SCEVZeroExtendExpr>(LHS); 10441360784Sdim const SCEVSignExtendExpr *SExt = dyn_cast<SCEVSignExtendExpr>(RHS); 10442360784Sdim if (SExt && ZExt && SExt->getOperand() == ZExt->getOperand()) 10443360784Sdim return true; 10444360784Sdim break; 10445360784Sdim } 10446360784Sdim default: 10447360784Sdim break; 10448360784Sdim }; 10449360784Sdim return false; 10450360784Sdim} 10451360784Sdim 10452198090Srdivackybool 10453341825SdimScalarEvolution::isKnownViaNonRecursiveReasoning(ICmpInst::Predicate Pred, 10454321369Sdim const SCEV *LHS, const SCEV *RHS) { 10455360784Sdim return isKnownPredicateExtendIdiom(Pred, LHS, RHS) || 10456360784Sdim isKnownPredicateViaConstantRanges(Pred, LHS, RHS) || 10457321369Sdim IsKnownPredicateViaMinOrMax(*this, Pred, LHS, RHS) || 10458321369Sdim IsKnownPredicateViaAddRecStart(*this, Pred, LHS, RHS) || 10459321369Sdim isKnownPredicateViaNoOverflow(Pred, LHS, RHS); 10460321369Sdim} 10461321369Sdim 10462321369Sdimbool 10463198090SrdivackyScalarEvolution::isImpliedCondOperandsHelper(ICmpInst::Predicate Pred, 10464198090Srdivacky const SCEV *LHS, const SCEV *RHS, 10465198090Srdivacky const SCEV *FoundLHS, 10466198090Srdivacky const SCEV *FoundRHS) { 10467198090Srdivacky switch (Pred) { 10468198090Srdivacky default: llvm_unreachable("Unexpected ICmpInst::Predicate value!"); 10469198090Srdivacky case ICmpInst::ICMP_EQ: 10470198090Srdivacky case ICmpInst::ICMP_NE: 10471198090Srdivacky if (HasSameValue(LHS, FoundLHS) && HasSameValue(RHS, FoundRHS)) 10472198090Srdivacky return true; 10473198090Srdivacky break; 10474198090Srdivacky case ICmpInst::ICMP_SLT: 10475198090Srdivacky case ICmpInst::ICMP_SLE: 10476341825Sdim if (isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_SLE, LHS, FoundLHS) && 10477341825Sdim isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_SGE, RHS, FoundRHS)) 10478198090Srdivacky return true; 10479198090Srdivacky break; 10480198090Srdivacky case ICmpInst::ICMP_SGT: 
10481198090Srdivacky case ICmpInst::ICMP_SGE: 10482341825Sdim if (isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_SGE, LHS, FoundLHS) && 10483341825Sdim isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_SLE, RHS, FoundRHS)) 10484198090Srdivacky return true; 10485198090Srdivacky break; 10486198090Srdivacky case ICmpInst::ICMP_ULT: 10487198090Srdivacky case ICmpInst::ICMP_ULE: 10488341825Sdim if (isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_ULE, LHS, FoundLHS) && 10489341825Sdim isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_UGE, RHS, FoundRHS)) 10490198090Srdivacky return true; 10491198090Srdivacky break; 10492198090Srdivacky case ICmpInst::ICMP_UGT: 10493198090Srdivacky case ICmpInst::ICMP_UGE: 10494341825Sdim if (isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_UGE, LHS, FoundLHS) && 10495341825Sdim isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_ULE, RHS, FoundRHS)) 10496198090Srdivacky return true; 10497198090Srdivacky break; 10498198090Srdivacky } 10499198090Srdivacky 10500321369Sdim // Maybe it can be proved via operations? 10501321369Sdim if (isImpliedViaOperations(Pred, LHS, RHS, FoundLHS, FoundRHS)) 10502321369Sdim return true; 10503321369Sdim 10504198090Srdivacky return false; 10505198090Srdivacky} 10506198090Srdivacky 10507288943Sdimbool ScalarEvolution::isImpliedCondOperandsViaRanges(ICmpInst::Predicate Pred, 10508288943Sdim const SCEV *LHS, 10509288943Sdim const SCEV *RHS, 10510288943Sdim const SCEV *FoundLHS, 10511288943Sdim const SCEV *FoundRHS) { 10512288943Sdim if (!isa<SCEVConstant>(RHS) || !isa<SCEVConstant>(FoundRHS)) 10513288943Sdim // The restriction on `FoundRHS` be lifted easily -- it exists only to 10514288943Sdim // reduce the compile time impact of this optimization. 
10515288943Sdim return false; 10516288943Sdim 10517314564Sdim Optional<APInt> Addend = computeConstantDifference(LHS, FoundLHS); 10518314564Sdim if (!Addend) 10519288943Sdim return false; 10520288943Sdim 10521321369Sdim const APInt &ConstFoundRHS = cast<SCEVConstant>(FoundRHS)->getAPInt(); 10522288943Sdim 10523288943Sdim // `FoundLHSRange` is the range we know `FoundLHS` to be in by virtue of the 10524288943Sdim // antecedent "`FoundLHS` `Pred` `FoundRHS`". 10525288943Sdim ConstantRange FoundLHSRange = 10526288943Sdim ConstantRange::makeAllowedICmpRegion(Pred, ConstFoundRHS); 10527288943Sdim 10528314564Sdim // Since `LHS` is `FoundLHS` + `Addend`, we can compute a range for `LHS`: 10529314564Sdim ConstantRange LHSRange = FoundLHSRange.add(ConstantRange(*Addend)); 10530288943Sdim 10531288943Sdim // We can also compute the range of values for `LHS` that satisfy the 10532288943Sdim // consequent, "`LHS` `Pred` `RHS`": 10533321369Sdim const APInt &ConstRHS = cast<SCEVConstant>(RHS)->getAPInt(); 10534288943Sdim ConstantRange SatisfyingLHSRange = 10535288943Sdim ConstantRange::makeSatisfyingICmpRegion(Pred, ConstRHS); 10536288943Sdim 10537288943Sdim // The antecedent implies the consequent if every value of `LHS` that 10538288943Sdim // satisfies the antecedent also satisfies the consequent. 
10539288943Sdim return SatisfyingLHSRange.contains(LHSRange); 10540288943Sdim} 10541288943Sdim 10542261991Sdimbool ScalarEvolution::doesIVOverflowOnLT(const SCEV *RHS, const SCEV *Stride, 10543261991Sdim bool IsSigned, bool NoWrap) { 10544314564Sdim assert(isKnownPositive(Stride) && "Positive stride expected!"); 10545314564Sdim 10546261991Sdim if (NoWrap) return false; 10547203954Srdivacky 10548261991Sdim unsigned BitWidth = getTypeSizeInBits(RHS->getType()); 10549296417Sdim const SCEV *One = getOne(Stride->getType()); 10550221345Sdim 10551261991Sdim if (IsSigned) { 10552321369Sdim APInt MaxRHS = getSignedRangeMax(RHS); 10553261991Sdim APInt MaxValue = APInt::getSignedMaxValue(BitWidth); 10554321369Sdim APInt MaxStrideMinusOne = getSignedRangeMax(getMinusSCEV(Stride, One)); 10555221345Sdim 10556261991Sdim // SMaxRHS + SMaxStrideMinusOne > SMaxValue => overflow! 10557321369Sdim return (std::move(MaxValue) - MaxStrideMinusOne).slt(MaxRHS); 10558261991Sdim } 10559194612Sed 10560321369Sdim APInt MaxRHS = getUnsignedRangeMax(RHS); 10561261991Sdim APInt MaxValue = APInt::getMaxValue(BitWidth); 10562321369Sdim APInt MaxStrideMinusOne = getUnsignedRangeMax(getMinusSCEV(Stride, One)); 10563194612Sed 10564261991Sdim // UMaxRHS + UMaxStrideMinusOne > UMaxValue => overflow! 
10565321369Sdim return (std::move(MaxValue) - MaxStrideMinusOne).ult(MaxRHS); 10566261991Sdim} 10567261991Sdim 10568261991Sdimbool ScalarEvolution::doesIVOverflowOnGT(const SCEV *RHS, const SCEV *Stride, 10569261991Sdim bool IsSigned, bool NoWrap) { 10570261991Sdim if (NoWrap) return false; 10571261991Sdim 10572261991Sdim unsigned BitWidth = getTypeSizeInBits(RHS->getType()); 10573296417Sdim const SCEV *One = getOne(Stride->getType()); 10574261991Sdim 10575261991Sdim if (IsSigned) { 10576321369Sdim APInt MinRHS = getSignedRangeMin(RHS); 10577261991Sdim APInt MinValue = APInt::getSignedMinValue(BitWidth); 10578321369Sdim APInt MaxStrideMinusOne = getSignedRangeMax(getMinusSCEV(Stride, One)); 10579261991Sdim 10580261991Sdim // SMinRHS - SMaxStrideMinusOne < SMinValue => overflow! 10581321369Sdim return (std::move(MinValue) + MaxStrideMinusOne).sgt(MinRHS); 10582198090Srdivacky } 10583194612Sed 10584321369Sdim APInt MinRHS = getUnsignedRangeMin(RHS); 10585261991Sdim APInt MinValue = APInt::getMinValue(BitWidth); 10586321369Sdim APInt MaxStrideMinusOne = getUnsignedRangeMax(getMinusSCEV(Stride, One)); 10587261991Sdim 10588261991Sdim // UMinRHS - UMaxStrideMinusOne < UMinValue => overflow! 10589321369Sdim return (std::move(MinValue) + MaxStrideMinusOne).ugt(MinRHS); 10590194612Sed} 10591194612Sed 10592288943Sdimconst SCEV *ScalarEvolution::computeBECount(const SCEV *Delta, const SCEV *Step, 10593261991Sdim bool Equality) { 10594296417Sdim const SCEV *One = getOne(Step->getType()); 10595261991Sdim Delta = Equality ? 
getAddExpr(Delta, Step) 10596261991Sdim : getAddExpr(Delta, getMinusSCEV(Step, One)); 10597261991Sdim return getUDivExpr(Delta, Step); 10598261991Sdim} 10599261991Sdim 10600327952Sdimconst SCEV *ScalarEvolution::computeMaxBECountForLT(const SCEV *Start, 10601327952Sdim const SCEV *Stride, 10602327952Sdim const SCEV *End, 10603327952Sdim unsigned BitWidth, 10604327952Sdim bool IsSigned) { 10605327952Sdim 10606327952Sdim assert(!isKnownNonPositive(Stride) && 10607327952Sdim "Stride is expected strictly positive!"); 10608327952Sdim // Calculate the maximum backedge count based on the range of values 10609327952Sdim // permitted by Start, End, and Stride. 10610327952Sdim const SCEV *MaxBECount; 10611327952Sdim APInt MinStart = 10612327952Sdim IsSigned ? getSignedRangeMin(Start) : getUnsignedRangeMin(Start); 10613327952Sdim 10614327952Sdim APInt StrideForMaxBECount = 10615327952Sdim IsSigned ? getSignedRangeMin(Stride) : getUnsignedRangeMin(Stride); 10616327952Sdim 10617327952Sdim // We already know that the stride is positive, so we paper over conservatism 10618327952Sdim // in our range computation by forcing StrideForMaxBECount to be at least one. 10619327952Sdim // In theory this is unnecessary, but we expect MaxBECount to be a 10620327952Sdim // SCEVConstant, and (udiv <constant> 0) is not constant folded by SCEV (there 10621327952Sdim // is nothing to constant fold it to). 10622327952Sdim APInt One(BitWidth, 1, IsSigned); 10623327952Sdim StrideForMaxBECount = APIntOps::smax(One, StrideForMaxBECount); 10624327952Sdim 10625327952Sdim APInt MaxValue = IsSigned ? APInt::getSignedMaxValue(BitWidth) 10626327952Sdim : APInt::getMaxValue(BitWidth); 10627327952Sdim APInt Limit = MaxValue - (StrideForMaxBECount - 1); 10628327952Sdim 10629327952Sdim // Although End can be a MAX expression we estimate MaxEnd considering only 10630327952Sdim // the case End = RHS of the loop termination condition. 
This is safe because 10631327952Sdim // in the other case (End - Start) is zero, leading to a zero maximum backedge 10632327952Sdim // taken count. 10633327952Sdim APInt MaxEnd = IsSigned ? APIntOps::smin(getSignedRangeMax(End), Limit) 10634327952Sdim : APIntOps::umin(getUnsignedRangeMax(End), Limit); 10635327952Sdim 10636327952Sdim MaxBECount = computeBECount(getConstant(MaxEnd - MinStart) /* Delta */, 10637327952Sdim getConstant(StrideForMaxBECount) /* Step */, 10638327952Sdim false /* Equality */); 10639327952Sdim 10640327952Sdim return MaxBECount; 10641327952Sdim} 10642327952Sdim 10643226633SdimScalarEvolution::ExitLimit 10644309124SdimScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS, 10645261991Sdim const Loop *L, bool IsSigned, 10646309124Sdim bool ControlsExit, bool AllowPredicates) { 10647314564Sdim SmallPtrSet<const SCEVPredicate *, 4> Predicates; 10648193323Sed 10649261991Sdim const SCEVAddRecExpr *IV = dyn_cast<SCEVAddRecExpr>(LHS); 10650314564Sdim bool PredicatedIV = false; 10651314564Sdim 10652314564Sdim if (!IV && AllowPredicates) { 10653309124Sdim // Try to make this an AddRec using runtime tests, in the first X 10654309124Sdim // iterations of this loop, where X is the SCEV expression found by the 10655309124Sdim // algorithm below. 10656314564Sdim IV = convertSCEVToAddRecWithPredicates(LHS, L, Predicates); 10657314564Sdim PredicatedIV = true; 10658314564Sdim } 10659261991Sdim 10660261991Sdim // Avoid weird loops 10661261991Sdim if (!IV || IV->getLoop() != L || !IV->isAffine()) 10662195340Sed return getCouldNotCompute(); 10663193323Sed 10664280031Sdim bool NoWrap = ControlsExit && 10665261991Sdim IV->getNoWrapFlags(IsSigned ? SCEV::FlagNSW : SCEV::FlagNUW); 10666193323Sed 10667261991Sdim const SCEV *Stride = IV->getStepRecurrence(*this); 10668193323Sed 10669314564Sdim bool PositiveStride = isKnownPositive(Stride); 10670193323Sed 10671314564Sdim // Avoid negative or zero stride values. 
10672314564Sdim if (!PositiveStride) { 10673314564Sdim // We can compute the correct backedge taken count for loops with unknown 10674314564Sdim // strides if we can prove that the loop is not an infinite loop with side 10675314564Sdim // effects. Here's the loop structure we are trying to handle - 10676314564Sdim // 10677314564Sdim // i = start 10678314564Sdim // do { 10679314564Sdim // A[i] = i; 10680314564Sdim // i += s; 10681314564Sdim // } while (i < end); 10682314564Sdim // 10683314564Sdim // The backedge taken count for such loops is evaluated as - 10684314564Sdim // (max(end, start + stride) - start - 1) /u stride 10685314564Sdim // 10686314564Sdim // The additional preconditions that we need to check to prove correctness 10687314564Sdim // of the above formula is as follows - 10688314564Sdim // 10689314564Sdim // a) IV is either nuw or nsw depending upon signedness (indicated by the 10690314564Sdim // NoWrap flag). 10691314564Sdim // b) loop is single exit with no side effects. 10692314564Sdim // 10693314564Sdim // 10694314564Sdim // Precondition a) implies that if the stride is negative, this is a single 10695314564Sdim // trip loop. The backedge taken count formula reduces to zero in this case. 10696314564Sdim // 10697314564Sdim // Precondition b) implies that the unknown stride cannot be zero otherwise 10698314564Sdim // we have UB. 10699314564Sdim // 10700314564Sdim // The positive stride case is the same as isKnownPositive(Stride) returning 10701314564Sdim // true (original behavior of the function). 10702314564Sdim // 10703314564Sdim // We want to make sure that the stride is truly unknown as there are edge 10704314564Sdim // cases where ScalarEvolution propagates no wrap flags to the 10705314564Sdim // post-increment/decrement IV even though the increment/decrement operation 10706314564Sdim // itself is wrapping. The computed backedge taken count may be wrong in 10707314564Sdim // such cases. 
This is prevented by checking that the stride is not known to 10708314564Sdim // be either positive or non-positive. For example, no wrap flags are 10709314564Sdim // propagated to the post-increment IV of this loop with a trip count of 2 - 10710314564Sdim // 10711314564Sdim // unsigned char i; 10712314564Sdim // for(i=127; i<128; i+=129) 10713314564Sdim // A[i] = i; 10714314564Sdim // 10715314564Sdim if (PredicatedIV || !NoWrap || isKnownNonPositive(Stride) || 10716314564Sdim !loopHasNoSideEffects(L)) 10717314564Sdim return getCouldNotCompute(); 10718314564Sdim } else if (!Stride->isOne() && 10719314564Sdim doesIVOverflowOnLT(RHS, Stride, IsSigned, NoWrap)) 10720314564Sdim // Avoid proven overflow cases: this will ensure that the backedge taken 10721314564Sdim // count will not generate any unsigned overflow. Relaxed no-overflow 10722314564Sdim // conditions exploit NoWrapFlags, allowing to optimize in presence of 10723314564Sdim // undefined behaviors like the case of C language. 10724261991Sdim return getCouldNotCompute(); 10725193323Sed 10726261991Sdim ICmpInst::Predicate Cond = IsSigned ? ICmpInst::ICMP_SLT 10727261991Sdim : ICmpInst::ICMP_ULT; 10728261991Sdim const SCEV *Start = IV->getStart(); 10729261991Sdim const SCEV *End = RHS; 10730327952Sdim // When the RHS is not invariant, we do not know the end bound of the loop and 10731327952Sdim // cannot calculate the ExactBECount needed by ExitLimit. However, we can 10732327952Sdim // calculate the MaxBECount, given the start, stride and max value for the end 10733327952Sdim // bound of the loop (RHS), and the fact that IV does not overflow (which is 10734327952Sdim // checked above). 
10735327952Sdim if (!isLoopInvariant(RHS, L)) { 10736327952Sdim const SCEV *MaxBECount = computeMaxBECountForLT( 10737327952Sdim Start, Stride, RHS, getTypeSizeInBits(LHS->getType()), IsSigned); 10738327952Sdim return ExitLimit(getCouldNotCompute() /* ExactNotTaken */, MaxBECount, 10739327952Sdim false /*MaxOrZero*/, Predicates); 10740327952Sdim } 10741314564Sdim // If the backedge is taken at least once, then it will be taken 10742314564Sdim // (End-Start)/Stride times (rounded up to a multiple of Stride), where Start 10743314564Sdim // is the LHS value of the less-than comparison the first time it is evaluated 10744314564Sdim // and End is the RHS. 10745314564Sdim const SCEV *BECountIfBackedgeTaken = 10746314564Sdim computeBECount(getMinusSCEV(End, Start), Stride, false); 10747314564Sdim // If the loop entry is guarded by the result of the backedge test of the 10748314564Sdim // first loop iteration, then we know the backedge will be taken at least 10749314564Sdim // once and so the backedge taken count is as above. If not then we use the 10750314564Sdim // expression (max(End,Start)-Start)/Stride to describe the backedge count, 10751314564Sdim // as if the backedge is taken at least once max(End,Start) is End and so the 10752314564Sdim // result is as above, and if not max(End,Start) is Start so we get a backedge 10753314564Sdim // count of zero. 10754314564Sdim const SCEV *BECount; 10755314564Sdim if (isLoopEntryGuardedByCond(L, Cond, getMinusSCEV(Start, Stride), RHS)) 10756314564Sdim BECount = BECountIfBackedgeTaken; 10757314564Sdim else { 10758309124Sdim End = IsSigned ? 
getSMaxExpr(RHS, Start) : getUMaxExpr(RHS, Start); 10759314564Sdim BECount = computeBECount(getMinusSCEV(End, Start), Stride, false); 10760314564Sdim } 10761193323Sed 10762314564Sdim const SCEV *MaxBECount; 10763314564Sdim bool MaxOrZero = false; 10764314564Sdim if (isa<SCEVConstant>(BECount)) 10765314564Sdim MaxBECount = BECount; 10766314564Sdim else if (isa<SCEVConstant>(BECountIfBackedgeTaken)) { 10767314564Sdim // If we know exactly how many times the backedge will be taken if it's 10768314564Sdim // taken at least once, then the backedge count will either be that or 10769314564Sdim // zero. 10770314564Sdim MaxBECount = BECountIfBackedgeTaken; 10771314564Sdim MaxOrZero = true; 10772314564Sdim } else { 10773327952Sdim MaxBECount = computeMaxBECountForLT( 10774327952Sdim Start, Stride, RHS, getTypeSizeInBits(LHS->getType()), IsSigned); 10775314564Sdim } 10776221345Sdim 10777321369Sdim if (isa<SCEVCouldNotCompute>(MaxBECount) && 10778321369Sdim !isa<SCEVCouldNotCompute>(BECount)) 10779321369Sdim MaxBECount = getConstant(getUnsignedRangeMax(BECount)); 10780193323Sed 10781314564Sdim return ExitLimit(BECount, MaxBECount, MaxOrZero, Predicates); 10782193323Sed} 10783193323Sed 10784261991SdimScalarEvolution::ExitLimit 10785309124SdimScalarEvolution::howManyGreaterThans(const SCEV *LHS, const SCEV *RHS, 10786261991Sdim const Loop *L, bool IsSigned, 10787309124Sdim bool ControlsExit, bool AllowPredicates) { 10788314564Sdim SmallPtrSet<const SCEVPredicate *, 4> Predicates; 10789261991Sdim // We handle only IV > Invariant 10790261991Sdim if (!isLoopInvariant(RHS, L)) 10791261991Sdim return getCouldNotCompute(); 10792261991Sdim 10793261991Sdim const SCEVAddRecExpr *IV = dyn_cast<SCEVAddRecExpr>(LHS); 10794309124Sdim if (!IV && AllowPredicates) 10795309124Sdim // Try to make this an AddRec using runtime tests, in the first X 10796309124Sdim // iterations of this loop, where X is the SCEV expression found by the 10797309124Sdim // algorithm below. 
10798314564Sdim IV = convertSCEVToAddRecWithPredicates(LHS, L, Predicates); 10799261991Sdim 10800261991Sdim // Avoid weird loops 10801261991Sdim if (!IV || IV->getLoop() != L || !IV->isAffine()) 10802261991Sdim return getCouldNotCompute(); 10803261991Sdim 10804280031Sdim bool NoWrap = ControlsExit && 10805261991Sdim IV->getNoWrapFlags(IsSigned ? SCEV::FlagNSW : SCEV::FlagNUW); 10806261991Sdim 10807261991Sdim const SCEV *Stride = getNegativeSCEV(IV->getStepRecurrence(*this)); 10808261991Sdim 10809261991Sdim // Avoid negative or zero stride values 10810261991Sdim if (!isKnownPositive(Stride)) 10811261991Sdim return getCouldNotCompute(); 10812261991Sdim 10813261991Sdim // Avoid proven overflow cases: this will ensure that the backedge taken count 10814261991Sdim // will not generate any unsigned overflow. Relaxed no-overflow conditions 10815288943Sdim // exploit NoWrapFlags, allowing to optimize in presence of undefined 10816261991Sdim // behaviors like the case of C language. 10817261991Sdim if (!Stride->isOne() && doesIVOverflowOnGT(RHS, Stride, IsSigned, NoWrap)) 10818261991Sdim return getCouldNotCompute(); 10819261991Sdim 10820261991Sdim ICmpInst::Predicate Cond = IsSigned ? ICmpInst::ICMP_SGT 10821261991Sdim : ICmpInst::ICMP_UGT; 10822261991Sdim 10823261991Sdim const SCEV *Start = IV->getStart(); 10824261991Sdim const SCEV *End = RHS; 10825309124Sdim if (!isLoopEntryGuardedByCond(L, Cond, getAddExpr(Start, Stride), RHS)) 10826309124Sdim End = IsSigned ? getSMinExpr(RHS, Start) : getUMinExpr(RHS, Start); 10827261991Sdim 10828261991Sdim const SCEV *BECount = computeBECount(getMinusSCEV(Start, End), Stride, false); 10829261991Sdim 10830321369Sdim APInt MaxStart = IsSigned ? getSignedRangeMax(Start) 10831321369Sdim : getUnsignedRangeMax(Start); 10832261991Sdim 10833321369Sdim APInt MinStride = IsSigned ? 
getSignedRangeMin(Stride) 10834321369Sdim : getUnsignedRangeMin(Stride); 10835261991Sdim 10836261991Sdim unsigned BitWidth = getTypeSizeInBits(LHS->getType()); 10837261991Sdim APInt Limit = IsSigned ? APInt::getSignedMinValue(BitWidth) + (MinStride - 1) 10838261991Sdim : APInt::getMinValue(BitWidth) + (MinStride - 1); 10839261991Sdim 10840261991Sdim // Although End can be a MIN expression we estimate MinEnd considering only 10841261991Sdim // the case End = RHS. This is safe because in the other case (Start - End) 10842261991Sdim // is zero, leading to a zero maximum backedge taken count. 10843261991Sdim APInt MinEnd = 10844321369Sdim IsSigned ? APIntOps::smax(getSignedRangeMin(RHS), Limit) 10845321369Sdim : APIntOps::umax(getUnsignedRangeMin(RHS), Limit); 10846261991Sdim 10847353358Sdim const SCEV *MaxBECount = isa<SCEVConstant>(BECount) 10848353358Sdim ? BECount 10849353358Sdim : computeBECount(getConstant(MaxStart - MinEnd), 10850353358Sdim getConstant(MinStride), false); 10851261991Sdim 10852261991Sdim if (isa<SCEVCouldNotCompute>(MaxBECount)) 10853261991Sdim MaxBECount = BECount; 10854261991Sdim 10855314564Sdim return ExitLimit(BECount, MaxBECount, false, Predicates); 10856261991Sdim} 10857261991Sdim 10858309124Sdimconst SCEV *SCEVAddRecExpr::getNumIterationsInRange(const ConstantRange &Range, 10859195098Sed ScalarEvolution &SE) const { 10860193323Sed if (Range.isFullSet()) // Infinite loop. 10861193323Sed return SE.getCouldNotCompute(); 10862193323Sed 10863193323Sed // If the start is a non-zero constant, shift the range to simplify things. 
10864193323Sed if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(getStart())) 10865193323Sed if (!SC->getValue()->isZero()) { 10866198090Srdivacky SmallVector<const SCEV *, 4> Operands(op_begin(), op_end()); 10867296417Sdim Operands[0] = SE.getZero(SC->getType()); 10868221345Sdim const SCEV *Shifted = SE.getAddRecExpr(Operands, getLoop(), 10869221345Sdim getNoWrapFlags(FlagNW)); 10870296417Sdim if (const auto *ShiftedAddRec = dyn_cast<SCEVAddRecExpr>(Shifted)) 10871193323Sed return ShiftedAddRec->getNumIterationsInRange( 10872296417Sdim Range.subtract(SC->getAPInt()), SE); 10873193323Sed // This is strange and shouldn't happen. 10874193323Sed return SE.getCouldNotCompute(); 10875193323Sed } 10876193323Sed 10877193323Sed // The only time we can solve this is when we have all constant indices. 10878193323Sed // Otherwise, we cannot determine the overflow conditions. 10879296417Sdim if (any_of(operands(), [](const SCEV *Op) { return !isa<SCEVConstant>(Op); })) 10880296417Sdim return SE.getCouldNotCompute(); 10881193323Sed 10882193323Sed // Okay at this point we know that all elements of the chrec are constants and 10883193323Sed // that the start element is zero. 10884193323Sed 10885193323Sed // First check to see if the range contains zero. If not, the first 10886193323Sed // iteration exits. 10887193323Sed unsigned BitWidth = SE.getTypeSizeInBits(getType()); 10888193323Sed if (!Range.contains(APInt(BitWidth, 0))) 10889296417Sdim return SE.getZero(getType()); 10890193323Sed 10891193323Sed if (isAffine()) { 10892193323Sed // If this is an affine expression then we have this situation: 10893193323Sed // Solve {0,+,A} in Range === Ax in Range 10894193323Sed 10895193323Sed // We know that zero is in the range. If A is positive then we know that 10896193323Sed // the upper value of the range must be the first possible exit value. 10897193323Sed // If A is negative then the lower of the range is the last possible loop 10898193323Sed // value. 
Also note that we already checked for a full range. 10899296417Sdim APInt A = cast<SCEVConstant>(getOperand(1))->getAPInt(); 10900321369Sdim APInt End = A.sge(1) ? (Range.getUpper() - 1) : Range.getLower(); 10901193323Sed 10902193323Sed // The exit value should be (End+A)/A. 10903193323Sed APInt ExitVal = (End + A).udiv(A); 10904198090Srdivacky ConstantInt *ExitValue = ConstantInt::get(SE.getContext(), ExitVal); 10905193323Sed 10906193323Sed // Evaluate at the exit value. If we really did fall out of the valid 10907193323Sed // range, then we computed our trip count, otherwise wrap around or other 10908193323Sed // things must have happened. 10909193323Sed ConstantInt *Val = EvaluateConstantChrecAtConstant(this, ExitValue, SE); 10910193323Sed if (Range.contains(Val->getValue())) 10911193323Sed return SE.getCouldNotCompute(); // Something strange happened 10912193323Sed 10913193323Sed // Ensure that the previous value is in the range. This is a sanity check. 10914193323Sed assert(Range.contains( 10915195098Sed EvaluateConstantChrecAtConstant(this, 10916321369Sdim ConstantInt::get(SE.getContext(), ExitVal - 1), SE)->getValue()) && 10917193323Sed "Linear scev computation is off in a bad way!"); 10918193323Sed return SE.getConstant(ExitValue); 10919344779Sdim } 10920193323Sed 10921344779Sdim if (isQuadratic()) { 10922344779Sdim if (auto S = SolveQuadraticAddRecRange(this, Range, SE)) 10923344779Sdim return SE.getConstant(S.getValue()); 10924193323Sed } 10925193323Sed 10926193323Sed return SE.getCouldNotCompute(); 10927193323Sed} 10928193323Sed 10929341825Sdimconst SCEVAddRecExpr * 10930341825SdimSCEVAddRecExpr::getPostIncExpr(ScalarEvolution &SE) const { 10931341825Sdim assert(getNumOperands() > 1 && "AddRec with zero step?"); 10932341825Sdim // There is a temptation to just call getAddExpr(this, getStepRecurrence(SE)), 10933341825Sdim // but in this case we cannot guarantee that the value returned will be an 10934341825Sdim // AddRec because SCEV does not have a fixed 
point where it stops 10935341825Sdim // simplification: it is legal to return ({rec1} + {rec2}). For example, it 10936341825Sdim // may happen if we reach arithmetic depth limit while simplifying. So we 10937341825Sdim // construct the returned value explicitly. 10938341825Sdim SmallVector<const SCEV *, 3> Ops; 10939341825Sdim // If this is {A,+,B,+,C,...,+,N}, then its step is {B,+,C,+,...,+,N}, and 10940341825Sdim // (this + Step) is {A+B,+,B+C,+...,+,N}. 10941341825Sdim for (unsigned i = 0, e = getNumOperands() - 1; i < e; ++i) 10942341825Sdim Ops.push_back(SE.getAddExpr(getOperand(i), getOperand(i + 1))); 10943341825Sdim // We know that the last operand is not a constant zero (otherwise it would 10944341825Sdim // have been popped out earlier). This guarantees us that if the result has 10945341825Sdim // the same last operand, then it will also not be popped out, meaning that 10946341825Sdim // the returned value will be an AddRec. 10947341825Sdim const SCEV *Last = getOperand(getNumOperands() - 1); 10948341825Sdim assert(!Last->isZero() && "Recurrency with zero step?"); 10949341825Sdim Ops.push_back(Last); 10950341825Sdim return cast<SCEVAddRecExpr>(SE.getAddRecExpr(Ops, getLoop(), 10951341825Sdim SCEV::FlagAnyWrap)); 10952341825Sdim} 10953341825Sdim 10954276479Sdim// Return true when S contains at least an undef value. 10955314564Sdimstatic inline bool containsUndefs(const SCEV *S) { 10956314564Sdim return SCEVExprContains(S, [](const SCEV *S) { 10957314564Sdim if (const auto *SU = dyn_cast<SCEVUnknown>(S)) 10958314564Sdim return isa<UndefValue>(SU->getValue()); 10959314564Sdim return false; 10960314564Sdim }); 10961276479Sdim} 10962276479Sdim 10963276479Sdimnamespace { 10964327952Sdim 10965276479Sdim// Collect all steps of SCEV expressions. 
10966276479Sdimstruct SCEVCollectStrides { 10967276479Sdim ScalarEvolution &SE; 10968276479Sdim SmallVectorImpl<const SCEV *> &Strides; 10969276479Sdim 10970276479Sdim SCEVCollectStrides(ScalarEvolution &SE, SmallVectorImpl<const SCEV *> &S) 10971276479Sdim : SE(SE), Strides(S) {} 10972276479Sdim 10973276479Sdim bool follow(const SCEV *S) { 10974276479Sdim if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) 10975276479Sdim Strides.push_back(AR->getStepRecurrence(SE)); 10976276479Sdim return true; 10977276479Sdim } 10978327952Sdim 10979276479Sdim bool isDone() const { return false; } 10980276479Sdim}; 10981276479Sdim 10982276479Sdim// Collect all SCEVUnknown and SCEVMulExpr expressions. 10983276479Sdimstruct SCEVCollectTerms { 10984276479Sdim SmallVectorImpl<const SCEV *> &Terms; 10985276479Sdim 10986327952Sdim SCEVCollectTerms(SmallVectorImpl<const SCEV *> &T) : Terms(T) {} 10987276479Sdim 10988276479Sdim bool follow(const SCEV *S) { 10989314564Sdim if (isa<SCEVUnknown>(S) || isa<SCEVMulExpr>(S) || 10990314564Sdim isa<SCEVSignExtendExpr>(S)) { 10991276479Sdim if (!containsUndefs(S)) 10992276479Sdim Terms.push_back(S); 10993276479Sdim 10994276479Sdim // Stop recursion: once we collected a term, do not walk its operands. 10995276479Sdim return false; 10996276479Sdim } 10997276479Sdim 10998276479Sdim // Keep looking. 10999276479Sdim return true; 11000276479Sdim } 11001327952Sdim 11002276479Sdim bool isDone() const { return false; } 11003276479Sdim}; 11004296417Sdim 11005296417Sdim// Check if a SCEV contains an AddRecExpr. 
11006296417Sdimstruct SCEVHasAddRec { 11007296417Sdim bool &ContainsAddRec; 11008296417Sdim 11009296417Sdim SCEVHasAddRec(bool &ContainsAddRec) : ContainsAddRec(ContainsAddRec) { 11010327952Sdim ContainsAddRec = false; 11011296417Sdim } 11012296417Sdim 11013296417Sdim bool follow(const SCEV *S) { 11014296417Sdim if (isa<SCEVAddRecExpr>(S)) { 11015296417Sdim ContainsAddRec = true; 11016296417Sdim 11017296417Sdim // Stop recursion: once we collected a term, do not walk its operands. 11018296417Sdim return false; 11019296417Sdim } 11020296417Sdim 11021296417Sdim // Keep looking. 11022296417Sdim return true; 11023296417Sdim } 11024327952Sdim 11025296417Sdim bool isDone() const { return false; } 11026296417Sdim}; 11027296417Sdim 11028296417Sdim// Find factors that are multiplied with an expression that (possibly as a 11029296417Sdim// subexpression) contains an AddRecExpr. In the expression: 11030296417Sdim// 11031296417Sdim// 8 * (100 + %p * %q * (%a + {0, +, 1}_loop)) 11032296417Sdim// 11033296417Sdim// "%p * %q" are factors multiplied by the expression "(%a + {0, +, 1}_loop)" 11034296417Sdim// that contains the AddRec {0, +, 1}_loop. %p * %q are likely to be array size 11035296417Sdim// parameters as they form a product with an induction variable. 11036296417Sdim// 11037296417Sdim// This collector expects all array size parameters to be in the same MulExpr. 11038296417Sdim// It might be necessary to later add support for collecting parameters that are 11039296417Sdim// spread over different nested MulExpr. 
11040296417Sdimstruct SCEVCollectAddRecMultiplies { 11041296417Sdim SmallVectorImpl<const SCEV *> &Terms; 11042296417Sdim ScalarEvolution &SE; 11043296417Sdim 11044296417Sdim SCEVCollectAddRecMultiplies(SmallVectorImpl<const SCEV *> &T, ScalarEvolution &SE) 11045296417Sdim : Terms(T), SE(SE) {} 11046296417Sdim 11047296417Sdim bool follow(const SCEV *S) { 11048296417Sdim if (auto *Mul = dyn_cast<SCEVMulExpr>(S)) { 11049296417Sdim bool HasAddRec = false; 11050296417Sdim SmallVector<const SCEV *, 0> Operands; 11051296417Sdim for (auto Op : Mul->operands()) { 11052321369Sdim const SCEVUnknown *Unknown = dyn_cast<SCEVUnknown>(Op); 11053321369Sdim if (Unknown && !isa<CallInst>(Unknown->getValue())) { 11054296417Sdim Operands.push_back(Op); 11055321369Sdim } else if (Unknown) { 11056321369Sdim HasAddRec = true; 11057296417Sdim } else { 11058360784Sdim bool ContainsAddRec = false; 11059296417Sdim SCEVHasAddRec ContiansAddRec(ContainsAddRec); 11060296417Sdim visitAll(Op, ContiansAddRec); 11061296417Sdim HasAddRec |= ContainsAddRec; 11062296417Sdim } 11063296417Sdim } 11064296417Sdim if (Operands.size() == 0) 11065296417Sdim return true; 11066296417Sdim 11067296417Sdim if (!HasAddRec) 11068296417Sdim return false; 11069296417Sdim 11070296417Sdim Terms.push_back(SE.getMulExpr(Operands)); 11071296417Sdim // Stop recursion: once we collected a term, do not walk its operands. 11072296417Sdim return false; 11073296417Sdim } 11074296417Sdim 11075296417Sdim // Keep looking. 11076296417Sdim return true; 11077296417Sdim } 11078327952Sdim 11079296417Sdim bool isDone() const { return false; } 11080296417Sdim}; 11081276479Sdim 11082327952Sdim} // end anonymous namespace 11083327952Sdim 11084296417Sdim/// Find parametric terms in this SCEVAddRecExpr. We first for parameters in 11085296417Sdim/// two places: 11086296417Sdim/// 1) The strides of AddRec expressions. 11087296417Sdim/// 2) Unknowns that are multiplied with AddRec expressions. 
11088288943Sdimvoid ScalarEvolution::collectParametricTerms(const SCEV *Expr, 11089288943Sdim SmallVectorImpl<const SCEV *> &Terms) { 11090276479Sdim SmallVector<const SCEV *, 4> Strides; 11091288943Sdim SCEVCollectStrides StrideCollector(*this, Strides); 11092288943Sdim visitAll(Expr, StrideCollector); 11093276479Sdim 11094341825Sdim LLVM_DEBUG({ 11095341825Sdim dbgs() << "Strides:\n"; 11096341825Sdim for (const SCEV *S : Strides) 11097341825Sdim dbgs() << *S << "\n"; 11098341825Sdim }); 11099276479Sdim 11100276479Sdim for (const SCEV *S : Strides) { 11101276479Sdim SCEVCollectTerms TermCollector(Terms); 11102276479Sdim visitAll(S, TermCollector); 11103276479Sdim } 11104276479Sdim 11105341825Sdim LLVM_DEBUG({ 11106341825Sdim dbgs() << "Terms:\n"; 11107341825Sdim for (const SCEV *T : Terms) 11108341825Sdim dbgs() << *T << "\n"; 11109341825Sdim }); 11110296417Sdim 11111296417Sdim SCEVCollectAddRecMultiplies MulCollector(Terms, *this); 11112296417Sdim visitAll(Expr, MulCollector); 11113276479Sdim} 11114276479Sdim 11115276479Sdimstatic bool findArrayDimensionsRec(ScalarEvolution &SE, 11116276479Sdim SmallVectorImpl<const SCEV *> &Terms, 11117276479Sdim SmallVectorImpl<const SCEV *> &Sizes) { 11118276479Sdim int Last = Terms.size() - 1; 11119276479Sdim const SCEV *Step = Terms[Last]; 11120261991Sdim 11121276479Sdim // End of recursion. 11122276479Sdim if (Last == 0) { 11123276479Sdim if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(Step)) { 11124276479Sdim SmallVector<const SCEV *, 2> Qs; 11125276479Sdim for (const SCEV *Op : M->operands()) 11126276479Sdim if (!isa<SCEVConstant>(Op)) 11127276479Sdim Qs.push_back(Op); 11128261991Sdim 11129276479Sdim Step = SE.getMulExpr(Qs); 11130261991Sdim } 11131261991Sdim 11132276479Sdim Sizes.push_back(Step); 11133276479Sdim return true; 11134261991Sdim } 11135261991Sdim 11136276479Sdim for (const SCEV *&Term : Terms) { 11137276479Sdim // Normalize the terms before the next call to findArrayDimensionsRec. 
11138276479Sdim const SCEV *Q, *R; 11139276479Sdim SCEVDivision::divide(SE, Term, Step, &Q, &R); 11140261991Sdim 11141276479Sdim // Bail out when GCD does not evenly divide one of the terms. 11142276479Sdim if (!R->isZero()) 11143276479Sdim return false; 11144261991Sdim 11145276479Sdim Term = Q; 11146261991Sdim } 11147261991Sdim 11148276479Sdim // Remove all SCEVConstants. 11149314564Sdim Terms.erase( 11150314564Sdim remove_if(Terms, [](const SCEV *E) { return isa<SCEVConstant>(E); }), 11151314564Sdim Terms.end()); 11152261991Sdim 11153276479Sdim if (Terms.size() > 0) 11154276479Sdim if (!findArrayDimensionsRec(SE, Terms, Sizes)) 11155276479Sdim return false; 11156261991Sdim 11157276479Sdim Sizes.push_back(Step); 11158276479Sdim return true; 11159276479Sdim} 11160261991Sdim 11161276479Sdim// Returns true when one of the SCEVs of Terms contains a SCEVUnknown parameter. 11162314564Sdimstatic inline bool containsParameters(SmallVectorImpl<const SCEV *> &Terms) { 11163276479Sdim for (const SCEV *T : Terms) 11164314564Sdim if (SCEVExprContains(T, isa<SCEVUnknown, const SCEV *>)) 11165276479Sdim return true; 11166276479Sdim return false; 11167276479Sdim} 11168261991Sdim 11169276479Sdim// Return the number of product terms in S. 
11170276479Sdimstatic inline int numberOfTerms(const SCEV *S) { 11171276479Sdim if (const SCEVMulExpr *Expr = dyn_cast<SCEVMulExpr>(S)) 11172276479Sdim return Expr->getNumOperands(); 11173276479Sdim return 1; 11174276479Sdim} 11175261991Sdim 11176276479Sdimstatic const SCEV *removeConstantFactors(ScalarEvolution &SE, const SCEV *T) { 11177276479Sdim if (isa<SCEVConstant>(T)) 11178276479Sdim return nullptr; 11179261991Sdim 11180276479Sdim if (isa<SCEVUnknown>(T)) 11181276479Sdim return T; 11182261991Sdim 11183276479Sdim if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(T)) { 11184276479Sdim SmallVector<const SCEV *, 2> Factors; 11185276479Sdim for (const SCEV *Op : M->operands()) 11186276479Sdim if (!isa<SCEVConstant>(Op)) 11187276479Sdim Factors.push_back(Op); 11188261991Sdim 11189276479Sdim return SE.getMulExpr(Factors); 11190261991Sdim } 11191261991Sdim 11192276479Sdim return T; 11193276479Sdim} 11194261991Sdim 11195276479Sdim/// Return the size of an element read or written by Inst. 11196276479Sdimconst SCEV *ScalarEvolution::getElementSize(Instruction *Inst) { 11197276479Sdim Type *Ty; 11198276479Sdim if (StoreInst *Store = dyn_cast<StoreInst>(Inst)) 11199276479Sdim Ty = Store->getValueOperand()->getType(); 11200276479Sdim else if (LoadInst *Load = dyn_cast<LoadInst>(Inst)) 11201276479Sdim Ty = Load->getType(); 11202276479Sdim else 11203276479Sdim return nullptr; 11204261991Sdim 11205276479Sdim Type *ETy = getEffectiveSCEVType(PointerType::getUnqual(Ty)); 11206276479Sdim return getSizeOfExpr(ETy, Ty); 11207276479Sdim} 11208261991Sdim 11209276479Sdimvoid ScalarEvolution::findArrayDimensions(SmallVectorImpl<const SCEV *> &Terms, 11210276479Sdim SmallVectorImpl<const SCEV *> &Sizes, 11211321369Sdim const SCEV *ElementSize) { 11212276479Sdim if (Terms.size() < 1 || !ElementSize) 11213276479Sdim return; 11214261991Sdim 11215276479Sdim // Early return when Terms do not contain parameters: we do not delinearize 11216276479Sdim // non parametric SCEVs. 
// NOTE(review): this chunk begins inside a function whose signature is above
// the visible region — from its use of Terms/Sizes/ElementSize it appears to
// be ScalarEvolution::findArrayDimensions; confirm against the full file.
  if (!containsParameters(Terms))
    return;

  LLVM_DEBUG({
    dbgs() << "Terms:\n";
    for (const SCEV *T : Terms)
      dbgs() << *T << "\n";
  });

  // Remove duplicates.
  array_pod_sort(Terms.begin(), Terms.end());
  Terms.erase(std::unique(Terms.begin(), Terms.end()), Terms.end());

  // Put larger terms first.
  llvm::sort(Terms, [](const SCEV *LHS, const SCEV *RHS) {
    return numberOfTerms(LHS) > numberOfTerms(RHS);
  });

  // Try to divide all terms by the element size. If term is not divisible by
  // element size, proceed with the original term.
  for (const SCEV *&Term : Terms) {
    const SCEV *Q, *R;
    SCEVDivision::divide(*this, Term, ElementSize, &Q, &R);
    // Q == 0 means ElementSize did not divide Term; keep the original term.
    if (!Q->isZero())
      Term = Q;
  }

  SmallVector<const SCEV *, 4> NewTerms;

  // Remove constant factors.
  for (const SCEV *T : Terms)
    if (const SCEV *NewT = removeConstantFactors(*this, T))
      NewTerms.push_back(NewT);

  LLVM_DEBUG({
    dbgs() << "Terms after sorting:\n";
    for (const SCEV *T : NewTerms)
      dbgs() << *T << "\n";
  });

  // On failure, leave no partial result behind: callers test Sizes.empty().
  if (NewTerms.empty() || !findArrayDimensionsRec(*this, NewTerms, Sizes)) {
    Sizes.clear();
    return;
  }

  // The last element to be pushed into Sizes is the size of an element.
  Sizes.push_back(ElementSize);

  LLVM_DEBUG({
    dbgs() << "Sizes:\n";
    for (const SCEV *S : Sizes)
      dbgs() << *S << "\n";
  });
}

/// Compute the access functions (subscripts) of \p Expr with respect to the
/// array dimensions in \p Sizes.  Working from the innermost dimension
/// outwards, Expr is repeatedly divided by each size: the remainder becomes
/// the subscript for that dimension and the quotient is carried to the next
/// one.  On failure both Subscripts and Sizes are cleared so callers can
/// test Subscripts.empty().
void ScalarEvolution::computeAccessFunctions(
    const SCEV *Expr, SmallVectorImpl<const SCEV *> &Subscripts,
    SmallVectorImpl<const SCEV *> &Sizes) {
  // Early exit in case this SCEV is not an affine multivariate function.
  if (Sizes.empty())
    return;

  if (auto *AR = dyn_cast<SCEVAddRecExpr>(Expr))
    if (!AR->isAffine())
      return;

  const SCEV *Res = Expr;
  int Last = Sizes.size() - 1;
  for (int i = Last; i >= 0; i--) {
    const SCEV *Q, *R;
    SCEVDivision::divide(*this, Res, Sizes[i], &Q, &R);

    LLVM_DEBUG({
      dbgs() << "Res: " << *Res << "\n";
      dbgs() << "Sizes[i]: " << *Sizes[i] << "\n";
      dbgs() << "Res divided by Sizes[i]:\n";
      dbgs() << "Quotient: " << *Q << "\n";
      dbgs() << "Remainder: " << *R << "\n";
    });

    Res = Q;

    // Do not record the last subscript corresponding to the size of elements in
    // the array.
    if (i == Last) {

      // Bail out if the remainder is too complex.
      if (isa<SCEVAddRecExpr>(R)) {
        Subscripts.clear();
        Sizes.clear();
        return;
      }

      continue;
    }

    // Record the access function for the current subscript.
    Subscripts.push_back(R);
  }

  // Also push in last position the remainder of the last division: it will be
  // the access function of the innermost dimension.
  Subscripts.push_back(Res);

  // Subscripts were collected innermost-first; callers expect outermost-first.
  std::reverse(Subscripts.begin(), Subscripts.end());

  LLVM_DEBUG({
    dbgs() << "Subscripts:\n";
    for (const SCEV *S : Subscripts)
      dbgs() << *S << "\n";
  });
}

/// Splits the SCEV into two vectors of SCEVs representing the subscripts and
/// sizes of an array access. Returns the remainder of the delinearization that
/// is the offset start of the array. The SCEV->delinearize algorithm computes
/// the multiples of SCEV coefficients: that is a pattern matching of sub
/// expressions in the stride and base of a SCEV corresponding to the
/// computation of a GCD (greatest common divisor) of base and stride. When
/// SCEV->delinearize fails, it returns the SCEV unchanged.
///
/// For example: when analyzing the memory access A[i][j][k] in this loop nest
///
/// void foo(long n, long m, long o, double A[n][m][o]) {
///
/// for (long i = 0; i < n; i++)
/// for (long j = 0; j < m; j++)
/// for (long k = 0; k < o; k++)
/// A[i][j][k] = 1.0;
/// }
///
/// the delinearization input is the following AddRec SCEV:
///
/// AddRec: {{{%A,+,(8 * %m * %o)}<%for.i>,+,(8 * %o)}<%for.j>,+,8}<%for.k>
///
/// From this SCEV, we are able to say that the base offset of the access is %A
/// because it appears as an offset that does not divide any of the strides in
/// the loops:
///
/// CHECK: Base offset: %A
///
/// and then SCEV->delinearize determines the size of some of the dimensions of
/// the array as these are the multiples by which the strides are happening:
///
/// CHECK: ArrayDecl[UnknownSize][%m][%o] with elements of sizeof(double) bytes.
///
/// Note that the outermost dimension remains of UnknownSize because there are
/// no strides that would help identifying the size of the last dimension: when
/// the array has been statically allocated, one could compute the size of that
/// dimension by dividing the overall size of the array by the size of the known
/// dimensions: %m * %o * 8.
///
/// Finally delinearize provides the access functions for the array reference
/// that does correspond to A[i][j][k] of the above C testcase:
///
/// CHECK: ArrayRef[{0,+,1}<%for.i>][{0,+,1}<%for.j>][{0,+,1}<%for.k>]
///
/// The testcases are checking the output of a function pass:
/// DelinearizationPass that walks through all loads and stores of a function
/// asking for the SCEV of the memory access with respect to all enclosing
/// loops, calling SCEV->delinearize on that and printing the results.
void ScalarEvolution::delinearize(const SCEV *Expr,
                                  SmallVectorImpl<const SCEV *> &Subscripts,
                                  SmallVectorImpl<const SCEV *> &Sizes,
                                  const SCEV *ElementSize) {
  // First step: collect parametric terms.
  SmallVector<const SCEV *, 4> Terms;
  collectParametricTerms(Expr, Terms);

  if (Terms.empty())
    return;

  // Second step: find subscript sizes.
  findArrayDimensions(Terms, Sizes, ElementSize);

  if (Sizes.empty())
    return;

  // Third step: compute the access functions for each subscript.
  computeAccessFunctions(Expr, Subscripts, Sizes);

  if (Subscripts.empty())
    return;

  LLVM_DEBUG({
    dbgs() << "succeeded to delinearize " << *Expr << "\n";
    dbgs() << "ArrayDecl[UnknownSize]";
    for (const SCEV *S : Sizes)
      dbgs() << "[" << *S << "]";

    dbgs() << "\nArrayRef";
    for (const SCEV *S : Subscripts)
      dbgs() << "[" << *S << "]";
    dbgs() << "\n";
  });
}

//===----------------------------------------------------------------------===//
// SCEVCallbackVH Class Implementation
//===----------------------------------------------------------------------===//

/// Callback fired when the tracked Value is deleted: drop every cache entry
/// keyed by it.  After eraseValueFromMap the handle's pointer dangles.
void ScalarEvolution::SCEVCallbackVH::deleted() {
  assert(SE && "SCEVCallbackVH called with a null ScalarEvolution!");
  if (PHINode *PN = dyn_cast<PHINode>(getValPtr()))
    SE->ConstantEvolutionLoopExitValue.erase(PN);
  SE->eraseValueFromMap(getValPtr());
  // this now dangles!
}

/// Callback fired on RAUW: invalidate cached SCEVs for the old value and for
/// everything transitively reachable through its users.
void ScalarEvolution::SCEVCallbackVH::allUsesReplacedWith(Value *V) {
  assert(SE && "SCEVCallbackVH called with a null ScalarEvolution!");

  // Forget all the expressions associated with users of the old value,
  // so that future queries will recompute the expressions using the new
  // value.
  Value *Old = getValPtr();
  // Worklist-driven transitive walk over Old's users; Visited guards against
  // revisiting users reachable along multiple paths.
  SmallVector<User *, 16> Worklist(Old->user_begin(), Old->user_end());
  SmallPtrSet<User *, 8> Visited;
  while (!Worklist.empty()) {
    User *U = Worklist.pop_back_val();
    // Deleting the Old value will cause this to dangle. Postpone
    // that until everything else is done.
    if (U == Old)
      continue;
    if (!Visited.insert(U).second)
      continue;
    if (PHINode *PN = dyn_cast<PHINode>(U))
      SE->ConstantEvolutionLoopExitValue.erase(PN);
    SE->eraseValueFromMap(U);
    Worklist.insert(Worklist.end(), U->user_begin(), U->user_end());
  }
  // Delete the Old value.
  if (PHINode *PN = dyn_cast<PHINode>(Old))
    SE->ConstantEvolutionLoopExitValue.erase(PN);
  SE->eraseValueFromMap(Old);
  // this now dangles!
}

ScalarEvolution::SCEVCallbackVH::SCEVCallbackVH(Value *V, ScalarEvolution *se)
    : CallbackVH(V), SE(se) {}

//===----------------------------------------------------------------------===//
// ScalarEvolution Class Implementation
//===----------------------------------------------------------------------===//

ScalarEvolution::ScalarEvolution(Function &F, TargetLibraryInfo &TLI,
                                 AssumptionCache &AC, DominatorTree &DT,
                                 LoopInfo &LI)
    : F(F), TLI(TLI), AC(AC), DT(DT), LI(LI),
      CouldNotCompute(new SCEVCouldNotCompute()), ValuesAtScopes(64),
      LoopDispositions(64), BlockDispositions(64) {
  // To use guards for proving predicates, we need to scan every instruction in
  // relevant basic blocks, and not just terminators. Doing this is a waste of
  // time if the IR does not actually contain any calls to
  // @llvm.experimental.guard, so do a quick check and remember this beforehand.
  //
  // This pessimizes the case where a pass that preserves ScalarEvolution wants
  // to _add_ guards to the module when there weren't any before, and wants
  // ScalarEvolution to optimize based on those guards. For now we prefer to be
  // efficient in lieu of being smart in that rather obscure case.

  auto *GuardDecl = F.getParent()->getFunction(
      Intrinsic::getName(Intrinsic::experimental_guard));
  HasGuards = GuardDecl && !GuardDecl->use_empty();
}

// Move constructor: steals all memoized state from Arg.  Arg.FirstUnknown is
// nulled out afterwards so the moved-from destructor does not walk (and
// destroy) the SCEVUnknown chain that now belongs to this object.
ScalarEvolution::ScalarEvolution(ScalarEvolution &&Arg)
    : F(Arg.F), HasGuards(Arg.HasGuards), TLI(Arg.TLI), AC(Arg.AC), DT(Arg.DT),
      LI(Arg.LI), CouldNotCompute(std::move(Arg.CouldNotCompute)),
      ValueExprMap(std::move(Arg.ValueExprMap)),
      PendingLoopPredicates(std::move(Arg.PendingLoopPredicates)),
      PendingPhiRanges(std::move(Arg.PendingPhiRanges)),
      PendingMerges(std::move(Arg.PendingMerges)),
      MinTrailingZerosCache(std::move(Arg.MinTrailingZerosCache)),
      BackedgeTakenCounts(std::move(Arg.BackedgeTakenCounts)),
      PredicatedBackedgeTakenCounts(
          std::move(Arg.PredicatedBackedgeTakenCounts)),
      ConstantEvolutionLoopExitValue(
          std::move(Arg.ConstantEvolutionLoopExitValue)),
      ValuesAtScopes(std::move(Arg.ValuesAtScopes)),
      LoopDispositions(std::move(Arg.LoopDispositions)),
      LoopPropertiesCache(std::move(Arg.LoopPropertiesCache)),
      BlockDispositions(std::move(Arg.BlockDispositions)),
      UnsignedRanges(std::move(Arg.UnsignedRanges)),
      SignedRanges(std::move(Arg.SignedRanges)),
      UniqueSCEVs(std::move(Arg.UniqueSCEVs)),
      UniquePreds(std::move(Arg.UniquePreds)),
      SCEVAllocator(std::move(Arg.SCEVAllocator)),
      LoopUsers(std::move(Arg.LoopUsers)),
      PredicatedSCEVRewrites(std::move(Arg.PredicatedSCEVRewrites)),
      FirstUnknown(Arg.FirstUnknown) {
  Arg.FirstUnknown = nullptr;
}

ScalarEvolution::~ScalarEvolution() {
  // Iterate through all the SCEVUnknown instances and call their
  // destructors, so that they release their references to their values.
  for (SCEVUnknown *U = FirstUnknown; U;) {
    SCEVUnknown *Tmp = U;
    U = U->Next;
    Tmp->~SCEVUnknown();
  }
  FirstUnknown = nullptr;

  ExprValueMap.clear();
  ValueExprMap.clear();
  HasRecMap.clear();

  // Free any extra memory created for ExitNotTakenInfo in the unlikely event
  // that a loop had multiple computable exits.
  for (auto &BTCI : BackedgeTakenCounts)
    BTCI.second.clear();
  for (auto &BTCI : PredicatedBackedgeTakenCounts)
    BTCI.second.clear();

  // These sets track in-flight recursion; they must be empty at destruction
  // or some query left garbage behind.
  assert(PendingLoopPredicates.empty() && "isImpliedCond garbage");
  assert(PendingPhiRanges.empty() && "getRangeRef garbage");
  assert(PendingMerges.empty() && "isImpliedViaMerge garbage");
  assert(!WalkingBEDominatingConds && "isLoopBackedgeGuardedByCond garbage!");
  assert(!ProvingSplitPredicate && "ProvingSplitPredicate garbage!");
}

/// Return true if the backedge-taken count of L is a computable SCEV (i.e.
/// not SCEVCouldNotCompute).
bool ScalarEvolution::hasLoopInvariantBackedgeTakenCount(const Loop *L) {
  return !isa<SCEVCouldNotCompute>(getBackedgeTakenCount(L));
}

/// Print trip-count information for L and, recursively, for every loop nested
/// inside it (inner loops are printed first).
static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE,
                          const Loop *L) {
  // Print all inner loops first
  for (Loop *I : *L)
    PrintLoopInfo(OS, SE, I);

  OS << "Loop ";
  L->getHeader()->printAsOperand(OS, /*PrintType=*/false);
  OS << ": ";

  SmallVector<BasicBlock *, 8> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  if (ExitingBlocks.size() != 1)
    OS << "<multiple exits> ";

  if (SE->hasLoopInvariantBackedgeTakenCount(L))
    OS << "backedge-taken count is " << *SE->getBackedgeTakenCount(L) << "\n";
  else
    OS << "Unpredictable backedge-taken count.\n";

  // With several exiting blocks, also print the per-exit counts.
  if (ExitingBlocks.size() > 1)
    for (BasicBlock *ExitingBlock : ExitingBlocks) {
      OS << " exit count for " << ExitingBlock->getName() << ": "
         << *SE->getExitCount(L, ExitingBlock) << "\n";
    }

  OS << "Loop ";
  L->getHeader()->printAsOperand(OS, /*PrintType=*/false);
  OS << ": ";

  if (!isa<SCEVCouldNotCompute>(SE->getConstantMaxBackedgeTakenCount(L))) {
    OS << "max backedge-taken count is " << *SE->getConstantMaxBackedgeTakenCount(L);
    if (SE->isBackedgeTakenCountMaxOrZero(L))
      OS << ", actual taken count either this or zero.";
  } else {
    OS << "Unpredictable max backedge-taken count. ";
  }

  OS << "\n"
        "Loop ";
  L->getHeader()->printAsOperand(OS, /*PrintType=*/false);
  OS << ": ";

  // Predicated count: may be computable under additional runtime predicates
  // collected into Pred.
  SCEVUnionPredicate Pred;
  auto PBT = SE->getPredicatedBackedgeTakenCount(L, Pred);
  if (!isa<SCEVCouldNotCompute>(PBT)) {
    OS << "Predicated backedge-taken count is " << *PBT << "\n";
    OS << " Predicates:\n";
    Pred.print(OS, 4);
  } else {
    OS << "Unpredictable predicated backedge-taken count. ";
  }
  OS << "\n";

  if (SE->hasLoopInvariantBackedgeTakenCount(L)) {
    OS << "Loop ";
    L->getHeader()->printAsOperand(OS, /*PrintType=*/false);
    OS << ": ";
    OS << "Trip multiple is " << SE->getSmallConstantTripMultiple(L) << "\n";
  }
}

/// Map a LoopDisposition enumerator to the string used in -analyze output.
static StringRef loopDispositionToStr(ScalarEvolution::LoopDisposition LD) {
  switch (LD) {
  case ScalarEvolution::LoopVariant:
    return "Variant";
  case ScalarEvolution::LoopInvariant:
    return "Invariant";
  case ScalarEvolution::LoopComputable:
    return "Computable";
  }
  llvm_unreachable("Unknown ScalarEvolution::LoopDisposition kind!");
}

void ScalarEvolution::print(raw_ostream &OS) const {
  // ScalarEvolution's implementation of the print method is to print
  // out SCEV values of all instructions that are interesting. Doing
  // this potentially causes it to create new SCEV objects though,
  // which technically conflicts with the const qualifier. This isn't
  // observable from outside the class though, so casting away the
  // const isn't dangerous.
  ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this);

  if (ClassifyExpressions) {
    OS << "Classifying expressions for: ";
    F.printAsOperand(OS, /*PrintType=*/false);
    OS << "\n";
    // Walk every SCEVable, non-compare instruction and print its SCEV, its
    // value ranges, its value at enclosing-loop scope, and its disposition
    // with respect to every loop it participates in.
    for (Instruction &I : instructions(F))
      if (isSCEVable(I.getType()) && !isa<CmpInst>(I)) {
        OS << I << '\n';
        OS << " --> ";
        const SCEV *SV = SE.getSCEV(&I);
        SV->print(OS);
        if (!isa<SCEVCouldNotCompute>(SV)) {
          OS << " U: ";
          SE.getUnsignedRange(SV).print(OS);
          OS << " S: ";
          SE.getSignedRange(SV).print(OS);
        }

        const Loop *L = LI.getLoopFor(I.getParent());

        const SCEV *AtUse = SE.getSCEVAtScope(SV, L);
        if (AtUse != SV) {
          OS << " --> ";
          AtUse->print(OS);
          if (!isa<SCEVCouldNotCompute>(AtUse)) {
            OS << " U: ";
            SE.getUnsignedRange(AtUse).print(OS);
            OS << " S: ";
            SE.getSignedRange(AtUse).print(OS);
          }
        }

        if (L) {
          OS << "\t\t" "Exits: ";
          const SCEV *ExitValue = SE.getSCEVAtScope(SV, L->getParentLoop());
          if (!SE.isLoopInvariant(ExitValue, L)) {
            OS << "<<Unknown>>";
          } else {
            OS << *ExitValue;
          }

          // Dispositions for the enclosing loops, outermost last...
          bool First = true;
          for (auto *Iter = L; Iter; Iter = Iter->getParentLoop()) {
            if (First) {
              OS << "\t\t" "LoopDispositions: { ";
              First = false;
            } else {
              OS << ", ";
            }

            Iter->getHeader()->printAsOperand(OS, /*PrintType=*/false);
            OS << ": " << loopDispositionToStr(SE.getLoopDisposition(SV, Iter));
          }

          // ...then for the loops nested inside L.
          for (auto *InnerL : depth_first(L)) {
            if (InnerL == L)
              continue;
            if (First) {
              OS << "\t\t" "LoopDispositions: { ";
              First = false;
            } else {
              OS << ", ";
            }

            InnerL->getHeader()->printAsOperand(OS, /*PrintType=*/false);
            OS << ": " << loopDispositionToStr(SE.getLoopDisposition(SV, InnerL));
          }

          OS << " }";
        }

        OS << "\n";
      }
  }

  OS << "Determining loop execution counts for: ";
  F.printAsOperand(OS, /*PrintType=*/false);
  OS << "\n";
  for (Loop *I : LI)
    PrintLoopInfo(OS, &SE, I);
}

ScalarEvolution::LoopDisposition
ScalarEvolution::getLoopDisposition(const SCEV *S, const Loop *L) {
  // Memoized wrapper around computeLoopDisposition.
  auto &Values = LoopDispositions[S];
  for (auto &V : Values) {
    if (V.getPointer() == L)
      return V.getInt();
  }
  // Seed a conservative entry before recursing.
  Values.emplace_back(L, LoopVariant);
  LoopDisposition D = computeLoopDisposition(S, L);
  // Re-fetch the bucket: the recursive computation may have touched
  // LoopDispositions and invalidated the Values reference above.
  auto &Values2 = LoopDispositions[S];
  for (auto &V : make_range(Values2.rbegin(), Values2.rend())) {
    if (V.getPointer() == L) {
      V.setInt(D);
      break;
    }
  }
  return D;
}

ScalarEvolution::LoopDisposition
ScalarEvolution::computeLoopDisposition(const SCEV *S, const Loop *L) {
  // Classify S as variant/invariant/computable with respect to L by walking
  // the SCEV expression kind by kind.
  switch (static_cast<SCEVTypes>(S->getSCEVType())) {
  case scConstant:
    return LoopInvariant;
  case scTruncate:
  case scZeroExtend:
  case scSignExtend:
    // A cast has the disposition of its single operand.
    return getLoopDisposition(cast<SCEVCastExpr>(S)->getOperand(), L);
  case scAddRecExpr: {
    const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(S);

    // If L is the addrec's loop, it's computable.
    if (AR->getLoop() == L)
      return LoopComputable;

    // Add recurrences are never invariant in the function-body (null loop).
    if (!L)
      return LoopVariant;

    // Everything that is not defined at loop entry is variant.
    if (DT.dominates(L->getHeader(), AR->getLoop()->getHeader()))
      return LoopVariant;
    assert(!L->contains(AR->getLoop()) && "Containing loop's header does not"
           " dominate the contained loop's header?");

    // This recurrence is invariant w.r.t. L if AR's loop contains L.
    if (AR->getLoop()->contains(L))
      return LoopInvariant;

    // This recurrence is variant w.r.t. L if any of its operands
    // are variant.
    for (auto *Op : AR->operands())
      if (!isLoopInvariant(Op, L))
        return LoopVariant;

    // Otherwise it's loop-invariant.
    return LoopInvariant;
  }
  case scAddExpr:
  case scMulExpr:
  case scUMaxExpr:
  case scSMaxExpr:
  case scUMinExpr:
  case scSMinExpr: {
    // N-ary expressions: variant if any operand is variant; computable if any
    // operand is computable and none is variant; invariant otherwise.
    bool HasVarying = false;
    for (auto *Op : cast<SCEVNAryExpr>(S)->operands()) {
      LoopDisposition D = getLoopDisposition(Op, L);
      if (D == LoopVariant)
        return LoopVariant;
      if (D == LoopComputable)
        HasVarying = true;
    }
    return HasVarying ? LoopComputable : LoopInvariant;
  }
  case scUDivExpr: {
    const SCEVUDivExpr *UDiv = cast<SCEVUDivExpr>(S);
    LoopDisposition LD = getLoopDisposition(UDiv->getLHS(), L);
    if (LD == LoopVariant)
      return LoopVariant;
    LoopDisposition RD = getLoopDisposition(UDiv->getRHS(), L);
    if (RD == LoopVariant)
      return LoopVariant;
    return (LD == LoopInvariant && RD == LoopInvariant) ?
           LoopInvariant : LoopComputable;
  }
  case scUnknown:
    // All non-instruction values are loop invariant. All instructions are loop
    // invariant if they are not contained in the specified loop.
    // Instructions are never considered invariant in the function body
    // (null loop) because they are defined within the "loop".
    if (auto *I = dyn_cast<Instruction>(cast<SCEVUnknown>(S)->getValue()))
      return (L && !L->contains(I)) ?
             LoopInvariant : LoopVariant;
    return LoopInvariant;
  case scCouldNotCompute:
    llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
  }
  llvm_unreachable("Unknown SCEV kind!");
}

bool ScalarEvolution::isLoopInvariant(const SCEV *S, const Loop *L) {
  return getLoopDisposition(S, L) == LoopInvariant;
}

bool ScalarEvolution::hasComputableLoopEvolution(const SCEV *S, const Loop *L) {
  return getLoopDisposition(S, L) == LoopComputable;
}

ScalarEvolution::BlockDisposition
ScalarEvolution::getBlockDisposition(const SCEV *S, const BasicBlock *BB) {
  // Memoized wrapper around computeBlockDisposition; same cache protocol as
  // getLoopDisposition above (seed, compute, re-fetch, update).
  auto &Values = BlockDispositions[S];
  for (auto &V : Values) {
    if (V.getPointer() == BB)
      return V.getInt();
  }
  Values.emplace_back(BB, DoesNotDominateBlock);
  BlockDisposition D = computeBlockDisposition(S, BB);
  auto &Values2 = BlockDispositions[S];
  for (auto &V : make_range(Values2.rbegin(), Values2.rend())) {
    if (V.getPointer() == BB) {
      V.setInt(D);
      break;
    }
  }
  return D;
}

ScalarEvolution::BlockDisposition
ScalarEvolution::computeBlockDisposition(const SCEV *S, const BasicBlock *BB) {
  switch (static_cast<SCEVTypes>(S->getSCEVType())) {
  case scConstant:
    return ProperlyDominatesBlock;
  case scTruncate:
  case scZeroExtend:
  case scSignExtend:
    return getBlockDisposition(cast<SCEVCastExpr>(S)->getOperand(), BB);
  case scAddRecExpr: {
    // This uses a "dominates" query instead of "properly dominates" query
    // to test for proper dominance too, because the instruction which
    // produces the addrec's value is a PHI, and a PHI effectively properly
    // dominates its entire containing block.
    const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(S);
    if (!DT.dominates(AR->getLoop()->getHeader(), BB))
      return DoesNotDominateBlock;

    // Fall through into SCEVNAryExpr handling.
    LLVM_FALLTHROUGH;
  }
  case scAddExpr:
  case scMulExpr:
  case scUMaxExpr:
  case scSMaxExpr:
  case scUMinExpr:
  case scSMinExpr: {
    const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(S);
    bool Proper = true;
    for (const SCEV *NAryOp : NAry->operands()) {
      BlockDisposition D = getBlockDisposition(NAryOp, BB);
      if (D == DoesNotDominateBlock)
        return DoesNotDominateBlock;
      if (D == DominatesBlock)
        Proper = false;
    }
    return Proper ? ProperlyDominatesBlock : DominatesBlock;
  }
  case scUDivExpr: {
    const SCEVUDivExpr *UDiv = cast<SCEVUDivExpr>(S);
    const SCEV *LHS = UDiv->getLHS(), *RHS = UDiv->getRHS();
    BlockDisposition LD = getBlockDisposition(LHS, BB);
    if (LD == DoesNotDominateBlock)
      return DoesNotDominateBlock;
    BlockDisposition RD = getBlockDisposition(RHS, BB);
    if (RD == DoesNotDominateBlock)
      return DoesNotDominateBlock;
    return (LD == ProperlyDominatesBlock && RD == ProperlyDominatesBlock) ?
           ProperlyDominatesBlock : DominatesBlock;
  }
  case scUnknown:
    if (Instruction *I =
          dyn_cast<Instruction>(cast<SCEVUnknown>(S)->getValue())) {
      if (I->getParent() == BB)
        return DominatesBlock;
      if (DT.properlyDominates(I->getParent(), BB))
        return ProperlyDominatesBlock;
      return DoesNotDominateBlock;
    }
    return ProperlyDominatesBlock;
  case scCouldNotCompute:
    llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
  }
  llvm_unreachable("Unknown SCEV kind!");
}

bool ScalarEvolution::dominates(const SCEV *S, const BasicBlock *BB) {
  return getBlockDisposition(S, BB) >= DominatesBlock;
}

bool ScalarEvolution::properlyDominates(const SCEV *S, const BasicBlock *BB) {
  return getBlockDisposition(S, BB) == ProperlyDominatesBlock;
}

/// Return true if Op appears anywhere inside the expression tree of S.
bool ScalarEvolution::hasOperand(const SCEV *S, const SCEV *Op) const {
  return SCEVExprContains(S, [&](const SCEV *Expr) { return Expr == Op; });
}

/// Return true if S appears inside either the exact or the max not-taken
/// count of this exit (CouldNotCompute counts are skipped).
bool ScalarEvolution::ExitLimit::hasOperand(const SCEV *S) const {
  auto IsS = [&](const SCEV *X) { return S == X; };
  auto ContainsS = [&](const SCEV *X) {
    return !isa<SCEVCouldNotCompute>(X) && SCEVExprContains(X, IsS);
  };
  return ContainsS(ExactNotTaken) || ContainsS(MaxNotTaken);
}

/// Drop every piece of memoized state keyed by (or containing) S.
void
ScalarEvolution::forgetMemoizedResults(const SCEV *S) {
  ValuesAtScopes.erase(S);
  LoopDispositions.erase(S);
  BlockDispositions.erase(S);
  UnsignedRanges.erase(S);
  SignedRanges.erase(S);
  ExprValueMap.erase(S);
  HasRecMap.erase(S);
  MinTrailingZerosCache.erase(S);

  // Predicated rewrites are keyed by (SCEV, Loop); drop those whose SCEV
  // component is S.
  for (auto I = PredicatedSCEVRewrites.begin();
       I != PredicatedSCEVRewrites.end();) {
    std::pair<const SCEV *, const Loop *> Entry = I->first;
    if (Entry.first == S)
      PredicatedSCEVRewrites.erase(I++);
    else
      ++I;
  }

  // Drop any backedge-taken info (plain and predicated) that mentions S.
  auto RemoveSCEVFromBackedgeMap =
      [S, this](DenseMap<const Loop *, BackedgeTakenInfo> &Map) {
        for (auto I = Map.begin(), E = Map.end(); I != E;) {
          BackedgeTakenInfo &BEInfo = I->second;
          if (BEInfo.hasOperand(S, this)) {
            BEInfo.clear();
            Map.erase(I++);
          } else
            ++I;
        }
      };

  RemoveSCEVFromBackedgeMap(BackedgeTakenCounts);
  RemoveSCEVFromBackedgeMap(PredicatedBackedgeTakenCounts);
}

/// Collect into LoopsUsed every loop referenced by an addrec anywhere inside
/// the expression tree of S.
void
ScalarEvolution::getUsedLoops(const SCEV *S,
                              SmallPtrSetImpl<const Loop *> &LoopsUsed) {
  struct FindUsedLoops {
    FindUsedLoops(SmallPtrSetImpl<const Loop *> &LoopsUsed)
        : LoopsUsed(LoopsUsed) {}
    SmallPtrSetImpl<const Loop *> &LoopsUsed;
    bool follow(const SCEV *S) {
      if (auto *AR = dyn_cast<SCEVAddRecExpr>(S))
        LoopsUsed.insert(AR->getLoop());
      return true;
    }

    // Never stop early: visit the whole expression tree.
    bool isDone() const { return false; }
  };

  FindUsedLoops F(LoopsUsed);
  SCEVTraversal<FindUsedLoops>(F).visitAll(S);
}
/// Register S in the per-loop use lists of every loop it references, so the
/// expression can be invalidated when one of those loops is forgotten.
void ScalarEvolution::addToLoopUseLists(const SCEV *S) {
  SmallPtrSet<const Loop *, 8> LoopsUsed;
  getUsedLoops(S, LoopsUsed);
  for (auto *L : LoopsUsed)
    LoopUsers[L].push_back(S);
}

/// Self-check: build a fresh ScalarEvolution over the same function and
/// compare its backedge-taken counts against the cached ones; aborts the
/// process on a mismatch (stale cache).
void ScalarEvolution::verify() const {
  ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this);
  ScalarEvolution SE2(F, TLI, AC, DT, LI);

  SmallVector<Loop *, 8> LoopStack(LI.begin(), LI.end());

  // Maps SCEV expressions from one ScalarEvolution "universe" to another:
  // leaves (constants/unknowns) are re-created in SE2, interior nodes are
  // rebuilt by the inherited rewriter.
  struct SCEVMapper : public SCEVRewriteVisitor<SCEVMapper> {
    SCEVMapper(ScalarEvolution &SE) : SCEVRewriteVisitor<SCEVMapper>(SE) {}

    const SCEV *visitConstant(const SCEVConstant *Constant) {
      return SE.getConstant(Constant->getAPInt());
    }

    const SCEV *visitUnknown(const SCEVUnknown *Expr) {
      return SE.getUnknown(Expr->getValue());
    }

    const SCEV *visitCouldNotCompute(const SCEVCouldNotCompute *Expr) {
      return SE.getCouldNotCompute();
    }
  };

  SCEVMapper SCM(SE2);

  // Iterative pre-order walk over the loop forest.
  while (!LoopStack.empty()) {
    auto *L = LoopStack.pop_back_val();
    LoopStack.insert(LoopStack.end(), L->begin(), L->end());

    auto *CurBECount = SCM.visit(
        const_cast<ScalarEvolution *>(this)->getBackedgeTakenCount(L));
    auto *NewBECount = SE2.getBackedgeTakenCount(L);

    if (CurBECount == SE2.getCouldNotCompute() ||
        NewBECount == SE2.getCouldNotCompute()) {
      // NB! This situation is legal, but is very suspicious -- whatever pass
      // change the loop to make a trip count go from could not compute to
      // computable or vice-versa *should have* invalidated SCEV. However, we
      // choose not to assert here (for now) since we don't want false
      // positives.
      continue;
    }

    if (containsUndefs(CurBECount) || containsUndefs(NewBECount)) {
      // SCEV treats "undef" as an unknown but consistent value (i.e. it does
      // not propagate undef aggressively). This means we can (and do) fail
      // verification in cases where a transform makes the trip count of a loop
      // go from "undef" to "undef+1" (say). The transform is fine, since in
      // both cases the loop iterates "undef" times, but SCEV thinks we
      // increased the trip count of the loop by 1 incorrectly.
      continue;
    }

    // Widen the narrower count so the subtraction below is type-correct.
    if (SE.getTypeSizeInBits(CurBECount->getType()) >
        SE.getTypeSizeInBits(NewBECount->getType()))
      NewBECount = SE2.getZeroExtendExpr(NewBECount, CurBECount->getType());
    else if (SE.getTypeSizeInBits(CurBECount->getType()) <
             SE.getTypeSizeInBits(NewBECount->getType()))
      CurBECount = SE2.getZeroExtendExpr(CurBECount, NewBECount->getType());

    const SCEV *Delta = SE2.getMinusSCEV(CurBECount, NewBECount);

    // Unless VerifySCEVStrict is set, we only compare constant deltas.
    if ((VerifySCEVStrict || isa<SCEVConstant>(Delta)) && !Delta->isZero()) {
      dbgs() << "Trip Count for " << *L << " Changed!\n";
      dbgs() << "Old: " << *CurBECount << "\n";
      dbgs() << "New: " << *NewBECount << "\n";
      dbgs() << "Delta: " << *Delta << "\n";
      std::abort();
    }
  }
}

bool ScalarEvolution::invalidate(
    Function &F, const PreservedAnalyses &PA,
    FunctionAnalysisManager::Invalidator &Inv) {
  // Invalidate the ScalarEvolution object whenever it isn't preserved or one
  // of its dependencies is invalidated.
  auto PAC = PA.getChecker<ScalarEvolutionAnalysis>();
  return !(PAC.preserved() || PAC.preservedSet<AllAnalysesOn<Function>>()) ||
         Inv.invalidate<AssumptionAnalysis>(F, PA) ||
         Inv.invalidate<DominatorTreeAnalysis>(F, PA) ||
         Inv.invalidate<LoopAnalysis>(F, PA);
}

AnalysisKey ScalarEvolutionAnalysis::Key;

// New pass manager entry point: construct a ScalarEvolution from the results
// of the analyses it depends on.
ScalarEvolution ScalarEvolutionAnalysis::run(Function &F,
                                             FunctionAnalysisManager &AM) {
  return ScalarEvolution(F, AM.getResult<TargetLibraryAnalysis>(F),
                         AM.getResult<AssumptionAnalysis>(F),
                         AM.getResult<DominatorTreeAnalysis>(F),
                         AM.getResult<LoopAnalysis>(F));
}

PreservedAnalyses
ScalarEvolutionVerifierPass::run(Function &F, FunctionAnalysisManager &AM) {
  AM.getResult<ScalarEvolutionAnalysis>(F).verify();
  return PreservedAnalyses::all();
}

PreservedAnalyses
ScalarEvolutionPrinterPass::run(Function &F, FunctionAnalysisManager &AM) {
AM.getResult<ScalarEvolutionAnalysis>(F).print(OS); 12086296417Sdim return PreservedAnalyses::all(); 12087296417Sdim} 12088296417Sdim 12089296417SdimINITIALIZE_PASS_BEGIN(ScalarEvolutionWrapperPass, "scalar-evolution", 12090296417Sdim "Scalar Evolution Analysis", false, true) 12091296417SdimINITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) 12092296417SdimINITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) 12093296417SdimINITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 12094296417SdimINITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) 12095296417SdimINITIALIZE_PASS_END(ScalarEvolutionWrapperPass, "scalar-evolution", 12096296417Sdim "Scalar Evolution Analysis", false, true) 12097327952Sdim 12098296417Sdimchar ScalarEvolutionWrapperPass::ID = 0; 12099296417Sdim 12100296417SdimScalarEvolutionWrapperPass::ScalarEvolutionWrapperPass() : FunctionPass(ID) { 12101296417Sdim initializeScalarEvolutionWrapperPassPass(*PassRegistry::getPassRegistry()); 12102296417Sdim} 12103296417Sdim 12104296417Sdimbool ScalarEvolutionWrapperPass::runOnFunction(Function &F) { 12105296417Sdim SE.reset(new ScalarEvolution( 12106360784Sdim F, getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F), 12107296417Sdim getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F), 12108296417Sdim getAnalysis<DominatorTreeWrapperPass>().getDomTree(), 12109296417Sdim getAnalysis<LoopInfoWrapperPass>().getLoopInfo())); 12110296417Sdim return false; 12111296417Sdim} 12112296417Sdim 12113296417Sdimvoid ScalarEvolutionWrapperPass::releaseMemory() { SE.reset(); } 12114296417Sdim 12115296417Sdimvoid ScalarEvolutionWrapperPass::print(raw_ostream &OS, const Module *) const { 12116296417Sdim SE->print(OS); 12117296417Sdim} 12118296417Sdim 12119296417Sdimvoid ScalarEvolutionWrapperPass::verifyAnalysis() const { 12120296417Sdim if (!VerifySCEV) 12121296417Sdim return; 12122296417Sdim 12123296417Sdim SE->verify(); 12124296417Sdim} 12125296417Sdim 12126296417Sdimvoid 
ScalarEvolutionWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { 12127296417Sdim AU.setPreservesAll(); 12128296417Sdim AU.addRequiredTransitive<AssumptionCacheTracker>(); 12129296417Sdim AU.addRequiredTransitive<LoopInfoWrapperPass>(); 12130296417Sdim AU.addRequiredTransitive<DominatorTreeWrapperPass>(); 12131296417Sdim AU.addRequiredTransitive<TargetLibraryInfoWrapperPass>(); 12132296417Sdim} 12133296417Sdim 12134321369Sdimconst SCEVPredicate *ScalarEvolution::getEqualPredicate(const SCEV *LHS, 12135321369Sdim const SCEV *RHS) { 12136296417Sdim FoldingSetNodeID ID; 12137321369Sdim assert(LHS->getType() == RHS->getType() && 12138321369Sdim "Type mismatch between LHS and RHS"); 12139296417Sdim // Unique this node based on the arguments 12140296417Sdim ID.AddInteger(SCEVPredicate::P_Equal); 12141296417Sdim ID.AddPointer(LHS); 12142296417Sdim ID.AddPointer(RHS); 12143296417Sdim void *IP = nullptr; 12144296417Sdim if (const auto *S = UniquePreds.FindNodeOrInsertPos(ID, IP)) 12145296417Sdim return S; 12146296417Sdim SCEVEqualPredicate *Eq = new (SCEVAllocator) 12147296417Sdim SCEVEqualPredicate(ID.Intern(SCEVAllocator), LHS, RHS); 12148296417Sdim UniquePreds.InsertNode(Eq, IP); 12149296417Sdim return Eq; 12150296417Sdim} 12151296417Sdim 12152309124Sdimconst SCEVPredicate *ScalarEvolution::getWrapPredicate( 12153309124Sdim const SCEVAddRecExpr *AR, 12154309124Sdim SCEVWrapPredicate::IncrementWrapFlags AddedFlags) { 12155309124Sdim FoldingSetNodeID ID; 12156309124Sdim // Unique this node based on the arguments 12157309124Sdim ID.AddInteger(SCEVPredicate::P_Wrap); 12158309124Sdim ID.AddPointer(AR); 12159309124Sdim ID.AddInteger(AddedFlags); 12160309124Sdim void *IP = nullptr; 12161309124Sdim if (const auto *S = UniquePreds.FindNodeOrInsertPos(ID, IP)) 12162309124Sdim return S; 12163309124Sdim auto *OF = new (SCEVAllocator) 12164309124Sdim SCEVWrapPredicate(ID.Intern(SCEVAllocator), AR, AddedFlags); 12165309124Sdim UniquePreds.InsertNode(OF, IP); 12166309124Sdim 
return OF; 12167309124Sdim} 12168309124Sdim 12169296417Sdimnamespace { 12170309124Sdim 12171296417Sdimclass SCEVPredicateRewriter : public SCEVRewriteVisitor<SCEVPredicateRewriter> { 12172296417Sdimpublic: 12173327952Sdim 12174314564Sdim /// Rewrites \p S in the context of a loop L and the SCEV predication 12175314564Sdim /// infrastructure. 12176314564Sdim /// 12177314564Sdim /// If \p Pred is non-null, the SCEV expression is rewritten to respect the 12178314564Sdim /// equivalences present in \p Pred. 12179314564Sdim /// 12180314564Sdim /// If \p NewPreds is non-null, rewrite is free to add further predicates to 12181314564Sdim /// \p NewPreds such that the result will be an AddRecExpr. 12182309124Sdim static const SCEV *rewrite(const SCEV *S, const Loop *L, ScalarEvolution &SE, 12183314564Sdim SmallPtrSetImpl<const SCEVPredicate *> *NewPreds, 12184314564Sdim SCEVUnionPredicate *Pred) { 12185314564Sdim SCEVPredicateRewriter Rewriter(L, SE, NewPreds, Pred); 12186309124Sdim return Rewriter.visit(S); 12187296417Sdim } 12188296417Sdim 12189296417Sdim const SCEV *visitUnknown(const SCEVUnknown *Expr) { 12190314564Sdim if (Pred) { 12191314564Sdim auto ExprPreds = Pred->getPredicatesForExpr(Expr); 12192314564Sdim for (auto *Pred : ExprPreds) 12193314564Sdim if (const auto *IPred = dyn_cast<SCEVEqualPredicate>(Pred)) 12194314564Sdim if (IPred->getLHS() == Expr) 12195314564Sdim return IPred->getRHS(); 12196314564Sdim } 12197321369Sdim return convertToAddRecWithPreds(Expr); 12198296417Sdim } 12199296417Sdim 12200309124Sdim const SCEV *visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) { 12201309124Sdim const SCEV *Operand = visit(Expr->getOperand()); 12202309124Sdim const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Operand); 12203309124Sdim if (AR && AR->getLoop() == L && AR->isAffine()) { 12204309124Sdim // This couldn't be folded because the operand didn't have the nuw 12205309124Sdim // flag. Add the nusw flag as an assumption that we could make. 
12206309124Sdim const SCEV *Step = AR->getStepRecurrence(SE); 12207309124Sdim Type *Ty = Expr->getType(); 12208309124Sdim if (addOverflowAssumption(AR, SCEVWrapPredicate::IncrementNUSW)) 12209309124Sdim return SE.getAddRecExpr(SE.getZeroExtendExpr(AR->getStart(), Ty), 12210309124Sdim SE.getSignExtendExpr(Step, Ty), L, 12211309124Sdim AR->getNoWrapFlags()); 12212309124Sdim } 12213309124Sdim return SE.getZeroExtendExpr(Operand, Expr->getType()); 12214309124Sdim } 12215309124Sdim 12216309124Sdim const SCEV *visitSignExtendExpr(const SCEVSignExtendExpr *Expr) { 12217309124Sdim const SCEV *Operand = visit(Expr->getOperand()); 12218309124Sdim const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Operand); 12219309124Sdim if (AR && AR->getLoop() == L && AR->isAffine()) { 12220309124Sdim // This couldn't be folded because the operand didn't have the nsw 12221309124Sdim // flag. Add the nssw flag as an assumption that we could make. 12222309124Sdim const SCEV *Step = AR->getStepRecurrence(SE); 12223309124Sdim Type *Ty = Expr->getType(); 12224309124Sdim if (addOverflowAssumption(AR, SCEVWrapPredicate::IncrementNSSW)) 12225309124Sdim return SE.getAddRecExpr(SE.getSignExtendExpr(AR->getStart(), Ty), 12226309124Sdim SE.getSignExtendExpr(Step, Ty), L, 12227309124Sdim AR->getNoWrapFlags()); 12228309124Sdim } 12229309124Sdim return SE.getSignExtendExpr(Operand, Expr->getType()); 12230309124Sdim } 12231309124Sdim 12232296417Sdimprivate: 12233327952Sdim explicit SCEVPredicateRewriter(const Loop *L, ScalarEvolution &SE, 12234327952Sdim SmallPtrSetImpl<const SCEVPredicate *> *NewPreds, 12235327952Sdim SCEVUnionPredicate *Pred) 12236327952Sdim : SCEVRewriteVisitor(SE), NewPreds(NewPreds), Pred(Pred), L(L) {} 12237327952Sdim 12238321369Sdim bool addOverflowAssumption(const SCEVPredicate *P) { 12239314564Sdim if (!NewPreds) { 12240309124Sdim // Check if we've already made this assumption. 
12241321369Sdim return Pred && Pred->implies(P); 12242309124Sdim } 12243321369Sdim NewPreds->insert(P); 12244309124Sdim return true; 12245309124Sdim } 12246309124Sdim 12247321369Sdim bool addOverflowAssumption(const SCEVAddRecExpr *AR, 12248321369Sdim SCEVWrapPredicate::IncrementWrapFlags AddedFlags) { 12249321369Sdim auto *A = SE.getWrapPredicate(AR, AddedFlags); 12250321369Sdim return addOverflowAssumption(A); 12251321369Sdim } 12252321369Sdim 12253321369Sdim // If \p Expr represents a PHINode, we try to see if it can be represented 12254327952Sdim // as an AddRec, possibly under a predicate (PHISCEVPred). If it is possible 12255321369Sdim // to add this predicate as a runtime overflow check, we return the AddRec. 12256327952Sdim // If \p Expr does not meet these conditions (is not a PHI node, or we 12257327952Sdim // couldn't create an AddRec for it, or couldn't add the predicate), we just 12258321369Sdim // return \p Expr. 12259321369Sdim const SCEV *convertToAddRecWithPreds(const SCEVUnknown *Expr) { 12260321369Sdim if (!isa<PHINode>(Expr->getValue())) 12261321369Sdim return Expr; 12262321369Sdim Optional<std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>>> 12263321369Sdim PredicatedRewrite = SE.createAddRecFromPHIWithCasts(Expr); 12264321369Sdim if (!PredicatedRewrite) 12265321369Sdim return Expr; 12266321369Sdim for (auto *P : PredicatedRewrite->second){ 12267341825Sdim // Wrap predicates from outer loops are not supported. 
12268341825Sdim if (auto *WP = dyn_cast<const SCEVWrapPredicate>(P)) { 12269341825Sdim auto *AR = cast<const SCEVAddRecExpr>(WP->getExpr()); 12270341825Sdim if (L != AR->getLoop()) 12271341825Sdim return Expr; 12272341825Sdim } 12273321369Sdim if (!addOverflowAssumption(P)) 12274321369Sdim return Expr; 12275321369Sdim } 12276321369Sdim return PredicatedRewrite->first; 12277321369Sdim } 12278327952Sdim 12279314564Sdim SmallPtrSetImpl<const SCEVPredicate *> *NewPreds; 12280314564Sdim SCEVUnionPredicate *Pred; 12281309124Sdim const Loop *L; 12282296417Sdim}; 12283327952Sdim 12284296417Sdim} // end anonymous namespace 12285296417Sdim 12286309124Sdimconst SCEV *ScalarEvolution::rewriteUsingPredicate(const SCEV *S, const Loop *L, 12287296417Sdim SCEVUnionPredicate &Preds) { 12288314564Sdim return SCEVPredicateRewriter::rewrite(S, L, *this, nullptr, &Preds); 12289296417Sdim} 12290296417Sdim 12291314564Sdimconst SCEVAddRecExpr *ScalarEvolution::convertSCEVToAddRecWithPredicates( 12292314564Sdim const SCEV *S, const Loop *L, 12293314564Sdim SmallPtrSetImpl<const SCEVPredicate *> &Preds) { 12294314564Sdim SmallPtrSet<const SCEVPredicate *, 4> TransformPreds; 12295314564Sdim S = SCEVPredicateRewriter::rewrite(S, L, *this, &TransformPreds, nullptr); 12296309124Sdim auto *AddRec = dyn_cast<SCEVAddRecExpr>(S); 12297309124Sdim 12298309124Sdim if (!AddRec) 12299309124Sdim return nullptr; 12300309124Sdim 12301309124Sdim // Since the transformation was successful, we can now transfer the SCEV 12302309124Sdim // predicates. 
12303314564Sdim for (auto *P : TransformPreds) 12304314564Sdim Preds.insert(P); 12305314564Sdim 12306309124Sdim return AddRec; 12307309124Sdim} 12308309124Sdim 12309296417Sdim/// SCEV predicates 12310296417SdimSCEVPredicate::SCEVPredicate(const FoldingSetNodeIDRef ID, 12311296417Sdim SCEVPredicateKind Kind) 12312296417Sdim : FastID(ID), Kind(Kind) {} 12313296417Sdim 12314296417SdimSCEVEqualPredicate::SCEVEqualPredicate(const FoldingSetNodeIDRef ID, 12315321369Sdim const SCEV *LHS, const SCEV *RHS) 12316321369Sdim : SCEVPredicate(ID, P_Equal), LHS(LHS), RHS(RHS) { 12317321369Sdim assert(LHS->getType() == RHS->getType() && "LHS and RHS types don't match"); 12318321369Sdim assert(LHS != RHS && "LHS and RHS are the same SCEV"); 12319321369Sdim} 12320296417Sdim 12321296417Sdimbool SCEVEqualPredicate::implies(const SCEVPredicate *N) const { 12322309124Sdim const auto *Op = dyn_cast<SCEVEqualPredicate>(N); 12323296417Sdim 12324296417Sdim if (!Op) 12325296417Sdim return false; 12326296417Sdim 12327296417Sdim return Op->LHS == LHS && Op->RHS == RHS; 12328296417Sdim} 12329296417Sdim 12330296417Sdimbool SCEVEqualPredicate::isAlwaysTrue() const { return false; } 12331296417Sdim 12332296417Sdimconst SCEV *SCEVEqualPredicate::getExpr() const { return LHS; } 12333296417Sdim 12334296417Sdimvoid SCEVEqualPredicate::print(raw_ostream &OS, unsigned Depth) const { 12335296417Sdim OS.indent(Depth) << "Equal predicate: " << *LHS << " == " << *RHS << "\n"; 12336296417Sdim} 12337296417Sdim 12338309124SdimSCEVWrapPredicate::SCEVWrapPredicate(const FoldingSetNodeIDRef ID, 12339309124Sdim const SCEVAddRecExpr *AR, 12340309124Sdim IncrementWrapFlags Flags) 12341309124Sdim : SCEVPredicate(ID, P_Wrap), AR(AR), Flags(Flags) {} 12342309124Sdim 12343309124Sdimconst SCEV *SCEVWrapPredicate::getExpr() const { return AR; } 12344309124Sdim 12345309124Sdimbool SCEVWrapPredicate::implies(const SCEVPredicate *N) const { 12346309124Sdim const auto *Op = dyn_cast<SCEVWrapPredicate>(N); 12347309124Sdim 
12348309124Sdim return Op && Op->AR == AR && setFlags(Flags, Op->Flags) == Flags; 12349309124Sdim} 12350309124Sdim 12351309124Sdimbool SCEVWrapPredicate::isAlwaysTrue() const { 12352309124Sdim SCEV::NoWrapFlags ScevFlags = AR->getNoWrapFlags(); 12353309124Sdim IncrementWrapFlags IFlags = Flags; 12354309124Sdim 12355309124Sdim if (ScalarEvolution::setFlags(ScevFlags, SCEV::FlagNSW) == ScevFlags) 12356309124Sdim IFlags = clearFlags(IFlags, IncrementNSSW); 12357309124Sdim 12358309124Sdim return IFlags == IncrementAnyWrap; 12359309124Sdim} 12360309124Sdim 12361309124Sdimvoid SCEVWrapPredicate::print(raw_ostream &OS, unsigned Depth) const { 12362309124Sdim OS.indent(Depth) << *getExpr() << " Added Flags: "; 12363309124Sdim if (SCEVWrapPredicate::IncrementNUSW & getFlags()) 12364309124Sdim OS << "<nusw>"; 12365309124Sdim if (SCEVWrapPredicate::IncrementNSSW & getFlags()) 12366309124Sdim OS << "<nssw>"; 12367309124Sdim OS << "\n"; 12368309124Sdim} 12369309124Sdim 12370309124SdimSCEVWrapPredicate::IncrementWrapFlags 12371309124SdimSCEVWrapPredicate::getImpliedFlags(const SCEVAddRecExpr *AR, 12372309124Sdim ScalarEvolution &SE) { 12373309124Sdim IncrementWrapFlags ImpliedFlags = IncrementAnyWrap; 12374309124Sdim SCEV::NoWrapFlags StaticFlags = AR->getNoWrapFlags(); 12375309124Sdim 12376309124Sdim // We can safely transfer the NSW flag as NSSW. 12377309124Sdim if (ScalarEvolution::setFlags(StaticFlags, SCEV::FlagNSW) == StaticFlags) 12378309124Sdim ImpliedFlags = IncrementNSSW; 12379309124Sdim 12380309124Sdim if (ScalarEvolution::setFlags(StaticFlags, SCEV::FlagNUW) == StaticFlags) { 12381309124Sdim // If the increment is positive, the SCEV NUW flag will also imply the 12382309124Sdim // WrapPredicate NUSW flag. 
12383309124Sdim if (const auto *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(SE))) 12384309124Sdim if (Step->getValue()->getValue().isNonNegative()) 12385309124Sdim ImpliedFlags = setFlags(ImpliedFlags, IncrementNUSW); 12386309124Sdim } 12387309124Sdim 12388309124Sdim return ImpliedFlags; 12389309124Sdim} 12390309124Sdim 12391296417Sdim/// Union predicates don't get cached so create a dummy set ID for it. 12392296417SdimSCEVUnionPredicate::SCEVUnionPredicate() 12393296417Sdim : SCEVPredicate(FoldingSetNodeIDRef(nullptr, 0), P_Union) {} 12394296417Sdim 12395296417Sdimbool SCEVUnionPredicate::isAlwaysTrue() const { 12396296417Sdim return all_of(Preds, 12397296417Sdim [](const SCEVPredicate *I) { return I->isAlwaysTrue(); }); 12398296417Sdim} 12399296417Sdim 12400296417SdimArrayRef<const SCEVPredicate *> 12401296417SdimSCEVUnionPredicate::getPredicatesForExpr(const SCEV *Expr) { 12402296417Sdim auto I = SCEVToPreds.find(Expr); 12403296417Sdim if (I == SCEVToPreds.end()) 12404296417Sdim return ArrayRef<const SCEVPredicate *>(); 12405296417Sdim return I->second; 12406296417Sdim} 12407296417Sdim 12408296417Sdimbool SCEVUnionPredicate::implies(const SCEVPredicate *N) const { 12409309124Sdim if (const auto *Set = dyn_cast<SCEVUnionPredicate>(N)) 12410296417Sdim return all_of(Set->Preds, 12411296417Sdim [this](const SCEVPredicate *I) { return this->implies(I); }); 12412296417Sdim 12413296417Sdim auto ScevPredsIt = SCEVToPreds.find(N->getExpr()); 12414296417Sdim if (ScevPredsIt == SCEVToPreds.end()) 12415296417Sdim return false; 12416296417Sdim auto &SCEVPreds = ScevPredsIt->second; 12417296417Sdim 12418296417Sdim return any_of(SCEVPreds, 12419296417Sdim [N](const SCEVPredicate *I) { return I->implies(N); }); 12420296417Sdim} 12421296417Sdim 12422296417Sdimconst SCEV *SCEVUnionPredicate::getExpr() const { return nullptr; } 12423296417Sdim 12424296417Sdimvoid SCEVUnionPredicate::print(raw_ostream &OS, unsigned Depth) const { 12425296417Sdim for (auto Pred : Preds) 
12426296417Sdim Pred->print(OS, Depth); 12427296417Sdim} 12428296417Sdim 12429296417Sdimvoid SCEVUnionPredicate::add(const SCEVPredicate *N) { 12430309124Sdim if (const auto *Set = dyn_cast<SCEVUnionPredicate>(N)) { 12431296417Sdim for (auto Pred : Set->Preds) 12432296417Sdim add(Pred); 12433296417Sdim return; 12434296417Sdim } 12435296417Sdim 12436296417Sdim if (implies(N)) 12437296417Sdim return; 12438296417Sdim 12439296417Sdim const SCEV *Key = N->getExpr(); 12440296417Sdim assert(Key && "Only SCEVUnionPredicate doesn't have an " 12441296417Sdim " associated expression!"); 12442296417Sdim 12443296417Sdim SCEVToPreds[Key].push_back(N); 12444296417Sdim Preds.push_back(N); 12445296417Sdim} 12446296417Sdim 12447309124SdimPredicatedScalarEvolution::PredicatedScalarEvolution(ScalarEvolution &SE, 12448309124Sdim Loop &L) 12449327952Sdim : SE(SE), L(L) {} 12450296417Sdim 12451296417Sdimconst SCEV *PredicatedScalarEvolution::getSCEV(Value *V) { 12452296417Sdim const SCEV *Expr = SE.getSCEV(V); 12453296417Sdim RewriteEntry &Entry = RewriteMap[Expr]; 12454296417Sdim 12455296417Sdim // If we already have an entry and the version matches, return it. 12456296417Sdim if (Entry.second && Generation == Entry.first) 12457296417Sdim return Entry.second; 12458296417Sdim 12459296417Sdim // We found an entry but it's stale. Rewrite the stale entry 12460314564Sdim // according to the current predicate. 
12461296417Sdim if (Entry.second) 12462296417Sdim Expr = Entry.second; 12463296417Sdim 12464309124Sdim const SCEV *NewSCEV = SE.rewriteUsingPredicate(Expr, &L, Preds); 12465296417Sdim Entry = {Generation, NewSCEV}; 12466296417Sdim 12467296417Sdim return NewSCEV; 12468296417Sdim} 12469296417Sdim 12470309124Sdimconst SCEV *PredicatedScalarEvolution::getBackedgeTakenCount() { 12471309124Sdim if (!BackedgeCount) { 12472309124Sdim SCEVUnionPredicate BackedgePred; 12473309124Sdim BackedgeCount = SE.getPredicatedBackedgeTakenCount(&L, BackedgePred); 12474309124Sdim addPredicate(BackedgePred); 12475309124Sdim } 12476309124Sdim return BackedgeCount; 12477309124Sdim} 12478309124Sdim 12479296417Sdimvoid PredicatedScalarEvolution::addPredicate(const SCEVPredicate &Pred) { 12480296417Sdim if (Preds.implies(&Pred)) 12481296417Sdim return; 12482296417Sdim Preds.add(&Pred); 12483296417Sdim updateGeneration(); 12484296417Sdim} 12485296417Sdim 12486296417Sdimconst SCEVUnionPredicate &PredicatedScalarEvolution::getUnionPredicate() const { 12487296417Sdim return Preds; 12488296417Sdim} 12489296417Sdim 12490296417Sdimvoid PredicatedScalarEvolution::updateGeneration() { 12491296417Sdim // If the generation number wrapped recompute everything. 12492296417Sdim if (++Generation == 0) { 12493296417Sdim for (auto &II : RewriteMap) { 12494296417Sdim const SCEV *Rewritten = II.second.second; 12495309124Sdim II.second = {Generation, SE.rewriteUsingPredicate(Rewritten, &L, Preds)}; 12496296417Sdim } 12497296417Sdim } 12498296417Sdim} 12499309124Sdim 12500309124Sdimvoid PredicatedScalarEvolution::setNoOverflow( 12501309124Sdim Value *V, SCEVWrapPredicate::IncrementWrapFlags Flags) { 12502309124Sdim const SCEV *Expr = getSCEV(V); 12503309124Sdim const auto *AR = cast<SCEVAddRecExpr>(Expr); 12504309124Sdim 12505309124Sdim auto ImpliedFlags = SCEVWrapPredicate::getImpliedFlags(AR, SE); 12506309124Sdim 12507309124Sdim // Clear the statically implied flags. 
12508309124Sdim Flags = SCEVWrapPredicate::clearFlags(Flags, ImpliedFlags); 12509309124Sdim addPredicate(*SE.getWrapPredicate(AR, Flags)); 12510309124Sdim 12511309124Sdim auto II = FlagsMap.insert({V, Flags}); 12512309124Sdim if (!II.second) 12513309124Sdim II.first->second = SCEVWrapPredicate::setFlags(Flags, II.first->second); 12514309124Sdim} 12515309124Sdim 12516309124Sdimbool PredicatedScalarEvolution::hasNoOverflow( 12517309124Sdim Value *V, SCEVWrapPredicate::IncrementWrapFlags Flags) { 12518309124Sdim const SCEV *Expr = getSCEV(V); 12519309124Sdim const auto *AR = cast<SCEVAddRecExpr>(Expr); 12520309124Sdim 12521309124Sdim Flags = SCEVWrapPredicate::clearFlags( 12522309124Sdim Flags, SCEVWrapPredicate::getImpliedFlags(AR, SE)); 12523309124Sdim 12524309124Sdim auto II = FlagsMap.find(V); 12525309124Sdim 12526309124Sdim if (II != FlagsMap.end()) 12527309124Sdim Flags = SCEVWrapPredicate::clearFlags(Flags, II->second); 12528309124Sdim 12529309124Sdim return Flags == SCEVWrapPredicate::IncrementAnyWrap; 12530309124Sdim} 12531309124Sdim 12532309124Sdimconst SCEVAddRecExpr *PredicatedScalarEvolution::getAsAddRec(Value *V) { 12533309124Sdim const SCEV *Expr = this->getSCEV(V); 12534314564Sdim SmallPtrSet<const SCEVPredicate *, 4> NewPreds; 12535314564Sdim auto *New = SE.convertSCEVToAddRecWithPredicates(Expr, &L, NewPreds); 12536309124Sdim 12537309124Sdim if (!New) 12538309124Sdim return nullptr; 12539309124Sdim 12540314564Sdim for (auto *P : NewPreds) 12541314564Sdim Preds.add(P); 12542314564Sdim 12543309124Sdim updateGeneration(); 12544309124Sdim RewriteMap[SE.getSCEV(V)] = {Generation, New}; 12545309124Sdim return New; 12546309124Sdim} 12547309124Sdim 12548309124SdimPredicatedScalarEvolution::PredicatedScalarEvolution( 12549309124Sdim const PredicatedScalarEvolution &Init) 12550309124Sdim : RewriteMap(Init.RewriteMap), SE(Init.SE), L(Init.L), Preds(Init.Preds), 12551309124Sdim Generation(Init.Generation), BackedgeCount(Init.BackedgeCount) { 12552360784Sdim for 
(auto I : Init.FlagsMap) 12553309124Sdim FlagsMap.insert(I); 12554309124Sdim} 12555309124Sdim 12556309124Sdimvoid PredicatedScalarEvolution::print(raw_ostream &OS, unsigned Depth) const { 12557309124Sdim // For each block. 12558309124Sdim for (auto *BB : L.getBlocks()) 12559309124Sdim for (auto &I : *BB) { 12560309124Sdim if (!SE.isSCEVable(I.getType())) 12561309124Sdim continue; 12562309124Sdim 12563309124Sdim auto *Expr = SE.getSCEV(&I); 12564309124Sdim auto II = RewriteMap.find(Expr); 12565309124Sdim 12566309124Sdim if (II == RewriteMap.end()) 12567309124Sdim continue; 12568309124Sdim 12569309124Sdim // Don't print things that are not interesting. 12570309124Sdim if (II->second.second == Expr) 12571309124Sdim continue; 12572309124Sdim 12573309124Sdim OS.indent(Depth) << "[PSE]" << I << ":\n"; 12574309124Sdim OS.indent(Depth + 2) << *Expr << "\n"; 12575309124Sdim OS.indent(Depth + 2) << "--> " << *II->second.second << "\n"; 12576309124Sdim } 12577309124Sdim} 12578341825Sdim 12579341825Sdim// Match the mathematical pattern A - (A / B) * B, where A and B can be 12580341825Sdim// arbitrary expressions. 12581341825Sdim// It's not always easy, as A and B can be folded (imagine A is X / 2, and B is 12582341825Sdim// 4, A / B becomes X / 8). 12583341825Sdimbool ScalarEvolution::matchURem(const SCEV *Expr, const SCEV *&LHS, 12584341825Sdim const SCEV *&RHS) { 12585341825Sdim const auto *Add = dyn_cast<SCEVAddExpr>(Expr); 12586341825Sdim if (Add == nullptr || Add->getNumOperands() != 2) 12587341825Sdim return false; 12588341825Sdim 12589341825Sdim const SCEV *A = Add->getOperand(1); 12590341825Sdim const auto *Mul = dyn_cast<SCEVMulExpr>(Add->getOperand(0)); 12591341825Sdim 12592341825Sdim if (Mul == nullptr) 12593341825Sdim return false; 12594341825Sdim 12595341825Sdim const auto MatchURemWithDivisor = [&](const SCEV *B) { 12596341825Sdim // (SomeExpr + (-(SomeExpr / B) * B)). 
12597341825Sdim if (Expr == getURemExpr(A, B)) { 12598341825Sdim LHS = A; 12599341825Sdim RHS = B; 12600341825Sdim return true; 12601341825Sdim } 12602341825Sdim return false; 12603341825Sdim }; 12604341825Sdim 12605341825Sdim // (SomeExpr + (-1 * (SomeExpr / B) * B)). 12606341825Sdim if (Mul->getNumOperands() == 3 && isa<SCEVConstant>(Mul->getOperand(0))) 12607341825Sdim return MatchURemWithDivisor(Mul->getOperand(1)) || 12608341825Sdim MatchURemWithDivisor(Mul->getOperand(2)); 12609341825Sdim 12610341825Sdim // (SomeExpr + ((-SomeExpr / B) * B)) or (SomeExpr + ((SomeExpr / B) * -B)). 12611341825Sdim if (Mul->getNumOperands() == 2) 12612341825Sdim return MatchURemWithDivisor(Mul->getOperand(1)) || 12613341825Sdim MatchURemWithDivisor(Mul->getOperand(0)) || 12614341825Sdim MatchURemWithDivisor(getNegativeSCEV(Mul->getOperand(1))) || 12615341825Sdim MatchURemWithDivisor(getNegativeSCEV(Mul->getOperand(0))); 12616341825Sdim return false; 12617341825Sdim} 12618