1193323Sed//===- LoopStrengthReduce.cpp - Strength Reduce IVs in Loops --------------===// 2193323Sed// 3193323Sed// The LLVM Compiler Infrastructure 4193323Sed// 5193323Sed// This file is distributed under the University of Illinois Open Source 6193323Sed// License. See LICENSE.TXT for details. 7193323Sed// 8193323Sed//===----------------------------------------------------------------------===// 9193323Sed// 10193323Sed// This transformation analyzes and transforms the induction variables (and 11193323Sed// computations derived from them) into forms suitable for efficient execution 12193323Sed// on the target. 13193323Sed// 14193323Sed// This pass performs a strength reduction on array references inside loops that 15193323Sed// have as one or more of their components the loop induction variable, it 16193323Sed// rewrites expressions to take advantage of scaled-index addressing modes 17193323Sed// available on the target, and it performs a variety of other optimizations 18193323Sed// related to loop induction variables. 19193323Sed// 20203954Srdivacky// Terminology note: this code has a lot of handling for "post-increment" or 21203954Srdivacky// "post-inc" users. This is not talking about post-increment addressing modes; 22203954Srdivacky// it is instead talking about code like this: 23203954Srdivacky// 24203954Srdivacky// %i = phi [ 0, %entry ], [ %i.next, %latch ] 25203954Srdivacky// ... 26203954Srdivacky// %i.next = add %i, 1 27203954Srdivacky// %c = icmp eq %i.next, %n 28203954Srdivacky// 29203954Srdivacky// The SCEV for %i is {0,+,1}<%L>. The SCEV for %i.next is {1,+,1}<%L>, however 30203954Srdivacky// it's useful to think about these as the same register, with some uses using 31203954Srdivacky// the value of the register before the add and some using // it after. In this 32203954Srdivacky// example, the icmp is a post-increment user, since it uses %i.next, which is 33203954Srdivacky// the value of the induction variable after the increment. 
The other common 34203954Srdivacky// case of post-increment users is users outside the loop. 35203954Srdivacky// 36203954Srdivacky// TODO: More sophistication in the way Formulae are generated and filtered. 37203954Srdivacky// 38203954Srdivacky// TODO: Handle multiple loops at a time. 39203954Srdivacky// 40252723Sdim// TODO: Should the addressing mode BaseGV be changed to a ConstantExpr instead 41252723Sdim// of a GlobalValue? 42203954Srdivacky// 43203954Srdivacky// TODO: When truncation is free, truncate ICmp users' operands to make it a 44203954Srdivacky// smaller encoding (on x86 at least). 45203954Srdivacky// 46203954Srdivacky// TODO: When a negated register is used by an add (such as in a list of 47203954Srdivacky// multiple base registers, or as the increment expression in an addrec), 48203954Srdivacky// we may not actually need both reg and (-1 * reg) in registers; the 49203954Srdivacky// negation can be implemented by using a sub instead of an add. The 50203954Srdivacky// lack of support for taking this into consideration when making 51203954Srdivacky// register pressure decisions is partly worked around by the "Special" 52203954Srdivacky// use kind. 
53203954Srdivacky// 54193323Sed//===----------------------------------------------------------------------===// 55193323Sed 56193323Sed#define DEBUG_TYPE "loop-reduce" 57252723Sdim#include "llvm/Transforms/Scalar.h" 58252723Sdim#include "llvm/ADT/DenseSet.h" 59252723Sdim#include "llvm/ADT/SetVector.h" 60252723Sdim#include "llvm/ADT/SmallBitVector.h" 61252723Sdim#include "llvm/ADT/STLExtras.h" 62252723Sdim#include "llvm/Analysis/Dominators.h" 63193323Sed#include "llvm/Analysis/IVUsers.h" 64193323Sed#include "llvm/Analysis/LoopPass.h" 65193323Sed#include "llvm/Analysis/ScalarEvolutionExpander.h" 66252723Sdim#include "llvm/Analysis/TargetTransformInfo.h" 67218893Sdim#include "llvm/Assembly/Writer.h" 68252723Sdim#include "llvm/IR/Constants.h" 69252723Sdim#include "llvm/IR/DerivedTypes.h" 70252723Sdim#include "llvm/IR/Instructions.h" 71252723Sdim#include "llvm/IR/IntrinsicInst.h" 72252723Sdim#include "llvm/Support/CommandLine.h" 73193323Sed#include "llvm/Support/Debug.h" 74193323Sed#include "llvm/Support/ValueHandle.h" 75198090Srdivacky#include "llvm/Support/raw_ostream.h" 76252723Sdim#include "llvm/Transforms/Utils/BasicBlockUtils.h" 77252723Sdim#include "llvm/Transforms/Utils/Local.h" 78193323Sed#include <algorithm> 79193323Sedusing namespace llvm; 80193323Sed 81235633Sdim/// MaxIVUsers is an arbitrary threshold that provides an early opportunitiy for 82235633Sdim/// bail out. This threshold is far beyond the number of users that LSR can 83235633Sdim/// conceivably solve, so it should not affect generated code, but catches the 84235633Sdim/// worst cases before LSR burns too much compile time and stack space. 85235633Sdimstatic const unsigned MaxIVUsers = 200; 86226890Sdim 87226890Sdim// Temporary flag to cleanup congruent phis after LSR phi expansion. 88226890Sdim// It's currently disabled until we can determine whether it's truly useful or 89226890Sdim// not. The flag should be removed after the v3.0 release. 90235633Sdim// This is now needed for ivchains. 
static cl::opt<bool> EnablePhiElim(
  "enable-lsr-phielim", cl::Hidden, cl::init(true),
  cl::desc("Enable LSR phi elimination"));

#ifndef NDEBUG
// Stress test IV chain generation.
static cl::opt<bool> StressIVChain(
  "stress-ivchain", cl::Hidden, cl::init(false),
  cl::desc("Stress test LSR IV chains"));
#else
// In release (NDEBUG) builds the stress flag is compiled down to a constant
// false so the chain-stressing paths fold away entirely.
static bool StressIVChain = false;
#endif

namespace {

/// RegSortData - This class holds data which is used to order reuse candidates.
class RegSortData {
public:
  /// UsedByIndices - This represents the set of LSRUse indices which reference
  /// a particular register.
  SmallBitVector UsedByIndices;

  RegSortData() {}

  void print(raw_ostream &OS) const;
  void dump() const;
};

}

/// print - Emit a short human-readable summary (just the use count) for
/// debugging output.
void RegSortData::print(raw_ostream &OS) const {
  OS << "[NumUses=" << UsedByIndices.count() << ']';
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void RegSortData::dump() const {
  print(errs()); errs() << '\n';
}
#endif

/// RegUseTracker - Map register candidates to information about how they are
/// used.
class RegUseTracker {
  typedef DenseMap<const SCEV *, RegSortData> RegUsesTy;

  // RegUsesMap holds the per-register use information; RegSequence remembers
  // the order in which registers were first seen so iteration over the
  // tracker is deterministic.
  RegUsesTy RegUsesMap;
  SmallVector<const SCEV *, 16> RegSequence;

public:
  void CountRegister(const SCEV *Reg, size_t LUIdx);
  void DropRegister(const SCEV *Reg, size_t LUIdx);
  void SwapAndDropUse(size_t LUIdx, size_t LastLUIdx);

  bool isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const;

  const SmallBitVector &getUsedByIndices(const SCEV *Reg) const;

  void clear();

  // Iteration visits registers in first-seen order (RegSequence), not in
  // DenseMap order.
  typedef SmallVectorImpl<const SCEV *>::iterator iterator;
  typedef SmallVectorImpl<const SCEV *>::const_iterator const_iterator;
  iterator begin() { return RegSequence.begin(); }
  iterator end() { return RegSequence.end(); }
  const_iterator begin() const { return RegSequence.begin(); }
  const_iterator end() const { return RegSequence.end(); }
};

}

/// CountRegister - Record that the use with index LUIdx references Reg,
/// adding Reg to the iteration sequence the first time it is seen.
void
RegUseTracker::CountRegister(const SCEV *Reg, size_t LUIdx) {
  std::pair<RegUsesTy::iterator, bool> Pair =
    RegUsesMap.insert(std::make_pair(Reg, RegSortData()));
  RegSortData &RSD = Pair.first->second;
  if (Pair.second)
    RegSequence.push_back(Reg);
  // Grow the bit vector if needed before setting the bit for this use.
  RSD.UsedByIndices.resize(std::max(RSD.UsedByIndices.size(), LUIdx + 1));
  RSD.UsedByIndices.set(LUIdx);
}

/// DropRegister - Record that the use with index LUIdx no longer references
/// Reg. Reg itself remains in the map and in RegSequence.
void
RegUseTracker::DropRegister(const SCEV *Reg, size_t LUIdx) {
  RegUsesTy::iterator It = RegUsesMap.find(Reg);
  assert(It != RegUsesMap.end());
  RegSortData &RSD = It->second;
  assert(RSD.UsedByIndices.size() > LUIdx);
  RSD.UsedByIndices.reset(LUIdx);
}

/// SwapAndDropUse - The use at LastLUIdx has been moved into slot LUIdx and
/// the use formerly at LUIdx dropped; rewrite every register's use-index
/// bit vector to match, and truncate them past LastLUIdx.
void
RegUseTracker::SwapAndDropUse(size_t LUIdx, size_t LastLUIdx) {
  assert(LUIdx <= LastLUIdx);

  // Update RegUses. The data structure is not optimized for this purpose;
  // we must iterate through it and update each of the bit vectors.
  for (RegUsesTy::iterator I = RegUsesMap.begin(), E = RegUsesMap.end();
       I != E; ++I) {
    SmallBitVector &UsedByIndices = I->second.UsedByIndices;
    // Bit LUIdx takes the value bit LastLUIdx had (0 if the vector never
    // grew that far), then everything at or beyond LastLUIdx is discarded.
    if (LUIdx < UsedByIndices.size())
      UsedByIndices[LUIdx] =
        LastLUIdx < UsedByIndices.size() ? UsedByIndices[LastLUIdx] : 0;
    UsedByIndices.resize(std::min(UsedByIndices.size(), LastLUIdx));
  }
}

/// isRegUsedByUsesOtherThan - Return true if Reg is referenced by at least
/// one use whose index is not LUIdx.
bool
RegUseTracker::isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const {
  RegUsesTy::const_iterator I = RegUsesMap.find(Reg);
  if (I == RegUsesMap.end())
    return false;
  const SmallBitVector &UsedByIndices = I->second.UsedByIndices;
  int i = UsedByIndices.find_first();
  if (i == -1) return false;
  // If the first set bit is some other use, we're done; otherwise look for
  // a second set bit.
  if ((size_t)i != LUIdx) return true;
  return UsedByIndices.find_next(i) != -1;
}

/// getUsedByIndices - Return the set of use indices referencing Reg, which
/// must already be tracked.
const SmallBitVector &RegUseTracker::getUsedByIndices(const SCEV *Reg) const {
  RegUsesTy::const_iterator I = RegUsesMap.find(Reg);
  assert(I != RegUsesMap.end() && "Unknown register!");
  return I->second.UsedByIndices;
}

void RegUseTracker::clear() {
  RegUsesMap.clear();
  RegSequence.clear();
}

namespace {

/// Formula - This class holds information that describes a formula for
/// computing satisfying a use. It may include broken-out immediates and scaled
/// registers.
struct Formula {
  /// Global base address used for complex addressing.
  GlobalValue *BaseGV;

  /// Base offset for complex addressing.
  int64_t BaseOffset;

  /// Whether any complex addressing has a base register.
  bool HasBaseReg;

  /// The scale of any complex addressing.
  int64_t Scale;

  /// BaseRegs - The list of "base" registers for this use. When this is
  /// non-empty, HasBaseReg should also be set (Formula::print reports any
  /// disagreement between the two as an error).
  SmallVector<const SCEV *, 4> BaseRegs;

  /// ScaledReg - The 'scaled' register for this use. This should be non-null
  /// when Scale is not zero.
  const SCEV *ScaledReg;

  /// UnfoldedOffset - An additional constant offset which added near the
  /// use. This requires a temporary register, but the offset itself can
  /// live in an add immediate field rather than a register.
  int64_t UnfoldedOffset;

  Formula()
      : BaseGV(0), BaseOffset(0), HasBaseReg(false), Scale(0), ScaledReg(0),
        UnfoldedOffset(0) {}

  void InitialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE);

  unsigned getNumRegs() const;
  Type *getType() const;

  void DeleteBaseReg(const SCEV *&S);

  bool referencesReg(const SCEV *S) const;
  bool hasRegsUsedByUsesOtherThan(size_t LUIdx,
                                  const RegUseTracker &RegUses) const;

  void print(raw_ostream &OS) const;
  void dump() const;
};

}

/// DoInitialMatch - Recursion helper for InitialMatch. Splits S into
/// expressions that properly dominate the loop header (Good) and everything
/// else (Bad).
static void DoInitialMatch(const SCEV *S, Loop *L,
                           SmallVectorImpl<const SCEV *> &Good,
                           SmallVectorImpl<const SCEV *> &Bad,
                           ScalarEvolution &SE) {
  // Collect expressions which properly dominate the loop header.
  if (SE.properlyDominates(S, L->getHeader())) {
    Good.push_back(S);
    return;
  }

  // Look at add operands.
  if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
    for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
         I != E; ++I)
      DoInitialMatch(*I, L, Good, Bad, SE);
    return;
  }

  // Look at addrec operands: split {Start,+,Step} into Start plus the
  // zero-based recurrence {0,+,Step}.
  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
    if (!AR->getStart()->isZero()) {
      DoInitialMatch(AR->getStart(), L, Good, Bad, SE);
      DoInitialMatch(SE.getAddRecExpr(SE.getConstant(AR->getType(), 0),
                                      AR->getStepRecurrence(SE),
                                      // FIXME: AR->getNoWrapFlags()
                                      AR->getLoop(), SCEV::FlagAnyWrap),
                     L, Good, Bad, SE);
      return;
    }

  // Handle a multiplication by -1 (negation) if it didn't fold: match the
  // un-negated operand, then re-apply the negation to each resulting part.
  if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S))
    if (Mul->getOperand(0)->isAllOnesValue()) {
      SmallVector<const SCEV *, 4> Ops(Mul->op_begin()+1, Mul->op_end());
      const SCEV *NewMul = SE.getMulExpr(Ops);

      SmallVector<const SCEV *, 4> MyGood;
      SmallVector<const SCEV *, 4> MyBad;
      DoInitialMatch(NewMul, L, MyGood, MyBad, SE);
      const SCEV *NegOne = SE.getSCEV(ConstantInt::getAllOnesValue(
        SE.getEffectiveSCEVType(NewMul->getType())));
      for (SmallVectorImpl<const SCEV *>::const_iterator I = MyGood.begin(),
           E = MyGood.end(); I != E; ++I)
        Good.push_back(SE.getMulExpr(NegOne, *I));
      for (SmallVectorImpl<const SCEV *>::const_iterator I = MyBad.begin(),
           E = MyBad.end(); I != E; ++I)
        Bad.push_back(SE.getMulExpr(NegOne, *I));
      return;
    }

  // Ok, we can't do anything interesting. Just stuff the whole thing into a
  // register and hope for the best.
  Bad.push_back(S);
}

/// InitialMatch - Incorporate loop-variant parts of S into this Formula,
/// attempting to keep all loop-invariant and loop-computable values in a
/// single base register.
void Formula::InitialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE) {
  SmallVector<const SCEV *, 4> Good;
  SmallVector<const SCEV *, 4> Bad;
  DoInitialMatch(S, L, Good, Bad, SE);
  if (!Good.empty()) {
    const SCEV *Sum = SE.getAddExpr(Good);
    // A zero sum still counts as having consumed a base register slot.
    if (!Sum->isZero())
      BaseRegs.push_back(Sum);
    HasBaseReg = true;
  }
  if (!Bad.empty()) {
    const SCEV *Sum = SE.getAddExpr(Bad);
    if (!Sum->isZero())
      BaseRegs.push_back(Sum);
    HasBaseReg = true;
  }
}

/// getNumRegs - Return the total number of register operands used by this
/// formula. This does not include register uses implied by non-constant
/// addrec strides.
unsigned Formula::getNumRegs() const {
  return !!ScaledReg + BaseRegs.size();
}

/// getType - Return the type of this formula, if it has one, or null
/// otherwise. This type is meaningless except for the bit size.
Type *Formula::getType() const {
  return !BaseRegs.empty() ? BaseRegs.front()->getType() :
         ScaledReg ? ScaledReg->getType() :
         BaseGV ? BaseGV->getType() :
         0;
}

/// DeleteBaseReg - Delete the given base reg from the BaseRegs list.
void Formula::DeleteBaseReg(const SCEV *&S) {
  // S refers directly into BaseRegs: swap it with the last element (unless
  // it already is the last) and pop, deleting in O(1) without preserving
  // the order of the remaining registers.
  if (&S != &BaseRegs.back())
    std::swap(S, BaseRegs.back());
  BaseRegs.pop_back();
}

/// referencesReg - Test if this formula references the given register.
bool Formula::referencesReg(const SCEV *S) const {
  return S == ScaledReg ||
         std::find(BaseRegs.begin(), BaseRegs.end(), S) != BaseRegs.end();
}

/// hasRegsUsedByUsesOtherThan - Test whether this formula uses registers
/// which are used by uses other than the use with the given index.
bool Formula::hasRegsUsedByUsesOtherThan(size_t LUIdx,
                                         const RegUseTracker &RegUses) const {
  if (ScaledReg)
    if (RegUses.isRegUsedByUsesOtherThan(ScaledReg, LUIdx))
      return true;
  for (SmallVectorImpl<const SCEV *>::const_iterator I = BaseRegs.begin(),
       E = BaseRegs.end(); I != E; ++I)
    if (RegUses.isRegUsedByUsesOtherThan(*I, LUIdx))
      return true;
  return false;
}

/// print - Render the formula as a "+"-separated sum of its components for
/// debugging output, flagging an inconsistent HasBaseReg as an error.
void Formula::print(raw_ostream &OS) const {
  bool First = true;
  if (BaseGV) {
    if (!First) OS << " + "; else First = false;
    WriteAsOperand(OS, BaseGV, /*PrintType=*/false);
  }
  if (BaseOffset != 0) {
    if (!First) OS << " + "; else First = false;
    OS << BaseOffset;
  }
  for (SmallVectorImpl<const SCEV *>::const_iterator I = BaseRegs.begin(),
       E = BaseRegs.end(); I != E; ++I) {
    if (!First) OS << " + "; else First = false;
    OS << "reg(" << **I << ')';
  }
  // Diagnose formulae whose HasBaseReg flag disagrees with BaseRegs.
  if (HasBaseReg && BaseRegs.empty()) {
    if (!First) OS << " + "; else First = false;
    OS << "**error: HasBaseReg**";
  } else if (!HasBaseReg && !BaseRegs.empty()) {
    if (!First) OS << " + "; else First = false;
    OS << "**error: !HasBaseReg**";
  }
  if (Scale != 0) {
    if (!First) OS << " + "; else First = false;
    OS << Scale << "*reg(";
    if (ScaledReg)
      OS << *ScaledReg;
    else
      OS << "<unknown>";
    OS << ')';
  }
  if (UnfoldedOffset != 0) {
    if (!First) OS << " + "; else First = false;
    OS << "imm(" << UnfoldedOffset << ')';
  }
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void Formula::dump() const {
  print(errs()); errs() << '\n';
}
#endif

/// isAddRecSExtable - Return true if the given addrec can be sign-extended
/// without changing its value. Proven by sign-extending to a type one bit
/// wider and checking that the result is still an addrec (i.e. the extension
/// folded through it).
static bool isAddRecSExtable(const SCEVAddRecExpr *AR, ScalarEvolution &SE) {
  Type *WideTy =
    IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(AR->getType()) + 1);
  return isa<SCEVAddRecExpr>(SE.getSignExtendExpr(AR, WideTy));
}

/// isAddSExtable - Return true if the given add can be sign-extended
/// without changing its value.
static bool isAddSExtable(const SCEVAddExpr *A, ScalarEvolution &SE) {
  // Same trick as isAddRecSExtable: widen by one bit and check the
  // sign-extension folds through the add.
  Type *WideTy =
    IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(A->getType()) + 1);
  return isa<SCEVAddExpr>(SE.getSignExtendExpr(A, WideTy));
}

/// isMulSExtable - Return true if the given mul can be sign-extended
/// without changing its value. The target width is the operand width times
/// the operand count, enough to hold any product of the operands.
static bool isMulSExtable(const SCEVMulExpr *M, ScalarEvolution &SE) {
  Type *WideTy =
    IntegerType::get(SE.getContext(),
                     SE.getTypeSizeInBits(M->getType()) * M->getNumOperands());
  return isa<SCEVMulExpr>(SE.getSignExtendExpr(M, WideTy));
}

/// getExactSDiv - Return an expression for LHS /s RHS, if it can be determined
/// and if the remainder is known to be zero, or null otherwise. If
/// IgnoreSignificantBits is true, expressions like (X * Y) /s Y are simplified
/// to Y, ignoring that the multiplication may overflow, which is useful when
/// the result will be used in a context where the most significant bits are
/// ignored.
static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,
                                ScalarEvolution &SE,
                                bool IgnoreSignificantBits = false) {
  // Handle the trivial case, which works for any SCEV type.
  if (LHS == RHS)
    return SE.getConstant(LHS->getType(), 1);

  // Handle a few RHS special cases.
  const SCEVConstant *RC = dyn_cast<SCEVConstant>(RHS);
  if (RC) {
    const APInt &RA = RC->getValue()->getValue();
    // Handle x /s -1 as x * -1, to give ScalarEvolution a chance to do
    // some folding.
    if (RA.isAllOnesValue())
      return SE.getMulExpr(LHS, RC);
    // Handle x /s 1 as x.
    if (RA == 1)
      return LHS;
  }

  // Check for a division of a constant by a constant. Null means "not
  // exactly divisible" throughout this function.
  if (const SCEVConstant *C = dyn_cast<SCEVConstant>(LHS)) {
    if (!RC)
      return 0;
    const APInt &LA = C->getValue()->getValue();
    const APInt &RA = RC->getValue()->getValue();
    if (LA.srem(RA) != 0)
      return 0;
    return SE.getConstant(LA.sdiv(RA));
  }

  // Distribute the sdiv over addrec operands, if the addrec doesn't overflow.
  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS)) {
    if (IgnoreSignificantBits || isAddRecSExtable(AR, SE)) {
      const SCEV *Step = getExactSDiv(AR->getStepRecurrence(SE), RHS, SE,
                                      IgnoreSignificantBits);
      if (!Step) return 0;
      const SCEV *Start = getExactSDiv(AR->getStart(), RHS, SE,
                                       IgnoreSignificantBits);
      if (!Start) return 0;
      // FlagNW is independent of the start value, step direction, and is
      // preserved with smaller magnitude steps.
      // FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
      return SE.getAddRecExpr(Start, Step, AR->getLoop(), SCEV::FlagAnyWrap);
    }
    return 0;
  }

  // Distribute the sdiv over add operands, if the add doesn't overflow.
  // Every operand must divide exactly, or the whole division fails.
  if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(LHS)) {
    if (IgnoreSignificantBits || isAddSExtable(Add, SE)) {
      SmallVector<const SCEV *, 8> Ops;
      for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
           I != E; ++I) {
        const SCEV *Op = getExactSDiv(*I, RHS, SE,
                                      IgnoreSignificantBits);
        if (!Op) return 0;
        Ops.push_back(Op);
      }
      return SE.getAddExpr(Ops);
    }
    return 0;
  }

  // Check for a multiply operand that we can pull RHS out of. Only the
  // first divisible operand is divided; the rest pass through unchanged.
  if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(LHS)) {
    if (IgnoreSignificantBits || isMulSExtable(Mul, SE)) {
      SmallVector<const SCEV *, 4> Ops;
      bool Found = false;
      for (SCEVMulExpr::op_iterator I = Mul->op_begin(), E = Mul->op_end();
           I != E; ++I) {
        const SCEV *S = *I;
        if (!Found)
          if (const SCEV *Q = getExactSDiv(S, RHS, SE,
                                           IgnoreSignificantBits)) {
            S = Q;
            Found = true;
          }
        Ops.push_back(S);
      }
      return Found ? SE.getMulExpr(Ops) : 0;
    }
    return 0;
  }

  // Otherwise we don't know.
  return 0;
}

/// ExtractImmediate - If S involves the addition of a constant integer value,
/// return that integer value, and mutate S to point to a new SCEV with that
/// value excluded.
559203954Srdivackystatic int64_t ExtractImmediate(const SCEV *&S, ScalarEvolution &SE) { 560203954Srdivacky if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) { 561203954Srdivacky if (C->getValue()->getValue().getMinSignedBits() <= 64) { 562207618Srdivacky S = SE.getConstant(C->getType(), 0); 563203954Srdivacky return C->getValue()->getSExtValue(); 564203954Srdivacky } 565203954Srdivacky } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { 566203954Srdivacky SmallVector<const SCEV *, 8> NewOps(Add->op_begin(), Add->op_end()); 567203954Srdivacky int64_t Result = ExtractImmediate(NewOps.front(), SE); 568212904Sdim if (Result != 0) 569212904Sdim S = SE.getAddExpr(NewOps); 570203954Srdivacky return Result; 571203954Srdivacky } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { 572203954Srdivacky SmallVector<const SCEV *, 8> NewOps(AR->op_begin(), AR->op_end()); 573203954Srdivacky int64_t Result = ExtractImmediate(NewOps.front(), SE); 574212904Sdim if (Result != 0) 575221345Sdim S = SE.getAddRecExpr(NewOps, AR->getLoop(), 576221345Sdim // FIXME: AR->getNoWrapFlags(SCEV::FlagNW) 577221345Sdim SCEV::FlagAnyWrap); 578203954Srdivacky return Result; 579193323Sed } 580203954Srdivacky return 0; 581193323Sed} 582193323Sed 583203954Srdivacky/// ExtractSymbol - If S involves the addition of a GlobalValue address, 584203954Srdivacky/// return that symbol, and mutate S to point to a new SCEV with that 585203954Srdivacky/// value excluded. 
static GlobalValue *ExtractSymbol(const SCEV *&S, ScalarEvolution &SE) {
  if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
    if (GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue())) {
      S = SE.getConstant(GV->getType(), 0);
      return GV;
    }
  } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
    SmallVector<const SCEV *, 8> NewOps(Add->op_begin(), Add->op_end());
    // NOTE: for adds the symbol is sought in the *last* operand (unlike
    // ExtractImmediate, which looks at the front) — presumably because SCEV
    // operand ordering places SCEVUnknowns at the end; confirm against
    // ScalarEvolution's canonical ordering before changing.
    GlobalValue *Result = ExtractSymbol(NewOps.back(), SE);
    if (Result)
      S = SE.getAddExpr(NewOps);
    return Result;
  } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
    SmallVector<const SCEV *, 8> NewOps(AR->op_begin(), AR->op_end());
    // For addrecs, extract from the start value (the first operand).
    GlobalValue *Result = ExtractSymbol(NewOps.front(), SE);
    if (Result)
      S = SE.getAddRecExpr(NewOps, AR->getLoop(),
                           // FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
                           SCEV::FlagAnyWrap);
    return Result;
  }
  // No symbol found; S is left unchanged.
  return 0;
}

/// isAddressUse - Returns true if the specified instruction is using the
/// specified value as an address.
static bool isAddressUse(Instruction *Inst, Value *OperandVal) {
  bool isAddress = isa<LoadInst>(Inst);
  if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
    // A store addresses through its second operand; the first is the value
    // being stored.
    if (SI->getOperand(1) == OperandVal)
      isAddress = true;
  } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
    // Addressing modes can also be folded into prefetches and a variety
    // of intrinsics.
    switch (II->getIntrinsicID()) {
      default: break;
      case Intrinsic::prefetch:
      case Intrinsic::x86_sse_storeu_ps:
      case Intrinsic::x86_sse2_storeu_pd:
      case Intrinsic::x86_sse2_storeu_dq:
      case Intrinsic::x86_sse2_storel_dq:
        if (II->getArgOperand(0) == OperandVal)
          isAddress = true;
        break;
    }
  }
  return isAddress;
}

/// getAccessType - Return the type of the memory being accessed.
static Type *getAccessType(const Instruction *Inst) {
  Type *AccessTy = Inst->getType();
  if (const StoreInst *SI = dyn_cast<StoreInst>(Inst))
    AccessTy = SI->getOperand(0)->getType();
  else if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
    // Addressing modes can also be folded into prefetches and a variety
    // of intrinsics.
    switch (II->getIntrinsicID()) {
    default: break;
    case Intrinsic::x86_sse_storeu_ps:
    case Intrinsic::x86_sse2_storeu_pd:
    case Intrinsic::x86_sse2_storeu_dq:
    case Intrinsic::x86_sse2_storel_dq:
      AccessTy = II->getArgOperand(0)->getType();
      break;
    }
  }

  // All pointers have the same requirements, so canonicalize them to an
  // arbitrary pointer type to minimize variation.
  if (PointerType *PTy = dyn_cast<PointerType>(AccessTy))
    AccessTy = PointerType::get(IntegerType::get(PTy->getContext(), 1),
                                PTy->getAddressSpace());

  return AccessTy;
}

/// isExistingPhi - Return true if this AddRec is already a phi in its loop.
664235633Sdimstatic bool isExistingPhi(const SCEVAddRecExpr *AR, ScalarEvolution &SE) { 665235633Sdim for (BasicBlock::iterator I = AR->getLoop()->getHeader()->begin(); 666235633Sdim PHINode *PN = dyn_cast<PHINode>(I); ++I) { 667235633Sdim if (SE.isSCEVable(PN->getType()) && 668235633Sdim (SE.getEffectiveSCEVType(PN->getType()) == 669235633Sdim SE.getEffectiveSCEVType(AR->getType())) && 670235633Sdim SE.getSCEV(PN) == AR) 671235633Sdim return true; 672235633Sdim } 673235633Sdim return false; 674235633Sdim} 675235633Sdim 676235633Sdim/// Check if expanding this expression is likely to incur significant cost. This 677235633Sdim/// is tricky because SCEV doesn't track which expressions are actually computed 678235633Sdim/// by the current IR. 679235633Sdim/// 680235633Sdim/// We currently allow expansion of IV increments that involve adds, 681235633Sdim/// multiplication by constants, and AddRecs from existing phis. 682235633Sdim/// 683235633Sdim/// TODO: Allow UDivExpr if we can find an existing IV increment that is an 684235633Sdim/// obvious multiple of the UDivExpr. 
685235633Sdimstatic bool isHighCostExpansion(const SCEV *S, 686235633Sdim SmallPtrSet<const SCEV*, 8> &Processed, 687235633Sdim ScalarEvolution &SE) { 688235633Sdim // Zero/One operand expressions 689235633Sdim switch (S->getSCEVType()) { 690235633Sdim case scUnknown: 691235633Sdim case scConstant: 692235633Sdim return false; 693235633Sdim case scTruncate: 694235633Sdim return isHighCostExpansion(cast<SCEVTruncateExpr>(S)->getOperand(), 695235633Sdim Processed, SE); 696235633Sdim case scZeroExtend: 697235633Sdim return isHighCostExpansion(cast<SCEVZeroExtendExpr>(S)->getOperand(), 698235633Sdim Processed, SE); 699235633Sdim case scSignExtend: 700235633Sdim return isHighCostExpansion(cast<SCEVSignExtendExpr>(S)->getOperand(), 701235633Sdim Processed, SE); 702235633Sdim } 703235633Sdim 704235633Sdim if (!Processed.insert(S)) 705235633Sdim return false; 706235633Sdim 707235633Sdim if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { 708235633Sdim for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end(); 709235633Sdim I != E; ++I) { 710235633Sdim if (isHighCostExpansion(*I, Processed, SE)) 711235633Sdim return true; 712235633Sdim } 713235633Sdim return false; 714235633Sdim } 715235633Sdim 716235633Sdim if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) { 717235633Sdim if (Mul->getNumOperands() == 2) { 718235633Sdim // Multiplication by a constant is ok 719235633Sdim if (isa<SCEVConstant>(Mul->getOperand(0))) 720235633Sdim return isHighCostExpansion(Mul->getOperand(1), Processed, SE); 721235633Sdim 722235633Sdim // If we have the value of one operand, check if an existing 723235633Sdim // multiplication already generates this expression. 724235633Sdim if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(Mul->getOperand(1))) { 725235633Sdim Value *UVal = U->getValue(); 726235633Sdim for (Value::use_iterator UI = UVal->use_begin(), UE = UVal->use_end(); 727235633Sdim UI != UE; ++UI) { 728235633Sdim // If U is a constant, it may be used by a ConstantExpr. 
729235633Sdim Instruction *User = dyn_cast<Instruction>(*UI); 730235633Sdim if (User && User->getOpcode() == Instruction::Mul 731235633Sdim && SE.isSCEVable(User->getType())) { 732235633Sdim return SE.getSCEV(User) == Mul; 733235633Sdim } 734235633Sdim } 735235633Sdim } 736235633Sdim } 737235633Sdim } 738235633Sdim 739235633Sdim if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { 740235633Sdim if (isExistingPhi(AR, SE)) 741235633Sdim return false; 742235633Sdim } 743235633Sdim 744235633Sdim // Fow now, consider any other type of expression (div/mul/min/max) high cost. 745235633Sdim return true; 746235633Sdim} 747235633Sdim 748203954Srdivacky/// DeleteTriviallyDeadInstructions - If any of the instructions is the 749203954Srdivacky/// specified set are trivially dead, delete them and see if this makes any of 750203954Srdivacky/// their operands subsequently dead. 751203954Srdivackystatic bool 752203954SrdivackyDeleteTriviallyDeadInstructions(SmallVectorImpl<WeakVH> &DeadInsts) { 753203954Srdivacky bool Changed = false; 754199481Srdivacky 755203954Srdivacky while (!DeadInsts.empty()) { 756245431Sdim Value *V = DeadInsts.pop_back_val(); 757245431Sdim Instruction *I = dyn_cast_or_null<Instruction>(V); 758193323Sed 759203954Srdivacky if (I == 0 || !isInstructionTriviallyDead(I)) 760203954Srdivacky continue; 761193323Sed 762203954Srdivacky for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI) 763203954Srdivacky if (Instruction *U = dyn_cast<Instruction>(*OI)) { 764203954Srdivacky *OI = 0; 765203954Srdivacky if (U->use_empty()) 766203954Srdivacky DeadInsts.push_back(U); 767203954Srdivacky } 768193323Sed 769203954Srdivacky I->eraseFromParent(); 770203954Srdivacky Changed = true; 771203954Srdivacky } 772193323Sed 773203954Srdivacky return Changed; 774203954Srdivacky} 775199481Srdivacky 776203954Srdivackynamespace { 777263509Sdimclass LSRUse; 778263509Sdim} 779263509Sdim// Check if it is legal to fold 2 base registers. 
780263509Sdimstatic bool isLegal2RegAMUse(const TargetTransformInfo &TTI, const LSRUse &LU, 781263509Sdim const Formula &F); 782263509Sdim// Get the cost of the scaling factor used in F for LU. 783263509Sdimstatic unsigned getScalingFactorCost(const TargetTransformInfo &TTI, 784263509Sdim const LSRUse &LU, const Formula &F); 785193323Sed 786263509Sdimnamespace { 787263509Sdim 788203954Srdivacky/// Cost - This class is used to measure and compare candidate formulae. 789203954Srdivackyclass Cost { 790203954Srdivacky /// TODO: Some of these could be merged. Also, a lexical ordering 791203954Srdivacky /// isn't always optimal. 792203954Srdivacky unsigned NumRegs; 793203954Srdivacky unsigned AddRecCost; 794203954Srdivacky unsigned NumIVMuls; 795203954Srdivacky unsigned NumBaseAdds; 796203954Srdivacky unsigned ImmCost; 797203954Srdivacky unsigned SetupCost; 798263509Sdim unsigned ScaleCost; 799199481Srdivacky 800203954Srdivackypublic: 801203954Srdivacky Cost() 802203954Srdivacky : NumRegs(0), AddRecCost(0), NumIVMuls(0), NumBaseAdds(0), ImmCost(0), 803263509Sdim SetupCost(0), ScaleCost(0) {} 804193323Sed 805203954Srdivacky bool operator<(const Cost &Other) const; 806199481Srdivacky 807203954Srdivacky void Loose(); 808193323Sed 809226890Sdim#ifndef NDEBUG 810226890Sdim // Once any of the metrics loses, they must all remain losers. 
811226890Sdim bool isValid() { 812226890Sdim return ((NumRegs | AddRecCost | NumIVMuls | NumBaseAdds 813263509Sdim | ImmCost | SetupCost | ScaleCost) != ~0u) 814226890Sdim || ((NumRegs & AddRecCost & NumIVMuls & NumBaseAdds 815263509Sdim & ImmCost & SetupCost & ScaleCost) == ~0u); 816226890Sdim } 817226890Sdim#endif 818226890Sdim 819226890Sdim bool isLoser() { 820226890Sdim assert(isValid() && "invalid cost"); 821226890Sdim return NumRegs == ~0u; 822226890Sdim } 823226890Sdim 824263509Sdim void RateFormula(const TargetTransformInfo &TTI, 825263509Sdim const Formula &F, 826203954Srdivacky SmallPtrSet<const SCEV *, 16> &Regs, 827203954Srdivacky const DenseSet<const SCEV *> &VisitedRegs, 828203954Srdivacky const Loop *L, 829203954Srdivacky const SmallVectorImpl<int64_t> &Offsets, 830235633Sdim ScalarEvolution &SE, DominatorTree &DT, 831263509Sdim const LSRUse &LU, 832235633Sdim SmallPtrSet<const SCEV *, 16> *LoserRegs = 0); 833193323Sed 834203954Srdivacky void print(raw_ostream &OS) const; 835203954Srdivacky void dump() const; 836203954Srdivacky 837203954Srdivackyprivate: 838203954Srdivacky void RateRegister(const SCEV *Reg, 839203954Srdivacky SmallPtrSet<const SCEV *, 16> &Regs, 840203954Srdivacky const Loop *L, 841203954Srdivacky ScalarEvolution &SE, DominatorTree &DT); 842203954Srdivacky void RatePrimaryRegister(const SCEV *Reg, 843203954Srdivacky SmallPtrSet<const SCEV *, 16> &Regs, 844203954Srdivacky const Loop *L, 845235633Sdim ScalarEvolution &SE, DominatorTree &DT, 846235633Sdim SmallPtrSet<const SCEV *, 16> *LoserRegs); 847203954Srdivacky}; 848203954Srdivacky 849193323Sed} 850193323Sed 851203954Srdivacky/// RateRegister - Tally up interesting quantities from the given register. 
852203954Srdivackyvoid Cost::RateRegister(const SCEV *Reg, 853203954Srdivacky SmallPtrSet<const SCEV *, 16> &Regs, 854203954Srdivacky const Loop *L, 855203954Srdivacky ScalarEvolution &SE, DominatorTree &DT) { 856203954Srdivacky if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Reg)) { 857226890Sdim // If this is an addrec for another loop, don't second-guess its addrec phi 858226890Sdim // nodes. LSR isn't currently smart enough to reason about more than one 859235633Sdim // loop at a time. LSR has already run on inner loops, will not run on outer 860235633Sdim // loops, and cannot be expected to change sibling loops. 861235633Sdim if (AR->getLoop() != L) { 862235633Sdim // If the AddRec exists, consider it's register free and leave it alone. 863235633Sdim if (isExistingPhi(AR, SE)) 864226890Sdim return; 865235633Sdim 866235633Sdim // Otherwise, do not consider this formula at all. 867235633Sdim Loose(); 868235633Sdim return; 869193323Sed } 870235633Sdim AddRecCost += 1; /// TODO: This should be a function of the stride. 871193323Sed 872203954Srdivacky // Add the step value register, if it needs one. 873203954Srdivacky // TODO: The non-affine case isn't precisely modeled here. 874226890Sdim if (!AR->isAffine() || !isa<SCEVConstant>(AR->getOperand(1))) { 875226890Sdim if (!Regs.count(AR->getOperand(1))) { 876203954Srdivacky RateRegister(AR->getOperand(1), Regs, L, SE, DT); 877226890Sdim if (isLoser()) 878226890Sdim return; 879226890Sdim } 880226890Sdim } 881193323Sed } 882203954Srdivacky ++NumRegs; 883193323Sed 884203954Srdivacky // Rough heuristic; favor registers which don't require extra setup 885203954Srdivacky // instructions in the preheader. 
886203954Srdivacky if (!isa<SCEVUnknown>(Reg) && 887203954Srdivacky !isa<SCEVConstant>(Reg) && 888203954Srdivacky !(isa<SCEVAddRecExpr>(Reg) && 889203954Srdivacky (isa<SCEVUnknown>(cast<SCEVAddRecExpr>(Reg)->getStart()) || 890203954Srdivacky isa<SCEVConstant>(cast<SCEVAddRecExpr>(Reg)->getStart())))) 891203954Srdivacky ++SetupCost; 892218893Sdim 893218893Sdim NumIVMuls += isa<SCEVMulExpr>(Reg) && 894218893Sdim SE.hasComputableLoopEvolution(Reg, L); 895203954Srdivacky} 896193323Sed 897203954Srdivacky/// RatePrimaryRegister - Record this register in the set. If we haven't seen it 898235633Sdim/// before, rate it. Optional LoserRegs provides a way to declare any formula 899235633Sdim/// that refers to one of those regs an instant loser. 900203954Srdivackyvoid Cost::RatePrimaryRegister(const SCEV *Reg, 901204642Srdivacky SmallPtrSet<const SCEV *, 16> &Regs, 902204642Srdivacky const Loop *L, 903235633Sdim ScalarEvolution &SE, DominatorTree &DT, 904235633Sdim SmallPtrSet<const SCEV *, 16> *LoserRegs) { 905235633Sdim if (LoserRegs && LoserRegs->count(Reg)) { 906235633Sdim Loose(); 907235633Sdim return; 908235633Sdim } 909235633Sdim if (Regs.insert(Reg)) { 910203954Srdivacky RateRegister(Reg, Regs, L, SE, DT); 911252723Sdim if (LoserRegs && isLoser()) 912235633Sdim LoserRegs->insert(Reg); 913235633Sdim } 914203954Srdivacky} 915193323Sed 916263509Sdimvoid Cost::RateFormula(const TargetTransformInfo &TTI, 917263509Sdim const Formula &F, 918203954Srdivacky SmallPtrSet<const SCEV *, 16> &Regs, 919203954Srdivacky const DenseSet<const SCEV *> &VisitedRegs, 920203954Srdivacky const Loop *L, 921203954Srdivacky const SmallVectorImpl<int64_t> &Offsets, 922235633Sdim ScalarEvolution &SE, DominatorTree &DT, 923263509Sdim const LSRUse &LU, 924235633Sdim SmallPtrSet<const SCEV *, 16> *LoserRegs) { 925203954Srdivacky // Tally up the registers. 
926203954Srdivacky if (const SCEV *ScaledReg = F.ScaledReg) { 927203954Srdivacky if (VisitedRegs.count(ScaledReg)) { 928203954Srdivacky Loose(); 929203954Srdivacky return; 930193323Sed } 931235633Sdim RatePrimaryRegister(ScaledReg, Regs, L, SE, DT, LoserRegs); 932226890Sdim if (isLoser()) 933226890Sdim return; 934193323Sed } 935203954Srdivacky for (SmallVectorImpl<const SCEV *>::const_iterator I = F.BaseRegs.begin(), 936203954Srdivacky E = F.BaseRegs.end(); I != E; ++I) { 937203954Srdivacky const SCEV *BaseReg = *I; 938203954Srdivacky if (VisitedRegs.count(BaseReg)) { 939203954Srdivacky Loose(); 940203954Srdivacky return; 941203954Srdivacky } 942235633Sdim RatePrimaryRegister(BaseReg, Regs, L, SE, DT, LoserRegs); 943226890Sdim if (isLoser()) 944226890Sdim return; 945203954Srdivacky } 946193323Sed 947223017Sdim // Determine how many (unfolded) adds we'll need inside the loop. 948223017Sdim size_t NumBaseParts = F.BaseRegs.size() + (F.UnfoldedOffset != 0); 949223017Sdim if (NumBaseParts > 1) 950263509Sdim // Do not count the base and a possible second register if the target 951263509Sdim // allows to fold 2 registers. 952263509Sdim NumBaseAdds += NumBaseParts - (1 + isLegal2RegAMUse(TTI, LU, F)); 953193323Sed 954263509Sdim // Accumulate non-free scaling amounts. 955263509Sdim ScaleCost += getScalingFactorCost(TTI, LU, F); 956263509Sdim 957203954Srdivacky // Tally up the non-zero immediates. 958203954Srdivacky for (SmallVectorImpl<int64_t>::const_iterator I = Offsets.begin(), 959203954Srdivacky E = Offsets.end(); I != E; ++I) { 960252723Sdim int64_t Offset = (uint64_t)*I + F.BaseOffset; 961252723Sdim if (F.BaseGV) 962203954Srdivacky ImmCost += 64; // Handle symbolic values conservatively. 963203954Srdivacky // TODO: This should probably be the pointer size. 
964203954Srdivacky else if (Offset != 0) 965203954Srdivacky ImmCost += APInt(64, Offset, true).getMinSignedBits(); 966193323Sed } 967226890Sdim assert(isValid() && "invalid cost"); 968203954Srdivacky} 969193323Sed 970221345Sdim/// Loose - Set this cost to a losing value. 971203954Srdivackyvoid Cost::Loose() { 972203954Srdivacky NumRegs = ~0u; 973203954Srdivacky AddRecCost = ~0u; 974203954Srdivacky NumIVMuls = ~0u; 975203954Srdivacky NumBaseAdds = ~0u; 976203954Srdivacky ImmCost = ~0u; 977203954Srdivacky SetupCost = ~0u; 978263509Sdim ScaleCost = ~0u; 979203954Srdivacky} 980193323Sed 981203954Srdivacky/// operator< - Choose the lower cost. 982203954Srdivackybool Cost::operator<(const Cost &Other) const { 983203954Srdivacky if (NumRegs != Other.NumRegs) 984203954Srdivacky return NumRegs < Other.NumRegs; 985203954Srdivacky if (AddRecCost != Other.AddRecCost) 986203954Srdivacky return AddRecCost < Other.AddRecCost; 987203954Srdivacky if (NumIVMuls != Other.NumIVMuls) 988203954Srdivacky return NumIVMuls < Other.NumIVMuls; 989203954Srdivacky if (NumBaseAdds != Other.NumBaseAdds) 990203954Srdivacky return NumBaseAdds < Other.NumBaseAdds; 991263509Sdim if (ScaleCost != Other.ScaleCost) 992263509Sdim return ScaleCost < Other.ScaleCost; 993203954Srdivacky if (ImmCost != Other.ImmCost) 994203954Srdivacky return ImmCost < Other.ImmCost; 995203954Srdivacky if (SetupCost != Other.SetupCost) 996203954Srdivacky return SetupCost < Other.SetupCost; 997193323Sed return false; 998193323Sed} 999193323Sed 1000203954Srdivackyvoid Cost::print(raw_ostream &OS) const { 1001203954Srdivacky OS << NumRegs << " reg" << (NumRegs == 1 ? "" : "s"); 1002203954Srdivacky if (AddRecCost != 0) 1003203954Srdivacky OS << ", with addrec cost " << AddRecCost; 1004203954Srdivacky if (NumIVMuls != 0) 1005203954Srdivacky OS << ", plus " << NumIVMuls << " IV mul" << (NumIVMuls == 1 ? 
"" : "s"); 1006203954Srdivacky if (NumBaseAdds != 0) 1007203954Srdivacky OS << ", plus " << NumBaseAdds << " base add" 1008203954Srdivacky << (NumBaseAdds == 1 ? "" : "s"); 1009263509Sdim if (ScaleCost != 0) 1010263509Sdim OS << ", plus " << ScaleCost << " scale cost"; 1011203954Srdivacky if (ImmCost != 0) 1012203954Srdivacky OS << ", plus " << ImmCost << " imm cost"; 1013203954Srdivacky if (SetupCost != 0) 1014203954Srdivacky OS << ", plus " << SetupCost << " setup cost"; 1015203954Srdivacky} 1016199481Srdivacky 1017245431Sdim#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 1018203954Srdivackyvoid Cost::dump() const { 1019203954Srdivacky print(errs()); errs() << '\n'; 1020203954Srdivacky} 1021245431Sdim#endif 1022199481Srdivacky 1023203954Srdivackynamespace { 1024193323Sed 1025203954Srdivacky/// LSRFixup - An operand value in an instruction which is to be replaced 1026203954Srdivacky/// with some equivalent, possibly strength-reduced, replacement. 1027203954Srdivackystruct LSRFixup { 1028203954Srdivacky /// UserInst - The instruction which will be updated. 1029203954Srdivacky Instruction *UserInst; 1030199481Srdivacky 1031203954Srdivacky /// OperandValToReplace - The operand of the instruction which will 1032203954Srdivacky /// be replaced. The operand may be used more than once; every instance 1033203954Srdivacky /// will be replaced. 1034203954Srdivacky Value *OperandValToReplace; 1035193323Sed 1036207618Srdivacky /// PostIncLoops - If this user is to use the post-incremented value of an 1037203954Srdivacky /// induction variable, this variable is non-null and holds the loop 1038203954Srdivacky /// associated with the induction variable. 1039207618Srdivacky PostIncLoopSet PostIncLoops; 1040193323Sed 1041203954Srdivacky /// LUIdx - The index of the LSRUse describing the expression which 1042203954Srdivacky /// this fixup needs, minus an offset (below). 
1043203954Srdivacky size_t LUIdx; 1044199481Srdivacky 1045203954Srdivacky /// Offset - A constant offset to be added to the LSRUse expression. 1046203954Srdivacky /// This allows multiple fixups to share the same LSRUse with different 1047203954Srdivacky /// offsets, for example in an unrolled loop. 1048203954Srdivacky int64_t Offset; 1049199481Srdivacky 1050207618Srdivacky bool isUseFullyOutsideLoop(const Loop *L) const; 1051207618Srdivacky 1052203954Srdivacky LSRFixup(); 1053193323Sed 1054203954Srdivacky void print(raw_ostream &OS) const; 1055203954Srdivacky void dump() const; 1056203954Srdivacky}; 1057199481Srdivacky 1058203954Srdivacky} 1059193323Sed 1060203954SrdivackyLSRFixup::LSRFixup() 1061208599Srdivacky : UserInst(0), OperandValToReplace(0), LUIdx(~size_t(0)), Offset(0) {} 1062199481Srdivacky 1063207618Srdivacky/// isUseFullyOutsideLoop - Test whether this fixup always uses its 1064207618Srdivacky/// value outside of the given loop. 1065207618Srdivackybool LSRFixup::isUseFullyOutsideLoop(const Loop *L) const { 1066207618Srdivacky // PHI nodes use their value in their incoming blocks. 1067207618Srdivacky if (const PHINode *PN = dyn_cast<PHINode>(UserInst)) { 1068207618Srdivacky for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) 1069207618Srdivacky if (PN->getIncomingValue(i) == OperandValToReplace && 1070207618Srdivacky L->contains(PN->getIncomingBlock(i))) 1071207618Srdivacky return false; 1072207618Srdivacky return true; 1073207618Srdivacky } 1074207618Srdivacky 1075207618Srdivacky return !L->contains(UserInst); 1076207618Srdivacky} 1077207618Srdivacky 1078203954Srdivackyvoid LSRFixup::print(raw_ostream &OS) const { 1079203954Srdivacky OS << "UserInst="; 1080203954Srdivacky // Store is common and interesting enough to be worth special-casing. 
1081203954Srdivacky if (StoreInst *Store = dyn_cast<StoreInst>(UserInst)) { 1082203954Srdivacky OS << "store "; 1083203954Srdivacky WriteAsOperand(OS, Store->getOperand(0), /*PrintType=*/false); 1084203954Srdivacky } else if (UserInst->getType()->isVoidTy()) 1085203954Srdivacky OS << UserInst->getOpcodeName(); 1086203954Srdivacky else 1087203954Srdivacky WriteAsOperand(OS, UserInst, /*PrintType=*/false); 1088199481Srdivacky 1089203954Srdivacky OS << ", OperandValToReplace="; 1090203954Srdivacky WriteAsOperand(OS, OperandValToReplace, /*PrintType=*/false); 1091193323Sed 1092207618Srdivacky for (PostIncLoopSet::const_iterator I = PostIncLoops.begin(), 1093207618Srdivacky E = PostIncLoops.end(); I != E; ++I) { 1094203954Srdivacky OS << ", PostIncLoop="; 1095207618Srdivacky WriteAsOperand(OS, (*I)->getHeader(), /*PrintType=*/false); 1096193323Sed } 1097193323Sed 1098203954Srdivacky if (LUIdx != ~size_t(0)) 1099203954Srdivacky OS << ", LUIdx=" << LUIdx; 1100203954Srdivacky 1101203954Srdivacky if (Offset != 0) 1102203954Srdivacky OS << ", Offset=" << Offset; 1103193323Sed} 1104193323Sed 1105245431Sdim#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 1106203954Srdivackyvoid LSRFixup::dump() const { 1107203954Srdivacky print(errs()); errs() << '\n'; 1108193323Sed} 1109245431Sdim#endif 1110193323Sed 1111203954Srdivackynamespace { 1112193323Sed 1113203954Srdivacky/// UniquifierDenseMapInfo - A DenseMapInfo implementation for holding 1114203954Srdivacky/// DenseMaps and DenseSets of sorted SmallVectors of const SCEV*. 
1115203954Srdivackystruct UniquifierDenseMapInfo { 1116252723Sdim static SmallVector<const SCEV *, 4> getEmptyKey() { 1117252723Sdim SmallVector<const SCEV *, 4> V; 1118203954Srdivacky V.push_back(reinterpret_cast<const SCEV *>(-1)); 1119203954Srdivacky return V; 1120203954Srdivacky } 1121199481Srdivacky 1122252723Sdim static SmallVector<const SCEV *, 4> getTombstoneKey() { 1123252723Sdim SmallVector<const SCEV *, 4> V; 1124203954Srdivacky V.push_back(reinterpret_cast<const SCEV *>(-2)); 1125203954Srdivacky return V; 1126193323Sed } 1127193323Sed 1128252723Sdim static unsigned getHashValue(const SmallVector<const SCEV *, 4> &V) { 1129203954Srdivacky unsigned Result = 0; 1130203954Srdivacky for (SmallVectorImpl<const SCEV *>::const_iterator I = V.begin(), 1131203954Srdivacky E = V.end(); I != E; ++I) 1132203954Srdivacky Result ^= DenseMapInfo<const SCEV *>::getHashValue(*I); 1133193323Sed return Result; 1134193323Sed } 1135193323Sed 1136252723Sdim static bool isEqual(const SmallVector<const SCEV *, 4> &LHS, 1137252723Sdim const SmallVector<const SCEV *, 4> &RHS) { 1138203954Srdivacky return LHS == RHS; 1139203954Srdivacky } 1140203954Srdivacky}; 1141199481Srdivacky 1142203954Srdivacky/// LSRUse - This class holds the state that LSR keeps for each use in 1143203954Srdivacky/// IVUsers, as well as uses invented by LSR itself. It includes information 1144203954Srdivacky/// about what kinds of things can be folded into the user, information about 1145203954Srdivacky/// the user itself, and information about how the use may be satisfied. 1146203954Srdivacky/// TODO: Represent multiple users of the same expression in common? 1147203954Srdivackyclass LSRUse { 1148252723Sdim DenseSet<SmallVector<const SCEV *, 4>, UniquifierDenseMapInfo> Uniquifier; 1149193323Sed 1150203954Srdivackypublic: 1151203954Srdivacky /// KindType - An enum for a kind of use, indicating what types of 1152203954Srdivacky /// scaled and immediate operands it might support. 
1153203954Srdivacky enum KindType { 1154203954Srdivacky Basic, ///< A normal use, with no folding. 1155203954Srdivacky Special, ///< A special case of basic, allowing -1 scales. 1156203954Srdivacky Address, ///< An address use; folding according to TargetLowering 1157203954Srdivacky ICmpZero ///< An equality icmp with both operands folded into one. 1158203954Srdivacky // TODO: Add a generic icmp too? 1159203954Srdivacky }; 1160199481Srdivacky 1161203954Srdivacky KindType Kind; 1162226890Sdim Type *AccessTy; 1163193323Sed 1164203954Srdivacky SmallVector<int64_t, 8> Offsets; 1165203954Srdivacky int64_t MinOffset; 1166203954Srdivacky int64_t MaxOffset; 1167193323Sed 1168203954Srdivacky /// AllFixupsOutsideLoop - This records whether all of the fixups using this 1169203954Srdivacky /// LSRUse are outside of the loop, in which case some special-case heuristics 1170203954Srdivacky /// may be used. 1171203954Srdivacky bool AllFixupsOutsideLoop; 1172193323Sed 1173263509Sdim /// RigidFormula is set to true to guarantee that this use will be associated 1174263509Sdim /// with a single formula--the one that initially matched. Some SCEV 1175263509Sdim /// expressions cannot be expanded. This allows LSR to consider the registers 1176263509Sdim /// used by those expressions without the need to expand them later after 1177263509Sdim /// changing the formula. 1178263509Sdim bool RigidFormula; 1179263509Sdim 1180212904Sdim /// WidestFixupType - This records the widest use type for any fixup using 1181212904Sdim /// this LSRUse. FindUseWithSimilarFormula can't consider uses with different 1182212904Sdim /// max fixup widths to be equivalent, because the narrower one may be relying 1183212904Sdim /// on the implicit truncation to truncate away bogus bits. 1184226890Sdim Type *WidestFixupType; 1185212904Sdim 1186203954Srdivacky /// Formulae - A list of ways to build a value that can satisfy this user. 
1187203954Srdivacky /// After the list is populated, one of these is selected heuristically and 1188203954Srdivacky /// used to formulate a replacement for OperandValToReplace in UserInst. 1189203954Srdivacky SmallVector<Formula, 12> Formulae; 1190193323Sed 1191203954Srdivacky /// Regs - The set of register candidates used by all formulae in this LSRUse. 1192203954Srdivacky SmallPtrSet<const SCEV *, 4> Regs; 1193193323Sed 1194226890Sdim LSRUse(KindType K, Type *T) : Kind(K), AccessTy(T), 1195203954Srdivacky MinOffset(INT64_MAX), 1196203954Srdivacky MaxOffset(INT64_MIN), 1197212904Sdim AllFixupsOutsideLoop(true), 1198263509Sdim RigidFormula(false), 1199212904Sdim WidestFixupType(0) {} 1200199481Srdivacky 1201208599Srdivacky bool HasFormulaWithSameRegs(const Formula &F) const; 1202204642Srdivacky bool InsertFormula(const Formula &F); 1203208599Srdivacky void DeleteFormula(Formula &F); 1204208599Srdivacky void RecomputeRegs(size_t LUIdx, RegUseTracker &Reguses); 1205193323Sed 1206203954Srdivacky void print(raw_ostream &OS) const; 1207203954Srdivacky void dump() const; 1208203954Srdivacky}; 1209193323Sed 1210210299Sed} 1211210299Sed 1212208599Srdivacky/// HasFormula - Test whether this use as a formula which has the same 1213208599Srdivacky/// registers as the given formula. 1214208599Srdivackybool LSRUse::HasFormulaWithSameRegs(const Formula &F) const { 1215252723Sdim SmallVector<const SCEV *, 4> Key = F.BaseRegs; 1216208599Srdivacky if (F.ScaledReg) Key.push_back(F.ScaledReg); 1217208599Srdivacky // Unstable sort by host order ok, because this is only used for uniquifying. 1218208599Srdivacky std::sort(Key.begin(), Key.end()); 1219208599Srdivacky return Uniquifier.count(Key); 1220208599Srdivacky} 1221208599Srdivacky 1222203954Srdivacky/// InsertFormula - If the given formula has not yet been inserted, add it to 1223203954Srdivacky/// the list, and return true. Return false otherwise. 
1224204642Srdivackybool LSRUse::InsertFormula(const Formula &F) { 1225263509Sdim if (!Formulae.empty() && RigidFormula) 1226263509Sdim return false; 1227263509Sdim 1228252723Sdim SmallVector<const SCEV *, 4> Key = F.BaseRegs; 1229203954Srdivacky if (F.ScaledReg) Key.push_back(F.ScaledReg); 1230203954Srdivacky // Unstable sort by host order ok, because this is only used for uniquifying. 1231203954Srdivacky std::sort(Key.begin(), Key.end()); 1232199481Srdivacky 1233203954Srdivacky if (!Uniquifier.insert(Key).second) 1234203954Srdivacky return false; 1235199481Srdivacky 1236203954Srdivacky // Using a register to hold the value of 0 is not profitable. 1237203954Srdivacky assert((!F.ScaledReg || !F.ScaledReg->isZero()) && 1238203954Srdivacky "Zero allocated in a scaled register!"); 1239203954Srdivacky#ifndef NDEBUG 1240203954Srdivacky for (SmallVectorImpl<const SCEV *>::const_iterator I = 1241203954Srdivacky F.BaseRegs.begin(), E = F.BaseRegs.end(); I != E; ++I) 1242203954Srdivacky assert(!(*I)->isZero() && "Zero allocated in a base register!"); 1243203954Srdivacky#endif 1244193323Sed 1245203954Srdivacky // Add the formula to the list. 1246203954Srdivacky Formulae.push_back(F); 1247193323Sed 1248203954Srdivacky // Record registers now being used by this use. 1249203954Srdivacky Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end()); 1250199481Srdivacky 1251193323Sed return true; 1252193323Sed} 1253193323Sed 1254208599Srdivacky/// DeleteFormula - Remove the given formula from this use's list. 1255208599Srdivackyvoid LSRUse::DeleteFormula(Formula &F) { 1256208599Srdivacky if (&F != &Formulae.back()) 1257208599Srdivacky std::swap(F, Formulae.back()); 1258208599Srdivacky Formulae.pop_back(); 1259208599Srdivacky} 1260208599Srdivacky 1261208599Srdivacky/// RecomputeRegs - Recompute the Regs field, and update RegUses. 
1262208599Srdivackyvoid LSRUse::RecomputeRegs(size_t LUIdx, RegUseTracker &RegUses) { 1263208599Srdivacky // Now that we've filtered out some formulae, recompute the Regs set. 1264208599Srdivacky SmallPtrSet<const SCEV *, 4> OldRegs = Regs; 1265208599Srdivacky Regs.clear(); 1266208599Srdivacky for (SmallVectorImpl<Formula>::const_iterator I = Formulae.begin(), 1267208599Srdivacky E = Formulae.end(); I != E; ++I) { 1268208599Srdivacky const Formula &F = *I; 1269208599Srdivacky if (F.ScaledReg) Regs.insert(F.ScaledReg); 1270208599Srdivacky Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end()); 1271208599Srdivacky } 1272208599Srdivacky 1273208599Srdivacky // Update the RegTracker. 1274208599Srdivacky for (SmallPtrSet<const SCEV *, 4>::iterator I = OldRegs.begin(), 1275208599Srdivacky E = OldRegs.end(); I != E; ++I) 1276208599Srdivacky if (!Regs.count(*I)) 1277208599Srdivacky RegUses.DropRegister(*I, LUIdx); 1278208599Srdivacky} 1279208599Srdivacky 1280203954Srdivackyvoid LSRUse::print(raw_ostream &OS) const { 1281203954Srdivacky OS << "LSR Use: Kind="; 1282203954Srdivacky switch (Kind) { 1283203954Srdivacky case Basic: OS << "Basic"; break; 1284203954Srdivacky case Special: OS << "Special"; break; 1285203954Srdivacky case ICmpZero: OS << "ICmpZero"; break; 1286203954Srdivacky case Address: 1287203954Srdivacky OS << "Address of "; 1288204642Srdivacky if (AccessTy->isPointerTy()) 1289203954Srdivacky OS << "pointer"; // the full pointer type could be really verbose 1290203954Srdivacky else 1291203954Srdivacky OS << *AccessTy; 1292193323Sed } 1293193323Sed 1294203954Srdivacky OS << ", Offsets={"; 1295203954Srdivacky for (SmallVectorImpl<int64_t>::const_iterator I = Offsets.begin(), 1296203954Srdivacky E = Offsets.end(); I != E; ++I) { 1297203954Srdivacky OS << *I; 1298212904Sdim if (llvm::next(I) != E) 1299203954Srdivacky OS << ','; 1300193323Sed } 1301203954Srdivacky OS << '}'; 1302193323Sed 1303203954Srdivacky if (AllFixupsOutsideLoop) 1304203954Srdivacky OS << ", 
all-fixups-outside-loop"; 1305212904Sdim 1306212904Sdim if (WidestFixupType) 1307212904Sdim OS << ", widest fixup type: " << *WidestFixupType; 1308193323Sed} 1309193323Sed 1310245431Sdim#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 1311203954Srdivackyvoid LSRUse::dump() const { 1312203954Srdivacky print(errs()); errs() << '\n'; 1313193323Sed} 1314245431Sdim#endif 1315193323Sed 1316203954Srdivacky/// isLegalUse - Test whether the use described by AM is "legal", meaning it can 1317203954Srdivacky/// be completely folded into the user instruction at isel time. This includes 1318203954Srdivacky/// address-mode folding and special icmp tricks. 1319252723Sdimstatic bool isLegalUse(const TargetTransformInfo &TTI, LSRUse::KindType Kind, 1320252723Sdim Type *AccessTy, GlobalValue *BaseGV, int64_t BaseOffset, 1321252723Sdim bool HasBaseReg, int64_t Scale) { 1322203954Srdivacky switch (Kind) { 1323203954Srdivacky case LSRUse::Address: 1324252723Sdim return TTI.isLegalAddressingMode(AccessTy, BaseGV, BaseOffset, HasBaseReg, Scale); 1325193323Sed 1326203954Srdivacky // Otherwise, just guess that reg+reg addressing is legal. 1327252723Sdim //return ; 1328193323Sed 1329203954Srdivacky case LSRUse::ICmpZero: 1330203954Srdivacky // There's not even a target hook for querying whether it would be legal to 1331203954Srdivacky // fold a GV into an ICmp. 1332252723Sdim if (BaseGV) 1333203954Srdivacky return false; 1334193323Sed 1335203954Srdivacky // ICmp only has two operands; don't allow more than two non-trivial parts. 1336252723Sdim if (Scale != 0 && HasBaseReg && BaseOffset != 0) 1337203954Srdivacky return false; 1338193323Sed 1339203954Srdivacky // ICmp only supports no scale or a -1 scale, as we can "fold" a -1 scale by 1340203954Srdivacky // putting the scaled register in the other operand of the icmp. 
1341252723Sdim if (Scale != 0 && Scale != -1) 1342203954Srdivacky return false; 1343193323Sed 1344203954Srdivacky // If we have low-level target information, ask the target if it can fold an 1345203954Srdivacky // integer immediate on an icmp. 1346252723Sdim if (BaseOffset != 0) { 1347235633Sdim // We have one of: 1348252723Sdim // ICmpZero BaseReg + BaseOffset => ICmp BaseReg, -BaseOffset 1349252723Sdim // ICmpZero -1*ScaleReg + BaseOffset => ICmp ScaleReg, BaseOffset 1350235633Sdim // Offs is the ICmp immediate. 1351252723Sdim if (Scale == 0) 1352252723Sdim // The cast does the right thing with INT64_MIN. 1353252723Sdim BaseOffset = -(uint64_t)BaseOffset; 1354252723Sdim return TTI.isLegalICmpImmediate(BaseOffset); 1355203954Srdivacky } 1356193323Sed 1357235633Sdim // ICmpZero BaseReg + -1*ScaleReg => ICmp BaseReg, ScaleReg 1358203954Srdivacky return true; 1359199481Srdivacky 1360203954Srdivacky case LSRUse::Basic: 1361203954Srdivacky // Only handle single-register values. 1362252723Sdim return !BaseGV && Scale == 0 && BaseOffset == 0; 1363199481Srdivacky 1364203954Srdivacky case LSRUse::Special: 1365245431Sdim // Special case Basic to handle -1 scales. 1366252723Sdim return !BaseGV && (Scale == 0 || Scale == -1) && BaseOffset == 0; 1367193323Sed } 1368193323Sed 1369235633Sdim llvm_unreachable("Invalid LSRUse Kind!"); 1370193323Sed} 1371193323Sed 1372252723Sdimstatic bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset, 1373252723Sdim int64_t MaxOffset, LSRUse::KindType Kind, Type *AccessTy, 1374252723Sdim GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, 1375252723Sdim int64_t Scale) { 1376203954Srdivacky // Check for overflow. 
1377252723Sdim if (((int64_t)((uint64_t)BaseOffset + MinOffset) > BaseOffset) != 1378203954Srdivacky (MinOffset > 0)) 1379193323Sed return false; 1380252723Sdim MinOffset = (uint64_t)BaseOffset + MinOffset; 1381252723Sdim if (((int64_t)((uint64_t)BaseOffset + MaxOffset) > BaseOffset) != 1382252723Sdim (MaxOffset > 0)) 1383252723Sdim return false; 1384252723Sdim MaxOffset = (uint64_t)BaseOffset + MaxOffset; 1385252723Sdim 1386252723Sdim return isLegalUse(TTI, Kind, AccessTy, BaseGV, MinOffset, HasBaseReg, 1387252723Sdim Scale) && 1388252723Sdim isLegalUse(TTI, Kind, AccessTy, BaseGV, MaxOffset, HasBaseReg, Scale); 1389203954Srdivacky} 1390193323Sed 1391252723Sdimstatic bool isLegalUse(const TargetTransformInfo &TTI, int64_t MinOffset, 1392252723Sdim int64_t MaxOffset, LSRUse::KindType Kind, Type *AccessTy, 1393252723Sdim const Formula &F) { 1394252723Sdim return isLegalUse(TTI, MinOffset, MaxOffset, Kind, AccessTy, F.BaseGV, 1395252723Sdim F.BaseOffset, F.HasBaseReg, F.Scale); 1396252723Sdim} 1397252723Sdim 1398263509Sdimstatic bool isLegal2RegAMUse(const TargetTransformInfo &TTI, const LSRUse &LU, 1399263509Sdim const Formula &F) { 1400263509Sdim // If F is used as an Addressing Mode, it may fold one Base plus one 1401263509Sdim // scaled register. If the scaled register is nil, do as if another 1402263509Sdim // element of the base regs is a 1-scaled register. 1403263509Sdim // This is possible if BaseRegs has at least 2 registers. 1404263509Sdim 1405263509Sdim // If this is not an address calculation, this is not an addressing mode 1406263509Sdim // use. 1407263509Sdim if (LU.Kind != LSRUse::Address) 1408263509Sdim return false; 1409263509Sdim 1410263509Sdim // F is already scaled. 1411263509Sdim if (F.Scale != 0) 1412263509Sdim return false; 1413263509Sdim 1414263509Sdim // We need to keep one register for the base and one to scale. 
1415263509Sdim if (F.BaseRegs.size() < 2) 1416263509Sdim return false; 1417263509Sdim 1418263509Sdim return isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, 1419263509Sdim F.BaseGV, F.BaseOffset, F.HasBaseReg, 1); 1420263509Sdim } 1421263509Sdim 1422263509Sdimstatic unsigned getScalingFactorCost(const TargetTransformInfo &TTI, 1423263509Sdim const LSRUse &LU, const Formula &F) { 1424263509Sdim if (!F.Scale) 1425263509Sdim return 0; 1426263509Sdim assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, 1427263509Sdim LU.AccessTy, F) && "Illegal formula in use."); 1428263509Sdim 1429263509Sdim switch (LU.Kind) { 1430263509Sdim case LSRUse::Address: { 1431263509Sdim // Check the scaling factor cost with both the min and max offsets. 1432263509Sdim int ScaleCostMinOffset = 1433263509Sdim TTI.getScalingFactorCost(LU.AccessTy, F.BaseGV, 1434263509Sdim F.BaseOffset + LU.MinOffset, 1435263509Sdim F.HasBaseReg, F.Scale); 1436263509Sdim int ScaleCostMaxOffset = 1437263509Sdim TTI.getScalingFactorCost(LU.AccessTy, F.BaseGV, 1438263509Sdim F.BaseOffset + LU.MaxOffset, 1439263509Sdim F.HasBaseReg, F.Scale); 1440263509Sdim 1441263509Sdim assert(ScaleCostMinOffset >= 0 && ScaleCostMaxOffset >= 0 && 1442263509Sdim "Legal addressing mode has an illegal cost!"); 1443263509Sdim return std::max(ScaleCostMinOffset, ScaleCostMaxOffset); 1444263509Sdim } 1445263509Sdim case LSRUse::ICmpZero: 1446263509Sdim // ICmpZero BaseReg + -1*ScaleReg => ICmp BaseReg, ScaleReg. 1447263509Sdim // Therefore, return 0 in case F.Scale == -1. 
1448263509Sdim return F.Scale != -1; 1449263509Sdim 1450263509Sdim case LSRUse::Basic: 1451263509Sdim case LSRUse::Special: 1452263509Sdim return 0; 1453263509Sdim } 1454263509Sdim 1455263509Sdim llvm_unreachable("Invalid LSRUse Kind!"); 1456263509Sdim} 1457263509Sdim 1458252723Sdimstatic bool isAlwaysFoldable(const TargetTransformInfo &TTI, 1459226890Sdim LSRUse::KindType Kind, Type *AccessTy, 1460252723Sdim GlobalValue *BaseGV, int64_t BaseOffset, 1461252723Sdim bool HasBaseReg) { 1462203954Srdivacky // Fast-path: zero is always foldable. 1463252723Sdim if (BaseOffset == 0 && !BaseGV) return true; 1464193323Sed 1465203954Srdivacky // Conservatively, create an address with an immediate and a 1466203954Srdivacky // base and a scale. 1467252723Sdim int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1; 1468193323Sed 1469208599Srdivacky // Canonicalize a scale of 1 to a base register if the formula doesn't 1470208599Srdivacky // already have a base register. 1471252723Sdim if (!HasBaseReg && Scale == 1) { 1472252723Sdim Scale = 0; 1473252723Sdim HasBaseReg = true; 1474208599Srdivacky } 1475208599Srdivacky 1476252723Sdim return isLegalUse(TTI, Kind, AccessTy, BaseGV, BaseOffset, HasBaseReg, Scale); 1477193323Sed} 1478193323Sed 1479252723Sdimstatic bool isAlwaysFoldable(const TargetTransformInfo &TTI, 1480252723Sdim ScalarEvolution &SE, int64_t MinOffset, 1481252723Sdim int64_t MaxOffset, LSRUse::KindType Kind, 1482252723Sdim Type *AccessTy, const SCEV *S, bool HasBaseReg) { 1483203954Srdivacky // Fast-path: zero is always foldable. 1484203954Srdivacky if (S->isZero()) return true; 1485193323Sed 1486203954Srdivacky // Conservatively, create an address with an immediate and a 1487203954Srdivacky // base and a scale. 1488252723Sdim int64_t BaseOffset = ExtractImmediate(S, SE); 1489203954Srdivacky GlobalValue *BaseGV = ExtractSymbol(S, SE); 1490193323Sed 1491203954Srdivacky // If there's anything else involved, it's not foldable. 
1492203954Srdivacky if (!S->isZero()) return false; 1493193323Sed 1494203954Srdivacky // Fast-path: zero is always foldable. 1495252723Sdim if (BaseOffset == 0 && !BaseGV) return true; 1496193323Sed 1497203954Srdivacky // Conservatively, create an address with an immediate and a 1498203954Srdivacky // base and a scale. 1499252723Sdim int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1; 1500193323Sed 1501252723Sdim return isLegalUse(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV, 1502252723Sdim BaseOffset, HasBaseReg, Scale); 1503193323Sed} 1504193323Sed 1505210299Sednamespace { 1506210299Sed 1507210299Sed/// UseMapDenseMapInfo - A DenseMapInfo implementation for holding 1508210299Sed/// DenseMaps and DenseSets of pairs of const SCEV* and LSRUse::Kind. 1509210299Sedstruct UseMapDenseMapInfo { 1510210299Sed static std::pair<const SCEV *, LSRUse::KindType> getEmptyKey() { 1511210299Sed return std::make_pair(reinterpret_cast<const SCEV *>(-1), LSRUse::Basic); 1512210299Sed } 1513210299Sed 1514210299Sed static std::pair<const SCEV *, LSRUse::KindType> getTombstoneKey() { 1515210299Sed return std::make_pair(reinterpret_cast<const SCEV *>(-2), LSRUse::Basic); 1516210299Sed } 1517210299Sed 1518210299Sed static unsigned 1519210299Sed getHashValue(const std::pair<const SCEV *, LSRUse::KindType> &V) { 1520210299Sed unsigned Result = DenseMapInfo<const SCEV *>::getHashValue(V.first); 1521210299Sed Result ^= DenseMapInfo<unsigned>::getHashValue(unsigned(V.second)); 1522210299Sed return Result; 1523210299Sed } 1524210299Sed 1525210299Sed static bool isEqual(const std::pair<const SCEV *, LSRUse::KindType> &LHS, 1526210299Sed const std::pair<const SCEV *, LSRUse::KindType> &RHS) { 1527210299Sed return LHS == RHS; 1528210299Sed } 1529210299Sed}; 1530210299Sed 1531235633Sdim/// IVInc - An individual increment in a Chain of IV increments. 
1532235633Sdim/// Relate an IV user to an expression that computes the IV it uses from the IV 1533235633Sdim/// used by the previous link in the Chain. 1534235633Sdim/// 1535235633Sdim/// For the head of a chain, IncExpr holds the absolute SCEV expression for the 1536235633Sdim/// original IVOperand. The head of the chain's IVOperand is only valid during 1537235633Sdim/// chain collection, before LSR replaces IV users. During chain generation, 1538235633Sdim/// IncExpr can be used to find the new IVOperand that computes the same 1539235633Sdim/// expression. 1540235633Sdimstruct IVInc { 1541235633Sdim Instruction *UserInst; 1542235633Sdim Value* IVOperand; 1543235633Sdim const SCEV *IncExpr; 1544235633Sdim 1545235633Sdim IVInc(Instruction *U, Value *O, const SCEV *E): 1546235633Sdim UserInst(U), IVOperand(O), IncExpr(E) {} 1547235633Sdim}; 1548235633Sdim 1549235633Sdim// IVChain - The list of IV increments in program order. 1550235633Sdim// We typically add the head of a chain without finding subsequent links. 1551245431Sdimstruct IVChain { 1552245431Sdim SmallVector<IVInc,1> Incs; 1553245431Sdim const SCEV *ExprBase; 1554235633Sdim 1555245431Sdim IVChain() : ExprBase(0) {} 1556245431Sdim 1557245431Sdim IVChain(const IVInc &Head, const SCEV *Base) 1558245431Sdim : Incs(1, Head), ExprBase(Base) {} 1559245431Sdim 1560245431Sdim typedef SmallVectorImpl<IVInc>::const_iterator const_iterator; 1561245431Sdim 1562245431Sdim // begin - return the first increment in the chain. 1563245431Sdim const_iterator begin() const { 1564245431Sdim assert(!Incs.empty()); 1565245431Sdim return llvm::next(Incs.begin()); 1566245431Sdim } 1567245431Sdim const_iterator end() const { 1568245431Sdim return Incs.end(); 1569245431Sdim } 1570245431Sdim 1571245431Sdim // hasIncs - Returns true if this chain contains any increments. 1572245431Sdim bool hasIncs() const { return Incs.size() >= 2; } 1573245431Sdim 1574245431Sdim // add - Add an IVInc to the end of this chain. 
1575245431Sdim void add(const IVInc &X) { Incs.push_back(X); } 1576245431Sdim 1577245431Sdim // tailUserInst - Returns the last UserInst in the chain. 1578245431Sdim Instruction *tailUserInst() const { return Incs.back().UserInst; } 1579245431Sdim 1580245431Sdim // isProfitableIncrement - Returns true if IncExpr can be profitably added to 1581245431Sdim // this chain. 1582245431Sdim bool isProfitableIncrement(const SCEV *OperExpr, 1583245431Sdim const SCEV *IncExpr, 1584245431Sdim ScalarEvolution&); 1585245431Sdim}; 1586245431Sdim 1587235633Sdim/// ChainUsers - Helper for CollectChains to track multiple IV increment uses. 1588235633Sdim/// Distinguish between FarUsers that definitely cross IV increments and 1589235633Sdim/// NearUsers that may be used between IV increments. 1590235633Sdimstruct ChainUsers { 1591235633Sdim SmallPtrSet<Instruction*, 4> FarUsers; 1592235633Sdim SmallPtrSet<Instruction*, 4> NearUsers; 1593235633Sdim}; 1594235633Sdim 1595203954Srdivacky/// LSRInstance - This class holds state for the main loop strength reduction 1596203954Srdivacky/// logic. 1597203954Srdivackyclass LSRInstance { 1598203954Srdivacky IVUsers &IU; 1599203954Srdivacky ScalarEvolution &SE; 1600203954Srdivacky DominatorTree &DT; 1601207618Srdivacky LoopInfo &LI; 1602252723Sdim const TargetTransformInfo &TTI; 1603203954Srdivacky Loop *const L; 1604203954Srdivacky bool Changed; 1605193323Sed 1606203954Srdivacky /// IVIncInsertPos - This is the insert position that the current loop's 1607203954Srdivacky /// induction variable increment should be placed. In simple loops, this is 1608203954Srdivacky /// the latch block's terminator. But in more complicated cases, this is a 1609203954Srdivacky /// position which will dominate all the in-loop post-increment users. 1610203954Srdivacky Instruction *IVIncInsertPos; 1611193323Sed 1612203954Srdivacky /// Factors - Interesting factors between use strides. 
1613203954Srdivacky SmallSetVector<int64_t, 8> Factors; 1614193323Sed 1615203954Srdivacky /// Types - Interesting use types, to facilitate truncation reuse. 1616226890Sdim SmallSetVector<Type *, 4> Types; 1617193323Sed 1618203954Srdivacky /// Fixups - The list of operands which are to be replaced. 1619203954Srdivacky SmallVector<LSRFixup, 16> Fixups; 1620193323Sed 1621203954Srdivacky /// Uses - The list of interesting uses. 1622203954Srdivacky SmallVector<LSRUse, 16> Uses; 1623193323Sed 1624203954Srdivacky /// RegUses - Track which uses use which register candidates. 1625203954Srdivacky RegUseTracker RegUses; 1626193323Sed 1627235633Sdim // Limit the number of chains to avoid quadratic behavior. We don't expect to 1628235633Sdim // have more than a few IV increment chains in a loop. Missing a Chain falls 1629235633Sdim // back to normal LSR behavior for those uses. 1630235633Sdim static const unsigned MaxChains = 8; 1631235633Sdim 1632235633Sdim /// IVChainVec - IV users can form a chain of IV increments. 1633235633Sdim SmallVector<IVChain, MaxChains> IVChainVec; 1634235633Sdim 1635235633Sdim /// IVIncSet - IV users that belong to profitable IVChains. 
1636235633Sdim SmallPtrSet<Use*, MaxChains> IVIncSet; 1637235633Sdim 1638203954Srdivacky void OptimizeShadowIV(); 1639203954Srdivacky bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse); 1640203954Srdivacky ICmpInst *OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse); 1641208599Srdivacky void OptimizeLoopTermCond(); 1642193323Sed 1643235633Sdim void ChainInstruction(Instruction *UserInst, Instruction *IVOper, 1644235633Sdim SmallVectorImpl<ChainUsers> &ChainUsersVec); 1645235633Sdim void FinalizeChain(IVChain &Chain); 1646235633Sdim void CollectChains(); 1647235633Sdim void GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter, 1648235633Sdim SmallVectorImpl<WeakVH> &DeadInsts); 1649235633Sdim 1650203954Srdivacky void CollectInterestingTypesAndFactors(); 1651203954Srdivacky void CollectFixupsAndInitialFormulae(); 1652193323Sed 1653203954Srdivacky LSRFixup &getNewFixup() { 1654203954Srdivacky Fixups.push_back(LSRFixup()); 1655203954Srdivacky return Fixups.back(); 1656193323Sed } 1657193323Sed 1658203954Srdivacky // Support for sharing of LSRUses between LSRFixups. 
1659210299Sed typedef DenseMap<std::pair<const SCEV *, LSRUse::KindType>, 1660210299Sed size_t, 1661210299Sed UseMapDenseMapInfo> UseMapTy; 1662203954Srdivacky UseMapTy UseMap; 1663193323Sed 1664208599Srdivacky bool reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg, 1665226890Sdim LSRUse::KindType Kind, Type *AccessTy); 1666193323Sed 1667203954Srdivacky std::pair<size_t, int64_t> getUse(const SCEV *&Expr, 1668203954Srdivacky LSRUse::KindType Kind, 1669226890Sdim Type *AccessTy); 1670193323Sed 1671218893Sdim void DeleteUse(LSRUse &LU, size_t LUIdx); 1672208599Srdivacky 1673208599Srdivacky LSRUse *FindUseWithSimilarFormula(const Formula &F, const LSRUse &OrigLU); 1674208599Srdivacky 1675204642Srdivacky void InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx); 1676203954Srdivacky void InsertSupplementalFormula(const SCEV *S, LSRUse &LU, size_t LUIdx); 1677203954Srdivacky void CountRegisters(const Formula &F, size_t LUIdx); 1678203954Srdivacky bool InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F); 1679193323Sed 1680203954Srdivacky void CollectLoopInvariantFixupsAndFormulae(); 1681193323Sed 1682203954Srdivacky void GenerateReassociations(LSRUse &LU, unsigned LUIdx, Formula Base, 1683203954Srdivacky unsigned Depth = 0); 1684203954Srdivacky void GenerateCombinations(LSRUse &LU, unsigned LUIdx, Formula Base); 1685203954Srdivacky void GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx, Formula Base); 1686203954Srdivacky void GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx, Formula Base); 1687203954Srdivacky void GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, Formula Base); 1688203954Srdivacky void GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base); 1689203954Srdivacky void GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base); 1690203954Srdivacky void GenerateCrossUseConstantOffsets(); 1691203954Srdivacky void GenerateAllReuseFormulae(); 1692193323Sed 1693203954Srdivacky void FilterOutUndesirableDedicatedRegisters(); 
1694208599Srdivacky 1695208599Srdivacky size_t EstimateSearchSpaceComplexity() const; 1696212904Sdim void NarrowSearchSpaceByDetectingSupersets(); 1697212904Sdim void NarrowSearchSpaceByCollapsingUnrolledCode(); 1698212904Sdim void NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(); 1699212904Sdim void NarrowSearchSpaceByPickingWinnerRegs(); 1700203954Srdivacky void NarrowSearchSpaceUsingHeuristics(); 1701193323Sed 1702203954Srdivacky void SolveRecurse(SmallVectorImpl<const Formula *> &Solution, 1703203954Srdivacky Cost &SolutionCost, 1704203954Srdivacky SmallVectorImpl<const Formula *> &Workspace, 1705203954Srdivacky const Cost &CurCost, 1706203954Srdivacky const SmallPtrSet<const SCEV *, 16> &CurRegs, 1707203954Srdivacky DenseSet<const SCEV *> &VisitedRegs) const; 1708203954Srdivacky void Solve(SmallVectorImpl<const Formula *> &Solution) const; 1709193323Sed 1710207618Srdivacky BasicBlock::iterator 1711207618Srdivacky HoistInsertPosition(BasicBlock::iterator IP, 1712207618Srdivacky const SmallVectorImpl<Instruction *> &Inputs) const; 1713235633Sdim BasicBlock::iterator 1714235633Sdim AdjustInsertPositionForExpand(BasicBlock::iterator IP, 1715235633Sdim const LSRFixup &LF, 1716235633Sdim const LSRUse &LU, 1717235633Sdim SCEVExpander &Rewriter) const; 1718207618Srdivacky 1719203954Srdivacky Value *Expand(const LSRFixup &LF, 1720203954Srdivacky const Formula &F, 1721204642Srdivacky BasicBlock::iterator IP, 1722203954Srdivacky SCEVExpander &Rewriter, 1723204642Srdivacky SmallVectorImpl<WeakVH> &DeadInsts) const; 1724204642Srdivacky void RewriteForPHI(PHINode *PN, const LSRFixup &LF, 1725204642Srdivacky const Formula &F, 1726204642Srdivacky SCEVExpander &Rewriter, 1727204642Srdivacky SmallVectorImpl<WeakVH> &DeadInsts, 1728204642Srdivacky Pass *P) const; 1729203954Srdivacky void Rewrite(const LSRFixup &LF, 1730203954Srdivacky const Formula &F, 1731203954Srdivacky SCEVExpander &Rewriter, 1732203954Srdivacky SmallVectorImpl<WeakVH> &DeadInsts, 
1733203954Srdivacky Pass *P) const; 1734203954Srdivacky void ImplementSolution(const SmallVectorImpl<const Formula *> &Solution, 1735203954Srdivacky Pass *P); 1736193323Sed 1737235633Sdimpublic: 1738252723Sdim LSRInstance(Loop *L, Pass *P); 1739193323Sed 1740203954Srdivacky bool getChanged() const { return Changed; } 1741193323Sed 1742203954Srdivacky void print_factors_and_types(raw_ostream &OS) const; 1743203954Srdivacky void print_fixups(raw_ostream &OS) const; 1744203954Srdivacky void print_uses(raw_ostream &OS) const; 1745203954Srdivacky void print(raw_ostream &OS) const; 1746203954Srdivacky void dump() const; 1747203954Srdivacky}; 1748193323Sed 1749203954Srdivacky} 1750193323Sed 1751203954Srdivacky/// OptimizeShadowIV - If IV is used in a int-to-float cast 1752204642Srdivacky/// inside the loop then try to eliminate the cast operation. 1753203954Srdivackyvoid LSRInstance::OptimizeShadowIV() { 1754203954Srdivacky const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L); 1755203954Srdivacky if (isa<SCEVCouldNotCompute>(BackedgeTakenCount)) 1756203954Srdivacky return; 1757193323Sed 1758203954Srdivacky for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); 1759203954Srdivacky UI != E; /* empty */) { 1760203954Srdivacky IVUsers::const_iterator CandidateUI = UI; 1761203954Srdivacky ++UI; 1762203954Srdivacky Instruction *ShadowUse = CandidateUI->getUser(); 1763263509Sdim Type *DestTy = 0; 1764226890Sdim bool IsSigned = false; 1765193323Sed 1766203954Srdivacky /* If shadow use is a int->float cast then insert a second IV 1767203954Srdivacky to eliminate this cast. 
1768193323Sed 1769203954Srdivacky for (unsigned i = 0; i < n; ++i) 1770203954Srdivacky foo((double)i); 1771193323Sed 1772203954Srdivacky is transformed into 1773193323Sed 1774203954Srdivacky double d = 0.0; 1775203954Srdivacky for (unsigned i = 0; i < n; ++i, ++d) 1776203954Srdivacky foo(d); 1777203954Srdivacky */ 1778226890Sdim if (UIToFPInst *UCast = dyn_cast<UIToFPInst>(CandidateUI->getUser())) { 1779226890Sdim IsSigned = false; 1780203954Srdivacky DestTy = UCast->getDestTy(); 1781226890Sdim } 1782226890Sdim else if (SIToFPInst *SCast = dyn_cast<SIToFPInst>(CandidateUI->getUser())) { 1783226890Sdim IsSigned = true; 1784203954Srdivacky DestTy = SCast->getDestTy(); 1785226890Sdim } 1786203954Srdivacky if (!DestTy) continue; 1787193323Sed 1788252723Sdim // If target does not support DestTy natively then do not apply 1789252723Sdim // this transformation. 1790252723Sdim if (!TTI.isTypeLegal(DestTy)) continue; 1791193323Sed 1792203954Srdivacky PHINode *PH = dyn_cast<PHINode>(ShadowUse->getOperand(0)); 1793203954Srdivacky if (!PH) continue; 1794203954Srdivacky if (PH->getNumIncomingValues() != 2) continue; 1795193323Sed 1796226890Sdim Type *SrcTy = PH->getType(); 1797203954Srdivacky int Mantissa = DestTy->getFPMantissaWidth(); 1798203954Srdivacky if (Mantissa == -1) continue; 1799203954Srdivacky if ((int)SE.getTypeSizeInBits(SrcTy) > Mantissa) 1800203954Srdivacky continue; 1801193323Sed 1802203954Srdivacky unsigned Entry, Latch; 1803203954Srdivacky if (PH->getIncomingBlock(0) == L->getLoopPreheader()) { 1804203954Srdivacky Entry = 0; 1805203954Srdivacky Latch = 1; 1806203954Srdivacky } else { 1807203954Srdivacky Entry = 1; 1808203954Srdivacky Latch = 0; 1809203954Srdivacky } 1810193323Sed 1811203954Srdivacky ConstantInt *Init = dyn_cast<ConstantInt>(PH->getIncomingValue(Entry)); 1812203954Srdivacky if (!Init) continue; 1813226890Sdim Constant *NewInit = ConstantFP::get(DestTy, IsSigned ? 
1814226890Sdim (double)Init->getSExtValue() : 1815226890Sdim (double)Init->getZExtValue()); 1816193323Sed 1817203954Srdivacky BinaryOperator *Incr = 1818203954Srdivacky dyn_cast<BinaryOperator>(PH->getIncomingValue(Latch)); 1819203954Srdivacky if (!Incr) continue; 1820203954Srdivacky if (Incr->getOpcode() != Instruction::Add 1821203954Srdivacky && Incr->getOpcode() != Instruction::Sub) 1822203954Srdivacky continue; 1823193323Sed 1824203954Srdivacky /* Initialize new IV, double d = 0.0 in above example. */ 1825263509Sdim ConstantInt *C = 0; 1826203954Srdivacky if (Incr->getOperand(0) == PH) 1827203954Srdivacky C = dyn_cast<ConstantInt>(Incr->getOperand(1)); 1828203954Srdivacky else if (Incr->getOperand(1) == PH) 1829203954Srdivacky C = dyn_cast<ConstantInt>(Incr->getOperand(0)); 1830203954Srdivacky else 1831203954Srdivacky continue; 1832193323Sed 1833203954Srdivacky if (!C) continue; 1834193323Sed 1835203954Srdivacky // Ignore negative constants, as the code below doesn't handle them 1836203954Srdivacky // correctly. TODO: Remove this restriction. 1837203954Srdivacky if (!C->getValue().isStrictlyPositive()) continue; 1838193323Sed 1839203954Srdivacky /* Add new PHINode. */ 1840221345Sdim PHINode *NewPH = PHINode::Create(DestTy, 2, "IV.S.", PH); 1841193323Sed 1842203954Srdivacky /* create new increment. '++d' in above example. */ 1843203954Srdivacky Constant *CFP = ConstantFP::get(DestTy, C->getZExtValue()); 1844203954Srdivacky BinaryOperator *NewIncr = 1845203954Srdivacky BinaryOperator::Create(Incr->getOpcode() == Instruction::Add ? 
1846203954Srdivacky Instruction::FAdd : Instruction::FSub, 1847203954Srdivacky NewPH, CFP, "IV.S.next.", Incr); 1848193323Sed 1849203954Srdivacky NewPH->addIncoming(NewInit, PH->getIncomingBlock(Entry)); 1850203954Srdivacky NewPH->addIncoming(NewIncr, PH->getIncomingBlock(Latch)); 1851193323Sed 1852203954Srdivacky /* Remove cast operation */ 1853203954Srdivacky ShadowUse->replaceAllUsesWith(NewPH); 1854203954Srdivacky ShadowUse->eraseFromParent(); 1855208599Srdivacky Changed = true; 1856203954Srdivacky break; 1857193323Sed } 1858193323Sed} 1859193323Sed 1860193323Sed/// FindIVUserForCond - If Cond has an operand that is an expression of an IV, 1861193323Sed/// set the IV user and stride information and return true, otherwise return 1862193323Sed/// false. 1863208599Srdivackybool LSRInstance::FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse) { 1864203954Srdivacky for (IVUsers::iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI) 1865203954Srdivacky if (UI->getUser() == Cond) { 1866203954Srdivacky // NOTE: we could handle setcc instructions with multiple uses here, but 1867203954Srdivacky // InstCombine does it as well for simple uses, it's not clear that it 1868203954Srdivacky // occurs enough in real life to handle. 1869203954Srdivacky CondUse = UI; 1870203954Srdivacky return true; 1871203954Srdivacky } 1872193323Sed return false; 1873199481Srdivacky} 1874193323Sed 1875194612Sed/// OptimizeMax - Rewrite the loop's terminating condition if it uses 1876194612Sed/// a max computation. 1877193323Sed/// 1878193323Sed/// This is a narrow solution to a specific, but acute, problem. For loops 1879193323Sed/// like this: 1880193323Sed/// 1881193323Sed/// i = 0; 1882193323Sed/// do { 1883193323Sed/// p[i] = 0.0; 1884193323Sed/// } while (++i < n); 1885193323Sed/// 1886194612Sed/// the trip count isn't just 'n', because 'n' might not be positive. 
And 1887194612Sed/// unfortunately this can come up even for loops where the user didn't use 1888194612Sed/// a C do-while loop. For example, seemingly well-behaved top-test loops 1889194612Sed/// will commonly be lowered like this: 1890193323Sed// 1891193323Sed/// if (n > 0) { 1892193323Sed/// i = 0; 1893193323Sed/// do { 1894193323Sed/// p[i] = 0.0; 1895193323Sed/// } while (++i < n); 1896193323Sed/// } 1897193323Sed/// 1898193323Sed/// and then it's possible for subsequent optimization to obscure the if 1899193323Sed/// test in such a way that indvars can't find it. 1900193323Sed/// 1901193323Sed/// When indvars can't find the if test in loops like this, it creates a 1902194612Sed/// max expression, which allows it to give the loop a canonical 1903193323Sed/// induction variable: 1904193323Sed/// 1905193323Sed/// i = 0; 1906194612Sed/// max = n < 1 ? 1 : n; 1907193323Sed/// do { 1908193323Sed/// p[i] = 0.0; 1909194612Sed/// } while (++i != max); 1910193323Sed/// 1911193323Sed/// Canonical induction variables are necessary because the loop passes 1912193323Sed/// are designed around them. The most obvious example of this is the 1913193323Sed/// LoopInfo analysis, which doesn't remember trip count values. It 1914193323Sed/// expects to be able to rediscover the trip count each time it is 1915203954Srdivacky/// needed, and it does this using a simple analysis that only succeeds if 1916193323Sed/// the loop has a canonical induction variable. 1917193323Sed/// 1918193323Sed/// However, when it comes time to generate code, the maximum operation 1919193323Sed/// can be quite costly, especially if it's inside of an outer loop. 1920193323Sed/// 1921193323Sed/// This function solves this problem by detecting this type of loop and 1922193323Sed/// rewriting their conditions from ICMP_NE back to ICMP_SLT, and deleting 1923193323Sed/// the instructions for the maximum computation. 
1924193323Sed/// 1925203954SrdivackyICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) { 1926193323Sed // Check that the loop matches the pattern we're looking for. 1927193323Sed if (Cond->getPredicate() != CmpInst::ICMP_EQ && 1928193323Sed Cond->getPredicate() != CmpInst::ICMP_NE) 1929193323Sed return Cond; 1930193323Sed 1931193323Sed SelectInst *Sel = dyn_cast<SelectInst>(Cond->getOperand(1)); 1932193323Sed if (!Sel || !Sel->hasOneUse()) return Cond; 1933193323Sed 1934203954Srdivacky const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L); 1935193323Sed if (isa<SCEVCouldNotCompute>(BackedgeTakenCount)) 1936193323Sed return Cond; 1937207618Srdivacky const SCEV *One = SE.getConstant(BackedgeTakenCount->getType(), 1); 1938193323Sed 1939193323Sed // Add one to the backedge-taken count to get the trip count. 1940212904Sdim const SCEV *IterationCount = SE.getAddExpr(One, BackedgeTakenCount); 1941207618Srdivacky if (IterationCount != SE.getSCEV(Sel)) return Cond; 1942193323Sed 1943207618Srdivacky // Check for a max calculation that matches the pattern. There's no check 1944207618Srdivacky // for ICMP_ULE here because the comparison would be with zero, which 1945207618Srdivacky // isn't interesting. 1946207618Srdivacky CmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE; 1947207618Srdivacky const SCEVNAryExpr *Max = 0; 1948207618Srdivacky if (const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(BackedgeTakenCount)) { 1949207618Srdivacky Pred = ICmpInst::ICMP_SLE; 1950207618Srdivacky Max = S; 1951207618Srdivacky } else if (const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(IterationCount)) { 1952207618Srdivacky Pred = ICmpInst::ICMP_SLT; 1953207618Srdivacky Max = S; 1954207618Srdivacky } else if (const SCEVUMaxExpr *U = dyn_cast<SCEVUMaxExpr>(IterationCount)) { 1955207618Srdivacky Pred = ICmpInst::ICMP_ULT; 1956207618Srdivacky Max = U; 1957207618Srdivacky } else { 1958207618Srdivacky // No match; bail. 
1959194612Sed return Cond; 1960207618Srdivacky } 1961193323Sed 1962194612Sed // To handle a max with more than two operands, this optimization would 1963194612Sed // require additional checking and setup. 1964194612Sed if (Max->getNumOperands() != 2) 1965194612Sed return Cond; 1966193323Sed 1967198090Srdivacky const SCEV *MaxLHS = Max->getOperand(0); 1968198090Srdivacky const SCEV *MaxRHS = Max->getOperand(1); 1969207618Srdivacky 1970207618Srdivacky // ScalarEvolution canonicalizes constants to the left. For < and >, look 1971207618Srdivacky // for a comparison with 1. For <= and >=, a comparison with zero. 1972207618Srdivacky if (!MaxLHS || 1973207618Srdivacky (ICmpInst::isTrueWhenEqual(Pred) ? !MaxLHS->isZero() : (MaxLHS != One))) 1974207618Srdivacky return Cond; 1975207618Srdivacky 1976193323Sed // Check the relevant induction variable for conformance to 1977193323Sed // the pattern. 1978203954Srdivacky const SCEV *IV = SE.getSCEV(Cond->getOperand(0)); 1979193323Sed const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(IV); 1980193323Sed if (!AR || !AR->isAffine() || 1981193323Sed AR->getStart() != One || 1982203954Srdivacky AR->getStepRecurrence(SE) != One) 1983193323Sed return Cond; 1984193323Sed 1985193323Sed assert(AR->getLoop() == L && 1986193323Sed "Loop condition operand is an addrec in a different loop!"); 1987193323Sed 1988193323Sed // Check the right operand of the select, and remember it, as it will 1989193323Sed // be used in the new comparison instruction. 1990193323Sed Value *NewRHS = 0; 1991207618Srdivacky if (ICmpInst::isTrueWhenEqual(Pred)) { 1992207618Srdivacky // Look for n+1, and grab n. 
1993207618Srdivacky if (AddOperator *BO = dyn_cast<AddOperator>(Sel->getOperand(1))) 1994252723Sdim if (ConstantInt *BO1 = dyn_cast<ConstantInt>(BO->getOperand(1))) 1995252723Sdim if (BO1->isOne() && SE.getSCEV(BO->getOperand(0)) == MaxRHS) 1996252723Sdim NewRHS = BO->getOperand(0); 1997207618Srdivacky if (AddOperator *BO = dyn_cast<AddOperator>(Sel->getOperand(2))) 1998252723Sdim if (ConstantInt *BO1 = dyn_cast<ConstantInt>(BO->getOperand(1))) 1999252723Sdim if (BO1->isOne() && SE.getSCEV(BO->getOperand(0)) == MaxRHS) 2000252723Sdim NewRHS = BO->getOperand(0); 2001207618Srdivacky if (!NewRHS) 2002207618Srdivacky return Cond; 2003207618Srdivacky } else if (SE.getSCEV(Sel->getOperand(1)) == MaxRHS) 2004193323Sed NewRHS = Sel->getOperand(1); 2005203954Srdivacky else if (SE.getSCEV(Sel->getOperand(2)) == MaxRHS) 2006193323Sed NewRHS = Sel->getOperand(2); 2007210299Sed else if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(MaxRHS)) 2008210299Sed NewRHS = SU->getValue(); 2009207618Srdivacky else 2010210299Sed // Max doesn't match expected pattern. 2011210299Sed return Cond; 2012193323Sed 2013194612Sed // Determine the new comparison opcode. It may be signed or unsigned, 2014194612Sed // and the original comparison may be either equality or inequality. 2015194612Sed if (Cond->getPredicate() == CmpInst::ICMP_EQ) 2016194612Sed Pred = CmpInst::getInversePredicate(Pred); 2017194612Sed 2018193323Sed // Ok, everything looks ok to change the condition into an SLT or SGE and 2019193323Sed // delete the max calculation. 2020193323Sed ICmpInst *NewCond = 2021198090Srdivacky new ICmpInst(Cond, Pred, Cond->getOperand(0), NewRHS, "scmp"); 2022193323Sed 2023193323Sed // Delete the max calculation instructions. 
2024193323Sed Cond->replaceAllUsesWith(NewCond); 2025193323Sed CondUse->setUser(NewCond); 2026193323Sed Instruction *Cmp = cast<Instruction>(Sel->getOperand(0)); 2027193323Sed Cond->eraseFromParent(); 2028193323Sed Sel->eraseFromParent(); 2029193323Sed if (Cmp->use_empty()) 2030193323Sed Cmp->eraseFromParent(); 2031193323Sed return NewCond; 2032193323Sed} 2033193323Sed 2034203954Srdivacky/// OptimizeLoopTermCond - Change loop terminating condition to use the 2035203954Srdivacky/// postinc iv when possible. 2036208599Srdivackyvoid 2037203954SrdivackyLSRInstance::OptimizeLoopTermCond() { 2038203954Srdivacky SmallPtrSet<Instruction *, 4> PostIncs; 2039193323Sed 2040203954Srdivacky BasicBlock *LatchBlock = L->getLoopLatch(); 2041203954Srdivacky SmallVector<BasicBlock*, 8> ExitingBlocks; 2042203954Srdivacky L->getExitingBlocks(ExitingBlocks); 2043199481Srdivacky 2044203954Srdivacky for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) { 2045203954Srdivacky BasicBlock *ExitingBlock = ExitingBlocks[i]; 2046203954Srdivacky 2047203954Srdivacky // Get the terminating condition for the loop if possible. If we 2048203954Srdivacky // can, we want to change it to use a post-incremented version of its 2049203954Srdivacky // induction variable, to allow coalescing the live ranges for the IV into 2050203954Srdivacky // one register value. 2051203954Srdivacky 2052203954Srdivacky BranchInst *TermBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator()); 2053203954Srdivacky if (!TermBr) 2054193323Sed continue; 2055203954Srdivacky // FIXME: Overly conservative, termination condition could be an 'or' etc.. 2056203954Srdivacky if (TermBr->isUnconditional() || !isa<ICmpInst>(TermBr->getCondition())) 2057203954Srdivacky continue; 2058193323Sed 2059203954Srdivacky // Search IVUsesByStride to find Cond's IVUse if there is one. 
2060203954Srdivacky IVStrideUse *CondUse = 0; 2061203954Srdivacky ICmpInst *Cond = cast<ICmpInst>(TermBr->getCondition()); 2062203954Srdivacky if (!FindIVUserForCond(Cond, CondUse)) 2063203954Srdivacky continue; 2064193323Sed 2065203954Srdivacky // If the trip count is computed in terms of a max (due to ScalarEvolution 2066203954Srdivacky // being unable to find a sufficient guard, for example), change the loop 2067203954Srdivacky // comparison to use SLT or ULT instead of NE. 2068203954Srdivacky // One consequence of doing this now is that it disrupts the count-down 2069203954Srdivacky // optimization. That's not always a bad thing though, because in such 2070203954Srdivacky // cases it may still be worthwhile to avoid a max. 2071203954Srdivacky Cond = OptimizeMax(Cond, CondUse); 2072193323Sed 2073203954Srdivacky // If this exiting block dominates the latch block, it may also use 2074203954Srdivacky // the post-inc value if it won't be shared with other uses. 2075203954Srdivacky // Check for dominance. 2076203954Srdivacky if (!DT.dominates(ExitingBlock, LatchBlock)) 2077203954Srdivacky continue; 2078193323Sed 2079203954Srdivacky // Conservatively avoid trying to use the post-inc value in non-latch 2080203954Srdivacky // exits if there may be pre-inc users in intervening blocks. 2081203954Srdivacky if (LatchBlock != ExitingBlock) 2082203954Srdivacky for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI) 2083203954Srdivacky // Test if the use is reachable from the exiting block. This dominator 2084203954Srdivacky // query is a conservative approximation of reachability. 2085203954Srdivacky if (&*UI != CondUse && 2086203954Srdivacky !DT.properlyDominates(UI->getUser()->getParent(), ExitingBlock)) { 2087203954Srdivacky // Conservatively assume there may be reuse if the quotient of their 2088203954Srdivacky // strides could be a legal scale. 
2089207618Srdivacky const SCEV *A = IU.getStride(*CondUse, L); 2090207618Srdivacky const SCEV *B = IU.getStride(*UI, L); 2091207618Srdivacky if (!A || !B) continue; 2092203954Srdivacky if (SE.getTypeSizeInBits(A->getType()) != 2093203954Srdivacky SE.getTypeSizeInBits(B->getType())) { 2094203954Srdivacky if (SE.getTypeSizeInBits(A->getType()) > 2095203954Srdivacky SE.getTypeSizeInBits(B->getType())) 2096203954Srdivacky B = SE.getSignExtendExpr(B, A->getType()); 2097203954Srdivacky else 2098203954Srdivacky A = SE.getSignExtendExpr(A, B->getType()); 2099203954Srdivacky } 2100203954Srdivacky if (const SCEVConstant *D = 2101204642Srdivacky dyn_cast_or_null<SCEVConstant>(getExactSDiv(B, A, SE))) { 2102208599Srdivacky const ConstantInt *C = D->getValue(); 2103203954Srdivacky // Stride of one or negative one can have reuse with non-addresses. 2104208599Srdivacky if (C->isOne() || C->isAllOnesValue()) 2105203954Srdivacky goto decline_post_inc; 2106203954Srdivacky // Avoid weird situations. 2107208599Srdivacky if (C->getValue().getMinSignedBits() >= 64 || 2108208599Srdivacky C->getValue().isMinSignedValue()) 2109203954Srdivacky goto decline_post_inc; 2110203954Srdivacky // Check for possible scaled-address reuse. 
2111226890Sdim Type *AccessTy = getAccessType(UI->getUser()); 2112252723Sdim int64_t Scale = C->getSExtValue(); 2113252723Sdim if (TTI.isLegalAddressingMode(AccessTy, /*BaseGV=*/ 0, 2114252723Sdim /*BaseOffset=*/ 0, 2115252723Sdim /*HasBaseReg=*/ false, Scale)) 2116203954Srdivacky goto decline_post_inc; 2117252723Sdim Scale = -Scale; 2118252723Sdim if (TTI.isLegalAddressingMode(AccessTy, /*BaseGV=*/ 0, 2119252723Sdim /*BaseOffset=*/ 0, 2120252723Sdim /*HasBaseReg=*/ false, Scale)) 2121203954Srdivacky goto decline_post_inc; 2122203954Srdivacky } 2123203954Srdivacky } 2124193323Sed 2125203954Srdivacky DEBUG(dbgs() << " Change loop exiting icmp to use postinc iv: " 2126203954Srdivacky << *Cond << '\n'); 2127193323Sed 2128203954Srdivacky // It's possible for the setcc instruction to be anywhere in the loop, and 2129203954Srdivacky // possible for it to have multiple users. If it is not immediately before 2130203954Srdivacky // the exiting block branch, move it. 2131203954Srdivacky if (&*++BasicBlock::iterator(Cond) != TermBr) { 2132203954Srdivacky if (Cond->hasOneUse()) { 2133203954Srdivacky Cond->moveBefore(TermBr); 2134203954Srdivacky } else { 2135203954Srdivacky // Clone the terminating condition and insert into the loopend. 2136203954Srdivacky ICmpInst *OldCond = Cond; 2137203954Srdivacky Cond = cast<ICmpInst>(Cond->clone()); 2138203954Srdivacky Cond->setName(L->getHeader()->getName() + ".termcond"); 2139203954Srdivacky ExitingBlock->getInstList().insert(TermBr, Cond); 2140203954Srdivacky 2141203954Srdivacky // Clone the IVUse, as the old use still exists! 
2142224145Sdim CondUse = &IU.AddUser(Cond, CondUse->getOperandValToReplace()); 2143203954Srdivacky TermBr->replaceUsesOfWith(OldCond, Cond); 2144193323Sed } 2145203954Srdivacky } 2146193323Sed 2147203954Srdivacky // If we get to here, we know that we can transform the setcc instruction to 2148203954Srdivacky // use the post-incremented version of the IV, allowing us to coalesce the 2149203954Srdivacky // live ranges for the IV correctly. 2150207618Srdivacky CondUse->transformToPostInc(L); 2151203954Srdivacky Changed = true; 2152193323Sed 2153203954Srdivacky PostIncs.insert(Cond); 2154203954Srdivacky decline_post_inc:; 2155203954Srdivacky } 2156203954Srdivacky 2157203954Srdivacky // Determine an insertion point for the loop induction variable increment. It 2158203954Srdivacky // must dominate all the post-inc comparisons we just set up, and it must 2159203954Srdivacky // dominate the loop latch edge. 2160203954Srdivacky IVIncInsertPos = L->getLoopLatch()->getTerminator(); 2161203954Srdivacky for (SmallPtrSet<Instruction *, 4>::const_iterator I = PostIncs.begin(), 2162203954Srdivacky E = PostIncs.end(); I != E; ++I) { 2163203954Srdivacky BasicBlock *BB = 2164203954Srdivacky DT.findNearestCommonDominator(IVIncInsertPos->getParent(), 2165203954Srdivacky (*I)->getParent()); 2166203954Srdivacky if (BB == (*I)->getParent()) 2167203954Srdivacky IVIncInsertPos = *I; 2168203954Srdivacky else if (BB != IVIncInsertPos->getParent()) 2169203954Srdivacky IVIncInsertPos = BB->getTerminator(); 2170203954Srdivacky } 2171203954Srdivacky} 2172203954Srdivacky 2173221345Sdim/// reconcileNewOffset - Determine if the given use can accommodate a fixup 2174208599Srdivacky/// at the given offset and other details. If so, update the use and 2175208599Srdivacky/// return true. 
2176203954Srdivackybool 2177208599SrdivackyLSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset, bool HasBaseReg, 2178226890Sdim LSRUse::KindType Kind, Type *AccessTy) { 2179203954Srdivacky int64_t NewMinOffset = LU.MinOffset; 2180203954Srdivacky int64_t NewMaxOffset = LU.MaxOffset; 2181226890Sdim Type *NewAccessTy = AccessTy; 2182203954Srdivacky 2183203954Srdivacky // Check for a mismatched kind. It's tempting to collapse mismatched kinds to 2184203954Srdivacky // something conservative, however this can pessimize in the case that one of 2185203954Srdivacky // the uses will have all its uses outside the loop, for example. 2186203954Srdivacky if (LU.Kind != Kind) 2187203954Srdivacky return false; 2188203954Srdivacky // Conservatively assume HasBaseReg is true for now. 2189203954Srdivacky if (NewOffset < LU.MinOffset) { 2190252723Sdim if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ 0, 2191252723Sdim LU.MaxOffset - NewOffset, HasBaseReg)) 2192203954Srdivacky return false; 2193203954Srdivacky NewMinOffset = NewOffset; 2194203954Srdivacky } else if (NewOffset > LU.MaxOffset) { 2195252723Sdim if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ 0, 2196252723Sdim NewOffset - LU.MinOffset, HasBaseReg)) 2197203954Srdivacky return false; 2198203954Srdivacky NewMaxOffset = NewOffset; 2199203954Srdivacky } 2200203954Srdivacky // Check for a mismatched access type, and fall back conservatively as needed. 2201210299Sed // TODO: Be less conservative when the type is similar and can use the same 2202210299Sed // addressing modes. 2203203954Srdivacky if (Kind == LSRUse::Address && AccessTy != LU.AccessTy) 2204203954Srdivacky NewAccessTy = Type::getVoidTy(AccessTy->getContext()); 2205203954Srdivacky 2206203954Srdivacky // Update the use. 
2207203954Srdivacky LU.MinOffset = NewMinOffset; 2208203954Srdivacky LU.MaxOffset = NewMaxOffset; 2209203954Srdivacky LU.AccessTy = NewAccessTy; 2210203954Srdivacky if (NewOffset != LU.Offsets.back()) 2211203954Srdivacky LU.Offsets.push_back(NewOffset); 2212203954Srdivacky return true; 2213203954Srdivacky} 2214203954Srdivacky 2215203954Srdivacky/// getUse - Return an LSRUse index and an offset value for a fixup which 2216203954Srdivacky/// needs the given expression, with the given kind and optional access type. 2217204642Srdivacky/// Either reuse an existing use or create a new one, as needed. 2218203954Srdivackystd::pair<size_t, int64_t> 2219203954SrdivackyLSRInstance::getUse(const SCEV *&Expr, 2220226890Sdim LSRUse::KindType Kind, Type *AccessTy) { 2221203954Srdivacky const SCEV *Copy = Expr; 2222203954Srdivacky int64_t Offset = ExtractImmediate(Expr, SE); 2223203954Srdivacky 2224203954Srdivacky // Basic uses can't accept any offset, for example. 2225252723Sdim if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ 0, 2226252723Sdim Offset, /*HasBaseReg=*/ true)) { 2227203954Srdivacky Expr = Copy; 2228203954Srdivacky Offset = 0; 2229203954Srdivacky } 2230203954Srdivacky 2231203954Srdivacky std::pair<UseMapTy::iterator, bool> P = 2232210299Sed UseMap.insert(std::make_pair(std::make_pair(Expr, Kind), 0)); 2233203954Srdivacky if (!P.second) { 2234203954Srdivacky // A use already existed with this base. 2235203954Srdivacky size_t LUIdx = P.first->second; 2236203954Srdivacky LSRUse &LU = Uses[LUIdx]; 2237208599Srdivacky if (reconcileNewOffset(LU, Offset, /*HasBaseReg=*/true, Kind, AccessTy)) 2238203954Srdivacky // Reuse this use. 2239203954Srdivacky return std::make_pair(LUIdx, Offset); 2240203954Srdivacky } 2241203954Srdivacky 2242203954Srdivacky // Create a new use. 
2243203954Srdivacky size_t LUIdx = Uses.size(); 2244203954Srdivacky P.first->second = LUIdx; 2245203954Srdivacky Uses.push_back(LSRUse(Kind, AccessTy)); 2246203954Srdivacky LSRUse &LU = Uses[LUIdx]; 2247203954Srdivacky 2248203954Srdivacky // We don't need to track redundant offsets, but we don't need to go out 2249203954Srdivacky // of our way here to avoid them. 2250203954Srdivacky if (LU.Offsets.empty() || Offset != LU.Offsets.back()) 2251203954Srdivacky LU.Offsets.push_back(Offset); 2252203954Srdivacky 2253203954Srdivacky LU.MinOffset = Offset; 2254203954Srdivacky LU.MaxOffset = Offset; 2255203954Srdivacky return std::make_pair(LUIdx, Offset); 2256203954Srdivacky} 2257203954Srdivacky 2258208599Srdivacky/// DeleteUse - Delete the given use from the Uses list. 2259218893Sdimvoid LSRInstance::DeleteUse(LSRUse &LU, size_t LUIdx) { 2260208599Srdivacky if (&LU != &Uses.back()) 2261208599Srdivacky std::swap(LU, Uses.back()); 2262208599Srdivacky Uses.pop_back(); 2263218893Sdim 2264218893Sdim // Update RegUses. 2265218893Sdim RegUses.SwapAndDropUse(LUIdx, Uses.size()); 2266208599Srdivacky} 2267208599Srdivacky 2268208599Srdivacky/// FindUseWithFormula - Look for a use distinct from OrigLU which is has 2269208599Srdivacky/// a formula that has the same registers as the given formula. 2270208599SrdivackyLSRUse * 2271208599SrdivackyLSRInstance::FindUseWithSimilarFormula(const Formula &OrigF, 2272208599Srdivacky const LSRUse &OrigLU) { 2273212904Sdim // Search all uses for the formula. This could be more clever. 2274208599Srdivacky for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) { 2275208599Srdivacky LSRUse &LU = Uses[LUIdx]; 2276212904Sdim // Check whether this use is close enough to OrigLU, to see whether it's 2277212904Sdim // worthwhile looking through its formulae. 
2278212904Sdim // Ignore ICmpZero uses because they may contain formulae generated by 2279212904Sdim // GenerateICmpZeroScales, in which case adding fixup offsets may 2280212904Sdim // be invalid. 2281208599Srdivacky if (&LU != &OrigLU && 2282208599Srdivacky LU.Kind != LSRUse::ICmpZero && 2283208599Srdivacky LU.Kind == OrigLU.Kind && OrigLU.AccessTy == LU.AccessTy && 2284212904Sdim LU.WidestFixupType == OrigLU.WidestFixupType && 2285208599Srdivacky LU.HasFormulaWithSameRegs(OrigF)) { 2286212904Sdim // Scan through this use's formulae. 2287208599Srdivacky for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(), 2288208599Srdivacky E = LU.Formulae.end(); I != E; ++I) { 2289208599Srdivacky const Formula &F = *I; 2290212904Sdim // Check to see if this formula has the same registers and symbols 2291212904Sdim // as OrigF. 2292208599Srdivacky if (F.BaseRegs == OrigF.BaseRegs && 2293208599Srdivacky F.ScaledReg == OrigF.ScaledReg && 2294252723Sdim F.BaseGV == OrigF.BaseGV && 2295252723Sdim F.Scale == OrigF.Scale && 2296223017Sdim F.UnfoldedOffset == OrigF.UnfoldedOffset) { 2297252723Sdim if (F.BaseOffset == 0) 2298208599Srdivacky return &LU; 2299212904Sdim // This is the formula where all the registers and symbols matched; 2300212904Sdim // there aren't going to be any others. Since we declined it, we 2301245431Sdim // can skip the rest of the formulae and proceed to the next LSRUse. 2302208599Srdivacky break; 2303208599Srdivacky } 2304208599Srdivacky } 2305208599Srdivacky } 2306208599Srdivacky } 2307208599Srdivacky 2308212904Sdim // Nothing looked good. 2309208599Srdivacky return 0; 2310208599Srdivacky} 2311208599Srdivacky 2312203954Srdivackyvoid LSRInstance::CollectInterestingTypesAndFactors() { 2313203954Srdivacky SmallSetVector<const SCEV *, 4> Strides; 2314203954Srdivacky 2315204642Srdivacky // Collect interesting types and strides. 
2316207618Srdivacky SmallVector<const SCEV *, 4> Worklist; 2317203954Srdivacky for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI) { 2318207618Srdivacky const SCEV *Expr = IU.getExpr(*UI); 2319203954Srdivacky 2320203954Srdivacky // Collect interesting types. 2321207618Srdivacky Types.insert(SE.getEffectiveSCEVType(Expr->getType())); 2322203954Srdivacky 2323207618Srdivacky // Add strides for mentioned loops. 2324207618Srdivacky Worklist.push_back(Expr); 2325207618Srdivacky do { 2326207618Srdivacky const SCEV *S = Worklist.pop_back_val(); 2327207618Srdivacky if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { 2328235633Sdim if (AR->getLoop() == L) 2329235633Sdim Strides.insert(AR->getStepRecurrence(SE)); 2330207618Srdivacky Worklist.push_back(AR->getStart()); 2331207618Srdivacky } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { 2332210299Sed Worklist.append(Add->op_begin(), Add->op_end()); 2333207618Srdivacky } 2334207618Srdivacky } while (!Worklist.empty()); 2335204642Srdivacky } 2336204642Srdivacky 2337204642Srdivacky // Compute interesting factors from the set of interesting strides. 
2338204642Srdivacky for (SmallSetVector<const SCEV *, 4>::const_iterator 2339204642Srdivacky I = Strides.begin(), E = Strides.end(); I != E; ++I) 2340203954Srdivacky for (SmallSetVector<const SCEV *, 4>::const_iterator NewStrideIter = 2341212904Sdim llvm::next(I); NewStrideIter != E; ++NewStrideIter) { 2342204642Srdivacky const SCEV *OldStride = *I; 2343203954Srdivacky const SCEV *NewStride = *NewStrideIter; 2344193323Sed 2345203954Srdivacky if (SE.getTypeSizeInBits(OldStride->getType()) != 2346203954Srdivacky SE.getTypeSizeInBits(NewStride->getType())) { 2347203954Srdivacky if (SE.getTypeSizeInBits(OldStride->getType()) > 2348203954Srdivacky SE.getTypeSizeInBits(NewStride->getType())) 2349203954Srdivacky NewStride = SE.getSignExtendExpr(NewStride, OldStride->getType()); 2350203954Srdivacky else 2351203954Srdivacky OldStride = SE.getSignExtendExpr(OldStride, NewStride->getType()); 2352193323Sed } 2353203954Srdivacky if (const SCEVConstant *Factor = 2354204642Srdivacky dyn_cast_or_null<SCEVConstant>(getExactSDiv(NewStride, OldStride, 2355204642Srdivacky SE, true))) { 2356203954Srdivacky if (Factor->getValue()->getValue().getMinSignedBits() <= 64) 2357203954Srdivacky Factors.insert(Factor->getValue()->getValue().getSExtValue()); 2358203954Srdivacky } else if (const SCEVConstant *Factor = 2359204642Srdivacky dyn_cast_or_null<SCEVConstant>(getExactSDiv(OldStride, 2360204642Srdivacky NewStride, 2361204642Srdivacky SE, true))) { 2362203954Srdivacky if (Factor->getValue()->getValue().getMinSignedBits() <= 64) 2363203954Srdivacky Factors.insert(Factor->getValue()->getValue().getSExtValue()); 2364203954Srdivacky } 2365203954Srdivacky } 2366199481Srdivacky 2367203954Srdivacky // If all uses use the same type, don't bother looking for truncation-based 2368203954Srdivacky // reuse. 
2369203954Srdivacky if (Types.size() == 1) 2370203954Srdivacky Types.clear(); 2371193323Sed 2372203954Srdivacky DEBUG(print_factors_and_types(dbgs())); 2373203954Srdivacky} 2374193323Sed 2375235633Sdim/// findIVOperand - Helper for CollectChains that finds an IV operand (computed 2376235633Sdim/// by an AddRec in this loop) within [OI,OE) or returns OE. If IVUsers mapped 2377235633Sdim/// Instructions to IVStrideUses, we could partially skip this. 2378235633Sdimstatic User::op_iterator 2379235633SdimfindIVOperand(User::op_iterator OI, User::op_iterator OE, 2380235633Sdim Loop *L, ScalarEvolution &SE) { 2381235633Sdim for(; OI != OE; ++OI) { 2382235633Sdim if (Instruction *Oper = dyn_cast<Instruction>(*OI)) { 2383235633Sdim if (!SE.isSCEVable(Oper->getType())) 2384235633Sdim continue; 2385235633Sdim 2386235633Sdim if (const SCEVAddRecExpr *AR = 2387235633Sdim dyn_cast<SCEVAddRecExpr>(SE.getSCEV(Oper))) { 2388235633Sdim if (AR->getLoop() == L) 2389235633Sdim break; 2390235633Sdim } 2391235633Sdim } 2392235633Sdim } 2393235633Sdim return OI; 2394235633Sdim} 2395235633Sdim 2396235633Sdim/// getWideOperand - IVChain logic must consistenctly peek base TruncInst 2397235633Sdim/// operands, so wrap it in a convenient helper. 2398235633Sdimstatic Value *getWideOperand(Value *Oper) { 2399235633Sdim if (TruncInst *Trunc = dyn_cast<TruncInst>(Oper)) 2400235633Sdim return Trunc->getOperand(0); 2401235633Sdim return Oper; 2402235633Sdim} 2403235633Sdim 2404235633Sdim/// isCompatibleIVType - Return true if we allow an IV chain to include both 2405235633Sdim/// types. 2406235633Sdimstatic bool isCompatibleIVType(Value *LVal, Value *RVal) { 2407235633Sdim Type *LType = LVal->getType(); 2408235633Sdim Type *RType = RVal->getType(); 2409235633Sdim return (LType == RType) || (LType->isPointerTy() && RType->isPointerTy()); 2410235633Sdim} 2411235633Sdim 2412235633Sdim/// getExprBase - Return an approximation of this SCEV expression's "base", or 2413235633Sdim/// NULL for any constant. 
Returning the expression itself is 2414235633Sdim/// conservative. Returning a deeper subexpression is more precise and valid as 2415235633Sdim/// long as it isn't less complex than another subexpression. For expressions 2416235633Sdim/// involving multiple unscaled values, we need to return the pointer-type 2417235633Sdim/// SCEVUnknown. This avoids forming chains across objects, such as: 2418235633Sdim/// PrevOper==a[i], IVOper==b[i], IVInc==b-a. 2419235633Sdim/// 2420235633Sdim/// Since SCEVUnknown is the rightmost type, and pointers are the rightmost 2421235633Sdim/// SCEVUnknown, we simply return the rightmost SCEV operand. 2422235633Sdimstatic const SCEV *getExprBase(const SCEV *S) { 2423235633Sdim switch (S->getSCEVType()) { 2424235633Sdim default: // uncluding scUnknown. 2425235633Sdim return S; 2426235633Sdim case scConstant: 2427235633Sdim return 0; 2428235633Sdim case scTruncate: 2429235633Sdim return getExprBase(cast<SCEVTruncateExpr>(S)->getOperand()); 2430235633Sdim case scZeroExtend: 2431235633Sdim return getExprBase(cast<SCEVZeroExtendExpr>(S)->getOperand()); 2432235633Sdim case scSignExtend: 2433235633Sdim return getExprBase(cast<SCEVSignExtendExpr>(S)->getOperand()); 2434235633Sdim case scAddExpr: { 2435235633Sdim // Skip over scaled operands (scMulExpr) to follow add operands as long as 2436235633Sdim // there's nothing more complex. 2437235633Sdim // FIXME: not sure if we want to recognize negation. 2438235633Sdim const SCEVAddExpr *Add = cast<SCEVAddExpr>(S); 2439235633Sdim for (std::reverse_iterator<SCEVAddExpr::op_iterator> I(Add->op_end()), 2440235633Sdim E(Add->op_begin()); I != E; ++I) { 2441235633Sdim const SCEV *SubExpr = *I; 2442235633Sdim if (SubExpr->getSCEVType() == scAddExpr) 2443235633Sdim return getExprBase(SubExpr); 2444235633Sdim 2445235633Sdim if (SubExpr->getSCEVType() != scMulExpr) 2446235633Sdim return SubExpr; 2447235633Sdim } 2448235633Sdim return S; // all operands are scaled, be conservative. 
2449235633Sdim } 2450235633Sdim case scAddRecExpr: 2451235633Sdim return getExprBase(cast<SCEVAddRecExpr>(S)->getStart()); 2452235633Sdim } 2453235633Sdim} 2454235633Sdim 2455235633Sdim/// Return true if the chain increment is profitable to expand into a loop 2456235633Sdim/// invariant value, which may require its own register. A profitable chain 2457235633Sdim/// increment will be an offset relative to the same base. We allow such offsets 2458235633Sdim/// to potentially be used as chain increment as long as it's not obviously 2459235633Sdim/// expensive to expand using real instructions. 2460245431Sdimbool IVChain::isProfitableIncrement(const SCEV *OperExpr, 2461245431Sdim const SCEV *IncExpr, 2462245431Sdim ScalarEvolution &SE) { 2463245431Sdim // Aggressively form chains when -stress-ivchain. 2464235633Sdim if (StressIVChain) 2465245431Sdim return true; 2466235633Sdim 2467235633Sdim // Do not replace a constant offset from IV head with a nonconstant IV 2468235633Sdim // increment. 2469235633Sdim if (!isa<SCEVConstant>(IncExpr)) { 2470245431Sdim const SCEV *HeadExpr = SE.getSCEV(getWideOperand(Incs[0].IVOperand)); 2471235633Sdim if (isa<SCEVConstant>(SE.getMinusSCEV(OperExpr, HeadExpr))) 2472235633Sdim return 0; 2473235633Sdim } 2474235633Sdim 2475235633Sdim SmallPtrSet<const SCEV*, 8> Processed; 2476245431Sdim return !isHighCostExpansion(IncExpr, Processed, SE); 2477235633Sdim} 2478235633Sdim 2479235633Sdim/// Return true if the number of registers needed for the chain is estimated to 2480235633Sdim/// be less than the number required for the individual IV users. First prohibit 2481235633Sdim/// any IV users that keep the IV live across increments (the Users set should 2482235633Sdim/// be empty). Next count the number and type of increments in the chain. 2483235633Sdim/// 2484235633Sdim/// Chaining IVs can lead to considerable code bloat if ISEL doesn't 2485235633Sdim/// effectively use postinc addressing modes. 
Only consider it profitable it the 2486235633Sdim/// increments can be computed in fewer registers when chained. 2487235633Sdim/// 2488235633Sdim/// TODO: Consider IVInc free if it's already used in another chains. 2489235633Sdimstatic bool 2490235633SdimisProfitableChain(IVChain &Chain, SmallPtrSet<Instruction*, 4> &Users, 2491252723Sdim ScalarEvolution &SE, const TargetTransformInfo &TTI) { 2492235633Sdim if (StressIVChain) 2493235633Sdim return true; 2494235633Sdim 2495245431Sdim if (!Chain.hasIncs()) 2496235633Sdim return false; 2497235633Sdim 2498235633Sdim if (!Users.empty()) { 2499245431Sdim DEBUG(dbgs() << "Chain: " << *Chain.Incs[0].UserInst << " users:\n"; 2500235633Sdim for (SmallPtrSet<Instruction*, 4>::const_iterator I = Users.begin(), 2501235633Sdim E = Users.end(); I != E; ++I) { 2502235633Sdim dbgs() << " " << **I << "\n"; 2503235633Sdim }); 2504235633Sdim return false; 2505235633Sdim } 2506245431Sdim assert(!Chain.Incs.empty() && "empty IV chains are not allowed"); 2507235633Sdim 2508235633Sdim // The chain itself may require a register, so intialize cost to 1. 2509235633Sdim int cost = 1; 2510235633Sdim 2511235633Sdim // A complete chain likely eliminates the need for keeping the original IV in 2512235633Sdim // a register. LSR does not currently know how to form a complete chain unless 2513235633Sdim // the header phi already exists. 
2514245431Sdim if (isa<PHINode>(Chain.tailUserInst()) 2515245431Sdim && SE.getSCEV(Chain.tailUserInst()) == Chain.Incs[0].IncExpr) { 2516235633Sdim --cost; 2517235633Sdim } 2518235633Sdim const SCEV *LastIncExpr = 0; 2519235633Sdim unsigned NumConstIncrements = 0; 2520235633Sdim unsigned NumVarIncrements = 0; 2521235633Sdim unsigned NumReusedIncrements = 0; 2522245431Sdim for (IVChain::const_iterator I = Chain.begin(), E = Chain.end(); 2523235633Sdim I != E; ++I) { 2524235633Sdim 2525235633Sdim if (I->IncExpr->isZero()) 2526235633Sdim continue; 2527235633Sdim 2528235633Sdim // Incrementing by zero or some constant is neutral. We assume constants can 2529235633Sdim // be folded into an addressing mode or an add's immediate operand. 2530235633Sdim if (isa<SCEVConstant>(I->IncExpr)) { 2531235633Sdim ++NumConstIncrements; 2532235633Sdim continue; 2533235633Sdim } 2534235633Sdim 2535235633Sdim if (I->IncExpr == LastIncExpr) 2536235633Sdim ++NumReusedIncrements; 2537235633Sdim else 2538235633Sdim ++NumVarIncrements; 2539235633Sdim 2540235633Sdim LastIncExpr = I->IncExpr; 2541235633Sdim } 2542235633Sdim // An IV chain with a single increment is handled by LSR's postinc 2543235633Sdim // uses. However, a chain with multiple increments requires keeping the IV's 2544235633Sdim // value live longer than it needs to be if chained. 2545235633Sdim if (NumConstIncrements > 1) 2546235633Sdim --cost; 2547235633Sdim 2548235633Sdim // Materializing increment expressions in the preheader that didn't exist in 2549235633Sdim // the original code may cost a register. For example, sign-extended array 2550235633Sdim // indices can produce ridiculous increments like this: 2551235633Sdim // IV + ((sext i32 (2 * %s) to i64) + (-1 * (sext i32 %s to i64))) 2552235633Sdim cost += NumVarIncrements; 2553235633Sdim 2554235633Sdim // Reusing variable increments likely saves a register to hold the multiple of 2555235633Sdim // the stride. 
2556235633Sdim cost -= NumReusedIncrements; 2557235633Sdim 2558245431Sdim DEBUG(dbgs() << "Chain: " << *Chain.Incs[0].UserInst << " Cost: " << cost 2559245431Sdim << "\n"); 2560235633Sdim 2561235633Sdim return cost < 0; 2562235633Sdim} 2563235633Sdim 2564235633Sdim/// ChainInstruction - Add this IV user to an existing chain or make it the head 2565235633Sdim/// of a new chain. 2566235633Sdimvoid LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper, 2567235633Sdim SmallVectorImpl<ChainUsers> &ChainUsersVec) { 2568235633Sdim // When IVs are used as types of varying widths, they are generally converted 2569235633Sdim // to a wider type with some uses remaining narrow under a (free) trunc. 2570245431Sdim Value *const NextIV = getWideOperand(IVOper); 2571245431Sdim const SCEV *const OperExpr = SE.getSCEV(NextIV); 2572245431Sdim const SCEV *const OperExprBase = getExprBase(OperExpr); 2573235633Sdim 2574235633Sdim // Visit all existing chains. Check if its IVOper can be computed as a 2575235633Sdim // profitable loop invariant increment from the last link in the Chain. 2576235633Sdim unsigned ChainIdx = 0, NChains = IVChainVec.size(); 2577235633Sdim const SCEV *LastIncExpr = 0; 2578235633Sdim for (; ChainIdx < NChains; ++ChainIdx) { 2579245431Sdim IVChain &Chain = IVChainVec[ChainIdx]; 2580245431Sdim 2581245431Sdim // Prune the solution space aggressively by checking that both IV operands 2582245431Sdim // are expressions that operate on the same unscaled SCEVUnknown. This 2583245431Sdim // "base" will be canceled by the subsequent getMinusSCEV call. Checking 2584245431Sdim // first avoids creating extra SCEV expressions. 2585245431Sdim if (!StressIVChain && Chain.ExprBase != OperExprBase) 2586245431Sdim continue; 2587245431Sdim 2588245431Sdim Value *PrevIV = getWideOperand(Chain.Incs.back().IVOperand); 2589235633Sdim if (!isCompatibleIVType(PrevIV, NextIV)) 2590235633Sdim continue; 2591235633Sdim 2592235633Sdim // A phi node terminates a chain. 
2593245431Sdim if (isa<PHINode>(UserInst) && isa<PHINode>(Chain.tailUserInst())) 2594235633Sdim continue; 2595235633Sdim 2596245431Sdim // The increment must be loop-invariant so it can be kept in a register. 2597245431Sdim const SCEV *PrevExpr = SE.getSCEV(PrevIV); 2598245431Sdim const SCEV *IncExpr = SE.getMinusSCEV(OperExpr, PrevExpr); 2599245431Sdim if (!SE.isLoopInvariant(IncExpr, L)) 2600245431Sdim continue; 2601245431Sdim 2602245431Sdim if (Chain.isProfitableIncrement(OperExpr, IncExpr, SE)) { 2603235633Sdim LastIncExpr = IncExpr; 2604235633Sdim break; 2605235633Sdim } 2606235633Sdim } 2607235633Sdim // If we haven't found a chain, create a new one, unless we hit the max. Don't 2608235633Sdim // bother for phi nodes, because they must be last in the chain. 2609235633Sdim if (ChainIdx == NChains) { 2610235633Sdim if (isa<PHINode>(UserInst)) 2611235633Sdim return; 2612235633Sdim if (NChains >= MaxChains && !StressIVChain) { 2613235633Sdim DEBUG(dbgs() << "IV Chain Limit\n"); 2614235633Sdim return; 2615235633Sdim } 2616245431Sdim LastIncExpr = OperExpr; 2617235633Sdim // IVUsers may have skipped over sign/zero extensions. We don't currently 2618235633Sdim // attempt to form chains involving extensions unless they can be hoisted 2619235633Sdim // into this loop's AddRec. 2620235633Sdim if (!isa<SCEVAddRecExpr>(LastIncExpr)) 2621235633Sdim return; 2622235633Sdim ++NChains; 2623245431Sdim IVChainVec.push_back(IVChain(IVInc(UserInst, IVOper, LastIncExpr), 2624245431Sdim OperExprBase)); 2625235633Sdim ChainUsersVec.resize(NChains); 2626245431Sdim DEBUG(dbgs() << "IV Chain#" << ChainIdx << " Head: (" << *UserInst 2627245431Sdim << ") IV=" << *LastIncExpr << "\n"); 2628245431Sdim } else { 2629245431Sdim DEBUG(dbgs() << "IV Chain#" << ChainIdx << " Inc: (" << *UserInst 2630245431Sdim << ") IV+" << *LastIncExpr << "\n"); 2631245431Sdim // Add this IV user to the end of the chain. 
2632245431Sdim IVChainVec[ChainIdx].add(IVInc(UserInst, IVOper, LastIncExpr)); 2633235633Sdim } 2634252723Sdim IVChain &Chain = IVChainVec[ChainIdx]; 2635235633Sdim 2636235633Sdim SmallPtrSet<Instruction*,4> &NearUsers = ChainUsersVec[ChainIdx].NearUsers; 2637235633Sdim // This chain's NearUsers become FarUsers. 2638235633Sdim if (!LastIncExpr->isZero()) { 2639235633Sdim ChainUsersVec[ChainIdx].FarUsers.insert(NearUsers.begin(), 2640235633Sdim NearUsers.end()); 2641235633Sdim NearUsers.clear(); 2642235633Sdim } 2643235633Sdim 2644235633Sdim // All other uses of IVOperand become near uses of the chain. 2645235633Sdim // We currently ignore intermediate values within SCEV expressions, assuming 2646235633Sdim // they will eventually be used be the current chain, or can be computed 2647235633Sdim // from one of the chain increments. To be more precise we could 2648235633Sdim // transitively follow its user and only add leaf IV users to the set. 2649235633Sdim for (Value::use_iterator UseIter = IVOper->use_begin(), 2650235633Sdim UseEnd = IVOper->use_end(); UseIter != UseEnd; ++UseIter) { 2651235633Sdim Instruction *OtherUse = dyn_cast<Instruction>(*UseIter); 2652252723Sdim if (!OtherUse) 2653235633Sdim continue; 2654252723Sdim // Uses in the chain will no longer be uses if the chain is formed. 2655252723Sdim // Include the head of the chain in this iteration (not Chain.begin()). 
2656252723Sdim IVChain::const_iterator IncIter = Chain.Incs.begin(); 2657252723Sdim IVChain::const_iterator IncEnd = Chain.Incs.end(); 2658252723Sdim for( ; IncIter != IncEnd; ++IncIter) { 2659252723Sdim if (IncIter->UserInst == OtherUse) 2660252723Sdim break; 2661252723Sdim } 2662252723Sdim if (IncIter != IncEnd) 2663252723Sdim continue; 2664252723Sdim 2665235633Sdim if (SE.isSCEVable(OtherUse->getType()) 2666235633Sdim && !isa<SCEVUnknown>(SE.getSCEV(OtherUse)) 2667235633Sdim && IU.isIVUserOrOperand(OtherUse)) { 2668235633Sdim continue; 2669235633Sdim } 2670235633Sdim NearUsers.insert(OtherUse); 2671235633Sdim } 2672235633Sdim 2673235633Sdim // Since this user is part of the chain, it's no longer considered a use 2674235633Sdim // of the chain. 2675235633Sdim ChainUsersVec[ChainIdx].FarUsers.erase(UserInst); 2676235633Sdim} 2677235633Sdim 2678235633Sdim/// CollectChains - Populate the vector of Chains. 2679235633Sdim/// 2680235633Sdim/// This decreases ILP at the architecture level. Targets with ample registers, 2681235633Sdim/// multiple memory ports, and no register renaming probably don't want 2682235633Sdim/// this. However, such targets should probably disable LSR altogether. 2683235633Sdim/// 2684235633Sdim/// The job of LSR is to make a reasonable choice of induction variables across 2685235633Sdim/// the loop. Subsequent passes can easily "unchain" computation exposing more 2686235633Sdim/// ILP *within the loop* if the target wants it. 2687235633Sdim/// 2688235633Sdim/// Finding the best IV chain is potentially a scheduling problem. Since LSR 2689235633Sdim/// will not reorder memory operations, it will recognize this as a chain, but 2690235633Sdim/// will generate redundant IV increments. 
Ideally this would be corrected later
/// by a smart scheduler:
///        = A[i]
///        = A[i+x]
/// A[i]   =
/// A[i+x] =
///
/// TODO: Walk the entire domtree within this loop, not just the path to the
/// loop latch. This will discover chains on side paths, but requires
/// maintaining multiple copies of the Chains state.
void LSRInstance::CollectChains() {
  DEBUG(dbgs() << "Collecting IV Chains.\n");
  // ChainUsersVec[i] tracks the near/far users of IVChainVec[i]; the two
  // vectors are kept index-parallel throughout this function.
  SmallVector<ChainUsers, 8> ChainUsersVec;

  // Collect the single dominator path from the latch up to the header so we
  // can visit its blocks in header-to-latch (program) order below.
  SmallVector<BasicBlock *,8> LatchPath;
  BasicBlock *LoopHeader = L->getHeader();
  for (DomTreeNode *Rung = DT.getNode(L->getLoopLatch());
       Rung->getBlock() != LoopHeader; Rung = Rung->getIDom()) {
    LatchPath.push_back(Rung->getBlock());
  }
  LatchPath.push_back(LoopHeader);

  // Walk the instruction stream from the loop header to the loop latch.
  for (SmallVectorImpl<BasicBlock *>::reverse_iterator
         BBIter = LatchPath.rbegin(), BBEnd = LatchPath.rend();
       BBIter != BBEnd; ++BBIter) {
    for (BasicBlock::iterator I = (*BBIter)->begin(), E = (*BBIter)->end();
         I != E; ++I) {
      // Skip instructions that weren't seen by IVUsers analysis.
      if (isa<PHINode>(I) || !IU.isIVUserOrOperand(I))
        continue;

      // Ignore users that are part of a SCEV expression. This way we only
      // consider leaf IV Users. This effectively rediscovers a portion of
      // IVUsers analysis but in program order this time.
      if (SE.isSCEVable(I->getType()) && !isa<SCEVUnknown>(SE.getSCEV(I)))
          continue;

      // Remove this instruction from any NearUsers set it may be in.
      for (unsigned ChainIdx = 0, NChains = IVChainVec.size();
           ChainIdx < NChains; ++ChainIdx) {
        ChainUsersVec[ChainIdx].NearUsers.erase(I);
      }
      // Search for operands that can be chained. Each distinct IV operand of
      // this instruction is offered to ChainInstruction at most once.
      SmallPtrSet<Instruction*, 4> UniqueOperands;
      User::op_iterator IVOpEnd = I->op_end();
      User::op_iterator IVOpIter = findIVOperand(I->op_begin(), IVOpEnd, L, SE);
      while (IVOpIter != IVOpEnd) {
        Instruction *IVOpInst = cast<Instruction>(*IVOpIter);
        if (UniqueOperands.insert(IVOpInst))
          ChainInstruction(I, IVOpInst, ChainUsersVec);
        IVOpIter = findIVOperand(llvm::next(IVOpIter), IVOpEnd, L, SE);
      }
    } // Continue walking down the instructions.
  } // Continue walking down the domtree.
  // Visit phi backedges to determine if the chain can generate the IV postinc.
  for (BasicBlock::iterator I = L->getHeader()->begin();
       PHINode *PN = dyn_cast<PHINode>(I); ++I) {
    if (!SE.isSCEVable(PN->getType()))
      continue;

    Instruction *IncV =
      dyn_cast<Instruction>(PN->getIncomingValueForBlock(L->getLoopLatch()));
    if (IncV)
      ChainInstruction(PN, IncV, ChainUsersVec);
  }
  // Remove any unprofitable chains, compacting the profitable ones to the
  // front of IVChainVec in place.
  unsigned ChainIdx = 0;
  for (unsigned UsersIdx = 0, NChains = IVChainVec.size();
       UsersIdx < NChains; ++UsersIdx) {
    if (!isProfitableChain(IVChainVec[UsersIdx],
                           ChainUsersVec[UsersIdx].FarUsers, SE, TTI))
      continue;
    // Preserve the chain at UsersIdx.
    if (ChainIdx != UsersIdx)
      IVChainVec[ChainIdx] = IVChainVec[UsersIdx];
    FinalizeChain(IVChainVec[ChainIdx]);
    ++ChainIdx;
  }
  IVChainVec.resize(ChainIdx);
}

/// FinalizeChain - Record each increment's IV operand use in IVIncSet so that
/// later fixup collection can recognize (and skip) operands that will be
/// rewritten as part of a profitable chain.
void LSRInstance::FinalizeChain(IVChain &Chain) {
  assert(!Chain.Incs.empty() && "empty IV chains are not allowed");
  DEBUG(dbgs() << "Final Chain: " << *Chain.Incs[0].UserInst << "\n");

  for (IVChain::const_iterator I = Chain.begin(), E = Chain.end();
       I != E; ++I) {
    DEBUG(dbgs() << " Inc: " << *I->UserInst << "\n");
    // Find the operand slot holding the IV value; its use is what IVIncSet
    // keys on.
    User::op_iterator UseI =
      std::find(I->UserInst->op_begin(), I->UserInst->op_end(), I->IVOperand);
    assert(UseI != I->UserInst->op_end() && "cannot find IV operand");
    IVIncSet.insert(UseI);
  }
}

/// Return true if the IVInc can be folded into an addressing mode.
static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst,
                             Value *Operand, const TargetTransformInfo &TTI) {
  // Only a constant increment feeding an address use is a candidate for
  // folding into the addressing mode.
  const SCEVConstant *IncConst = dyn_cast<SCEVConstant>(IncExpr);
  if (!IncConst || !isAddressUse(UserInst, Operand))
    return false;

  // The offset must fit in a signed 64-bit immediate.
  if (IncConst->getValue()->getValue().getMinSignedBits() > 64)
    return false;

  int64_t IncOffset = IncConst->getValue()->getSExtValue();
  if (!isAlwaysFoldable(TTI, LSRUse::Address,
                        getAccessType(UserInst), /*BaseGV=*/ 0,
                        IncOffset, /*HasBaseReg=*/ false))
    return false;

  return true;
}

/// GenerateIVChain - Generate an add or subtract for each IVInc in a chain to
/// materialize the IV user's operand from the previous IV user's operand.
void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter,
                                  SmallVectorImpl<WeakVH> &DeadInsts) {
  // Find the new IVOperand for the head of the chain. It may have been replaced
  // by LSR.
  const IVInc &Head = Chain.Incs[0];
  User::op_iterator IVOpEnd = Head.UserInst->op_end();
  // findIVOperand returns IVOpEnd if it can no longer find a valid IV user.
  User::op_iterator IVOpIter = findIVOperand(Head.UserInst->op_begin(),
                                             IVOpEnd, L, SE);
  Value *IVSrc = 0;
  while (IVOpIter != IVOpEnd) {
    IVSrc = getWideOperand(*IVOpIter);

    // If this operand computes the expression that the chain needs, we may use
    // it. (Check this after setting IVSrc which is used below.)
    //
    // Note that if Head.IncExpr is wider than IVSrc, then this phi is too
    // narrow for the chain, so we can no longer use it. We do allow using a
    // wider phi, assuming the LSR checked for free truncation. In that case we
    // should already have a truncate on this operand such that
    // getSCEV(IVSrc) == IncExpr.
    if (SE.getSCEV(*IVOpIter) == Head.IncExpr
        || SE.getSCEV(IVSrc) == Head.IncExpr) {
      break;
    }
    IVOpIter = findIVOperand(llvm::next(IVOpIter), IVOpEnd, L, SE);
  }
  if (IVOpIter == IVOpEnd) {
    // Gracefully give up on this chain.
    DEBUG(dbgs() << "Concealed chain head: " << *Head.UserInst << "\n");
    return;
  }

  DEBUG(dbgs() << "Generate chain at: " << *IVSrc << "\n");
  Type *IVTy = IVSrc->getType();
  Type *IntTy = SE.getEffectiveSCEVType(IVTy);
  // LeftOverExpr accumulates increments that were foldable into addressing
  // modes and therefore did not need to be materialized yet; NULL means the
  // chain is currently "caught up" at IVSrc.
  const SCEV *LeftOverExpr = 0;
  for (IVChain::const_iterator IncI = Chain.begin(),
         IncE = Chain.end(); IncI != IncE; ++IncI) {

    Instruction *InsertPt = IncI->UserInst;
    // Phi users must be materialized at the latch, after the increment.
    if (isa<PHINode>(InsertPt))
      InsertPt = L->getLoopLatch()->getTerminator();

    // IVOper will replace the current IV User's operand. IVSrc is the IV
    // value currently held in a register.
    Value *IVOper = IVSrc;
    if (!IncI->IncExpr->isZero()) {
      // IncExpr was the result of subtraction of two narrow values, so must
      // be signed.
      const SCEV *IncExpr = SE.getNoopOrSignExtend(IncI->IncExpr, IntTy);
      LeftOverExpr = LeftOverExpr ?
        SE.getAddExpr(LeftOverExpr, IncExpr) : IncExpr;
    }
    if (LeftOverExpr && !LeftOverExpr->isZero()) {
      // Expand the IV increment.
      Rewriter.clearPostInc();
      Value *IncV = Rewriter.expandCodeFor(LeftOverExpr, IntTy, InsertPt);
      const SCEV *IVOperExpr = SE.getAddExpr(SE.getUnknown(IVSrc),
                                             SE.getUnknown(IncV));
      IVOper = Rewriter.expandCodeFor(IVOperExpr, IVTy, InsertPt);

      // If an IV increment can't be folded, use it as the next IV value.
      if (!canFoldIVIncExpr(LeftOverExpr, IncI->UserInst, IncI->IVOperand,
                            TTI)) {
        assert(IVTy == IVOper->getType() && "inconsistent IV increment type");
        IVSrc = IVOper;
        LeftOverExpr = 0;
      }
    }
    Type *OperTy = IncI->IVOperand->getType();
    if (IVTy != OperTy) {
      assert(SE.getTypeSizeInBits(IVTy) >= SE.getTypeSizeInBits(OperTy) &&
             "cannot extend a chained IV");
      IRBuilder<> Builder(InsertPt);
      IVOper = Builder.CreateTruncOrBitCast(IVOper, OperTy, "lsr.chain");
    }
    IncI->UserInst->replaceUsesOfWith(IncI->IVOperand, IVOper);
    DeadInsts.push_back(IncI->IVOperand);
  }
  // If LSR created a new, wider phi, we may also replace its postinc. We only
  // do this if we also found a wide value for the head of the chain.
  if (isa<PHINode>(Chain.tailUserInst())) {
    for (BasicBlock::iterator I = L->getHeader()->begin();
         PHINode *Phi = dyn_cast<PHINode>(I); ++I) {
      if (!isCompatibleIVType(Phi, IVSrc))
        continue;
      Instruction *PostIncV = dyn_cast<Instruction>(
        Phi->getIncomingValueForBlock(L->getLoopLatch()));
      if (!PostIncV || (SE.getSCEV(PostIncV) != SE.getSCEV(IVSrc)))
        continue;
      Value *IVOper = IVSrc;
      Type *PostIncTy = PostIncV->getType();
      if (IVTy != PostIncTy) {
        assert(PostIncTy->isPointerTy() && "mixing int/ptr IV types");
        IRBuilder<> Builder(L->getLoopLatch()->getTerminator());
        Builder.SetCurrentDebugLocation(PostIncV->getDebugLoc());
        IVOper = Builder.CreatePointerCast(IVSrc, PostIncTy, "lsr.chain");
      }
      Phi->replaceUsesOfWith(PostIncV, IVOper);
      DeadInsts.push_back(PostIncV);
    }
  }
}

/// CollectFixupsAndInitialFormulae - For each IV user, create an LSRFixup
/// recording the rewrite site, classify the use (Address/ICmpZero/Basic),
/// map it to an LSRUse, and seed that use with an initial formula.
void LSRInstance::CollectFixupsAndInitialFormulae() {
  for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI) {
    Instruction *UserInst = UI->getUser();
    // Skip IV users that are part of profitable IV Chains.
    User::op_iterator UseI = std::find(UserInst->op_begin(), UserInst->op_end(),
                                       UI->getOperandValToReplace());
    assert(UseI != UserInst->op_end() && "cannot find IV operand");
    if (IVIncSet.count(UseI))
      continue;

    // Record the uses.
    LSRFixup &LF = getNewFixup();
    LF.UserInst = UserInst;
    LF.OperandValToReplace = UI->getOperandValToReplace();
    LF.PostIncLoops = UI->getPostIncLoops();

    LSRUse::KindType Kind = LSRUse::Basic;
    Type *AccessTy = 0;
    if (isAddressUse(LF.UserInst, LF.OperandValToReplace)) {
      Kind = LSRUse::Address;
      AccessTy = getAccessType(LF.UserInst);
    }

    const SCEV *S = IU.getExpr(*UI);

    // Equality (== and !=) ICmps are special. We can rewrite (i == N) as
    // (N - i == 0), and this allows (N - i) to be the expression that we work
    // with rather than just N or i, so we can consider the register
    // requirements for both N and i at the same time. Limiting this code to
    // equality icmps is not a problem because all interesting loops use
    // equality icmps, thanks to IndVarSimplify.
    if (ICmpInst *CI = dyn_cast<ICmpInst>(LF.UserInst))
      if (CI->isEquality()) {
        // Swap the operands if needed to put the OperandValToReplace on the
        // left, for consistency.
        Value *NV = CI->getOperand(1);
        if (NV == LF.OperandValToReplace) {
          CI->setOperand(1, CI->getOperand(0));
          CI->setOperand(0, NV);
          NV = CI->getOperand(1);
          Changed = true;
        }

        // x == y  -->  x - y == 0
        const SCEV *N = SE.getSCEV(NV);
        if (SE.isLoopInvariant(N, L) && isSafeToExpand(N, SE)) {
          // S is normalized, so normalize N before folding it into S
          // to keep the result normalized.
          N = TransformForPostIncUse(Normalize, N, CI, 0,
                                     LF.PostIncLoops, SE, DT);
          Kind = LSRUse::ICmpZero;
          S = SE.getMinusSCEV(N, S);
        }

        // -1 and the negations of all interesting strides (except the negation
        // of -1) are now also interesting.
        for (size_t i = 0, e = Factors.size(); i != e; ++i)
          if (Factors[i] != -1)
            Factors.insert(-(uint64_t)Factors[i]);
        Factors.insert(-1);
      }

    // Set up the initial formula for this use.
    std::pair<size_t, int64_t> P = getUse(S, Kind, AccessTy);
    LF.LUIdx = P.first;
    LF.Offset = P.second;
    LSRUse &LU = Uses[LF.LUIdx];
    LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
    // Widen the recorded fixup type if this fixup's operand is wider.
    if (!LU.WidestFixupType ||
        SE.getTypeSizeInBits(LU.WidestFixupType) <
        SE.getTypeSizeInBits(LF.OperandValToReplace->getType()))
      LU.WidestFixupType = LF.OperandValToReplace->getType();

    // If this is the first use of this LSRUse, give it a formula.
    if (LU.Formulae.empty()) {
      InsertInitialFormula(S, LU, LF.LUIdx);
      CountRegisters(LU.Formulae.back(), LF.LUIdx);
    }
  }

  DEBUG(print_fixups(dbgs()));
}

/// InsertInitialFormula - Insert a formula for the given expression into
/// the given use, separating out loop-variant portions from loop-invariant
/// and loop-computable portions.
void
LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx) {
  // Mark uses whose expressions cannot be expanded.
  if (!isSafeToExpand(S, SE))
    LU.RigidFormula = true;

  Formula F;
  F.InitialMatch(S, L, SE);
  bool Inserted = InsertFormula(LU, LUIdx, F);
  assert(Inserted && "Initial formula already exists!"); (void)Inserted;
}

/// InsertSupplementalFormula - Insert a simple single-register formula for
/// the given expression into the given use.
void
LSRInstance::InsertSupplementalFormula(const SCEV *S,
                                       LSRUse &LU, size_t LUIdx) {
  Formula F;
  F.BaseRegs.push_back(S);
  F.HasBaseReg = true;
  bool Inserted = InsertFormula(LU, LUIdx, F);
  assert(Inserted && "Supplemental formula already exists!"); (void)Inserted;
}

/// CountRegisters - Note which registers are used by the given formula,
/// updating RegUses.
void LSRInstance::CountRegisters(const Formula &F, size_t LUIdx) {
  // Register both the scaled register (if any) and every base register as
  // used by the LSRUse at LUIdx.
  if (F.ScaledReg)
    RegUses.CountRegister(F.ScaledReg, LUIdx);
  for (SmallVectorImpl<const SCEV *>::const_iterator I = F.BaseRegs.begin(),
       E = F.BaseRegs.end(); I != E; ++I)
    RegUses.CountRegister(*I, LUIdx);
}

/// InsertFormula - If the given formula has not yet been inserted, add it to
/// the list, and return true. Return false otherwise.
bool LSRInstance::InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F) {
  if (!LU.InsertFormula(F))
    return false;

  CountRegisters(F, LUIdx);
  return true;
}

/// CollectLoopInvariantFixupsAndFormulae - Check for other uses of
/// loop-invariant values which we're tracking. These other uses will pin these
/// values in registers, making them less profitable for elimination.
/// TODO: This currently misses non-constant addrec step registers.
/// TODO: Should this give more weight to users inside the loop?
void
LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
  // Walk all tracked register expressions, drilling down into their
  // sub-expressions to find the SCEVUnknowns (raw IR values) they pin.
  SmallVector<const SCEV *, 8> Worklist(RegUses.begin(), RegUses.end());
  SmallPtrSet<const SCEV *, 8> Inserted;

  while (!Worklist.empty()) {
    const SCEV *S = Worklist.pop_back_val();

    if (const SCEVNAryExpr *N = dyn_cast<SCEVNAryExpr>(S))
      Worklist.append(N->op_begin(), N->op_end());
    else if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S))
      Worklist.push_back(C->getOperand());
    else if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
      Worklist.push_back(D->getLHS());
      Worklist.push_back(D->getRHS());
    } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
      // Each SCEVUnknown is processed at most once.
      if (!Inserted.insert(U)) continue;
      const Value *V = U->getValue();
      if (const Instruction *Inst = dyn_cast<Instruction>(V)) {
        // Look for instructions defined outside the loop.
        if (L->contains(Inst)) continue;
      } else if (isa<UndefValue>(V))
        // Undef doesn't have a live range, so it doesn't matter.
        continue;
      for (Value::const_use_iterator UI = V->use_begin(), UE = V->use_end();
           UI != UE; ++UI) {
        const Instruction *UserInst = dyn_cast<Instruction>(*UI);
        // Ignore non-instructions.
        if (!UserInst)
          continue;
        // Ignore instructions in other functions (as can happen with
        // Constants).
        if (UserInst->getParent()->getParent() != L->getHeader()->getParent())
          continue;
        // Ignore instructions not dominated by the loop.
        const BasicBlock *UseBB = !isa<PHINode>(UserInst) ?
          UserInst->getParent() :
          cast<PHINode>(UserInst)->getIncomingBlock(
            PHINode::getIncomingValueNumForOperand(UI.getOperandNo()));
        if (!DT.dominates(L->getHeader(), UseBB))
          continue;
        // Ignore uses which are part of other SCEV expressions, to avoid
        // analyzing them multiple times.
        if (SE.isSCEVable(UserInst->getType())) {
          const SCEV *UserS = SE.getSCEV(const_cast<Instruction *>(UserInst));
          // If the user is a no-op, look through to its uses.
          if (!isa<SCEVUnknown>(UserS))
            continue;
          if (UserS == U) {
            Worklist.push_back(
              SE.getUnknown(const_cast<Instruction *>(UserInst)));
            continue;
          }
        }
        // Ignore icmp instructions which are already being analyzed.
        if (const ICmpInst *ICI = dyn_cast<ICmpInst>(UserInst)) {
          unsigned OtherIdx = !UI.getOperandNo();
          Value *OtherOp = const_cast<Value *>(ICI->getOperand(OtherIdx));
          if (SE.hasComputableLoopEvolution(SE.getSCEV(OtherOp), L))
            continue;
        }

        // Record one representative fixup for this value and give its use a
        // simple single-register formula; one user is enough to pin it.
        LSRFixup &LF = getNewFixup();
        LF.UserInst = const_cast<Instruction *>(UserInst);
        LF.OperandValToReplace = UI.getUse();
        std::pair<size_t, int64_t> P = getUse(S, LSRUse::Basic, 0);
        LF.LUIdx = P.first;
        LF.Offset = P.second;
        LSRUse &LU = Uses[LF.LUIdx];
        LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
        if (!LU.WidestFixupType ||
            SE.getTypeSizeInBits(LU.WidestFixupType) <
            SE.getTypeSizeInBits(LF.OperandValToReplace->getType()))
          LU.WidestFixupType = LF.OperandValToReplace->getType();
        InsertSupplementalFormula(U, LU, LF.LUIdx);
        CountRegisters(LU.Formulae.back(), Uses.size() - 1);
        break;
      }
    }
  }
}

/// CollectSubexprs - Split S into subexpressions which can be pulled out into
/// separate registers. If C is non-null, multiply each subexpression by C.
///
/// Return remainder expression after factoring the subexpressions captured by
/// Ops. If Ops is complete, return NULL.
static const SCEV *CollectSubexprs(const SCEV *S, const SCEVConstant *C,
                                   SmallVectorImpl<const SCEV *> &Ops,
                                   const Loop *L,
                                   ScalarEvolution &SE,
                                   unsigned Depth = 0) {
  // Arbitrarily cap recursion to protect compile time.
  if (Depth >= 3)
    return S;

  if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
    // Break out add operands.
    for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
         I != E; ++I) {
      const SCEV *Remainder = CollectSubexprs(*I, C, Ops, L, SE, Depth+1);
      if (Remainder)
        Ops.push_back(C ? SE.getMulExpr(C, Remainder) : Remainder);
    }
    // Every operand was captured in Ops; nothing remains.
    return 0;
  } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
    // Split a non-zero base out of an addrec.
    if (AR->getStart()->isZero())
      return S;

    const SCEV *Remainder = CollectSubexprs(AR->getStart(),
                                            C, Ops, L, SE, Depth+1);
    // Split the non-zero AddRec unless it is part of a nested recurrence that
    // does not pertain to this loop.
    if (Remainder && (AR->getLoop() == L || !isa<SCEVAddRecExpr>(Remainder))) {
      Ops.push_back(C ? SE.getMulExpr(C, Remainder) : Remainder);
      Remainder = 0;
    }
    if (Remainder != AR->getStart()) {
      // Rebuild the addrec over whatever part of the start was not captured.
      if (!Remainder)
        Remainder = SE.getConstant(AR->getType(), 0);
      return SE.getAddRecExpr(Remainder,
                              AR->getStepRecurrence(SE),
                              AR->getLoop(),
                              //FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
                              SCEV::FlagAnyWrap);
    }
  } else if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
    // Break (C * (a + b + c)) into C*a + C*b + C*c.
    if (Mul->getNumOperands() != 2)
      return S;
    if (const SCEVConstant *Op0 =
        dyn_cast<SCEVConstant>(Mul->getOperand(0))) {
      // Fold this constant factor into any factor accumulated so far.
      C = C ? cast<SCEVConstant>(SE.getMulExpr(C, Op0)) : Op0;
      const SCEV *Remainder =
        CollectSubexprs(Mul->getOperand(1), C, Ops, L, SE, Depth+1);
      if (Remainder)
        Ops.push_back(SE.getMulExpr(C, Remainder));
      return 0;
    }
  }
  // Not decomposable; the whole expression is the remainder.
  return S;
}

/// GenerateReassociations - Split out subexpressions from adds and the bases of
/// addrecs.
void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
                                         Formula Base,
                                         unsigned Depth) {
  // Arbitrarily cap recursion to protect compile time.
  if (Depth >= 3) return;

  for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
    const SCEV *BaseReg = Base.BaseRegs[i];

    SmallVector<const SCEV *, 8> AddOps;
    const SCEV *Remainder = CollectSubexprs(BaseReg, 0, AddOps, L, SE);
    if (Remainder)
      AddOps.push_back(Remainder);

    if (AddOps.size() == 1) continue;

    for (SmallVectorImpl<const SCEV *>::const_iterator J = AddOps.begin(),
         JE = AddOps.end(); J != JE; ++J) {

      // Loop-variant "unknown" values are uninteresting; we won't be able to
      // do anything meaningful with them.
      if (isa<SCEVUnknown>(*J) && !SE.isLoopInvariant(*J, L))
        continue;

      // Don't pull a constant into a register if the constant could be folded
      // into an immediate field.
      if (isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind,
                           LU.AccessTy, *J, Base.getNumRegs() > 1))
        continue;

      // Collect all operands except *J.
      SmallVector<const SCEV *, 8> InnerAddOps
        (((const SmallVector<const SCEV *, 8> &)AddOps).begin(), J);
      InnerAddOps.append
        (llvm::next(J), ((const SmallVector<const SCEV *, 8> &)AddOps).end());

      // Don't leave just a constant behind in a register if the constant could
      // be folded into an immediate field.
      if (InnerAddOps.size() == 1 &&
          isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind,
                           LU.AccessTy, InnerAddOps[0], Base.getNumRegs() > 1))
        continue;

      const SCEV *InnerSum = SE.getAddExpr(InnerAddOps);
      if (InnerSum->isZero())
        continue;
      Formula F = Base;

      // Add the remaining pieces of the add back into the new formula.
      const SCEVConstant *InnerSumSC = dyn_cast<SCEVConstant>(InnerSum);
      if (InnerSumSC &&
          SE.getTypeSizeInBits(InnerSumSC->getType()) <= 64 &&
          TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
                                  InnerSumSC->getValue()->getZExtValue())) {
        F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset +
                           InnerSumSC->getValue()->getZExtValue();
        F.BaseRegs.erase(F.BaseRegs.begin() + i);
      } else
        F.BaseRegs[i] = InnerSum;

      // Add J as its own register, or an unfolded immediate.
      const SCEVConstant *SC = dyn_cast<SCEVConstant>(*J);
      if (SC && SE.getTypeSizeInBits(SC->getType()) <= 64 &&
          TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset +
                                  SC->getValue()->getZExtValue()))
        F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset +
                           SC->getValue()->getZExtValue();
      else
        F.BaseRegs.push_back(*J);

      if (InsertFormula(LU, LUIdx, F))
        // If that formula hadn't been seen before, recurse to find more like
        // it.
        GenerateReassociations(LU, LUIdx, LU.Formulae.back(), Depth+1);
    }
  }
}

/// GenerateCombinations - Generate a formula consisting of all of the
/// loop-dominating registers added into a single register.
void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx,
                                       Formula Base) {
  // This method is only interesting on a plurality of registers.
  if (Base.BaseRegs.size() <= 1) return;

  Formula F = Base;
  F.BaseRegs.clear();
  // Partition base registers: loop-invariant ones that dominate the header
  // can be summed into one register; the rest are kept as-is.
  SmallVector<const SCEV *, 4> Ops;
  for (SmallVectorImpl<const SCEV *>::const_iterator
       I = Base.BaseRegs.begin(), E = Base.BaseRegs.end(); I != E; ++I) {
    const SCEV *BaseReg = *I;
    if (SE.properlyDominates(BaseReg, L->getHeader()) &&
        !SE.hasComputableLoopEvolution(BaseReg, L))
      Ops.push_back(BaseReg);
    else
      F.BaseRegs.push_back(BaseReg);
  }
  if (Ops.size() > 1) {
    const SCEV *Sum = SE.getAddExpr(Ops);
    // TODO: If Sum is zero, it probably means ScalarEvolution missed an
    // opportunity to fold something. For now, just ignore such cases
    // rather than proceed with zero in a register.
    if (!Sum->isZero()) {
      F.BaseRegs.push_back(Sum);
      (void)InsertFormula(LU, LUIdx, F);
    }
  }
}

/// GenerateSymbolicOffsets - Generate reuse formulae using symbolic offsets.
void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx,
                                          Formula Base) {
  // We can't add a symbolic offset if the address already contains one.
  if (Base.BaseGV) return;

  for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
    const SCEV *G = Base.BaseRegs[i];
    // Try to peel a GlobalValue off this base register; G is updated to the
    // expression with the symbol removed.
    GlobalValue *GV = ExtractSymbol(G, SE);
    if (G->isZero() || !GV)
      continue;
    Formula F = Base;
    F.BaseGV = GV;
    if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
      continue;
    F.BaseRegs[i] = G;
    (void)InsertFormula(LU, LUIdx, F);
  }
}

/// GenerateConstantOffsets - Generate reuse formulae using constant offsets.
void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx,
                                          Formula Base) {
  // TODO: For now, just add the min and max offset, because it usually isn't
  // worthwhile looking at everything in between.
  SmallVector<int64_t, 2> Worklist;
  Worklist.push_back(LU.MinOffset);
  if (LU.MaxOffset != LU.MinOffset)
    Worklist.push_back(LU.MaxOffset);

  for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
    const SCEV *G = Base.BaseRegs[i];

    // First variant: move each candidate offset out of the base register and
    // into the formula's immediate (shifting the use's offset range to match).
    for (SmallVectorImpl<int64_t>::const_iterator I = Worklist.begin(),
         E = Worklist.end(); I != E; ++I) {
      Formula F = Base;
      F.BaseOffset = (uint64_t)Base.BaseOffset - *I;
      if (isLegalUse(TTI, LU.MinOffset - *I, LU.MaxOffset - *I, LU.Kind,
                     LU.AccessTy, F)) {
        // Add the offset to the base register.
        const SCEV *NewG = SE.getAddExpr(SE.getConstant(G->getType(), *I), G);
        // If it cancelled out, drop the base register, otherwise update it.
        if (NewG->isZero()) {
          std::swap(F.BaseRegs[i], F.BaseRegs.back());
          F.BaseRegs.pop_back();
        } else
          F.BaseRegs[i] = NewG;

        (void)InsertFormula(LU, LUIdx, F);
      }
    }

    // Second variant: peel an immediate off the base register expression and
    // fold it into the formula's offset. G is updated to the stripped
    // expression.
    int64_t Imm = ExtractImmediate(G, SE);
    if (G->isZero() || Imm == 0)
      continue;
    Formula F = Base;
    F.BaseOffset = (uint64_t)F.BaseOffset + Imm;
    if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
      continue;
    F.BaseRegs[i] = G;
    (void)InsertFormula(LU, LUIdx, F);
  }
}

/// GenerateICmpZeroScales - For ICmpZero, check to see if we can scale up
/// the comparison. For example, x == y -> x*c == y*c.
void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
                                         Formula Base) {
  if (LU.Kind != LSRUse::ICmpZero) return;

  // Determine the integer type for the base formula.
  Type *IntTy = Base.getType();
  if (!IntTy) return;
  if (SE.getTypeSizeInBits(IntTy) > 64) return;

  // Don't do this if there is more than one offset.
  if (LU.MinOffset != LU.MaxOffset) return;

  assert(!Base.BaseGV && "ICmpZero use is not legal!");

  // Check each interesting stride.
  for (SmallSetVector<int64_t, 8>::const_iterator
       I = Factors.begin(), E = Factors.end(); I != E; ++I) {
    int64_t Factor = *I;

    // Check that the multiplication doesn't overflow. The multiply is done in
    // uint64_t to avoid signed-overflow UB; dividing back and comparing
    // detects wraparound. INT64_MIN * -1 is the one case division can't catch.
    if (Base.BaseOffset == INT64_MIN && Factor == -1)
      continue;
    int64_t NewBaseOffset = (uint64_t)Base.BaseOffset * Factor;
    if (NewBaseOffset / Factor != Base.BaseOffset)
      continue;
    // If the offset will be truncated at this use, check that it is in bounds.
    if (!IntTy->isPointerTy() &&
        !ConstantInt::isValueValidForType(IntTy, NewBaseOffset))
      continue;

    // Check that multiplying with the use offset doesn't overflow.
    int64_t Offset = LU.MinOffset;
    if (Offset == INT64_MIN && Factor == -1)
      continue;
    Offset = (uint64_t)Offset * Factor;
    if (Offset / Factor != LU.MinOffset)
      continue;
    // If the offset will be truncated at this use, check that it is in bounds.
    if (!IntTy->isPointerTy() &&
        !ConstantInt::isValueValidForType(IntTy, Offset))
      continue;

    Formula F = Base;
    F.BaseOffset = NewBaseOffset;

    // Check that this scale is legal.
    if (!isLegalUse(TTI, Offset, Offset, LU.Kind, LU.AccessTy, F))
      continue;

    // Compensate for the use having MinOffset built into it.
    F.BaseOffset = (uint64_t)F.BaseOffset + Offset - LU.MinOffset;

    const SCEV *FactorS = SE.getConstant(IntTy, Factor);

    // Check that multiplying with each base register doesn't overflow:
    // getExactSDiv must recover the original register, otherwise information
    // was lost and this factor is abandoned via the `next' label (which skips
    // only the current Factor iteration).
    for (size_t i = 0, e = F.BaseRegs.size(); i != e; ++i) {
      F.BaseRegs[i] = SE.getMulExpr(F.BaseRegs[i], FactorS);
      if (getExactSDiv(F.BaseRegs[i], FactorS, SE) != Base.BaseRegs[i])
        goto next;
    }

    // Check that multiplying with the scaled register doesn't overflow.
    if (F.ScaledReg) {
      F.ScaledReg = SE.getMulExpr(F.ScaledReg, FactorS);
      if (getExactSDiv(F.ScaledReg, FactorS, SE) != Base.ScaledReg)
        continue;
    }

    // Check that multiplying with the unfolded offset doesn't overflow.
    if (F.UnfoldedOffset != 0) {
      if (F.UnfoldedOffset == INT64_MIN && Factor == -1)
        continue;
      F.UnfoldedOffset = (uint64_t)F.UnfoldedOffset * Factor;
      if (F.UnfoldedOffset / Factor != Base.UnfoldedOffset)
        continue;
      // If the offset will be truncated, check that it is in bounds.
      if (!IntTy->isPointerTy() &&
          !ConstantInt::isValueValidForType(IntTy, F.UnfoldedOffset))
        continue;
    }

    // If we make it here and it's legal, add it.
    (void)InsertFormula(LU, LUIdx, F);
  next:;
  }
}

/// GenerateScales - Generate stride factor reuse formulae by making use of
/// scaled-offset address modes, for example.
void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
  // Determine the integer type for the base formula.
  Type *IntTy = Base.getType();
  if (!IntTy) return;

  // If this Formula already has a scaled register, we can't add another one.
  if (Base.Scale != 0) return;

  // Check each interesting stride. Base is a by-value copy, so mutating its
  // Scale/HasBaseReg fields per iteration does not affect the caller.
  for (SmallSetVector<int64_t, 8>::const_iterator
       I = Factors.begin(), E = Factors.end(); I != E; ++I) {
    int64_t Factor = *I;

    Base.Scale = Factor;
    Base.HasBaseReg = Base.BaseRegs.size() > 1;
    // Check whether this scale is going to be legal.
    if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
                    Base)) {
      // As a special-case, handle special out-of-loop Basic users specially.
      // TODO: Reconsider this special case.
      if (LU.Kind == LSRUse::Basic &&
          isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LSRUse::Special,
                     LU.AccessTy, Base) &&
          LU.AllFixupsOutsideLoop)
        LU.Kind = LSRUse::Special;
      else
        continue;
    }
    // For an ICmpZero, negating a solitary base register won't lead to
    // new solutions.
    if (LU.Kind == LSRUse::ICmpZero &&
        !Base.HasBaseReg && Base.BaseOffset == 0 && !Base.BaseGV)
      continue;
    // For each addrec base reg, apply the scale, if possible.
    for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
      if (const SCEVAddRecExpr *AR =
            dyn_cast<SCEVAddRecExpr>(Base.BaseRegs[i])) {
        const SCEV *FactorS = SE.getConstant(IntTy, Factor);
        if (FactorS->isZero())
          continue;
        // Divide out the factor, ignoring high bits, since we'll be
        // scaling the value back up in the end.
        if (const SCEV *Quotient = getExactSDiv(AR, FactorS, SE, true)) {
          // TODO: This could be optimized to avoid all the copying.
          Formula F = Base;
          F.ScaledReg = Quotient;
          F.DeleteBaseReg(F.BaseRegs[i]);
          (void)InsertFormula(LU, LUIdx, F);
        }
      }
  }
}

/// GenerateTruncates - Generate reuse formulae from different IV types.
void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) {
  // Don't bother truncating symbolic values.
  if (Base.BaseGV) return;

  // Determine the integer type for the base formula.
  Type *DstTy = Base.getType();
  if (!DstTy) return;
  DstTy = SE.getEffectiveSCEVType(DstTy);

  // For each candidate wider/alternate type, any-extend all of the formula's
  // registers to it when truncating back to DstTy is free on the target.
  for (SmallSetVector<Type *, 4>::const_iterator
       I = Types.begin(), E = Types.end(); I != E; ++I) {
    Type *SrcTy = *I;
    if (SrcTy != DstTy && TTI.isTruncateFree(SrcTy, DstTy)) {
      Formula F = Base;

      if (F.ScaledReg) F.ScaledReg = SE.getAnyExtendExpr(F.ScaledReg, *I);
      for (SmallVectorImpl<const SCEV *>::iterator J = F.BaseRegs.begin(),
           JE = F.BaseRegs.end(); J != JE; ++J)
        *J = SE.getAnyExtendExpr(*J, SrcTy);

      // TODO: This assumes we've done basic processing on all uses and
      // have an idea what the register usage is.
      if (!F.hasRegsUsedByUsesOtherThan(LUIdx, RegUses))
        continue;

      (void)InsertFormula(LU, LUIdx, F);
    }
  }
}

namespace {

/// WorkItem - Helper class for GenerateCrossUseConstantOffsets. It's used to
/// defer modifications so that the search phase doesn't have to worry about
/// the data structures moving underneath it.
struct WorkItem {
  size_t LUIdx;        // Index of the use whose formulae should be updated.
  int64_t Imm;         // Constant offset to transfer into the formulae.
  const SCEV *OrigReg; // The register the offset is measured against.

  WorkItem(size_t LI, int64_t I, const SCEV *R)
    : LUIdx(LI), Imm(I), OrigReg(R) {}

  void print(raw_ostream &OS) const;
  void dump() const;
};

}

void WorkItem::print(raw_ostream &OS) const {
  OS << "in formulae referencing " << *OrigReg << " in use " << LUIdx
     << " , add offset " << Imm;
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void WorkItem::dump() const {
  print(errs()); errs() << '\n';
}
#endif

/// GenerateCrossUseConstantOffsets - Look for registers which are a constant
/// distance apart and try to form reuse opportunities between them.
///
/// This runs in two phases: first a search phase that records WorkItems
/// without touching the formulae, then an apply phase that mutates them, so
/// the search never iterates over data structures that are being modified.
void LSRInstance::GenerateCrossUseConstantOffsets() {
  // Group the registers by their value without any added constant offset.
  // Map is keyed by the immediate-stripped base; each entry maps a stripped
  // immediate back to its original register. Sequence preserves insertion
  // order so the examination below is deterministic.
  typedef std::map<int64_t, const SCEV *> ImmMapTy;
  typedef DenseMap<const SCEV *, ImmMapTy> RegMapTy;
  RegMapTy Map;
  DenseMap<const SCEV *, SmallBitVector> UsedByIndicesMap;
  SmallVector<const SCEV *, 8> Sequence;
  for (RegUseTracker::const_iterator I = RegUses.begin(), E = RegUses.end();
       I != E; ++I) {
    const SCEV *Reg = *I;
    // ExtractImmediate updates Reg in place to the immediate-free base.
    int64_t Imm = ExtractImmediate(Reg, SE);
    std::pair<RegMapTy::iterator, bool> Pair =
      Map.insert(std::make_pair(Reg, ImmMapTy()));
    if (Pair.second)
      Sequence.push_back(Reg);
    Pair.first->second.insert(std::make_pair(Imm, *I));
    UsedByIndicesMap[Reg] |= RegUses.getUsedByIndices(*I);
  }

  // Now examine each set of registers with the same base value. Build up
  // a list of work to do and do the work in a separate step so that we're
  // not adding formulae and register counts while we're searching.
  SmallVector<WorkItem, 32> WorkItems;
  SmallSet<std::pair<size_t, int64_t>, 32> UniqueItems;
  for (SmallVectorImpl<const SCEV *>::const_iterator I = Sequence.begin(),
       E = Sequence.end(); I != E; ++I) {
    const SCEV *Reg = *I;
    const ImmMapTy &Imms = Map.find(Reg)->second;

    // It's not worthwhile looking for reuse if there's only one offset.
    if (Imms.size() == 1)
      continue;

    DEBUG(dbgs() << "Generating cross-use offsets for " << *Reg << ':';
          for (ImmMapTy::const_iterator J = Imms.begin(), JE = Imms.end();
               J != JE; ++J)
            dbgs() << ' ' << J->first;
          dbgs() << '\n');

    // Examine each offset.
    for (ImmMapTy::const_iterator J = Imms.begin(), JE = Imms.end();
         J != JE; ++J) {
      const SCEV *OrigReg = J->second;

      int64_t JImm = J->first;
      const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(OrigReg);

      if (!isa<SCEVConstant>(OrigReg) &&
          UsedByIndicesMap[Reg].count() == 1) {
        DEBUG(dbgs() << "Skipping cross-use reuse for " << *OrigReg << '\n');
        continue;
      }

      // Conservatively examine offsets between this orig reg a few selected
      // other orig regs: the smallest, the largest, and one near the midpoint,
      // rather than all pairs.
      ImmMapTy::const_iterator OtherImms[] = {
        Imms.begin(), prior(Imms.end()),
        Imms.lower_bound((Imms.begin()->first + prior(Imms.end())->first) / 2)
      };
      for (size_t i = 0, e = array_lengthof(OtherImms); i != e; ++i) {
        ImmMapTy::const_iterator M = OtherImms[i];
        if (M == J || M == JE) continue;

        // Compute the difference between the two.
        int64_t Imm = (uint64_t)JImm - M->first;
        for (int LUIdx = UsedByIndices.find_first(); LUIdx != -1;
             LUIdx = UsedByIndices.find_next(LUIdx))
          // Make a memo of this use, offset, and register tuple.
          // UniqueItems deduplicates (use, offset) pairs; insert reports
          // whether the pair was newly added.
          if (UniqueItems.insert(std::make_pair(LUIdx, Imm)))
            WorkItems.push_back(WorkItem(LUIdx, Imm, OrigReg));
      }
    }
  }

  // Release the search-phase structures before mutating any formulae.
  Map.clear();
  Sequence.clear();
  UsedByIndicesMap.clear();
  UniqueItems.clear();

  // Now iterate through the worklist and add new formulae.
  for (SmallVectorImpl<WorkItem>::const_iterator I = WorkItems.begin(),
       E = WorkItems.end(); I != E; ++I) {
    const WorkItem &WI = *I;
    size_t LUIdx = WI.LUIdx;
    LSRUse &LU = Uses[LUIdx];
    int64_t Imm = WI.Imm;
    const SCEV *OrigReg = WI.OrigReg;

    Type *IntTy = SE.getEffectiveSCEVType(OrigReg->getType());
    const SCEV *NegImmS = SE.getSCEV(ConstantInt::get(IntTy, -(uint64_t)Imm));
    unsigned BitWidth = SE.getTypeSizeInBits(IntTy);

    // TODO: Use a more targeted data structure.
    for (size_t L = 0, LE = LU.Formulae.size(); L != LE; ++L) {
      const Formula &F = LU.Formulae[L];
      // Use the immediate in the scaled register. Since the register is
      // multiplied by Scale, the transferred immediate is Imm * Scale.
      if (F.ScaledReg == OrigReg) {
        int64_t Offset = (uint64_t)F.BaseOffset + Imm * (uint64_t)F.Scale;
        // Don't create 50 + reg(-50).
        if (F.referencesReg(SE.getSCEV(
                   ConstantInt::get(IntTy, -(uint64_t)Offset))))
          continue;
        Formula NewF = F;
        NewF.BaseOffset = Offset;
        if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
                        NewF))
          continue;
        NewF.ScaledReg = SE.getAddExpr(NegImmS, NewF.ScaledReg);

        // If the new scale is a constant in a register, and adding the constant
        // value to the immediate would produce a value closer to zero than the
        // immediate itself, then the formula isn't worthwhile.
        if (const SCEVConstant *C = dyn_cast<SCEVConstant>(NewF.ScaledReg))
          if (C->getValue()->isNegative() !=
                (NewF.BaseOffset < 0) &&
              (C->getValue()->getValue().abs() * APInt(BitWidth, F.Scale))
                .ule(abs64(NewF.BaseOffset)))
            continue;

        // OK, looks good.
        (void)InsertFormula(LU, LUIdx, NewF);
      } else {
        // Use the immediate in a base register.
        for (size_t N = 0, NE = F.BaseRegs.size(); N != NE; ++N) {
          const SCEV *BaseReg = F.BaseRegs[N];
          if (BaseReg != OrigReg)
            continue;
          Formula NewF = F;
          NewF.BaseOffset = (uint64_t)NewF.BaseOffset + Imm;
          if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset,
                          LU.Kind, LU.AccessTy, NewF)) {
            // If folding into BaseOffset is illegal, fall back to carrying
            // the immediate in UnfoldedOffset when the target can add it.
            if (!TTI.isLegalAddImmediate((uint64_t)NewF.UnfoldedOffset + Imm))
              continue;
            NewF = F;
            NewF.UnfoldedOffset = (uint64_t)NewF.UnfoldedOffset + Imm;
          }
          NewF.BaseRegs[N] = SE.getAddExpr(NegImmS, BaseReg);

          // If the new formula has a constant in a register, and adding the
          // constant value to the immediate would produce a value closer to
          // zero than the immediate itself, then the formula isn't worthwhile.
          for (SmallVectorImpl<const SCEV *>::const_iterator
               J = NewF.BaseRegs.begin(), JE = NewF.BaseRegs.end();
               J != JE; ++J)
            if (const SCEVConstant *C = dyn_cast<SCEVConstant>(*J))
              if ((C->getValue()->getValue() + NewF.BaseOffset).abs().slt(
                    abs64(NewF.BaseOffset)) &&
                  (C->getValue()->getValue() +
                   NewF.BaseOffset).countTrailingZeros() >=
                    countTrailingZeros<uint64_t>(NewF.BaseOffset))
                goto skip_formula;

          // Ok, looks good.
          (void)InsertFormula(LU, LUIdx, NewF);
          break;
        skip_formula:;
        }
      }
    }
  }
}

/// GenerateAllReuseFormulae - Generate formulae for each use.
void
LSRInstance::GenerateAllReuseFormulae() {
  // This is split into multiple loops so that hasRegsUsedByUsesOtherThan
  // queries are more precise.
  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateReassociations(LU, LUIdx, LU.Formulae[i]);
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateCombinations(LU, LUIdx, LU.Formulae[i]);
  }
  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateSymbolicOffsets(LU, LUIdx, LU.Formulae[i]);
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateConstantOffsets(LU, LUIdx, LU.Formulae[i]);
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateICmpZeroScales(LU, LUIdx, LU.Formulae[i]);
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateScales(LU, LUIdx, LU.Formulae[i]);
  }
  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateTruncates(LU, LUIdx, LU.Formulae[i]);
  }

  GenerateCrossUseConstantOffsets();

  DEBUG(dbgs() << "\n"
                  "After generating reuse formulae:\n";
        print_uses(dbgs()));
}

/// If there are multiple formulae with the same set of registers used
/// by other uses, pick the best one and delete the others.
void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
  DenseSet<const SCEV *> VisitedRegs;
  SmallPtrSet<const SCEV *, 16> Regs;
  SmallPtrSet<const SCEV *, 16> LoserRegs;
#ifndef NDEBUG
  bool ChangedFormulae = false;
#endif

  // Collect the best formula for each unique set of shared registers. This
  // is reset for each use.
  typedef DenseMap<SmallVector<const SCEV *, 4>, size_t, UniquifierDenseMapInfo>
    BestFormulaeTy;
  BestFormulaeTy BestFormulae;

  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    DEBUG(dbgs() << "Filtering for use "; LU.print(dbgs()); dbgs() << '\n');

    bool Any = false;
    for (size_t FIdx = 0, NumForms = LU.Formulae.size();
         FIdx != NumForms; ++FIdx) {
      Formula &F = LU.Formulae[FIdx];

      // Some formulas are instant losers. For example, they may depend on
      // nonexistent AddRecs from other loops. These need to be filtered
      // immediately, otherwise heuristics could choose them over others leading
      // to an unsatisfactory solution. Passing LoserRegs into RateFormula here
      // avoids the need to recompute this information across formulae using the
      // same bad AddRec. Passing LoserRegs is also essential unless we remove
      // the corresponding bad register from the Regs set.
      Cost CostF;
      Regs.clear();
      CostF.RateFormula(TTI, F, Regs, VisitedRegs, L, LU.Offsets, SE, DT, LU,
                        &LoserRegs);
      if (CostF.isLoser()) {
        // During initial formula generation, undesirable formulae are generated
        // by uses within other loops that have some non-trivial address mode or
        // use the postinc form of the IV. LSR needs to provide these formulae
        // as the basis of rediscovering the desired formula that uses an AddRec
        // corresponding to the existing phi. Once all formulae have been
        // generated, these initial losers may be pruned.
        DEBUG(dbgs() << "  Filtering loser "; F.print(dbgs());
              dbgs() << "\n");
      }
      else {
        // Key is the set of this formula's registers that are also used by
        // other uses (the "shared" registers); formulae with the same Key
        // compete and only the cheapest survives.
        SmallVector<const SCEV *, 4> Key;
        for (SmallVectorImpl<const SCEV *>::const_iterator J = F.BaseRegs.begin(),
               JE = F.BaseRegs.end(); J != JE; ++J) {
          const SCEV *Reg = *J;
          if (RegUses.isRegUsedByUsesOtherThan(Reg, LUIdx))
            Key.push_back(Reg);
        }
        if (F.ScaledReg &&
            RegUses.isRegUsedByUsesOtherThan(F.ScaledReg, LUIdx))
          Key.push_back(F.ScaledReg);
        // Unstable sort by host order ok, because this is only used for
        // uniquifying.
        std::sort(Key.begin(), Key.end());

        std::pair<BestFormulaeTy::const_iterator, bool> P =
          BestFormulae.insert(std::make_pair(Key, FIdx));
        if (P.second)
          continue;

        Formula &Best = LU.Formulae[P.first->second];

        Cost CostBest;
        Regs.clear();
        CostBest.RateFormula(TTI, Best, Regs, VisitedRegs, L, LU.Offsets, SE,
                             DT, LU);
        // If the new formula is cheaper, swap it into the Best slot so that
        // the code below deletes the previous best instead.
        if (CostF < CostBest)
          std::swap(F, Best);
        DEBUG(dbgs() << "  Filtering out formula "; F.print(dbgs());
              dbgs() << "\n"
                        "    in favor of formula "; Best.print(dbgs());
              dbgs() << '\n');
      }
#ifndef NDEBUG
      ChangedFormulae = true;
#endif
      // Delete F (a loser or a duplicate-key formula) and re-examine the
      // element swapped into this slot.
      LU.DeleteFormula(F);
      --FIdx;
      --NumForms;
      Any = true;
    }

    // Now that we've filtered out some formulae, recompute the Regs set.
    if (Any)
      LU.RecomputeRegs(LUIdx, RegUses);

    // Reset this to prepare for the next use.
    BestFormulae.clear();
  }

  DEBUG(if (ChangedFormulae) {
          dbgs() << "\n"
                    "After filtering out undesirable candidates:\n";
          print_uses(dbgs());
        });
}

// This is a rough guess that seems to work fairly well.
static const size_t ComplexityLimit = UINT16_MAX;

/// EstimateSearchSpaceComplexity - Estimate the worst-case number of
/// solutions the solver might have to consider. It almost never considers
/// this many solutions because it prunes the search space, but the pruning
/// isn't always sufficient.
size_t LSRInstance::EstimateSearchSpaceComplexity() const {
  // The search space is the product of the formula counts of all uses;
  // saturate at ComplexityLimit to avoid overflow.
  size_t Power = 1;
  for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(),
       E = Uses.end(); I != E; ++I) {
    size_t FSize = I->Formulae.size();
    if (FSize >= ComplexityLimit) {
      Power = ComplexityLimit;
      break;
    }
    Power *= FSize;
    if (Power >= ComplexityLimit)
      break;
  }
  return Power;
}

/// NarrowSearchSpaceByDetectingSupersets - When one formula uses a superset
/// of the registers of another formula, it won't help reduce register
/// pressure (though it may not necessarily hurt register pressure); remove
/// it to simplify the system.
void LSRInstance::NarrowSearchSpaceByDetectingSupersets() {
  if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
    DEBUG(dbgs() << "The search space is too complex.\n");

    DEBUG(dbgs() << "Narrowing the search space by eliminating formulae "
                    "which use a superset of registers used by other "
                    "formulae.\n");

    for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
      LSRUse &LU = Uses[LUIdx];
      bool Any = false;
      for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
        Formula &F = LU.Formulae[i];
        // Look for a formula with a constant or GV in a register. If the use
        // also has a formula with that same value in an immediate field,
        // delete the one that uses a register.
        for (SmallVectorImpl<const SCEV *>::const_iterator
             I = F.BaseRegs.begin(), E = F.BaseRegs.end(); I != E; ++I) {
          if (const SCEVConstant *C = dyn_cast<SCEVConstant>(*I)) {
            // Build the equivalent formula with the constant folded into the
            // immediate; if an identical-register formula already exists,
            // this one is a redundant superset.
            Formula NewF = F;
            NewF.BaseOffset += C->getValue()->getSExtValue();
            NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
                                (I - F.BaseRegs.begin()));
            if (LU.HasFormulaWithSameRegs(NewF)) {
              DEBUG(dbgs() << "  Deleting "; F.print(dbgs()); dbgs() << '\n');
              LU.DeleteFormula(F);
              // DeleteFormula compacts the list; re-examine this index.
              --i;
              --e;
              Any = true;
              break;
            }
          } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(*I)) {
            if (GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue()))
              if (!F.BaseGV) {
                // Same idea with a global value folded into BaseGV.
                Formula NewF = F;
                NewF.BaseGV = GV;
                NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
                                    (I - F.BaseRegs.begin()));
                if (LU.HasFormulaWithSameRegs(NewF)) {
                  DEBUG(dbgs() << "  Deleting "; F.print(dbgs());
                        dbgs() << '\n');
                  LU.DeleteFormula(F);
                  --i;
                  --e;
                  Any = true;
                  break;
                }
              }
          }
        }
      }
      if (Any)
        LU.RecomputeRegs(LUIdx, RegUses);
    }

    DEBUG(dbgs() << "After pre-selection:\n";
          print_uses(dbgs()));
  }
}

3961212904Sdim/// NarrowSearchSpaceByCollapsingUnrolledCode - When there are many registers 3962212904Sdim/// for expressions like A, A+1, A+2, etc., allocate a single register for 3963212904Sdim/// them. 3964212904Sdimvoid LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() { 3965252723Sdim if (EstimateSearchSpaceComplexity() < ComplexityLimit) 3966252723Sdim return; 3967208599Srdivacky 3968252723Sdim DEBUG(dbgs() << "The search space is too complex.\n" 3969252723Sdim "Narrowing the search space by assuming that uses separated " 3970252723Sdim "by a constant offset will use the same registers.\n"); 3971208599Srdivacky 3972252723Sdim // This is especially useful for unrolled loops. 3973208599Srdivacky 3974252723Sdim for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) { 3975252723Sdim LSRUse &LU = Uses[LUIdx]; 3976252723Sdim for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(), 3977252723Sdim E = LU.Formulae.end(); I != E; ++I) { 3978252723Sdim const Formula &F = *I; 3979252723Sdim if (F.BaseOffset == 0 || F.Scale != 0) 3980252723Sdim continue; 3981208599Srdivacky 3982252723Sdim LSRUse *LUThatHas = FindUseWithSimilarFormula(F, LU); 3983252723Sdim if (!LUThatHas) 3984252723Sdim continue; 3985208599Srdivacky 3986252723Sdim if (!reconcileNewOffset(*LUThatHas, F.BaseOffset, /*HasBaseReg=*/ false, 3987252723Sdim LU.Kind, LU.AccessTy)) 3988252723Sdim continue; 3989218893Sdim 3990252723Sdim DEBUG(dbgs() << " Deleting use "; LU.print(dbgs()); dbgs() << '\n'); 3991208599Srdivacky 3992252723Sdim LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop; 3993252723Sdim 3994252723Sdim // Update the relocs to reference the new use. 
3995252723Sdim for (SmallVectorImpl<LSRFixup>::iterator I = Fixups.begin(), 3996252723Sdim E = Fixups.end(); I != E; ++I) { 3997252723Sdim LSRFixup &Fixup = *I; 3998252723Sdim if (Fixup.LUIdx == LUIdx) { 3999252723Sdim Fixup.LUIdx = LUThatHas - &Uses.front(); 4000252723Sdim Fixup.Offset += F.BaseOffset; 4001252723Sdim // Add the new offset to LUThatHas' offset list. 4002252723Sdim if (LUThatHas->Offsets.back() != Fixup.Offset) { 4003252723Sdim LUThatHas->Offsets.push_back(Fixup.Offset); 4004252723Sdim if (Fixup.Offset > LUThatHas->MaxOffset) 4005252723Sdim LUThatHas->MaxOffset = Fixup.Offset; 4006252723Sdim if (Fixup.Offset < LUThatHas->MinOffset) 4007252723Sdim LUThatHas->MinOffset = Fixup.Offset; 4008208599Srdivacky } 4009252723Sdim DEBUG(dbgs() << "New fixup has offset " << Fixup.Offset << '\n'); 4010208599Srdivacky } 4011252723Sdim if (Fixup.LUIdx == NumUses-1) 4012252723Sdim Fixup.LUIdx = LUIdx; 4013208599Srdivacky } 4014252723Sdim 4015252723Sdim // Delete formulae from the new use which are no longer legal. 4016252723Sdim bool Any = false; 4017252723Sdim for (size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) { 4018252723Sdim Formula &F = LUThatHas->Formulae[i]; 4019252723Sdim if (!isLegalUse(TTI, LUThatHas->MinOffset, LUThatHas->MaxOffset, 4020252723Sdim LUThatHas->Kind, LUThatHas->AccessTy, F)) { 4021252723Sdim DEBUG(dbgs() << " Deleting "; F.print(dbgs()); 4022252723Sdim dbgs() << '\n'); 4023252723Sdim LUThatHas->DeleteFormula(F); 4024252723Sdim --i; 4025252723Sdim --e; 4026252723Sdim Any = true; 4027252723Sdim } 4028252723Sdim } 4029252723Sdim 4030252723Sdim if (Any) 4031252723Sdim LUThatHas->RecomputeRegs(LUThatHas - &Uses.front(), RegUses); 4032252723Sdim 4033252723Sdim // Delete the old use. 
4034252723Sdim DeleteUse(LU, LUIdx); 4035252723Sdim --LUIdx; 4036252723Sdim --NumUses; 4037252723Sdim break; 4038208599Srdivacky } 4039252723Sdim } 4040208599Srdivacky 4041252723Sdim DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs())); 4042212904Sdim} 4043208599Srdivacky 4044221345Sdim/// NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters - Call 4045212904Sdim/// FilterOutUndesirableDedicatedRegisters again, if necessary, now that 4046212904Sdim/// we've done more filtering, as it may be able to find more formulae to 4047212904Sdim/// eliminate. 4048212904Sdimvoid LSRInstance::NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(){ 4049212904Sdim if (EstimateSearchSpaceComplexity() >= ComplexityLimit) { 4050212904Sdim DEBUG(dbgs() << "The search space is too complex.\n"); 4051212904Sdim 4052212904Sdim DEBUG(dbgs() << "Narrowing the search space by re-filtering out " 4053212904Sdim "undesirable dedicated registers.\n"); 4054212904Sdim 4055212904Sdim FilterOutUndesirableDedicatedRegisters(); 4056212904Sdim 4057212904Sdim DEBUG(dbgs() << "After pre-selection:\n"; 4058212904Sdim print_uses(dbgs())); 4059212904Sdim } 4060212904Sdim} 4061212904Sdim 4062212904Sdim/// NarrowSearchSpaceByPickingWinnerRegs - Pick a register which seems likely 4063212904Sdim/// to be profitable, and then in any use which has any reference to that 4064212904Sdim/// register, delete all formulae which do not reference that register. 4065212904Sdimvoid LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() { 4066208599Srdivacky // With all other options exhausted, loop until the system is simple 4067208599Srdivacky // enough to handle. 4068208599Srdivacky SmallPtrSet<const SCEV *, 4> Taken; 4069208599Srdivacky while (EstimateSearchSpaceComplexity() >= ComplexityLimit) { 4070203954Srdivacky // Ok, we have too many of formulae on our hands to conveniently handle. 4071203954Srdivacky // Use a rough heuristic to thin out the list. 
4072208599Srdivacky DEBUG(dbgs() << "The search space is too complex.\n"); 4073193323Sed 4074203954Srdivacky // Pick the register which is used by the most LSRUses, which is likely 4075203954Srdivacky // to be a good reuse register candidate. 4076203954Srdivacky const SCEV *Best = 0; 4077203954Srdivacky unsigned BestNum = 0; 4078203954Srdivacky for (RegUseTracker::const_iterator I = RegUses.begin(), E = RegUses.end(); 4079203954Srdivacky I != E; ++I) { 4080203954Srdivacky const SCEV *Reg = *I; 4081203954Srdivacky if (Taken.count(Reg)) 4082203954Srdivacky continue; 4083203954Srdivacky if (!Best) 4084203954Srdivacky Best = Reg; 4085203954Srdivacky else { 4086203954Srdivacky unsigned Count = RegUses.getUsedByIndices(Reg).count(); 4087203954Srdivacky if (Count > BestNum) { 4088203954Srdivacky Best = Reg; 4089203954Srdivacky BestNum = Count; 4090203954Srdivacky } 4091203954Srdivacky } 4092203954Srdivacky } 4093203954Srdivacky 4094203954Srdivacky DEBUG(dbgs() << "Narrowing the search space by assuming " << *Best 4095204642Srdivacky << " will yield profitable reuse.\n"); 4096203954Srdivacky Taken.insert(Best); 4097203954Srdivacky 4098203954Srdivacky // In any use with formulae which references this register, delete formulae 4099203954Srdivacky // which don't reference it. 4100208599Srdivacky for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) { 4101208599Srdivacky LSRUse &LU = Uses[LUIdx]; 4102203954Srdivacky if (!LU.Regs.count(Best)) continue; 4103203954Srdivacky 4104208599Srdivacky bool Any = false; 4105203954Srdivacky for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) { 4106203954Srdivacky Formula &F = LU.Formulae[i]; 4107203954Srdivacky if (!F.referencesReg(Best)) { 4108203954Srdivacky DEBUG(dbgs() << " Deleting "; F.print(dbgs()); dbgs() << '\n'); 4109208599Srdivacky LU.DeleteFormula(F); 4110203954Srdivacky --e; 4111203954Srdivacky --i; 4112208599Srdivacky Any = true; 4113208599Srdivacky assert(e != 0 && "Use has no formulae left! 
Is Regs inconsistent?"); 4114203954Srdivacky continue; 4115203954Srdivacky } 4116208599Srdivacky } 4117203954Srdivacky 4118208599Srdivacky if (Any) 4119208599Srdivacky LU.RecomputeRegs(LUIdx, RegUses); 4120203954Srdivacky } 4121203954Srdivacky 4122203954Srdivacky DEBUG(dbgs() << "After pre-selection:\n"; 4123203954Srdivacky print_uses(dbgs())); 4124193323Sed } 4125203954Srdivacky} 4126193323Sed 4127212904Sdim/// NarrowSearchSpaceUsingHeuristics - If there are an extraordinary number of 4128212904Sdim/// formulae to choose from, use some rough heuristics to prune down the number 4129212904Sdim/// of formulae. This keeps the main solver from taking an extraordinary amount 4130212904Sdim/// of time in some worst-case scenarios. 4131212904Sdimvoid LSRInstance::NarrowSearchSpaceUsingHeuristics() { 4132212904Sdim NarrowSearchSpaceByDetectingSupersets(); 4133212904Sdim NarrowSearchSpaceByCollapsingUnrolledCode(); 4134212904Sdim NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(); 4135212904Sdim NarrowSearchSpaceByPickingWinnerRegs(); 4136212904Sdim} 4137212904Sdim 4138203954Srdivacky/// SolveRecurse - This is the recursive solver. 4139203954Srdivackyvoid LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution, 4140203954Srdivacky Cost &SolutionCost, 4141203954Srdivacky SmallVectorImpl<const Formula *> &Workspace, 4142203954Srdivacky const Cost &CurCost, 4143203954Srdivacky const SmallPtrSet<const SCEV *, 16> &CurRegs, 4144203954Srdivacky DenseSet<const SCEV *> &VisitedRegs) const { 4145203954Srdivacky // Some ideas: 4146203954Srdivacky // - prune more: 4147203954Srdivacky // - use more aggressive filtering 4148203954Srdivacky // - sort the formula so that the most profitable solutions are found first 4149203954Srdivacky // - sort the uses too 4150203954Srdivacky // - search faster: 4151204642Srdivacky // - don't compute a cost, and then compare. compare while computing a cost 4152203954Srdivacky // and bail early. 
4153203954Srdivacky // - track register sets with SmallBitVector 4154193323Sed 4155203954Srdivacky const LSRUse &LU = Uses[Workspace.size()]; 4156199481Srdivacky 4157203954Srdivacky // If this use references any register that's already a part of the 4158203954Srdivacky // in-progress solution, consider it a requirement that a formula must 4159203954Srdivacky // reference that register in order to be considered. This prunes out 4160203954Srdivacky // unprofitable searching. 4161203954Srdivacky SmallSetVector<const SCEV *, 4> ReqRegs; 4162203954Srdivacky for (SmallPtrSet<const SCEV *, 16>::const_iterator I = CurRegs.begin(), 4163203954Srdivacky E = CurRegs.end(); I != E; ++I) 4164203954Srdivacky if (LU.Regs.count(*I)) 4165203954Srdivacky ReqRegs.insert(*I); 4166193323Sed 4167203954Srdivacky SmallPtrSet<const SCEV *, 16> NewRegs; 4168203954Srdivacky Cost NewCost; 4169203954Srdivacky for (SmallVectorImpl<Formula>::const_iterator I = LU.Formulae.begin(), 4170203954Srdivacky E = LU.Formulae.end(); I != E; ++I) { 4171203954Srdivacky const Formula &F = *I; 4172203954Srdivacky 4173203954Srdivacky // Ignore formulae which do not use any of the required registers. 4174235633Sdim bool SatisfiedReqReg = true; 4175203954Srdivacky for (SmallSetVector<const SCEV *, 4>::const_iterator J = ReqRegs.begin(), 4176203954Srdivacky JE = ReqRegs.end(); J != JE; ++J) { 4177203954Srdivacky const SCEV *Reg = *J; 4178203954Srdivacky if ((!F.ScaledReg || F.ScaledReg != Reg) && 4179203954Srdivacky std::find(F.BaseRegs.begin(), F.BaseRegs.end(), Reg) == 4180235633Sdim F.BaseRegs.end()) { 4181235633Sdim SatisfiedReqReg = false; 4182235633Sdim break; 4183235633Sdim } 4184203954Srdivacky } 4185235633Sdim if (!SatisfiedReqReg) { 4186235633Sdim // If none of the formulae satisfied the required registers, then we could 4187235633Sdim // clear ReqRegs and try again. Currently, we simply give up in this case. 
4188235633Sdim continue; 4189235633Sdim } 4190203954Srdivacky 4191203954Srdivacky // Evaluate the cost of the current formula. If it's already worse than 4192203954Srdivacky // the current best, prune the search at that point. 4193203954Srdivacky NewCost = CurCost; 4194203954Srdivacky NewRegs = CurRegs; 4195263509Sdim NewCost.RateFormula(TTI, F, NewRegs, VisitedRegs, L, LU.Offsets, SE, DT, 4196263509Sdim LU); 4197203954Srdivacky if (NewCost < SolutionCost) { 4198203954Srdivacky Workspace.push_back(&F); 4199203954Srdivacky if (Workspace.size() != Uses.size()) { 4200203954Srdivacky SolveRecurse(Solution, SolutionCost, Workspace, NewCost, 4201203954Srdivacky NewRegs, VisitedRegs); 4202203954Srdivacky if (F.getNumRegs() == 1 && Workspace.size() == 1) 4203203954Srdivacky VisitedRegs.insert(F.ScaledReg ? F.ScaledReg : F.BaseRegs[0]); 4204203954Srdivacky } else { 4205203954Srdivacky DEBUG(dbgs() << "New best at "; NewCost.print(dbgs()); 4206235633Sdim dbgs() << ".\n Regs:"; 4207203954Srdivacky for (SmallPtrSet<const SCEV *, 16>::const_iterator 4208203954Srdivacky I = NewRegs.begin(), E = NewRegs.end(); I != E; ++I) 4209203954Srdivacky dbgs() << ' ' << **I; 4210203954Srdivacky dbgs() << '\n'); 4211203954Srdivacky 4212203954Srdivacky SolutionCost = NewCost; 4213203954Srdivacky Solution = Workspace; 4214203954Srdivacky } 4215203954Srdivacky Workspace.pop_back(); 4216203954Srdivacky } 4217203954Srdivacky } 4218203954Srdivacky} 4219203954Srdivacky 4220208599Srdivacky/// Solve - Choose one formula from each use. Return the results in the given 4221208599Srdivacky/// Solution vector. 
4222203954Srdivackyvoid LSRInstance::Solve(SmallVectorImpl<const Formula *> &Solution) const { 4223203954Srdivacky SmallVector<const Formula *, 8> Workspace; 4224203954Srdivacky Cost SolutionCost; 4225203954Srdivacky SolutionCost.Loose(); 4226203954Srdivacky Cost CurCost; 4227203954Srdivacky SmallPtrSet<const SCEV *, 16> CurRegs; 4228203954Srdivacky DenseSet<const SCEV *> VisitedRegs; 4229203954Srdivacky Workspace.reserve(Uses.size()); 4230203954Srdivacky 4231208599Srdivacky // SolveRecurse does all the work. 4232203954Srdivacky SolveRecurse(Solution, SolutionCost, Workspace, CurCost, 4233203954Srdivacky CurRegs, VisitedRegs); 4234226890Sdim if (Solution.empty()) { 4235226890Sdim DEBUG(dbgs() << "\nNo Satisfactory Solution\n"); 4236226890Sdim return; 4237226890Sdim } 4238203954Srdivacky 4239203954Srdivacky // Ok, we've now made all our decisions. 4240203954Srdivacky DEBUG(dbgs() << "\n" 4241203954Srdivacky "The chosen solution requires "; SolutionCost.print(dbgs()); 4242203954Srdivacky dbgs() << ":\n"; 4243203954Srdivacky for (size_t i = 0, e = Uses.size(); i != e; ++i) { 4244203954Srdivacky dbgs() << " "; 4245203954Srdivacky Uses[i].print(dbgs()); 4246203954Srdivacky dbgs() << "\n" 4247203954Srdivacky " "; 4248203954Srdivacky Solution[i]->print(dbgs()); 4249203954Srdivacky dbgs() << '\n'; 4250203954Srdivacky }); 4251203954Srdivacky 4252208599Srdivacky assert(Solution.size() == Uses.size() && "Malformed solution!"); 4253203954Srdivacky} 4254203954Srdivacky 4255207618Srdivacky/// HoistInsertPosition - Helper for AdjustInsertPositionForExpand. Climb up 4256207618Srdivacky/// the dominator tree far as we can go while still being dominated by the 4257207618Srdivacky/// input positions. This helps canonicalize the insert position, which 4258207618Srdivacky/// encourages sharing. 
4259207618SrdivackyBasicBlock::iterator 4260207618SrdivackyLSRInstance::HoistInsertPosition(BasicBlock::iterator IP, 4261207618Srdivacky const SmallVectorImpl<Instruction *> &Inputs) 4262207618Srdivacky const { 4263207618Srdivacky for (;;) { 4264207618Srdivacky const Loop *IPLoop = LI.getLoopFor(IP->getParent()); 4265207618Srdivacky unsigned IPLoopDepth = IPLoop ? IPLoop->getLoopDepth() : 0; 4266203954Srdivacky 4267207618Srdivacky BasicBlock *IDom; 4268208599Srdivacky for (DomTreeNode *Rung = DT.getNode(IP->getParent()); ; ) { 4269208599Srdivacky if (!Rung) return IP; 4270208599Srdivacky Rung = Rung->getIDom(); 4271208599Srdivacky if (!Rung) return IP; 4272208599Srdivacky IDom = Rung->getBlock(); 4273203954Srdivacky 4274207618Srdivacky // Don't climb into a loop though. 4275207618Srdivacky const Loop *IDomLoop = LI.getLoopFor(IDom); 4276207618Srdivacky unsigned IDomDepth = IDomLoop ? IDomLoop->getLoopDepth() : 0; 4277207618Srdivacky if (IDomDepth <= IPLoopDepth && 4278207618Srdivacky (IDomDepth != IPLoopDepth || IDomLoop == IPLoop)) 4279207618Srdivacky break; 4280207618Srdivacky } 4281207618Srdivacky 4282203954Srdivacky bool AllDominate = true; 4283203954Srdivacky Instruction *BetterPos = 0; 4284203954Srdivacky Instruction *Tentative = IDom->getTerminator(); 4285203954Srdivacky for (SmallVectorImpl<Instruction *>::const_iterator I = Inputs.begin(), 4286203954Srdivacky E = Inputs.end(); I != E; ++I) { 4287203954Srdivacky Instruction *Inst = *I; 4288203954Srdivacky if (Inst == Tentative || !DT.dominates(Inst, Tentative)) { 4289203954Srdivacky AllDominate = false; 4290199481Srdivacky break; 4291199481Srdivacky } 4292207618Srdivacky // Attempt to find an insert position in the middle of the block, 4293207618Srdivacky // instead of at the end, so that it can be used for other expansions. 
4294203954Srdivacky if (IDom == Inst->getParent() && 4295235633Sdim (!BetterPos || !DT.dominates(Inst, BetterPos))) 4296208599Srdivacky BetterPos = llvm::next(BasicBlock::iterator(Inst)); 4297203954Srdivacky } 4298203954Srdivacky if (!AllDominate) 4299203954Srdivacky break; 4300203954Srdivacky if (BetterPos) 4301203954Srdivacky IP = BetterPos; 4302203954Srdivacky else 4303203954Srdivacky IP = Tentative; 4304199481Srdivacky } 4305207618Srdivacky 4306207618Srdivacky return IP; 4307207618Srdivacky} 4308207618Srdivacky 4309207618Srdivacky/// AdjustInsertPositionForExpand - Determine an input position which will be 4310207618Srdivacky/// dominated by the operands and which will dominate the result. 4311207618SrdivackyBasicBlock::iterator 4312235633SdimLSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator LowestIP, 4313207618Srdivacky const LSRFixup &LF, 4314235633Sdim const LSRUse &LU, 4315235633Sdim SCEVExpander &Rewriter) const { 4316207618Srdivacky // Collect some instructions which must be dominated by the 4317207618Srdivacky // expanding replacement. These must be dominated by any operands that 4318207618Srdivacky // will be required in the expansion. 
4319207618Srdivacky SmallVector<Instruction *, 4> Inputs; 4320207618Srdivacky if (Instruction *I = dyn_cast<Instruction>(LF.OperandValToReplace)) 4321207618Srdivacky Inputs.push_back(I); 4322207618Srdivacky if (LU.Kind == LSRUse::ICmpZero) 4323207618Srdivacky if (Instruction *I = 4324207618Srdivacky dyn_cast<Instruction>(cast<ICmpInst>(LF.UserInst)->getOperand(1))) 4325207618Srdivacky Inputs.push_back(I); 4326207618Srdivacky if (LF.PostIncLoops.count(L)) { 4327207618Srdivacky if (LF.isUseFullyOutsideLoop(L)) 4328207618Srdivacky Inputs.push_back(L->getLoopLatch()->getTerminator()); 4329207618Srdivacky else 4330207618Srdivacky Inputs.push_back(IVIncInsertPos); 4331207618Srdivacky } 4332207618Srdivacky // The expansion must also be dominated by the increment positions of any 4333207618Srdivacky // loops it for which it is using post-inc mode. 4334207618Srdivacky for (PostIncLoopSet::const_iterator I = LF.PostIncLoops.begin(), 4335207618Srdivacky E = LF.PostIncLoops.end(); I != E; ++I) { 4336207618Srdivacky const Loop *PIL = *I; 4337207618Srdivacky if (PIL == L) continue; 4338207618Srdivacky 4339207618Srdivacky // Be dominated by the loop exit. 
4340207618Srdivacky SmallVector<BasicBlock *, 4> ExitingBlocks; 4341207618Srdivacky PIL->getExitingBlocks(ExitingBlocks); 4342207618Srdivacky if (!ExitingBlocks.empty()) { 4343207618Srdivacky BasicBlock *BB = ExitingBlocks[0]; 4344207618Srdivacky for (unsigned i = 1, e = ExitingBlocks.size(); i != e; ++i) 4345207618Srdivacky BB = DT.findNearestCommonDominator(BB, ExitingBlocks[i]); 4346207618Srdivacky Inputs.push_back(BB->getTerminator()); 4347207618Srdivacky } 4348207618Srdivacky } 4349207618Srdivacky 4350235633Sdim assert(!isa<PHINode>(LowestIP) && !isa<LandingPadInst>(LowestIP) 4351235633Sdim && !isa<DbgInfoIntrinsic>(LowestIP) && 4352235633Sdim "Insertion point must be a normal instruction"); 4353235633Sdim 4354207618Srdivacky // Then, climb up the immediate dominator tree as far as we can go while 4355207618Srdivacky // still being dominated by the input positions. 4356235633Sdim BasicBlock::iterator IP = HoistInsertPosition(LowestIP, Inputs); 4357207618Srdivacky 4358207618Srdivacky // Don't insert instructions before PHI nodes. 4359203954Srdivacky while (isa<PHINode>(IP)) ++IP; 4360207618Srdivacky 4361226890Sdim // Ignore landingpad instructions. 4362226890Sdim while (isa<LandingPadInst>(IP)) ++IP; 4363226890Sdim 4364207618Srdivacky // Ignore debug intrinsics. 4365206083Srdivacky while (isa<DbgInfoIntrinsic>(IP)) ++IP; 4366199481Srdivacky 4367235633Sdim // Set IP below instructions recently inserted by SCEVExpander. This keeps the 4368235633Sdim // IP consistent across expansions and allows the previously inserted 4369235633Sdim // instructions to be reused by subsequent expansion. 4370235633Sdim while (Rewriter.isInsertedInstruction(IP) && IP != LowestIP) ++IP; 4371235633Sdim 4372207618Srdivacky return IP; 4373207618Srdivacky} 4374207618Srdivacky 4375208599Srdivacky/// Expand - Emit instructions for the leading candidate expression for this 4376208599Srdivacky/// LSRUse (this is called "expanding"). 
4377207618SrdivackyValue *LSRInstance::Expand(const LSRFixup &LF, 4378207618Srdivacky const Formula &F, 4379207618Srdivacky BasicBlock::iterator IP, 4380207618Srdivacky SCEVExpander &Rewriter, 4381207618Srdivacky SmallVectorImpl<WeakVH> &DeadInsts) const { 4382207618Srdivacky const LSRUse &LU = Uses[LF.LUIdx]; 4383263509Sdim if (LU.RigidFormula) 4384263509Sdim return LF.OperandValToReplace; 4385207618Srdivacky 4386207618Srdivacky // Determine an input position which will be dominated by the operands and 4387207618Srdivacky // which will dominate the result. 4388235633Sdim IP = AdjustInsertPositionForExpand(IP, LF, LU, Rewriter); 4389207618Srdivacky 4390203954Srdivacky // Inform the Rewriter if we have a post-increment use, so that it can 4391203954Srdivacky // perform an advantageous expansion. 4392207618Srdivacky Rewriter.setPostInc(LF.PostIncLoops); 4393199481Srdivacky 4394203954Srdivacky // This is the type that the user actually needs. 4395226890Sdim Type *OpTy = LF.OperandValToReplace->getType(); 4396203954Srdivacky // This will be the type that we'll initially expand to. 4397226890Sdim Type *Ty = F.getType(); 4398203954Srdivacky if (!Ty) 4399203954Srdivacky // No type known; just expand directly to the ultimate type. 4400203954Srdivacky Ty = OpTy; 4401203954Srdivacky else if (SE.getEffectiveSCEVType(Ty) == SE.getEffectiveSCEVType(OpTy)) 4402203954Srdivacky // Expand directly to the ultimate type if it's the right size. 4403203954Srdivacky Ty = OpTy; 4404203954Srdivacky // This is the type to do integer arithmetic in. 4405226890Sdim Type *IntTy = SE.getEffectiveSCEVType(Ty); 4406199481Srdivacky 4407203954Srdivacky // Build up a list of operands to add together to form the full base. 4408203954Srdivacky SmallVector<const SCEV *, 8> Ops; 4409199481Srdivacky 4410203954Srdivacky // Expand the BaseRegs portion. 
4411203954Srdivacky for (SmallVectorImpl<const SCEV *>::const_iterator I = F.BaseRegs.begin(), 4412203954Srdivacky E = F.BaseRegs.end(); I != E; ++I) { 4413203954Srdivacky const SCEV *Reg = *I; 4414203954Srdivacky assert(!Reg->isZero() && "Zero allocated in a base register!"); 4415203954Srdivacky 4416207618Srdivacky // If we're expanding for a post-inc user, make the post-inc adjustment. 4417207618Srdivacky PostIncLoopSet &Loops = const_cast<PostIncLoopSet &>(LF.PostIncLoops); 4418207618Srdivacky Reg = TransformForPostIncUse(Denormalize, Reg, 4419207618Srdivacky LF.UserInst, LF.OperandValToReplace, 4420207618Srdivacky Loops, SE, DT); 4421203954Srdivacky 4422203954Srdivacky Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, 0, IP))); 4423203954Srdivacky } 4424203954Srdivacky 4425203954Srdivacky // Expand the ScaledReg portion. 4426203954Srdivacky Value *ICmpScaledV = 0; 4427252723Sdim if (F.Scale != 0) { 4428203954Srdivacky const SCEV *ScaledS = F.ScaledReg; 4429203954Srdivacky 4430207618Srdivacky // If we're expanding for a post-inc user, make the post-inc adjustment. 4431207618Srdivacky PostIncLoopSet &Loops = const_cast<PostIncLoopSet &>(LF.PostIncLoops); 4432207618Srdivacky ScaledS = TransformForPostIncUse(Denormalize, ScaledS, 4433207618Srdivacky LF.UserInst, LF.OperandValToReplace, 4434207618Srdivacky Loops, SE, DT); 4435203954Srdivacky 4436203954Srdivacky if (LU.Kind == LSRUse::ICmpZero) { 4437203954Srdivacky // An interesting way of "folding" with an icmp is to use a negated 4438203954Srdivacky // scale, which we'll implement by inserting it into the other operand 4439203954Srdivacky // of the icmp. 
4440252723Sdim assert(F.Scale == -1 && 4441203954Srdivacky "The only scale supported by ICmpZero uses is -1!"); 4442203954Srdivacky ICmpScaledV = Rewriter.expandCodeFor(ScaledS, 0, IP); 4443203954Srdivacky } else { 4444203954Srdivacky // Otherwise just expand the scaled register and an explicit scale, 4445203954Srdivacky // which is expected to be matched as part of the address. 4446245431Sdim 4447245431Sdim // Flush the operand list to suppress SCEVExpander hoisting address modes. 4448245431Sdim if (!Ops.empty() && LU.Kind == LSRUse::Address) { 4449245431Sdim Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP); 4450245431Sdim Ops.clear(); 4451245431Sdim Ops.push_back(SE.getUnknown(FullV)); 4452245431Sdim } 4453203954Srdivacky ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, 0, IP)); 4454203954Srdivacky ScaledS = SE.getMulExpr(ScaledS, 4455252723Sdim SE.getConstant(ScaledS->getType(), F.Scale)); 4456203954Srdivacky Ops.push_back(ScaledS); 4457245431Sdim } 4458245431Sdim } 4459204642Srdivacky 4460245431Sdim // Expand the GV portion. 4461252723Sdim if (F.BaseGV) { 4462245431Sdim // Flush the operand list to suppress SCEVExpander hoisting. 4463245431Sdim if (!Ops.empty()) { 4464204642Srdivacky Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP); 4465204642Srdivacky Ops.clear(); 4466204642Srdivacky Ops.push_back(SE.getUnknown(FullV)); 4467203954Srdivacky } 4468252723Sdim Ops.push_back(SE.getUnknown(F.BaseGV)); 4469203954Srdivacky } 4470203954Srdivacky 4471245431Sdim // Flush the operand list to suppress SCEVExpander hoisting of both folded and 4472245431Sdim // unfolded offsets. LSR assumes they both live next to their uses. 4473245431Sdim if (!Ops.empty()) { 4474204642Srdivacky Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP); 4475204642Srdivacky Ops.clear(); 4476204642Srdivacky Ops.push_back(SE.getUnknown(FullV)); 4477204642Srdivacky } 4478204642Srdivacky 4479204642Srdivacky // Expand the immediate portion. 
4480252723Sdim int64_t Offset = (uint64_t)F.BaseOffset + LF.Offset; 4481203954Srdivacky if (Offset != 0) { 4482203954Srdivacky if (LU.Kind == LSRUse::ICmpZero) { 4483203954Srdivacky // The other interesting way of "folding" with an ICmpZero is to use a 4484203954Srdivacky // negated immediate. 4485203954Srdivacky if (!ICmpScaledV) 4486226890Sdim ICmpScaledV = ConstantInt::get(IntTy, -(uint64_t)Offset); 4487203954Srdivacky else { 4488203954Srdivacky Ops.push_back(SE.getUnknown(ICmpScaledV)); 4489203954Srdivacky ICmpScaledV = ConstantInt::get(IntTy, Offset); 4490203954Srdivacky } 4491203954Srdivacky } else { 4492203954Srdivacky // Just add the immediate values. These again are expected to be matched 4493203954Srdivacky // as part of the address. 4494204642Srdivacky Ops.push_back(SE.getUnknown(ConstantInt::getSigned(IntTy, Offset))); 4495203954Srdivacky } 4496203954Srdivacky } 4497203954Srdivacky 4498223017Sdim // Expand the unfolded offset portion. 4499223017Sdim int64_t UnfoldedOffset = F.UnfoldedOffset; 4500223017Sdim if (UnfoldedOffset != 0) { 4501223017Sdim // Just add the immediate values. 4502223017Sdim Ops.push_back(SE.getUnknown(ConstantInt::getSigned(IntTy, 4503223017Sdim UnfoldedOffset))); 4504223017Sdim } 4505223017Sdim 4506203954Srdivacky // Emit instructions summing all the operands. 4507203954Srdivacky const SCEV *FullS = Ops.empty() ? 4508207618Srdivacky SE.getConstant(IntTy, 0) : 4509203954Srdivacky SE.getAddExpr(Ops); 4510203954Srdivacky Value *FullV = Rewriter.expandCodeFor(FullS, Ty, IP); 4511203954Srdivacky 4512203954Srdivacky // We're done expanding now, so reset the rewriter. 4513207618Srdivacky Rewriter.clearPostInc(); 4514203954Srdivacky 4515203954Srdivacky // An ICmpZero Formula represents an ICmp which we're handling as a 4516203954Srdivacky // comparison against zero. Now that we've expanded an expression for that 4517203954Srdivacky // form, update the ICmp's other operand. 
4518203954Srdivacky if (LU.Kind == LSRUse::ICmpZero) { 4519203954Srdivacky ICmpInst *CI = cast<ICmpInst>(LF.UserInst); 4520203954Srdivacky DeadInsts.push_back(CI->getOperand(1)); 4521252723Sdim assert(!F.BaseGV && "ICmp does not support folding a global value and " 4522203954Srdivacky "a scale at the same time!"); 4523252723Sdim if (F.Scale == -1) { 4524203954Srdivacky if (ICmpScaledV->getType() != OpTy) { 4525203954Srdivacky Instruction *Cast = 4526203954Srdivacky CastInst::Create(CastInst::getCastOpcode(ICmpScaledV, false, 4527203954Srdivacky OpTy, false), 4528203954Srdivacky ICmpScaledV, OpTy, "tmp", CI); 4529203954Srdivacky ICmpScaledV = Cast; 4530203954Srdivacky } 4531203954Srdivacky CI->setOperand(1, ICmpScaledV); 4532203954Srdivacky } else { 4533252723Sdim assert(F.Scale == 0 && 4534203954Srdivacky "ICmp does not support folding a global value and " 4535203954Srdivacky "a scale at the same time!"); 4536203954Srdivacky Constant *C = ConstantInt::getSigned(SE.getEffectiveSCEVType(OpTy), 4537203954Srdivacky -(uint64_t)Offset); 4538203954Srdivacky if (C->getType() != OpTy) 4539203954Srdivacky C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false, 4540203954Srdivacky OpTy, false), 4541203954Srdivacky C, OpTy); 4542203954Srdivacky 4543203954Srdivacky CI->setOperand(1, C); 4544203954Srdivacky } 4545203954Srdivacky } 4546203954Srdivacky 4547203954Srdivacky return FullV; 4548193323Sed} 4549193323Sed 4550204642Srdivacky/// RewriteForPHI - Helper for Rewrite. PHI nodes are special because the use 4551204642Srdivacky/// of their operands effectively happens in their predecessor blocks, so the 4552204642Srdivacky/// expression may need to be expanded in multiple places. 
4553204642Srdivackyvoid LSRInstance::RewriteForPHI(PHINode *PN, 4554204642Srdivacky const LSRFixup &LF, 4555204642Srdivacky const Formula &F, 4556204642Srdivacky SCEVExpander &Rewriter, 4557204642Srdivacky SmallVectorImpl<WeakVH> &DeadInsts, 4558204642Srdivacky Pass *P) const { 4559204642Srdivacky DenseMap<BasicBlock *, Value *> Inserted; 4560204642Srdivacky for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) 4561204642Srdivacky if (PN->getIncomingValue(i) == LF.OperandValToReplace) { 4562204642Srdivacky BasicBlock *BB = PN->getIncomingBlock(i); 4563204642Srdivacky 4564204642Srdivacky // If this is a critical edge, split the edge so that we do not insert 4565204642Srdivacky // the code on all predecessor/successor paths. We do this unless this 4566204642Srdivacky // is the canonical backedge for this loop, which complicates post-inc 4567204642Srdivacky // users. 4568204642Srdivacky if (e != 1 && BB->getTerminator()->getNumSuccessors() > 1 && 4569218893Sdim !isa<IndirectBrInst>(BB->getTerminator())) { 4570226890Sdim BasicBlock *Parent = PN->getParent(); 4571226890Sdim Loop *PNLoop = LI.getLoopFor(Parent); 4572226890Sdim if (!PNLoop || Parent != PNLoop->getHeader()) { 4573218893Sdim // Split the critical edge. 4574226890Sdim BasicBlock *NewBB = 0; 4575226890Sdim if (!Parent->isLandingPad()) { 4576226890Sdim NewBB = SplitCriticalEdge(BB, Parent, P, 4577226890Sdim /*MergeIdenticalEdges=*/true, 4578226890Sdim /*DontDeleteUselessPhis=*/true); 4579226890Sdim } else { 4580226890Sdim SmallVector<BasicBlock*, 2> NewBBs; 4581226890Sdim SplitLandingPadPredecessors(Parent, BB, "", "", P, NewBBs); 4582226890Sdim NewBB = NewBBs[0]; 4583226890Sdim } 4584245431Sdim // If NewBB==NULL, then SplitCriticalEdge refused to split because all 4585245431Sdim // phi predecessors are identical. The simple thing to do is skip 4586245431Sdim // splitting in this case rather than complicate the API. 
4587245431Sdim if (NewBB) { 4588245431Sdim // If PN is outside of the loop and BB is in the loop, we want to 4589245431Sdim // move the block to be immediately before the PHI block, not 4590245431Sdim // immediately after BB. 4591245431Sdim if (L->contains(BB) && !L->contains(PN)) 4592245431Sdim NewBB->moveBefore(PN->getParent()); 4593204642Srdivacky 4594245431Sdim // Splitting the edge can reduce the number of PHI entries we have. 4595245431Sdim e = PN->getNumIncomingValues(); 4596245431Sdim BB = NewBB; 4597245431Sdim i = PN->getBasicBlockIndex(BB); 4598245431Sdim } 4599218893Sdim } 4600204642Srdivacky } 4601204642Srdivacky 4602204642Srdivacky std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> Pair = 4603204642Srdivacky Inserted.insert(std::make_pair(BB, static_cast<Value *>(0))); 4604204642Srdivacky if (!Pair.second) 4605204642Srdivacky PN->setIncomingValue(i, Pair.first->second); 4606204642Srdivacky else { 4607204642Srdivacky Value *FullV = Expand(LF, F, BB->getTerminator(), Rewriter, DeadInsts); 4608204642Srdivacky 4609204642Srdivacky // If this is reuse-by-noop-cast, insert the noop cast. 4610226890Sdim Type *OpTy = LF.OperandValToReplace->getType(); 4611204642Srdivacky if (FullV->getType() != OpTy) 4612204642Srdivacky FullV = 4613204642Srdivacky CastInst::Create(CastInst::getCastOpcode(FullV, false, 4614204642Srdivacky OpTy, false), 4615204642Srdivacky FullV, LF.OperandValToReplace->getType(), 4616204642Srdivacky "tmp", BB->getTerminator()); 4617204642Srdivacky 4618204642Srdivacky PN->setIncomingValue(i, FullV); 4619204642Srdivacky Pair.first->second = FullV; 4620204642Srdivacky } 4621204642Srdivacky } 4622204642Srdivacky} 4623204642Srdivacky 4624203954Srdivacky/// Rewrite - Emit instructions for the leading candidate expression for this 4625203954Srdivacky/// LSRUse (this is called "expanding"), and update the UserInst to reference 4626203954Srdivacky/// the newly expanded value. 
4627203954Srdivackyvoid LSRInstance::Rewrite(const LSRFixup &LF, 4628203954Srdivacky const Formula &F, 4629203954Srdivacky SCEVExpander &Rewriter, 4630203954Srdivacky SmallVectorImpl<WeakVH> &DeadInsts, 4631203954Srdivacky Pass *P) const { 4632203954Srdivacky // First, find an insertion point that dominates UserInst. For PHI nodes, 4633203954Srdivacky // find the nearest block which dominates all the relevant uses. 4634203954Srdivacky if (PHINode *PN = dyn_cast<PHINode>(LF.UserInst)) { 4635204642Srdivacky RewriteForPHI(PN, LF, F, Rewriter, DeadInsts, P); 4636203954Srdivacky } else { 4637204642Srdivacky Value *FullV = Expand(LF, F, LF.UserInst, Rewriter, DeadInsts); 4638203954Srdivacky 4639203954Srdivacky // If this is reuse-by-noop-cast, insert the noop cast. 4640226890Sdim Type *OpTy = LF.OperandValToReplace->getType(); 4641203954Srdivacky if (FullV->getType() != OpTy) { 4642203954Srdivacky Instruction *Cast = 4643203954Srdivacky CastInst::Create(CastInst::getCastOpcode(FullV, false, OpTy, false), 4644203954Srdivacky FullV, OpTy, "tmp", LF.UserInst); 4645203954Srdivacky FullV = Cast; 4646199481Srdivacky } 4647203954Srdivacky 4648203954Srdivacky // Update the user. ICmpZero is handled specially here (for now) because 4649203954Srdivacky // Expand may have updated one of the operands of the icmp already, and 4650203954Srdivacky // its new value may happen to be equal to LF.OperandValToReplace, in 4651203954Srdivacky // which case doing replaceUsesOfWith leads to replacing both operands 4652203954Srdivacky // with the same value. TODO: Reorganize this. 
4653203954Srdivacky if (Uses[LF.LUIdx].Kind == LSRUse::ICmpZero) 4654203954Srdivacky LF.UserInst->setOperand(0, FullV); 4655203954Srdivacky else 4656203954Srdivacky LF.UserInst->replaceUsesOfWith(LF.OperandValToReplace, FullV); 4657199481Srdivacky } 4658199481Srdivacky 4659203954Srdivacky DeadInsts.push_back(LF.OperandValToReplace); 4660199481Srdivacky} 4661199481Srdivacky 4662208599Srdivacky/// ImplementSolution - Rewrite all the fixup locations with new values, 4663208599Srdivacky/// following the chosen solution. 4664203954Srdivackyvoid 4665203954SrdivackyLSRInstance::ImplementSolution(const SmallVectorImpl<const Formula *> &Solution, 4666203954Srdivacky Pass *P) { 4667203954Srdivacky // Keep track of instructions we may have made dead, so that 4668203954Srdivacky // we can remove them after we are done working. 4669203954Srdivacky SmallVector<WeakVH, 16> DeadInsts; 4670193323Sed 4671224145Sdim SCEVExpander Rewriter(SE, "lsr"); 4672235633Sdim#ifndef NDEBUG 4673235633Sdim Rewriter.setDebugType(DEBUG_TYPE); 4674235633Sdim#endif 4675203954Srdivacky Rewriter.disableCanonicalMode(); 4676226890Sdim Rewriter.enableLSRMode(); 4677203954Srdivacky Rewriter.setIVIncInsertPos(L, IVIncInsertPos); 4678203954Srdivacky 4679235633Sdim // Mark phi nodes that terminate chains so the expander tries to reuse them. 4680235633Sdim for (SmallVectorImpl<IVChain>::const_iterator ChainI = IVChainVec.begin(), 4681235633Sdim ChainE = IVChainVec.end(); ChainI != ChainE; ++ChainI) { 4682245431Sdim if (PHINode *PN = dyn_cast<PHINode>(ChainI->tailUserInst())) 4683235633Sdim Rewriter.setChainedPhi(PN); 4684235633Sdim } 4685235633Sdim 4686203954Srdivacky // Expand the new value definitions and update the users. 
4687208599Srdivacky for (SmallVectorImpl<LSRFixup>::const_iterator I = Fixups.begin(), 4688208599Srdivacky E = Fixups.end(); I != E; ++I) { 4689208599Srdivacky const LSRFixup &Fixup = *I; 4690203954Srdivacky 4691208599Srdivacky Rewrite(Fixup, *Solution[Fixup.LUIdx], Rewriter, DeadInsts, P); 4692203954Srdivacky 4693203954Srdivacky Changed = true; 4694203954Srdivacky } 4695203954Srdivacky 4696235633Sdim for (SmallVectorImpl<IVChain>::const_iterator ChainI = IVChainVec.begin(), 4697235633Sdim ChainE = IVChainVec.end(); ChainI != ChainE; ++ChainI) { 4698235633Sdim GenerateIVChain(*ChainI, Rewriter, DeadInsts); 4699235633Sdim Changed = true; 4700235633Sdim } 4701203954Srdivacky // Clean up after ourselves. This must be done before deleting any 4702203954Srdivacky // instructions. 4703203954Srdivacky Rewriter.clear(); 4704203954Srdivacky 4705203954Srdivacky Changed |= DeleteTriviallyDeadInstructions(DeadInsts); 4706203954Srdivacky} 4707203954Srdivacky 4708252723SdimLSRInstance::LSRInstance(Loop *L, Pass *P) 4709252723Sdim : IU(P->getAnalysis<IVUsers>()), SE(P->getAnalysis<ScalarEvolution>()), 4710252723Sdim DT(P->getAnalysis<DominatorTree>()), LI(P->getAnalysis<LoopInfo>()), 4711252723Sdim TTI(P->getAnalysis<TargetTransformInfo>()), L(L), Changed(false), 4712252723Sdim IVIncInsertPos(0) { 4713199481Srdivacky // If LoopSimplify form is not available, stay out of trouble. 4714235633Sdim if (!L->isLoopSimplifyForm()) 4715235633Sdim return; 4716199481Srdivacky 4717203954Srdivacky // If there's no interesting work to be done, bail early. 4718203954Srdivacky if (IU.empty()) return; 4719193323Sed 4720235633Sdim // If there's too much analysis to be done, bail early. We won't be able to 4721235633Sdim // model the problem anyway. 
4722235633Sdim unsigned NumUsers = 0; 4723235633Sdim for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI) { 4724235633Sdim if (++NumUsers > MaxIVUsers) { 4725235633Sdim DEBUG(dbgs() << "LSR skipping loop, too many IV Users in " << *L 4726235633Sdim << "\n"); 4727235633Sdim return; 4728235633Sdim } 4729235633Sdim } 4730235633Sdim 4731235633Sdim#ifndef NDEBUG 4732235633Sdim // All dominating loops must have preheaders, or SCEVExpander may not be able 4733235633Sdim // to materialize an AddRecExpr whose Start is an outer AddRecExpr. 4734235633Sdim // 4735235633Sdim // IVUsers analysis should only create users that are dominated by simple loop 4736235633Sdim // headers. Since this loop should dominate all of its users, its user list 4737235633Sdim // should be empty if this loop itself is not within a simple loop nest. 4738235633Sdim for (DomTreeNode *Rung = DT.getNode(L->getLoopPreheader()); 4739235633Sdim Rung; Rung = Rung->getIDom()) { 4740235633Sdim BasicBlock *BB = Rung->getBlock(); 4741235633Sdim const Loop *DomLoop = LI.getLoopFor(BB); 4742235633Sdim if (DomLoop && DomLoop->getHeader() == BB) { 4743235633Sdim assert(DomLoop->getLoopPreheader() && "LSR needs a simplified loop nest"); 4744235633Sdim } 4745235633Sdim } 4746235633Sdim#endif // DEBUG 4747235633Sdim 4748203954Srdivacky DEBUG(dbgs() << "\nLSR on loop "; 4749203954Srdivacky WriteAsOperand(dbgs(), L->getHeader(), /*PrintType=*/false); 4750203954Srdivacky dbgs() << ":\n"); 4751193323Sed 4752208599Srdivacky // First, perform some low-level loop optimizations. 4753203954Srdivacky OptimizeShadowIV(); 4754208599Srdivacky OptimizeLoopTermCond(); 4755193323Sed 4756226890Sdim // If loop preparation eliminates all interesting IV users, bail. 4757226890Sdim if (IU.empty()) return; 4758226890Sdim 4759226890Sdim // Skip nested loops until we can model them better with formulae. 
4760235633Sdim if (!L->empty()) { 4761226890Sdim DEBUG(dbgs() << "LSR skipping outer loop " << *L << "\n"); 4762226890Sdim return; 4763226890Sdim } 4764226890Sdim 4765208599Srdivacky // Start collecting data and preparing for the solver. 4766235633Sdim CollectChains(); 4767203954Srdivacky CollectInterestingTypesAndFactors(); 4768203954Srdivacky CollectFixupsAndInitialFormulae(); 4769203954Srdivacky CollectLoopInvariantFixupsAndFormulae(); 4770193323Sed 4771235633Sdim assert(!Uses.empty() && "IVUsers reported at least one use"); 4772203954Srdivacky DEBUG(dbgs() << "LSR found " << Uses.size() << " uses:\n"; 4773203954Srdivacky print_uses(dbgs())); 4774193323Sed 4775203954Srdivacky // Now use the reuse data to generate a bunch of interesting ways 4776203954Srdivacky // to formulate the values needed for the uses. 4777203954Srdivacky GenerateAllReuseFormulae(); 4778193323Sed 4779203954Srdivacky FilterOutUndesirableDedicatedRegisters(); 4780203954Srdivacky NarrowSearchSpaceUsingHeuristics(); 4781193323Sed 4782203954Srdivacky SmallVector<const Formula *, 8> Solution; 4783203954Srdivacky Solve(Solution); 4784193323Sed 4785203954Srdivacky // Release memory that is no longer needed. 4786203954Srdivacky Factors.clear(); 4787203954Srdivacky Types.clear(); 4788203954Srdivacky RegUses.clear(); 4789203954Srdivacky 4790226890Sdim if (Solution.empty()) 4791226890Sdim return; 4792226890Sdim 4793203954Srdivacky#ifndef NDEBUG 4794203954Srdivacky // Formulae should be legal. 
4795252723Sdim for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(), E = Uses.end(); 4796252723Sdim I != E; ++I) { 4797252723Sdim const LSRUse &LU = *I; 4798252723Sdim for (SmallVectorImpl<Formula>::const_iterator J = LU.Formulae.begin(), 4799252723Sdim JE = LU.Formulae.end(); 4800252723Sdim J != JE; ++J) 4801252723Sdim assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, 4802252723Sdim *J) && "Illegal formula generated!"); 4803203954Srdivacky }; 4804203954Srdivacky#endif 4805203954Srdivacky 4806203954Srdivacky // Now that we've decided what we want, make it so. 4807203954Srdivacky ImplementSolution(Solution, P); 4808203954Srdivacky} 4809203954Srdivacky 4810203954Srdivackyvoid LSRInstance::print_factors_and_types(raw_ostream &OS) const { 4811203954Srdivacky if (Factors.empty() && Types.empty()) return; 4812203954Srdivacky 4813203954Srdivacky OS << "LSR has identified the following interesting factors and types: "; 4814203954Srdivacky bool First = true; 4815203954Srdivacky 4816203954Srdivacky for (SmallSetVector<int64_t, 8>::const_iterator 4817203954Srdivacky I = Factors.begin(), E = Factors.end(); I != E; ++I) { 4818203954Srdivacky if (!First) OS << ", "; 4819203954Srdivacky First = false; 4820203954Srdivacky OS << '*' << *I; 4821200581Srdivacky } 4822193323Sed 4823226890Sdim for (SmallSetVector<Type *, 4>::const_iterator 4824203954Srdivacky I = Types.begin(), E = Types.end(); I != E; ++I) { 4825203954Srdivacky if (!First) OS << ", "; 4826203954Srdivacky First = false; 4827203954Srdivacky OS << '(' << **I << ')'; 4828203954Srdivacky } 4829203954Srdivacky OS << '\n'; 4830203954Srdivacky} 4831203954Srdivacky 4832203954Srdivackyvoid LSRInstance::print_fixups(raw_ostream &OS) const { 4833203954Srdivacky OS << "LSR is examining the following fixup sites:\n"; 4834203954Srdivacky for (SmallVectorImpl<LSRFixup>::const_iterator I = Fixups.begin(), 4835203954Srdivacky E = Fixups.end(); I != E; ++I) { 4836203954Srdivacky dbgs() << " "; 
4837208599Srdivacky I->print(OS); 4838203954Srdivacky OS << '\n'; 4839203954Srdivacky } 4840203954Srdivacky} 4841203954Srdivacky 4842203954Srdivackyvoid LSRInstance::print_uses(raw_ostream &OS) const { 4843203954Srdivacky OS << "LSR is examining the following uses:\n"; 4844203954Srdivacky for (SmallVectorImpl<LSRUse>::const_iterator I = Uses.begin(), 4845203954Srdivacky E = Uses.end(); I != E; ++I) { 4846203954Srdivacky const LSRUse &LU = *I; 4847203954Srdivacky dbgs() << " "; 4848203954Srdivacky LU.print(OS); 4849203954Srdivacky OS << '\n'; 4850203954Srdivacky for (SmallVectorImpl<Formula>::const_iterator J = LU.Formulae.begin(), 4851203954Srdivacky JE = LU.Formulae.end(); J != JE; ++J) { 4852203954Srdivacky OS << " "; 4853203954Srdivacky J->print(OS); 4854203954Srdivacky OS << '\n'; 4855203954Srdivacky } 4856203954Srdivacky } 4857203954Srdivacky} 4858203954Srdivacky 4859203954Srdivackyvoid LSRInstance::print(raw_ostream &OS) const { 4860203954Srdivacky print_factors_and_types(OS); 4861203954Srdivacky print_fixups(OS); 4862203954Srdivacky print_uses(OS); 4863203954Srdivacky} 4864203954Srdivacky 4865245431Sdim#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 4866203954Srdivackyvoid LSRInstance::dump() const { 4867203954Srdivacky print(errs()); errs() << '\n'; 4868203954Srdivacky} 4869245431Sdim#endif 4870203954Srdivacky 4871203954Srdivackynamespace { 4872203954Srdivacky 4873203954Srdivackyclass LoopStrengthReduce : public LoopPass { 4874203954Srdivackypublic: 4875203954Srdivacky static char ID; // Pass ID, replacement for typeid 4876252723Sdim LoopStrengthReduce(); 4877203954Srdivacky 4878203954Srdivackyprivate: 4879203954Srdivacky bool runOnLoop(Loop *L, LPPassManager &LPM); 4880203954Srdivacky void getAnalysisUsage(AnalysisUsage &AU) const; 4881203954Srdivacky}; 4882203954Srdivacky 4883203954Srdivacky} 4884203954Srdivacky 4885203954Srdivackychar LoopStrengthReduce::ID = 0; 4886218893SdimINITIALIZE_PASS_BEGIN(LoopStrengthReduce, "loop-reduce", 4887218893Sdim "Loop 
Strength Reduction", false, false) 4888252723SdimINITIALIZE_AG_DEPENDENCY(TargetTransformInfo) 4889218893SdimINITIALIZE_PASS_DEPENDENCY(DominatorTree) 4890218893SdimINITIALIZE_PASS_DEPENDENCY(ScalarEvolution) 4891218893SdimINITIALIZE_PASS_DEPENDENCY(IVUsers) 4892218893SdimINITIALIZE_PASS_DEPENDENCY(LoopInfo) 4893218893SdimINITIALIZE_PASS_DEPENDENCY(LoopSimplify) 4894218893SdimINITIALIZE_PASS_END(LoopStrengthReduce, "loop-reduce", 4895218893Sdim "Loop Strength Reduction", false, false) 4896203954Srdivacky 4897218893Sdim 4898252723SdimPass *llvm::createLoopStrengthReducePass() { 4899252723Sdim return new LoopStrengthReduce(); 4900203954Srdivacky} 4901203954Srdivacky 4902252723SdimLoopStrengthReduce::LoopStrengthReduce() : LoopPass(ID) { 4903252723Sdim initializeLoopStrengthReducePass(*PassRegistry::getPassRegistry()); 4904252723Sdim} 4905203954Srdivacky 4906203954Srdivackyvoid LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const { 4907203954Srdivacky // We split critical edges, so we change the CFG. However, we do update 4908203954Srdivacky // many analyses if they are around. 4909203954Srdivacky AU.addPreservedID(LoopSimplifyID); 4910203954Srdivacky 4911207618Srdivacky AU.addRequired<LoopInfo>(); 4912207618Srdivacky AU.addPreserved<LoopInfo>(); 4913203954Srdivacky AU.addRequiredID(LoopSimplifyID); 4914203954Srdivacky AU.addRequired<DominatorTree>(); 4915203954Srdivacky AU.addPreserved<DominatorTree>(); 4916203954Srdivacky AU.addRequired<ScalarEvolution>(); 4917203954Srdivacky AU.addPreserved<ScalarEvolution>(); 4918218893Sdim // Requiring LoopSimplify a second time here prevents IVUsers from running 4919218893Sdim // twice, since LoopSimplify was invalidated by running ScalarEvolution. 
4920218893Sdim AU.addRequiredID(LoopSimplifyID); 4921203954Srdivacky AU.addRequired<IVUsers>(); 4922203954Srdivacky AU.addPreserved<IVUsers>(); 4923252723Sdim AU.addRequired<TargetTransformInfo>(); 4924203954Srdivacky} 4925203954Srdivacky 4926203954Srdivackybool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) { 4927203954Srdivacky bool Changed = false; 4928203954Srdivacky 4929203954Srdivacky // Run the main LSR transformation. 4930252723Sdim Changed |= LSRInstance(L, this).getChanged(); 4931203954Srdivacky 4932235633Sdim // Remove any extra phis created by processing inner loops. 4933202375Srdivacky Changed |= DeleteDeadPHIs(L->getHeader()); 4934252723Sdim if (EnablePhiElim && L->isLoopSimplifyForm()) { 4935235633Sdim SmallVector<WeakVH, 16> DeadInsts; 4936235633Sdim SCEVExpander Rewriter(getAnalysis<ScalarEvolution>(), "lsr"); 4937235633Sdim#ifndef NDEBUG 4938235633Sdim Rewriter.setDebugType(DEBUG_TYPE); 4939235633Sdim#endif 4940252723Sdim unsigned numFolded = 4941252723Sdim Rewriter.replaceCongruentIVs(L, &getAnalysis<DominatorTree>(), 4942252723Sdim DeadInsts, 4943252723Sdim &getAnalysis<TargetTransformInfo>()); 4944235633Sdim if (numFolded) { 4945235633Sdim Changed = true; 4946235633Sdim DeleteTriviallyDeadInstructions(DeadInsts); 4947235633Sdim DeleteDeadPHIs(L->getHeader()); 4948235633Sdim } 4949235633Sdim } 4950193323Sed return Changed; 4951193323Sed} 4952