// ScalarEvolutionExpander.cpp — FreeBSD contrib/llvm copy, svn revision 226633.
// (The lines below carry fused svn-blame line/revision prefixes from extraction.)
//===- ScalarEvolutionExpander.cpp - Scalar Evolution Analysis --*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the implementation of the scalar evolution expander,
// which is used to generate the code corresponding to a given scalar evolution
// expression.
//
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/LLVMContext.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetData.h"
#include "llvm/ADT/STLExtras.h"

using namespace llvm;

/// ReuseOrCreateCast - Arrange for there to be a cast of V to Ty at IP,
/// reusing an existing cast if a suitable one exists, moving an existing
/// cast if a suitable one exists but isn't in the right place, or
/// creating a new one.
Value *SCEVExpander::ReuseOrCreateCast(Value *V, Type *Ty,
                                       Instruction::CastOps Op,
                                       BasicBlock::iterator IP) {
  // Check to see if there is already a cast!  Walk all users of V looking
  // for a CastInst with the desired destination type and opcode.
  for (Value::use_iterator UI = V->use_begin(), E = V->use_end();
       UI != E; ++UI) {
    User *U = *UI;
    if (U->getType() == Ty)
      if (CastInst *CI = dyn_cast<CastInst>(U))
        if (CI->getOpcode() == Op) {
          // If the cast isn't where we want it, fix it.
          if (BasicBlock::iterator(CI) != IP) {
            // Create a new cast, and leave the old cast in place in case
            // it is being used as an insert point. Clear its operand
            // so that it doesn't hold anything live.
            Instruction *NewCI = CastInst::Create(Op, V, Ty, "", IP);
            NewCI->takeName(CI);
            CI->replaceAllUsesWith(NewCI);
            CI->setOperand(0, UndefValue::get(V->getType()));
            rememberInstruction(NewCI);
            return NewCI;
          }
          // The existing cast is already at IP; reuse it as-is.
          rememberInstruction(CI);
          return CI;
        }
  }

  // No suitable existing cast was found; create a new cast at IP.
  Instruction *I = CastInst::Create(Op, V, Ty, V->getName(), IP);
  rememberInstruction(I);
  return I;
}

/// InsertNoopCastOfTo - Insert a cast of V to the specified type,
/// which must be possible with a noop cast, doing what we can to share
/// the casts.
Value *SCEVExpander::InsertNoopCastOfTo(Value *V, Type *Ty) {
  Instruction::CastOps Op = CastInst::getCastOpcode(V, false, Ty, false);
  assert((Op == Instruction::BitCast ||
          Op == Instruction::PtrToInt ||
          Op == Instruction::IntToPtr) &&
         "InsertNoopCastOfTo cannot perform non-noop casts!");
  assert(SE.getTypeSizeInBits(V->getType()) == SE.getTypeSizeInBits(Ty) &&
         "InsertNoopCastOfTo cannot change sizes!");

  // Short-circuit unnecessary bitcasts.
  if (Op == Instruction::BitCast && V->getType() == Ty)
    return V;

  // Short-circuit unnecessary inttoptr<->ptrtoint casts.
  // If V is itself a same-sized ptrtoint/inttoptr of X, a round-trip cast
  // back to X's type folds to X.  Handle both the instruction and the
  // constant-expression form.
  if ((Op == Instruction::PtrToInt || Op == Instruction::IntToPtr) &&
      SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(V->getType())) {
    if (CastInst *CI = dyn_cast<CastInst>(V))
      if ((CI->getOpcode() == Instruction::PtrToInt ||
           CI->getOpcode() == Instruction::IntToPtr) &&
          SE.getTypeSizeInBits(CI->getType()) ==
          SE.getTypeSizeInBits(CI->getOperand(0)->getType()))
        return CI->getOperand(0);
    if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
      if ((CE->getOpcode() == Instruction::PtrToInt ||
           CE->getOpcode() == Instruction::IntToPtr) &&
          SE.getTypeSizeInBits(CE->getType()) ==
          SE.getTypeSizeInBits(CE->getOperand(0)->getType()))
        return CE->getOperand(0);
  }

  // Fold a cast of a constant.
  if (Constant *C = dyn_cast<Constant>(V))
    return ConstantExpr::getCast(Op, C, Ty);

  // Cast the argument at the beginning of the entry block, after
  // any bitcasts of other arguments.  Skip over debug intrinsics and
  // landing pads so the cast lands at a legal insertion point.
  if (Argument *A = dyn_cast<Argument>(V)) {
    BasicBlock::iterator IP = A->getParent()->getEntryBlock().begin();
    while ((isa<BitCastInst>(IP) &&
            isa<Argument>(cast<BitCastInst>(IP)->getOperand(0)) &&
            cast<BitCastInst>(IP)->getOperand(0) != A) ||
           isa<DbgInfoIntrinsic>(IP) ||
           isa<LandingPadInst>(IP))
      ++IP;
    return ReuseOrCreateCast(A, Ty, Op, IP);
  }

  // Cast the instruction immediately after the instruction.
  Instruction *I = cast<Instruction>(V);
  BasicBlock::iterator IP = I; ++IP;
  // An invoke terminates its block, so insert in its normal destination
  // instead, past any PHIs, debug intrinsics, and landing pads.
  if (InvokeInst *II = dyn_cast<InvokeInst>(I))
    IP = II->getNormalDest()->begin();
  while (isa<PHINode>(IP) || isa<DbgInfoIntrinsic>(IP) ||
         isa<LandingPadInst>(IP))
    ++IP;
  return ReuseOrCreateCast(I, Ty, Op, IP);
}

/// InsertBinop - Insert the specified binary operator, doing a small amount
/// of work to avoid inserting an obviously redundant operation.
Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
                                 Value *LHS, Value *RHS) {
  // Fold a binop with constant operands.
  if (Constant *CLHS = dyn_cast<Constant>(LHS))
    if (Constant *CRHS = dyn_cast<Constant>(RHS))
      return ConstantExpr::get(Opcode, CLHS, CRHS);

  // Do a quick scan to see if we have this binop nearby.  If so, reuse it.
  // The scan is bounded to a handful of instructions to keep this cheap.
  unsigned ScanLimit = 6;
  BasicBlock::iterator BlockBegin = Builder.GetInsertBlock()->begin();
  // Scanning starts from the last instruction before the insertion point.
  BasicBlock::iterator IP = Builder.GetInsertPoint();
  if (IP != BlockBegin) {
    --IP;
    for (; ScanLimit; --IP, --ScanLimit) {
      // Don't count dbg.value against the ScanLimit, to avoid perturbing the
      // generated code.
      if (isa<DbgInfoIntrinsic>(IP))
        ScanLimit++;
      if (IP->getOpcode() == (unsigned)Opcode && IP->getOperand(0) == LHS &&
          IP->getOperand(1) == RHS)
        return IP;
      if (IP == BlockBegin) break;
    }
  }

  // Save the original insertion point so we can restore it when we're done.
  BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
  BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();

  // Move the insertion point out of as many loops as we can, so the new
  // binop is hoisted to the outermost point where both operands are
  // loop-invariant and a preheader exists.
  while (const Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock())) {
    if (!L->isLoopInvariant(LHS) || !L->isLoopInvariant(RHS)) break;
    BasicBlock *Preheader = L->getLoopPreheader();
    if (!Preheader) break;

    // Ok, move up a level.
    Builder.SetInsertPoint(Preheader, Preheader->getTerminator());
  }

  // If we haven't found this binop, insert it.
  Instruction *BO = cast<Instruction>(Builder.CreateBinOp(Opcode, LHS, RHS));
  // Borrow the debug location from the instruction at the original
  // insertion point.
  BO->setDebugLoc(SaveInsertPt->getDebugLoc());
  rememberInstruction(BO);

  // Restore the original insert point.
  if (SaveInsertBB)
    restoreInsertPoint(SaveInsertBB, SaveInsertPt);

  return BO;
}

/// FactorOutConstant - Test if S is divisible by Factor, using signed
/// division. If so, update S with Factor divided out and return true.
/// S need not be evenly divisible if a reasonable remainder can be
/// computed.
/// TODO: When ScalarEvolution gets a SCEVSDivExpr, this can be made
/// unnecessary; in its place, just signed-divide Ops[i] by the scale and
/// check to see if the divide was folded.
static bool FactorOutConstant(const SCEV *&S,
                              const SCEV *&Remainder,
                              const SCEV *Factor,
                              ScalarEvolution &SE,
                              const TargetData *TD) {
  // Everything is divisible by one.
  if (Factor->isOne())
    return true;

  // x/x == 1.
  if (S == Factor) {
    S = SE.getConstant(S->getType(), 1);
    return true;
  }

  // For a Constant, check for a multiple of the given factor.
  if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) {
    // 0/x == 0.
    if (C->isZero())
      return true;
    // Check for divisibility.
    if (const SCEVConstant *FC = dyn_cast<SCEVConstant>(Factor)) {
      ConstantInt *CI =
        ConstantInt::get(SE.getContext(),
                         C->getValue()->getValue().sdiv(
                                                   FC->getValue()->getValue()));
      // If the quotient is zero and the remainder is non-zero, reject
      // the value at this scale. It will be considered for subsequent
      // smaller scales.
      if (!CI->isZero()) {
        const SCEV *Div = SE.getConstant(CI);
        S = Div;
        // Accumulate the signed remainder into the caller's Remainder.
        Remainder =
          SE.getAddExpr(Remainder,
                        SE.getConstant(C->getValue()->getValue().srem(
                                                  FC->getValue()->getValue())));
        return true;
      }
    }
  }

  // In a Mul, check if there is a constant operand which is a multiple
  // of the given factor.
  if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(S)) {
    if (TD) {
      // With TargetData, the size is known. Check if there is a constant
      // operand which is a multiple of the given factor. If so, we can
      // factor it.
      const SCEVConstant *FC = cast<SCEVConstant>(Factor);
      if (const SCEVConstant *C = dyn_cast<SCEVConstant>(M->getOperand(0)))
        if (!C->getValue()->getValue().srem(FC->getValue()->getValue())) {
          // Divide the constant operand by the factor and rebuild the Mul.
          SmallVector<const SCEV *, 4> NewMulOps(M->op_begin(), M->op_end());
          NewMulOps[0] =
            SE.getConstant(C->getValue()->getValue().sdiv(
                                                   FC->getValue()->getValue()));
          S = SE.getMulExpr(NewMulOps);
          return true;
        }
    } else {
      // Without TargetData, check if Factor can be factored out of any of the
      // Mul's operands. If so, we can just remove it.
      for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) {
        const SCEV *SOp = M->getOperand(i);
        // Note: this local Remainder deliberately shadows the parameter; only
        // an exact (zero-remainder) factorization of an operand is accepted.
        const SCEV *Remainder = SE.getConstant(SOp->getType(), 0);
        if (FactorOutConstant(SOp, Remainder, Factor, SE, TD) &&
            Remainder->isZero()) {
          SmallVector<const SCEV *, 4> NewMulOps(M->op_begin(), M->op_end());
          NewMulOps[i] = SOp;
          S = SE.getMulExpr(NewMulOps);
          return true;
        }
      }
    }
  }

  // In an AddRec, check if both start and step are divisible.
  // The step must divide evenly; any remainder from the start is passed
  // through to the caller's Remainder.
  if (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(S)) {
    const SCEV *Step = A->getStepRecurrence(SE);
    const SCEV *StepRem = SE.getConstant(Step->getType(), 0);
    if (!FactorOutConstant(Step, StepRem, Factor, SE, TD))
      return false;
    if (!StepRem->isZero())
      return false;
    const SCEV *Start = A->getStart();
    if (!FactorOutConstant(Start, Remainder, Factor, SE, TD))
      return false;
    // FIXME: can use A->getNoWrapFlags(FlagNW)
    S = SE.getAddRecExpr(Start, Step, A->getLoop(), SCEV::FlagAnyWrap);
    return true;
  }

  return false;
}

/// SimplifyAddOperands - Sort and simplify a list of add operands. NumAddRecs
/// is the number of SCEVAddRecExprs present, which are kept at the end of
/// the list.
///
static void SimplifyAddOperands(SmallVectorImpl<const SCEV *> &Ops,
                                Type *Ty,
                                ScalarEvolution &SE) {
  // Count the trailing run of addrecs at the end of Ops.
  unsigned NumAddRecs = 0;
  for (unsigned i = Ops.size(); i > 0 && isa<SCEVAddRecExpr>(Ops[i-1]); --i)
    ++NumAddRecs;
  // Group Ops into non-addrecs and addrecs.
  SmallVector<const SCEV *, 8> NoAddRecs(Ops.begin(), Ops.end() - NumAddRecs);
  SmallVector<const SCEV *, 8> AddRecs(Ops.end() - NumAddRecs, Ops.end());
  // Let ScalarEvolution sort and simplify the non-addrecs list.
  const SCEV *Sum = NoAddRecs.empty() ?
                    SE.getConstant(Ty, 0) :
                    SE.getAddExpr(NoAddRecs);
  // If it returned an add, use the operands. Otherwise it simplified
  // the sum into a single value, so just use that.
  Ops.clear();
  if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Sum))
    Ops.append(Add->op_begin(), Add->op_end());
  else if (!Sum->isZero())
    Ops.push_back(Sum);
  // Then append the addrecs.
  Ops.append(AddRecs.begin(), AddRecs.end());
}

/// SplitAddRecs - Flatten a list of add operands, moving addrec start values
/// out to the top level. For example, convert {a + b,+,c} to a, b, {0,+,d}.
/// This helps expose more opportunities for folding parts of the expressions
/// into GEP indices.
///
static void SplitAddRecs(SmallVectorImpl<const SCEV *> &Ops,
                         Type *Ty,
                         ScalarEvolution &SE) {
  // Find the addrecs.
  SmallVector<const SCEV *, 8> AddRecs;
  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
    // A `while` rather than an `if`: a start value may itself contain an
    // addrec, so keep peeling until Ops[i] is no longer an addrec.
    while (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(Ops[i])) {
      const SCEV *Start = A->getStart();
      if (Start->isZero()) break;
      const SCEV *Zero = SE.getConstant(Ty, 0);
      AddRecs.push_back(SE.getAddRecExpr(Zero,
                                         A->getStepRecurrence(SE),
                                         A->getLoop(),
                                         // FIXME: A->getNoWrapFlags(FlagNW)
                                         SCEV::FlagAnyWrap));
      if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Start)) {
        // Splice the add's operands into the worklist and grow the bound.
        Ops[i] = Zero;
        Ops.append(Add->op_begin(), Add->op_end());
        e += Add->getNumOperands();
      } else {
        Ops[i] = Start;
      }
    }
  if (!AddRecs.empty()) {
    // Add the addrecs onto the end of the list.
    Ops.append(AddRecs.begin(), AddRecs.end());
    // Resort the operand list, moving any constants to the front.
    SimplifyAddOperands(Ops, Ty, SE);
  }
}

/// expandAddToGEP - Expand an addition expression with a pointer type into
/// a GEP instead of using ptrtoint+arithmetic+inttoptr. This helps
/// BasicAliasAnalysis and other passes analyze the result. See the rules
/// for getelementptr vs. inttoptr in
/// http://llvm.org/docs/LangRef.html#pointeraliasing
/// for details.
///
/// Design note: The correctness of using getelementptr here depends on
/// ScalarEvolution not recognizing inttoptr and ptrtoint operators, as
/// they may introduce pointer arithmetic which may not be safely converted
/// into getelementptr.
///
/// Design note: It might seem desirable for this function to be more
/// loop-aware. If some of the indices are loop-invariant while others
/// aren't, it might seem desirable to emit multiple GEPs, keeping the
/// loop-invariant portions of the overall computation outside the loop.
/// However, there are a few reasons this is not done here. Hoisting simple
/// arithmetic is a low-level optimization that often isn't very
/// important until late in the optimization process. In fact, passes
/// like InstructionCombining will combine GEPs, even if it means
/// pushing loop-invariant computation down into loops, so even if the
/// GEPs were split here, the work would quickly be undone.
/// The LoopStrengthReduction pass, which is usually run quite late (and
/// after the last InstructionCombining pass), takes care of hoisting
/// loop-invariant portions of expressions, after considering what
/// can be folded using target addressing modes.
///
Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
                                    const SCEV *const *op_end,
                                    PointerType *PTy,
                                    Type *Ty,
                                    Value *V) {
  Type *ElTy = PTy->getElementType();
  SmallVector<Value *, 4> GepIndices;
  SmallVector<const SCEV *, 8> Ops(op_begin, op_end);
  bool AnyNonZeroIndices = false;

  // Split AddRecs up into parts as either of the parts may be usable
  // without the other.
  SplitAddRecs(Ops, Ty, SE);

  // Descend down the pointer's type and attempt to convert the other
  // operands into GEP indices, at each level. The first index in a GEP
  // indexes into the array implied by the pointer operand; the rest of
  // the indices index into the element or field type selected by the
  // preceding index.
  for (;;) {
    // If the scale size is not 0, attempt to factor out a scale for
    // array indexing.
    SmallVector<const SCEV *, 8> ScaledOps;
    if (ElTy->isSized()) {
      const SCEV *ElSize = SE.getSizeOfExpr(ElTy);
      if (!ElSize->isZero()) {
        // Partition Ops: operands divisible by ElSize become scaled
        // indices; the rest (and any remainders) carry over to NewOps.
        SmallVector<const SCEV *, 8> NewOps;
        for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
          const SCEV *Op = Ops[i];
          const SCEV *Remainder = SE.getConstant(Ty, 0);
          if (FactorOutConstant(Op, Remainder, ElSize, SE, SE.TD)) {
            // Op now has ElSize factored out.
            ScaledOps.push_back(Op);
            if (!Remainder->isZero())
              NewOps.push_back(Remainder);
            AnyNonZeroIndices = true;
          } else {
            // The operand was not divisible, so add it to the list of operands
            // we'll scan next iteration.
            NewOps.push_back(Ops[i]);
          }
        }
        // If we made any changes, update Ops.
        if (!ScaledOps.empty()) {
          Ops = NewOps;
          SimplifyAddOperands(Ops, Ty, SE);
        }
      }
    }

    // Record the scaled array index for this level of the type. If
    // we didn't find any operands that could be factored, tentatively
    // assume that element zero was selected (since the zero offset
    // would obviously be folded away).
    Value *Scaled = ScaledOps.empty() ?
                    Constant::getNullValue(Ty) :
                    expandCodeFor(SE.getAddExpr(ScaledOps), Ty);
    GepIndices.push_back(Scaled);

    // Collect struct field index operands.
    while (StructType *STy = dyn_cast<StructType>(ElTy)) {
      bool FoundFieldNo = false;
      // An empty struct has no fields.
      if (STy->getNumElements() == 0) break;
      if (SE.TD) {
        // With TargetData, field offsets are known. See if a constant offset
        // falls within any of the struct fields.
        if (Ops.empty()) break;
        if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[0]))
          if (SE.getTypeSizeInBits(C->getType()) <= 64) {
            const StructLayout &SL = *SE.TD->getStructLayout(STy);
            uint64_t FullOffset = C->getValue()->getZExtValue();
            if (FullOffset < SL.getSizeInBytes()) {
              // Select the field containing the offset; the leftover
              // offset within that field stays in Ops[0].
              unsigned ElIdx = SL.getElementContainingOffset(FullOffset);
              GepIndices.push_back(
                  ConstantInt::get(Type::getInt32Ty(Ty->getContext()), ElIdx));
              ElTy = STy->getTypeAtIndex(ElIdx);
              Ops[0] =
                SE.getConstant(Ty, FullOffset - SL.getElementOffset(ElIdx));
              AnyNonZeroIndices = true;
              FoundFieldNo = true;
            }
          }
      } else {
        // Without TargetData, just check for an offsetof expression of the
        // appropriate struct type.
        for (unsigned i = 0, e = Ops.size(); i != e; ++i)
          if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(Ops[i])) {
            Type *CTy;
            Constant *FieldNo;
            if (U->isOffsetOf(CTy, FieldNo) && CTy == STy) {
              // Found offsetof(STy, field); consume it as a field index.
              GepIndices.push_back(FieldNo);
              ElTy =
                STy->getTypeAtIndex(cast<ConstantInt>(FieldNo)->getZExtValue());
              Ops[i] = SE.getConstant(Ty, 0);
              AnyNonZeroIndices = true;
              FoundFieldNo = true;
              break;
            }
          }
      }
      // If no struct field offsets were found, tentatively assume that
      // field zero was selected (since the zero offset would obviously
      // be folded away).
      if (!FoundFieldNo) {
        ElTy = STy->getTypeAtIndex(0u);
        GepIndices.push_back(
          Constant::getNullValue(Type::getInt32Ty(Ty->getContext())));
      }
    }

    // Descend into arrays; any other element type terminates the walk.
    if (ArrayType *ATy = dyn_cast<ArrayType>(ElTy))
      ElTy = ATy->getElementType();
    else
      break;
  }

  // If none of the operands were convertible to proper GEP indices, cast
  // the base to i8* and do an ugly getelementptr with that. It's still
  // better than ptrtoint+arithmetic+inttoptr at least.
  if (!AnyNonZeroIndices) {
    // Cast the base to i8*.
    V = InsertNoopCastOfTo(V,
       Type::getInt8PtrTy(Ty->getContext(), PTy->getAddressSpace()));

    // Expand the operands for a plain byte offset.
    Value *Idx = expandCodeFor(SE.getAddExpr(Ops), Ty);

    // Fold a GEP with constant operands.
    if (Constant *CLHS = dyn_cast<Constant>(V))
      if (Constant *CRHS = dyn_cast<Constant>(Idx))
        return ConstantExpr::getGetElementPtr(CLHS, CRHS);

    // Do a quick scan to see if we have this GEP nearby.  If so, reuse it.
    unsigned ScanLimit = 6;
    BasicBlock::iterator BlockBegin = Builder.GetInsertBlock()->begin();
    // Scanning starts from the last instruction before the insertion point.
    BasicBlock::iterator IP = Builder.GetInsertPoint();
    if (IP != BlockBegin) {
      --IP;
      for (; ScanLimit; --IP, --ScanLimit) {
        // Don't count dbg.value against the ScanLimit, to avoid perturbing the
        // generated code.
        if (isa<DbgInfoIntrinsic>(IP))
          ScanLimit++;
        if (IP->getOpcode() == Instruction::GetElementPtr &&
            IP->getOperand(0) == V && IP->getOperand(1) == Idx)
          return IP;
        if (IP == BlockBegin) break;
      }
    }

    // Save the original insertion point so we can restore it when we're done.
    BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
    BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();

    // Move the insertion point out of as many loops as we can, as long as
    // both the base and the offset are invariant and a preheader exists.
    while (const Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock())) {
      if (!L->isLoopInvariant(V) || !L->isLoopInvariant(Idx)) break;
      BasicBlock *Preheader = L->getLoopPreheader();
      if (!Preheader) break;

      // Ok, move up a level.
      Builder.SetInsertPoint(Preheader, Preheader->getTerminator());
    }

    // Emit a GEP.
    Value *GEP = Builder.CreateGEP(V, Idx, "uglygep");
    rememberInstruction(GEP);

    // Restore the original insert point.
    if (SaveInsertBB)
      restoreInsertPoint(SaveInsertBB, SaveInsertPt);

    return GEP;
  }

  // Save the original insertion point so we can restore it when we're done.
  BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
  BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();

  // Move the insertion point out of as many loops as we can.  Here the
  // base and every collected GEP index must be loop-invariant.
  while (const Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock())) {
    if (!L->isLoopInvariant(V)) break;

    bool AnyIndexNotLoopInvariant = false;
    for (SmallVectorImpl<Value *>::const_iterator I = GepIndices.begin(),
         E = GepIndices.end(); I != E; ++I)
      if (!L->isLoopInvariant(*I)) {
        AnyIndexNotLoopInvariant = true;
        break;
      }
    if (AnyIndexNotLoopInvariant)
      break;

    BasicBlock *Preheader = L->getLoopPreheader();
    if (!Preheader) break;

    // Ok, move up a level.
    Builder.SetInsertPoint(Preheader, Preheader->getTerminator());
  }

  // Insert a pretty getelementptr. Note that this GEP is not marked inbounds,
  // because ScalarEvolution may have changed the address arithmetic to
  // compute a value which is beyond the end of the allocated object.
  Value *Casted = V;
  if (V->getType() != PTy)
    Casted = InsertNoopCastOfTo(Casted, PTy);
  Value *GEP = Builder.CreateGEP(Casted,
                                 GepIndices,
                                 "scevgep");
  // Any remaining operands (e.g. leftover field-offset remainders) are
  // added to the GEP result and expanded recursively.
  Ops.push_back(SE.getUnknown(GEP));
  rememberInstruction(GEP);

  // Restore the original insert point.
  if (SaveInsertBB)
    restoreInsertPoint(SaveInsertBB, SaveInsertPt);

  return expand(SE.getAddExpr(Ops));
}

/// isNonConstantNegative - Return true if the specified scev is negated, but
/// not a constant.
static bool isNonConstantNegative(const SCEV *F) {
  const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(F);
  if (!Mul) return false;

  // If there is a constant factor, it will be first.
  const SCEVConstant *SC = dyn_cast<SCEVConstant>(Mul->getOperand(0));
  if (!SC) return false;

  // Return true if the value is negative, this matches things like (-42 * V).
  return SC->getValue()->getValue().isNegative();
}

/// PickMostRelevantLoop - Given two loops pick the one that's most relevant for
/// SCEV expansion. If they are nested, this is the most nested. If they are
/// neighboring, pick the later.
static const Loop *PickMostRelevantLoop(const Loop *A, const Loop *B,
                                        DominatorTree &DT) {
  // A null loop means "no loop"; the non-null one (if any) wins.
  if (!A) return B;
  if (!B) return A;
  // Nested: prefer the inner (contained) loop.
  if (A->contains(B)) return B;
  if (B->contains(A)) return A;
  // Neighboring: prefer the dominated (later) loop.
  if (DT.dominates(A->getHeader(), B->getHeader())) return B;
  if (DT.dominates(B->getHeader(), A->getHeader())) return A;
  return A; // Arbitrarily break the tie.
}

/// getRelevantLoop - Get the most relevant loop associated with the given
/// expression, according to PickMostRelevantLoop.
const Loop *SCEVExpander::getRelevantLoop(const SCEV *S) {
  // Test whether we've already computed the most relevant loop for this SCEV.
  std::pair<DenseMap<const SCEV *, const Loop *>::iterator, bool> Pair =
    RelevantLoops.insert(std::make_pair(S, static_cast<const Loop *>(0)));
  if (!Pair.second)
    return Pair.first->second;

  if (isa<SCEVConstant>(S))
    // A constant has no relevant loops.
    return 0;
  if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
    if (const Instruction *I = dyn_cast<Instruction>(U->getValue()))
      return Pair.first->second = SE.LI->getLoopFor(I->getParent());
    // A non-instruction has no relevant loops.
    return 0;
  }
  if (const SCEVNAryExpr *N = dyn_cast<SCEVNAryExpr>(S)) {
    const Loop *L = 0;
    if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
      L = AR->getLoop();
    // Combine the relevant loops of all operands.  Note: the recursive
    // calls may have invalidated Pair.first, so cache via RelevantLoops[].
    for (SCEVNAryExpr::op_iterator I = N->op_begin(), E = N->op_end();
         I != E; ++I)
      L = PickMostRelevantLoop(L, getRelevantLoop(*I), *SE.DT);
    return RelevantLoops[N] = L;
  }
  if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S)) {
    const Loop *Result = getRelevantLoop(C->getOperand());
    return RelevantLoops[C] = Result;
  }
  if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
    const Loop *Result =
      PickMostRelevantLoop(getRelevantLoop(D->getLHS()),
                           getRelevantLoop(D->getRHS()),
                           *SE.DT);
    return RelevantLoops[D] = Result;
  }
  llvm_unreachable("Unexpected SCEV type!");
  return 0;
}

namespace {

/// LoopCompare - Compare loops by PickMostRelevantLoop.
class LoopCompare {
  DominatorTree &DT;
public:
  explicit LoopCompare(DominatorTree &dt) : DT(dt) {}

  bool operator()(std::pair<const Loop *, const SCEV *> LHS,
                  std::pair<const Loop *, const SCEV *> RHS) const {
    // Keep pointer operands sorted at the end.
    if (LHS.second->getType()->isPointerTy() !=
        RHS.second->getType()->isPointerTy())
      return LHS.second->getType()->isPointerTy();

    // Compare loops with PickMostRelevantLoop.
    if (LHS.first != RHS.first)
      return PickMostRelevantLoop(LHS.first, RHS.first, DT) != LHS.first;

    // If one operand is a non-constant negative and the other is not,
    // put the non-constant negative on the right so that a sub can
    // be used instead of a negate and add.
    if (isNonConstantNegative(LHS.second)) {
      if (!isNonConstantNegative(RHS.second))
        return false;
    } else if (isNonConstantNegative(RHS.second))
      return true;

    // Otherwise they are equivalent according to this comparison.
    return false;
  }
};

}

/// visitAddExpr - Expand a SCEVAddExpr by emitting a chain of adds, subs,
/// and/or GEPs, after grouping operands by their most relevant loop so that
/// loop-invariant partial sums can be hoisted.
Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
  Type *Ty = SE.getEffectiveSCEVType(S->getType());

  // Collect all the add operands in a loop, along with their associated loops.
  // Iterate in reverse so that constants are emitted last, all else equal, and
  // so that pointer operands are inserted first, which the code below relies on
  // to form more involved GEPs.
  SmallVector<std::pair<const Loop *, const SCEV *>, 8> OpsAndLoops;
  for (std::reverse_iterator<SCEVAddExpr::op_iterator> I(S->op_end()),
       E(S->op_begin()); I != E; ++I)
    OpsAndLoops.push_back(std::make_pair(getRelevantLoop(*I), *I));

  // Sort by loop. Use a stable sort so that constants follow non-constants and
  // pointer operands precede non-pointer operands.
  std::stable_sort(OpsAndLoops.begin(), OpsAndLoops.end(), LoopCompare(*SE.DT));

  // Emit instructions to add all the operands. Hoist as much as possible
  // out of loops, and form meaningful getelementptrs where possible.
  Value *Sum = 0;
  for (SmallVectorImpl<std::pair<const Loop *, const SCEV *> >::iterator
       I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E; ) {
    const Loop *CurLoop = I->first;
    const SCEV *Op = I->second;
    if (!Sum) {
      // This is the first operand. Just expand it.
      Sum = expand(Op);
      ++I;
    } else if (PointerType *PTy = dyn_cast<PointerType>(Sum->getType())) {
      // The running sum expression is a pointer. Try to form a getelementptr
      // at this level with that as the base.
      SmallVector<const SCEV *, 4> NewOps;
      // Consume every remaining operand associated with the same loop; they
      // all become GEP indices off the pointer Sum.
      for (; I != E && I->first == CurLoop; ++I) {
        // If the operand is SCEVUnknown and not instructions, peek through
        // it, to enable more of it to be folded into the GEP.
        const SCEV *X = I->second;
        if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(X))
          if (!isa<Instruction>(U->getValue()))
            X = SE.getSCEV(U->getValue());
        NewOps.push_back(X);
      }
      Sum = expandAddToGEP(NewOps.begin(), NewOps.end(), PTy, Ty, Sum);
    } else if (PointerType *PTy = dyn_cast<PointerType>(Op->getType())) {
      // The running sum is an integer, and there's a pointer at this level.
      // Try to form a getelementptr. If the running sum is instructions,
      // use a SCEVUnknown to avoid re-analyzing them.
      SmallVector<const SCEV *, 4> NewOps;
      NewOps.push_back(isa<Instruction>(Sum) ? SE.getUnknown(Sum) :
                       SE.getSCEV(Sum));
      for (++I; I != E && I->first == CurLoop; ++I)
        NewOps.push_back(I->second);
      Sum = expandAddToGEP(NewOps.begin(), NewOps.end(), PTy, Ty, expand(Op));
    } else if (isNonConstantNegative(Op)) {
      // Instead of doing a negate and add, just do a subtract.
      Value *W = expandCodeFor(SE.getNegativeSCEV(Op), Ty);
      Sum = InsertNoopCastOfTo(Sum, Ty);
      Sum = InsertBinop(Instruction::Sub, Sum, W);
      ++I;
    } else {
      // A simple add.
      Value *W = expandCodeFor(Op, Ty);
      Sum = InsertNoopCastOfTo(Sum, Ty);
      // Canonicalize a constant to the RHS.
      if (isa<Constant>(Sum)) std::swap(Sum, W);
      Sum = InsertBinop(Instruction::Add, Sum, W);
      ++I;
    }
  }

  return Sum;
}

/// visitMulExpr - Expand a SCEVMulExpr as a chain of multiplies (with a
/// negate special case for a -1 factor), grouping operands by loop as in
/// visitAddExpr.
Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) {
  Type *Ty = SE.getEffectiveSCEVType(S->getType());

  // Collect all the mul operands in a loop, along with their associated loops.
  // Iterate in reverse so that constants are emitted last, all else equal.
  SmallVector<std::pair<const Loop *, const SCEV *>, 8> OpsAndLoops;
  for (std::reverse_iterator<SCEVMulExpr::op_iterator> I(S->op_end()),
       E(S->op_begin()); I != E; ++I)
    OpsAndLoops.push_back(std::make_pair(getRelevantLoop(*I), *I));

  // Sort by loop. Use a stable sort so that constants follow non-constants.
  std::stable_sort(OpsAndLoops.begin(), OpsAndLoops.end(), LoopCompare(*SE.DT));

  // Emit instructions to mul all the operands. Hoist as much as possible
  // out of loops.
  Value *Prod = 0;
  for (SmallVectorImpl<std::pair<const Loop *, const SCEV *> >::iterator
       I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E; ) {
    const SCEV *Op = I->second;
    if (!Prod) {
      // This is the first operand. Just expand it.
      Prod = expand(Op);
      ++I;
    } else if (Op->isAllOnesValue()) {
      // Instead of doing a multiply by negative one, just do a negate.
      // Emitted as (0 - Prod).
      Prod = InsertNoopCastOfTo(Prod, Ty);
      Prod = InsertBinop(Instruction::Sub, Constant::getNullValue(Ty), Prod);
      ++I;
    } else {
      // A simple mul.
      Value *W = expandCodeFor(Op, Ty);
      Prod = InsertNoopCastOfTo(Prod, Ty);
      // Canonicalize a constant to the RHS.
      if (isa<Constant>(Prod)) std::swap(Prod, W);
      Prod = InsertBinop(Instruction::Mul, Prod, W);
      ++I;
    }
  }

  return Prod;
}

/// visitUDivExpr - Expand a SCEVUDivExpr. A divide by a constant power of
/// two is strength-reduced to a logical shift right; everything else becomes
/// a udiv instruction.
Value *SCEVExpander::visitUDivExpr(const SCEVUDivExpr *S) {
  Type *Ty = SE.getEffectiveSCEVType(S->getType());

  Value *LHS = expandCodeFor(S->getLHS(), Ty);
  if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(S->getRHS())) {
    const APInt &RHS = SC->getValue()->getValue();
    if (RHS.isPowerOf2())
      return InsertBinop(Instruction::LShr, LHS,
                         ConstantInt::get(Ty, RHS.logBase2()));
  }

  Value *RHS = expandCodeFor(S->getRHS(), Ty);
  return InsertBinop(Instruction::UDiv, LHS, RHS);
}

/// Move parts of Base into Rest to leave Base with the minimal
/// expression that provides a pointer operand suitable for a
/// GEP expansion.
827198090Srdivackystatic void ExposePointerBase(const SCEV *&Base, const SCEV *&Rest, 828193323Sed ScalarEvolution &SE) { 829193323Sed while (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(Base)) { 830193323Sed Base = A->getStart(); 831193323Sed Rest = SE.getAddExpr(Rest, 832207618Srdivacky SE.getAddRecExpr(SE.getConstant(A->getType(), 0), 833193323Sed A->getStepRecurrence(SE), 834221345Sdim A->getLoop(), 835221345Sdim // FIXME: A->getNoWrapFlags(FlagNW) 836221345Sdim SCEV::FlagAnyWrap)); 837193323Sed } 838193323Sed if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(Base)) { 839193323Sed Base = A->getOperand(A->getNumOperands()-1); 840198090Srdivacky SmallVector<const SCEV *, 8> NewAddOps(A->op_begin(), A->op_end()); 841193323Sed NewAddOps.back() = Rest; 842193323Sed Rest = SE.getAddExpr(NewAddOps); 843193323Sed ExposePointerBase(Base, Rest, SE); 844193323Sed } 845193323Sed} 846193323Sed 847226633Sdim/// Determine if this is a well-behaved chain of instructions leading back to 848226633Sdim/// the PHI. If so, it may be reused by expanded expressions. 849226633Sdimbool SCEVExpander::isNormalAddRecExprPHI(PHINode *PN, Instruction *IncV, 850226633Sdim const Loop *L) { 851226633Sdim if (IncV->getNumOperands() == 0 || isa<PHINode>(IncV) || 852226633Sdim (isa<CastInst>(IncV) && !isa<BitCastInst>(IncV))) 853226633Sdim return false; 854226633Sdim // If any of the operands don't dominate the insert position, bail. 855226633Sdim // Addrec operands are always loop-invariant, so this can only happen 856226633Sdim // if there are instructions which haven't been hoisted. 857226633Sdim if (L == IVIncInsertLoop) { 858226633Sdim for (User::op_iterator OI = IncV->op_begin()+1, 859226633Sdim OE = IncV->op_end(); OI != OE; ++OI) 860226633Sdim if (Instruction *OInst = dyn_cast<Instruction>(OI)) 861226633Sdim if (!SE.DT->dominates(OInst, IVIncInsertPos)) 862226633Sdim return false; 863226633Sdim } 864226633Sdim // Advance to the next instruction. 
865226633Sdim IncV = dyn_cast<Instruction>(IncV->getOperand(0)); 866226633Sdim if (!IncV) 867226633Sdim return false; 868226633Sdim 869226633Sdim if (IncV->mayHaveSideEffects()) 870226633Sdim return false; 871226633Sdim 872226633Sdim if (IncV != PN) 873226633Sdim return true; 874226633Sdim 875226633Sdim return isNormalAddRecExprPHI(PN, IncV, L); 876226633Sdim} 877226633Sdim 878226633Sdim/// Determine if this cyclic phi is in a form that would have been generated by 879226633Sdim/// LSR. We don't care if the phi was actually expanded in this pass, as long 880226633Sdim/// as it is in a low-cost form, for example, no implied multiplication. This 881226633Sdim/// should match any patterns generated by getAddRecExprPHILiterally and 882226633Sdim/// expandAddtoGEP. 883226633Sdimbool SCEVExpander::isExpandedAddRecExprPHI(PHINode *PN, Instruction *IncV, 884226633Sdim const Loop *L) { 885226633Sdim switch (IncV->getOpcode()) { 886226633Sdim // Check for a simple Add/Sub or GEP of a loop invariant step. 887226633Sdim case Instruction::Add: 888226633Sdim case Instruction::Sub: 889226633Sdim return IncV->getOperand(0) == PN 890226633Sdim && L->isLoopInvariant(IncV->getOperand(1)); 891226633Sdim case Instruction::BitCast: 892226633Sdim IncV = dyn_cast<GetElementPtrInst>(IncV->getOperand(0)); 893226633Sdim if (!IncV) 894226633Sdim return false; 895226633Sdim // fall-thru to GEP handling 896226633Sdim case Instruction::GetElementPtr: { 897226633Sdim // This must be a pointer addition of constants (pretty) or some number of 898226633Sdim // address-size elements (ugly). 899226633Sdim for (Instruction::op_iterator I = IncV->op_begin()+1, E = IncV->op_end(); 900226633Sdim I != E; ++I) { 901226633Sdim if (isa<Constant>(*I)) 902226633Sdim continue; 903226633Sdim // ugly geps have 2 operands. 904226633Sdim // i1* is used by the expander to represent an address-size element. 
      if (IncV->getNumOperands() != 2)
        return false;
      unsigned AS = cast<PointerType>(IncV->getType())->getAddressSpace();
      if (IncV->getType() != Type::getInt1PtrTy(SE.getContext(), AS)
          && IncV->getType() != Type::getInt8PtrTy(SE.getContext(), AS))
        return false;
      // Ensure the operands dominate the insertion point. I don't know of a
      // case when this would not be true, so this is somewhat untested.
      if (L == IVIncInsertLoop) {
        for (User::op_iterator OI = IncV->op_begin()+1,
             OE = IncV->op_end(); OI != OE; ++OI)
          if (Instruction *OInst = dyn_cast<Instruction>(OI))
            if (!SE.DT->dominates(OInst, IVIncInsertPos))
              return false;
      }
      break;
    }
    // Walk through an optional bitcast back to the PHI.
    IncV = dyn_cast<Instruction>(IncV->getOperand(0));
    if (IncV && IncV->getOpcode() == Instruction::BitCast)
      IncV = dyn_cast<Instruction>(IncV->getOperand(0));
    return IncV == PN;
  }
  default:
    return false;
  }
}

/// getAddRecExprPHILiterally - Helper for expandAddRecExprLiterally. Expand
/// the base addrec, which is the addrec without any non-loop-dominating
/// values, and return the PHI.
PHINode *
SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
                                        const Loop *L,
                                        Type *ExpandTy,
                                        Type *IntTy) {
  assert((!IVIncInsertLoop||IVIncInsertPos) && "Uninitialized insert position");

  // Reuse a previously-inserted PHI, if present.
  BasicBlock *LatchBlock = L->getLoopLatch();
  if (LatchBlock) {
    // Scan the loop header's PHIs for one that ScalarEvolution maps to the
    // same (normalized) addrec and whose increment chain is acceptable.
    for (BasicBlock::iterator I = L->getHeader()->begin();
         PHINode *PN = dyn_cast<PHINode>(I); ++I) {
      if (!SE.isSCEVable(PN->getType()) ||
          (SE.getEffectiveSCEVType(PN->getType()) !=
           SE.getEffectiveSCEVType(Normalized->getType())) ||
          SE.getSCEV(PN) != Normalized)
        continue;

      Instruction *IncV =
        cast<Instruction>(PN->getIncomingValueForBlock(LatchBlock));

      // The acceptable increment shape differs between LSR-style and normal
      // expansion.
      if (LSRMode) {
        if (!isExpandedAddRecExprPHI(PN, IncV, L))
          continue;
      }
      else {
        if (!isNormalAddRecExprPHI(PN, IncV, L))
          continue;
      }
      // Ok, the add recurrence looks usable.
      // Remember this PHI, even in post-inc mode.
      InsertedValues.insert(PN);
      // Remember the increment.
      rememberInstruction(IncV);
      if (L == IVIncInsertLoop)
        do {
          if (SE.DT->dominates(IncV, IVIncInsertPos))
            break;
          // Make sure the increment is where we want it. But don't move it
          // down past a potential existing post-inc user.
          IncV->moveBefore(IVIncInsertPos);
          IVIncInsertPos = IncV;
          IncV = cast<Instruction>(IncV->getOperand(0));
        } while (IncV != PN);
      return PN;
    }
  }

  // Save the original insertion point so we can restore it when we're done.
  BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
  BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();

  // Expand code for the start value.
  Value *StartV = expandCodeFor(Normalized->getStart(), ExpandTy,
                                L->getHeader()->begin());

  // StartV must be hoisted into L's preheader to dominate the new phi.
  assert(!isa<Instruction>(StartV) ||
         SE.DT->properlyDominates(cast<Instruction>(StartV)->getParent(),
                                  L->getHeader()));

  // Expand code for the step value. Insert instructions right before the
  // terminator corresponding to the back-edge. Do this before creating the PHI
  // so that PHI reuse code doesn't see an incomplete PHI. If the stride is
  // negative, insert a sub instead of an add for the increment (unless it's a
  // constant, because subtracts of constants are canonicalized to adds).
  const SCEV *Step = Normalized->getStepRecurrence(SE);
  bool isPointer = ExpandTy->isPointerTy();
  bool isNegative = !isPointer && isNonConstantNegative(Step);
  if (isNegative)
    Step = SE.getNegativeSCEV(Step);
  Value *StepV = expandCodeFor(Step, IntTy, L->getHeader()->begin());

  // Create the PHI.
  BasicBlock *Header = L->getHeader();
  Builder.SetInsertPoint(Header, Header->begin());
  pred_iterator HPB = pred_begin(Header), HPE = pred_end(Header);
  PHINode *PN = Builder.CreatePHI(ExpandTy, std::distance(HPB, HPE),
                                  Twine(IVName) + ".iv");
  rememberInstruction(PN);

  // Create the step instructions and populate the PHI.
  for (pred_iterator HPI = HPB; HPI != HPE; ++HPI) {
    BasicBlock *Pred = *HPI;

    // Add a start value.
    if (!L->contains(Pred)) {
      PN->addIncoming(StartV, Pred);
      continue;
    }

    // Create a step value and add it to the PHI. If IVIncInsertLoop is
    // non-null and equal to the addrec's loop, insert the instructions
    // at IVIncInsertPos.
    Instruction *InsertPos = L == IVIncInsertLoop ?
      IVIncInsertPos : Pred->getTerminator();
    Builder.SetInsertPoint(InsertPos);
    Value *IncV;
    // If the PHI is a pointer, use a GEP, otherwise use an add or sub.
    if (isPointer) {
      PointerType *GEPPtrTy = cast<PointerType>(ExpandTy);
      // If the step isn't constant, don't use an implicitly scaled GEP, because
      // that would require a multiply inside the loop.
      if (!isa<ConstantInt>(StepV))
        GEPPtrTy = PointerType::get(Type::getInt1Ty(SE.getContext()),
                                    GEPPtrTy->getAddressSpace());
      const SCEV *const StepArray[1] = { SE.getSCEV(StepV) };
      IncV = expandAddToGEP(StepArray, StepArray+1, GEPPtrTy, IntTy, PN);
      if (IncV->getType() != PN->getType()) {
        IncV = Builder.CreateBitCast(IncV, PN->getType());
        rememberInstruction(IncV);
      }
    } else {
      IncV = isNegative ?
        Builder.CreateSub(PN, StepV, Twine(IVName) + ".iv.next") :
        Builder.CreateAdd(PN, StepV, Twine(IVName) + ".iv.next");
      rememberInstruction(IncV);
    }
    PN->addIncoming(IncV, Pred);
  }

  // Restore the original insert point.
  if (SaveInsertBB)
    restoreInsertPoint(SaveInsertBB, SaveInsertPt);

  // Remember this PHI, even in post-inc mode.
  InsertedValues.insert(PN);

  return PN;
}

/// expandAddRecExprLiterally - Expand an addrec as a loop-header PHI plus an
/// increment, without rewriting it in terms of a canonical IV. Used when the
/// expander is not in CanonicalMode.
Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
  Type *STy = S->getType();
  Type *IntTy = SE.getEffectiveSCEVType(STy);
  const Loop *L = S->getLoop();

  // Determine a normalized form of this expression, which is the expression
  // before any post-inc adjustment is made.
  const SCEVAddRecExpr *Normalized = S;
  if (PostIncLoops.count(L)) {
    PostIncLoopSet Loops;
    Loops.insert(L);
    Normalized =
      cast<SCEVAddRecExpr>(TransformForPostIncUse(Normalize, S, 0, 0,
                                                  Loops, SE, *SE.DT));
  }

  // Strip off any non-loop-dominating component from the addrec start.
  const SCEV *Start = Normalized->getStart();
  const SCEV *PostLoopOffset = 0;
  if (!SE.properlyDominates(Start, L->getHeader())) {
    PostLoopOffset = Start;
    Start = SE.getConstant(Normalized->getType(), 0);
    Normalized = cast<SCEVAddRecExpr>(
      SE.getAddRecExpr(Start, Normalized->getStepRecurrence(SE),
                       Normalized->getLoop(),
                       // FIXME: Normalized->getNoWrapFlags(FlagNW)
                       SCEV::FlagAnyWrap));
  }

  // Strip off any non-loop-dominating component from the addrec step.
  const SCEV *Step = Normalized->getStepRecurrence(SE);
  const SCEV *PostLoopScale = 0;
  if (!SE.dominates(Step, L->getHeader())) {
    PostLoopScale = Step;
    Step = SE.getConstant(Normalized->getType(), 1);
    Normalized =
      cast<SCEVAddRecExpr>(SE.getAddRecExpr(Start, Step,
                                            Normalized->getLoop(),
                                            // FIXME: Normalized
                                            // ->getNoWrapFlags(FlagNW)
                                            SCEV::FlagAnyWrap));
  }

  // Expand the core addrec. If we need post-loop scaling, force it to
  // expand to an integer type to avoid the need for additional casting.
  Type *ExpandTy = PostLoopScale ? IntTy : STy;
  PHINode *PN = getAddRecExprPHILiterally(Normalized, L, ExpandTy, IntTy);

  // Accommodate post-inc mode, if necessary.
  Value *Result;
  if (!PostIncLoops.count(L))
    Result = PN;
  else {
    // In PostInc mode, use the post-incremented value.
    BasicBlock *LatchBlock = L->getLoopLatch();
    assert(LatchBlock && "PostInc mode requires a unique loop latch!");
    Result = PN->getIncomingValueForBlock(LatchBlock);

    // For an expansion to use the postinc form, the client must call
    // expandCodeFor with an InsertPoint that is either outside the PostIncLoop
    // or dominated by IVIncInsertPos.
    assert((!isa<Instruction>(Result) ||
            SE.DT->dominates(cast<Instruction>(Result),
                             Builder.GetInsertPoint())) &&
           "postinc expansion does not dominate use");
  }

  // Re-apply any non-loop-dominating scale.
  if (PostLoopScale) {
    Result = InsertNoopCastOfTo(Result, IntTy);
    Result = Builder.CreateMul(Result,
                               expandCodeFor(PostLoopScale, IntTy));
    rememberInstruction(Result);
  }

  // Re-apply any non-loop-dominating offset.
  if (PostLoopOffset) {
    // Prefer a GEP when the expansion type is a pointer.
    if (PointerType *PTy = dyn_cast<PointerType>(ExpandTy)) {
      const SCEV *const OffsetArray[1] = { PostLoopOffset };
      Result = expandAddToGEP(OffsetArray, OffsetArray+1, PTy, IntTy, Result);
    } else {
      Result = InsertNoopCastOfTo(Result, IntTy);
      Result = Builder.CreateAdd(Result,
                                 expandCodeFor(PostLoopOffset, IntTy));
      rememberInstruction(Result);
    }
  }

  return Result;
}

/// visitAddRecExpr - Expand a SCEVAddRecExpr. In CanonicalMode this rewrites
/// the addrec in terms of a canonical induction variable (creating one if
/// needed); otherwise it defers to expandAddRecExprLiterally.
Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
  if (!CanonicalMode) return expandAddRecExprLiterally(S);

  Type *Ty = SE.getEffectiveSCEVType(S->getType());
  const Loop *L = S->getLoop();

  // First check for an existing canonical IV in a suitable type.
  PHINode *CanonicalIV = 0;
  if (PHINode *PN = L->getCanonicalInductionVariable())
    if (SE.getTypeSizeInBits(PN->getType()) >= SE.getTypeSizeInBits(Ty))
      CanonicalIV = PN;

  // Rewrite an AddRec in terms of the canonical induction variable, if
  // its type is more narrow.
  if (CanonicalIV &&
      SE.getTypeSizeInBits(CanonicalIV->getType()) >
      SE.getTypeSizeInBits(Ty)) {
    // Any-extend each operand to the canonical IV's width, expand, then
    // truncate the result back down to Ty.
    SmallVector<const SCEV *, 4> NewOps(S->getNumOperands());
    for (unsigned i = 0, e = S->getNumOperands(); i != e; ++i)
      NewOps[i] = SE.getAnyExtendExpr(S->op_begin()[i], CanonicalIV->getType());
    Value *V = expand(SE.getAddRecExpr(NewOps, S->getLoop(),
                                       // FIXME: S->getNoWrapFlags(FlagNW)
                                       SCEV::FlagAnyWrap));
    BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
    BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
    BasicBlock::iterator NewInsertPt =
      llvm::next(BasicBlock::iterator(cast<Instruction>(V)));
    // Skip past PHIs, debug intrinsics, and landingpads to find a legal
    // insertion point for the truncate.
    while (isa<PHINode>(NewInsertPt) || isa<DbgInfoIntrinsic>(NewInsertPt) ||
           isa<LandingPadInst>(NewInsertPt))
      ++NewInsertPt;
    V = expandCodeFor(SE.getTruncateExpr(SE.getUnknown(V), Ty), 0,
                      NewInsertPt);
    restoreInsertPoint(SaveInsertBB, SaveInsertPt);
    return V;
  }

  // {X,+,F} --> X + {0,+,F}
  if (!S->getStart()->isZero()) {
    SmallVector<const SCEV *, 4> NewOps(S->op_begin(), S->op_end());
    NewOps[0] = SE.getConstant(Ty, 0);
    // FIXME: can use S->getNoWrapFlags()
    const SCEV *Rest = SE.getAddRecExpr(NewOps, L, SCEV::FlagAnyWrap);

    // Turn things like ptrtoint+arithmetic+inttoptr into GEP. See the
    // comments on expandAddToGEP for details.
    const SCEV *Base = S->getStart();
    const SCEV *RestArray[1] = { Rest };
    // Dig into the expression to find the pointer base for a GEP.
    ExposePointerBase(Base, RestArray[0], SE);
    // If we found a pointer, expand the AddRec with a GEP.
    if (PointerType *PTy = dyn_cast<PointerType>(Base->getType())) {
      // Make sure the Base isn't something exotic, such as a multiplied
      // or divided pointer value. In those cases, the result type isn't
      // actually a pointer type.
      if (!isa<SCEVMulExpr>(Base) && !isa<SCEVUDivExpr>(Base)) {
        Value *StartV = expand(Base);
        assert(StartV->getType() == PTy && "Pointer type mismatch for GEP!");
        return expandAddToGEP(RestArray, RestArray+1, PTy, Ty, StartV);
      }
    }

    // Just do a normal add. Pre-expand the operands to suppress folding.
    return expand(SE.getAddExpr(SE.getUnknown(expand(S->getStart())),
                                SE.getUnknown(expand(Rest))));
  }

  // If we don't yet have a canonical IV, create one.
  if (!CanonicalIV) {
    // Create and insert the PHI node for the induction variable in the
    // specified loop.
    BasicBlock *Header = L->getHeader();
    pred_iterator HPB = pred_begin(Header), HPE = pred_end(Header);
    CanonicalIV = PHINode::Create(Ty, std::distance(HPB, HPE), "indvar",
                                  Header->begin());
    rememberInstruction(CanonicalIV);

    // Incoming value is 0 from outside the loop and IV+1 along each
    // back-edge.
    Constant *One = ConstantInt::get(Ty, 1);
    for (pred_iterator HPI = HPB; HPI != HPE; ++HPI) {
      BasicBlock *HP = *HPI;
      if (L->contains(HP)) {
        // Insert a unit add instruction right before the terminator
        // corresponding to the back-edge.
        Instruction *Add = BinaryOperator::CreateAdd(CanonicalIV, One,
                                                     "indvar.next",
                                                     HP->getTerminator());
        Add->setDebugLoc(HP->getTerminator()->getDebugLoc());
        rememberInstruction(Add);
        CanonicalIV->addIncoming(Add, HP);
      } else {
        CanonicalIV->addIncoming(Constant::getNullValue(Ty), HP);
      }
    }
  }

  // {0,+,1} --> Insert a canonical induction variable into the loop!
  if (S->isAffine() && S->getOperand(1)->isOne()) {
    assert(Ty == SE.getEffectiveSCEVType(CanonicalIV->getType()) &&
           "IVs with types different from the canonical IV should "
           "already have been handled!");
    return CanonicalIV;
  }

  // {0,+,F} --> {0,+,1} * F

  // If this is a simple linear addrec, emit it now as a special case.
  if (S->isAffine())    // {0,+,F} --> i*F
    return
      expand(SE.getTruncateOrNoop(
        SE.getMulExpr(SE.getUnknown(CanonicalIV),
                      SE.getNoopOrAnyExtend(S->getOperand(1),
                                            CanonicalIV->getType())),
        Ty));

  // If this is a chain of recurrences, turn it into a closed form, using the
  // folders, then expandCodeFor the closed form. This allows the folders to
  // simplify the expression without having to build a bunch of special code
  // into this folder.
  const SCEV *IH = SE.getUnknown(CanonicalIV);   // Get I as a "symbolic" SCEV.

  // Promote S up to the canonical IV type, if the cast is foldable.
  const SCEV *NewS = S;
  const SCEV *Ext = SE.getNoopOrAnyExtend(S, CanonicalIV->getType());
  if (isa<SCEVAddRecExpr>(Ext))
    NewS = Ext;

  const SCEV *V = cast<SCEVAddRecExpr>(NewS)->evaluateAtIteration(IH, SE);
  //cerr << "Evaluated: " << *this << "\n to: " << *V << "\n";

  // Truncate the result down to the original type, if needed.
1286198090Srdivacky const SCEV *T = SE.getTruncateOrNoop(V, Ty); 1287194710Sed return expand(T); 1288193323Sed} 1289193323Sed 1290193323SedValue *SCEVExpander::visitTruncateExpr(const SCEVTruncateExpr *S) { 1291226633Sdim Type *Ty = SE.getEffectiveSCEVType(S->getType()); 1292194178Sed Value *V = expandCodeFor(S->getOperand(), 1293194178Sed SE.getEffectiveSCEVType(S->getOperand()->getType())); 1294226633Sdim Value *I = Builder.CreateTrunc(V, Ty); 1295202878Srdivacky rememberInstruction(I); 1296193323Sed return I; 1297193323Sed} 1298193323Sed 1299193323SedValue *SCEVExpander::visitZeroExtendExpr(const SCEVZeroExtendExpr *S) { 1300226633Sdim Type *Ty = SE.getEffectiveSCEVType(S->getType()); 1301194178Sed Value *V = expandCodeFor(S->getOperand(), 1302194178Sed SE.getEffectiveSCEVType(S->getOperand()->getType())); 1303226633Sdim Value *I = Builder.CreateZExt(V, Ty); 1304202878Srdivacky rememberInstruction(I); 1305193323Sed return I; 1306193323Sed} 1307193323Sed 1308193323SedValue *SCEVExpander::visitSignExtendExpr(const SCEVSignExtendExpr *S) { 1309226633Sdim Type *Ty = SE.getEffectiveSCEVType(S->getType()); 1310194178Sed Value *V = expandCodeFor(S->getOperand(), 1311194178Sed SE.getEffectiveSCEVType(S->getOperand()->getType())); 1312226633Sdim Value *I = Builder.CreateSExt(V, Ty); 1313202878Srdivacky rememberInstruction(I); 1314193323Sed return I; 1315193323Sed} 1316193323Sed 1317193323SedValue *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) { 1318198090Srdivacky Value *LHS = expand(S->getOperand(S->getNumOperands()-1)); 1319226633Sdim Type *Ty = LHS->getType(); 1320198090Srdivacky for (int i = S->getNumOperands()-2; i >= 0; --i) { 1321198090Srdivacky // In the case of mixed integer and pointer types, do the 1322198090Srdivacky // rest of the comparisons as integer. 
1323198090Srdivacky if (S->getOperand(i)->getType() != Ty) { 1324198090Srdivacky Ty = SE.getEffectiveSCEVType(Ty); 1325198090Srdivacky LHS = InsertNoopCastOfTo(LHS, Ty); 1326198090Srdivacky } 1327194178Sed Value *RHS = expandCodeFor(S->getOperand(i), Ty); 1328226633Sdim Value *ICmp = Builder.CreateICmpSGT(LHS, RHS); 1329202878Srdivacky rememberInstruction(ICmp); 1330195340Sed Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "smax"); 1331202878Srdivacky rememberInstruction(Sel); 1332193323Sed LHS = Sel; 1333193323Sed } 1334198090Srdivacky // In the case of mixed integer and pointer types, cast the 1335198090Srdivacky // final result back to the pointer type. 1336198090Srdivacky if (LHS->getType() != S->getType()) 1337198090Srdivacky LHS = InsertNoopCastOfTo(LHS, S->getType()); 1338193323Sed return LHS; 1339193323Sed} 1340193323Sed 1341193323SedValue *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) { 1342198090Srdivacky Value *LHS = expand(S->getOperand(S->getNumOperands()-1)); 1343226633Sdim Type *Ty = LHS->getType(); 1344198090Srdivacky for (int i = S->getNumOperands()-2; i >= 0; --i) { 1345198090Srdivacky // In the case of mixed integer and pointer types, do the 1346198090Srdivacky // rest of the comparisons as integer. 1347198090Srdivacky if (S->getOperand(i)->getType() != Ty) { 1348198090Srdivacky Ty = SE.getEffectiveSCEVType(Ty); 1349198090Srdivacky LHS = InsertNoopCastOfTo(LHS, Ty); 1350198090Srdivacky } 1351194178Sed Value *RHS = expandCodeFor(S->getOperand(i), Ty); 1352226633Sdim Value *ICmp = Builder.CreateICmpUGT(LHS, RHS); 1353202878Srdivacky rememberInstruction(ICmp); 1354195340Sed Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "umax"); 1355202878Srdivacky rememberInstruction(Sel); 1356193323Sed LHS = Sel; 1357193323Sed } 1358198090Srdivacky // In the case of mixed integer and pointer types, cast the 1359198090Srdivacky // final result back to the pointer type. 
1360198090Srdivacky if (LHS->getType() != S->getType()) 1361198090Srdivacky LHS = InsertNoopCastOfTo(LHS, S->getType()); 1362193323Sed return LHS; 1363193323Sed} 1364193323Sed 1365226633SdimValue *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty, 1366205407Srdivacky Instruction *I) { 1367205407Srdivacky BasicBlock::iterator IP = I; 1368205407Srdivacky while (isInsertedInstruction(IP) || isa<DbgInfoIntrinsic>(IP)) 1369205407Srdivacky ++IP; 1370205407Srdivacky Builder.SetInsertPoint(IP->getParent(), IP); 1371205407Srdivacky return expandCodeFor(SH, Ty); 1372205407Srdivacky} 1373205407Srdivacky 1374226633SdimValue *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty) { 1375193323Sed // Expand the code for this SCEV. 1376193323Sed Value *V = expand(SH); 1377193323Sed if (Ty) { 1378193323Sed assert(SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(SH->getType()) && 1379193323Sed "non-trivial casts should be done with the SCEVs directly!"); 1380193323Sed V = InsertNoopCastOfTo(V, Ty); 1381193323Sed } 1382193323Sed return V; 1383193323Sed} 1384193323Sed 1385193323SedValue *SCEVExpander::expand(const SCEV *S) { 1386195098Sed // Compute an insertion point for this SCEV object. Hoist the instructions 1387195098Sed // as far out in the loop nest as possible. 1388195340Sed Instruction *InsertPt = Builder.GetInsertPoint(); 1389195340Sed for (Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock()); ; 1390195098Sed L = L->getParentLoop()) 1391218893Sdim if (SE.isLoopInvariant(S, L)) { 1392195098Sed if (!L) break; 1393206083Srdivacky if (BasicBlock *Preheader = L->getLoopPreheader()) 1394195098Sed InsertPt = Preheader->getTerminator(); 1395195098Sed } else { 1396195098Sed // If the SCEV is computable at this level, insert it into the header 1397195098Sed // after the PHIs (and after any other instructions that we've inserted 1398195098Sed // there) so that it is guaranteed to dominate any user inside the loop. 
1399218893Sdim if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(L)) 1400226633Sdim InsertPt = L->getHeader()->getFirstInsertionPt(); 1401205407Srdivacky while (isInsertedInstruction(InsertPt) || isa<DbgInfoIntrinsic>(InsertPt)) 1402204961Srdivacky InsertPt = llvm::next(BasicBlock::iterator(InsertPt)); 1403195098Sed break; 1404195098Sed } 1405195098Sed 1406195098Sed // Check to see if we already expanded this here. 1407195098Sed std::map<std::pair<const SCEV *, Instruction *>, 1408195098Sed AssertingVH<Value> >::iterator I = 1409195098Sed InsertedExpressions.find(std::make_pair(S, InsertPt)); 1410195340Sed if (I != InsertedExpressions.end()) 1411193323Sed return I->second; 1412195098Sed 1413195340Sed BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); 1414195340Sed BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); 1415195340Sed Builder.SetInsertPoint(InsertPt->getParent(), InsertPt); 1416195340Sed 1417195098Sed // Expand the expression into instructions. 1418193323Sed Value *V = visit(S); 1419195098Sed 1420195098Sed // Remember the expanded value for this SCEV at this location. 1421226633Sdim // 1422226633Sdim // This is independent of PostIncLoops. The mapped value simply materializes 1423226633Sdim // the expression at this insertion point. If the mapped value happened to be 1424226633Sdim // a postinc expansion, it could be reused by a non postinc user, but only if 1425226633Sdim // its insertion point was already at the head of the loop. 
1426226633Sdim InsertedExpressions[std::make_pair(S, InsertPt)] = V; 1427195098Sed 1428203954Srdivacky restoreInsertPoint(SaveInsertBB, SaveInsertPt); 1429193323Sed return V; 1430193323Sed} 1431193574Sed 1432203954Srdivackyvoid SCEVExpander::rememberInstruction(Value *I) { 1433210299Sed if (!PostIncLoops.empty()) 1434210299Sed InsertedPostIncValues.insert(I); 1435210299Sed else 1436203954Srdivacky InsertedValues.insert(I); 1437203954Srdivacky 1438203954Srdivacky // If we just claimed an existing instruction and that instruction had 1439221345Sdim // been the insert point, adjust the insert point forward so that 1440203954Srdivacky // subsequently inserted code will be dominated. 1441203954Srdivacky if (Builder.GetInsertPoint() == I) { 1442203954Srdivacky BasicBlock::iterator It = cast<Instruction>(I); 1443205407Srdivacky do { ++It; } while (isInsertedInstruction(It) || 1444205407Srdivacky isa<DbgInfoIntrinsic>(It)); 1445203954Srdivacky Builder.SetInsertPoint(Builder.GetInsertBlock(), It); 1446203954Srdivacky } 1447203954Srdivacky} 1448203954Srdivacky 1449203954Srdivackyvoid SCEVExpander::restoreInsertPoint(BasicBlock *BB, BasicBlock::iterator I) { 1450204642Srdivacky // If we acquired more instructions since the old insert point was saved, 1451203954Srdivacky // advance past them. 1452205407Srdivacky while (isInsertedInstruction(I) || isa<DbgInfoIntrinsic>(I)) ++I; 1453203954Srdivacky 1454203954Srdivacky Builder.SetInsertPoint(BB, I); 1455203954Srdivacky} 1456203954Srdivacky 1457193574Sed/// getOrInsertCanonicalInductionVariable - This method returns the 1458193574Sed/// canonical induction variable of the specified type for the specified 1459193574Sed/// loop (inserting one if there is none). A canonical induction variable 1460193574Sed/// starts at zero and steps by one on each iteration. 
1461212904SdimPHINode * 1462193574SedSCEVExpander::getOrInsertCanonicalInductionVariable(const Loop *L, 1463226633Sdim Type *Ty) { 1464203954Srdivacky assert(Ty->isIntegerTy() && "Can only insert integer induction variables!"); 1465212904Sdim 1466212904Sdim // Build a SCEV for {0,+,1}<L>. 1467221345Sdim // Conservatively use FlagAnyWrap for now. 1468207618Srdivacky const SCEV *H = SE.getAddRecExpr(SE.getConstant(Ty, 0), 1469221345Sdim SE.getConstant(Ty, 1), L, SCEV::FlagAnyWrap); 1470212904Sdim 1471212904Sdim // Emit code for it. 1472195340Sed BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); 1473195340Sed BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); 1474212904Sdim PHINode *V = cast<PHINode>(expandCodeFor(H, 0, L->getHeader()->begin())); 1475195340Sed if (SaveInsertBB) 1476203954Srdivacky restoreInsertPoint(SaveInsertBB, SaveInsertPt); 1477212904Sdim 1478195098Sed return V; 1479193574Sed} 1480226633Sdim 1481226633Sdim/// hoistStep - Attempt to hoist an IV increment above a potential use. 1482226633Sdim/// 1483226633Sdim/// To successfully hoist, two criteria must be met: 1484226633Sdim/// - IncV operands dominate InsertPos and 1485226633Sdim/// - InsertPos dominates IncV 1486226633Sdim/// 1487226633Sdim/// Meeting the second condition means that we don't need to check all of IncV's 1488226633Sdim/// existing uses (it's moving up in the domtree). 1489226633Sdim/// 1490226633Sdim/// This does not yet recursively hoist the operands, although that would 1491226633Sdim/// not be difficult. 1492226633Sdim/// 1493226633Sdim/// This does not require a SCEVExpander instance and could be replaced by a 1494226633Sdim/// general code-insertion helper. 
1495226633Sdimbool SCEVExpander::hoistStep(Instruction *IncV, Instruction *InsertPos, 1496226633Sdim const DominatorTree *DT) { 1497226633Sdim if (DT->dominates(IncV, InsertPos)) 1498226633Sdim return true; 1499226633Sdim 1500226633Sdim if (!DT->dominates(InsertPos->getParent(), IncV->getParent())) 1501226633Sdim return false; 1502226633Sdim 1503226633Sdim if (IncV->mayHaveSideEffects()) 1504226633Sdim return false; 1505226633Sdim 1506226633Sdim // Attempt to hoist IncV 1507226633Sdim for (User::op_iterator OI = IncV->op_begin(), OE = IncV->op_end(); 1508226633Sdim OI != OE; ++OI) { 1509226633Sdim Instruction *OInst = dyn_cast<Instruction>(OI); 1510226633Sdim if (OInst && !DT->dominates(OInst, InsertPos)) 1511226633Sdim return false; 1512226633Sdim } 1513226633Sdim IncV->moveBefore(InsertPos); 1514226633Sdim return true; 1515226633Sdim} 1516226633Sdim 1517226633Sdim/// replaceCongruentIVs - Check for congruent phis in this loop header and 1518226633Sdim/// replace them with their most canonical representative. Return the number of 1519226633Sdim/// phis eliminated. 1520226633Sdim/// 1521226633Sdim/// This does not depend on any SCEVExpander state but should be used in 1522226633Sdim/// the same context that SCEVExpander is used. 1523226633Sdimunsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT, 1524226633Sdim SmallVectorImpl<WeakVH> &DeadInsts) { 1525226633Sdim unsigned NumElim = 0; 1526226633Sdim DenseMap<const SCEV *, PHINode *> ExprToIVMap; 1527226633Sdim for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) { 1528226633Sdim PHINode *Phi = cast<PHINode>(I); 1529226633Sdim if (!SE.isSCEVable(Phi->getType())) 1530226633Sdim continue; 1531226633Sdim 1532226633Sdim PHINode *&OrigPhiRef = ExprToIVMap[SE.getSCEV(Phi)]; 1533226633Sdim if (!OrigPhiRef) { 1534226633Sdim OrigPhiRef = Phi; 1535226633Sdim continue; 1536226633Sdim } 1537226633Sdim 1538226633Sdim // If one phi derives from the other via GEPs, types may differ. 
1539226633Sdim // We could consider adding a bitcast here to handle it. 1540226633Sdim if (OrigPhiRef->getType() != Phi->getType()) 1541226633Sdim continue; 1542226633Sdim 1543226633Sdim if (BasicBlock *LatchBlock = L->getLoopLatch()) { 1544226633Sdim Instruction *OrigInc = 1545226633Sdim cast<Instruction>(OrigPhiRef->getIncomingValueForBlock(LatchBlock)); 1546226633Sdim Instruction *IsomorphicInc = 1547226633Sdim cast<Instruction>(Phi->getIncomingValueForBlock(LatchBlock)); 1548226633Sdim 1549226633Sdim // If this phi is more canonical, swap it with the original. 1550226633Sdim if (!isExpandedAddRecExprPHI(OrigPhiRef, OrigInc, L) 1551226633Sdim && isExpandedAddRecExprPHI(Phi, IsomorphicInc, L)) { 1552226633Sdim std::swap(OrigPhiRef, Phi); 1553226633Sdim std::swap(OrigInc, IsomorphicInc); 1554226633Sdim } 1555226633Sdim // Replacing the congruent phi is sufficient because acyclic redundancy 1556226633Sdim // elimination, CSE/GVN, should handle the rest. However, once SCEV proves 1557226633Sdim // that a phi is congruent, it's often the head of an IV user cycle that 1558226633Sdim // is isomorphic with the original phi. So it's worth eagerly cleaning up 1559226633Sdim // the common case of a single IV increment. 
1560226633Sdim if (OrigInc != IsomorphicInc && 1561226633Sdim OrigInc->getType() == IsomorphicInc->getType() && 1562226633Sdim SE.getSCEV(OrigInc) == SE.getSCEV(IsomorphicInc) && 1563226633Sdim hoistStep(OrigInc, IsomorphicInc, DT)) { 1564226633Sdim DEBUG_WITH_TYPE(DebugType, dbgs() 1565226633Sdim << "INDVARS: Eliminated congruent iv.inc: " 1566226633Sdim << *IsomorphicInc << '\n'); 1567226633Sdim IsomorphicInc->replaceAllUsesWith(OrigInc); 1568226633Sdim DeadInsts.push_back(IsomorphicInc); 1569226633Sdim } 1570226633Sdim } 1571226633Sdim DEBUG_WITH_TYPE(DebugType, dbgs() 1572226633Sdim << "INDVARS: Eliminated congruent iv: " << *Phi << '\n'); 1573226633Sdim ++NumElim; 1574226633Sdim Phi->replaceAllUsesWith(OrigPhiRef); 1575226633Sdim DeadInsts.push_back(Phi); 1576226633Sdim } 1577226633Sdim return NumElim; 1578226633Sdim} 1579