//===-- LoopIdiomRecognize.cpp - Loop idiom recognition -------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass implements an idiom recognizer that transforms simple loops into a
// non-loop form.  In cases where this kicks in, it can be a significant
// performance win.
//
//===----------------------------------------------------------------------===//
//
// TODO List:
//
// Future loop memory idioms to recognize:
//   memcmp, memmove, strlen, etc.
// Future floating point idioms to recognize in -ffast-math mode:
//   fpowi
// Future integer operation idioms to recognize:
//   ctpop, ctlz, cttz
//
// Beware that isel's default lowering for ctpop is highly inefficient for
// i64 and larger types when i64 is legal and the value has few bits set.  It
// would be good to enhance isel to emit a loop for ctpop in this case.
//
// We should enhance the memset/memcpy recognition to handle multiple stores in
// the loop.  This would handle things like:
//   void foo(_Complex float *P)
//     for (i) { __real__(*P) = 0;  __imag__(*P) = 0; }
//
// We should enhance this to handle negative strides through memory.
// Alternatively (and perhaps better) we could rely on an earlier pass to force
// forward iteration through memory, which is generally better for cache
// behavior.  Negative strides *do* happen for memset/memcpy loops.
//
// This could recognize common matrix multiplies and dot product idioms and
// replace them with calls to BLAS (if linked in??).
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "loop-idiom"
#include "llvm/Transforms/Scalar.h"
#include "llvm/IRBuilder.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Module.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;

STATISTIC(NumMemSet, "Number of memset's formed from loop stores");
STATISTIC(NumMemCpy, "Number of memcpy's formed from loop load+stores");

namespace {
  class LoopIdiomRecognize : public LoopPass {
    Loop *CurLoop;
    const TargetData *TD;
    DominatorTree *DT;
    ScalarEvolution *SE;
    TargetLibraryInfo *TLI;
  public:
    static char ID;
    explicit LoopIdiomRecognize() : LoopPass(ID) {
      initializeLoopIdiomRecognizePass(*PassRegistry::getPassRegistry());
    }

    bool runOnLoop(Loop *L, LPPassManager &LPM);
    bool runOnLoopBlock(BasicBlock *BB, const SCEV *BECount,
                        SmallVectorImpl<BasicBlock*> &ExitBlocks);

    bool processLoopStore(StoreInst *SI, const SCEV *BECount);
    bool processLoopMemSet(MemSetInst *MSI, const SCEV *BECount);
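    // For illustration, these are the simplest cases the helpers below
    // target (assuming a unit-stride store over a byte array; wider element
    // types work the same way, scaled by the store size):
    //   for (i = 0; i != n; ++i) A[i] = 0;     // -> memset(A, 0, n)
    //   for (i = 0; i != n; ++i) A[i] = B[i];  // -> memcpy(A, B, n)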
    bool processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
                                 unsigned StoreAlignment,
                                 Value *SplatValue, Instruction *TheStore,
                                 const SCEVAddRecExpr *Ev,
                                 const SCEV *BECount);
    bool processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
                                    const SCEVAddRecExpr *StoreEv,
                                    const SCEVAddRecExpr *LoadEv,
                                    const SCEV *BECount);

    /// This transformation requires natural loop information & requires that
    /// loop preheaders be inserted into the CFG.
    ///
    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.addRequired<LoopInfo>();
      AU.addPreserved<LoopInfo>();
      AU.addRequiredID(LoopSimplifyID);
      AU.addPreservedID(LoopSimplifyID);
      AU.addRequiredID(LCSSAID);
      AU.addPreservedID(LCSSAID);
      AU.addRequired<AliasAnalysis>();
      AU.addPreserved<AliasAnalysis>();
      AU.addRequired<ScalarEvolution>();
      AU.addPreserved<ScalarEvolution>();
      AU.addPreserved<DominatorTree>();
      AU.addRequired<DominatorTree>();
      AU.addRequired<TargetLibraryInfo>();
    }
  };
}

char LoopIdiomRecognize::ID = 0;
INITIALIZE_PASS_BEGIN(LoopIdiomRecognize, "loop-idiom", "Recognize loop idioms",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(LoopInfo)
INITIALIZE_PASS_DEPENDENCY(DominatorTree)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_DEPENDENCY(LCSSA)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(LoopIdiomRecognize, "loop-idiom", "Recognize loop idioms",
                    false, false)

Pass *llvm::createLoopIdiomPass() { return new LoopIdiomRecognize(); }

/// deleteDeadInstruction - Delete this instruction.  Before we do, go through
/// and zero out all the operands of this instruction.  If any of them become
/// dead, delete them and the computation tree that feeds them.
///
static void deleteDeadInstruction(Instruction *I, ScalarEvolution &SE,
                                  const TargetLibraryInfo *TLI) {
  SmallVector<Instruction*, 32> NowDeadInsts;

  NowDeadInsts.push_back(I);

  // Before we touch this instruction, remove it from SE!
  do {
    Instruction *DeadInst = NowDeadInsts.pop_back_val();

    // This instruction is dead, zap it, in stages.  Start by removing it from
    // SCEV.
    SE.forgetValue(DeadInst);

    for (unsigned op = 0, e = DeadInst->getNumOperands(); op != e; ++op) {
      Value *Op = DeadInst->getOperand(op);
      DeadInst->setOperand(op, 0);

      // If this operand just became dead, add it to the NowDeadInsts list.
      if (!Op->use_empty()) continue;

      if (Instruction *OpI = dyn_cast<Instruction>(Op))
        if (isInstructionTriviallyDead(OpI, TLI))
          NowDeadInsts.push_back(OpI);
    }

    DeadInst->eraseFromParent();

  } while (!NowDeadInsts.empty());
}

/// deleteIfDeadInstruction - If the specified value is a dead instruction,
/// delete it and any recursively used instructions.
static void deleteIfDeadInstruction(Value *V, ScalarEvolution &SE,
                                    const TargetLibraryInfo *TLI) {
  if (Instruction *I = dyn_cast<Instruction>(V))
    if (isInstructionTriviallyDead(I, TLI))
      deleteDeadInstruction(I, SE, TLI);
}

bool LoopIdiomRecognize::runOnLoop(Loop *L, LPPassManager &LPM) {
  CurLoop = L;

  // If the loop could not be converted to canonical form, it must have an
  // indirectbr in it; just give up.
  if (!L->getLoopPreheader())
    return false;

  // Disable loop idiom recognition if the function's name is a common idiom.
  // Otherwise we could rewrite the body of memset itself into a call to
  // memset, creating infinite recursion.
  StringRef Name = L->getHeader()->getParent()->getName();
  if (Name == "memset" || Name == "memcpy")
    return false;

  // The trip count of the loop must be analyzable.
  SE = &getAnalysis<ScalarEvolution>();
  if (!SE->hasLoopInvariantBackedgeTakenCount(L))
    return false;
  const SCEV *BECount = SE->getBackedgeTakenCount(L);
  if (isa<SCEVCouldNotCompute>(BECount)) return false;

  // If this loop executes exactly one time, then it should be peeled, not
  // optimized by this pass.
  if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
    if (BECst->getValue()->getValue() == 0)
      return false;
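  // Note: SCEV's backedge-taken count is one less than the trip count.  For
  // example, "for (i = 0; i != n; ++i)" has a backedge-taken count of n-1,
  // so a constant BECount of zero means the body runs exactly once.  This is
  // also why the byte counts computed later are (BECount+1)*StoreSize.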

  // We require target data for now.
  TD = getAnalysisIfAvailable<TargetData>();
  if (TD == 0) return false;

  DT = &getAnalysis<DominatorTree>();
  LoopInfo &LI = getAnalysis<LoopInfo>();
  TLI = &getAnalysis<TargetLibraryInfo>();

  SmallVector<BasicBlock*, 8> ExitBlocks;
  CurLoop->getUniqueExitBlocks(ExitBlocks);

  DEBUG(dbgs() << "loop-idiom Scanning: F["
               << L->getHeader()->getParent()->getName()
               << "] Loop %" << L->getHeader()->getName() << "\n");

  bool MadeChange = false;
  // Scan all the blocks in the loop that are not in subloops.
  for (Loop::block_iterator BI = L->block_begin(), E = L->block_end(); BI != E;
       ++BI) {
    // Ignore blocks in subloops.
    if (LI.getLoopFor(*BI) != CurLoop)
      continue;

    MadeChange |= runOnLoopBlock(*BI, BECount, ExitBlocks);
  }
  return MadeChange;
}

/// runOnLoopBlock - Process the specified block, which lives in a counted loop
/// with the specified backedge count.  This block is known to be in the
/// current loop and not in any subloops.
bool LoopIdiomRecognize::runOnLoopBlock(BasicBlock *BB, const SCEV *BECount,
                                     SmallVectorImpl<BasicBlock*> &ExitBlocks) {
  // We can only promote stores in this block if they are unconditionally
  // executed in the loop.  For a block to be unconditionally executed, it has
  // to dominate all the exit blocks of the loop.  Verify this now.
  for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
    if (!DT->dominates(BB, ExitBlocks[i]))
      return false;
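  // For example, a store guarded by "if (x)" inside the loop body lives in a
  // block that does not dominate the loop exits; promoting it would introduce
  // stores the original program may never have executed, so the whole block
  // is rejected above.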

  bool MadeChange = false;
  for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) {
    Instruction *Inst = I++;
    // Look for store instructions, which may be optimized to memset/memcpy.
    if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
      WeakVH InstPtr(I);
      if (!processLoopStore(SI, BECount)) continue;
      MadeChange = true;

      // If processing the store invalidated our iterator, start over from the
      // top of the block.
      if (InstPtr == 0)
        I = BB->begin();
      continue;
    }

    // Look for memset instructions, which may be optimized to a larger memset.
    if (MemSetInst *MSI = dyn_cast<MemSetInst>(Inst)) {
      WeakVH InstPtr(I);
      if (!processLoopMemSet(MSI, BECount)) continue;
      MadeChange = true;

      // If processing the memset invalidated our iterator, start over from the
      // top of the block.
      if (InstPtr == 0)
        I = BB->begin();
      continue;
    }
  }

  return MadeChange;
}


/// processLoopStore - See if this store can be promoted to a memset or memcpy.
bool LoopIdiomRecognize::processLoopStore(StoreInst *SI, const SCEV *BECount) {
  if (!SI->isSimple()) return false;

  Value *StoredVal = SI->getValueOperand();
  Value *StorePtr = SI->getPointerOperand();

  // Reject stores that are not a whole number of bytes in size, or that are
  // so large that they overflow an unsigned.
  uint64_t SizeInBits = TD->getTypeSizeInBits(StoredVal->getType());
  if ((SizeInBits & 7) || (SizeInBits >> 32) != 0)
    return false;

  // See if the pointer expression is an AddRec like {base,+,1} on the current
  // loop, which indicates a strided store.  If we have something else, it's a
  // random store we can't handle.
  const SCEVAddRecExpr *StoreEv =
    dyn_cast<SCEVAddRecExpr>(SE->getSCEV(StorePtr));
  if (StoreEv == 0 || StoreEv->getLoop() != CurLoop || !StoreEv->isAffine())
    return false;

  // Check to see if the stride matches the size of the store.  If so, then we
  // know that every byte is touched in the loop.
  unsigned StoreSize = (unsigned)SizeInBits >> 3;
  const SCEVConstant *Stride = dyn_cast<SCEVConstant>(StoreEv->getOperand(1));

  if (Stride == 0 || StoreSize != Stride->getValue()->getValue()) {
    // TODO: Could also handle negative stride here someday; that will require
    // the validity check in mayLoopAccessLocation to be updated though.
    // Enable this to print exact negative strides.
    if (0 && Stride && StoreSize == -Stride->getValue()->getValue()) {
      dbgs() << "NEGATIVE STRIDE: " << *SI << "\n";
      dbgs() << "BB: " << *SI->getParent();
    }

    return false;
  }
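  // For example, "for (i) A[i] = V" over i32 elements advances the pointer 4
  // bytes per iteration and stores 4 bytes, so every byte in the range is
  // written; "for (i) A[2*i] = V" advances 8 bytes but stores only 4, leaving
  // gaps, and is rejected by the stride check above.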

  // See if we can optimize just this store in isolation.
  if (processLoopStridedStore(StorePtr, StoreSize, SI->getAlignment(),
                              StoredVal, SI, StoreEv, BECount))
    return true;

  // If the stored value is a strided load in the same loop with the same
  // stride, then this may be transformable into a memcpy.  This kicks in for
  // stuff like
  //   for (i) A[i] = B[i];
  if (LoadInst *LI = dyn_cast<LoadInst>(StoredVal)) {
    const SCEVAddRecExpr *LoadEv =
      dyn_cast<SCEVAddRecExpr>(SE->getSCEV(LI->getOperand(0)));
    if (LoadEv && LoadEv->getLoop() == CurLoop && LoadEv->isAffine() &&
        StoreEv->getOperand(1) == LoadEv->getOperand(1) && LI->isSimple())
      if (processLoopStoreOfLoopLoad(SI, StoreSize, StoreEv, LoadEv, BECount))
        return true;
  }
  //errs() << "UNHANDLED strided store: " << *StoreEv << " - " << *SI << "\n";

  return false;
}

/// processLoopMemSet - See if this memset can be promoted to a large memset.
bool LoopIdiomRecognize::
processLoopMemSet(MemSetInst *MSI, const SCEV *BECount) {
  // We can only handle non-volatile memsets with a constant size.
  if (MSI->isVolatile() || !isa<ConstantInt>(MSI->getLength())) return false;

  // If we're not allowed to hack on memset, we fail.
  if (!TLI->has(LibFunc::memset))
    return false;

  Value *Pointer = MSI->getDest();

  // See if the pointer expression is an AddRec like {base,+,1} on the current
  // loop, which indicates a strided store.  If we have something else, it's a
  // random store we can't handle.
  const SCEVAddRecExpr *Ev = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(Pointer));
  if (Ev == 0 || Ev->getLoop() != CurLoop || !Ev->isAffine())
    return false;

  // Reject memsets that are so large that they overflow an unsigned.
  uint64_t SizeInBytes = cast<ConstantInt>(MSI->getLength())->getZExtValue();
  if ((SizeInBytes >> 32) != 0)
    return false;

  // Check to see if the stride matches the size of the memset.  If so, then
  // we know that every byte is touched in the loop.
  const SCEVConstant *Stride = dyn_cast<SCEVConstant>(Ev->getOperand(1));

  // TODO: Could also handle negative stride here someday; that will require
  // the validity check in mayLoopAccessLocation to be updated though.
  if (Stride == 0 || MSI->getLength() != Stride->getValue())
    return false;

  return processLoopStridedStore(Pointer, (unsigned)SizeInBytes,
                                 MSI->getAlignment(), MSI->getValue(),
                                 MSI, Ev, BECount);
}


/// mayLoopAccessLocation - Return true if the specified loop might access the
/// specified pointer location, which is a loop-strided access.  The 'Access'
/// argument specifies what the verboten forms of access are (read or write).
static bool mayLoopAccessLocation(Value *Ptr,AliasAnalysis::ModRefResult Access,
                                  Loop *L, const SCEV *BECount,
                                  unsigned StoreSize, AliasAnalysis &AA,
                                  Instruction *IgnoredStore) {
  // Get the location that may be stored across the loop.  Since the access is
  // strided positively through memory, we say that the modified location
  // starts at the pointer and has infinite size.
  uint64_t AccessSize = AliasAnalysis::UnknownSize;

  // If the loop iterates a fixed number of times, we can refine the access
  // size to be exactly the size of the memset, which is (BECount+1)*StoreSize.
  if (const SCEVConstant *BECst = dyn_cast<SCEVConstant>(BECount))
    AccessSize = (BECst->getValue()->getZExtValue()+1)*StoreSize;

  // TODO: For this to be really effective, we have to dive into the pointer
  // operand in the store.  A store to &A[i] in a loop of trip count 100 will
  // currently be reported as may-aliasing a store to &A[100]; we would need
  // StoreLoc to be "A" with a size of 100, which would then no-alias a store
  // to &A[100].
  AliasAnalysis::Location StoreLoc(Ptr, AccessSize);

  for (Loop::block_iterator BI = L->block_begin(), E = L->block_end(); BI != E;
       ++BI)
    for (BasicBlock::iterator I = (*BI)->begin(), E = (*BI)->end(); I != E; ++I)
      if (&*I != IgnoredStore &&
          (AA.getModRefInfo(I, StoreLoc) & Access))
        return true;

  return false;
}

/// getMemSetPatternValue - If a strided store of the specified value is safe
/// to turn into a memset_pattern16, return a ConstantArray of 16 bytes that
/// should be passed in.  Otherwise, return null.
///
/// Note that we don't ever attempt to use memset_pattern8 or 4, because these
/// just replicate their input array and then pass on to memset_pattern16.
static Constant *getMemSetPatternValue(Value *V, const TargetData &TD) {
  // If the value isn't a constant, we can't promote it to being in a constant
  // array.  We could theoretically do a store to an alloca or something, but
  // that doesn't seem worthwhile.
  Constant *C = dyn_cast<Constant>(V);
  if (C == 0) return 0;

  // Only handle simple values that are a power of two bytes in size.
  uint64_t Size = TD.getTypeSizeInBits(V->getType());
  if (Size == 0 || (Size & 7) || (Size & (Size-1)))
    return 0;

  // Don't care enough about darwin/ppc to implement this.
  if (TD.isBigEndian())
    return 0;

  // Convert to size in bytes.
  Size /= 8;

  // TODO: If the constant is larger than 16 bytes, we can try slicing it in
  // half to see if the top and bottom are the same (e.g. for vectors and
  // large integers).
  if (Size > 16) return 0;

  // If the constant is exactly 16 bytes, just use it.
  if (Size == 16) return C;

  // Otherwise, we'll use an array of the constants.
  unsigned ArraySize = 16/Size;
  ArrayType *AT = ArrayType::get(V->getType(), ArraySize);
  return ConstantArray::get(AT, std::vector<Constant*>(ArraySize, C));
}
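// For example, a 4-byte i32 constant C is replicated into the 4-element array
// [C, C, C, C], yielding the 16-byte pattern memset_pattern16 expects; an
// 8-byte constant would be replicated twice.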


/// processLoopStridedStore - We see a strided store of some value.  If we can
/// transform this into a memset or memset_pattern in the loop preheader, do
/// so.
bool LoopIdiomRecognize::
processLoopStridedStore(Value *DestPtr, unsigned StoreSize,
                        unsigned StoreAlignment, Value *StoredVal,
                        Instruction *TheStore, const SCEVAddRecExpr *Ev,
                        const SCEV *BECount) {

  // If the stored value is a byte-wise value (like i32 -1), then it may be
  // turned into a memset of i8 -1, assuming that all the consecutive bytes
  // are stored.  A store of i32 0x01020304 can never be turned into a memset,
  // but it can be turned into memset_pattern if the target supports it.
  Value *SplatValue = isBytewiseValue(StoredVal);
  Constant *PatternValue = 0;

  // If we're allowed to form a memset, and the stored value would be
  // acceptable for memset, use it.
  if (SplatValue && TLI->has(LibFunc::memset) &&
      // Verify that the stored value is loop invariant.  If not, we can't
      // promote the memset.
      CurLoop->isLoopInvariant(SplatValue)) {
    // Keep and use SplatValue.
    PatternValue = 0;
  } else if (TLI->has(LibFunc::memset_pattern16) &&
             (PatternValue = getMemSetPatternValue(StoredVal, *TD))) {
    // It looks like we can use PatternValue!
    SplatValue = 0;
  } else {
    // Otherwise, this isn't an idiom we can transform.  For example, we can't
    // do anything with a 3-byte store.
    return false;
  }
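  // For example, a store of i32 -1 is all 0xFF bytes and becomes a plain
  // memset; a store of i32 0x01020304 has distinct bytes and can only become
  // memset_pattern16; a 3-byte store matches neither and falls out above.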

  // The trip count of the loop and the base pointer of the addrec SCEV are
  // guaranteed to be loop invariant, which means that they should dominate
  // the header.  This allows us to insert code for it in the preheader.
  BasicBlock *Preheader = CurLoop->getLoopPreheader();
  IRBuilder<> Builder(Preheader->getTerminator());
  SCEVExpander Expander(*SE, "loop-idiom");

  // Okay, we have a strided store "p[i]" of a splattable value.  We can turn
  // this into a memset in the loop preheader now if we want.  However, this
  // would be unsafe to do if there is anything else in the loop that may read
  // or write to the aliased location.  Check for any overlap by generating
  // the base pointer and checking the region.
  unsigned AddrSpace = cast<PointerType>(DestPtr->getType())->getAddressSpace();
  Value *BasePtr =
    Expander.expandCodeFor(Ev->getStart(), Builder.getInt8PtrTy(AddrSpace),
                           Preheader->getTerminator());


  if (mayLoopAccessLocation(BasePtr, AliasAnalysis::ModRef,
                            CurLoop, BECount,
                            StoreSize, getAnalysis<AliasAnalysis>(), TheStore)){
    Expander.clear();
    // If we generated new code for the base pointer, clean up.
    deleteIfDeadInstruction(BasePtr, *SE, TLI);
    return false;
  }

  // Okay, everything looks good, insert the memset.

  // The # stored bytes is (BECount+1)*Size.  Expand the trip count out to
  // pointer size if it isn't already.
  Type *IntPtr = TD->getIntPtrType(DestPtr->getContext());
  BECount = SE->getTruncateOrZeroExtend(BECount, IntPtr);

  const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtr, 1),
                                         SCEV::FlagNUW);
  if (StoreSize != 1)
    NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtr, StoreSize),
                               SCEV::FlagNUW);

  Value *NumBytes =
    Expander.expandCodeFor(NumBytesS, IntPtr, Preheader->getTerminator());

  CallInst *NewCall;
  if (SplatValue)
    NewCall = Builder.CreateMemSet(BasePtr, SplatValue,NumBytes,StoreAlignment);
  else {
    Module *M = TheStore->getParent()->getParent()->getParent();
    Value *MSP = M->getOrInsertFunction("memset_pattern16",
                                        Builder.getVoidTy(),
                                        Builder.getInt8PtrTy(),
                                        Builder.getInt8PtrTy(), IntPtr,
                                        (void*)0);

    // Otherwise we should form a memset_pattern16.  PatternValue is known to
    // be a constant array of 16 bytes.  Plop the value into a mergable global.
    GlobalVariable *GV = new GlobalVariable(*M, PatternValue->getType(), true,
                                            GlobalValue::InternalLinkage,
                                            PatternValue, ".memset_pattern");
    GV->setUnnamedAddr(true); // Ok to merge these.
    GV->setAlignment(16);
    Value *PatternPtr = ConstantExpr::getBitCast(GV, Builder.getInt8PtrTy());
    NewCall = Builder.CreateCall3(MSP, BasePtr, PatternPtr, NumBytes);
  }

  DEBUG(dbgs() << "  Formed memset: " << *NewCall << "\n"
               << "    from store to: " << *Ev << " at: " << *TheStore << "\n");
  NewCall->setDebugLoc(TheStore->getDebugLoc());

  // Okay, the memset has been formed.  Zap the original store and anything
  // that feeds into it.
  deleteDeadInstruction(TheStore, *SE, TLI);
  ++NumMemSet;
  return true;
}

/// processLoopStoreOfLoopLoad - We see a strided store whose value is a
/// same-strided load.
bool LoopIdiomRecognize::
processLoopStoreOfLoopLoad(StoreInst *SI, unsigned StoreSize,
                           const SCEVAddRecExpr *StoreEv,
                           const SCEVAddRecExpr *LoadEv,
                           const SCEV *BECount) {
  // If we're not allowed to form memcpy, we fail.
  if (!TLI->has(LibFunc::memcpy))
    return false;

  LoadInst *LI = cast<LoadInst>(SI->getValueOperand());

  // The trip count of the loop and the base pointer of the addrec SCEV are
  // guaranteed to be loop invariant, which means that they should dominate
  // the header.  This allows us to insert code for it in the preheader.
  BasicBlock *Preheader = CurLoop->getLoopPreheader();
  IRBuilder<> Builder(Preheader->getTerminator());
  SCEVExpander Expander(*SE, "loop-idiom");

  // Okay, we have a strided store "p[i]" of a loaded value.  We can turn
  // this into a memcpy in the loop preheader now if we want.  However, this
  // would be unsafe to do if there is anything else in the loop that may read
  // or write the memory region we're storing to.  This includes the load that
  // feeds the stores.  Check for an alias by generating the base address and
  // checking everything.
  Value *StoreBasePtr =
    Expander.expandCodeFor(StoreEv->getStart(),
                           Builder.getInt8PtrTy(SI->getPointerAddressSpace()),
                           Preheader->getTerminator());

  if (mayLoopAccessLocation(StoreBasePtr, AliasAnalysis::ModRef,
                            CurLoop, BECount, StoreSize,
                            getAnalysis<AliasAnalysis>(), SI)) {
    Expander.clear();
    // If we generated new code for the base pointer, clean up.
    deleteIfDeadInstruction(StoreBasePtr, *SE, TLI);
    return false;
  }

  // For a memcpy, we have to make sure that the input array is not being
  // mutated by the loop.
  Value *LoadBasePtr =
    Expander.expandCodeFor(LoadEv->getStart(),
                           Builder.getInt8PtrTy(LI->getPointerAddressSpace()),
                           Preheader->getTerminator());

  if (mayLoopAccessLocation(LoadBasePtr, AliasAnalysis::Mod, CurLoop, BECount,
                            StoreSize, getAnalysis<AliasAnalysis>(), SI)) {
    Expander.clear();
    // If we generated new code for the base pointer, clean up.
    deleteIfDeadInstruction(LoadBasePtr, *SE, TLI);
    deleteIfDeadInstruction(StoreBasePtr, *SE, TLI);
    return false;
  }
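  // Note that the load region is checked only against writes (Mod): other
  // loads from the source array are harmless, but any store into it inside
  // the loop would change the bytes the memcpy is expected to copy.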

  // Okay, everything is safe, we can transform this!


  // The # stored bytes is (BECount+1)*Size.  Expand the trip count out to
  // pointer size if it isn't already.
  Type *IntPtr = TD->getIntPtrType(SI->getContext());
  BECount = SE->getTruncateOrZeroExtend(BECount, IntPtr);

  const SCEV *NumBytesS = SE->getAddExpr(BECount, SE->getConstant(IntPtr, 1),
                                         SCEV::FlagNUW);
  if (StoreSize != 1)
    NumBytesS = SE->getMulExpr(NumBytesS, SE->getConstant(IntPtr, StoreSize),
                               SCEV::FlagNUW);

  Value *NumBytes =
    Expander.expandCodeFor(NumBytesS, IntPtr, Preheader->getTerminator());

  CallInst *NewCall =
    Builder.CreateMemCpy(StoreBasePtr, LoadBasePtr, NumBytes,
                         std::min(SI->getAlignment(), LI->getAlignment()));
  NewCall->setDebugLoc(SI->getDebugLoc());

  DEBUG(dbgs() << "  Formed memcpy: " << *NewCall << "\n"
               << "    from load ptr=" << *LoadEv << " at: " << *LI << "\n"
               << "    from store ptr=" << *StoreEv << " at: " << *SI << "\n");


  // Okay, the memcpy has been formed.  Zap the original store and anything
  // that feeds into it.
  deleteDeadInstruction(SI, *SE, TLI);
  ++NumMemCpy;
  return true;
}
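// A quick way to exercise this pass in isolation (assuming an opt binary
// built from this tree with the legacy pass manager) is:
//   opt -loop-idiom -S input.ll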