1193323Sed//===- MemCpyOptimizer.cpp - Optimize use of memcpy and friends -----------===// 2193323Sed// 3193323Sed// The LLVM Compiler Infrastructure 4193323Sed// 5193323Sed// This file is distributed under the University of Illinois Open Source 6193323Sed// License. See LICENSE.TXT for details. 7193323Sed// 8193323Sed//===----------------------------------------------------------------------===// 9193323Sed// 10193323Sed// This pass performs various transformations related to eliminating memcpy 11193323Sed// calls, or transforming sets of stores into memset's. 12193323Sed// 13193323Sed//===----------------------------------------------------------------------===// 14193323Sed 15193323Sed#define DEBUG_TYPE "memcpyopt" 16193323Sed#include "llvm/Transforms/Scalar.h" 17193323Sed#include "llvm/ADT/SmallVector.h" 18193323Sed#include "llvm/ADT/Statistic.h" 19239462Sdim#include "llvm/Analysis/AliasAnalysis.h" 20193323Sed#include "llvm/Analysis/Dominators.h" 21193323Sed#include "llvm/Analysis/MemoryDependenceAnalysis.h" 22218893Sdim#include "llvm/Analysis/ValueTracking.h" 23249423Sdim#include "llvm/IR/DataLayout.h" 24249423Sdim#include "llvm/IR/GlobalVariable.h" 25249423Sdim#include "llvm/IR/IRBuilder.h" 26249423Sdim#include "llvm/IR/Instructions.h" 27249423Sdim#include "llvm/IR/IntrinsicInst.h" 28193323Sed#include "llvm/Support/Debug.h" 29193323Sed#include "llvm/Support/GetElementPtrTypeIterator.h" 30198090Srdivacky#include "llvm/Support/raw_ostream.h" 31221345Sdim#include "llvm/Target/TargetLibraryInfo.h" 32239462Sdim#include "llvm/Transforms/Utils/Local.h" 33193323Sed#include <list> 34193323Sedusing namespace llvm; 35193323Sed 36193323SedSTATISTIC(NumMemCpyInstr, "Number of memcpy instructions deleted"); 37193323SedSTATISTIC(NumMemSetInfer, "Number of memsets inferred"); 38198090SrdivackySTATISTIC(NumMoveToCpy, "Number of memmoves converted to memcpy"); 39218893SdimSTATISTIC(NumCpyToSet, "Number of memcpys converted to memset"); 40193323Sed 41243830Sdimstatic int64_t GetOffsetFromIndex(const GEPOperator *GEP, unsigned Idx, 42243830Sdim bool &VariableIdxFound, const DataLayout &TD){ 43193323Sed // Skip over the first indices. 44193323Sed gep_type_iterator GTI = gep_type_begin(GEP); 45193323Sed for (unsigned i = 1; i != Idx; ++i, ++GTI) 46193323Sed /*skip along*/; 47239462Sdim 48193323Sed // Compute the offset implied by the rest of the indices. 49193323Sed int64_t Offset = 0; 50193323Sed for (unsigned i = Idx, e = GEP->getNumOperands(); i != e; ++i, ++GTI) { 51193323Sed ConstantInt *OpC = dyn_cast<ConstantInt>(GEP->getOperand(i)); 52193323Sed if (OpC == 0) 53193323Sed return VariableIdxFound = true; 54193323Sed if (OpC->isZero()) continue; // No offset. 55193323Sed 56193323Sed // Handle struct indices, which add their field offset to the pointer. 57226633Sdim if (StructType *STy = dyn_cast<StructType>(*GTI)) { 58193323Sed Offset += TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue()); 59193323Sed continue; 60193323Sed } 61239462Sdim 62193323Sed // Otherwise, we have a sequential type like an array or vector. Multiply 63193323Sed // the index by the ElementSize. 64193323Sed uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType()); 65193323Sed Offset += Size*OpC->getSExtValue(); 66193323Sed } 67193323Sed 68193323Sed return Offset; 69193323Sed} 70193323Sed 71193323Sed/// IsPointerOffset - Return true if Ptr1 is provably equal to Ptr2 plus a 72193323Sed/// constant offset, and return that constant offset. For example, Ptr1 might 73193323Sed/// be &A[42], and Ptr2 might be &A[40]. In this case offset would be -8. 74193323Sedstatic bool IsPointerOffset(Value *Ptr1, Value *Ptr2, int64_t &Offset, 75243830Sdim const DataLayout &TD) { 76218893Sdim Ptr1 = Ptr1->stripPointerCasts(); 77218893Sdim Ptr2 = Ptr2->stripPointerCasts(); 78243830Sdim GEPOperator *GEP1 = dyn_cast<GEPOperator>(Ptr1); 79243830Sdim GEPOperator *GEP2 = dyn_cast<GEPOperator>(Ptr2); 80239462Sdim 81218893Sdim bool VariableIdxFound = false; 82218893Sdim 83218893Sdim // If one pointer is a GEP and the other isn't, then see if the GEP is a 84218893Sdim // constant offset from the base, as in "P" and "gep P, 1". 85218893Sdim if (GEP1 && GEP2 == 0 && GEP1->getOperand(0)->stripPointerCasts() == Ptr2) { 86218893Sdim Offset = -GetOffsetFromIndex(GEP1, 1, VariableIdxFound, TD); 87218893Sdim return !VariableIdxFound; 88218893Sdim } 89218893Sdim 90218893Sdim if (GEP2 && GEP1 == 0 && GEP2->getOperand(0)->stripPointerCasts() == Ptr1) { 91218893Sdim Offset = GetOffsetFromIndex(GEP2, 1, VariableIdxFound, TD); 92218893Sdim return !VariableIdxFound; 93218893Sdim } 94239462Sdim 95193323Sed // Right now we handle the case when Ptr1/Ptr2 are both GEPs with an identical 96193323Sed // base. After that base, they may have some number of common (and 97193323Sed // potentially variable) indices. After that they handle some constant 98193323Sed // offset, which determines their offset from each other. At this point, we 99193323Sed // handle no other case. 100193323Sed if (!GEP1 || !GEP2 || GEP1->getOperand(0) != GEP2->getOperand(0)) 101193323Sed return false; 102239462Sdim 103193323Sed // Skip any common indices and track the GEP types. 104193323Sed unsigned Idx = 1; 105193323Sed for (; Idx != GEP1->getNumOperands() && Idx != GEP2->getNumOperands(); ++Idx) 106193323Sed if (GEP1->getOperand(Idx) != GEP2->getOperand(Idx)) 107193323Sed break; 108193323Sed 109193323Sed int64_t Offset1 = GetOffsetFromIndex(GEP1, Idx, VariableIdxFound, TD); 110193323Sed int64_t Offset2 = GetOffsetFromIndex(GEP2, Idx, VariableIdxFound, TD); 111193323Sed if (VariableIdxFound) return false; 112239462Sdim 113193323Sed Offset = Offset2-Offset1; 114193323Sed return true; 115193323Sed} 116193323Sed 117193323Sed 118193323Sed/// MemsetRange - Represents a range of memset'd bytes with the ByteVal value. 119193323Sed/// This allows us to analyze stores like: 120193323Sed/// store 0 -> P+1 121193323Sed/// store 0 -> P+0 122193323Sed/// store 0 -> P+3 123193323Sed/// store 0 -> P+2 124193323Sed/// which sometimes happens with stores to arrays of structs etc. When we see 125193323Sed/// the first store, we make a range [1, 2). The second store extends the range 126193323Sed/// to [0, 2). The third makes a new range [2, 3). The fourth store joins the 127193323Sed/// two ranges into [0, 3) which is memset'able. 128193323Sednamespace { 129193323Sedstruct MemsetRange { 130193323Sed // Start/End - A semi range that describes the span that this range covers. 131239462Sdim // The range is closed at the start and open at the end: [Start, End). 132193323Sed int64_t Start, End; 133193323Sed 134193323Sed /// StartPtr - The getelementptr instruction that points to the start of the 135193323Sed /// range. 136193323Sed Value *StartPtr; 137239462Sdim 138193323Sed /// Alignment - The known alignment of the first store. 139193323Sed unsigned Alignment; 140239462Sdim 141193323Sed /// TheStores - The actual stores that make up this range. 142218893Sdim SmallVector<Instruction*, 16> TheStores; 143239462Sdim 144243830Sdim bool isProfitableToUseMemset(const DataLayout &TD) const; 145193323Sed 146193323Sed}; 147193323Sed} // end anon namespace 148193323Sed 149243830Sdimbool MemsetRange::isProfitableToUseMemset(const DataLayout &TD) const { 150234353Sdim // If we found more than 4 stores to merge or 16 bytes, use memset. 151234353Sdim if (TheStores.size() >= 4 || End-Start >= 16) return true; 152218893Sdim 153218893Sdim // If there is nothing to merge, don't do anything. 154218893Sdim if (TheStores.size() < 2) return false; 155239462Sdim 156218893Sdim // If any of the stores are a memset, then it is always good to extend the 157218893Sdim // memset. 158218893Sdim for (unsigned i = 0, e = TheStores.size(); i != e; ++i) 159218893Sdim if (!isa<StoreInst>(TheStores[i])) 160218893Sdim return true; 161239462Sdim 162193323Sed // Assume that the code generator is capable of merging pairs of stores 163193323Sed // together if it wants to. 164218893Sdim if (TheStores.size() == 2) return false; 165239462Sdim 166193323Sed // If we have fewer than 8 stores, it can still be worthwhile to do this. 167193323Sed // For example, merging 4 i8 stores into an i32 store is useful almost always. 168193323Sed // However, merging 2 32-bit stores isn't useful on a 32-bit architecture (the 169193323Sed // memset will be split into 2 32-bit stores anyway) and doing so can 170193323Sed // pessimize the llvm optimizer. 171193323Sed // 172193323Sed // Since we don't have perfect knowledge here, make some assumptions: assume 173263508Sdim // the maximum GPR width is the same size as the largest legal integer 174263508Sdim // size. If so, check to see whether we will end up actually reducing the 175263508Sdim // number of stores used. 176193323Sed unsigned Bytes = unsigned(End-Start); 177263508Sdim unsigned MaxIntSize = TD.getLargestLegalIntTypeSize(); 178263508Sdim if (MaxIntSize == 0) 179263508Sdim MaxIntSize = 1; 180263508Sdim unsigned NumPointerStores = Bytes / MaxIntSize; 181239462Sdim 182193323Sed // Assume the remaining bytes if any are done a byte at a time. 183263508Sdim unsigned NumByteStores = Bytes - NumPointerStores * MaxIntSize; 184239462Sdim 185193323Sed // If we will reduce the # stores (according to this heuristic), do the 186193323Sed // transformation. This encourages merging 4 x i8 -> i32 and 2 x i16 -> i32 187193323Sed // etc. 188193323Sed return TheStores.size() > NumPointerStores+NumByteStores; 189239462Sdim} 190193323Sed 191193323Sed 192193323Sednamespace { 193193323Sedclass MemsetRanges { 194193323Sed /// Ranges - A sorted list of the memset ranges. We use std::list here 195193323Sed /// because each element is relatively large and expensive to copy. 196193323Sed std::list<MemsetRange> Ranges; 197193323Sed typedef std::list<MemsetRange>::iterator range_iterator; 198243830Sdim const DataLayout &TD; 199193323Sedpublic: 200243830Sdim MemsetRanges(const DataLayout &td) : TD(td) {} 201239462Sdim 202193323Sed typedef std::list<MemsetRange>::const_iterator const_iterator; 203193323Sed const_iterator begin() const { return Ranges.begin(); } 204193323Sed const_iterator end() const { return Ranges.end(); } 205193323Sed bool empty() const { return Ranges.empty(); } 206239462Sdim 207218893Sdim void addInst(int64_t OffsetFromFirst, Instruction *Inst) { 208218893Sdim if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) 209218893Sdim addStore(OffsetFromFirst, SI); 210218893Sdim else 211218893Sdim addMemSet(OffsetFromFirst, cast<MemSetInst>(Inst)); 212218893Sdim } 213218893Sdim 214218893Sdim void addStore(int64_t OffsetFromFirst, StoreInst *SI) { 215218893Sdim int64_t StoreSize = TD.getTypeStoreSize(SI->getOperand(0)->getType()); 216239462Sdim 217218893Sdim addRange(OffsetFromFirst, StoreSize, 218218893Sdim SI->getPointerOperand(), SI->getAlignment(), SI); 219218893Sdim } 220239462Sdim 221218893Sdim void addMemSet(int64_t OffsetFromFirst, MemSetInst *MSI) { 222218893Sdim int64_t Size = cast<ConstantInt>(MSI->getLength())->getZExtValue(); 223218893Sdim addRange(OffsetFromFirst, Size, MSI->getDest(), MSI->getAlignment(), MSI); 224218893Sdim } 225239462Sdim 226218893Sdim void addRange(int64_t Start, int64_t Size, Value *Ptr, 227218893Sdim unsigned Alignment, Instruction *Inst); 228218893Sdim 229193323Sed}; 230239462Sdim 231193323Sed} // end anon namespace 232193323Sed 233193323Sed 234218893Sdim/// addRange - Add a new store to the MemsetRanges data structure. This adds a 235193323Sed/// new range for the specified store at the specified offset, merging into 236193323Sed/// existing ranges as appropriate. 237218893Sdim/// 238218893Sdim/// Do a linear search of the ranges to see if this can be joined and/or to 239218893Sdim/// find the insertion point in the list. We keep the ranges sorted for 240218893Sdim/// simplicity here. This is a linear search of a linked list, which is ugly, 241218893Sdim/// however the number of ranges is limited, so this won't get crazy slow. 242218893Sdimvoid MemsetRanges::addRange(int64_t Start, int64_t Size, Value *Ptr, 243218893Sdim unsigned Alignment, Instruction *Inst) { 244218893Sdim int64_t End = Start+Size; 245193323Sed range_iterator I = Ranges.begin(), E = Ranges.end(); 246239462Sdim 247193323Sed while (I != E && Start > I->End) 248193323Sed ++I; 249239462Sdim 250193323Sed // We now know that I == E, in which case we didn't find anything to merge 251193323Sed // with, or that Start <= I->End. If End < I->Start or I == E, then we need 252193323Sed // to insert a new range. Handle this now. 253193323Sed if (I == E || End < I->Start) { 254193323Sed MemsetRange &R = *Ranges.insert(I, MemsetRange()); 255193323Sed R.Start = Start; 256193323Sed R.End = End; 257218893Sdim R.StartPtr = Ptr; 258218893Sdim R.Alignment = Alignment; 259218893Sdim R.TheStores.push_back(Inst); 260193323Sed return; 261193323Sed } 262239462Sdim 263193323Sed // This store overlaps with I, add it. 264218893Sdim I->TheStores.push_back(Inst); 265239462Sdim 266193323Sed // At this point, we may have an interval that completely contains our store. 267193323Sed // If so, just add it to the interval and return. 268193323Sed if (I->Start <= Start && I->End >= End) 269193323Sed return; 270239462Sdim 271193323Sed // Now we know that Start <= I->End and End >= I->Start so the range overlaps 272193323Sed // but is not entirely contained within the range. 273239462Sdim 274193323Sed // See if the range extends the start of the range. In this case, it couldn't 275193323Sed // possibly cause it to join the prior range, because otherwise we would have 276193323Sed // stopped on *it*. 277193323Sed if (Start < I->Start) { 278193323Sed I->Start = Start; 279218893Sdim I->StartPtr = Ptr; 280218893Sdim I->Alignment = Alignment; 281193323Sed } 282239462Sdim 283193323Sed // Now we know that Start <= I->End and Start >= I->Start (so the startpoint 284193323Sed // is in or right at the end of I), and that End >= I->Start. Extend I out to 285193323Sed // End. 286193323Sed if (End > I->End) { 287193323Sed I->End = End; 288193323Sed range_iterator NextI = I; 289193323Sed while (++NextI != E && End >= NextI->Start) { 290193323Sed // Merge the range in. 291193323Sed I->TheStores.append(NextI->TheStores.begin(), NextI->TheStores.end()); 292193323Sed if (NextI->End > I->End) 293193323Sed I->End = NextI->End; 294193323Sed Ranges.erase(NextI); 295193323Sed NextI = I; 296193323Sed } 297193323Sed } 298193323Sed} 299193323Sed 300193323Sed//===----------------------------------------------------------------------===// 301193323Sed// MemCpyOpt Pass 302193323Sed//===----------------------------------------------------------------------===// 303193323Sed 304193323Sednamespace { 305198090Srdivacky class MemCpyOpt : public FunctionPass { 306218893Sdim MemoryDependenceAnalysis *MD; 307221345Sdim TargetLibraryInfo *TLI; 308243830Sdim const DataLayout *TD; 309193323Sed public: 310193323Sed static char ID; // Pass identification, replacement for typeid 311218893Sdim MemCpyOpt() : FunctionPass(ID) { 312218893Sdim initializeMemCpyOptPass(*PassRegistry::getPassRegistry()); 313218893Sdim MD = 0; 314221345Sdim TLI = 0; 315221345Sdim TD = 0; 316218893Sdim } 317193323Sed 318218893Sdim bool runOnFunction(Function &F); 319218893Sdim 320193323Sed private: 321193323Sed // This transformation requires dominator postdominator info 322193323Sed virtual void getAnalysisUsage(AnalysisUsage &AU) const { 323193323Sed AU.setPreservesCFG(); 324193323Sed AU.addRequired<DominatorTree>(); 325193323Sed AU.addRequired<MemoryDependenceAnalysis>(); 326193323Sed AU.addRequired<AliasAnalysis>(); 327221345Sdim AU.addRequired<TargetLibraryInfo>(); 328193323Sed AU.addPreserved<AliasAnalysis>(); 329193323Sed AU.addPreserved<MemoryDependenceAnalysis>(); 330193323Sed } 331239462Sdim 332193323Sed // Helper fuctions 333198090Srdivacky bool processStore(StoreInst *SI, BasicBlock::iterator &BBI); 334218893Sdim bool processMemSet(MemSetInst *SI, BasicBlock::iterator &BBI); 335198090Srdivacky bool processMemCpy(MemCpyInst *M); 336198090Srdivacky bool processMemMove(MemMoveInst *M); 337218893Sdim bool performCallSlotOptzn(Instruction *cpy, Value *cpyDst, Value *cpySrc, 338243830Sdim uint64_t cpyLen, unsigned cpyAlign, CallInst *C); 339218893Sdim bool processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep, 340218893Sdim uint64_t MSize); 341218893Sdim bool processByValArgument(CallSite CS, unsigned ArgNo); 342218893Sdim Instruction *tryMergingIntoMemset(Instruction *I, Value *StartPtr, 343218893Sdim Value *ByteVal); 344218893Sdim 345193323Sed bool iterateOnFunction(Function &F); 346193323Sed }; 347239462Sdim 348193323Sed char MemCpyOpt::ID = 0; 349193323Sed} 350193323Sed 351193323Sed// createMemCpyOptPass - The public interface to this file... 352193323SedFunctionPass *llvm::createMemCpyOptPass() { return new MemCpyOpt(); } 353193323Sed 354218893SdimINITIALIZE_PASS_BEGIN(MemCpyOpt, "memcpyopt", "MemCpy Optimization", 355218893Sdim false, false) 356218893SdimINITIALIZE_PASS_DEPENDENCY(DominatorTree) 357218893SdimINITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis) 358221345SdimINITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo) 359218893SdimINITIALIZE_AG_DEPENDENCY(AliasAnalysis) 360218893SdimINITIALIZE_PASS_END(MemCpyOpt, "memcpyopt", "MemCpy Optimization", 361218893Sdim false, false) 362193323Sed 363218893Sdim/// tryMergingIntoMemset - When scanning forward over instructions, we look for 364193323Sed/// some other patterns to fold away. In particular, this looks for stores to 365218893Sdim/// neighboring locations of memory. If it sees enough consecutive ones, it 366218893Sdim/// attempts to merge them together into a memcpy/memset. 367239462SdimInstruction *MemCpyOpt::tryMergingIntoMemset(Instruction *StartInst, 368218893Sdim Value *StartPtr, Value *ByteVal) { 369218893Sdim if (TD == 0) return 0; 370239462Sdim 371193323Sed // Okay, so we now have a single store that can be splatable. Scan to find 372193323Sed // all subsequent stores of the same value to offset from the same pointer. 373193323Sed // Join these together into ranges, so we can decide whether contiguous blocks 374193323Sed // are stored. 375198090Srdivacky MemsetRanges Ranges(*TD); 376239462Sdim 377218893Sdim BasicBlock::iterator BI = StartInst; 378193323Sed for (++BI; !isa<TerminatorInst>(BI); ++BI) { 379218893Sdim if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) { 380218893Sdim // If the instruction is readnone, ignore it, otherwise bail out. We 381218893Sdim // don't even allow readonly here because we don't want something like: 382193323Sed // A[1] = 2; strlen(A); A[2] = 2; -> memcpy(A, ...); strlen(A). 383218893Sdim if (BI->mayWriteToMemory() || BI->mayReadFromMemory()) 384218893Sdim break; 385218893Sdim continue; 386218893Sdim } 387239462Sdim 388218893Sdim if (StoreInst *NextStore = dyn_cast<StoreInst>(BI)) { 389218893Sdim // If this is a store, see if we can merge it in. 390226633Sdim if (!NextStore->isSimple()) break; 391239462Sdim 392218893Sdim // Check to see if this stored value is of the same byte-splattable value. 393218893Sdim if (ByteVal != isBytewiseValue(NextStore->getOperand(0))) 394218893Sdim break; 395239462Sdim 396218893Sdim // Check to see if this store is to a constant offset from the start ptr. 397218893Sdim int64_t Offset; 398218893Sdim if (!IsPointerOffset(StartPtr, NextStore->getPointerOperand(), 399218893Sdim Offset, *TD)) 400218893Sdim break; 401239462Sdim 402218893Sdim Ranges.addStore(Offset, NextStore); 403218893Sdim } else { 404218893Sdim MemSetInst *MSI = cast<MemSetInst>(BI); 405239462Sdim 406218893Sdim if (MSI->isVolatile() || ByteVal != MSI->getValue() || 407218893Sdim !isa<ConstantInt>(MSI->getLength())) 408218893Sdim break; 409239462Sdim 410218893Sdim // Check to see if this store is to a constant offset from the start ptr. 411218893Sdim int64_t Offset; 412218893Sdim if (!IsPointerOffset(StartPtr, MSI->getDest(), Offset, *TD)) 413218893Sdim break; 414239462Sdim 415218893Sdim Ranges.addMemSet(Offset, MSI); 416218893Sdim } 417193323Sed } 418239462Sdim 419193323Sed // If we have no ranges, then we just had a single store with nothing that 420193323Sed // could be merged in. This is a very common case of course. 421193323Sed if (Ranges.empty()) 422218893Sdim return 0; 423239462Sdim 424193323Sed // If we had at least one store that could be merged in, add the starting 425193323Sed // store as well. We try to avoid this unless there is at least something 426193323Sed // interesting as a small compile-time optimization. 427218893Sdim Ranges.addInst(0, StartInst); 428218893Sdim 429218893Sdim // If we create any memsets, we put it right before the first instruction that 430218893Sdim // isn't part of the memset block. This ensure that the memset is dominated 431218893Sdim // by any addressing instruction needed by the start of the block. 432218893Sdim IRBuilder<> Builder(BI); 433218893Sdim 434193323Sed // Now that we have full information about ranges, loop over the ranges and 435193323Sed // emit memset's for anything big enough to be worthwhile. 436218893Sdim Instruction *AMemSet = 0; 437193323Sed for (MemsetRanges::const_iterator I = Ranges.begin(), E = Ranges.end(); 438193323Sed I != E; ++I) { 439193323Sed const MemsetRange &Range = *I; 440239462Sdim 441193323Sed if (Range.TheStores.size() == 1) continue; 442239462Sdim 443193323Sed // If it is profitable to lower this range to memset, do so now. 444198090Srdivacky if (!Range.isProfitableToUseMemset(*TD)) 445193323Sed continue; 446239462Sdim 447218893Sdim // Otherwise, we do want to transform this! Create a new memset. 448193323Sed // Get the starting pointer of the block. 449193323Sed StartPtr = Range.StartPtr; 450239462Sdim 451206274Srdivacky // Determine alignment 452206274Srdivacky unsigned Alignment = Range.Alignment; 453206274Srdivacky if (Alignment == 0) { 454239462Sdim Type *EltType = 455218893Sdim cast<PointerType>(StartPtr->getType())->getElementType(); 456206274Srdivacky Alignment = TD->getABITypeAlignment(EltType); 457206274Srdivacky } 458239462Sdim 459239462Sdim AMemSet = 460218893Sdim Builder.CreateMemSet(StartPtr, ByteVal, Range.End-Range.Start, Alignment); 461239462Sdim 462202375Srdivacky DEBUG(dbgs() << "Replace stores:\n"; 463193323Sed for (unsigned i = 0, e = Range.TheStores.size(); i != e; ++i) 464218893Sdim dbgs() << *Range.TheStores[i] << '\n'; 465218893Sdim dbgs() << "With: " << *AMemSet << '\n'); 466223017Sdim 467223017Sdim if (!Range.TheStores.empty()) 468223017Sdim AMemSet->setDebugLoc(Range.TheStores[0]->getDebugLoc()); 469223017Sdim 470193323Sed // Zap all the stores. 471263508Sdim for (SmallVectorImpl<Instruction *>::const_iterator 472198090Srdivacky SI = Range.TheStores.begin(), 473218893Sdim SE = Range.TheStores.end(); SI != SE; ++SI) { 474218893Sdim MD->removeInstruction(*SI); 475193323Sed (*SI)->eraseFromParent(); 476218893Sdim } 477193323Sed ++NumMemSetInfer; 478193323Sed } 479239462Sdim 480218893Sdim return AMemSet; 481193323Sed} 482193323Sed 483193323Sed 484218893Sdimbool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) { 485226633Sdim if (!SI->isSimple()) return false; 486239462Sdim 487218893Sdim if (TD == 0) return false; 488218893Sdim 489218893Sdim // Detect cases where we're performing call slot forwarding, but 490218893Sdim // happen to be using a load-store pair to implement it, rather than 491218893Sdim // a memcpy. 492218893Sdim if (LoadInst *LI = dyn_cast<LoadInst>(SI->getOperand(0))) { 493226633Sdim if (LI->isSimple() && LI->hasOneUse() && 494224145Sdim LI->getParent() == SI->getParent()) { 495223017Sdim MemDepResult ldep = MD->getDependency(LI); 496218893Sdim CallInst *C = 0; 497223017Sdim if (ldep.isClobber() && !isa<MemCpyInst>(ldep.getInst())) 498223017Sdim C = dyn_cast<CallInst>(ldep.getInst()); 499223017Sdim 500218893Sdim if (C) { 501223017Sdim // Check that nothing touches the dest of the "copy" between 502223017Sdim // the call and the store. 503224145Sdim AliasAnalysis &AA = getAnalysis<AliasAnalysis>(); 504224145Sdim AliasAnalysis::Location StoreLoc = AA.getLocation(SI); 505224145Sdim for (BasicBlock::iterator I = --BasicBlock::iterator(SI), 506224145Sdim E = C; I != E; --I) { 507224145Sdim if (AA.getModRefInfo(&*I, StoreLoc) != AliasAnalysis::NoModRef) { 508224145Sdim C = 0; 509224145Sdim break; 510223017Sdim } 511223017Sdim } 512223017Sdim } 513223017Sdim 514223017Sdim if (C) { 515243830Sdim unsigned storeAlign = SI->getAlignment(); 516243830Sdim if (!storeAlign) 517243830Sdim storeAlign = TD->getABITypeAlignment(SI->getOperand(0)->getType()); 518243830Sdim unsigned loadAlign = LI->getAlignment(); 519243830Sdim if (!loadAlign) 520243830Sdim loadAlign = TD->getABITypeAlignment(LI->getType()); 521243830Sdim 522218893Sdim bool changed = performCallSlotOptzn(LI, 523239462Sdim SI->getPointerOperand()->stripPointerCasts(), 524218893Sdim LI->getPointerOperand()->stripPointerCasts(), 525243830Sdim TD->getTypeStoreSize(SI->getOperand(0)->getType()), 526243830Sdim std::min(storeAlign, loadAlign), C); 527218893Sdim if (changed) { 528218893Sdim MD->removeInstruction(SI); 529218893Sdim SI->eraseFromParent(); 530218893Sdim MD->removeInstruction(LI); 531218893Sdim LI->eraseFromParent(); 532218893Sdim ++NumMemCpyInstr; 533218893Sdim return true; 534218893Sdim } 535218893Sdim } 536218893Sdim } 537218893Sdim } 538239462Sdim 539218893Sdim // There are two cases that are interesting for this code to handle: memcpy 540218893Sdim // and memset. Right now we only handle memset. 541239462Sdim 542218893Sdim // Ensure that the value being stored is something that can be memset'able a 543218893Sdim // byte at a time like "0" or "-1" or any width, as well as things like 544218893Sdim // 0xA0A0A0A0 and 0.0. 545218893Sdim if (Value *ByteVal = isBytewiseValue(SI->getOperand(0))) 546218893Sdim if (Instruction *I = tryMergingIntoMemset(SI, SI->getPointerOperand(), 547218893Sdim ByteVal)) { 548218893Sdim BBI = I; // Don't invalidate iterator. 549218893Sdim return true; 550218893Sdim } 551239462Sdim 552218893Sdim return false; 553218893Sdim} 554218893Sdim 555218893Sdimbool MemCpyOpt::processMemSet(MemSetInst *MSI, BasicBlock::iterator &BBI) { 556218893Sdim // See if there is another memset or store neighboring this memset which 557218893Sdim // allows us to widen out the memset to do a single larger store. 558218893Sdim if (isa<ConstantInt>(MSI->getLength()) && !MSI->isVolatile()) 559218893Sdim if (Instruction *I = tryMergingIntoMemset(MSI, MSI->getDest(), 560218893Sdim MSI->getValue())) { 561218893Sdim BBI = I; // Don't invalidate iterator. 562218893Sdim return true; 563218893Sdim } 564218893Sdim return false; 565218893Sdim} 566218893Sdim 567218893Sdim 568193323Sed/// performCallSlotOptzn - takes a memcpy and a call that it depends on, 569193323Sed/// and checks for the possibility of a call slot optimization by having 570193323Sed/// the call write its result directly into the destination of the memcpy. 571218893Sdimbool MemCpyOpt::performCallSlotOptzn(Instruction *cpy, 572218893Sdim Value *cpyDest, Value *cpySrc, 573243830Sdim uint64_t cpyLen, unsigned cpyAlign, 574243830Sdim CallInst *C) { 575193323Sed // The general transformation to keep in mind is 576193323Sed // 577193323Sed // call @func(..., src, ...) 578193323Sed // memcpy(dest, src, ...) 579193323Sed // 580193323Sed // -> 581193323Sed // 582193323Sed // memcpy(dest, src, ...) 583193323Sed // call @func(..., dest, ...) 584193323Sed // 585193323Sed // Since moving the memcpy is technically awkward, we additionally check that 586193323Sed // src only holds uninitialized values at the moment of the call, meaning that 587193323Sed // the memcpy can be discarded rather than moved. 588193323Sed 589193323Sed // Deliberately get the source and destination with bitcasts stripped away, 590193323Sed // because we'll need to do type comparisons based on the underlying type. 591212904Sdim CallSite CS(C); 592193323Sed 593193323Sed // Require that src be an alloca. This simplifies the reasoning considerably. 594198090Srdivacky AllocaInst *srcAlloca = dyn_cast<AllocaInst>(cpySrc); 595193323Sed if (!srcAlloca) 596193323Sed return false; 597193323Sed 598193323Sed // Check that all of src is copied to dest. 599218893Sdim if (TD == 0) return false; 600193323Sed 601198090Srdivacky ConstantInt *srcArraySize = dyn_cast<ConstantInt>(srcAlloca->getArraySize()); 602193323Sed if (!srcArraySize) 603193323Sed return false; 604193323Sed 605198090Srdivacky uint64_t srcSize = TD->getTypeAllocSize(srcAlloca->getAllocatedType()) * 606193323Sed srcArraySize->getZExtValue(); 607193323Sed 608218893Sdim if (cpyLen < srcSize) 609193323Sed return false; 610193323Sed 611193323Sed // Check that accessing the first srcSize bytes of dest will not cause a 612193323Sed // trap. Otherwise the transform is invalid since it might cause a trap 613193323Sed // to occur earlier than it otherwise would. 614198090Srdivacky if (AllocaInst *A = dyn_cast<AllocaInst>(cpyDest)) { 615193323Sed // The destination is an alloca. Check it is larger than srcSize. 616198090Srdivacky ConstantInt *destArraySize = dyn_cast<ConstantInt>(A->getArraySize()); 617193323Sed if (!destArraySize) 618193323Sed return false; 619193323Sed 620198090Srdivacky uint64_t destSize = TD->getTypeAllocSize(A->getAllocatedType()) * 621193323Sed destArraySize->getZExtValue(); 622193323Sed 623193323Sed if (destSize < srcSize) 624193323Sed return false; 625198090Srdivacky } else if (Argument *A = dyn_cast<Argument>(cpyDest)) { 626193323Sed // If the destination is an sret parameter then only accesses that are 627193323Sed // outside of the returned struct type can trap. 628193323Sed if (!A->hasStructRetAttr()) 629193323Sed return false; 630193323Sed 631226633Sdim Type *StructTy = cast<PointerType>(A->getType())->getElementType(); 632263508Sdim if (!StructTy->isSized()) { 633263508Sdim // The call may never return and hence the copy-instruction may never 634263508Sdim // be executed, and therefore it's not safe to say "the destination 635263508Sdim // has at least <cpyLen> bytes, as implied by the copy-instruction", 636263508Sdim return false; 637263508Sdim } 638263508Sdim 639198090Srdivacky uint64_t destSize = TD->getTypeAllocSize(StructTy); 640193323Sed if (destSize < srcSize) 641193323Sed return false; 642193323Sed } else { 643193323Sed return false; 644193323Sed } 645193323Sed 646243830Sdim // Check that dest points to memory that is at least as aligned as src. 647243830Sdim unsigned srcAlign = srcAlloca->getAlignment(); 648243830Sdim if (!srcAlign) 649243830Sdim srcAlign = TD->getABITypeAlignment(srcAlloca->getAllocatedType()); 650243830Sdim bool isDestSufficientlyAligned = srcAlign <= cpyAlign; 651243830Sdim // If dest is not aligned enough and we can't increase its alignment then 652243830Sdim // bail out. 653243830Sdim if (!isDestSufficientlyAligned && !isa<AllocaInst>(cpyDest)) 654243830Sdim return false; 655243830Sdim 656193323Sed // Check that src is not accessed except via the call and the memcpy. This 657193323Sed // guarantees that it holds only undefined values when passed in (so the final 658193323Sed // memcpy can be dropped), that it is not read or written between the call and 659193323Sed // the memcpy, and that writing beyond the end of it is undefined. 660193323Sed SmallVector<User*, 8> srcUseList(srcAlloca->use_begin(), 661193323Sed srcAlloca->use_end()); 662193323Sed while (!srcUseList.empty()) { 663202375Srdivacky User *UI = srcUseList.pop_back_val(); 664193323Sed 665193323Sed if (isa<BitCastInst>(UI)) { 666193323Sed for (User::use_iterator I = UI->use_begin(), E = UI->use_end(); 667193323Sed I != E; ++I) 668193323Sed srcUseList.push_back(*I); 669198090Srdivacky } else if (GetElementPtrInst *G = dyn_cast<GetElementPtrInst>(UI)) { 670193323Sed if (G->hasAllZeroIndices()) 671193323Sed for (User::use_iterator I = UI->use_begin(), E = UI->use_end(); 672193323Sed I != E; ++I) 673193323Sed srcUseList.push_back(*I); 674193323Sed else 675193323Sed return false; 676193323Sed } else if (UI != C && UI != cpy) { 677193323Sed return false; 678193323Sed } 679193323Sed } 680193323Sed 681193323Sed // Since we're changing the parameter to the callsite, we need to make sure 682193323Sed // that what would be the new parameter dominates the callsite. 683198090Srdivacky DominatorTree &DT = getAnalysis<DominatorTree>(); 684198090Srdivacky if (Instruction *cpyDestInst = dyn_cast<Instruction>(cpyDest)) 685193323Sed if (!DT.dominates(cpyDestInst, C)) 686193323Sed return false; 687193323Sed 688193323Sed // In addition to knowing that the call does not access src in some 689193323Sed // unexpected manner, for example via a global, which we deduce from 690193323Sed // the use analysis, we also need to know that it does not sneakily 691193323Sed // access dest. We rely on AA to figure this out for us. 692198090Srdivacky AliasAnalysis &AA = getAnalysis<AliasAnalysis>(); 693239462Sdim AliasAnalysis::ModRefResult MR = AA.getModRefInfo(C, cpyDest, srcSize); 694239462Sdim // If necessary, perform additional analysis. 695239462Sdim if (MR != AliasAnalysis::NoModRef) 696239462Sdim MR = AA.callCapturesBefore(C, cpyDest, srcSize, &DT); 697239462Sdim if (MR != AliasAnalysis::NoModRef) 698193323Sed return false; 699193323Sed 700193323Sed // All the checks have passed, so do the transformation. 701193323Sed bool changedArgument = false; 702193323Sed for (unsigned i = 0; i < CS.arg_size(); ++i) 703193323Sed if (CS.getArgument(i)->stripPointerCasts() == cpySrc) { 704243830Sdim Value *Dest = cpySrc->getType() == cpyDest->getType() ? cpyDest 705243830Sdim : CastInst::CreatePointerCast(cpyDest, cpySrc->getType(), 706243830Sdim cpyDest->getName(), C); 707193323Sed changedArgument = true; 708243830Sdim if (CS.getArgument(i)->getType() == Dest->getType()) 709243830Sdim CS.setArgument(i, Dest); 710198090Srdivacky else 711243830Sdim CS.setArgument(i, CastInst::CreatePointerCast(Dest, 712243830Sdim CS.getArgument(i)->getType(), Dest->getName(), C)); 713193323Sed } 714193323Sed 715193323Sed if (!changedArgument) 716193323Sed return false; 717193323Sed 718243830Sdim // If the destination wasn't sufficiently aligned then increase its alignment. 719243830Sdim if (!isDestSufficientlyAligned) { 720243830Sdim assert(isa<AllocaInst>(cpyDest) && "Can only increase alloca alignment!"); 721243830Sdim cast<AllocaInst>(cpyDest)->setAlignment(srcAlign); 722243830Sdim } 723243830Sdim 724193323Sed // Drop any cached information about the call, because we may have changed 725193323Sed // its dependence information by changing its parameter. 726218893Sdim MD->removeInstruction(C); 727193323Sed 728218893Sdim // Remove the memcpy. 729218893Sdim MD->removeInstruction(cpy); 730210299Sed ++NumMemCpyInstr; 731193323Sed 732193323Sed return true; 733193323Sed} 734193323Sed 735218893Sdim/// processMemCpyMemCpyDependence - We've found that the (upward scanning) 736218893Sdim/// memory dependence of memcpy 'M' is the memcpy 'MDep'. Try to simplify M to 737218893Sdim/// copy from MDep's input if we can. MSize is the size of M's copy. 738239462Sdim/// 739218893Sdimbool MemCpyOpt::processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep, 740218893Sdim uint64_t MSize) { 741218893Sdim // We can only transforms memcpy's where the dest of one is the source of the 742218893Sdim // other. 743218893Sdim if (M->getSource() != MDep->getDest() || MDep->isVolatile()) 744193323Sed return false; 745239462Sdim 746218893Sdim // If dep instruction is reading from our current input, then it is a noop 747218893Sdim // transfer and substituting the input won't change this instruction. Just 748218893Sdim // ignore the input and let someone else zap MDep. This handles cases like: 749218893Sdim // memcpy(a <- a) 750218893Sdim // memcpy(b <- a) 751218893Sdim if (M->getSource() == MDep->getSource()) 752193323Sed return false; 753239462Sdim 754221345Sdim // Second, the length of the memcpy's must be the same, or the preceding one 755193323Sed // must be larger than the following one. 756218893Sdim ConstantInt *MDepLen = dyn_cast<ConstantInt>(MDep->getLength()); 757218893Sdim ConstantInt *MLen = dyn_cast<ConstantInt>(M->getLength()); 758218893Sdim if (!MDepLen || !MLen || MDepLen->getZExtValue() < MLen->getZExtValue()) 759193323Sed return false; 760239462Sdim 761198090Srdivacky AliasAnalysis &AA = getAnalysis<AliasAnalysis>(); 762218893Sdim 763218893Sdim // Verify that the copied-from memory doesn't change in between the two 764218893Sdim // transfers. For example, in: 765218893Sdim // memcpy(a <- b) 766218893Sdim // *b = 42; 767218893Sdim // memcpy(c <- a) 768218893Sdim // It would be invalid to transform the second memcpy into memcpy(c <- b). 769218893Sdim // 770218893Sdim // TODO: If the code between M and MDep is transparent to the destination "c", 771218893Sdim // then we could still perform the xform by moving M up to the first memcpy. 772218893Sdim // 773218893Sdim // NOTE: This is conservative, it will stop on any read from the source loc, 774218893Sdim // not just the defining memcpy. 775218893Sdim MemDepResult SourceDep = 776218893Sdim MD->getPointerDependencyFrom(AA.getLocationForSource(MDep), 777218893Sdim false, M, M->getParent()); 778218893Sdim if (!SourceDep.isClobber() || SourceDep.getInst() != MDep) 779193323Sed return false; 780239462Sdim 781218893Sdim // If the dest of the second might alias the source of the first, then the 782218893Sdim // source and dest might overlap. We still want to eliminate the intermediate 783218893Sdim // value, but we have to generate a memmove instead of memcpy. 784218893Sdim bool UseMemMove = false; 785218893Sdim if (!AA.isNoAlias(AA.getLocationForDest(M), AA.getLocationForSource(MDep))) 786218893Sdim UseMemMove = true; 787239462Sdim 788218893Sdim // If all checks passed, then we can transform M. 789239462Sdim 790218893Sdim // Make sure to use the lesser of the alignment of the source and the dest 791218893Sdim // since we're changing where we're reading from, but don't want to increase 792218893Sdim // the alignment past what can be read from or written to. 793218893Sdim // TODO: Is this worth it if we're creating a less aligned memcpy? For 794218893Sdim // example we could be moving from movaps -> movq on x86. 795218893Sdim unsigned Align = std::min(MDep->getAlignment(), M->getAlignment()); 796239462Sdim 797218893Sdim IRBuilder<> Builder(M); 798218893Sdim if (UseMemMove) 799218893Sdim Builder.CreateMemMove(M->getRawDest(), MDep->getRawSource(), M->getLength(), 800218893Sdim Align, M->isVolatile()); 801218893Sdim else 802218893Sdim Builder.CreateMemCpy(M->getRawDest(), MDep->getRawSource(), M->getLength(), 803218893Sdim Align, M->isVolatile()); 804218893Sdim 805218893Sdim // Remove the instruction we're replacing. 806218893Sdim MD->removeInstruction(M); 807218893Sdim M->eraseFromParent(); 808218893Sdim ++NumMemCpyInstr; 809218893Sdim return true; 810218893Sdim} 811218893Sdim 812218893Sdim 813218893Sdim/// processMemCpy - perform simplification of memcpy's. If we have memcpy A 814218893Sdim/// which copies X to Y, and memcpy B which copies Y to Z, then we can rewrite 815218893Sdim/// B to be a memcpy from X to Z (or potentially a memmove, depending on 816218893Sdim/// circumstances). This allows later passes to remove the first memcpy 817218893Sdim/// altogether. 818218893Sdimbool MemCpyOpt::processMemCpy(MemCpyInst *M) { 819218893Sdim // We can only optimize statically-sized memcpy's that are non-volatile. 820218893Sdim ConstantInt *CopySize = dyn_cast<ConstantInt>(M->getLength()); 821218893Sdim if (CopySize == 0 || M->isVolatile()) return false; 822218893Sdim 823218893Sdim // If the source and destination of the memcpy are the same, then zap it. 824218893Sdim if (M->getSource() == M->getDest()) { 825218893Sdim MD->removeInstruction(M); 826193323Sed M->eraseFromParent(); 827218893Sdim return false; 828193323Sed } 829218893Sdim 830218893Sdim // If copying from a constant, try to turn the memcpy into a memset. 831218893Sdim if (GlobalVariable *GV = dyn_cast<GlobalVariable>(M->getSource())) 832218893Sdim if (GV->isConstant() && GV->hasDefinitiveInitializer()) 833218893Sdim if (Value *ByteVal = isBytewiseValue(GV->getInitializer())) { 834218893Sdim IRBuilder<> Builder(M); 835218893Sdim Builder.CreateMemSet(M->getRawDest(), ByteVal, CopySize, 836218893Sdim M->getAlignment(), false); 837218893Sdim MD->removeInstruction(M); 838218893Sdim M->eraseFromParent(); 839218893Sdim ++NumCpyToSet; 840218893Sdim return true; 841218893Sdim } 842218893Sdim 843218893Sdim // The are two possible optimizations we can do for memcpy: 844218893Sdim // a) memcpy-memcpy xform which exposes redundance for DSE. 845218893Sdim // b) call-memcpy xform for return slot optimization. 846218893Sdim MemDepResult DepInfo = MD->getDependency(M); 847234353Sdim if (DepInfo.isClobber()) { 848234353Sdim if (CallInst *C = dyn_cast<CallInst>(DepInfo.getInst())) { 849234353Sdim if (performCallSlotOptzn(M, M->getDest(), M->getSource(), 850243830Sdim CopySize->getZExtValue(), M->getAlignment(), 851243830Sdim C)) { 852234353Sdim MD->removeInstruction(M); 853234353Sdim M->eraseFromParent(); 854234353Sdim return true; 855234353Sdim } 856218893Sdim } 857218893Sdim } 858234353Sdim 859234353Sdim AliasAnalysis::Location SrcLoc = AliasAnalysis::getLocationForSource(M); 860234353Sdim MemDepResult SrcDepInfo = MD->getPointerDependencyFrom(SrcLoc, true, 861234353Sdim M, M->getParent()); 862234353Sdim if (SrcDepInfo.isClobber()) { 863234353Sdim if (MemCpyInst *MDep = dyn_cast<MemCpyInst>(SrcDepInfo.getInst())) 864234353Sdim return processMemCpyMemCpyDependence(M, MDep, CopySize->getZExtValue()); 865234353Sdim } 866234353Sdim 867193323Sed return false; 868193323Sed} 869193323Sed 870198090Srdivacky/// processMemMove - Transforms memmove calls to memcpy calls when the src/dst 871198090Srdivacky/// are guaranteed not to alias. 872198090Srdivackybool MemCpyOpt::processMemMove(MemMoveInst *M) { 873198090Srdivacky AliasAnalysis &AA = getAnalysis<AliasAnalysis>(); 874198090Srdivacky 875221345Sdim if (!TLI->has(LibFunc::memmove)) 876221345Sdim return false; 877239462Sdim 878198090Srdivacky // See if the pointers alias. 879218893Sdim if (!AA.isNoAlias(AA.getLocationForDest(M), AA.getLocationForSource(M))) 880198090Srdivacky return false; 881239462Sdim 882202375Srdivacky DEBUG(dbgs() << "MemCpyOpt: Optimizing memmove -> memcpy: " << *M << "\n"); 883239462Sdim 884198090Srdivacky // If not, then we know we can transform this. 885198090Srdivacky Module *Mod = M->getParent()->getParent()->getParent(); 886224145Sdim Type *ArgTys[3] = { M->getRawDest()->getType(), 887224145Sdim M->getRawSource()->getType(), 888224145Sdim M->getLength()->getType() }; 889212904Sdim M->setCalledFunction(Intrinsic::getDeclaration(Mod, Intrinsic::memcpy, 890224145Sdim ArgTys)); 891198090Srdivacky 892198090Srdivacky // MemDep may have over conservative information about this instruction, just 893198090Srdivacky // conservatively flush it from the cache. 894218893Sdim MD->removeInstruction(M); 895198090Srdivacky 896198090Srdivacky ++NumMoveToCpy; 897198090Srdivacky return true; 898193323Sed} 899239462Sdim 900218893Sdim/// processByValArgument - This is called on every byval argument in call sites. 901218893Sdimbool MemCpyOpt::processByValArgument(CallSite CS, unsigned ArgNo) { 902218893Sdim if (TD == 0) return false; 903193323Sed 904218893Sdim // Find out what feeds this byval argument. 905218893Sdim Value *ByValArg = CS.getArgument(ArgNo); 906226633Sdim Type *ByValTy = cast<PointerType>(ByValArg->getType())->getElementType(); 907218893Sdim uint64_t ByValSize = TD->getTypeAllocSize(ByValTy); 908218893Sdim MemDepResult DepInfo = 909218893Sdim MD->getPointerDependencyFrom(AliasAnalysis::Location(ByValArg, ByValSize), 910218893Sdim true, CS.getInstruction(), 911218893Sdim CS.getInstruction()->getParent()); 912218893Sdim if (!DepInfo.isClobber()) 913218893Sdim return false; 914218893Sdim 915218893Sdim // If the byval argument isn't fed by a memcpy, ignore it. If it is fed by 916218893Sdim // a memcpy, see if we can byval from the source of the memcpy instead of the 917218893Sdim // result. 918218893Sdim MemCpyInst *MDep = dyn_cast<MemCpyInst>(DepInfo.getInst()); 919218893Sdim if (MDep == 0 || MDep->isVolatile() || 920218893Sdim ByValArg->stripPointerCasts() != MDep->getDest()) 921218893Sdim return false; 922239462Sdim 923218893Sdim // The length of the memcpy must be larger or equal to the size of the byval. 924218893Sdim ConstantInt *C1 = dyn_cast<ConstantInt>(MDep->getLength()); 925218893Sdim if (C1 == 0 || C1->getValue().getZExtValue() < ByValSize) 926218893Sdim return false; 927218893Sdim 928223017Sdim // Get the alignment of the byval. If the call doesn't specify the alignment, 929223017Sdim // then it is some target specific value that we can't know. 930218893Sdim unsigned ByValAlign = CS.getParamAlignment(ArgNo+1); 931223017Sdim if (ByValAlign == 0) return false; 932239462Sdim 933223017Sdim // If it is greater than the memcpy, then we check to see if we can force the 934223017Sdim // source of the memcpy to the alignment we need. If we fail, we bail out. 935223017Sdim if (MDep->getAlignment() < ByValAlign && 936223017Sdim getOrEnforceKnownAlignment(MDep->getSource(),ByValAlign, TD) < ByValAlign) 937223017Sdim return false; 938239462Sdim 939218893Sdim // Verify that the copied-from memory doesn't change in between the memcpy and 940218893Sdim // the byval call. 941218893Sdim // memcpy(a <- b) 942218893Sdim // *b = 42; 943218893Sdim // foo(*a) 944218893Sdim // It would be invalid to transform the second memcpy into foo(*b). 945218893Sdim // 946218893Sdim // NOTE: This is conservative, it will stop on any read from the source loc, 947218893Sdim // not just the defining memcpy. 948218893Sdim MemDepResult SourceDep = 949218893Sdim MD->getPointerDependencyFrom(AliasAnalysis::getLocationForSource(MDep), 950218893Sdim false, CS.getInstruction(), MDep->getParent()); 951218893Sdim if (!SourceDep.isClobber() || SourceDep.getInst() != MDep) 952218893Sdim return false; 953239462Sdim 954218893Sdim Value *TmpCast = MDep->getSource(); 955218893Sdim if (MDep->getSource()->getType() != ByValArg->getType()) 956218893Sdim TmpCast = new BitCastInst(MDep->getSource(), ByValArg->getType(), 957218893Sdim "tmpcast", CS.getInstruction()); 958239462Sdim 959218893Sdim DEBUG(dbgs() << "MemCpyOpt: Forwarding memcpy to byval:\n" 960218893Sdim << " " << *MDep << "\n" 961218893Sdim << " " << *CS.getInstruction() << "\n"); 962239462Sdim 963218893Sdim // Otherwise we're good! Update the byval argument. 964218893Sdim CS.setArgument(ArgNo, TmpCast); 965218893Sdim ++NumMemCpyInstr; 966218893Sdim return true; 967218893Sdim} 968218893Sdim 969218893Sdim/// iterateOnFunction - Executes one iteration of MemCpyOpt. 970193323Sedbool MemCpyOpt::iterateOnFunction(Function &F) { 971198090Srdivacky bool MadeChange = false; 972193323Sed 973198090Srdivacky // Walk all instruction in the function. 974193323Sed for (Function::iterator BB = F.begin(), BBE = F.end(); BB != BBE; ++BB) { 975218893Sdim for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) { 976198090Srdivacky // Avoid invalidating the iterator. 977198090Srdivacky Instruction *I = BI++; 978239462Sdim 979218893Sdim bool RepeatInstruction = false; 980239462Sdim 981193323Sed if (StoreInst *SI = dyn_cast<StoreInst>(I)) 982198090Srdivacky MadeChange |= processStore(SI, BI); 983218893Sdim else if (MemSetInst *M = dyn_cast<MemSetInst>(I)) 984218893Sdim RepeatInstruction = processMemSet(M, BI); 985198090Srdivacky else if (MemCpyInst *M = dyn_cast<MemCpyInst>(I)) 986218893Sdim RepeatInstruction = processMemCpy(M); 987218893Sdim else if (MemMoveInst *M = dyn_cast<MemMoveInst>(I)) 988218893Sdim RepeatInstruction = processMemMove(M); 989218893Sdim else if (CallSite CS = (Value*)I) { 990218893Sdim for (unsigned i = 0, e = CS.arg_size(); i != e; ++i) 991234353Sdim if (CS.isByValArgument(i)) 992218893Sdim MadeChange |= processByValArgument(CS, i); 993193323Sed } 994218893Sdim 995218893Sdim // Reprocess the instruction if desired. 996218893Sdim if (RepeatInstruction) { 997218893Sdim if (BI != BB->begin()) --BI; 998218893Sdim MadeChange = true; 999218893Sdim } 1000193323Sed } 1001193323Sed } 1002239462Sdim 1003198090Srdivacky return MadeChange; 1004193323Sed} 1005198090Srdivacky 1006198090Srdivacky// MemCpyOpt::runOnFunction - This is the main transformation entry point for a 1007198090Srdivacky// function. 1008198090Srdivacky// 1009198090Srdivackybool MemCpyOpt::runOnFunction(Function &F) { 1010198090Srdivacky bool MadeChange = false; 1011218893Sdim MD = &getAnalysis<MemoryDependenceAnalysis>(); 1012243830Sdim TD = getAnalysisIfAvailable<DataLayout>(); 1013221345Sdim TLI = &getAnalysis<TargetLibraryInfo>(); 1014239462Sdim 1015221345Sdim // If we don't have at least memset and memcpy, there is little point of doing 1016221345Sdim // anything here. These are required by a freestanding implementation, so if 1017221345Sdim // even they are disabled, there is no point in trying hard. 1018221345Sdim if (!TLI->has(LibFunc::memset) || !TLI->has(LibFunc::memcpy)) 1019221345Sdim return false; 1020239462Sdim 1021198090Srdivacky while (1) { 1022198090Srdivacky if (!iterateOnFunction(F)) 1023198090Srdivacky break; 1024198090Srdivacky MadeChange = true; 1025198090Srdivacky } 1026239462Sdim 1027218893Sdim MD = 0; 1028198090Srdivacky return MadeChange; 1029198090Srdivacky} 1030