InstCombineLoadStoreAlloca.cpp revision 251662
1202375Srdivacky//===- InstCombineLoadStoreAlloca.cpp -------------------------------------===// 2202375Srdivacky// 3202375Srdivacky// The LLVM Compiler Infrastructure 4202375Srdivacky// 5202375Srdivacky// This file is distributed under the University of Illinois Open Source 6202375Srdivacky// License. See LICENSE.TXT for details. 7202375Srdivacky// 8202375Srdivacky//===----------------------------------------------------------------------===// 9202375Srdivacky// 10202375Srdivacky// This file implements the visit functions for load, store and alloca. 11202375Srdivacky// 12202375Srdivacky//===----------------------------------------------------------------------===// 13202375Srdivacky 14202375Srdivacky#include "InstCombine.h" 15249423Sdim#include "llvm/ADT/Statistic.h" 16210299Sed#include "llvm/Analysis/Loads.h" 17249423Sdim#include "llvm/IR/DataLayout.h" 18249423Sdim#include "llvm/IR/IntrinsicInst.h" 19202375Srdivacky#include "llvm/Transforms/Utils/BasicBlockUtils.h" 20202375Srdivacky#include "llvm/Transforms/Utils/Local.h" 21202375Srdivackyusing namespace llvm; 22202375Srdivacky 23243830SdimSTATISTIC(NumDeadStore, "Number of dead stores eliminated"); 24243830SdimSTATISTIC(NumGlobalCopies, "Number of allocas copied from constant global"); 25202375Srdivacky 26243830Sdim/// pointsToConstantGlobal - Return true if V (possibly indirectly) points to 27243830Sdim/// some part of a constant global variable. This intentionally only accepts 28243830Sdim/// constant expressions because we can't rewrite arbitrary instructions. 
static bool pointsToConstantGlobal(Value *V) {
  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
    return GV->isConstant();
  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
    // Only look through address-preserving constant expressions; arbitrary
    // instructions can't be rewritten by the caller.
    if (CE->getOpcode() == Instruction::BitCast ||
        CE->getOpcode() == Instruction::GetElementPtr)
      return pointsToConstantGlobal(CE->getOperand(0));
  return false;
}

/// isOnlyCopiedFromConstantGlobal - Recursively walk the uses of a (derived)
/// pointer to an alloca.  Ignore any reads of the pointer, return false if we
/// see any stores or other unknown uses.  If we see pointer arithmetic, keep
/// track of whether it moves the pointer (with IsOffset) but otherwise traverse
/// the uses.  If we see a memcpy/memmove that targets an unoffseted pointer to
/// the alloca, and if the source pointer is a pointer to a constant global, we
/// can optimize this.  On success, TheCopy holds the single initializing copy
/// and ToDelete holds lifetime markers the caller may erase.
static bool
isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
                               SmallVectorImpl<Instruction *> &ToDelete,
                               bool IsOffset = false) {
  // We track lifetime intrinsics as we encounter them.  If we decide to go
  // ahead and replace the value with the global, this lets the caller quickly
  // eliminate the markers.

  for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) {
    User *U = cast<Instruction>(*UI);

    if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
      // Simple (non-volatile, non-atomic) loads are always ok: they only
      // read the memory.  Anything else bails.
      if (!LI->isSimple()) return false;
      continue;
    }

    if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
      // A bitcast doesn't move the pointer; if uses of the bitcast are ok,
      // we are ok.
      if (!isOnlyCopiedFromConstantGlobal(BCI, TheCopy, ToDelete, IsOffset))
        return false;
      continue;
    }
    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
      // If the GEP has all zero indices, it doesn't offset the pointer.  If it
      // doesn't, it does.
      if (!isOnlyCopiedFromConstantGlobal(
              GEP, TheCopy, ToDelete, IsOffset || !GEP->hasAllZeroIndices()))
        return false;
      continue;
    }

    if (CallSite CS = U) {
      // If this is the function being called then we treat it like a load and
      // ignore it.
      if (CS.isCallee(UI))
        continue;

      // If this is a readonly/readnone call site, then we know it is just a
      // load (but one that potentially returns the value itself), so we can
      // ignore it if we know that the value isn't captured.
      unsigned ArgNo = CS.getArgumentNo(UI);
      if (CS.onlyReadsMemory() &&
          (CS.getInstruction()->use_empty() || CS.doesNotCapture(ArgNo)))
        continue;

      // If this is being passed as a byval argument, the caller is making a
      // copy, so it is only a read of the alloca.
      if (CS.isByValArgument(ArgNo))
        continue;
    }

    // Lifetime intrinsics can be handled by the caller: record them so they
    // can be deleted if the transform fires.
    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
      if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
          II->getIntrinsicID() == Intrinsic::lifetime_end) {
        assert(II->use_empty() && "Lifetime markers have no result to use!");
        ToDelete.push_back(II);
        continue;
      }
    }

    // If this isn't our memcpy/memmove, reject it as something we can't
    // handle.
    MemTransferInst *MI = dyn_cast<MemTransferInst>(U);
    if (MI == 0)
      return false;

    // If the transfer is using the alloca as a source of the transfer, then
    // ignore it since it is a load (unless the transfer is volatile).
    if (UI.getOperandNo() == 1) {
      if (MI->isVolatile()) return false;
      continue;
    }

    // If we already have seen a copy, reject the second one.
    if (TheCopy) return false;

    // If the pointer has been offset from the start of the alloca, we can't
    // safely handle this.
    if (IsOffset) return false;

    // If the memintrinsic isn't using the alloca as the dest, reject it.
    if (UI.getOperandNo() != 0) return false;

    // If the source of the memcpy/move is not a constant global, reject it.
    if (!pointsToConstantGlobal(MI->getSource()))
      return false;

    // Otherwise, the transform is safe.  Remember the copy instruction.
    TheCopy = MI;
  }
  return true;
}
/// isOnlyCopiedFromConstantGlobal - Return true if the specified alloca is only
/// modified by a copy from a constant global.  If we can prove this, we can
/// replace any uses of the alloca with uses of the global directly.
static MemTransferInst *
isOnlyCopiedFromConstantGlobal(AllocaInst *AI,
                               SmallVectorImpl<Instruction *> &ToDelete) {
  MemTransferInst *TheCopy = 0;
  if (isOnlyCopiedFromConstantGlobal(AI, TheCopy, ToDelete))
    return TheCopy;
  return 0;
}

// Canonicalize and simplify an alloca.  Returns the replacement instruction,
// the (modified) alloca itself to requeue it, or 0 when nothing changed.
Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
  // Ensure that the alloca array size argument has type intptr_t, so that
  // any casting is exposed early.
  if (TD) {
    Type *IntPtrTy = TD->getIntPtrType(AI.getContext());
    if (AI.getArraySize()->getType() != IntPtrTy) {
      Value *V = Builder->CreateIntCast(AI.getArraySize(),
                                        IntPtrTy, false);
      AI.setOperand(0, V);
      return &AI;
    }
  }

  // Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1
  if (AI.isArrayAllocation()) {  // Check C != 1
    if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {
      Type *NewTy =
        ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
      AllocaInst *New = Builder->CreateAlloca(NewTy, 0, AI.getName());
      New->setAlignment(AI.getAlignment());

      // Scan to the end of the allocation instructions, to skip over a block of
      // allocas if possible...also skip interleaved debug info
      //
      BasicBlock::iterator It = New;
      while (isa<AllocaInst>(*It) || isa<DbgInfoIntrinsic>(*It)) ++It;

      // Now that It is pointing to the first non-allocation-inst in the block,
      // insert our getelementptr instruction...
      //
      Value *NullIdx =Constant::getNullValue(Type::getInt32Ty(AI.getContext()));
      Value *Idx[2];
      Idx[0] = NullIdx;
      Idx[1] = NullIdx;
      Instruction *GEP =
        GetElementPtrInst::CreateInBounds(New, Idx, New->getName()+".sub");
      InsertNewInstBefore(GEP, *It);

      // Now make everything use the getelementptr instead of the original
      // allocation.
      return ReplaceInstUsesWith(AI, GEP);
    } else if (isa<UndefValue>(AI.getArraySize())) {
      // An undef array size means the alloca result is unusable; fold the
      // whole thing to null.
      return ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType()));
    }
  }

  if (TD && AI.getAllocatedType()->isSized()) {
    // If the alignment is 0 (unspecified), assign it the preferred alignment.
    if (AI.getAlignment() == 0)
      AI.setAlignment(TD->getPrefTypeAlignment(AI.getAllocatedType()));

    // Move all alloca's of zero byte objects to the entry block and merge them
    // together.  Note that we only do this for alloca's, because malloc should
    // allocate and return a unique pointer, even for a zero byte allocation.
    if (TD->getTypeAllocSize(AI.getAllocatedType()) == 0) {
      // For a zero sized alloca there is no point in doing an array allocation.
      // This is helpful if the array size is a complicated expression not used
      // elsewhere.
      if (AI.isArrayAllocation()) {
        AI.setOperand(0, ConstantInt::get(AI.getArraySize()->getType(), 1));
        return &AI;
      }

      // Get the first instruction in the entry block.
      BasicBlock &EntryBlock = AI.getParent()->getParent()->getEntryBlock();
      Instruction *FirstInst = EntryBlock.getFirstNonPHIOrDbg();
      if (FirstInst != &AI) {
        // If the entry block doesn't start with a zero-size alloca then move
        // this one to the start of the entry block.  There is no problem with
        // dominance as the array size was forced to a constant earlier already.
        AllocaInst *EntryAI = dyn_cast<AllocaInst>(FirstInst);
        if (!EntryAI || !EntryAI->getAllocatedType()->isSized() ||
            TD->getTypeAllocSize(EntryAI->getAllocatedType()) != 0) {
          AI.moveBefore(FirstInst);
          return &AI;
        }

        // If the alignment of the entry block alloca is 0 (unspecified),
        // assign it the preferred alignment.
        if (EntryAI->getAlignment() == 0)
          EntryAI->setAlignment(
            TD->getPrefTypeAlignment(EntryAI->getAllocatedType()));
        // Replace this zero-sized alloca with the one at the start of the entry
        // block after ensuring that the address will be aligned enough for both
        // types.
        unsigned MaxAlign = std::max(EntryAI->getAlignment(),
                                     AI.getAlignment());
        EntryAI->setAlignment(MaxAlign);
        if (AI.getType() != EntryAI->getType())
          return new BitCastInst(EntryAI, AI.getType());
        return ReplaceInstUsesWith(AI, EntryAI);
      }
    }
  }

  if (AI.getAlignment()) {
    // Check to see if this allocation is only modified by a memcpy/memmove from
    // a constant global whose alignment is equal to or exceeds that of the
    // allocation.  If this is the case, we can change all users to use
    // the constant global instead.  This is commonly produced by the CFE by
    // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A'
    // is only subsequently read.
    SmallVector<Instruction *, 4> ToDelete;
    if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(&AI, ToDelete)) {
      unsigned SourceAlign = getOrEnforceKnownAlignment(Copy->getSource(),
                                                        AI.getAlignment(), TD);
      if (AI.getAlignment() <= SourceAlign) {
        DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n');
        DEBUG(dbgs() << "  memcpy = " << *Copy << '\n');
        for (unsigned i = 0, e = ToDelete.size(); i != e; ++i)
          EraseInstFromFunction(*ToDelete[i]);
        Constant *TheSrc = cast<Constant>(Copy->getSource());
        Instruction *NewI
          = ReplaceInstUsesWith(AI, ConstantExpr::getBitCast(TheSrc,
                                                             AI.getType()));
        EraseInstFromFunction(*Copy);
        ++NumGlobalCopies;
        return NewI;
      }
    }
  }

  // At last, use the generic allocation site handler to aggressively remove
  // unused allocas.
  return visitAllocSite(AI);
}


/// InstCombineLoadCast - Fold 'load (cast P)' -> 'cast (load P)' when possible.
static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
                                        const DataLayout *TD) {
  User *CI = cast<User>(LI.getOperand(0));
  Value *CastOp = CI->getOperand(0);

  PointerType *DestTy = cast<PointerType>(CI->getType());
  Type *DestPTy = DestTy->getElementType();
  if (PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType())) {

    // If the address spaces don't match, don't eliminate the cast.
    if (DestTy->getAddressSpace() != SrcTy->getAddressSpace())
      return 0;

    Type *SrcPTy = SrcTy->getElementType();

    if (DestPTy->isIntegerTy() || DestPTy->isPointerTy() ||
         DestPTy->isVectorTy()) {
      // If the source is an array, the code below will not succeed.  Check to
      // see if a trivial 'gep P, 0, 0' will help matters.  Only do this for
      // constants.
      if (ArrayType *ASrcTy = dyn_cast<ArrayType>(SrcPTy))
        if (Constant *CSrc = dyn_cast<Constant>(CastOp))
          if (ASrcTy->getNumElements() != 0) {
            Value *Idxs[2];
            Idxs[0] = Constant::getNullValue(Type::getInt32Ty(LI.getContext()));
            Idxs[1] = Idxs[0];
            CastOp = ConstantExpr::getGetElementPtr(CSrc, Idxs);
            SrcTy = cast<PointerType>(CastOp->getType());
            SrcPTy = SrcTy->getElementType();
          }

      if (IC.getDataLayout() &&
          (SrcPTy->isIntegerTy() || SrcPTy->isPointerTy() ||
            SrcPTy->isVectorTy()) &&
          // Do not allow turning this into a load of an integer, which is then
          // casted to a pointer, this pessimizes pointer analysis a lot.
          (SrcPTy->isPointerTy() == LI.getType()->isPointerTy()) &&
          IC.getDataLayout()->getTypeSizeInBits(SrcPTy) ==
               IC.getDataLayout()->getTypeSizeInBits(DestPTy)) {

        // Okay, we are casting from one integer or pointer type to another of
        // the same size.  Instead of casting the pointer before the load, cast
        // the result of the loaded value.
        LoadInst *NewLoad =
          IC.Builder->CreateLoad(CastOp, LI.isVolatile(), CI->getName());
        NewLoad->setAlignment(LI.getAlignment());
        NewLoad->setAtomic(LI.getOrdering(), LI.getSynchScope());
        // Now cast the result of the load.
        return new BitCastInst(NewLoad, LI.getType());
      }
    }
  }
  return 0;
}

// Simplify a load.  Returns a replacement instruction, the load itself if it
// was modified in place, or 0 when no transform applied.
Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
  Value *Op = LI.getOperand(0);

  // Attempt to improve the alignment.
  if (TD) {
    unsigned KnownAlign =
      getOrEnforceKnownAlignment(Op, TD->getPrefTypeAlignment(LI.getType()),TD);
    unsigned LoadAlign = LI.getAlignment();
    // Alignment 0 means "unspecified"; its effective value is the ABI
    // alignment of the loaded type.
    unsigned EffectiveLoadAlign = LoadAlign != 0 ? LoadAlign :
      TD->getABITypeAlignment(LI.getType());

    if (KnownAlign > EffectiveLoadAlign)
      LI.setAlignment(KnownAlign);
    else if (LoadAlign == 0)
      LI.setAlignment(EffectiveLoadAlign);
  }

  // load (cast X) --> cast (load X) iff safe.
  if (isa<CastInst>(Op))
    if (Instruction *Res = InstCombineLoadCast(*this, LI, TD))
      return Res;

  // None of the following transforms are legal for volatile/atomic loads.
  // FIXME: Some of it is okay for atomic loads; needs refactoring.
  if (!LI.isSimple()) return 0;

  // Do really simple store-to-load forwarding and load CSE, to catch cases
  // where there are several consecutive memory accesses to the same location,
  // separated by a few arithmetic operations.
  BasicBlock::iterator BBI = &LI;
  if (Value *AvailableVal = FindAvailableLoadedValue(Op, LI.getParent(), BBI,6))
    return ReplaceInstUsesWith(LI, AvailableVal);

  // load(gep null, ...) -> unreachable
  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) {
    const Value *GEPI0 = GEPI->getOperand(0);
    // TODO: Consider a target hook for valid address spaces for this xform.
    if (isa<ConstantPointerNull>(GEPI0) && GEPI->getPointerAddressSpace() == 0){
      // Insert a new store to null instruction before the load to indicate
      // that this code is not reachable.  We do this instead of inserting
      // an unreachable instruction directly because we cannot modify the
      // CFG.
      new StoreInst(UndefValue::get(LI.getType()),
                    Constant::getNullValue(Op->getType()), &LI);
      return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
    }
  }

  // load null/undef -> unreachable
  // TODO: Consider a target hook for valid address spaces for this xform.
  if (isa<UndefValue>(Op) ||
      (isa<ConstantPointerNull>(Op) && LI.getPointerAddressSpace() == 0)) {
    // Insert a new store to null instruction before the load to indicate that
    // this code is not reachable.  We do this instead of inserting an
    // unreachable instruction directly because we cannot modify the CFG.
    new StoreInst(UndefValue::get(LI.getType()),
                  Constant::getNullValue(Op->getType()), &LI);
    return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
  }

  // Instcombine load (constantexpr_cast global) -> cast (load global)
  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Op))
    if (CE->isCast())
      if (Instruction *Res = InstCombineLoadCast(*this, LI, TD))
        return Res;

  if (Op->hasOneUse()) {
    // Change select and PHI nodes to select values instead of addresses: this
    // helps alias analysis out a lot, allows many others simplifications, and
    // exposes redundancy in the code.
    //
    // Note that we cannot do the transformation unless we know that the
    // introduced loads cannot trap!  Something like this is valid as long as
    // the condition is always false: load (select bool %C, int* null, int* %G),
    // but it would not be valid if we transformed it to load from null
    // unconditionally.
    //
    if (SelectInst *SI = dyn_cast<SelectInst>(Op)) {
      // load (select (Cond, &V1, &V2))  --> select(Cond, load &V1, load &V2).
      unsigned Align = LI.getAlignment();
      if (isSafeToLoadUnconditionally(SI->getOperand(1), SI, Align, TD) &&
          isSafeToLoadUnconditionally(SI->getOperand(2), SI, Align, TD)) {
        LoadInst *V1 = Builder->CreateLoad(SI->getOperand(1),
                                           SI->getOperand(1)->getName()+".val");
        LoadInst *V2 = Builder->CreateLoad(SI->getOperand(2),
                                           SI->getOperand(2)->getName()+".val");
        V1->setAlignment(Align);
        V2->setAlignment(Align);
        return SelectInst::Create(SI->getCondition(), V1, V2);
      }

      // load (select (cond, null, P)) -> load P
      if (Constant *C = dyn_cast<Constant>(SI->getOperand(1)))
        if (C->isNullValue()) {
          LI.setOperand(0, SI->getOperand(2));
          return &LI;
        }

      // load (select (cond, P, null)) -> load P
      if (Constant *C = dyn_cast<Constant>(SI->getOperand(2)))
        if (C->isNullValue()) {
          LI.setOperand(0, SI->getOperand(1));
          return &LI;
        }
    }
  }
  return 0;
}

/// InstCombineStoreToCast - Fold store V, (cast P) -> store (cast V), P
/// when possible.  This makes it generally easy to do alias analysis and/or
/// SROA/mem2reg of the memory object.
static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
  User *CI = cast<User>(SI.getOperand(1));
  Value *CastOp = CI->getOperand(0);

  Type *DestPTy = cast<PointerType>(CI->getType())->getElementType();
  PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType());
  if (SrcTy == 0) return 0;

  Type *SrcPTy = SrcTy->getElementType();

  if (!DestPTy->isIntegerTy() && !DestPTy->isPointerTy())
    return 0;

  /// NewGEPIndices - If SrcPTy is an aggregate type, we can emit a "noop gep"
  /// to its first element.  This allows us to handle things like:
  ///   store i32 xxx, (bitcast {foo*, float}* %P to i32*)
  /// on 32-bit hosts.
  SmallVector<Value*, 4> NewGEPIndices;

  // If the source is an array, the code below will not succeed.  Check to
  // see if a trivial 'gep P, 0, 0' will help matters.  Only do this for
  // constants.
  if (SrcPTy->isArrayTy() || SrcPTy->isStructTy()) {
    // Index through pointer.
    Constant *Zero = Constant::getNullValue(Type::getInt32Ty(SI.getContext()));
    NewGEPIndices.push_back(Zero);

    // Drill through nested aggregates to the first scalar element, collecting
    // a zero index for each level.
    while (1) {
      if (StructType *STy = dyn_cast<StructType>(SrcPTy)) {
        if (!STy->getNumElements()) /* Struct can be empty {} */
          break;
        NewGEPIndices.push_back(Zero);
        SrcPTy = STy->getElementType(0);
      } else if (ArrayType *ATy = dyn_cast<ArrayType>(SrcPTy)) {
        NewGEPIndices.push_back(Zero);
        SrcPTy = ATy->getElementType();
      } else {
        break;
      }
    }

    SrcTy = PointerType::get(SrcPTy, SrcTy->getAddressSpace());
  }

  if (!SrcPTy->isIntegerTy() && !SrcPTy->isPointerTy())
    return 0;

  // If the pointers point into different address spaces or if they point to
  // values with different sizes, we can't do the transformation.
  if (!IC.getDataLayout() ||
      SrcTy->getAddressSpace() !=
        cast<PointerType>(CI->getType())->getAddressSpace() ||
      IC.getDataLayout()->getTypeSizeInBits(SrcPTy) !=
      IC.getDataLayout()->getTypeSizeInBits(DestPTy))
    return 0;

  // Okay, we are casting from one integer or pointer type to another of
  // the same size.  Instead of casting the pointer before
  // the store, cast the value to be stored.
  Value *NewCast;
  Value *SIOp0 = SI.getOperand(0);
  Instruction::CastOps opcode = Instruction::BitCast;
  Type* CastSrcTy = SIOp0->getType();
  Type* CastDstTy = SrcPTy;
  if (CastDstTy->isPointerTy()) {
    if (CastSrcTy->isIntegerTy())
      opcode = Instruction::IntToPtr;
  } else if (CastDstTy->isIntegerTy()) {
    if (SIOp0->getType()->isPointerTy())
      opcode = Instruction::PtrToInt;
  }

  // SIOp0 is a pointer to aggregate and this is a store to the first field,
  // emit a GEP to index into its first field.
  if (!NewGEPIndices.empty())
    CastOp = IC.Builder->CreateInBoundsGEP(CastOp, NewGEPIndices);

  NewCast = IC.Builder->CreateCast(opcode, SIOp0, CastDstTy,
                                   SIOp0->getName()+".c");
  SI.setOperand(0, NewCast);
  SI.setOperand(1, CastOp);
  return &SI;
}

/// equivalentAddressValues - Test if A and B will obviously have the same
/// value.  This includes recognizing that %t0 and %t1 will have the same
/// value in code like this:
///   %t0 = getelementptr \@a, 0, 3
///   store i32 0, i32* %t0
///   %t1 = getelementptr \@a, 0, 3
///   %t2 = load i32* %t1
///
static bool equivalentAddressValues(Value *A, Value *B) {
  // Test if the values are trivially equivalent.
  if (A == B) return true;

  // Test if the values come from identical arithmetic instructions.
  // This uses isIdenticalToWhenDefined instead of isIdenticalTo because
  // it's only used to compare two uses within the same basic block, which
  // means that they'll always either have the same value or one of them
  // will have an undefined value.
  if (isa<BinaryOperator>(A) ||
      isa<CastInst>(A) ||
      isa<PHINode>(A) ||
      isa<GetElementPtrInst>(A))
    if (Instruction *BI = dyn_cast<Instruction>(B))
      if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI))
        return true;

  // Otherwise they may not be equivalent.
  return false;
}

// Simplify a store.  Returns a replacement instruction, the store itself if
// modified in place, or 0 when no transform applied.
Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
  Value *Val = SI.getOperand(0);
  Value *Ptr = SI.getOperand(1);

  // Attempt to improve the alignment.
  if (TD) {
    unsigned KnownAlign =
      getOrEnforceKnownAlignment(Ptr, TD->getPrefTypeAlignment(Val->getType()),
                                 TD);
    unsigned StoreAlign = SI.getAlignment();
    // Alignment 0 means "unspecified"; its effective value is the ABI
    // alignment of the stored type.
    unsigned EffectiveStoreAlign = StoreAlign != 0 ? StoreAlign :
      TD->getABITypeAlignment(Val->getType());

    if (KnownAlign > EffectiveStoreAlign)
      SI.setAlignment(KnownAlign);
    else if (StoreAlign == 0)
      SI.setAlignment(EffectiveStoreAlign);
  }

  // Don't hack volatile/atomic stores.
  // FIXME: Some bits are legal for atomic stores; needs refactoring.
  if (!SI.isSimple()) return 0;

  // If the RHS is an alloca with a single use, zapify the store, making the
  // alloca dead.
  if (Ptr->hasOneUse()) {
    if (isa<AllocaInst>(Ptr))
      return EraseInstFromFunction(SI);
    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
      if (isa<AllocaInst>(GEP->getOperand(0))) {
        if (GEP->getOperand(0)->hasOneUse())
          return EraseInstFromFunction(SI);
      }
    }
  }

  // Do really simple DSE, to catch cases where there are several consecutive
  // stores to the same location, separated by a few arithmetic operations. This
  // situation often occurs with bitfield accesses.
  BasicBlock::iterator BBI = &SI;
  for (unsigned ScanInsts = 6; BBI != SI.getParent()->begin() && ScanInsts;
       --ScanInsts) {
    --BBI;
    // Don't count debug info directives, lest they affect codegen,
    // and we skip pointer-to-pointer bitcasts, which are NOPs.
    if (isa<DbgInfoIntrinsic>(BBI) ||
        (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
      ScanInsts++;
      continue;
    }

    if (StoreInst *PrevSI = dyn_cast<StoreInst>(BBI)) {
      // Prev store isn't volatile, and stores to the same location?
      if (PrevSI->isSimple() && equivalentAddressValues(PrevSI->getOperand(1),
                                                        SI.getOperand(1))) {
        ++NumDeadStore;
        ++BBI;
        EraseInstFromFunction(*PrevSI);
        continue;
      }
      break;
    }

    // If this is a load, we have to stop.  However, if the loaded value is from
    // the pointer we're loading and is producing the pointer we're storing,
    // then *this* store is dead (X = load P; store X -> P).
    if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
      if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr) &&
          LI->isSimple())
        return EraseInstFromFunction(SI);

      // Otherwise, this is a load from some other location.  Stores before it
      // may not be dead.
      break;
    }

    // Don't skip over loads or things that can modify memory.
    if (BBI->mayWriteToMemory() || BBI->mayReadFromMemory())
      break;
  }

  // store X, null    -> turns into 'unreachable' in SimplifyCFG
  if (isa<ConstantPointerNull>(Ptr) && SI.getPointerAddressSpace() == 0) {
    if (!isa<UndefValue>(Val)) {
      SI.setOperand(0, UndefValue::get(Val->getType()));
      if (Instruction *U = dyn_cast<Instruction>(Val))
        Worklist.Add(U);  // Dropped a use.
    }
    return 0;  // Do not modify these!
  }

  // store undef, Ptr -> noop
  if (isa<UndefValue>(Val))
    return EraseInstFromFunction(SI);

  // If the pointer destination is a cast, see if we can fold the cast into the
  // source instead.
  if (isa<CastInst>(Ptr))
    if (Instruction *Res = InstCombineStoreToCast(*this, SI))
      return Res;
  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))
    if (CE->isCast())
      if (Instruction *Res = InstCombineStoreToCast(*this, SI))
        return Res;


  // If this store is the last instruction in the basic block (possibly
  // excepting debug info instructions), and if the block ends with an
  // unconditional branch, try to move it to the successor block.
  BBI = &SI;
  do {
    ++BBI;
  } while (isa<DbgInfoIntrinsic>(BBI) ||
           (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy()));
  if (BranchInst *BI = dyn_cast<BranchInst>(BBI))
    if (BI->isUnconditional())
      if (SimplifyStoreAtEndOfBlock(SI))
        return 0;  // xform done!

  return 0;
}

/// SimplifyStoreAtEndOfBlock - Turn things like:
///   if () { *P = v1; } else { *P = v2 }
/// into a phi node with a store in the successor.
///
/// Simplify things like:
///   *P = v1; if () { *P = v2; }
/// into a phi node with a store in the successor.
///
bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
  BasicBlock *StoreBB = SI.getParent();

  // Check to see if the successor block has exactly two incoming edges.  If
  // so, see if the other predecessor contains a store to the same location.
  // if so, insert a PHI node (if needed) and move the stores down.
  BasicBlock *DestBB = StoreBB->getTerminator()->getSuccessor(0);

  // Determine whether Dest has exactly two predecessors and, if so, compute
  // the other predecessor.
  pred_iterator PI = pred_begin(DestBB);
  BasicBlock *P = *PI;
  BasicBlock *OtherBB = 0;

  if (P != StoreBB)
    OtherBB = P;

  if (++PI == pred_end(DestBB))
    return false;  // Only one predecessor.

  P = *PI;
  if (P != StoreBB) {
    if (OtherBB)  // Neither predecessor is StoreBB; give up.
      return false;
    OtherBB = P;
  }
  if (++PI != pred_end(DestBB))
    return false;  // More than two predecessors.

  // Bail out if all the relevant blocks aren't distinct (this can happen,
  // for example, if SI is in an infinite loop)
  if (StoreBB == DestBB || OtherBB == DestBB)
    return false;

  // Verify that the other block ends in a branch and is not otherwise empty.
  BasicBlock::iterator BBI = OtherBB->getTerminator();
  BranchInst *OtherBr = dyn_cast<BranchInst>(BBI);
  if (!OtherBr || BBI == OtherBB->begin())
    return false;

  // If the other block ends in an unconditional branch, check for the 'if then
  // else' case.  there is an instruction before the branch.
  StoreInst *OtherStore = 0;
  if (OtherBr->isUnconditional()) {
    --BBI;
    // Skip over debugging info.
    while (isa<DbgInfoIntrinsic>(BBI) ||
           (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
      if (BBI==OtherBB->begin())
        return false;
      --BBI;
    }
    // If this isn't a store, isn't a store to the same location, or is not the
    // right kind of store, bail out.
    OtherStore = dyn_cast<StoreInst>(BBI);
    if (!OtherStore || OtherStore->getOperand(1) != SI.getOperand(1) ||
        !SI.isSameOperationAs(OtherStore))
      return false;
  } else {
    // Otherwise, the other block ended with a conditional branch. If one of the
    // destinations is StoreBB, then we have the if/then case.
    if (OtherBr->getSuccessor(0) != StoreBB &&
        OtherBr->getSuccessor(1) != StoreBB)
      return false;

    // Okay, we know that OtherBr now goes to Dest and StoreBB, so this is an
    // if/then triangle.  See if there is a store to the same ptr as SI that
    // lives in OtherBB.
    for (;; --BBI) {
      // Check to see if we find the matching store.
      if ((OtherStore = dyn_cast<StoreInst>(BBI))) {
        if (OtherStore->getOperand(1) != SI.getOperand(1) ||
            !SI.isSameOperationAs(OtherStore))
          return false;
        break;
      }
      // If we find something that may be using or overwriting the stored
      // value, or if we run out of instructions, we can't do the xform.
      if (BBI->mayReadFromMemory() || BBI->mayWriteToMemory() ||
          BBI == OtherBB->begin())
        return false;
    }

    // In order to eliminate the store in OtherBr, we have to
    // make sure nothing reads or overwrites the stored value in
    // StoreBB.
    for (BasicBlock::iterator I = StoreBB->begin(); &*I != &SI; ++I) {
      // FIXME: This should really be AA driven.
      if (I->mayReadFromMemory() || I->mayWriteToMemory())
        return false;
    }
  }

  // Insert a PHI node now if we need it.  If both stores store the very same
  // value, no PHI is necessary.
  Value *MergedVal = OtherStore->getOperand(0);
  if (MergedVal != SI.getOperand(0)) {
    PHINode *PN = PHINode::Create(MergedVal->getType(), 2, "storemerge");
    PN->addIncoming(SI.getOperand(0), SI.getParent());
    PN->addIncoming(OtherStore->getOperand(0), OtherBB);
    MergedVal = InsertNewInstBefore(PN, DestBB->front());
  }

  // Advance to a place where it is safe to insert the new store and
  // insert it.  The new store copies SI's volatility/alignment/ordering;
  // OtherStore passed isSameOperationAs(SI) above, so they agree.
  BBI = DestBB->getFirstInsertionPt();
  StoreInst *NewSI = new StoreInst(MergedVal, SI.getOperand(1),
                                   SI.isVolatile(),
                                   SI.getAlignment(),
                                   SI.getOrdering(),
                                   SI.getSynchScope());
  InsertNewInstBefore(NewSI, *BBI);
  NewSI->setDebugLoc(OtherStore->getDebugLoc());

  // If the two stores had the same TBAA tag, preserve it.
  if (MDNode *TBAATag = SI.getMetadata(LLVMContext::MD_tbaa))
    if ((TBAATag = MDNode::getMostGenericTBAA(TBAATag,
                               OtherStore->getMetadata(LLVMContext::MD_tbaa))))
      NewSI->setMetadata(LLVMContext::MD_tbaa, TBAATag);


  // Nuke the old stores.
  EraseInstFromFunction(SI);
  EraseInstFromFunction(*OtherStore);
  return true;
}