InstCombineLoadStoreAlloca.cpp revision 243830
1202375Srdivacky//===- InstCombineLoadStoreAlloca.cpp -------------------------------------===// 2202375Srdivacky// 3202375Srdivacky// The LLVM Compiler Infrastructure 4202375Srdivacky// 5202375Srdivacky// This file is distributed under the University of Illinois Open Source 6202375Srdivacky// License. See LICENSE.TXT for details. 7202375Srdivacky// 8202375Srdivacky//===----------------------------------------------------------------------===// 9202375Srdivacky// 10202375Srdivacky// This file implements the visit functions for load, store and alloca. 11202375Srdivacky// 12202375Srdivacky//===----------------------------------------------------------------------===// 13202375Srdivacky 14202375Srdivacky#include "InstCombine.h" 15202375Srdivacky#include "llvm/IntrinsicInst.h" 16210299Sed#include "llvm/Analysis/Loads.h" 17243830Sdim#include "llvm/DataLayout.h" 18202375Srdivacky#include "llvm/Transforms/Utils/BasicBlockUtils.h" 19202375Srdivacky#include "llvm/Transforms/Utils/Local.h" 20202375Srdivacky#include "llvm/ADT/Statistic.h" 21202375Srdivackyusing namespace llvm; 22202375Srdivacky 23243830SdimSTATISTIC(NumDeadStore, "Number of dead stores eliminated"); 24243830SdimSTATISTIC(NumGlobalCopies, "Number of allocas copied from constant global"); 25202375Srdivacky 26243830Sdim/// pointsToConstantGlobal - Return true if V (possibly indirectly) points to 27243830Sdim/// some part of a constant global variable. This intentionally only accepts 28243830Sdim/// constant expressions because we can't rewrite arbitrary instructions. 
static bool pointsToConstantGlobal(Value *V) {
  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
    return GV->isConstant();
  // Look through constant-expression bitcasts and GEPs only; arbitrary
  // instructions are intentionally not handled (see comment above).
  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
    if (CE->getOpcode() == Instruction::BitCast ||
        CE->getOpcode() == Instruction::GetElementPtr)
      return pointsToConstantGlobal(CE->getOperand(0));
  return false;
}

/// isOnlyCopiedFromConstantGlobal - Recursively walk the uses of a (derived)
/// pointer to an alloca.  Ignore any reads of the pointer, return false if we
/// see any stores or other unknown uses.  If we see pointer arithmetic, keep
/// track of whether it moves the pointer (with IsOffset) but otherwise traverse
/// the uses.  If we see a memcpy/memmove that targets an unoffseted pointer to
/// the alloca, and if the source pointer is a pointer to a constant global, we
/// can optimize this.
static bool
isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
                               SmallVectorImpl<Instruction *> &ToDelete,
                               bool IsOffset = false) {
  // We track lifetime intrinsics as we encounter them.  If we decide to go
  // ahead and replace the value with the global, this lets the caller quickly
  // eliminate the markers.

  for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) {
    // NOTE(review): this asserts every user is an Instruction; a ConstantExpr
    // user of the (derived) pointer would trip the cast.  Presumably alloca
    // derived pointers only ever have Instruction users here -- confirm.
    User *U = cast<Instruction>(*UI);

    if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
      // Ignore simple (non-volatile, non-atomic) loads, they are always ok.
      if (!LI->isSimple()) return false;
      continue;
    }

    if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
      // If uses of the bitcast are ok, we are ok.
      if (!isOnlyCopiedFromConstantGlobal(BCI, TheCopy, ToDelete, IsOffset))
        return false;
      continue;
    }
    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
      // If the GEP has all zero indices, it doesn't offset the pointer.  If it
      // doesn't, it does.
      if (!isOnlyCopiedFromConstantGlobal(GEP, TheCopy, ToDelete,
                                          IsOffset || !GEP->hasAllZeroIndices()))
        return false;
      continue;
    }

    if (CallSite CS = U) {
      // If this is the function being called then we treat it like a load and
      // ignore it.
      if (CS.isCallee(UI))
        continue;

      // If this is a readonly/readnone call site, then we know it is just a
      // load (but one that potentially returns the value itself), so we can
      // ignore it if we know that the value isn't captured.
      unsigned ArgNo = CS.getArgumentNo(UI);
      if (CS.onlyReadsMemory() &&
          (CS.getInstruction()->use_empty() || CS.doesNotCapture(ArgNo)))
        continue;

      // If this is being passed as a byval argument, the caller is making a
      // copy, so it is only a read of the alloca.
      if (CS.isByValArgument(ArgNo))
        continue;
    }

    // Lifetime intrinsics can be handled by the caller.
    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
      if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
          II->getIntrinsicID() == Intrinsic::lifetime_end) {
        assert(II->use_empty() && "Lifetime markers have no result to use!");
        ToDelete.push_back(II);
        continue;
      }
    }

    // If this isn't our memcpy/memmove, reject it as something we can't
    // handle.
    MemTransferInst *MI = dyn_cast<MemTransferInst>(U);
    if (MI == 0)
      return false;

    // If the transfer is using the alloca as a source of the transfer, then
    // ignore it since it is a load (unless the transfer is volatile).
    if (UI.getOperandNo() == 1) {
      if (MI->isVolatile()) return false;
      continue;
    }

    // If we already have seen a copy, reject the second one.
    if (TheCopy) return false;

    // If the pointer has been offset from the start of the alloca, we can't
    // safely handle this.
    if (IsOffset) return false;

    // If the memintrinsic isn't using the alloca as the dest, reject it.
    if (UI.getOperandNo() != 0) return false;

    // If the source of the memcpy/move is not a constant global, reject it.
    if (!pointsToConstantGlobal(MI->getSource()))
      return false;

    // Otherwise, the transform is safe.  Remember the copy instruction.
    TheCopy = MI;
  }
  return true;
}

/// isOnlyCopiedFromConstantGlobal - Return true if the specified alloca is only
/// modified by a copy from a constant global.  If we can prove this, we can
/// replace any uses of the alloca with uses of the global directly.
static MemTransferInst *
isOnlyCopiedFromConstantGlobal(AllocaInst *AI,
                               SmallVectorImpl<Instruction *> &ToDelete) {
  MemTransferInst *TheCopy = 0;
  if (isOnlyCopiedFromConstantGlobal(AI, TheCopy, ToDelete))
    return TheCopy;
  return 0;
}

/// getPointeeAlignment - Compute the minimum alignment of the value pointed
/// to by the given pointer.  Returns 0 when no alignment can be determined.
static unsigned getPointeeAlignment(Value *V, const DataLayout &TD) {
  // Look through bitcasts and zero-index GEPs, which don't change the address.
  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
    if (CE->getOpcode() == Instruction::BitCast ||
        (CE->getOpcode() == Instruction::GetElementPtr &&
         cast<GEPOperator>(CE)->hasAllZeroIndices()))
      return getPointeeAlignment(CE->getOperand(0), TD);

  // For a definition we control, the preferred alignment is what the global
  // will actually get.
  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
    if (!GV->isDeclaration())
      return TD.getPreferredAlignment(GV);

  // Otherwise fall back to the ABI alignment of the pointee type.
  if (PointerType *PT = dyn_cast<PointerType>(V->getType()))
    if (PT->getElementType()->isSized())
      return TD.getABITypeAlignment(PT->getElementType());

  return 0;
}

Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
  // Ensure that the alloca array size argument has type intptr_t, so that
  // any casting is exposed early.
  if (TD) {
    Type *IntPtrTy = TD->getIntPtrType(AI.getContext());
    if (AI.getArraySize()->getType() != IntPtrTy) {
      Value *V = Builder->CreateIntCast(AI.getArraySize(),
                                        IntPtrTy, false);
      AI.setOperand(0, V);
      return &AI;
    }
  }

  // Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1
  if (AI.isArrayAllocation()) {  // Check C != 1
    if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {
      Type *NewTy =
        ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
      AllocaInst *New = Builder->CreateAlloca(NewTy, 0, AI.getName());
      New->setAlignment(AI.getAlignment());

      // Scan to the end of the allocation instructions, to skip over a block of
      // allocas if possible...also skip interleaved debug info
      //
      BasicBlock::iterator It = New;
      while (isa<AllocaInst>(*It) || isa<DbgInfoIntrinsic>(*It)) ++It;

      // Now that I is pointing to the first non-allocation-inst in the block,
      // insert our getelementptr instruction...
      //
      Value *NullIdx =Constant::getNullValue(Type::getInt32Ty(AI.getContext()));
      Value *Idx[2];
      Idx[0] = NullIdx;
      Idx[1] = NullIdx;
      Instruction *GEP =
           GetElementPtrInst::CreateInBounds(New, Idx, New->getName()+".sub");
      InsertNewInstBefore(GEP, *It);

      // Now make everything use the getelementptr instead of the original
      // allocation.
      return ReplaceInstUsesWith(AI, GEP);
    } else if (isa<UndefValue>(AI.getArraySize())) {
      return ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType()));
    }
  }

  if (TD && AI.getAllocatedType()->isSized()) {
    // If the alignment is 0 (unspecified), assign it the preferred alignment.
    if (AI.getAlignment() == 0)
      AI.setAlignment(TD->getPrefTypeAlignment(AI.getAllocatedType()));

    // Move all alloca's of zero byte objects to the entry block and merge them
    // together.  Note that we only do this for alloca's, because malloc should
    // allocate and return a unique pointer, even for a zero byte allocation.
    if (TD->getTypeAllocSize(AI.getAllocatedType()) == 0) {
      // For a zero sized alloca there is no point in doing an array allocation.
      // This is helpful if the array size is a complicated expression not used
      // elsewhere.
      if (AI.isArrayAllocation()) {
        AI.setOperand(0, ConstantInt::get(AI.getArraySize()->getType(), 1));
        return &AI;
      }

      // Get the first instruction in the entry block.
      BasicBlock &EntryBlock = AI.getParent()->getParent()->getEntryBlock();
      Instruction *FirstInst = EntryBlock.getFirstNonPHIOrDbg();
      if (FirstInst != &AI) {
        // If the entry block doesn't start with a zero-size alloca then move
        // this one to the start of the entry block.  There is no problem with
        // dominance as the array size was forced to a constant earlier already.
        AllocaInst *EntryAI = dyn_cast<AllocaInst>(FirstInst);
        if (!EntryAI || !EntryAI->getAllocatedType()->isSized() ||
            TD->getTypeAllocSize(EntryAI->getAllocatedType()) != 0) {
          AI.moveBefore(FirstInst);
          return &AI;
        }

        // If the alignment of the entry block alloca is 0 (unspecified),
        // assign it the preferred alignment.
        if (EntryAI->getAlignment() == 0)
          EntryAI->setAlignment(
            TD->getPrefTypeAlignment(EntryAI->getAllocatedType()));
        // Replace this zero-sized alloca with the one at the start of the entry
        // block after ensuring that the address will be aligned enough for both
        // types.
        unsigned MaxAlign = std::max(EntryAI->getAlignment(),
                                     AI.getAlignment());
        EntryAI->setAlignment(MaxAlign);
        if (AI.getType() != EntryAI->getType())
          return new BitCastInst(EntryAI, AI.getType());
        return ReplaceInstUsesWith(AI, EntryAI);
      }
    }
  }

  if (TD) {
    // Check to see if this allocation is only modified by a memcpy/memmove from
    // a constant global whose alignment is equal to or exceeds that of the
    // allocation.  If this is the case, we can change all users to use
    // the constant global instead.  This is commonly produced by the CFE by
    // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A'
    // is only subsequently read.
    SmallVector<Instruction *, 4> ToDelete;
    if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(&AI, ToDelete)) {
      if (AI.getAlignment() <= getPointeeAlignment(Copy->getSource(), *TD)) {
        DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n');
        DEBUG(dbgs() << "  memcpy = " << *Copy << '\n');
        // Delete the lifetime markers first so the alloca's only remaining
        // users are the ones being rewritten to the global.
        for (unsigned i = 0, e = ToDelete.size(); i != e; ++i)
          EraseInstFromFunction(*ToDelete[i]);
        Constant *TheSrc = cast<Constant>(Copy->getSource());
        Instruction *NewI
          = ReplaceInstUsesWith(AI, ConstantExpr::getBitCast(TheSrc,
                                                             AI.getType()));
        EraseInstFromFunction(*Copy);
        ++NumGlobalCopies;
        return NewI;
      }
    }
  }

  // At last, use the generic allocation site handler to aggressively remove
  // unused allocas.
  return visitAllocSite(AI);
}


/// InstCombineLoadCast - Fold 'load (cast P)' -> cast (load P)' when possible.
static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
                                        const DataLayout *TD) {
  User *CI = cast<User>(LI.getOperand(0));
  Value *CastOp = CI->getOperand(0);

  PointerType *DestTy = cast<PointerType>(CI->getType());
  Type *DestPTy = DestTy->getElementType();
  if (PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType())) {

    // If the address spaces don't match, don't eliminate the cast.
    if (DestTy->getAddressSpace() != SrcTy->getAddressSpace())
      return 0;

    Type *SrcPTy = SrcTy->getElementType();

    if (DestPTy->isIntegerTy() || DestPTy->isPointerTy() ||
         DestPTy->isVectorTy()) {
      // If the source is an array, the code below will not succeed.  Check to
      // see if a trivial 'gep P, 0, 0' will help matters.  Only do this for
      // constants.
      if (ArrayType *ASrcTy = dyn_cast<ArrayType>(SrcPTy))
        if (Constant *CSrc = dyn_cast<Constant>(CastOp))
          if (ASrcTy->getNumElements() != 0) {
            Value *Idxs[2];
            Idxs[0] = Constant::getNullValue(Type::getInt32Ty(LI.getContext()));
            Idxs[1] = Idxs[0];
            CastOp = ConstantExpr::getGetElementPtr(CSrc, Idxs);
            SrcTy = cast<PointerType>(CastOp->getType());
            SrcPTy = SrcTy->getElementType();
          }

      if (IC.getDataLayout() &&
          (SrcPTy->isIntegerTy() || SrcPTy->isPointerTy() ||
            SrcPTy->isVectorTy()) &&
          // Do not allow turning this into a load of an integer, which is then
          // casted to a pointer, this pessimizes pointer analysis a lot.
          (SrcPTy->isPointerTy() == LI.getType()->isPointerTy()) &&
          IC.getDataLayout()->getTypeSizeInBits(SrcPTy) ==
               IC.getDataLayout()->getTypeSizeInBits(DestPTy)) {

        // Okay, we are casting from one integer or pointer type to another of
        // the same size.  Instead of casting the pointer before the load, cast
        // the result of the loaded value.
        LoadInst *NewLoad =
          IC.Builder->CreateLoad(CastOp, LI.isVolatile(), CI->getName());
        NewLoad->setAlignment(LI.getAlignment());
        NewLoad->setAtomic(LI.getOrdering(), LI.getSynchScope());
        // Now cast the result of the load.
        return new BitCastInst(NewLoad, LI.getType());
      }
    }
  }
  return 0;
}

Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
  Value *Op = LI.getOperand(0);

  // Attempt to improve the alignment.
  if (TD) {
    unsigned KnownAlign =
      getOrEnforceKnownAlignment(Op, TD->getPrefTypeAlignment(LI.getType()),TD);
    unsigned LoadAlign = LI.getAlignment();
    // An alignment of 0 on the instruction means "use the ABI alignment".
    unsigned EffectiveLoadAlign = LoadAlign != 0 ? LoadAlign :
      TD->getABITypeAlignment(LI.getType());

    if (KnownAlign > EffectiveLoadAlign)
      LI.setAlignment(KnownAlign);
    else if (LoadAlign == 0)
      LI.setAlignment(EffectiveLoadAlign);
  }

  // load (cast X) --> cast (load X) iff safe.
  if (isa<CastInst>(Op))
    if (Instruction *Res = InstCombineLoadCast(*this, LI, TD))
      return Res;

  // None of the following transforms are legal for volatile/atomic loads.
  // FIXME: Some of it is okay for atomic loads; needs refactoring.
  if (!LI.isSimple()) return 0;

  // Do really simple store-to-load forwarding and load CSE, to catch cases
  // where there are several consecutive memory accesses to the same location,
  // separated by a few arithmetic operations.
  BasicBlock::iterator BBI = &LI;
  if (Value *AvailableVal = FindAvailableLoadedValue(Op, LI.getParent(), BBI,6))
    return ReplaceInstUsesWith(LI, AvailableVal);

  // load(gep null, ...) -> unreachable
  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) {
    const Value *GEPI0 = GEPI->getOperand(0);
    // TODO: Consider a target hook for valid address spaces for this xform.
    if (isa<ConstantPointerNull>(GEPI0) && GEPI->getPointerAddressSpace() == 0){
      // Insert a new store to null instruction before the load to indicate
      // that this code is not reachable.  We do this instead of inserting
      // an unreachable instruction directly because we cannot modify the
      // CFG.
      new StoreInst(UndefValue::get(LI.getType()),
                    Constant::getNullValue(Op->getType()), &LI);
      return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
    }
  }

  // load null/undef -> unreachable
  // TODO: Consider a target hook for valid address spaces for this xform.
  if (isa<UndefValue>(Op) ||
      (isa<ConstantPointerNull>(Op) && LI.getPointerAddressSpace() == 0)) {
    // Insert a new store to null instruction before the load to indicate that
    // this code is not reachable.  We do this instead of inserting an
    // unreachable instruction directly because we cannot modify the CFG.
    new StoreInst(UndefValue::get(LI.getType()),
                  Constant::getNullValue(Op->getType()), &LI);
    return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
  }

  // Instcombine load (constantexpr_cast global) -> cast (load global)
  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Op))
    if (CE->isCast())
      if (Instruction *Res = InstCombineLoadCast(*this, LI, TD))
        return Res;

  if (Op->hasOneUse()) {
    // Change select and PHI nodes to select values instead of addresses: this
    // helps alias analysis out a lot, allows many others simplifications, and
    // exposes redundancy in the code.
    //
    // Note that we cannot do the transformation unless we know that the
    // introduced loads cannot trap!  Something like this is valid as long as
    // the condition is always false: load (select bool %C, int* null, int* %G),
    // but it would not be valid if we transformed it to load from null
    // unconditionally.
    //
    if (SelectInst *SI = dyn_cast<SelectInst>(Op)) {
      // load (select (Cond, &V1, &V2))  --> select(Cond, load &V1, load &V2).
      unsigned Align = LI.getAlignment();
      if (isSafeToLoadUnconditionally(SI->getOperand(1), SI, Align, TD) &&
          isSafeToLoadUnconditionally(SI->getOperand(2), SI, Align, TD)) {
        LoadInst *V1 = Builder->CreateLoad(SI->getOperand(1),
                                           SI->getOperand(1)->getName()+".val");
        LoadInst *V2 = Builder->CreateLoad(SI->getOperand(2),
                                           SI->getOperand(2)->getName()+".val");
        V1->setAlignment(Align);
        V2->setAlignment(Align);
        return SelectInst::Create(SI->getCondition(), V1, V2);
      }

      // load (select (cond, null, P)) -> load P
      if (Constant *C = dyn_cast<Constant>(SI->getOperand(1)))
        if (C->isNullValue()) {
          LI.setOperand(0, SI->getOperand(2));
          return &LI;
        }

      // load (select (cond, P, null)) -> load P
      if (Constant *C = dyn_cast<Constant>(SI->getOperand(2)))
        if (C->isNullValue()) {
          LI.setOperand(0, SI->getOperand(1));
          return &LI;
        }
    }
  }
  return 0;
}

/// InstCombineStoreToCast - Fold store V, (cast P) -> store (cast V), P
/// when possible.  This makes it generally easy to do alias analysis and/or
/// SROA/mem2reg of the memory object.
static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
  User *CI = cast<User>(SI.getOperand(1));
  Value *CastOp = CI->getOperand(0);

  Type *DestPTy = cast<PointerType>(CI->getType())->getElementType();
  PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType());
  if (SrcTy == 0) return 0;

  Type *SrcPTy = SrcTy->getElementType();

  if (!DestPTy->isIntegerTy() && !DestPTy->isPointerTy())
    return 0;

  /// NewGEPIndices - If SrcPTy is an aggregate type, we can emit a "noop gep"
  /// to its first element.  This allows us to handle things like:
  ///   store i32 xxx, (bitcast {foo*, float}* %P to i32*)
  /// on 32-bit hosts.
  SmallVector<Value*, 4> NewGEPIndices;

  // If the source is an array, the code below will not succeed.  Check to
  // see if a trivial 'gep P, 0, 0' will help matters.  Only do this for
  // constants.
  if (SrcPTy->isArrayTy() || SrcPTy->isStructTy()) {
    // Index through pointer.
    Constant *Zero = Constant::getNullValue(Type::getInt32Ty(SI.getContext()));
    NewGEPIndices.push_back(Zero);

    // Descend into the first element of nested aggregates until we reach a
    // non-aggregate type, collecting a zero index per level.
    while (1) {
      if (StructType *STy = dyn_cast<StructType>(SrcPTy)) {
        if (!STy->getNumElements()) /* Struct can be empty {} */
          break;
        NewGEPIndices.push_back(Zero);
        SrcPTy = STy->getElementType(0);
      } else if (ArrayType *ATy = dyn_cast<ArrayType>(SrcPTy)) {
        NewGEPIndices.push_back(Zero);
        SrcPTy = ATy->getElementType();
      } else {
        break;
      }
    }

    SrcTy = PointerType::get(SrcPTy, SrcTy->getAddressSpace());
  }

  if (!SrcPTy->isIntegerTy() && !SrcPTy->isPointerTy())
    return 0;

  // If the pointers point into different address spaces or if they point to
  // values with different sizes, we can't do the transformation.
  if (!IC.getDataLayout() ||
      SrcTy->getAddressSpace() !=
        cast<PointerType>(CI->getType())->getAddressSpace() ||
      IC.getDataLayout()->getTypeSizeInBits(SrcPTy) !=
      IC.getDataLayout()->getTypeSizeInBits(DestPTy))
    return 0;

  // Okay, we are casting from one integer or pointer type to another of
  // the same size.  Instead of casting the pointer before
  // the store, cast the value to be stored.
  Value *NewCast;
  Value *SIOp0 = SI.getOperand(0);
  Instruction::CastOps opcode = Instruction::BitCast;
  Type* CastSrcTy = SIOp0->getType();
  Type* CastDstTy = SrcPTy;
  if (CastDstTy->isPointerTy()) {
    if (CastSrcTy->isIntegerTy())
      opcode = Instruction::IntToPtr;
  } else if (CastDstTy->isIntegerTy()) {
    if (SIOp0->getType()->isPointerTy())
      opcode = Instruction::PtrToInt;
  }

  // SIOp0 is a pointer to aggregate and this is a store to the first field,
  // emit a GEP to index into its first field.
  if (!NewGEPIndices.empty())
    CastOp = IC.Builder->CreateInBoundsGEP(CastOp, NewGEPIndices);

  NewCast = IC.Builder->CreateCast(opcode, SIOp0, CastDstTy,
                                   SIOp0->getName()+".c");
  SI.setOperand(0, NewCast);
  SI.setOperand(1, CastOp);
  return &SI;
}

/// equivalentAddressValues - Test if A and B will obviously have the same
/// value.  This includes recognizing that %t0 and %t1 will have the same
/// value in code like this:
///   %t0 = getelementptr @a, 0, 3
///   store i32 0, i32* %t0
///   %t1 = getelementptr @a, 0, 3
///   %t2 = load i32* %t1
///
static bool equivalentAddressValues(Value *A, Value *B) {
  // Test if the values are trivially equivalent.
  if (A == B) return true;

  // Test if the values come from identical arithmetic instructions.
  // This uses isIdenticalToWhenDefined instead of isIdenticalTo because
  // it's only used to compare two uses within the same basic block, which
  // means that they'll always either have the same value or one of them
  // will have an undefined value.
  if (isa<BinaryOperator>(A) ||
      isa<CastInst>(A) ||
      isa<PHINode>(A) ||
      isa<GetElementPtrInst>(A))
    if (Instruction *BI = dyn_cast<Instruction>(B))
      if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI))
        return true;

  // Otherwise they may not be equivalent.
  return false;
}

Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
  Value *Val = SI.getOperand(0);
  Value *Ptr = SI.getOperand(1);

  // Attempt to improve the alignment.
  if (TD) {
    unsigned KnownAlign =
      getOrEnforceKnownAlignment(Ptr, TD->getPrefTypeAlignment(Val->getType()),
                                 TD);
    unsigned StoreAlign = SI.getAlignment();
    // An alignment of 0 on the instruction means "use the ABI alignment".
    unsigned EffectiveStoreAlign = StoreAlign != 0 ? StoreAlign :
      TD->getABITypeAlignment(Val->getType());

    if (KnownAlign > EffectiveStoreAlign)
      SI.setAlignment(KnownAlign);
    else if (StoreAlign == 0)
      SI.setAlignment(EffectiveStoreAlign);
  }

  // Don't hack volatile/atomic stores.
  // FIXME: Some bits are legal for atomic stores; needs refactoring.
  if (!SI.isSimple()) return 0;

  // If the RHS is an alloca with a single use, zapify the store, making the
  // alloca dead.
  if (Ptr->hasOneUse()) {
    if (isa<AllocaInst>(Ptr))
      return EraseInstFromFunction(SI);
    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
      if (isa<AllocaInst>(GEP->getOperand(0))) {
        if (GEP->getOperand(0)->hasOneUse())
          return EraseInstFromFunction(SI);
      }
    }
  }

  // Do really simple DSE, to catch cases where there are several consecutive
  // stores to the same location, separated by a few arithmetic operations. This
  // situation often occurs with bitfield accesses.
  BasicBlock::iterator BBI = &SI;
  for (unsigned ScanInsts = 6; BBI != SI.getParent()->begin() && ScanInsts;
       --ScanInsts) {
    --BBI;
    // Don't count debug info directives, lest they affect codegen,
    // and we skip pointer-to-pointer bitcasts, which are NOPs.
    if (isa<DbgInfoIntrinsic>(BBI) ||
        (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
      ScanInsts++;
      continue;
    }

    if (StoreInst *PrevSI = dyn_cast<StoreInst>(BBI)) {
      // Prev store isn't volatile, and stores to the same location?
      if (PrevSI->isSimple() && equivalentAddressValues(PrevSI->getOperand(1),
                                                        SI.getOperand(1))) {
        ++NumDeadStore;
        ++BBI;
        EraseInstFromFunction(*PrevSI);
        continue;
      }
      break;
    }

    // If this is a load, we have to stop.  However, if the loaded value is from
    // the pointer we're loading and is producing the pointer we're storing,
    // then *this* store is dead (X = load P; store X -> P).
    if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
      if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr) &&
          LI->isSimple())
        return EraseInstFromFunction(SI);

      // Otherwise, this is a load from some other location.  Stores before it
      // may not be dead.
      break;
    }

    // Don't skip over loads or things that can modify memory.
    if (BBI->mayWriteToMemory() || BBI->mayReadFromMemory())
      break;
  }

  // store X, null    -> turns into 'unreachable' in SimplifyCFG
  if (isa<ConstantPointerNull>(Ptr) && SI.getPointerAddressSpace() == 0) {
    if (!isa<UndefValue>(Val)) {
      SI.setOperand(0, UndefValue::get(Val->getType()));
      if (Instruction *U = dyn_cast<Instruction>(Val))
        Worklist.Add(U);  // Dropped a use.
    }
    return 0;  // Do not modify these!
  }

  // store undef, Ptr -> noop
  if (isa<UndefValue>(Val))
    return EraseInstFromFunction(SI);

  // If the pointer destination is a cast, see if we can fold the cast into the
  // source instead.
  if (isa<CastInst>(Ptr))
    if (Instruction *Res = InstCombineStoreToCast(*this, SI))
      return Res;
  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))
    if (CE->isCast())
      if (Instruction *Res = InstCombineStoreToCast(*this, SI))
        return Res;


  // If this store is the last instruction in the basic block (possibly
  // excepting debug info instructions), and if the block ends with an
  // unconditional branch, try to move it to the successor block.
  BBI = &SI;
  do {
    ++BBI;
  } while (isa<DbgInfoIntrinsic>(BBI) ||
           (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy()));
  if (BranchInst *BI = dyn_cast<BranchInst>(BBI))
    if (BI->isUnconditional())
      if (SimplifyStoreAtEndOfBlock(SI))
        return 0;  // xform done!

  return 0;
}

/// SimplifyStoreAtEndOfBlock - Turn things like:
///   if () { *P = v1; } else { *P = v2 }
/// into a phi node with a store in the successor.
///
/// Simplify things like:
///   *P = v1; if () { *P = v2; }
/// into a phi node with a store in the successor.
///
bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
  BasicBlock *StoreBB = SI.getParent();

  // Check to see if the successor block has exactly two incoming edges.  If
  // so, see if the other predecessor contains a store to the same location.
  // if so, insert a PHI node (if needed) and move the stores down.
  BasicBlock *DestBB = StoreBB->getTerminator()->getSuccessor(0);

  // Determine whether Dest has exactly two predecessors and, if so, compute
  // the other predecessor.  OtherBB ends up as the unique predecessor that
  // is not StoreBB; bail out on any other predecessor count/shape.
  pred_iterator PI = pred_begin(DestBB);
  BasicBlock *P = *PI;
  BasicBlock *OtherBB = 0;

  if (P != StoreBB)
    OtherBB = P;

  // Fewer than two predecessors: nothing to merge with.
  if (++PI == pred_end(DestBB))
    return false;

  P = *PI;
  if (P != StoreBB) {
    // Two predecessors, neither of which is StoreBB -- StoreBB doesn't
    // actually feed DestBB twice; give up.
    if (OtherBB)
      return false;
    OtherBB = P;
  }
  // More than two predecessors: give up.
  if (++PI != pred_end(DestBB))
    return false;

  // Bail out if all the relevant blocks aren't distinct (this can happen,
  // for example, if SI is in an infinite loop)
  if (StoreBB == DestBB || OtherBB == DestBB)
    return false;

  // Verify that the other block ends in a branch and is not otherwise empty.
  BasicBlock::iterator BBI = OtherBB->getTerminator();
  BranchInst *OtherBr = dyn_cast<BranchInst>(BBI);
  if (!OtherBr || BBI == OtherBB->begin())
    return false;

  // If the other block ends in an unconditional branch, check for the 'if then
  // else' case.  There is an instruction before the branch.
  StoreInst *OtherStore = 0;
  if (OtherBr->isUnconditional()) {
    --BBI;
    // Skip over debugging info (and pointer-to-pointer bitcasts).
    while (isa<DbgInfoIntrinsic>(BBI) ||
           (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
      if (BBI==OtherBB->begin())
        return false;
      --BBI;
    }
    // If this isn't a store, isn't a store to the same location, or is not the
    // right kind of store, bail out.
    OtherStore = dyn_cast<StoreInst>(BBI);
    if (!OtherStore || OtherStore->getOperand(1) != SI.getOperand(1) ||
        !SI.isSameOperationAs(OtherStore))
      return false;
  } else {
    // Otherwise, the other block ended with a conditional branch. If one of the
    // destinations is StoreBB, then we have the if/then case.
    if (OtherBr->getSuccessor(0) != StoreBB &&
        OtherBr->getSuccessor(1) != StoreBB)
      return false;

    // Okay, we know that OtherBr now goes to Dest and StoreBB, so this is an
    // if/then triangle.  See if there is a store to the same ptr as SI that
    // lives in OtherBB.  Scan backwards from the terminator.
    for (;; --BBI) {
      // Check to see if we find the matching store.
      if ((OtherStore = dyn_cast<StoreInst>(BBI))) {
        if (OtherStore->getOperand(1) != SI.getOperand(1) ||
            !SI.isSameOperationAs(OtherStore))
          return false;
        break;
      }
      // If we find something that may be using or overwriting the stored
      // value, or if we run out of instructions, we can't do the xform.
      if (BBI->mayReadFromMemory() || BBI->mayWriteToMemory() ||
          BBI == OtherBB->begin())
        return false;
    }

    // In order to eliminate the store in OtherBr, we have to
    // make sure nothing reads or overwrites the stored value in
    // StoreBB.
    for (BasicBlock::iterator I = StoreBB->begin(); &*I != &SI; ++I) {
      // FIXME: This should really be AA driven.
      if (I->mayReadFromMemory() || I->mayWriteToMemory())
        return false;
    }
  }

  // Insert a PHI node now if we need it.  If both blocks store the same
  // value, no PHI is necessary.
  Value *MergedVal = OtherStore->getOperand(0);
  if (MergedVal != SI.getOperand(0)) {
    PHINode *PN = PHINode::Create(MergedVal->getType(), 2, "storemerge");
    PN->addIncoming(SI.getOperand(0), SI.getParent());
    PN->addIncoming(OtherStore->getOperand(0), OtherBB);
    MergedVal = InsertNewInstBefore(PN, DestBB->front());
  }

  // Advance to a place where it is safe to insert the new store and
  // insert it.  The new store keeps SI's volatility/alignment/ordering but
  // takes OtherStore's debug location.
  BBI = DestBB->getFirstInsertionPt();
  StoreInst *NewSI = new StoreInst(MergedVal, SI.getOperand(1),
                                   SI.isVolatile(),
                                   SI.getAlignment(),
                                   SI.getOrdering(),
                                   SI.getSynchScope());
  InsertNewInstBefore(NewSI, *BBI);
  NewSI->setDebugLoc(OtherStore->getDebugLoc());

  // Nuke the old stores.
  EraseInstFromFunction(SI);
  EraseInstFromFunction(*OtherStore);
  return true;
}