1202375Srdivacky//===- InstCombineLoadStoreAlloca.cpp -------------------------------------===// 2202375Srdivacky// 3202375Srdivacky// The LLVM Compiler Infrastructure 4202375Srdivacky// 5202375Srdivacky// This file is distributed under the University of Illinois Open Source 6202375Srdivacky// License. See LICENSE.TXT for details. 7202375Srdivacky// 8202375Srdivacky//===----------------------------------------------------------------------===// 9202375Srdivacky// 10202375Srdivacky// This file implements the visit functions for load, store and alloca. 11202375Srdivacky// 12202375Srdivacky//===----------------------------------------------------------------------===// 13202375Srdivacky 14202375Srdivacky#include "InstCombine.h" 15249423Sdim#include "llvm/ADT/Statistic.h" 16210299Sed#include "llvm/Analysis/Loads.h" 17249423Sdim#include "llvm/IR/DataLayout.h" 18249423Sdim#include "llvm/IR/IntrinsicInst.h" 19202375Srdivacky#include "llvm/Transforms/Utils/BasicBlockUtils.h" 20202375Srdivacky#include "llvm/Transforms/Utils/Local.h" 21202375Srdivackyusing namespace llvm; 22202375Srdivacky 23243830SdimSTATISTIC(NumDeadStore, "Number of dead stores eliminated"); 24243830SdimSTATISTIC(NumGlobalCopies, "Number of allocas copied from constant global"); 25202375Srdivacky 26243830Sdim/// pointsToConstantGlobal - Return true if V (possibly indirectly) points to 27243830Sdim/// some part of a constant global variable. This intentionally only accepts 28243830Sdim/// constant expressions because we can't rewrite arbitrary instructions. 
29243830Sdimstatic bool pointsToConstantGlobal(Value *V) { 30243830Sdim if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) 31243830Sdim return GV->isConstant(); 32243830Sdim if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) 33243830Sdim if (CE->getOpcode() == Instruction::BitCast || 34243830Sdim CE->getOpcode() == Instruction::GetElementPtr) 35243830Sdim return pointsToConstantGlobal(CE->getOperand(0)); 36243830Sdim return false; 37243830Sdim} 38243830Sdim 39243830Sdim/// isOnlyCopiedFromConstantGlobal - Recursively walk the uses of a (derived) 40243830Sdim/// pointer to an alloca. Ignore any reads of the pointer, return false if we 41243830Sdim/// see any stores or other unknown uses. If we see pointer arithmetic, keep 42243830Sdim/// track of whether it moves the pointer (with IsOffset) but otherwise traverse 43243830Sdim/// the uses. If we see a memcpy/memmove that targets an unoffseted pointer to 44243830Sdim/// the alloca, and if the source pointer is a pointer to a constant global, we 45243830Sdim/// can optimize this. 46243830Sdimstatic bool 47243830SdimisOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy, 48243830Sdim SmallVectorImpl<Instruction *> &ToDelete, 49243830Sdim bool IsOffset = false) { 50243830Sdim // We track lifetime intrinsics as we encounter them. If we decide to go 51243830Sdim // ahead and replace the value with the global, this lets the caller quickly 52243830Sdim // eliminate the markers. 53243830Sdim 54243830Sdim for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) { 55243830Sdim User *U = cast<Instruction>(*UI); 56243830Sdim 57243830Sdim if (LoadInst *LI = dyn_cast<LoadInst>(U)) { 58243830Sdim // Ignore non-volatile loads, they are always ok. 59243830Sdim if (!LI->isSimple()) return false; 60243830Sdim continue; 61243830Sdim } 62243830Sdim 63243830Sdim if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) { 64243830Sdim // If uses of the bitcast are ok, we are ok. 
65243830Sdim if (!isOnlyCopiedFromConstantGlobal(BCI, TheCopy, ToDelete, IsOffset)) 66243830Sdim return false; 67243830Sdim continue; 68243830Sdim } 69243830Sdim if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) { 70243830Sdim // If the GEP has all zero indices, it doesn't offset the pointer. If it 71243830Sdim // doesn't, it does. 72251662Sdim if (!isOnlyCopiedFromConstantGlobal( 73251662Sdim GEP, TheCopy, ToDelete, IsOffset || !GEP->hasAllZeroIndices())) 74243830Sdim return false; 75243830Sdim continue; 76243830Sdim } 77243830Sdim 78243830Sdim if (CallSite CS = U) { 79243830Sdim // If this is the function being called then we treat it like a load and 80243830Sdim // ignore it. 81243830Sdim if (CS.isCallee(UI)) 82243830Sdim continue; 83243830Sdim 84243830Sdim // If this is a readonly/readnone call site, then we know it is just a 85243830Sdim // load (but one that potentially returns the value itself), so we can 86243830Sdim // ignore it if we know that the value isn't captured. 87243830Sdim unsigned ArgNo = CS.getArgumentNo(UI); 88243830Sdim if (CS.onlyReadsMemory() && 89243830Sdim (CS.getInstruction()->use_empty() || CS.doesNotCapture(ArgNo))) 90243830Sdim continue; 91243830Sdim 92243830Sdim // If this is being passed as a byval argument, the caller is making a 93243830Sdim // copy, so it is only a read of the alloca. 94243830Sdim if (CS.isByValArgument(ArgNo)) 95243830Sdim continue; 96243830Sdim } 97243830Sdim 98243830Sdim // Lifetime intrinsics can be handled by the caller. 
99243830Sdim if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) { 100243830Sdim if (II->getIntrinsicID() == Intrinsic::lifetime_start || 101243830Sdim II->getIntrinsicID() == Intrinsic::lifetime_end) { 102243830Sdim assert(II->use_empty() && "Lifetime markers have no result to use!"); 103243830Sdim ToDelete.push_back(II); 104243830Sdim continue; 105243830Sdim } 106243830Sdim } 107243830Sdim 108243830Sdim // If this is isn't our memcpy/memmove, reject it as something we can't 109243830Sdim // handle. 110243830Sdim MemTransferInst *MI = dyn_cast<MemTransferInst>(U); 111243830Sdim if (MI == 0) 112243830Sdim return false; 113243830Sdim 114243830Sdim // If the transfer is using the alloca as a source of the transfer, then 115243830Sdim // ignore it since it is a load (unless the transfer is volatile). 116243830Sdim if (UI.getOperandNo() == 1) { 117243830Sdim if (MI->isVolatile()) return false; 118243830Sdim continue; 119243830Sdim } 120243830Sdim 121243830Sdim // If we already have seen a copy, reject the second one. 122243830Sdim if (TheCopy) return false; 123243830Sdim 124243830Sdim // If the pointer has been offset from the start of the alloca, we can't 125243830Sdim // safely handle this. 126243830Sdim if (IsOffset) return false; 127243830Sdim 128243830Sdim // If the memintrinsic isn't using the alloca as the dest, reject it. 129243830Sdim if (UI.getOperandNo() != 0) return false; 130243830Sdim 131243830Sdim // If the source of the memcpy/move is not a constant global, reject it. 132243830Sdim if (!pointsToConstantGlobal(MI->getSource())) 133243830Sdim return false; 134243830Sdim 135243830Sdim // Otherwise, the transform is safe. Remember the copy instruction. 136243830Sdim TheCopy = MI; 137243830Sdim } 138243830Sdim return true; 139243830Sdim} 140243830Sdim 141243830Sdim/// isOnlyCopiedFromConstantGlobal - Return true if the specified alloca is only 142243830Sdim/// modified by a copy from a constant global. 
If we can prove this, we can 143243830Sdim/// replace any uses of the alloca with uses of the global directly. 144243830Sdimstatic MemTransferInst * 145243830SdimisOnlyCopiedFromConstantGlobal(AllocaInst *AI, 146243830Sdim SmallVectorImpl<Instruction *> &ToDelete) { 147243830Sdim MemTransferInst *TheCopy = 0; 148243830Sdim if (isOnlyCopiedFromConstantGlobal(AI, TheCopy, ToDelete)) 149243830Sdim return TheCopy; 150243830Sdim return 0; 151243830Sdim} 152243830Sdim 153202375SrdivackyInstruction *InstCombiner::visitAllocaInst(AllocaInst &AI) { 154210299Sed // Ensure that the alloca array size argument has type intptr_t, so that 155210299Sed // any casting is exposed early. 156210299Sed if (TD) { 157263508Sdim Type *IntPtrTy = TD->getIntPtrType(AI.getType()); 158210299Sed if (AI.getArraySize()->getType() != IntPtrTy) { 159210299Sed Value *V = Builder->CreateIntCast(AI.getArraySize(), 160210299Sed IntPtrTy, false); 161210299Sed AI.setOperand(0, V); 162210299Sed return &AI; 163210299Sed } 164210299Sed } 165210299Sed 166202375Srdivacky // Convert: alloca Ty, C - where C is a constant != 1 into: alloca [C x Ty], 1 167202375Srdivacky if (AI.isArrayAllocation()) { // Check C != 1 168202375Srdivacky if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) { 169251662Sdim Type *NewTy = 170202375Srdivacky ArrayType::get(AI.getAllocatedType(), C->getZExtValue()); 171202375Srdivacky AllocaInst *New = Builder->CreateAlloca(NewTy, 0, AI.getName()); 172202375Srdivacky New->setAlignment(AI.getAlignment()); 173202375Srdivacky 174202375Srdivacky // Scan to the end of the allocation instructions, to skip over a block of 175202375Srdivacky // allocas if possible...also skip interleaved debug info 176202375Srdivacky // 177202375Srdivacky BasicBlock::iterator It = New; 178202375Srdivacky while (isa<AllocaInst>(*It) || isa<DbgInfoIntrinsic>(*It)) ++It; 179202375Srdivacky 180202375Srdivacky // Now that I is pointing to the first non-allocation-inst in the block, 181202375Srdivacky 
// insert our getelementptr instruction... 182202375Srdivacky // 183263508Sdim Type *IdxTy = TD 184263508Sdim ? TD->getIntPtrType(AI.getType()) 185263508Sdim : Type::getInt64Ty(AI.getContext()); 186263508Sdim Value *NullIdx = Constant::getNullValue(IdxTy); 187263508Sdim Value *Idx[2] = { NullIdx, NullIdx }; 188223017Sdim Instruction *GEP = 189263508Sdim GetElementPtrInst::CreateInBounds(New, Idx, New->getName() + ".sub"); 190223017Sdim InsertNewInstBefore(GEP, *It); 191202375Srdivacky 192202375Srdivacky // Now make everything use the getelementptr instead of the original 193202375Srdivacky // allocation. 194223017Sdim return ReplaceInstUsesWith(AI, GEP); 195202375Srdivacky } else if (isa<UndefValue>(AI.getArraySize())) { 196202375Srdivacky return ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType())); 197202375Srdivacky } 198202375Srdivacky } 199202375Srdivacky 200239462Sdim if (TD && AI.getAllocatedType()->isSized()) { 201202375Srdivacky // If the alignment is 0 (unspecified), assign it the preferred alignment. 202202375Srdivacky if (AI.getAlignment() == 0) 203202375Srdivacky AI.setAlignment(TD->getPrefTypeAlignment(AI.getAllocatedType())); 204239462Sdim 205239462Sdim // Move all alloca's of zero byte objects to the entry block and merge them 206239462Sdim // together. Note that we only do this for alloca's, because malloc should 207239462Sdim // allocate and return a unique pointer, even for a zero byte allocation. 208239462Sdim if (TD->getTypeAllocSize(AI.getAllocatedType()) == 0) { 209239462Sdim // For a zero sized alloca there is no point in doing an array allocation. 210239462Sdim // This is helpful if the array size is a complicated expression not used 211239462Sdim // elsewhere. 212239462Sdim if (AI.isArrayAllocation()) { 213239462Sdim AI.setOperand(0, ConstantInt::get(AI.getArraySize()->getType(), 1)); 214239462Sdim return &AI; 215239462Sdim } 216239462Sdim 217239462Sdim // Get the first instruction in the entry block. 
218239462Sdim BasicBlock &EntryBlock = AI.getParent()->getParent()->getEntryBlock(); 219239462Sdim Instruction *FirstInst = EntryBlock.getFirstNonPHIOrDbg(); 220239462Sdim if (FirstInst != &AI) { 221239462Sdim // If the entry block doesn't start with a zero-size alloca then move 222239462Sdim // this one to the start of the entry block. There is no problem with 223239462Sdim // dominance as the array size was forced to a constant earlier already. 224239462Sdim AllocaInst *EntryAI = dyn_cast<AllocaInst>(FirstInst); 225239462Sdim if (!EntryAI || !EntryAI->getAllocatedType()->isSized() || 226239462Sdim TD->getTypeAllocSize(EntryAI->getAllocatedType()) != 0) { 227239462Sdim AI.moveBefore(FirstInst); 228239462Sdim return &AI; 229239462Sdim } 230239462Sdim 231243830Sdim // If the alignment of the entry block alloca is 0 (unspecified), 232243830Sdim // assign it the preferred alignment. 233243830Sdim if (EntryAI->getAlignment() == 0) 234243830Sdim EntryAI->setAlignment( 235243830Sdim TD->getPrefTypeAlignment(EntryAI->getAllocatedType())); 236239462Sdim // Replace this zero-sized alloca with the one at the start of the entry 237239462Sdim // block after ensuring that the address will be aligned enough for both 238239462Sdim // types. 239243830Sdim unsigned MaxAlign = std::max(EntryAI->getAlignment(), 240243830Sdim AI.getAlignment()); 241239462Sdim EntryAI->setAlignment(MaxAlign); 242239462Sdim if (AI.getType() != EntryAI->getType()) 243239462Sdim return new BitCastInst(EntryAI, AI.getType()); 244239462Sdim return ReplaceInstUsesWith(AI, EntryAI); 245239462Sdim } 246239462Sdim } 247202375Srdivacky } 248202375Srdivacky 249249423Sdim if (AI.getAlignment()) { 250243830Sdim // Check to see if this allocation is only modified by a memcpy/memmove from 251243830Sdim // a constant global whose alignment is equal to or exceeds that of the 252243830Sdim // allocation. If this is the case, we can change all users to use 253243830Sdim // the constant global instead. 
This is commonly produced by the CFE by 254243830Sdim // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A' 255243830Sdim // is only subsequently read. 256243830Sdim SmallVector<Instruction *, 4> ToDelete; 257243830Sdim if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(&AI, ToDelete)) { 258249423Sdim unsigned SourceAlign = getOrEnforceKnownAlignment(Copy->getSource(), 259249423Sdim AI.getAlignment(), TD); 260249423Sdim if (AI.getAlignment() <= SourceAlign) { 261243830Sdim DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n'); 262243830Sdim DEBUG(dbgs() << " memcpy = " << *Copy << '\n'); 263243830Sdim for (unsigned i = 0, e = ToDelete.size(); i != e; ++i) 264243830Sdim EraseInstFromFunction(*ToDelete[i]); 265243830Sdim Constant *TheSrc = cast<Constant>(Copy->getSource()); 266263508Sdim Constant *Cast 267263508Sdim = ConstantExpr::getPointerBitCastOrAddrSpaceCast(TheSrc, AI.getType()); 268263508Sdim Instruction *NewI = ReplaceInstUsesWith(AI, Cast); 269243830Sdim EraseInstFromFunction(*Copy); 270243830Sdim ++NumGlobalCopies; 271243830Sdim return NewI; 272243830Sdim } 273243830Sdim } 274243830Sdim } 275243830Sdim 276239462Sdim // At last, use the generic allocation site handler to aggressively remove 277239462Sdim // unused allocas. 278239462Sdim return visitAllocSite(AI); 279202375Srdivacky} 280202375Srdivacky 281202375Srdivacky 282202375Srdivacky/// InstCombineLoadCast - Fold 'load (cast P)' -> cast (load P)' when possible. 
283202375Srdivackystatic Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI, 284243830Sdim const DataLayout *TD) { 285202375Srdivacky User *CI = cast<User>(LI.getOperand(0)); 286202375Srdivacky Value *CastOp = CI->getOperand(0); 287202375Srdivacky 288226633Sdim PointerType *DestTy = cast<PointerType>(CI->getType()); 289226633Sdim Type *DestPTy = DestTy->getElementType(); 290226633Sdim if (PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType())) { 291202375Srdivacky 292202375Srdivacky // If the address spaces don't match, don't eliminate the cast. 293202375Srdivacky if (DestTy->getAddressSpace() != SrcTy->getAddressSpace()) 294202375Srdivacky return 0; 295202375Srdivacky 296226633Sdim Type *SrcPTy = SrcTy->getElementType(); 297202375Srdivacky 298251662Sdim if (DestPTy->isIntegerTy() || DestPTy->isPointerTy() || 299204642Srdivacky DestPTy->isVectorTy()) { 300202375Srdivacky // If the source is an array, the code below will not succeed. Check to 301202375Srdivacky // see if a trivial 'gep P, 0, 0' will help matters. Only do this for 302202375Srdivacky // constants. 303226633Sdim if (ArrayType *ASrcTy = dyn_cast<ArrayType>(SrcPTy)) 304202375Srdivacky if (Constant *CSrc = dyn_cast<Constant>(CastOp)) 305202375Srdivacky if (ASrcTy->getNumElements() != 0) { 306263508Sdim Type *IdxTy = TD 307263508Sdim ? 
TD->getIntPtrType(SrcTy) 308263508Sdim : Type::getInt64Ty(SrcTy->getContext()); 309263508Sdim Value *Idx = Constant::getNullValue(IdxTy); 310263508Sdim Value *Idxs[2] = { Idx, Idx }; 311226633Sdim CastOp = ConstantExpr::getGetElementPtr(CSrc, Idxs); 312202375Srdivacky SrcTy = cast<PointerType>(CastOp->getType()); 313202375Srdivacky SrcPTy = SrcTy->getElementType(); 314202375Srdivacky } 315202375Srdivacky 316243830Sdim if (IC.getDataLayout() && 317251662Sdim (SrcPTy->isIntegerTy() || SrcPTy->isPointerTy() || 318204642Srdivacky SrcPTy->isVectorTy()) && 319202375Srdivacky // Do not allow turning this into a load of an integer, which is then 320202375Srdivacky // casted to a pointer, this pessimizes pointer analysis a lot. 321263508Sdim (SrcPTy->isPtrOrPtrVectorTy() == 322263508Sdim LI.getType()->isPtrOrPtrVectorTy()) && 323243830Sdim IC.getDataLayout()->getTypeSizeInBits(SrcPTy) == 324243830Sdim IC.getDataLayout()->getTypeSizeInBits(DestPTy)) { 325202375Srdivacky 326202375Srdivacky // Okay, we are casting from one integer or pointer type to another of 327202375Srdivacky // the same size. Instead of casting the pointer before the load, cast 328202375Srdivacky // the result of the loaded value. 329251662Sdim LoadInst *NewLoad = 330202375Srdivacky IC.Builder->CreateLoad(CastOp, LI.isVolatile(), CI->getName()); 331203954Srdivacky NewLoad->setAlignment(LI.getAlignment()); 332226633Sdim NewLoad->setAtomic(LI.getOrdering(), LI.getSynchScope()); 333202375Srdivacky // Now cast the result of the load. 334202375Srdivacky return new BitCastInst(NewLoad, LI.getType()); 335202375Srdivacky } 336202375Srdivacky } 337202375Srdivacky } 338202375Srdivacky return 0; 339202375Srdivacky} 340202375Srdivacky 341202375SrdivackyInstruction *InstCombiner::visitLoadInst(LoadInst &LI) { 342202375Srdivacky Value *Op = LI.getOperand(0); 343202375Srdivacky 344202375Srdivacky // Attempt to improve the alignment. 
345202375Srdivacky if (TD) { 346202375Srdivacky unsigned KnownAlign = 347218893Sdim getOrEnforceKnownAlignment(Op, TD->getPrefTypeAlignment(LI.getType()),TD); 348212904Sdim unsigned LoadAlign = LI.getAlignment(); 349212904Sdim unsigned EffectiveLoadAlign = LoadAlign != 0 ? LoadAlign : 350212904Sdim TD->getABITypeAlignment(LI.getType()); 351212904Sdim 352212904Sdim if (KnownAlign > EffectiveLoadAlign) 353202375Srdivacky LI.setAlignment(KnownAlign); 354212904Sdim else if (LoadAlign == 0) 355212904Sdim LI.setAlignment(EffectiveLoadAlign); 356202375Srdivacky } 357202375Srdivacky 358202375Srdivacky // load (cast X) --> cast (load X) iff safe. 359202375Srdivacky if (isa<CastInst>(Op)) 360202375Srdivacky if (Instruction *Res = InstCombineLoadCast(*this, LI, TD)) 361202375Srdivacky return Res; 362202375Srdivacky 363226633Sdim // None of the following transforms are legal for volatile/atomic loads. 364226633Sdim // FIXME: Some of it is okay for atomic loads; needs refactoring. 365226633Sdim if (!LI.isSimple()) return 0; 366251662Sdim 367202375Srdivacky // Do really simple store-to-load forwarding and load CSE, to catch cases 368218893Sdim // where there are several consecutive memory accesses to the same location, 369202375Srdivacky // separated by a few arithmetic operations. 370202375Srdivacky BasicBlock::iterator BBI = &LI; 371202375Srdivacky if (Value *AvailableVal = FindAvailableLoadedValue(Op, LI.getParent(), BBI,6)) 372202375Srdivacky return ReplaceInstUsesWith(LI, AvailableVal); 373202375Srdivacky 374202375Srdivacky // load(gep null, ...) -> unreachable 375202375Srdivacky if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) { 376202375Srdivacky const Value *GEPI0 = GEPI->getOperand(0); 377202375Srdivacky // TODO: Consider a target hook for valid address spaces for this xform. 
378202375Srdivacky if (isa<ConstantPointerNull>(GEPI0) && GEPI->getPointerAddressSpace() == 0){ 379202375Srdivacky // Insert a new store to null instruction before the load to indicate 380202375Srdivacky // that this code is not reachable. We do this instead of inserting 381202375Srdivacky // an unreachable instruction directly because we cannot modify the 382202375Srdivacky // CFG. 383202375Srdivacky new StoreInst(UndefValue::get(LI.getType()), 384202375Srdivacky Constant::getNullValue(Op->getType()), &LI); 385202375Srdivacky return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType())); 386202375Srdivacky } 387251662Sdim } 388202375Srdivacky 389202375Srdivacky // load null/undef -> unreachable 390202375Srdivacky // TODO: Consider a target hook for valid address spaces for this xform. 391202375Srdivacky if (isa<UndefValue>(Op) || 392202375Srdivacky (isa<ConstantPointerNull>(Op) && LI.getPointerAddressSpace() == 0)) { 393202375Srdivacky // Insert a new store to null instruction before the load to indicate that 394202375Srdivacky // this code is not reachable. We do this instead of inserting an 395202375Srdivacky // unreachable instruction directly because we cannot modify the CFG. 
396202375Srdivacky new StoreInst(UndefValue::get(LI.getType()), 397202375Srdivacky Constant::getNullValue(Op->getType()), &LI); 398202375Srdivacky return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType())); 399202375Srdivacky } 400202375Srdivacky 401202375Srdivacky // Instcombine load (constantexpr_cast global) -> cast (load global) 402202375Srdivacky if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Op)) 403202375Srdivacky if (CE->isCast()) 404202375Srdivacky if (Instruction *Res = InstCombineLoadCast(*this, LI, TD)) 405202375Srdivacky return Res; 406251662Sdim 407202375Srdivacky if (Op->hasOneUse()) { 408202375Srdivacky // Change select and PHI nodes to select values instead of addresses: this 409202375Srdivacky // helps alias analysis out a lot, allows many others simplifications, and 410202375Srdivacky // exposes redundancy in the code. 411202375Srdivacky // 412202375Srdivacky // Note that we cannot do the transformation unless we know that the 413202375Srdivacky // introduced loads cannot trap! Something like this is valid as long as 414202375Srdivacky // the condition is always false: load (select bool %C, int* null, int* %G), 415202375Srdivacky // but it would not be valid if we transformed it to load from null 416202375Srdivacky // unconditionally. 417202375Srdivacky // 418202375Srdivacky if (SelectInst *SI = dyn_cast<SelectInst>(Op)) { 419202375Srdivacky // load (select (Cond, &V1, &V2)) --> select(Cond, load &V1, load &V2). 
420203954Srdivacky unsigned Align = LI.getAlignment(); 421203954Srdivacky if (isSafeToLoadUnconditionally(SI->getOperand(1), SI, Align, TD) && 422203954Srdivacky isSafeToLoadUnconditionally(SI->getOperand(2), SI, Align, TD)) { 423203954Srdivacky LoadInst *V1 = Builder->CreateLoad(SI->getOperand(1), 424203954Srdivacky SI->getOperand(1)->getName()+".val"); 425203954Srdivacky LoadInst *V2 = Builder->CreateLoad(SI->getOperand(2), 426203954Srdivacky SI->getOperand(2)->getName()+".val"); 427203954Srdivacky V1->setAlignment(Align); 428203954Srdivacky V2->setAlignment(Align); 429202375Srdivacky return SelectInst::Create(SI->getCondition(), V1, V2); 430202375Srdivacky } 431202375Srdivacky 432202375Srdivacky // load (select (cond, null, P)) -> load P 433202375Srdivacky if (Constant *C = dyn_cast<Constant>(SI->getOperand(1))) 434202375Srdivacky if (C->isNullValue()) { 435202375Srdivacky LI.setOperand(0, SI->getOperand(2)); 436202375Srdivacky return &LI; 437202375Srdivacky } 438202375Srdivacky 439202375Srdivacky // load (select (cond, P, null)) -> load P 440202375Srdivacky if (Constant *C = dyn_cast<Constant>(SI->getOperand(2))) 441202375Srdivacky if (C->isNullValue()) { 442202375Srdivacky LI.setOperand(0, SI->getOperand(1)); 443202375Srdivacky return &LI; 444202375Srdivacky } 445202375Srdivacky } 446202375Srdivacky } 447202375Srdivacky return 0; 448202375Srdivacky} 449202375Srdivacky 450202375Srdivacky/// InstCombineStoreToCast - Fold store V, (cast P) -> store (cast V), P 451202375Srdivacky/// when possible. This makes it generally easy to do alias analysis and/or 452202375Srdivacky/// SROA/mem2reg of the memory object. 
453202375Srdivackystatic Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) { 454202375Srdivacky User *CI = cast<User>(SI.getOperand(1)); 455202375Srdivacky Value *CastOp = CI->getOperand(0); 456202375Srdivacky 457226633Sdim Type *DestPTy = cast<PointerType>(CI->getType())->getElementType(); 458226633Sdim PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType()); 459202375Srdivacky if (SrcTy == 0) return 0; 460251662Sdim 461226633Sdim Type *SrcPTy = SrcTy->getElementType(); 462202375Srdivacky 463204642Srdivacky if (!DestPTy->isIntegerTy() && !DestPTy->isPointerTy()) 464202375Srdivacky return 0; 465251662Sdim 466202375Srdivacky /// NewGEPIndices - If SrcPTy is an aggregate type, we can emit a "noop gep" 467202375Srdivacky /// to its first element. This allows us to handle things like: 468202375Srdivacky /// store i32 xxx, (bitcast {foo*, float}* %P to i32*) 469202375Srdivacky /// on 32-bit hosts. 470202375Srdivacky SmallVector<Value*, 4> NewGEPIndices; 471251662Sdim 472202375Srdivacky // If the source is an array, the code below will not succeed. Check to 473202375Srdivacky // see if a trivial 'gep P, 0, 0' will help matters. Only do this for 474202375Srdivacky // constants. 475204642Srdivacky if (SrcPTy->isArrayTy() || SrcPTy->isStructTy()) { 476202375Srdivacky // Index through pointer. 
477202375Srdivacky Constant *Zero = Constant::getNullValue(Type::getInt32Ty(SI.getContext())); 478202375Srdivacky NewGEPIndices.push_back(Zero); 479251662Sdim 480202375Srdivacky while (1) { 481226633Sdim if (StructType *STy = dyn_cast<StructType>(SrcPTy)) { 482202375Srdivacky if (!STy->getNumElements()) /* Struct can be empty {} */ 483202375Srdivacky break; 484202375Srdivacky NewGEPIndices.push_back(Zero); 485202375Srdivacky SrcPTy = STy->getElementType(0); 486226633Sdim } else if (ArrayType *ATy = dyn_cast<ArrayType>(SrcPTy)) { 487202375Srdivacky NewGEPIndices.push_back(Zero); 488202375Srdivacky SrcPTy = ATy->getElementType(); 489202375Srdivacky } else { 490202375Srdivacky break; 491202375Srdivacky } 492202375Srdivacky } 493251662Sdim 494202375Srdivacky SrcTy = PointerType::get(SrcPTy, SrcTy->getAddressSpace()); 495202375Srdivacky } 496202375Srdivacky 497204642Srdivacky if (!SrcPTy->isIntegerTy() && !SrcPTy->isPointerTy()) 498202375Srdivacky return 0; 499251662Sdim 500202375Srdivacky // If the pointers point into different address spaces or if they point to 501202375Srdivacky // values with different sizes, we can't do the transformation. 502243830Sdim if (!IC.getDataLayout() || 503251662Sdim SrcTy->getAddressSpace() != 504202375Srdivacky cast<PointerType>(CI->getType())->getAddressSpace() || 505243830Sdim IC.getDataLayout()->getTypeSizeInBits(SrcPTy) != 506243830Sdim IC.getDataLayout()->getTypeSizeInBits(DestPTy)) 507202375Srdivacky return 0; 508202375Srdivacky 509202375Srdivacky // Okay, we are casting from one integer or pointer type to another of 510251662Sdim // the same size. Instead of casting the pointer before 511202375Srdivacky // the store, cast the value to be stored. 
512202375Srdivacky Value *NewCast; 513202375Srdivacky Value *SIOp0 = SI.getOperand(0); 514202375Srdivacky Instruction::CastOps opcode = Instruction::BitCast; 515226633Sdim Type* CastSrcTy = SIOp0->getType(); 516226633Sdim Type* CastDstTy = SrcPTy; 517204642Srdivacky if (CastDstTy->isPointerTy()) { 518203954Srdivacky if (CastSrcTy->isIntegerTy()) 519202375Srdivacky opcode = Instruction::IntToPtr; 520204642Srdivacky } else if (CastDstTy->isIntegerTy()) { 521204642Srdivacky if (SIOp0->getType()->isPointerTy()) 522202375Srdivacky opcode = Instruction::PtrToInt; 523202375Srdivacky } 524251662Sdim 525202375Srdivacky // SIOp0 is a pointer to aggregate and this is a store to the first field, 526202375Srdivacky // emit a GEP to index into its first field. 527202375Srdivacky if (!NewGEPIndices.empty()) 528226633Sdim CastOp = IC.Builder->CreateInBoundsGEP(CastOp, NewGEPIndices); 529251662Sdim 530202375Srdivacky NewCast = IC.Builder->CreateCast(opcode, SIOp0, CastDstTy, 531202375Srdivacky SIOp0->getName()+".c"); 532218893Sdim SI.setOperand(0, NewCast); 533218893Sdim SI.setOperand(1, CastOp); 534218893Sdim return &SI; 535202375Srdivacky} 536202375Srdivacky 537202375Srdivacky/// equivalentAddressValues - Test if A and B will obviously have the same 538202375Srdivacky/// value. This includes recognizing that %t0 and %t1 will have the same 539202375Srdivacky/// value in code like this: 540202375Srdivacky/// %t0 = getelementptr \@a, 0, 3 541202375Srdivacky/// store i32 0, i32* %t0 542202375Srdivacky/// %t1 = getelementptr \@a, 0, 3 543202375Srdivacky/// %t2 = load i32* %t1 544202375Srdivacky/// 545202375Srdivackystatic bool equivalentAddressValues(Value *A, Value *B) { 546202375Srdivacky // Test if the values are trivially equivalent. 547202375Srdivacky if (A == B) return true; 548251662Sdim 549202375Srdivacky // Test if the values come form identical arithmetic instructions. 
  // This uses isIdenticalToWhenDefined instead of isIdenticalTo because
  // it's only used to compare two uses within the same basic block, which
  // means that they'll always either have the same value or one of them
  // will have an undefined value.
  if (isa<BinaryOperator>(A) ||
      isa<CastInst>(A) ||
      isa<PHINode>(A) ||
      isa<GetElementPtrInst>(A))
    if (Instruction *BI = dyn_cast<Instruction>(B))
      if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI))
        return true;

  // Otherwise they may not be equivalent.
  return false;
}

/// visitStoreInst - Simplify a store instruction.  Returns a replacement
/// instruction when one is made, or null.  Transforms performed here:
/// alignment improvement, stores into dead allocas, trivially-dead-store
/// elimination, store-to-null, store-of-undef, folding a cast on the pointer
/// operand, and sinking the store into the successor block.
Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
  Value *Val = SI.getOperand(0);
  Value *Ptr = SI.getOperand(1);

  // Attempt to improve the alignment.
  if (TD) {
    unsigned KnownAlign =
      getOrEnforceKnownAlignment(Ptr, TD->getPrefTypeAlignment(Val->getType()),
                                 TD);
    unsigned StoreAlign = SI.getAlignment();
    // Alignment 0 on a store means "use the ABI alignment of the type".
    unsigned EffectiveStoreAlign = StoreAlign != 0 ? StoreAlign :
      TD->getABITypeAlignment(Val->getType());

    if (KnownAlign > EffectiveStoreAlign)
      SI.setAlignment(KnownAlign);
    else if (StoreAlign == 0)
      SI.setAlignment(EffectiveStoreAlign);
  }

  // Don't hack volatile/atomic stores.
  // FIXME: Some bits are legal for atomic stores; needs refactoring.
  if (!SI.isSimple()) return 0;

  // If the RHS is an alloca with a single use, zapify the store, making the
  // alloca dead.  (This store is the only use, so the value can never be
  // read back.)
  if (Ptr->hasOneUse()) {
    if (isa<AllocaInst>(Ptr))
      return EraseInstFromFunction(SI);
    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
      if (isa<AllocaInst>(GEP->getOperand(0))) {
        if (GEP->getOperand(0)->hasOneUse())
          return EraseInstFromFunction(SI);
      }
    }
  }

  // Do really simple DSE, to catch cases where there are several consecutive
  // stores to the same location, separated by a few arithmetic operations. This
  // situation often occurs with bitfield accesses.  Scan at most 6 "real"
  // instructions backwards so this stays cheap.
  BasicBlock::iterator BBI = &SI;
  for (unsigned ScanInsts = 6; BBI != SI.getParent()->begin() && ScanInsts;
       --ScanInsts) {
    --BBI;
    // Don't count debug info directives, lest they affect codegen,
    // and we skip pointer-to-pointer bitcasts, which are NOPs.
    if (isa<DbgInfoIntrinsic>(BBI) ||
        (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
      ScanInsts++;
      continue;
    }

    if (StoreInst *PrevSI = dyn_cast<StoreInst>(BBI)) {
      // Prev store isn't volatile, and stores to the same location?
      if (PrevSI->isSimple() && equivalentAddressValues(PrevSI->getOperand(1),
                                                        SI.getOperand(1))) {
        ++NumDeadStore;
        ++BBI;
        EraseInstFromFunction(*PrevSI);
        continue;
      }
      break;
    }

    // If this is a load, we have to stop.  However, if the loaded value is from
    // the pointer we're loading and is producing the pointer we're storing,
    // then *this* store is dead (X = load P; store X -> P).
    if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
      if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr) &&
          LI->isSimple())
        return EraseInstFromFunction(SI);

      // Otherwise, this is a load from some other location.  Stores before it
      // may not be dead.
      break;
    }

    // Don't skip over loads or things that can modify memory.
    if (BBI->mayWriteToMemory() || BBI->mayReadFromMemory())
      break;
  }

  // store X, null    -> turns into 'unreachable' in SimplifyCFG
  // We only replace the stored value with undef here (dropping a use and
  // queueing the old value for revisiting); SimplifyCFG does the CFG change.
  if (isa<ConstantPointerNull>(Ptr) && SI.getPointerAddressSpace() == 0) {
    if (!isa<UndefValue>(Val)) {
      SI.setOperand(0, UndefValue::get(Val->getType()));
      if (Instruction *U = dyn_cast<Instruction>(Val))
        Worklist.Add(U);  // Dropped a use.
    }
    return 0;  // Do not modify these!
  }

  // store undef, Ptr -> noop
  if (isa<UndefValue>(Val))
    return EraseInstFromFunction(SI);

  // If the pointer destination is a cast, see if we can fold the cast into the
  // source instead.  Handles both cast instructions and cast constant exprs.
  if (isa<CastInst>(Ptr))
    if (Instruction *Res = InstCombineStoreToCast(*this, SI))
      return Res;
  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))
    if (CE->isCast())
      if (Instruction *Res = InstCombineStoreToCast(*this, SI))
        return Res;


  // If this store is the last instruction in the basic block (possibly
  // excepting debug info instructions), and if the block ends with an
  // unconditional branch, try to move it to the successor block.
  BBI = &SI;
  do {
    ++BBI;
  } while (isa<DbgInfoIntrinsic>(BBI) ||
           (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy()));
  if (BranchInst *BI = dyn_cast<BranchInst>(BBI))
    if (BI->isUnconditional())
      if (SimplifyStoreAtEndOfBlock(SI))
        return 0;  // xform done!

  return 0;
}

/// SimplifyStoreAtEndOfBlock - Turn things like:
///   if () { *P = v1; } else { *P = v2 }
/// into a phi node with a store in the successor.
///
/// Simplify things like:
///   *P = v1; if () { *P = v2; }
/// into a phi node with a store in the successor.
///
/// Returns true if the transformation was performed (both original stores
/// are erased and a merged store is inserted in the successor).
bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
  BasicBlock *StoreBB = SI.getParent();

  // Check to see if the successor block has exactly two incoming edges.  If
  // so, see if the other predecessor contains a store to the same location.
  // if so, insert a PHI node (if needed) and move the stores down.
  BasicBlock *DestBB = StoreBB->getTerminator()->getSuccessor(0);

  // Determine whether Dest has exactly two predecessors and, if so, compute
  // the other predecessor.
  pred_iterator PI = pred_begin(DestBB);
  BasicBlock *P = *PI;
  BasicBlock *OtherBB = 0;

  if (P != StoreBB)
    OtherBB = P;

  if (++PI == pred_end(DestBB))
    return false;

  P = *PI;
  if (P != StoreBB) {
    // If neither of the first two predecessors was StoreBB, or both were
    // some other block, DestBB doesn't have the shape we need.
    if (OtherBB)
      return false;
    OtherBB = P;
  }
  if (++PI != pred_end(DestBB))
    return false;

  // Bail out if all the relevant blocks aren't distinct (this can happen,
  // for example, if SI is in an infinite loop)
  if (StoreBB == DestBB || OtherBB == DestBB)
    return false;

  // Verify that the other block ends in a branch and is not otherwise empty.
  BasicBlock::iterator BBI = OtherBB->getTerminator();
  BranchInst *OtherBr = dyn_cast<BranchInst>(BBI);
  if (!OtherBr || BBI == OtherBB->begin())
    return false;

  // If the other block ends in an unconditional branch, check for the 'if then
  // else' case.  there is an instruction before the branch.
  StoreInst *OtherStore = 0;
  if (OtherBr->isUnconditional()) {
    --BBI;
    // Skip over debugging info (and no-op pointer bitcasts).
    while (isa<DbgInfoIntrinsic>(BBI) ||
           (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
      if (BBI==OtherBB->begin())
        return false;
      --BBI;
    }
    // If this isn't a store, isn't a store to the same location, or is not the
    // right kind of store, bail out.
    OtherStore = dyn_cast<StoreInst>(BBI);
    if (!OtherStore || OtherStore->getOperand(1) != SI.getOperand(1) ||
        !SI.isSameOperationAs(OtherStore))
      return false;
  } else {
    // Otherwise, the other block ended with a conditional branch. If one of the
    // destinations is StoreBB, then we have the if/then case.
    if (OtherBr->getSuccessor(0) != StoreBB &&
        OtherBr->getSuccessor(1) != StoreBB)
      return false;

    // Okay, we know that OtherBr now goes to Dest and StoreBB, so this is an
    // if/then triangle.  See if there is a store to the same ptr as SI that
    // lives in OtherBB.
    for (;; --BBI) {
      // Check to see if we find the matching store.
      if ((OtherStore = dyn_cast<StoreInst>(BBI))) {
        if (OtherStore->getOperand(1) != SI.getOperand(1) ||
            !SI.isSameOperationAs(OtherStore))
          return false;
        break;
      }
      // If we find something that may be using or overwriting the stored
      // value, or if we run out of instructions, we can't do the xform.
      if (BBI->mayReadFromMemory() || BBI->mayWriteToMemory() ||
          BBI == OtherBB->begin())
        return false;
    }

    // In order to eliminate the store in OtherBr, we have to
    // make sure nothing reads or overwrites the stored value in
    // StoreBB.
    for (BasicBlock::iterator I = StoreBB->begin(); &*I != &SI; ++I) {
      // FIXME: This should really be AA driven.
      if (I->mayReadFromMemory() || I->mayWriteToMemory())
        return false;
    }
  }

  // Insert a PHI node now if we need it.  If both blocks store the same
  // value no PHI is necessary; otherwise merge the two incoming values.
  Value *MergedVal = OtherStore->getOperand(0);
  if (MergedVal != SI.getOperand(0)) {
    PHINode *PN = PHINode::Create(MergedVal->getType(), 2, "storemerge");
    PN->addIncoming(SI.getOperand(0), SI.getParent());
    PN->addIncoming(OtherStore->getOperand(0), OtherBB);
    MergedVal = InsertNewInstBefore(PN, DestBB->front());
  }

  // Advance to a place where it is safe to insert the new store and
  // insert it.  The new store keeps SI's volatility/alignment/ordering.
  BBI = DestBB->getFirstInsertionPt();
  StoreInst *NewSI = new StoreInst(MergedVal, SI.getOperand(1),
                                   SI.isVolatile(),
                                   SI.getAlignment(),
                                   SI.getOrdering(),
                                   SI.getSynchScope());
  InsertNewInstBefore(NewSI, *BBI);
  NewSI->setDebugLoc(OtherStore->getDebugLoc());

  // If the two stores had the same TBAA tag, preserve it.  Otherwise fall
  // back to the most generic tag common to both.
  if (MDNode *TBAATag = SI.getMetadata(LLVMContext::MD_tbaa))
    if ((TBAATag = MDNode::getMostGenericTBAA(TBAATag,
                               OtherStore->getMetadata(LLVMContext::MD_tbaa))))
      NewSI->setMetadata(LLVMContext::MD_tbaa, TBAATag);


  // Nuke the old stores.
  EraseInstFromFunction(SI);
  EraseInstFromFunction(*OtherStore);
  return true;
}