//===- InstCombineLoadStoreAlloca.cpp -------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the visit functions for load, store and alloca.
//
//===----------------------------------------------------------------------===//

#include "InstCombine.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;

STATISTIC(NumDeadStore,    "Number of dead stores eliminated");
STATISTIC(NumGlobalCopies, "Number of allocas copied from constant global");

/// pointsToConstantGlobal - Return true if V (possibly indirectly) points to
/// some part of a constant global variable.  This intentionally only accepts
/// constant expressions because we can't rewrite arbitrary instructions.
static bool pointsToConstantGlobal(Value *V) {
  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
    return GV->isConstant();
  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
    if (CE->getOpcode() == Instruction::BitCast ||
        CE->getOpcode() == Instruction::GetElementPtr)
      return pointsToConstantGlobal(CE->getOperand(0));
  return false;
}

/// isOnlyCopiedFromConstantGlobal - Recursively walk the uses of a (derived)
/// pointer to an alloca.  Ignore any reads of the pointer, and return false
/// if we see any stores or other unknown uses.  If we see pointer arithmetic,
/// keep track of whether it moves the pointer (with IsOffset), but otherwise
/// traverse the uses.  If we see a memcpy/memmove that targets an unoffseted
/// pointer to the alloca, and if the source pointer is a pointer to a
/// constant global, we can optimize this.
static bool
isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
                               SmallVectorImpl<Instruction *> &ToDelete,
                               bool IsOffset = false) {
  // We track lifetime intrinsics as we encounter them.  If we decide to go
  // ahead and replace the value with the global, this lets the caller quickly
  // eliminate the markers.

  for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI) {
    User *U = cast<Instruction>(*UI);

    if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
      // Ignore simple (non-volatile, non-atomic) loads; they are always ok.
      if (!LI->isSimple()) return false;
      continue;
    }

    if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
      // If uses of the bitcast are ok, we are ok.
      if (!isOnlyCopiedFromConstantGlobal(BCI, TheCopy, ToDelete, IsOffset))
        return false;
      continue;
    }
    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
      // A GEP with all zero indices doesn't offset the pointer; any other
      // GEP does.
      if (!isOnlyCopiedFromConstantGlobal(GEP, TheCopy, ToDelete,
                                          IsOffset ||
                                          !GEP->hasAllZeroIndices()))
        return false;
      continue;
    }

    if (CallSite CS = U) {
      // If this is the function being called then we treat it like a load and
      // ignore it.
      if (CS.isCallee(UI))
        continue;

      // If this is a readonly/readnone call site, then we know it is just a
      // load (but one that potentially returns the value itself), so we can
      // ignore it if we know that the value isn't captured.
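      // (A readonly call that does not capture the pointer can neither write
      // through it nor make it reachable by a later writer, so the alloca's
      // contents still come only from the constant global.)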
      unsigned ArgNo = CS.getArgumentNo(UI);
      if (CS.onlyReadsMemory() &&
          (CS.getInstruction()->use_empty() || CS.doesNotCapture(ArgNo)))
        continue;

      // If this is being passed as a byval argument, the caller is making a
      // copy, so it is only a read of the alloca.
      if (CS.isByValArgument(ArgNo))
        continue;
    }

    // Lifetime intrinsics can be handled by the caller.
    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
      if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
          II->getIntrinsicID() == Intrinsic::lifetime_end) {
        assert(II->use_empty() && "Lifetime markers have no result to use!");
        ToDelete.push_back(II);
        continue;
      }
    }

    // If this isn't a memcpy/memmove, reject it as something we can't handle.
    MemTransferInst *MI = dyn_cast<MemTransferInst>(U);
    if (MI == 0)
      return false;

    // If the transfer is using the alloca as the source of the transfer, then
    // ignore it since it is a load (unless the transfer is volatile).
    if (UI.getOperandNo() == 1) {
      if (MI->isVolatile()) return false;
      continue;
    }

    // If we have already seen a copy, reject the second one.
    if (TheCopy) return false;

    // If the pointer has been offset from the start of the alloca, we can't
    // safely handle this.
    if (IsOffset) return false;

    // If the memintrinsic isn't using the alloca as the dest, reject it.
    if (UI.getOperandNo() != 0) return false;

    // If the source of the memcpy/move is not a constant global, reject it.
    if (!pointsToConstantGlobal(MI->getSource()))
      return false;

    // Otherwise, the transform is safe.  Remember the copy instruction.
    TheCopy = MI;
  }
  return true;
}

/// isOnlyCopiedFromConstantGlobal - Return the memcpy/memmove if the
/// specified alloca is only modified by a single copy from a constant global;
/// otherwise return null.  If we can prove this, we can replace any uses of
/// the alloca with uses of the global directly.
static MemTransferInst *
isOnlyCopiedFromConstantGlobal(AllocaInst *AI,
                               SmallVectorImpl<Instruction *> &ToDelete) {
  MemTransferInst *TheCopy = 0;
  if (isOnlyCopiedFromConstantGlobal(AI, TheCopy, ToDelete))
    return TheCopy;
  return 0;
}

/// getPointeeAlignment - Compute the minimum alignment of the value pointed
/// to by the given pointer.
static unsigned getPointeeAlignment(Value *V, const TargetData &TD) {
  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
    if (CE->getOpcode() == Instruction::BitCast ||
        (CE->getOpcode() == Instruction::GetElementPtr &&
         cast<GEPOperator>(CE)->hasAllZeroIndices()))
      return getPointeeAlignment(CE->getOperand(0), TD);

  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
    if (!GV->isDeclaration())
      return TD.getPreferredAlignment(GV);

  if (PointerType *PT = dyn_cast<PointerType>(V->getType()))
    return TD.getABITypeAlignment(PT->getElementType());

  return 0;
}

Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
  // Ensure that the alloca array size argument has type intptr_t, so that
  // any casting is exposed early.
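  //
  // For example (illustrative IR, on a 64-bit target), "alloca i32, i8 %n"
  // becomes:
  //   %1 = zext i8 %n to i64
  //   %a = alloca i32, i64 %1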
  if (TD) {
    Type *IntPtrTy = TD->getIntPtrType(AI.getContext());
    if (AI.getArraySize()->getType() != IntPtrTy) {
      Value *V = Builder->CreateIntCast(AI.getArraySize(), IntPtrTy, false);
      AI.setOperand(0, V);
      return &AI;
    }
  }

  // Convert: alloca Ty, C - where C is a constant != 1 - into: alloca [C x Ty], 1
  if (AI.isArrayAllocation()) {  // Check C != 1
    if (const ConstantInt *C = dyn_cast<ConstantInt>(AI.getArraySize())) {
      Type *NewTy =
        ArrayType::get(AI.getAllocatedType(), C->getZExtValue());
      AllocaInst *New = Builder->CreateAlloca(NewTy, 0, AI.getName());
      New->setAlignment(AI.getAlignment());

      // Scan to the end of the allocation instructions, to skip over a block
      // of allocas if possible... also skip interleaved debug info.
      BasicBlock::iterator It = New;
      while (isa<AllocaInst>(*It) || isa<DbgInfoIntrinsic>(*It)) ++It;

      // Now that It is pointing to the first non-allocation instruction in
      // the block, insert our getelementptr instruction...
      Value *NullIdx = Constant::getNullValue(Type::getInt32Ty(AI.getContext()));
      Value *Idx[2];
      Idx[0] = NullIdx;
      Idx[1] = NullIdx;
      Instruction *GEP =
        GetElementPtrInst::CreateInBounds(New, Idx, New->getName()+".sub");
      InsertNewInstBefore(GEP, *It);

      // Now make everything use the getelementptr instead of the original
      // allocation.
      return ReplaceInstUsesWith(AI, GEP);
    } else if (isa<UndefValue>(AI.getArraySize())) {
      return ReplaceInstUsesWith(AI, Constant::getNullValue(AI.getType()));
    }
  }

  if (TD && AI.getAllocatedType()->isSized()) {
    // If the alignment is 0 (unspecified), assign it the preferred alignment.
    if (AI.getAlignment() == 0)
      AI.setAlignment(TD->getPrefTypeAlignment(AI.getAllocatedType()));

    // Move all allocas of zero-byte objects to the entry block and merge them
    // together.  Note that we only do this for allocas, because malloc should
    // allocate and return a unique pointer, even for a zero byte allocation.
    if (TD->getTypeAllocSize(AI.getAllocatedType()) == 0) {
      // For a zero-sized alloca there is no point in doing an array
      // allocation.  This is helpful if the array size is a complicated
      // expression not used elsewhere.
      if (AI.isArrayAllocation()) {
        AI.setOperand(0, ConstantInt::get(AI.getArraySize()->getType(), 1));
        return &AI;
      }

      // Get the first instruction in the entry block.
      BasicBlock &EntryBlock = AI.getParent()->getParent()->getEntryBlock();
      Instruction *FirstInst = EntryBlock.getFirstNonPHIOrDbg();
      if (FirstInst != &AI) {
        // If the entry block doesn't start with a zero-size alloca then move
        // this one to the start of the entry block.  There is no problem with
        // dominance as the array size was forced to a constant earlier
        // already.
        AllocaInst *EntryAI = dyn_cast<AllocaInst>(FirstInst);
        if (!EntryAI || !EntryAI->getAllocatedType()->isSized() ||
            TD->getTypeAllocSize(EntryAI->getAllocatedType()) != 0) {
          AI.moveBefore(FirstInst);
          return &AI;
        }

        // If the alignment of the entry block alloca is 0 (unspecified),
        // assign it the preferred alignment.
        if (EntryAI->getAlignment() == 0)
          EntryAI->setAlignment(
            TD->getPrefTypeAlignment(EntryAI->getAllocatedType()));
        // Replace this zero-sized alloca with the one at the start of the
        // entry block after ensuring that the address will be aligned enough
        // for both types.
        unsigned MaxAlign = std::max(EntryAI->getAlignment(),
                                     AI.getAlignment());
        EntryAI->setAlignment(MaxAlign);
        if (AI.getType() != EntryAI->getType())
          return new BitCastInst(EntryAI, AI.getType());
        return ReplaceInstUsesWith(AI, EntryAI);
      }
    }
  }

  if (TD) {
    // Check to see if this allocation is only modified by a memcpy/memmove
    // from a constant global whose alignment is equal to or exceeds that of
    // the allocation.  If this is the case, we can change all users to use
    // the constant global instead.  This is commonly produced by the CFE by
    // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if
    // 'A' is only subsequently read.
    SmallVector<Instruction *, 4> ToDelete;
    if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(&AI, ToDelete)) {
      if (AI.getAlignment() <= getPointeeAlignment(Copy->getSource(), *TD)) {
        DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n');
        DEBUG(dbgs() << "  memcpy = " << *Copy << '\n');
        for (unsigned i = 0, e = ToDelete.size(); i != e; ++i)
          EraseInstFromFunction(*ToDelete[i]);
        Constant *TheSrc = cast<Constant>(Copy->getSource());
        Instruction *NewI
          = ReplaceInstUsesWith(AI, ConstantExpr::getBitCast(TheSrc,
                                                             AI.getType()));
        EraseInstFromFunction(*Copy);
        ++NumGlobalCopies;
        return NewI;
      }
    }
  }

  // Finally, use the generic allocation site handler to aggressively remove
  // unused allocas.
  return visitAllocSite(AI);
}


/// InstCombineLoadCast - Fold 'load (cast P)' -> 'cast (load P)' when
/// possible.
static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
                                        const TargetData *TD) {
  User *CI = cast<User>(LI.getOperand(0));
  Value *CastOp = CI->getOperand(0);

  PointerType *DestTy = cast<PointerType>(CI->getType());
  Type *DestPTy = DestTy->getElementType();
  if (PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType())) {

    // If the address spaces don't match, don't eliminate the cast.
    if (DestTy->getAddressSpace() != SrcTy->getAddressSpace())
      return 0;

    Type *SrcPTy = SrcTy->getElementType();

    if (DestPTy->isIntegerTy() || DestPTy->isPointerTy() ||
        DestPTy->isVectorTy()) {
      // If the source is an array, the code below will not succeed.  Check to
      // see if a trivial 'gep P, 0, 0' will help matters.  Only do this for
      // constants.
      if (ArrayType *ASrcTy = dyn_cast<ArrayType>(SrcPTy))
        if (Constant *CSrc = dyn_cast<Constant>(CastOp))
          if (ASrcTy->getNumElements() != 0) {
            Value *Idxs[2];
            Idxs[0] = Constant::getNullValue(Type::getInt32Ty(LI.getContext()));
            Idxs[1] = Idxs[0];
            CastOp = ConstantExpr::getGetElementPtr(CSrc, Idxs);
            SrcTy = cast<PointerType>(CastOp->getType());
            SrcPTy = SrcTy->getElementType();
          }

      if (IC.getTargetData() &&
          (SrcPTy->isIntegerTy() || SrcPTy->isPointerTy() ||
           SrcPTy->isVectorTy()) &&
          // Do not allow turning this into a load of an integer, which is
          // then cast to a pointer; this pessimizes pointer analysis a lot.
          (SrcPTy->isPointerTy() == LI.getType()->isPointerTy()) &&
          IC.getTargetData()->getTypeSizeInBits(SrcPTy) ==
          IC.getTargetData()->getTypeSizeInBits(DestPTy)) {

        // Okay, we are casting from one integer or pointer type to another of
        // the same size.  Instead of casting the pointer before the load,
        // cast the result of the loaded value.
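        //
        // For example (illustrative IR):
        //   %c = bitcast <4 x i8>* %p to i32*
        //   %v = load i32* %c
        // becomes:
        //   %v1 = load <4 x i8>* %p
        //   %v  = bitcast <4 x i8> %v1 to i32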
        LoadInst *NewLoad =
          IC.Builder->CreateLoad(CastOp, LI.isVolatile(), CI->getName());
        NewLoad->setAlignment(LI.getAlignment());
        NewLoad->setAtomic(LI.getOrdering(), LI.getSynchScope());
        // Now cast the result of the load.
        return new BitCastInst(NewLoad, LI.getType());
      }
    }
  }
  return 0;
}

Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
  Value *Op = LI.getOperand(0);

  // Attempt to improve the alignment.
  if (TD) {
    unsigned KnownAlign =
      getOrEnforceKnownAlignment(Op, TD->getPrefTypeAlignment(LI.getType()),
                                 TD);
    unsigned LoadAlign = LI.getAlignment();
    unsigned EffectiveLoadAlign = LoadAlign != 0 ? LoadAlign :
      TD->getABITypeAlignment(LI.getType());

    if (KnownAlign > EffectiveLoadAlign)
      LI.setAlignment(KnownAlign);
    else if (LoadAlign == 0)
      LI.setAlignment(EffectiveLoadAlign);
  }

  // load (cast X) --> cast (load X) iff safe.
  if (isa<CastInst>(Op))
    if (Instruction *Res = InstCombineLoadCast(*this, LI, TD))
      return Res;

  // None of the following transforms are legal for volatile/atomic loads.
  // FIXME: Some of them are okay for atomic loads; needs refactoring.
  if (!LI.isSimple()) return 0;

  // Do really simple store-to-load forwarding and load CSE, to catch cases
  // where there are several consecutive memory accesses to the same location,
  // separated by a few arithmetic operations.
  BasicBlock::iterator BBI = &LI;
  if (Value *AvailableVal = FindAvailableLoadedValue(Op, LI.getParent(), BBI, 6))
    return ReplaceInstUsesWith(LI, AvailableVal);

  // load (gep null, ...) -> unreachable
  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(Op)) {
    const Value *GEPI0 = GEPI->getOperand(0);
    // TODO: Consider a target hook for valid address spaces for this xform.
    if (isa<ConstantPointerNull>(GEPI0) && GEPI->getPointerAddressSpace() == 0) {
      // Insert a new store-to-null instruction before the load to indicate
      // that this code is not reachable.  We do this instead of inserting
      // an unreachable instruction directly because we cannot modify the
      // CFG.
      new StoreInst(UndefValue::get(LI.getType()),
                    Constant::getNullValue(Op->getType()), &LI);
      return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
    }
  }

  // load null/undef -> unreachable
  // TODO: Consider a target hook for valid address spaces for this xform.
  if (isa<UndefValue>(Op) ||
      (isa<ConstantPointerNull>(Op) && LI.getPointerAddressSpace() == 0)) {
    // Insert a new store-to-null instruction before the load to indicate that
    // this code is not reachable.  We do this instead of inserting an
    // unreachable instruction directly because we cannot modify the CFG.
    new StoreInst(UndefValue::get(LI.getType()),
                  Constant::getNullValue(Op->getType()), &LI);
    return ReplaceInstUsesWith(LI, UndefValue::get(LI.getType()));
  }

  // Instcombine load (constantexpr_cast global) -> cast (load global)
  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Op))
    if (CE->isCast())
      if (Instruction *Res = InstCombineLoadCast(*this, LI, TD))
        return Res;

  if (Op->hasOneUse()) {
    // Change select and PHI nodes to select values instead of addresses: this
    // helps alias analysis out a lot, allows many other simplifications, and
    // exposes redundancy in the code.
    //
    // Note that we cannot do the transformation unless we know that the
    // introduced loads cannot trap!
    // Something like this is valid as long as the condition is always false:
    //   load (select bool %C, int* null, int* %G)
    // but it would not be valid if we transformed it to load from null
    // unconditionally.
    //
    if (SelectInst *SI = dyn_cast<SelectInst>(Op)) {
      // load (select (Cond, &V1, &V2))  --> select(Cond, load &V1, load &V2).
      unsigned Align = LI.getAlignment();
      if (isSafeToLoadUnconditionally(SI->getOperand(1), SI, Align, TD) &&
          isSafeToLoadUnconditionally(SI->getOperand(2), SI, Align, TD)) {
        LoadInst *V1 = Builder->CreateLoad(SI->getOperand(1),
                                           SI->getOperand(1)->getName()+".val");
        LoadInst *V2 = Builder->CreateLoad(SI->getOperand(2),
                                           SI->getOperand(2)->getName()+".val");
        V1->setAlignment(Align);
        V2->setAlignment(Align);
        return SelectInst::Create(SI->getCondition(), V1, V2);
      }

      // load (select (cond, null, P)) -> load P
      if (Constant *C = dyn_cast<Constant>(SI->getOperand(1)))
        if (C->isNullValue()) {
          LI.setOperand(0, SI->getOperand(2));
          return &LI;
        }

      // load (select (cond, P, null)) -> load P
      if (Constant *C = dyn_cast<Constant>(SI->getOperand(2)))
        if (C->isNullValue()) {
          LI.setOperand(0, SI->getOperand(1));
          return &LI;
        }
    }
  }
  return 0;
}

/// InstCombineStoreToCast - Fold 'store V, (cast P)' -> 'store (cast V), P'
/// when possible.  This makes it generally easy to do alias analysis and/or
/// SROA/mem2reg of the memory object.
static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
  User *CI = cast<User>(SI.getOperand(1));
  Value *CastOp = CI->getOperand(0);

  Type *DestPTy = cast<PointerType>(CI->getType())->getElementType();
  PointerType *SrcTy = dyn_cast<PointerType>(CastOp->getType());
  if (SrcTy == 0) return 0;

  Type *SrcPTy = SrcTy->getElementType();

  if (!DestPTy->isIntegerTy() && !DestPTy->isPointerTy())
    return 0;

  /// NewGEPIndices - If SrcPTy is an aggregate type, we can emit a "noop gep"
  /// to its first element.  This allows us to handle things like:
  ///   store i32 xxx, (bitcast {foo*, float}* %P to i32*)
  /// on 32-bit hosts.
  SmallVector<Value*, 4> NewGEPIndices;

  // If the source is an aggregate (array or struct), the code below will not
  // succeed on it directly.  Check to see if descending to its first scalar
  // element with a trivial 'gep P, 0, ..., 0' will help matters.
  if (SrcPTy->isArrayTy() || SrcPTy->isStructTy()) {
    // Index through pointer.
    Constant *Zero = Constant::getNullValue(Type::getInt32Ty(SI.getContext()));
    NewGEPIndices.push_back(Zero);

    while (1) {
      if (StructType *STy = dyn_cast<StructType>(SrcPTy)) {
        if (!STy->getNumElements()) /* Struct can be empty {} */
          break;
        NewGEPIndices.push_back(Zero);
        SrcPTy = STy->getElementType(0);
      } else if (ArrayType *ATy = dyn_cast<ArrayType>(SrcPTy)) {
        NewGEPIndices.push_back(Zero);
        SrcPTy = ATy->getElementType();
      } else {
        break;
      }
    }

    SrcTy = PointerType::get(SrcPTy, SrcTy->getAddressSpace());
  }

  if (!SrcPTy->isIntegerTy() && !SrcPTy->isPointerTy())
    return 0;

  // If the pointers point into different address spaces, or if they point to
  // values with different sizes, we can't do the transformation.
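  // (Without TargetData we cannot compare the type sizes, so we also bail
  // out conservatively in that case.)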
  if (!IC.getTargetData() ||
      SrcTy->getAddressSpace() !=
        cast<PointerType>(CI->getType())->getAddressSpace() ||
      IC.getTargetData()->getTypeSizeInBits(SrcPTy) !=
      IC.getTargetData()->getTypeSizeInBits(DestPTy))
    return 0;

  // Okay, we are casting from one integer or pointer type to another of
  // the same size.  Instead of casting the pointer before the store, cast
  // the value to be stored.
  Value *NewCast;
  Value *SIOp0 = SI.getOperand(0);
  Instruction::CastOps opcode = Instruction::BitCast;
  Type *CastSrcTy = SIOp0->getType();
  Type *CastDstTy = SrcPTy;
  if (CastDstTy->isPointerTy()) {
    if (CastSrcTy->isIntegerTy())
      opcode = Instruction::IntToPtr;
  } else if (CastDstTy->isIntegerTy()) {
    if (SIOp0->getType()->isPointerTy())
      opcode = Instruction::PtrToInt;
  }

  // If the original pointer points to an aggregate, this is a store to the
  // aggregate's first scalar field; emit a GEP to index down to that field.
  if (!NewGEPIndices.empty())
    CastOp = IC.Builder->CreateInBoundsGEP(CastOp, NewGEPIndices);

  NewCast = IC.Builder->CreateCast(opcode, SIOp0, CastDstTy,
                                   SIOp0->getName()+".c");
  SI.setOperand(0, NewCast);
  SI.setOperand(1, CastOp);
  return &SI;
}

/// equivalentAddressValues - Test if A and B will obviously have the same
/// value.  This includes recognizing that %t0 and %t1 will have the same
/// value in code like this:
///   %t0 = getelementptr \@a, 0, 3
///   store i32 0, i32* %t0
///   %t1 = getelementptr \@a, 0, 3
///   %t2 = load i32* %t1
///
static bool equivalentAddressValues(Value *A, Value *B) {
  // Test if the values are trivially equivalent.
  if (A == B) return true;

  // Test if the values come from identical arithmetic instructions.
  // This uses isIdenticalToWhenDefined instead of isIdenticalTo because
  // it's only used to compare two uses within the same basic block, which
  // means that they'll always either have the same value or one of them
  // will have an undefined value.
  if (isa<BinaryOperator>(A) ||
      isa<CastInst>(A) ||
      isa<PHINode>(A) ||
      isa<GetElementPtrInst>(A))
    if (Instruction *BI = dyn_cast<Instruction>(B))
      if (cast<Instruction>(A)->isIdenticalToWhenDefined(BI))
        return true;

  // Otherwise they may not be equivalent.
  return false;
}

Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
  Value *Val = SI.getOperand(0);
  Value *Ptr = SI.getOperand(1);

  // Attempt to improve the alignment.
  if (TD) {
    unsigned KnownAlign =
      getOrEnforceKnownAlignment(Ptr, TD->getPrefTypeAlignment(Val->getType()),
                                 TD);
    unsigned StoreAlign = SI.getAlignment();
    unsigned EffectiveStoreAlign = StoreAlign != 0 ? StoreAlign :
      TD->getABITypeAlignment(Val->getType());

    if (KnownAlign > EffectiveStoreAlign)
      SI.setAlignment(KnownAlign);
    else if (StoreAlign == 0)
      SI.setAlignment(EffectiveStoreAlign);
  }

  // Don't hack volatile/atomic stores.
  // FIXME: Some bits are legal for atomic stores; needs refactoring.
  if (!SI.isSimple()) return 0;

  // If the pointer operand is an alloca with a single use, zapify the store,
  // making the alloca dead.
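  //
  // For example (illustrative IR), the store below is dead because the store
  // is the alloca's only use, so nothing can ever read the stored value:
  //   %a = alloca i32
  //   store i32 %v, i32* %a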
  if (Ptr->hasOneUse()) {
    if (isa<AllocaInst>(Ptr))
      return EraseInstFromFunction(SI);
    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr)) {
      if (isa<AllocaInst>(GEP->getOperand(0))) {
        if (GEP->getOperand(0)->hasOneUse())
          return EraseInstFromFunction(SI);
      }
    }
  }

  // Do really simple DSE, to catch cases where there are several consecutive
  // stores to the same location, separated by a few arithmetic operations.
  // This situation often occurs with bitfield accesses.
  BasicBlock::iterator BBI = &SI;
  for (unsigned ScanInsts = 6; BBI != SI.getParent()->begin() && ScanInsts;
       --ScanInsts) {
    --BBI;
    // Don't count debug info directives, lest they affect codegen, and skip
    // pointer-to-pointer bitcasts, which are NOPs.
    if (isa<DbgInfoIntrinsic>(BBI) ||
        (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
      ScanInsts++;
      continue;
    }

    if (StoreInst *PrevSI = dyn_cast<StoreInst>(BBI)) {
      // Prev store isn't volatile, and stores to the same location?
      if (PrevSI->isSimple() && equivalentAddressValues(PrevSI->getOperand(1),
                                                        SI.getOperand(1))) {
        ++NumDeadStore;
        ++BBI;
        EraseInstFromFunction(*PrevSI);
        continue;
      }
      break;
    }

    // If this is a load, we have to stop.  However, if the stored value is
    // exactly the value just loaded from the same pointer, then *this* store
    // is dead (X = load P; store X -> P).
    if (LoadInst *LI = dyn_cast<LoadInst>(BBI)) {
      if (LI == Val && equivalentAddressValues(LI->getOperand(0), Ptr) &&
          LI->isSimple())
        return EraseInstFromFunction(SI);

      // Otherwise, this is a load from some other location.  Stores before it
      // may not be dead.
      break;
    }

    // Don't skip over anything else that can read or modify memory.
    if (BBI->mayWriteToMemory() || BBI->mayReadFromMemory())
      break;
  }

  // store X, null -> turns into 'unreachable' in SimplifyCFG
  if (isa<ConstantPointerNull>(Ptr) && SI.getPointerAddressSpace() == 0) {
    if (!isa<UndefValue>(Val)) {
      SI.setOperand(0, UndefValue::get(Val->getType()));
      if (Instruction *U = dyn_cast<Instruction>(Val))
        Worklist.Add(U);  // Dropped a use.
    }
    return 0;  // Do not modify these!
  }

  // store undef, Ptr -> noop
  if (isa<UndefValue>(Val))
    return EraseInstFromFunction(SI);

  // If the pointer destination is a cast, see if we can fold the cast into
  // the source instead.
  if (isa<CastInst>(Ptr))
    if (Instruction *Res = InstCombineStoreToCast(*this, SI))
      return Res;
  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))
    if (CE->isCast())
      if (Instruction *Res = InstCombineStoreToCast(*this, SI))
        return Res;

  // If this store is the last instruction in the basic block (possibly
  // excepting debug info instructions), and if the block ends with an
  // unconditional branch, try to move it to the successor block.
  BBI = &SI;
  do {
    ++BBI;
  } while (isa<DbgInfoIntrinsic>(BBI) ||
           (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy()));
  if (BranchInst *BI = dyn_cast<BranchInst>(BBI))
    if (BI->isUnconditional())
      if (SimplifyStoreAtEndOfBlock(SI))
        return 0;  // xform done!

  return 0;
}

/// SimplifyStoreAtEndOfBlock - Turn things like:
///   if () { *P = v1; } else { *P = v2 }
/// into a phi node with a store in the successor.
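///
/// The successor block then contains (illustrative IR):
///   %storemerge = phi i32 [ %v1, %then ], [ %v2, %else ]
///   store i32 %storemerge, i32* %P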
///
/// Simplify things like:
///   *P = v1; if () { *P = v2; }
/// into a phi node with a store in the successor.
///
bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
  BasicBlock *StoreBB = SI.getParent();

  // Check to see if the successor block has exactly two incoming edges.  If
  // so, see if the other predecessor contains a store to the same location.
  // If so, insert a PHI node (if needed) and move the stores down.
  BasicBlock *DestBB = StoreBB->getTerminator()->getSuccessor(0);

  // Determine whether Dest has exactly two predecessors and, if so, compute
  // the other predecessor.
  pred_iterator PI = pred_begin(DestBB);
  BasicBlock *P = *PI;
  BasicBlock *OtherBB = 0;

  if (P != StoreBB)
    OtherBB = P;

  if (++PI == pred_end(DestBB))
    return false;

  P = *PI;
  if (P != StoreBB) {
    if (OtherBB)
      return false;
    OtherBB = P;
  }
  if (++PI != pred_end(DestBB))
    return false;

  // Bail out if all the relevant blocks aren't distinct (this can happen,
  // for example, if SI is in an infinite loop).
  if (StoreBB == DestBB || OtherBB == DestBB)
    return false;

  // Verify that the other block ends in a branch and is not otherwise empty.
  BasicBlock::iterator BBI = OtherBB->getTerminator();
  BranchInst *OtherBr = dyn_cast<BranchInst>(BBI);
  if (!OtherBr || BBI == OtherBB->begin())
    return false;

  // If the other block ends in an unconditional branch, check for the
  // 'if/then/else' case: there is an instruction before the branch.
  StoreInst *OtherStore = 0;
  if (OtherBr->isUnconditional()) {
    --BBI;
    // Skip over debugging info.
    while (isa<DbgInfoIntrinsic>(BBI) ||
           (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
      if (BBI == OtherBB->begin())
        return false;
      --BBI;
    }
    // If this isn't a store, isn't a store to the same location, or is not
    // the right kind of store, bail out.
    OtherStore = dyn_cast<StoreInst>(BBI);
    if (!OtherStore || OtherStore->getOperand(1) != SI.getOperand(1) ||
        !SI.isSameOperationAs(OtherStore))
      return false;
  } else {
    // Otherwise, the other block ended with a conditional branch. If one of
    // the destinations is StoreBB, then we have the if/then case.
    if (OtherBr->getSuccessor(0) != StoreBB &&
        OtherBr->getSuccessor(1) != StoreBB)
      return false;

    // Okay, we know that OtherBr now goes to Dest and StoreBB, so this is an
    // if/then triangle.  See if there is a store to the same ptr as SI that
    // lives in OtherBB.
    for (;; --BBI) {
      // Check to see if we find the matching store.
      if ((OtherStore = dyn_cast<StoreInst>(BBI))) {
        if (OtherStore->getOperand(1) != SI.getOperand(1) ||
            !SI.isSameOperationAs(OtherStore))
          return false;
        break;
      }
      // If we find something that may be using or overwriting the stored
      // value, or if we run out of instructions, we can't do the xform.
      if (BBI->mayReadFromMemory() || BBI->mayWriteToMemory() ||
          BBI == OtherBB->begin())
        return false;
    }

    // In order to eliminate the store in OtherBB, we have to make sure
    // nothing reads or overwrites the stored value in StoreBB.
    for (BasicBlock::iterator I = StoreBB->begin(); &*I != &SI; ++I) {
      // FIXME: This should really be AA driven.
      if (I->mayReadFromMemory() || I->mayWriteToMemory())
        return false;
    }
  }

  // Insert a PHI node now if we need it.
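  // (If both predecessors store the same value, the store can be sunk
  // without a PHI; otherwise the two stored values are merged.)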
  Value *MergedVal = OtherStore->getOperand(0);
  if (MergedVal != SI.getOperand(0)) {
    PHINode *PN = PHINode::Create(MergedVal->getType(), 2, "storemerge");
    PN->addIncoming(SI.getOperand(0), SI.getParent());
    PN->addIncoming(OtherStore->getOperand(0), OtherBB);
    MergedVal = InsertNewInstBefore(PN, DestBB->front());
  }

  // Advance to a place where it is safe to insert the new store and insert it.
  BBI = DestBB->getFirstInsertionPt();
  StoreInst *NewSI = new StoreInst(MergedVal, SI.getOperand(1),
                                   SI.isVolatile(),
                                   SI.getAlignment(),
                                   SI.getOrdering(),
                                   SI.getSynchScope());
  InsertNewInstBefore(NewSI, *BBI);
  NewSI->setDebugLoc(OtherStore->getDebugLoc());

  // Nuke the old stores.
  EraseInstFromFunction(SI);
  EraseInstFromFunction(*OtherStore);
  return true;
}