ScalarReplAggregates.cpp revision 226890
//===- ScalarReplAggregates.cpp - Scalar Replacement of Aggregates --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This transformation implements the well known scalar replacement of
// aggregates transformation.  This xform breaks up alloca instructions of
// aggregate type (structure or array) into individual alloca instructions for
// each member (if possible).  Then, if possible, it transforms the individual
// alloca instructions into nice clean scalar SSA form.
//
// This combines a simple SRoA algorithm with the Mem2Reg algorithm because
// they often interact, especially for C++ programs.  As such, iterating
// between SRoA, then Mem2Reg until we run out of things to promote works well.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "scalarrepl"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
#include "llvm/Pass.h"
#include "llvm/Analysis/DebugInfo.h"
#include "llvm/Analysis/DIBuilder.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/IRBuilder.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;

STATISTIC(NumReplaced,  "Number of allocas broken up");
STATISTIC(NumPromoted,  "Number of allocas promoted");
STATISTIC(NumAdjusted,  "Number of scalar allocas adjusted to allow promotion");
STATISTIC(NumConverted, "Number of aggregates converted to scalar");
STATISTIC(NumGlobals,   "Number of allocas copied from constant global");

namespace {
  struct SROA : public FunctionPass {
    SROA(int T, bool hasDT, char &ID)
      : FunctionPass(ID), HasDomTree(hasDT) {
      if (T == -1)
        SRThreshold = 128;
      else
        SRThreshold = T;
    }

    bool runOnFunction(Function &F);

    bool performScalarRepl(Function &F);
    bool performPromotion(Function &F);

  private:
    bool HasDomTree;
    TargetData *TD;

    /// DeadInsts - Keep track of instructions we have made dead, so that
    /// we can remove them after we are done working.
    SmallVector<Value*, 32> DeadInsts;

    /// AllocaInfo - When analyzing uses of an alloca instruction, this captures
    /// information about the uses.  All these fields are initialized to false
    /// and set to true when something is learned.
    struct AllocaInfo {
      /// The alloca to promote.
      AllocaInst *AI;

      /// CheckedPHIs - This is a set of verified PHI nodes, to prevent infinite
      /// looping and avoid redundant work.
      SmallPtrSet<PHINode*, 8> CheckedPHIs;

      /// isUnsafe - This is set to true if the alloca cannot be SROA'd.
      bool isUnsafe : 1;

      /// isMemCpySrc - This is true if this aggregate is memcpy'd from.
      bool isMemCpySrc : 1;

      /// isMemCpyDst - This is true if this aggregate is memcpy'd into.
      bool isMemCpyDst : 1;

      /// hasSubelementAccess - This is true if a subelement of the alloca is
      /// ever accessed, or false if the alloca is only accessed with mem
      /// intrinsics or load/store that only access the entire alloca at once.
      bool hasSubelementAccess : 1;

      /// hasALoadOrStore - This is true if there are any loads or stores to it.
      /// The alloca may just be accessed with memcpy, for example, which would
      /// not set this.
      bool hasALoadOrStore : 1;

      explicit AllocaInfo(AllocaInst *ai)
        : AI(ai), isUnsafe(false), isMemCpySrc(false), isMemCpyDst(false),
          hasSubelementAccess(false), hasALoadOrStore(false) {}
    };

    unsigned SRThreshold;

    void MarkUnsafe(AllocaInfo &I, Instruction *User) {
      I.isUnsafe = true;
      DEBUG(dbgs() << "  Transformation preventing inst: " << *User << '\n');
    }

    bool isSafeAllocaToScalarRepl(AllocaInst *AI);

    void isSafeForScalarRepl(Instruction *I, uint64_t Offset, AllocaInfo &Info);
    void isSafePHISelectUseForScalarRepl(Instruction *User, uint64_t Offset,
                                         AllocaInfo &Info);
    void isSafeGEP(GetElementPtrInst *GEPI, uint64_t &Offset, AllocaInfo &Info);
    void isSafeMemAccess(uint64_t Offset, uint64_t MemSize,
                         Type *MemOpType, bool isStore, AllocaInfo &Info,
                         Instruction *TheAccess, bool AllowWholeAccess);
    bool TypeHasComponent(Type *T, uint64_t Offset, uint64_t Size);
    uint64_t FindElementAndOffset(Type *&T, uint64_t &Offset,
                                  Type *&IdxTy);

    void DoScalarReplacement(AllocaInst *AI,
                             std::vector<AllocaInst*> &WorkList);
    void DeleteDeadInstructions();

    void RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
                              SmallVector<AllocaInst*, 32> &NewElts);
    void RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset,
                        SmallVector<AllocaInst*, 32> &NewElts);
    void RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset,
                    SmallVector<AllocaInst*, 32> &NewElts);
    void RewriteLifetimeIntrinsic(IntrinsicInst *II, AllocaInst *AI,
                                  uint64_t Offset,
                                  SmallVector<AllocaInst*, 32> &NewElts);
    void RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
                                      AllocaInst *AI,
                                      SmallVector<AllocaInst*, 32> &NewElts);
    void RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
                                       SmallVector<AllocaInst*, 32> &NewElts);
    void RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
                                      SmallVector<AllocaInst*, 32> &NewElts);

    static MemTransferInst *isOnlyCopiedFromConstantGlobal(
        AllocaInst *AI, SmallVector<Instruction*, 4> &ToDelete);
  };

  // SROA_DT - SROA that uses DominatorTree.
  struct SROA_DT : public SROA {
    static char ID;
  public:
    SROA_DT(int T = -1) : SROA(T, true, ID) {
      initializeSROA_DTPass(*PassRegistry::getPassRegistry());
    }

    // getAnalysisUsage - This pass does not require any passes, but we know it
    // will not alter the CFG, so say so.
    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.addRequired<DominatorTree>();
      AU.setPreservesCFG();
    }
  };

  // SROA_SSAUp - SROA that uses SSAUpdater.
  struct SROA_SSAUp : public SROA {
    static char ID;
  public:
    SROA_SSAUp(int T = -1) : SROA(T, false, ID) {
      initializeSROA_SSAUpPass(*PassRegistry::getPassRegistry());
    }

    // getAnalysisUsage - This pass does not require any passes, but we know it
    // will not alter the CFG, so say so.
    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.setPreservesCFG();
    }
  };

}

char SROA_DT::ID = 0;
char SROA_SSAUp::ID = 0;

INITIALIZE_PASS_BEGIN(SROA_DT, "scalarrepl",
                      "Scalar Replacement of Aggregates (DT)", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTree)
INITIALIZE_PASS_END(SROA_DT, "scalarrepl",
                    "Scalar Replacement of Aggregates (DT)", false, false)

INITIALIZE_PASS_BEGIN(SROA_SSAUp, "scalarrepl-ssa",
                      "Scalar Replacement of Aggregates (SSAUp)", false, false)
INITIALIZE_PASS_END(SROA_SSAUp, "scalarrepl-ssa",
                    "Scalar Replacement of Aggregates (SSAUp)", false, false)

// Public interface to the ScalarReplAggregates pass
FunctionPass *llvm::createScalarReplAggregatesPass(int Threshold,
                                                   bool UseDomTree) {
  if (UseDomTree)
    return new SROA_DT(Threshold);
  return new SROA_SSAUp(Threshold);
}


//===----------------------------------------------------------------------===//
// Convert To Scalar Optimization.
//===----------------------------------------------------------------------===//

namespace {
/// ConvertToScalarInfo - This class implements the "Convert To Scalar"
/// optimization, which scans the uses of an alloca and determines if it can
/// rewrite it in terms of a single new alloca that can be mem2reg'd.
class ConvertToScalarInfo {
  /// AllocaSize - The size of the alloca being considered in bytes.
  unsigned AllocaSize;
  const TargetData &TD;

  /// IsNotTrivial - This is set to true if there is some access to the object
  /// which means that mem2reg can't promote it.
  bool IsNotTrivial;

  /// ScalarKind - Tracks the kind of alloca being considered for promotion,
  /// computed based on the uses of the alloca rather than the LLVM type system.
  enum {
    Unknown,

    // Accesses via GEPs that are consistent with element access of a vector
    // type. This will not be converted into a vector unless there is a later
    // access using an actual vector type.
    ImplicitVector,

    // Accesses via vector operations and GEPs that are consistent with the
    // layout of a vector type.
    Vector,

    // An integer bag-of-bits with bitwise operations for insertion and
    // extraction. Any combination of types can be converted into this kind
    // of scalar.
    Integer
  } ScalarKind;

  /// VectorTy - This tracks the type that we should promote the vector to if
  /// it is possible to turn it into a vector.  This starts out null, and if it
  /// isn't possible to turn into a vector type, it gets set to VoidTy.
  VectorType *VectorTy;

  /// HadNonMemTransferAccess - True if there is at least one access to the
  /// alloca that is not a MemTransferInst.  We don't want to turn structs into
  /// large integers unless there is some potential for optimization.
  bool HadNonMemTransferAccess;

public:
  explicit ConvertToScalarInfo(unsigned Size, const TargetData &td)
    : AllocaSize(Size), TD(td), IsNotTrivial(false), ScalarKind(Unknown),
      VectorTy(0), HadNonMemTransferAccess(false) { }

  AllocaInst *TryConvert(AllocaInst *AI);

private:
  bool CanConvertToScalar(Value *V, uint64_t Offset);
  void MergeInTypeForLoadOrStore(Type *In, uint64_t Offset);
  bool MergeInVectorType(VectorType *VInTy, uint64_t Offset);
  void ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset);

  Value *ConvertScalar_ExtractValue(Value *NV, Type *ToType,
                                    uint64_t Offset, IRBuilder<> &Builder);
  Value *ConvertScalar_InsertValue(Value *StoredVal, Value *ExistingVal,
                                   uint64_t Offset, IRBuilder<> &Builder);
};
} // end anonymous namespace.
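
// As an illustrative sketch of the whole optimization (assuming a
// little-endian target where i64 is a legal integer type): an 8-byte alloca of
// type {float, float} that is only read and written with scalar float
// loads/stores can be rewritten as a single "alloca i64" whose accesses are
// expressed with shift/trunc/zext bit manipulation, which mem2reg can then
// promote to SSA form.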

/// TryConvert - Analyze the specified alloca, and if it is safe to do so,
/// rewrite it to be a new alloca which is mem2reg'able.  This returns the new
/// alloca if possible or null if not.
AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) {
  // If we can't convert this scalar, or if mem2reg can trivially do it, bail
  // out.
  if (!CanConvertToScalar(AI, 0) || !IsNotTrivial)
    return 0;

  // If an alloca has only memset / memcpy uses, it may still have an Unknown
  // ScalarKind. Treat it as an Integer below.
  if (ScalarKind == Unknown)
    ScalarKind = Integer;

  if (ScalarKind == Vector && VectorTy->getBitWidth() != AllocaSize * 8)
    ScalarKind = Integer;

  // If we were able to find a vector type that can handle this with
  // insert/extract elements, and if there was at least one use that had
  // a vector type, promote this to a vector.  We don't want to promote
  // random stuff that doesn't use vectors (e.g. <9 x double>) because then
  // we just get a lot of insert/extracts.  If at least one vector is
  // involved, then we probably really do have a union of vector/array.
  Type *NewTy;
  if (ScalarKind == Vector) {
    assert(VectorTy && "Missing type for vector scalar.");
    DEBUG(dbgs() << "CONVERT TO VECTOR: " << *AI << "\n  TYPE = "
          << *VectorTy << '\n');
    NewTy = VectorTy;  // Use the vector type.
  } else {
    unsigned BitWidth = AllocaSize * 8;
    if ((ScalarKind == ImplicitVector || ScalarKind == Integer) &&
        !HadNonMemTransferAccess && !TD.fitsInLegalInteger(BitWidth))
      return 0;

    DEBUG(dbgs() << "CONVERT TO SCALAR INTEGER: " << *AI << "\n");
    // Create and insert the integer alloca.
    NewTy = IntegerType::get(AI->getContext(), BitWidth);
  }
  AllocaInst *NewAI = new AllocaInst(NewTy, 0, "", AI->getParent()->begin());
  ConvertUsesToScalar(AI, NewAI, 0);
  return NewAI;
}

/// MergeInTypeForLoadOrStore - Add the 'In' type to the accumulated vector type
/// (VectorTy) so far at the offset specified by Offset (which is specified in
/// bytes).
///
/// There are two cases we handle here:
///   1) A union of vector types of the same size and potentially its elements.
///      Here we turn element accesses into insert/extract element operations.
///      This promotes a <4 x float> with a store of float to the third element
///      into a <4 x float> that uses insert element.
///   2) A fully general blob of memory, which we turn into some (potentially
///      large) integer type with extract and insert operations where the loads
///      and stores would mutate the memory.  We mark this by setting ScalarKind
///      to Integer.
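///
/// As an illustrative example of case 1, merging in a "store float" at byte
/// offset 8 of a 16-byte alloca:
///   %p = getelementptr <4 x float>* %A, i32 0, i32 2
///   store float %f, float* %p
/// is compatible with an accumulated VectorTy of <4 x float>, and the store
/// later becomes an insertelement at index 2.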
void ConvertToScalarInfo::MergeInTypeForLoadOrStore(Type *In,
                                                    uint64_t Offset) {
  // If we already decided to turn this into a blob of integer memory, there is
  // nothing to be done.
  if (ScalarKind == Integer)
    return;

  // If this could be contributing to a vector, analyze it.

  // If the In type is a vector that is the same size as the alloca, see if it
  // matches the existing VecTy.
  if (VectorType *VInTy = dyn_cast<VectorType>(In)) {
    if (MergeInVectorType(VInTy, Offset))
      return;
  } else if (In->isFloatTy() || In->isDoubleTy() ||
             (In->isIntegerTy() && In->getPrimitiveSizeInBits() >= 8 &&
              isPowerOf2_32(In->getPrimitiveSizeInBits()))) {
    // Full width accesses can be ignored, because they can always be turned
    // into bitcasts.
    unsigned EltSize = In->getPrimitiveSizeInBits()/8;
    if (EltSize == AllocaSize)
      return;

    // If we're accessing something that could be an element of a vector, see
    // if the implied vector agrees with what we already have and if Offset is
    // compatible with it.
    if (Offset % EltSize == 0 && AllocaSize % EltSize == 0 &&
        (!VectorTy || EltSize == VectorTy->getElementType()
                                         ->getPrimitiveSizeInBits()/8)) {
      if (!VectorTy) {
        ScalarKind = ImplicitVector;
        VectorTy = VectorType::get(In, AllocaSize/EltSize);
      }
      return;
    }
  }

  // Otherwise, we have a case that we can't handle with an optimized vector
  // form.  We can still turn this into a large integer.
  ScalarKind = Integer;
}

/// MergeInVectorType - Handles the vector case of MergeInTypeForLoadOrStore,
/// returning true if the type was successfully merged and false otherwise.
bool ConvertToScalarInfo::MergeInVectorType(VectorType *VInTy,
                                            uint64_t Offset) {
  if (VInTy->getBitWidth()/8 == AllocaSize && Offset == 0) {
    // If we're storing/loading a vector of the right size, allow it as a
    // vector.  If this is the first vector we see, remember the type so that
    // we know the element size. If this is a subsequent access, ignore it
    // even if it is a differing type but the same size. Worst case we can
    // bitcast the resultant vectors.
    if (!VectorTy)
      VectorTy = VInTy;
    ScalarKind = Vector;
    return true;
  }

  return false;
}

/// CanConvertToScalar - V is a pointer.  If we can convert the pointee and all
/// its accesses to a single vector type, return true and set VecTy to
/// the new type.  If we could convert the alloca into a single promotable
/// integer, return true but set VecTy to VoidTy.  Further, if the use is not a
/// completely trivial use that mem2reg could promote, set IsNotTrivial.  Offset
/// is the current offset from the base of the alloca being analyzed.
///
/// If we see at least one access to the value as a vector type, that is
/// recorded in ScalarKind.
bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
  for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) {
    Instruction *User = cast<Instruction>(*UI);

    if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
      // Don't break volatile loads.
      if (!LI->isSimple())
        return false;
      // Don't touch MMX operations.
      if (LI->getType()->isX86_MMXTy())
        return false;
      HadNonMemTransferAccess = true;
      MergeInTypeForLoadOrStore(LI->getType(), Offset);
      continue;
    }

    if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
      // Storing the pointer, not into the value?
      if (SI->getOperand(0) == V || !SI->isSimple()) return false;
      // Don't touch MMX operations.
      if (SI->getOperand(0)->getType()->isX86_MMXTy())
        return false;
      HadNonMemTransferAccess = true;
      MergeInTypeForLoadOrStore(SI->getOperand(0)->getType(), Offset);
      continue;
    }

    if (BitCastInst *BCI = dyn_cast<BitCastInst>(User)) {
      if (!onlyUsedByLifetimeMarkers(BCI))
        IsNotTrivial = true;  // Can't be mem2reg'd.
      if (!CanConvertToScalar(BCI, Offset))
        return false;
      continue;
    }

    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) {
      // If this is a GEP with variable indices, we can't handle it.
      if (!GEP->hasAllConstantIndices())
        return false;

      // Compute the offset that this GEP adds to the pointer.
      SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end());
      uint64_t GEPOffset = TD.getIndexedOffset(GEP->getPointerOperandType(),
                                               Indices);
      // See if all uses can be converted.
      if (!CanConvertToScalar(GEP, Offset+GEPOffset))
        return false;
      IsNotTrivial = true;  // Can't be mem2reg'd.
      HadNonMemTransferAccess = true;
      continue;
    }

    // If this is a constant sized memset of a constant value (e.g. 0) we can
    // handle it.
    if (MemSetInst *MSI = dyn_cast<MemSetInst>(User)) {
      // Store of constant value.
      if (!isa<ConstantInt>(MSI->getValue()))
        return false;

      // Store of constant size.
      ConstantInt *Len = dyn_cast<ConstantInt>(MSI->getLength());
      if (!Len)
        return false;

      // If the size differs from the alloca, we can only convert the alloca to
      // an integer bag-of-bits.
      // FIXME: This should handle all of the cases that are currently accepted
      // as vector element insertions.
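      // For example (illustrative): a memset of only the first 4 bytes of an
      // 8-byte alloca forces the Integer kind below, while a memset covering
      // all 8 bytes at offset 0 leaves the accumulated kind untouched.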
      if (Len->getZExtValue() != AllocaSize || Offset != 0)
        ScalarKind = Integer;

      IsNotTrivial = true;  // Can't be mem2reg'd.
      HadNonMemTransferAccess = true;
      continue;
    }

    // If this is a memcpy or memmove into or out of the whole allocation, we
    // can handle it like a load or store of the scalar type.
    if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(User)) {
      ConstantInt *Len = dyn_cast<ConstantInt>(MTI->getLength());
      if (Len == 0 || Len->getZExtValue() != AllocaSize || Offset != 0)
        return false;

      IsNotTrivial = true;  // Can't be mem2reg'd.
      continue;
    }

    // If this is a lifetime intrinsic, we can handle it.
    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(User)) {
      if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
          II->getIntrinsicID() == Intrinsic::lifetime_end) {
        continue;
      }
    }

    // Otherwise, we cannot handle this!
    return false;
  }

  return true;
}

/// ConvertUsesToScalar - Convert all of the users of Ptr to use the new alloca
/// directly.  This happens when we are converting an "integer union" to a
/// single integer scalar, or when we are converting a "vector union" to a
/// vector with insert/extractelement instructions.
///
/// Offset is an offset from the original alloca, in bits that need to be
/// shifted to the right.  By the end of this, there should be no uses of Ptr.
void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
                                              uint64_t Offset) {
  while (!Ptr->use_empty()) {
    Instruction *User = cast<Instruction>(Ptr->use_back());

    if (BitCastInst *CI = dyn_cast<BitCastInst>(User)) {
      ConvertUsesToScalar(CI, NewAI, Offset);
      CI->eraseFromParent();
      continue;
    }

    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) {
      // Compute the offset that this GEP adds to the pointer.
      SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end());
      uint64_t GEPOffset = TD.getIndexedOffset(GEP->getPointerOperandType(),
                                               Indices);
      ConvertUsesToScalar(GEP, NewAI, Offset+GEPOffset*8);
      GEP->eraseFromParent();
      continue;
    }

    IRBuilder<> Builder(User);

    if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
      // The load is a bit extract from NewAI shifted right by Offset bits.
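      // e.g. (illustrative, little-endian): a load of i16 at byte offset 2 of
      // an i32-backed alloca becomes roughly
      //   %tmp = load i32* %NewAI
      //   %shr = lshr i32 %tmp, 16
      //   %val = trunc i32 %shr to i16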
      Value *LoadedVal = Builder.CreateLoad(NewAI);
      Value *NewLoadVal
        = ConvertScalar_ExtractValue(LoadedVal, LI->getType(), Offset, Builder);
      LI->replaceAllUsesWith(NewLoadVal);
      LI->eraseFromParent();
      continue;
    }

    if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
      assert(SI->getOperand(0) != Ptr && "Consistency error!");
      Instruction *Old = Builder.CreateLoad(NewAI, NewAI->getName()+".in");
      Value *New = ConvertScalar_InsertValue(SI->getOperand(0), Old, Offset,
                                             Builder);
      Builder.CreateStore(New, NewAI);
      SI->eraseFromParent();

      // If the load we just inserted is now dead, then the inserted store
      // overwrote the entire thing.
      if (Old->use_empty())
        Old->eraseFromParent();
      continue;
    }

    // If this is a constant sized memset of a constant value (e.g. 0) we can
    // transform it into a store of the expanded constant value.
    if (MemSetInst *MSI = dyn_cast<MemSetInst>(User)) {
      assert(MSI->getRawDest() == Ptr && "Consistency error!");
      unsigned NumBytes = cast<ConstantInt>(MSI->getLength())->getZExtValue();
      if (NumBytes != 0) {
        unsigned Val = cast<ConstantInt>(MSI->getValue())->getZExtValue();

        // Compute the value replicated the right number of times.
        APInt APVal(NumBytes*8, Val);

        // Splat the value if non-zero.
        if (Val)
          for (unsigned i = 1; i != NumBytes; ++i)
            APVal |= APVal << 8;

        Instruction *Old = Builder.CreateLoad(NewAI, NewAI->getName()+".in");
        Value *New = ConvertScalar_InsertValue(
                                    ConstantInt::get(User->getContext(), APVal),
                                               Old, Offset, Builder);
        Builder.CreateStore(New, NewAI);

        // If the load we just inserted is now dead, then the memset overwrote
        // the entire thing.
        if (Old->use_empty())
          Old->eraseFromParent();
      }
      MSI->eraseFromParent();
      continue;
    }

    // If this is a memcpy or memmove into or out of the whole allocation, we
    // can handle it like a load or store of the scalar type.
    if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(User)) {
      assert(Offset == 0 && "must be store to start of alloca");

      // If the source and destination are both to the same alloca, then this is
      // a noop copy-to-self, just delete it.  Otherwise, emit a load and store
      // as appropriate.
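      // e.g. (illustrative): a memcpy whose destination is the alloca becomes
      // a load of NewAI's type from the (bitcasted) source pointer followed by
      // a store into %NewAI; the mirror case stores a load of %NewAI through
      // the original destination pointer.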
      AllocaInst *OrigAI = cast<AllocaInst>(GetUnderlyingObject(Ptr, &TD, 0));

      if (GetUnderlyingObject(MTI->getSource(), &TD, 0) != OrigAI) {
        // Dest must be OrigAI, change this to be a load from the original
        // pointer (bitcasted), then a store to our new alloca.
        assert(MTI->getRawDest() == Ptr && "Neither use is of pointer?");
        Value *SrcPtr = MTI->getSource();
        PointerType* SPTy = cast<PointerType>(SrcPtr->getType());
        PointerType* AIPTy = cast<PointerType>(NewAI->getType());
        if (SPTy->getAddressSpace() != AIPTy->getAddressSpace()) {
          AIPTy = PointerType::get(AIPTy->getElementType(),
                                   SPTy->getAddressSpace());
        }
        SrcPtr = Builder.CreateBitCast(SrcPtr, AIPTy);

        LoadInst *SrcVal = Builder.CreateLoad(SrcPtr, "srcval");
        SrcVal->setAlignment(MTI->getAlignment());
        Builder.CreateStore(SrcVal, NewAI);
      } else if (GetUnderlyingObject(MTI->getDest(), &TD, 0) != OrigAI) {
        // Src must be OrigAI, change this to be a load from NewAI then a store
        // through the original dest pointer (bitcasted).
        assert(MTI->getRawSource() == Ptr && "Neither use is of pointer?");
        LoadInst *SrcVal = Builder.CreateLoad(NewAI, "srcval");

        PointerType* DPTy = cast<PointerType>(MTI->getDest()->getType());
        PointerType* AIPTy = cast<PointerType>(NewAI->getType());
        if (DPTy->getAddressSpace() != AIPTy->getAddressSpace()) {
          AIPTy = PointerType::get(AIPTy->getElementType(),
                                   DPTy->getAddressSpace());
        }
        Value *DstPtr = Builder.CreateBitCast(MTI->getDest(), AIPTy);

        StoreInst *NewStore = Builder.CreateStore(SrcVal, DstPtr);
        NewStore->setAlignment(MTI->getAlignment());
      } else {
        // Noop transfer. Src == Dst
      }

      MTI->eraseFromParent();
      continue;
    }

    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(User)) {
      if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
          II->getIntrinsicID() == Intrinsic::lifetime_end) {
        // There's no need to preserve these, as the resulting alloca will be
        // converted to a register anyways.
        II->eraseFromParent();
        continue;
      }
    }

    llvm_unreachable("Unsupported operation!");
  }
}

/// ConvertScalar_ExtractValue - Extract a value of type ToType from an integer
/// or vector value FromVal, extracting the bits from the offset specified by
/// Offset.  This returns the value, which is of type ToType.
///
/// This happens when we are converting an "integer union" to a single
/// integer scalar, or when we are converting a "vector union" to a vector with
/// insert/extractelement instructions.
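///
/// For example (illustrative), extracting a float at bit offset 32 from an
/// i64 "integer union" on a little-endian target yields:
///   %s = lshr i64 %FromVal, 32
///   %t = trunc i64 %s to i32
///   %f = bitcast i32 %t to float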
///
/// Offset is an offset from the original alloca, in bits that need to be
/// shifted to the right.
Value *ConvertToScalarInfo::
ConvertScalar_ExtractValue(Value *FromVal, Type *ToType,
                           uint64_t Offset, IRBuilder<> &Builder) {
  // If the load is of the whole new alloca, no conversion is needed.
  Type *FromType = FromVal->getType();
  if (FromType == ToType && Offset == 0)
    return FromVal;

  // If the result alloca is a vector type, this is either an element
  // access or a bitcast to another vector type of the same size.
  if (VectorType *VTy = dyn_cast<VectorType>(FromType)) {
    unsigned FromTypeSize = TD.getTypeAllocSize(FromType);
    unsigned ToTypeSize = TD.getTypeAllocSize(ToType);
    if (FromTypeSize == ToTypeSize)
      return Builder.CreateBitCast(FromVal, ToType);

    // Otherwise it must be an element access.
    unsigned Elt = 0;
    if (Offset) {
      unsigned EltSize = TD.getTypeAllocSizeInBits(VTy->getElementType());
      Elt = Offset/EltSize;
      assert(EltSize*Elt == Offset && "Invalid modulus in validity checking");
    }
    // Return the element extracted out of it.
    Value *V = Builder.CreateExtractElement(FromVal, Builder.getInt32(Elt));
    if (V->getType() != ToType)
      V = Builder.CreateBitCast(V, ToType);
    return V;
  }

  // If ToType is a first class aggregate, extract out each of the pieces and
  // use insertvalue's to form the FCA.
  if (StructType *ST = dyn_cast<StructType>(ToType)) {
    const StructLayout &Layout = *TD.getStructLayout(ST);
    Value *Res = UndefValue::get(ST);
    for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
      Value *Elt = ConvertScalar_ExtractValue(FromVal, ST->getElementType(i),
                                        Offset+Layout.getElementOffsetInBits(i),
                                              Builder);
      Res = Builder.CreateInsertValue(Res, Elt, i);
    }
    return Res;
  }

  if (ArrayType *AT = dyn_cast<ArrayType>(ToType)) {
    uint64_t EltSize = TD.getTypeAllocSizeInBits(AT->getElementType());
    Value *Res = UndefValue::get(AT);
    for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
      Value *Elt = ConvertScalar_ExtractValue(FromVal, AT->getElementType(),
                                              Offset+i*EltSize, Builder);
      Res = Builder.CreateInsertValue(Res, Elt, i);
    }
    return Res;
  }

  // Otherwise, this must be a union that was converted to an integer value.
  IntegerType *NTy = cast<IntegerType>(FromVal->getType());

  // If this is a big-endian system and the load is narrower than the
  // full alloca type, we need to do a shift to get the right bits.
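  // e.g. (illustrative): with an i64 alloca on a big-endian target, a 32-bit
  // field at bit offset 0 lives in the *high* half, so ShAmt below computes
  // 64 - 32 - 0 = 32 and the value is shifted right before truncation.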
  int ShAmt = 0;
  if (TD.isBigEndian()) {
    // On big-endian machines, the lowest bit is stored at the bit offset
    // from the pointer given by getTypeStoreSizeInBits.  This matters for
    // integers with a bitwidth that is not a multiple of 8.
    ShAmt = TD.getTypeStoreSizeInBits(NTy) -
            TD.getTypeStoreSizeInBits(ToType) - Offset;
  } else {
    ShAmt = Offset;
  }

  // Note: we support negative bitwidths (with shl) which are not defined.
  // We do this to support (f.e.) loads off the end of a structure where
  // only some bits are used.
  if (ShAmt > 0 && (unsigned)ShAmt < NTy->getBitWidth())
    FromVal = Builder.CreateLShr(FromVal,
                                 ConstantInt::get(FromVal->getType(), ShAmt));
  else if (ShAmt < 0 && (unsigned)-ShAmt < NTy->getBitWidth())
    FromVal = Builder.CreateShl(FromVal,
                                ConstantInt::get(FromVal->getType(), -ShAmt));

  // Finally, unconditionally truncate the integer to the right width.
  unsigned LIBitWidth = TD.getTypeSizeInBits(ToType);
  if (LIBitWidth < NTy->getBitWidth())
    FromVal =
      Builder.CreateTrunc(FromVal, IntegerType::get(FromVal->getContext(),
                                                    LIBitWidth));
  else if (LIBitWidth > NTy->getBitWidth())
    FromVal =
       Builder.CreateZExt(FromVal, IntegerType::get(FromVal->getContext(),
                                                    LIBitWidth));

  // If the result is an integer, this is a trunc or bitcast.
  if (ToType->isIntegerTy()) {
    // Should be done.
  } else if (ToType->isFloatingPointTy() || ToType->isVectorTy()) {
    // Just do a bitcast, we know the sizes match up.
    FromVal = Builder.CreateBitCast(FromVal, ToType);
  } else {
    // Otherwise must be a pointer.
    FromVal = Builder.CreateIntToPtr(FromVal, ToType);
  }
  assert(FromVal->getType() == ToType && "Didn't convert right?");
  return FromVal;
}

/// ConvertScalar_InsertValue - Insert the value "SV" into the existing integer
/// or vector value "Old" at the offset specified by Offset.
///
/// This happens when we are converting an "integer union" to a
/// single integer scalar, or when we are converting a "vector union" to a
/// vector with insert/extractelement instructions.
///
/// Offset is an offset from the original alloca, in bits that need to be
/// shifted to the right.
Value *ConvertToScalarInfo::
ConvertScalar_InsertValue(Value *SV, Value *Old,
                          uint64_t Offset, IRBuilder<> &Builder) {
  // Convert the stored type to the actual type, shift it left to insert
  // then 'or' into place.
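  // Conceptually (illustrative), the integer case below computes
  //   New = (Old & ~(LowBitsMask << ShAmt)) | (zext(SV) << ShAmt)
  // so only the bits covered by SV are replaced.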
  Type *AllocaType = Old->getType();
  LLVMContext &Context = Old->getContext();

  if (VectorType *VTy = dyn_cast<VectorType>(AllocaType)) {
    uint64_t VecSize = TD.getTypeAllocSizeInBits(VTy);
    uint64_t ValSize = TD.getTypeAllocSizeInBits(SV->getType());

    // Changing the whole vector with memset or with an access of a different
    // vector type?
    if (ValSize == VecSize)
      return Builder.CreateBitCast(SV, AllocaType);

    // Must be an element insertion.
    assert(SV->getType() == VTy->getElementType());
    uint64_t EltSize = TD.getTypeAllocSizeInBits(VTy->getElementType());
    unsigned Elt = Offset/EltSize;
    return Builder.CreateInsertElement(Old, SV, Builder.getInt32(Elt));
  }

  // If SV is a first-class aggregate value, insert each value recursively.
  if (StructType *ST = dyn_cast<StructType>(SV->getType())) {
    const StructLayout &Layout = *TD.getStructLayout(ST);
    for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
      Value *Elt = Builder.CreateExtractValue(SV, i);
      Old = ConvertScalar_InsertValue(Elt, Old,
                                      Offset+Layout.getElementOffsetInBits(i),
                                      Builder);
    }
    return Old;
  }

  if (ArrayType *AT = dyn_cast<ArrayType>(SV->getType())) {
    uint64_t EltSize = TD.getTypeAllocSizeInBits(AT->getElementType());
    for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
      Value *Elt = Builder.CreateExtractValue(SV, i);
      Old = ConvertScalar_InsertValue(Elt, Old, Offset+i*EltSize, Builder);
    }
    return Old;
  }

  // If SV is a float, convert it to the appropriate integer type.
  // If it is a pointer, do the same.
  unsigned SrcWidth = TD.getTypeSizeInBits(SV->getType());
  unsigned DestWidth = TD.getTypeSizeInBits(AllocaType);
  unsigned SrcStoreWidth = TD.getTypeStoreSizeInBits(SV->getType());
  unsigned DestStoreWidth = TD.getTypeStoreSizeInBits(AllocaType);
  if (SV->getType()->isFloatingPointTy() || SV->getType()->isVectorTy())
    SV = Builder.CreateBitCast(SV, IntegerType::get(SV->getContext(),SrcWidth));
  else if (SV->getType()->isPointerTy())
    SV = Builder.CreatePtrToInt(SV, TD.getIntPtrType(SV->getContext()));

  // Zero extend or truncate the value if needed.
  if (SV->getType() != AllocaType) {
    if (SV->getType()->getPrimitiveSizeInBits() <
        AllocaType->getPrimitiveSizeInBits())
      SV = Builder.CreateZExt(SV, AllocaType);
    else {
      // Truncation may be needed if storing more than the alloca can hold
      // (undefined behavior).
      SV = Builder.CreateTrunc(SV, AllocaType);
      SrcWidth = DestWidth;
      SrcStoreWidth = DestStoreWidth;
    }
  }

  // If this is a big-endian system and the store is narrower than the
  // full alloca type, we need to do a shift to get the right bits.
  int ShAmt = 0;
  if (TD.isBigEndian()) {
    // On big-endian machines, the lowest bit is stored at the bit offset
    // from the pointer given by getTypeStoreSizeInBits.  This matters for
    // integers with a bitwidth that is not a multiple of 8.
    ShAmt = DestStoreWidth - SrcStoreWidth - Offset;
  } else {
    ShAmt = Offset;
  }

  // Note: we support negative bitwidths (with shr) which are not defined.
  // We do this to support (f.e.) stores off the end of a structure where
  // only some bits in the structure are set.
  APInt Mask(APInt::getLowBitsSet(DestWidth, SrcWidth));
  if (ShAmt > 0 && (unsigned)ShAmt < DestWidth) {
    SV = Builder.CreateShl(SV, ConstantInt::get(SV->getType(), ShAmt));
    Mask <<= ShAmt;
  } else if (ShAmt < 0 && (unsigned)-ShAmt < DestWidth) {
    SV = Builder.CreateLShr(SV, ConstantInt::get(SV->getType(), -ShAmt));
    Mask = Mask.lshr(-ShAmt);
  }

  // Mask out the bits we are about to insert from the old value, and or
  // in the new bits.
  if (SrcWidth != DestWidth) {
    assert(DestWidth > SrcWidth);
    Old = Builder.CreateAnd(Old, ConstantInt::get(Context, ~Mask), "mask");
    SV = Builder.CreateOr(Old, SV, "ins");
  }
  return SV;
}


//===----------------------------------------------------------------------===//
// SRoA Driver
//===----------------------------------------------------------------------===//


bool SROA::runOnFunction(Function &F) {
  TD = getAnalysisIfAvailable<TargetData>();

  bool Changed = performPromotion(F);

  // FIXME: ScalarRepl currently depends on TargetData more than it
  // theoretically needs to. It should be refactored in order to support
  // target-independent IR. Until this is done, just skip the actual
  // scalar-replacement portion of this pass.
  if (!TD) return Changed;

  while (1) {
    bool LocalChange = performScalarRepl(F);
    if (!LocalChange) break;   // No need to repromote if no scalarrepl
    Changed = true;
    LocalChange = performPromotion(F);
    if (!LocalChange) break;   // No need to re-scalarrepl if no promotion
  }

  return Changed;
}

namespace {
class AllocaPromoter : public LoadAndStorePromoter {
  AllocaInst *AI;
  DIBuilder *DIB;
  SmallVector<DbgDeclareInst *, 4> DDIs;
  SmallVector<DbgValueInst *, 4> DVIs;
public:
  AllocaPromoter(const SmallVectorImpl<Instruction*> &Insts, SSAUpdater &S,
                 DIBuilder *DB)
    : LoadAndStorePromoter(Insts, S), AI(0), DIB(DB) {}

  void run(AllocaInst *AI, const SmallVectorImpl<Instruction*> &Insts) {
    // Remember which alloca we're promoting (for isInstInList).
    this->AI = AI;
    if (MDNode *DebugNode = MDNode::getIfExists(AI->getContext(), AI))
      for (Value::use_iterator UI = DebugNode->use_begin(),
             E = DebugNode->use_end(); UI != E; ++UI)
        if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(*UI))
          DDIs.push_back(DDI);
        else if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(*UI))
          DVIs.push_back(DVI);

    LoadAndStorePromoter::run(Insts);
    AI->eraseFromParent();
    for (SmallVector<DbgDeclareInst *, 4>::iterator I = DDIs.begin(),
           E = DDIs.end(); I != E; ++I) {
      DbgDeclareInst *DDI = *I;
      DDI->eraseFromParent();
    }
    for (SmallVector<DbgValueInst *, 4>::iterator I = DVIs.begin(),
           E = DVIs.end(); I != E; ++I) {
      DbgValueInst *DVI = *I;
      DVI->eraseFromParent();
    }
  }

  virtual bool isInstInList(Instruction *I,
                            const SmallVectorImpl<Instruction*> &Insts) const {
    if (LoadInst *LI = dyn_cast<LoadInst>(I))
      return LI->getOperand(0) == AI;
    return cast<StoreInst>(I)->getPointerOperand() == AI;
  }

  virtual void updateDebugInfo(Instruction *Inst) const {
    for (SmallVector<DbgDeclareInst *, 4>::const_iterator I = DDIs.begin(),
           E = DDIs.end(); I != E; ++I) {
      DbgDeclareInst *DDI = *I;
      if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
        ConvertDebugDeclareToDebugValue(DDI, SI, *DIB);
      else if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
        ConvertDebugDeclareToDebugValue(DDI, LI, *DIB);
    }
    for (SmallVector<DbgValueInst *, 4>::const_iterator I = DVIs.begin(),
           E = DVIs.end(); I != E; ++I) {
      DbgValueInst *DVI = *I;
      if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
        Instruction *DbgVal = NULL;
        // If an argument is zero extended then use argument directly. The ZExt
        // may be zapped by an optimization pass in future.
        Argument *ExtendedArg = NULL;
        if (ZExtInst *ZExt = dyn_cast<ZExtInst>(SI->getOperand(0)))
          ExtendedArg = dyn_cast<Argument>(ZExt->getOperand(0));
        if (SExtInst *SExt = dyn_cast<SExtInst>(SI->getOperand(0)))
          ExtendedArg = dyn_cast<Argument>(SExt->getOperand(0));
        if (ExtendedArg)
          DbgVal = DIB->insertDbgValueIntrinsic(ExtendedArg, 0,
                                                DIVariable(DVI->getVariable()),
                                                SI);
        else
          DbgVal = DIB->insertDbgValueIntrinsic(SI->getOperand(0), 0,
                                                DIVariable(DVI->getVariable()),
                                                SI);
        DbgVal->setDebugLoc(DVI->getDebugLoc());
      } else if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
        Instruction *DbgVal =
          DIB->insertDbgValueIntrinsic(LI->getOperand(0), 0,
                                       DIVariable(DVI->getVariable()), LI);
        DbgVal->setDebugLoc(DVI->getDebugLoc());
      }
    }
  }
};
} // end anon namespace

/// isSafeSelectToSpeculate - Select instructions that use an alloca and are
/// subsequently loaded can be rewritten to load both input pointers and then
/// select between the result, allowing the load of the alloca to be promoted.
/// From this:
///   %P2 = select i1 %cond, i32* %Alloca, i32* %Other
///   %V = load i32* %P2
/// to:
///   %V1 = load i32* %Alloca      -> will be mem2reg'd
///   %V2 = load i32* %Other
///   %V = select i1 %cond, i32 %V1, i32 %V2
///
/// We can do this to a select if its only uses are loads and if the operand to
/// the select can be loaded unconditionally.
static bool isSafeSelectToSpeculate(SelectInst *SI, const TargetData *TD) {
  bool TDerefable = SI->getTrueValue()->isDereferenceablePointer();
  bool FDerefable = SI->getFalseValue()->isDereferenceablePointer();

  for (Value::use_iterator UI = SI->use_begin(), UE = SI->use_end();
       UI != UE; ++UI) {
    LoadInst *LI = dyn_cast<LoadInst>(*UI);
    if (LI == 0 || !LI->isSimple()) return false;

    // Both operands to the select need to be dereferenceable, either absolutely
    // (e.g. allocas) or at this point because we can see other accesses to it.
    if (!TDerefable && !isSafeToLoadUnconditionally(SI->getTrueValue(), LI,
                                                    LI->getAlignment(), TD))
      return false;
    if (!FDerefable && !isSafeToLoadUnconditionally(SI->getFalseValue(), LI,
                                                    LI->getAlignment(), TD))
      return false;
  }

  return true;
}

/// isSafePHIToSpeculate - PHI instructions that use an alloca and are
/// subsequently loaded can be rewritten to load both input pointers in the pred
/// blocks and then PHI the results, allowing the load of the alloca to be
/// promoted.
/// From this:
///   %P2 = phi [i32* %Alloca, i32* %Other]
///   %V = load i32* %P2
/// to:
///   %V1 = load i32* %Alloca      -> will be mem2reg'd
///   ...
///   %V2 = load i32* %Other
///   ...
///   %V = phi [i32 %V1, i32 %V2]
///
/// We can do this to a PHI if its only uses are loads and if the operands to
/// the PHI can be loaded unconditionally.
static bool isSafePHIToSpeculate(PHINode *PN, const TargetData *TD) {
  // For now, we can only do this promotion if the load is in the same block as
  // the PHI, and if there are no stores between the phi and load.
  // TODO: Allow recursive phi users.
  // TODO: Allow stores.
  BasicBlock *BB = PN->getParent();
  unsigned MaxAlign = 0;
  for (Value::use_iterator UI = PN->use_begin(), UE = PN->use_end();
       UI != UE; ++UI) {
    LoadInst *LI = dyn_cast<LoadInst>(*UI);
    if (LI == 0 || !LI->isSimple()) return false;

    // For now we only allow loads in the same block as the PHI.  This is a
    // common case that happens when instcombine merges two loads through a PHI.
    if (LI->getParent() != BB) return false;

    // Ensure that there are no instructions between the PHI and the load that
    // could store.
    for (BasicBlock::iterator BBI = PN; &*BBI != LI; ++BBI)
      if (BBI->mayWriteToMemory())
        return false;

    MaxAlign = std::max(MaxAlign, LI->getAlignment());
  }

  // Okay, we know that we have one or more loads in the same block as the PHI.
  // We can transform this if it is safe to push the loads into the predecessor
  // blocks.  The only thing to watch out for is that we can't put a possibly
  // trapping load in the predecessor if it is a critical edge.
  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
    BasicBlock *Pred = PN->getIncomingBlock(i);
    Value *InVal = PN->getIncomingValue(i);

    // If the terminator of the predecessor has side-effects (an invoke),
    // there is no safe place to put a load in the predecessor.
    if (Pred->getTerminator()->mayHaveSideEffects())
      return false;

    // If the value is produced by the terminator of the predecessor
    // (an invoke), there is no valid place to put a load in the predecessor.
    if (Pred->getTerminator() == InVal)
      return false;

    // If the predecessor has a single successor, then the edge isn't critical.
    if (Pred->getTerminator()->getNumSuccessors() == 1)
      continue;

    // If this pointer is always safe to load, or if we can prove that there is
    // already a load in the block, then we can move the load to the pred block.
    if (InVal->isDereferenceablePointer() ||
        isSafeToLoadUnconditionally(InVal, Pred->getTerminator(), MaxAlign, TD))
      continue;

    return false;
  }

  return true;
}


/// tryToMakeAllocaBePromotable - This returns true if the alloca only has
/// direct (non-volatile) loads and stores to it.  If the alloca is close but
If the alloca is close but 1120218893Sdim/// not quite there, this will transform the code to allow promotion. As such, 1121218893Sdim/// it is a non-pure predicate. 1122218893Sdimstatic bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) { 1123218893Sdim SetVector<Instruction*, SmallVector<Instruction*, 4>, 1124218893Sdim SmallPtrSet<Instruction*, 4> > InstsToRewrite; 1125218893Sdim 1126218893Sdim for (Value::use_iterator UI = AI->use_begin(), UE = AI->use_end(); 1127218893Sdim UI != UE; ++UI) { 1128218893Sdim User *U = *UI; 1129218893Sdim if (LoadInst *LI = dyn_cast<LoadInst>(U)) { 1130226890Sdim if (!LI->isSimple()) 1131218893Sdim return false; 1132218893Sdim continue; 1133218893Sdim } 1134218893Sdim 1135218893Sdim if (StoreInst *SI = dyn_cast<StoreInst>(U)) { 1136226890Sdim if (SI->getOperand(0) == AI || !SI->isSimple()) 1137218893Sdim return false; // Don't allow a store OF the AI, only INTO the AI. 1138218893Sdim continue; 1139218893Sdim } 1140218893Sdim 1141218893Sdim if (SelectInst *SI = dyn_cast<SelectInst>(U)) { 1142218893Sdim // If the condition being selected on is a constant, fold the select, yes 1143218893Sdim // this does (rarely) happen early on. 1144218893Sdim if (ConstantInt *CI = dyn_cast<ConstantInt>(SI->getCondition())) { 1145218893Sdim Value *Result = SI->getOperand(1+CI->isZero()); 1146218893Sdim SI->replaceAllUsesWith(Result); 1147218893Sdim SI->eraseFromParent(); 1148218893Sdim 1149218893Sdim // This is very rare and we just scrambled the use list of AI, start 1150218893Sdim // over completely. 1151218893Sdim return tryToMakeAllocaBePromotable(AI, TD); 1152218893Sdim } 1153218893Sdim 1154218893Sdim // If it is safe to turn "load (select c, AI, ptr)" into a select of two 1155218893Sdim // loads, then we can transform this by rewriting the select. 1156218893Sdim if (!isSafeSelectToSpeculate(SI, TD)) 1157218893Sdim return false; 1158218893Sdim 1159218893Sdim InstsToRewrite.insert(SI); 1160218893Sdim continue; 1161218893Sdim } 1162218893Sdim 1163218893Sdim if (PHINode *PN = dyn_cast<PHINode>(U)) { 1164218893Sdim if (PN->use_empty()) { // Dead PHIs can be stripped. 1165218893Sdim InstsToRewrite.insert(PN); 1166218893Sdim continue; 1167218893Sdim } 1168218893Sdim 1169218893Sdim // If it is safe to turn "load (phi [AI, ptr, ...])" into a PHI of loads 1170218893Sdim // in the pred blocks, then we can transform this by rewriting the PHI. 1171218893Sdim if (!isSafePHIToSpeculate(PN, TD)) 1172218893Sdim return false; 1173218893Sdim 1174218893Sdim InstsToRewrite.insert(PN); 1175218893Sdim continue; 1176218893Sdim } 1177218893Sdim 1178226890Sdim if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) { 1179226890Sdim if (onlyUsedByLifetimeMarkers(BCI)) { 1180226890Sdim InstsToRewrite.insert(BCI); 1181226890Sdim continue; 1182226890Sdim } 1183226890Sdim } 1184226890Sdim 1185218893Sdim return false; 1186218893Sdim } 1187218893Sdim 1188218893Sdim // If there are no instructions to rewrite, then all uses are load/stores and 1189218893Sdim // we're done! 1190218893Sdim if (InstsToRewrite.empty()) 1191218893Sdim return true; 1192218893Sdim 1193218893Sdim // If we have instructions that need to be rewritten for this to be promotable 1194218893Sdim // take care of it now. 1195218893Sdim for (unsigned i = 0, e = InstsToRewrite.size(); i != e; ++i) { 1196226890Sdim if (BitCastInst *BCI = dyn_cast<BitCastInst>(InstsToRewrite[i])) { 1197226890Sdim // This could only be a bitcast used by nothing but lifetime intrinsics. 
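      // Each user of the bitcast is erased inside the loop below, so the
      // iterator is advanced past the current use before the erase; otherwise
      // eraseFromParent() would invalidate the use iterator mid-walk.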
1198226890Sdim for (BitCastInst::use_iterator I = BCI->use_begin(), E = BCI->use_end(); 1199226890Sdim I != E;) { 1200226890Sdim Use &U = I.getUse(); 1201226890Sdim ++I; 1202226890Sdim cast<Instruction>(U.getUser())->eraseFromParent(); 1203226890Sdim } 1204226890Sdim BCI->eraseFromParent(); 1205226890Sdim continue; 1206226890Sdim } 1207226890Sdim 1208218893Sdim if (SelectInst *SI = dyn_cast<SelectInst>(InstsToRewrite[i])) { 1209218893Sdim // Selects in InstsToRewrite only have load uses. Rewrite each as two 1210218893Sdim // loads with a new select. 1211218893Sdim while (!SI->use_empty()) { 1212218893Sdim LoadInst *LI = cast<LoadInst>(SI->use_back()); 1213218893Sdim 1214218893Sdim IRBuilder<> Builder(LI); 1215218893Sdim LoadInst *TrueLoad = 1216218893Sdim Builder.CreateLoad(SI->getTrueValue(), LI->getName()+".t"); 1217218893Sdim LoadInst *FalseLoad = 1218224145Sdim Builder.CreateLoad(SI->getFalseValue(), LI->getName()+".f"); 1219218893Sdim 1220218893Sdim // Transfer alignment and TBAA info if present. 1221218893Sdim TrueLoad->setAlignment(LI->getAlignment()); 1222218893Sdim FalseLoad->setAlignment(LI->getAlignment()); 1223218893Sdim if (MDNode *Tag = LI->getMetadata(LLVMContext::MD_tbaa)) { 1224218893Sdim TrueLoad->setMetadata(LLVMContext::MD_tbaa, Tag); 1225218893Sdim FalseLoad->setMetadata(LLVMContext::MD_tbaa, Tag); 1226218893Sdim } 1227218893Sdim 1228218893Sdim Value *V = Builder.CreateSelect(SI->getCondition(), TrueLoad, FalseLoad); 1229218893Sdim V->takeName(LI); 1230218893Sdim LI->replaceAllUsesWith(V); 1231218893Sdim LI->eraseFromParent(); 1232218893Sdim } 1233218893Sdim 1234218893Sdim // Now that all the loads are gone, the select is gone too. 1235218893Sdim SI->eraseFromParent(); 1236218893Sdim continue; 1237218893Sdim } 1238218893Sdim 1239218893Sdim // Otherwise, we have a PHI node which allows us to push the loads into the 1240218893Sdim // predecessors. 1241218893Sdim PHINode *PN = cast<PHINode>(InstsToRewrite[i]); 1242218893Sdim if (PN->use_empty()) { 1243218893Sdim PN->eraseFromParent(); 1244218893Sdim continue; 1245218893Sdim } 1246218893Sdim 1247226890Sdim Type *LoadTy = cast<PointerType>(PN->getType())->getElementType(); 1248221345Sdim PHINode *NewPN = PHINode::Create(LoadTy, PN->getNumIncomingValues(), 1249221345Sdim PN->getName()+".ld", PN); 1250218893Sdim 1251218893Sdim // Get the TBAA tag and alignment to use from one of the loads. It doesn't 1252218893Sdim // matter which one we get and if any differ, it doesn't matter. 1253218893Sdim LoadInst *SomeLoad = cast<LoadInst>(PN->use_back()); 1254218893Sdim MDNode *TBAATag = SomeLoad->getMetadata(LLVMContext::MD_tbaa); 1255218893Sdim unsigned Align = SomeLoad->getAlignment(); 1256218893Sdim 1257218893Sdim // Rewrite all loads of the PN to use the new PHI. 1258218893Sdim while (!PN->use_empty()) { 1259218893Sdim LoadInst *LI = cast<LoadInst>(PN->use_back()); 1260218893Sdim LI->replaceAllUsesWith(NewPN); 1261218893Sdim LI->eraseFromParent(); 1262218893Sdim } 1263218893Sdim 1264218893Sdim // Inject loads into all of the pred blocks. Keep track of which blocks we 1265218893Sdim // insert them into in case we have multiple edges from the same block. 
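    // (e.g. a switch in one predecessor with two cases that both target this
    // block produces two incoming entries for that block; both entries must
    // reuse the single load inserted there.)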
1266218893Sdim DenseMap<BasicBlock*, LoadInst*> InsertedLoads; 1267218893Sdim 1268218893Sdim for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { 1269218893Sdim BasicBlock *Pred = PN->getIncomingBlock(i); 1270218893Sdim LoadInst *&Load = InsertedLoads[Pred]; 1271218893Sdim if (Load == 0) { 1272218893Sdim Load = new LoadInst(PN->getIncomingValue(i), 1273218893Sdim PN->getName() + "." + Pred->getName(), 1274218893Sdim Pred->getTerminator()); 1275218893Sdim Load->setAlignment(Align); 1276218893Sdim if (TBAATag) Load->setMetadata(LLVMContext::MD_tbaa, TBAATag); 1277218893Sdim } 1278218893Sdim 1279218893Sdim NewPN->addIncoming(Load, Pred); 1280218893Sdim } 1281218893Sdim 1282218893Sdim PN->eraseFromParent(); 1283218893Sdim } 1284218893Sdim 1285218893Sdim ++NumAdjusted; 1286218893Sdim return true; 1287218893Sdim} 1288218893Sdim 1289193323Sedbool SROA::performPromotion(Function &F) { 1290193323Sed std::vector<AllocaInst*> Allocas; 1291218893Sdim DominatorTree *DT = 0; 1292218893Sdim if (HasDomTree) 1293218893Sdim DT = &getAnalysis<DominatorTree>(); 1294193323Sed 1295193323Sed BasicBlock &BB = F.getEntryBlock(); // Get the entry node for the function 1296224145Sdim DIBuilder DIB(*F.getParent()); 1297193323Sed bool Changed = false; 1298218893Sdim SmallVector<Instruction*, 64> Insts; 1299193323Sed while (1) { 1300193323Sed Allocas.clear(); 1301193323Sed 1302193323Sed // Find allocas that are safe to promote, by looking at all instructions in 1303193323Sed // the entry node 1304193323Sed for (BasicBlock::iterator I = BB.begin(), E = --BB.end(); I != E; ++I) 1305193323Sed if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) // Is it an alloca? 1306218893Sdim if (tryToMakeAllocaBePromotable(AI, TD)) 1307193323Sed Allocas.push_back(AI); 1308193323Sed 1309193323Sed if (Allocas.empty()) break; 1310193323Sed 1311218893Sdim if (HasDomTree) 1312218893Sdim PromoteMemToReg(Allocas, *DT); 1313218893Sdim else { 1314218893Sdim SSAUpdater SSA; 1315218893Sdim for (unsigned i = 0, e = Allocas.size(); i != e; ++i) { 1316218893Sdim AllocaInst *AI = Allocas[i]; 1317218893Sdim 1318218893Sdim // Build list of instructions to promote. 1319218893Sdim for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); 1320218893Sdim UI != E; ++UI) 1321218893Sdim Insts.push_back(cast<Instruction>(*UI)); 1322224145Sdim AllocaPromoter(Insts, SSA, &DIB).run(AI, Insts); 1323218893Sdim Insts.clear(); 1324218893Sdim } 1325218893Sdim } 1326193323Sed NumPromoted += Allocas.size(); 1327193323Sed Changed = true; 1328193323Sed } 1329193323Sed 1330193323Sed return Changed; 1331193323Sed} 1332193323Sed 1333207618Srdivacky 1334203954Srdivacky/// ShouldAttemptScalarRepl - Decide if an alloca is a good candidate for 1335203954Srdivacky/// SROA. It must be a struct or array type with a small number of elements. 1336203954Srdivackystatic bool ShouldAttemptScalarRepl(AllocaInst *AI) { 1337226890Sdim Type *T = AI->getAllocatedType(); 1338203954Srdivacky // Do not promote any struct into more than 32 separate vars. 1339226890Sdim if (StructType *ST = dyn_cast<StructType>(T)) 1340203954Srdivacky return ST->getNumElements() <= 32; 1341203954Srdivacky // Arrays are much less likely to be safe for SROA; only consider 1342203954Srdivacky // them if they are very small. 
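  // (e.g. [8 x i32] is still considered below, while [9 x i32] is rejected.)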
1343226890Sdim if (ArrayType *AT = dyn_cast<ArrayType>(T)) 1344203954Srdivacky return AT->getNumElements() <= 8; 1345203954Srdivacky return false; 1346193323Sed} 1347193323Sed 1348207618Srdivacky 1349193323Sed// performScalarRepl - This algorithm is a simple worklist driven algorithm, 1350224145Sdim// which runs on all of the alloca instructions in the function, removing them 1351224145Sdim// if they are only used by getelementptr instructions. 1352193323Sed// 1353193323Sedbool SROA::performScalarRepl(Function &F) { 1354198892Srdivacky std::vector<AllocaInst*> WorkList; 1355193323Sed 1356207618Srdivacky // Scan the entry basic block, adding allocas to the worklist. 1357193323Sed BasicBlock &BB = F.getEntryBlock(); 1358193323Sed for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I) 1359198892Srdivacky if (AllocaInst *A = dyn_cast<AllocaInst>(I)) 1360193323Sed WorkList.push_back(A); 1361193323Sed 1362193323Sed // Process the worklist 1363193323Sed bool Changed = false; 1364193323Sed while (!WorkList.empty()) { 1365198892Srdivacky AllocaInst *AI = WorkList.back(); 1366193323Sed WorkList.pop_back(); 1367218893Sdim 1368193323Sed // Handle dead allocas trivially. These can be formed by SROA'ing arrays 1369193323Sed // with unused elements. 1370193323Sed if (AI->use_empty()) { 1371193323Sed AI->eraseFromParent(); 1372207618Srdivacky Changed = true; 1373193323Sed continue; 1374193323Sed } 1375193323Sed 1376193323Sed // If this alloca is impossible for us to promote, reject it early. 1377193323Sed if (AI->isArrayAllocation() || !AI->getAllocatedType()->isSized()) 1378193323Sed continue; 1379218893Sdim 1380193323Sed // Check to see if this allocation is only modified by a memcpy/memmove from 1381193323Sed // a constant global. If this is the case, we can change all users to use 1382193323Sed // the constant global instead. This is commonly produced by the CFE by 1383193323Sed // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A' 1384193323Sed // is only subsequently read. 1385224145Sdim SmallVector<Instruction *, 4> ToDelete; 1386224145Sdim if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(AI, ToDelete)) { 1387202375Srdivacky DEBUG(dbgs() << "Found alloca equal to global: " << *AI << '\n'); 1388224145Sdim DEBUG(dbgs() << " memcpy = " << *Copy << '\n'); 1389224145Sdim for (unsigned i = 0, e = ToDelete.size(); i != e; ++i) 1390224145Sdim ToDelete[i]->eraseFromParent(); 1391224145Sdim Constant *TheSrc = cast<Constant>(Copy->getSource()); 1392198090Srdivacky AI->replaceAllUsesWith(ConstantExpr::getBitCast(TheSrc, AI->getType())); 1393224145Sdim Copy->eraseFromParent(); // Don't mutate the global. 1394193323Sed AI->eraseFromParent(); 1395193323Sed ++NumGlobals; 1396193323Sed Changed = true; 1397193323Sed continue; 1398193323Sed } 1399218893Sdim 1400193323Sed // Check to see if we can perform the core SROA transformation. We cannot 1401193323Sed // transform the allocation instruction if it is an array allocation 1402193323Sed // (allocations OF arrays are ok though), and an allocation of a scalar 1403193323Sed // value cannot be decomposed at all. 1404193323Sed uint64_t AllocaSize = TD->getTypeAllocSize(AI->getAllocatedType()); 1405193323Sed 1406198090Srdivacky // Do not promote [0 x %struct]. 1407198090Srdivacky if (AllocaSize == 0) continue; 1408218893Sdim 1409207618Srdivacky // Do not promote any struct whose size is too big. 
1410207618Srdivacky if (AllocaSize > SRThreshold) continue; 1411218893Sdim 1412203954Srdivacky // If the alloca looks like a good candidate for scalar replacement, and if 1413203954Srdivacky // all its users can be transformed, then split up the aggregate into its 1414203954Srdivacky // separate elements. 1415203954Srdivacky if (ShouldAttemptScalarRepl(AI) && isSafeAllocaToScalarRepl(AI)) { 1416203954Srdivacky DoScalarReplacement(AI, WorkList); 1417203954Srdivacky Changed = true; 1418203954Srdivacky continue; 1419203954Srdivacky } 1420203954Srdivacky 1421193323Sed // If we can turn this aggregate value (potentially with casts) into a 1422193323Sed // simple scalar value that can be mem2reg'd into a register value. 1423193323Sed // IsNotTrivial tracks whether this is something that mem2reg could have 1424193323Sed // promoted itself. If so, we don't want to transform it needlessly. Note 1425193323Sed // that we can't just check based on the type: the alloca may be of an i32 1426193323Sed // but that has pointer arithmetic to set byte 3 of it or something. 1427207618Srdivacky if (AllocaInst *NewAI = 1428207618Srdivacky ConvertToScalarInfo((unsigned)AllocaSize, *TD).TryConvert(AI)) { 1429193323Sed NewAI->takeName(AI); 1430193323Sed AI->eraseFromParent(); 1431193323Sed ++NumConverted; 1432193323Sed Changed = true; 1433193323Sed continue; 1434218893Sdim } 1435218893Sdim 1436193323Sed // Otherwise, couldn't process this alloca. 1437193323Sed } 1438193323Sed 1439193323Sed return Changed; 1440193323Sed} 1441193323Sed 1442193323Sed/// DoScalarReplacement - This alloca satisfied the isSafeAllocaToScalarRepl 1443193323Sed/// predicate, do SROA now. 1444218893Sdimvoid SROA::DoScalarReplacement(AllocaInst *AI, 1445198892Srdivacky std::vector<AllocaInst*> &WorkList) { 1446202375Srdivacky DEBUG(dbgs() << "Found inst to SROA: " << *AI << '\n'); 1447193323Sed SmallVector<AllocaInst*, 32> ElementAllocas; 1448226890Sdim if (StructType *ST = dyn_cast<StructType>(AI->getAllocatedType())) { 1449193323Sed ElementAllocas.reserve(ST->getNumContainedTypes()); 1450193323Sed for (unsigned i = 0, e = ST->getNumContainedTypes(); i != e; ++i) { 1451218893Sdim AllocaInst *NA = new AllocaInst(ST->getContainedType(i), 0, 1452193323Sed AI->getAlignment(), 1453198090Srdivacky AI->getName() + "." + Twine(i), AI); 1454193323Sed ElementAllocas.push_back(NA); 1455193323Sed WorkList.push_back(NA); // Add to worklist for recursive processing 1456193323Sed } 1457193323Sed } else { 1458226890Sdim ArrayType *AT = cast<ArrayType>(AI->getAllocatedType()); 1459193323Sed ElementAllocas.reserve(AT->getNumElements()); 1460226890Sdim Type *ElTy = AT->getElementType(); 1461193323Sed for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) { 1462193323Sed AllocaInst *NA = new AllocaInst(ElTy, 0, AI->getAlignment(), 1463198090Srdivacky AI->getName() + "." + Twine(i), AI); 1464193323Sed ElementAllocas.push_back(NA); 1465193323Sed WorkList.push_back(NA); // Add to worklist for recursive processing 1466193323Sed } 1467193323Sed } 1468193323Sed 1469201360Srdivacky // Now that we have created the new alloca instructions, rewrite all the 1470201360Srdivacky // uses of the old alloca. 1471201360Srdivacky RewriteForScalarRepl(AI, AI, 0, ElementAllocas); 1472193323Sed 1473201360Srdivacky // Now erase any instructions that were made dead while rewriting the alloca. 
1474201360Srdivacky DeleteDeadInstructions(); 1475201360Srdivacky AI->eraseFromParent(); 1476193323Sed 1477210299Sed ++NumReplaced; 1478201360Srdivacky} 1479193323Sed 1480201360Srdivacky/// DeleteDeadInstructions - Erase instructions on the DeadInstrs list, 1481201360Srdivacky/// recursively including all their operands that become trivially dead. 1482201360Srdivackyvoid SROA::DeleteDeadInstructions() { 1483201360Srdivacky while (!DeadInsts.empty()) { 1484201360Srdivacky Instruction *I = cast<Instruction>(DeadInsts.pop_back_val()); 1485193323Sed 1486201360Srdivacky for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI) 1487201360Srdivacky if (Instruction *U = dyn_cast<Instruction>(*OI)) { 1488201360Srdivacky // Zero out the operand and see if it becomes trivially dead. 1489201360Srdivacky // (But, don't add allocas to the dead instruction list -- they are 1490201360Srdivacky // already on the worklist and will be deleted separately.) 1491201360Srdivacky *OI = 0; 1492201360Srdivacky if (isInstructionTriviallyDead(U) && !isa<AllocaInst>(U)) 1493201360Srdivacky DeadInsts.push_back(U); 1494201360Srdivacky } 1495201360Srdivacky 1496201360Srdivacky I->eraseFromParent(); 1497193323Sed } 1498193323Sed} 1499218893Sdim 1500201360Srdivacky/// isSafeForScalarRepl - Check if instruction I is a safe use with regard to 1501201360Srdivacky/// performing scalar replacement of alloca AI. The results are flagged in 1502201360Srdivacky/// the Info parameter. Offset indicates the position within AI that is 1503201360Srdivacky/// referenced by this instruction. 1504218893Sdimvoid SROA::isSafeForScalarRepl(Instruction *I, uint64_t Offset, 1505201360Srdivacky AllocaInfo &Info) { 1506201360Srdivacky for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E; ++UI) { 1507201360Srdivacky Instruction *User = cast<Instruction>(*UI); 1508193323Sed 1509201360Srdivacky if (BitCastInst *BC = dyn_cast<BitCastInst>(User)) { 1510218893Sdim isSafeForScalarRepl(BC, Offset, Info); 1511201360Srdivacky } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) { 1512201360Srdivacky uint64_t GEPOffset = Offset; 1513218893Sdim isSafeGEP(GEPI, GEPOffset, Info); 1514201360Srdivacky if (!Info.isUnsafe) 1515218893Sdim isSafeForScalarRepl(GEPI, GEPOffset, Info); 1516210299Sed } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) { 1517201360Srdivacky ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength()); 1518218893Sdim if (Length == 0) 1519218893Sdim return MarkUnsafe(Info, User); 1520218893Sdim isSafeMemAccess(Offset, Length->getZExtValue(), 0, 1521218893Sdim UI.getOperandNo() == 0, Info, MI, 1522218893Sdim true /*AllowWholeAccess*/); 1523201360Srdivacky } else if (LoadInst *LI = dyn_cast<LoadInst>(User)) { 1524226890Sdim if (!LI->isSimple()) 1525218893Sdim return MarkUnsafe(Info, User); 1526226890Sdim Type *LIType = LI->getType(); 1527218893Sdim isSafeMemAccess(Offset, TD->getTypeAllocSize(LIType), 1528218893Sdim LIType, false, Info, LI, true /*AllowWholeAccess*/); 1529218893Sdim Info.hasALoadOrStore = true; 1530218893Sdim 1531201360Srdivacky } else if (StoreInst *SI = dyn_cast<StoreInst>(User)) { 1532193323Sed // Store is ok if storing INTO the pointer, not storing the pointer 1533226890Sdim if (!SI->isSimple() || SI->getOperand(0) == I) 1534218893Sdim return MarkUnsafe(Info, User); 1535218893Sdim 1536226890Sdim Type *SIType = SI->getOperand(0)->getType(); 1537218893Sdim isSafeMemAccess(Offset, TD->getTypeAllocSize(SIType), 1538218893Sdim SIType, true, Info, SI, true 
/*AllowWholeAccess*/);
1539218893Sdim      Info.hasALoadOrStore = true;
1540226890Sdim    } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(User)) {
1541226890Sdim      if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
1542226890Sdim          II->getIntrinsicID() != Intrinsic::lifetime_end)
1543226890Sdim        return MarkUnsafe(Info, User);
1544218893Sdim    } else if (isa<PHINode>(User) || isa<SelectInst>(User)) {
1545218893Sdim      isSafePHISelectUseForScalarRepl(User, Offset, Info);
1546201360Srdivacky    } else {
1547218893Sdim      return MarkUnsafe(Info, User);
1548193323Sed    }
1549201360Srdivacky    if (Info.isUnsafe) return;
1550193323Sed  }
1551193323Sed}
1552218893Sdim
1553193323Sed
1554218893Sdim/// isSafePHISelectUseForScalarRepl - If we see a PHI node or select using a
1555218893Sdim/// pointer derived from the alloca, we can often still split the alloca into
1556218893Sdim/// elements.  This is useful if we have a large alloca where one element is
1557218893Sdim/// phi'd together somewhere: we can SRoA and promote all the other elements
1558218893Sdim/// even if we end up not being able to promote this one.
1559218893Sdim///
1560218893Sdim/// All we require is that the uses of the PHI do not index into other parts of
1561218893Sdim/// the alloca.  The most important use case for this is single loads and
1562218893Sdim/// stores that are PHI'd together, which can happen due to code sinking.
1563218893Sdimvoid SROA::isSafePHISelectUseForScalarRepl(Instruction *I, uint64_t Offset,
1564218893Sdim                                           AllocaInfo &Info) {
1565218893Sdim  // If we've already checked this PHI, don't do it again.
1566218893Sdim  if (PHINode *PN = dyn_cast<PHINode>(I))
1567218893Sdim    if (!Info.CheckedPHIs.insert(PN))
1568218893Sdim      return;
1569218893Sdim
1570218893Sdim  for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E; ++UI) {
1571218893Sdim    Instruction *User = cast<Instruction>(*UI);
1572218893Sdim
1573218893Sdim    if (BitCastInst *BC = dyn_cast<BitCastInst>(User)) {
1574218893Sdim      isSafePHISelectUseForScalarRepl(BC, Offset, Info);
1575218893Sdim    } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) {
1576218893Sdim      // Only allow "bitcast" GEPs for simplicity.  We could generalize this,
1577218893Sdim      // but would have to prove that we're staying inside of an element being
1578218893Sdim      // promoted.
1579218893Sdim if (!GEPI->hasAllZeroIndices()) 1580218893Sdim return MarkUnsafe(Info, User); 1581218893Sdim isSafePHISelectUseForScalarRepl(GEPI, Offset, Info); 1582218893Sdim } else if (LoadInst *LI = dyn_cast<LoadInst>(User)) { 1583226890Sdim if (!LI->isSimple()) 1584218893Sdim return MarkUnsafe(Info, User); 1585226890Sdim Type *LIType = LI->getType(); 1586218893Sdim isSafeMemAccess(Offset, TD->getTypeAllocSize(LIType), 1587218893Sdim LIType, false, Info, LI, false /*AllowWholeAccess*/); 1588218893Sdim Info.hasALoadOrStore = true; 1589218893Sdim 1590218893Sdim } else if (StoreInst *SI = dyn_cast<StoreInst>(User)) { 1591218893Sdim // Store is ok if storing INTO the pointer, not storing the pointer 1592226890Sdim if (!SI->isSimple() || SI->getOperand(0) == I) 1593218893Sdim return MarkUnsafe(Info, User); 1594218893Sdim 1595226890Sdim Type *SIType = SI->getOperand(0)->getType(); 1596218893Sdim isSafeMemAccess(Offset, TD->getTypeAllocSize(SIType), 1597218893Sdim SIType, true, Info, SI, false /*AllowWholeAccess*/); 1598218893Sdim Info.hasALoadOrStore = true; 1599218893Sdim } else if (isa<PHINode>(User) || isa<SelectInst>(User)) { 1600218893Sdim isSafePHISelectUseForScalarRepl(User, Offset, Info); 1601218893Sdim } else { 1602218893Sdim return MarkUnsafe(Info, User); 1603218893Sdim } 1604218893Sdim if (Info.isUnsafe) return; 1605218893Sdim } 1606218893Sdim} 1607218893Sdim 1608201360Srdivacky/// isSafeGEP - Check if a GEP instruction can be handled for scalar 1609201360Srdivacky/// replacement. It is safe when all the indices are constant, in-bounds 1610201360Srdivacky/// references, and when the resulting offset corresponds to an element within 1611201360Srdivacky/// the alloca type. The results are flagged in the Info parameter. Upon 1612201360Srdivacky/// return, Offset is adjusted as specified by the GEP indices. 1613218893Sdimvoid SROA::isSafeGEP(GetElementPtrInst *GEPI, 1614201360Srdivacky uint64_t &Offset, AllocaInfo &Info) { 1615201360Srdivacky gep_type_iterator GEPIt = gep_type_begin(GEPI), E = gep_type_end(GEPI); 1616201360Srdivacky if (GEPIt == E) 1617201360Srdivacky return; 1618193323Sed 1619201360Srdivacky // Walk through the GEP type indices, checking the types that this indexes 1620201360Srdivacky // into. 1621201360Srdivacky for (; GEPIt != E; ++GEPIt) { 1622201360Srdivacky // Ignore struct elements, no extra checking needed for these. 1623204642Srdivacky if ((*GEPIt)->isStructTy()) 1624201360Srdivacky continue; 1625193323Sed 1626201360Srdivacky ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPIt.getOperand()); 1627201360Srdivacky if (!IdxVal) 1628218893Sdim return MarkUnsafe(Info, GEPI); 1629193323Sed } 1630193323Sed 1631201360Srdivacky // Compute the offset due to this GEP and check if the alloca has a 1632201360Srdivacky // component element at that offset. 1633201360Srdivacky SmallVector<Value*, 8> Indices(GEPI->op_begin() + 1, GEPI->op_end()); 1634226890Sdim Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(), Indices); 1635218893Sdim if (!TypeHasComponent(Info.AI->getAllocatedType(), Offset, 0)) 1636218893Sdim MarkUnsafe(Info, GEPI); 1637201360Srdivacky} 1638193323Sed 1639218893Sdim/// isHomogeneousAggregate - Check if type T is a struct or array containing 1640218893Sdim/// elements of the same type (which is always true for arrays). If so, 1641218893Sdim/// return true with NumElts and EltTy set to the number of elements and the 1642218893Sdim/// element type, respectively. 
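/// For example, [4 x i32] and { i32, i32, i32, i32 } are both homogeneous
/// aggregates with NumElts == 4 and EltTy == i32; { i32, float } is not.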
1643226890Sdimstatic bool isHomogeneousAggregate(Type *T, unsigned &NumElts, 1644226890Sdim Type *&EltTy) { 1645226890Sdim if (ArrayType *AT = dyn_cast<ArrayType>(T)) { 1646218893Sdim NumElts = AT->getNumElements(); 1647218893Sdim EltTy = (NumElts == 0 ? 0 : AT->getElementType()); 1648218893Sdim return true; 1649218893Sdim } 1650226890Sdim if (StructType *ST = dyn_cast<StructType>(T)) { 1651218893Sdim NumElts = ST->getNumContainedTypes(); 1652218893Sdim EltTy = (NumElts == 0 ? 0 : ST->getContainedType(0)); 1653218893Sdim for (unsigned n = 1; n < NumElts; ++n) { 1654218893Sdim if (ST->getContainedType(n) != EltTy) 1655218893Sdim return false; 1656218893Sdim } 1657218893Sdim return true; 1658218893Sdim } 1659218893Sdim return false; 1660218893Sdim} 1661218893Sdim 1662218893Sdim/// isCompatibleAggregate - Check if T1 and T2 are either the same type or are 1663218893Sdim/// "homogeneous" aggregates with the same element type and number of elements. 1664226890Sdimstatic bool isCompatibleAggregate(Type *T1, Type *T2) { 1665218893Sdim if (T1 == T2) 1666218893Sdim return true; 1667218893Sdim 1668218893Sdim unsigned NumElts1, NumElts2; 1669226890Sdim Type *EltTy1, *EltTy2; 1670218893Sdim if (isHomogeneousAggregate(T1, NumElts1, EltTy1) && 1671218893Sdim isHomogeneousAggregate(T2, NumElts2, EltTy2) && 1672218893Sdim NumElts1 == NumElts2 && 1673218893Sdim EltTy1 == EltTy2) 1674218893Sdim return true; 1675218893Sdim 1676218893Sdim return false; 1677218893Sdim} 1678218893Sdim 1679201360Srdivacky/// isSafeMemAccess - Check if a load/store/memcpy operates on the entire AI 1680201360Srdivacky/// alloca or has an offset and size that corresponds to a component element 1681201360Srdivacky/// within it. The offset checked here may have been formed from a GEP with a 1682201360Srdivacky/// pointer bitcasted to a different type. 1683218893Sdim/// 1684218893Sdim/// If AllowWholeAccess is true, then this allows uses of the entire alloca as a 1685218893Sdim/// unit. If false, it only allows accesses known to be in a single element. 1686218893Sdimvoid SROA::isSafeMemAccess(uint64_t Offset, uint64_t MemSize, 1687226890Sdim Type *MemOpType, bool isStore, 1688218893Sdim AllocaInfo &Info, Instruction *TheAccess, 1689218893Sdim bool AllowWholeAccess) { 1690201360Srdivacky // Check if this is a load/store of the entire alloca. 1691218893Sdim if (Offset == 0 && AllowWholeAccess && 1692218893Sdim MemSize == TD->getTypeAllocSize(Info.AI->getAllocatedType())) { 1693218893Sdim // This can be safe for MemIntrinsics (where MemOpType is 0) and integer 1694218893Sdim // loads/stores (which are essentially the same as the MemIntrinsics with 1695218893Sdim // regard to copying padding between elements). But, if an alloca is 1696218893Sdim // flagged as both a source and destination of such operations, we'll need 1697218893Sdim // to check later for padding between elements. 1698218893Sdim if (!MemOpType || MemOpType->isIntegerTy()) { 1699218893Sdim if (isStore) 1700218893Sdim Info.isMemCpyDst = true; 1701218893Sdim else 1702218893Sdim Info.isMemCpySrc = true; 1703201360Srdivacky return; 1704193323Sed } 1705218893Sdim // This is also safe for references using a type that is compatible with 1706218893Sdim // the type of the alloca, so that loads/stores can be rewritten using 1707218893Sdim // insertvalue/extractvalue. 
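    // (e.g. an alloca of type [2 x i32] accessed through a pointer bitcast to
    // { i32, i32 }* is treated as compatible here.)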
1708218893Sdim if (isCompatibleAggregate(MemOpType, Info.AI->getAllocatedType())) { 1709218893Sdim Info.hasSubelementAccess = true; 1710218893Sdim return; 1711218893Sdim } 1712193323Sed } 1713201360Srdivacky // Check if the offset/size correspond to a component within the alloca type. 1714226890Sdim Type *T = Info.AI->getAllocatedType(); 1715218893Sdim if (TypeHasComponent(T, Offset, MemSize)) { 1716218893Sdim Info.hasSubelementAccess = true; 1717201360Srdivacky return; 1718218893Sdim } 1719193323Sed 1720218893Sdim return MarkUnsafe(Info, TheAccess); 1721193323Sed} 1722193323Sed 1723201360Srdivacky/// TypeHasComponent - Return true if T has a component type with the 1724201360Srdivacky/// specified offset and size. If Size is zero, do not check the size. 1725226890Sdimbool SROA::TypeHasComponent(Type *T, uint64_t Offset, uint64_t Size) { 1726226890Sdim Type *EltTy; 1727201360Srdivacky uint64_t EltSize; 1728226890Sdim if (StructType *ST = dyn_cast<StructType>(T)) { 1729201360Srdivacky const StructLayout *Layout = TD->getStructLayout(ST); 1730201360Srdivacky unsigned EltIdx = Layout->getElementContainingOffset(Offset); 1731201360Srdivacky EltTy = ST->getContainedType(EltIdx); 1732201360Srdivacky EltSize = TD->getTypeAllocSize(EltTy); 1733201360Srdivacky Offset -= Layout->getElementOffset(EltIdx); 1734226890Sdim } else if (ArrayType *AT = dyn_cast<ArrayType>(T)) { 1735201360Srdivacky EltTy = AT->getElementType(); 1736201360Srdivacky EltSize = TD->getTypeAllocSize(EltTy); 1737201360Srdivacky if (Offset >= AT->getNumElements() * EltSize) 1738201360Srdivacky return false; 1739201360Srdivacky Offset %= EltSize; 1740201360Srdivacky } else { 1741201360Srdivacky return false; 1742193323Sed } 1743201360Srdivacky if (Offset == 0 && (Size == 0 || EltSize == Size)) 1744201360Srdivacky return true; 1745201360Srdivacky // Check if the component spans multiple elements. 1746201360Srdivacky if (Offset + Size > EltSize) 1747201360Srdivacky return false; 1748201360Srdivacky return TypeHasComponent(EltTy, Offset, Size); 1749193323Sed} 1750193323Sed 1751201360Srdivacky/// RewriteForScalarRepl - Alloca AI is being split into NewElts, so rewrite 1752201360Srdivacky/// the instruction I, which references it, to use the separate elements. 1753201360Srdivacky/// Offset indicates the position within AI that is referenced by this 1754201360Srdivacky/// instruction. 
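/// For example, once an alloca of type { i32, float } has been split into
/// %A.0 and %A.1 (illustrative names), a GEP that addresses the float field
/// is rewritten to refer to %A.1 directly.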
1755201360Srdivackyvoid SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset, 1756201360Srdivacky SmallVector<AllocaInst*, 32> &NewElts) { 1757218893Sdim for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E;) { 1758218893Sdim Use &TheUse = UI.getUse(); 1759218893Sdim Instruction *User = cast<Instruction>(*UI++); 1760193323Sed 1761201360Srdivacky if (BitCastInst *BC = dyn_cast<BitCastInst>(User)) { 1762201360Srdivacky RewriteBitCast(BC, AI, Offset, NewElts); 1763218893Sdim continue; 1764218893Sdim } 1765218893Sdim 1766218893Sdim if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) { 1767201360Srdivacky RewriteGEP(GEPI, AI, Offset, NewElts); 1768218893Sdim continue; 1769218893Sdim } 1770218893Sdim 1771218893Sdim if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) { 1772201360Srdivacky ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength()); 1773201360Srdivacky uint64_t MemSize = Length->getZExtValue(); 1774201360Srdivacky if (Offset == 0 && 1775201360Srdivacky MemSize == TD->getTypeAllocSize(AI->getAllocatedType())) 1776201360Srdivacky RewriteMemIntrinUserOfAlloca(MI, I, AI, NewElts); 1777201360Srdivacky // Otherwise the intrinsic can only touch a single element and the 1778201360Srdivacky // address operand will be updated, so nothing else needs to be done. 1779218893Sdim continue; 1780218893Sdim } 1781226890Sdim 1782226890Sdim if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(User)) { 1783226890Sdim if (II->getIntrinsicID() == Intrinsic::lifetime_start || 1784226890Sdim II->getIntrinsicID() == Intrinsic::lifetime_end) { 1785226890Sdim RewriteLifetimeIntrinsic(II, AI, Offset, NewElts); 1786226890Sdim } 1787226890Sdim continue; 1788226890Sdim } 1789218893Sdim 1790218893Sdim if (LoadInst *LI = dyn_cast<LoadInst>(User)) { 1791226890Sdim Type *LIType = LI->getType(); 1792218893Sdim 1793218893Sdim if (isCompatibleAggregate(LIType, AI->getAllocatedType())) { 1794201360Srdivacky // Replace: 1795201360Srdivacky // %res = load { i32, i32 }* %alloc 1796201360Srdivacky // with: 1797201360Srdivacky // %load.0 = load i32* %alloc.0 1798201360Srdivacky // %insert.0 insertvalue { i32, i32 } zeroinitializer, i32 %load.0, 0 1799201360Srdivacky // %load.1 = load i32* %alloc.1 1800201360Srdivacky // %insert = insertvalue { i32, i32 } %insert.0, i32 %load.1, 1 1801201360Srdivacky // (Also works for arrays instead of structs) 1802201360Srdivacky Value *Insert = UndefValue::get(LIType); 1803223017Sdim IRBuilder<> Builder(LI); 1804201360Srdivacky for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { 1805223017Sdim Value *Load = Builder.CreateLoad(NewElts[i], "load"); 1806223017Sdim Insert = Builder.CreateInsertValue(Insert, Load, i, "insert"); 1807201360Srdivacky } 1808201360Srdivacky LI->replaceAllUsesWith(Insert); 1809201360Srdivacky DeadInsts.push_back(LI); 1810204642Srdivacky } else if (LIType->isIntegerTy() && 1811201360Srdivacky TD->getTypeAllocSize(LIType) == 1812201360Srdivacky TD->getTypeAllocSize(AI->getAllocatedType())) { 1813201360Srdivacky // If this is a load of the entire alloca to an integer, rewrite it. 
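        // (e.g. "%x = load i64* %p" where %p is the { i32, i32 } alloca
        // bitcast to i64*.)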
1814201360Srdivacky RewriteLoadUserOfWholeAlloca(LI, AI, NewElts); 1815193323Sed } 1816218893Sdim continue; 1817218893Sdim } 1818218893Sdim 1819218893Sdim if (StoreInst *SI = dyn_cast<StoreInst>(User)) { 1820201360Srdivacky Value *Val = SI->getOperand(0); 1821226890Sdim Type *SIType = Val->getType(); 1822218893Sdim if (isCompatibleAggregate(SIType, AI->getAllocatedType())) { 1823201360Srdivacky // Replace: 1824201360Srdivacky // store { i32, i32 } %val, { i32, i32 }* %alloc 1825201360Srdivacky // with: 1826201360Srdivacky // %val.0 = extractvalue { i32, i32 } %val, 0 1827201360Srdivacky // store i32 %val.0, i32* %alloc.0 1828201360Srdivacky // %val.1 = extractvalue { i32, i32 } %val, 1 1829201360Srdivacky // store i32 %val.1, i32* %alloc.1 1830201360Srdivacky // (Also works for arrays instead of structs) 1831223017Sdim IRBuilder<> Builder(SI); 1832201360Srdivacky for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { 1833223017Sdim Value *Extract = Builder.CreateExtractValue(Val, i, Val->getName()); 1834223017Sdim Builder.CreateStore(Extract, NewElts[i]); 1835201360Srdivacky } 1836201360Srdivacky DeadInsts.push_back(SI); 1837204642Srdivacky } else if (SIType->isIntegerTy() && 1838201360Srdivacky TD->getTypeAllocSize(SIType) == 1839201360Srdivacky TD->getTypeAllocSize(AI->getAllocatedType())) { 1840201360Srdivacky // If this is a store of the entire alloca from an integer, rewrite it. 1841201360Srdivacky RewriteStoreUserOfWholeAlloca(SI, AI, NewElts); 1842193323Sed } 1843218893Sdim continue; 1844193323Sed } 1845218893Sdim 1846218893Sdim if (isa<SelectInst>(User) || isa<PHINode>(User)) { 1847218893Sdim // If we have a PHI user of the alloca itself (as opposed to a GEP or 1848218893Sdim // bitcast) we have to rewrite it. GEP and bitcast uses will be RAUW'd to 1849218893Sdim // the new pointer. 1850218893Sdim if (!isa<AllocaInst>(I)) continue; 1851218893Sdim 1852218893Sdim assert(Offset == 0 && NewElts[0] && 1853218893Sdim "Direct alloca use should have a zero offset"); 1854218893Sdim 1855218893Sdim // If we have a use of the alloca, we know the derived uses will be 1856218893Sdim // utilizing just the first element of the scalarized result. Insert a 1857218893Sdim // bitcast of the first alloca before the user as required. 1858218893Sdim AllocaInst *NewAI = NewElts[0]; 1859218893Sdim BitCastInst *BCI = new BitCastInst(NewAI, AI->getType(), "", NewAI); 1860218893Sdim NewAI->moveBefore(BCI); 1861218893Sdim TheUse = BCI; 1862218893Sdim continue; 1863218893Sdim } 1864193323Sed } 1865193323Sed} 1866193323Sed 1867201360Srdivacky/// RewriteBitCast - Update a bitcast reference to the alloca being replaced 1868201360Srdivacky/// and recursively continue updating all of its uses. 1869201360Srdivackyvoid SROA::RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset, 1870201360Srdivacky SmallVector<AllocaInst*, 32> &NewElts) { 1871201360Srdivacky RewriteForScalarRepl(BC, AI, Offset, NewElts); 1872201360Srdivacky if (BC->getOperand(0) != AI) 1873201360Srdivacky return; 1874193323Sed 1875201360Srdivacky // The bitcast references the original alloca. Replace its uses with 1876201360Srdivacky // references to the first new element alloca. 
1877201360Srdivacky  Instruction *Val = NewElts[0];
1878201360Srdivacky  if (Val->getType() != BC->getDestTy()) {
1879201360Srdivacky    Val = new BitCastInst(Val, BC->getDestTy(), "", BC);
1880201360Srdivacky    Val->takeName(BC);
1881201360Srdivacky  }
1882201360Srdivacky  BC->replaceAllUsesWith(Val);
1883201360Srdivacky  DeadInsts.push_back(BC);
1884201360Srdivacky}
1885193323Sed
1886201360Srdivacky/// FindElementAndOffset - Return the index of the element containing Offset
1887201360Srdivacky/// within the specified type, which must be either a struct or an array.
1888201360Srdivacky/// Sets T to the type of the element and Offset to the offset within that
1889201360Srdivacky/// element.  IdxTy is set to the type of the index result to be used in a
1890201360Srdivacky/// GEP instruction.
1891226890Sdimuint64_t SROA::FindElementAndOffset(Type *&T, uint64_t &Offset,
1892226890Sdim                                    Type *&IdxTy) {
1893201360Srdivacky  uint64_t Idx = 0;
1894226890Sdim  if (StructType *ST = dyn_cast<StructType>(T)) {
1895201360Srdivacky    const StructLayout *Layout = TD->getStructLayout(ST);
1896201360Srdivacky    Idx = Layout->getElementContainingOffset(Offset);
1897201360Srdivacky    T = ST->getContainedType(Idx);
1898201360Srdivacky    Offset -= Layout->getElementOffset(Idx);
1899201360Srdivacky    IdxTy = Type::getInt32Ty(T->getContext());
1900201360Srdivacky    return Idx;
1901193323Sed  }
1902226890Sdim  ArrayType *AT = cast<ArrayType>(T);
1903201360Srdivacky  T = AT->getElementType();
1904201360Srdivacky  uint64_t EltSize = TD->getTypeAllocSize(T);
1905201360Srdivacky  Idx = Offset / EltSize;
1906201360Srdivacky  Offset -= Idx * EltSize;
1907201360Srdivacky  IdxTy = Type::getInt64Ty(T->getContext());
1908201360Srdivacky  return Idx;
1909193323Sed}
1910193323Sed
1911201360Srdivacky/// RewriteGEP - Check if this GEP instruction moves the pointer across
1912201360Srdivacky/// elements of the alloca that are being split apart, and if so, rewrite
1913201360Srdivacky/// the GEP to be relative to the new element.
1914201360Srdivackyvoid SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset,
1915201360Srdivacky                      SmallVector<AllocaInst*, 32> &NewElts) {
1916201360Srdivacky  uint64_t OldOffset = Offset;
1917201360Srdivacky  SmallVector<Value*, 8> Indices(GEPI->op_begin() + 1, GEPI->op_end());
1918226890Sdim  Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(), Indices);
1919201360Srdivacky
1920201360Srdivacky  RewriteForScalarRepl(GEPI, AI, Offset, NewElts);
1921201360Srdivacky
1922226890Sdim  Type *T = AI->getAllocatedType();
1923226890Sdim  Type *IdxTy;
1924201360Srdivacky  uint64_t OldIdx = FindElementAndOffset(T, OldOffset, IdxTy);
1925201360Srdivacky  if (GEPI->getOperand(0) == AI)
1926201360Srdivacky    OldIdx = ~0ULL; // Force the GEP to be rewritten.
1927201360Srdivacky
1928201360Srdivacky  T = AI->getAllocatedType();
1929201360Srdivacky  uint64_t EltOffset = Offset;
1930201360Srdivacky  uint64_t Idx = FindElementAndOffset(T, EltOffset, IdxTy);
1931201360Srdivacky
1932201360Srdivacky  // If this GEP does not move the pointer across elements of the alloca
1933201360Srdivacky  // being split, then it does not need to be rewritten.
1934201360Srdivacky if (Idx == OldIdx) 1935201360Srdivacky return; 1936201360Srdivacky 1937226890Sdim Type *i32Ty = Type::getInt32Ty(AI->getContext()); 1938201360Srdivacky SmallVector<Value*, 8> NewArgs; 1939201360Srdivacky NewArgs.push_back(Constant::getNullValue(i32Ty)); 1940201360Srdivacky while (EltOffset != 0) { 1941201360Srdivacky uint64_t EltIdx = FindElementAndOffset(T, EltOffset, IdxTy); 1942201360Srdivacky NewArgs.push_back(ConstantInt::get(IdxTy, EltIdx)); 1943201360Srdivacky } 1944201360Srdivacky Instruction *Val = NewElts[Idx]; 1945201360Srdivacky if (NewArgs.size() > 1) { 1946226890Sdim Val = GetElementPtrInst::CreateInBounds(Val, NewArgs, "", GEPI); 1947201360Srdivacky Val->takeName(GEPI); 1948201360Srdivacky } 1949201360Srdivacky if (Val->getType() != GEPI->getType()) 1950203954Srdivacky Val = new BitCastInst(Val, GEPI->getType(), Val->getName(), GEPI); 1951201360Srdivacky GEPI->replaceAllUsesWith(Val); 1952201360Srdivacky DeadInsts.push_back(GEPI); 1953201360Srdivacky} 1954201360Srdivacky 1955226890Sdim/// RewriteLifetimeIntrinsic - II is a lifetime.start/lifetime.end. Rewrite it 1956226890Sdim/// to mark the lifetime of the scalarized memory. 1957226890Sdimvoid SROA::RewriteLifetimeIntrinsic(IntrinsicInst *II, AllocaInst *AI, 1958226890Sdim uint64_t Offset, 1959226890Sdim SmallVector<AllocaInst*, 32> &NewElts) { 1960226890Sdim ConstantInt *OldSize = cast<ConstantInt>(II->getArgOperand(0)); 1961226890Sdim // Put matching lifetime markers on everything from Offset up to 1962226890Sdim // Offset+OldSize. 1963226890Sdim Type *AIType = AI->getAllocatedType(); 1964226890Sdim uint64_t NewOffset = Offset; 1965226890Sdim Type *IdxTy; 1966226890Sdim uint64_t Idx = FindElementAndOffset(AIType, NewOffset, IdxTy); 1967226890Sdim 1968226890Sdim IRBuilder<> Builder(II); 1969226890Sdim uint64_t Size = OldSize->getLimitedValue(); 1970226890Sdim 1971226890Sdim if (NewOffset) { 1972226890Sdim // Splice the first element and index 'NewOffset' bytes in. SROA will 1973226890Sdim // split the alloca again later. 
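    // The first affected element is addressed as an i8* so that the remaining
    // byte offset can be applied with a plain byte-indexed GEP.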
1974226890Sdim    Value *V = Builder.CreateBitCast(NewElts[Idx], Builder.getInt8PtrTy());
1975226890Sdim    V = Builder.CreateGEP(V, Builder.getInt64(NewOffset));
1976226890Sdim
1977226890Sdim    IdxTy = NewElts[Idx]->getAllocatedType();
1978226890Sdim    uint64_t EltSize = TD->getTypeAllocSize(IdxTy) - NewOffset;
1979226890Sdim    if (EltSize > Size) {
1980226890Sdim      EltSize = Size;
1981226890Sdim      Size = 0;
1982226890Sdim    } else {
1983226890Sdim      Size -= EltSize;
1984226890Sdim    }
1985226890Sdim    if (II->getIntrinsicID() == Intrinsic::lifetime_start)
1986226890Sdim      Builder.CreateLifetimeStart(V, Builder.getInt64(EltSize));
1987226890Sdim    else
1988226890Sdim      Builder.CreateLifetimeEnd(V, Builder.getInt64(EltSize));
1989226890Sdim    ++Idx;
1990226890Sdim  }
1991226890Sdim
1992226890Sdim  for (; Idx != NewElts.size() && Size; ++Idx) {
1993226890Sdim    IdxTy = NewElts[Idx]->getAllocatedType();
1994226890Sdim    uint64_t EltSize = TD->getTypeAllocSize(IdxTy);
1995226890Sdim    if (EltSize > Size) {
1996226890Sdim      EltSize = Size;
1997226890Sdim      Size = 0;
1998226890Sdim    } else {
1999226890Sdim      Size -= EltSize;
2000226890Sdim    }
2001226890Sdim    if (II->getIntrinsicID() == Intrinsic::lifetime_start)
2002226890Sdim      Builder.CreateLifetimeStart(NewElts[Idx],
2003226890Sdim                                  Builder.getInt64(EltSize));
2004226890Sdim    else
2005226890Sdim      Builder.CreateLifetimeEnd(NewElts[Idx],
2006226890Sdim                                Builder.getInt64(EltSize));
2007226890Sdim  }
2008226890Sdim  DeadInsts.push_back(II);
2009226890Sdim}
2010226890Sdim
2011193323Sed/// RewriteMemIntrinUserOfAlloca - MI is a memcpy/memset/memmove from or to AI.
2012193323Sed/// Rewrite it to copy or set the elements of the scalarized memory.
2013201360Srdivackyvoid SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
2014198892Srdivacky                                        AllocaInst *AI,
2015193323Sed                                        SmallVector<AllocaInst*, 32> &NewElts) {
2016193323Sed  // If this is a memcpy/memmove, construct the other pointer as the
2017193323Sed  // appropriate type.  The "Other" pointer is the pointer that goes to memory
2018193323Sed  // that doesn't have anything to do with the alloca that we are promoting. For
2019193323Sed  // memset, this Value* stays null.
2020193323Sed  Value *OtherPtr = 0;
2021193323Sed  unsigned MemAlignment = MI->getAlignment();
2022193323Sed  if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) { // memmove/memcpy
2023201360Srdivacky    if (Inst == MTI->getRawDest())
2024193323Sed      OtherPtr = MTI->getRawSource();
2025193323Sed    else {
2026201360Srdivacky      assert(Inst == MTI->getRawSource());
2027193323Sed      OtherPtr = MTI->getRawDest();
2028193323Sed    }
2029193323Sed  }
2030200581Srdivacky
2031193323Sed  // If there is an other pointer, we want to convert it to the same pointer
2032193323Sed  // type as AI has, so we can GEP through it safely.
2033193323Sed  if (OtherPtr) {
2034210299Sed    unsigned AddrSpace =
2035210299Sed      cast<PointerType>(OtherPtr->getType())->getAddressSpace();
2036201360Srdivacky
2037201360Srdivacky    // Remove bitcasts and all-zero GEPs from OtherPtr.  This is an
2038201360Srdivacky    // optimization, but it's also required to detect the corner case where
2039201360Srdivacky    // both pointer operands are referencing the same memory, and where
2040201360Srdivacky    // OtherPtr may be a bitcast or GEP that is currently being rewritten.
2041201360Srdivacky    // (This function is only called for mem intrinsics that access the whole
2042201360Srdivacky    // aggregate, so non-zero GEPs are not an issue here.)
2043210299Sed OtherPtr = OtherPtr->stripPointerCasts(); 2044218893Sdim 2045202878Srdivacky // Copying the alloca to itself is a no-op: just delete it. 2046202878Srdivacky if (OtherPtr == AI || OtherPtr == NewElts[0]) { 2047202878Srdivacky // This code will run twice for a no-op memcpy -- once for each operand. 2048202878Srdivacky // Put only one reference to MI on the DeadInsts list. 2049202878Srdivacky for (SmallVector<Value*, 32>::const_iterator I = DeadInsts.begin(), 2050202878Srdivacky E = DeadInsts.end(); I != E; ++I) 2051202878Srdivacky if (*I == MI) return; 2052202878Srdivacky DeadInsts.push_back(MI); 2053201360Srdivacky return; 2054202878Srdivacky } 2055218893Sdim 2056193323Sed // If the pointer is not the right type, insert a bitcast to the right 2057193323Sed // type. 2058226890Sdim Type *NewTy = 2059210299Sed PointerType::get(AI->getType()->getElementType(), AddrSpace); 2060218893Sdim 2061210299Sed if (OtherPtr->getType() != NewTy) 2062210299Sed OtherPtr = new BitCastInst(OtherPtr, NewTy, OtherPtr->getName(), MI); 2063193323Sed } 2064218893Sdim 2065193323Sed // Process each element of the aggregate. 2066201360Srdivacky bool SROADest = MI->getRawDest() == Inst; 2067218893Sdim 2068198090Srdivacky Constant *Zero = Constant::getNullValue(Type::getInt32Ty(MI->getContext())); 2069193323Sed 2070193323Sed for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { 2071193323Sed // If this is a memcpy/memmove, emit a GEP of the other element address. 2072193323Sed Value *OtherElt = 0; 2073193323Sed unsigned OtherEltAlign = MemAlignment; 2074218893Sdim 2075202878Srdivacky if (OtherPtr) { 2076198090Srdivacky Value *Idx[2] = { Zero, 2077198090Srdivacky ConstantInt::get(Type::getInt32Ty(MI->getContext()), i) }; 2078226890Sdim OtherElt = GetElementPtrInst::CreateInBounds(OtherPtr, Idx, 2079203954Srdivacky OtherPtr->getName()+"."+Twine(i), 2080201360Srdivacky MI); 2081193323Sed uint64_t EltOffset; 2082226890Sdim PointerType *OtherPtrTy = cast<PointerType>(OtherPtr->getType()); 2083226890Sdim Type *OtherTy = OtherPtrTy->getElementType(); 2084226890Sdim if (StructType *ST = dyn_cast<StructType>(OtherTy)) { 2085193323Sed EltOffset = TD->getStructLayout(ST)->getElementOffset(i); 2086193323Sed } else { 2087226890Sdim Type *EltTy = cast<SequentialType>(OtherTy)->getElementType(); 2088193323Sed EltOffset = TD->getTypeAllocSize(EltTy)*i; 2089193323Sed } 2090218893Sdim 2091193323Sed // The alignment of the other pointer is the guaranteed alignment of the 2092193323Sed // element, which is affected by both the known alignment of the whole 2093193323Sed // mem intrinsic and the alignment of the element. If the alignment of 2094193323Sed // the memcpy (f.e.) is 32 but the element is at a 4-byte offset, then the 2095193323Sed // known alignment is just 4 bytes. 2096193323Sed OtherEltAlign = (unsigned)MinAlign(OtherEltAlign, EltOffset); 2097193323Sed } 2098218893Sdim 2099193323Sed Value *EltPtr = NewElts[i]; 2100226890Sdim Type *EltTy = cast<PointerType>(EltPtr->getType())->getElementType(); 2101218893Sdim 2102193323Sed // If we got down to a scalar, insert a load or store as appropriate. 2103193323Sed if (EltTy->isSingleValueType()) { 2104193323Sed if (isa<MemTransferInst>(MI)) { 2105193323Sed if (SROADest) { 2106193323Sed // From Other to Alloca. 2107193323Sed Value *Elt = new LoadInst(OtherElt, "tmp", false, OtherEltAlign, MI); 2108193323Sed new StoreInst(Elt, EltPtr, MI); 2109193323Sed } else { 2110193323Sed // From Alloca to Other. 
2111193323Sed Value *Elt = new LoadInst(EltPtr, "tmp", MI); 2112193323Sed new StoreInst(Elt, OtherElt, false, OtherEltAlign, MI); 2113193323Sed } 2114193323Sed continue; 2115193323Sed } 2116193323Sed assert(isa<MemSetInst>(MI)); 2117218893Sdim 2118193323Sed // If the stored element is zero (common case), just store a null 2119193323Sed // constant. 2120193323Sed Constant *StoreVal; 2121210299Sed if (ConstantInt *CI = dyn_cast<ConstantInt>(MI->getArgOperand(1))) { 2122193323Sed if (CI->isZero()) { 2123198090Srdivacky StoreVal = Constant::getNullValue(EltTy); // 0.0, null, 0, <0,0> 2124193323Sed } else { 2125193323Sed // If EltTy is a vector type, get the element type. 2126226890Sdim Type *ValTy = EltTy->getScalarType(); 2127194612Sed 2128193323Sed // Construct an integer with the right value. 2129193323Sed unsigned EltSize = TD->getTypeSizeInBits(ValTy); 2130193323Sed APInt OneVal(EltSize, CI->getZExtValue()); 2131193323Sed APInt TotalVal(OneVal); 2132193323Sed // Set each byte. 2133193323Sed for (unsigned i = 0; 8*i < EltSize; ++i) { 2134193323Sed TotalVal = TotalVal.shl(8); 2135193323Sed TotalVal |= OneVal; 2136193323Sed } 2137218893Sdim 2138193323Sed // Convert the integer value to the appropriate type. 2139207618Srdivacky StoreVal = ConstantInt::get(CI->getContext(), TotalVal); 2140204642Srdivacky if (ValTy->isPointerTy()) 2141198090Srdivacky StoreVal = ConstantExpr::getIntToPtr(StoreVal, ValTy); 2142203954Srdivacky else if (ValTy->isFloatingPointTy()) 2143198090Srdivacky StoreVal = ConstantExpr::getBitCast(StoreVal, ValTy); 2144193323Sed assert(StoreVal->getType() == ValTy && "Type mismatch!"); 2145218893Sdim 2146193323Sed // If the requested value was a vector constant, create it. 2147226890Sdim if (EltTy->isVectorTy()) { 2148226890Sdim unsigned NumElts = cast<VectorType>(EltTy)->getNumElements(); 2149193323Sed SmallVector<Constant*, 16> Elts(NumElts, StoreVal); 2150218893Sdim StoreVal = ConstantVector::get(Elts); 2151193323Sed } 2152193323Sed } 2153193323Sed new StoreInst(StoreVal, EltPtr, MI); 2154193323Sed continue; 2155193323Sed } 2156193323Sed // Otherwise, if we're storing a byte variable, use a memset call for 2157193323Sed // this element. 2158193323Sed } 2159218893Sdim 2160193323Sed unsigned EltSize = TD->getTypeAllocSize(EltTy); 2161218893Sdim 2162218893Sdim IRBuilder<> Builder(MI); 2163218893Sdim 2164193323Sed // Finally, insert the meminst for this element. 2165218893Sdim if (isa<MemSetInst>(MI)) { 2166218893Sdim Builder.CreateMemSet(EltPtr, MI->getArgOperand(1), EltSize, 2167218893Sdim MI->isVolatile()); 2168193323Sed } else { 2169218893Sdim assert(isa<MemTransferInst>(MI)); 2170218893Sdim Value *Dst = SROADest ? EltPtr : OtherElt; // Dest ptr 2171218893Sdim Value *Src = SROADest ? OtherElt : EltPtr; // Src ptr 2172218893Sdim 2173218893Sdim if (isa<MemCpyInst>(MI)) 2174218893Sdim Builder.CreateMemCpy(Dst, Src, EltSize, OtherEltAlign,MI->isVolatile()); 2175218893Sdim else 2176218893Sdim Builder.CreateMemMove(Dst, Src, EltSize,OtherEltAlign,MI->isVolatile()); 2177193323Sed } 2178193323Sed } 2179201360Srdivacky DeadInsts.push_back(MI); 2180193323Sed} 2181193323Sed 2182200581Srdivacky/// RewriteStoreUserOfWholeAlloca - We found a store of an integer that 2183193323Sed/// overwrites the entire allocation. Extract out the pieces of the stored 2184193323Sed/// integer and store them individually. 
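/// For example (little-endian; names illustrative), with %A of type
/// { i32, i32 } split into %A.0 and %A.1, the store
///   store i64 %v, i64* %A
/// becomes roughly:
///   %lo = trunc i64 %v to i32
///   store i32 %lo, i32* %A.0
///   %hi.sh = lshr i64 %v, 32
///   %hi = trunc i64 %hi.sh to i32
///   store i32 %hi, i32* %A.1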
2185198892Srdivackyvoid SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI, 2186193323Sed SmallVector<AllocaInst*, 32> &NewElts){ 2187193323Sed // Extract each element out of the integer according to its structure offset 2188193323Sed // and store the element value to the individual alloca. 2189193323Sed Value *SrcVal = SI->getOperand(0); 2190226890Sdim Type *AllocaEltTy = AI->getAllocatedType(); 2191193323Sed uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy); 2192218893Sdim 2193218893Sdim IRBuilder<> Builder(SI); 2194193323Sed 2195193323Sed // Handle tail padding by extending the operand 2196193323Sed if (TD->getTypeSizeInBits(SrcVal->getType()) != AllocaSizeBits) 2197218893Sdim SrcVal = Builder.CreateZExt(SrcVal, 2198218893Sdim IntegerType::get(SI->getContext(), AllocaSizeBits)); 2199193323Sed 2200202375Srdivacky DEBUG(dbgs() << "PROMOTING STORE TO WHOLE ALLOCA: " << *AI << '\n' << *SI 2201198090Srdivacky << '\n'); 2202193323Sed 2203193323Sed // There are two forms here: AI could be an array or struct. Both cases 2204193323Sed // have different ways to compute the element offset. 2205226890Sdim if (StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) { 2206193323Sed const StructLayout *Layout = TD->getStructLayout(EltSTy); 2207218893Sdim 2208193323Sed for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { 2209193323Sed // Get the number of bits to shift SrcVal to get the value. 2210226890Sdim Type *FieldTy = EltSTy->getElementType(i); 2211193323Sed uint64_t Shift = Layout->getElementOffsetInBits(i); 2212218893Sdim 2213193323Sed if (TD->isBigEndian()) 2214193323Sed Shift = AllocaSizeBits-Shift-TD->getTypeAllocSizeInBits(FieldTy); 2215218893Sdim 2216193323Sed Value *EltVal = SrcVal; 2217193323Sed if (Shift) { 2218198090Srdivacky Value *ShiftVal = ConstantInt::get(EltVal->getType(), Shift); 2219218893Sdim EltVal = Builder.CreateLShr(EltVal, ShiftVal, "sroa.store.elt"); 2220193323Sed } 2221218893Sdim 2222193323Sed // Truncate down to an integer of the right size. 2223193323Sed uint64_t FieldSizeBits = TD->getTypeSizeInBits(FieldTy); 2224218893Sdim 2225193323Sed // Ignore zero sized fields like {}, they obviously contain no data. 2226193323Sed if (FieldSizeBits == 0) continue; 2227218893Sdim 2228193323Sed if (FieldSizeBits != AllocaSizeBits) 2229218893Sdim EltVal = Builder.CreateTrunc(EltVal, 2230218893Sdim IntegerType::get(SI->getContext(), FieldSizeBits)); 2231193323Sed Value *DestField = NewElts[i]; 2232193323Sed if (EltVal->getType() == FieldTy) { 2233193323Sed // Storing to an integer field of this size, just do it. 2234204642Srdivacky } else if (FieldTy->isFloatingPointTy() || FieldTy->isVectorTy()) { 2235193323Sed // Bitcast to the right element type (for fp/vector values). 2236218893Sdim EltVal = Builder.CreateBitCast(EltVal, FieldTy); 2237193323Sed } else { 2238193323Sed // Otherwise, bitcast the dest pointer (for aggregates). 
2239218893Sdim DestField = Builder.CreateBitCast(DestField, 2240218893Sdim PointerType::getUnqual(EltVal->getType())); 2241193323Sed } 2242193323Sed new StoreInst(EltVal, DestField, SI); 2243193323Sed } 2244218893Sdim 2245193323Sed } else { 2246226890Sdim ArrayType *ATy = cast<ArrayType>(AllocaEltTy); 2247226890Sdim Type *ArrayEltTy = ATy->getElementType(); 2248193323Sed uint64_t ElementOffset = TD->getTypeAllocSizeInBits(ArrayEltTy); 2249193323Sed uint64_t ElementSizeBits = TD->getTypeSizeInBits(ArrayEltTy); 2250193323Sed 2251193323Sed uint64_t Shift; 2252218893Sdim 2253193323Sed if (TD->isBigEndian()) 2254193323Sed Shift = AllocaSizeBits-ElementOffset; 2255218893Sdim else 2256193323Sed Shift = 0; 2257218893Sdim 2258193323Sed for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { 2259193323Sed // Ignore zero sized fields like {}, they obviously contain no data. 2260193323Sed if (ElementSizeBits == 0) continue; 2261218893Sdim 2262193323Sed Value *EltVal = SrcVal; 2263193323Sed if (Shift) { 2264198090Srdivacky Value *ShiftVal = ConstantInt::get(EltVal->getType(), Shift); 2265218893Sdim EltVal = Builder.CreateLShr(EltVal, ShiftVal, "sroa.store.elt"); 2266193323Sed } 2267218893Sdim 2268193323Sed // Truncate down to an integer of the right size. 2269193323Sed if (ElementSizeBits != AllocaSizeBits) 2270218893Sdim EltVal = Builder.CreateTrunc(EltVal, 2271218893Sdim IntegerType::get(SI->getContext(), 2272218893Sdim ElementSizeBits)); 2273193323Sed Value *DestField = NewElts[i]; 2274193323Sed if (EltVal->getType() == ArrayEltTy) { 2275193323Sed // Storing to an integer field of this size, just do it. 2276203954Srdivacky } else if (ArrayEltTy->isFloatingPointTy() || 2277204642Srdivacky ArrayEltTy->isVectorTy()) { 2278193323Sed // Bitcast to the right element type (for fp/vector values). 2279218893Sdim EltVal = Builder.CreateBitCast(EltVal, ArrayEltTy); 2280193323Sed } else { 2281193323Sed // Otherwise, bitcast the dest pointer (for aggregates). 2282218893Sdim DestField = Builder.CreateBitCast(DestField, 2283218893Sdim PointerType::getUnqual(EltVal->getType())); 2284193323Sed } 2285193323Sed new StoreInst(EltVal, DestField, SI); 2286218893Sdim 2287193323Sed if (TD->isBigEndian()) 2288193323Sed Shift -= ElementOffset; 2289218893Sdim else 2290193323Sed Shift += ElementOffset; 2291193323Sed } 2292193323Sed } 2293218893Sdim 2294201360Srdivacky DeadInsts.push_back(SI); 2295193323Sed} 2296193323Sed 2297200581Srdivacky/// RewriteLoadUserOfWholeAlloca - We found a load of the entire allocation to 2298193323Sed/// an integer. Load the individual pieces to form the aggregate value. 2299198892Srdivackyvoid SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI, 2300193323Sed SmallVector<AllocaInst*, 32> &NewElts) { 2301193323Sed // Extract each element out of the NewElts according to its structure offset 2302193323Sed // and form the result value. 2303226890Sdim Type *AllocaEltTy = AI->getAllocatedType(); 2304193323Sed uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy); 2305218893Sdim 2306202375Srdivacky DEBUG(dbgs() << "PROMOTING LOAD OF WHOLE ALLOCA: " << *AI << '\n' << *LI 2307198090Srdivacky << '\n'); 2308218893Sdim 2309193323Sed // There are two forms here: AI could be an array or struct. Both cases 2310193323Sed // have different ways to compute the element offset. 
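  // For example (little-endian; names illustrative), with %A of type
  // { i32, i32 } split into %A.0 and %A.1, "%v = load i64* %A" is
  // reassembled roughly as:
  //   %lo = load i32* %A.0
  //   %lo.z = zext i32 %lo to i64
  //   %hi = load i32* %A.1
  //   %hi.z = zext i32 %hi to i64
  //   %hi.s = shl i64 %hi.z, 32
  //   %v = or i64 %hi.s, %lo.z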
  const StructLayout *Layout = 0;
  uint64_t ArrayEltBitOffset = 0;
  if (StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) {
    Layout = TD->getStructLayout(EltSTy);
  } else {
    Type *ArrayEltTy = cast<ArrayType>(AllocaEltTy)->getElementType();
    ArrayEltBitOffset = TD->getTypeAllocSizeInBits(ArrayEltTy);
  }

  Value *ResultVal =
    Constant::getNullValue(IntegerType::get(LI->getContext(), AllocaSizeBits));

  for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
    // Load the value from the alloca.  If the NewElt is an aggregate, cast
    // the pointer to an integer of the same size before doing the load.
    Value *SrcField = NewElts[i];
    Type *FieldTy =
      cast<PointerType>(SrcField->getType())->getElementType();
    uint64_t FieldSizeBits = TD->getTypeSizeInBits(FieldTy);

    // Ignore zero-sized fields like {}; they obviously contain no data.
    if (FieldSizeBits == 0) continue;

    IntegerType *FieldIntTy = IntegerType::get(LI->getContext(),
                                               FieldSizeBits);
    if (!FieldTy->isIntegerTy() && !FieldTy->isFloatingPointTy() &&
        !FieldTy->isVectorTy())
      SrcField = new BitCastInst(SrcField,
                                 PointerType::getUnqual(FieldIntTy),
                                 "", LI);
    SrcField = new LoadInst(SrcField, "sroa.load.elt", LI);

    // If SrcField is an fp or vector value of the right size, but not an
    // integer type, bitcast it to an integer so we can shift it.
    if (SrcField->getType() != FieldIntTy)
      SrcField = new BitCastInst(SrcField, FieldIntTy, "", LI);

    // Zero extend the field to be the same size as the final alloca so that
    // we can shift and insert it.
    if (SrcField->getType() != ResultVal->getType())
      SrcField = new ZExtInst(SrcField, ResultVal->getType(), "", LI);

    // Determine the number of bits to shift SrcField.
    uint64_t Shift;
    if (Layout) // Struct case.
      Shift = Layout->getElementOffsetInBits(i);
    else // Array case.
      Shift = i * ArrayEltBitOffset;

    if (TD->isBigEndian())
      Shift = AllocaSizeBits - Shift - FieldIntTy->getBitWidth();

    if (Shift) {
      Value *ShiftVal = ConstantInt::get(SrcField->getType(), Shift);
      SrcField = BinaryOperator::CreateShl(SrcField, ShiftVal, "", LI);
    }

    // Don't create an 'or x, 0' on the first iteration.
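    // Each piece was zero-extended and shifted into its own bit range above,
    // so the pieces occupy disjoint bits and OR merges them losslessly.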
    if (!isa<Constant>(ResultVal) ||
        !cast<Constant>(ResultVal)->isNullValue())
      ResultVal = BinaryOperator::CreateOr(SrcField, ResultVal, "", LI);
    else
      ResultVal = SrcField;
  }

  // Handle tail padding by truncating the result.
  if (TD->getTypeSizeInBits(LI->getType()) != AllocaSizeBits)
    ResultVal = new TruncInst(ResultVal, LI->getType(), "", LI);

  LI->replaceAllUsesWith(ResultVal);
  DeadInsts.push_back(LI);
}

/// HasPadding - Return true if the specified type has any structure or
/// alignment padding in between the elements that would be split apart
/// by SROA; return false otherwise.
static bool HasPadding(Type *Ty, const TargetData &TD) {
  if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
    Ty = ATy->getElementType();
    return TD.getTypeSizeInBits(Ty) != TD.getTypeAllocSizeInBits(Ty);
  }

  // SROA currently handles only Arrays and Structs.
  StructType *STy = cast<StructType>(Ty);
  const StructLayout *SL = TD.getStructLayout(STy);
  unsigned PrevFieldBitOffset = 0;
  for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
    unsigned FieldBitOffset = SL->getElementOffsetInBits(i);

    // Check to see if there is any padding between this element and the
    // previous one.
    if (i) {
      unsigned PrevFieldEnd =
        PrevFieldBitOffset + TD.getTypeSizeInBits(STy->getElementType(i-1));
      if (PrevFieldEnd < FieldBitOffset)
        return true;
    }
    PrevFieldBitOffset = FieldBitOffset;
  }
  // Check for tail padding.
  if (unsigned EltCount = STy->getNumElements()) {
    unsigned PrevFieldEnd = PrevFieldBitOffset +
      TD.getTypeSizeInBits(STy->getElementType(EltCount-1));
    if (PrevFieldEnd < SL->getSizeInBits())
      return true;
  }
  return false;
}

/// isSafeAllocaToScalarRepl - Check to see if the specified allocation of an
/// aggregate can be broken down into elements.  Return true if safe, false
/// otherwise.
bool SROA::isSafeAllocaToScalarRepl(AllocaInst *AI) {
  // Loop over the use list of the alloca.  We can only transform it if all of
  // the users are safe to transform.
  AllocaInfo Info(AI);

  isSafeForScalarRepl(AI, 0, Info);
  if (Info.isUnsafe) {
    DEBUG(dbgs() << "Cannot transform: " << *AI << '\n');
    return false;
  }

  // Okay, we know all the users are promotable.  If the aggregate is a memcpy
  // source and destination, we have to be careful.  In particular, the memcpy
  // could be moving around elements that live in structure padding of the LLVM
  // types, but may actually be used.  In these cases, we refuse to promote the
  // struct.
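  // For example, %T = type { i8, i32 } normally has three bytes of padding
  // after the i8 (assuming the usual 4-byte i32 alignment); a whole-struct
  // memcpy moves those bytes too, and scalarizing the alloca would drop them.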
  if (Info.isMemCpySrc && Info.isMemCpyDst &&
      HasPadding(AI->getAllocatedType(), *TD))
    return false;

  // If the alloca never has an access to just *part* of it, but is accessed
  // via loads and stores, then we should use ConvertToScalarInfo to promote
  // the alloca instead of promoting each piece at a time and inserting fission
  // and fusion code.
  if (!Info.hasSubelementAccess && Info.hasALoadOrStore) {
    // If the struct/array just has one element, use basic SRoA.
    if (StructType *ST = dyn_cast<StructType>(AI->getAllocatedType())) {
      if (ST->getNumElements() > 1) return false;
    } else {
      if (cast<ArrayType>(AI->getAllocatedType())->getNumElements() > 1)
        return false;
    }
  }

  return true;
}


/// PointsToConstantGlobal - Return true if V (possibly indirectly) points to
/// some part of a constant global variable.  This intentionally only accepts
/// constant expressions because we can't rewrite arbitrary instructions.
static bool PointsToConstantGlobal(Value *V) {
  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
    return GV->isConstant();
  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
    if (CE->getOpcode() == Instruction::BitCast ||
        CE->getOpcode() == Instruction::GetElementPtr)
      return PointsToConstantGlobal(CE->getOperand(0));
  return false;
}

/// isOnlyCopiedFromConstantGlobal - Recursively walk the uses of a (derived)
/// pointer to an alloca.  Ignore any reads of the pointer, and return false if
/// we see any stores or other unknown uses.  If we see pointer arithmetic,
/// keep track of whether it moves the pointer (with isOffset) but otherwise
/// traverse the uses.  If we see a memcpy/memmove that targets an unoffset
/// pointer to the alloca, and if the source pointer is a pointer to a constant
/// global, we can optimize this.
static bool
isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
                               bool isOffset,
                               SmallVector<Instruction *, 4> &LifetimeMarkers) {
  // We track lifetime intrinsics as we encounter them.  If we decide to go
  // ahead and replace the value with the global, this lets the caller quickly
  // eliminate the markers.

  for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) {
    User *U = cast<Instruction>(*UI);

    if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
      // Ignore simple (non-volatile, non-atomic) loads; they are always ok.
      if (!LI->isSimple()) return false;
      continue;
    }

    if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
      // If uses of the bitcast are ok, we are ok.
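      // A bitcast never changes the address, so isOffset is passed through
      // unchanged.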
      if (!isOnlyCopiedFromConstantGlobal(BCI, TheCopy, isOffset,
                                          LifetimeMarkers))
        return false;
      continue;
    }
    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
      // If the GEP has all zero indices, it doesn't offset the pointer;
      // otherwise it does.
      if (!isOnlyCopiedFromConstantGlobal(GEP, TheCopy,
                                          isOffset || !GEP->hasAllZeroIndices(),
                                          LifetimeMarkers))
        return false;
      continue;
    }

    if (CallSite CS = U) {
      // If this is the function being called then we treat it like a load and
      // ignore it.
      if (CS.isCallee(UI))
        continue;

      // If this is a readonly/readnone call site, then we know it is just a
      // load (but one that potentially returns the value itself), so we can
      // ignore it if we know that the value isn't captured.
      unsigned ArgNo = CS.getArgumentNo(UI);
      if (CS.onlyReadsMemory() &&
          (CS.getInstruction()->use_empty() ||
           CS.paramHasAttr(ArgNo+1, Attribute::NoCapture)))
        continue;

      // If this is being passed as a byval argument, the caller is making a
      // copy, so it is only a read of the alloca.
      if (CS.paramHasAttr(ArgNo+1, Attribute::ByVal))
        continue;
    }

    // Lifetime intrinsics can be handled by the caller.
    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
      if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
          II->getIntrinsicID() == Intrinsic::lifetime_end) {
        assert(II->use_empty() && "Lifetime markers have no result to use!");
        LifetimeMarkers.push_back(II);
        continue;
      }
    }

    // If this isn't our memcpy/memmove, reject it as something we can't
    // handle.
    MemTransferInst *MI = dyn_cast<MemTransferInst>(U);
    if (MI == 0)
      return false;

    // If the transfer is using the alloca as a source of the transfer, then
    // ignore it since it is a load (unless the transfer is volatile).
    if (UI.getOperandNo() == 1) {
      if (MI->isVolatile()) return false;
      continue;
    }

    // If we have already seen a copy, reject the second one.
    if (TheCopy) return false;

    // If the pointer has been offset from the start of the alloca, we can't
    // safely handle this.
    if (isOffset) return false;

    // If the memintrinsic isn't using the alloca as the dest, reject it.
    if (UI.getOperandNo() != 0) return false;

    // If the source of the memcpy/move is not a constant global, reject it.
    if (!PointsToConstantGlobal(MI->getSource()))
      return false;

    // Otherwise, the transform is safe.  Remember the copy instruction.
    TheCopy = MI;
  }
  return true;
}

/// isOnlyCopiedFromConstantGlobal - Return the memcpy/memmove that initializes
/// the specified alloca if the alloca is only modified by a single copy from a
/// constant global, or null otherwise.  If we can prove this, we can replace
/// any uses of the alloca with uses of the global directly.
MemTransferInst *
SROA::isOnlyCopiedFromConstantGlobal(AllocaInst *AI,
                                     SmallVector<Instruction*, 4> &ToDelete) {
  MemTransferInst *TheCopy = 0;
  if (::isOnlyCopiedFromConstantGlobal(AI, TheCopy, false, ToDelete))
    return TheCopy;
  return 0;
}
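
// Illustrative example of the transform this enables (schematic IR): given
//   @G = internal constant [4 x i32] [i32 1, i32 2, i32 3, i32 4]
//   %A = alloca [4 x i32]
//   call void @llvm.memcpy.p0i8.p0i8.i64(... %A ..., ... @G ..., i64 16, ...)
// where %A is otherwise only read, the caller can replace all uses of %A with
// a (suitably bitcast) pointer to @G, then delete the alloca, the memcpy, and
// any lifetime markers collected in ToDelete.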