ScalarReplAggregates.cpp revision 204642
//===- ScalarReplAggregates.cpp - Scalar Replacement of Aggregates --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This transformation implements the well known scalar replacement of
// aggregates transformation. This xform breaks up alloca instructions of
// aggregate type (structure or array) into individual alloca instructions for
// each member (if possible). Then, if possible, it transforms the individual
// alloca instructions into nice clean scalar SSA form.
//
// This combines a simple SRoA algorithm with the Mem2Reg algorithm because
// they often interact, especially for C++ programs. As such, iterating
// between SRoA and Mem2Reg until we run out of things to promote works well.
//
//===----------------------------------------------------------------------===//
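//
// For illustration (example IR, not part of the original source): given
//
//   %p = alloca { i32, float }
//   %f0 = getelementptr { i32, float }* %p, i32 0, i32 0
//   store i32 1, i32* %f0
//
// SROA first splits %p into one alloca per field:
//
//   %p.0 = alloca i32
//   %p.1 = alloca float
//   store i32 1, i32* %p.0
//
// and Mem2Reg can then promote %p.0 and %p.1 to SSA registers, eliminating
// the memory traffic entirely.
//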

#define DEBUG_TYPE "scalarrepl"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/LLVMContext.h"
#include "llvm/Pass.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/IRBuilder.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;

STATISTIC(NumReplaced,  "Number of allocas broken up");
STATISTIC(NumPromoted,  "Number of allocas promoted");
STATISTIC(NumConverted, "Number of aggregates converted to scalar");
STATISTIC(NumGlobals,   "Number of allocas copied from constant global");

namespace {
  struct SROA : public FunctionPass {
    static char ID; // Pass identification, replacement for typeid
    explicit SROA(signed T = -1) : FunctionPass(&ID) {
      if (T == -1)
        SRThreshold = 128;
      else
        SRThreshold = T;
    }

    bool runOnFunction(Function &F);

    bool performScalarRepl(Function &F);
    bool performPromotion(Function &F);

    // getAnalysisUsage - This pass does not require any passes, but we know it
    // will not alter the CFG, so say so.
    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.addRequired<DominatorTree>();
      AU.addRequired<DominanceFrontier>();
      AU.setPreservesCFG();
    }

  private:
    TargetData *TD;

    /// DeadInsts - Keep track of instructions we have made dead, so that
    /// we can remove them after we are done working.
    SmallVector<Value*, 32> DeadInsts;

    /// AllocaInfo - When analyzing uses of an alloca instruction, this
    /// captures information about the uses. All these fields are initialized
    /// to false and set to true when something is learned.
    struct AllocaInfo {
      /// isUnsafe - This is set to true if the alloca cannot be SROA'd.
      bool isUnsafe : 1;

      /// isMemCpySrc - This is true if this aggregate is memcpy'd from.
      bool isMemCpySrc : 1;

      /// isMemCpyDst - This is true if this aggregate is memcpy'd into.
      bool isMemCpyDst : 1;

      AllocaInfo()
        : isUnsafe(false), isMemCpySrc(false), isMemCpyDst(false) {}
    };

    unsigned SRThreshold;

    void MarkUnsafe(AllocaInfo &I) { I.isUnsafe = true; }

    bool isSafeAllocaToScalarRepl(AllocaInst *AI);

    void isSafeForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
                             AllocaInfo &Info);
    void isSafeGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t &Offset,
                   AllocaInfo &Info);
    void isSafeMemAccess(AllocaInst *AI, uint64_t Offset, uint64_t MemSize,
                         const Type *MemOpType, bool isStore, AllocaInfo &Info);
    bool TypeHasComponent(const Type *T, uint64_t Offset, uint64_t Size);
    uint64_t FindElementAndOffset(const Type *&T, uint64_t &Offset,
                                  const Type *&IdxTy);

    void DoScalarReplacement(AllocaInst *AI,
                             std::vector<AllocaInst*> &WorkList);
    void DeleteDeadInstructions();
    AllocaInst *AddNewAlloca(Function &F, const Type *Ty, AllocaInst *Base);

    void RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
                              SmallVector<AllocaInst*, 32> &NewElts);
    void RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset,
                        SmallVector<AllocaInst*, 32> &NewElts);
    void RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset,
                    SmallVector<AllocaInst*, 32> &NewElts);
    void RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
                                      AllocaInst *AI,
                                      SmallVector<AllocaInst*, 32> &NewElts);
    void RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
                                       SmallVector<AllocaInst*, 32> &NewElts);
    void RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
                                      SmallVector<AllocaInst*, 32> &NewElts);

    bool CanConvertToScalar(Value *V, bool &IsNotTrivial, const Type *&VecTy,
                            bool &SawVec, uint64_t Offset, unsigned AllocaSize);
    void ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset);
    Value *ConvertScalar_ExtractValue(Value *NV, const Type *ToType,
                                      uint64_t Offset, IRBuilder<> &Builder);
    Value *ConvertScalar_InsertValue(Value *StoredVal, Value *ExistingVal,
                                     uint64_t Offset, IRBuilder<> &Builder);
    static Instruction *isOnlyCopiedFromConstantGlobal(AllocaInst *AI);
  };
}

char SROA::ID = 0;
static RegisterPass<SROA> X("scalarrepl", "Scalar Replacement of Aggregates");

// Public interface to the ScalarReplAggregates pass
FunctionPass *llvm::createScalarReplAggregatesPass(signed int Threshold) {
  return new SROA(Threshold);
}
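
// Illustrative usage from a client (not part of the original file): the pass
// would typically be scheduled through a pass manager, e.g.
//
//   PassManager PM;
//   PM.add(createScalarReplAggregatesPass(/*Threshold=*/128));
//   PM.run(M);  // M is the Module being optimized.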


bool SROA::runOnFunction(Function &F) {
  TD = getAnalysisIfAvailable<TargetData>();

  bool Changed = performPromotion(F);

  // FIXME: ScalarRepl currently depends on TargetData more than it
  // theoretically needs to. It should be refactored in order to support
  // target-independent IR. Until this is done, just skip the actual
  // scalar-replacement portion of this pass.
  if (!TD) return Changed;

  while (1) {
    bool LocalChange = performScalarRepl(F);
    if (!LocalChange) break;   // No need to repromote if no scalarrepl
    Changed = true;
    LocalChange = performPromotion(F);
    if (!LocalChange) break;   // No need to re-scalarrepl if no promotion
  }

  return Changed;
}


bool SROA::performPromotion(Function &F) {
  std::vector<AllocaInst*> Allocas;
  DominatorTree     &DT = getAnalysis<DominatorTree>();
  DominanceFrontier &DF = getAnalysis<DominanceFrontier>();

  BasicBlock &BB = F.getEntryBlock();  // Get the entry node for the function

  bool Changed = false;

  while (1) {
    Allocas.clear();

    // Find allocas that are safe to promote, by looking at all instructions
    // in the entry node.
    for (BasicBlock::iterator I = BB.begin(), E = --BB.end(); I != E; ++I)
      if (AllocaInst *AI = dyn_cast<AllocaInst>(I))       // Is it an alloca?
        if (isAllocaPromotable(AI))
          Allocas.push_back(AI);

    if (Allocas.empty()) break;

    PromoteMemToReg(Allocas, DT, DF);
    NumPromoted += Allocas.size();
    Changed = true;
  }

  return Changed;
}

/// ShouldAttemptScalarRepl - Decide if an alloca is a good candidate for
/// SROA. It must be a struct or array type with a small number of elements.
static bool ShouldAttemptScalarRepl(AllocaInst *AI) {
  const Type *T = AI->getAllocatedType();
  // Do not promote any struct into more than 32 separate vars.
  if (const StructType *ST = dyn_cast<StructType>(T))
    return ST->getNumElements() <= 32;
  // Arrays are much less likely to be safe for SROA; only consider
  // them if they are very small.
  if (const ArrayType *AT = dyn_cast<ArrayType>(T))
    return AT->getNumElements() <= 8;
  return false;
}
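
// For example (illustrative): an alloca of { i32, i8*, [4 x i16] } (three
// struct elements) or of [8 x float] passes this test, while [100 x i32]
// (too many array elements) and a plain i64 (not an aggregate) do not.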

// performScalarRepl - This algorithm is a simple worklist driven algorithm,
// which runs on all of the alloca instructions in the function, removing
// them if they are only used by getelementptr instructions.
//
bool SROA::performScalarRepl(Function &F) {
  std::vector<AllocaInst*> WorkList;

  // Scan the entry basic block, adding any allocas to the worklist.
  BasicBlock &BB = F.getEntryBlock();
  for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I)
    if (AllocaInst *A = dyn_cast<AllocaInst>(I))
      WorkList.push_back(A);

  // Process the worklist.
  bool Changed = false;
  while (!WorkList.empty()) {
    AllocaInst *AI = WorkList.back();
    WorkList.pop_back();

    // Handle dead allocas trivially. These can be formed by SROA'ing arrays
    // with unused elements.
    if (AI->use_empty()) {
      AI->eraseFromParent();
      continue;
    }

    // If this alloca is impossible for us to promote, reject it early.
    if (AI->isArrayAllocation() || !AI->getAllocatedType()->isSized())
      continue;

    // Check to see if this allocation is only modified by a memcpy/memmove
    // from a constant global. If this is the case, we can change all users to
    // use the constant global instead. This is commonly produced by the CFE
    // by constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }"
    // if 'A' is only subsequently read.
    if (Instruction *TheCopy = isOnlyCopiedFromConstantGlobal(AI)) {
      DEBUG(dbgs() << "Found alloca equal to global: " << *AI << '\n');
      DEBUG(dbgs() << "  memcpy = " << *TheCopy << '\n');
      Constant *TheSrc = cast<Constant>(TheCopy->getOperand(2));
      AI->replaceAllUsesWith(ConstantExpr::getBitCast(TheSrc, AI->getType()));
      TheCopy->eraseFromParent();  // Don't mutate the global.
      AI->eraseFromParent();
      ++NumGlobals;
      Changed = true;
      continue;
    }

    // Check to see if we can perform the core SROA transformation. We cannot
    // transform the allocation instruction if it is an array allocation
    // (allocations OF arrays are ok though), and an allocation of a scalar
    // value cannot be decomposed at all.
    uint64_t AllocaSize = TD->getTypeAllocSize(AI->getAllocatedType());

    // Do not promote [0 x %struct].
    if (AllocaSize == 0) continue;

    // If the alloca looks like a good candidate for scalar replacement, and
    // if all its users can be transformed, then split up the aggregate into
    // its separate elements.
    if (ShouldAttemptScalarRepl(AI) && isSafeAllocaToScalarRepl(AI)) {
      DoScalarReplacement(AI, WorkList);
      Changed = true;
      continue;
    }

    // Do not promote any struct whose size is too big.
    if (AllocaSize > SRThreshold) continue;

    // Check if we can turn this aggregate value (potentially with casts) into
    // a simple scalar value that can be mem2reg'd into a register value.
    // IsNotTrivial tracks whether this is something that mem2reg could have
    // promoted itself. If so, we don't want to transform it needlessly. Note
    // that we can't just check based on the type: the alloca may be of an i32
    // but that has pointer arithmetic to set byte 3 of it or something.
    bool IsNotTrivial = false;
    const Type *VectorTy = 0;
    bool HadAVector = false;
    if (CanConvertToScalar(AI, IsNotTrivial, VectorTy, HadAVector,
                           0, unsigned(AllocaSize)) && IsNotTrivial) {
      AllocaInst *NewAI;
      // If we were able to find a vector type that can handle this with
      // insert/extract elements, and if there was at least one use that had
      // a vector type, promote this to a vector. We don't want to promote
      // random stuff that doesn't use vectors (e.g. <9 x double>) because then
      // we just get a lot of insert/extracts. If at least one vector is
      // involved, then we probably really do have a union of vector/array.
      if (VectorTy && VectorTy->isVectorTy() && HadAVector) {
        DEBUG(dbgs() << "CONVERT TO VECTOR: " << *AI << "\n  TYPE = "
                     << *VectorTy << '\n');

        // Create and insert the vector alloca.
        NewAI = new AllocaInst(VectorTy, 0, "", AI->getParent()->begin());
        ConvertUsesToScalar(AI, NewAI, 0);
      } else {
        DEBUG(dbgs() << "CONVERT TO SCALAR INTEGER: " << *AI << "\n");

        // Create and insert the integer alloca.
        const Type *NewTy = IntegerType::get(AI->getContext(), AllocaSize*8);
        NewAI = new AllocaInst(NewTy, 0, "", AI->getParent()->begin());
        ConvertUsesToScalar(AI, NewAI, 0);
      }
      NewAI->takeName(AI);
      AI->eraseFromParent();
      ++NumConverted;
      Changed = true;
      continue;
    }

    // Otherwise, couldn't process this alloca.
  }

  return Changed;
}
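
// For illustration (example IR, not from the original source), matching the
// comment above about byte accesses into an i32 alloca:
//
//   %a = alloca i32
//   %p = bitcast i32* %a to i8*
//   %b = getelementptr i8* %p, i32 3
//   store i8 7, i8* %b
//
// mem2reg cannot promote %a directly, but CanConvertToScalar accepts it, and
// ConvertUsesToScalar rewrites the byte store as a load/mask/shift/or/store
// sequence on a single i32 alloca, which a later mem2reg run can promote.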

/// DoScalarReplacement - This alloca satisfied the isSafeAllocaToScalarRepl
/// predicate, do SROA now.
void SROA::DoScalarReplacement(AllocaInst *AI,
                               std::vector<AllocaInst*> &WorkList) {
  DEBUG(dbgs() << "Found inst to SROA: " << *AI << '\n');
  SmallVector<AllocaInst*, 32> ElementAllocas;
  if (const StructType *ST = dyn_cast<StructType>(AI->getAllocatedType())) {
    ElementAllocas.reserve(ST->getNumContainedTypes());
    for (unsigned i = 0, e = ST->getNumContainedTypes(); i != e; ++i) {
      AllocaInst *NA = new AllocaInst(ST->getContainedType(i), 0,
                                      AI->getAlignment(),
                                      AI->getName() + "." + Twine(i), AI);
      ElementAllocas.push_back(NA);
      WorkList.push_back(NA);  // Add to worklist for recursive processing
    }
  } else {
    const ArrayType *AT = cast<ArrayType>(AI->getAllocatedType());
    ElementAllocas.reserve(AT->getNumElements());
    const Type *ElTy = AT->getElementType();
    for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
      AllocaInst *NA = new AllocaInst(ElTy, 0, AI->getAlignment(),
                                      AI->getName() + "." + Twine(i), AI);
      ElementAllocas.push_back(NA);
      WorkList.push_back(NA);  // Add to worklist for recursive processing
    }
  }

  // Now that we have created the new alloca instructions, rewrite all the
  // uses of the old alloca.
  RewriteForScalarRepl(AI, AI, 0, ElementAllocas);

  // Now erase any instructions that were made dead while rewriting the
  // alloca.
  DeleteDeadInstructions();
  AI->eraseFromParent();

  NumReplaced++;
}

/// DeleteDeadInstructions - Erase instructions on the DeadInstrs list,
/// recursively including all their operands that become trivially dead.
void SROA::DeleteDeadInstructions() {
  while (!DeadInsts.empty()) {
    Instruction *I = cast<Instruction>(DeadInsts.pop_back_val());

    for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI)
      if (Instruction *U = dyn_cast<Instruction>(*OI)) {
        // Zero out the operand and see if it becomes trivially dead.
        // (But, don't add allocas to the dead instruction list -- they are
        // already on the worklist and will be deleted separately.)
        *OI = 0;
        if (isInstructionTriviallyDead(U) && !isa<AllocaInst>(U))
          DeadInsts.push_back(U);
      }

    I->eraseFromParent();
  }
}

/// isSafeForScalarRepl - Check if instruction I is a safe use with regard to
/// performing scalar replacement of alloca AI. The results are flagged in
/// the Info parameter. Offset indicates the position within AI that is
/// referenced by this instruction.
void SROA::isSafeForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
                               AllocaInfo &Info) {
  for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E; ++UI) {
    Instruction *User = cast<Instruction>(*UI);

    if (BitCastInst *BC = dyn_cast<BitCastInst>(User)) {
      isSafeForScalarRepl(BC, AI, Offset, Info);
    } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) {
      uint64_t GEPOffset = Offset;
      isSafeGEP(GEPI, AI, GEPOffset, Info);
      if (!Info.isUnsafe)
        isSafeForScalarRepl(GEPI, AI, GEPOffset, Info);
    } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(UI)) {
      ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength());
      if (Length)
        isSafeMemAccess(AI, Offset, Length->getZExtValue(), 0,
                        UI.getOperandNo() == 1, Info);
      else
        MarkUnsafe(Info);
    } else if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
      if (!LI->isVolatile()) {
        const Type *LIType = LI->getType();
        isSafeMemAccess(AI, Offset, TD->getTypeAllocSize(LIType),
                        LIType, false, Info);
      } else
        MarkUnsafe(Info);
    } else if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
      // Store is ok if storing INTO the pointer, not storing the pointer.
      if (!SI->isVolatile() && SI->getOperand(0) != I) {
        const Type *SIType = SI->getOperand(0)->getType();
        isSafeMemAccess(AI, Offset, TD->getTypeAllocSize(SIType),
                        SIType, true, Info);
      } else
        MarkUnsafe(Info);
    } else {
      DEBUG(errs() << "  Transformation preventing inst: " << *User << '\n');
      MarkUnsafe(Info);
    }
    if (Info.isUnsafe) return;
  }
}

/// isSafeGEP - Check if a GEP instruction can be handled for scalar
/// replacement. It is safe when all the indices are constant, in-bounds
/// references, and when the resulting offset corresponds to an element within
/// the alloca type. The results are flagged in the Info parameter. Upon
/// return, Offset is adjusted as specified by the GEP indices.
void SROA::isSafeGEP(GetElementPtrInst *GEPI, AllocaInst *AI,
                     uint64_t &Offset, AllocaInfo &Info) {
  gep_type_iterator GEPIt = gep_type_begin(GEPI), E = gep_type_end(GEPI);
  if (GEPIt == E)
    return;

  // Walk through the GEP type indices, checking the types that this indexes
  // into.
  for (; GEPIt != E; ++GEPIt) {
    // Ignore struct elements, no extra checking needed for these.
    if ((*GEPIt)->isStructTy())
      continue;

    ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPIt.getOperand());
    if (!IdxVal)
      return MarkUnsafe(Info);
  }

  // Compute the offset due to this GEP and check if the alloca has a
  // component element at that offset.
  SmallVector<Value*, 8> Indices(GEPI->op_begin() + 1, GEPI->op_end());
  Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(),
                                 &Indices[0], Indices.size());
  if (!TypeHasComponent(AI->getAllocatedType(), Offset, 0))
    MarkUnsafe(Info);
}

/// isSafeMemAccess - Check if a load/store/memcpy operates on the entire AI
/// alloca or has an offset and size that corresponds to a component element
/// within it. The offset checked here may have been formed from a GEP with a
/// pointer bitcasted to a different type.
void SROA::isSafeMemAccess(AllocaInst *AI, uint64_t Offset, uint64_t MemSize,
                           const Type *MemOpType, bool isStore,
                           AllocaInfo &Info) {
  // Check if this is a load/store of the entire alloca.
  if (Offset == 0 && MemSize == TD->getTypeAllocSize(AI->getAllocatedType())) {
    bool UsesAggregateType = (MemOpType == AI->getAllocatedType());
    // This is safe for MemIntrinsics (where MemOpType is 0), integer types
    // (which are essentially the same as the MemIntrinsics, especially with
    // regard to copying padding between elements), or references using the
    // aggregate type of the alloca.
    if (!MemOpType || MemOpType->isIntegerTy() || UsesAggregateType) {
      if (!UsesAggregateType) {
        if (isStore)
          Info.isMemCpyDst = true;
        else
          Info.isMemCpySrc = true;
      }
      return;
    }
  }
  // Check if the offset/size correspond to a component within the alloca type.
  const Type *T = AI->getAllocatedType();
  if (TypeHasComponent(T, Offset, MemSize))
    return;

  return MarkUnsafe(Info);
}

/// TypeHasComponent - Return true if T has a component type with the
/// specified offset and size. If Size is zero, do not check the size.
bool SROA::TypeHasComponent(const Type *T, uint64_t Offset, uint64_t Size) {
  const Type *EltTy;
  uint64_t EltSize;
  if (const StructType *ST = dyn_cast<StructType>(T)) {
    const StructLayout *Layout = TD->getStructLayout(ST);
    unsigned EltIdx = Layout->getElementContainingOffset(Offset);
    EltTy = ST->getContainedType(EltIdx);
    EltSize = TD->getTypeAllocSize(EltTy);
    Offset -= Layout->getElementOffset(EltIdx);
  } else if (const ArrayType *AT = dyn_cast<ArrayType>(T)) {
    EltTy = AT->getElementType();
    EltSize = TD->getTypeAllocSize(EltTy);
    if (Offset >= AT->getNumElements() * EltSize)
      return false;
    Offset %= EltSize;
  } else {
    return false;
  }
  if (Offset == 0 && (Size == 0 || EltSize == Size))
    return true;
  // Check if the component spans multiple elements.
  if (Offset + Size > EltSize)
    return false;
  return TypeHasComponent(EltTy, Offset, Size);
}
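
// Worked example (illustrative, assuming a typical TargetData where i32 is 4
// bytes and i16 is 2): for T = { i32, [2 x i16] }, Offset = 6, Size = 2, the
// struct case selects element 1 ([2 x i16], at byte 4) and recurses with
// Offset = 2; the array case then selects the second i16, leaving Offset = 0
// with EltSize == Size, so the query returns true. Offset = 5 with the same
// Size lands mid-element and returns false.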

/// RewriteForScalarRepl - Alloca AI is being split into NewElts, so rewrite
/// the instruction I, which references it, to use the separate elements.
/// Offset indicates the position within AI that is referenced by this
/// instruction.
void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
                                SmallVector<AllocaInst*, 32> &NewElts) {
  for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E; ++UI) {
    Instruction *User = cast<Instruction>(*UI);

    if (BitCastInst *BC = dyn_cast<BitCastInst>(User)) {
      RewriteBitCast(BC, AI, Offset, NewElts);
    } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) {
      RewriteGEP(GEPI, AI, Offset, NewElts);
    } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) {
      ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength());
      uint64_t MemSize = Length->getZExtValue();
      if (Offset == 0 &&
          MemSize == TD->getTypeAllocSize(AI->getAllocatedType()))
        RewriteMemIntrinUserOfAlloca(MI, I, AI, NewElts);
      // Otherwise the intrinsic can only touch a single element and the
      // address operand will be updated, so nothing else needs to be done.
    } else if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
      const Type *LIType = LI->getType();
      if (LIType == AI->getAllocatedType()) {
        // Replace:
        //   %res = load { i32, i32 }* %alloc
        // with:
        //   %load.0 = load i32* %alloc.0
        //   %insert.0 = insertvalue { i32, i32 } undef, i32 %load.0, 0
        //   %load.1 = load i32* %alloc.1
        //   %insert = insertvalue { i32, i32 } %insert.0, i32 %load.1, 1
        // (Also works for arrays instead of structs)
        Value *Insert = UndefValue::get(LIType);
        for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
          Value *Load = new LoadInst(NewElts[i], "load", LI);
          Insert = InsertValueInst::Create(Insert, Load, i, "insert", LI);
        }
        LI->replaceAllUsesWith(Insert);
        DeadInsts.push_back(LI);
      } else if (LIType->isIntegerTy() &&
                 TD->getTypeAllocSize(LIType) ==
                 TD->getTypeAllocSize(AI->getAllocatedType())) {
        // If this is a load of the entire alloca to an integer, rewrite it.
        RewriteLoadUserOfWholeAlloca(LI, AI, NewElts);
      }
    } else if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
      Value *Val = SI->getOperand(0);
      const Type *SIType = Val->getType();
      if (SIType == AI->getAllocatedType()) {
        // Replace:
        //   store { i32, i32 } %val, { i32, i32 }* %alloc
        // with:
        //   %val.0 = extractvalue { i32, i32 } %val, 0
        //   store i32 %val.0, i32* %alloc.0
        //   %val.1 = extractvalue { i32, i32 } %val, 1
        //   store i32 %val.1, i32* %alloc.1
        // (Also works for arrays instead of structs)
        for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
          Value *Extract = ExtractValueInst::Create(Val, i, Val->getName(), SI);
          new StoreInst(Extract, NewElts[i], SI);
        }
        DeadInsts.push_back(SI);
      } else if (SIType->isIntegerTy() &&
                 TD->getTypeAllocSize(SIType) ==
                 TD->getTypeAllocSize(AI->getAllocatedType())) {
        // If this is a store of the entire alloca from an integer, rewrite it.
        RewriteStoreUserOfWholeAlloca(SI, AI, NewElts);
      }
    }
  }
}

/// RewriteBitCast - Update a bitcast reference to the alloca being replaced
/// and recursively continue updating all of its uses.
void SROA::RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset,
                          SmallVector<AllocaInst*, 32> &NewElts) {
  RewriteForScalarRepl(BC, AI, Offset, NewElts);
  if (BC->getOperand(0) != AI)
    return;

  // The bitcast references the original alloca. Replace its uses with
  // references to the first new element alloca.
  Instruction *Val = NewElts[0];
  if (Val->getType() != BC->getDestTy()) {
    Val = new BitCastInst(Val, BC->getDestTy(), "", BC);
    Val->takeName(BC);
  }
  BC->replaceAllUsesWith(Val);
  DeadInsts.push_back(BC);
}

/// FindElementAndOffset - Return the index of the element containing Offset
/// within the specified type, which must be either a struct or an array.
/// Sets T to the type of the element and Offset to the offset within that
/// element. IdxTy is set to the type of the index result to be used in a
/// GEP instruction.
uint64_t SROA::FindElementAndOffset(const Type *&T, uint64_t &Offset,
                                    const Type *&IdxTy) {
  uint64_t Idx = 0;
  if (const StructType *ST = dyn_cast<StructType>(T)) {
    const StructLayout *Layout = TD->getStructLayout(ST);
    Idx = Layout->getElementContainingOffset(Offset);
    T = ST->getContainedType(Idx);
    Offset -= Layout->getElementOffset(Idx);
    IdxTy = Type::getInt32Ty(T->getContext());
    return Idx;
  }
  const ArrayType *AT = cast<ArrayType>(T);
  T = AT->getElementType();
  uint64_t EltSize = TD->getTypeAllocSize(T);
  Idx = Offset / EltSize;
  Offset -= Idx * EltSize;
  IdxTy = Type::getInt64Ty(T->getContext());
  return Idx;
}
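
// Worked example (illustrative): with T = [6 x i16] and Offset = 7, EltSize
// is 2, so this returns Idx = 3 and leaves T = i16, Offset = 1, IdxTy = i64.
// Struct indices use i32 and array indices use i64, matching the index
// operand types conventionally used for each in a GEP.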

/// RewriteGEP - Check if this GEP instruction moves the pointer across
/// elements of the alloca that are being split apart, and if so, rewrite
/// the GEP to be relative to the new element.
void SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset,
                      SmallVector<AllocaInst*, 32> &NewElts) {
  uint64_t OldOffset = Offset;
  SmallVector<Value*, 8> Indices(GEPI->op_begin() + 1, GEPI->op_end());
  Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(),
                                 &Indices[0], Indices.size());

  RewriteForScalarRepl(GEPI, AI, Offset, NewElts);

  const Type *T = AI->getAllocatedType();
  const Type *IdxTy;
  uint64_t OldIdx = FindElementAndOffset(T, OldOffset, IdxTy);
  if (GEPI->getOperand(0) == AI)
    OldIdx = ~0ULL; // Force the GEP to be rewritten.

  T = AI->getAllocatedType();
  uint64_t EltOffset = Offset;
  uint64_t Idx = FindElementAndOffset(T, EltOffset, IdxTy);

  // If this GEP does not move the pointer across elements of the alloca
  // being split, then it does not need to be rewritten.
  if (Idx == OldIdx)
    return;

  const Type *i32Ty = Type::getInt32Ty(AI->getContext());
  SmallVector<Value*, 8> NewArgs;
  NewArgs.push_back(Constant::getNullValue(i32Ty));
  while (EltOffset != 0) {
    uint64_t EltIdx = FindElementAndOffset(T, EltOffset, IdxTy);
    NewArgs.push_back(ConstantInt::get(IdxTy, EltIdx));
  }
  Instruction *Val = NewElts[Idx];
  if (NewArgs.size() > 1) {
    Val = GetElementPtrInst::CreateInBounds(Val, NewArgs.begin(),
                                            NewArgs.end(), "", GEPI);
    Val->takeName(GEPI);
  }
  if (Val->getType() != GEPI->getType())
    Val = new BitCastInst(Val, GEPI->getType(), Val->getName(), GEPI);
  GEPI->replaceAllUsesWith(Val);
  DeadInsts.push_back(GEPI);
}
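
// For illustration (example IR, not from the original source): if
// %A = alloca [2 x { i32, i32 }] is split into %A.0 and %A.1 (each of type
// { i32, i32 }*), then
//
//   %g = getelementptr [2 x { i32, i32 }]* %A, i32 0, i32 1, i32 1
//
// crosses into element 1 of the split alloca and is rewritten as
//
//   %g = getelementptr inbounds { i32, i32 }* %A.1, i32 0, i32 1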

/// RewriteMemIntrinUserOfAlloca - MI is a memcpy/memset/memmove from or to AI.
/// Rewrite it to copy or set the elements of the scalarized memory.
void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
                                        AllocaInst *AI,
                                        SmallVector<AllocaInst*, 32> &NewElts) {
  // If this is a memcpy/memmove, construct the other pointer as the
  // appropriate type. The "Other" pointer is the pointer that goes to memory
  // that doesn't have anything to do with the alloca that we are promoting.
  // For memset, this Value* stays null.
  Value *OtherPtr = 0;
  LLVMContext &Context = MI->getContext();
  unsigned MemAlignment = MI->getAlignment();
  if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) { // memmove/memcpy
    if (Inst == MTI->getRawDest())
      OtherPtr = MTI->getRawSource();
    else {
      assert(Inst == MTI->getRawSource());
      OtherPtr = MTI->getRawDest();
    }
  }

  // If there is an other pointer, we want to convert it to the same pointer
  // type as AI has, so we can GEP through it safely.
  if (OtherPtr) {
    // Remove bitcasts and all-zero GEPs from OtherPtr. This is an
    // optimization, but it's also required to detect the corner case where
    // both pointer operands are referencing the same memory, and where
    // OtherPtr may be a bitcast or GEP that is currently being rewritten.
    // (This function is only called for mem intrinsics that access the whole
    // aggregate, so non-zero GEPs are not an issue here.)
    while (1) {
      if (BitCastInst *BC = dyn_cast<BitCastInst>(OtherPtr)) {
        OtherPtr = BC->getOperand(0);
        continue;
      }
      if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(OtherPtr)) {
        // All zero GEPs are effectively bitcasts.
        if (GEP->hasAllZeroIndices()) {
          OtherPtr = GEP->getOperand(0);
          continue;
        }
      }
      break;
    }
    // Copying the alloca to itself is a no-op: just delete it.
    if (OtherPtr == AI || OtherPtr == NewElts[0]) {
      // This code will run twice for a no-op memcpy -- once for each operand.
      // Put only one reference to MI on the DeadInsts list.
      for (SmallVector<Value*, 32>::const_iterator I = DeadInsts.begin(),
             E = DeadInsts.end(); I != E; ++I)
        if (*I == MI) return;
      DeadInsts.push_back(MI);
      return;
    }

    if (ConstantExpr *BCE = dyn_cast<ConstantExpr>(OtherPtr))
      if (BCE->getOpcode() == Instruction::BitCast)
        OtherPtr = BCE->getOperand(0);

    // If the pointer is not the right type, insert a bitcast to the right
    // type.
    if (OtherPtr->getType() != AI->getType())
      OtherPtr = new BitCastInst(OtherPtr, AI->getType(), OtherPtr->getName(),
                                 MI);
  }

  // Process each element of the aggregate.
  Value *TheFn = MI->getOperand(0);
  const Type *BytePtrTy = MI->getRawDest()->getType();
  bool SROADest = MI->getRawDest() == Inst;

  Constant *Zero = Constant::getNullValue(Type::getInt32Ty(MI->getContext()));

  for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
    // If this is a memcpy/memmove, emit a GEP of the other element address.
    Value *OtherElt = 0;
    unsigned OtherEltAlign = MemAlignment;

    if (OtherPtr) {
      Value *Idx[2] = { Zero,
                      ConstantInt::get(Type::getInt32Ty(MI->getContext()), i) };
      OtherElt = GetElementPtrInst::CreateInBounds(OtherPtr, Idx, Idx + 2,
                                              OtherPtr->getName()+"."+Twine(i),
                                                   MI);
      uint64_t EltOffset;
      const PointerType *OtherPtrTy = cast<PointerType>(OtherPtr->getType());
      if (const StructType *ST =
            dyn_cast<StructType>(OtherPtrTy->getElementType())) {
        EltOffset = TD->getStructLayout(ST)->getElementOffset(i);
      } else {
        const Type *EltTy =
          cast<SequentialType>(OtherPtr->getType())->getElementType();
        EltOffset = TD->getTypeAllocSize(EltTy)*i;
      }

      // The alignment of the other pointer is the guaranteed alignment of the
      // element, which is affected by both the known alignment of the whole
      // mem intrinsic and the alignment of the element. If the alignment of
      // the memcpy (f.e.) is 32 but the element is at a 4-byte offset, then
      // the known alignment is just 4 bytes.
      OtherEltAlign = (unsigned)MinAlign(OtherEltAlign, EltOffset);
    }

    Value *EltPtr = NewElts[i];
    const Type *EltTy = cast<PointerType>(EltPtr->getType())->getElementType();

    // If we got down to a scalar, insert a load or store as appropriate.
    if (EltTy->isSingleValueType()) {
      if (isa<MemTransferInst>(MI)) {
        if (SROADest) {
          // From Other to Alloca.
          Value *Elt = new LoadInst(OtherElt, "tmp", false, OtherEltAlign, MI);
          new StoreInst(Elt, EltPtr, MI);
        } else {
          // From Alloca to Other.
          Value *Elt = new LoadInst(EltPtr, "tmp", MI);
          new StoreInst(Elt, OtherElt, false, OtherEltAlign, MI);
        }
        continue;
      }
      assert(isa<MemSetInst>(MI));

      // If the stored element is zero (common case), just store a null
      // constant.
      Constant *StoreVal;
      if (ConstantInt *CI = dyn_cast<ConstantInt>(MI->getOperand(2))) {
        if (CI->isZero()) {
          StoreVal = Constant::getNullValue(EltTy);  // 0.0, null, 0, <0,0>
        } else {
          // If EltTy is a vector type, get the element type.
          const Type *ValTy = EltTy->getScalarType();

          // Construct an integer with the right value.
          unsigned EltSize = TD->getTypeSizeInBits(ValTy);
          APInt OneVal(EltSize, CI->getZExtValue());
          APInt TotalVal(OneVal);
          // Set each byte.
          for (unsigned i = 0; 8*i < EltSize; ++i) {
            TotalVal = TotalVal.shl(8);
            TotalVal |= OneVal;
          }

          // Convert the integer value to the appropriate type.
          StoreVal = ConstantInt::get(Context, TotalVal);
          if (ValTy->isPointerTy())
            StoreVal = ConstantExpr::getIntToPtr(StoreVal, ValTy);
          else if (ValTy->isFloatingPointTy())
            StoreVal = ConstantExpr::getBitCast(StoreVal, ValTy);
          assert(StoreVal->getType() == ValTy && "Type mismatch!");

          // If the requested value was a vector constant, create it.
          if (EltTy != ValTy) {
            unsigned NumElts = cast<VectorType>(EltTy)->getNumElements();
            SmallVector<Constant*, 16> Elts(NumElts, StoreVal);
            StoreVal = ConstantVector::get(&Elts[0], NumElts);
          }
        }
        new StoreInst(StoreVal, EltPtr, MI);
        continue;
      }
      // Otherwise, if we're storing a byte variable, use a memset call for
      // this element.
    }

    // Cast the element pointer to BytePtrTy.
    if (EltPtr->getType() != BytePtrTy)
      EltPtr = new BitCastInst(EltPtr, BytePtrTy, EltPtr->getName(), MI);

    // Cast the other pointer (if we have one) to BytePtrTy.
    if (OtherElt && OtherElt->getType() != BytePtrTy)
      OtherElt = new BitCastInst(OtherElt, BytePtrTy, OtherElt->getName(), MI);

    unsigned EltSize = TD->getTypeAllocSize(EltTy);

    // Finally, insert the meminst for this element.
    if (isa<MemTransferInst>(MI)) {
      Value *Ops[] = {
        SROADest ? EltPtr : OtherElt,  // Dest ptr
        SROADest ? OtherElt : EltPtr,  // Src ptr
        ConstantInt::get(MI->getOperand(3)->getType(), EltSize), // Size
        // Align
        ConstantInt::get(Type::getInt32Ty(MI->getContext()), OtherEltAlign)
      };
      CallInst::Create(TheFn, Ops, Ops + 4, "", MI);
    } else {
      assert(isa<MemSetInst>(MI));
      Value *Ops[] = {
        EltPtr, MI->getOperand(2),  // Dest, Value,
        ConstantInt::get(MI->getOperand(3)->getType(), EltSize),  // Size
        Zero  // Align
      };
      CallInst::Create(TheFn, Ops, Ops + 4, "", MI);
    }
  }
  DeadInsts.push_back(MI);
}

/// RewriteStoreUserOfWholeAlloca - We found a store of an integer that
/// overwrites the entire allocation. Extract out the pieces of the stored
/// integer and store them individually.
void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
                                         SmallVector<AllocaInst*, 32> &NewElts){
  // Extract each element out of the integer according to its structure offset
  // and store the element value to the individual alloca.
  Value *SrcVal = SI->getOperand(0);
  const Type *AllocaEltTy = AI->getAllocatedType();
  uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy);

  // Handle tail padding by extending the operand
  if (TD->getTypeSizeInBits(SrcVal->getType()) != AllocaSizeBits)
    SrcVal = new ZExtInst(SrcVal,
                          IntegerType::get(SI->getContext(), AllocaSizeBits),
                          "", SI);

  DEBUG(dbgs() << "PROMOTING STORE TO WHOLE ALLOCA: " << *AI << '\n' << *SI
               << '\n');

  // There are two forms here: AI could be an array or struct. Both cases
  // have different ways to compute the element offset.
  if (const StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) {
    const StructLayout *Layout = TD->getStructLayout(EltSTy);

    for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
      // Get the number of bits to shift SrcVal to get the value.
      const Type *FieldTy = EltSTy->getElementType(i);
      uint64_t Shift = Layout->getElementOffsetInBits(i);

      if (TD->isBigEndian())
        Shift = AllocaSizeBits-Shift-TD->getTypeAllocSizeInBits(FieldTy);

      Value *EltVal = SrcVal;
      if (Shift) {
        Value *ShiftVal = ConstantInt::get(EltVal->getType(), Shift);
        EltVal = BinaryOperator::CreateLShr(EltVal, ShiftVal,
                                            "sroa.store.elt", SI);
      }

      // Truncate down to an integer of the right size.
      uint64_t FieldSizeBits = TD->getTypeSizeInBits(FieldTy);

      // Ignore zero sized fields like {}, they obviously contain no data.
      if (FieldSizeBits == 0) continue;

      if (FieldSizeBits != AllocaSizeBits)
        EltVal = new TruncInst(EltVal,
                             IntegerType::get(SI->getContext(), FieldSizeBits),
                               "", SI);
      Value *DestField = NewElts[i];
      if (EltVal->getType() == FieldTy) {
        // Storing to an integer field of this size, just do it.
      } else if (FieldTy->isFloatingPointTy() || FieldTy->isVectorTy()) {
        // Bitcast to the right element type (for fp/vector values).
        EltVal = new BitCastInst(EltVal, FieldTy, "", SI);
      } else {
        // Otherwise, bitcast the dest pointer (for aggregates).
        DestField = new BitCastInst(DestField,
                                    PointerType::getUnqual(EltVal->getType()),
                                    "", SI);
      }
      new StoreInst(EltVal, DestField, SI);
    }

  } else {
    const ArrayType *ATy = cast<ArrayType>(AllocaEltTy);
    const Type *ArrayEltTy = ATy->getElementType();
    uint64_t ElementOffset = TD->getTypeAllocSizeInBits(ArrayEltTy);
    uint64_t ElementSizeBits = TD->getTypeSizeInBits(ArrayEltTy);

    uint64_t Shift;

    if (TD->isBigEndian())
      Shift = AllocaSizeBits-ElementOffset;
    else
      Shift = 0;

    for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
      // Ignore zero sized fields like {}, they obviously contain no data.
      if (ElementSizeBits == 0) continue;

      Value *EltVal = SrcVal;
      if (Shift) {
        Value *ShiftVal = ConstantInt::get(EltVal->getType(), Shift);
        EltVal = BinaryOperator::CreateLShr(EltVal, ShiftVal,
                                            "sroa.store.elt", SI);
      }

      // Truncate down to an integer of the right size.
      if (ElementSizeBits != AllocaSizeBits)
        EltVal = new TruncInst(EltVal,
                               IntegerType::get(SI->getContext(),
                                                ElementSizeBits), "", SI);
      Value *DestField = NewElts[i];
      if (EltVal->getType() == ArrayEltTy) {
        // Storing to an integer field of this size, just do it.
      } else if (ArrayEltTy->isFloatingPointTy() ||
                 ArrayEltTy->isVectorTy()) {
        // Bitcast to the right element type (for fp/vector values).
        EltVal = new BitCastInst(EltVal, ArrayEltTy, "", SI);
      } else {
        // Otherwise, bitcast the dest pointer (for aggregates).
        DestField = new BitCastInst(DestField,
                                    PointerType::getUnqual(EltVal->getType()),
                                    "", SI);
      }
      new StoreInst(EltVal, DestField, SI);

      if (TD->isBigEndian())
        Shift -= ElementOffset;
      else
        Shift += ElementOffset;
    }
  }

  DeadInsts.push_back(SI);
}
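
// For illustration (example IR, little-endian, not from the original source):
// storing i64 %v over a split alloca of { i32, i32 } becomes
//
//   %lo = trunc i64 %v to i32
//   store i32 %lo, i32* %A.0        ; bits 0..31
//   %s  = lshr i64 %v, 32
//   %hi = trunc i64 %s to i32
//   store i32 %hi, i32* %A.1        ; bits 32..63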

/// RewriteLoadUserOfWholeAlloca - We found a load of the entire allocation to
/// an integer. Load the individual pieces to form the aggregate value.
void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
                                        SmallVector<AllocaInst*, 32> &NewElts) {
  // Extract each element out of the NewElts according to its structure offset
  // and form the result value.
  const Type *AllocaEltTy = AI->getAllocatedType();
  uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy);

  DEBUG(dbgs() << "PROMOTING LOAD OF WHOLE ALLOCA: " << *AI << '\n' << *LI
               << '\n');

  // There are two forms here: AI could be an array or struct. Both cases
  // have different ways to compute the element offset.
  const StructLayout *Layout = 0;
  uint64_t ArrayEltBitOffset = 0;
  if (const StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) {
    Layout = TD->getStructLayout(EltSTy);
  } else {
    const Type *ArrayEltTy = cast<ArrayType>(AllocaEltTy)->getElementType();
    ArrayEltBitOffset = TD->getTypeAllocSizeInBits(ArrayEltTy);
  }

  Value *ResultVal =
    Constant::getNullValue(IntegerType::get(LI->getContext(), AllocaSizeBits));

  for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
    // Load the value from the alloca. If the NewElt is an aggregate, cast
    // the pointer to an integer of the same size before doing the load.
    Value *SrcField = NewElts[i];
    const Type *FieldTy =
      cast<PointerType>(SrcField->getType())->getElementType();
    uint64_t FieldSizeBits = TD->getTypeSizeInBits(FieldTy);

    // Ignore zero sized fields like {}, they obviously contain no data.
    if (FieldSizeBits == 0) continue;

    const IntegerType *FieldIntTy = IntegerType::get(LI->getContext(),
                                                     FieldSizeBits);
    if (!FieldTy->isIntegerTy() && !FieldTy->isFloatingPointTy() &&
        !FieldTy->isVectorTy())
      SrcField = new BitCastInst(SrcField,
                                 PointerType::getUnqual(FieldIntTy),
                                 "", LI);
    SrcField = new LoadInst(SrcField, "sroa.load.elt", LI);

    // If SrcField is a fp or vector of the right size but that isn't an
    // integer type, bitcast to an integer so we can shift it.
    if (SrcField->getType() != FieldIntTy)
      SrcField = new BitCastInst(SrcField, FieldIntTy, "", LI);

    // Zero extend the field to be the same size as the final alloca so that
    // we can shift and insert it.
    if (SrcField->getType() != ResultVal->getType())
      SrcField = new ZExtInst(SrcField, ResultVal->getType(), "", LI);

    // Determine the number of bits to shift SrcField.
    uint64_t Shift;
    if (Layout) // Struct case.
      Shift = Layout->getElementOffsetInBits(i);
    else // Array case.
      Shift = i*ArrayEltBitOffset;

    if (TD->isBigEndian())
      Shift = AllocaSizeBits-Shift-FieldIntTy->getBitWidth();

    if (Shift) {
      Value *ShiftVal = ConstantInt::get(SrcField->getType(), Shift);
      SrcField = BinaryOperator::CreateShl(SrcField, ShiftVal, "", LI);
    }

    ResultVal = BinaryOperator::CreateOr(SrcField, ResultVal, "", LI);
  }

  // Handle tail padding by truncating the result
  if (TD->getTypeSizeInBits(LI->getType()) != AllocaSizeBits)
    ResultVal = new TruncInst(ResultVal, LI->getType(), "", LI);

  LI->replaceAllUsesWith(ResultVal);
  DeadInsts.push_back(LI);
}
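
// For illustration (example IR, little-endian, not from the original source):
// the inverse of the store case above; loading i64 from a split { i32, i32 }
// alloca becomes
//
//   %lo   = load i32* %A.0
//   %lo64 = zext i32 %lo to i64
//   %hi   = load i32* %A.1
//   %hi64 = zext i32 %hi to i64
//   %hiSh = shl i64 %hi64, 32
//   %v    = or i64 %hiSh, %lo64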

/// HasPadding - Return true if the specified type has any structure or
/// alignment padding, false otherwise.
static bool HasPadding(const Type *Ty, const TargetData &TD) {
  if (const StructType *STy = dyn_cast<StructType>(Ty)) {
    const StructLayout *SL = TD.getStructLayout(STy);
    unsigned PrevFieldBitOffset = 0;
    for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
      unsigned FieldBitOffset = SL->getElementOffsetInBits(i);

      // Padding in sub-elements?
      if (HasPadding(STy->getElementType(i), TD))
        return true;

      // Check to see if there is any padding between this element and the
      // previous one.
      if (i) {
        unsigned PrevFieldEnd =
          PrevFieldBitOffset+TD.getTypeSizeInBits(STy->getElementType(i-1));
        if (PrevFieldEnd < FieldBitOffset)
          return true;
      }

      PrevFieldBitOffset = FieldBitOffset;
    }

    // Check for tail padding.
    if (unsigned EltCount = STy->getNumElements()) {
      unsigned PrevFieldEnd = PrevFieldBitOffset +
        TD.getTypeSizeInBits(STy->getElementType(EltCount-1));
      if (PrevFieldEnd < SL->getSizeInBits())
        return true;
    }

  } else if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
    return HasPadding(ATy->getElementType(), TD);
  } else if (const VectorType *VTy = dyn_cast<VectorType>(Ty)) {
    return HasPadding(VTy->getElementType(), TD);
  }
  return TD.getTypeSizeInBits(Ty) != TD.getTypeAllocSizeInBits(Ty);
}
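
// For example (illustrative, assuming 4-byte i32 alignment): { i8, i32 } has
// three bytes of padding between the fields and { i32, i8 } has three bytes
// of tail padding, so both return true; { i32, i32 } is fully packed and
// returns false.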
/// isSafeAllocaToScalarRepl - Check to see if the specified allocation of an
/// aggregate can be broken down into elements.  Return false if not, true if
/// it is safe to transform.
bool SROA::isSafeAllocaToScalarRepl(AllocaInst *AI) {
  // Loop over the use list of the alloca.  We can only transform it if all of
  // the users are safe to transform.
  AllocaInfo Info;

  isSafeForScalarRepl(AI, AI, 0, Info);
  if (Info.isUnsafe) {
    DEBUG(dbgs() << "Cannot transform: " << *AI << '\n');
    return false;
  }

  // Okay, we know all the users are promotable.  If the aggregate is a memcpy
  // source and destination, we have to be careful.  In particular, the memcpy
  // could be moving around elements that live in structure padding of the LLVM
  // types, but may actually be used.  In these cases, we refuse to promote the
  // struct.
  if (Info.isMemCpySrc && Info.isMemCpyDst &&
      HasPadding(AI->getAllocatedType(), *TD))
    return false;

  return true;
}

/// MergeInType - Add the 'In' type to the accumulated type (Accum) so far at
/// the offset specified by Offset (which is specified in bytes).
///
/// There are two cases we handle here:
///   1) A union of vector types of the same size and potentially its elements.
///      Here we turn element accesses into insert/extract element operations.
///      This promotes a <4 x float> with a store of float to the third element
///      into a <4 x float> that uses insert element.
///   2) A fully general blob of memory, which we turn into some (potentially
///      large) integer type with extract and insert operations where the loads
///      and stores would mutate the memory.
static void MergeInType(const Type *In, uint64_t Offset, const Type *&VecTy,
                        unsigned AllocaSize, const TargetData &TD,
                        LLVMContext &Context) {
  // If this could be contributing to a vector, analyze it.
  if (VecTy != Type::getVoidTy(Context)) { // either null or a vector type.

    // If the In type is a vector that is the same size as the alloca, see if
    // it matches the existing VecTy.
    if (const VectorType *VInTy = dyn_cast<VectorType>(In)) {
      if (VInTy->getBitWidth()/8 == AllocaSize && Offset == 0) {
        // If we're storing/loading a vector of the right size, allow it as a
        // vector.  If this is the first vector we see, remember the type so
        // that we know the element size.
        if (VecTy == 0)
          VecTy = VInTy;
        return;
      }
    } else if (In->isFloatTy() || In->isDoubleTy() ||
               (In->isIntegerTy() && In->getPrimitiveSizeInBits() >= 8 &&
                isPowerOf2_32(In->getPrimitiveSizeInBits()))) {
      // If we're accessing something that could be an element of a vector, see
      // if the implied vector agrees with what we already have and if Offset
      // is compatible with it.
      unsigned EltSize = In->getPrimitiveSizeInBits()/8;
      if (Offset % EltSize == 0 &&
          AllocaSize % EltSize == 0 &&
          (VecTy == 0 ||
           cast<VectorType>(VecTy)->getElementType()
                 ->getPrimitiveSizeInBits()/8 == EltSize)) {
        if (VecTy == 0)
          VecTy = VectorType::get(In, AllocaSize/EltSize);
        return;
      }
    }
  }

  // Otherwise, we have a case that we can't handle with an optimized vector
  // form.  We can still turn this into a large integer.
  VecTy = Type::getVoidTy(Context);
}

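// For example (illustrative): if a 16-byte alloca is only accessed by float
// loads and stores at byte offsets 0, 4, 8 and 12, successive calls to
// MergeInType settle on VecTy = <4 x float>.  An i16 access at offset 2 would
// have a 2-byte element size that disagrees with the existing 4-byte
// elements, so VecTy would collapse to VoidTy and the alloca would be treated
// as one large integer instead.
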
/// CanConvertToScalar - V is a pointer.  If we can convert the pointee and all
/// its accesses to a single vector type, return true and set VecTy to
/// the new type.  If we could convert the alloca into a single promotable
/// integer, return true but set VecTy to VoidTy.  Further, if the use is not a
/// completely trivial use that mem2reg could promote, set IsNotTrivial.  Offset
/// is the current offset from the base of the alloca being analyzed.
///
/// If we see at least one access to the value that is of a vector type, set
/// the SawVec flag.
bool SROA::CanConvertToScalar(Value *V, bool &IsNotTrivial, const Type *&VecTy,
                              bool &SawVec, uint64_t Offset,
                              unsigned AllocaSize) {
  for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) {
    Instruction *User = cast<Instruction>(*UI);

    if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
      // Don't break volatile loads.
      if (LI->isVolatile())
        return false;
      MergeInType(LI->getType(), Offset, VecTy,
                  AllocaSize, *TD, V->getContext());
      SawVec |= LI->getType()->isVectorTy();
      continue;
    }

    if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
      // Storing the pointer, not into the value?
      if (SI->getOperand(0) == V || SI->isVolatile()) return false;
      MergeInType(SI->getOperand(0)->getType(), Offset,
                  VecTy, AllocaSize, *TD, V->getContext());
      SawVec |= SI->getOperand(0)->getType()->isVectorTy();
      continue;
    }

    if (BitCastInst *BCI = dyn_cast<BitCastInst>(User)) {
      if (!CanConvertToScalar(BCI, IsNotTrivial, VecTy, SawVec, Offset,
                              AllocaSize))
        return false;
      IsNotTrivial = true;
      continue;
    }

    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) {
      // If this is a GEP with variable indices, we can't handle it.
      if (!GEP->hasAllConstantIndices())
        return false;

      // Compute the offset that this GEP adds to the pointer.
      SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end());
      uint64_t GEPOffset = TD->getIndexedOffset(GEP->getPointerOperandType(),
                                                &Indices[0], Indices.size());
      // See if all uses can be converted.
      if (!CanConvertToScalar(GEP, IsNotTrivial, VecTy, SawVec,
                              Offset+GEPOffset, AllocaSize))
        return false;
      IsNotTrivial = true;
      continue;
    }

    // If this is a constant sized memset of a constant value (e.g. 0) we can
    // handle it.
    if (MemSetInst *MSI = dyn_cast<MemSetInst>(User)) {
      // Store of constant value and constant size.
      if (isa<ConstantInt>(MSI->getValue()) &&
          isa<ConstantInt>(MSI->getLength())) {
        IsNotTrivial = true;
        continue;
      }
    }

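    // For example (illustrative): a call such as
    //   call void @llvm.memset.i64(i8* %p, i8 0, i64 16, i32 4)
    // has constant value and length operands and is accepted here; a memset
    // with a variable length falls through and is rejected below.
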
    // If this is a memcpy or memmove into or out of the whole allocation, we
    // can handle it like a load or store of the scalar type.
    if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(User)) {
      if (ConstantInt *Len = dyn_cast<ConstantInt>(MTI->getLength()))
        if (Len->getZExtValue() == AllocaSize && Offset == 0) {
          IsNotTrivial = true;
          continue;
        }
    }

    // Otherwise, we cannot handle this!
    return false;
  }

  return true;
}

/// ConvertUsesToScalar - Convert all of the users of Ptr to use the new alloca
/// directly.  This happens when we are converting an "integer union" to a
/// single integer scalar, or when we are converting a "vector union" to a
/// vector with insert/extractelement instructions.
///
/// Offset is an offset from the original alloca, in bits that need to be
/// shifted to the right.  By the end of this, there should be no uses of Ptr.
void SROA::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset) {
  while (!Ptr->use_empty()) {
    Instruction *User = cast<Instruction>(Ptr->use_back());

    if (BitCastInst *CI = dyn_cast<BitCastInst>(User)) {
      ConvertUsesToScalar(CI, NewAI, Offset);
      CI->eraseFromParent();
      continue;
    }

    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) {
      // Compute the offset that this GEP adds to the pointer.
      SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end());
      uint64_t GEPOffset = TD->getIndexedOffset(GEP->getPointerOperandType(),
                                                &Indices[0], Indices.size());
      ConvertUsesToScalar(GEP, NewAI, Offset+GEPOffset*8);
      GEP->eraseFromParent();
      continue;
    }

    IRBuilder<> Builder(User->getParent(), User);

    if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
      // The load is a bit extract from NewAI shifted right by Offset bits.
      Value *LoadedVal = Builder.CreateLoad(NewAI, "tmp");
      Value *NewLoadVal
        = ConvertScalar_ExtractValue(LoadedVal, LI->getType(), Offset, Builder);
      LI->replaceAllUsesWith(NewLoadVal);
      LI->eraseFromParent();
      continue;
    }

    if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
      assert(SI->getOperand(0) != Ptr && "Consistency error!");
      Instruction *Old = Builder.CreateLoad(NewAI, NewAI->getName()+".in");
      Value *New = ConvertScalar_InsertValue(SI->getOperand(0), Old, Offset,
                                             Builder);
      Builder.CreateStore(New, NewAI);
      SI->eraseFromParent();

      // If the load we just inserted is now dead, then the inserted store
      // overwrote the entire thing.
      if (Old->use_empty())
        Old->eraseFromParent();
      continue;
    }

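    // For example (illustrative): with NewAI of type i64 on a little-endian
    // target, a 'store float %f' at bit offset 32 becomes a load of the whole
    // i64, an insertion of the float's bits (bitcast to i32, zext to i64,
    // shl by 32, mask and or) via ConvertScalar_InsertValue, and a store of
    // the merged i64 back to NewAI.
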
    // If this is a constant sized memset of a constant value (e.g. 0) we can
    // transform it into a store of the expanded constant value.
    if (MemSetInst *MSI = dyn_cast<MemSetInst>(User)) {
      assert(MSI->getRawDest() == Ptr && "Consistency error!");
      unsigned NumBytes = cast<ConstantInt>(MSI->getLength())->getZExtValue();
      if (NumBytes != 0) {
        unsigned Val = cast<ConstantInt>(MSI->getValue())->getZExtValue();

        // Compute the value, replicated the right number of times.
        APInt APVal(NumBytes*8, Val);

        // Splat the value if non-zero.
        if (Val)
          for (unsigned i = 1; i != NumBytes; ++i)
            APVal |= APVal << 8;

        Instruction *Old = Builder.CreateLoad(NewAI, NewAI->getName()+".in");
        Value *New = ConvertScalar_InsertValue(
                                    ConstantInt::get(User->getContext(), APVal),
                                               Old, Offset, Builder);
        Builder.CreateStore(New, NewAI);

        // If the load we just inserted is now dead, then the memset overwrote
        // the entire thing.
        if (Old->use_empty())
          Old->eraseFromParent();
      }
      MSI->eraseFromParent();
      continue;
    }

    // If this is a memcpy or memmove into or out of the whole allocation, we
    // can handle it like a load or store of the scalar type.
    if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(User)) {
      assert(Offset == 0 && "must be store to start of alloca");

      // If the source and destination both point to the same alloca, then this
      // is a noop copy-to-self, just delete it.  Otherwise, emit a load and
      // store as appropriate.
      AllocaInst *OrigAI = cast<AllocaInst>(Ptr->getUnderlyingObject(0));

      if (MTI->getSource()->getUnderlyingObject(0) != OrigAI) {
        // Dest must be OrigAI, change this to be a load from the original
        // pointer (bitcasted), then a store to our new alloca.
        assert(MTI->getRawDest() == Ptr && "Neither use is of pointer?");
        Value *SrcPtr = MTI->getSource();
        SrcPtr = Builder.CreateBitCast(SrcPtr, NewAI->getType());

        LoadInst *SrcVal = Builder.CreateLoad(SrcPtr, "srcval");
        SrcVal->setAlignment(MTI->getAlignment());
        Builder.CreateStore(SrcVal, NewAI);
      } else if (MTI->getDest()->getUnderlyingObject(0) != OrigAI) {
        // Src must be OrigAI, change this to be a load from NewAI then a store
        // through the original dest pointer (bitcasted).
        assert(MTI->getRawSource() == Ptr && "Neither use is of pointer?");
        LoadInst *SrcVal = Builder.CreateLoad(NewAI, "srcval");

        Value *DstPtr = Builder.CreateBitCast(MTI->getDest(), NewAI->getType());
        StoreInst *NewStore = Builder.CreateStore(SrcVal, DstPtr);
        NewStore->setAlignment(MTI->getAlignment());
      } else {
        // Noop transfer: Src == Dst.
      }

      MTI->eraseFromParent();
      continue;
    }

    llvm_unreachable("Unsupported operation!");
  }
}

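// For example (illustrative): a whole-alloca copy such as
//   call void @llvm.memcpy.i64(i8* %A, i8* %Src, i64 16, i32 4)
// where %A is the old pointer becomes a load through %Src bitcast to the new
// alloca's pointer type, followed by a plain store to the new alloca.
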
/// ConvertScalar_ExtractValue - Extract a value of type ToType from an integer
/// or vector value FromVal, extracting the bits from the offset specified by
/// Offset.  This returns the value, which is of type ToType.
///
/// This happens when we are converting an "integer union" to a single
/// integer scalar, or when we are converting a "vector union" to a vector with
/// insert/extractelement instructions.
///
/// Offset is an offset from the original alloca, in bits that need to be
/// shifted to the right.
Value *SROA::ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType,
                                        uint64_t Offset, IRBuilder<> &Builder) {
  // If the load is of the whole new alloca, no conversion is needed.
  if (FromVal->getType() == ToType && Offset == 0)
    return FromVal;

  // If the result alloca is a vector type, this is either an element
  // access or a bitcast to another vector type of the same size.
  if (const VectorType *VTy = dyn_cast<VectorType>(FromVal->getType())) {
    if (ToType->isVectorTy())
      return Builder.CreateBitCast(FromVal, ToType, "tmp");

    // Otherwise it must be an element access.
    unsigned Elt = 0;
    if (Offset) {
      unsigned EltSize = TD->getTypeAllocSizeInBits(VTy->getElementType());
      Elt = Offset/EltSize;
      assert(EltSize*Elt == Offset && "Invalid modulus in validity checking");
    }
    // Return the element extracted out of it.
    Value *V = Builder.CreateExtractElement(FromVal, ConstantInt::get(
                    Type::getInt32Ty(FromVal->getContext()), Elt), "tmp");
    if (V->getType() != ToType)
      V = Builder.CreateBitCast(V, ToType, "tmp");
    return V;
  }

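  // For example (illustrative): extracting a float at bit offset 64 from a
  // <4 x float> FromVal selects element 2:
  //   %tmp = extractelement <4 x float> %FromVal, i32 2
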
  // If ToType is a first class aggregate, extract out each of the pieces and
  // use insertvalues to form the FCA.
  if (const StructType *ST = dyn_cast<StructType>(ToType)) {
    const StructLayout &Layout = *TD->getStructLayout(ST);
    Value *Res = UndefValue::get(ST);
    for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
      Value *Elt = ConvertScalar_ExtractValue(FromVal, ST->getElementType(i),
                                        Offset+Layout.getElementOffsetInBits(i),
                                              Builder);
      Res = Builder.CreateInsertValue(Res, Elt, i, "tmp");
    }
    return Res;
  }

  if (const ArrayType *AT = dyn_cast<ArrayType>(ToType)) {
    uint64_t EltSize = TD->getTypeAllocSizeInBits(AT->getElementType());
    Value *Res = UndefValue::get(AT);
    for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
      Value *Elt = ConvertScalar_ExtractValue(FromVal, AT->getElementType(),
                                              Offset+i*EltSize, Builder);
      Res = Builder.CreateInsertValue(Res, Elt, i, "tmp");
    }
    return Res;
  }

  // Otherwise, this must be a union that was converted to an integer value.
  const IntegerType *NTy = cast<IntegerType>(FromVal->getType());

  // If this is a big-endian system and the load is narrower than the
  // full alloca type, we need to do a shift to get the right bits.
  int ShAmt = 0;
  if (TD->isBigEndian()) {
    // On big-endian machines, the lowest bit is stored at the bit offset
    // from the pointer given by getTypeStoreSizeInBits.  This matters for
    // integers with a bitwidth that is not a multiple of 8.
    ShAmt = TD->getTypeStoreSizeInBits(NTy) -
            TD->getTypeStoreSizeInBits(ToType) - Offset;
  } else {
    ShAmt = Offset;
  }

  // Note: we support negative shift amounts (handled with a shl), even though
  // they are not really defined.  We do this to support (e.g.) loads off the
  // end of a structure where only some bits are used.
  if (ShAmt > 0 && (unsigned)ShAmt < NTy->getBitWidth())
    FromVal = Builder.CreateLShr(FromVal,
                                 ConstantInt::get(FromVal->getType(),
                                                  ShAmt), "tmp");
  else if (ShAmt < 0 && (unsigned)-ShAmt < NTy->getBitWidth())
    FromVal = Builder.CreateShl(FromVal,
                                ConstantInt::get(FromVal->getType(),
                                                 -ShAmt), "tmp");

  // Finally, unconditionally truncate the integer to the right width.
  unsigned LIBitWidth = TD->getTypeSizeInBits(ToType);
  if (LIBitWidth < NTy->getBitWidth())
    FromVal =
      Builder.CreateTrunc(FromVal, IntegerType::get(FromVal->getContext(),
                                                    LIBitWidth), "tmp");
  else if (LIBitWidth > NTy->getBitWidth())
    FromVal =
      Builder.CreateZExt(FromVal, IntegerType::get(FromVal->getContext(),
                                                   LIBitWidth), "tmp");

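  // For example (illustrative): on a little-endian target, extracting an i16
  // at bit offset 16 from an i64 value emits:
  //   %t1 = lshr i64 %FromVal, 16
  //   %t2 = trunc i64 %t1 to i16
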
  // If the result is an integer, this is a trunc or bitcast.
  if (ToType->isIntegerTy()) {
    // Should be done.
  } else if (ToType->isFloatingPointTy() || ToType->isVectorTy()) {
    // Just do a bitcast, we know the sizes match up.
    FromVal = Builder.CreateBitCast(FromVal, ToType, "tmp");
  } else {
    // Otherwise must be a pointer.
    FromVal = Builder.CreateIntToPtr(FromVal, ToType, "tmp");
  }
  assert(FromVal->getType() == ToType && "Didn't convert right?");
  return FromVal;
}

/// ConvertScalar_InsertValue - Insert the value "SV" into the existing integer
/// or vector value "Old" at the offset specified by Offset.
///
/// This happens when we are converting an "integer union" to a
/// single integer scalar, or when we are converting a "vector union" to a
/// vector with insert/extractelement instructions.
///
/// Offset is an offset from the original alloca, in bits that need to be
/// shifted to the right.
Value *SROA::ConvertScalar_InsertValue(Value *SV, Value *Old,
                                       uint64_t Offset, IRBuilder<> &Builder) {

  // Convert the stored type to the actual type, shift it left to insert, then
  // 'or' it into place.
  const Type *AllocaType = Old->getType();
  LLVMContext &Context = Old->getContext();

  if (const VectorType *VTy = dyn_cast<VectorType>(AllocaType)) {
    uint64_t VecSize = TD->getTypeAllocSizeInBits(VTy);
    uint64_t ValSize = TD->getTypeAllocSizeInBits(SV->getType());

    // Changing the whole vector with memset or with an access of a different
    // vector type?
    if (ValSize == VecSize)
      return Builder.CreateBitCast(SV, AllocaType, "tmp");

    uint64_t EltSize = TD->getTypeAllocSizeInBits(VTy->getElementType());

    // Must be an element insertion.
    unsigned Elt = Offset/EltSize;

    if (SV->getType() != VTy->getElementType())
      SV = Builder.CreateBitCast(SV, VTy->getElementType(), "tmp");

    SV = Builder.CreateInsertElement(Old, SV,
                     ConstantInt::get(Type::getInt32Ty(SV->getContext()), Elt),
                                     "tmp");
    return SV;
  }

  // If SV is a first-class aggregate value, insert each value recursively.
  if (const StructType *ST = dyn_cast<StructType>(SV->getType())) {
    const StructLayout &Layout = *TD->getStructLayout(ST);
    for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
      Value *Elt = Builder.CreateExtractValue(SV, i, "tmp");
      Old = ConvertScalar_InsertValue(Elt, Old,
                                      Offset+Layout.getElementOffsetInBits(i),
                                      Builder);
    }
    return Old;
  }

  if (const ArrayType *AT = dyn_cast<ArrayType>(SV->getType())) {
    uint64_t EltSize = TD->getTypeAllocSizeInBits(AT->getElementType());
    for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
      Value *Elt = Builder.CreateExtractValue(SV, i, "tmp");
      Old = ConvertScalar_InsertValue(Elt, Old, Offset+i*EltSize, Builder);
    }
    return Old;
  }

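  // For example (illustrative): inserting the struct value {i16 %a, i16 %b}
  // at bit offset 0 recurses on each field, placing %a at offset 0 and %b at
  // offset 16 (per the struct layout), each through the integer path below.
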
  // If SV is a float, convert it to the appropriate integer type.
  // If it is a pointer, do the same.
  unsigned SrcWidth = TD->getTypeSizeInBits(SV->getType());
  unsigned DestWidth = TD->getTypeSizeInBits(AllocaType);
  unsigned SrcStoreWidth = TD->getTypeStoreSizeInBits(SV->getType());
  unsigned DestStoreWidth = TD->getTypeStoreSizeInBits(AllocaType);
  if (SV->getType()->isFloatingPointTy() || SV->getType()->isVectorTy())
    SV = Builder.CreateBitCast(SV,
                            IntegerType::get(SV->getContext(),SrcWidth), "tmp");
  else if (SV->getType()->isPointerTy())
    SV = Builder.CreatePtrToInt(SV, TD->getIntPtrType(SV->getContext()), "tmp");

  // Zero extend or truncate the value if needed.
  if (SV->getType() != AllocaType) {
    if (SV->getType()->getPrimitiveSizeInBits() <
        AllocaType->getPrimitiveSizeInBits())
      SV = Builder.CreateZExt(SV, AllocaType, "tmp");
    else {
      // Truncation may be needed if storing more than the alloca can hold
      // (undefined behavior).
      SV = Builder.CreateTrunc(SV, AllocaType, "tmp");
      SrcWidth = DestWidth;
      SrcStoreWidth = DestStoreWidth;
    }
  }

  // If this is a big-endian system and the store is narrower than the
  // full alloca type, we need to do a shift to get the right bits.
  int ShAmt = 0;
  if (TD->isBigEndian()) {
    // On big-endian machines, the lowest bit is stored at the bit offset
    // from the pointer given by getTypeStoreSizeInBits.  This matters for
    // integers with a bitwidth that is not a multiple of 8.
    ShAmt = DestStoreWidth - SrcStoreWidth - Offset;
  } else {
    ShAmt = Offset;
  }

  // Note: we support negative shift amounts (handled with a shr), even though
  // they are not really defined.  We do this to support (e.g.) stores off the
  // end of a structure where only some bits in the structure are set.
  APInt Mask(APInt::getLowBitsSet(DestWidth, SrcWidth));
  if (ShAmt > 0 && (unsigned)ShAmt < DestWidth) {
    SV = Builder.CreateShl(SV, ConstantInt::get(SV->getType(),
                                                ShAmt), "tmp");
    Mask <<= ShAmt;
  } else if (ShAmt < 0 && (unsigned)-ShAmt < DestWidth) {
    SV = Builder.CreateLShr(SV, ConstantInt::get(SV->getType(),
                                                 -ShAmt), "tmp");
    Mask = Mask.lshr(-ShAmt);
  }

  // Mask out the bits we are about to insert from the old value, and or
  // in the new bits.
  if (SrcWidth != DestWidth) {
    assert(DestWidth > SrcWidth);
    Old = Builder.CreateAnd(Old, ConstantInt::get(Context, ~Mask), "mask");
    SV = Builder.CreateOr(Old, SV, "ins");
  }
  return SV;
}

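// For example (illustrative): on a little-endian target, inserting i8 %v at
// bit offset 8 into an i32 value "Old" emits roughly:
//   %ext  = zext i8 %v to i32
//   %tmp  = shl i32 %ext, 8
//   %mask = and i32 %Old, -65281        ; clear bits 8-15, i.e. ~(0xFF << 8)
//   %ins  = or i32 %mask, %tmp
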
/// PointsToConstantGlobal - Return true if V (possibly indirectly) points to
/// some part of a constant global variable.  This intentionally only accepts
/// constant expressions because we can't rewrite arbitrary instructions.
static bool PointsToConstantGlobal(Value *V) {
  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
    return GV->isConstant();
  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
    if (CE->getOpcode() == Instruction::BitCast ||
        CE->getOpcode() == Instruction::GetElementPtr)
      return PointsToConstantGlobal(CE->getOperand(0));
  return false;
}

/// isOnlyCopiedFromConstantGlobal - Recursively walk the uses of a (derived)
/// pointer to an alloca.  Ignore any reads of the pointer, return false if we
/// see any stores or other unknown uses.  If we see pointer arithmetic, keep
/// track of whether it moves the pointer (with isOffset) but otherwise
/// traverse the uses.  If we see a memcpy/memmove that targets an unoffseted
/// pointer to the alloca, and if the source pointer is a pointer to a constant
/// global, we can optimize this.
static bool isOnlyCopiedFromConstantGlobal(Value *V, Instruction *&TheCopy,
                                           bool isOffset) {
  for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) {
    if (LoadInst *LI = dyn_cast<LoadInst>(*UI))
      // Ignore non-volatile loads, they are always ok.
      if (!LI->isVolatile())
        continue;

    if (BitCastInst *BCI = dyn_cast<BitCastInst>(*UI)) {
      // If uses of the bitcast are ok, we are ok.
      if (!isOnlyCopiedFromConstantGlobal(BCI, TheCopy, isOffset))
        return false;
      continue;
    }
    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(*UI)) {
      // If the GEP has all zero indices, it doesn't offset the pointer.  If it
      // doesn't, it does.
      if (!isOnlyCopiedFromConstantGlobal(GEP, TheCopy,
                                          isOffset ||
                                          !GEP->hasAllZeroIndices()))
        return false;
      continue;
    }

    // If this isn't our memcpy/memmove, reject it as something we can't
    // handle.
    if (!isa<MemTransferInst>(*UI))
      return false;

    // If we have already seen a copy, reject the second one.
    if (TheCopy) return false;

    // If the pointer has been offset from the start of the alloca, we can't
    // safely handle this.
    if (isOffset) return false;

    // If the memintrinsic isn't using the alloca as the dest, reject it.
    if (UI.getOperandNo() != 1) return false;

    MemIntrinsic *MI = cast<MemIntrinsic>(*UI);

    // If the source of the memcpy/move is not a constant global, reject it.
    if (!PointsToConstantGlobal(MI->getOperand(2)))
      return false;

    // Otherwise, the transform is safe.  Remember the copy instruction.
    TheCopy = MI;
  }
  return true;
}

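// For example (illustrative): the walk above accepts an alloca whose only
// writer is
//   call void @llvm.memcpy.i64(i8* %A, i8* bitcast ([4 x i32]* @G to i8*),
//                              i64 16, i32 4)
// when @G is a constant global; every other use must be a non-volatile load
// or pointer arithmetic, and the copy itself must target the unoffseted
// alloca.
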
/// isOnlyCopiedFromConstantGlobal - Return the copy instruction if the
/// specified alloca is only modified by a single copy from a constant global,
/// or null otherwise.  If we can prove this, we can replace any uses of the
/// alloca with uses of the global directly.
Instruction *SROA::isOnlyCopiedFromConstantGlobal(AllocaInst *AI) {
  Instruction *TheCopy = 0;
  if (::isOnlyCopiedFromConstantGlobal(AI, TheCopy, false))
    return TheCopy;
  return 0;
}
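// Illustrative use (a sketch of a caller, not code from this file): when the
// returned copy instruction is non-null, the alloca's uses can be redirected
// to the global and both the copy and the alloca become dead:
//   if (Instruction *TheCopy = isOnlyCopiedFromConstantGlobal(AI)) {
//     AI->replaceAllUsesWith(ConstantExpr::getBitCast(
//         cast<Constant>(TheCopy->getOperand(2)), AI->getType()));
//     TheCopy->eraseFromParent();
//     AI->eraseFromParent();
//   }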