ScalarReplAggregates.cpp revision 200581
//===- ScalarReplAggregates.cpp - Scalar Replacement of Aggregates --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This transformation implements the well known scalar replacement of
// aggregates transformation.  This xform breaks up alloca instructions of
// aggregate type (structure or array) into individual alloca instructions for
// each member (if possible).  Then, if possible, it transforms the individual
// alloca instructions into nice clean scalar SSA form.
//
// This combines a simple SRoA algorithm with the Mem2Reg algorithm because
// they often interact, especially for C++ programs.  As such, iterating
// between SRoA, then Mem2Reg until we run out of things to promote works well.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "scalarrepl"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/LLVMContext.h"
#include "llvm/Pass.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/IRBuilder.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;

STATISTIC(NumReplaced,  "Number of allocas broken up");
STATISTIC(NumPromoted,  "Number of allocas promoted");
STATISTIC(NumConverted, "Number of aggregates converted to scalar");
STATISTIC(NumGlobals,   "Number of allocas copied from constant global");

namespace {
  /// SROA - Scalar Replacement of Aggregates.  Breaks up aggregate (struct or
  /// array) allocas into one alloca per element where legal, and cooperates
  /// with mem2reg to promote the results into SSA registers.
  struct SROA : public FunctionPass {
    static char ID; // Pass identification, replacement for typeid
    /// T is the maximum alloca size (in bytes) considered for scalar
    /// replacement; the sentinel -1 selects the default threshold of 128.
    explicit SROA(signed T = -1) : FunctionPass(&ID) {
      if (T == -1)
        SRThreshold = 128;
      else
        SRThreshold = T;
    }

    bool runOnFunction(Function &F);

    bool performScalarRepl(Function &F);
    bool performPromotion(Function &F);

    // getAnalysisUsage - We need dominance information for the mem2reg-style
    // promotion step, and we do not alter the CFG, so say so.
    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.addRequired<DominatorTree>();
      AU.addRequired<DominanceFrontier>();
      AU.setPreservesCFG();
    }

  private:
    // May be null if no TargetData is available; runOnFunction then skips the
    // scalar-replacement portion of the pass entirely.
    TargetData *TD;

    /// AllocaInfo - When analyzing uses of an alloca instruction, this captures
    /// information about the uses.  All these fields are initialized to false
    /// and set to true when something is learned.
    struct AllocaInfo {
      /// isUnsafe - This is set to true if the alloca cannot be SROA'd.
      bool isUnsafe : 1;

      /// needsCleanup - This is set to true if there is some use of the alloca
      /// that requires cleanup.
      bool needsCleanup : 1;

      /// isMemCpySrc - This is true if this aggregate is memcpy'd from.
      bool isMemCpySrc : 1;

      /// isMemCpyDst - This is true if this aggregate is memcpy'd into.
      bool isMemCpyDst : 1;

      AllocaInfo()
        : isUnsafe(false), needsCleanup(false),
          isMemCpySrc(false), isMemCpyDst(false) {}
    };

    /// SRThreshold - Allocas larger than this many bytes (and aggregates with
    /// more than SRThreshold/4 elements) are not scalar-replaced.
    unsigned SRThreshold;

    // MarkUnsafe - Record that the alloca under analysis cannot be SROA'd.
    void MarkUnsafe(AllocaInfo &I) { I.isUnsafe = true; }

    int isSafeAllocaToScalarRepl(AllocaInst *AI);

    void isSafeUseOfAllocation(Instruction *User, AllocaInst *AI,
                               AllocaInfo &Info);
    void isSafeElementUse(Value *Ptr, bool isFirstElt, AllocaInst *AI,
                          AllocaInfo &Info);
    void isSafeMemIntrinsicOnAllocation(MemIntrinsic *MI, AllocaInst *AI,
                                        unsigned OpNo, AllocaInfo &Info);
    void isSafeUseOfBitCastedAllocation(BitCastInst *User, AllocaInst *AI,
                                        AllocaInfo &Info);

    void DoScalarReplacement(AllocaInst *AI,
                             std::vector<AllocaInst*> &WorkList);
    void CleanupGEP(GetElementPtrInst *GEP);
    void CleanupAllocaUsers(AllocaInst *AI);
    AllocaInst *AddNewAlloca(Function &F, const Type *Ty, AllocaInst *Base);

    void RewriteBitCastUserOfAlloca(Instruction *BCInst, AllocaInst *AI,
                                    SmallVector<AllocaInst*, 32> &NewElts);

    void RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst,
                                      AllocaInst *AI,
                                      SmallVector<AllocaInst*, 32> &NewElts);
    void RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
                                       SmallVector<AllocaInst*, 32> &NewElts);
    void RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
                                      SmallVector<AllocaInst*, 32> &NewElts);

    bool CanConvertToScalar(Value *V, bool &IsNotTrivial, const Type *&VecTy,
                            bool &SawVec, uint64_t Offset,
                            unsigned AllocaSize);
    void ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset);
    Value *ConvertScalar_ExtractValue(Value *NV, const Type *ToType,
                                      uint64_t Offset, IRBuilder<> &Builder);
    Value *ConvertScalar_InsertValue(Value *StoredVal, Value *ExistingVal,
                                     uint64_t Offset, IRBuilder<> &Builder);
    static Instruction *isOnlyCopiedFromConstantGlobal(AllocaInst *AI);
  };
}

char SROA::ID = 0;
static RegisterPass<SROA> X("scalarrepl", "Scalar Replacement of Aggregates");

// Public interface to the ScalarReplAggregates pass
FunctionPass *llvm::createScalarReplAggregatesPass(signed int Threshold) {
  return new SROA(Threshold);
}


// runOnFunction - Alternate between promotion and scalar replacement until
// neither makes further progress.
bool SROA::runOnFunction(Function &F) {
  TD = getAnalysisIfAvailable<TargetData>();

  bool Changed = performPromotion(F);

  // FIXME: ScalarRepl currently depends on TargetData more than it
  // theoretically needs to. It should be refactored in order to support
  // target-independent IR. Until this is done, just skip the actual
  // scalar-replacement portion of this pass.
  if (!TD) return Changed;

  while (1) {
    bool LocalChange = performScalarRepl(F);
    if (!LocalChange) break;   // No need to repromote if no scalarrepl
    Changed = true;
    LocalChange = performPromotion(F);
    if (!LocalChange) break;   // No need to re-scalarrepl if no promotion
  }

  return Changed;
}


// performPromotion - Promote every promotable alloca in the entry block to an
// SSA register via PromoteMemToReg, repeating until none remain.  Returns true
// if anything was promoted.
bool SROA::performPromotion(Function &F) {
  std::vector<AllocaInst*> Allocas;
  DominatorTree     &DT = getAnalysis<DominatorTree>();
  DominanceFrontier &DF = getAnalysis<DominanceFrontier>();

  BasicBlock &BB = F.getEntryBlock();  // Get the entry node for the function

  bool Changed = false;

  while (1) {
    Allocas.clear();

    // Find allocas that are safe to promote, by looking at all instructions in
    // the entry node.  (--BB.end() stops short of the block's final
    // instruction, which can never be an alloca.)
    for (BasicBlock::iterator I = BB.begin(), E = --BB.end(); I != E; ++I)
      if (AllocaInst *AI = dyn_cast<AllocaInst>(I))       // Is it an alloca?
        if (isAllocaPromotable(AI))
          Allocas.push_back(AI);

    if (Allocas.empty()) break;

    PromoteMemToReg(Allocas, DT, DF);
    NumPromoted += Allocas.size();
    Changed = true;
  }

  return Changed;
}

/// getNumSAElements - Return the number of elements in the specific struct or
/// array.
static uint64_t getNumSAElements(const Type *T) {
  if (const StructType *ST = dyn_cast<StructType>(T))
    return ST->getNumElements();
  return cast<ArrayType>(T)->getNumElements();
}

// performScalarRepl - This algorithm is a simple worklist driven algorithm,
// which runs on all of the malloc/alloca instructions in the function, removing
// them if they are only used by getelementptr instructions.
//
bool SROA::performScalarRepl(Function &F) {
  std::vector<AllocaInst*> WorkList;

  // Scan the entry basic block, adding any alloca's and mallocs to the worklist
  BasicBlock &BB = F.getEntryBlock();
  for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I)
    if (AllocaInst *A = dyn_cast<AllocaInst>(I))
      WorkList.push_back(A);

  // Process the worklist
  bool Changed = false;
  while (!WorkList.empty()) {
    AllocaInst *AI = WorkList.back();
    WorkList.pop_back();

    // Handle dead allocas trivially.  These can be formed by SROA'ing arrays
    // with unused elements.
    if (AI->use_empty()) {
      AI->eraseFromParent();
      continue;
    }

    // If this alloca is impossible for us to promote, reject it early.
    if (AI->isArrayAllocation() || !AI->getAllocatedType()->isSized())
      continue;

    // Check to see if this allocation is only modified by a memcpy/memmove from
    // a constant global.  If this is the case, we can change all users to use
    // the constant global instead.  This is commonly produced by the CFE by
    // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A'
    // is only subsequently read.
    if (Instruction *TheCopy = isOnlyCopiedFromConstantGlobal(AI)) {
      DEBUG(errs() << "Found alloca equal to global: " << *AI << '\n');
      DEBUG(errs() << "  memcpy = " << *TheCopy << '\n');
      // Operand 2 of the memcpy is its source pointer: the constant global.
      Constant *TheSrc = cast<Constant>(TheCopy->getOperand(2));
      AI->replaceAllUsesWith(ConstantExpr::getBitCast(TheSrc, AI->getType()));
      TheCopy->eraseFromParent();  // Don't mutate the global.
      AI->eraseFromParent();
      ++NumGlobals;
      Changed = true;
      continue;
    }

    // Check to see if we can perform the core SROA transformation.  We cannot
    // transform the allocation instruction if it is an array allocation
    // (allocations OF arrays are ok though), and an allocation of a scalar
    // value cannot be decomposed at all.
    uint64_t AllocaSize = TD->getTypeAllocSize(AI->getAllocatedType());

    // Do not promote [0 x %struct].
    if (AllocaSize == 0) continue;

    // Do not promote any struct whose size is too big.
    if (AllocaSize > SRThreshold) continue;

    if ((isa<StructType>(AI->getAllocatedType()) ||
         isa<ArrayType>(AI->getAllocatedType())) &&
        // Do not promote any struct into more than "32" separate vars.
        getNumSAElements(AI->getAllocatedType()) <= SRThreshold/4) {
      // Check that all of the users of the allocation are capable of being
      // transformed.
      switch (isSafeAllocaToScalarRepl(AI)) {
      default: llvm_unreachable("Unexpected value!");
      case 0:  // Not safe to scalar replace.
        break;
      case 1:  // Safe, but requires cleanup/canonicalizations first
        CleanupAllocaUsers(AI);
        // FALL THROUGH.
      case 3:  // Safe to scalar replace.
        DoScalarReplacement(AI, WorkList);
        Changed = true;
        continue;
      }
    }

    // If we can turn this aggregate value (potentially with casts) into a
    // simple scalar value that can be mem2reg'd into a register value.
    // IsNotTrivial tracks whether this is something that mem2reg could have
    // promoted itself.  If so, we don't want to transform it needlessly.  Note
    // that we can't just check based on the type: the alloca may be of an i32
    // but that has pointer arithmetic to set byte 3 of it or something.
    bool IsNotTrivial = false;
    const Type *VectorTy = 0;
    bool HadAVector = false;
    if (CanConvertToScalar(AI, IsNotTrivial, VectorTy, HadAVector,
                           0, unsigned(AllocaSize)) && IsNotTrivial) {
      AllocaInst *NewAI;
      // If we were able to find a vector type that can handle this with
      // insert/extract elements, and if there was at least one use that had
      // a vector type, promote this to a vector.  We don't want to promote
      // random stuff that doesn't use vectors (e.g. <9 x double>) because then
      // we just get a lot of insert/extracts.  If at least one vector is
      // involved, then we probably really do have a union of vector/array.
      if (VectorTy && isa<VectorType>(VectorTy) && HadAVector) {
        DEBUG(errs() << "CONVERT TO VECTOR: " << *AI << "\n  TYPE = "
                     << *VectorTy << '\n');

        // Create and insert the vector alloca.
        NewAI = new AllocaInst(VectorTy, 0, "", AI->getParent()->begin());
        ConvertUsesToScalar(AI, NewAI, 0);
      } else {
        DEBUG(errs() << "CONVERT TO SCALAR INTEGER: " << *AI << "\n");

        // Create and insert the integer alloca.
        const Type *NewTy = IntegerType::get(AI->getContext(), AllocaSize*8);
        NewAI = new AllocaInst(NewTy, 0, "", AI->getParent()->begin());
        ConvertUsesToScalar(AI, NewAI, 0);
      }
      NewAI->takeName(AI);
      AI->eraseFromParent();
      ++NumConverted;
      Changed = true;
      continue;
    }

    // Otherwise, couldn't process this alloca.
  }

  return Changed;
}

/// DoScalarReplacement - This alloca satisfied the isSafeAllocaToScalarRepl
/// predicate, do SROA now.  Creates one new alloca per element of AI's struct
/// or array type, rewrites all of AI's users to use the element allocas, and
/// deletes AI.  New allocas are queued on WorkList for recursive processing.
void SROA::DoScalarReplacement(AllocaInst *AI,
                               std::vector<AllocaInst*> &WorkList) {
  DEBUG(errs() << "Found inst to SROA: " << *AI << '\n');
  SmallVector<AllocaInst*, 32> ElementAllocas;
  if (const StructType *ST = dyn_cast<StructType>(AI->getAllocatedType())) {
    ElementAllocas.reserve(ST->getNumContainedTypes());
    for (unsigned i = 0, e = ST->getNumContainedTypes(); i != e; ++i) {
      AllocaInst *NA = new AllocaInst(ST->getContainedType(i), 0,
                                      AI->getAlignment(),
                                      AI->getName() + "." + Twine(i), AI);
      ElementAllocas.push_back(NA);
      WorkList.push_back(NA);  // Add to worklist for recursive processing
    }
  } else {
    const ArrayType *AT = cast<ArrayType>(AI->getAllocatedType());
    ElementAllocas.reserve(AT->getNumElements());
    const Type *ElTy = AT->getElementType();
    for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
      AllocaInst *NA = new AllocaInst(ElTy, 0, AI->getAlignment(),
                                      AI->getName() + "." + Twine(i), AI);
      ElementAllocas.push_back(NA);
      WorkList.push_back(NA);  // Add to worklist for recursive processing
    }
  }

  // Now that we have created the alloca instructions that we want to use,
  // expand the getelementptr instructions to use them.
  while (!AI->use_empty()) {
    Instruction *User = cast<Instruction>(AI->use_back());
    if (BitCastInst *BCInst = dyn_cast<BitCastInst>(User)) {
      RewriteBitCastUserOfAlloca(BCInst, AI, ElementAllocas);
      BCInst->eraseFromParent();
      continue;
    }

    // Replace:
    //   %res = load { i32, i32 }* %alloc
    // with:
    //   %load.0 = load i32* %alloc.0
    //   %insert.0 insertvalue { i32, i32 } zeroinitializer, i32 %load.0, 0
    //   %load.1 = load i32* %alloc.1
    //   %insert = insertvalue { i32, i32 } %insert.0, i32 %load.1, 1
    // (Also works for arrays instead of structs)
    if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
      Value *Insert = UndefValue::get(LI->getType());
      for (unsigned i = 0, e = ElementAllocas.size(); i != e; ++i) {
        Value *Load = new LoadInst(ElementAllocas[i], "load", LI);
        Insert = InsertValueInst::Create(Insert, Load, i, "insert", LI);
      }
      LI->replaceAllUsesWith(Insert);
      LI->eraseFromParent();
      continue;
    }

    // Replace:
    //   store { i32, i32 } %val, { i32, i32 }* %alloc
    // with:
    //   %val.0 = extractvalue { i32, i32 } %val, 0
    //   store i32 %val.0, i32* %alloc.0
    //   %val.1 = extractvalue { i32, i32 } %val, 1
    //   store i32 %val.1, i32* %alloc.1
    // (Also works for arrays instead of structs)
    if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
      Value *Val = SI->getOperand(0);
      for (unsigned i = 0, e = ElementAllocas.size(); i != e; ++i) {
        Value *Extract = ExtractValueInst::Create(Val, i, Val->getName(), SI);
        new StoreInst(Extract, ElementAllocas[i], SI);
      }
      SI->eraseFromParent();
      continue;
    }

    // Any remaining user was vetted by the safety analysis to be a GEP of the
    // form: GEP <ptr>, 0, <cst>.  Operand 2 selects the element alloca.
    GetElementPtrInst *GEPI = cast<GetElementPtrInst>(User);
    unsigned Idx =
       (unsigned)cast<ConstantInt>(GEPI->getOperand(2))->getZExtValue();

    assert(Idx < ElementAllocas.size() && "Index out of range?");
    AllocaInst *AllocaToUse = ElementAllocas[Idx];

    Value *RepValue;
    if (GEPI->getNumOperands() == 3) {
      // Do not insert a new getelementptr instruction with zero indices, only
      // to have it optimized out later.
      RepValue = AllocaToUse;
    } else {
      // We are indexing deeply into the structure, so we still need a
      // getelement ptr instruction to finish the indexing.  This may be
      // expanded itself once the worklist is rerun.
      //
      SmallVector<Value*, 8> NewArgs;
      NewArgs.push_back(Constant::getNullValue(
                                           Type::getInt32Ty(AI->getContext())));
      NewArgs.append(GEPI->op_begin()+3, GEPI->op_end());
      RepValue = GetElementPtrInst::Create(AllocaToUse, NewArgs.begin(),
                                           NewArgs.end(), "", GEPI);
      RepValue->takeName(GEPI);
    }

    // If this GEP is to the start of the aggregate, check for memcpys.
    if (Idx == 0 && GEPI->hasAllZeroIndices())
      RewriteBitCastUserOfAlloca(GEPI, AI, ElementAllocas);

    // Move all of the users over to the new GEP.
    GEPI->replaceAllUsesWith(RepValue);
    // Delete the old GEP
    GEPI->eraseFromParent();
  }

  // Finally, delete the Alloca instruction
  AI->eraseFromParent();
  NumReplaced++;
}

/// isSafeElementUse - Check to see if this use is an allowed use for a
/// getelementptr instruction of an array aggregate allocation.  isFirstElt
/// indicates whether Ptr is known to point to the start of the aggregate.
void SROA::isSafeElementUse(Value *Ptr, bool isFirstElt, AllocaInst *AI,
                            AllocaInfo &Info) {
  // Walk every user of Ptr; any use not recognized as safe marks the whole
  // alloca unsafe via MarkUnsafe (which returns void, hence the return idiom).
  for (Value::use_iterator I = Ptr->use_begin(), E = Ptr->use_end();
       I != E; ++I) {
    Instruction *User = cast<Instruction>(*I);
    switch (User->getOpcode()) {
    case Instruction::Load:  break;   // Loading through the pointer is fine.
    case Instruction::Store:
      // Store is ok if storing INTO the pointer, not storing the pointer
      if (User->getOperand(0) == Ptr) return MarkUnsafe(Info);
      break;
    case Instruction::GetElementPtr: {
      GetElementPtrInst *GEP = cast<GetElementPtrInst>(User);
      bool AreAllZeroIndices = isFirstElt;
      if (GEP->getNumOperands() > 1 &&
          (!isa<ConstantInt>(GEP->getOperand(1)) ||
           !cast<ConstantInt>(GEP->getOperand(1))->isZero()))
        // Using pointer arithmetic to navigate the array.
        return MarkUnsafe(Info);

      // Verify that any array subscripts are in range.
      for (gep_type_iterator GEPIt = gep_type_begin(GEP),
           E = gep_type_end(GEP); GEPIt != E; ++GEPIt) {
        // Ignore struct elements, no extra checking needed for these.
        if (isa<StructType>(*GEPIt))
          continue;

        // This GEP indexes an array.  Verify that this is an in-range
        // constant integer. Specifically, consider A[0][i]. We cannot know that
        // the user isn't doing invalid things like allowing i to index an
        // out-of-range subscript that accesses A[1].  Because of this, we have
        // to reject SROA of any accesses into structs where any of the
        // components are variables.
        ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPIt.getOperand());
        if (!IdxVal) return MarkUnsafe(Info);

        // Are all indices still zero?
        AreAllZeroIndices &= IdxVal->isZero();

        if (const ArrayType *AT = dyn_cast<ArrayType>(*GEPIt)) {
          if (IdxVal->getZExtValue() >= AT->getNumElements())
            return MarkUnsafe(Info);
        } else if (const VectorType *VT = dyn_cast<VectorType>(*GEPIt)) {
          if (IdxVal->getZExtValue() >= VT->getNumElements())
            return MarkUnsafe(Info);
        }
      }

      // Recurse: uses of the GEP must themselves be safe element uses.
      isSafeElementUse(GEP, AreAllZeroIndices, AI, Info);
      if (Info.isUnsafe) return;
      break;
    }
    case Instruction::BitCast:
      // A bitcast is only tolerated when it aliases the start of the
      // aggregate; the bitcast-specific analysis handles it from there.
      if (isFirstElt) {
        isSafeUseOfBitCastedAllocation(cast<BitCastInst>(User), AI, Info);
        if (Info.isUnsafe) return;
        break;
      }
      DEBUG(errs() << "  Transformation preventing inst: " << *User << '\n');
      return MarkUnsafe(Info);
    case Instruction::Call:
      // Only mem intrinsics on the start of the aggregate are allowed; any
      // other call could capture or write through the pointer arbitrarily.
      if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) {
        if (isFirstElt) {
          isSafeMemIntrinsicOnAllocation(MI, AI, I.getOperandNo(), Info);
          if (Info.isUnsafe) return;
          break;
        }
      }
      DEBUG(errs() << "  Transformation preventing inst: " << *User << '\n');
      return MarkUnsafe(Info);
    default:
      DEBUG(errs() << "  Transformation preventing inst: " << *User << '\n');
      return MarkUnsafe(Info);
    }
  }
  return;  // All users look ok :)
}

/// AllUsersAreLoads - Return true if all users of this value are loads.
static bool AllUsersAreLoads(Value *Ptr) {
  for (Value::use_iterator I = Ptr->use_begin(), E = Ptr->use_end();
       I != E; ++I)
    if (cast<Instruction>(*I)->getOpcode() != Instruction::Load)
      return false;
  return true;
}

/// isSafeUseOfAllocation - Check if this user is an allowed use for an
/// aggregate allocation.
void SROA::isSafeUseOfAllocation(Instruction *User, AllocaInst *AI,
                                 AllocaInfo &Info) {
  if (BitCastInst *C = dyn_cast<BitCastInst>(User))
    return isSafeUseOfBitCastedAllocation(C, AI, Info);

  if (LoadInst *LI = dyn_cast<LoadInst>(User))
    if (!LI->isVolatile())
      return;// Loads (returning a first class aggregrate) are always rewritable

  if (StoreInst *SI = dyn_cast<StoreInst>(User))
    if (!SI->isVolatile() && SI->getOperand(0) != AI)
      return;// Store is ok if storing INTO the pointer, not storing the pointer

  // Anything else must be a GEP.  Note that a volatile load/store falls
  // through the checks above and is rejected here, since it is not a GEP.
  GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User);
  if (GEPI == 0)
    return MarkUnsafe(Info);

  gep_type_iterator I = gep_type_begin(GEPI), E = gep_type_end(GEPI);

  // The GEP is not safe to transform if not of the form "GEP <ptr>, 0, <cst>".
  // (Constants are uniqued, so comparing against the null-value singleton by
  // pointer identity is a valid "is zero" test.)
  if (I == E ||
      I.getOperand() != Constant::getNullValue(I.getOperand()->getType())) {
    return MarkUnsafe(Info);
  }

  ++I;
  if (I == E) return MarkUnsafe(Info);  // ran out of GEP indices??

  bool IsAllZeroIndices = true;

  // If the first index is a non-constant index into an array, see if we can
  // handle it as a special case.
  if (const ArrayType *AT = dyn_cast<ArrayType>(*I)) {
    if (!isa<ConstantInt>(I.getOperand())) {
      IsAllZeroIndices = 0;
      uint64_t NumElements = AT->getNumElements();

      // If this is an array index and the index is not constant, we cannot
      // promote... that is unless the array has exactly one or two elements in
      // it, in which case we CAN promote it, but we have to canonicalize this
      // out if this is the only problem.
      if ((NumElements == 1 || NumElements == 2) &&
          AllUsersAreLoads(GEPI)) {
        Info.needsCleanup = true;
        return;  // Canonicalization required!
      }
      return MarkUnsafe(Info);
    }
  }

  // Walk through the GEP type indices, checking the types that this indexes
  // into.
  for (; I != E; ++I) {
    // Ignore struct elements, no extra checking needed for these.
    if (isa<StructType>(*I))
      continue;

    ConstantInt *IdxVal = dyn_cast<ConstantInt>(I.getOperand());
    if (!IdxVal) return MarkUnsafe(Info);

    // Are all indices still zero?
    IsAllZeroIndices &= IdxVal->isZero();

    if (const ArrayType *AT = dyn_cast<ArrayType>(*I)) {
      // This GEP indexes an array.  Verify that this is an in-range constant
      // integer. Specifically, consider A[0][i]. We cannot know that the user
      // isn't doing invalid things like allowing i to index an out-of-range
      // subscript that accesses A[1].  Because of this, we have to reject SROA
      // of any accesses into structs where any of the components are variables.
      if (IdxVal->getZExtValue() >= AT->getNumElements())
        return MarkUnsafe(Info);
    } else if (const VectorType *VT = dyn_cast<VectorType>(*I)) {
      if (IdxVal->getZExtValue() >= VT->getNumElements())
        return MarkUnsafe(Info);
    }
  }

  // If there are any non-simple uses of this getelementptr, make sure to reject
  // them.
  return isSafeElementUse(GEPI, IsAllZeroIndices, AI, Info);
}

/// isSafeMemIntrinsicOnAllocation - Check if the specified memory
/// intrinsic can be promoted by SROA.  At this point, we know that the operand
/// of the memintrinsic is a pointer to the beginning of the allocation.
/// OpNo is the operand index through which the intrinsic uses that pointer.
void SROA::isSafeMemIntrinsicOnAllocation(MemIntrinsic *MI, AllocaInst *AI,
                                          unsigned OpNo, AllocaInfo &Info) {
  // If not constant length, give up.
  ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength());
  if (!Length) return MarkUnsafe(Info);

  // If not the whole aggregate, give up.
  if (Length->getZExtValue() !=
      TD->getTypeAllocSize(AI->getType()->getElementType()))
    return MarkUnsafe(Info);

  // We only know about memcpy/memset/memmove.
  if (!isa<MemIntrinsic>(MI))
    return MarkUnsafe(Info);

  // Otherwise, we can transform it.  Determine whether this is a memcpy/set
  // into or out of the aggregate: operand 1 is the destination pointer,
  // operand 2 the source.
  if (OpNo == 1)
    Info.isMemCpyDst = true;
  else {
    assert(OpNo == 2);
    Info.isMemCpySrc = true;
  }
}

/// isSafeUseOfBitCastedAllocation - Check if all users of this bitcast
/// from an alloca are safe for SROA of that alloca.
void SROA::isSafeUseOfBitCastedAllocation(BitCastInst *BC, AllocaInst *AI,
                                          AllocaInfo &Info) {
  for (Value::use_iterator UI = BC->use_begin(), E = BC->use_end();
       UI != E; ++UI) {
    if (BitCastInst *BCU = dyn_cast<BitCastInst>(UI)) {
      // A cast of a cast: recurse through it.
      isSafeUseOfBitCastedAllocation(BCU, AI, Info);
    } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(UI)) {
      isSafeMemIntrinsicOnAllocation(MI, AI, UI.getOperandNo(), Info);
    } else if (StoreInst *SI = dyn_cast<StoreInst>(UI)) {
      if (SI->isVolatile())
        return MarkUnsafe(Info);

      // If storing the entire alloca in one chunk through a bitcasted pointer
      // to integer, we can transform it.  This happens (for example) when you
      // cast a {i32,i32}* to i64* and store through it.  This is similar to the
      // memcpy case and occurs in various "byval" cases and emulated memcpys.
      if (isa<IntegerType>(SI->getOperand(0)->getType()) &&
          TD->getTypeAllocSize(SI->getOperand(0)->getType()) ==
          TD->getTypeAllocSize(AI->getType()->getElementType())) {
        Info.isMemCpyDst = true;
        continue;
      }
      return MarkUnsafe(Info);
    } else if (LoadInst *LI = dyn_cast<LoadInst>(UI)) {
      if (LI->isVolatile())
        return MarkUnsafe(Info);

      // If loading the entire alloca in one chunk through a bitcasted pointer
      // to integer, we can transform it.  This happens (for example) when you
      // cast a {i32,i32}* to i64* and load through it.  This is similar to the
      // memcpy case and occurs in various "byval" cases and emulated memcpys.
      if (isa<IntegerType>(LI->getType()) &&
          TD->getTypeAllocSize(LI->getType()) ==
          TD->getTypeAllocSize(AI->getType()->getElementType())) {
        Info.isMemCpySrc = true;
        continue;
      }
      return MarkUnsafe(Info);
    } else if (isa<DbgInfoIntrinsic>(UI)) {
      // If one user is DbgInfoIntrinsic then check if all users are
      // DbgInfoIntrinsics.
      if (OnlyUsedByDbgInfoIntrinsics(BC)) {
        Info.needsCleanup = true;
        return;
      }
      else
        MarkUnsafe(Info);   // Mixed debug/non-debug uses: caught just below.
    }
    else {
      return MarkUnsafe(Info);
    }
    if (Info.isUnsafe) return;
  }
}

/// RewriteBitCastUserOfAlloca - BCInst (transitively) bitcasts AI, or indexes
/// to its first element.  Transform users of the cast to use the new values
/// instead.
void SROA::RewriteBitCastUserOfAlloca(Instruction *BCInst, AllocaInst *AI,
                                      SmallVector<AllocaInst*, 32> &NewElts) {
  Value::use_iterator UI = BCInst->use_begin(), UE = BCInst->use_end();
  while (UI != UE) {
    // Advance the iterator before rewriting: the current user may be erased
    // by the rewrite helpers below.
    Instruction *User = cast<Instruction>(*UI++);
    if (BitCastInst *BCU = dyn_cast<BitCastInst>(User)) {
      RewriteBitCastUserOfAlloca(BCU, AI, NewElts);
      if (BCU->use_empty()) BCU->eraseFromParent();
      continue;
    }

    if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) {
      // This must be memcpy/memmove/memset of the entire aggregate.
      // Split into one per element.
      RewriteMemIntrinUserOfAlloca(MI, BCInst, AI, NewElts);
      continue;
    }

    if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
      // If this is a store of the entire alloca from an integer, rewrite it.
      RewriteStoreUserOfWholeAlloca(SI, AI, NewElts);
      continue;
    }

    if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
      // If this is a load of the entire alloca to an integer, rewrite it.
      RewriteLoadUserOfWholeAlloca(LI, AI, NewElts);
      continue;
    }

    // Otherwise it must be some other user of a gep of the first pointer.  Just
    // leave these alone.
    continue;
  }
}

/// RewriteMemIntrinUserOfAlloca - MI is a memcpy/memset/memmove from or to AI.
/// Rewrite it to copy or set the elements of the scalarized memory.
void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst,
                                        AllocaInst *AI,
                                        SmallVector<AllocaInst*, 32> &NewElts) {

  // If this is a memcpy/memmove, construct the other pointer as the
  // appropriate type.  The "Other" pointer is the pointer that goes to memory
  // that doesn't have anything to do with the alloca that we are promoting. For
  // memset, this Value* stays null.
760193323Sed Value *OtherPtr = 0; 761198090Srdivacky LLVMContext &Context = MI->getContext(); 762193323Sed unsigned MemAlignment = MI->getAlignment(); 763193323Sed if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) { // memmove/memcopy 764193323Sed if (BCInst == MTI->getRawDest()) 765193323Sed OtherPtr = MTI->getRawSource(); 766193323Sed else { 767193323Sed assert(BCInst == MTI->getRawSource()); 768193323Sed OtherPtr = MTI->getRawDest(); 769193323Sed } 770193323Sed } 771200581Srdivacky 772200581Srdivacky // Keep track of the other intrinsic argument, so it can be removed if it 773200581Srdivacky // is dead when the intrinsic is replaced. 774200581Srdivacky Value *PossiblyDead = OtherPtr; 775193323Sed 776193323Sed // If there is an other pointer, we want to convert it to the same pointer 777193323Sed // type as AI has, so we can GEP through it safely. 778193323Sed if (OtherPtr) { 779193323Sed // It is likely that OtherPtr is a bitcast, if so, remove it. 780193323Sed if (BitCastInst *BC = dyn_cast<BitCastInst>(OtherPtr)) 781193323Sed OtherPtr = BC->getOperand(0); 782193323Sed // All zero GEPs are effectively bitcasts. 783193323Sed if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(OtherPtr)) 784193323Sed if (GEP->hasAllZeroIndices()) 785193323Sed OtherPtr = GEP->getOperand(0); 786193323Sed 787193323Sed if (ConstantExpr *BCE = dyn_cast<ConstantExpr>(OtherPtr)) 788193323Sed if (BCE->getOpcode() == Instruction::BitCast) 789193323Sed OtherPtr = BCE->getOperand(0); 790193323Sed 791193323Sed // If the pointer is not the right type, insert a bitcast to the right 792193323Sed // type. 793193323Sed if (OtherPtr->getType() != AI->getType()) 794193323Sed OtherPtr = new BitCastInst(OtherPtr, AI->getType(), OtherPtr->getName(), 795193323Sed MI); 796193323Sed } 797193323Sed 798193323Sed // Process each element of the aggregate. 
799193323Sed Value *TheFn = MI->getOperand(0); 800193323Sed const Type *BytePtrTy = MI->getRawDest()->getType(); 801193323Sed bool SROADest = MI->getRawDest() == BCInst; 802193323Sed 803198090Srdivacky Constant *Zero = Constant::getNullValue(Type::getInt32Ty(MI->getContext())); 804193323Sed 805193323Sed for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { 806193323Sed // If this is a memcpy/memmove, emit a GEP of the other element address. 807193323Sed Value *OtherElt = 0; 808193323Sed unsigned OtherEltAlign = MemAlignment; 809193323Sed 810193323Sed if (OtherPtr) { 811198090Srdivacky Value *Idx[2] = { Zero, 812198090Srdivacky ConstantInt::get(Type::getInt32Ty(MI->getContext()), i) }; 813193323Sed OtherElt = GetElementPtrInst::Create(OtherPtr, Idx, Idx + 2, 814198090Srdivacky OtherPtr->getNameStr()+"."+Twine(i), 815193323Sed MI); 816193323Sed uint64_t EltOffset; 817193323Sed const PointerType *OtherPtrTy = cast<PointerType>(OtherPtr->getType()); 818193323Sed if (const StructType *ST = 819193323Sed dyn_cast<StructType>(OtherPtrTy->getElementType())) { 820193323Sed EltOffset = TD->getStructLayout(ST)->getElementOffset(i); 821193323Sed } else { 822193323Sed const Type *EltTy = 823193323Sed cast<SequentialType>(OtherPtr->getType())->getElementType(); 824193323Sed EltOffset = TD->getTypeAllocSize(EltTy)*i; 825193323Sed } 826193323Sed 827193323Sed // The alignment of the other pointer is the guaranteed alignment of the 828193323Sed // element, which is affected by both the known alignment of the whole 829193323Sed // mem intrinsic and the alignment of the element. If the alignment of 830193323Sed // the memcpy (f.e.) is 32 but the element is at a 4-byte offset, then the 831193323Sed // known alignment is just 4 bytes. 
832193323Sed OtherEltAlign = (unsigned)MinAlign(OtherEltAlign, EltOffset); 833193323Sed } 834193323Sed 835193323Sed Value *EltPtr = NewElts[i]; 836193323Sed const Type *EltTy = cast<PointerType>(EltPtr->getType())->getElementType(); 837193323Sed 838193323Sed // If we got down to a scalar, insert a load or store as appropriate. 839193323Sed if (EltTy->isSingleValueType()) { 840193323Sed if (isa<MemTransferInst>(MI)) { 841193323Sed if (SROADest) { 842193323Sed // From Other to Alloca. 843193323Sed Value *Elt = new LoadInst(OtherElt, "tmp", false, OtherEltAlign, MI); 844193323Sed new StoreInst(Elt, EltPtr, MI); 845193323Sed } else { 846193323Sed // From Alloca to Other. 847193323Sed Value *Elt = new LoadInst(EltPtr, "tmp", MI); 848193323Sed new StoreInst(Elt, OtherElt, false, OtherEltAlign, MI); 849193323Sed } 850193323Sed continue; 851193323Sed } 852193323Sed assert(isa<MemSetInst>(MI)); 853193323Sed 854193323Sed // If the stored element is zero (common case), just store a null 855193323Sed // constant. 856193323Sed Constant *StoreVal; 857193323Sed if (ConstantInt *CI = dyn_cast<ConstantInt>(MI->getOperand(2))) { 858193323Sed if (CI->isZero()) { 859198090Srdivacky StoreVal = Constant::getNullValue(EltTy); // 0.0, null, 0, <0,0> 860193323Sed } else { 861193323Sed // If EltTy is a vector type, get the element type. 862194612Sed const Type *ValTy = EltTy->getScalarType(); 863194612Sed 864193323Sed // Construct an integer with the right value. 865193323Sed unsigned EltSize = TD->getTypeSizeInBits(ValTy); 866193323Sed APInt OneVal(EltSize, CI->getZExtValue()); 867193323Sed APInt TotalVal(OneVal); 868193323Sed // Set each byte. 869193323Sed for (unsigned i = 0; 8*i < EltSize; ++i) { 870193323Sed TotalVal = TotalVal.shl(8); 871193323Sed TotalVal |= OneVal; 872193323Sed } 873193323Sed 874193323Sed // Convert the integer value to the appropriate type. 
875198090Srdivacky StoreVal = ConstantInt::get(Context, TotalVal); 876193323Sed if (isa<PointerType>(ValTy)) 877198090Srdivacky StoreVal = ConstantExpr::getIntToPtr(StoreVal, ValTy); 878193323Sed else if (ValTy->isFloatingPoint()) 879198090Srdivacky StoreVal = ConstantExpr::getBitCast(StoreVal, ValTy); 880193323Sed assert(StoreVal->getType() == ValTy && "Type mismatch!"); 881193323Sed 882193323Sed // If the requested value was a vector constant, create it. 883193323Sed if (EltTy != ValTy) { 884193323Sed unsigned NumElts = cast<VectorType>(ValTy)->getNumElements(); 885193323Sed SmallVector<Constant*, 16> Elts(NumElts, StoreVal); 886198090Srdivacky StoreVal = ConstantVector::get(&Elts[0], NumElts); 887193323Sed } 888193323Sed } 889193323Sed new StoreInst(StoreVal, EltPtr, MI); 890193323Sed continue; 891193323Sed } 892193323Sed // Otherwise, if we're storing a byte variable, use a memset call for 893193323Sed // this element. 894193323Sed } 895193323Sed 896193323Sed // Cast the element pointer to BytePtrTy. 897193323Sed if (EltPtr->getType() != BytePtrTy) 898193323Sed EltPtr = new BitCastInst(EltPtr, BytePtrTy, EltPtr->getNameStr(), MI); 899193323Sed 900193323Sed // Cast the other pointer (if we have one) to BytePtrTy. 901193323Sed if (OtherElt && OtherElt->getType() != BytePtrTy) 902193323Sed OtherElt = new BitCastInst(OtherElt, BytePtrTy,OtherElt->getNameStr(), 903193323Sed MI); 904193323Sed 905193323Sed unsigned EltSize = TD->getTypeAllocSize(EltTy); 906193323Sed 907193323Sed // Finally, insert the meminst for this element. 908193323Sed if (isa<MemTransferInst>(MI)) { 909193323Sed Value *Ops[] = { 910193323Sed SROADest ? EltPtr : OtherElt, // Dest ptr 911193323Sed SROADest ? 
OtherElt : EltPtr, // Src ptr 912198090Srdivacky ConstantInt::get(MI->getOperand(3)->getType(), EltSize), // Size 913198090Srdivacky // Align 914198090Srdivacky ConstantInt::get(Type::getInt32Ty(MI->getContext()), OtherEltAlign) 915193323Sed }; 916193323Sed CallInst::Create(TheFn, Ops, Ops + 4, "", MI); 917193323Sed } else { 918193323Sed assert(isa<MemSetInst>(MI)); 919193323Sed Value *Ops[] = { 920193323Sed EltPtr, MI->getOperand(2), // Dest, Value, 921198090Srdivacky ConstantInt::get(MI->getOperand(3)->getType(), EltSize), // Size 922193323Sed Zero // Align 923193323Sed }; 924193323Sed CallInst::Create(TheFn, Ops, Ops + 4, "", MI); 925193323Sed } 926193323Sed } 927193323Sed MI->eraseFromParent(); 928200581Srdivacky if (PossiblyDead) 929200581Srdivacky RecursivelyDeleteTriviallyDeadInstructions(PossiblyDead); 930193323Sed} 931193323Sed 932200581Srdivacky/// RewriteStoreUserOfWholeAlloca - We found a store of an integer that 933193323Sed/// overwrites the entire allocation. Extract out the pieces of the stored 934193323Sed/// integer and store them individually. 935198892Srdivackyvoid SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI, 936193323Sed SmallVector<AllocaInst*, 32> &NewElts){ 937193323Sed // Extract each element out of the integer according to its structure offset 938193323Sed // and store the element value to the individual alloca. 939193323Sed Value *SrcVal = SI->getOperand(0); 940193323Sed const Type *AllocaEltTy = AI->getType()->getElementType(); 941193323Sed uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy); 942193323Sed 943193323Sed // If this isn't a store of an integer to the whole alloca, it may be a store 944193323Sed // to the first element. Just ignore the store in this case and normal SROA 945193323Sed // will handle it. 
946193323Sed if (!isa<IntegerType>(SrcVal->getType()) || 947193323Sed TD->getTypeAllocSizeInBits(SrcVal->getType()) != AllocaSizeBits) 948193323Sed return; 949193323Sed // Handle tail padding by extending the operand 950193323Sed if (TD->getTypeSizeInBits(SrcVal->getType()) != AllocaSizeBits) 951195340Sed SrcVal = new ZExtInst(SrcVal, 952198090Srdivacky IntegerType::get(SI->getContext(), AllocaSizeBits), 953198090Srdivacky "", SI); 954193323Sed 955198090Srdivacky DEBUG(errs() << "PROMOTING STORE TO WHOLE ALLOCA: " << *AI << '\n' << *SI 956198090Srdivacky << '\n'); 957193323Sed 958193323Sed // There are two forms here: AI could be an array or struct. Both cases 959193323Sed // have different ways to compute the element offset. 960193323Sed if (const StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) { 961193323Sed const StructLayout *Layout = TD->getStructLayout(EltSTy); 962193323Sed 963193323Sed for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { 964193323Sed // Get the number of bits to shift SrcVal to get the value. 965193323Sed const Type *FieldTy = EltSTy->getElementType(i); 966193323Sed uint64_t Shift = Layout->getElementOffsetInBits(i); 967193323Sed 968193323Sed if (TD->isBigEndian()) 969193323Sed Shift = AllocaSizeBits-Shift-TD->getTypeAllocSizeInBits(FieldTy); 970193323Sed 971193323Sed Value *EltVal = SrcVal; 972193323Sed if (Shift) { 973198090Srdivacky Value *ShiftVal = ConstantInt::get(EltVal->getType(), Shift); 974193323Sed EltVal = BinaryOperator::CreateLShr(EltVal, ShiftVal, 975193323Sed "sroa.store.elt", SI); 976193323Sed } 977193323Sed 978193323Sed // Truncate down to an integer of the right size. 979193323Sed uint64_t FieldSizeBits = TD->getTypeSizeInBits(FieldTy); 980193323Sed 981193323Sed // Ignore zero sized fields like {}, they obviously contain no data. 
982193323Sed if (FieldSizeBits == 0) continue; 983193323Sed 984193323Sed if (FieldSizeBits != AllocaSizeBits) 985195340Sed EltVal = new TruncInst(EltVal, 986198090Srdivacky IntegerType::get(SI->getContext(), FieldSizeBits), 987198090Srdivacky "", SI); 988193323Sed Value *DestField = NewElts[i]; 989193323Sed if (EltVal->getType() == FieldTy) { 990193323Sed // Storing to an integer field of this size, just do it. 991193323Sed } else if (FieldTy->isFloatingPoint() || isa<VectorType>(FieldTy)) { 992193323Sed // Bitcast to the right element type (for fp/vector values). 993193323Sed EltVal = new BitCastInst(EltVal, FieldTy, "", SI); 994193323Sed } else { 995193323Sed // Otherwise, bitcast the dest pointer (for aggregates). 996193323Sed DestField = new BitCastInst(DestField, 997198090Srdivacky PointerType::getUnqual(EltVal->getType()), 998193323Sed "", SI); 999193323Sed } 1000193323Sed new StoreInst(EltVal, DestField, SI); 1001193323Sed } 1002193323Sed 1003193323Sed } else { 1004193323Sed const ArrayType *ATy = cast<ArrayType>(AllocaEltTy); 1005193323Sed const Type *ArrayEltTy = ATy->getElementType(); 1006193323Sed uint64_t ElementOffset = TD->getTypeAllocSizeInBits(ArrayEltTy); 1007193323Sed uint64_t ElementSizeBits = TD->getTypeSizeInBits(ArrayEltTy); 1008193323Sed 1009193323Sed uint64_t Shift; 1010193323Sed 1011193323Sed if (TD->isBigEndian()) 1012193323Sed Shift = AllocaSizeBits-ElementOffset; 1013193323Sed else 1014193323Sed Shift = 0; 1015193323Sed 1016193323Sed for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { 1017193323Sed // Ignore zero sized fields like {}, they obviously contain no data. 
1018193323Sed if (ElementSizeBits == 0) continue; 1019193323Sed 1020193323Sed Value *EltVal = SrcVal; 1021193323Sed if (Shift) { 1022198090Srdivacky Value *ShiftVal = ConstantInt::get(EltVal->getType(), Shift); 1023193323Sed EltVal = BinaryOperator::CreateLShr(EltVal, ShiftVal, 1024193323Sed "sroa.store.elt", SI); 1025193323Sed } 1026193323Sed 1027193323Sed // Truncate down to an integer of the right size. 1028193323Sed if (ElementSizeBits != AllocaSizeBits) 1029195340Sed EltVal = new TruncInst(EltVal, 1030198090Srdivacky IntegerType::get(SI->getContext(), 1031198090Srdivacky ElementSizeBits),"",SI); 1032193323Sed Value *DestField = NewElts[i]; 1033193323Sed if (EltVal->getType() == ArrayEltTy) { 1034193323Sed // Storing to an integer field of this size, just do it. 1035193323Sed } else if (ArrayEltTy->isFloatingPoint() || isa<VectorType>(ArrayEltTy)) { 1036193323Sed // Bitcast to the right element type (for fp/vector values). 1037193323Sed EltVal = new BitCastInst(EltVal, ArrayEltTy, "", SI); 1038193323Sed } else { 1039193323Sed // Otherwise, bitcast the dest pointer (for aggregates). 1040193323Sed DestField = new BitCastInst(DestField, 1041198090Srdivacky PointerType::getUnqual(EltVal->getType()), 1042193323Sed "", SI); 1043193323Sed } 1044193323Sed new StoreInst(EltVal, DestField, SI); 1045193323Sed 1046193323Sed if (TD->isBigEndian()) 1047193323Sed Shift -= ElementOffset; 1048193323Sed else 1049193323Sed Shift += ElementOffset; 1050193323Sed } 1051193323Sed } 1052193323Sed 1053193323Sed SI->eraseFromParent(); 1054193323Sed} 1055193323Sed 1056200581Srdivacky/// RewriteLoadUserOfWholeAlloca - We found a load of the entire allocation to 1057193323Sed/// an integer. Load the individual pieces to form the aggregate value. 
1058198892Srdivackyvoid SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI, 1059193323Sed SmallVector<AllocaInst*, 32> &NewElts) { 1060193323Sed // Extract each element out of the NewElts according to its structure offset 1061193323Sed // and form the result value. 1062193323Sed const Type *AllocaEltTy = AI->getType()->getElementType(); 1063193323Sed uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy); 1064193323Sed 1065193323Sed // If this isn't a load of the whole alloca to an integer, it may be a load 1066193323Sed // of the first element. Just ignore the load in this case and normal SROA 1067193323Sed // will handle it. 1068193323Sed if (!isa<IntegerType>(LI->getType()) || 1069193323Sed TD->getTypeAllocSizeInBits(LI->getType()) != AllocaSizeBits) 1070193323Sed return; 1071193323Sed 1072198090Srdivacky DEBUG(errs() << "PROMOTING LOAD OF WHOLE ALLOCA: " << *AI << '\n' << *LI 1073198090Srdivacky << '\n'); 1074193323Sed 1075193323Sed // There are two forms here: AI could be an array or struct. Both cases 1076193323Sed // have different ways to compute the element offset. 1077193323Sed const StructLayout *Layout = 0; 1078193323Sed uint64_t ArrayEltBitOffset = 0; 1079193323Sed if (const StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) { 1080193323Sed Layout = TD->getStructLayout(EltSTy); 1081193323Sed } else { 1082193323Sed const Type *ArrayEltTy = cast<ArrayType>(AllocaEltTy)->getElementType(); 1083193323Sed ArrayEltBitOffset = TD->getTypeAllocSizeInBits(ArrayEltTy); 1084193323Sed } 1085193323Sed 1086198090Srdivacky Value *ResultVal = 1087198090Srdivacky Constant::getNullValue(IntegerType::get(LI->getContext(), AllocaSizeBits)); 1088198090Srdivacky 1089193323Sed for (unsigned i = 0, e = NewElts.size(); i != e; ++i) { 1090193323Sed // Load the value from the alloca. If the NewElt is an aggregate, cast 1091193323Sed // the pointer to an integer of the same size before doing the load. 
1092193323Sed Value *SrcField = NewElts[i]; 1093193323Sed const Type *FieldTy = 1094193323Sed cast<PointerType>(SrcField->getType())->getElementType(); 1095193323Sed uint64_t FieldSizeBits = TD->getTypeSizeInBits(FieldTy); 1096193323Sed 1097193323Sed // Ignore zero sized fields like {}, they obviously contain no data. 1098193323Sed if (FieldSizeBits == 0) continue; 1099193323Sed 1100198090Srdivacky const IntegerType *FieldIntTy = IntegerType::get(LI->getContext(), 1101198090Srdivacky FieldSizeBits); 1102193323Sed if (!isa<IntegerType>(FieldTy) && !FieldTy->isFloatingPoint() && 1103193323Sed !isa<VectorType>(FieldTy)) 1104195340Sed SrcField = new BitCastInst(SrcField, 1105198090Srdivacky PointerType::getUnqual(FieldIntTy), 1106193323Sed "", LI); 1107193323Sed SrcField = new LoadInst(SrcField, "sroa.load.elt", LI); 1108193323Sed 1109193323Sed // If SrcField is a fp or vector of the right size but that isn't an 1110193323Sed // integer type, bitcast to an integer so we can shift it. 1111193323Sed if (SrcField->getType() != FieldIntTy) 1112193323Sed SrcField = new BitCastInst(SrcField, FieldIntTy, "", LI); 1113193323Sed 1114193323Sed // Zero extend the field to be the same size as the final alloca so that 1115193323Sed // we can shift and insert it. 1116193323Sed if (SrcField->getType() != ResultVal->getType()) 1117193323Sed SrcField = new ZExtInst(SrcField, ResultVal->getType(), "", LI); 1118193323Sed 1119193323Sed // Determine the number of bits to shift SrcField. 1120193323Sed uint64_t Shift; 1121193323Sed if (Layout) // Struct case. 1122193323Sed Shift = Layout->getElementOffsetInBits(i); 1123193323Sed else // Array case. 
1124193323Sed Shift = i*ArrayEltBitOffset; 1125193323Sed 1126193323Sed if (TD->isBigEndian()) 1127193323Sed Shift = AllocaSizeBits-Shift-FieldIntTy->getBitWidth(); 1128193323Sed 1129193323Sed if (Shift) { 1130198090Srdivacky Value *ShiftVal = ConstantInt::get(SrcField->getType(), Shift); 1131193323Sed SrcField = BinaryOperator::CreateShl(SrcField, ShiftVal, "", LI); 1132193323Sed } 1133193323Sed 1134193323Sed ResultVal = BinaryOperator::CreateOr(SrcField, ResultVal, "", LI); 1135193323Sed } 1136193323Sed 1137193323Sed // Handle tail padding by truncating the result 1138193323Sed if (TD->getTypeSizeInBits(LI->getType()) != AllocaSizeBits) 1139193323Sed ResultVal = new TruncInst(ResultVal, LI->getType(), "", LI); 1140193323Sed 1141193323Sed LI->replaceAllUsesWith(ResultVal); 1142193323Sed LI->eraseFromParent(); 1143193323Sed} 1144193323Sed 1145193323Sed 1146193323Sed/// HasPadding - Return true if the specified type has any structure or 1147193323Sed/// alignment padding, false otherwise. 1148193323Sedstatic bool HasPadding(const Type *Ty, const TargetData &TD) { 1149193323Sed if (const StructType *STy = dyn_cast<StructType>(Ty)) { 1150193323Sed const StructLayout *SL = TD.getStructLayout(STy); 1151193323Sed unsigned PrevFieldBitOffset = 0; 1152193323Sed for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) { 1153193323Sed unsigned FieldBitOffset = SL->getElementOffsetInBits(i); 1154193323Sed 1155193323Sed // Padding in sub-elements? 1156193323Sed if (HasPadding(STy->getElementType(i), TD)) 1157193323Sed return true; 1158193323Sed 1159193323Sed // Check to see if there is any padding between this element and the 1160193323Sed // previous one. 
1161193323Sed if (i) { 1162193323Sed unsigned PrevFieldEnd = 1163193323Sed PrevFieldBitOffset+TD.getTypeSizeInBits(STy->getElementType(i-1)); 1164193323Sed if (PrevFieldEnd < FieldBitOffset) 1165193323Sed return true; 1166193323Sed } 1167193323Sed 1168193323Sed PrevFieldBitOffset = FieldBitOffset; 1169193323Sed } 1170193323Sed 1171193323Sed // Check for tail padding. 1172193323Sed if (unsigned EltCount = STy->getNumElements()) { 1173193323Sed unsigned PrevFieldEnd = PrevFieldBitOffset + 1174193323Sed TD.getTypeSizeInBits(STy->getElementType(EltCount-1)); 1175193323Sed if (PrevFieldEnd < SL->getSizeInBits()) 1176193323Sed return true; 1177193323Sed } 1178193323Sed 1179193323Sed } else if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { 1180193323Sed return HasPadding(ATy->getElementType(), TD); 1181193323Sed } else if (const VectorType *VTy = dyn_cast<VectorType>(Ty)) { 1182193323Sed return HasPadding(VTy->getElementType(), TD); 1183193323Sed } 1184193323Sed return TD.getTypeSizeInBits(Ty) != TD.getTypeAllocSizeInBits(Ty); 1185193323Sed} 1186193323Sed 1187193323Sed/// isSafeStructAllocaToScalarRepl - Check to see if the specified allocation of 1188193323Sed/// an aggregate can be broken down into elements. Return 0 if not, 3 if safe, 1189193323Sed/// or 1 if safe after canonicalization has been performed. 1190198892Srdivackyint SROA::isSafeAllocaToScalarRepl(AllocaInst *AI) { 1191193323Sed // Loop over the use list of the alloca. We can only transform it if all of 1192193323Sed // the users are safe to transform. 
1193193323Sed AllocaInfo Info; 1194193323Sed 1195193323Sed for (Value::use_iterator I = AI->use_begin(), E = AI->use_end(); 1196193323Sed I != E; ++I) { 1197193323Sed isSafeUseOfAllocation(cast<Instruction>(*I), AI, Info); 1198193323Sed if (Info.isUnsafe) { 1199198090Srdivacky DEBUG(errs() << "Cannot transform: " << *AI << "\n due to user: " 1200198090Srdivacky << **I << '\n'); 1201193323Sed return 0; 1202193323Sed } 1203193323Sed } 1204193323Sed 1205193323Sed // Okay, we know all the users are promotable. If the aggregate is a memcpy 1206193323Sed // source and destination, we have to be careful. In particular, the memcpy 1207193323Sed // could be moving around elements that live in structure padding of the LLVM 1208193323Sed // types, but may actually be used. In these cases, we refuse to promote the 1209193323Sed // struct. 1210193323Sed if (Info.isMemCpySrc && Info.isMemCpyDst && 1211193323Sed HasPadding(AI->getType()->getElementType(), *TD)) 1212193323Sed return 0; 1213193323Sed 1214193323Sed // If we require cleanup, return 1, otherwise return 3. 1215193323Sed return Info.needsCleanup ? 1 : 3; 1216193323Sed} 1217193323Sed 1218200581Srdivacky/// CleanupGEP - GEP is used by an Alloca, which can be promoted after the GEP 1219193323Sed/// is canonicalized here. 
1220193323Sedvoid SROA::CleanupGEP(GetElementPtrInst *GEPI) { 1221193323Sed gep_type_iterator I = gep_type_begin(GEPI); 1222193323Sed ++I; 1223193323Sed 1224193323Sed const ArrayType *AT = dyn_cast<ArrayType>(*I); 1225193323Sed if (!AT) 1226193323Sed return; 1227193323Sed 1228193323Sed uint64_t NumElements = AT->getNumElements(); 1229193323Sed 1230193323Sed if (isa<ConstantInt>(I.getOperand())) 1231193323Sed return; 1232193323Sed 1233193323Sed if (NumElements == 1) { 1234198090Srdivacky GEPI->setOperand(2, 1235198090Srdivacky Constant::getNullValue(Type::getInt32Ty(GEPI->getContext()))); 1236193323Sed return; 1237193323Sed } 1238193323Sed 1239193323Sed assert(NumElements == 2 && "Unhandled case!"); 1240193323Sed // All users of the GEP must be loads. At each use of the GEP, insert 1241193323Sed // two loads of the appropriate indexed GEP and select between them. 1242198090Srdivacky Value *IsOne = new ICmpInst(GEPI, ICmpInst::ICMP_NE, I.getOperand(), 1243198090Srdivacky Constant::getNullValue(I.getOperand()->getType()), 1244198090Srdivacky "isone"); 1245193323Sed // Insert the new GEP instructions, which are properly indexed. 1246193323Sed SmallVector<Value*, 8> Indices(GEPI->op_begin()+1, GEPI->op_end()); 1247198090Srdivacky Indices[1] = Constant::getNullValue(Type::getInt32Ty(GEPI->getContext())); 1248193323Sed Value *ZeroIdx = GetElementPtrInst::Create(GEPI->getOperand(0), 1249193323Sed Indices.begin(), 1250193323Sed Indices.end(), 1251193323Sed GEPI->getName()+".0", GEPI); 1252198090Srdivacky Indices[1] = ConstantInt::get(Type::getInt32Ty(GEPI->getContext()), 1); 1253193323Sed Value *OneIdx = GetElementPtrInst::Create(GEPI->getOperand(0), 1254193323Sed Indices.begin(), 1255193323Sed Indices.end(), 1256193323Sed GEPI->getName()+".1", GEPI); 1257193323Sed // Replace all loads of the variable index GEP with loads from both 1258193323Sed // indexes and a select. 
1259193323Sed while (!GEPI->use_empty()) { 1260193323Sed LoadInst *LI = cast<LoadInst>(GEPI->use_back()); 1261193323Sed Value *Zero = new LoadInst(ZeroIdx, LI->getName()+".0", LI); 1262193323Sed Value *One = new LoadInst(OneIdx , LI->getName()+".1", LI); 1263193323Sed Value *R = SelectInst::Create(IsOne, One, Zero, LI->getName(), LI); 1264193323Sed LI->replaceAllUsesWith(R); 1265193323Sed LI->eraseFromParent(); 1266193323Sed } 1267193323Sed GEPI->eraseFromParent(); 1268193323Sed} 1269193323Sed 1270193323Sed 1271193323Sed/// CleanupAllocaUsers - If SROA reported that it can promote the specified 1272193323Sed/// allocation, but only if cleaned up, perform the cleanups required. 1273198892Srdivackyvoid SROA::CleanupAllocaUsers(AllocaInst *AI) { 1274193323Sed // At this point, we know that the end result will be SROA'd and promoted, so 1275193323Sed // we can insert ugly code if required so long as sroa+mem2reg will clean it 1276193323Sed // up. 1277193323Sed for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); 1278193323Sed UI != E; ) { 1279193323Sed User *U = *UI++; 1280193323Sed if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U)) 1281193323Sed CleanupGEP(GEPI); 1282193630Sed else { 1283193630Sed Instruction *I = cast<Instruction>(U); 1284193323Sed SmallVector<DbgInfoIntrinsic *, 2> DbgInUses; 1285193323Sed if (!isa<StoreInst>(I) && OnlyUsedByDbgInfoIntrinsics(I, &DbgInUses)) { 1286193323Sed // Safe to remove debug info uses. 1287193323Sed while (!DbgInUses.empty()) { 1288193323Sed DbgInfoIntrinsic *DI = DbgInUses.back(); DbgInUses.pop_back(); 1289193323Sed DI->eraseFromParent(); 1290193323Sed } 1291193323Sed I->eraseFromParent(); 1292193323Sed } 1293193323Sed } 1294193323Sed } 1295193323Sed} 1296193323Sed 1297193323Sed/// MergeInType - Add the 'In' type to the accumulated type (Accum) so far at 1298193323Sed/// the offset specified by Offset (which is specified in bytes). 
1299193323Sed/// 1300193323Sed/// There are two cases we handle here: 1301193323Sed/// 1) A union of vector types of the same size and potentially its elements. 1302193323Sed/// Here we turn element accesses into insert/extract element operations. 1303193323Sed/// This promotes a <4 x float> with a store of float to the third element 1304193323Sed/// into a <4 x float> that uses insert element. 1305193323Sed/// 2) A fully general blob of memory, which we turn into some (potentially 1306193323Sed/// large) integer type with extract and insert operations where the loads 1307193323Sed/// and stores would mutate the memory. 1308193323Sedstatic void MergeInType(const Type *In, uint64_t Offset, const Type *&VecTy, 1309195340Sed unsigned AllocaSize, const TargetData &TD, 1310198090Srdivacky LLVMContext &Context) { 1311193323Sed // If this could be contributing to a vector, analyze it. 1312198090Srdivacky if (VecTy != Type::getVoidTy(Context)) { // either null or a vector type. 1313193323Sed 1314193323Sed // If the In type is a vector that is the same size as the alloca, see if it 1315193323Sed // matches the existing VecTy. 1316193323Sed if (const VectorType *VInTy = dyn_cast<VectorType>(In)) { 1317193323Sed if (VInTy->getBitWidth()/8 == AllocaSize && Offset == 0) { 1318193323Sed // If we're storing/loading a vector of the right size, allow it as a 1319193323Sed // vector. If this the first vector we see, remember the type so that 1320193323Sed // we know the element size. 
1321193323Sed if (VecTy == 0) 1322193323Sed VecTy = VInTy; 1323193323Sed return; 1324193323Sed } 1325198090Srdivacky } else if (In->isFloatTy() || In->isDoubleTy() || 1326193323Sed (isa<IntegerType>(In) && In->getPrimitiveSizeInBits() >= 8 && 1327193323Sed isPowerOf2_32(In->getPrimitiveSizeInBits()))) { 1328193323Sed // If we're accessing something that could be an element of a vector, see 1329193323Sed // if the implied vector agrees with what we already have and if Offset is 1330193323Sed // compatible with it. 1331193323Sed unsigned EltSize = In->getPrimitiveSizeInBits()/8; 1332193323Sed if (Offset % EltSize == 0 && 1333193323Sed AllocaSize % EltSize == 0 && 1334193323Sed (VecTy == 0 || 1335193323Sed cast<VectorType>(VecTy)->getElementType() 1336193323Sed ->getPrimitiveSizeInBits()/8 == EltSize)) { 1337193323Sed if (VecTy == 0) 1338198090Srdivacky VecTy = VectorType::get(In, AllocaSize/EltSize); 1339193323Sed return; 1340193323Sed } 1341193323Sed } 1342193323Sed } 1343193323Sed 1344193323Sed // Otherwise, we have a case that we can't handle with an optimized vector 1345193323Sed // form. We can still turn this into a large integer. 1346198090Srdivacky VecTy = Type::getVoidTy(Context); 1347193323Sed} 1348193323Sed 1349193323Sed/// CanConvertToScalar - V is a pointer. If we can convert the pointee and all 1350200581Srdivacky/// its accesses to a single vector type, return true and set VecTy to 1351193323Sed/// the new type. If we could convert the alloca into a single promotable 1352193323Sed/// integer, return true but set VecTy to VoidTy. Further, if the use is not a 1353193323Sed/// completely trivial use that mem2reg could promote, set IsNotTrivial. Offset 1354193323Sed/// is the current offset from the base of the alloca being analyzed. 1355193323Sed/// 1356193323Sed/// If we see at least one access to the value that is as a vector type, set the 1357193323Sed/// SawVec flag. 
1358193323Sedbool SROA::CanConvertToScalar(Value *V, bool &IsNotTrivial, const Type *&VecTy, 1359193323Sed bool &SawVec, uint64_t Offset, 1360193323Sed unsigned AllocaSize) { 1361193323Sed for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) { 1362193323Sed Instruction *User = cast<Instruction>(*UI); 1363193323Sed 1364193323Sed if (LoadInst *LI = dyn_cast<LoadInst>(User)) { 1365193323Sed // Don't break volatile loads. 1366193323Sed if (LI->isVolatile()) 1367193323Sed return false; 1368198090Srdivacky MergeInType(LI->getType(), Offset, VecTy, 1369198090Srdivacky AllocaSize, *TD, V->getContext()); 1370193323Sed SawVec |= isa<VectorType>(LI->getType()); 1371193323Sed continue; 1372193323Sed } 1373193323Sed 1374193323Sed if (StoreInst *SI = dyn_cast<StoreInst>(User)) { 1375193323Sed // Storing the pointer, not into the value? 1376193323Sed if (SI->getOperand(0) == V || SI->isVolatile()) return 0; 1377195340Sed MergeInType(SI->getOperand(0)->getType(), Offset, 1378198090Srdivacky VecTy, AllocaSize, *TD, V->getContext()); 1379193323Sed SawVec |= isa<VectorType>(SI->getOperand(0)->getType()); 1380193323Sed continue; 1381193323Sed } 1382193323Sed 1383193323Sed if (BitCastInst *BCI = dyn_cast<BitCastInst>(User)) { 1384193323Sed if (!CanConvertToScalar(BCI, IsNotTrivial, VecTy, SawVec, Offset, 1385193323Sed AllocaSize)) 1386193323Sed return false; 1387193323Sed IsNotTrivial = true; 1388193323Sed continue; 1389193323Sed } 1390193323Sed 1391193323Sed if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) { 1392193323Sed // If this is a GEP with a variable indices, we can't handle it. 1393193323Sed if (!GEP->hasAllConstantIndices()) 1394193323Sed return false; 1395193323Sed 1396193323Sed // Compute the offset that this GEP adds to the pointer. 
1397193323Sed SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end()); 1398193323Sed uint64_t GEPOffset = TD->getIndexedOffset(GEP->getOperand(0)->getType(), 1399193323Sed &Indices[0], Indices.size()); 1400193323Sed // See if all uses can be converted. 1401193323Sed if (!CanConvertToScalar(GEP, IsNotTrivial, VecTy, SawVec,Offset+GEPOffset, 1402193323Sed AllocaSize)) 1403193323Sed return false; 1404193323Sed IsNotTrivial = true; 1405193323Sed continue; 1406193323Sed } 1407193323Sed 1408193323Sed // If this is a constant sized memset of a constant value (e.g. 0) we can 1409193323Sed // handle it. 1410193323Sed if (MemSetInst *MSI = dyn_cast<MemSetInst>(User)) { 1411193323Sed // Store of constant value and constant size. 1412193323Sed if (isa<ConstantInt>(MSI->getValue()) && 1413193323Sed isa<ConstantInt>(MSI->getLength())) { 1414193323Sed IsNotTrivial = true; 1415193323Sed continue; 1416193323Sed } 1417193323Sed } 1418193323Sed 1419193323Sed // If this is a memcpy or memmove into or out of the whole allocation, we 1420193323Sed // can handle it like a load or store of the scalar type. 1421193323Sed if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(User)) { 1422193323Sed if (ConstantInt *Len = dyn_cast<ConstantInt>(MTI->getLength())) 1423193323Sed if (Len->getZExtValue() == AllocaSize && Offset == 0) { 1424193323Sed IsNotTrivial = true; 1425193323Sed continue; 1426193323Sed } 1427193323Sed } 1428193323Sed 1429193323Sed // Ignore dbg intrinsic. 1430193323Sed if (isa<DbgInfoIntrinsic>(User)) 1431193323Sed continue; 1432193323Sed 1433193323Sed // Otherwise, we cannot handle this! 1434193323Sed return false; 1435193323Sed } 1436193323Sed 1437193323Sed return true; 1438193323Sed} 1439193323Sed 1440193323Sed/// ConvertUsesToScalar - Convert all of the users of Ptr to use the new alloca 1441193323Sed/// directly. 
/// This happens when we are converting an "integer union" to a
/// single integer scalar, or when we are converting a "vector union" to a
/// vector with insert/extractelement instructions.
///
/// Offset is an offset from the original alloca, in bits that need to be
/// shifted to the right.  By the end of this, there should be no uses of Ptr.
void SROA::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset) {
  // Each iteration rewrites and erases one user, so the use list shrinks
  // until it is empty.
  while (!Ptr->use_empty()) {
    Instruction *User = cast<Instruction>(Ptr->use_back());

    if (BitCastInst *CI = dyn_cast<BitCastInst>(User)) {
      // A bitcast does not change the offset; recurse into its users.
      ConvertUsesToScalar(CI, NewAI, Offset);
      CI->eraseFromParent();
      continue;
    }

    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) {
      // Compute the offset that this GEP adds to the pointer.
      SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end());
      uint64_t GEPOffset = TD->getIndexedOffset(GEP->getOperand(0)->getType(),
                                                &Indices[0], Indices.size());
      // GEPOffset is in bytes; Offset is tracked in bits.
      ConvertUsesToScalar(GEP, NewAI, Offset+GEPOffset*8);
      GEP->eraseFromParent();
      continue;
    }

    IRBuilder<> Builder(User->getParent(), User);

    if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
      // The load is a bit extract from NewAI shifted right by Offset bits.
      Value *LoadedVal = Builder.CreateLoad(NewAI, "tmp");
      Value *NewLoadVal
        = ConvertScalar_ExtractValue(LoadedVal, LI->getType(), Offset, Builder);
      LI->replaceAllUsesWith(NewLoadVal);
      LI->eraseFromParent();
      continue;
    }

    if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
      assert(SI->getOperand(0) != Ptr && "Consistency error!");
      // Read-modify-write: load the whole scalar, splice in the stored bits,
      // and write it back.
      // FIXME: Remove once builder has Twine API.
      Value *Old = Builder.CreateLoad(NewAI,
                                      (NewAI->getName()+".in").str().c_str());
      Value *New = ConvertScalar_InsertValue(SI->getOperand(0), Old, Offset,
                                             Builder);
      Builder.CreateStore(New, NewAI);
      SI->eraseFromParent();
      continue;
    }

    // If this is a constant sized memset of a constant value (e.g. 0) we can
    // transform it into a store of the expanded constant value.
    if (MemSetInst *MSI = dyn_cast<MemSetInst>(User)) {
      assert(MSI->getRawDest() == Ptr && "Consistency error!");
      unsigned NumBytes = cast<ConstantInt>(MSI->getLength())->getZExtValue();
      if (NumBytes != 0) {
        unsigned Val = cast<ConstantInt>(MSI->getValue())->getZExtValue();

        // Compute the value replicated the right number of times.
        APInt APVal(NumBytes*8, Val);

        // Splat the value if non-zero.
        if (Val)
          for (unsigned i = 1; i != NumBytes; ++i)
            APVal |= APVal << 8;

        // FIXME: Remove once builder has Twine API.
        Value *Old = Builder.CreateLoad(NewAI,
                                        (NewAI->getName()+".in").str().c_str());
        Value *New = ConvertScalar_InsertValue(
                                    ConstantInt::get(User->getContext(), APVal),
                                               Old, Offset, Builder);
        Builder.CreateStore(New, NewAI);
      }
      MSI->eraseFromParent();
      continue;
    }

    // If this is a memcpy or memmove into or out of the whole allocation, we
    // can handle it like a load or store of the scalar type.
    if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(User)) {
      assert(Offset == 0 && "must be store to start of alloca");

      // If the source and destination are both to the same alloca, then this is
      // a noop copy-to-self, just delete it.  Otherwise, emit a load and store
      // as appropriate.
      AllocaInst *OrigAI = cast<AllocaInst>(Ptr->getUnderlyingObject());

      if (MTI->getSource()->getUnderlyingObject() != OrigAI) {
        // Dest must be OrigAI, change this to be a load from the original
        // pointer (bitcasted), then a store to our new alloca.
        assert(MTI->getRawDest() == Ptr && "Neither use is of pointer?");
        Value *SrcPtr = MTI->getSource();
        SrcPtr = Builder.CreateBitCast(SrcPtr, NewAI->getType());

        LoadInst *SrcVal = Builder.CreateLoad(SrcPtr, "srcval");
        SrcVal->setAlignment(MTI->getAlignment());
        Builder.CreateStore(SrcVal, NewAI);
      } else if (MTI->getDest()->getUnderlyingObject() != OrigAI) {
        // Src must be OrigAI, change this to be a load from NewAI then a store
        // through the original dest pointer (bitcasted).
        assert(MTI->getRawSource() == Ptr && "Neither use is of pointer?");
        LoadInst *SrcVal = Builder.CreateLoad(NewAI, "srcval");

        Value *DstPtr = Builder.CreateBitCast(MTI->getDest(), NewAI->getType());
        StoreInst *NewStore = Builder.CreateStore(SrcVal, DstPtr);
        NewStore->setAlignment(MTI->getAlignment());
      } else {
        // Noop transfer. Src == Dst
      }


      MTI->eraseFromParent();
      continue;
    }

    // If user is a dbg info intrinsic then it is safe to remove it.
    if (isa<DbgInfoIntrinsic>(User)) {
      User->eraseFromParent();
      continue;
    }

    llvm_unreachable("Unsupported operation!");
  }
}

/// ConvertScalar_ExtractValue - Extract a value of type ToType from an integer
/// or vector value FromVal, extracting the bits from the offset specified by
/// Offset.  This returns the value, which is of type ToType.
///
/// This happens when we are converting an "integer union" to a single
/// integer scalar, or when we are converting a "vector union" to a vector with
/// insert/extractelement instructions.
///
/// Offset is an offset from the original alloca, in bits that need to be
/// shifted to the right.
Value *SROA::ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType,
                                        uint64_t Offset, IRBuilder<> &Builder) {
  // If the load is of the whole new alloca, no conversion is needed.
  if (FromVal->getType() == ToType && Offset == 0)
    return FromVal;

  // If the result alloca is a vector type, this is either an element
  // access or a bitcast to another vector type of the same size.
  if (const VectorType *VTy = dyn_cast<VectorType>(FromVal->getType())) {
    if (isa<VectorType>(ToType))
      return Builder.CreateBitCast(FromVal, ToType, "tmp");

    // Otherwise it must be an element access.
    unsigned Elt = 0;
    if (Offset) {
      unsigned EltSize = TD->getTypeAllocSizeInBits(VTy->getElementType());
      Elt = Offset/EltSize;
      assert(EltSize*Elt == Offset && "Invalid modulus in validity checking");
    }
    // Return the element extracted out of it.
    Value *V = Builder.CreateExtractElement(FromVal, ConstantInt::get(
                    Type::getInt32Ty(FromVal->getContext()), Elt), "tmp");
    if (V->getType() != ToType)
      V = Builder.CreateBitCast(V, ToType, "tmp");
    return V;
  }

  // If ToType is a first class aggregate, extract out each of the pieces and
  // use insertvalue's to form the FCA.
  if (const StructType *ST = dyn_cast<StructType>(ToType)) {
    const StructLayout &Layout = *TD->getStructLayout(ST);
    Value *Res = UndefValue::get(ST);
    for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
      // Recurse at each field's bit offset within the struct.
      Value *Elt = ConvertScalar_ExtractValue(FromVal, ST->getElementType(i),
                                        Offset+Layout.getElementOffsetInBits(i),
                                              Builder);
      Res = Builder.CreateInsertValue(Res, Elt, i, "tmp");
    }
    return Res;
  }

  if (const ArrayType *AT = dyn_cast<ArrayType>(ToType)) {
    uint64_t EltSize = TD->getTypeAllocSizeInBits(AT->getElementType());
    Value *Res = UndefValue::get(AT);
    for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
      Value *Elt = ConvertScalar_ExtractValue(FromVal, AT->getElementType(),
                                              Offset+i*EltSize, Builder);
      Res = Builder.CreateInsertValue(Res, Elt, i, "tmp");
    }
    return Res;
  }

  // Otherwise, this must be a union that was converted to an integer value.
  const IntegerType *NTy = cast<IntegerType>(FromVal->getType());

  // If this is a big-endian system and the load is narrower than the
  // full alloca type, we need to do a shift to get the right bits.
  int ShAmt = 0;
  if (TD->isBigEndian()) {
    // On big-endian machines, the lowest bit is stored at the bit offset
    // from the pointer given by getTypeStoreSizeInBits.  This matters for
    // integers with a bitwidth that is not a multiple of 8.
    ShAmt = TD->getTypeStoreSizeInBits(NTy) -
            TD->getTypeStoreSizeInBits(ToType) - Offset;
  } else {
    ShAmt = Offset;
  }

  // Note: we support negative bitwidths (with shl) which are not defined.
  // We do this to support (f.e.) loads off the end of a structure where
  // only some bits are used.
  if (ShAmt > 0 && (unsigned)ShAmt < NTy->getBitWidth())
    FromVal = Builder.CreateLShr(FromVal,
                                 ConstantInt::get(FromVal->getType(),
                                                  ShAmt), "tmp");
  else if (ShAmt < 0 && (unsigned)-ShAmt < NTy->getBitWidth())
    FromVal = Builder.CreateShl(FromVal,
                                ConstantInt::get(FromVal->getType(),
                                                 -ShAmt), "tmp");

  // Finally, unconditionally truncate the integer to the right width.
  unsigned LIBitWidth = TD->getTypeSizeInBits(ToType);
  if (LIBitWidth < NTy->getBitWidth())
    FromVal =
      Builder.CreateTrunc(FromVal, IntegerType::get(FromVal->getContext(),
                                                    LIBitWidth), "tmp");
  else if (LIBitWidth > NTy->getBitWidth())
    FromVal =
       Builder.CreateZExt(FromVal, IntegerType::get(FromVal->getContext(),
                                                    LIBitWidth), "tmp");

  // If the result is an integer, this is a trunc or bitcast.
  if (isa<IntegerType>(ToType)) {
    // Should be done.
  } else if (ToType->isFloatingPoint() || isa<VectorType>(ToType)) {
    // Just do a bitcast, we know the sizes match up.
    FromVal = Builder.CreateBitCast(FromVal, ToType, "tmp");
  } else {
    // Otherwise must be a pointer.
    FromVal = Builder.CreateIntToPtr(FromVal, ToType, "tmp");
  }
  assert(FromVal->getType() == ToType && "Didn't convert right?");
  return FromVal;
}

/// ConvertScalar_InsertValue - Insert the value "SV" into the existing integer
/// or vector value "Old" at the offset specified by Offset.
///
/// This happens when we are converting an "integer union" to a
/// single integer scalar, or when we are converting a "vector union" to a
/// vector with insert/extractelement instructions.
///
/// Offset is an offset from the original alloca, in bits that need to be
/// shifted to the right.
Value *SROA::ConvertScalar_InsertValue(Value *SV, Value *Old,
                                       uint64_t Offset, IRBuilder<> &Builder) {

  // Convert the stored type to the actual type, shift it left to insert
  // then 'or' into place.
  const Type *AllocaType = Old->getType();
  LLVMContext &Context = Old->getContext();

  if (const VectorType *VTy = dyn_cast<VectorType>(AllocaType)) {
    uint64_t VecSize = TD->getTypeAllocSizeInBits(VTy);
    uint64_t ValSize = TD->getTypeAllocSizeInBits(SV->getType());

    // Changing the whole vector with memset or with an access of a different
    // vector type?
    if (ValSize == VecSize)
      return Builder.CreateBitCast(SV, AllocaType, "tmp");

    uint64_t EltSize = TD->getTypeAllocSizeInBits(VTy->getElementType());

    // Must be an element insertion.
    unsigned Elt = Offset/EltSize;

    if (SV->getType() != VTy->getElementType())
      SV = Builder.CreateBitCast(SV, VTy->getElementType(), "tmp");

    SV = Builder.CreateInsertElement(Old, SV,
                     ConstantInt::get(Type::getInt32Ty(SV->getContext()), Elt),
                                     "tmp");
    return SV;
  }

  // If SV is a first-class aggregate value, insert each value recursively.
  if (const StructType *ST = dyn_cast<StructType>(SV->getType())) {
    const StructLayout &Layout = *TD->getStructLayout(ST);
    for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
      Value *Elt = Builder.CreateExtractValue(SV, i, "tmp");
      Old = ConvertScalar_InsertValue(Elt, Old,
                                      Offset+Layout.getElementOffsetInBits(i),
                                      Builder);
    }
    return Old;
  }

  if (const ArrayType *AT = dyn_cast<ArrayType>(SV->getType())) {
    uint64_t EltSize = TD->getTypeAllocSizeInBits(AT->getElementType());
    for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
      Value *Elt = Builder.CreateExtractValue(SV, i, "tmp");
      Old = ConvertScalar_InsertValue(Elt, Old, Offset+i*EltSize, Builder);
    }
    return Old;
  }

  // If SV is a float, convert it to the appropriate integer type.
  // If it is a pointer, do the same.
  unsigned SrcWidth = TD->getTypeSizeInBits(SV->getType());
  unsigned DestWidth = TD->getTypeSizeInBits(AllocaType);
  unsigned SrcStoreWidth = TD->getTypeStoreSizeInBits(SV->getType());
  unsigned DestStoreWidth = TD->getTypeStoreSizeInBits(AllocaType);
  if (SV->getType()->isFloatingPoint() || isa<VectorType>(SV->getType()))
    SV = Builder.CreateBitCast(SV,
                            IntegerType::get(SV->getContext(),SrcWidth), "tmp");
  else if (isa<PointerType>(SV->getType()))
    SV = Builder.CreatePtrToInt(SV, TD->getIntPtrType(SV->getContext()), "tmp");

  // Zero extend or truncate the value if needed.
  if (SV->getType() != AllocaType) {
    if (SV->getType()->getPrimitiveSizeInBits() <
        AllocaType->getPrimitiveSizeInBits())
      SV = Builder.CreateZExt(SV, AllocaType, "tmp");
    else {
      // Truncation may be needed if storing more than the alloca can hold
      // (undefined behavior).
      SV = Builder.CreateTrunc(SV, AllocaType, "tmp");
      SrcWidth = DestWidth;
      SrcStoreWidth = DestStoreWidth;
    }
  }

  // If this is a big-endian system and the store is narrower than the
  // full alloca type, we need to do a shift to get the right bits.
  int ShAmt = 0;
  if (TD->isBigEndian()) {
    // On big-endian machines, the lowest bit is stored at the bit offset
    // from the pointer given by getTypeStoreSizeInBits.  This matters for
    // integers with a bitwidth that is not a multiple of 8.
    ShAmt = DestStoreWidth - SrcStoreWidth - Offset;
  } else {
    ShAmt = Offset;
  }

  // Note: we support negative bitwidths (with shr) which are not defined.
  // We do this to support (f.e.) stores off the end of a structure where
  // only some bits in the structure are set.
  APInt Mask(APInt::getLowBitsSet(DestWidth, SrcWidth));
  if (ShAmt > 0 && (unsigned)ShAmt < DestWidth) {
    SV = Builder.CreateShl(SV, ConstantInt::get(SV->getType(),
                           ShAmt), "tmp");
    Mask <<= ShAmt;
  } else if (ShAmt < 0 && (unsigned)-ShAmt < DestWidth) {
    SV = Builder.CreateLShr(SV, ConstantInt::get(SV->getType(),
                            -ShAmt), "tmp");
    Mask = Mask.lshr(-ShAmt);
  }

  // Mask out the bits we are about to insert from the old value, and or
  // in the new bits.
  if (SrcWidth != DestWidth) {
    assert(DestWidth > SrcWidth);
    Old = Builder.CreateAnd(Old, ConstantInt::get(Context, ~Mask), "mask");
    SV = Builder.CreateOr(Old, SV, "ins");
  }
  return SV;
}



/// PointsToConstantGlobal - Return true if V (possibly indirectly) points to
/// some part of a constant global variable.  This intentionally only accepts
/// constant expressions because we can't rewrite arbitrary instructions.
1810193323Sedstatic bool PointsToConstantGlobal(Value *V) { 1811193323Sed if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) 1812193323Sed return GV->isConstant(); 1813193323Sed if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) 1814193323Sed if (CE->getOpcode() == Instruction::BitCast || 1815193323Sed CE->getOpcode() == Instruction::GetElementPtr) 1816193323Sed return PointsToConstantGlobal(CE->getOperand(0)); 1817193323Sed return false; 1818193323Sed} 1819193323Sed 1820193323Sed/// isOnlyCopiedFromConstantGlobal - Recursively walk the uses of a (derived) 1821193323Sed/// pointer to an alloca. Ignore any reads of the pointer, return false if we 1822193323Sed/// see any stores or other unknown uses. If we see pointer arithmetic, keep 1823193323Sed/// track of whether it moves the pointer (with isOffset) but otherwise traverse 1824193323Sed/// the uses. If we see a memcpy/memmove that targets an unoffseted pointer to 1825193323Sed/// the alloca, and if the source pointer is a pointer to a constant global, we 1826193323Sed/// can optimize this. 1827193323Sedstatic bool isOnlyCopiedFromConstantGlobal(Value *V, Instruction *&TheCopy, 1828193323Sed bool isOffset) { 1829193323Sed for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) { 1830193323Sed if (LoadInst *LI = dyn_cast<LoadInst>(*UI)) 1831193323Sed // Ignore non-volatile loads, they are always ok. 1832193323Sed if (!LI->isVolatile()) 1833193323Sed continue; 1834193323Sed 1835193323Sed if (BitCastInst *BCI = dyn_cast<BitCastInst>(*UI)) { 1836193323Sed // If uses of the bitcast are ok, we are ok. 1837193323Sed if (!isOnlyCopiedFromConstantGlobal(BCI, TheCopy, isOffset)) 1838193323Sed return false; 1839193323Sed continue; 1840193323Sed } 1841193323Sed if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(*UI)) { 1842193323Sed // If the GEP has all zero indices, it doesn't offset the pointer. If it 1843193323Sed // doesn't, it does. 
1844193323Sed if (!isOnlyCopiedFromConstantGlobal(GEP, TheCopy, 1845193323Sed isOffset || !GEP->hasAllZeroIndices())) 1846193323Sed return false; 1847193323Sed continue; 1848193323Sed } 1849193323Sed 1850193323Sed // If this is isn't our memcpy/memmove, reject it as something we can't 1851193323Sed // handle. 1852193323Sed if (!isa<MemTransferInst>(*UI)) 1853193323Sed return false; 1854193323Sed 1855193323Sed // If we already have seen a copy, reject the second one. 1856193323Sed if (TheCopy) return false; 1857193323Sed 1858193323Sed // If the pointer has been offset from the start of the alloca, we can't 1859193323Sed // safely handle this. 1860193323Sed if (isOffset) return false; 1861193323Sed 1862193323Sed // If the memintrinsic isn't using the alloca as the dest, reject it. 1863193323Sed if (UI.getOperandNo() != 1) return false; 1864193323Sed 1865193323Sed MemIntrinsic *MI = cast<MemIntrinsic>(*UI); 1866193323Sed 1867193323Sed // If the source of the memcpy/move is not a constant global, reject it. 1868193323Sed if (!PointsToConstantGlobal(MI->getOperand(2))) 1869193323Sed return false; 1870193323Sed 1871193323Sed // Otherwise, the transform is safe. Remember the copy instruction. 1872193323Sed TheCopy = MI; 1873193323Sed } 1874193323Sed return true; 1875193323Sed} 1876193323Sed 1877193323Sed/// isOnlyCopiedFromConstantGlobal - Return true if the specified alloca is only 1878193323Sed/// modified by a copy from a constant global. If we can prove this, we can 1879193323Sed/// replace any uses of the alloca with uses of the global directly. 1880198892SrdivackyInstruction *SROA::isOnlyCopiedFromConstantGlobal(AllocaInst *AI) { 1881193323Sed Instruction *TheCopy = 0; 1882193323Sed if (::isOnlyCopiedFromConstantGlobal(AI, TheCopy, false)) 1883193323Sed return TheCopy; 1884193323Sed return 0; 1885193323Sed} 1886