//===- ScalarReplAggregates.cpp - Scalar Replacement of Aggregates --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This transformation implements the well known scalar replacement of
// aggregates transformation.  This xform breaks up alloca instructions of
// aggregate type (structure or array) into individual alloca instructions for
// each member (if possible).  Then, if possible, it transforms the individual
// alloca instructions into nice clean scalar SSA form.
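//
// For illustration, a sketch of the rewrite this pass performs (hypothetical
// IR, not taken from any specific test case):
//
//   %p  = alloca { i32, float }
//   %f0 = getelementptr { i32, float }* %p, i32 0, i32 0
//   store i32 1, i32* %f0
//
// SROA replaces %p with one alloca per member (%p.0, %p.1) and rewrites the
// GEP so the store targets %p.0 directly; Mem2Reg can then promote %p.0 into
// a plain SSA value.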
//
// This combines a simple SRoA algorithm with the Mem2Reg algorithm because
// the two often interact, especially for C++ programs.  As such, iterating
// between SRoA and Mem2Reg until we run out of things to promote works well.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "scalarrepl"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/LLVMContext.h"
#include "llvm/Pass.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/IRBuilder.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;

STATISTIC(NumReplaced,  "Number of allocas broken up");
STATISTIC(NumPromoted,  "Number of allocas promoted");
STATISTIC(NumConverted, "Number of aggregates converted to scalar");
STATISTIC(NumGlobals,   "Number of allocas copied from constant global");

namespace {
  struct SROA : public FunctionPass {
    static char ID; // Pass identification, replacement for typeid
    explicit SROA(signed T = -1) : FunctionPass(&ID) {
      if (T == -1)
        SRThreshold = 128;
      else
        SRThreshold = T;
    }

    bool runOnFunction(Function &F);

    bool performScalarRepl(Function &F);
    bool performPromotion(Function &F);

    // getAnalysisUsage - This pass requires dominator information for
    // promotion, and we know it will not alter the CFG, so say so.
    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.addRequired<DominatorTree>();
      AU.addRequired<DominanceFrontier>();
      AU.setPreservesCFG();
    }

  private:
    TargetData *TD;

    /// AllocaInfo - When analyzing uses of an alloca instruction, this captures
    /// information about the uses.  All these fields are initialized to false
    /// and set to true when something is learned.
    struct AllocaInfo {
      /// isUnsafe - This is set to true if the alloca cannot be SROA'd.
      bool isUnsafe : 1;

      /// needsCleanup - This is set to true if there is some use of the alloca
      /// that requires cleanup.
      bool needsCleanup : 1;

      /// isMemCpySrc - This is true if this aggregate is memcpy'd from.
      bool isMemCpySrc : 1;

      /// isMemCpyDst - This is true if this aggregate is memcpy'd into.
      bool isMemCpyDst : 1;

      AllocaInfo()
        : isUnsafe(false), needsCleanup(false),
          isMemCpySrc(false), isMemCpyDst(false) {}
    };

    unsigned SRThreshold;

    void MarkUnsafe(AllocaInfo &I) { I.isUnsafe = true; }

    int isSafeAllocaToScalarRepl(AllocaInst *AI);

    void isSafeUseOfAllocation(Instruction *User, AllocaInst *AI,
                               AllocaInfo &Info);
    void isSafeElementUse(Value *Ptr, bool isFirstElt, AllocaInst *AI,
                          AllocaInfo &Info);
    void isSafeMemIntrinsicOnAllocation(MemIntrinsic *MI, AllocaInst *AI,
                                        unsigned OpNo, AllocaInfo &Info);
    void isSafeUseOfBitCastedAllocation(BitCastInst *User, AllocaInst *AI,
                                        AllocaInfo &Info);

    void DoScalarReplacement(AllocaInst *AI,
                             std::vector<AllocaInst*> &WorkList);
    void CleanupGEP(GetElementPtrInst *GEP);
    void CleanupAllocaUsers(AllocaInst *AI);
    AllocaInst *AddNewAlloca(Function &F, const Type *Ty, AllocaInst *Base);

    void RewriteBitCastUserOfAlloca(Instruction *BCInst, AllocaInst *AI,
                                    SmallVector<AllocaInst*, 32> &NewElts);

    void RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst,
                                      AllocaInst *AI,
                                      SmallVector<AllocaInst*, 32> &NewElts);
    void RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
                                       SmallVector<AllocaInst*, 32> &NewElts);
    void RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
                                      SmallVector<AllocaInst*, 32> &NewElts);

    bool CanConvertToScalar(Value *V, bool &IsNotTrivial, const Type *&VecTy,
                            bool &SawVec, uint64_t Offset, unsigned AllocaSize);
    void ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset);
    Value *ConvertScalar_ExtractValue(Value *NV, const Type *ToType,
                                      uint64_t Offset, IRBuilder<> &Builder);
    Value *ConvertScalar_InsertValue(Value *StoredVal, Value *ExistingVal,
                                     uint64_t Offset, IRBuilder<> &Builder);
    static Instruction *isOnlyCopiedFromConstantGlobal(AllocaInst *AI);
  };
}

char SROA::ID = 0;
static RegisterPass<SROA> X("scalarrepl", "Scalar Replacement of Aggregates");

// Public interface to the ScalarReplAggregates pass
FunctionPass *llvm::createScalarReplAggregatesPass(signed int Threshold) {
  return new SROA(Threshold);
}


bool SROA::runOnFunction(Function &F) {
  TD = getAnalysisIfAvailable<TargetData>();

  bool Changed = performPromotion(F);

  // FIXME: ScalarRepl currently depends on TargetData more than it
  // theoretically needs to. It should be refactored in order to support
  // target-independent IR. Until this is done, just skip the actual
  // scalar-replacement portion of this pass.
  if (!TD) return Changed;

  while (1) {
    bool LocalChange = performScalarRepl(F);
    if (!LocalChange) break;   // No need to repromote if no scalarrepl
    Changed = true;
    LocalChange = performPromotion(F);
    if (!LocalChange) break;   // No need to re-scalarrepl if no promotion
  }

  return Changed;
}


bool SROA::performPromotion(Function &F) {
  std::vector<AllocaInst*> Allocas;
  DominatorTree         &DT = getAnalysis<DominatorTree>();
  DominanceFrontier &DF = getAnalysis<DominanceFrontier>();

  BasicBlock &BB = F.getEntryBlock();  // Get the entry node for the function

  bool Changed = false;

  while (1) {
    Allocas.clear();

    // Find allocas that are safe to promote, by looking at all instructions in
    // the entry node.
    for (BasicBlock::iterator I = BB.begin(), E = --BB.end(); I != E; ++I)
      if (AllocaInst *AI = dyn_cast<AllocaInst>(I))       // Is it an alloca?
        if (isAllocaPromotable(AI))
          Allocas.push_back(AI);

    if (Allocas.empty()) break;

    PromoteMemToReg(Allocas, DT, DF);
    NumPromoted += Allocas.size();
    Changed = true;
  }

  return Changed;
}

/// getNumSAElements - Return the number of elements in the specified struct or
/// array.
static uint64_t getNumSAElements(const Type *T) {
  if (const StructType *ST = dyn_cast<StructType>(T))
    return ST->getNumElements();
  return cast<ArrayType>(T)->getNumElements();
}

// performScalarRepl - This algorithm is a simple worklist driven algorithm,
// which runs on all of the alloca instructions in the function, removing
// them if they are only used by getelementptr instructions.
//
bool SROA::performScalarRepl(Function &F) {
  std::vector<AllocaInst*> WorkList;

  // Scan the entry basic block, adding any allocas to the worklist.
  BasicBlock &BB = F.getEntryBlock();
  for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I)
    if (AllocaInst *A = dyn_cast<AllocaInst>(I))
      WorkList.push_back(A);

  // Process the worklist.
  bool Changed = false;
  while (!WorkList.empty()) {
    AllocaInst *AI = WorkList.back();
    WorkList.pop_back();

    // Handle dead allocas trivially.  These can be formed by SROA'ing arrays
    // with unused elements.
    if (AI->use_empty()) {
      AI->eraseFromParent();
      continue;
    }

    // If this alloca is impossible for us to promote, reject it early.
    if (AI->isArrayAllocation() || !AI->getAllocatedType()->isSized())
      continue;

    // Check to see if this allocation is only modified by a memcpy/memmove from
    // a constant global.  If this is the case, we can change all users to use
    // the constant global instead.  This is commonly produced by the CFE by
    // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A'
    // is only subsequently read.
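    // In IR terms, a hypothetical sketch (names invented for illustration):
    //   %A = alloca [9 x i32]
    //   call void @llvm.memcpy...(i8* %A.cast, i8* @A.const.cast, ...)
    //   ... loads of %A ...
    // Every load can read @A.const directly, so %A and the memcpy go away.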
    if (Instruction *TheCopy = isOnlyCopiedFromConstantGlobal(AI)) {
      DEBUG(errs() << "Found alloca equal to global: " << *AI << '\n');
      DEBUG(errs() << "  memcpy = " << *TheCopy << '\n');
      Constant *TheSrc = cast<Constant>(TheCopy->getOperand(2));
      AI->replaceAllUsesWith(ConstantExpr::getBitCast(TheSrc, AI->getType()));
      TheCopy->eraseFromParent();  // Don't mutate the global.
      AI->eraseFromParent();
      ++NumGlobals;
      Changed = true;
      continue;
    }

    // Check to see if we can perform the core SROA transformation.  We cannot
    // transform the allocation instruction if it is an array allocation
    // (allocations OF arrays are ok though), and an allocation of a scalar
    // value cannot be decomposed at all.
    uint64_t AllocaSize = TD->getTypeAllocSize(AI->getAllocatedType());

    // Do not promote [0 x %struct].
    if (AllocaSize == 0) continue;

    // Do not promote any struct whose size is too big.
    if (AllocaSize > SRThreshold) continue;

    if ((isa<StructType>(AI->getAllocatedType()) ||
         isa<ArrayType>(AI->getAllocatedType())) &&
        // Do not promote any struct into more than SRThreshold/4 (32 with the
        // default threshold) separate vars.
        getNumSAElements(AI->getAllocatedType()) <= SRThreshold/4) {
      // Check that all of the users of the allocation are capable of being
      // transformed.
      switch (isSafeAllocaToScalarRepl(AI)) {
      default: llvm_unreachable("Unexpected value!");
      case 0:  // Not safe to scalar replace.
        break;
      case 1:  // Safe, but requires cleanup/canonicalizations first
        CleanupAllocaUsers(AI);
        // FALL THROUGH.
      case 3:  // Safe to scalar replace.
        DoScalarReplacement(AI, WorkList);
        Changed = true;
        continue;
      }
    }

    // Check to see if this aggregate value (potentially with casts) can be
    // turned into a simple scalar value that can be mem2reg'd into a register.
    // IsNotTrivial tracks whether this is something that mem2reg could have
    // promoted itself.  If so, we don't want to transform it needlessly.  Note
    // that we can't just check based on the type: the alloca may be of an i32
    // but that has pointer arithmetic to set byte 3 of it or something.
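    // For example (hypothetical IR), an i32 alloca accessed as
    //   %b  = bitcast i32* %A to i8*
    //   %p3 = getelementptr i8* %b, i32 3
    //   store i8 %v, i8* %p3
    // is not directly promotable by mem2reg, but can be rewritten here into
    // shift/mask operations on a single i32 value.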
    bool IsNotTrivial = false;
    const Type *VectorTy = 0;
    bool HadAVector = false;
    if (CanConvertToScalar(AI, IsNotTrivial, VectorTy, HadAVector,
                           0, unsigned(AllocaSize)) && IsNotTrivial) {
      AllocaInst *NewAI;
      // If we were able to find a vector type that can handle this with
      // insert/extract elements, and if there was at least one use that had
      // a vector type, promote this to a vector.  We don't want to promote
      // random stuff that doesn't use vectors (e.g. <9 x double>) because then
      // we just get a lot of insert/extracts.  If at least one vector is
      // involved, then we probably really do have a union of vector/array.
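      // For instance (a hypothetical case), a union of <4 x float> and
      // [4 x float] that is accessed both ways can live in a single
      // <4 x float> alloca, with the array accesses becoming
      // insertelement/extractelement operations.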
      if (VectorTy && isa<VectorType>(VectorTy) && HadAVector) {
        DEBUG(errs() << "CONVERT TO VECTOR: " << *AI << "\n  TYPE = "
                     << *VectorTy << '\n');

        // Create and insert the vector alloca.
        NewAI = new AllocaInst(VectorTy, 0, "", AI->getParent()->begin());
        ConvertUsesToScalar(AI, NewAI, 0);
      } else {
        DEBUG(errs() << "CONVERT TO SCALAR INTEGER: " << *AI << "\n");

        // Create and insert the integer alloca.
        const Type *NewTy = IntegerType::get(AI->getContext(), AllocaSize*8);
        NewAI = new AllocaInst(NewTy, 0, "", AI->getParent()->begin());
        ConvertUsesToScalar(AI, NewAI, 0);
      }
      NewAI->takeName(AI);
      AI->eraseFromParent();
      ++NumConverted;
      Changed = true;
      continue;
    }

    // Otherwise, couldn't process this alloca.
  }

  return Changed;
}

/// DoScalarReplacement - This alloca satisfied the isSafeAllocaToScalarRepl
/// predicate; do SROA now.
void SROA::DoScalarReplacement(AllocaInst *AI,
                               std::vector<AllocaInst*> &WorkList) {
  DEBUG(errs() << "Found inst to SROA: " << *AI << '\n');
  SmallVector<AllocaInst*, 32> ElementAllocas;
  if (const StructType *ST = dyn_cast<StructType>(AI->getAllocatedType())) {
    ElementAllocas.reserve(ST->getNumContainedTypes());
    for (unsigned i = 0, e = ST->getNumContainedTypes(); i != e; ++i) {
      AllocaInst *NA = new AllocaInst(ST->getContainedType(i), 0,
                                      AI->getAlignment(),
                                      AI->getName() + "." + Twine(i), AI);
      ElementAllocas.push_back(NA);
      WorkList.push_back(NA);  // Add to worklist for recursive processing
    }
  } else {
    const ArrayType *AT = cast<ArrayType>(AI->getAllocatedType());
    ElementAllocas.reserve(AT->getNumElements());
    const Type *ElTy = AT->getElementType();
    for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
      AllocaInst *NA = new AllocaInst(ElTy, 0, AI->getAlignment(),
                                      AI->getName() + "." + Twine(i), AI);
      ElementAllocas.push_back(NA);
      WorkList.push_back(NA);  // Add to worklist for recursive processing
    }
  }

  // Now that we have created the alloca instructions that we want to use,
  // expand the getelementptr instructions to use them.
  while (!AI->use_empty()) {
    Instruction *User = cast<Instruction>(AI->use_back());
    if (BitCastInst *BCInst = dyn_cast<BitCastInst>(User)) {
      RewriteBitCastUserOfAlloca(BCInst, AI, ElementAllocas);
      BCInst->eraseFromParent();
      continue;
    }

    // Replace:
    //   %res = load { i32, i32 }* %alloc
    // with:
    //   %load.0 = load i32* %alloc.0
    //   %insert.0 = insertvalue { i32, i32 } undef, i32 %load.0, 0
    //   %load.1 = load i32* %alloc.1
    //   %insert = insertvalue { i32, i32 } %insert.0, i32 %load.1, 1
    // (Also works for arrays instead of structs)
    if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
      Value *Insert = UndefValue::get(LI->getType());
      for (unsigned i = 0, e = ElementAllocas.size(); i != e; ++i) {
        Value *Load = new LoadInst(ElementAllocas[i], "load", LI);
        Insert = InsertValueInst::Create(Insert, Load, i, "insert", LI);
      }
      LI->replaceAllUsesWith(Insert);
      LI->eraseFromParent();
      continue;
    }

    // Replace:
    //   store { i32, i32 } %val, { i32, i32 }* %alloc
    // with:
    //   %val.0 = extractvalue { i32, i32 } %val, 0
    //   store i32 %val.0, i32* %alloc.0
    //   %val.1 = extractvalue { i32, i32 } %val, 1
    //   store i32 %val.1, i32* %alloc.1
    // (Also works for arrays instead of structs)
    if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
      Value *Val = SI->getOperand(0);
      for (unsigned i = 0, e = ElementAllocas.size(); i != e; ++i) {
        Value *Extract = ExtractValueInst::Create(Val, i, Val->getName(), SI);
        new StoreInst(Extract, ElementAllocas[i], SI);
      }
      SI->eraseFromParent();
      continue;
    }

    GetElementPtrInst *GEPI = cast<GetElementPtrInst>(User);
    // We now know that the GEP is of the form: GEP <ptr>, 0, <cst>
    unsigned Idx =
       (unsigned)cast<ConstantInt>(GEPI->getOperand(2))->getZExtValue();

    assert(Idx < ElementAllocas.size() && "Index out of range?");
    AllocaInst *AllocaToUse = ElementAllocas[Idx];

    Value *RepValue;
    if (GEPI->getNumOperands() == 3) {
      // Do not insert a new getelementptr instruction with zero indices, only
      // to have it optimized out later.
      RepValue = AllocaToUse;
    } else {
      // We are indexing deeply into the structure, so we still need a
      // getelementptr instruction to finish the indexing.  This may be
      // expanded itself once the worklist is rerun.
      //
      SmallVector<Value*, 8> NewArgs;
      NewArgs.push_back(Constant::getNullValue(
                                           Type::getInt32Ty(AI->getContext())));
      NewArgs.append(GEPI->op_begin()+3, GEPI->op_end());
      RepValue = GetElementPtrInst::Create(AllocaToUse, NewArgs.begin(),
                                           NewArgs.end(), "", GEPI);
      RepValue->takeName(GEPI);
    }

    // If this GEP is to the start of the aggregate, check for memcpys.
    if (Idx == 0 && GEPI->hasAllZeroIndices())
      RewriteBitCastUserOfAlloca(GEPI, AI, ElementAllocas);

    // Move all of the users over to the new GEP.
    GEPI->replaceAllUsesWith(RepValue);
    // Delete the old GEP.
    GEPI->eraseFromParent();
  }

  // Finally, delete the Alloca instruction.
  AI->eraseFromParent();
  NumReplaced++;
}

/// isSafeElementUse - Check to see if this use is an allowed use for a
/// getelementptr instruction of an array aggregate allocation.  isFirstElt
/// indicates whether Ptr is known to point to the start of the aggregate.
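/// Allowed uses are loads, stores INTO the pointer, in-range constant-index
/// GEPs, and (when Ptr points at the first element) bitcasts and memory
/// intrinsics covering the whole aggregate; anything else marks the alloca
/// unsafe.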
void SROA::isSafeElementUse(Value *Ptr, bool isFirstElt, AllocaInst *AI,
                            AllocaInfo &Info) {
  for (Value::use_iterator I = Ptr->use_begin(), E = Ptr->use_end();
       I != E; ++I) {
    Instruction *User = cast<Instruction>(*I);
    switch (User->getOpcode()) {
    case Instruction::Load:  break;
    case Instruction::Store:
      // Store is ok if storing INTO the pointer, not storing the pointer.
      if (User->getOperand(0) == Ptr) return MarkUnsafe(Info);
      break;
    case Instruction::GetElementPtr: {
      GetElementPtrInst *GEP = cast<GetElementPtrInst>(User);
      bool AreAllZeroIndices = isFirstElt;
      if (GEP->getNumOperands() > 1 &&
          (!isa<ConstantInt>(GEP->getOperand(1)) ||
           !cast<ConstantInt>(GEP->getOperand(1))->isZero()))
        // Using pointer arithmetic to navigate the array.
        return MarkUnsafe(Info);

      // Verify that any array subscripts are in range.
      for (gep_type_iterator GEPIt = gep_type_begin(GEP),
           E = gep_type_end(GEP); GEPIt != E; ++GEPIt) {
        // Ignore struct elements, no extra checking needed for these.
        if (isa<StructType>(*GEPIt))
          continue;

        // This GEP indexes an array.  Verify that this is an in-range
        // constant integer. Specifically, consider A[0][i]. We cannot know that
        // the user isn't doing invalid things like allowing i to index an
        // out-of-range subscript that accesses A[1].  Because of this, we have
        // to reject SROA of any accesses into structs where any of the
        // components are variables.
        ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPIt.getOperand());
        if (!IdxVal) return MarkUnsafe(Info);

        // Are all indices still zero?
        AreAllZeroIndices &= IdxVal->isZero();

        if (const ArrayType *AT = dyn_cast<ArrayType>(*GEPIt)) {
          if (IdxVal->getZExtValue() >= AT->getNumElements())
            return MarkUnsafe(Info);
        } else if (const VectorType *VT = dyn_cast<VectorType>(*GEPIt)) {
          if (IdxVal->getZExtValue() >= VT->getNumElements())
            return MarkUnsafe(Info);
        }
      }

      isSafeElementUse(GEP, AreAllZeroIndices, AI, Info);
      if (Info.isUnsafe) return;
      break;
    }
    case Instruction::BitCast:
      if (isFirstElt) {
        isSafeUseOfBitCastedAllocation(cast<BitCastInst>(User), AI, Info);
        if (Info.isUnsafe) return;
        break;
      }
      DEBUG(errs() << "  Transformation preventing inst: " << *User << '\n');
      return MarkUnsafe(Info);
    case Instruction::Call:
      if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) {
        if (isFirstElt) {
          isSafeMemIntrinsicOnAllocation(MI, AI, I.getOperandNo(), Info);
          if (Info.isUnsafe) return;
          break;
        }
      }
      DEBUG(errs() << "  Transformation preventing inst: " << *User << '\n');
      return MarkUnsafe(Info);
    default:
      DEBUG(errs() << "  Transformation preventing inst: " << *User << '\n');
      return MarkUnsafe(Info);
    }
  }
  return;  // All users look ok :)
}

/// AllUsersAreLoads - Return true if all users of this value are loads.
static bool AllUsersAreLoads(Value *Ptr) {
  for (Value::use_iterator I = Ptr->use_begin(), E = Ptr->use_end();
       I != E; ++I)
    if (cast<Instruction>(*I)->getOpcode() != Instruction::Load)
      return false;
  return true;
}

/// isSafeUseOfAllocation - Check if this user is an allowed use for an
/// aggregate allocation.
void SROA::isSafeUseOfAllocation(Instruction *User, AllocaInst *AI,
                                 AllocaInfo &Info) {
  if (BitCastInst *C = dyn_cast<BitCastInst>(User))
    return isSafeUseOfBitCastedAllocation(C, AI, Info);

  if (LoadInst *LI = dyn_cast<LoadInst>(User))
    if (!LI->isVolatile())
      return; // Loads (returning a first class aggregate) are always rewritable

  if (StoreInst *SI = dyn_cast<StoreInst>(User))
    if (!SI->isVolatile() && SI->getOperand(0) != AI)
      return; // Store is ok if storing INTO the pointer, not storing the pointer

  GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User);
  if (GEPI == 0)
    return MarkUnsafe(Info);

  gep_type_iterator I = gep_type_begin(GEPI), E = gep_type_end(GEPI);

  // The GEP is not safe to transform if not of the form "GEP <ptr>, 0, <cst>".
  if (I == E ||
      I.getOperand() != Constant::getNullValue(I.getOperand()->getType())) {
    return MarkUnsafe(Info);
  }

  ++I;
  if (I == E) return MarkUnsafe(Info);  // ran out of GEP indices??

  bool IsAllZeroIndices = true;

  // If the first index is a non-constant index into an array, see if we can
  // handle it as a special case.
  if (const ArrayType *AT = dyn_cast<ArrayType>(*I)) {
    if (!isa<ConstantInt>(I.getOperand())) {
      IsAllZeroIndices = false;
      uint64_t NumElements = AT->getNumElements();

      // If this is an array index and the index is not constant, we cannot
      // promote... that is unless the array has exactly one or two elements in
      // it, in which case we CAN promote it, but we have to canonicalize this
      // out if this is the only problem.
      if ((NumElements == 1 || NumElements == 2) &&
          AllUsersAreLoads(GEPI)) {
        Info.needsCleanup = true;
        return;  // Canonicalization required!
      }
      return MarkUnsafe(Info);
    }
  }

  // Walk through the GEP type indices, checking the types that this indexes
  // into.
  for (; I != E; ++I) {
    // Ignore struct elements, no extra checking needed for these.
    if (isa<StructType>(*I))
      continue;

    ConstantInt *IdxVal = dyn_cast<ConstantInt>(I.getOperand());
    if (!IdxVal) return MarkUnsafe(Info);

    // Are all indices still zero?
    IsAllZeroIndices &= IdxVal->isZero();

    if (const ArrayType *AT = dyn_cast<ArrayType>(*I)) {
      // This GEP indexes an array.  Verify that this is an in-range constant
      // integer. Specifically, consider A[0][i]. We cannot know that the user
      // isn't doing invalid things like allowing i to index an out-of-range
      // subscript that accesses A[1].  Because of this, we have to reject SROA
      // of any accesses into structs where any of the components are variables.
      if (IdxVal->getZExtValue() >= AT->getNumElements())
        return MarkUnsafe(Info);
    } else if (const VectorType *VT = dyn_cast<VectorType>(*I)) {
      if (IdxVal->getZExtValue() >= VT->getNumElements())
        return MarkUnsafe(Info);
    }
  }

  // If there are any non-simple uses of this getelementptr, make sure to reject
  // them.
  return isSafeElementUse(GEPI, IsAllZeroIndices, AI, Info);
}

/// isSafeMemIntrinsicOnAllocation - Check if the specified memory
/// intrinsic can be promoted by SROA.  At this point, we know that the operand
/// of the memintrinsic is a pointer to the beginning of the allocation.
void SROA::isSafeMemIntrinsicOnAllocation(MemIntrinsic *MI, AllocaInst *AI,
                                          unsigned OpNo, AllocaInfo &Info) {
  // If not constant length, give up.
  ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength());
  if (!Length) return MarkUnsafe(Info);

  // If not the whole aggregate, give up.
  if (Length->getZExtValue() !=
      TD->getTypeAllocSize(AI->getType()->getElementType()))
    return MarkUnsafe(Info);

  // We only know about memcpy/memset/memmove.
  if (!isa<MemIntrinsic>(MI))
    return MarkUnsafe(Info);

  // Otherwise, we can transform it.  Determine whether this is a memcpy/set
  // into or out of the aggregate.
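  // (The call operands here are (callee, dest, src-or-value, length, align),
  // so operand 1 is the destination pointer and operand 2 the source.)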
  if (OpNo == 1)
    Info.isMemCpyDst = true;
  else {
    assert(OpNo == 2);
    Info.isMemCpySrc = true;
  }
}

/// isSafeUseOfBitCastedAllocation - Check if all users of this bitcast
/// from an alloca are safe for SROA of that alloca.
void SROA::isSafeUseOfBitCastedAllocation(BitCastInst *BC, AllocaInst *AI,
                                          AllocaInfo &Info) {
  for (Value::use_iterator UI = BC->use_begin(), E = BC->use_end();
       UI != E; ++UI) {
    if (BitCastInst *BCU = dyn_cast<BitCastInst>(UI)) {
      isSafeUseOfBitCastedAllocation(BCU, AI, Info);
    } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(UI)) {
      isSafeMemIntrinsicOnAllocation(MI, AI, UI.getOperandNo(), Info);
    } else if (StoreInst *SI = dyn_cast<StoreInst>(UI)) {
      if (SI->isVolatile())
        return MarkUnsafe(Info);

      // If storing the entire alloca in one chunk through a bitcasted pointer
      // to integer, we can transform it.  This happens (for example) when you
      // cast a {i32,i32}* to i64* and store through it.  This is similar to the
      // memcpy case and occurs in various "byval" cases and emulated memcpys.
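      // A hypothetical IR sketch of that pattern:
      //   %c = bitcast {i32,i32}* %A to i64*
      //   store i64 %v, i64* %c        ; one store covering the whole alloca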
      if (isa<IntegerType>(SI->getOperand(0)->getType()) &&
          TD->getTypeAllocSize(SI->getOperand(0)->getType()) ==
          TD->getTypeAllocSize(AI->getType()->getElementType())) {
        Info.isMemCpyDst = true;
        continue;
      }
      return MarkUnsafe(Info);
    } else if (LoadInst *LI = dyn_cast<LoadInst>(UI)) {
      if (LI->isVolatile())
        return MarkUnsafe(Info);

      // If loading the entire alloca in one chunk through a bitcasted pointer
      // to integer, we can transform it.  This happens (for example) when you
      // cast a {i32,i32}* to i64* and load through it.  This is similar to the
      // memcpy case and occurs in various "byval" cases and emulated memcpys.
      if (isa<IntegerType>(LI->getType()) &&
          TD->getTypeAllocSize(LI->getType()) ==
          TD->getTypeAllocSize(AI->getType()->getElementType())) {
        Info.isMemCpySrc = true;
        continue;
      }
      return MarkUnsafe(Info);
    } else if (isa<DbgInfoIntrinsic>(UI)) {
      // If one user is DbgInfoIntrinsic then check if all users are
      // DbgInfoIntrinsics.
      if (OnlyUsedByDbgInfoIntrinsics(BC)) {
        Info.needsCleanup = true;
        return;
      }
      MarkUnsafe(Info);
    } else {
      return MarkUnsafe(Info);
    }
    if (Info.isUnsafe) return;
  }
}

/// RewriteBitCastUserOfAlloca - BCInst (transitively) bitcasts AI, or indexes
/// to its first element.  Transform users of the cast to use the new values
/// instead.
void SROA::RewriteBitCastUserOfAlloca(Instruction *BCInst, AllocaInst *AI,
                                      SmallVector<AllocaInst*, 32> &NewElts) {
  Value::use_iterator UI = BCInst->use_begin(), UE = BCInst->use_end();
  while (UI != UE) {
    Instruction *User = cast<Instruction>(*UI++);
    if (BitCastInst *BCU = dyn_cast<BitCastInst>(User)) {
      RewriteBitCastUserOfAlloca(BCU, AI, NewElts);
      if (BCU->use_empty()) BCU->eraseFromParent();
      continue;
    }

    if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) {
      // This must be memcpy/memmove/memset of the entire aggregate.
      // Split into one per element.
      RewriteMemIntrinUserOfAlloca(MI, BCInst, AI, NewElts);
      continue;
    }

    if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
      // If this is a store of the entire alloca from an integer, rewrite it.
      RewriteStoreUserOfWholeAlloca(SI, AI, NewElts);
      continue;
    }

    if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
      // If this is a load of the entire alloca to an integer, rewrite it.
      RewriteLoadUserOfWholeAlloca(LI, AI, NewElts);
      continue;
    }

    // Otherwise it must be some other user of a gep of the first pointer.  Just
    // leave these alone.
    continue;
  }
}

/// RewriteMemIntrinUserOfAlloca - MI is a memcpy/memset/memmove from or to AI.
/// Rewrite it to copy or set the elements of the scalarized memory.
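/// For example (a hypothetical sketch), a memset over a {i32,i32} alloca is
/// rewritten as one store of a suitably replicated constant to each element
/// alloca, and a memcpy becomes a per-element load/store pair (or a smaller
/// per-element mem intrinsic when the element is itself an aggregate).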
void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst,
                                        AllocaInst *AI,
                                        SmallVector<AllocaInst*, 32> &NewElts) {

  // If this is a memcpy/memmove, construct the other pointer as the
  // appropriate type.  The "Other" pointer is the pointer that goes to memory
  // that doesn't have anything to do with the alloca that we are promoting. For
  // memset, this Value* stays null.
  Value *OtherPtr = 0;
  LLVMContext &Context = MI->getContext();
  unsigned MemAlignment = MI->getAlignment();
  if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) { // memmove/memcpy
    if (BCInst == MTI->getRawDest())
      OtherPtr = MTI->getRawSource();
    else {
      assert(BCInst == MTI->getRawSource());
      OtherPtr = MTI->getRawDest();
    }
  }

  // Keep track of the other intrinsic argument, so it can be removed if it
  // is dead when the intrinsic is replaced.
  Value *PossiblyDead = OtherPtr;

  // If there is an other pointer, we want to convert it to the same pointer
  // type as AI has, so we can GEP through it safely.
  if (OtherPtr) {
    // It is likely that OtherPtr is a bitcast; if so, remove it.
    if (BitCastInst *BC = dyn_cast<BitCastInst>(OtherPtr))
      OtherPtr = BC->getOperand(0);
    // All zero GEPs are effectively bitcasts.
    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(OtherPtr))
      if (GEP->hasAllZeroIndices())
        OtherPtr = GEP->getOperand(0);

    if (ConstantExpr *BCE = dyn_cast<ConstantExpr>(OtherPtr))
      if (BCE->getOpcode() == Instruction::BitCast)
        OtherPtr = BCE->getOperand(0);

    // If the pointer is not the right type, insert a bitcast to the right
    // type.
    if (OtherPtr->getType() != AI->getType())
      OtherPtr = new BitCastInst(OtherPtr, AI->getType(), OtherPtr->getName(),
                                 MI);
  }

  // Process each element of the aggregate.
  Value *TheFn = MI->getOperand(0);
  const Type *BytePtrTy = MI->getRawDest()->getType();
  bool SROADest = MI->getRawDest() == BCInst;

  Constant *Zero = Constant::getNullValue(Type::getInt32Ty(MI->getContext()));

  for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
    // If this is a memcpy/memmove, emit a GEP of the other element address.
    Value *OtherElt = 0;
    unsigned OtherEltAlign = MemAlignment;

    if (OtherPtr) {
      Value *Idx[2] = { Zero,
                      ConstantInt::get(Type::getInt32Ty(MI->getContext()), i) };
      OtherElt = GetElementPtrInst::Create(OtherPtr, Idx, Idx + 2,
                                           OtherPtr->getNameStr()+"."+Twine(i),
                                           MI);
      uint64_t EltOffset;
      const PointerType *OtherPtrTy = cast<PointerType>(OtherPtr->getType());
      if (const StructType *ST =
            dyn_cast<StructType>(OtherPtrTy->getElementType())) {
        EltOffset = TD->getStructLayout(ST)->getElementOffset(i);
      } else {
        const Type *EltTy =
          cast<SequentialType>(OtherPtr->getType())->getElementType();
        EltOffset = TD->getTypeAllocSize(EltTy)*i;
      }

      // The alignment of the other pointer is the guaranteed alignment of the
      // element, which is affected by both the known alignment of the whole
      // mem intrinsic and the alignment of the element.  If the alignment of
      // the memcpy (e.g.) is 32 but the element is at a 4-byte offset, then the
      // known alignment is just 4 bytes.
      OtherEltAlign = (unsigned)MinAlign(OtherEltAlign, EltOffset);
    }

    Value *EltPtr = NewElts[i];
    const Type *EltTy = cast<PointerType>(EltPtr->getType())->getElementType();

    // If we got down to a scalar, insert a load or store as appropriate.
    if (EltTy->isSingleValueType()) {
      if (isa<MemTransferInst>(MI)) {
        if (SROADest) {
          // From Other to Alloca.
          Value *Elt = new LoadInst(OtherElt, "tmp", false, OtherEltAlign, MI);
          new StoreInst(Elt, EltPtr, MI);
        } else {
          // From Alloca to Other.
          Value *Elt = new LoadInst(EltPtr, "tmp", MI);
          new StoreInst(Elt, OtherElt, false, OtherEltAlign, MI);
        }
        continue;
      }
      assert(isa<MemSetInst>(MI));

      // If the stored element is zero (common case), just store a null
      // constant.
      Constant *StoreVal;
      if (ConstantInt *CI = dyn_cast<ConstantInt>(MI->getOperand(2))) {
        if (CI->isZero()) {
          StoreVal = Constant::getNullValue(EltTy);  // 0.0, null, 0, <0,0>
        } else {
          // If EltTy is a vector type, get the element type.
          const Type *ValTy = EltTy->getScalarType();

          // Construct an integer with the right value.
          unsigned EltSize = TD->getTypeSizeInBits(ValTy);
          APInt OneVal(EltSize, CI->getZExtValue());
          APInt TotalVal(OneVal);
          // Set each byte.
          for (unsigned i = 0; 8*i < EltSize; ++i) {
            TotalVal = TotalVal.shl(8);
            TotalVal |= OneVal;
          }
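          // Worked example (illustrative): for an i32 element and memset
          // byte 0xAB, this produces the constant 0xABABABAB.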

          // Convert the integer value to the appropriate type.
          StoreVal = ConstantInt::get(Context, TotalVal);
          if (isa<PointerType>(ValTy))
            StoreVal = ConstantExpr::getIntToPtr(StoreVal, ValTy);
          else if (ValTy->isFloatingPoint())
            StoreVal = ConstantExpr::getBitCast(StoreVal, ValTy);
          assert(StoreVal->getType() == ValTy && "Type mismatch!");

          // If the requested value was a vector constant, create it.
          if (EltTy != ValTy) {
            unsigned NumElts = cast<VectorType>(EltTy)->getNumElements();
            SmallVector<Constant*, 16> Elts(NumElts, StoreVal);
            StoreVal = ConstantVector::get(&Elts[0], NumElts);
          }
        }
        new StoreInst(StoreVal, EltPtr, MI);
        continue;
      }
      // Otherwise, if we're storing a byte variable, use a memset call for
      // this element.
    }

    // Cast the element pointer to BytePtrTy.
    if (EltPtr->getType() != BytePtrTy)
      EltPtr = new BitCastInst(EltPtr, BytePtrTy, EltPtr->getNameStr(), MI);

    // Cast the other pointer (if we have one) to BytePtrTy.
    if (OtherElt && OtherElt->getType() != BytePtrTy)
      OtherElt = new BitCastInst(OtherElt, BytePtrTy, OtherElt->getNameStr(),
                                 MI);

    unsigned EltSize = TD->getTypeAllocSize(EltTy);

    // Finally, insert the meminst for this element.
    if (isa<MemTransferInst>(MI)) {
      Value *Ops[] = {
        SROADest ? EltPtr : OtherElt,  // Dest ptr
        SROADest ? OtherElt : EltPtr,  // Src ptr
        ConstantInt::get(MI->getOperand(3)->getType(), EltSize), // Size
        // Align
        ConstantInt::get(Type::getInt32Ty(MI->getContext()), OtherEltAlign)
      };
      CallInst::Create(TheFn, Ops, Ops + 4, "", MI);
    } else {
      assert(isa<MemSetInst>(MI));
      Value *Ops[] = {
        EltPtr, MI->getOperand(2),  // Dest, Value,
        ConstantInt::get(MI->getOperand(3)->getType(), EltSize), // Size
        Zero  // Align
      };
      CallInst::Create(TheFn, Ops, Ops + 4, "", MI);
    }
  }
  MI->eraseFromParent();
  if (PossiblyDead)
    RecursivelyDeleteTriviallyDeadInstructions(PossiblyDead);
}

/// RewriteStoreUserOfWholeAlloca - We found a store of an integer that
/// overwrites the entire allocation.  Extract out the pieces of the stored
/// integer and store them individually.
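/// For instance (hypothetical IR), storing an i64 over a {i32,i32} alloca on
/// a little-endian target becomes roughly:
///   %lo   = trunc i64 %v to i32          ; bits 0-31  -> element 0
///   %hi.s = lshr i64 %v, 32
///   %hi   = trunc i64 %hi.s to i32       ; bits 32-63 -> element 1
/// with the shift amounts mirrored on big-endian targets.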
void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
                                         SmallVector<AllocaInst*, 32> &NewElts) {
  // Extract each element out of the integer according to its structure offset
  // and store the element value to the individual alloca.
  Value *SrcVal = SI->getOperand(0);
  const Type *AllocaEltTy = AI->getType()->getElementType();
  uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy);

  // If this isn't a store of an integer to the whole alloca, it may be a store
  // to the first element.  Just ignore the store in this case and normal SROA
  // will handle it.
  if (!isa<IntegerType>(SrcVal->getType()) ||
      TD->getTypeAllocSizeInBits(SrcVal->getType()) != AllocaSizeBits)
    return;
  // Handle tail padding by extending the operand.
  if (TD->getTypeSizeInBits(SrcVal->getType()) != AllocaSizeBits)
    SrcVal = new ZExtInst(SrcVal,
                          IntegerType::get(SI->getContext(), AllocaSizeBits),
                          "", SI);

  DEBUG(errs() << "PROMOTING STORE TO WHOLE ALLOCA: " << *AI << '\n' << *SI
               << '\n');

  // There are two forms here: AI could be an array or struct.  Both cases
  // have different ways to compute the element offset.
  if (const StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) {
    const StructLayout *Layout = TD->getStructLayout(EltSTy);

    for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
      // Get the number of bits to shift SrcVal to get the value.
      const Type *FieldTy = EltSTy->getElementType(i);
      uint64_t Shift = Layout->getElementOffsetInBits(i);

      if (TD->isBigEndian())
        Shift = AllocaSizeBits-Shift-TD->getTypeAllocSizeInBits(FieldTy);

      Value *EltVal = SrcVal;
      if (Shift) {
        Value *ShiftVal = ConstantInt::get(EltVal->getType(), Shift);
        EltVal = BinaryOperator::CreateLShr(EltVal, ShiftVal,
                                            "sroa.store.elt", SI);
      }

      // Truncate down to an integer of the right size.
      uint64_t FieldSizeBits = TD->getTypeSizeInBits(FieldTy);

      // Ignore zero sized fields like {}, they obviously contain no data.
      if (FieldSizeBits == 0) continue;

      if (FieldSizeBits != AllocaSizeBits)
        EltVal = new TruncInst(EltVal,
                               IntegerType::get(SI->getContext(), FieldSizeBits),
                               "", SI);
      Value *DestField = NewElts[i];
      if (EltVal->getType() == FieldTy) {
        // Storing to an integer field of this size, just do it.
      } else if (FieldTy->isFloatingPoint() || isa<VectorType>(FieldTy)) {
        // Bitcast to the right element type (for fp/vector values).
        EltVal = new BitCastInst(EltVal, FieldTy, "", SI);
      } else {
        // Otherwise, bitcast the dest pointer (for aggregates).
        DestField = new BitCastInst(DestField,
                                    PointerType::getUnqual(EltVal->getType()),
                                    "", SI);
      }
      new StoreInst(EltVal, DestField, SI);
    }

  } else {
    const ArrayType *ATy = cast<ArrayType>(AllocaEltTy);
    const Type *ArrayEltTy = ATy->getElementType();
    uint64_t ElementOffset = TD->getTypeAllocSizeInBits(ArrayEltTy);
    uint64_t ElementSizeBits = TD->getTypeSizeInBits(ArrayEltTy);
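    // Note that "ElementOffset" here is really the per-element stride in
    // bits; the running Shift below starts at the first element's position
    // and advances by this stride for each element.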

    uint64_t Shift;

    if (TD->isBigEndian())
      Shift = AllocaSizeBits-ElementOffset;
    else
      Shift = 0;

    for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
      // Ignore zero sized fields like {}, they obviously contain no data.
      if (ElementSizeBits == 0) continue;

      Value *EltVal = SrcVal;
      if (Shift) {
        Value *ShiftVal = ConstantInt::get(EltVal->getType(), Shift);
        EltVal = BinaryOperator::CreateLShr(EltVal, ShiftVal,
                                            "sroa.store.elt", SI);
      }

      // Truncate down to an integer of the right size.
      if (ElementSizeBits != AllocaSizeBits)
        EltVal = new TruncInst(EltVal,
                               IntegerType::get(SI->getContext(),
                                                ElementSizeBits), "", SI);
      Value *DestField = NewElts[i];
      if (EltVal->getType() == ArrayEltTy) {
        // Storing to an integer field of this size, just do it.
      } else if (ArrayEltTy->isFloatingPoint() || isa<VectorType>(ArrayEltTy)) {
        // Bitcast to the right element type (for fp/vector values).
        EltVal = new BitCastInst(EltVal, ArrayEltTy, "", SI);
      } else {
        // Otherwise, bitcast the dest pointer (for aggregates).
        DestField = new BitCastInst(DestField,
                                    PointerType::getUnqual(EltVal->getType()),
                                    "", SI);
      }
      new StoreInst(EltVal, DestField, SI);

      if (TD->isBigEndian())
        Shift -= ElementOffset;
      else
        Shift += ElementOffset;
    }
  }

  SI->eraseFromParent();
}

/// RewriteLoadUserOfWholeAlloca - We found a load of the entire allocation to
/// an integer.  Load the individual pieces to form the aggregate value.
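/// This is the inverse of RewriteStoreUserOfWholeAlloca: e.g. (hypothetical
/// IR) loading an i64 from a {i32,i32} alloca loads each element alloca,
/// zero-extends the pieces to i64, shifts each to its bit offset, and ORs
/// them together into the final value.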
1058198892Srdivackyvoid SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
1059193323Sed                                        SmallVector<AllocaInst*, 32> &NewElts) {
1060193323Sed  // Extract each element out of the NewElts according to its structure offset
1061193323Sed  // and form the result value.
1062193323Sed  const Type *AllocaEltTy = AI->getType()->getElementType();
1063193323Sed  uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy);
1064193323Sed
1065193323Sed  // If this isn't a load of the whole alloca to an integer, it may be a load
1066193323Sed  // of the first element.  Just ignore the load in this case and normal SROA
1067193323Sed  // will handle it.
1068193323Sed  if (!isa<IntegerType>(LI->getType()) ||
1069193323Sed      TD->getTypeAllocSizeInBits(LI->getType()) != AllocaSizeBits)
1070193323Sed    return;
1071193323Sed
1072198090Srdivacky  DEBUG(errs() << "PROMOTING LOAD OF WHOLE ALLOCA: " << *AI << '\n' << *LI
1073198090Srdivacky               << '\n');
1074193323Sed
1075193323Sed  // There are two forms here: AI can be an array or a struct.  The two cases
1076193323Sed  // compute the element offset differently.
1077193323Sed  const StructLayout *Layout = 0;
1078193323Sed  uint64_t ArrayEltBitOffset = 0;
1079193323Sed  if (const StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) {
1080193323Sed    Layout = TD->getStructLayout(EltSTy);
1081193323Sed  } else {
1082193323Sed    const Type *ArrayEltTy = cast<ArrayType>(AllocaEltTy)->getElementType();
1083193323Sed    ArrayEltBitOffset = TD->getTypeAllocSizeInBits(ArrayEltTy);
1084193323Sed  }
1085193323Sed
1086198090Srdivacky  Value *ResultVal =
1087198090Srdivacky    Constant::getNullValue(IntegerType::get(LI->getContext(), AllocaSizeBits));
1088198090Srdivacky
1089193323Sed  for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
1090193323Sed    // Load the value from the alloca.  If the NewElt is an aggregate, cast
1091193323Sed    // the pointer to an integer of the same size before doing the load.
1092193323Sed    Value *SrcField = NewElts[i];
1093193323Sed    const Type *FieldTy =
1094193323Sed      cast<PointerType>(SrcField->getType())->getElementType();
1095193323Sed    uint64_t FieldSizeBits = TD->getTypeSizeInBits(FieldTy);
1096193323Sed
1097193323Sed    // Ignore zero-sized fields like {}; they obviously contain no data.
1098193323Sed    if (FieldSizeBits == 0) continue;
1099193323Sed
1100198090Srdivacky    const IntegerType *FieldIntTy = IntegerType::get(LI->getContext(),
1101198090Srdivacky                                                     FieldSizeBits);
1102193323Sed    if (!isa<IntegerType>(FieldTy) && !FieldTy->isFloatingPoint() &&
1103193323Sed        !isa<VectorType>(FieldTy))
1104195340Sed      SrcField = new BitCastInst(SrcField,
1105198090Srdivacky                                 PointerType::getUnqual(FieldIntTy),
1106193323Sed                                 "", LI);
1107193323Sed    SrcField = new LoadInst(SrcField, "sroa.load.elt", LI);
1108193323Sed
1109193323Sed    // If SrcField is an fp or vector value of the right size, but not an
1110193323Sed    // integer type, bitcast it to an integer so we can shift it.
1111193323Sed    if (SrcField->getType() != FieldIntTy)
1112193323Sed      SrcField = new BitCastInst(SrcField, FieldIntTy, "", LI);
1113193323Sed
1114193323Sed    // Zero extend the field to be the same size as the final alloca so that
1115193323Sed    // we can shift and insert it.
1116193323Sed    if (SrcField->getType() != ResultVal->getType())
1117193323Sed      SrcField = new ZExtInst(SrcField, ResultVal->getType(), "", LI);
1118193323Sed
1119193323Sed    // Determine the number of bits to shift SrcField.
1120193323Sed    uint64_t Shift;
1121193323Sed    if (Layout) // Struct case.
1122193323Sed      Shift = Layout->getElementOffsetInBits(i);
1123193323Sed    else  // Array case.
1124193323Sed      Shift = i*ArrayEltBitOffset;
1125193323Sed
1126193323Sed    if (TD->isBigEndian())
1127193323Sed      Shift = AllocaSizeBits-Shift-FieldIntTy->getBitWidth();
1128193323Sed
1129193323Sed    if (Shift) {
1130198090Srdivacky      Value *ShiftVal = ConstantInt::get(SrcField->getType(), Shift);
1131193323Sed      SrcField = BinaryOperator::CreateShl(SrcField, ShiftVal, "", LI);
1132193323Sed    }
1133193323Sed
1134193323Sed    ResultVal = BinaryOperator::CreateOr(SrcField, ResultVal, "", LI);
1135193323Sed  }
1136193323Sed
1137193323Sed  // Handle tail padding by truncating the result
1138193323Sed  if (TD->getTypeSizeInBits(LI->getType()) != AllocaSizeBits)
1139193323Sed    ResultVal = new TruncInst(ResultVal, LI->getType(), "", LI);
1140193323Sed
1141193323Sed  LI->replaceAllUsesWith(ResultVal);
1142193323Sed  LI->eraseFromParent();
1143193323Sed}
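
// Illustrative sketch (editorial example, not from the original source): the
// inverse of the whole-alloca store case.  Loading {i32, i32} as one i64 on a
// little-endian target becomes roughly:
//
//   %e0 = load i32* %Elt0
//   %z0 = zext i32 %e0 to i64         ; field 0, struct bit offset 0
//   %e1 = load i32* %Elt1
//   %z1 = zext i32 %e1 to i64
//   %s1 = shl i64 %z1, 32             ; field 1, struct bit offset 32
//   %r  = or i64 %s1, %z0             ; accumulated ResultVal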
1144193323Sed
1145193323Sed
1146193323Sed/// HasPadding - Return true if the specified type has any structure or
1147193323Sed/// alignment padding, false otherwise.
1148193323Sedstatic bool HasPadding(const Type *Ty, const TargetData &TD) {
1149193323Sed  if (const StructType *STy = dyn_cast<StructType>(Ty)) {
1150193323Sed    const StructLayout *SL = TD.getStructLayout(STy);
1151193323Sed    unsigned PrevFieldBitOffset = 0;
1152193323Sed    for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
1153193323Sed      unsigned FieldBitOffset = SL->getElementOffsetInBits(i);
1154193323Sed
1155193323Sed      // Padding in sub-elements?
1156193323Sed      if (HasPadding(STy->getElementType(i), TD))
1157193323Sed        return true;
1158193323Sed
1159193323Sed      // Check to see if there is any padding between this element and the
1160193323Sed      // previous one.
1161193323Sed      if (i) {
1162193323Sed        unsigned PrevFieldEnd =
1163193323Sed          PrevFieldBitOffset + TD.getTypeSizeInBits(STy->getElementType(i-1));
1164193323Sed        if (PrevFieldEnd < FieldBitOffset)
1165193323Sed          return true;
1166193323Sed      }
1167193323Sed
1168193323Sed      PrevFieldBitOffset = FieldBitOffset;
1169193323Sed    }
1170193323Sed
1171193323Sed    //  Check for tail padding.
1172193323Sed    if (unsigned EltCount = STy->getNumElements()) {
1173193323Sed      unsigned PrevFieldEnd = PrevFieldBitOffset +
1174193323Sed                   TD.getTypeSizeInBits(STy->getElementType(EltCount-1));
1175193323Sed      if (PrevFieldEnd < SL->getSizeInBits())
1176193323Sed        return true;
1177193323Sed    }
1178193323Sed
1179193323Sed  } else if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
1180193323Sed    return HasPadding(ATy->getElementType(), TD);
1181193323Sed  } else if (const VectorType *VTy = dyn_cast<VectorType>(Ty)) {
1182193323Sed    return HasPadding(VTy->getElementType(), TD);
1183193323Sed  }
1184193323Sed  return TD.getTypeSizeInBits(Ty) != TD.getTypeAllocSizeInBits(Ty);
1185193323Sed}
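
// Editorial example (illustrative; the exact layout depends on the TargetData
// in use): on a typical 64-bit target, {i8, i64} occupies 16 bytes but only 9
// carry data, so HasPadding returns true; {i32, i32} is fully covered by its
// fields and returns false.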
1186193323Sed
1187193323Sed/// isSafeAllocaToScalarRepl - Check to see if the specified allocation of an
1188193323Sed/// aggregate can be broken down into elements.  Return 0 if not, 3 if safe,
1189193323Sed/// or 1 if safe after canonicalization has been performed.
1190198892Srdivackyint SROA::isSafeAllocaToScalarRepl(AllocaInst *AI) {
1191193323Sed  // Loop over the use list of the alloca.  We can only transform it if all of
1192193323Sed  // the users are safe to transform.
1193193323Sed  AllocaInfo Info;
1194193323Sed
1195193323Sed  for (Value::use_iterator I = AI->use_begin(), E = AI->use_end();
1196193323Sed       I != E; ++I) {
1197193323Sed    isSafeUseOfAllocation(cast<Instruction>(*I), AI, Info);
1198193323Sed    if (Info.isUnsafe) {
1199198090Srdivacky      DEBUG(errs() << "Cannot transform: " << *AI << "\n  due to user: "
1200198090Srdivacky                   << **I << '\n');
1201193323Sed      return 0;
1202193323Sed    }
1203193323Sed  }
1204193323Sed
1205193323Sed  // Okay, we know all the users are promotable.  If the aggregate is a memcpy
1206193323Sed  // source and destination, we have to be careful.  In particular, the memcpy
1207193323Sed  // could be moving around elements that live in structure padding of the LLVM
1208193323Sed  // types, but may actually be used.  In these cases, we refuse to promote the
1209193323Sed  // struct.
1210193323Sed  if (Info.isMemCpySrc && Info.isMemCpyDst &&
1211193323Sed      HasPadding(AI->getType()->getElementType(), *TD))
1212193323Sed    return 0;
1213193323Sed
1214193323Sed  // If we require cleanup, return 1; otherwise return 3.
1215193323Sed  return Info.needsCleanup ? 1 : 3;
1216193323Sed}
1217193323Sed
1218200581Srdivacky/// CleanupGEP - GEPI indexes into an alloca; the alloca can be promoted once
1219193323Sed/// the GEP is canonicalized here.
1220193323Sedvoid SROA::CleanupGEP(GetElementPtrInst *GEPI) {
1221193323Sed  gep_type_iterator I = gep_type_begin(GEPI);
1222193323Sed  ++I;
1223193323Sed
1224193323Sed  const ArrayType *AT = dyn_cast<ArrayType>(*I);
1225193323Sed  if (!AT)
1226193323Sed    return;
1227193323Sed
1228193323Sed  uint64_t NumElements = AT->getNumElements();
1229193323Sed
1230193323Sed  if (isa<ConstantInt>(I.getOperand()))
1231193323Sed    return;
1232193323Sed
1233193323Sed  if (NumElements == 1) {
1234198090Srdivacky    GEPI->setOperand(2,
1235198090Srdivacky                  Constant::getNullValue(Type::getInt32Ty(GEPI->getContext())));
1236193323Sed    return;
1237193323Sed  }
1238193323Sed
1239193323Sed  assert(NumElements == 2 && "Unhandled case!");
1240193323Sed  // All users of the GEP must be loads.  At each use of the GEP, insert
1241193323Sed  // two loads of the appropriate indexed GEP and select between them.
1242198090Srdivacky  Value *IsOne = new ICmpInst(GEPI, ICmpInst::ICMP_NE, I.getOperand(),
1243198090Srdivacky                              Constant::getNullValue(I.getOperand()->getType()),
1244198090Srdivacky                              "isone");
1245193323Sed  // Insert the new GEP instructions, which are properly indexed.
1246193323Sed  SmallVector<Value*, 8> Indices(GEPI->op_begin()+1, GEPI->op_end());
1247198090Srdivacky  Indices[1] = Constant::getNullValue(Type::getInt32Ty(GEPI->getContext()));
1248193323Sed  Value *ZeroIdx = GetElementPtrInst::Create(GEPI->getOperand(0),
1249193323Sed                                             Indices.begin(),
1250193323Sed                                             Indices.end(),
1251193323Sed                                             GEPI->getName()+".0", GEPI);
1252198090Srdivacky  Indices[1] = ConstantInt::get(Type::getInt32Ty(GEPI->getContext()), 1);
1253193323Sed  Value *OneIdx = GetElementPtrInst::Create(GEPI->getOperand(0),
1254193323Sed                                            Indices.begin(),
1255193323Sed                                            Indices.end(),
1256193323Sed                                            GEPI->getName()+".1", GEPI);
1257193323Sed  // Replace all loads of the variable index GEP with loads from both
1258193323Sed  // indexes and a select.
1259193323Sed  while (!GEPI->use_empty()) {
1260193323Sed    LoadInst *LI = cast<LoadInst>(GEPI->use_back());
1261193323Sed    Value *Zero = new LoadInst(ZeroIdx, LI->getName()+".0", LI);
1262193323Sed    Value *One  = new LoadInst(OneIdx , LI->getName()+".1", LI);
1263193323Sed    Value *R = SelectInst::Create(IsOne, One, Zero, LI->getName(), LI);
1264193323Sed    LI->replaceAllUsesWith(R);
1265193323Sed    LI->eraseFromParent();
1266193323Sed  }
1267193323Sed  GEPI->eraseFromParent();
1268193323Sed}
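
// Illustrative sketch (editorial example with hypothetical IR, not from the
// original source): a variable index into a two-element array is rewritten as
//
//   %p = getelementptr [2 x i32]* %A, i32 0, i32 %i
//   %v = load i32* %p
//     ==>
//   %isone = icmp ne i32 %i, 0
//   %p0 = getelementptr [2 x i32]* %A, i32 0, i32 0
//   %p1 = getelementptr [2 x i32]* %A, i32 0, i32 1
//   %v0 = load i32* %p0
//   %v1 = load i32* %p1
//   %v  = select i1 %isone, i32 %v1, i32 %v0
//
// after which both GEPs have constant indices and SROA can split %A.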
1269193323Sed
1270193323Sed
1271193323Sed/// CleanupAllocaUsers - If SROA reported that it can promote the specified
1272193323Sed/// allocation, but only if cleaned up, perform the cleanups required.
1273198892Srdivackyvoid SROA::CleanupAllocaUsers(AllocaInst *AI) {
1274193323Sed  // At this point, we know that the end result will be SROA'd and promoted, so
1275193323Sed  // we can insert ugly code if required so long as sroa+mem2reg will clean it
1276193323Sed  // up.
1277193323Sed  for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
1278193323Sed       UI != E; ) {
1279193323Sed    User *U = *UI++;
1280193323Sed    if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(U))
1281193323Sed      CleanupGEP(GEPI);
1282193630Sed    else {
1283193630Sed      Instruction *I = cast<Instruction>(U);
1284193323Sed      SmallVector<DbgInfoIntrinsic *, 2> DbgInUses;
1285193323Sed      if (!isa<StoreInst>(I) && OnlyUsedByDbgInfoIntrinsics(I, &DbgInUses)) {
1286193323Sed        // Safe to remove debug info uses.
1287193323Sed        while (!DbgInUses.empty()) {
1288193323Sed          DbgInfoIntrinsic *DI = DbgInUses.back(); DbgInUses.pop_back();
1289193323Sed          DI->eraseFromParent();
1290193323Sed        }
1291193323Sed        I->eraseFromParent();
1292193323Sed      }
1293193323Sed    }
1294193323Sed  }
1295193323Sed}
1296193323Sed
1297193323Sed/// MergeInType - Add the 'In' type to the accumulated vector type (VecTy) so
1298193323Sed/// far at the byte offset specified by Offset.
1299193323Sed///
1300193323Sed/// There are two cases we handle here:
1301193323Sed///   1) A union of vector types of the same size, potentially including its
1302193323Sed///      elements.  Here we turn element accesses into insert/extractelement
1303193323Sed///      operations.  For example, a <4 x float> with a store of float to the
1304193323Sed///      third element is promoted to a <4 x float> that uses insertelement.
1305193323Sed///   2) A fully general blob of memory, which we turn into some (potentially
1306193323Sed///      large) integer type with extract and insert operations where the loads
1307193323Sed///      and stores would mutate the memory.
1308193323Sedstatic void MergeInType(const Type *In, uint64_t Offset, const Type *&VecTy,
1309195340Sed                        unsigned AllocaSize, const TargetData &TD,
1310198090Srdivacky                        LLVMContext &Context) {
1311193323Sed  // If this could be contributing to a vector, analyze it.
1312198090Srdivacky  if (VecTy != Type::getVoidTy(Context)) { // VecTy is null or a vector type.
1313193323Sed
1314193323Sed    // If the In type is a vector that is the same size as the alloca, see if it
1315193323Sed    // matches the existing VecTy.
1316193323Sed    if (const VectorType *VInTy = dyn_cast<VectorType>(In)) {
1317193323Sed      if (VInTy->getBitWidth()/8 == AllocaSize && Offset == 0) {
1318193323Sed        // If we're storing/loading a vector of the right size, allow it as a
1319193323Sed        // vector.  If this is the first vector we see, remember the type so
1320193323Sed        // that we know the element size.
1321193323Sed        if (VecTy == 0)
1322193323Sed          VecTy = VInTy;
1323193323Sed        return;
1324193323Sed      }
1325198090Srdivacky    } else if (In->isFloatTy() || In->isDoubleTy() ||
1326193323Sed               (isa<IntegerType>(In) && In->getPrimitiveSizeInBits() >= 8 &&
1327193323Sed                isPowerOf2_32(In->getPrimitiveSizeInBits()))) {
1328193323Sed      // If we're accessing something that could be an element of a vector, see
1329193323Sed      // if the implied vector agrees with what we already have and if Offset is
1330193323Sed      // compatible with it.
1331193323Sed      unsigned EltSize = In->getPrimitiveSizeInBits()/8;
1332193323Sed      if (Offset % EltSize == 0 &&
1333193323Sed          AllocaSize % EltSize == 0 &&
1334193323Sed          (VecTy == 0 ||
1335193323Sed           cast<VectorType>(VecTy)->getElementType()
1336193323Sed                 ->getPrimitiveSizeInBits()/8 == EltSize)) {
1337193323Sed        if (VecTy == 0)
1338198090Srdivacky          VecTy = VectorType::get(In, AllocaSize/EltSize);
1339193323Sed        return;
1340193323Sed      }
1341193323Sed    }
1342193323Sed  }
1343193323Sed
1344193323Sed  // Otherwise, we have a case that we can't handle with an optimized vector
1345193323Sed  // form.  We can still turn this into a large integer.
1346198090Srdivacky  VecTy = Type::getVoidTy(Context);
1347193323Sed}
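
// Editorial example: for a 16-byte alloca accessed once as <4 x float> and
// once as a float at byte offset 8, this settles on VecTy = <4 x float>.  A
// later i16 access at offset 2 would not line up with the 4-byte elements, so
// VecTy would collapse to VoidTy and the alloca would be treated as an i128.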
1348193323Sed
1349193323Sed/// CanConvertToScalar - V is a pointer.  If we can convert the pointee and all
1350200581Srdivacky/// its accesses to a single vector type, return true and set VecTy to
1351193323Sed/// the new type.  If we could convert the alloca into a single promotable
1352193323Sed/// integer, return true but set VecTy to VoidTy.  Further, if the use is not a
1353193323Sed/// completely trivial use that mem2reg could promote, set IsNotTrivial.  Offset
1354193323Sed/// is the current offset from the base of the alloca being analyzed.
1355193323Sed///
1356193323Sed/// If we see at least one access to the value as a vector type, set the
1357193323Sed/// SawVec flag.
1358193323Sedbool SROA::CanConvertToScalar(Value *V, bool &IsNotTrivial, const Type *&VecTy,
1359193323Sed                              bool &SawVec, uint64_t Offset,
1360193323Sed                              unsigned AllocaSize) {
1361193323Sed  for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) {
1362193323Sed    Instruction *User = cast<Instruction>(*UI);
1363193323Sed
1364193323Sed    if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
1365193323Sed      // Don't break volatile loads.
1366193323Sed      if (LI->isVolatile())
1367193323Sed        return false;
1368198090Srdivacky      MergeInType(LI->getType(), Offset, VecTy,
1369198090Srdivacky                  AllocaSize, *TD, V->getContext());
1370193323Sed      SawVec |= isa<VectorType>(LI->getType());
1371193323Sed      continue;
1372193323Sed    }
1373193323Sed
1374193323Sed    if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
1375193323Sed      // Storing the pointer itself, not a value into it?  Volatile store?
1376193323Sed      if (SI->getOperand(0) == V || SI->isVolatile()) return false;
1377195340Sed      MergeInType(SI->getOperand(0)->getType(), Offset,
1378198090Srdivacky                  VecTy, AllocaSize, *TD, V->getContext());
1379193323Sed      SawVec |= isa<VectorType>(SI->getOperand(0)->getType());
1380193323Sed      continue;
1381193323Sed    }
1382193323Sed
1383193323Sed    if (BitCastInst *BCI = dyn_cast<BitCastInst>(User)) {
1384193323Sed      if (!CanConvertToScalar(BCI, IsNotTrivial, VecTy, SawVec, Offset,
1385193323Sed                              AllocaSize))
1386193323Sed        return false;
1387193323Sed      IsNotTrivial = true;
1388193323Sed      continue;
1389193323Sed    }
1390193323Sed
1391193323Sed    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) {
1392193323Sed      // If this is a GEP with variable indices, we can't handle it.
1393193323Sed      if (!GEP->hasAllConstantIndices())
1394193323Sed        return false;
1395193323Sed
1396193323Sed      // Compute the offset that this GEP adds to the pointer.
1397193323Sed      SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end());
1398193323Sed      uint64_t GEPOffset = TD->getIndexedOffset(GEP->getOperand(0)->getType(),
1399193323Sed                                                &Indices[0], Indices.size());
1400193323Sed      // See if all uses can be converted.
1401193323Sed      if (!CanConvertToScalar(GEP, IsNotTrivial, VecTy, SawVec,Offset+GEPOffset,
1402193323Sed                              AllocaSize))
1403193323Sed        return false;
1404193323Sed      IsNotTrivial = true;
1405193323Sed      continue;
1406193323Sed    }
1407193323Sed
1408193323Sed    // If this is a constant sized memset of a constant value (e.g. 0) we can
1409193323Sed    // handle it.
1410193323Sed    if (MemSetInst *MSI = dyn_cast<MemSetInst>(User)) {
1411193323Sed      // Store of constant value and constant size.
1412193323Sed      if (isa<ConstantInt>(MSI->getValue()) &&
1413193323Sed          isa<ConstantInt>(MSI->getLength())) {
1414193323Sed        IsNotTrivial = true;
1415193323Sed        continue;
1416193323Sed      }
1417193323Sed    }
1418193323Sed
1419193323Sed    // If this is a memcpy or memmove into or out of the whole allocation, we
1420193323Sed    // can handle it like a load or store of the scalar type.
1421193323Sed    if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(User)) {
1422193323Sed      if (ConstantInt *Len = dyn_cast<ConstantInt>(MTI->getLength()))
1423193323Sed        if (Len->getZExtValue() == AllocaSize && Offset == 0) {
1424193323Sed          IsNotTrivial = true;
1425193323Sed          continue;
1426193323Sed        }
1427193323Sed    }
1428193323Sed
1429193323Sed    // Ignore debug info intrinsics.
1430193323Sed    if (isa<DbgInfoIntrinsic>(User))
1431193323Sed      continue;
1432193323Sed
1433193323Sed    // Otherwise, we cannot handle this!
1434193323Sed    return false;
1435193323Sed  }
1436193323Sed
1437193323Sed  return true;
1438193323Sed}
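
// Editorial note: in practice this accepts allocas reached only through
// bitcasts, constant-index GEPs, direct loads/stores, constant memsets, and
// whole-alloca memcpys; a single escaping use (e.g. passing the pointer to a
// call) or a variable-index GEP makes it return false.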
1439193323Sed
1440193323Sed/// ConvertUsesToScalar - Convert all of the users of Ptr to use the new alloca
1441193323Sed/// directly.  This happens when we are converting an "integer union" to a
1442193323Sed/// single integer scalar, or when we are converting a "vector union" to a
1443193323Sed/// vector with insert/extractelement instructions.
1444193323Sed///
1445193323Sed/// Offset is the bit offset from the original alloca; extracted values are
1446193323Sed/// shifted right by it.  By the end of this, there should be no uses of Ptr.
1447193323Sedvoid SROA::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset) {
1448193323Sed  while (!Ptr->use_empty()) {
1449193323Sed    Instruction *User = cast<Instruction>(Ptr->use_back());
1450193323Sed
1451193323Sed    if (BitCastInst *CI = dyn_cast<BitCastInst>(User)) {
1452193323Sed      ConvertUsesToScalar(CI, NewAI, Offset);
1453193323Sed      CI->eraseFromParent();
1454193323Sed      continue;
1455193323Sed    }
1456193323Sed
1457193323Sed    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) {
1458193323Sed      // Compute the offset that this GEP adds to the pointer.
1459193323Sed      SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end());
1460193323Sed      uint64_t GEPOffset = TD->getIndexedOffset(GEP->getOperand(0)->getType(),
1461193323Sed                                                &Indices[0], Indices.size());
1462193323Sed      ConvertUsesToScalar(GEP, NewAI, Offset+GEPOffset*8);
1463193323Sed      GEP->eraseFromParent();
1464193323Sed      continue;
1465193323Sed    }
1466193323Sed
1467193323Sed    IRBuilder<> Builder(User->getParent(), User);
1468193323Sed
1469193323Sed    if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
1470193323Sed      // The load is a bit extract from NewAI shifted right by Offset bits.
1471193323Sed      Value *LoadedVal = Builder.CreateLoad(NewAI, "tmp");
1472193323Sed      Value *NewLoadVal
1473193323Sed        = ConvertScalar_ExtractValue(LoadedVal, LI->getType(), Offset, Builder);
1474193323Sed      LI->replaceAllUsesWith(NewLoadVal);
1475193323Sed      LI->eraseFromParent();
1476193323Sed      continue;
1477193323Sed    }
1478193323Sed
1479193323Sed    if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
1480193323Sed      assert(SI->getOperand(0) != Ptr && "Consistency error!");
1481198090Srdivacky      // FIXME: Remove once builder has Twine API.
1482200581Srdivacky      Value *Old = Builder.CreateLoad(NewAI,
1483200581Srdivacky                                      (NewAI->getName()+".in").str().c_str());
1484193323Sed      Value *New = ConvertScalar_InsertValue(SI->getOperand(0), Old, Offset,
1485193323Sed                                             Builder);
1486193323Sed      Builder.CreateStore(New, NewAI);
1487193323Sed      SI->eraseFromParent();
1488193323Sed      continue;
1489193323Sed    }
1490193323Sed
1491193323Sed    // If this is a constant sized memset of a constant value (e.g. 0) we can
1492193323Sed    // transform it into a store of the expanded constant value.
1493193323Sed    if (MemSetInst *MSI = dyn_cast<MemSetInst>(User)) {
1494193323Sed      assert(MSI->getRawDest() == Ptr && "Consistency error!");
1495193323Sed      unsigned NumBytes = cast<ConstantInt>(MSI->getLength())->getZExtValue();
1496193323Sed      if (NumBytes != 0) {
1497193323Sed        unsigned Val = cast<ConstantInt>(MSI->getValue())->getZExtValue();
1498193323Sed
1499193323Sed        // Compute the value replicated the right number of times.
1500193323Sed        APInt APVal(NumBytes*8, Val);
1501193323Sed
1502193323Sed        // Splat the value if non-zero.
1503193323Sed        if (Val)
1504193323Sed          for (unsigned i = 1; i != NumBytes; ++i)
1505193323Sed            APVal |= APVal << 8;
1506193323Sed
1507198090Srdivacky        // FIXME: Remove once builder has Twine API.
1508200581Srdivacky        Value *Old = Builder.CreateLoad(NewAI,
1509200581Srdivacky                                        (NewAI->getName()+".in").str().c_str());
1510198090Srdivacky        Value *New = ConvertScalar_InsertValue(
1511198090Srdivacky                                    ConstantInt::get(User->getContext(), APVal),
1512195340Sed                                               Old, Offset, Builder);
1513193323Sed        Builder.CreateStore(New, NewAI);
1514193323Sed      }
1515193323Sed      MSI->eraseFromParent();
1516193323Sed      continue;
1517193323Sed    }
1518193323Sed
1519193323Sed    // If this is a memcpy or memmove into or out of the whole allocation, we
1520193323Sed    // can handle it like a load or store of the scalar type.
1521193323Sed    if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(User)) {
1522193323Sed      assert(Offset == 0 && "must be store to start of alloca");
1523193323Sed
1524193323Sed      // If the source and destination both refer to the same alloca, this is
1525193323Sed      // a noop copy-to-self; just delete it.  Otherwise, emit a load and store
1526193323Sed      // as appropriate.
1527193323Sed      AllocaInst *OrigAI = cast<AllocaInst>(Ptr->getUnderlyingObject());
1528193323Sed
1529193323Sed      if (MTI->getSource()->getUnderlyingObject() != OrigAI) {
1530193323Sed        // Dest must be OrigAI, change this to be a load from the original
1531193323Sed        // pointer (bitcasted), then a store to our new alloca.
1532193323Sed        assert(MTI->getRawDest() == Ptr && "Neither use is of pointer?");
1533193323Sed        Value *SrcPtr = MTI->getSource();
1534193323Sed        SrcPtr = Builder.CreateBitCast(SrcPtr, NewAI->getType());
1535193323Sed
1536193323Sed        LoadInst *SrcVal = Builder.CreateLoad(SrcPtr, "srcval");
1537193323Sed        SrcVal->setAlignment(MTI->getAlignment());
1538193323Sed        Builder.CreateStore(SrcVal, NewAI);
1539193323Sed      } else if (MTI->getDest()->getUnderlyingObject() != OrigAI) {
1540193323Sed        // Src must be OrigAI, change this to be a load from NewAI then a store
1541193323Sed        // through the original dest pointer (bitcasted).
1542193323Sed        assert(MTI->getRawSource() == Ptr && "Neither use is of pointer?");
1543193323Sed        LoadInst *SrcVal = Builder.CreateLoad(NewAI, "srcval");
1544193323Sed
1545193323Sed        Value *DstPtr = Builder.CreateBitCast(MTI->getDest(), NewAI->getType());
1546193323Sed        StoreInst *NewStore = Builder.CreateStore(SrcVal, DstPtr);
1547193323Sed        NewStore->setAlignment(MTI->getAlignment());
1548193323Sed      } else {
1549193323Sed        // Noop transfer. Src == Dst
1550193323Sed      }
1551193323Sed
1553193323Sed      MTI->eraseFromParent();
1554193323Sed      continue;
1555193323Sed    }
1556193323Sed
1557193323Sed    // If user is a dbg info intrinsic then it is safe to remove it.
1558193323Sed    if (isa<DbgInfoIntrinsic>(User)) {
1559193323Sed      User->eraseFromParent();
1560193323Sed      continue;
1561193323Sed    }
1562193323Sed
1563198090Srdivacky    llvm_unreachable("Unsupported operation!");
1564193323Sed  }
1565193323Sed}
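
// Illustrative sketch (editorial example, not from the original source): a
// memset of value 1 over a 4-byte alloca promoted to i32 becomes a
// read-modify-write with the byte splatted into APVal = 0x01010101:
//
//   %old = load i32* %NewAI           ; read the current scalar
//   ; 0x01010101 is merged into %old at bit offset Offset; when the memset
//   ; covers the whole alloca this just stores the splatted constant
//   store i32 %new, i32* %NewAI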
1566193323Sed
1567193323Sed/// ConvertScalar_ExtractValue - Extract a value of type ToType from an integer
1568193323Sed/// or vector value FromVal, extracting the bits from the offset specified by
1569193323Sed/// Offset.  This returns the value, which is of type ToType.
1570193323Sed///
1571193323Sed/// This happens when we are converting an "integer union" to a single
1572193323Sed/// integer scalar, or when we are converting a "vector union" to a vector with
1573193323Sed/// insert/extractelement instructions.
1574193323Sed///
1575193323Sed/// Offset is the bit offset from the original alloca; the extracted value is
1576193323Sed/// shifted right by it.
1577193323SedValue *SROA::ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType,
1578193323Sed                                        uint64_t Offset, IRBuilder<> &Builder) {
1579193323Sed  // If the load is of the whole new alloca, no conversion is needed.
1580193323Sed  if (FromVal->getType() == ToType && Offset == 0)
1581193323Sed    return FromVal;
1582193323Sed
1583193323Sed  // If the result alloca is a vector type, this is either an element
1584193323Sed  // access or a bitcast to another vector type of the same size.
1585193323Sed  if (const VectorType *VTy = dyn_cast<VectorType>(FromVal->getType())) {
1586193323Sed    if (isa<VectorType>(ToType))
1587193323Sed      return Builder.CreateBitCast(FromVal, ToType, "tmp");
1588193323Sed
1589193323Sed    // Otherwise it must be an element access.
1590193323Sed    unsigned Elt = 0;
1591193323Sed    if (Offset) {
1592193323Sed      unsigned EltSize = TD->getTypeAllocSizeInBits(VTy->getElementType());
1593193323Sed      Elt = Offset/EltSize;
1594193323Sed      assert(EltSize*Elt == Offset && "Invalid modulus in validity checking");
1595193323Sed    }
1596193323Sed    // Return the element extracted out of it.
1597198090Srdivacky    Value *V = Builder.CreateExtractElement(FromVal, ConstantInt::get(
1598198090Srdivacky                    Type::getInt32Ty(FromVal->getContext()), Elt), "tmp");
1599193323Sed    if (V->getType() != ToType)
1600193323Sed      V = Builder.CreateBitCast(V, ToType, "tmp");
1601193323Sed    return V;
1602193323Sed  }
1603193323Sed
1604193323Sed  // If ToType is a first class aggregate, extract out each of the pieces and
1605193323Sed  // use insertvalue's to form the FCA.
1606193323Sed  if (const StructType *ST = dyn_cast<StructType>(ToType)) {
1607193323Sed    const StructLayout &Layout = *TD->getStructLayout(ST);
1608198090Srdivacky    Value *Res = UndefValue::get(ST);
1609193323Sed    for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
1610193323Sed      Value *Elt = ConvertScalar_ExtractValue(FromVal, ST->getElementType(i),
1611193323Sed                                        Offset+Layout.getElementOffsetInBits(i),
1612193323Sed                                              Builder);
1613193323Sed      Res = Builder.CreateInsertValue(Res, Elt, i, "tmp");
1614193323Sed    }
1615193323Sed    return Res;
1616193323Sed  }
1617193323Sed
1618193323Sed  if (const ArrayType *AT = dyn_cast<ArrayType>(ToType)) {
1619193323Sed    uint64_t EltSize = TD->getTypeAllocSizeInBits(AT->getElementType());
1620198090Srdivacky    Value *Res = UndefValue::get(AT);
1621193323Sed    for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
1622193323Sed      Value *Elt = ConvertScalar_ExtractValue(FromVal, AT->getElementType(),
1623193323Sed                                              Offset+i*EltSize, Builder);
1624193323Sed      Res = Builder.CreateInsertValue(Res, Elt, i, "tmp");
1625193323Sed    }
1626193323Sed    return Res;
1627193323Sed  }
1628193323Sed
1629193323Sed  // Otherwise, this must be a union that was converted to an integer value.
1630193323Sed  const IntegerType *NTy = cast<IntegerType>(FromVal->getType());
1631193323Sed
1632193323Sed  // If this is a big-endian system and the load is narrower than the
1633193323Sed  // full alloca type, we need to do a shift to get the right bits.
1634193323Sed  int ShAmt = 0;
1635193323Sed  if (TD->isBigEndian()) {
1636193323Sed    // On big-endian machines, the lowest bit is stored at the bit offset
1637193323Sed    // from the pointer given by getTypeStoreSizeInBits.  This matters for
1638193323Sed    // integers with a bitwidth that is not a multiple of 8.
1639193323Sed    ShAmt = TD->getTypeStoreSizeInBits(NTy) -
1640193323Sed            TD->getTypeStoreSizeInBits(ToType) - Offset;
1641193323Sed  } else {
1642193323Sed    ShAmt = Offset;
1643193323Sed  }
1644193323Sed
1645193323Sed  // Note: we support negative shift amounts (handled with shl), which are not
1646193323Sed  // otherwise defined.  We do this to support (e.g.) loads off the end of a
1647193323Sed  // structure where only some bits are used.
1648193323Sed  if (ShAmt > 0 && (unsigned)ShAmt < NTy->getBitWidth())
1649195340Sed    FromVal = Builder.CreateLShr(FromVal,
1650198090Srdivacky                                 ConstantInt::get(FromVal->getType(),
1651193323Sed                                                           ShAmt), "tmp");
1652193323Sed  else if (ShAmt < 0 && (unsigned)-ShAmt < NTy->getBitWidth())
1653195340Sed    FromVal = Builder.CreateShl(FromVal,
1654198090Srdivacky                                ConstantInt::get(FromVal->getType(),
1655193323Sed                                                          -ShAmt), "tmp");
1656193323Sed
1657193323Sed  // Finally, unconditionally truncate the integer to the right width.
1658193323Sed  unsigned LIBitWidth = TD->getTypeSizeInBits(ToType);
1659193323Sed  if (LIBitWidth < NTy->getBitWidth())
1660195340Sed    FromVal =
1661198090Srdivacky      Builder.CreateTrunc(FromVal, IntegerType::get(FromVal->getContext(),
1662198090Srdivacky                                                    LIBitWidth), "tmp");
1663193323Sed  else if (LIBitWidth > NTy->getBitWidth())
1664195340Sed    FromVal =
1665198090Srdivacky       Builder.CreateZExt(FromVal, IntegerType::get(FromVal->getContext(),
1666198090Srdivacky                                                    LIBitWidth), "tmp");
1667193323Sed
1668193323Sed  // If the result is an integer, this is a trunc or bitcast.
1669193323Sed  if (isa<IntegerType>(ToType)) {
1670193323Sed    // Should be done.
1671193323Sed  } else if (ToType->isFloatingPoint() || isa<VectorType>(ToType)) {
1672193323Sed    // Just do a bitcast, we know the sizes match up.
1673193323Sed    FromVal = Builder.CreateBitCast(FromVal, ToType, "tmp");
1674193323Sed  } else {
1675193323Sed    // Otherwise must be a pointer.
1676193323Sed    FromVal = Builder.CreateIntToPtr(FromVal, ToType, "tmp");
1677193323Sed  }
1678193323Sed  assert(FromVal->getType() == ToType && "Didn't convert right?");
1679193323Sed  return FromVal;
1680193323Sed}
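
// Worked example (editorial): extracting an i16 at byte offset 2 (Offset = 16
// bits) from an i64.  Little-endian: ShAmt = 16, giving
// trunc(lshr(FromVal, 16)) to i16.  Big-endian: ShAmt = 64 - 16 - 16 = 32,
// giving trunc(lshr(FromVal, 32)) to i16.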
1681193323Sed
1682193323Sed/// ConvertScalar_InsertValue - Insert the value "SV" into the existing integer
1683193323Sed/// or vector value "Old" at the offset specified by Offset.
1684193323Sed///
1685193323Sed/// This happens when we are converting an "integer union" to a
1686193323Sed/// single integer scalar, or when we are converting a "vector union" to a
1687193323Sed/// vector with insert/extractelement instructions.
1688193323Sed///
1689193323Sed/// Offset is the bit offset from the original alloca; the inserted value is
1690193323Sed/// shifted left by it to reach its position.
1691193323SedValue *SROA::ConvertScalar_InsertValue(Value *SV, Value *Old,
1692193323Sed                                       uint64_t Offset, IRBuilder<> &Builder) {
1693193323Sed
1694193323Sed  // Convert the stored type to the actual type, shift it left to insert
1695193323Sed  // then 'or' into place.
1696193323Sed  const Type *AllocaType = Old->getType();
1697198090Srdivacky  LLVMContext &Context = Old->getContext();
1698193323Sed
1699193323Sed  if (const VectorType *VTy = dyn_cast<VectorType>(AllocaType)) {
1700193323Sed    uint64_t VecSize = TD->getTypeAllocSizeInBits(VTy);
1701193323Sed    uint64_t ValSize = TD->getTypeAllocSizeInBits(SV->getType());
1702193323Sed
1703193323Sed    // Changing the whole vector with memset or with an access of a different
1704193323Sed    // vector type?
1705193323Sed    if (ValSize == VecSize)
1706193323Sed      return Builder.CreateBitCast(SV, AllocaType, "tmp");
1707193323Sed
1708193323Sed    uint64_t EltSize = TD->getTypeAllocSizeInBits(VTy->getElementType());
1709193323Sed
1710193323Sed    // Must be an element insertion.
1711193323Sed    unsigned Elt = Offset/EltSize;
1712193323Sed
1713193323Sed    if (SV->getType() != VTy->getElementType())
1714193323Sed      SV = Builder.CreateBitCast(SV, VTy->getElementType(), "tmp");
1715193323Sed
1716193323Sed    SV = Builder.CreateInsertElement(Old, SV,
1717198090Srdivacky                     ConstantInt::get(Type::getInt32Ty(SV->getContext()), Elt),
1718193323Sed                                     "tmp");
1719193323Sed    return SV;
1720193323Sed  }
1721193323Sed
1722193323Sed  // If SV is a first-class aggregate value, insert each value recursively.
1723193323Sed  if (const StructType *ST = dyn_cast<StructType>(SV->getType())) {
1724193323Sed    const StructLayout &Layout = *TD->getStructLayout(ST);
1725193323Sed    for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
1726193323Sed      Value *Elt = Builder.CreateExtractValue(SV, i, "tmp");
1727193323Sed      Old = ConvertScalar_InsertValue(Elt, Old,
1728193323Sed                                      Offset+Layout.getElementOffsetInBits(i),
1729193323Sed                                      Builder);
1730193323Sed    }
1731193323Sed    return Old;
1732193323Sed  }
1733193323Sed
1734193323Sed  if (const ArrayType *AT = dyn_cast<ArrayType>(SV->getType())) {
1735193323Sed    uint64_t EltSize = TD->getTypeAllocSizeInBits(AT->getElementType());
1736193323Sed    for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
1737193323Sed      Value *Elt = Builder.CreateExtractValue(SV, i, "tmp");
1738193323Sed      Old = ConvertScalar_InsertValue(Elt, Old, Offset+i*EltSize, Builder);
1739193323Sed    }
1740193323Sed    return Old;
1741193323Sed  }
1742193323Sed
1743193323Sed  // If SV is a float, convert it to the appropriate integer type.
1744193323Sed  // If it is a pointer, do the same.
1745193323Sed  unsigned SrcWidth = TD->getTypeSizeInBits(SV->getType());
1746193323Sed  unsigned DestWidth = TD->getTypeSizeInBits(AllocaType);
1747193323Sed  unsigned SrcStoreWidth = TD->getTypeStoreSizeInBits(SV->getType());
1748193323Sed  unsigned DestStoreWidth = TD->getTypeStoreSizeInBits(AllocaType);
1749193323Sed  if (SV->getType()->isFloatingPoint() || isa<VectorType>(SV->getType()))
1750198090Srdivacky    SV = Builder.CreateBitCast(SV,
1751198090Srdivacky                            IntegerType::get(SV->getContext(),SrcWidth), "tmp");
1752193323Sed  else if (isa<PointerType>(SV->getType()))
1753198090Srdivacky    SV = Builder.CreatePtrToInt(SV, TD->getIntPtrType(SV->getContext()), "tmp");
1754193323Sed
1755193323Sed  // Zero extend or truncate the value if needed.
1756193323Sed  if (SV->getType() != AllocaType) {
1757193323Sed    if (SV->getType()->getPrimitiveSizeInBits() <
1758193323Sed             AllocaType->getPrimitiveSizeInBits())
1759193323Sed      SV = Builder.CreateZExt(SV, AllocaType, "tmp");
1760193323Sed    else {
1761193323Sed      // Truncation may be needed if the store is wider than the alloca can
1762193323Sed      // hold (such a store is undefined behavior).
1763193323Sed      SV = Builder.CreateTrunc(SV, AllocaType, "tmp");
1764193323Sed      SrcWidth = DestWidth;
1765193323Sed      SrcStoreWidth = DestStoreWidth;
1766193323Sed    }
1767193323Sed  }
1768193323Sed
1769193323Sed  // If this is a big-endian system and the store is narrower than the
1770193323Sed  // full alloca type, we need to do a shift to get the right bits.
1771193323Sed  int ShAmt = 0;
1772193323Sed  if (TD->isBigEndian()) {
1773193323Sed    // On big-endian machines, the lowest bit is stored at the bit offset
1774193323Sed    // from the pointer given by getTypeStoreSizeInBits.  This matters for
1775193323Sed    // integers with a bitwidth that is not a multiple of 8.
1776193323Sed    ShAmt = DestStoreWidth - SrcStoreWidth - Offset;
1777193323Sed  } else {
1778193323Sed    ShAmt = Offset;
1779193323Sed  }
1780193323Sed
1781193323Sed  // Note: we support negative shift amounts (handled with shr), which are not
1782193323Sed  // otherwise defined.  We do this to support (e.g.) stores off the end of a
1783193323Sed  // structure where only some bits in the structure are set.
1784193323Sed  APInt Mask(APInt::getLowBitsSet(DestWidth, SrcWidth));
1785193323Sed  if (ShAmt > 0 && (unsigned)ShAmt < DestWidth) {
1786198090Srdivacky    SV = Builder.CreateShl(SV, ConstantInt::get(SV->getType(),
1787195340Sed                           ShAmt), "tmp");
1788193323Sed    Mask <<= ShAmt;
1789193323Sed  } else if (ShAmt < 0 && (unsigned)-ShAmt < DestWidth) {
1790198090Srdivacky    SV = Builder.CreateLShr(SV, ConstantInt::get(SV->getType(),
1791195340Sed                            -ShAmt), "tmp");
1792193323Sed    Mask = Mask.lshr(-ShAmt);
1793193323Sed  }
1794193323Sed
1795193323Sed  // Mask out the bits we are about to insert from the old value, and or
1796193323Sed  // in the new bits.
1797193323Sed  if (SrcWidth != DestWidth) {
1798193323Sed    assert(DestWidth > SrcWidth);
1799198090Srdivacky    Old = Builder.CreateAnd(Old, ConstantInt::get(Context, ~Mask), "mask");
1800193323Sed    SV = Builder.CreateOr(Old, SV, "ins");
1801193323Sed  }
1802193323Sed  return SV;
1803193323Sed}
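
// Worked example (editorial): inserting an i8 at bit offset 8 into an i32 on
// a little-endian target.  SV is zext'd to i32 and shifted left by 8, Mask
// becomes 0x0000FF00 after the shift, and the result is
// (Old & ~0x0000FF00) | (SV << 8).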
1804193323Sed
1805193323Sed
1806193323Sed
1807193323Sed/// PointsToConstantGlobal - Return true if V (possibly indirectly) points to
1808193323Sed/// some part of a constant global variable.  This intentionally only accepts
1809193323Sed/// constant expressions because we can't rewrite arbitrary instructions.
1810193323Sedstatic bool PointsToConstantGlobal(Value *V) {
1811193323Sed  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
1812193323Sed    return GV->isConstant();
1813193323Sed  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
1814193323Sed    if (CE->getOpcode() == Instruction::BitCast ||
1815193323Sed        CE->getOpcode() == Instruction::GetElementPtr)
1816193323Sed      return PointsToConstantGlobal(CE->getOperand(0));
1817193323Sed  return false;
1818193323Sed}
1819193323Sed
1820193323Sed/// isOnlyCopiedFromConstantGlobal - Recursively walk the uses of a (derived)
1821193323Sed/// pointer to an alloca.  Ignore any reads of the pointer; return false if we
1822193323Sed/// see any stores or other unknown uses.  If we see pointer arithmetic, keep
1823193323Sed/// track of whether it moves the pointer (with isOffset) but otherwise traverse
1824193323Sed/// the uses.  If we see a memcpy/memmove that targets an unoffset pointer to
1825193323Sed/// the alloca, and if the source pointer is a pointer to a constant global, we
1826193323Sed/// can optimize this.
1827193323Sedstatic bool isOnlyCopiedFromConstantGlobal(Value *V, Instruction *&TheCopy,
1828193323Sed                                           bool isOffset) {
1829193323Sed  for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) {
1830193323Sed    if (LoadInst *LI = dyn_cast<LoadInst>(*UI))
1831193323Sed      // Ignore non-volatile loads, they are always ok.
1832193323Sed      if (!LI->isVolatile())
1833193323Sed        continue;
1834193323Sed
1835193323Sed    if (BitCastInst *BCI = dyn_cast<BitCastInst>(*UI)) {
1836193323Sed      // If uses of the bitcast are ok, we are ok.
1837193323Sed      if (!isOnlyCopiedFromConstantGlobal(BCI, TheCopy, isOffset))
1838193323Sed        return false;
1839193323Sed      continue;
1840193323Sed    }
1841193323Sed    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(*UI)) {
1842193323Sed      // If the GEP has all zero indices, it doesn't offset the pointer.  If it
1843193323Sed      // doesn't, it does.
1844193323Sed      if (!isOnlyCopiedFromConstantGlobal(GEP, TheCopy,
1845193323Sed                                         isOffset || !GEP->hasAllZeroIndices()))
1846193323Sed        return false;
1847193323Sed      continue;
1848193323Sed    }
1849193323Sed
1850193323Sed    // If this isn't a memcpy/memmove, reject it as something we can't
1851193323Sed    // handle.
1852193323Sed    if (!isa<MemTransferInst>(*UI))
1853193323Sed      return false;
1854193323Sed
1855193323Sed    // If we already have seen a copy, reject the second one.
1856193323Sed    if (TheCopy) return false;
1857193323Sed
1858193323Sed    // If the pointer has been offset from the start of the alloca, we can't
1859193323Sed    // safely handle this.
1860193323Sed    if (isOffset) return false;
1861193323Sed
1862193323Sed    // If the memintrinsic isn't using the alloca as the dest, reject it.
1863193323Sed    if (UI.getOperandNo() != 1) return false;
1864193323Sed
1865193323Sed    MemIntrinsic *MI = cast<MemIntrinsic>(*UI);
1866193323Sed
1867193323Sed    // If the source of the memcpy/move is not a constant global, reject it.
1868193323Sed    if (!PointsToConstantGlobal(MI->getOperand(2)))
1869193323Sed      return false;
1870193323Sed
1871193323Sed    // Otherwise, the transform is safe.  Remember the copy instruction.
1872193323Sed    TheCopy = MI;
1873193323Sed  }
1874193323Sed  return true;
1875193323Sed}
1876193323Sed
1877193323Sed/// isOnlyCopiedFromConstantGlobal - Return the copy if the specified alloca
1878193323Sed/// is only modified by a copy from a constant global, null otherwise.  If so,
1879193323Sed/// we can replace any uses of the alloca with uses of the global directly.
1880198892SrdivackyInstruction *SROA::isOnlyCopiedFromConstantGlobal(AllocaInst *AI) {
1881193323Sed  Instruction *TheCopy = 0;
1882193323Sed  if (::isOnlyCopiedFromConstantGlobal(AI, TheCopy, false))
1883193323Sed    return TheCopy;
1884193323Sed  return 0;
1885193323Sed}
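
// Illustrative sketch (editorial example with hypothetical IR, not from the
// original source): given
//
//   @G = constant [4 x i32] [i32 1, i32 2, i32 3, i32 4]
//   %A = alloca [4 x i32]
//   call void @llvm.memcpy.i64(i8* %Adst, i8* %Gsrc, i64 16, i32 4)
//   ; ...only reads of %A follow...
//
// where %Adst and %Gsrc are hypothetical bitcasts of %A and @G, the copy
// qualifies, so the caller can replace every use of %A with @G and delete
// both the memcpy and the alloca.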
1886