// ScalarReplAggregates.cpp revision 204642
1193323Sed//===- ScalarReplAggregates.cpp - Scalar Replacement of Aggregates --------===//
2193323Sed//
3193323Sed//                     The LLVM Compiler Infrastructure
4193323Sed//
5193323Sed// This file is distributed under the University of Illinois Open Source
6193323Sed// License. See LICENSE.TXT for details.
7193323Sed//
8193323Sed//===----------------------------------------------------------------------===//
9193323Sed//
10193323Sed// This transformation implements the well known scalar replacement of
11193323Sed// aggregates transformation.  This xform breaks up alloca instructions of
12193323Sed// aggregate type (structure or array) into individual alloca instructions for
13193323Sed// each member (if possible).  Then, if possible, it transforms the individual
14193323Sed// alloca instructions into nice clean scalar SSA form.
15193323Sed//
16193323Sed// This combines a simple SRoA algorithm with the Mem2Reg algorithm because
17193323Sed// often interact, especially for C++ programs.  As such, iterating between
18193323Sed// SRoA, then Mem2Reg until we run out of things to promote works well.
19193323Sed//
20193323Sed//===----------------------------------------------------------------------===//
21193323Sed
22193323Sed#define DEBUG_TYPE "scalarrepl"
23193323Sed#include "llvm/Transforms/Scalar.h"
24193323Sed#include "llvm/Constants.h"
25193323Sed#include "llvm/DerivedTypes.h"
26193323Sed#include "llvm/Function.h"
27193323Sed#include "llvm/GlobalVariable.h"
28193323Sed#include "llvm/Instructions.h"
29193323Sed#include "llvm/IntrinsicInst.h"
30195340Sed#include "llvm/LLVMContext.h"
31193323Sed#include "llvm/Pass.h"
32193323Sed#include "llvm/Analysis/Dominators.h"
33193323Sed#include "llvm/Target/TargetData.h"
34193323Sed#include "llvm/Transforms/Utils/PromoteMemToReg.h"
35193323Sed#include "llvm/Transforms/Utils/Local.h"
36193323Sed#include "llvm/Support/Debug.h"
37198090Srdivacky#include "llvm/Support/ErrorHandling.h"
38193323Sed#include "llvm/Support/GetElementPtrTypeIterator.h"
39193323Sed#include "llvm/Support/IRBuilder.h"
40193323Sed#include "llvm/Support/MathExtras.h"
41198090Srdivacky#include "llvm/Support/raw_ostream.h"
42193323Sed#include "llvm/ADT/SmallVector.h"
43193323Sed#include "llvm/ADT/Statistic.h"
44193323Sedusing namespace llvm;
45193323Sed
// Pass-wide statistics, reported with -stats.  The transformation code below
// increments these counters as it processes allocas.
STATISTIC(NumReplaced,  "Number of allocas broken up");
STATISTIC(NumPromoted,  "Number of allocas promoted");
STATISTIC(NumConverted, "Number of aggregates converted to scalar");
STATISTIC(NumGlobals,   "Number of allocas copied from constant global");
50193323Sed
namespace {
  /// SROA - Scalar Replacement of Aggregates.  Breaks up aggregate (struct or
  /// array) allocas into per-element allocas, or converts small aggregates
  /// into a single scalar/vector alloca, so that mem2reg can promote them to
  /// SSA registers.
  struct SROA : public FunctionPass {
    static char ID; // Pass identification, replacement for typeid
    // T is the maximum alloca size (in bytes) considered for the
    // convert-to-scalar path; -1 selects the default of 128.
    explicit SROA(signed T = -1) : FunctionPass(&ID) {
      if (T == -1)
        SRThreshold = 128;
      else
        SRThreshold = T;
    }

    bool runOnFunction(Function &F);

    // The two phases that runOnFunction alternates between.
    bool performScalarRepl(Function &F);
    bool performPromotion(Function &F);

    // getAnalysisUsage - This pass does not require any passes, but we know it
    // will not alter the CFG, so say so.
    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.addRequired<DominatorTree>();
      AU.addRequired<DominanceFrontier>();
      AU.setPreservesCFG();
    }

  private:
    /// TD - Target layout information, or null if unavailable.  runOnFunction
    /// skips the scalar-replacement phase when this is null.
    TargetData *TD;

    /// DeadInsts - Keep track of instructions we have made dead, so that
    /// we can remove them after we are done working.
    SmallVector<Value*, 32> DeadInsts;

    /// AllocaInfo - When analyzing uses of an alloca instruction, this captures
    /// information about the uses.  All these fields are initialized to false
    /// and set to true when something is learned.
    struct AllocaInfo {
      /// isUnsafe - This is set to true if the alloca cannot be SROA'd.
      bool isUnsafe : 1;

      /// isMemCpySrc - This is true if this aggregate is memcpy'd from.
      bool isMemCpySrc : 1;

      /// isMemCpyDst - This is true if this aggregate is memcpy'd into.
      bool isMemCpyDst : 1;

      AllocaInfo()
        : isUnsafe(false), isMemCpySrc(false), isMemCpyDst(false) {}
    };

    /// SRThreshold - Size limit (bytes) for the convert-to-scalar path,
    /// set from the constructor argument.
    unsigned SRThreshold;

    void MarkUnsafe(AllocaInfo &I) { I.isUnsafe = true; }

    bool isSafeAllocaToScalarRepl(AllocaInst *AI);

    // Safety analysis: recursively walk an alloca's uses, flagging anything
    // in Info that would make element-wise replacement incorrect.
    void isSafeForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
                             AllocaInfo &Info);
    void isSafeGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t &Offset,
                   AllocaInfo &Info);
    void isSafeMemAccess(AllocaInst *AI, uint64_t Offset, uint64_t MemSize,
                         const Type *MemOpType, bool isStore, AllocaInfo &Info);
    bool TypeHasComponent(const Type *T, uint64_t Offset, uint64_t Size);
    uint64_t FindElementAndOffset(const Type *&T, uint64_t &Offset,
                                  const Type *&IdxTy);

    void DoScalarReplacement(AllocaInst *AI,
                             std::vector<AllocaInst*> &WorkList);
    void DeleteDeadInstructions();
    AllocaInst *AddNewAlloca(Function &F, const Type *Ty, AllocaInst *Base);

    // Rewriting: once an alloca is known safe, rewrite each of its uses in
    // terms of the new per-element allocas in NewElts.
    void RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
                              SmallVector<AllocaInst*, 32> &NewElts);
    void RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset,
                        SmallVector<AllocaInst*, 32> &NewElts);
    void RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset,
                    SmallVector<AllocaInst*, 32> &NewElts);
    void RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
                                      AllocaInst *AI,
                                      SmallVector<AllocaInst*, 32> &NewElts);
    void RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
                                       SmallVector<AllocaInst*, 32> &NewElts);
    void RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
                                      SmallVector<AllocaInst*, 32> &NewElts);

    // Convert-to-scalar path: fold an entire small aggregate into a single
    // integer or vector alloca that mem2reg can promote.
    bool CanConvertToScalar(Value *V, bool &IsNotTrivial, const Type *&VecTy,
                            bool &SawVec, uint64_t Offset, unsigned AllocaSize);
    void ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset);
    Value *ConvertScalar_ExtractValue(Value *NV, const Type *ToType,
                                     uint64_t Offset, IRBuilder<> &Builder);
    Value *ConvertScalar_InsertValue(Value *StoredVal, Value *ExistingVal,
                                     uint64_t Offset, IRBuilder<> &Builder);
    static Instruction *isOnlyCopiedFromConstantGlobal(AllocaInst *AI);
  };
}
143193323Sed
// The address of ID uniquely identifies the pass to the pass manager.
char SROA::ID = 0;
// Static registration so the pass is available as -scalarrepl in opt.
static RegisterPass<SROA> X("scalarrepl", "Scalar Replacement of Aggregates");
146193323Sed
147193323Sed// Public interface to the ScalarReplAggregates pass
148193323SedFunctionPass *llvm::createScalarReplAggregatesPass(signed int Threshold) {
149193323Sed  return new SROA(Threshold);
150193323Sed}
151193323Sed
152193323Sed
153193323Sedbool SROA::runOnFunction(Function &F) {
154198090Srdivacky  TD = getAnalysisIfAvailable<TargetData>();
155198090Srdivacky
156193323Sed  bool Changed = performPromotion(F);
157198090Srdivacky
158198090Srdivacky  // FIXME: ScalarRepl currently depends on TargetData more than it
159198090Srdivacky  // theoretically needs to. It should be refactored in order to support
160198090Srdivacky  // target-independent IR. Until this is done, just skip the actual
161198090Srdivacky  // scalar-replacement portion of this pass.
162198090Srdivacky  if (!TD) return Changed;
163198090Srdivacky
164193323Sed  while (1) {
165193323Sed    bool LocalChange = performScalarRepl(F);
166193323Sed    if (!LocalChange) break;   // No need to repromote if no scalarrepl
167193323Sed    Changed = true;
168193323Sed    LocalChange = performPromotion(F);
169193323Sed    if (!LocalChange) break;   // No need to re-scalarrepl if no promotion
170193323Sed  }
171193323Sed
172193323Sed  return Changed;
173193323Sed}
174193323Sed
175193323Sed
176193323Sedbool SROA::performPromotion(Function &F) {
177193323Sed  std::vector<AllocaInst*> Allocas;
178193323Sed  DominatorTree         &DT = getAnalysis<DominatorTree>();
179193323Sed  DominanceFrontier &DF = getAnalysis<DominanceFrontier>();
180193323Sed
181193323Sed  BasicBlock &BB = F.getEntryBlock();  // Get the entry node for the function
182193323Sed
183193323Sed  bool Changed = false;
184193323Sed
185193323Sed  while (1) {
186193323Sed    Allocas.clear();
187193323Sed
188193323Sed    // Find allocas that are safe to promote, by looking at all instructions in
189193323Sed    // the entry node
190193323Sed    for (BasicBlock::iterator I = BB.begin(), E = --BB.end(); I != E; ++I)
191193323Sed      if (AllocaInst *AI = dyn_cast<AllocaInst>(I))       // Is it an alloca?
192193323Sed        if (isAllocaPromotable(AI))
193193323Sed          Allocas.push_back(AI);
194193323Sed
195193323Sed    if (Allocas.empty()) break;
196193323Sed
197199989Srdivacky    PromoteMemToReg(Allocas, DT, DF);
198193323Sed    NumPromoted += Allocas.size();
199193323Sed    Changed = true;
200193323Sed  }
201193323Sed
202193323Sed  return Changed;
203193323Sed}
204193323Sed
205203954Srdivacky/// ShouldAttemptScalarRepl - Decide if an alloca is a good candidate for
206203954Srdivacky/// SROA.  It must be a struct or array type with a small number of elements.
207203954Srdivackystatic bool ShouldAttemptScalarRepl(AllocaInst *AI) {
208203954Srdivacky  const Type *T = AI->getAllocatedType();
209203954Srdivacky  // Do not promote any struct into more than 32 separate vars.
210193323Sed  if (const StructType *ST = dyn_cast<StructType>(T))
211203954Srdivacky    return ST->getNumElements() <= 32;
212203954Srdivacky  // Arrays are much less likely to be safe for SROA; only consider
213203954Srdivacky  // them if they are very small.
214203954Srdivacky  if (const ArrayType *AT = dyn_cast<ArrayType>(T))
215203954Srdivacky    return AT->getNumElements() <= 8;
216203954Srdivacky  return false;
217193323Sed}
218193323Sed
// performScalarRepl - This algorithm is a simple worklist driven algorithm,
// which runs on all of the malloc/alloca instructions in the function, removing
// them if they are only used by getelementptr instructions.
//
// Note: callers guarantee TD is non-null here (runOnFunction returns early
// when TargetData is unavailable).
bool SROA::performScalarRepl(Function &F) {
  std::vector<AllocaInst*> WorkList;

  // Scan the entry basic block, adding any alloca's and mallocs to the worklist
  BasicBlock &BB = F.getEntryBlock();
  for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I)
    if (AllocaInst *A = dyn_cast<AllocaInst>(I))
      WorkList.push_back(A);

  // Process the worklist
  bool Changed = false;
  while (!WorkList.empty()) {
    AllocaInst *AI = WorkList.back();
    WorkList.pop_back();

    // Handle dead allocas trivially.  These can be formed by SROA'ing arrays
    // with unused elements.
    if (AI->use_empty()) {
      AI->eraseFromParent();
      continue;
    }

    // If this alloca is impossible for us to promote, reject it early.
    if (AI->isArrayAllocation() || !AI->getAllocatedType()->isSized())
      continue;

    // Check to see if this allocation is only modified by a memcpy/memmove from
    // a constant global.  If this is the case, we can change all users to use
    // the constant global instead.  This is commonly produced by the CFE by
    // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A'
    // is only subsequently read.
    if (Instruction *TheCopy = isOnlyCopiedFromConstantGlobal(AI)) {
      DEBUG(dbgs() << "Found alloca equal to global: " << *AI << '\n');
      DEBUG(dbgs() << "  memcpy = " << *TheCopy << '\n');
      // NOTE(review): operand 2 of the copy is taken as the source global —
      // presumably the memcpy/memmove argument layout; confirm against the
      // MemTransferInst definition.
      Constant *TheSrc = cast<Constant>(TheCopy->getOperand(2));
      AI->replaceAllUsesWith(ConstantExpr::getBitCast(TheSrc, AI->getType()));
      TheCopy->eraseFromParent();  // Don't mutate the global.
      AI->eraseFromParent();
      ++NumGlobals;
      Changed = true;
      continue;
    }

    // Check to see if we can perform the core SROA transformation.  We cannot
    // transform the allocation instruction if it is an array allocation
    // (allocations OF arrays are ok though), and an allocation of a scalar
    // value cannot be decomposed at all.
    uint64_t AllocaSize = TD->getTypeAllocSize(AI->getAllocatedType());

    // Do not promote [0 x %struct].
    if (AllocaSize == 0) continue;

    // If the alloca looks like a good candidate for scalar replacement, and if
    // all its users can be transformed, then split up the aggregate into its
    // separate elements.
    if (ShouldAttemptScalarRepl(AI) && isSafeAllocaToScalarRepl(AI)) {
      DoScalarReplacement(AI, WorkList);
      Changed = true;
      continue;
    }

    // Do not promote any struct whose size is too big.
    if (AllocaSize > SRThreshold) continue;

    // If we can turn this aggregate value (potentially with casts) into a
    // simple scalar value that can be mem2reg'd into a register value.
    // IsNotTrivial tracks whether this is something that mem2reg could have
    // promoted itself.  If so, we don't want to transform it needlessly.  Note
    // that we can't just check based on the type: the alloca may be of an i32
    // but that has pointer arithmetic to set byte 3 of it or something.
    bool IsNotTrivial = false;
    const Type *VectorTy = 0;
    bool HadAVector = false;
    if (CanConvertToScalar(AI, IsNotTrivial, VectorTy, HadAVector,
                           0, unsigned(AllocaSize)) && IsNotTrivial) {
      AllocaInst *NewAI;
      // If we were able to find a vector type that can handle this with
      // insert/extract elements, and if there was at least one use that had
      // a vector type, promote this to a vector.  We don't want to promote
      // random stuff that doesn't use vectors (e.g. <9 x double>) because then
      // we just get a lot of insert/extracts.  If at least one vector is
      // involved, then we probably really do have a union of vector/array.
      if (VectorTy && VectorTy->isVectorTy() && HadAVector) {
        DEBUG(dbgs() << "CONVERT TO VECTOR: " << *AI << "\n  TYPE = "
                     << *VectorTy << '\n');

        // Create and insert the vector alloca.
        NewAI = new AllocaInst(VectorTy, 0, "",  AI->getParent()->begin());
        ConvertUsesToScalar(AI, NewAI, 0);
      } else {
        DEBUG(dbgs() << "CONVERT TO SCALAR INTEGER: " << *AI << "\n");

        // Create and insert the integer alloca.  AllocaSize*8 gives the
        // width in bits of an integer covering the whole allocation.
        const Type *NewTy = IntegerType::get(AI->getContext(), AllocaSize*8);
        NewAI = new AllocaInst(NewTy, 0, "", AI->getParent()->begin());
        ConvertUsesToScalar(AI, NewAI, 0);
      }
      NewAI->takeName(AI);
      AI->eraseFromParent();
      ++NumConverted;
      Changed = true;
      continue;
    }

    // Otherwise, couldn't process this alloca.
  }

  return Changed;
}
332193323Sed
/// DoScalarReplacement - This alloca satisfied the isSafeAllocaToScalarRepl
/// predicate, do SROA now.  One new alloca is created per element of the
/// struct or array, every use of AI is rewritten in terms of them, and AI is
/// erased.  The new allocas are pushed onto WorkList so they are considered
/// for further splitting/promotion themselves.
void SROA::DoScalarReplacement(AllocaInst *AI,
                               std::vector<AllocaInst*> &WorkList) {
  DEBUG(dbgs() << "Found inst to SROA: " << *AI << '\n');
  SmallVector<AllocaInst*, 32> ElementAllocas;
  if (const StructType *ST = dyn_cast<StructType>(AI->getAllocatedType())) {
    // One new alloca per struct field, named "<orig>.<i>", inserted before AI.
    ElementAllocas.reserve(ST->getNumContainedTypes());
    for (unsigned i = 0, e = ST->getNumContainedTypes(); i != e; ++i) {
      AllocaInst *NA = new AllocaInst(ST->getContainedType(i), 0,
                                      AI->getAlignment(),
                                      AI->getName() + "." + Twine(i), AI);
      ElementAllocas.push_back(NA);
      WorkList.push_back(NA);  // Add to worklist for recursive processing
    }
  } else {
    // Not a struct, so it must be an array: one new alloca per element.
    const ArrayType *AT = cast<ArrayType>(AI->getAllocatedType());
    ElementAllocas.reserve(AT->getNumElements());
    const Type *ElTy = AT->getElementType();
    for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
      AllocaInst *NA = new AllocaInst(ElTy, 0, AI->getAlignment(),
                                      AI->getName() + "." + Twine(i), AI);
      ElementAllocas.push_back(NA);
      WorkList.push_back(NA);  // Add to worklist for recursive processing
    }
  }

  // Now that we have created the new alloca instructions, rewrite all the
  // uses of the old alloca.
  RewriteForScalarRepl(AI, AI, 0, ElementAllocas);

  // Now erase any instructions that were made dead while rewriting the alloca.
  DeleteDeadInstructions();
  AI->eraseFromParent();

  NumReplaced++;
}
370193323Sed
371201360Srdivacky/// DeleteDeadInstructions - Erase instructions on the DeadInstrs list,
372201360Srdivacky/// recursively including all their operands that become trivially dead.
373201360Srdivackyvoid SROA::DeleteDeadInstructions() {
374201360Srdivacky  while (!DeadInsts.empty()) {
375201360Srdivacky    Instruction *I = cast<Instruction>(DeadInsts.pop_back_val());
376193323Sed
377201360Srdivacky    for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI)
378201360Srdivacky      if (Instruction *U = dyn_cast<Instruction>(*OI)) {
379201360Srdivacky        // Zero out the operand and see if it becomes trivially dead.
380201360Srdivacky        // (But, don't add allocas to the dead instruction list -- they are
381201360Srdivacky        // already on the worklist and will be deleted separately.)
382201360Srdivacky        *OI = 0;
383201360Srdivacky        if (isInstructionTriviallyDead(U) && !isa<AllocaInst>(U))
384201360Srdivacky          DeadInsts.push_back(U);
385201360Srdivacky      }
386201360Srdivacky
387201360Srdivacky    I->eraseFromParent();
388193323Sed  }
389193323Sed}
390201360Srdivacky
/// isSafeForScalarRepl - Check if instruction I is a safe use with regard to
/// performing scalar replacement of alloca AI.  The results are flagged in
/// the Info parameter.  Offset indicates the position within AI that is
/// referenced by this instruction.
void SROA::isSafeForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
                               AllocaInfo &Info) {
  for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E; ++UI) {
    Instruction *User = cast<Instruction>(*UI);

    if (BitCastInst *BC = dyn_cast<BitCastInst>(User)) {
      // A bitcast doesn't move the pointer; just check its uses recursively.
      isSafeForScalarRepl(BC, AI, Offset, Info);
    } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) {
      // Check the GEP itself (constant indices, in-bounds), then its uses at
      // the adjusted offset.  A copy of Offset is used so sibling uses in
      // this loop still see the original position.
      uint64_t GEPOffset = Offset;
      isSafeGEP(GEPI, AI, GEPOffset, Info);
      if (!Info.isUnsafe)
        isSafeForScalarRepl(GEPI, AI, GEPOffset, Info);
    } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(UI)) {
      // memset/memcpy/memmove: only constant lengths are analyzable.  (The
      // iterator UI is used here, rather than User, so getOperandNo() below
      // can report which operand of the call this pointer feeds; operand
      // number 1 appears to be the destination pointer — confirm against the
      // MemIntrinsic operand layout.)
      ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength());
      if (Length)
        isSafeMemAccess(AI, Offset, Length->getZExtValue(), 0,
                        UI.getOperandNo() == 1, Info);
      else
        MarkUnsafe(Info);
    } else if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
      // Non-volatile loads are safe if they hit a whole component of AI.
      if (!LI->isVolatile()) {
        const Type *LIType = LI->getType();
        isSafeMemAccess(AI, Offset, TD->getTypeAllocSize(LIType),
                        LIType, false, Info);
      } else
        MarkUnsafe(Info);
    } else if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
      // Store is ok if storing INTO the pointer, not storing the pointer
      if (!SI->isVolatile() && SI->getOperand(0) != I) {
        const Type *SIType = SI->getOperand(0)->getType();
        isSafeMemAccess(AI, Offset, TD->getTypeAllocSize(SIType),
                        SIType, true, Info);
      } else
        MarkUnsafe(Info);
    } else {
      // Any other kind of user (call, compare, PHI, ...) blocks SROA.
      DEBUG(errs() << "  Transformation preventing inst: " << *User << '\n');
      MarkUnsafe(Info);
    }
    // Stop as soon as anything unsafe is discovered.
    if (Info.isUnsafe) return;
  }
}
436193323Sed
/// isSafeGEP - Check if a GEP instruction can be handled for scalar
/// replacement.  It is safe when all the indices are constant, in-bounds
/// references, and when the resulting offset corresponds to an element within
/// the alloca type.  The results are flagged in the Info parameter.  Upon
/// return, Offset is adjusted as specified by the GEP indices.
void SROA::isSafeGEP(GetElementPtrInst *GEPI, AllocaInst *AI,
                     uint64_t &Offset, AllocaInfo &Info) {
  gep_type_iterator GEPIt = gep_type_begin(GEPI), E = gep_type_end(GEPI);
  if (GEPIt == E)
    return;   // No indices: the pointer (and Offset) are unchanged.

  // Walk through the GEP type indices, checking the types that this indexes
  // into.
  for (; GEPIt != E; ++GEPIt) {
    // Ignore struct elements, no extra checking needed for these.
    if ((*GEPIt)->isStructTy())
      continue;

    // Non-struct (array/sequential) indices must be compile-time constants
    // so the byte offset can be computed below.
    ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPIt.getOperand());
    if (!IdxVal)
      return MarkUnsafe(Info);
  }

  // Compute the offset due to this GEP and check if the alloca has a
  // component element at that offset.
  SmallVector<Value*, 8> Indices(GEPI->op_begin() + 1, GEPI->op_end());
  Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(),
                                 &Indices[0], Indices.size());
  if (!TypeHasComponent(AI->getAllocatedType(), Offset, 0))
    MarkUnsafe(Info);
}
468193323Sed
/// isSafeMemAccess - Check if a load/store/memcpy operates on the entire AI
/// alloca or has an offset and size that corresponds to a component element
/// within it.  The offset checked here may have been formed from a GEP with a
/// pointer bitcasted to a different type.
void SROA::isSafeMemAccess(AllocaInst *AI, uint64_t Offset, uint64_t MemSize,
                           const Type *MemOpType, bool isStore,
                           AllocaInfo &Info) {
  // Check if this is a load/store of the entire alloca.
  if (Offset == 0 && MemSize == TD->getTypeAllocSize(AI->getAllocatedType())) {
    bool UsesAggregateType = (MemOpType == AI->getAllocatedType());
    // This is safe for MemIntrinsics (where MemOpType is 0), integer types
    // (which are essentially the same as the MemIntrinsics, especially with
    // regard to copying padding between elements), or references using the
    // aggregate type of the alloca.
    if (!MemOpType || MemOpType->isIntegerTy() || UsesAggregateType) {
      if (!UsesAggregateType) {
        // Record the copy direction so later phases know this alloca is the
        // source and/or destination of a whole-aggregate copy.
        if (isStore)
          Info.isMemCpyDst = true;
        else
          Info.isMemCpySrc = true;
      }
      return;
    }
  }
  // Check if the offset/size correspond to a component within the alloca type.
  const Type *T = AI->getAllocatedType();
  if (TypeHasComponent(T, Offset, MemSize))
    return;

  // Neither the whole alloca nor a single component: SROA cannot handle it.
  return MarkUnsafe(Info);
}
500193323Sed
501201360Srdivacky/// TypeHasComponent - Return true if T has a component type with the
502201360Srdivacky/// specified offset and size.  If Size is zero, do not check the size.
503201360Srdivackybool SROA::TypeHasComponent(const Type *T, uint64_t Offset, uint64_t Size) {
504201360Srdivacky  const Type *EltTy;
505201360Srdivacky  uint64_t EltSize;
506201360Srdivacky  if (const StructType *ST = dyn_cast<StructType>(T)) {
507201360Srdivacky    const StructLayout *Layout = TD->getStructLayout(ST);
508201360Srdivacky    unsigned EltIdx = Layout->getElementContainingOffset(Offset);
509201360Srdivacky    EltTy = ST->getContainedType(EltIdx);
510201360Srdivacky    EltSize = TD->getTypeAllocSize(EltTy);
511201360Srdivacky    Offset -= Layout->getElementOffset(EltIdx);
512201360Srdivacky  } else if (const ArrayType *AT = dyn_cast<ArrayType>(T)) {
513201360Srdivacky    EltTy = AT->getElementType();
514201360Srdivacky    EltSize = TD->getTypeAllocSize(EltTy);
515201360Srdivacky    if (Offset >= AT->getNumElements() * EltSize)
516201360Srdivacky      return false;
517201360Srdivacky    Offset %= EltSize;
518201360Srdivacky  } else {
519201360Srdivacky    return false;
520193323Sed  }
521201360Srdivacky  if (Offset == 0 && (Size == 0 || EltSize == Size))
522201360Srdivacky    return true;
523201360Srdivacky  // Check if the component spans multiple elements.
524201360Srdivacky  if (Offset + Size > EltSize)
525201360Srdivacky    return false;
526201360Srdivacky  return TypeHasComponent(EltTy, Offset, Size);
527193323Sed}
528193323Sed
/// RewriteForScalarRepl - Alloca AI is being split into NewElts, so rewrite
/// the instruction I, which references it, to use the separate elements.
/// Offset indicates the position within AI that is referenced by this
/// instruction.
void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
                                SmallVector<AllocaInst*, 32> &NewElts) {
  for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E; ++UI) {
    Instruction *User = cast<Instruction>(*UI);

    if (BitCastInst *BC = dyn_cast<BitCastInst>(User)) {
      RewriteBitCast(BC, AI, Offset, NewElts);
    } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) {
      RewriteGEP(GEPI, AI, Offset, NewElts);
    } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) {
      // Length is known non-null here: isSafeForScalarRepl marked the alloca
      // unsafe for any mem intrinsic with a non-constant length, so such
      // allocas never reach the rewrite phase.
      ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength());
      uint64_t MemSize = Length->getZExtValue();
      if (Offset == 0 &&
          MemSize == TD->getTypeAllocSize(AI->getAllocatedType()))
        RewriteMemIntrinUserOfAlloca(MI, I, AI, NewElts);
      // Otherwise the intrinsic can only touch a single element and the
      // address operand will be updated, so nothing else needs to be done.
    } else if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
      const Type *LIType = LI->getType();
      if (LIType == AI->getAllocatedType()) {
        // Replace:
        //   %res = load { i32, i32 }* %alloc
        // with:
        //   %load.0 = load i32* %alloc.0
        //   %insert.0 insertvalue { i32, i32 } zeroinitializer, i32 %load.0, 0
        //   %load.1 = load i32* %alloc.1
        //   %insert = insertvalue { i32, i32 } %insert.0, i32 %load.1, 1
        // (Also works for arrays instead of structs)
        Value *Insert = UndefValue::get(LIType);
        for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
          Value *Load = new LoadInst(NewElts[i], "load", LI);
          Insert = InsertValueInst::Create(Insert, Load, i, "insert", LI);
        }
        LI->replaceAllUsesWith(Insert);
        DeadInsts.push_back(LI);
      } else if (LIType->isIntegerTy() &&
                 TD->getTypeAllocSize(LIType) ==
                 TD->getTypeAllocSize(AI->getAllocatedType())) {
        // If this is a load of the entire alloca to an integer, rewrite it.
        RewriteLoadUserOfWholeAlloca(LI, AI, NewElts);
      }
    } else if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
      Value *Val = SI->getOperand(0);
      const Type *SIType = Val->getType();
      if (SIType == AI->getAllocatedType()) {
        // Replace:
        //   store { i32, i32 } %val, { i32, i32 }* %alloc
        // with:
        //   %val.0 = extractvalue { i32, i32 } %val, 0
        //   store i32 %val.0, i32* %alloc.0
        //   %val.1 = extractvalue { i32, i32 } %val, 1
        //   store i32 %val.1, i32* %alloc.1
        // (Also works for arrays instead of structs)
        for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
          Value *Extract = ExtractValueInst::Create(Val, i, Val->getName(), SI);
          new StoreInst(Extract, NewElts[i], SI);
        }
        DeadInsts.push_back(SI);
      } else if (SIType->isIntegerTy() &&
                 TD->getTypeAllocSize(SIType) ==
                 TD->getTypeAllocSize(AI->getAllocatedType())) {
        // If this is a store of the entire alloca from an integer, rewrite it.
        RewriteStoreUserOfWholeAlloca(SI, AI, NewElts);
      }
    }
  }
}
600193323Sed
601201360Srdivacky/// RewriteBitCast - Update a bitcast reference to the alloca being replaced
602201360Srdivacky/// and recursively continue updating all of its uses.
603201360Srdivackyvoid SROA::RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset,
604201360Srdivacky                          SmallVector<AllocaInst*, 32> &NewElts) {
605201360Srdivacky  RewriteForScalarRepl(BC, AI, Offset, NewElts);
606201360Srdivacky  if (BC->getOperand(0) != AI)
607201360Srdivacky    return;
608193323Sed
609201360Srdivacky  // The bitcast references the original alloca.  Replace its uses with
610201360Srdivacky  // references to the first new element alloca.
611201360Srdivacky  Instruction *Val = NewElts[0];
612201360Srdivacky  if (Val->getType() != BC->getDestTy()) {
613201360Srdivacky    Val = new BitCastInst(Val, BC->getDestTy(), "", BC);
614201360Srdivacky    Val->takeName(BC);
615201360Srdivacky  }
616201360Srdivacky  BC->replaceAllUsesWith(Val);
617201360Srdivacky  DeadInsts.push_back(BC);
618201360Srdivacky}
619193323Sed
620201360Srdivacky/// FindElementAndOffset - Return the index of the element containing Offset
621201360Srdivacky/// within the specified type, which must be either a struct or an array.
622201360Srdivacky/// Sets T to the type of the element and Offset to the offset within that
623201360Srdivacky/// element.  IdxTy is set to the type of the index result to be used in a
624201360Srdivacky/// GEP instruction.
625201360Srdivackyuint64_t SROA::FindElementAndOffset(const Type *&T, uint64_t &Offset,
626201360Srdivacky                                    const Type *&IdxTy) {
627201360Srdivacky  uint64_t Idx = 0;
628201360Srdivacky  if (const StructType *ST = dyn_cast<StructType>(T)) {
629201360Srdivacky    const StructLayout *Layout = TD->getStructLayout(ST);
630201360Srdivacky    Idx = Layout->getElementContainingOffset(Offset);
631201360Srdivacky    T = ST->getContainedType(Idx);
632201360Srdivacky    Offset -= Layout->getElementOffset(Idx);
633201360Srdivacky    IdxTy = Type::getInt32Ty(T->getContext());
634201360Srdivacky    return Idx;
635193323Sed  }
636201360Srdivacky  const ArrayType *AT = cast<ArrayType>(T);
637201360Srdivacky  T = AT->getElementType();
638201360Srdivacky  uint64_t EltSize = TD->getTypeAllocSize(T);
639201360Srdivacky  Idx = Offset / EltSize;
640201360Srdivacky  Offset -= Idx * EltSize;
641201360Srdivacky  IdxTy = Type::getInt64Ty(T->getContext());
642201360Srdivacky  return Idx;
643193323Sed}
644193323Sed
/// RewriteGEP - Check if this GEP instruction moves the pointer across
/// elements of the alloca that are being split apart, and if so, rewrite
/// the GEP to be relative to the new element.  GEPI is a (possibly
/// indirect) user of alloca AI, Offset is the byte offset of GEPI's base
/// pointer within AI, and NewElts holds the per-element replacement
/// allocas.
void SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset,
                      SmallVector<AllocaInst*, 32> &NewElts) {
  uint64_t OldOffset = Offset;
  SmallVector<Value*, 8> Indices(GEPI->op_begin() + 1, GEPI->op_end());
  // Advance Offset to the byte position that the GEP's *result* has
  // within the alloca.
  Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(),
                                 &Indices[0], Indices.size());

  // Rewrite all of the GEP's users first, at the adjusted offset.
  RewriteForScalarRepl(GEPI, AI, Offset, NewElts);

  // Find which top-level element the GEP's base pointer was in.
  const Type *T = AI->getAllocatedType();
  const Type *IdxTy;
  uint64_t OldIdx = FindElementAndOffset(T, OldOffset, IdxTy);
  if (GEPI->getOperand(0) == AI)
    OldIdx = ~0ULL; // Force the GEP to be rewritten.

  // Find which top-level element the GEP's result points into.
  T = AI->getAllocatedType();
  uint64_t EltOffset = Offset;
  uint64_t Idx = FindElementAndOffset(T, EltOffset, IdxTy);

  // If this GEP does not move the pointer across elements of the alloca
  // being split, then it does not need to be rewritten.
  if (Idx == OldIdx)
    return;

  // Build a GEP off the new element alloca NewElts[Idx], descending
  // through nested aggregates until the remaining intra-element offset
  // is fully consumed.
  const Type *i32Ty = Type::getInt32Ty(AI->getContext());
  SmallVector<Value*, 8> NewArgs;
  NewArgs.push_back(Constant::getNullValue(i32Ty));
  while (EltOffset != 0) {
    uint64_t EltIdx = FindElementAndOffset(T, EltOffset, IdxTy);
    NewArgs.push_back(ConstantInt::get(IdxTy, EltIdx));
  }
  Instruction *Val = NewElts[Idx];
  if (NewArgs.size() > 1) {
    Val = GetElementPtrInst::CreateInBounds(Val, NewArgs.begin(),
                                            NewArgs.end(), "", GEPI);
    Val->takeName(GEPI);
  }
  // Insert a cast if the replacement pointer type differs from the
  // original GEP's result type.
  if (Val->getType() != GEPI->getType())
    Val = new BitCastInst(Val, GEPI->getType(), Val->getName(), GEPI);
  GEPI->replaceAllUsesWith(Val);
  DeadInsts.push_back(GEPI);
}
690201360Srdivacky
691193323Sed/// RewriteMemIntrinUserOfAlloca - MI is a memcpy/memset/memmove from or to AI.
692193323Sed/// Rewrite it to copy or set the elements of the scalarized memory.
693201360Srdivackyvoid SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
694198892Srdivacky                                        AllocaInst *AI,
695193323Sed                                        SmallVector<AllocaInst*, 32> &NewElts) {
696193323Sed  // If this is a memcpy/memmove, construct the other pointer as the
697193323Sed  // appropriate type.  The "Other" pointer is the pointer that goes to memory
698193323Sed  // that doesn't have anything to do with the alloca that we are promoting. For
699193323Sed  // memset, this Value* stays null.
700193323Sed  Value *OtherPtr = 0;
701198090Srdivacky  LLVMContext &Context = MI->getContext();
702193323Sed  unsigned MemAlignment = MI->getAlignment();
703193323Sed  if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) { // memmove/memcopy
704201360Srdivacky    if (Inst == MTI->getRawDest())
705193323Sed      OtherPtr = MTI->getRawSource();
706193323Sed    else {
707201360Srdivacky      assert(Inst == MTI->getRawSource());
708193323Sed      OtherPtr = MTI->getRawDest();
709193323Sed    }
710193323Sed  }
711200581Srdivacky
712193323Sed  // If there is an other pointer, we want to convert it to the same pointer
713193323Sed  // type as AI has, so we can GEP through it safely.
714193323Sed  if (OtherPtr) {
715201360Srdivacky
716201360Srdivacky    // Remove bitcasts and all-zero GEPs from OtherPtr.  This is an
717201360Srdivacky    // optimization, but it's also required to detect the corner case where
718201360Srdivacky    // both pointer operands are referencing the same memory, and where
719201360Srdivacky    // OtherPtr may be a bitcast or GEP that currently being rewritten.  (This
720201360Srdivacky    // function is only called for mem intrinsics that access the whole
721201360Srdivacky    // aggregate, so non-zero GEPs are not an issue here.)
722201360Srdivacky    while (1) {
723201360Srdivacky      if (BitCastInst *BC = dyn_cast<BitCastInst>(OtherPtr)) {
724201360Srdivacky        OtherPtr = BC->getOperand(0);
725201360Srdivacky        continue;
726201360Srdivacky      }
727201360Srdivacky      if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(OtherPtr)) {
728201360Srdivacky        // All zero GEPs are effectively bitcasts.
729201360Srdivacky        if (GEP->hasAllZeroIndices()) {
730201360Srdivacky          OtherPtr = GEP->getOperand(0);
731201360Srdivacky          continue;
732201360Srdivacky        }
733201360Srdivacky      }
734201360Srdivacky      break;
735201360Srdivacky    }
736202878Srdivacky    // Copying the alloca to itself is a no-op: just delete it.
737202878Srdivacky    if (OtherPtr == AI || OtherPtr == NewElts[0]) {
738202878Srdivacky      // This code will run twice for a no-op memcpy -- once for each operand.
739202878Srdivacky      // Put only one reference to MI on the DeadInsts list.
740202878Srdivacky      for (SmallVector<Value*, 32>::const_iterator I = DeadInsts.begin(),
741202878Srdivacky             E = DeadInsts.end(); I != E; ++I)
742202878Srdivacky        if (*I == MI) return;
743202878Srdivacky      DeadInsts.push_back(MI);
744201360Srdivacky      return;
745202878Srdivacky    }
746193323Sed
747193323Sed    if (ConstantExpr *BCE = dyn_cast<ConstantExpr>(OtherPtr))
748193323Sed      if (BCE->getOpcode() == Instruction::BitCast)
749193323Sed        OtherPtr = BCE->getOperand(0);
750193323Sed
751193323Sed    // If the pointer is not the right type, insert a bitcast to the right
752193323Sed    // type.
753193323Sed    if (OtherPtr->getType() != AI->getType())
754193323Sed      OtherPtr = new BitCastInst(OtherPtr, AI->getType(), OtherPtr->getName(),
755193323Sed                                 MI);
756193323Sed  }
757193323Sed
758193323Sed  // Process each element of the aggregate.
759193323Sed  Value *TheFn = MI->getOperand(0);
760193323Sed  const Type *BytePtrTy = MI->getRawDest()->getType();
761201360Srdivacky  bool SROADest = MI->getRawDest() == Inst;
762193323Sed
763198090Srdivacky  Constant *Zero = Constant::getNullValue(Type::getInt32Ty(MI->getContext()));
764193323Sed
765193323Sed  for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
766193323Sed    // If this is a memcpy/memmove, emit a GEP of the other element address.
767193323Sed    Value *OtherElt = 0;
768193323Sed    unsigned OtherEltAlign = MemAlignment;
769193323Sed
770202878Srdivacky    if (OtherPtr) {
771198090Srdivacky      Value *Idx[2] = { Zero,
772198090Srdivacky                      ConstantInt::get(Type::getInt32Ty(MI->getContext()), i) };
773201360Srdivacky      OtherElt = GetElementPtrInst::CreateInBounds(OtherPtr, Idx, Idx + 2,
774203954Srdivacky                                              OtherPtr->getName()+"."+Twine(i),
775201360Srdivacky                                                   MI);
776193323Sed      uint64_t EltOffset;
777193323Sed      const PointerType *OtherPtrTy = cast<PointerType>(OtherPtr->getType());
778193323Sed      if (const StructType *ST =
779193323Sed            dyn_cast<StructType>(OtherPtrTy->getElementType())) {
780193323Sed        EltOffset = TD->getStructLayout(ST)->getElementOffset(i);
781193323Sed      } else {
782193323Sed        const Type *EltTy =
783193323Sed          cast<SequentialType>(OtherPtr->getType())->getElementType();
784193323Sed        EltOffset = TD->getTypeAllocSize(EltTy)*i;
785193323Sed      }
786193323Sed
787193323Sed      // The alignment of the other pointer is the guaranteed alignment of the
788193323Sed      // element, which is affected by both the known alignment of the whole
789193323Sed      // mem intrinsic and the alignment of the element.  If the alignment of
790193323Sed      // the memcpy (f.e.) is 32 but the element is at a 4-byte offset, then the
791193323Sed      // known alignment is just 4 bytes.
792193323Sed      OtherEltAlign = (unsigned)MinAlign(OtherEltAlign, EltOffset);
793193323Sed    }
794193323Sed
795193323Sed    Value *EltPtr = NewElts[i];
796193323Sed    const Type *EltTy = cast<PointerType>(EltPtr->getType())->getElementType();
797193323Sed
798193323Sed    // If we got down to a scalar, insert a load or store as appropriate.
799193323Sed    if (EltTy->isSingleValueType()) {
800193323Sed      if (isa<MemTransferInst>(MI)) {
801193323Sed        if (SROADest) {
802193323Sed          // From Other to Alloca.
803193323Sed          Value *Elt = new LoadInst(OtherElt, "tmp", false, OtherEltAlign, MI);
804193323Sed          new StoreInst(Elt, EltPtr, MI);
805193323Sed        } else {
806193323Sed          // From Alloca to Other.
807193323Sed          Value *Elt = new LoadInst(EltPtr, "tmp", MI);
808193323Sed          new StoreInst(Elt, OtherElt, false, OtherEltAlign, MI);
809193323Sed        }
810193323Sed        continue;
811193323Sed      }
812193323Sed      assert(isa<MemSetInst>(MI));
813193323Sed
814193323Sed      // If the stored element is zero (common case), just store a null
815193323Sed      // constant.
816193323Sed      Constant *StoreVal;
817193323Sed      if (ConstantInt *CI = dyn_cast<ConstantInt>(MI->getOperand(2))) {
818193323Sed        if (CI->isZero()) {
819198090Srdivacky          StoreVal = Constant::getNullValue(EltTy);  // 0.0, null, 0, <0,0>
820193323Sed        } else {
821193323Sed          // If EltTy is a vector type, get the element type.
822194612Sed          const Type *ValTy = EltTy->getScalarType();
823194612Sed
824193323Sed          // Construct an integer with the right value.
825193323Sed          unsigned EltSize = TD->getTypeSizeInBits(ValTy);
826193323Sed          APInt OneVal(EltSize, CI->getZExtValue());
827193323Sed          APInt TotalVal(OneVal);
828193323Sed          // Set each byte.
829193323Sed          for (unsigned i = 0; 8*i < EltSize; ++i) {
830193323Sed            TotalVal = TotalVal.shl(8);
831193323Sed            TotalVal |= OneVal;
832193323Sed          }
833193323Sed
834193323Sed          // Convert the integer value to the appropriate type.
835198090Srdivacky          StoreVal = ConstantInt::get(Context, TotalVal);
836204642Srdivacky          if (ValTy->isPointerTy())
837198090Srdivacky            StoreVal = ConstantExpr::getIntToPtr(StoreVal, ValTy);
838203954Srdivacky          else if (ValTy->isFloatingPointTy())
839198090Srdivacky            StoreVal = ConstantExpr::getBitCast(StoreVal, ValTy);
840193323Sed          assert(StoreVal->getType() == ValTy && "Type mismatch!");
841193323Sed
842193323Sed          // If the requested value was a vector constant, create it.
843193323Sed          if (EltTy != ValTy) {
844193323Sed            unsigned NumElts = cast<VectorType>(ValTy)->getNumElements();
845193323Sed            SmallVector<Constant*, 16> Elts(NumElts, StoreVal);
846198090Srdivacky            StoreVal = ConstantVector::get(&Elts[0], NumElts);
847193323Sed          }
848193323Sed        }
849193323Sed        new StoreInst(StoreVal, EltPtr, MI);
850193323Sed        continue;
851193323Sed      }
852193323Sed      // Otherwise, if we're storing a byte variable, use a memset call for
853193323Sed      // this element.
854193323Sed    }
855193323Sed
856193323Sed    // Cast the element pointer to BytePtrTy.
857193323Sed    if (EltPtr->getType() != BytePtrTy)
858203954Srdivacky      EltPtr = new BitCastInst(EltPtr, BytePtrTy, EltPtr->getName(), MI);
859193323Sed
860193323Sed    // Cast the other pointer (if we have one) to BytePtrTy.
861193323Sed    if (OtherElt && OtherElt->getType() != BytePtrTy)
862203954Srdivacky      OtherElt = new BitCastInst(OtherElt, BytePtrTy, OtherElt->getName(), MI);
863193323Sed
864193323Sed    unsigned EltSize = TD->getTypeAllocSize(EltTy);
865193323Sed
866193323Sed    // Finally, insert the meminst for this element.
867193323Sed    if (isa<MemTransferInst>(MI)) {
868193323Sed      Value *Ops[] = {
869193323Sed        SROADest ? EltPtr : OtherElt,  // Dest ptr
870193323Sed        SROADest ? OtherElt : EltPtr,  // Src ptr
871198090Srdivacky        ConstantInt::get(MI->getOperand(3)->getType(), EltSize), // Size
872198090Srdivacky        // Align
873198090Srdivacky        ConstantInt::get(Type::getInt32Ty(MI->getContext()), OtherEltAlign)
874193323Sed      };
875193323Sed      CallInst::Create(TheFn, Ops, Ops + 4, "", MI);
876193323Sed    } else {
877193323Sed      assert(isa<MemSetInst>(MI));
878193323Sed      Value *Ops[] = {
879193323Sed        EltPtr, MI->getOperand(2),  // Dest, Value,
880198090Srdivacky        ConstantInt::get(MI->getOperand(3)->getType(), EltSize), // Size
881193323Sed        Zero  // Align
882193323Sed      };
883193323Sed      CallInst::Create(TheFn, Ops, Ops + 4, "", MI);
884193323Sed    }
885193323Sed  }
886201360Srdivacky  DeadInsts.push_back(MI);
887193323Sed}
888193323Sed
/// RewriteStoreUserOfWholeAlloca - We found a store of an integer that
/// overwrites the entire allocation.  Extract out the pieces of the stored
/// integer and store them individually.  SI stores an integer covering the
/// whole of alloca AI; NewElts holds the per-element replacement allocas.
void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
                                         SmallVector<AllocaInst*, 32> &NewElts){
  // Extract each element out of the integer according to its structure offset
  // and store the element value to the individual alloca.
  Value *SrcVal = SI->getOperand(0);
  const Type *AllocaEltTy = AI->getAllocatedType();
  uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy);

  // Handle tail padding by extending the operand
  if (TD->getTypeSizeInBits(SrcVal->getType()) != AllocaSizeBits)
    SrcVal = new ZExtInst(SrcVal,
                          IntegerType::get(SI->getContext(), AllocaSizeBits),
                          "", SI);

  DEBUG(dbgs() << "PROMOTING STORE TO WHOLE ALLOCA: " << *AI << '\n' << *SI
               << '\n');

  // There are two forms here: AI could be an array or struct.  Both cases
  // have different ways to compute the element offset.
  if (const StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) {
    const StructLayout *Layout = TD->getStructLayout(EltSTy);

    for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
      // Get the number of bits to shift SrcVal to get the value.
      const Type *FieldTy = EltSTy->getElementType(i);
      uint64_t Shift = Layout->getElementOffsetInBits(i);

      // On big-endian targets the field occupies the high end of the
      // integer, so the shift is measured from the opposite end.
      if (TD->isBigEndian())
        Shift = AllocaSizeBits-Shift-TD->getTypeAllocSizeInBits(FieldTy);

      Value *EltVal = SrcVal;
      if (Shift) {
        Value *ShiftVal = ConstantInt::get(EltVal->getType(), Shift);
        EltVal = BinaryOperator::CreateLShr(EltVal, ShiftVal,
                                            "sroa.store.elt", SI);
      }

      // Truncate down to an integer of the right size.
      uint64_t FieldSizeBits = TD->getTypeSizeInBits(FieldTy);

      // Ignore zero sized fields like {}, they obviously contain no data.
      if (FieldSizeBits == 0) continue;

      if (FieldSizeBits != AllocaSizeBits)
        EltVal = new TruncInst(EltVal,
                             IntegerType::get(SI->getContext(), FieldSizeBits),
                              "", SI);
      Value *DestField = NewElts[i];
      if (EltVal->getType() == FieldTy) {
        // Storing to an integer field of this size, just do it.
      } else if (FieldTy->isFloatingPointTy() || FieldTy->isVectorTy()) {
        // Bitcast to the right element type (for fp/vector values).
        EltVal = new BitCastInst(EltVal, FieldTy, "", SI);
      } else {
        // Otherwise, bitcast the dest pointer (for aggregates).
        DestField = new BitCastInst(DestField,
                              PointerType::getUnqual(EltVal->getType()),
                                    "", SI);
      }
      new StoreInst(EltVal, DestField, SI);
    }

  } else {
    const ArrayType *ATy = cast<ArrayType>(AllocaEltTy);
    const Type *ArrayEltTy = ATy->getElementType();
    uint64_t ElementOffset = TD->getTypeAllocSizeInBits(ArrayEltTy);
    uint64_t ElementSizeBits = TD->getTypeSizeInBits(ArrayEltTy);

    uint64_t Shift;

    // Big-endian targets extract elements starting at the high end.
    if (TD->isBigEndian())
      Shift = AllocaSizeBits-ElementOffset;
    else
      Shift = 0;

    for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
      // Ignore zero sized fields like {}, they obviously contain no data.
      if (ElementSizeBits == 0) continue;

      Value *EltVal = SrcVal;
      if (Shift) {
        Value *ShiftVal = ConstantInt::get(EltVal->getType(), Shift);
        EltVal = BinaryOperator::CreateLShr(EltVal, ShiftVal,
                                            "sroa.store.elt", SI);
      }

      // Truncate down to an integer of the right size.
      if (ElementSizeBits != AllocaSizeBits)
        EltVal = new TruncInst(EltVal,
                               IntegerType::get(SI->getContext(),
                                                ElementSizeBits),"",SI);
      Value *DestField = NewElts[i];
      if (EltVal->getType() == ArrayEltTy) {
        // Storing to an integer field of this size, just do it.
      } else if (ArrayEltTy->isFloatingPointTy() ||
                 ArrayEltTy->isVectorTy()) {
        // Bitcast to the right element type (for fp/vector values).
        EltVal = new BitCastInst(EltVal, ArrayEltTy, "", SI);
      } else {
        // Otherwise, bitcast the dest pointer (for aggregates).
        DestField = new BitCastInst(DestField,
                              PointerType::getUnqual(EltVal->getType()),
                                    "", SI);
      }
      new StoreInst(EltVal, DestField, SI);

      // Advance to the next element's bit position.
      if (TD->isBigEndian())
        Shift -= ElementOffset;
      else
        Shift += ElementOffset;
    }
  }

  // The original whole-alloca store is now redundant.
  DeadInsts.push_back(SI);
}
1007193323Sed
/// RewriteLoadUserOfWholeAlloca - We found a load of the entire allocation to
/// an integer.  Load the individual pieces to form the aggregate value.
/// LI loads an integer covering the whole of alloca AI; NewElts holds the
/// per-element replacement allocas.
void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
                                        SmallVector<AllocaInst*, 32> &NewElts) {
  // Extract each element out of the NewElts according to its structure offset
  // and form the result value.
  const Type *AllocaEltTy = AI->getAllocatedType();
  uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy);

  DEBUG(dbgs() << "PROMOTING LOAD OF WHOLE ALLOCA: " << *AI << '\n' << *LI
               << '\n');

  // There are two forms here: AI could be an array or struct.  Both cases
  // have different ways to compute the element offset.
  const StructLayout *Layout = 0;
  uint64_t ArrayEltBitOffset = 0;
  if (const StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) {
    Layout = TD->getStructLayout(EltSTy);
  } else {
    const Type *ArrayEltTy = cast<ArrayType>(AllocaEltTy)->getElementType();
    ArrayEltBitOffset = TD->getTypeAllocSizeInBits(ArrayEltTy);
  }

  // Accumulate the result by OR-ing each shifted element into a zero of
  // the full alloca width.
  Value *ResultVal =
    Constant::getNullValue(IntegerType::get(LI->getContext(), AllocaSizeBits));

  for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
    // Load the value from the alloca.  If the NewElt is an aggregate, cast
    // the pointer to an integer of the same size before doing the load.
    Value *SrcField = NewElts[i];
    const Type *FieldTy =
      cast<PointerType>(SrcField->getType())->getElementType();
    uint64_t FieldSizeBits = TD->getTypeSizeInBits(FieldTy);

    // Ignore zero sized fields like {}, they obviously contain no data.
    if (FieldSizeBits == 0) continue;

    const IntegerType *FieldIntTy = IntegerType::get(LI->getContext(),
                                                     FieldSizeBits);
    if (!FieldTy->isIntegerTy() && !FieldTy->isFloatingPointTy() &&
        !FieldTy->isVectorTy())
      SrcField = new BitCastInst(SrcField,
                                 PointerType::getUnqual(FieldIntTy),
                                 "", LI);
    SrcField = new LoadInst(SrcField, "sroa.load.elt", LI);

    // If SrcField is a fp or vector of the right size but that isn't an
    // integer type, bitcast to an integer so we can shift it.
    if (SrcField->getType() != FieldIntTy)
      SrcField = new BitCastInst(SrcField, FieldIntTy, "", LI);

    // Zero extend the field to be the same size as the final alloca so that
    // we can shift and insert it.
    if (SrcField->getType() != ResultVal->getType())
      SrcField = new ZExtInst(SrcField, ResultVal->getType(), "", LI);

    // Determine the number of bits to shift SrcField.
    uint64_t Shift;
    if (Layout) // Struct case.
      Shift = Layout->getElementOffsetInBits(i);
    else  // Array case.
      Shift = i*ArrayEltBitOffset;

    // On big-endian targets the field lives in the high bits of the
    // result, so the shift is measured from the opposite end.
    if (TD->isBigEndian())
      Shift = AllocaSizeBits-Shift-FieldIntTy->getBitWidth();

    if (Shift) {
      Value *ShiftVal = ConstantInt::get(SrcField->getType(), Shift);
      SrcField = BinaryOperator::CreateShl(SrcField, ShiftVal, "", LI);
    }

    ResultVal = BinaryOperator::CreateOr(SrcField, ResultVal, "", LI);
  }

  // Handle tail padding by truncating the result
  if (TD->getTypeSizeInBits(LI->getType()) != AllocaSizeBits)
    ResultVal = new TruncInst(ResultVal, LI->getType(), "", LI);

  LI->replaceAllUsesWith(ResultVal);
  DeadInsts.push_back(LI);
}
1089193323Sed
1090193323Sed/// HasPadding - Return true if the specified type has any structure or
1091193323Sed/// alignment padding, false otherwise.
1092193323Sedstatic bool HasPadding(const Type *Ty, const TargetData &TD) {
1093193323Sed  if (const StructType *STy = dyn_cast<StructType>(Ty)) {
1094193323Sed    const StructLayout *SL = TD.getStructLayout(STy);
1095193323Sed    unsigned PrevFieldBitOffset = 0;
1096193323Sed    for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
1097193323Sed      unsigned FieldBitOffset = SL->getElementOffsetInBits(i);
1098193323Sed
1099193323Sed      // Padding in sub-elements?
1100193323Sed      if (HasPadding(STy->getElementType(i), TD))
1101193323Sed        return true;
1102193323Sed
1103193323Sed      // Check to see if there is any padding between this element and the
1104193323Sed      // previous one.
1105193323Sed      if (i) {
1106193323Sed        unsigned PrevFieldEnd =
1107193323Sed        PrevFieldBitOffset+TD.getTypeSizeInBits(STy->getElementType(i-1));
1108193323Sed        if (PrevFieldEnd < FieldBitOffset)
1109193323Sed          return true;
1110193323Sed      }
1111193323Sed
1112193323Sed      PrevFieldBitOffset = FieldBitOffset;
1113193323Sed    }
1114193323Sed
1115193323Sed    //  Check for tail padding.
1116193323Sed    if (unsigned EltCount = STy->getNumElements()) {
1117193323Sed      unsigned PrevFieldEnd = PrevFieldBitOffset +
1118193323Sed                   TD.getTypeSizeInBits(STy->getElementType(EltCount-1));
1119193323Sed      if (PrevFieldEnd < SL->getSizeInBits())
1120193323Sed        return true;
1121193323Sed    }
1122193323Sed
1123193323Sed  } else if (const ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
1124193323Sed    return HasPadding(ATy->getElementType(), TD);
1125193323Sed  } else if (const VectorType *VTy = dyn_cast<VectorType>(Ty)) {
1126193323Sed    return HasPadding(VTy->getElementType(), TD);
1127193323Sed  }
1128193323Sed  return TD.getTypeSizeInBits(Ty) != TD.getTypeAllocSizeInBits(Ty);
1129193323Sed}
1130193323Sed
/// isSafeAllocaToScalarRepl - Check to see if the specified allocation of
/// an aggregate can be broken down into elements.  Returns true if it is
/// safe to perform scalar replacement of this alloca, false otherwise.
/// (An earlier version returned a tri-state code; the old "0/1/3" comment
/// was stale.)
bool SROA::isSafeAllocaToScalarRepl(AllocaInst *AI) {
  // Loop over the use list of the alloca.  We can only transform it if all of
  // the users are safe to transform.
  AllocaInfo Info;

  isSafeForScalarRepl(AI, AI, 0, Info);
  if (Info.isUnsafe) {
    DEBUG(dbgs() << "Cannot transform: " << *AI << '\n');
    return false;
  }

  // Okay, we know all the users are promotable.  If the aggregate is a memcpy
  // source and destination, we have to be careful.  In particular, the memcpy
  // could be moving around elements that live in structure padding of the LLVM
  // types, but may actually be used.  In these cases, we refuse to promote the
  // struct.
  if (Info.isMemCpySrc && Info.isMemCpyDst &&
      HasPadding(AI->getAllocatedType(), *TD))
    return false;

  return true;
}
1156193323Sed
1157193323Sed/// MergeInType - Add the 'In' type to the accumulated type (Accum) so far at
1158193323Sed/// the offset specified by Offset (which is specified in bytes).
1159193323Sed///
1160193323Sed/// There are two cases we handle here:
1161193323Sed///   1) A union of vector types of the same size and potentially its elements.
1162193323Sed///      Here we turn element accesses into insert/extract element operations.
1163193323Sed///      This promotes a <4 x float> with a store of float to the third element
1164193323Sed///      into a <4 x float> that uses insert element.
1165193323Sed///   2) A fully general blob of memory, which we turn into some (potentially
1166193323Sed///      large) integer type with extract and insert operations where the loads
1167193323Sed///      and stores would mutate the memory.
1168193323Sedstatic void MergeInType(const Type *In, uint64_t Offset, const Type *&VecTy,
1169195340Sed                        unsigned AllocaSize, const TargetData &TD,
1170198090Srdivacky                        LLVMContext &Context) {
1171193323Sed  // If this could be contributing to a vector, analyze it.
1172198090Srdivacky  if (VecTy != Type::getVoidTy(Context)) { // either null or a vector type.
1173193323Sed
1174193323Sed    // If the In type is a vector that is the same size as the alloca, see if it
1175193323Sed    // matches the existing VecTy.
1176193323Sed    if (const VectorType *VInTy = dyn_cast<VectorType>(In)) {
1177193323Sed      if (VInTy->getBitWidth()/8 == AllocaSize && Offset == 0) {
1178193323Sed        // If we're storing/loading a vector of the right size, allow it as a
1179193323Sed        // vector.  If this the first vector we see, remember the type so that
1180193323Sed        // we know the element size.
1181193323Sed        if (VecTy == 0)
1182193323Sed          VecTy = VInTy;
1183193323Sed        return;
1184193323Sed      }
1185198090Srdivacky    } else if (In->isFloatTy() || In->isDoubleTy() ||
1186204642Srdivacky               (In->isIntegerTy() && In->getPrimitiveSizeInBits() >= 8 &&
1187193323Sed                isPowerOf2_32(In->getPrimitiveSizeInBits()))) {
1188193323Sed      // If we're accessing something that could be an element of a vector, see
1189193323Sed      // if the implied vector agrees with what we already have and if Offset is
1190193323Sed      // compatible with it.
1191193323Sed      unsigned EltSize = In->getPrimitiveSizeInBits()/8;
1192193323Sed      if (Offset % EltSize == 0 &&
1193193323Sed          AllocaSize % EltSize == 0 &&
1194193323Sed          (VecTy == 0 ||
1195193323Sed           cast<VectorType>(VecTy)->getElementType()
1196193323Sed                 ->getPrimitiveSizeInBits()/8 == EltSize)) {
1197193323Sed        if (VecTy == 0)
1198198090Srdivacky          VecTy = VectorType::get(In, AllocaSize/EltSize);
1199193323Sed        return;
1200193323Sed      }
1201193323Sed    }
1202193323Sed  }
1203193323Sed
1204193323Sed  // Otherwise, we have a case that we can't handle with an optimized vector
1205193323Sed  // form.  We can still turn this into a large integer.
1206198090Srdivacky  VecTy = Type::getVoidTy(Context);
1207193323Sed}
1208193323Sed
1209193323Sed/// CanConvertToScalar - V is a pointer.  If we can convert the pointee and all
1210200581Srdivacky/// its accesses to a single vector type, return true and set VecTy to
1211193323Sed/// the new type.  If we could convert the alloca into a single promotable
1212193323Sed/// integer, return true but set VecTy to VoidTy.  Further, if the use is not a
1213193323Sed/// completely trivial use that mem2reg could promote, set IsNotTrivial.  Offset
1214193323Sed/// is the current offset from the base of the alloca being analyzed.
1215193323Sed///
1216193323Sed/// If we see at least one access to the value that is as a vector type, set the
1217193323Sed/// SawVec flag.
1218193323Sedbool SROA::CanConvertToScalar(Value *V, bool &IsNotTrivial, const Type *&VecTy,
1219193323Sed                              bool &SawVec, uint64_t Offset,
1220193323Sed                              unsigned AllocaSize) {
1221193323Sed  for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) {
1222193323Sed    Instruction *User = cast<Instruction>(*UI);
1223193323Sed
1224193323Sed    if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
1225193323Sed      // Don't break volatile loads.
1226193323Sed      if (LI->isVolatile())
1227193323Sed        return false;
1228198090Srdivacky      MergeInType(LI->getType(), Offset, VecTy,
1229198090Srdivacky                  AllocaSize, *TD, V->getContext());
1230204642Srdivacky      SawVec |= LI->getType()->isVectorTy();
1231193323Sed      continue;
1232193323Sed    }
1233193323Sed
1234193323Sed    if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
1235193323Sed      // Storing the pointer, not into the value?
1236193323Sed      if (SI->getOperand(0) == V || SI->isVolatile()) return 0;
1237195340Sed      MergeInType(SI->getOperand(0)->getType(), Offset,
1238198090Srdivacky                  VecTy, AllocaSize, *TD, V->getContext());
1239204642Srdivacky      SawVec |= SI->getOperand(0)->getType()->isVectorTy();
1240193323Sed      continue;
1241193323Sed    }
1242193323Sed
1243193323Sed    if (BitCastInst *BCI = dyn_cast<BitCastInst>(User)) {
1244193323Sed      if (!CanConvertToScalar(BCI, IsNotTrivial, VecTy, SawVec, Offset,
1245193323Sed                              AllocaSize))
1246193323Sed        return false;
1247193323Sed      IsNotTrivial = true;
1248193323Sed      continue;
1249193323Sed    }
1250193323Sed
1251193323Sed    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) {
1252193323Sed      // If this is a GEP with a variable indices, we can't handle it.
1253193323Sed      if (!GEP->hasAllConstantIndices())
1254193323Sed        return false;
1255193323Sed
1256193323Sed      // Compute the offset that this GEP adds to the pointer.
1257193323Sed      SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end());
1258201360Srdivacky      uint64_t GEPOffset = TD->getIndexedOffset(GEP->getPointerOperandType(),
1259193323Sed                                                &Indices[0], Indices.size());
1260193323Sed      // See if all uses can be converted.
1261193323Sed      if (!CanConvertToScalar(GEP, IsNotTrivial, VecTy, SawVec,Offset+GEPOffset,
1262193323Sed                              AllocaSize))
1263193323Sed        return false;
1264193323Sed      IsNotTrivial = true;
1265193323Sed      continue;
1266193323Sed    }
1267193323Sed
1268193323Sed    // If this is a constant sized memset of a constant value (e.g. 0) we can
1269193323Sed    // handle it.
1270193323Sed    if (MemSetInst *MSI = dyn_cast<MemSetInst>(User)) {
1271193323Sed      // Store of constant value and constant size.
1272193323Sed      if (isa<ConstantInt>(MSI->getValue()) &&
1273193323Sed          isa<ConstantInt>(MSI->getLength())) {
1274193323Sed        IsNotTrivial = true;
1275193323Sed        continue;
1276193323Sed      }
1277193323Sed    }
1278193323Sed
1279193323Sed    // If this is a memcpy or memmove into or out of the whole allocation, we
1280193323Sed    // can handle it like a load or store of the scalar type.
1281193323Sed    if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(User)) {
1282193323Sed      if (ConstantInt *Len = dyn_cast<ConstantInt>(MTI->getLength()))
1283193323Sed        if (Len->getZExtValue() == AllocaSize && Offset == 0) {
1284193323Sed          IsNotTrivial = true;
1285193323Sed          continue;
1286193323Sed        }
1287193323Sed    }
1288193323Sed
1289193323Sed    // Otherwise, we cannot handle this!
1290193323Sed    return false;
1291193323Sed  }
1292193323Sed
1293193323Sed  return true;
1294193323Sed}
1295193323Sed
/// ConvertUsesToScalar - Convert all of the users of Ptr to use the new alloca
/// directly.  This happens when we are converting an "integer union" to a
/// single integer scalar, or when we are converting a "vector union" to a
/// vector with insert/extractelement instructions.
///
/// Offset is an offset from the original alloca, in bits that need to be
/// shifted to the right.  By the end of this, there should be no uses of Ptr.
void SROA::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset) {
  // Each iteration rewrites and erases one user, so the loop drains the use
  // list rather than iterating over it (users are deleted as we go).
  while (!Ptr->use_empty()) {
    Instruction *User = cast<Instruction>(Ptr->use_back());

    if (BitCastInst *CI = dyn_cast<BitCastInst>(User)) {
      // A bitcast doesn't change the offset; rewrite its users recursively,
      // after which the cast itself is dead.
      ConvertUsesToScalar(CI, NewAI, Offset);
      CI->eraseFromParent();
      continue;
    }

    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) {
      // Compute the offset that this GEP adds to the pointer.
      // getIndexedOffset yields bytes; Offset is tracked in bits, hence *8.
      SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end());
      uint64_t GEPOffset = TD->getIndexedOffset(GEP->getPointerOperandType(),
                                                &Indices[0], Indices.size());
      ConvertUsesToScalar(GEP, NewAI, Offset+GEPOffset*8);
      GEP->eraseFromParent();
      continue;
    }

    // Remaining cases create replacement IR immediately before the user.
    IRBuilder<> Builder(User->getParent(), User);

    if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
      // The load is a bit extract from NewAI shifted right by Offset bits.
      Value *LoadedVal = Builder.CreateLoad(NewAI, "tmp");
      Value *NewLoadVal
        = ConvertScalar_ExtractValue(LoadedVal, LI->getType(), Offset, Builder);
      LI->replaceAllUsesWith(NewLoadVal);
      LI->eraseFromParent();
      continue;
    }

    if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
      assert(SI->getOperand(0) != Ptr && "Consistency error!");
      // Read-modify-write: load the whole scalar, splice the stored value in
      // at Offset, and store the whole scalar back.
      Instruction *Old = Builder.CreateLoad(NewAI, NewAI->getName()+".in");
      Value *New = ConvertScalar_InsertValue(SI->getOperand(0), Old, Offset,
                                             Builder);
      Builder.CreateStore(New, NewAI);
      SI->eraseFromParent();

      // If the load we just inserted is now dead, then the inserted store
      // overwrote the entire thing.
      if (Old->use_empty())
        Old->eraseFromParent();
      continue;
    }

    // If this is a constant sized memset of a constant value (e.g. 0) we can
    // transform it into a store of the expanded constant value.
    if (MemSetInst *MSI = dyn_cast<MemSetInst>(User)) {
      assert(MSI->getRawDest() == Ptr && "Consistency error!");
      unsigned NumBytes = cast<ConstantInt>(MSI->getLength())->getZExtValue();
      if (NumBytes != 0) {
        unsigned Val = cast<ConstantInt>(MSI->getValue())->getZExtValue();

        // Compute the value replicated the right number of times.
        APInt APVal(NumBytes*8, Val);

        // Splat the byte into every byte position if non-zero (zero is
        // already all-zero bits, so no work needed).
        if (Val)
          for (unsigned i = 1; i != NumBytes; ++i)
            APVal |= APVal << 8;

        // Same read-modify-write pattern as the store case above.
        Instruction *Old = Builder.CreateLoad(NewAI, NewAI->getName()+".in");
        Value *New = ConvertScalar_InsertValue(
                                    ConstantInt::get(User->getContext(), APVal),
                                               Old, Offset, Builder);
        Builder.CreateStore(New, NewAI);

        // If the load we just inserted is now dead, then the memset overwrote
        // the entire thing.
        if (Old->use_empty())
          Old->eraseFromParent();
      }
      MSI->eraseFromParent();
      continue;
    }

    // If this is a memcpy or memmove into or out of the whole allocation, we
    // can handle it like a load or store of the scalar type.
    if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(User)) {
      assert(Offset == 0 && "must be store to start of alloca");

      // If the source and destination are both to the same alloca, then this is
      // a noop copy-to-self, just delete it.  Otherwise, emit a load and store
      // as appropriate.
      AllocaInst *OrigAI = cast<AllocaInst>(Ptr->getUnderlyingObject(0));

      if (MTI->getSource()->getUnderlyingObject(0) != OrigAI) {
        // Dest must be OrigAI, change this to be a load from the original
        // pointer (bitcasted), then a store to our new alloca.
        assert(MTI->getRawDest() == Ptr && "Neither use is of pointer?");
        Value *SrcPtr = MTI->getSource();
        SrcPtr = Builder.CreateBitCast(SrcPtr, NewAI->getType());

        LoadInst *SrcVal = Builder.CreateLoad(SrcPtr, "srcval");
        // Preserve the memcpy's alignment on the replacement load.
        SrcVal->setAlignment(MTI->getAlignment());
        Builder.CreateStore(SrcVal, NewAI);
      } else if (MTI->getDest()->getUnderlyingObject(0) != OrigAI) {
        // Src must be OrigAI, change this to be a load from NewAI then a store
        // through the original dest pointer (bitcasted).
        assert(MTI->getRawSource() == Ptr && "Neither use is of pointer?");
        LoadInst *SrcVal = Builder.CreateLoad(NewAI, "srcval");

        Value *DstPtr = Builder.CreateBitCast(MTI->getDest(), NewAI->getType());
        StoreInst *NewStore = Builder.CreateStore(SrcVal, DstPtr);
        NewStore->setAlignment(MTI->getAlignment());
      } else {
        // Noop transfer. Src == Dst
      }

      MTI->eraseFromParent();
      continue;
    }

    // CanConvertToScalar already vetted every user, so anything else here is
    // a logic error.
    llvm_unreachable("Unsupported operation!");
  }
}
1421193323Sed
/// ConvertScalar_ExtractValue - Extract a value of type ToType from an integer
/// or vector value FromVal, extracting the bits from the offset specified by
/// Offset.  This returns the value, which is of type ToType.
///
/// This happens when we are converting an "integer union" to a single
/// integer scalar, or when we are converting a "vector union" to a vector with
/// insert/extractelement instructions.
///
/// Offset is an offset from the original alloca, in bits that need to be
/// shifted to the right.
Value *SROA::ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType,
                                        uint64_t Offset, IRBuilder<> &Builder) {
  // If the load is of the whole new alloca, no conversion is needed.
  if (FromVal->getType() == ToType && Offset == 0)
    return FromVal;

  // If the result alloca is a vector type, this is either an element
  // access or a bitcast to another vector type of the same size.
  if (const VectorType *VTy = dyn_cast<VectorType>(FromVal->getType())) {
    if (ToType->isVectorTy())
      return Builder.CreateBitCast(FromVal, ToType, "tmp");

    // Otherwise it must be an element access.
    unsigned Elt = 0;
    if (Offset) {
      // Offset (bits) must land exactly on an element boundary; the analysis
      // phase guaranteed this, which the assert re-checks.
      unsigned EltSize = TD->getTypeAllocSizeInBits(VTy->getElementType());
      Elt = Offset/EltSize;
      assert(EltSize*Elt == Offset && "Invalid modulus in validity checking");
    }
    // Return the element extracted out of it.
    Value *V = Builder.CreateExtractElement(FromVal, ConstantInt::get(
                    Type::getInt32Ty(FromVal->getContext()), Elt), "tmp");
    // The element may still need a bitcast, e.g. a float read from an
    // <N x i32> vector.
    if (V->getType() != ToType)
      V = Builder.CreateBitCast(V, ToType, "tmp");
    return V;
  }

  // If ToType is a first class aggregate, extract out each of the pieces and
  // use insertvalue's to form the FCA.
  if (const StructType *ST = dyn_cast<StructType>(ToType)) {
    const StructLayout &Layout = *TD->getStructLayout(ST);
    Value *Res = UndefValue::get(ST);
    for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
      // Each field is extracted recursively at its layout-determined bit
      // offset, then inserted into the aggregate result.
      Value *Elt = ConvertScalar_ExtractValue(FromVal, ST->getElementType(i),
                                        Offset+Layout.getElementOffsetInBits(i),
                                              Builder);
      Res = Builder.CreateInsertValue(Res, Elt, i, "tmp");
    }
    return Res;
  }

  // Arrays: same recursive extraction, with a uniform per-element stride.
  if (const ArrayType *AT = dyn_cast<ArrayType>(ToType)) {
    uint64_t EltSize = TD->getTypeAllocSizeInBits(AT->getElementType());
    Value *Res = UndefValue::get(AT);
    for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
      Value *Elt = ConvertScalar_ExtractValue(FromVal, AT->getElementType(),
                                              Offset+i*EltSize, Builder);
      Res = Builder.CreateInsertValue(Res, Elt, i, "tmp");
    }
    return Res;
  }

  // Otherwise, this must be a union that was converted to an integer value.
  const IntegerType *NTy = cast<IntegerType>(FromVal->getType());

  // If this is a big-endian system and the load is narrower than the
  // full alloca type, we need to do a shift to get the right bits.
  int ShAmt = 0;
  if (TD->isBigEndian()) {
    // On big-endian machines, the lowest bit is stored at the bit offset
    // from the pointer given by getTypeStoreSizeInBits.  This matters for
    // integers with a bitwidth that is not a multiple of 8.
    ShAmt = TD->getTypeStoreSizeInBits(NTy) -
            TD->getTypeStoreSizeInBits(ToType) - Offset;
  } else {
    ShAmt = Offset;
  }

  // Note: we support negative bitwidths (with shl) which are not defined.
  // We do this to support (f.e.) loads off the end of a structure where
  // only some bits are used.
  if (ShAmt > 0 && (unsigned)ShAmt < NTy->getBitWidth())
    FromVal = Builder.CreateLShr(FromVal,
                                 ConstantInt::get(FromVal->getType(),
                                                           ShAmt), "tmp");
  else if (ShAmt < 0 && (unsigned)-ShAmt < NTy->getBitWidth())
    FromVal = Builder.CreateShl(FromVal,
                                ConstantInt::get(FromVal->getType(),
                                                          -ShAmt), "tmp");

  // Finally, unconditionally truncate the integer to the right width.
  unsigned LIBitWidth = TD->getTypeSizeInBits(ToType);
  if (LIBitWidth < NTy->getBitWidth())
    FromVal =
      Builder.CreateTrunc(FromVal, IntegerType::get(FromVal->getContext(),
                                                    LIBitWidth), "tmp");
  else if (LIBitWidth > NTy->getBitWidth())
    FromVal =
       Builder.CreateZExt(FromVal, IntegerType::get(FromVal->getContext(),
                                                    LIBitWidth), "tmp");

  // If the result is an integer, this is a trunc or bitcast.
  if (ToType->isIntegerTy()) {
    // Should be done.
  } else if (ToType->isFloatingPointTy() || ToType->isVectorTy()) {
    // Just do a bitcast, we know the sizes match up.
    FromVal = Builder.CreateBitCast(FromVal, ToType, "tmp");
  } else {
    // Otherwise must be a pointer.
    FromVal = Builder.CreateIntToPtr(FromVal, ToType, "tmp");
  }
  assert(FromVal->getType() == ToType && "Didn't convert right?");
  return FromVal;
}
1536193323Sed
/// ConvertScalar_InsertValue - Insert the value "SV" into the existing integer
/// or vector value "Old" at the offset specified by Offset, returning the
/// combined value.
///
/// This happens when we are converting an "integer union" to a
/// single integer scalar, or when we are converting a "vector union" to a
/// vector with insert/extractelement instructions.
///
/// Offset is an offset from the original alloca, in bits that need to be
/// shifted to the right.
Value *SROA::ConvertScalar_InsertValue(Value *SV, Value *Old,
                                       uint64_t Offset, IRBuilder<> &Builder) {

  // Convert the stored type to the actual type, shift it left to insert
  // then 'or' into place.
  const Type *AllocaType = Old->getType();
  LLVMContext &Context = Old->getContext();

  if (const VectorType *VTy = dyn_cast<VectorType>(AllocaType)) {
    uint64_t VecSize = TD->getTypeAllocSizeInBits(VTy);
    uint64_t ValSize = TD->getTypeAllocSizeInBits(SV->getType());

    // Changing the whole vector with memset or with an access of a different
    // vector type?
    if (ValSize == VecSize)
      return Builder.CreateBitCast(SV, AllocaType, "tmp");

    uint64_t EltSize = TD->getTypeAllocSizeInBits(VTy->getElementType());

    // Must be an element insertion.
    unsigned Elt = Offset/EltSize;

    // Bitcast the value to the element type if needed (e.g. storing an i32
    // into a <N x float> slot).
    if (SV->getType() != VTy->getElementType())
      SV = Builder.CreateBitCast(SV, VTy->getElementType(), "tmp");

    SV = Builder.CreateInsertElement(Old, SV,
                     ConstantInt::get(Type::getInt32Ty(SV->getContext()), Elt),
                                     "tmp");
    return SV;
  }

  // If SV is a first-class aggregate value, insert each value recursively.
  if (const StructType *ST = dyn_cast<StructType>(SV->getType())) {
    const StructLayout &Layout = *TD->getStructLayout(ST);
    for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
      // Insert each field at its layout-determined bit offset, threading the
      // accumulated value through Old.
      Value *Elt = Builder.CreateExtractValue(SV, i, "tmp");
      Old = ConvertScalar_InsertValue(Elt, Old,
                                      Offset+Layout.getElementOffsetInBits(i),
                                      Builder);
    }
    return Old;
  }

  // Arrays: same recursive insertion, with a uniform per-element stride.
  if (const ArrayType *AT = dyn_cast<ArrayType>(SV->getType())) {
    uint64_t EltSize = TD->getTypeAllocSizeInBits(AT->getElementType());
    for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
      Value *Elt = Builder.CreateExtractValue(SV, i, "tmp");
      Old = ConvertScalar_InsertValue(Elt, Old, Offset+i*EltSize, Builder);
    }
    return Old;
  }

  // If SV is a float, convert it to the appropriate integer type.
  // If it is a pointer, do the same.
  unsigned SrcWidth = TD->getTypeSizeInBits(SV->getType());
  unsigned DestWidth = TD->getTypeSizeInBits(AllocaType);
  unsigned SrcStoreWidth = TD->getTypeStoreSizeInBits(SV->getType());
  unsigned DestStoreWidth = TD->getTypeStoreSizeInBits(AllocaType);
  if (SV->getType()->isFloatingPointTy() || SV->getType()->isVectorTy())
    SV = Builder.CreateBitCast(SV,
                            IntegerType::get(SV->getContext(),SrcWidth), "tmp");
  else if (SV->getType()->isPointerTy())
    SV = Builder.CreatePtrToInt(SV, TD->getIntPtrType(SV->getContext()), "tmp");

  // Zero extend or truncate the value if needed.
  if (SV->getType() != AllocaType) {
    if (SV->getType()->getPrimitiveSizeInBits() <
             AllocaType->getPrimitiveSizeInBits())
      SV = Builder.CreateZExt(SV, AllocaType, "tmp");
    else {
      // Truncation may be needed if storing more than the alloca can hold
      // (undefined behavior).
      SV = Builder.CreateTrunc(SV, AllocaType, "tmp");
      // After truncation the source is effectively alloca-sized; keep the
      // width bookkeeping consistent for the shift/mask math below.
      SrcWidth = DestWidth;
      SrcStoreWidth = DestStoreWidth;
    }
  }

  // If this is a big-endian system and the store is narrower than the
  // full alloca type, we need to do a shift to get the right bits.
  int ShAmt = 0;
  if (TD->isBigEndian()) {
    // On big-endian machines, the lowest bit is stored at the bit offset
    // from the pointer given by getTypeStoreSizeInBits.  This matters for
    // integers with a bitwidth that is not a multiple of 8.
    ShAmt = DestStoreWidth - SrcStoreWidth - Offset;
  } else {
    ShAmt = Offset;
  }

  // Note: we support negative bitwidths (with shr) which are not defined.
  // We do this to support (f.e.) stores off the end of a structure where
  // only some bits in the structure are set.
  // Mask tracks which destination bits the shifted SV occupies.
  APInt Mask(APInt::getLowBitsSet(DestWidth, SrcWidth));
  if (ShAmt > 0 && (unsigned)ShAmt < DestWidth) {
    SV = Builder.CreateShl(SV, ConstantInt::get(SV->getType(),
                           ShAmt), "tmp");
    Mask <<= ShAmt;
  } else if (ShAmt < 0 && (unsigned)-ShAmt < DestWidth) {
    SV = Builder.CreateLShr(SV, ConstantInt::get(SV->getType(),
                            -ShAmt), "tmp");
    Mask = Mask.lshr(-ShAmt);
  }

  // Mask out the bits we are about to insert from the old value, and or
  // in the new bits.  (If SV covers the whole alloca, Old is simply
  // replaced and no masking is needed.)
  if (SrcWidth != DestWidth) {
    assert(DestWidth > SrcWidth);
    Old = Builder.CreateAnd(Old, ConstantInt::get(Context, ~Mask), "mask");
    SV = Builder.CreateOr(Old, SV, "ins");
  }
  return SV;
}
1659193323Sed
1660193323Sed
1661193323Sed
1662193323Sed/// PointsToConstantGlobal - Return true if V (possibly indirectly) points to
1663193323Sed/// some part of a constant global variable.  This intentionally only accepts
1664193323Sed/// constant expressions because we don't can't rewrite arbitrary instructions.
1665193323Sedstatic bool PointsToConstantGlobal(Value *V) {
1666193323Sed  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
1667193323Sed    return GV->isConstant();
1668193323Sed  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
1669193323Sed    if (CE->getOpcode() == Instruction::BitCast ||
1670193323Sed        CE->getOpcode() == Instruction::GetElementPtr)
1671193323Sed      return PointsToConstantGlobal(CE->getOperand(0));
1672193323Sed  return false;
1673193323Sed}
1674193323Sed
1675193323Sed/// isOnlyCopiedFromConstantGlobal - Recursively walk the uses of a (derived)
1676193323Sed/// pointer to an alloca.  Ignore any reads of the pointer, return false if we
1677193323Sed/// see any stores or other unknown uses.  If we see pointer arithmetic, keep
1678193323Sed/// track of whether it moves the pointer (with isOffset) but otherwise traverse
1679193323Sed/// the uses.  If we see a memcpy/memmove that targets an unoffseted pointer to
1680193323Sed/// the alloca, and if the source pointer is a pointer to a constant  global, we
1681193323Sed/// can optimize this.
1682193323Sedstatic bool isOnlyCopiedFromConstantGlobal(Value *V, Instruction *&TheCopy,
1683193323Sed                                           bool isOffset) {
1684193323Sed  for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) {
1685193323Sed    if (LoadInst *LI = dyn_cast<LoadInst>(*UI))
1686193323Sed      // Ignore non-volatile loads, they are always ok.
1687193323Sed      if (!LI->isVolatile())
1688193323Sed        continue;
1689193323Sed
1690193323Sed    if (BitCastInst *BCI = dyn_cast<BitCastInst>(*UI)) {
1691193323Sed      // If uses of the bitcast are ok, we are ok.
1692193323Sed      if (!isOnlyCopiedFromConstantGlobal(BCI, TheCopy, isOffset))
1693193323Sed        return false;
1694193323Sed      continue;
1695193323Sed    }
1696193323Sed    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(*UI)) {
1697193323Sed      // If the GEP has all zero indices, it doesn't offset the pointer.  If it
1698193323Sed      // doesn't, it does.
1699193323Sed      if (!isOnlyCopiedFromConstantGlobal(GEP, TheCopy,
1700193323Sed                                         isOffset || !GEP->hasAllZeroIndices()))
1701193323Sed        return false;
1702193323Sed      continue;
1703193323Sed    }
1704193323Sed
1705193323Sed    // If this is isn't our memcpy/memmove, reject it as something we can't
1706193323Sed    // handle.
1707193323Sed    if (!isa<MemTransferInst>(*UI))
1708193323Sed      return false;
1709193323Sed
1710193323Sed    // If we already have seen a copy, reject the second one.
1711193323Sed    if (TheCopy) return false;
1712193323Sed
1713193323Sed    // If the pointer has been offset from the start of the alloca, we can't
1714193323Sed    // safely handle this.
1715193323Sed    if (isOffset) return false;
1716193323Sed
1717193323Sed    // If the memintrinsic isn't using the alloca as the dest, reject it.
1718193323Sed    if (UI.getOperandNo() != 1) return false;
1719193323Sed
1720193323Sed    MemIntrinsic *MI = cast<MemIntrinsic>(*UI);
1721193323Sed
1722193323Sed    // If the source of the memcpy/move is not a constant global, reject it.
1723193323Sed    if (!PointsToConstantGlobal(MI->getOperand(2)))
1724193323Sed      return false;
1725193323Sed
1726193323Sed    // Otherwise, the transform is safe.  Remember the copy instruction.
1727193323Sed    TheCopy = MI;
1728193323Sed  }
1729193323Sed  return true;
1730193323Sed}
1731193323Sed
1732193323Sed/// isOnlyCopiedFromConstantGlobal - Return true if the specified alloca is only
1733193323Sed/// modified by a copy from a constant global.  If we can prove this, we can
1734193323Sed/// replace any uses of the alloca with uses of the global directly.
1735198892SrdivackyInstruction *SROA::isOnlyCopiedFromConstantGlobal(AllocaInst *AI) {
1736193323Sed  Instruction *TheCopy = 0;
1737193323Sed  if (::isOnlyCopiedFromConstantGlobal(AI, TheCopy, false))
1738193323Sed    return TheCopy;
1739193323Sed  return 0;
1740193323Sed}
1741