ScalarReplAggregates.cpp revision 226890
//===- ScalarReplAggregates.cpp - Scalar Replacement of Aggregates --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This transformation implements the well known scalar replacement of
// aggregates transformation.  This xform breaks up alloca instructions of
// aggregate type (structure or array) into individual alloca instructions for
// each member (if possible).  Then, if possible, it transforms the individual
// alloca instructions into nice clean scalar SSA form.
//
// This combines a simple SRoA algorithm with the Mem2Reg algorithm because
// they often interact, especially for C++ programs.  As such, iterating
// between SRoA, then Mem2Reg until we run out of things to promote works well.
//
//===----------------------------------------------------------------------===//
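//
// For illustration (a hypothetical example, not taken from this file), SROA
// rewrites an aggregate alloca such as:
//
//   %p  = alloca { i32, float }
//   %f0 = getelementptr { i32, float }* %p, i32 0, i32 0
//   store i32 1, i32* %f0
//
// into one alloca per member:
//
//   %p.0 = alloca i32
//   store i32 1, i32* %p.0
//
// which Mem2Reg can then promote into plain SSA values.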

#define DEBUG_TYPE "scalarrepl"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
#include "llvm/Pass.h"
#include "llvm/Analysis/DebugInfo.h"
#include "llvm/Analysis/DIBuilder.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/IRBuilder.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;

STATISTIC(NumReplaced,  "Number of allocas broken up");
STATISTIC(NumPromoted,  "Number of allocas promoted");
STATISTIC(NumAdjusted,  "Number of scalar allocas adjusted to allow promotion");
STATISTIC(NumConverted, "Number of aggregates converted to scalar");
STATISTIC(NumGlobals,   "Number of allocas copied from constant global");

namespace {
  struct SROA : public FunctionPass {
    SROA(int T, bool hasDT, char &ID)
      : FunctionPass(ID), HasDomTree(hasDT) {
      if (T == -1)
        SRThreshold = 128;
      else
        SRThreshold = T;
    }

    bool runOnFunction(Function &F);

    bool performScalarRepl(Function &F);
    bool performPromotion(Function &F);

  private:
    bool HasDomTree;
    TargetData *TD;

    /// DeadInsts - Keep track of instructions we have made dead, so that
    /// we can remove them after we are done working.
    SmallVector<Value*, 32> DeadInsts;

    /// AllocaInfo - When analyzing uses of an alloca instruction, this captures
    /// information about the uses.  All these fields are initialized to false
    /// and set to true when something is learned.
    struct AllocaInfo {
      /// The alloca to promote.
      AllocaInst *AI;

      /// CheckedPHIs - This is a set of verified PHI nodes, to prevent infinite
      /// looping and avoid redundant work.
      SmallPtrSet<PHINode*, 8> CheckedPHIs;

      /// isUnsafe - This is set to true if the alloca cannot be SROA'd.
      bool isUnsafe : 1;

      /// isMemCpySrc - This is true if this aggregate is memcpy'd from.
      bool isMemCpySrc : 1;

      /// isMemCpyDst - This is true if this aggregate is memcpy'd into.
      bool isMemCpyDst : 1;

      /// hasSubelementAccess - This is true if a subelement of the alloca is
      /// ever accessed, or false if the alloca is only accessed with mem
      /// intrinsics or load/store that only access the entire alloca at once.
      bool hasSubelementAccess : 1;

      /// hasALoadOrStore - This is true if there are any loads or stores to it.
      /// The alloca may just be accessed with memcpy, for example, which would
      /// not set this.
      bool hasALoadOrStore : 1;

      explicit AllocaInfo(AllocaInst *ai)
        : AI(ai), isUnsafe(false), isMemCpySrc(false), isMemCpyDst(false),
          hasSubelementAccess(false), hasALoadOrStore(false) {}
    };

    unsigned SRThreshold;

    void MarkUnsafe(AllocaInfo &I, Instruction *User) {
      I.isUnsafe = true;
      DEBUG(dbgs() << "  Transformation preventing inst: " << *User << '\n');
    }

    bool isSafeAllocaToScalarRepl(AllocaInst *AI);

    void isSafeForScalarRepl(Instruction *I, uint64_t Offset, AllocaInfo &Info);
    void isSafePHISelectUseForScalarRepl(Instruction *User, uint64_t Offset,
                                         AllocaInfo &Info);
    void isSafeGEP(GetElementPtrInst *GEPI, uint64_t &Offset, AllocaInfo &Info);
    void isSafeMemAccess(uint64_t Offset, uint64_t MemSize,
                         Type *MemOpType, bool isStore, AllocaInfo &Info,
                         Instruction *TheAccess, bool AllowWholeAccess);
    bool TypeHasComponent(Type *T, uint64_t Offset, uint64_t Size);
    uint64_t FindElementAndOffset(Type *&T, uint64_t &Offset,
                                  Type *&IdxTy);

    void DoScalarReplacement(AllocaInst *AI,
                             std::vector<AllocaInst*> &WorkList);
    void DeleteDeadInstructions();

    void RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
                              SmallVector<AllocaInst*, 32> &NewElts);
    void RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset,
                        SmallVector<AllocaInst*, 32> &NewElts);
    void RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset,
                    SmallVector<AllocaInst*, 32> &NewElts);
    void RewriteLifetimeIntrinsic(IntrinsicInst *II, AllocaInst *AI,
                                  uint64_t Offset,
                                  SmallVector<AllocaInst*, 32> &NewElts);
    void RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
                                      AllocaInst *AI,
                                      SmallVector<AllocaInst*, 32> &NewElts);
    void RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
                                       SmallVector<AllocaInst*, 32> &NewElts);
    void RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
                                      SmallVector<AllocaInst*, 32> &NewElts);

    static MemTransferInst *isOnlyCopiedFromConstantGlobal(
        AllocaInst *AI, SmallVector<Instruction*, 4> &ToDelete);
  };

  // SROA_DT - SROA that uses DominatorTree.
  struct SROA_DT : public SROA {
    static char ID;
  public:
    SROA_DT(int T = -1) : SROA(T, true, ID) {
      initializeSROA_DTPass(*PassRegistry::getPassRegistry());
    }

    // getAnalysisUsage - This pass requires the DominatorTree, but we know it
    // will not alter the CFG, so say so.
    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.addRequired<DominatorTree>();
      AU.setPreservesCFG();
    }
  };

  // SROA_SSAUp - SROA that uses SSAUpdater.
  struct SROA_SSAUp : public SROA {
    static char ID;
  public:
    SROA_SSAUp(int T = -1) : SROA(T, false, ID) {
      initializeSROA_SSAUpPass(*PassRegistry::getPassRegistry());
    }

    // getAnalysisUsage - This pass does not require any passes, but we know it
    // will not alter the CFG, so say so.
    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.setPreservesCFG();
    }
  };

}

char SROA_DT::ID = 0;
char SROA_SSAUp::ID = 0;

INITIALIZE_PASS_BEGIN(SROA_DT, "scalarrepl",
                "Scalar Replacement of Aggregates (DT)", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTree)
INITIALIZE_PASS_END(SROA_DT, "scalarrepl",
                "Scalar Replacement of Aggregates (DT)", false, false)

INITIALIZE_PASS_BEGIN(SROA_SSAUp, "scalarrepl-ssa",
                      "Scalar Replacement of Aggregates (SSAUp)", false, false)
INITIALIZE_PASS_END(SROA_SSAUp, "scalarrepl-ssa",
                    "Scalar Replacement of Aggregates (SSAUp)", false, false)

// Public interface to the ScalarReplAggregates pass
FunctionPass *llvm::createScalarReplAggregatesPass(int Threshold,
                                                   bool UseDomTree) {
  if (UseDomTree)
    return new SROA_DT(Threshold);
  return new SROA_SSAUp(Threshold);
}
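
// A hypothetical usage note: clients schedule this pass through the factory
// above, e.g.
//   PM.add(createScalarReplAggregatesPass(/*Threshold=*/128,
//                                         /*UseDomTree=*/true));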


//===----------------------------------------------------------------------===//
// Convert To Scalar Optimization.
//===----------------------------------------------------------------------===//

namespace {
/// ConvertToScalarInfo - This class implements the "Convert To Scalar"
/// optimization, which scans the uses of an alloca and determines if it can
/// rewrite it in terms of a single new alloca that can be mem2reg'd.
class ConvertToScalarInfo {
  /// AllocaSize - The size of the alloca being considered in bytes.
  unsigned AllocaSize;
  const TargetData &TD;

  /// IsNotTrivial - This is set to true if there is some access to the object
  /// which means that mem2reg can't promote it.
  bool IsNotTrivial;

  /// ScalarKind - Tracks the kind of alloca being considered for promotion,
  /// computed based on the uses of the alloca rather than the LLVM type system.
  enum {
    Unknown,

    // Accesses via GEPs that are consistent with element access of a vector
    // type. This will not be converted into a vector unless there is a later
    // access using an actual vector type.
    ImplicitVector,

    // Accesses via vector operations and GEPs that are consistent with the
    // layout of a vector type.
    Vector,

    // An integer bag-of-bits with bitwise operations for insertion and
    // extraction. Any combination of types can be converted into this kind
    // of scalar.
    Integer
  } ScalarKind;

  /// VectorTy - This tracks the type that we should promote the vector to if
  /// it is possible to turn it into a vector.  This starts out null; if it
  /// isn't possible to turn the alloca into a vector type, ScalarKind is set
  /// to Integer instead.
  VectorType *VectorTy;

  /// HadNonMemTransferAccess - True if there is at least one access to the
  /// alloca that is not a MemTransferInst.  We don't want to turn structs into
  /// large integers unless there is some potential for optimization.
  bool HadNonMemTransferAccess;

public:
  explicit ConvertToScalarInfo(unsigned Size, const TargetData &td)
    : AllocaSize(Size), TD(td), IsNotTrivial(false), ScalarKind(Unknown),
      VectorTy(0), HadNonMemTransferAccess(false) { }

  AllocaInst *TryConvert(AllocaInst *AI);

private:
  bool CanConvertToScalar(Value *V, uint64_t Offset);
  void MergeInTypeForLoadOrStore(Type *In, uint64_t Offset);
  bool MergeInVectorType(VectorType *VInTy, uint64_t Offset);
  void ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset);

  Value *ConvertScalar_ExtractValue(Value *NV, Type *ToType,
                                    uint64_t Offset, IRBuilder<> &Builder);
  Value *ConvertScalar_InsertValue(Value *StoredVal, Value *ExistingVal,
                                   uint64_t Offset, IRBuilder<> &Builder);
};
} // end anonymous namespace.


/// TryConvert - Analyze the specified alloca, and if it is safe to do so,
/// rewrite it to be a new alloca which is mem2reg'able.  This returns the new
/// alloca if possible or null if not.
AllocaInst *ConvertToScalarInfo::TryConvert(AllocaInst *AI) {
  // If we can't convert this scalar, or if mem2reg can trivially do it, bail
  // out.
  if (!CanConvertToScalar(AI, 0) || !IsNotTrivial)
    return 0;

  // If an alloca has only memset / memcpy uses, it may still have an Unknown
  // ScalarKind. Treat it as an Integer below.
  if (ScalarKind == Unknown)
    ScalarKind = Integer;

  if (ScalarKind == Vector && VectorTy->getBitWidth() != AllocaSize * 8)
    ScalarKind = Integer;

  // If we were able to find a vector type that can handle this with
  // insert/extract elements, and if there was at least one use that had
  // a vector type, promote this to a vector.  We don't want to promote
  // random stuff that doesn't use vectors (e.g. <9 x double>) because then
  // we just get a lot of insert/extracts.  If at least one vector is
  // involved, then we probably really do have a union of vector/array.
  Type *NewTy;
  if (ScalarKind == Vector) {
    assert(VectorTy && "Missing type for vector scalar.");
    DEBUG(dbgs() << "CONVERT TO VECTOR: " << *AI << "\n  TYPE = "
          << *VectorTy << '\n');
    NewTy = VectorTy;  // Use the vector type.
  } else {
    unsigned BitWidth = AllocaSize * 8;
    if ((ScalarKind == ImplicitVector || ScalarKind == Integer) &&
        !HadNonMemTransferAccess && !TD.fitsInLegalInteger(BitWidth))
      return 0;

    DEBUG(dbgs() << "CONVERT TO SCALAR INTEGER: " << *AI << "\n");
    // Create and insert the integer alloca.
    NewTy = IntegerType::get(AI->getContext(), BitWidth);
  }
  AllocaInst *NewAI = new AllocaInst(NewTy, 0, "", AI->getParent()->begin());
  ConvertUsesToScalar(AI, NewAI, 0);
  return NewAI;
}
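
// As a hypothetical example of the conversion above, an 8-byte alloca with no
// vector-typed uses, e.g.
//   %u = alloca { float, float }
// is replaced by a single integer alloca
//   %u.conv = alloca i64
// whose loads and stores are then rewritten by the shift/mask helpers below.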

/// MergeInTypeForLoadOrStore - Add the 'In' type to the accumulated vector type
/// (VectorTy) so far at the offset specified by Offset (which is specified in
/// bytes).
///
/// There are two cases we handle here:
///   1) A union of vector types of the same size and potentially its elements.
///      Here we turn element accesses into insert/extract element operations.
///      This promotes a <4 x float> with a store of float to the third element
///      into a <4 x float> that uses insert element.
///   2) A fully general blob of memory, which we turn into some (potentially
///      large) integer type with extract and insert operations where the loads
///      and stores would mutate the memory.  We mark this by setting ScalarKind
///      to Integer.
void ConvertToScalarInfo::MergeInTypeForLoadOrStore(Type *In,
                                                    uint64_t Offset) {
  // If we already decided to turn this into a blob of integer memory, there is
  // nothing to be done.
  if (ScalarKind == Integer)
    return;

  // If this could be contributing to a vector, analyze it.

  // If the In type is a vector that is the same size as the alloca, see if it
  // matches the existing VecTy.
  if (VectorType *VInTy = dyn_cast<VectorType>(In)) {
    if (MergeInVectorType(VInTy, Offset))
      return;
  } else if (In->isFloatTy() || In->isDoubleTy() ||
             (In->isIntegerTy() && In->getPrimitiveSizeInBits() >= 8 &&
              isPowerOf2_32(In->getPrimitiveSizeInBits()))) {
    // Full width accesses can be ignored, because they can always be turned
    // into bitcasts.
    unsigned EltSize = In->getPrimitiveSizeInBits()/8;
    if (EltSize == AllocaSize)
      return;

    // If we're accessing something that could be an element of a vector, see
    // if the implied vector agrees with what we already have and if Offset is
    // compatible with it.
    if (Offset % EltSize == 0 && AllocaSize % EltSize == 0 &&
        (!VectorTy || EltSize == VectorTy->getElementType()
                                         ->getPrimitiveSizeInBits()/8)) {
      if (!VectorTy) {
        ScalarKind = ImplicitVector;
        VectorTy = VectorType::get(In, AllocaSize/EltSize);
      }
      return;
    }
  }

  // Otherwise, we have a case that we can't handle with an optimized vector
  // form.  We can still turn this into a large integer.
  ScalarKind = Integer;
}
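
// For example (hypothetical), a float store at byte offset 8 into a 16-byte
// alloca implies a <4 x float> layout: EltSize == 4, Offset % 4 == 0 and
// AllocaSize % 4 == 0, so VectorTy becomes <4 x float> with ScalarKind ==
// ImplicitVector until an access with a real vector type upgrades it.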

/// MergeInVectorType - Handles the vector case of MergeInTypeForLoadOrStore,
/// returning true if the type was successfully merged and false otherwise.
bool ConvertToScalarInfo::MergeInVectorType(VectorType *VInTy,
                                            uint64_t Offset) {
  if (VInTy->getBitWidth()/8 == AllocaSize && Offset == 0) {
    // If we're storing/loading a vector of the right size, allow it as a
    // vector.  If this is the first vector we see, remember the type so that
    // we know the element size. If this is a subsequent access, ignore it
    // even if it is a differing type but the same size. Worst case we can
    // bitcast the resultant vectors.
    if (!VectorTy)
      VectorTy = VInTy;
    ScalarKind = Vector;
    return true;
  }

  return false;
}

/// CanConvertToScalar - V is a pointer.  If we can convert the pointee and all
/// of its accesses to a single vector type, return true and set ScalarKind and
/// VectorTy to describe the new type.  If we can only convert the alloca into
/// a single promotable integer, return true with ScalarKind set to Integer.
/// Further, if a use is not a completely trivial use that mem2reg could
/// promote, set IsNotTrivial.  Offset is the current offset from the base of
/// the alloca being analyzed.
bool ConvertToScalarInfo::CanConvertToScalar(Value *V, uint64_t Offset) {
  for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) {
    Instruction *User = cast<Instruction>(*UI);

    if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
      // Don't break volatile or atomic loads.
      if (!LI->isSimple())
        return false;
      // Don't touch MMX operations.
      if (LI->getType()->isX86_MMXTy())
        return false;
      HadNonMemTransferAccess = true;
      MergeInTypeForLoadOrStore(LI->getType(), Offset);
      continue;
    }

    if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
      // Storing the pointer, not into the value?
      if (SI->getOperand(0) == V || !SI->isSimple()) return false;
      // Don't touch MMX operations.
      if (SI->getOperand(0)->getType()->isX86_MMXTy())
        return false;
      HadNonMemTransferAccess = true;
      MergeInTypeForLoadOrStore(SI->getOperand(0)->getType(), Offset);
      continue;
    }

    if (BitCastInst *BCI = dyn_cast<BitCastInst>(User)) {
      if (!onlyUsedByLifetimeMarkers(BCI))
        IsNotTrivial = true;  // Can't be mem2reg'd.
      if (!CanConvertToScalar(BCI, Offset))
        return false;
      continue;
    }

    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) {
      // If this is a GEP with variable indices, we can't handle it.
      if (!GEP->hasAllConstantIndices())
        return false;

      // Compute the offset that this GEP adds to the pointer.
      SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end());
      uint64_t GEPOffset = TD.getIndexedOffset(GEP->getPointerOperandType(),
                                               Indices);
      // See if all uses can be converted.
      if (!CanConvertToScalar(GEP, Offset+GEPOffset))
        return false;
      IsNotTrivial = true;  // Can't be mem2reg'd.
      HadNonMemTransferAccess = true;
      continue;
    }

    // If this is a constant sized memset of a constant value (e.g. 0) we can
    // handle it.
    if (MemSetInst *MSI = dyn_cast<MemSetInst>(User)) {
      // Store of constant value.
      if (!isa<ConstantInt>(MSI->getValue()))
        return false;

      // Store of constant size.
      ConstantInt *Len = dyn_cast<ConstantInt>(MSI->getLength());
      if (!Len)
        return false;

      // If the size differs from the alloca, we can only convert the alloca to
      // an integer bag-of-bits.
      // FIXME: This should handle all of the cases that are currently accepted
      // as vector element insertions.
      if (Len->getZExtValue() != AllocaSize || Offset != 0)
        ScalarKind = Integer;

      IsNotTrivial = true;  // Can't be mem2reg'd.
      HadNonMemTransferAccess = true;
      continue;
    }

    // If this is a memcpy or memmove into or out of the whole allocation, we
    // can handle it like a load or store of the scalar type.
    if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(User)) {
      ConstantInt *Len = dyn_cast<ConstantInt>(MTI->getLength());
      if (Len == 0 || Len->getZExtValue() != AllocaSize || Offset != 0)
        return false;

      IsNotTrivial = true;  // Can't be mem2reg'd.
      continue;
    }

    // If this is a lifetime intrinsic, we can handle it.
    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(User)) {
      if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
          II->getIntrinsicID() == Intrinsic::lifetime_end) {
        continue;
      }
    }

    // Otherwise, we cannot handle this!
    return false;
  }

  return true;
}
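
// For example (hypothetical), a GEP with a variable index such as
//   %e = getelementptr [4 x float]* %a, i32 0, i32 %i
// makes CanConvertToScalar return false, while the constant-index form
//   %e = getelementptr [4 x float]* %a, i32 0, i32 2
// simply contributes a fixed 8-byte offset to the recursive scan.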

/// ConvertUsesToScalar - Convert all of the users of Ptr to use the new alloca
/// directly.  This happens when we are converting an "integer union" to a
/// single integer scalar, or when we are converting a "vector union" to a
/// vector with insert/extractelement instructions.
///
/// Offset is an offset from the original alloca, in bits that need to be
/// shifted to the right.  By the end of this, there should be no uses of Ptr.
void ConvertToScalarInfo::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI,
                                              uint64_t Offset) {
  while (!Ptr->use_empty()) {
    Instruction *User = cast<Instruction>(Ptr->use_back());

    if (BitCastInst *CI = dyn_cast<BitCastInst>(User)) {
      ConvertUsesToScalar(CI, NewAI, Offset);
      CI->eraseFromParent();
      continue;
    }

    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(User)) {
      // Compute the offset that this GEP adds to the pointer.
      SmallVector<Value*, 8> Indices(GEP->op_begin()+1, GEP->op_end());
      uint64_t GEPOffset = TD.getIndexedOffset(GEP->getPointerOperandType(),
                                               Indices);
      ConvertUsesToScalar(GEP, NewAI, Offset+GEPOffset*8);
      GEP->eraseFromParent();
      continue;
    }

    IRBuilder<> Builder(User);

    if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
      // The load is a bit extract from NewAI shifted right by Offset bits.
      Value *LoadedVal = Builder.CreateLoad(NewAI);
      Value *NewLoadVal
        = ConvertScalar_ExtractValue(LoadedVal, LI->getType(), Offset, Builder);
      LI->replaceAllUsesWith(NewLoadVal);
      LI->eraseFromParent();
      continue;
    }

    if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
      assert(SI->getOperand(0) != Ptr && "Consistency error!");
      Instruction *Old = Builder.CreateLoad(NewAI, NewAI->getName()+".in");
      Value *New = ConvertScalar_InsertValue(SI->getOperand(0), Old, Offset,
                                             Builder);
      Builder.CreateStore(New, NewAI);
      SI->eraseFromParent();

      // If the load we just inserted is now dead, then the inserted store
      // overwrote the entire thing.
      if (Old->use_empty())
        Old->eraseFromParent();
      continue;
    }

    // If this is a constant sized memset of a constant value (e.g. 0) we can
    // transform it into a store of the expanded constant value.
    if (MemSetInst *MSI = dyn_cast<MemSetInst>(User)) {
      assert(MSI->getRawDest() == Ptr && "Consistency error!");
      unsigned NumBytes = cast<ConstantInt>(MSI->getLength())->getZExtValue();
      if (NumBytes != 0) {
        unsigned Val = cast<ConstantInt>(MSI->getValue())->getZExtValue();

        // Compute the value replicated the right number of times.
        APInt APVal(NumBytes*8, Val);

        // Splat the value if non-zero.
        if (Val)
          for (unsigned i = 1; i != NumBytes; ++i)
            APVal |= APVal << 8;
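        // e.g. (hypothetical) a memset of 0xAB over four bytes has now
        // produced the replicated constant 0xABABABAB in APVal.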

        Instruction *Old = Builder.CreateLoad(NewAI, NewAI->getName()+".in");
        Value *New = ConvertScalar_InsertValue(
                                    ConstantInt::get(User->getContext(), APVal),
                                               Old, Offset, Builder);
        Builder.CreateStore(New, NewAI);

        // If the load we just inserted is now dead, then the memset overwrote
        // the entire thing.
        if (Old->use_empty())
          Old->eraseFromParent();
      }
      MSI->eraseFromParent();
      continue;
    }

    // If this is a memcpy or memmove into or out of the whole allocation, we
    // can handle it like a load or store of the scalar type.
    if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(User)) {
      assert(Offset == 0 && "must be store to start of alloca");

      // If the source and destination both point to the same alloca, then this
      // is a noop copy-to-self, just delete it.  Otherwise, emit a load and
      // store as appropriate.
      AllocaInst *OrigAI = cast<AllocaInst>(GetUnderlyingObject(Ptr, &TD, 0));

      if (GetUnderlyingObject(MTI->getSource(), &TD, 0) != OrigAI) {
        // Dest must be OrigAI, change this to be a load from the original
        // pointer (bitcasted), then a store to our new alloca.
        assert(MTI->getRawDest() == Ptr && "Neither use is of pointer?");
        Value *SrcPtr = MTI->getSource();
        PointerType* SPTy = cast<PointerType>(SrcPtr->getType());
        PointerType* AIPTy = cast<PointerType>(NewAI->getType());
        if (SPTy->getAddressSpace() != AIPTy->getAddressSpace()) {
          AIPTy = PointerType::get(AIPTy->getElementType(),
                                   SPTy->getAddressSpace());
        }
        SrcPtr = Builder.CreateBitCast(SrcPtr, AIPTy);

        LoadInst *SrcVal = Builder.CreateLoad(SrcPtr, "srcval");
        SrcVal->setAlignment(MTI->getAlignment());
        Builder.CreateStore(SrcVal, NewAI);
      } else if (GetUnderlyingObject(MTI->getDest(), &TD, 0) != OrigAI) {
        // Src must be OrigAI, change this to be a load from NewAI then a store
        // through the original dest pointer (bitcasted).
        assert(MTI->getRawSource() == Ptr && "Neither use is of pointer?");
        LoadInst *SrcVal = Builder.CreateLoad(NewAI, "srcval");

        PointerType* DPTy = cast<PointerType>(MTI->getDest()->getType());
        PointerType* AIPTy = cast<PointerType>(NewAI->getType());
        if (DPTy->getAddressSpace() != AIPTy->getAddressSpace()) {
          AIPTy = PointerType::get(AIPTy->getElementType(),
                                   DPTy->getAddressSpace());
        }
        Value *DstPtr = Builder.CreateBitCast(MTI->getDest(), AIPTy);

        StoreInst *NewStore = Builder.CreateStore(SrcVal, DstPtr);
        NewStore->setAlignment(MTI->getAlignment());
      } else {
        // Noop transfer. Src == Dst
      }

      MTI->eraseFromParent();
      continue;
    }

    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(User)) {
      if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
          II->getIntrinsicID() == Intrinsic::lifetime_end) {
        // There's no need to preserve these, as the resulting alloca will be
        // converted to a register anyways.
        II->eraseFromParent();
        continue;
      }
    }

    llvm_unreachable("Unsupported operation!");
  }
}
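
// A hypothetical walk-through of the rewrite above: given
//   %g = getelementptr { i32, i32 }* %p, i32 0, i32 1
//   %v = load i32* %g
// over a new i64 alloca, the GEP contributes a 32-bit offset, so the load is
// rewritten as a load of the whole i64 followed by ConvertScalar_ExtractValue.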

/// ConvertScalar_ExtractValue - Extract a value of type ToType from an integer
/// or vector value FromVal, extracting the bits from the offset specified by
/// Offset.  This returns the value, which is of type ToType.
///
/// This happens when we are converting an "integer union" to a single
/// integer scalar, or when we are converting a "vector union" to a vector with
/// insert/extractelement instructions.
///
/// Offset is an offset from the original alloca, in bits that need to be
/// shifted to the right.
Value *ConvertToScalarInfo::
ConvertScalar_ExtractValue(Value *FromVal, Type *ToType,
                           uint64_t Offset, IRBuilder<> &Builder) {
  // If the load is of the whole new alloca, no conversion is needed.
  Type *FromType = FromVal->getType();
  if (FromType == ToType && Offset == 0)
    return FromVal;

  // If the result alloca is a vector type, this is either an element
  // access or a bitcast to another vector type of the same size.
  if (VectorType *VTy = dyn_cast<VectorType>(FromType)) {
    unsigned FromTypeSize = TD.getTypeAllocSize(FromType);
    unsigned ToTypeSize = TD.getTypeAllocSize(ToType);
    if (FromTypeSize == ToTypeSize)
      return Builder.CreateBitCast(FromVal, ToType);

    // Otherwise it must be an element access.
    unsigned Elt = 0;
    if (Offset) {
      unsigned EltSize = TD.getTypeAllocSizeInBits(VTy->getElementType());
      Elt = Offset/EltSize;
      assert(EltSize*Elt == Offset && "Invalid modulus in validity checking");
    }
    // Return the element extracted out of it.
    Value *V = Builder.CreateExtractElement(FromVal, Builder.getInt32(Elt));
    if (V->getType() != ToType)
      V = Builder.CreateBitCast(V, ToType);
    return V;
  }

  // If ToType is a first class aggregate, extract out each of the pieces and
  // use insertvalue's to form the FCA.
  if (StructType *ST = dyn_cast<StructType>(ToType)) {
    const StructLayout &Layout = *TD.getStructLayout(ST);
    Value *Res = UndefValue::get(ST);
    for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
      Value *Elt = ConvertScalar_ExtractValue(FromVal, ST->getElementType(i),
                                        Offset+Layout.getElementOffsetInBits(i),
                                              Builder);
      Res = Builder.CreateInsertValue(Res, Elt, i);
    }
    return Res;
  }

  if (ArrayType *AT = dyn_cast<ArrayType>(ToType)) {
    uint64_t EltSize = TD.getTypeAllocSizeInBits(AT->getElementType());
    Value *Res = UndefValue::get(AT);
    for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
      Value *Elt = ConvertScalar_ExtractValue(FromVal, AT->getElementType(),
                                              Offset+i*EltSize, Builder);
      Res = Builder.CreateInsertValue(Res, Elt, i);
    }
    return Res;
  }

  // Otherwise, this must be a union that was converted to an integer value.
  IntegerType *NTy = cast<IntegerType>(FromVal->getType());

  // If this is a big-endian system and the load is narrower than the
  // full alloca type, we need to do a shift to get the right bits.
  int ShAmt = 0;
  if (TD.isBigEndian()) {
    // On big-endian machines, the lowest bit is stored at the bit offset
    // from the pointer given by getTypeStoreSizeInBits.  This matters for
    // integers with a bitwidth that is not a multiple of 8.
    ShAmt = TD.getTypeStoreSizeInBits(NTy) -
            TD.getTypeStoreSizeInBits(ToType) - Offset;
  } else {
    ShAmt = Offset;
  }

  // Note: we support negative shift amounts (with shl) which are not defined.
  // We do this to support (f.e.) loads off the end of a structure where
  // only some bits are used.
  if (ShAmt > 0 && (unsigned)ShAmt < NTy->getBitWidth())
    FromVal = Builder.CreateLShr(FromVal,
                                 ConstantInt::get(FromVal->getType(), ShAmt));
  else if (ShAmt < 0 && (unsigned)-ShAmt < NTy->getBitWidth())
    FromVal = Builder.CreateShl(FromVal,
                                ConstantInt::get(FromVal->getType(), -ShAmt));

  // Finally, unconditionally truncate the integer to the right width.
  unsigned LIBitWidth = TD.getTypeSizeInBits(ToType);
  if (LIBitWidth < NTy->getBitWidth())
    FromVal =
      Builder.CreateTrunc(FromVal, IntegerType::get(FromVal->getContext(),
                                                    LIBitWidth));
  else if (LIBitWidth > NTy->getBitWidth())
    FromVal =
       Builder.CreateZExt(FromVal, IntegerType::get(FromVal->getContext(),
                                                    LIBitWidth));

  // If the result is an integer, this is a trunc or bitcast.
  if (ToType->isIntegerTy()) {
    // Should be done.
  } else if (ToType->isFloatingPointTy() || ToType->isVectorTy()) {
    // Just do a bitcast, we know the sizes match up.
    FromVal = Builder.CreateBitCast(FromVal, ToType);
  } else {
    // Otherwise must be a pointer.
    FromVal = Builder.CreateIntToPtr(FromVal, ToType);
  }
  assert(FromVal->getType() == ToType && "Didn't convert right?");
  return FromVal;
}
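
// As a hypothetical little-endian example, extracting an i8 stored at byte
// offset 1 of an i32 blob becomes:
//   %shr = lshr i32 %blob, 8
//   %val = trunc i32 %shr to i8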

/// ConvertScalar_InsertValue - Insert the value "SV" into the existing integer
/// or vector value "Old" at the offset specified by Offset.
///
/// This happens when we are converting an "integer union" to a
/// single integer scalar, or when we are converting a "vector union" to a
/// vector with insert/extractelement instructions.
///
/// Offset is an offset from the original alloca, in bits that need to be
/// shifted to the right.
Value *ConvertToScalarInfo::
ConvertScalar_InsertValue(Value *SV, Value *Old,
                          uint64_t Offset, IRBuilder<> &Builder) {
  // Convert the stored type to the actual type, shift it left to insert
  // then 'or' into place.
  Type *AllocaType = Old->getType();
  LLVMContext &Context = Old->getContext();

  if (VectorType *VTy = dyn_cast<VectorType>(AllocaType)) {
    uint64_t VecSize = TD.getTypeAllocSizeInBits(VTy);
    uint64_t ValSize = TD.getTypeAllocSizeInBits(SV->getType());

    // Changing the whole vector with memset or with an access of a different
    // vector type?
    if (ValSize == VecSize)
      return Builder.CreateBitCast(SV, AllocaType);

    // Must be an element insertion.
    assert(SV->getType() == VTy->getElementType());
    uint64_t EltSize = TD.getTypeAllocSizeInBits(VTy->getElementType());
    unsigned Elt = Offset/EltSize;
    return Builder.CreateInsertElement(Old, SV, Builder.getInt32(Elt));
  }

  // If SV is a first-class aggregate value, insert each value recursively.
  if (StructType *ST = dyn_cast<StructType>(SV->getType())) {
    const StructLayout &Layout = *TD.getStructLayout(ST);
    for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
      Value *Elt = Builder.CreateExtractValue(SV, i);
      Old = ConvertScalar_InsertValue(Elt, Old,
                                      Offset+Layout.getElementOffsetInBits(i),
                                      Builder);
    }
    return Old;
  }

  if (ArrayType *AT = dyn_cast<ArrayType>(SV->getType())) {
    uint64_t EltSize = TD.getTypeAllocSizeInBits(AT->getElementType());
    for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
      Value *Elt = Builder.CreateExtractValue(SV, i);
      Old = ConvertScalar_InsertValue(Elt, Old, Offset+i*EltSize, Builder);
    }
    return Old;
  }

  // If SV is a float, convert it to the appropriate integer type.
  // If it is a pointer, do the same.
  unsigned SrcWidth = TD.getTypeSizeInBits(SV->getType());
  unsigned DestWidth = TD.getTypeSizeInBits(AllocaType);
  unsigned SrcStoreWidth = TD.getTypeStoreSizeInBits(SV->getType());
  unsigned DestStoreWidth = TD.getTypeStoreSizeInBits(AllocaType);
  if (SV->getType()->isFloatingPointTy() || SV->getType()->isVectorTy())
    SV = Builder.CreateBitCast(SV, IntegerType::get(SV->getContext(),SrcWidth));
  else if (SV->getType()->isPointerTy())
    SV = Builder.CreatePtrToInt(SV, TD.getIntPtrType(SV->getContext()));

  // Zero extend or truncate the value if needed.
  if (SV->getType() != AllocaType) {
    if (SV->getType()->getPrimitiveSizeInBits() <
             AllocaType->getPrimitiveSizeInBits())
      SV = Builder.CreateZExt(SV, AllocaType);
    else {
      // Truncation may be needed if storing more than the alloca can hold
      // (undefined behavior).
      SV = Builder.CreateTrunc(SV, AllocaType);
      SrcWidth = DestWidth;
      SrcStoreWidth = DestStoreWidth;
    }
  }

  // If this is a big-endian system and the store is narrower than the
  // full alloca type, we need to do a shift to get the right bits.
  int ShAmt = 0;
  if (TD.isBigEndian()) {
    // On big-endian machines, the lowest bit is stored at the bit offset
    // from the pointer given by getTypeStoreSizeInBits.  This matters for
    // integers with a bitwidth that is not a multiple of 8.
    ShAmt = DestStoreWidth - SrcStoreWidth - Offset;
  } else {
    ShAmt = Offset;
  }

  // Note: we support negative shift amounts (with shr) which are not defined.
  // We do this to support (f.e.) stores off the end of a structure where
  // only some bits in the structure are set.
  APInt Mask(APInt::getLowBitsSet(DestWidth, SrcWidth));
  if (ShAmt > 0 && (unsigned)ShAmt < DestWidth) {
    SV = Builder.CreateShl(SV, ConstantInt::get(SV->getType(), ShAmt));
    Mask <<= ShAmt;
  } else if (ShAmt < 0 && (unsigned)-ShAmt < DestWidth) {
    SV = Builder.CreateLShr(SV, ConstantInt::get(SV->getType(), -ShAmt));
    Mask = Mask.lshr(-ShAmt);
  }

  // Mask out the bits we are about to insert from the old value, and or
  // in the new bits.
  if (SrcWidth != DestWidth) {
    assert(DestWidth > SrcWidth);
    Old = Builder.CreateAnd(Old, ConstantInt::get(Context, ~Mask), "mask");
    SV = Builder.CreateOr(Old, SV, "ins");
  }
  return SV;
}
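
// A hypothetical example of the mask-and-or sequence above: inserting an i8
// at byte offset 1 of an i32 blob (little-endian) expands to
//   %ext  = zext i8 %sv to i32
//   %shl  = shl i32 %ext, 8
//   %mask = and i32 %old, -65281       ; ~0x0000FF00
//   %ins  = or i32 %mask, %shl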


//===----------------------------------------------------------------------===//
// SRoA Driver
//===----------------------------------------------------------------------===//


bool SROA::runOnFunction(Function &F) {
  TD = getAnalysisIfAvailable<TargetData>();

  bool Changed = performPromotion(F);

  // FIXME: ScalarRepl currently depends on TargetData more than it
  // theoretically needs to. It should be refactored in order to support
  // target-independent IR. Until this is done, just skip the actual
  // scalar-replacement portion of this pass.
  if (!TD) return Changed;

  while (1) {
    bool LocalChange = performScalarRepl(F);
    if (!LocalChange) break;   // No need to repromote if no scalarrepl
    Changed = true;
    LocalChange = performPromotion(F);
    if (!LocalChange) break;   // No need to re-scalarrepl if no promotion
  }

  return Changed;
}

namespace {
class AllocaPromoter : public LoadAndStorePromoter {
  AllocaInst *AI;
  DIBuilder *DIB;
  SmallVector<DbgDeclareInst *, 4> DDIs;
  SmallVector<DbgValueInst *, 4> DVIs;
public:
  AllocaPromoter(const SmallVectorImpl<Instruction*> &Insts, SSAUpdater &S,
                 DIBuilder *DB)
    : LoadAndStorePromoter(Insts, S), AI(0), DIB(DB) {}

  void run(AllocaInst *AI, const SmallVectorImpl<Instruction*> &Insts) {
    // Remember which alloca we're promoting (for isInstInList).
    this->AI = AI;
    if (MDNode *DebugNode = MDNode::getIfExists(AI->getContext(), AI))
      for (Value::use_iterator UI = DebugNode->use_begin(),
             E = DebugNode->use_end(); UI != E; ++UI)
        if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(*UI))
          DDIs.push_back(DDI);
        else if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(*UI))
          DVIs.push_back(DVI);

    LoadAndStorePromoter::run(Insts);
    AI->eraseFromParent();
    for (SmallVector<DbgDeclareInst *, 4>::iterator I = DDIs.begin(),
           E = DDIs.end(); I != E; ++I) {
      DbgDeclareInst *DDI = *I;
      DDI->eraseFromParent();
    }
    for (SmallVector<DbgValueInst *, 4>::iterator I = DVIs.begin(),
           E = DVIs.end(); I != E; ++I) {
      DbgValueInst *DVI = *I;
      DVI->eraseFromParent();
    }
  }

  virtual bool isInstInList(Instruction *I,
                            const SmallVectorImpl<Instruction*> &Insts) const {
    if (LoadInst *LI = dyn_cast<LoadInst>(I))
      return LI->getOperand(0) == AI;
    return cast<StoreInst>(I)->getPointerOperand() == AI;
  }

  virtual void updateDebugInfo(Instruction *Inst) const {
    for (SmallVector<DbgDeclareInst *, 4>::const_iterator I = DDIs.begin(),
           E = DDIs.end(); I != E; ++I) {
      DbgDeclareInst *DDI = *I;
      if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
        ConvertDebugDeclareToDebugValue(DDI, SI, *DIB);
      else if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
        ConvertDebugDeclareToDebugValue(DDI, LI, *DIB);
    }
    for (SmallVector<DbgValueInst *, 4>::const_iterator I = DVIs.begin(),
           E = DVIs.end(); I != E; ++I) {
      DbgValueInst *DVI = *I;
      if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
        Instruction *DbgVal = NULL;
        // If an argument is zero or sign extended then use the argument
        // directly; the extension may be zapped by a later optimization pass.
        Argument *ExtendedArg = NULL;
        if (ZExtInst *ZExt = dyn_cast<ZExtInst>(SI->getOperand(0)))
          ExtendedArg = dyn_cast<Argument>(ZExt->getOperand(0));
        if (SExtInst *SExt = dyn_cast<SExtInst>(SI->getOperand(0)))
          ExtendedArg = dyn_cast<Argument>(SExt->getOperand(0));
        if (ExtendedArg)
          DbgVal = DIB->insertDbgValueIntrinsic(ExtendedArg, 0,
                                                DIVariable(DVI->getVariable()),
                                                SI);
        else
          DbgVal = DIB->insertDbgValueIntrinsic(SI->getOperand(0), 0,
                                                DIVariable(DVI->getVariable()),
                                                SI);
        DbgVal->setDebugLoc(DVI->getDebugLoc());
      } else if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
        Instruction *DbgVal =
          DIB->insertDbgValueIntrinsic(LI->getOperand(0), 0,
                                       DIVariable(DVI->getVariable()), LI);
        DbgVal->setDebugLoc(DVI->getDebugLoc());
      }
    }
  }
};
} // end anon namespace
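
// For example (hypothetical IR), when promoting an alloca described by
//   call void @llvm.dbg.declare(metadata !{i32* %x.addr}, metadata !10)
// updateDebugInfo emits a matching
//   call void @llvm.dbg.value(metadata !{i32 %val}, i64 0, metadata !10)
// at each store, so variable locations survive promotion.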
1006193323Sed
1007218893Sdim/// isSafeSelectToSpeculate - Select instructions that use an alloca and are
1008218893Sdim/// subsequently loaded can be rewritten to load both input pointers and then
1009218893Sdim/// select between the result, allowing the load of the alloca to be promoted.
1010218893Sdim/// From this:
1011218893Sdim///   %P2 = select i1 %cond, i32* %Alloca, i32* %Other
1012218893Sdim///   %V = load i32* %P2
1013218893Sdim/// to:
1014218893Sdim///   %V1 = load i32* %Alloca      -> will be mem2reg'd
1015218893Sdim///   %V2 = load i32* %Other
1016218893Sdim///   %V = select i1 %cond, i32 %V1, i32 %V2
1017218893Sdim///
1018218893Sdim/// We can do this to a select if its only uses are loads and if the operands
1019218893Sdim/// to the select can be loaded unconditionally.
1020218893Sdimstatic bool isSafeSelectToSpeculate(SelectInst *SI, const TargetData *TD) {
1021218893Sdim  bool TDerefable = SI->getTrueValue()->isDereferenceablePointer();
1022218893Sdim  bool FDerefable = SI->getFalseValue()->isDereferenceablePointer();
1023218893Sdim
1024218893Sdim  for (Value::use_iterator UI = SI->use_begin(), UE = SI->use_end();
1025218893Sdim       UI != UE; ++UI) {
1026218893Sdim    LoadInst *LI = dyn_cast<LoadInst>(*UI);
1027226890Sdim    if (LI == 0 || !LI->isSimple()) return false;
1028218893Sdim
1029218893Sdim    // Both operands to the select need to be dereferenceable, either absolutely
1030218893Sdim    // (e.g. allocas) or at this point because we can see other accesses to it.
1031218893Sdim    if (!TDerefable && !isSafeToLoadUnconditionally(SI->getTrueValue(), LI,
1032218893Sdim                                                    LI->getAlignment(), TD))
1033218893Sdim      return false;
1034218893Sdim    if (!FDerefable && !isSafeToLoadUnconditionally(SI->getFalseValue(), LI,
1035218893Sdim                                                    LI->getAlignment(), TD))
1036218893Sdim      return false;
1037218893Sdim  }
1038218893Sdim
1039218893Sdim  return true;
1040218893Sdim}
1041218893Sdim
1042218893Sdim/// isSafePHIToSpeculate - PHI instructions that use an alloca and are
1043218893Sdim/// subsequently loaded can be rewritten to load both input pointers in the pred
1044218893Sdim/// blocks and then PHI the results, allowing the load of the alloca to be
1045218893Sdim/// promoted.
1046218893Sdim/// From this:
1047218893Sdim///   %P2 = phi [i32* %Alloca, i32* %Other]
1048218893Sdim///   %V = load i32* %P2
1049218893Sdim/// to:
1050218893Sdim///   %V1 = load i32* %Alloca      -> will be mem2reg'd
1051218893Sdim///   ...
1052218893Sdim///   %V2 = load i32* %Other
1053218893Sdim///   ...
1054218893Sdim///   %V = phi [i32 %V1, i32 %V2]
1055218893Sdim///
1056218893Sdim/// We can do this to a PHI if its only uses are loads and if each incoming
1057218893Sdim/// pointer can be loaded unconditionally in its predecessor block.
1058218893Sdimstatic bool isSafePHIToSpeculate(PHINode *PN, const TargetData *TD) {
1059218893Sdim  // For now, we can only do this promotion if the load is in the same block as
1060218893Sdim  // the PHI, and if there are no stores between the phi and load.
1061218893Sdim  // TODO: Allow recursive phi users.
1062218893Sdim  // TODO: Allow stores.
1063218893Sdim  BasicBlock *BB = PN->getParent();
1064218893Sdim  unsigned MaxAlign = 0;
1065218893Sdim  for (Value::use_iterator UI = PN->use_begin(), UE = PN->use_end();
1066218893Sdim       UI != UE; ++UI) {
1067218893Sdim    LoadInst *LI = dyn_cast<LoadInst>(*UI);
1068226890Sdim    if (LI == 0 || !LI->isSimple()) return false;
1069218893Sdim
1070218893Sdim    // For now we only allow loads in the same block as the PHI.  This is a
1071218893Sdim    // common case that happens when instcombine merges two loads through a PHI.
1072218893Sdim    if (LI->getParent() != BB) return false;
1073218893Sdim
1074218893Sdim    // Ensure that there are no instructions between the PHI and the load that
1075218893Sdim    // could store.
1076218893Sdim    for (BasicBlock::iterator BBI = PN; &*BBI != LI; ++BBI)
1077218893Sdim      if (BBI->mayWriteToMemory())
1078218893Sdim        return false;
1079218893Sdim
1080218893Sdim    MaxAlign = std::max(MaxAlign, LI->getAlignment());
1081218893Sdim  }
1082218893Sdim
1083218893Sdim  // Okay, we know that we have one or more loads in the same block as the PHI.
1084218893Sdim  // We can transform this if it is safe to push the loads into the predecessor
1085218893Sdim  // blocks.  The only thing to watch out for is that we can't put a possibly
1086218893Sdim  // trapping load in the predecessor if it is a critical edge.
1087218893Sdim  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
1088218893Sdim    BasicBlock *Pred = PN->getIncomingBlock(i);
1089226890Sdim    Value *InVal = PN->getIncomingValue(i);
1090218893Sdim
1091226890Sdim    // If the terminator of the predecessor has side-effects (an invoke),
1092226890Sdim    // there is no safe place to put a load in the predecessor.
1093226890Sdim    if (Pred->getTerminator()->mayHaveSideEffects())
1094226890Sdim      return false;
1095226890Sdim
1096226890Sdim    // If the value is produced by the terminator of the predecessor
1097226890Sdim    // (an invoke), there is no valid place to put a load in the predecessor.
1098226890Sdim    if (Pred->getTerminator() == InVal)
1099226890Sdim      return false;
1100226890Sdim
1101218893Sdim    // If the predecessor has a single successor, then the edge isn't critical.
1102218893Sdim    if (Pred->getTerminator()->getNumSuccessors() == 1)
1103218893Sdim      continue;
1104218893Sdim
1105218893Sdim    // If this pointer is always safe to load, or if we can prove that there is
1106218893Sdim    // already a load in the block, then we can move the load to the pred block.
1107218893Sdim    if (InVal->isDereferenceablePointer() ||
1108218893Sdim        isSafeToLoadUnconditionally(InVal, Pred->getTerminator(), MaxAlign, TD))
1109218893Sdim      continue;
1110218893Sdim
1111218893Sdim    return false;
1112218893Sdim  }
1113218893Sdim
1114218893Sdim  return true;
1115218893Sdim}
1116218893Sdim
1117218893Sdim
1118218893Sdim/// tryToMakeAllocaBePromotable - This returns true if the alloca only has
1119218893Sdim/// direct (simple, i.e. non-volatile and non-atomic) loads and stores to it.
1120218893Sdim/// If the alloca is close but not quite there, this will transform the code
1121218893Sdim/// to allow promotion.  As such, it is a non-pure predicate.
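///
/// As an illustrative sketch (hypothetical IR, not taken from an actual test
/// case), an alloca such as:
///   %A = alloca i32
///   %P = select i1 %cond, i32* %A, i32* %Other
///   %V = load i32* %P
/// is "close but not quite there": provided %Other can be loaded
/// unconditionally, rewriting the select as two loads plus a select of the
/// loaded values leaves %A with only direct loads and stores, making it
/// promotable.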
1122218893Sdimstatic bool tryToMakeAllocaBePromotable(AllocaInst *AI, const TargetData *TD) {
1123218893Sdim  SetVector<Instruction*, SmallVector<Instruction*, 4>,
1124218893Sdim            SmallPtrSet<Instruction*, 4> > InstsToRewrite;
1125218893Sdim
1126218893Sdim  for (Value::use_iterator UI = AI->use_begin(), UE = AI->use_end();
1127218893Sdim       UI != UE; ++UI) {
1128218893Sdim    User *U = *UI;
1129218893Sdim    if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
1130226890Sdim      if (!LI->isSimple())
1131218893Sdim        return false;
1132218893Sdim      continue;
1133218893Sdim    }
1134218893Sdim
1135218893Sdim    if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
1136226890Sdim      if (SI->getOperand(0) == AI || !SI->isSimple())
1137218893Sdim        return false;   // Don't allow a store OF the AI, only INTO the AI.
1138218893Sdim      continue;
1139218893Sdim    }
1140218893Sdim
1141218893Sdim    if (SelectInst *SI = dyn_cast<SelectInst>(U)) {
1142218893Sdim      // If the condition being selected on is a constant, fold the select.
1143218893Sdim      // Yes, this does (rarely) happen early on.
1144218893Sdim      if (ConstantInt *CI = dyn_cast<ConstantInt>(SI->getCondition())) {
1145218893Sdim        Value *Result = SI->getOperand(1+CI->isZero());
1146218893Sdim        SI->replaceAllUsesWith(Result);
1147218893Sdim        SI->eraseFromParent();
1148218893Sdim
1149218893Sdim        // This is very rare, and we just scrambled the use list of AI, so
1150218893Sdim        // start over completely.
1151218893Sdim        return tryToMakeAllocaBePromotable(AI, TD);
1152218893Sdim      }
1153218893Sdim
1154218893Sdim      // If it is safe to turn "load (select c, AI, ptr)" into a select of two
1155218893Sdim      // loads, then we can transform this by rewriting the select.
1156218893Sdim      if (!isSafeSelectToSpeculate(SI, TD))
1157218893Sdim        return false;
1158218893Sdim
1159218893Sdim      InstsToRewrite.insert(SI);
1160218893Sdim      continue;
1161218893Sdim    }
1162218893Sdim
1163218893Sdim    if (PHINode *PN = dyn_cast<PHINode>(U)) {
1164218893Sdim      if (PN->use_empty()) {  // Dead PHIs can be stripped.
1165218893Sdim        InstsToRewrite.insert(PN);
1166218893Sdim        continue;
1167218893Sdim      }
1168218893Sdim
1169218893Sdim      // If it is safe to turn "load (phi [AI, ptr, ...])" into a PHI of loads
1170218893Sdim      // in the pred blocks, then we can transform this by rewriting the PHI.
1171218893Sdim      if (!isSafePHIToSpeculate(PN, TD))
1172218893Sdim        return false;
1173218893Sdim
1174218893Sdim      InstsToRewrite.insert(PN);
1175218893Sdim      continue;
1176218893Sdim    }
1177218893Sdim
1178226890Sdim    if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
1179226890Sdim      if (onlyUsedByLifetimeMarkers(BCI)) {
1180226890Sdim        InstsToRewrite.insert(BCI);
1181226890Sdim        continue;
1182226890Sdim      }
1183226890Sdim    }
1184226890Sdim
1185218893Sdim    return false;
1186218893Sdim  }
1187218893Sdim
1188218893Sdim  // If there are no instructions to rewrite, then all uses are load/stores and
1189218893Sdim  // we're done!
1190218893Sdim  if (InstsToRewrite.empty())
1191218893Sdim    return true;
1192218893Sdim
1193218893Sdim  // If we have instructions that need to be rewritten for this alloca to be
1194218893Sdim  // promotable, take care of them now.
1195218893Sdim  for (unsigned i = 0, e = InstsToRewrite.size(); i != e; ++i) {
1196226890Sdim    if (BitCastInst *BCI = dyn_cast<BitCastInst>(InstsToRewrite[i])) {
1197226890Sdim      // This could only be a bitcast used by nothing but lifetime intrinsics.
1198226890Sdim      for (BitCastInst::use_iterator I = BCI->use_begin(), E = BCI->use_end();
1199226890Sdim           I != E;) {
1200226890Sdim        Use &U = I.getUse();
1201226890Sdim        ++I;
1202226890Sdim        cast<Instruction>(U.getUser())->eraseFromParent();
1203226890Sdim      }
1204226890Sdim      BCI->eraseFromParent();
1205226890Sdim      continue;
1206226890Sdim    }
1207226890Sdim
1208218893Sdim    if (SelectInst *SI = dyn_cast<SelectInst>(InstsToRewrite[i])) {
1209218893Sdim      // Selects in InstsToRewrite only have load uses.  Rewrite each as two
1210218893Sdim      // loads with a new select.
1211218893Sdim      while (!SI->use_empty()) {
1212218893Sdim        LoadInst *LI = cast<LoadInst>(SI->use_back());
1213218893Sdim
1214218893Sdim        IRBuilder<> Builder(LI);
1215218893Sdim        LoadInst *TrueLoad =
1216218893Sdim          Builder.CreateLoad(SI->getTrueValue(), LI->getName()+".t");
1217218893Sdim        LoadInst *FalseLoad =
1218224145Sdim          Builder.CreateLoad(SI->getFalseValue(), LI->getName()+".f");
1219218893Sdim
1220218893Sdim        // Transfer alignment and TBAA info if present.
1221218893Sdim        TrueLoad->setAlignment(LI->getAlignment());
1222218893Sdim        FalseLoad->setAlignment(LI->getAlignment());
1223218893Sdim        if (MDNode *Tag = LI->getMetadata(LLVMContext::MD_tbaa)) {
1224218893Sdim          TrueLoad->setMetadata(LLVMContext::MD_tbaa, Tag);
1225218893Sdim          FalseLoad->setMetadata(LLVMContext::MD_tbaa, Tag);
1226218893Sdim        }
1227218893Sdim
1228218893Sdim        Value *V = Builder.CreateSelect(SI->getCondition(), TrueLoad, FalseLoad);
1229218893Sdim        V->takeName(LI);
1230218893Sdim        LI->replaceAllUsesWith(V);
1231218893Sdim        LI->eraseFromParent();
1232218893Sdim      }
1233218893Sdim
1234218893Sdim      // Now that all the loads are gone, the select is gone too.
1235218893Sdim      SI->eraseFromParent();
1236218893Sdim      continue;
1237218893Sdim    }
1238218893Sdim
1239218893Sdim    // Otherwise, we have a PHI node which allows us to push the loads into the
1240218893Sdim    // predecessors.
1241218893Sdim    PHINode *PN = cast<PHINode>(InstsToRewrite[i]);
1242218893Sdim    if (PN->use_empty()) {
1243218893Sdim      PN->eraseFromParent();
1244218893Sdim      continue;
1245218893Sdim    }
1246218893Sdim
1247226890Sdim    Type *LoadTy = cast<PointerType>(PN->getType())->getElementType();
1248221345Sdim    PHINode *NewPN = PHINode::Create(LoadTy, PN->getNumIncomingValues(),
1249221345Sdim                                     PN->getName()+".ld", PN);
1250218893Sdim
1251218893Sdim    // Get the TBAA tag and alignment to use from one of the loads.  It doesn't
1252218893Sdim    // matter which load we pick, even if the loads' tags or alignments differ.
1253218893Sdim    LoadInst *SomeLoad = cast<LoadInst>(PN->use_back());
1254218893Sdim    MDNode *TBAATag = SomeLoad->getMetadata(LLVMContext::MD_tbaa);
1255218893Sdim    unsigned Align = SomeLoad->getAlignment();
1256218893Sdim
1257218893Sdim    // Rewrite all loads of the PN to use the new PHI.
1258218893Sdim    while (!PN->use_empty()) {
1259218893Sdim      LoadInst *LI = cast<LoadInst>(PN->use_back());
1260218893Sdim      LI->replaceAllUsesWith(NewPN);
1261218893Sdim      LI->eraseFromParent();
1262218893Sdim    }
1263218893Sdim
1264218893Sdim    // Inject loads into all of the pred blocks.  Keep track of which blocks we
1265218893Sdim    // insert them into in case we have multiple edges from the same block.
1266218893Sdim    DenseMap<BasicBlock*, LoadInst*> InsertedLoads;
1267218893Sdim
1268218893Sdim    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
1269218893Sdim      BasicBlock *Pred = PN->getIncomingBlock(i);
1270218893Sdim      LoadInst *&Load = InsertedLoads[Pred];
1271218893Sdim      if (Load == 0) {
1272218893Sdim        Load = new LoadInst(PN->getIncomingValue(i),
1273218893Sdim                            PN->getName() + "." + Pred->getName(),
1274218893Sdim                            Pred->getTerminator());
1275218893Sdim        Load->setAlignment(Align);
1276218893Sdim        if (TBAATag) Load->setMetadata(LLVMContext::MD_tbaa, TBAATag);
1277218893Sdim      }
1278218893Sdim
1279218893Sdim      NewPN->addIncoming(Load, Pred);
1280218893Sdim    }
1281218893Sdim
1282218893Sdim    PN->eraseFromParent();
1283218893Sdim  }
1284218893Sdim
1285218893Sdim  ++NumAdjusted;
1286218893Sdim  return true;
1287218893Sdim}
1288218893Sdim
1289193323Sedbool SROA::performPromotion(Function &F) {
1290193323Sed  std::vector<AllocaInst*> Allocas;
1291218893Sdim  DominatorTree *DT = 0;
1292218893Sdim  if (HasDomTree)
1293218893Sdim    DT = &getAnalysis<DominatorTree>();
1294193323Sed
1295193323Sed  BasicBlock &BB = F.getEntryBlock();  // Get the entry node for the function
1296224145Sdim  DIBuilder DIB(*F.getParent());
1297193323Sed  bool Changed = false;
1298218893Sdim  SmallVector<Instruction*, 64> Insts;
1299193323Sed  while (1) {
1300193323Sed    Allocas.clear();
1301193323Sed
1302193323Sed    // Find allocas that are safe to promote, by looking at all instructions in
1303193323Sed    // the entry node
1304193323Sed    for (BasicBlock::iterator I = BB.begin(), E = --BB.end(); I != E; ++I)
1305193323Sed      if (AllocaInst *AI = dyn_cast<AllocaInst>(I))       // Is it an alloca?
1306218893Sdim        if (tryToMakeAllocaBePromotable(AI, TD))
1307193323Sed          Allocas.push_back(AI);
1308193323Sed
1309193323Sed    if (Allocas.empty()) break;
1310193323Sed
1311218893Sdim    if (HasDomTree)
1312218893Sdim      PromoteMemToReg(Allocas, *DT);
1313218893Sdim    else {
1314218893Sdim      SSAUpdater SSA;
1315218893Sdim      for (unsigned i = 0, e = Allocas.size(); i != e; ++i) {
1316218893Sdim        AllocaInst *AI = Allocas[i];
1317218893Sdim
1318218893Sdim        // Build list of instructions to promote.
1319218893Sdim        for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
1320218893Sdim             UI != E; ++UI)
1321218893Sdim          Insts.push_back(cast<Instruction>(*UI));
1322224145Sdim        AllocaPromoter(Insts, SSA, &DIB).run(AI, Insts);
1323218893Sdim        Insts.clear();
1324218893Sdim      }
1325218893Sdim    }
1326193323Sed    NumPromoted += Allocas.size();
1327193323Sed    Changed = true;
1328193323Sed  }
1329193323Sed
1330193323Sed  return Changed;
1331193323Sed}
1332193323Sed
1333207618Srdivacky
1334203954Srdivacky/// ShouldAttemptScalarRepl - Decide if an alloca is a good candidate for
1335203954Srdivacky/// SROA.  It must be a struct or array type with a small number of elements.
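///
/// For example, { i32, i32 } (2 elements) and [8 x i8] qualify, while a
/// [256 x i32] array does not.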
1336203954Srdivackystatic bool ShouldAttemptScalarRepl(AllocaInst *AI) {
1337226890Sdim  Type *T = AI->getAllocatedType();
1338203954Srdivacky  // Do not promote any struct into more than 32 separate vars.
1339226890Sdim  if (StructType *ST = dyn_cast<StructType>(T))
1340203954Srdivacky    return ST->getNumElements() <= 32;
1341203954Srdivacky  // Arrays are much less likely to be safe for SROA; only consider
1342203954Srdivacky  // them if they are very small.
1343226890Sdim  if (ArrayType *AT = dyn_cast<ArrayType>(T))
1344203954Srdivacky    return AT->getNumElements() <= 8;
1345203954Srdivacky  return false;
1346193323Sed}
1347193323Sed
1348207618Srdivacky
1349193323Sed// performScalarRepl - This is a simple worklist-driven algorithm.  It runs
1350224145Sdim// over all of the alloca instructions in the function, breaking them up into
1351224145Sdim// their elements and removing them whenever all of their uses can be rewritten.
1352193323Sed//
1353193323Sedbool SROA::performScalarRepl(Function &F) {
1354198892Srdivacky  std::vector<AllocaInst*> WorkList;
1355193323Sed
1356207618Srdivacky  // Scan the entry basic block, adding allocas to the worklist.
1357193323Sed  BasicBlock &BB = F.getEntryBlock();
1358193323Sed  for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I)
1359198892Srdivacky    if (AllocaInst *A = dyn_cast<AllocaInst>(I))
1360193323Sed      WorkList.push_back(A);
1361193323Sed
1362193323Sed  // Process the worklist
1363193323Sed  bool Changed = false;
1364193323Sed  while (!WorkList.empty()) {
1365198892Srdivacky    AllocaInst *AI = WorkList.back();
1366193323Sed    WorkList.pop_back();
1367218893Sdim
1368193323Sed    // Handle dead allocas trivially.  These can be formed by SROA'ing arrays
1369193323Sed    // with unused elements.
1370193323Sed    if (AI->use_empty()) {
1371193323Sed      AI->eraseFromParent();
1372207618Srdivacky      Changed = true;
1373193323Sed      continue;
1374193323Sed    }
1375193323Sed
1376193323Sed    // If this alloca is impossible for us to promote, reject it early.
1377193323Sed    if (AI->isArrayAllocation() || !AI->getAllocatedType()->isSized())
1378193323Sed      continue;
1379218893Sdim
1380193323Sed    // Check to see if this allocation is only modified by a memcpy/memmove from
1381193323Sed    // a constant global.  If this is the case, we can change all users to use
1382193323Sed    // the constant global instead.  This is commonly produced by the CFE by
1383193323Sed    // constructs like "void foo() { int A[] = {1,2,3,4,5,6,7,8,9...}; }" if 'A'
1384193323Sed    // is only subsequently read.
1385224145Sdim    SmallVector<Instruction *, 4> ToDelete;
1386224145Sdim    if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(AI, ToDelete)) {
1387202375Srdivacky      DEBUG(dbgs() << "Found alloca equal to global: " << *AI << '\n');
1388224145Sdim      DEBUG(dbgs() << "  memcpy = " << *Copy << '\n');
1389224145Sdim      for (unsigned i = 0, e = ToDelete.size(); i != e; ++i)
1390224145Sdim        ToDelete[i]->eraseFromParent();
1391224145Sdim      Constant *TheSrc = cast<Constant>(Copy->getSource());
1392198090Srdivacky      AI->replaceAllUsesWith(ConstantExpr::getBitCast(TheSrc, AI->getType()));
1393224145Sdim      Copy->eraseFromParent();  // Don't mutate the global.
1394193323Sed      AI->eraseFromParent();
1395193323Sed      ++NumGlobals;
1396193323Sed      Changed = true;
1397193323Sed      continue;
1398193323Sed    }
1399218893Sdim
1400193323Sed    // Check to see if we can perform the core SROA transformation.  We cannot
1401193323Sed    // transform the allocation instruction if it is an array allocation
1402193323Sed    // (allocations OF arrays are ok though), and an allocation of a scalar
1403193323Sed    // value cannot be decomposed at all.
1404193323Sed    uint64_t AllocaSize = TD->getTypeAllocSize(AI->getAllocatedType());
1405193323Sed
1406198090Srdivacky    // Do not promote [0 x %struct].
1407198090Srdivacky    if (AllocaSize == 0) continue;
1408218893Sdim
1409207618Srdivacky    // Do not promote any struct whose size is too big.
1410207618Srdivacky    if (AllocaSize > SRThreshold) continue;
1411218893Sdim
1412203954Srdivacky    // If the alloca looks like a good candidate for scalar replacement, and if
1413203954Srdivacky    // all its users can be transformed, then split up the aggregate into its
1414203954Srdivacky    // separate elements.
1415203954Srdivacky    if (ShouldAttemptScalarRepl(AI) && isSafeAllocaToScalarRepl(AI)) {
1416203954Srdivacky      DoScalarReplacement(AI, WorkList);
1417203954Srdivacky      Changed = true;
1418203954Srdivacky      continue;
1419203954Srdivacky    }
1420203954Srdivacky
1421193323Sed    // See if we can turn this aggregate value (potentially with casts) into a
1422193323Sed    // simple scalar value that can be mem2reg'd into a register.
1423193323Sed    // IsNotTrivial tracks whether this is something that mem2reg could have
1424193323Sed    // promoted itself.  If so, we don't want to transform it needlessly.  Note
1425193323Sed    // that we can't just check based on the type: the alloca may be of an i32
1426193323Sed    // but that has pointer arithmetic to set byte 3 of it or something.
1427207618Srdivacky    if (AllocaInst *NewAI =
1428207618Srdivacky          ConvertToScalarInfo((unsigned)AllocaSize, *TD).TryConvert(AI)) {
1429193323Sed      NewAI->takeName(AI);
1430193323Sed      AI->eraseFromParent();
1431193323Sed      ++NumConverted;
1432193323Sed      Changed = true;
1433193323Sed      continue;
1434218893Sdim    }
1435218893Sdim
1436193323Sed    // Otherwise, couldn't process this alloca.
1437193323Sed  }
1438193323Sed
1439193323Sed  return Changed;
1440193323Sed}
1441193323Sed
1442193323Sed/// DoScalarReplacement - This alloca satisfied the isSafeAllocaToScalarRepl
1443193323Sed/// predicate, do SROA now.
1444218893Sdimvoid SROA::DoScalarReplacement(AllocaInst *AI,
1445198892Srdivacky                               std::vector<AllocaInst*> &WorkList) {
1446202375Srdivacky  DEBUG(dbgs() << "Found inst to SROA: " << *AI << '\n');
1447193323Sed  SmallVector<AllocaInst*, 32> ElementAllocas;
1448226890Sdim  if (StructType *ST = dyn_cast<StructType>(AI->getAllocatedType())) {
1449193323Sed    ElementAllocas.reserve(ST->getNumContainedTypes());
1450193323Sed    for (unsigned i = 0, e = ST->getNumContainedTypes(); i != e; ++i) {
1451218893Sdim      AllocaInst *NA = new AllocaInst(ST->getContainedType(i), 0,
1452193323Sed                                      AI->getAlignment(),
1453198090Srdivacky                                      AI->getName() + "." + Twine(i), AI);
1454193323Sed      ElementAllocas.push_back(NA);
1455193323Sed      WorkList.push_back(NA);  // Add to worklist for recursive processing
1456193323Sed    }
1457193323Sed  } else {
1458226890Sdim    ArrayType *AT = cast<ArrayType>(AI->getAllocatedType());
1459193323Sed    ElementAllocas.reserve(AT->getNumElements());
1460226890Sdim    Type *ElTy = AT->getElementType();
1461193323Sed    for (unsigned i = 0, e = AT->getNumElements(); i != e; ++i) {
1462193323Sed      AllocaInst *NA = new AllocaInst(ElTy, 0, AI->getAlignment(),
1463198090Srdivacky                                      AI->getName() + "." + Twine(i), AI);
1464193323Sed      ElementAllocas.push_back(NA);
1465193323Sed      WorkList.push_back(NA);  // Add to worklist for recursive processing
1466193323Sed    }
1467193323Sed  }
1468193323Sed
1469201360Srdivacky  // Now that we have created the new alloca instructions, rewrite all the
1470201360Srdivacky  // uses of the old alloca.
1471201360Srdivacky  RewriteForScalarRepl(AI, AI, 0, ElementAllocas);
1472193323Sed
1473201360Srdivacky  // Now erase any instructions that were made dead while rewriting the alloca.
1474201360Srdivacky  DeleteDeadInstructions();
1475201360Srdivacky  AI->eraseFromParent();
1476193323Sed
1477210299Sed  ++NumReplaced;
1478201360Srdivacky}
1479193323Sed
1480201360Srdivacky/// DeleteDeadInstructions - Erase instructions on the DeadInsts list,
1481201360Srdivacky/// recursively including all their operands that become trivially dead.
1482201360Srdivackyvoid SROA::DeleteDeadInstructions() {
1483201360Srdivacky  while (!DeadInsts.empty()) {
1484201360Srdivacky    Instruction *I = cast<Instruction>(DeadInsts.pop_back_val());
1485193323Sed
1486201360Srdivacky    for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI)
1487201360Srdivacky      if (Instruction *U = dyn_cast<Instruction>(*OI)) {
1488201360Srdivacky        // Zero out the operand and see if it becomes trivially dead.
1489201360Srdivacky        // (But, don't add allocas to the dead instruction list -- they are
1490201360Srdivacky        // already on the worklist and will be deleted separately.)
1491201360Srdivacky        *OI = 0;
1492201360Srdivacky        if (isInstructionTriviallyDead(U) && !isa<AllocaInst>(U))
1493201360Srdivacky          DeadInsts.push_back(U);
1494201360Srdivacky      }
1495201360Srdivacky
1496201360Srdivacky    I->eraseFromParent();
1497193323Sed  }
1498193323Sed}
1499218893Sdim
1500201360Srdivacky/// isSafeForScalarRepl - Check if instruction I is a safe use with regard to
1501201360Srdivacky/// performing scalar replacement of alloca AI.  The results are flagged in
1502201360Srdivacky/// the Info parameter.  Offset indicates the position within AI that is
1503201360Srdivacky/// referenced by this instruction.
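///
/// For example, a simple (non-volatile, non-atomic) load or store that stays
/// within one element of AI is safe, while a store OF the alloca's address
/// marks Info as unsafe.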
1504218893Sdimvoid SROA::isSafeForScalarRepl(Instruction *I, uint64_t Offset,
1505201360Srdivacky                               AllocaInfo &Info) {
1506201360Srdivacky  for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E; ++UI) {
1507201360Srdivacky    Instruction *User = cast<Instruction>(*UI);
1508193323Sed
1509201360Srdivacky    if (BitCastInst *BC = dyn_cast<BitCastInst>(User)) {
1510218893Sdim      isSafeForScalarRepl(BC, Offset, Info);
1511201360Srdivacky    } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) {
1512201360Srdivacky      uint64_t GEPOffset = Offset;
1513218893Sdim      isSafeGEP(GEPI, GEPOffset, Info);
1514201360Srdivacky      if (!Info.isUnsafe)
1515218893Sdim        isSafeForScalarRepl(GEPI, GEPOffset, Info);
1516210299Sed    } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) {
1517201360Srdivacky      ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength());
1518218893Sdim      if (Length == 0)
1519218893Sdim        return MarkUnsafe(Info, User);
1520218893Sdim      isSafeMemAccess(Offset, Length->getZExtValue(), 0,
1521218893Sdim                      UI.getOperandNo() == 0, Info, MI,
1522218893Sdim                      true /*AllowWholeAccess*/);
1523201360Srdivacky    } else if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
1524226890Sdim      if (!LI->isSimple())
1525218893Sdim        return MarkUnsafe(Info, User);
1526226890Sdim      Type *LIType = LI->getType();
1527218893Sdim      isSafeMemAccess(Offset, TD->getTypeAllocSize(LIType),
1528218893Sdim                      LIType, false, Info, LI, true /*AllowWholeAccess*/);
1529218893Sdim      Info.hasALoadOrStore = true;
1530218893Sdim
1531201360Srdivacky    } else if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
1532193323Sed      // Store is ok if storing INTO the pointer, not storing the pointer
1533226890Sdim      if (!SI->isSimple() || SI->getOperand(0) == I)
1534218893Sdim        return MarkUnsafe(Info, User);
1535218893Sdim
1536226890Sdim      Type *SIType = SI->getOperand(0)->getType();
1537218893Sdim      isSafeMemAccess(Offset, TD->getTypeAllocSize(SIType),
1538218893Sdim                      SIType, true, Info, SI, true /*AllowWholeAccess*/);
1539218893Sdim      Info.hasALoadOrStore = true;
1540226890Sdim    } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(User)) {
1541226890Sdim      if (II->getIntrinsicID() != Intrinsic::lifetime_start &&
1542226890Sdim          II->getIntrinsicID() != Intrinsic::lifetime_end)
1543226890Sdim        return MarkUnsafe(Info, User);
1544218893Sdim    } else if (isa<PHINode>(User) || isa<SelectInst>(User)) {
1545218893Sdim      isSafePHISelectUseForScalarRepl(User, Offset, Info);
1546201360Srdivacky    } else {
1547218893Sdim      return MarkUnsafe(Info, User);
1548193323Sed    }
1549201360Srdivacky    if (Info.isUnsafe) return;
1550193323Sed  }
1551193323Sed}
1552218893Sdim
1553193323Sed
1554218893Sdim/// isSafePHISelectUseForScalarRepl - If we see a PHI or select using a pointer
1555218893Sdim/// derived from the alloca, we can often still split the alloca into elements.
1556218893Sdim/// This is useful if we have a large alloca where one element is phi'd
1557218893Sdim/// together somewhere: we can SRoA and promote all the other elements even if
1558218893Sdim/// we end up not being able to promote this one.
1559218893Sdim///
1560218893Sdim/// All we require is that the uses of the PHI do not index into other parts of
1561218893Sdim/// the alloca.  The most important use case for this is single loads and stores
1562218893Sdim/// that are PHI'd together, which can happen due to code sinking.
1563218893Sdimvoid SROA::isSafePHISelectUseForScalarRepl(Instruction *I, uint64_t Offset,
1564218893Sdim                                           AllocaInfo &Info) {
1565218893Sdim  // If we've already checked this PHI, don't do it again.
1566218893Sdim  if (PHINode *PN = dyn_cast<PHINode>(I))
1567218893Sdim    if (!Info.CheckedPHIs.insert(PN))
1568218893Sdim      return;
1569218893Sdim
1570218893Sdim  for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E; ++UI) {
1571218893Sdim    Instruction *User = cast<Instruction>(*UI);
1572218893Sdim
1573218893Sdim    if (BitCastInst *BC = dyn_cast<BitCastInst>(User)) {
1574218893Sdim      isSafePHISelectUseForScalarRepl(BC, Offset, Info);
1575218893Sdim    } else if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) {
1576218893Sdim      // Only allow "bitcast" GEPs for simplicity.  We could generalize this,
1577218893Sdim      // but would have to prove that we're staying inside of an element being
1578218893Sdim      // promoted.
1579218893Sdim      if (!GEPI->hasAllZeroIndices())
1580218893Sdim        return MarkUnsafe(Info, User);
1581218893Sdim      isSafePHISelectUseForScalarRepl(GEPI, Offset, Info);
1582218893Sdim    } else if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
1583226890Sdim      if (!LI->isSimple())
1584218893Sdim        return MarkUnsafe(Info, User);
1585226890Sdim      Type *LIType = LI->getType();
1586218893Sdim      isSafeMemAccess(Offset, TD->getTypeAllocSize(LIType),
1587218893Sdim                      LIType, false, Info, LI, false /*AllowWholeAccess*/);
1588218893Sdim      Info.hasALoadOrStore = true;
1589218893Sdim
1590218893Sdim    } else if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
1591218893Sdim      // Store is ok if storing INTO the pointer, not storing the pointer
1592226890Sdim      if (!SI->isSimple() || SI->getOperand(0) == I)
1593218893Sdim        return MarkUnsafe(Info, User);
1594218893Sdim
1595226890Sdim      Type *SIType = SI->getOperand(0)->getType();
1596218893Sdim      isSafeMemAccess(Offset, TD->getTypeAllocSize(SIType),
1597218893Sdim                      SIType, true, Info, SI, false /*AllowWholeAccess*/);
1598218893Sdim      Info.hasALoadOrStore = true;
1599218893Sdim    } else if (isa<PHINode>(User) || isa<SelectInst>(User)) {
1600218893Sdim      isSafePHISelectUseForScalarRepl(User, Offset, Info);
1601218893Sdim    } else {
1602218893Sdim      return MarkUnsafe(Info, User);
1603218893Sdim    }
1604218893Sdim    if (Info.isUnsafe) return;
1605218893Sdim  }
1606218893Sdim}
1607218893Sdim
1608201360Srdivacky/// isSafeGEP - Check if a GEP instruction can be handled for scalar
1609201360Srdivacky/// replacement.  It is safe when all the indices are constant, in-bounds
1610201360Srdivacky/// references, and when the resulting offset corresponds to an element within
1611201360Srdivacky/// the alloca type.  The results are flagged in the Info parameter.  Upon
1612201360Srdivacky/// return, Offset is adjusted as specified by the GEP indices.
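///
/// For example (hypothetical IR), with an alloca of type [4 x { i32, i32 }]:
///   %p = getelementptr inbounds [4 x { i32, i32 }]* %A, i32 0, i32 2, i32 1
/// uses only constant indices and lands on a component element (offset 20),
/// so it is safe; a variable array index in place of the '2' would mark the
/// alloca unsafe.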
1613218893Sdimvoid SROA::isSafeGEP(GetElementPtrInst *GEPI,
1614201360Srdivacky                     uint64_t &Offset, AllocaInfo &Info) {
1615201360Srdivacky  gep_type_iterator GEPIt = gep_type_begin(GEPI), E = gep_type_end(GEPI);
1616201360Srdivacky  if (GEPIt == E)
1617201360Srdivacky    return;
1618193323Sed
1619201360Srdivacky  // Walk through the GEP type indices, checking the types that this indexes
1620201360Srdivacky  // into.
1621201360Srdivacky  for (; GEPIt != E; ++GEPIt) {
1622201360Srdivacky    // Ignore struct elements; no extra checking is needed for these.
1623204642Srdivacky    if ((*GEPIt)->isStructTy())
1624201360Srdivacky      continue;
1625193323Sed
1626201360Srdivacky    ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPIt.getOperand());
1627201360Srdivacky    if (!IdxVal)
1628218893Sdim      return MarkUnsafe(Info, GEPI);
1629193323Sed  }
1630193323Sed
1631201360Srdivacky  // Compute the offset due to this GEP and check if the alloca has a
1632201360Srdivacky  // component element at that offset.
1633201360Srdivacky  SmallVector<Value*, 8> Indices(GEPI->op_begin() + 1, GEPI->op_end());
1634226890Sdim  Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(), Indices);
1635218893Sdim  if (!TypeHasComponent(Info.AI->getAllocatedType(), Offset, 0))
1636218893Sdim    MarkUnsafe(Info, GEPI);
1637201360Srdivacky}
1638193323Sed
1639218893Sdim/// isHomogeneousAggregate - Check if type T is a struct or array containing
1640218893Sdim/// elements of the same type (which is always true for arrays).  If so,
1641218893Sdim/// return true with NumElts and EltTy set to the number of elements and the
1642218893Sdim/// element type, respectively.
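///
/// For example, [4 x float] and { float, float, float, float } are both
/// homogeneous with NumElts == 4 and EltTy == float, whereas { i32, float }
/// is not homogeneous.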
1643226890Sdimstatic bool isHomogeneousAggregate(Type *T, unsigned &NumElts,
1644226890Sdim                                   Type *&EltTy) {
1645226890Sdim  if (ArrayType *AT = dyn_cast<ArrayType>(T)) {
1646218893Sdim    NumElts = AT->getNumElements();
1647218893Sdim    EltTy = (NumElts == 0 ? 0 : AT->getElementType());
1648218893Sdim    return true;
1649218893Sdim  }
1650226890Sdim  if (StructType *ST = dyn_cast<StructType>(T)) {
1651218893Sdim    NumElts = ST->getNumContainedTypes();
1652218893Sdim    EltTy = (NumElts == 0 ? 0 : ST->getContainedType(0));
1653218893Sdim    for (unsigned n = 1; n < NumElts; ++n) {
1654218893Sdim      if (ST->getContainedType(n) != EltTy)
1655218893Sdim        return false;
1656218893Sdim    }
1657218893Sdim    return true;
1658218893Sdim  }
1659218893Sdim  return false;
1660218893Sdim}
1661218893Sdim
1662218893Sdim/// isCompatibleAggregate - Check if T1 and T2 are either the same type or are
1663218893Sdim/// "homogeneous" aggregates with the same element type and number of elements.
1664226890Sdimstatic bool isCompatibleAggregate(Type *T1, Type *T2) {
1665218893Sdim  if (T1 == T2)
1666218893Sdim    return true;
1667218893Sdim
1668218893Sdim  unsigned NumElts1, NumElts2;
1669226890Sdim  Type *EltTy1, *EltTy2;
1670218893Sdim  if (isHomogeneousAggregate(T1, NumElts1, EltTy1) &&
1671218893Sdim      isHomogeneousAggregate(T2, NumElts2, EltTy2) &&
1672218893Sdim      NumElts1 == NumElts2 &&
1673218893Sdim      EltTy1 == EltTy2)
1674218893Sdim    return true;
1675218893Sdim
1676218893Sdim  return false;
1677218893Sdim}
1678218893Sdim
1679201360Srdivacky/// isSafeMemAccess - Check if a load/store/memcpy operates on the entire AI
1680201360Srdivacky/// alloca or has an offset and size that corresponds to a component element
1681201360Srdivacky/// within it.  The offset checked here may have been formed from a GEP with a
1682201360Srdivacky/// pointer bitcasted to a different type.
1683218893Sdim///
1684218893Sdim/// If AllowWholeAccess is true, then this allows uses of the entire alloca as a
1685218893Sdim/// unit.  If false, it only allows accesses known to be in a single element.
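///
/// For example (hypothetical IR), for %A = alloca { i32, i32 }, an i64 load
/// of the whole 8-byte alloca through a bitcast pointer is accepted only when
/// AllowWholeAccess is true, while an i32 access at offset 4 is accepted
/// either way because it falls entirely within the second element.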
1686218893Sdimvoid SROA::isSafeMemAccess(uint64_t Offset, uint64_t MemSize,
1687226890Sdim                           Type *MemOpType, bool isStore,
1688218893Sdim                           AllocaInfo &Info, Instruction *TheAccess,
1689218893Sdim                           bool AllowWholeAccess) {
1690201360Srdivacky  // Check if this is a load/store of the entire alloca.
1691218893Sdim  if (Offset == 0 && AllowWholeAccess &&
1692218893Sdim      MemSize == TD->getTypeAllocSize(Info.AI->getAllocatedType())) {
1693218893Sdim    // This can be safe for MemIntrinsics (where MemOpType is 0) and integer
1694218893Sdim    // loads/stores (which are essentially the same as the MemIntrinsics with
1695218893Sdim    // regard to copying padding between elements).  But, if an alloca is
1696218893Sdim    // flagged as both a source and destination of such operations, we'll need
1697218893Sdim    // to check later for padding between elements.
1698218893Sdim    if (!MemOpType || MemOpType->isIntegerTy()) {
1699218893Sdim      if (isStore)
1700218893Sdim        Info.isMemCpyDst = true;
1701218893Sdim      else
1702218893Sdim        Info.isMemCpySrc = true;
1703201360Srdivacky      return;
1704193323Sed    }
1705218893Sdim    // This is also safe for references using a type that is compatible with
1706218893Sdim    // the type of the alloca, so that loads/stores can be rewritten using
1707218893Sdim    // insertvalue/extractvalue.
1708218893Sdim    if (isCompatibleAggregate(MemOpType, Info.AI->getAllocatedType())) {
1709218893Sdim      Info.hasSubelementAccess = true;
1710218893Sdim      return;
1711218893Sdim    }
1712193323Sed  }
1713201360Srdivacky  // Check if the offset/size correspond to a component within the alloca type.
1714226890Sdim  Type *T = Info.AI->getAllocatedType();
1715218893Sdim  if (TypeHasComponent(T, Offset, MemSize)) {
1716218893Sdim    Info.hasSubelementAccess = true;
1717201360Srdivacky    return;
1718218893Sdim  }
1719193323Sed
1720218893Sdim  return MarkUnsafe(Info, TheAccess);
1721193323Sed}
1722193323Sed
1723201360Srdivacky/// TypeHasComponent - Return true if T has a component type with the
1724201360Srdivacky/// specified offset and size.  If Size is zero, do not check the size.
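///
/// For example, with T = { i32, [2 x i16] }, Offset 4 and Size 2 reach the
/// first i16 of the nested array, so this returns true; Offset 2 and Size 4
/// would straddle the i32/array boundary and return false.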
1725226890Sdimbool SROA::TypeHasComponent(Type *T, uint64_t Offset, uint64_t Size) {
1726226890Sdim  Type *EltTy;
1727201360Srdivacky  uint64_t EltSize;
1728226890Sdim  if (StructType *ST = dyn_cast<StructType>(T)) {
1729201360Srdivacky    const StructLayout *Layout = TD->getStructLayout(ST);
1730201360Srdivacky    unsigned EltIdx = Layout->getElementContainingOffset(Offset);
1731201360Srdivacky    EltTy = ST->getContainedType(EltIdx);
1732201360Srdivacky    EltSize = TD->getTypeAllocSize(EltTy);
1733201360Srdivacky    Offset -= Layout->getElementOffset(EltIdx);
1734226890Sdim  } else if (ArrayType *AT = dyn_cast<ArrayType>(T)) {
1735201360Srdivacky    EltTy = AT->getElementType();
1736201360Srdivacky    EltSize = TD->getTypeAllocSize(EltTy);
1737201360Srdivacky    if (Offset >= AT->getNumElements() * EltSize)
1738201360Srdivacky      return false;
1739201360Srdivacky    Offset %= EltSize;
1740201360Srdivacky  } else {
1741201360Srdivacky    return false;
1742193323Sed  }
1743201360Srdivacky  if (Offset == 0 && (Size == 0 || EltSize == Size))
1744201360Srdivacky    return true;
1745201360Srdivacky  // Check if the component spans multiple elements.
1746201360Srdivacky  if (Offset + Size > EltSize)
1747201360Srdivacky    return false;
1748201360Srdivacky  return TypeHasComponent(EltTy, Offset, Size);
1749193323Sed}
1750193323Sed
1751201360Srdivacky/// RewriteForScalarRepl - Alloca AI is being split into NewElts, so rewrite
1752201360Srdivacky/// the instruction I, which references it, to use the separate elements.
1753201360Srdivacky/// Offset indicates the position within AI that is referenced by this
1754201360Srdivacky/// instruction.
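///
/// For example (a sketch), once { i32, i32 } %A has been split into %A.0 and
/// %A.1, a GEP to the second field is redirected at %A.1, and an aggregate
/// load or store of %A is expanded into per-element operations below.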
1755201360Srdivackyvoid SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
1756201360Srdivacky                                SmallVector<AllocaInst*, 32> &NewElts) {
1757218893Sdim  for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI!=E;) {
1758218893Sdim    Use &TheUse = UI.getUse();
1759218893Sdim    Instruction *User = cast<Instruction>(*UI++);
1760193323Sed
1761201360Srdivacky    if (BitCastInst *BC = dyn_cast<BitCastInst>(User)) {
1762201360Srdivacky      RewriteBitCast(BC, AI, Offset, NewElts);
1763218893Sdim      continue;
1764218893Sdim    }
1765218893Sdim
1766218893Sdim    if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(User)) {
1767201360Srdivacky      RewriteGEP(GEPI, AI, Offset, NewElts);
1768218893Sdim      continue;
1769218893Sdim    }
1770218893Sdim
1771218893Sdim    if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(User)) {
1772201360Srdivacky      ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength());
1773201360Srdivacky      uint64_t MemSize = Length->getZExtValue();
1774201360Srdivacky      if (Offset == 0 &&
1775201360Srdivacky          MemSize == TD->getTypeAllocSize(AI->getAllocatedType()))
1776201360Srdivacky        RewriteMemIntrinUserOfAlloca(MI, I, AI, NewElts);
1777201360Srdivacky      // Otherwise the intrinsic can only touch a single element and the
1778201360Srdivacky      // address operand will be updated, so nothing else needs to be done.
1779218893Sdim      continue;
1780218893Sdim    }
1781226890Sdim
1782226890Sdim    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(User)) {
1783226890Sdim      if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
1784226890Sdim          II->getIntrinsicID() == Intrinsic::lifetime_end) {
1785226890Sdim        RewriteLifetimeIntrinsic(II, AI, Offset, NewElts);
1786226890Sdim      }
1787226890Sdim      continue;
1788226890Sdim    }
1789218893Sdim
1790218893Sdim    if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
1791226890Sdim      Type *LIType = LI->getType();
1792218893Sdim
1793218893Sdim      if (isCompatibleAggregate(LIType, AI->getAllocatedType())) {
1794201360Srdivacky        // Replace:
1795201360Srdivacky        //   %res = load { i32, i32 }* %alloc
1796201360Srdivacky        // with:
1797201360Srdivacky        //   %load.0 = load i32* %alloc.0
1798201360Srdivacky        //   %insert.0 insertvalue { i32, i32 } zeroinitializer, i32 %load.0, 0
1799201360Srdivacky        //   %load.1 = load i32* %alloc.1
1800201360Srdivacky        //   %insert = insertvalue { i32, i32 } %insert.0, i32 %load.1, 1
1801201360Srdivacky        // (Also works for arrays instead of structs)
1802201360Srdivacky        Value *Insert = UndefValue::get(LIType);
1803223017Sdim        IRBuilder<> Builder(LI);
1804201360Srdivacky        for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
1805223017Sdim          Value *Load = Builder.CreateLoad(NewElts[i], "load");
1806223017Sdim          Insert = Builder.CreateInsertValue(Insert, Load, i, "insert");
1807201360Srdivacky        }
1808201360Srdivacky        LI->replaceAllUsesWith(Insert);
1809201360Srdivacky        DeadInsts.push_back(LI);
1810204642Srdivacky      } else if (LIType->isIntegerTy() &&
1811201360Srdivacky                 TD->getTypeAllocSize(LIType) ==
1812201360Srdivacky                 TD->getTypeAllocSize(AI->getAllocatedType())) {
1813201360Srdivacky        // If this is a load of the entire alloca to an integer, rewrite it.
1814201360Srdivacky        RewriteLoadUserOfWholeAlloca(LI, AI, NewElts);
1815193323Sed      }
1816218893Sdim      continue;
1817218893Sdim    }
1818218893Sdim
1819218893Sdim    if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
1820201360Srdivacky      Value *Val = SI->getOperand(0);
1821226890Sdim      Type *SIType = Val->getType();
1822218893Sdim      if (isCompatibleAggregate(SIType, AI->getAllocatedType())) {
1823201360Srdivacky        // Replace:
1824201360Srdivacky        //   store { i32, i32 } %val, { i32, i32 }* %alloc
1825201360Srdivacky        // with:
1826201360Srdivacky        //   %val.0 = extractvalue { i32, i32 } %val, 0
1827201360Srdivacky        //   store i32 %val.0, i32* %alloc.0
1828201360Srdivacky        //   %val.1 = extractvalue { i32, i32 } %val, 1
1829201360Srdivacky        //   store i32 %val.1, i32* %alloc.1
1830201360Srdivacky        // (Also works for arrays instead of structs)
1831223017Sdim        IRBuilder<> Builder(SI);
1832201360Srdivacky        for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
1833223017Sdim          Value *Extract = Builder.CreateExtractValue(Val, i, Val->getName());
1834223017Sdim          Builder.CreateStore(Extract, NewElts[i]);
1835201360Srdivacky        }
1836201360Srdivacky        DeadInsts.push_back(SI);
1837204642Srdivacky      } else if (SIType->isIntegerTy() &&
1838201360Srdivacky                 TD->getTypeAllocSize(SIType) ==
1839201360Srdivacky                 TD->getTypeAllocSize(AI->getAllocatedType())) {
1840201360Srdivacky        // If this is a store of the entire alloca from an integer, rewrite it.
1841201360Srdivacky        RewriteStoreUserOfWholeAlloca(SI, AI, NewElts);
1842193323Sed      }
1843218893Sdim      continue;
1844193323Sed    }
1845218893Sdim
1846218893Sdim    if (isa<SelectInst>(User) || isa<PHINode>(User)) {
1847218893Sdim      // If we have a PHI user of the alloca itself (as opposed to a GEP or
1848218893Sdim      // bitcast) we have to rewrite it.  GEP and bitcast uses will be RAUW'd to
1849218893Sdim      // the new pointer.
1850218893Sdim      if (!isa<AllocaInst>(I)) continue;
1851218893Sdim
1852218893Sdim      assert(Offset == 0 && NewElts[0] &&
1853218893Sdim             "Direct alloca use should have a zero offset");
1854218893Sdim
1855218893Sdim      // If we have a use of the alloca, we know the derived uses will be
1856218893Sdim      // utilizing just the first element of the scalarized result.  Insert a
1857218893Sdim      // bitcast of the first alloca before the user as required.
1858218893Sdim      AllocaInst *NewAI = NewElts[0];
1859218893Sdim      BitCastInst *BCI = new BitCastInst(NewAI, AI->getType(), "", NewAI);
1860218893Sdim      NewAI->moveBefore(BCI);
1861218893Sdim      TheUse = BCI;
1862218893Sdim      continue;
1863218893Sdim    }
1864193323Sed  }
1865193323Sed}
1866193323Sed
1867201360Srdivacky/// RewriteBitCast - Update a bitcast reference to the alloca being replaced
1868201360Srdivacky/// and recursively continue updating all of its uses.
1869201360Srdivackyvoid SROA::RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset,
1870201360Srdivacky                          SmallVector<AllocaInst*, 32> &NewElts) {
1871201360Srdivacky  RewriteForScalarRepl(BC, AI, Offset, NewElts);
1872201360Srdivacky  if (BC->getOperand(0) != AI)
1873201360Srdivacky    return;
1874193323Sed
1875201360Srdivacky  // The bitcast references the original alloca.  Replace its uses with
1876201360Srdivacky  // references to the first new element alloca.
1877201360Srdivacky  Instruction *Val = NewElts[0];
1878201360Srdivacky  if (Val->getType() != BC->getDestTy()) {
1879201360Srdivacky    Val = new BitCastInst(Val, BC->getDestTy(), "", BC);
1880201360Srdivacky    Val->takeName(BC);
1881201360Srdivacky  }
1882201360Srdivacky  BC->replaceAllUsesWith(Val);
1883201360Srdivacky  DeadInsts.push_back(BC);
1884201360Srdivacky}
1885193323Sed
1886201360Srdivacky/// FindElementAndOffset - Return the index of the element containing Offset
1887201360Srdivacky/// within the specified type, which must be either a struct or an array.
1888201360Srdivacky/// Sets T to the type of the element and Offset to the offset within that
1889201360Srdivacky/// element.  IdxTy is set to the type of the index result to be used in a
1890201360Srdivacky/// GEP instruction.
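///
/// For example, with T = [4 x i32] and Offset = 10, this returns Idx = 2 and
/// leaves Offset = 2 (two bytes into the third element), with IdxTy = i64;
/// for struct types IdxTy is i32, matching GEP index conventions.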
1891226890Sdimuint64_t SROA::FindElementAndOffset(Type *&T, uint64_t &Offset,
1892226890Sdim                                    Type *&IdxTy) {
1893201360Srdivacky  uint64_t Idx = 0;
1894226890Sdim  if (StructType *ST = dyn_cast<StructType>(T)) {
1895201360Srdivacky    const StructLayout *Layout = TD->getStructLayout(ST);
1896201360Srdivacky    Idx = Layout->getElementContainingOffset(Offset);
1897201360Srdivacky    T = ST->getContainedType(Idx);
1898201360Srdivacky    Offset -= Layout->getElementOffset(Idx);
1899201360Srdivacky    IdxTy = Type::getInt32Ty(T->getContext());
1900201360Srdivacky    return Idx;
1901193323Sed  }
1902226890Sdim  ArrayType *AT = cast<ArrayType>(T);
1903201360Srdivacky  T = AT->getElementType();
1904201360Srdivacky  uint64_t EltSize = TD->getTypeAllocSize(T);
1905201360Srdivacky  Idx = Offset / EltSize;
1906201360Srdivacky  Offset -= Idx * EltSize;
1907201360Srdivacky  IdxTy = Type::getInt64Ty(T->getContext());
1908201360Srdivacky  return Idx;
1909193323Sed}
1910193323Sed
1911201360Srdivacky/// RewriteGEP - Check if this GEP instruction moves the pointer across
1912201360Srdivacky/// elements of the alloca that are being split apart, and if so, rewrite
1913201360Srdivacky/// the GEP to be relative to the new element.
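///
/// For example (hypothetical IR), if [2 x { i32, i32 }] %A is split in two:
///   %p = getelementptr inbounds [2 x { i32, i32 }]* %A, i32 0, i32 1, i32 0
/// crosses from element 0 to element 1, so it is rewritten to address the
/// second new alloca (with a bitcast if the pointee types differ).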
1914201360Srdivackyvoid SROA::RewriteGEP(GetElementPtrInst *GEPI, AllocaInst *AI, uint64_t Offset,
1915201360Srdivacky                      SmallVector<AllocaInst*, 32> &NewElts) {
1916201360Srdivacky  uint64_t OldOffset = Offset;
1917201360Srdivacky  SmallVector<Value*, 8> Indices(GEPI->op_begin() + 1, GEPI->op_end());
1918226890Sdim  Offset += TD->getIndexedOffset(GEPI->getPointerOperandType(), Indices);
1919201360Srdivacky
1920201360Srdivacky  RewriteForScalarRepl(GEPI, AI, Offset, NewElts);
1921201360Srdivacky
1922226890Sdim  Type *T = AI->getAllocatedType();
1923226890Sdim  Type *IdxTy;
1924201360Srdivacky  uint64_t OldIdx = FindElementAndOffset(T, OldOffset, IdxTy);
1925201360Srdivacky  if (GEPI->getOperand(0) == AI)
1926201360Srdivacky    OldIdx = ~0ULL; // Force the GEP to be rewritten.
1927201360Srdivacky
1928201360Srdivacky  T = AI->getAllocatedType();
1929201360Srdivacky  uint64_t EltOffset = Offset;
1930201360Srdivacky  uint64_t Idx = FindElementAndOffset(T, EltOffset, IdxTy);
1931201360Srdivacky
1932201360Srdivacky  // If this GEP does not move the pointer across elements of the alloca
1933201360Srdivacky  // being split, then it does not need to be rewritten.
1934201360Srdivacky  if (Idx == OldIdx)
1935201360Srdivacky    return;
1936201360Srdivacky
1937226890Sdim  Type *i32Ty = Type::getInt32Ty(AI->getContext());
1938201360Srdivacky  SmallVector<Value*, 8> NewArgs;
1939201360Srdivacky  NewArgs.push_back(Constant::getNullValue(i32Ty));
1940201360Srdivacky  while (EltOffset != 0) {
1941201360Srdivacky    uint64_t EltIdx = FindElementAndOffset(T, EltOffset, IdxTy);
1942201360Srdivacky    NewArgs.push_back(ConstantInt::get(IdxTy, EltIdx));
1943201360Srdivacky  }
1944201360Srdivacky  Instruction *Val = NewElts[Idx];
1945201360Srdivacky  if (NewArgs.size() > 1) {
1946226890Sdim    Val = GetElementPtrInst::CreateInBounds(Val, NewArgs, "", GEPI);
1947201360Srdivacky    Val->takeName(GEPI);
1948201360Srdivacky  }
1949201360Srdivacky  if (Val->getType() != GEPI->getType())
1950203954Srdivacky    Val = new BitCastInst(Val, GEPI->getType(), Val->getName(), GEPI);
1951201360Srdivacky  GEPI->replaceAllUsesWith(Val);
1952201360Srdivacky  DeadInsts.push_back(GEPI);
1953201360Srdivacky}
1954201360Srdivacky
1955226890Sdim/// RewriteLifetimeIntrinsic - II is a lifetime.start/lifetime.end. Rewrite it
1956226890Sdim/// to mark the lifetime of the scalarized memory.
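///
/// For example (a sketch), a lifetime.start covering all of an { i32, i32 }
/// alloca becomes one lifetime.start on each element alloca; a marker that
/// covers only part of the memory gets a correspondingly truncated size on
/// the last element it reaches.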
1957226890Sdimvoid SROA::RewriteLifetimeIntrinsic(IntrinsicInst *II, AllocaInst *AI,
1958226890Sdim                                    uint64_t Offset,
1959226890Sdim                                    SmallVector<AllocaInst*, 32> &NewElts) {
1960226890Sdim  ConstantInt *OldSize = cast<ConstantInt>(II->getArgOperand(0));
1961226890Sdim  // Put matching lifetime markers on everything from Offset up to
1962226890Sdim  // Offset+OldSize.
1963226890Sdim  Type *AIType = AI->getAllocatedType();
1964226890Sdim  uint64_t NewOffset = Offset;
1965226890Sdim  Type *IdxTy;
1966226890Sdim  uint64_t Idx = FindElementAndOffset(AIType, NewOffset, IdxTy);
1967226890Sdim
1968226890Sdim  IRBuilder<> Builder(II);
1969226890Sdim  uint64_t Size = OldSize->getLimitedValue();
1970226890Sdim
1971226890Sdim  if (NewOffset) {
1972226890Sdim    // Splice the first element and index 'NewOffset' bytes in.  SROA will
1973226890Sdim    // split the alloca again later.
1974226890Sdim    Value *V = Builder.CreateBitCast(NewElts[Idx], Builder.getInt8PtrTy());
1975226890Sdim    V = Builder.CreateGEP(V, Builder.getInt64(NewOffset));
1976226890Sdim
1977226890Sdim    IdxTy = NewElts[Idx]->getAllocatedType();
1978226890Sdim    uint64_t EltSize = TD->getTypeAllocSize(IdxTy) - NewOffset;
1979226890Sdim    if (EltSize > Size) {
1980226890Sdim      EltSize = Size;
1981226890Sdim      Size = 0;
1982226890Sdim    } else {
1983226890Sdim      Size -= EltSize;
1984226890Sdim    }
1985226890Sdim    if (II->getIntrinsicID() == Intrinsic::lifetime_start)
1986226890Sdim      Builder.CreateLifetimeStart(V, Builder.getInt64(EltSize));
1987226890Sdim    else
1988226890Sdim      Builder.CreateLifetimeEnd(V, Builder.getInt64(EltSize));
1989226890Sdim    ++Idx;
1990226890Sdim  }
1991226890Sdim
1992226890Sdim  for (; Idx != NewElts.size() && Size; ++Idx) {
1993226890Sdim    IdxTy = NewElts[Idx]->getAllocatedType();
1994226890Sdim    uint64_t EltSize = TD->getTypeAllocSize(IdxTy);
1995226890Sdim    if (EltSize > Size) {
1996226890Sdim      EltSize = Size;
1997226890Sdim      Size = 0;
1998226890Sdim    } else {
1999226890Sdim      Size -= EltSize;
2000226890Sdim    }
2001226890Sdim    if (II->getIntrinsicID() == Intrinsic::lifetime_start)
2002226890Sdim      Builder.CreateLifetimeStart(NewElts[Idx],
2003226890Sdim                                  Builder.getInt64(EltSize));
2004226890Sdim    else
2005226890Sdim      Builder.CreateLifetimeEnd(NewElts[Idx],
2006226890Sdim                                Builder.getInt64(EltSize));
2007226890Sdim  }
2008226890Sdim  DeadInsts.push_back(II);
2009226890Sdim}
2010226890Sdim
2011193323Sed/// RewriteMemIntrinUserOfAlloca - MI is a memcpy/memset/memmove from or to AI.
2012193323Sed/// Rewrite it to copy or set the elements of the scalarized memory.
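///
/// For example (a sketch), a memset of a whole { i32, i32 } alloca to zero
/// becomes a store of i32 0 to each element alloca, and a memcpy to or from
/// it becomes a per-element load/store pair.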
2013201360Srdivackyvoid SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
2014198892Srdivacky                                        AllocaInst *AI,
2015193323Sed                                        SmallVector<AllocaInst*, 32> &NewElts) {
2016193323Sed  // If this is a memcpy/memmove, construct the other pointer as the
2017193323Sed  // appropriate type.  The "Other" pointer is the pointer that goes to memory
2018193323Sed  // that doesn't have anything to do with the alloca that we are promoting. For
2019193323Sed  // memset, this Value* stays null.
2020193323Sed  Value *OtherPtr = 0;
2021193323Sed  unsigned MemAlignment = MI->getAlignment();
2022193323Sed  if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(MI)) { // memmove/memcpy
2023201360Srdivacky    if (Inst == MTI->getRawDest())
2024193323Sed      OtherPtr = MTI->getRawSource();
2025193323Sed    else {
2026201360Srdivacky      assert(Inst == MTI->getRawSource());
2027193323Sed      OtherPtr = MTI->getRawDest();
2028193323Sed    }
2029193323Sed  }
2030200581Srdivacky
2031193323Sed  // If there is an other pointer, we want to convert it to the same pointer
2032193323Sed  // type as AI has, so we can GEP through it safely.
2033193323Sed  if (OtherPtr) {
2034210299Sed    unsigned AddrSpace =
2035210299Sed      cast<PointerType>(OtherPtr->getType())->getAddressSpace();
2036201360Srdivacky
2037201360Srdivacky    // Remove bitcasts and all-zero GEPs from OtherPtr.  This is an
2038201360Srdivacky    // optimization, but it's also required to detect the corner case where
2039201360Srdivacky    // both pointer operands are referencing the same memory, and where
2040201360Srdivacky    // OtherPtr may be a bitcast or GEP that is currently being rewritten.  (This
2041201360Srdivacky    // function is only called for mem intrinsics that access the whole
2042201360Srdivacky    // aggregate, so non-zero GEPs are not an issue here.)
2043210299Sed    OtherPtr = OtherPtr->stripPointerCasts();
2044218893Sdim
2045202878Srdivacky    // Copying the alloca to itself is a no-op: just delete it.
2046202878Srdivacky    if (OtherPtr == AI || OtherPtr == NewElts[0]) {
2047202878Srdivacky      // This code will run twice for a no-op memcpy -- once for each operand.
2048202878Srdivacky      // Put only one reference to MI on the DeadInsts list.
2049202878Srdivacky      for (SmallVector<Value*, 32>::const_iterator I = DeadInsts.begin(),
2050202878Srdivacky             E = DeadInsts.end(); I != E; ++I)
2051202878Srdivacky        if (*I == MI) return;
2052202878Srdivacky      DeadInsts.push_back(MI);
2053201360Srdivacky      return;
2054202878Srdivacky    }
2055218893Sdim
2056193323Sed    // If the pointer is not the right type, insert a bitcast to the right
2057193323Sed    // type.
2058226890Sdim    Type *NewTy =
2059210299Sed      PointerType::get(AI->getType()->getElementType(), AddrSpace);
2060218893Sdim
2061210299Sed    if (OtherPtr->getType() != NewTy)
2062210299Sed      OtherPtr = new BitCastInst(OtherPtr, NewTy, OtherPtr->getName(), MI);
2063193323Sed  }
2064218893Sdim
2065193323Sed  // Process each element of the aggregate.
2066201360Srdivacky  bool SROADest = MI->getRawDest() == Inst;
2067218893Sdim
2068198090Srdivacky  Constant *Zero = Constant::getNullValue(Type::getInt32Ty(MI->getContext()));
2069193323Sed
2070193323Sed  for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
2071193323Sed    // If this is a memcpy/memmove, emit a GEP of the other element address.
2072193323Sed    Value *OtherElt = 0;
2073193323Sed    unsigned OtherEltAlign = MemAlignment;
2074218893Sdim
2075202878Srdivacky    if (OtherPtr) {
2076198090Srdivacky      Value *Idx[2] = { Zero,
2077198090Srdivacky                      ConstantInt::get(Type::getInt32Ty(MI->getContext()), i) };
2078226890Sdim      OtherElt = GetElementPtrInst::CreateInBounds(OtherPtr, Idx,
2079203954Srdivacky                                              OtherPtr->getName()+"."+Twine(i),
2080201360Srdivacky                                                   MI);
2081193323Sed      uint64_t EltOffset;
2082226890Sdim      PointerType *OtherPtrTy = cast<PointerType>(OtherPtr->getType());
2083226890Sdim      Type *OtherTy = OtherPtrTy->getElementType();
2084226890Sdim      if (StructType *ST = dyn_cast<StructType>(OtherTy)) {
2085193323Sed        EltOffset = TD->getStructLayout(ST)->getElementOffset(i);
2086193323Sed      } else {
2087226890Sdim        Type *EltTy = cast<SequentialType>(OtherTy)->getElementType();
2088193323Sed        EltOffset = TD->getTypeAllocSize(EltTy)*i;
2089193323Sed      }
2090218893Sdim
2091193323Sed      // The alignment of the other pointer is the guaranteed alignment of the
2092193323Sed      // element, which is affected by both the known alignment of the whole
2093193323Sed      // mem intrinsic and the alignment of the element.  If the alignment of
2094193323Sed      // the memcpy (e.g.) is 32 but the element is at a 4-byte offset, then the
2095193323Sed      // known alignment is just 4 bytes.
2096193323Sed      OtherEltAlign = (unsigned)MinAlign(OtherEltAlign, EltOffset);
2097193323Sed    }
2098218893Sdim
2099193323Sed    Value *EltPtr = NewElts[i];
2100226890Sdim    Type *EltTy = cast<PointerType>(EltPtr->getType())->getElementType();
2101218893Sdim
2102193323Sed    // If we got down to a scalar, insert a load or store as appropriate.
2103193323Sed    if (EltTy->isSingleValueType()) {
2104193323Sed      if (isa<MemTransferInst>(MI)) {
2105193323Sed        if (SROADest) {
2106193323Sed          // From Other to Alloca.
2107193323Sed          Value *Elt = new LoadInst(OtherElt, "tmp", false, OtherEltAlign, MI);
2108193323Sed          new StoreInst(Elt, EltPtr, MI);
2109193323Sed        } else {
2110193323Sed          // From Alloca to Other.
2111193323Sed          Value *Elt = new LoadInst(EltPtr, "tmp", MI);
2112193323Sed          new StoreInst(Elt, OtherElt, false, OtherEltAlign, MI);
2113193323Sed        }
2114193323Sed        continue;
2115193323Sed      }
2116193323Sed      assert(isa<MemSetInst>(MI));
2117218893Sdim
2118193323Sed      // If the stored element is zero (common case), just store a null
2119193323Sed      // constant.
2120193323Sed      Constant *StoreVal;
2121210299Sed      if (ConstantInt *CI = dyn_cast<ConstantInt>(MI->getArgOperand(1))) {
2122193323Sed        if (CI->isZero()) {
2123198090Srdivacky          StoreVal = Constant::getNullValue(EltTy);  // 0.0, null, 0, <0,0>
2124193323Sed        } else {
2125193323Sed          // If EltTy is a vector type, get the element type.
2126226890Sdim          Type *ValTy = EltTy->getScalarType();
2127194612Sed
2128193323Sed          // Construct an integer with the right value.
2129193323Sed          unsigned EltSize = TD->getTypeSizeInBits(ValTy);
2130193323Sed          APInt OneVal(EltSize, CI->getZExtValue());
2131193323Sed          APInt TotalVal(OneVal);
2132193323Sed          // Set each byte.
2133193323Sed          for (unsigned i = 0; 8*i < EltSize; ++i) {
2134193323Sed            TotalVal = TotalVal.shl(8);
2135193323Sed            TotalVal |= OneVal;
2136193323Sed          }
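          // e.g. an i32 element with memset value 0xAB ends up as 0xABABABAB.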
2137218893Sdim
2138193323Sed          // Convert the integer value to the appropriate type.
2139207618Srdivacky          StoreVal = ConstantInt::get(CI->getContext(), TotalVal);
2140204642Srdivacky          if (ValTy->isPointerTy())
2141198090Srdivacky            StoreVal = ConstantExpr::getIntToPtr(StoreVal, ValTy);
2142203954Srdivacky          else if (ValTy->isFloatingPointTy())
2143198090Srdivacky            StoreVal = ConstantExpr::getBitCast(StoreVal, ValTy);
2144193323Sed          assert(StoreVal->getType() == ValTy && "Type mismatch!");
2145218893Sdim
2146193323Sed          // If the requested value was a vector constant, create it.
2147226890Sdim          if (EltTy->isVectorTy()) {
2148226890Sdim            unsigned NumElts = cast<VectorType>(EltTy)->getNumElements();
2149193323Sed            SmallVector<Constant*, 16> Elts(NumElts, StoreVal);
2150218893Sdim            StoreVal = ConstantVector::get(Elts);
2151193323Sed          }
2152193323Sed        }
2153193323Sed        new StoreInst(StoreVal, EltPtr, MI);
2154193323Sed        continue;
2155193323Sed      }
2156193323Sed      // Otherwise, if we're storing a byte variable, use a memset call for
2157193323Sed      // this element.
2158193323Sed    }
2159218893Sdim
2160193323Sed    unsigned EltSize = TD->getTypeAllocSize(EltTy);
2161218893Sdim
2162218893Sdim    IRBuilder<> Builder(MI);
2163218893Sdim
2164193323Sed    // Finally, insert the meminst for this element.
2165218893Sdim    if (isa<MemSetInst>(MI)) {
2166218893Sdim      Builder.CreateMemSet(EltPtr, MI->getArgOperand(1), EltSize,
2167218893Sdim                           MI->isVolatile());
2168193323Sed    } else {
2169218893Sdim      assert(isa<MemTransferInst>(MI));
2170218893Sdim      Value *Dst = SROADest ? EltPtr : OtherElt;  // Dest ptr
2171218893Sdim      Value *Src = SROADest ? OtherElt : EltPtr;  // Src ptr
2172218893Sdim
2173218893Sdim      if (isa<MemCpyInst>(MI))
2174218893Sdim        Builder.CreateMemCpy(Dst, Src, EltSize, OtherEltAlign,MI->isVolatile());
2175218893Sdim      else
2176218893Sdim        Builder.CreateMemMove(Dst, Src, EltSize,OtherEltAlign,MI->isVolatile());
2177193323Sed    }
2178193323Sed  }
2179201360Srdivacky  DeadInsts.push_back(MI);
2180193323Sed}
2181193323Sed
2182200581Srdivacky/// RewriteStoreUserOfWholeAlloca - We found a store of an integer that
2183193323Sed/// overwrites the entire allocation.  Extract out the pieces of the stored
2184193323Sed/// integer and store them individually.
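/// For example (an illustrative sketch, little-endian; assume the i64 %V is
/// stored over an alloca of {i32, i32} split into %A.0 and %A.1):
///   %lo = trunc i64 %V to i32
///   store i32 %lo, i32* %A.0
///   %hi.shr = lshr i64 %V, 32
///   %hi = trunc i64 %hi.shr to i32
///   store i32 %hi, i32* %A.1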
2185198892Srdivackyvoid SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
2186193323Sed                                         SmallVector<AllocaInst*, 32> &NewElts){
2187193323Sed  // Extract each element out of the integer according to its structure offset
2188193323Sed  // and store the element value to the individual alloca.
2189193323Sed  Value *SrcVal = SI->getOperand(0);
2190226890Sdim  Type *AllocaEltTy = AI->getAllocatedType();
2191193323Sed  uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy);
2192218893Sdim
2193218893Sdim  IRBuilder<> Builder(SI);
2194193323Sed
2195193323Sed  // Handle tail padding by extending the operand
2196193323Sed  if (TD->getTypeSizeInBits(SrcVal->getType()) != AllocaSizeBits)
2197218893Sdim    SrcVal = Builder.CreateZExt(SrcVal,
2198218893Sdim                            IntegerType::get(SI->getContext(), AllocaSizeBits));
2199193323Sed
2200202375Srdivacky  DEBUG(dbgs() << "PROMOTING STORE TO WHOLE ALLOCA: " << *AI << '\n' << *SI
2201198090Srdivacky               << '\n');
2202193323Sed
2203193323Sed  // There are two forms here: AI could be an array or struct.  Both cases
2204193323Sed  // have different ways to compute the element offset.
2205226890Sdim  if (StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) {
2206193323Sed    const StructLayout *Layout = TD->getStructLayout(EltSTy);
2207218893Sdim
2208193323Sed    for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
2209193323Sed      // Get the number of bits to shift SrcVal to get the value.
2210226890Sdim      Type *FieldTy = EltSTy->getElementType(i);
2211193323Sed      uint64_t Shift = Layout->getElementOffsetInBits(i);
2212218893Sdim
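      // On big-endian targets the field at offset 0 occupies the most
      // significant bits of the stored integer, so the shift counts down
      // from the top instead of up from bit 0.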
2213193323Sed      if (TD->isBigEndian())
2214193323Sed        Shift = AllocaSizeBits-Shift-TD->getTypeAllocSizeInBits(FieldTy);
2215218893Sdim
2216193323Sed      Value *EltVal = SrcVal;
2217193323Sed      if (Shift) {
2218198090Srdivacky        Value *ShiftVal = ConstantInt::get(EltVal->getType(), Shift);
2219218893Sdim        EltVal = Builder.CreateLShr(EltVal, ShiftVal, "sroa.store.elt");
2220193323Sed      }
2221218893Sdim
2222193323Sed      // Truncate down to an integer of the right size.
2223193323Sed      uint64_t FieldSizeBits = TD->getTypeSizeInBits(FieldTy);
2224218893Sdim
2225193323Sed      // Ignore zero sized fields like {}, they obviously contain no data.
2226193323Sed      if (FieldSizeBits == 0) continue;
2227218893Sdim
2228193323Sed      if (FieldSizeBits != AllocaSizeBits)
2229218893Sdim        EltVal = Builder.CreateTrunc(EltVal,
2230218893Sdim                             IntegerType::get(SI->getContext(), FieldSizeBits));
2231193323Sed      Value *DestField = NewElts[i];
2232193323Sed      if (EltVal->getType() == FieldTy) {
2233193323Sed        // Storing to an integer field of this size, just do it.
2234204642Srdivacky      } else if (FieldTy->isFloatingPointTy() || FieldTy->isVectorTy()) {
2235193323Sed        // Bitcast to the right element type (for fp/vector values).
2236218893Sdim        EltVal = Builder.CreateBitCast(EltVal, FieldTy);
2237193323Sed      } else {
2238193323Sed        // Otherwise, bitcast the dest pointer (for aggregates).
2239218893Sdim        DestField = Builder.CreateBitCast(DestField,
2240218893Sdim                                     PointerType::getUnqual(EltVal->getType()));
2241193323Sed      }
2242193323Sed      new StoreInst(EltVal, DestField, SI);
2243193323Sed    }
2244218893Sdim
2245193323Sed  } else {
2246226890Sdim    ArrayType *ATy = cast<ArrayType>(AllocaEltTy);
2247226890Sdim    Type *ArrayEltTy = ATy->getElementType();
2248193323Sed    uint64_t ElementOffset = TD->getTypeAllocSizeInBits(ArrayEltTy);
2249193323Sed    uint64_t ElementSizeBits = TD->getTypeSizeInBits(ArrayEltTy);
2250193323Sed
2251193323Sed    uint64_t Shift;
2252218893Sdim
2253193323Sed    if (TD->isBigEndian())
2254193323Sed      Shift = AllocaSizeBits-ElementOffset;
2255218893Sdim    else
2256193323Sed      Shift = 0;
2257218893Sdim
2258193323Sed    for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
2259193323Sed      // Ignore zero sized fields like {}, they obviously contain no data.
2260193323Sed      if (ElementSizeBits == 0) continue;
2261218893Sdim
2262193323Sed      Value *EltVal = SrcVal;
2263193323Sed      if (Shift) {
2264198090Srdivacky        Value *ShiftVal = ConstantInt::get(EltVal->getType(), Shift);
2265218893Sdim        EltVal = Builder.CreateLShr(EltVal, ShiftVal, "sroa.store.elt");
2266193323Sed      }
2267218893Sdim
2268193323Sed      // Truncate down to an integer of the right size.
2269193323Sed      if (ElementSizeBits != AllocaSizeBits)
2270218893Sdim        EltVal = Builder.CreateTrunc(EltVal,
2271218893Sdim                                     IntegerType::get(SI->getContext(),
2272218893Sdim                                                      ElementSizeBits));
2273193323Sed      Value *DestField = NewElts[i];
2274193323Sed      if (EltVal->getType() == ArrayEltTy) {
2275193323Sed        // Storing to an integer field of this size, just do it.
2276203954Srdivacky      } else if (ArrayEltTy->isFloatingPointTy() ||
2277204642Srdivacky                 ArrayEltTy->isVectorTy()) {
2278193323Sed        // Bitcast to the right element type (for fp/vector values).
2279218893Sdim        EltVal = Builder.CreateBitCast(EltVal, ArrayEltTy);
2280193323Sed      } else {
2281193323Sed        // Otherwise, bitcast the dest pointer (for aggregates).
2282218893Sdim        DestField = Builder.CreateBitCast(DestField,
2283218893Sdim                                     PointerType::getUnqual(EltVal->getType()));
2284193323Sed      }
2285193323Sed      new StoreInst(EltVal, DestField, SI);
2286218893Sdim
2287193323Sed      if (TD->isBigEndian())
2288193323Sed        Shift -= ElementOffset;
2289218893Sdim      else
2290193323Sed        Shift += ElementOffset;
2291193323Sed    }
2292193323Sed  }
2293218893Sdim
2294201360Srdivacky  DeadInsts.push_back(SI);
2295193323Sed}
2296193323Sed
2297200581Srdivacky/// RewriteLoadUserOfWholeAlloca - We found a load of the entire allocation to
2298193323Sed/// an integer.  Load the individual pieces to form the aggregate value.
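/// For example (an illustrative sketch, little-endian; assume an i64 load of
/// an alloca of {i32, i32} split into %A.0 and %A.1):
///   %lo = load i32* %A.0
///   %lo.z = zext i32 %lo to i64
///   %hi = load i32* %A.1
///   %hi.z = zext i32 %hi to i64
///   %hi.sh = shl i64 %hi.z, 32
///   %res = or i64 %hi.sh, %lo.z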
2299198892Srdivackyvoid SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
2300193323Sed                                        SmallVector<AllocaInst*, 32> &NewElts) {
2301193323Sed  // Extract each element out of the NewElts according to its structure offset
2302193323Sed  // and form the result value.
2303226890Sdim  Type *AllocaEltTy = AI->getAllocatedType();
2304193323Sed  uint64_t AllocaSizeBits = TD->getTypeAllocSizeInBits(AllocaEltTy);
2305218893Sdim
2306202375Srdivacky  DEBUG(dbgs() << "PROMOTING LOAD OF WHOLE ALLOCA: " << *AI << '\n' << *LI
2307198090Srdivacky               << '\n');
2308218893Sdim
2309193323Sed  // There are two forms here: AI could be an array or struct.  Both cases
2310193323Sed  // have different ways to compute the element offset.
2311193323Sed  const StructLayout *Layout = 0;
2312193323Sed  uint64_t ArrayEltBitOffset = 0;
2313226890Sdim  if (StructType *EltSTy = dyn_cast<StructType>(AllocaEltTy)) {
2314193323Sed    Layout = TD->getStructLayout(EltSTy);
2315193323Sed  } else {
2316226890Sdim    Type *ArrayEltTy = cast<ArrayType>(AllocaEltTy)->getElementType();
2317193323Sed    ArrayEltBitOffset = TD->getTypeAllocSizeInBits(ArrayEltTy);
2318218893Sdim  }
2319218893Sdim
2320218893Sdim  Value *ResultVal =
2321198090Srdivacky    Constant::getNullValue(IntegerType::get(LI->getContext(), AllocaSizeBits));
2322218893Sdim
2323193323Sed  for (unsigned i = 0, e = NewElts.size(); i != e; ++i) {
2324193323Sed    // Load the value from the alloca.  If the NewElt is an aggregate, cast
2325193323Sed    // the pointer to an integer of the same size before doing the load.
2326193323Sed    Value *SrcField = NewElts[i];
2327226890Sdim    Type *FieldTy =
2328193323Sed      cast<PointerType>(SrcField->getType())->getElementType();
2329193323Sed    uint64_t FieldSizeBits = TD->getTypeSizeInBits(FieldTy);
2330218893Sdim
2331193323Sed    // Ignore zero sized fields like {}, they obviously contain no data.
2332193323Sed    if (FieldSizeBits == 0) continue;
2333218893Sdim
2334226890Sdim    IntegerType *FieldIntTy = IntegerType::get(LI->getContext(),
2335198090Srdivacky                                                     FieldSizeBits);
2336204642Srdivacky    if (!FieldTy->isIntegerTy() && !FieldTy->isFloatingPointTy() &&
2337204642Srdivacky        !FieldTy->isVectorTy())
2338195340Sed      SrcField = new BitCastInst(SrcField,
2339198090Srdivacky                                 PointerType::getUnqual(FieldIntTy),
2340193323Sed                                 "", LI);
2341193323Sed    SrcField = new LoadInst(SrcField, "sroa.load.elt", LI);
2342193323Sed
2343193323Sed    // If SrcField is an fp or vector of the right size that isn't an integer
2344193323Sed    // type, bitcast it to an integer so we can shift it.
2345193323Sed    if (SrcField->getType() != FieldIntTy)
2346193323Sed      SrcField = new BitCastInst(SrcField, FieldIntTy, "", LI);
2347193323Sed
2348193323Sed    // Zero extend the field to be the same size as the final alloca so that
2349193323Sed    // we can shift and insert it.
2350193323Sed    if (SrcField->getType() != ResultVal->getType())
2351193323Sed      SrcField = new ZExtInst(SrcField, ResultVal->getType(), "", LI);
2352218893Sdim
2353193323Sed    // Determine the number of bits to shift SrcField.
2354193323Sed    uint64_t Shift;
2355193323Sed    if (Layout) // Struct case.
2356193323Sed      Shift = Layout->getElementOffsetInBits(i);
2357193323Sed    else  // Array case.
2358193323Sed      Shift = i*ArrayEltBitOffset;
2359218893Sdim
2360193323Sed    if (TD->isBigEndian())
2361193323Sed      Shift = AllocaSizeBits-Shift-FieldIntTy->getBitWidth();
2362218893Sdim
2363193323Sed    if (Shift) {
2364198090Srdivacky      Value *ShiftVal = ConstantInt::get(SrcField->getType(), Shift);
2365193323Sed      SrcField = BinaryOperator::CreateShl(SrcField, ShiftVal, "", LI);
2366193323Sed    }
2367193323Sed
2368210299Sed    // Don't create an 'or x, 0' on the first iteration.
2369210299Sed    if (!isa<Constant>(ResultVal) ||
2370210299Sed        !cast<Constant>(ResultVal)->isNullValue())
2371210299Sed      ResultVal = BinaryOperator::CreateOr(SrcField, ResultVal, "", LI);
2372210299Sed    else
2373210299Sed      ResultVal = SrcField;
2374193323Sed  }
2375193323Sed
2376193323Sed  // Handle tail padding by truncating the result
2377193323Sed  if (TD->getTypeSizeInBits(LI->getType()) != AllocaSizeBits)
2378193323Sed    ResultVal = new TruncInst(ResultVal, LI->getType(), "", LI);
2379193323Sed
2380193323Sed  LI->replaceAllUsesWith(ResultVal);
2381201360Srdivacky  DeadInsts.push_back(LI);
2382193323Sed}
2383193323Sed
2384193323Sed/// HasPadding - Return true if the specified type has any structure or
2385218893Sdim/// alignment padding in between the elements that would be split apart
2386218893Sdim/// by SROA; return false otherwise.
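/// For example, {i8, i32} typically has three bytes of padding between its
/// two fields, while {i32, i8} typically has three bytes of tail padding.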
2387226890Sdimstatic bool HasPadding(Type *Ty, const TargetData &TD) {
2388226890Sdim  if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
2389218893Sdim    Ty = ATy->getElementType();
2390218893Sdim    return TD.getTypeSizeInBits(Ty) != TD.getTypeAllocSizeInBits(Ty);
2391218893Sdim  }
2392193323Sed
2393218893Sdim  // SROA currently handles only Arrays and Structs.
2394226890Sdim  StructType *STy = cast<StructType>(Ty);
2395218893Sdim  const StructLayout *SL = TD.getStructLayout(STy);
2396218893Sdim  unsigned PrevFieldBitOffset = 0;
2397218893Sdim  for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
2398218893Sdim    unsigned FieldBitOffset = SL->getElementOffsetInBits(i);
2399193323Sed
2400218893Sdim    // Check to see if there is any padding between this element and the
2401218893Sdim    // previous one.
2402218893Sdim    if (i) {
2403218893Sdim      unsigned PrevFieldEnd =
2404193323Sed        PrevFieldBitOffset+TD.getTypeSizeInBits(STy->getElementType(i-1));
2405218893Sdim      if (PrevFieldEnd < FieldBitOffset)
2406193323Sed        return true;
2407193323Sed    }
2408218893Sdim    PrevFieldBitOffset = FieldBitOffset;
2409193323Sed  }
2410218893Sdim  // Check for tail padding.
2411218893Sdim  if (unsigned EltCount = STy->getNumElements()) {
2412218893Sdim    unsigned PrevFieldEnd = PrevFieldBitOffset +
2413218893Sdim      TD.getTypeSizeInBits(STy->getElementType(EltCount-1));
2414218893Sdim    if (PrevFieldEnd < SL->getSizeInBits())
2415218893Sdim      return true;
2416218893Sdim  }
2417218893Sdim  return false;
2418193323Sed}
2419193323Sed
2420193323Sed/// isSafeAllocaToScalarRepl - Check to see if the specified allocation of an
2421193323Sed/// aggregate can be broken down into elements.  Return false if it is not
2422193323Sed/// safe, true if it is.
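/// For example, an alloca whose address escapes into an arbitrary (non-
/// intrinsic) call is rejected, since the callee could touch any part of it.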
2423202878Srdivackybool SROA::isSafeAllocaToScalarRepl(AllocaInst *AI) {
2424193323Sed  // Loop over the use list of the alloca.  We can only transform it if all of
2425193323Sed  // the users are safe to transform.
2426218893Sdim  AllocaInfo Info(AI);
2427218893Sdim
2428218893Sdim  isSafeForScalarRepl(AI, 0, Info);
2429201360Srdivacky  if (Info.isUnsafe) {
2430202375Srdivacky    DEBUG(dbgs() << "Cannot transform: " << *AI << '\n');
2431202878Srdivacky    return false;
2432193323Sed  }
2433218893Sdim
2434193323Sed  // Okay, we know all the users are promotable.  If the aggregate is a memcpy
2435193323Sed  // source and destination, we have to be careful.  In particular, the memcpy
2436193323Sed  // could be moving around elements that live in structure padding of the LLVM
2437193323Sed  // types, but may actually be used.  In these cases, we refuse to promote the
2438193323Sed  // struct.
2439193323Sed  if (Info.isMemCpySrc && Info.isMemCpyDst &&
2440201360Srdivacky      HasPadding(AI->getAllocatedType(), *TD))
2441202878Srdivacky    return false;
2442193323Sed
2443218893Sdim  // If the alloca never has an access to just *part* of it, but is accessed
2444218893Sdim  // via loads and stores, then we should use ConvertToScalarInfo to promote
2445218893Sdim  // the alloca instead of promoting each piece at a time and inserting fission
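  // For example, an alloca of {i32, i32} that is only ever loaded and stored
  // as a whole is better converted to a single scalar than split into pieces
  // that must be re-fused at every access.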
2446218893Sdim  // and fusion code.
2447218893Sdim  if (!Info.hasSubelementAccess && Info.hasALoadOrStore) {
2448218893Sdim    // If the struct/array just has one element, use basic SRoA.
2449226890Sdim    if (StructType *ST = dyn_cast<StructType>(AI->getAllocatedType())) {
2450218893Sdim      if (ST->getNumElements() > 1) return false;
2451218893Sdim    } else {
2452218893Sdim      if (cast<ArrayType>(AI->getAllocatedType())->getNumElements() > 1)
2453218893Sdim        return false;
2454218893Sdim    }
2455218893Sdim  }
2456218893Sdim
2457202878Srdivacky  return true;
2458193323Sed}
2459193323Sed
2460193323Sed
2461193323Sed
2462193323Sed/// PointsToConstantGlobal - Return true if V (possibly indirectly) points to
2463193323Sed/// some part of a constant global variable.  This intentionally only accepts
2464193323Sed/// constant expressions because we can't rewrite arbitrary instructions.
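/// For example, the constant expression
///   getelementptr ([4 x i32]* @table, i32 0, i32 2)
/// points into @table (an illustrative name), so it points to constant
/// memory exactly when @table is a constant global.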
2465193323Sedstatic bool PointsToConstantGlobal(Value *V) {
2466193323Sed  if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
2467193323Sed    return GV->isConstant();
2468193323Sed  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
2469218893Sdim    if (CE->getOpcode() == Instruction::BitCast ||
2470193323Sed        CE->getOpcode() == Instruction::GetElementPtr)
2471193323Sed      return PointsToConstantGlobal(CE->getOperand(0));
2472193323Sed  return false;
2473193323Sed}
2474193323Sed
2475193323Sed/// isOnlyCopiedFromConstantGlobal - Recursively walk the uses of a (derived)
2476193323Sed/// pointer to an alloca.  Ignore any reads of the pointer, return false if we
2477193323Sed/// see any stores or other unknown uses.  If we see pointer arithmetic, keep
2478193323Sed/// track of whether it moves the pointer (with isOffset) but otherwise traverse
2479193323Sed/// the uses.  If we see a memcpy/memmove that targets a pointer to the start
2480193323Sed/// of the alloca, and the source pointer is a pointer to a constant global, we
2481193323Sed/// can optimize this.
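/// The canonical pattern this recognizes is (an illustrative sketch; the
/// names are made up):
///   %A.cast = bitcast [16 x i8]* %A to i8*
///   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %A.cast, i8* %G.cast,
///                                        i64 16, i32 1, i1 false)
/// where %A is the alloca, %G.cast points into a constant global, and every
/// other use of %A only reads the memory.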
2482224145Sdimstatic bool
2483224145SdimisOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
2484224145Sdim                               bool isOffset,
2485224145Sdim                               SmallVector<Instruction *, 4> &LifetimeMarkers) {
2486224145Sdim  // We track lifetime intrinsics as we encounter them.  If we decide to go
2487224145Sdim  // ahead and replace the value with the global, this lets the caller quickly
2488224145Sdim  // eliminate the markers.
2489224145Sdim
2490193323Sed  for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI!=E; ++UI) {
2491207618Srdivacky    User *U = cast<Instruction>(*UI);
2492207618Srdivacky
2493218893Sdim    if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
2494193323Sed      // Ignore non-volatile loads, they are always ok.
2495226890Sdim      if (!LI->isSimple()) return false;
2496218893Sdim      continue;
2497218893Sdim    }
2498218893Sdim
2499207618Srdivacky    if (BitCastInst *BCI = dyn_cast<BitCastInst>(U)) {
2500193323Sed      // If uses of the bitcast are ok, we are ok.
2501224145Sdim      if (!isOnlyCopiedFromConstantGlobal(BCI, TheCopy, isOffset,
2502224145Sdim                                          LifetimeMarkers))
2503193323Sed        return false;
2504193323Sed      continue;
2505193323Sed    }
2506207618Srdivacky    if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
2507193323Sed      // If the GEP has all zero indices, it doesn't offset the pointer.  If it
2508193323Sed      // doesn't, it does.
2509193323Sed      if (!isOnlyCopiedFromConstantGlobal(GEP, TheCopy,
2510224145Sdim                                          isOffset || !GEP->hasAllZeroIndices(),
2511224145Sdim                                          LifetimeMarkers))
2512193323Sed        return false;
2513193323Sed      continue;
2514193323Sed    }
2515218893Sdim
2516218893Sdim    if (CallSite CS = U) {
2517218893Sdim      // If this is the function being called then we treat it like a load and
2518218893Sdim      // ignore it.
2519218893Sdim      if (CS.isCallee(UI))
2520218893Sdim        continue;
2521218893Sdim
2522223017Sdim      // If this is a readonly/readnone call site, then we know it is just a
2523223017Sdim      // load (but one that potentially returns the value itself), so we can
2524223017Sdim      // ignore it if we know that the value isn't captured.
2525223017Sdim      unsigned ArgNo = CS.getArgumentNo(UI);
2526223017Sdim      if (CS.onlyReadsMemory() &&
2527223017Sdim          (CS.getInstruction()->use_empty() ||
2528223017Sdim           CS.paramHasAttr(ArgNo+1, Attribute::NoCapture)))
2529223017Sdim        continue;
2530223017Sdim
2531218893Sdim      // If this is being passed as a byval argument, the caller is making a
2532218893Sdim      // copy, so it is only a read of the alloca.
2533218893Sdim      if (CS.paramHasAttr(ArgNo+1, Attribute::ByVal))
2534218893Sdim        continue;
2535218893Sdim    }
2536218893Sdim
2537224145Sdim    // Lifetime intrinsics can be handled by the caller.
2538224145Sdim    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U)) {
2539224145Sdim      if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
2540224145Sdim          II->getIntrinsicID() == Intrinsic::lifetime_end) {
2541224145Sdim        assert(II->use_empty() && "Lifetime markers have no result to use!");
2542224145Sdim        LifetimeMarkers.push_back(II);
2543224145Sdim        continue;
2544224145Sdim      }
2545224145Sdim    }
2546224145Sdim
2547193323Sed    // If this isn't our memcpy/memmove, reject it as something we can't
2548193323Sed    // handle.
2549207618Srdivacky    MemTransferInst *MI = dyn_cast<MemTransferInst>(U);
2550207618Srdivacky    if (MI == 0)
2551193323Sed      return false;
2552193323Sed
2553218893Sdim    // If the transfer uses the alloca as its source, ignore it since it is
2554218893Sdim    // only a read (unless the transfer is volatile).
2555218893Sdim    if (UI.getOperandNo() == 1) {
2556218893Sdim      if (MI->isVolatile()) return false;
2557218893Sdim      continue;
2558218893Sdim    }
2559218893Sdim
2560193323Sed    // If we already have seen a copy, reject the second one.
2561193323Sed    if (TheCopy) return false;
2562218893Sdim
2563193323Sed    // If the pointer has been offset from the start of the alloca, we can't
2564193323Sed    // safely handle this.
2565193323Sed    if (isOffset) return false;
2566193323Sed
2567193323Sed    // If the memintrinsic isn't using the alloca as the dest, reject it.
2568212904Sdim    if (UI.getOperandNo() != 0) return false;
2569218893Sdim
2570193323Sed    // If the source of the memcpy/move is not a constant global, reject it.
2571207618Srdivacky    if (!PointsToConstantGlobal(MI->getSource()))
2572193323Sed      return false;
2573218893Sdim
2574193323Sed    // Otherwise, the transform is safe.  Remember the copy instruction.
2575193323Sed    TheCopy = MI;
2576193323Sed  }
2577193323Sed  return true;
2578193323Sed}
2579193323Sed
2580193323Sed/// isOnlyCopiedFromConstantGlobal - Return the copy instruction if the given
2581193323Sed/// alloca is only modified by a copy from a constant global, and null if not.
2582193323Sed/// If we can prove this, we can replace uses of the alloca with the global.
2583224145SdimMemTransferInst *
2584224145SdimSROA::isOnlyCopiedFromConstantGlobal(AllocaInst *AI,
2585224145Sdim                                     SmallVector<Instruction*, 4> &ToDelete) {
2586207618Srdivacky  MemTransferInst *TheCopy = 0;
2587224145Sdim  if (::isOnlyCopiedFromConstantGlobal(AI, TheCopy, false, ToDelete))
2588193323Sed    return TheCopy;
2589193323Sed  return 0;
2590193323Sed}
2591