1//===--- Scalarizer.cpp - Scalarize vector operations ---------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This pass converts vector operations into scalar operations, in order
11// to expose optimization opportunities on the individual scalar operations.
12// It is mainly intended for targets that do not have vector units, but it
13// may also be useful for revectorizing code to different vector widths.
14//
15//===----------------------------------------------------------------------===//
16
17#include "llvm/ADT/STLExtras.h"
18#include "llvm/IR/IRBuilder.h"
19#include "llvm/IR/InstVisitor.h"
20#include "llvm/Pass.h"
21#include "llvm/Support/CommandLine.h"
22#include "llvm/Transforms/Scalar.h"
23#include "llvm/Transforms/Utils/BasicBlockUtils.h"
24
25using namespace llvm;
26
27#define DEBUG_TYPE "scalarizer"
28
29namespace {
30// Used to store the scattered form of a vector.
31typedef SmallVector<Value *, 8> ValueVector;
32
33// Used to map a vector Value to its scattered form.  We use std::map
34// because we want iterators to persist across insertion and because the
35// values are relatively large.
36typedef std::map<Value *, ValueVector> ScatterMap;
37
38// Lists Instructions that have been replaced with scalar implementations,
39// along with a pointer to their scattered forms.
40typedef SmallVector<std::pair<Instruction *, ValueVector *>, 16> GatherList;
41
42// Provides a very limited vector-like interface for lazily accessing one
43// component of a scattered vector or vector pointer.
44class Scatterer {
45public:
46  Scatterer() {}
47
48  // Scatter V into Size components.  If new instructions are needed,
49  // insert them before BBI in BB.  If Cache is nonnull, use it to cache
50  // the results.
51  Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v,
52            ValueVector *cachePtr = nullptr);
53
54  // Return component I, creating a new Value for it if necessary.
55  Value *operator[](unsigned I);
56
57  // Return the number of components.
58  unsigned size() const { return Size; }
59
60private:
61  BasicBlock *BB;
62  BasicBlock::iterator BBI;
63  Value *V;
64  ValueVector *CachePtr;
65  PointerType *PtrTy;
66  ValueVector Tmp;
67  unsigned Size;
68};
69
70// FCmpSpliiter(FCI)(Builder, X, Y, Name) uses Builder to create an FCmp
71// called Name that compares X and Y in the same way as FCI.
72struct FCmpSplitter {
73  FCmpSplitter(FCmpInst &fci) : FCI(fci) {}
74  Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1,
75                    const Twine &Name) const {
76    return Builder.CreateFCmp(FCI.getPredicate(), Op0, Op1, Name);
77  }
78  FCmpInst &FCI;
79};
80
81// ICmpSpliiter(ICI)(Builder, X, Y, Name) uses Builder to create an ICmp
82// called Name that compares X and Y in the same way as ICI.
83struct ICmpSplitter {
84  ICmpSplitter(ICmpInst &ici) : ICI(ici) {}
85  Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1,
86                    const Twine &Name) const {
87    return Builder.CreateICmp(ICI.getPredicate(), Op0, Op1, Name);
88  }
89  ICmpInst &ICI;
90};
91
92// BinarySpliiter(BO)(Builder, X, Y, Name) uses Builder to create
93// a binary operator like BO called Name with operands X and Y.
94struct BinarySplitter {
95  BinarySplitter(BinaryOperator &bo) : BO(bo) {}
96  Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1,
97                    const Twine &Name) const {
98    return Builder.CreateBinOp(BO.getOpcode(), Op0, Op1, Name);
99  }
100  BinaryOperator &BO;
101};
102
103// Information about a load or store that we're scalarizing.
104struct VectorLayout {
105  VectorLayout() : VecTy(nullptr), ElemTy(nullptr), VecAlign(0), ElemSize(0) {}
106
107  // Return the alignment of element I.
108  uint64_t getElemAlign(unsigned I) {
109    return MinAlign(VecAlign, I * ElemSize);
110  }
111
112  // The type of the vector.
113  VectorType *VecTy;
114
115  // The type of each element.
116  Type *ElemTy;
117
118  // The alignment of the vector.
119  uint64_t VecAlign;
120
121  // The size of each element.
122  uint64_t ElemSize;
123};
124
125class Scalarizer : public FunctionPass,
126                   public InstVisitor<Scalarizer, bool> {
127public:
128  static char ID;
129
130  Scalarizer() :
131    FunctionPass(ID) {
132    initializeScalarizerPass(*PassRegistry::getPassRegistry());
133  }
134
135  bool doInitialization(Module &M) override;
136  bool runOnFunction(Function &F) override;
137
138  // InstVisitor methods.  They return true if the instruction was scalarized,
139  // false if nothing changed.
140  bool visitInstruction(Instruction &) { return false; }
141  bool visitSelectInst(SelectInst &SI);
142  bool visitICmpInst(ICmpInst &);
143  bool visitFCmpInst(FCmpInst &);
144  bool visitBinaryOperator(BinaryOperator &);
145  bool visitGetElementPtrInst(GetElementPtrInst &);
146  bool visitCastInst(CastInst &);
147  bool visitBitCastInst(BitCastInst &);
148  bool visitShuffleVectorInst(ShuffleVectorInst &);
149  bool visitPHINode(PHINode &);
150  bool visitLoadInst(LoadInst &);
151  bool visitStoreInst(StoreInst &);
152
153  static void registerOptions() {
154    // This is disabled by default because having separate loads and stores
155    // makes it more likely that the -combiner-alias-analysis limits will be
156    // reached.
157    OptionRegistry::registerOption<bool, Scalarizer,
158                                 &Scalarizer::ScalarizeLoadStore>(
159        "scalarize-load-store",
160        "Allow the scalarizer pass to scalarize loads and store", false);
161  }
162
163private:
164  Scatterer scatter(Instruction *, Value *);
165  void gather(Instruction *, const ValueVector &);
166  bool canTransferMetadata(unsigned Kind);
167  void transferMetadata(Instruction *, const ValueVector &);
168  bool getVectorLayout(Type *, unsigned, VectorLayout &, const DataLayout &);
169  bool finish();
170
171  template<typename T> bool splitBinary(Instruction &, const T &);
172
173  ScatterMap Scattered;
174  GatherList Gathered;
175  unsigned ParallelLoopAccessMDKind;
176  bool ScalarizeLoadStore;
177};
178
179char Scalarizer::ID = 0;
180} // end anonymous namespace
181
182INITIALIZE_PASS_WITH_OPTIONS(Scalarizer, "scalarizer",
183                             "Scalarize vector operations", false, false)
184
185Scatterer::Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v,
186                     ValueVector *cachePtr)
187  : BB(bb), BBI(bbi), V(v), CachePtr(cachePtr) {
188  Type *Ty = V->getType();
189  PtrTy = dyn_cast<PointerType>(Ty);
190  if (PtrTy)
191    Ty = PtrTy->getElementType();
192  Size = Ty->getVectorNumElements();
193  if (!CachePtr)
194    Tmp.resize(Size, nullptr);
195  else if (CachePtr->empty())
196    CachePtr->resize(Size, nullptr);
197  else
198    assert(Size == CachePtr->size() && "Inconsistent vector sizes");
199}
200
201// Return component I, creating a new Value for it if necessary.
202Value *Scatterer::operator[](unsigned I) {
203  ValueVector &CV = (CachePtr ? *CachePtr : Tmp);
204  // Try to reuse a previous value.
205  if (CV[I])
206    return CV[I];
207  IRBuilder<> Builder(BB, BBI);
208  if (PtrTy) {
209    if (!CV[0]) {
210      Type *Ty =
211        PointerType::get(PtrTy->getElementType()->getVectorElementType(),
212                         PtrTy->getAddressSpace());
213      CV[0] = Builder.CreateBitCast(V, Ty, V->getName() + ".i0");
214    }
215    if (I != 0)
216      CV[I] = Builder.CreateConstGEP1_32(nullptr, CV[0], I,
217                                         V->getName() + ".i" + Twine(I));
218  } else {
219    // Search through a chain of InsertElementInsts looking for element I.
220    // Record other elements in the cache.  The new V is still suitable
221    // for all uncached indices.
222    for (;;) {
223      InsertElementInst *Insert = dyn_cast<InsertElementInst>(V);
224      if (!Insert)
225        break;
226      ConstantInt *Idx = dyn_cast<ConstantInt>(Insert->getOperand(2));
227      if (!Idx)
228        break;
229      unsigned J = Idx->getZExtValue();
230      V = Insert->getOperand(0);
231      if (I == J) {
232        CV[J] = Insert->getOperand(1);
233        return CV[J];
234      } else if (!CV[J]) {
235        // Only cache the first entry we find for each index we're not actively
236        // searching for. This prevents us from going too far up the chain and
237        // caching incorrect entries.
238        CV[J] = Insert->getOperand(1);
239      }
240    }
241    CV[I] = Builder.CreateExtractElement(V, Builder.getInt32(I),
242                                         V->getName() + ".i" + Twine(I));
243  }
244  return CV[I];
245}
246
247bool Scalarizer::doInitialization(Module &M) {
248  ParallelLoopAccessMDKind =
249      M.getContext().getMDKindID("llvm.mem.parallel_loop_access");
250  ScalarizeLoadStore =
251      M.getContext().getOption<bool, Scalarizer, &Scalarizer::ScalarizeLoadStore>();
252  return false;
253}
254
255bool Scalarizer::runOnFunction(Function &F) {
256  assert(Gathered.empty() && Scattered.empty());
257  for (BasicBlock &BB : F) {
258    for (BasicBlock::iterator II = BB.begin(), IE = BB.end(); II != IE;) {
259      Instruction *I = &*II;
260      bool Done = visit(I);
261      ++II;
262      if (Done && I->getType()->isVoidTy())
263        I->eraseFromParent();
264    }
265  }
266  return finish();
267}
268
269// Return a scattered form of V that can be accessed by Point.  V must be a
270// vector or a pointer to a vector.
271Scatterer Scalarizer::scatter(Instruction *Point, Value *V) {
272  if (Argument *VArg = dyn_cast<Argument>(V)) {
273    // Put the scattered form of arguments in the entry block,
274    // so that it can be used everywhere.
275    Function *F = VArg->getParent();
276    BasicBlock *BB = &F->getEntryBlock();
277    return Scatterer(BB, BB->begin(), V, &Scattered[V]);
278  }
279  if (Instruction *VOp = dyn_cast<Instruction>(V)) {
280    // Put the scattered form of an instruction directly after the
281    // instruction.
282    BasicBlock *BB = VOp->getParent();
283    return Scatterer(BB, std::next(BasicBlock::iterator(VOp)),
284                     V, &Scattered[V]);
285  }
286  // In the fallback case, just put the scattered before Point and
287  // keep the result local to Point.
288  return Scatterer(Point->getParent(), Point->getIterator(), V);
289}
290
291// Replace Op with the gathered form of the components in CV.  Defer the
292// deletion of Op and creation of the gathered form to the end of the pass,
293// so that we can avoid creating the gathered form if all uses of Op are
294// replaced with uses of CV.
295void Scalarizer::gather(Instruction *Op, const ValueVector &CV) {
296  // Since we're not deleting Op yet, stub out its operands, so that it
297  // doesn't make anything live unnecessarily.
298  for (unsigned I = 0, E = Op->getNumOperands(); I != E; ++I)
299    Op->setOperand(I, UndefValue::get(Op->getOperand(I)->getType()));
300
301  transferMetadata(Op, CV);
302
303  // If we already have a scattered form of Op (created from ExtractElements
304  // of Op itself), replace them with the new form.
305  ValueVector &SV = Scattered[Op];
306  if (!SV.empty()) {
307    for (unsigned I = 0, E = SV.size(); I != E; ++I) {
308      Instruction *Old = cast<Instruction>(SV[I]);
309      CV[I]->takeName(Old);
310      Old->replaceAllUsesWith(CV[I]);
311      Old->eraseFromParent();
312    }
313  }
314  SV = CV;
315  Gathered.push_back(GatherList::value_type(Op, &SV));
316}
317
318// Return true if it is safe to transfer the given metadata tag from
319// vector to scalar instructions.
320bool Scalarizer::canTransferMetadata(unsigned Tag) {
321  return (Tag == LLVMContext::MD_tbaa
322          || Tag == LLVMContext::MD_fpmath
323          || Tag == LLVMContext::MD_tbaa_struct
324          || Tag == LLVMContext::MD_invariant_load
325          || Tag == LLVMContext::MD_alias_scope
326          || Tag == LLVMContext::MD_noalias
327          || Tag == ParallelLoopAccessMDKind);
328}
329
330// Transfer metadata from Op to the instructions in CV if it is known
331// to be safe to do so.
332void Scalarizer::transferMetadata(Instruction *Op, const ValueVector &CV) {
333  SmallVector<std::pair<unsigned, MDNode *>, 4> MDs;
334  Op->getAllMetadataOtherThanDebugLoc(MDs);
335  for (unsigned I = 0, E = CV.size(); I != E; ++I) {
336    if (Instruction *New = dyn_cast<Instruction>(CV[I])) {
337      for (SmallVectorImpl<std::pair<unsigned, MDNode *>>::iterator
338               MI = MDs.begin(),
339               ME = MDs.end();
340           MI != ME; ++MI)
341        if (canTransferMetadata(MI->first))
342          New->setMetadata(MI->first, MI->second);
343      New->setDebugLoc(Op->getDebugLoc());
344    }
345  }
346}
347
348// Try to fill in Layout from Ty, returning true on success.  Alignment is
349// the alignment of the vector, or 0 if the ABI default should be used.
350bool Scalarizer::getVectorLayout(Type *Ty, unsigned Alignment,
351                                 VectorLayout &Layout, const DataLayout &DL) {
352  // Make sure we're dealing with a vector.
353  Layout.VecTy = dyn_cast<VectorType>(Ty);
354  if (!Layout.VecTy)
355    return false;
356
357  // Check that we're dealing with full-byte elements.
358  Layout.ElemTy = Layout.VecTy->getElementType();
359  if (DL.getTypeSizeInBits(Layout.ElemTy) !=
360      DL.getTypeStoreSizeInBits(Layout.ElemTy))
361    return false;
362
363  if (Alignment)
364    Layout.VecAlign = Alignment;
365  else
366    Layout.VecAlign = DL.getABITypeAlignment(Layout.VecTy);
367  Layout.ElemSize = DL.getTypeStoreSize(Layout.ElemTy);
368  return true;
369}
370
371// Scalarize two-operand instruction I, using Split(Builder, X, Y, Name)
372// to create an instruction like I with operands X and Y and name Name.
373template<typename Splitter>
374bool Scalarizer::splitBinary(Instruction &I, const Splitter &Split) {
375  VectorType *VT = dyn_cast<VectorType>(I.getType());
376  if (!VT)
377    return false;
378
379  unsigned NumElems = VT->getNumElements();
380  IRBuilder<> Builder(&I);
381  Scatterer Op0 = scatter(&I, I.getOperand(0));
382  Scatterer Op1 = scatter(&I, I.getOperand(1));
383  assert(Op0.size() == NumElems && "Mismatched binary operation");
384  assert(Op1.size() == NumElems && "Mismatched binary operation");
385  ValueVector Res;
386  Res.resize(NumElems);
387  for (unsigned Elem = 0; Elem < NumElems; ++Elem)
388    Res[Elem] = Split(Builder, Op0[Elem], Op1[Elem],
389                      I.getName() + ".i" + Twine(Elem));
390  gather(&I, Res);
391  return true;
392}
393
394bool Scalarizer::visitSelectInst(SelectInst &SI) {
395  VectorType *VT = dyn_cast<VectorType>(SI.getType());
396  if (!VT)
397    return false;
398
399  unsigned NumElems = VT->getNumElements();
400  IRBuilder<> Builder(&SI);
401  Scatterer Op1 = scatter(&SI, SI.getOperand(1));
402  Scatterer Op2 = scatter(&SI, SI.getOperand(2));
403  assert(Op1.size() == NumElems && "Mismatched select");
404  assert(Op2.size() == NumElems && "Mismatched select");
405  ValueVector Res;
406  Res.resize(NumElems);
407
408  if (SI.getOperand(0)->getType()->isVectorTy()) {
409    Scatterer Op0 = scatter(&SI, SI.getOperand(0));
410    assert(Op0.size() == NumElems && "Mismatched select");
411    for (unsigned I = 0; I < NumElems; ++I)
412      Res[I] = Builder.CreateSelect(Op0[I], Op1[I], Op2[I],
413                                    SI.getName() + ".i" + Twine(I));
414  } else {
415    Value *Op0 = SI.getOperand(0);
416    for (unsigned I = 0; I < NumElems; ++I)
417      Res[I] = Builder.CreateSelect(Op0, Op1[I], Op2[I],
418                                    SI.getName() + ".i" + Twine(I));
419  }
420  gather(&SI, Res);
421  return true;
422}
423
424bool Scalarizer::visitICmpInst(ICmpInst &ICI) {
425  return splitBinary(ICI, ICmpSplitter(ICI));
426}
427
428bool Scalarizer::visitFCmpInst(FCmpInst &FCI) {
429  return splitBinary(FCI, FCmpSplitter(FCI));
430}
431
432bool Scalarizer::visitBinaryOperator(BinaryOperator &BO) {
433  return splitBinary(BO, BinarySplitter(BO));
434}
435
436bool Scalarizer::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
437  VectorType *VT = dyn_cast<VectorType>(GEPI.getType());
438  if (!VT)
439    return false;
440
441  IRBuilder<> Builder(&GEPI);
442  unsigned NumElems = VT->getNumElements();
443  unsigned NumIndices = GEPI.getNumIndices();
444
445  Scatterer Base = scatter(&GEPI, GEPI.getOperand(0));
446
447  SmallVector<Scatterer, 8> Ops;
448  Ops.resize(NumIndices);
449  for (unsigned I = 0; I < NumIndices; ++I)
450    Ops[I] = scatter(&GEPI, GEPI.getOperand(I + 1));
451
452  ValueVector Res;
453  Res.resize(NumElems);
454  for (unsigned I = 0; I < NumElems; ++I) {
455    SmallVector<Value *, 8> Indices;
456    Indices.resize(NumIndices);
457    for (unsigned J = 0; J < NumIndices; ++J)
458      Indices[J] = Ops[J][I];
459    Res[I] = Builder.CreateGEP(GEPI.getSourceElementType(), Base[I], Indices,
460                               GEPI.getName() + ".i" + Twine(I));
461    if (GEPI.isInBounds())
462      if (GetElementPtrInst *NewGEPI = dyn_cast<GetElementPtrInst>(Res[I]))
463        NewGEPI->setIsInBounds();
464  }
465  gather(&GEPI, Res);
466  return true;
467}
468
469bool Scalarizer::visitCastInst(CastInst &CI) {
470  VectorType *VT = dyn_cast<VectorType>(CI.getDestTy());
471  if (!VT)
472    return false;
473
474  unsigned NumElems = VT->getNumElements();
475  IRBuilder<> Builder(&CI);
476  Scatterer Op0 = scatter(&CI, CI.getOperand(0));
477  assert(Op0.size() == NumElems && "Mismatched cast");
478  ValueVector Res;
479  Res.resize(NumElems);
480  for (unsigned I = 0; I < NumElems; ++I)
481    Res[I] = Builder.CreateCast(CI.getOpcode(), Op0[I], VT->getElementType(),
482                                CI.getName() + ".i" + Twine(I));
483  gather(&CI, Res);
484  return true;
485}
486
487bool Scalarizer::visitBitCastInst(BitCastInst &BCI) {
488  VectorType *DstVT = dyn_cast<VectorType>(BCI.getDestTy());
489  VectorType *SrcVT = dyn_cast<VectorType>(BCI.getSrcTy());
490  if (!DstVT || !SrcVT)
491    return false;
492
493  unsigned DstNumElems = DstVT->getNumElements();
494  unsigned SrcNumElems = SrcVT->getNumElements();
495  IRBuilder<> Builder(&BCI);
496  Scatterer Op0 = scatter(&BCI, BCI.getOperand(0));
497  ValueVector Res;
498  Res.resize(DstNumElems);
499
500  if (DstNumElems == SrcNumElems) {
501    for (unsigned I = 0; I < DstNumElems; ++I)
502      Res[I] = Builder.CreateBitCast(Op0[I], DstVT->getElementType(),
503                                     BCI.getName() + ".i" + Twine(I));
504  } else if (DstNumElems > SrcNumElems) {
505    // <M x t1> -> <N*M x t2>.  Convert each t1 to <N x t2> and copy the
506    // individual elements to the destination.
507    unsigned FanOut = DstNumElems / SrcNumElems;
508    Type *MidTy = VectorType::get(DstVT->getElementType(), FanOut);
509    unsigned ResI = 0;
510    for (unsigned Op0I = 0; Op0I < SrcNumElems; ++Op0I) {
511      Value *V = Op0[Op0I];
512      Instruction *VI;
513      // Look through any existing bitcasts before converting to <N x t2>.
514      // In the best case, the resulting conversion might be a no-op.
515      while ((VI = dyn_cast<Instruction>(V)) &&
516             VI->getOpcode() == Instruction::BitCast)
517        V = VI->getOperand(0);
518      V = Builder.CreateBitCast(V, MidTy, V->getName() + ".cast");
519      Scatterer Mid = scatter(&BCI, V);
520      for (unsigned MidI = 0; MidI < FanOut; ++MidI)
521        Res[ResI++] = Mid[MidI];
522    }
523  } else {
524    // <N*M x t1> -> <M x t2>.  Convert each group of <N x t1> into a t2.
525    unsigned FanIn = SrcNumElems / DstNumElems;
526    Type *MidTy = VectorType::get(SrcVT->getElementType(), FanIn);
527    unsigned Op0I = 0;
528    for (unsigned ResI = 0; ResI < DstNumElems; ++ResI) {
529      Value *V = UndefValue::get(MidTy);
530      for (unsigned MidI = 0; MidI < FanIn; ++MidI)
531        V = Builder.CreateInsertElement(V, Op0[Op0I++], Builder.getInt32(MidI),
532                                        BCI.getName() + ".i" + Twine(ResI)
533                                        + ".upto" + Twine(MidI));
534      Res[ResI] = Builder.CreateBitCast(V, DstVT->getElementType(),
535                                        BCI.getName() + ".i" + Twine(ResI));
536    }
537  }
538  gather(&BCI, Res);
539  return true;
540}
541
542bool Scalarizer::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
543  VectorType *VT = dyn_cast<VectorType>(SVI.getType());
544  if (!VT)
545    return false;
546
547  unsigned NumElems = VT->getNumElements();
548  Scatterer Op0 = scatter(&SVI, SVI.getOperand(0));
549  Scatterer Op1 = scatter(&SVI, SVI.getOperand(1));
550  ValueVector Res;
551  Res.resize(NumElems);
552
553  for (unsigned I = 0; I < NumElems; ++I) {
554    int Selector = SVI.getMaskValue(I);
555    if (Selector < 0)
556      Res[I] = UndefValue::get(VT->getElementType());
557    else if (unsigned(Selector) < Op0.size())
558      Res[I] = Op0[Selector];
559    else
560      Res[I] = Op1[Selector - Op0.size()];
561  }
562  gather(&SVI, Res);
563  return true;
564}
565
566bool Scalarizer::visitPHINode(PHINode &PHI) {
567  VectorType *VT = dyn_cast<VectorType>(PHI.getType());
568  if (!VT)
569    return false;
570
571  unsigned NumElems = VT->getNumElements();
572  IRBuilder<> Builder(&PHI);
573  ValueVector Res;
574  Res.resize(NumElems);
575
576  unsigned NumOps = PHI.getNumOperands();
577  for (unsigned I = 0; I < NumElems; ++I)
578    Res[I] = Builder.CreatePHI(VT->getElementType(), NumOps,
579                               PHI.getName() + ".i" + Twine(I));
580
581  for (unsigned I = 0; I < NumOps; ++I) {
582    Scatterer Op = scatter(&PHI, PHI.getIncomingValue(I));
583    BasicBlock *IncomingBlock = PHI.getIncomingBlock(I);
584    for (unsigned J = 0; J < NumElems; ++J)
585      cast<PHINode>(Res[J])->addIncoming(Op[J], IncomingBlock);
586  }
587  gather(&PHI, Res);
588  return true;
589}
590
591bool Scalarizer::visitLoadInst(LoadInst &LI) {
592  if (!ScalarizeLoadStore)
593    return false;
594  if (!LI.isSimple())
595    return false;
596
597  VectorLayout Layout;
598  if (!getVectorLayout(LI.getType(), LI.getAlignment(), Layout,
599                       LI.getModule()->getDataLayout()))
600    return false;
601
602  unsigned NumElems = Layout.VecTy->getNumElements();
603  IRBuilder<> Builder(&LI);
604  Scatterer Ptr = scatter(&LI, LI.getPointerOperand());
605  ValueVector Res;
606  Res.resize(NumElems);
607
608  for (unsigned I = 0; I < NumElems; ++I)
609    Res[I] = Builder.CreateAlignedLoad(Ptr[I], Layout.getElemAlign(I),
610                                       LI.getName() + ".i" + Twine(I));
611  gather(&LI, Res);
612  return true;
613}
614
615bool Scalarizer::visitStoreInst(StoreInst &SI) {
616  if (!ScalarizeLoadStore)
617    return false;
618  if (!SI.isSimple())
619    return false;
620
621  VectorLayout Layout;
622  Value *FullValue = SI.getValueOperand();
623  if (!getVectorLayout(FullValue->getType(), SI.getAlignment(), Layout,
624                       SI.getModule()->getDataLayout()))
625    return false;
626
627  unsigned NumElems = Layout.VecTy->getNumElements();
628  IRBuilder<> Builder(&SI);
629  Scatterer Ptr = scatter(&SI, SI.getPointerOperand());
630  Scatterer Val = scatter(&SI, FullValue);
631
632  ValueVector Stores;
633  Stores.resize(NumElems);
634  for (unsigned I = 0; I < NumElems; ++I) {
635    unsigned Align = Layout.getElemAlign(I);
636    Stores[I] = Builder.CreateAlignedStore(Val[I], Ptr[I], Align);
637  }
638  transferMetadata(&SI, Stores);
639  return true;
640}
641
642// Delete the instructions that we scalarized.  If a full vector result
643// is still needed, recreate it using InsertElements.
644bool Scalarizer::finish() {
645  // The presence of data in Gathered or Scattered indicates changes
646  // made to the Function.
647  if (Gathered.empty() && Scattered.empty())
648    return false;
649  for (GatherList::iterator GMI = Gathered.begin(), GME = Gathered.end();
650       GMI != GME; ++GMI) {
651    Instruction *Op = GMI->first;
652    ValueVector &CV = *GMI->second;
653    if (!Op->use_empty()) {
654      // The value is still needed, so recreate it using a series of
655      // InsertElements.
656      Type *Ty = Op->getType();
657      Value *Res = UndefValue::get(Ty);
658      BasicBlock *BB = Op->getParent();
659      unsigned Count = Ty->getVectorNumElements();
660      IRBuilder<> Builder(Op);
661      if (isa<PHINode>(Op))
662        Builder.SetInsertPoint(BB, BB->getFirstInsertionPt());
663      for (unsigned I = 0; I < Count; ++I)
664        Res = Builder.CreateInsertElement(Res, CV[I], Builder.getInt32(I),
665                                          Op->getName() + ".upto" + Twine(I));
666      Res->takeName(Op);
667      Op->replaceAllUsesWith(Res);
668    }
669    Op->eraseFromParent();
670  }
671  Gathered.clear();
672  Scattered.clear();
673  return true;
674}
675
676FunctionPass *llvm::createScalarizerPass() {
677  return new Scalarizer();
678}
679