//===- VecUtils.cpp --- Vectorization Utilities ---------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "SLP"

#include "VecUtils.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/Verifier.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
#include <map>

using namespace llvm;

static const unsigned MinVecRegSize = 128;

static const unsigned RecursionMaxDepth = 6;

namespace llvm {

BoUpSLP::BoUpSLP(BasicBlock *Bb, ScalarEvolution *S, DataLayout *Dl,
                 TargetTransformInfo *Tti, AliasAnalysis *Aa, Loop *Lp) :
  BB(Bb), SE(S), DL(Dl), TTI(Tti), AA(Aa), L(Lp) {
  numberInstructions();
}

void BoUpSLP::numberInstructions() {
  int Loc = 0;
  InstrIdx.clear();
  InstrVec.clear();
  // Number the instructions in the block.
  for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
    InstrIdx[it] = Loc++;
    InstrVec.push_back(it);
    assert(InstrVec[InstrIdx[it]] == it && "Invalid allocation");
  }
}

Value *BoUpSLP::getPointerOperand(Value *I) {
  if (LoadInst *LI = dyn_cast<LoadInst>(I)) return LI->getPointerOperand();
  if (StoreInst *SI = dyn_cast<StoreInst>(I)) return SI->getPointerOperand();
  return 0;
}

unsigned BoUpSLP::getAddressSpaceOperand(Value *I) {
  if (LoadInst *L = dyn_cast<LoadInst>(I)) return L->getPointerAddressSpace();
  if (StoreInst *S = dyn_cast<StoreInst>(I)) return S->getPointerAddressSpace();
  return -1;
}

bool BoUpSLP::isConsecutiveAccess(Value *A, Value *B) {
  Value *PtrA = getPointerOperand(A);
  Value *PtrB = getPointerOperand(B);
  unsigned ASA = getAddressSpaceOperand(A);
  unsigned ASB = getAddressSpaceOperand(B);

  // Check that the address spaces match and that the pointers are valid.
  if (!PtrA || !PtrB || (ASA != ASB)) return false;

  // Check that A and B are of the same type.
  if (PtrA->getType() != PtrB->getType()) return false;

  // Calculate the distance.
  const SCEV *PtrSCEVA = SE->getSCEV(PtrA);
  const SCEV *PtrSCEVB = SE->getSCEV(PtrB);
  const SCEV *OffsetSCEV = SE->getMinusSCEV(PtrSCEVA, PtrSCEVB);
  const SCEVConstant *ConstOffSCEV = dyn_cast<SCEVConstant>(OffsetSCEV);

  // Non-constant distance.
  if (!ConstOffSCEV) return false;

  int64_t Offset = ConstOffSCEV->getValue()->getSExtValue();
  Type *Ty = cast<PointerType>(PtrA->getType())->getElementType();
  // The instructions are consecutive if the size of the first load/store is
  // the same as the offset.
  int64_t Sz = DL->getTypeStoreSize(Ty);
  return ((-Offset) == Sz);
}

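// A sketch of the check above on concrete IR (illustrative, not taken from a
// real test): given
//   store i32 %x, i32* %p
//   store i32 %y, i32* %q        ; %q = getelementptr i32* %p, i64 1
// ScalarEvolution folds the GEP, so Offset = SCEV(%p) - SCEV(%q) = -4 while
// getTypeStoreSize(i32) = 4, hence -Offset == Sz and the pair is consecutive.
// The distance is signed, so the query is not symmetric: it asks whether B
// starts exactly where A ends.
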
bool BoUpSLP::vectorizeStoreChain(ArrayRef<Value *> Chain, int CostThreshold) {
  Type *StoreTy = cast<StoreInst>(Chain[0])->getValueOperand()->getType();
  unsigned Sz = DL->getTypeSizeInBits(StoreTy);
  unsigned VF = MinVecRegSize / Sz;

  if (!isPowerOf2_32(Sz) || VF < 2) return false;

  bool Changed = false;
  // Look for profitable vectorizable trees at all offsets, starting at zero.
  for (unsigned i = 0, e = Chain.size(); i < e; ++i) {
    if (i + VF > e) return Changed;
    DEBUG(dbgs() << "SLP: Analyzing " << VF << " stores at offset " << i
                 << "\n");
    ArrayRef<Value *> Operands = Chain.slice(i, VF);

    int Cost = getTreeCost(Operands);
    DEBUG(dbgs() << "SLP: Found cost=" << Cost << " for VF=" << VF << "\n");
    if (Cost < CostThreshold) {
      DEBUG(dbgs() << "SLP: Decided to vectorize cost=" << Cost << "\n");
      vectorizeTree(Operands, VF);
      i += VF - 1;
      Changed = true;
    }
  }

  return Changed;
}

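// Worked example of the windowing above (a sketch; the numbers follow from
// MinVecRegSize = 128): for a chain of eight i32 stores, Sz = 32 and
// VF = 128 / 32 = 4, so the loop examines the windows [0,4), [1,5), ... and,
// when a window is profitable, vectorizes it and jumps past it (i += VF - 1).
// A chain of six i32 stores can thus yield one <4 x i32> store plus two
// scalar leftovers.
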
bool BoUpSLP::vectorizeStores(ArrayRef<StoreInst *> Stores, int costThreshold) {
  ValueSet Heads, Tails;
  SmallDenseMap<Value *, Value *> ConsecutiveChain;

  // We may run into multiple chains that merge into a single chain. We mark the
  // stores that we vectorized so that we don't visit the same store twice.
  ValueSet VectorizedStores;
  bool Changed = false;

  // Do a quadratic search on all of the given stores and find all of the
  // pairs of stores that follow each other.
  for (unsigned i = 0, e = Stores.size(); i < e; ++i)
    for (unsigned j = 0; j < e; ++j) {
      if (i == j) continue;
      if (isConsecutiveAccess(Stores[i], Stores[j])) {
        Tails.insert(Stores[j]);
        Heads.insert(Stores[i]);
        ConsecutiveChain[Stores[i]] = Stores[j];
      }
    }

  // For stores that start but don't end a link in the chain:
  for (ValueSet::iterator it = Heads.begin(), e = Heads.end(); it != e; ++it) {
    if (Tails.count(*it)) continue;

    // We found a store instr that starts a chain. Now follow the chain and try
    // to vectorize it.
    ValueList Operands;
    Value *I = *it;
    // Collect the chain into a list.
    while (Tails.count(I) || Heads.count(I)) {
      if (VectorizedStores.count(I)) break;
      Operands.push_back(I);
      // Move to the next value in the chain.
      I = ConsecutiveChain[I];
    }

    bool Vectorized = vectorizeStoreChain(Operands, costThreshold);

    // Mark the vectorized stores so that we don't vectorize them again.
    if (Vectorized)
      VectorizedStores.insert(Operands.begin(), Operands.end());
    Changed |= Vectorized;
  }

  return Changed;
}

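// Sketch of the chain discovery above on a concrete input (illustrative):
// for stores to p[0], p[2], p[1] (in that order), the quadratic scan records
// the links p[0] -> p[1] and p[1] -> p[2] in ConsecutiveChain, with Heads =
// {p[0], p[1]} and Tails = {p[1], p[2]}. Only p[0] is a Head that is not also
// a Tail, so the walk starting there recovers the ordered chain
// {p[0], p[1], p[2]} regardless of the original store order.
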
int BoUpSLP::getScalarizationCost(ArrayRef<Value *> VL) {
  // Find the type of the operands in VL.
  Type *ScalarTy = VL[0]->getType();
  if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
    ScalarTy = SI->getValueOperand()->getType();
  VectorType *VecTy = VectorType::get(ScalarTy, VL.size());
  // Find the cost of inserting/extracting values from the vector.
  return getScalarizationCost(VecTy);
}

int BoUpSLP::getScalarizationCost(Type *Ty) {
  int Cost = 0;
  for (unsigned i = 0, e = cast<VectorType>(Ty)->getNumElements(); i < e; ++i)
    Cost += TTI->getVectorInstrCost(Instruction::InsertElement, Ty, i);
  return Cost;
}

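// For example (the numbers are target-dependent; this is only illustrative):
// scalarizing a group of four i32 values is priced as the sum of four
// insertelement costs into <4 x i32>, i.e. the price of the gather sequence
// that Scalarize() emits further down in this file.
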
AliasAnalysis::Location BoUpSLP::getLocation(Instruction *I) {
  if (StoreInst *SI = dyn_cast<StoreInst>(I)) return AA->getLocation(SI);
  if (LoadInst *LI = dyn_cast<LoadInst>(I)) return AA->getLocation(LI);
  return AliasAnalysis::Location();
}

Value *BoUpSLP::isUnsafeToSink(Instruction *Src, Instruction *Dst) {
  assert(Src->getParent() == Dst->getParent() && "Not the same BB");
  BasicBlock::iterator I = Src, E = Dst;
  // Scan all of the instructions between SRC and DST and check whether
  // any of them may alias with the source.
  for (++I; I != E; ++I) {
    // Ignore instructions that are on the memory-barrier ignore list.
    if (MemBarrierIgnoreList.count(I)) continue;
    if (Src->mayWriteToMemory()) /* Write */ {
      if (!I->mayReadOrWriteMemory()) continue;
    } else /* Read */ {
      if (!I->mayWriteToMemory()) continue;
    }
    AliasAnalysis::Location A = getLocation(&*I);
    AliasAnalysis::Location B = getLocation(Src);

    if (!A.Ptr || !B.Ptr || AA->alias(A, B))
      return I;
  }
  return 0;
}

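// A sketch of what this guards against (illustrative IR): when sinking the
// first store past the call in
//   store i32 %x, i32* %p
//   call void @f()               ; may read or write memory
//   store i32 %y, i32* %q
// the call has no Location, so it is returned as the barrier, and
// getTreeCost_rec() below answers max_cost for any tree containing both
// stores.
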
void BoUpSLP::vectorizeArith(ArrayRef<Value *> Operands) {
  Value *Vec = vectorizeTree(Operands, Operands.size());
  BasicBlock::iterator Loc = cast<Instruction>(Vec);
  IRBuilder<> Builder(++Loc);
  // After vectorizing the operands we need to generate extractelement
  // instructions and replace all of the uses of the scalar values with
  // the values that we extracted from the vectorized tree.
  for (unsigned i = 0, e = Operands.size(); i != e; ++i) {
    Value *S = Builder.CreateExtractElement(Vec, Builder.getInt32(i));
    Operands[i]->replaceAllUsesWith(S);
  }
}

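// For example (a sketch): vectorizing the pair {%a0, %a1} of scalar adds
// yields roughly
//   %va = add <2 x i32> %x, %y
//   %e0 = extractelement <2 x i32> %va, i32 0
//   %e1 = extractelement <2 x i32> %va, i32 1
// after which every user of %a0 and %a1 is rewired to %e0 and %e1.
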
int BoUpSLP::getTreeCost(ArrayRef<Value *> VL) {
  // Clear the list of stores to ignore as memory barriers, and the lists of
  // instructions with multiple users, left over from the previous run.
  MemBarrierIgnoreList.clear();
  LaneMap.clear();
  MultiUserVals.clear();
  MustScalarize.clear();

  // Scan the tree and find which value is used by which lane, and which values
  // must be scalarized.
  getTreeUses_rec(VL, 0);

  // Check that instructions with multiple users can be vectorized. Mark unsafe
  // instructions.
  for (ValueSet::iterator it = MultiUserVals.begin(),
       e = MultiUserVals.end(); it != e; ++it) {
    // Check that all of the users of this instr are within the tree
    // and that they are all from the same lane.
    int Lane = -1;
    for (Value::use_iterator I = (*it)->use_begin(), E = (*it)->use_end();
         I != E; ++I) {
      if (LaneMap.find(*I) == LaneMap.end()) {
        MustScalarize.insert(*it);
        DEBUG(dbgs() << "SLP: Adding " << **it <<
              " to MustScalarize because of an out-of-tree usage.\n");
        break;
      }
      if (Lane == -1) Lane = LaneMap[*I];
      if (Lane != LaneMap[*I]) {
        MustScalarize.insert(*it);
        DEBUG(dbgs() << "SLP: Adding " << **it <<
              " to MustScalarize because multiple lanes use it: "
              << Lane << " and " << LaneMap[*I] << ".\n");
        break;
      }
    }
  }

  // Now calculate the cost of vectorizing the tree.
  return getTreeCost_rec(VL, 0);
}

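// Lane example (a sketch): for VL = {a[0] + b[0], a[1] + b[1]}, the load of
// a[0] feeds lane 0 and the load of a[1] feeds lane 1. If one value, say the
// load of a[0], were used by both adds, or by an instruction outside the
// tree, the loop above would put it in MustScalarize and the tree would
// gather it instead of vectorizing it.
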
void BoUpSLP::getTreeUses_rec(ArrayRef<Value *> VL, unsigned Depth) {
  if (Depth == RecursionMaxDepth) return;

  // Don't handle vectors.
  if (VL[0]->getType()->isVectorTy()) return;
  if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
    if (SI->getValueOperand()->getType()->isVectorTy()) return;

  // Check if all of the operands are constants.
  bool AllConst = true;
  bool AllSameScalar = true;
  for (unsigned i = 0, e = VL.size(); i < e; ++i) {
    AllConst &= isa<Constant>(VL[i]);
    AllSameScalar &= (VL[0] == VL[i]);
    Instruction *I = dyn_cast<Instruction>(VL[i]);
    // If one of the instructions is out of this BB, we need to scalarize all.
    if (I && I->getParent() != BB) return;
  }

  // If all of the operands are identical or constant we have a simple solution.
  if (AllConst || AllSameScalar) return;

  // Scalarize unknown structures.
  Instruction *VL0 = dyn_cast<Instruction>(VL[0]);
  if (!VL0) return;

  unsigned Opcode = VL0->getOpcode();
  for (unsigned i = 0, e = VL.size(); i < e; ++i) {
    Instruction *I = dyn_cast<Instruction>(VL[i]);
    // If not all of the instructions are identical then we have to scalarize.
    if (!I || Opcode != I->getOpcode()) return;
  }

  // Mark instructions with multiple users.
  for (unsigned i = 0, e = VL.size(); i < e; ++i) {
    Instruction *I = dyn_cast<Instruction>(VL[i]);
    // Remember to check if all of the users of this instr are vectorized
    // within our tree.
    if (I && I->getNumUses() > 1) MultiUserVals.insert(I);
  }

  for (int i = 0, e = VL.size(); i < e; ++i) {
    // Check that the instruction is only used within one lane.
    if (LaneMap.count(VL[i]) && LaneMap[VL[i]] != i) return;
    // Mark this instruction as 'seen' and remember the lane.
    LaneMap[VL[i]] = i;
  }

  switch (Opcode) {
    case Instruction::ZExt:
    case Instruction::SExt:
    case Instruction::FPToUI:
    case Instruction::FPToSI:
    case Instruction::FPExt:
    case Instruction::PtrToInt:
    case Instruction::IntToPtr:
    case Instruction::SIToFP:
    case Instruction::UIToFP:
    case Instruction::Trunc:
    case Instruction::FPTrunc:
    case Instruction::BitCast:
    case Instruction::Add:
    case Instruction::FAdd:
    case Instruction::Sub:
    case Instruction::FSub:
    case Instruction::Mul:
    case Instruction::FMul:
    case Instruction::UDiv:
    case Instruction::SDiv:
    case Instruction::FDiv:
    case Instruction::URem:
    case Instruction::SRem:
    case Instruction::FRem:
    case Instruction::Shl:
    case Instruction::LShr:
    case Instruction::AShr:
    case Instruction::And:
    case Instruction::Or:
    case Instruction::Xor: {
      for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
        ValueList Operands;
        // Prepare the operand vector.
        for (unsigned j = 0; j < VL.size(); ++j)
          Operands.push_back(cast<Instruction>(VL[j])->getOperand(i));

        getTreeUses_rec(Operands, Depth+1);
      }
      return;
    }
    case Instruction::Store: {
      ValueList Operands;
      for (unsigned j = 0; j < VL.size(); ++j)
        Operands.push_back(cast<Instruction>(VL[j])->getOperand(0));
      getTreeUses_rec(Operands, Depth+1);
      return;
    }
    default:
      return;
  }
}

int BoUpSLP::getTreeCost_rec(ArrayRef<Value *> VL, unsigned Depth) {
  Type *ScalarTy = VL[0]->getType();

  if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
    ScalarTy = SI->getValueOperand()->getType();

  // Don't mess with vectors.
  if (ScalarTy->isVectorTy()) return max_cost;
  VectorType *VecTy = VectorType::get(ScalarTy, VL.size());

  if (Depth == RecursionMaxDepth) return getScalarizationCost(VecTy);

  // Check if all of the operands are constants.
  bool AllConst = true;
  bool AllSameScalar = true;
  bool MustScalarizeFlag = false;
  for (unsigned i = 0, e = VL.size(); i < e; ++i) {
    AllConst &= isa<Constant>(VL[i]);
    AllSameScalar &= (VL[0] == VL[i]);
    Instruction *I = dyn_cast<Instruction>(VL[i]);
    // Check if this value was marked as one that must stay scalar.
    MustScalarizeFlag |= MustScalarize.count(VL[i]);
    // If this instruction is outside the basic block we must scalarize.
    if (I && I->getParent() != BB)
      return getScalarizationCost(VecTy);
  }

  // Is this a simple vector constant?
  if (AllConst) return 0;

  // If all of the operands are identical we can broadcast them.
  Instruction *VL0 = dyn_cast<Instruction>(VL[0]);
  if (AllSameScalar) {
    // If we are in a loop, and this is not an instruction (e.g. constant or
    // argument) or the instruction is defined outside the loop then assume
    // that the cost is zero.
    if (L && (!VL0 || !L->contains(VL0)))
      return 0;

    // We need to broadcast the scalar.
    return TTI->getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy, 0);
  }

  // If one of the values must stay scalar (e.g. it has users outside of the
  // tree or in another lane) then we need to scalarize the whole group.
  if (MustScalarizeFlag)
    return getScalarizationCost(VecTy);

  if (!VL0) return getScalarizationCost(VecTy);
  assert(VL0->getParent() == BB && "Wrong BB");

  unsigned Opcode = VL0->getOpcode();
  for (unsigned i = 0, e = VL.size(); i < e; ++i) {
    Instruction *I = dyn_cast<Instruction>(VL[i]);
    // If not all of the instructions are identical then we have to scalarize.
    if (!I || Opcode != I->getOpcode()) return getScalarizationCost(VecTy);
  }

  // Check if it is safe to sink the loads or the stores.
  if (Opcode == Instruction::Load || Opcode == Instruction::Store) {
    int MaxIdx = InstrIdx[VL0];
    for (unsigned i = 1, e = VL.size(); i < e; ++i)
      MaxIdx = std::max(MaxIdx, InstrIdx[VL[i]]);

    Instruction *Last = InstrVec[MaxIdx];
    for (unsigned i = 0, e = VL.size(); i < e; ++i) {
      if (VL[i] == Last) continue;
      Value *Barrier = isUnsafeToSink(cast<Instruction>(VL[i]), Last);
      if (Barrier) {
        DEBUG(dbgs() << "SLP: Can't sink " << *VL[i] << "\n down to " <<
              *Last << "\n because of " << *Barrier << "\n");
        return max_cost;
      }
    }
  }

  switch (Opcode) {
  case Instruction::ZExt:
  case Instruction::SExt:
  case Instruction::FPToUI:
  case Instruction::FPToSI:
  case Instruction::FPExt:
  case Instruction::PtrToInt:
  case Instruction::IntToPtr:
  case Instruction::SIToFP:
  case Instruction::UIToFP:
  case Instruction::Trunc:
  case Instruction::FPTrunc:
  case Instruction::BitCast: {
    int Cost = 0;
    ValueList Operands;
    Type *SrcTy = VL0->getOperand(0)->getType();
    // Prepare the operand vector.
    for (unsigned j = 0; j < VL.size(); ++j) {
      Operands.push_back(cast<Instruction>(VL[j])->getOperand(0));
      // Check that the casted type is the same for all users.
      if (cast<Instruction>(VL[j])->getOperand(0)->getType() != SrcTy)
        return getScalarizationCost(VecTy);
    }

    Cost += getTreeCost_rec(Operands, Depth+1);
    if (Cost >= max_cost) return max_cost;

    // Calculate the cost of this instruction.
    int ScalarCost = VL.size() * TTI->getCastInstrCost(VL0->getOpcode(),
                                                       VL0->getType(), SrcTy);

    VectorType *SrcVecTy = VectorType::get(SrcTy, VL.size());
    int VecCost = TTI->getCastInstrCost(VL0->getOpcode(), VecTy, SrcVecTy);
    Cost += (VecCost - ScalarCost);
    return Cost;
  }
  case Instruction::Add:
  case Instruction::FAdd:
  case Instruction::Sub:
  case Instruction::FSub:
  case Instruction::Mul:
  case Instruction::FMul:
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::FDiv:
  case Instruction::URem:
  case Instruction::SRem:
  case Instruction::FRem:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor: {
    int Cost = 0;
    // Calculate the cost of all of the operands.
    for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
      ValueList Operands;
      // Prepare the operand vector.
      for (unsigned j = 0; j < VL.size(); ++j)
        Operands.push_back(cast<Instruction>(VL[j])->getOperand(i));

      Cost += getTreeCost_rec(Operands, Depth+1);
      if (Cost >= max_cost) return max_cost;
    }

    // Calculate the cost of this instruction.
    int ScalarCost = VecTy->getNumElements() *
      TTI->getArithmeticInstrCost(Opcode, ScalarTy);

    int VecCost = TTI->getArithmeticInstrCost(Opcode, VecTy);
    Cost += (VecCost - ScalarCost);
    return Cost;
  }
  case Instruction::Load: {
    // If we have to scalarize the loads, add the cost of forming the vector.
    for (unsigned i = 0, e = VL.size()-1; i < e; ++i)
      if (!isConsecutiveAccess(VL[i], VL[i+1]))
        return getScalarizationCost(VecTy);

    // Cost of wide load - cost of scalar loads.
    int ScalarLdCost = VecTy->getNumElements() *
      TTI->getMemoryOpCost(Instruction::Load, ScalarTy, 1, 0);
    int VecLdCost = TTI->getMemoryOpCost(Instruction::Load, VecTy, 1, 0);
    return VecLdCost - ScalarLdCost;
  }
  case Instruction::Store: {
    // We know that we can merge the stores. Calculate the cost.
    int ScalarStCost = VecTy->getNumElements() *
      TTI->getMemoryOpCost(Instruction::Store, ScalarTy, 1, 0);
    int VecStCost = TTI->getMemoryOpCost(Instruction::Store, VecTy, 1, 0);
    int StoreCost = VecStCost - ScalarStCost;

    ValueList Operands;
    for (unsigned j = 0; j < VL.size(); ++j) {
      Operands.push_back(cast<Instruction>(VL[j])->getOperand(0));
      MemBarrierIgnoreList.insert(VL[j]);
    }

    int TotalCost = StoreCost + getTreeCost_rec(Operands, Depth + 1);
    return TotalCost;
  }
  default:
    // Unable to vectorize unknown instructions.
    return getScalarizationCost(VecTy);
  }
}

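// End-to-end cost sketch (all numbers hypothetical; real ones come from
// TargetTransformInfo): for four i32 stores fed by four adds fed by two
// groups of consecutive loads, each level contributes VecCost - ScalarCost,
// e.g. stores: 1 - 4 = -3, adds: 1 - 4 = -3, loads: 2 * (1 - 4) = -6, giving
// a tree cost of -12, which clears the CostThreshold in vectorizeStoreChain.
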
Instruction *BoUpSLP::GetLastInstr(ArrayRef<Value *> VL, unsigned VF) {
  int MaxIdx = InstrIdx[BB->getFirstNonPHI()];
  for (unsigned i = 0; i < VF; ++i)
    MaxIdx = std::max(MaxIdx, InstrIdx[VL[i]]);
  return InstrVec[MaxIdx + 1];
}

Value *BoUpSLP::Scalarize(ArrayRef<Value *> VL, VectorType *Ty) {
  IRBuilder<> Builder(GetLastInstr(VL, Ty->getNumElements()));
  Value *Vec = UndefValue::get(Ty);
  for (unsigned i = 0; i < Ty->getNumElements(); ++i) {
    // Generate the 'InsertElement' instruction.
    Vec = Builder.CreateInsertElement(Vec, VL[i], Builder.getInt32(i));
    // Remember that this instruction is used as part of a 'gather' sequence.
    // The caller of the bottom-up slp vectorizer can try to hoist the sequence
    // if the users are outside of the basic block.
    GatherInstructions.push_back(Vec);
  }

  return Vec;
}

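// The gather sequence emitted above looks like this for VF = 2 (a sketch):
//   %g0 = insertelement <2 x i32> undef, i32 %a, i32 0
//   %g1 = insertelement <2 x i32> %g0, i32 %b, i32 1
// Both instructions are recorded in GatherInstructions so that the caller can
// hoist the whole sequence when %a and %b are defined elsewhere.
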
Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL, int VF) {
  Value *V = vectorizeTree_rec(VL, VF);
  // We moved some instructions around. We have to number them again
  // before we can do any analysis.
  numberInstructions();
  MustScalarize.clear();
  return V;
}

Value *BoUpSLP::vectorizeTree_rec(ArrayRef<Value *> VL, int VF) {
  Type *ScalarTy = VL[0]->getType();
  if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
    ScalarTy = SI->getValueOperand()->getType();
  VectorType *VecTy = VectorType::get(ScalarTy, VF);

  // Check if all of the operands are constants or identical.
  bool AllConst = true;
  bool AllSameScalar = true;
  for (unsigned i = 0, e = VF; i < e; ++i) {
    AllConst &= isa<Constant>(VL[i]);
    AllSameScalar &= (VL[0] == VL[i]);
    // The instruction must be in the same BB, and it must be vectorizable.
    Instruction *I = dyn_cast<Instruction>(VL[i]);
    if (MustScalarize.count(VL[i]) || (I && I->getParent() != BB))
      return Scalarize(VL, VecTy);
  }

  // Gather constants and splats with an insertelement sequence.
  if (AllConst || AllSameScalar) return Scalarize(VL, VecTy);

  // Scalarize unknown structures.
  Instruction *VL0 = dyn_cast<Instruction>(VL[0]);
  if (!VL0) return Scalarize(VL, VecTy);

  if (VectorizedValues.count(VL0)) return VectorizedValues[VL0];

  unsigned Opcode = VL0->getOpcode();
  for (unsigned i = 0, e = VF; i < e; ++i) {
    Instruction *I = dyn_cast<Instruction>(VL[i]);
    // If not all of the instructions are identical then we have to scalarize.
    if (!I || Opcode != I->getOpcode()) return Scalarize(VL, VecTy);
  }

  switch (Opcode) {
  case Instruction::ZExt:
  case Instruction::SExt:
  case Instruction::FPToUI:
  case Instruction::FPToSI:
  case Instruction::FPExt:
  case Instruction::PtrToInt:
  case Instruction::IntToPtr:
  case Instruction::SIToFP:
  case Instruction::UIToFP:
  case Instruction::Trunc:
  case Instruction::FPTrunc:
  case Instruction::BitCast: {
    ValueList INVL;
    for (int i = 0; i < VF; ++i)
      INVL.push_back(cast<Instruction>(VL[i])->getOperand(0));
    Value *InVec = vectorizeTree_rec(INVL, VF);
    IRBuilder<> Builder(GetLastInstr(VL, VF));
    CastInst *CI = cast<CastInst>(VL0);
    Value *V = Builder.CreateCast(CI->getOpcode(), InVec, VecTy);
    VectorizedValues[VL0] = V;
    return V;
  }
  case Instruction::Add:
  case Instruction::FAdd:
  case Instruction::Sub:
  case Instruction::FSub:
  case Instruction::Mul:
  case Instruction::FMul:
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::FDiv:
  case Instruction::URem:
  case Instruction::SRem:
  case Instruction::FRem:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor: {
    ValueList LHSVL, RHSVL;
    for (int i = 0; i < VF; ++i) {
      LHSVL.push_back(cast<Instruction>(VL[i])->getOperand(0));
      RHSVL.push_back(cast<Instruction>(VL[i])->getOperand(1));
    }

    Value *LHS = vectorizeTree_rec(LHSVL, VF);
    Value *RHS = vectorizeTree_rec(RHSVL, VF);
    IRBuilder<> Builder(GetLastInstr(VL, VF));
    BinaryOperator *BinOp = cast<BinaryOperator>(VL0);
    Value *V = Builder.CreateBinOp(BinOp->getOpcode(), LHS, RHS);
    VectorizedValues[VL0] = V;
    return V;
  }
  case Instruction::Load: {
    LoadInst *LI = cast<LoadInst>(VL0);
    unsigned Alignment = LI->getAlignment();

    // Check if all of the loads are consecutive.
    for (unsigned i = 1, e = VF; i < e; ++i)
      if (!isConsecutiveAccess(VL[i-1], VL[i]))
        return Scalarize(VL, VecTy);

    IRBuilder<> Builder(GetLastInstr(VL, VF));
    Value *VecPtr = Builder.CreateBitCast(LI->getPointerOperand(),
                                          VecTy->getPointerTo());
    LI = Builder.CreateLoad(VecPtr);
    LI->setAlignment(Alignment);
    VectorizedValues[VL0] = LI;
    return LI;
  }
  case Instruction::Store: {
    StoreInst *SI = cast<StoreInst>(VL0);
    unsigned Alignment = SI->getAlignment();

    ValueList ValueOp;
    for (int i = 0; i < VF; ++i)
      ValueOp.push_back(cast<StoreInst>(VL[i])->getValueOperand());

    Value *VecValue = vectorizeTree_rec(ValueOp, VF);

    IRBuilder<> Builder(GetLastInstr(VL, VF));
    Value *VecPtr = Builder.CreateBitCast(SI->getPointerOperand(),
                                          VecTy->getPointerTo());
    Builder.CreateStore(VecValue, VecPtr)->setAlignment(Alignment);

    for (int i = 0; i < VF; ++i)
      cast<Instruction>(VL[i])->eraseFromParent();
    return 0;
  }
  default:
    Value *S = Scalarize(VL, VecTy);
    VectorizedValues[VL0] = S;
    return S;
  }
}

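// Putting it together, an illustrative sketch of the output (not from a real
// run): vectorizing the chain a[i] = b[i] + c[i] for i = 0..3 with VF = 4
// replaces four scalar load/add/store triples with roughly
//   %vb = load <4 x i32>* %pb, align 4
//   %vc = load <4 x i32>* %pc, align 4
//   %vs = add <4 x i32> %vb, %vc
//   store <4 x i32> %vs, <4 x i32>* %pa, align 4
// where %pa, %pb and %pc are the first scalar pointers bitcast to <4 x i32>*.
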
} // end of namespace llvm