1251607Sdim//===- VecUtils.cpp --- Vectorization Utilities ---------------------------===//
2251607Sdim//
3251607Sdim//                     The LLVM Compiler Infrastructure
4251607Sdim//
5251607Sdim// This file is distributed under the University of Illinois Open Source
6251607Sdim// License. See LICENSE.TXT for details.
7251607Sdim//
8251607Sdim//===----------------------------------------------------------------------===//
9251607Sdim#define DEBUG_TYPE "SLP"
10251607Sdim
11251607Sdim#include "VecUtils.h"
12251607Sdim#include "llvm/ADT/DenseMap.h"
13251607Sdim#include "llvm/ADT/SmallPtrSet.h"
14251607Sdim#include "llvm/ADT/SmallSet.h"
15251607Sdim#include "llvm/ADT/SmallVector.h"
16251607Sdim#include "llvm/Analysis/AliasAnalysis.h"
17251607Sdim#include "llvm/Analysis/ScalarEvolution.h"
18251607Sdim#include "llvm/Analysis/ScalarEvolutionExpressions.h"
19251607Sdim#include "llvm/Analysis/TargetTransformInfo.h"
20251607Sdim#include "llvm/Analysis/Verifier.h"
21251607Sdim#include "llvm/Analysis/LoopInfo.h"
22251607Sdim#include "llvm/IR/Constants.h"
23251607Sdim#include "llvm/IR/DataLayout.h"
24251607Sdim#include "llvm/IR/Function.h"
25251607Sdim#include "llvm/IR/Instructions.h"
26251607Sdim#include "llvm/IR/Module.h"
27251607Sdim#include "llvm/IR/Type.h"
28251607Sdim#include "llvm/IR/Value.h"
29251607Sdim#include "llvm/Pass.h"
30251607Sdim#include "llvm/Support/CommandLine.h"
31251607Sdim#include "llvm/Support/Debug.h"
32251607Sdim#include "llvm/Support/raw_ostream.h"
33251607Sdim#include "llvm/Target/TargetLibraryInfo.h"
34251607Sdim#include "llvm/Transforms/Scalar.h"
35251607Sdim#include "llvm/Transforms/Utils/Local.h"
36251607Sdim#include <algorithm>
37251607Sdim#include <map>
38251607Sdim
39251607Sdimusing namespace llvm;
40251607Sdim
41251607Sdimstatic const unsigned MinVecRegSize = 128;
42251607Sdim
43251607Sdimstatic const unsigned RecursionMaxDepth = 6;
44251607Sdim
45251607Sdimnamespace llvm {
46251607Sdim
47251607SdimBoUpSLP::BoUpSLP(BasicBlock *Bb, ScalarEvolution *S, DataLayout *Dl,
48251607Sdim                 TargetTransformInfo *Tti, AliasAnalysis *Aa, Loop *Lp) :
49251607Sdim  BB(Bb), SE(S), DL(Dl), TTI(Tti), AA(Aa), L(Lp)  {
50251607Sdim  numberInstructions();
51251607Sdim}
52251607Sdim
53251607Sdimvoid BoUpSLP::numberInstructions() {
54251607Sdim  int Loc = 0;
55251607Sdim  InstrIdx.clear();
56251607Sdim  InstrVec.clear();
57251607Sdim  // Number the instructions in the block.
58251607Sdim  for (BasicBlock::iterator it=BB->begin(), e=BB->end(); it != e; ++it) {
59251607Sdim    InstrIdx[it] = Loc++;
60251607Sdim    InstrVec.push_back(it);
61251607Sdim    assert(InstrVec[InstrIdx[it]] == it && "Invalid allocation");
62251607Sdim  }
63251607Sdim}
64251607Sdim
65251607SdimValue *BoUpSLP::getPointerOperand(Value *I) {
66251607Sdim  if (LoadInst *LI = dyn_cast<LoadInst>(I)) return LI->getPointerOperand();
67251607Sdim  if (StoreInst *SI = dyn_cast<StoreInst>(I)) return SI->getPointerOperand();
68251607Sdim  return 0;
69251607Sdim}
70251607Sdim
71251607Sdimunsigned BoUpSLP::getAddressSpaceOperand(Value *I) {
72251607Sdim  if (LoadInst *L=dyn_cast<LoadInst>(I)) return L->getPointerAddressSpace();
73251607Sdim  if (StoreInst *S=dyn_cast<StoreInst>(I)) return S->getPointerAddressSpace();
74251607Sdim  return -1;
75251607Sdim}
76251607Sdim
77251607Sdimbool BoUpSLP::isConsecutiveAccess(Value *A, Value *B) {
78251607Sdim  Value *PtrA = getPointerOperand(A);
79251607Sdim  Value *PtrB = getPointerOperand(B);
80251607Sdim  unsigned ASA = getAddressSpaceOperand(A);
81251607Sdim  unsigned ASB = getAddressSpaceOperand(B);
82251607Sdim
83251607Sdim  // Check that the address spaces match and that the pointers are valid.
84251607Sdim  if (!PtrA || !PtrB || (ASA != ASB)) return false;
85251607Sdim
86251607Sdim  // Check that A and B are of the same type.
87251607Sdim  if (PtrA->getType() != PtrB->getType()) return false;
88251607Sdim
89251607Sdim  // Calculate the distance.
90251607Sdim  const SCEV *PtrSCEVA = SE->getSCEV(PtrA);
91251607Sdim  const SCEV *PtrSCEVB = SE->getSCEV(PtrB);
92251607Sdim  const SCEV *OffsetSCEV = SE->getMinusSCEV(PtrSCEVA, PtrSCEVB);
93251607Sdim  const SCEVConstant *ConstOffSCEV = dyn_cast<SCEVConstant>(OffsetSCEV);
94251607Sdim
95251607Sdim  // Non constant distance.
96251607Sdim  if (!ConstOffSCEV) return false;
97251607Sdim
98251607Sdim  int64_t Offset = ConstOffSCEV->getValue()->getSExtValue();
99251607Sdim  Type *Ty = cast<PointerType>(PtrA->getType())->getElementType();
100251607Sdim  // The Instructions are connsecutive if the size of the first load/store is
101251607Sdim  // the same as the offset.
102251607Sdim  int64_t Sz = DL->getTypeStoreSize(Ty);
103251607Sdim  return ((-Offset) == Sz);
104251607Sdim}
105251607Sdim
106251607Sdimbool BoUpSLP::vectorizeStoreChain(ArrayRef<Value *> Chain, int CostThreshold) {
107251607Sdim  Type *StoreTy = cast<StoreInst>(Chain[0])->getValueOperand()->getType();
108251607Sdim  unsigned Sz = DL->getTypeSizeInBits(StoreTy);
109251607Sdim  unsigned VF = MinVecRegSize / Sz;
110251607Sdim
111251607Sdim  if (!isPowerOf2_32(Sz) || VF < 2) return false;
112251607Sdim
113251607Sdim  bool Changed = false;
114251607Sdim  // Look for profitable vectorizable trees at all offsets, starting at zero.
115251607Sdim  for (unsigned i = 0, e = Chain.size(); i < e; ++i) {
116251607Sdim    if (i + VF > e) return Changed;
117251607Sdim    DEBUG(dbgs()<<"SLP: Analyzing " << VF << " stores at offset "<< i << "\n");
118251607Sdim    ArrayRef<Value *> Operands = Chain.slice(i, VF);
119251607Sdim
120251607Sdim    int Cost = getTreeCost(Operands);
121251607Sdim    DEBUG(dbgs() << "SLP: Found cost=" << Cost << " for VF=" << VF << "\n");
122251607Sdim    if (Cost < CostThreshold) {
123251607Sdim      DEBUG(dbgs() << "SLP: Decided to vectorize cost=" << Cost << "\n");
124251607Sdim      vectorizeTree(Operands, VF);
125251607Sdim      i += VF - 1;
126251607Sdim      Changed = true;
127251607Sdim    }
128251607Sdim  }
129251607Sdim
130251607Sdim  return Changed;
131251607Sdim}
132251607Sdim
133251607Sdimbool BoUpSLP::vectorizeStores(ArrayRef<StoreInst *> Stores, int costThreshold) {
134251607Sdim  ValueSet Heads, Tails;
135251607Sdim  SmallDenseMap<Value*, Value*> ConsecutiveChain;
136251607Sdim
137251607Sdim  // We may run into multiple chains that merge into a single chain. We mark the
138251607Sdim  // stores that we vectorized so that we don't visit the same store twice.
139251607Sdim  ValueSet VectorizedStores;
140251607Sdim  bool Changed = false;
141251607Sdim
142251607Sdim  // Do a quadratic search on all of the given stores and find
143251607Sdim  // all of the pairs of loads that follow each other.
144251607Sdim  for (unsigned i = 0, e = Stores.size(); i < e; ++i)
145251607Sdim    for (unsigned j = 0; j < e; ++j) {
146251607Sdim      if (i == j) continue;
147251607Sdim      if (isConsecutiveAccess(Stores[i], Stores[j])) {
148251607Sdim        Tails.insert(Stores[j]);
149251607Sdim        Heads.insert(Stores[i]);
150251607Sdim        ConsecutiveChain[Stores[i]] = Stores[j];
151251607Sdim      }
152251607Sdim    }
153251607Sdim
154251607Sdim  // For stores that start but don't end a link in the chain:
155251607Sdim  for (ValueSet::iterator it = Heads.begin(), e = Heads.end();it != e; ++it) {
156251607Sdim    if (Tails.count(*it)) continue;
157251607Sdim
158251607Sdim    // We found a store instr that starts a chain. Now follow the chain and try
159251607Sdim    // to vectorize it.
160251607Sdim    ValueList Operands;
161251607Sdim    Value *I = *it;
162251607Sdim    // Collect the chain into a list.
163251607Sdim    while (Tails.count(I) || Heads.count(I)) {
164251607Sdim      if (VectorizedStores.count(I)) break;
165251607Sdim      Operands.push_back(I);
166251607Sdim      // Move to the next value in the chain.
167251607Sdim      I = ConsecutiveChain[I];
168251607Sdim    }
169251607Sdim
170251607Sdim    bool Vectorized = vectorizeStoreChain(Operands, costThreshold);
171251607Sdim
172251607Sdim    // Mark the vectorized stores so that we don't vectorize them again.
173251607Sdim    if (Vectorized)
174251607Sdim      VectorizedStores.insert(Operands.begin(), Operands.end());
175251607Sdim    Changed |= Vectorized;
176251607Sdim  }
177251607Sdim
178251607Sdim  return Changed;
179251607Sdim}
180251607Sdim
181251607Sdimint BoUpSLP::getScalarizationCost(ArrayRef<Value *> VL) {
182251607Sdim  // Find the type of the operands in VL.
183251607Sdim  Type *ScalarTy = VL[0]->getType();
184251607Sdim  if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
185251607Sdim    ScalarTy = SI->getValueOperand()->getType();
186251607Sdim  VectorType *VecTy = VectorType::get(ScalarTy, VL.size());
187251607Sdim  // Find the cost of inserting/extracting values from the vector.
188251607Sdim  return getScalarizationCost(VecTy);
189251607Sdim}
190251607Sdim
191251607Sdimint BoUpSLP::getScalarizationCost(Type *Ty) {
192251607Sdim  int Cost = 0;
193251607Sdim  for (unsigned i = 0, e = cast<VectorType>(Ty)->getNumElements(); i < e; ++i)
194251607Sdim    Cost += TTI->getVectorInstrCost(Instruction::InsertElement, Ty, i);
195251607Sdim  return Cost;
196251607Sdim}
197251607Sdim
198251607SdimAliasAnalysis::Location BoUpSLP::getLocation(Instruction *I) {
199251607Sdim  if (StoreInst *SI = dyn_cast<StoreInst>(I)) return AA->getLocation(SI);
200251607Sdim  if (LoadInst *LI = dyn_cast<LoadInst>(I)) return AA->getLocation(LI);
201251607Sdim  return AliasAnalysis::Location();
202251607Sdim}
203251607Sdim
204251607SdimValue *BoUpSLP::isUnsafeToSink(Instruction *Src, Instruction *Dst) {
205251607Sdim  assert(Src->getParent() == Dst->getParent() && "Not the same BB");
206251607Sdim  BasicBlock::iterator I = Src, E = Dst;
207251607Sdim  /// Scan all of the instruction from SRC to DST and check if
208251607Sdim  /// the source may alias.
209251607Sdim  for (++I; I != E; ++I) {
210251607Sdim    // Ignore store instructions that are marked as 'ignore'.
211251607Sdim    if (MemBarrierIgnoreList.count(I)) continue;
212251607Sdim    if (Src->mayWriteToMemory()) /* Write */ {
213251607Sdim      if (!I->mayReadOrWriteMemory()) continue;
214251607Sdim    } else /* Read */ {
215251607Sdim      if (!I->mayWriteToMemory()) continue;
216251607Sdim    }
217251607Sdim    AliasAnalysis::Location A = getLocation(&*I);
218251607Sdim    AliasAnalysis::Location B = getLocation(Src);
219251607Sdim
220251607Sdim    if (!A.Ptr || !B.Ptr || AA->alias(A, B))
221251607Sdim      return I;
222251607Sdim  }
223251607Sdim  return 0;
224251607Sdim}
225251607Sdim
226251607Sdimvoid BoUpSLP::vectorizeArith(ArrayRef<Value *> Operands) {
227251607Sdim  Value *Vec = vectorizeTree(Operands, Operands.size());
228251607Sdim  BasicBlock::iterator Loc = cast<Instruction>(Vec);
229251607Sdim  IRBuilder<> Builder(++Loc);
230251607Sdim  // After vectorizing the operands we need to generate extractelement
231251607Sdim  // instructions and replace all of the uses of the scalar values with
232251607Sdim  // the values that we extracted from the vectorized tree.
233251607Sdim  for (unsigned i = 0, e = Operands.size(); i != e; ++i) {
234251607Sdim    Value *S = Builder.CreateExtractElement(Vec, Builder.getInt32(i));
235251607Sdim    Operands[i]->replaceAllUsesWith(S);
236251607Sdim  }
237251607Sdim}
238251607Sdim
239251607Sdimint BoUpSLP::getTreeCost(ArrayRef<Value *> VL) {
240251607Sdim  // Get rid of the list of stores that were removed, and from the
241251607Sdim  // lists of instructions with multiple users.
242251607Sdim  MemBarrierIgnoreList.clear();
243251607Sdim  LaneMap.clear();
244251607Sdim  MultiUserVals.clear();
245251607Sdim  MustScalarize.clear();
246251607Sdim
247251607Sdim  // Scan the tree and find which value is used by which lane, and which values
248251607Sdim  // must be scalarized.
249251607Sdim  getTreeUses_rec(VL, 0);
250251607Sdim
251251607Sdim  // Check that instructions with multiple users can be vectorized. Mark unsafe
252251607Sdim  // instructions.
253251607Sdim  for (ValueSet::iterator it = MultiUserVals.begin(),
254251607Sdim       e = MultiUserVals.end(); it != e; ++it) {
255251607Sdim    // Check that all of the users of this instr are within the tree
256251607Sdim    // and that they are all from the same lane.
257251607Sdim    int Lane = -1;
258251607Sdim    for (Value::use_iterator I = (*it)->use_begin(), E = (*it)->use_end();
259251607Sdim         I != E; ++I) {
260251607Sdim      if (LaneMap.find(*I) == LaneMap.end()) {
261251607Sdim        MustScalarize.insert(*it);
262251607Sdim        DEBUG(dbgs()<<"SLP: Adding " << **it <<
263251607Sdim              " to MustScalarize because of an out of tree usage.\n");
264251607Sdim        break;
265251607Sdim      }
266251607Sdim      if (Lane == -1) Lane = LaneMap[*I];
267251607Sdim      if (Lane != LaneMap[*I]) {
268251607Sdim        MustScalarize.insert(*it);
269251607Sdim        DEBUG(dbgs()<<"Adding " << **it <<
270251607Sdim              " to MustScalarize because multiple lane use it: "
271251607Sdim              << Lane << " and " << LaneMap[*I] << ".\n");
272251607Sdim        break;
273251607Sdim      }
274251607Sdim    }
275251607Sdim  }
276251607Sdim
277251607Sdim  // Now calculate the cost of vectorizing the tree.
278251607Sdim  return getTreeCost_rec(VL, 0);
279251607Sdim}
280251607Sdim
281251607Sdimvoid BoUpSLP::getTreeUses_rec(ArrayRef<Value *> VL, unsigned Depth) {
282251607Sdim  if (Depth == RecursionMaxDepth) return;
283251607Sdim
284251607Sdim  // Don't handle vectors.
285251607Sdim  if (VL[0]->getType()->isVectorTy()) return;
286251607Sdim  if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
287251607Sdim    if (SI->getValueOperand()->getType()->isVectorTy()) return;
288251607Sdim
289251607Sdim  // Check if all of the operands are constants.
290251607Sdim  bool AllConst = true;
291251607Sdim  bool AllSameScalar = true;
292251607Sdim  for (unsigned i = 0, e = VL.size(); i < e; ++i) {
293251607Sdim    AllConst &= isa<Constant>(VL[i]);
294251607Sdim    AllSameScalar &= (VL[0] == VL[i]);
295251607Sdim    Instruction *I = dyn_cast<Instruction>(VL[i]);
296251607Sdim    // If one of the instructions is out of this BB, we need to scalarize all.
297251607Sdim    if (I && I->getParent() != BB) return;
298251607Sdim  }
299251607Sdim
300251607Sdim  // If all of the operands are identical or constant we have a simple solution.
301251607Sdim  if (AllConst || AllSameScalar) return;
302251607Sdim
303251607Sdim  // Scalarize unknown structures.
304251607Sdim  Instruction *VL0 = dyn_cast<Instruction>(VL[0]);
305251607Sdim  if (!VL0) return;
306251607Sdim
307251607Sdim  unsigned Opcode = VL0->getOpcode();
308251607Sdim  for (unsigned i = 0, e = VL.size(); i < e; ++i) {
309251607Sdim    Instruction *I = dyn_cast<Instruction>(VL[i]);
310251607Sdim    // If not all of the instructions are identical then we have to scalarize.
311251607Sdim    if (!I || Opcode != I->getOpcode()) return;
312251607Sdim  }
313251607Sdim
314251607Sdim  // Mark instructions with multiple users.
315251607Sdim  for (unsigned i = 0, e = VL.size(); i < e; ++i) {
316251607Sdim    Instruction *I = dyn_cast<Instruction>(VL[i]);
317251607Sdim    // Remember to check if all of the users of this instr are vectorized
318251607Sdim    // within our tree.
319251607Sdim    if (I && I->getNumUses() > 1) MultiUserVals.insert(I);
320251607Sdim  }
321251607Sdim
322251607Sdim  for (int i = 0, e = VL.size(); i < e; ++i) {
323251607Sdim    // Check that the instruction is only used within
324251607Sdim    // one lane.
325251607Sdim    if (LaneMap.count(VL[i]) && LaneMap[VL[i]] != i) return;
326251607Sdim    // Make this instruction as 'seen' and remember the lane.
327251607Sdim    LaneMap[VL[i]] = i;
328251607Sdim  }
329251607Sdim
330251607Sdim  switch (Opcode) {
331251607Sdim    case Instruction::ZExt:
332251607Sdim    case Instruction::SExt:
333251607Sdim    case Instruction::FPToUI:
334251607Sdim    case Instruction::FPToSI:
335251607Sdim    case Instruction::FPExt:
336251607Sdim    case Instruction::PtrToInt:
337251607Sdim    case Instruction::IntToPtr:
338251607Sdim    case Instruction::SIToFP:
339251607Sdim    case Instruction::UIToFP:
340251607Sdim    case Instruction::Trunc:
341251607Sdim    case Instruction::FPTrunc:
342251607Sdim    case Instruction::BitCast:
343251607Sdim    case Instruction::Add:
344251607Sdim    case Instruction::FAdd:
345251607Sdim    case Instruction::Sub:
346251607Sdim    case Instruction::FSub:
347251607Sdim    case Instruction::Mul:
348251607Sdim    case Instruction::FMul:
349251607Sdim    case Instruction::UDiv:
350251607Sdim    case Instruction::SDiv:
351251607Sdim    case Instruction::FDiv:
352251607Sdim    case Instruction::URem:
353251607Sdim    case Instruction::SRem:
354251607Sdim    case Instruction::FRem:
355251607Sdim    case Instruction::Shl:
356251607Sdim    case Instruction::LShr:
357251607Sdim    case Instruction::AShr:
358251607Sdim    case Instruction::And:
359251607Sdim    case Instruction::Or:
360251607Sdim    case Instruction::Xor: {
361251607Sdim      for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
362251607Sdim        ValueList Operands;
363251607Sdim        // Prepare the operand vector.
364251607Sdim        for (unsigned j = 0; j < VL.size(); ++j)
365251607Sdim          Operands.push_back(cast<Instruction>(VL[j])->getOperand(i));
366251607Sdim
367251607Sdim        getTreeUses_rec(Operands, Depth+1);
368251607Sdim      }
369251607Sdim      return;
370251607Sdim    }
371251607Sdim    case Instruction::Store: {
372251607Sdim      ValueList Operands;
373251607Sdim      for (unsigned j = 0; j < VL.size(); ++j)
374251607Sdim        Operands.push_back(cast<Instruction>(VL[j])->getOperand(0));
375251607Sdim      getTreeUses_rec(Operands, Depth+1);
376251607Sdim      return;
377251607Sdim    }
378251607Sdim    default:
379251607Sdim    return;
380251607Sdim  }
381251607Sdim}
382251607Sdim
383251607Sdimint BoUpSLP::getTreeCost_rec(ArrayRef<Value *> VL, unsigned Depth) {
384251607Sdim  Type *ScalarTy = VL[0]->getType();
385251607Sdim
386251607Sdim  if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
387251607Sdim    ScalarTy = SI->getValueOperand()->getType();
388251607Sdim
389251607Sdim  /// Don't mess with vectors.
390251607Sdim  if (ScalarTy->isVectorTy()) return max_cost;
391251607Sdim  VectorType *VecTy = VectorType::get(ScalarTy, VL.size());
392251607Sdim
393251607Sdim  if (Depth == RecursionMaxDepth) return getScalarizationCost(VecTy);
394251607Sdim
395251607Sdim  // Check if all of the operands are constants.
396251607Sdim  bool AllConst = true;
397251607Sdim  bool AllSameScalar = true;
398251607Sdim  bool MustScalarizeFlag = false;
399251607Sdim  for (unsigned i = 0, e = VL.size(); i < e; ++i) {
400251607Sdim    AllConst &= isa<Constant>(VL[i]);
401251607Sdim    AllSameScalar &= (VL[0] == VL[i]);
402251607Sdim    // Must have a single use.
403251607Sdim    Instruction *I = dyn_cast<Instruction>(VL[i]);
404251607Sdim    MustScalarizeFlag |= MustScalarize.count(VL[i]);
405251607Sdim    // This instruction is outside the basic block.
406251607Sdim    if (I && I->getParent() != BB)
407251607Sdim      return getScalarizationCost(VecTy);
408251607Sdim  }
409251607Sdim
410251607Sdim  // Is this a simple vector constant.
411251607Sdim  if (AllConst) return 0;
412251607Sdim
413251607Sdim  // If all of the operands are identical we can broadcast them.
414251607Sdim  Instruction *VL0 = dyn_cast<Instruction>(VL[0]);
415251607Sdim  if (AllSameScalar) {
416251607Sdim    // If we are in a loop, and this is not an instruction (e.g. constant or
417251607Sdim    // argument) or the instruction is defined outside the loop then assume
418251607Sdim    // that the cost is zero.
419251607Sdim    if (L && (!VL0 || !L->contains(VL0)))
420251607Sdim      return 0;
421251607Sdim
422251607Sdim    // We need to broadcast the scalar.
423251607Sdim    return TTI->getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy, 0);
424251607Sdim  }
425251607Sdim
426251607Sdim  // If this is not a constant, or a scalar from outside the loop then we
427251607Sdim  // need to scalarize it.
428251607Sdim  if (MustScalarizeFlag)
429251607Sdim    return getScalarizationCost(VecTy);
430251607Sdim
431251607Sdim  if (!VL0) return getScalarizationCost(VecTy);
432251607Sdim  assert(VL0->getParent() == BB && "Wrong BB");
433251607Sdim
434251607Sdim  unsigned Opcode = VL0->getOpcode();
435251607Sdim  for (unsigned i = 0, e = VL.size(); i < e; ++i) {
436251607Sdim    Instruction *I = dyn_cast<Instruction>(VL[i]);
437251607Sdim    // If not all of the instructions are identical then we have to scalarize.
438251607Sdim    if (!I || Opcode != I->getOpcode()) return getScalarizationCost(VecTy);
439251607Sdim  }
440251607Sdim
441251607Sdim  // Check if it is safe to sink the loads or the stores.
442251607Sdim  if (Opcode == Instruction::Load || Opcode == Instruction::Store) {
443251607Sdim    int MaxIdx = InstrIdx[VL0];
444251607Sdim    for (unsigned i = 1, e = VL.size(); i < e; ++i )
445251607Sdim      MaxIdx = std::max(MaxIdx, InstrIdx[VL[i]]);
446251607Sdim
447251607Sdim    Instruction *Last = InstrVec[MaxIdx];
448251607Sdim    for (unsigned i = 0, e = VL.size(); i < e; ++i ) {
449251607Sdim      if (VL[i] == Last) continue;
450251607Sdim      Value *Barrier = isUnsafeToSink(cast<Instruction>(VL[i]), Last);
451251607Sdim      if (Barrier) {
452251607Sdim        DEBUG(dbgs() << "SLP: Can't sink " << *VL[i] << "\n down to " <<
453251607Sdim              *Last << "\n because of " << *Barrier << "\n");
454251607Sdim        return max_cost;
455251607Sdim      }
456251607Sdim    }
457251607Sdim  }
458251607Sdim
459251607Sdim  switch (Opcode) {
460251607Sdim  case Instruction::ZExt:
461251607Sdim  case Instruction::SExt:
462251607Sdim  case Instruction::FPToUI:
463251607Sdim  case Instruction::FPToSI:
464251607Sdim  case Instruction::FPExt:
465251607Sdim  case Instruction::PtrToInt:
466251607Sdim  case Instruction::IntToPtr:
467251607Sdim  case Instruction::SIToFP:
468251607Sdim  case Instruction::UIToFP:
469251607Sdim  case Instruction::Trunc:
470251607Sdim  case Instruction::FPTrunc:
471251607Sdim  case Instruction::BitCast: {
472251607Sdim    int Cost = 0;
473251607Sdim    ValueList Operands;
474251607Sdim    Type *SrcTy = VL0->getOperand(0)->getType();
475251607Sdim    // Prepare the operand vector.
476251607Sdim    for (unsigned j = 0; j < VL.size(); ++j) {
477251607Sdim      Operands.push_back(cast<Instruction>(VL[j])->getOperand(0));
478251607Sdim      // Check that the casted type is the same for all users.
479251607Sdim      if (cast<Instruction>(VL[j])->getOperand(0)->getType() != SrcTy)
480251607Sdim        return getScalarizationCost(VecTy);
481251607Sdim    }
482251607Sdim
483251607Sdim    Cost += getTreeCost_rec(Operands, Depth+1);
484251607Sdim    if (Cost >= max_cost) return max_cost;
485251607Sdim
486251607Sdim    // Calculate the cost of this instruction.
487251607Sdim    int ScalarCost = VL.size() * TTI->getCastInstrCost(VL0->getOpcode(),
488251607Sdim                                                       VL0->getType(), SrcTy);
489251607Sdim
490251607Sdim    VectorType *SrcVecTy = VectorType::get(SrcTy, VL.size());
491251607Sdim    int VecCost = TTI->getCastInstrCost(VL0->getOpcode(), VecTy, SrcVecTy);
492251607Sdim    Cost += (VecCost - ScalarCost);
493251607Sdim    return Cost;
494251607Sdim  }
495251607Sdim  case Instruction::Add:
496251607Sdim  case Instruction::FAdd:
497251607Sdim  case Instruction::Sub:
498251607Sdim  case Instruction::FSub:
499251607Sdim  case Instruction::Mul:
500251607Sdim  case Instruction::FMul:
501251607Sdim  case Instruction::UDiv:
502251607Sdim  case Instruction::SDiv:
503251607Sdim  case Instruction::FDiv:
504251607Sdim  case Instruction::URem:
505251607Sdim  case Instruction::SRem:
506251607Sdim  case Instruction::FRem:
507251607Sdim  case Instruction::Shl:
508251607Sdim  case Instruction::LShr:
509251607Sdim  case Instruction::AShr:
510251607Sdim  case Instruction::And:
511251607Sdim  case Instruction::Or:
512251607Sdim  case Instruction::Xor: {
513251607Sdim    int Cost = 0;
514251607Sdim    // Calculate the cost of all of the operands.
515251607Sdim    for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
516251607Sdim      ValueList Operands;
517251607Sdim      // Prepare the operand vector.
518251607Sdim      for (unsigned j = 0; j < VL.size(); ++j)
519251607Sdim        Operands.push_back(cast<Instruction>(VL[j])->getOperand(i));
520251607Sdim
521251607Sdim      Cost += getTreeCost_rec(Operands, Depth+1);
522251607Sdim      if (Cost >= max_cost) return max_cost;
523251607Sdim    }
524251607Sdim
525251607Sdim    // Calculate the cost of this instruction.
526251607Sdim    int ScalarCost = VecTy->getNumElements() *
527251607Sdim      TTI->getArithmeticInstrCost(Opcode, ScalarTy);
528251607Sdim
529251607Sdim    int VecCost = TTI->getArithmeticInstrCost(Opcode, VecTy);
530251607Sdim    Cost += (VecCost - ScalarCost);
531251607Sdim    return Cost;
532251607Sdim  }
533251607Sdim  case Instruction::Load: {
534251607Sdim    // If we are scalarize the loads, add the cost of forming the vector.
535251607Sdim    for (unsigned i = 0, e = VL.size()-1; i < e; ++i)
536251607Sdim      if (!isConsecutiveAccess(VL[i], VL[i+1]))
537251607Sdim        return getScalarizationCost(VecTy);
538251607Sdim
539251607Sdim    // Cost of wide load - cost of scalar loads.
540251607Sdim    int ScalarLdCost = VecTy->getNumElements() *
541251607Sdim      TTI->getMemoryOpCost(Instruction::Load, ScalarTy, 1, 0);
542251607Sdim    int VecLdCost = TTI->getMemoryOpCost(Instruction::Load, ScalarTy, 1, 0);
543251607Sdim    return VecLdCost - ScalarLdCost;
544251607Sdim  }
545251607Sdim  case Instruction::Store: {
546251607Sdim    // We know that we can merge the stores. Calculate the cost.
547251607Sdim    int ScalarStCost = VecTy->getNumElements() *
548251607Sdim      TTI->getMemoryOpCost(Instruction::Store, ScalarTy, 1, 0);
549251607Sdim    int VecStCost = TTI->getMemoryOpCost(Instruction::Store, ScalarTy, 1,0);
550251607Sdim    int StoreCost = VecStCost - ScalarStCost;
551251607Sdim
552251607Sdim    ValueList Operands;
553251607Sdim    for (unsigned j = 0; j < VL.size(); ++j) {
554251607Sdim      Operands.push_back(cast<Instruction>(VL[j])->getOperand(0));
555251607Sdim      MemBarrierIgnoreList.insert(VL[j]);
556251607Sdim    }
557251607Sdim
558251607Sdim    int TotalCost = StoreCost + getTreeCost_rec(Operands, Depth + 1);
559251607Sdim    return TotalCost;
560251607Sdim  }
561251607Sdim  default:
562251607Sdim    // Unable to vectorize unknown instructions.
563251607Sdim    return getScalarizationCost(VecTy);
564251607Sdim  }
565251607Sdim}
566251607Sdim
567251607SdimInstruction *BoUpSLP::GetLastInstr(ArrayRef<Value *> VL, unsigned VF) {
568251607Sdim  int MaxIdx = InstrIdx[BB->getFirstNonPHI()];
569251607Sdim  for (unsigned i = 0; i < VF; ++i )
570251607Sdim    MaxIdx = std::max(MaxIdx, InstrIdx[VL[i]]);
571251607Sdim  return InstrVec[MaxIdx + 1];
572251607Sdim}
573251607Sdim
574251607SdimValue *BoUpSLP::Scalarize(ArrayRef<Value *> VL, VectorType *Ty) {
575251607Sdim  IRBuilder<> Builder(GetLastInstr(VL, Ty->getNumElements()));
576251607Sdim  Value *Vec = UndefValue::get(Ty);
577251607Sdim  for (unsigned i=0; i < Ty->getNumElements(); ++i) {
578251607Sdim    // Generate the 'InsertElement' instruction.
579251607Sdim    Vec = Builder.CreateInsertElement(Vec, VL[i], Builder.getInt32(i));
580251607Sdim    // Remember that this instruction is used as part of a 'gather' sequence.
581251607Sdim    // The caller of the bottom-up slp vectorizer can try to hoist the sequence
582251607Sdim    // if the users are outside of the basic block.
583251607Sdim    GatherInstructions.push_back(Vec);
584251607Sdim  }
585251607Sdim
586251607Sdim  return Vec;
587251607Sdim}
588251607Sdim
589251607SdimValue *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL, int VF) {
590251607Sdim  Value *V = vectorizeTree_rec(VL, VF);
591251607Sdim  // We moved some instructions around. We have to number them again
592251607Sdim  // before we can do any analysis.
593251607Sdim  numberInstructions();
594251607Sdim  MustScalarize.clear();
595251607Sdim  return V;
596251607Sdim}
597251607Sdim
598251607SdimValue *BoUpSLP::vectorizeTree_rec(ArrayRef<Value *> VL, int VF) {
599251607Sdim  Type *ScalarTy = VL[0]->getType();
600251607Sdim  if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
601251607Sdim    ScalarTy = SI->getValueOperand()->getType();
602251607Sdim  VectorType *VecTy = VectorType::get(ScalarTy, VF);
603251607Sdim
604251607Sdim  // Check if all of the operands are constants or identical.
605251607Sdim  bool AllConst = true;
606251607Sdim  bool AllSameScalar = true;
607251607Sdim  for (unsigned i = 0, e = VF; i < e; ++i) {
608251607Sdim    AllConst &= isa<Constant>(VL[i]);
609251607Sdim    AllSameScalar &= (VL[0] == VL[i]);
610251607Sdim    // The instruction must be in the same BB, and it must be vectorizable.
611251607Sdim    Instruction *I = dyn_cast<Instruction>(VL[i]);
612251607Sdim    if (MustScalarize.count(VL[i]) || (I && I->getParent() != BB))
613251607Sdim      return Scalarize(VL, VecTy);
614251607Sdim  }
615251607Sdim
616251607Sdim  // Check that this is a simple vector constant.
617251607Sdim  if (AllConst || AllSameScalar) return Scalarize(VL, VecTy);
618251607Sdim
619251607Sdim  // Scalarize unknown structures.
620251607Sdim  Instruction *VL0 = dyn_cast<Instruction>(VL[0]);
621251607Sdim  if (!VL0) return Scalarize(VL, VecTy);
622251607Sdim
623251607Sdim  if (VectorizedValues.count(VL0)) return VectorizedValues[VL0];
624251607Sdim
625251607Sdim  unsigned Opcode = VL0->getOpcode();
626251607Sdim  for (unsigned i = 0, e = VF; i < e; ++i) {
627251607Sdim    Instruction *I = dyn_cast<Instruction>(VL[i]);
628251607Sdim    // If not all of the instructions are identical then we have to scalarize.
629251607Sdim    if (!I || Opcode != I->getOpcode()) return Scalarize(VL, VecTy);
630251607Sdim  }
631251607Sdim
632251607Sdim  switch (Opcode) {
633251607Sdim  case Instruction::ZExt:
634251607Sdim  case Instruction::SExt:
635251607Sdim  case Instruction::FPToUI:
636251607Sdim  case Instruction::FPToSI:
637251607Sdim  case Instruction::FPExt:
638251607Sdim  case Instruction::PtrToInt:
639251607Sdim  case Instruction::IntToPtr:
640251607Sdim  case Instruction::SIToFP:
641251607Sdim  case Instruction::UIToFP:
642251607Sdim  case Instruction::Trunc:
643251607Sdim  case Instruction::FPTrunc:
644251607Sdim  case Instruction::BitCast: {
645251607Sdim    ValueList INVL;
646251607Sdim    for (int i = 0; i < VF; ++i)
647251607Sdim      INVL.push_back(cast<Instruction>(VL[i])->getOperand(0));
648251607Sdim    Value *InVec = vectorizeTree_rec(INVL, VF);
649251607Sdim    IRBuilder<> Builder(GetLastInstr(VL, VF));
650251607Sdim    CastInst *CI = dyn_cast<CastInst>(VL0);
651251607Sdim    Value *V = Builder.CreateCast(CI->getOpcode(), InVec, VecTy);
652251607Sdim    VectorizedValues[VL0] = V;
653251607Sdim    return V;
654251607Sdim  }
655251607Sdim  case Instruction::Add:
656251607Sdim  case Instruction::FAdd:
657251607Sdim  case Instruction::Sub:
658251607Sdim  case Instruction::FSub:
659251607Sdim  case Instruction::Mul:
660251607Sdim  case Instruction::FMul:
661251607Sdim  case Instruction::UDiv:
662251607Sdim  case Instruction::SDiv:
663251607Sdim  case Instruction::FDiv:
664251607Sdim  case Instruction::URem:
665251607Sdim  case Instruction::SRem:
666251607Sdim  case Instruction::FRem:
667251607Sdim  case Instruction::Shl:
668251607Sdim  case Instruction::LShr:
669251607Sdim  case Instruction::AShr:
670251607Sdim  case Instruction::And:
671251607Sdim  case Instruction::Or:
672251607Sdim  case Instruction::Xor: {
673251607Sdim    ValueList LHSVL, RHSVL;
674251607Sdim    for (int i = 0; i < VF; ++i) {
675251607Sdim      RHSVL.push_back(cast<Instruction>(VL[i])->getOperand(0));
676251607Sdim      LHSVL.push_back(cast<Instruction>(VL[i])->getOperand(1));
677251607Sdim    }
678251607Sdim
679251607Sdim    Value *RHS = vectorizeTree_rec(RHSVL, VF);
680251607Sdim    Value *LHS = vectorizeTree_rec(LHSVL, VF);
681251607Sdim    IRBuilder<> Builder(GetLastInstr(VL, VF));
682251607Sdim    BinaryOperator *BinOp = cast<BinaryOperator>(VL0);
683251607Sdim    Value *V = Builder.CreateBinOp(BinOp->getOpcode(), RHS,LHS);
684251607Sdim    VectorizedValues[VL0] = V;
685251607Sdim    return V;
686251607Sdim  }
687251607Sdim  case Instruction::Load: {
688251607Sdim    LoadInst *LI = cast<LoadInst>(VL0);
689251607Sdim    unsigned Alignment = LI->getAlignment();
690251607Sdim
691251607Sdim    // Check if all of the loads are consecutive.
692251607Sdim    for (unsigned i = 1, e = VF; i < e; ++i)
693251607Sdim      if (!isConsecutiveAccess(VL[i-1], VL[i]))
694251607Sdim        return Scalarize(VL, VecTy);
695251607Sdim
696251607Sdim    IRBuilder<> Builder(GetLastInstr(VL, VF));
697251607Sdim    Value *VecPtr = Builder.CreateBitCast(LI->getPointerOperand(),
698251607Sdim                                          VecTy->getPointerTo());
699251607Sdim    LI = Builder.CreateLoad(VecPtr);
700251607Sdim    LI->setAlignment(Alignment);
701251607Sdim    VectorizedValues[VL0] = LI;
702251607Sdim    return LI;
703251607Sdim  }
704251607Sdim  case Instruction::Store: {
705251607Sdim    StoreInst *SI = cast<StoreInst>(VL0);
706251607Sdim    unsigned Alignment = SI->getAlignment();
707251607Sdim
708251607Sdim    ValueList ValueOp;
709251607Sdim    for (int i = 0; i < VF; ++i)
710251607Sdim      ValueOp.push_back(cast<StoreInst>(VL[i])->getValueOperand());
711251607Sdim
712251607Sdim    Value *VecValue = vectorizeTree_rec(ValueOp, VF);
713251607Sdim
714251607Sdim    IRBuilder<> Builder(GetLastInstr(VL, VF));
715251607Sdim    Value *VecPtr = Builder.CreateBitCast(SI->getPointerOperand(),
716251607Sdim                                          VecTy->getPointerTo());
717251607Sdim    Builder.CreateStore(VecValue, VecPtr)->setAlignment(Alignment);
718251607Sdim
719251607Sdim    for (int i = 0; i < VF; ++i)
720251607Sdim      cast<Instruction>(VL[i])->eraseFromParent();
721251607Sdim    return 0;
722251607Sdim  }
723251607Sdim  default:
724251607Sdim    Value *S = Scalarize(VL, VecTy);
725251607Sdim    VectorizedValues[VL0] = S;
726251607Sdim    return S;
727251607Sdim  }
728251607Sdim}
729251607Sdim
730251607Sdim} // end of namespace
731