1251607Sdim//===- VecUtils.cpp --- Vectorization Utilities ---------------------------===// 2251607Sdim// 3251607Sdim// The LLVM Compiler Infrastructure 4251607Sdim// 5251607Sdim// This file is distributed under the University of Illinois Open Source 6251607Sdim// License. See LICENSE.TXT for details. 7251607Sdim// 8251607Sdim//===----------------------------------------------------------------------===// 9251607Sdim#define DEBUG_TYPE "SLP" 10251607Sdim 11251607Sdim#include "VecUtils.h" 12251607Sdim#include "llvm/ADT/DenseMap.h" 13251607Sdim#include "llvm/ADT/SmallPtrSet.h" 14251607Sdim#include "llvm/ADT/SmallSet.h" 15251607Sdim#include "llvm/ADT/SmallVector.h" 16251607Sdim#include "llvm/Analysis/AliasAnalysis.h" 17251607Sdim#include "llvm/Analysis/ScalarEvolution.h" 18251607Sdim#include "llvm/Analysis/ScalarEvolutionExpressions.h" 19251607Sdim#include "llvm/Analysis/TargetTransformInfo.h" 20251607Sdim#include "llvm/Analysis/Verifier.h" 21251607Sdim#include "llvm/Analysis/LoopInfo.h" 22251607Sdim#include "llvm/IR/Constants.h" 23251607Sdim#include "llvm/IR/DataLayout.h" 24251607Sdim#include "llvm/IR/Function.h" 25251607Sdim#include "llvm/IR/Instructions.h" 26251607Sdim#include "llvm/IR/Module.h" 27251607Sdim#include "llvm/IR/Type.h" 28251607Sdim#include "llvm/IR/Value.h" 29251607Sdim#include "llvm/Pass.h" 30251607Sdim#include "llvm/Support/CommandLine.h" 31251607Sdim#include "llvm/Support/Debug.h" 32251607Sdim#include "llvm/Support/raw_ostream.h" 33251607Sdim#include "llvm/Target/TargetLibraryInfo.h" 34251607Sdim#include "llvm/Transforms/Scalar.h" 35251607Sdim#include "llvm/Transforms/Utils/Local.h" 36251607Sdim#include <algorithm> 37251607Sdim#include <map> 38251607Sdim 39251607Sdimusing namespace llvm; 40251607Sdim 41251607Sdimstatic const unsigned MinVecRegSize = 128; 42251607Sdim 43251607Sdimstatic const unsigned RecursionMaxDepth = 6; 44251607Sdim 45251607Sdimnamespace llvm { 46251607Sdim 47251607SdimBoUpSLP::BoUpSLP(BasicBlock *Bb, ScalarEvolution *S, DataLayout *Dl, 48251607Sdim TargetTransformInfo *Tti, AliasAnalysis *Aa, Loop *Lp) : 49251607Sdim BB(Bb), SE(S), DL(Dl), TTI(Tti), AA(Aa), L(Lp) { 50251607Sdim numberInstructions(); 51251607Sdim} 52251607Sdim 53251607Sdimvoid BoUpSLP::numberInstructions() { 54251607Sdim int Loc = 0; 55251607Sdim InstrIdx.clear(); 56251607Sdim InstrVec.clear(); 57251607Sdim // Number the instructions in the block. 58251607Sdim for (BasicBlock::iterator it=BB->begin(), e=BB->end(); it != e; ++it) { 59251607Sdim InstrIdx[it] = Loc++; 60251607Sdim InstrVec.push_back(it); 61251607Sdim assert(InstrVec[InstrIdx[it]] == it && "Invalid allocation"); 62251607Sdim } 63251607Sdim} 64251607Sdim 65251607SdimValue *BoUpSLP::getPointerOperand(Value *I) { 66251607Sdim if (LoadInst *LI = dyn_cast<LoadInst>(I)) return LI->getPointerOperand(); 67251607Sdim if (StoreInst *SI = dyn_cast<StoreInst>(I)) return SI->getPointerOperand(); 68251607Sdim return 0; 69251607Sdim} 70251607Sdim 71251607Sdimunsigned BoUpSLP::getAddressSpaceOperand(Value *I) { 72251607Sdim if (LoadInst *L=dyn_cast<LoadInst>(I)) return L->getPointerAddressSpace(); 73251607Sdim if (StoreInst *S=dyn_cast<StoreInst>(I)) return S->getPointerAddressSpace(); 74251607Sdim return -1; 75251607Sdim} 76251607Sdim 77251607Sdimbool BoUpSLP::isConsecutiveAccess(Value *A, Value *B) { 78251607Sdim Value *PtrA = getPointerOperand(A); 79251607Sdim Value *PtrB = getPointerOperand(B); 80251607Sdim unsigned ASA = getAddressSpaceOperand(A); 81251607Sdim unsigned ASB = getAddressSpaceOperand(B); 82251607Sdim 83251607Sdim // Check that the address spaces match and that the pointers are valid. 84251607Sdim if (!PtrA || !PtrB || (ASA != ASB)) return false; 85251607Sdim 86251607Sdim // Check that A and B are of the same type. 87251607Sdim if (PtrA->getType() != PtrB->getType()) return false; 88251607Sdim 89251607Sdim // Calculate the distance. 90251607Sdim const SCEV *PtrSCEVA = SE->getSCEV(PtrA); 91251607Sdim const SCEV *PtrSCEVB = SE->getSCEV(PtrB); 92251607Sdim const SCEV *OffsetSCEV = SE->getMinusSCEV(PtrSCEVA, PtrSCEVB); 93251607Sdim const SCEVConstant *ConstOffSCEV = dyn_cast<SCEVConstant>(OffsetSCEV); 94251607Sdim 95251607Sdim // Non constant distance. 96251607Sdim if (!ConstOffSCEV) return false; 97251607Sdim 98251607Sdim int64_t Offset = ConstOffSCEV->getValue()->getSExtValue(); 99251607Sdim Type *Ty = cast<PointerType>(PtrA->getType())->getElementType(); 100251607Sdim // The Instructions are connsecutive if the size of the first load/store is 101251607Sdim // the same as the offset. 102251607Sdim int64_t Sz = DL->getTypeStoreSize(Ty); 103251607Sdim return ((-Offset) == Sz); 104251607Sdim} 105251607Sdim 106251607Sdimbool BoUpSLP::vectorizeStoreChain(ArrayRef<Value *> Chain, int CostThreshold) { 107251607Sdim Type *StoreTy = cast<StoreInst>(Chain[0])->getValueOperand()->getType(); 108251607Sdim unsigned Sz = DL->getTypeSizeInBits(StoreTy); 109251607Sdim unsigned VF = MinVecRegSize / Sz; 110251607Sdim 111251607Sdim if (!isPowerOf2_32(Sz) || VF < 2) return false; 112251607Sdim 113251607Sdim bool Changed = false; 114251607Sdim // Look for profitable vectorizable trees at all offsets, starting at zero. 115251607Sdim for (unsigned i = 0, e = Chain.size(); i < e; ++i) { 116251607Sdim if (i + VF > e) return Changed; 117251607Sdim DEBUG(dbgs()<<"SLP: Analyzing " << VF << " stores at offset "<< i << "\n"); 118251607Sdim ArrayRef<Value *> Operands = Chain.slice(i, VF); 119251607Sdim 120251607Sdim int Cost = getTreeCost(Operands); 121251607Sdim DEBUG(dbgs() << "SLP: Found cost=" << Cost << " for VF=" << VF << "\n"); 122251607Sdim if (Cost < CostThreshold) { 123251607Sdim DEBUG(dbgs() << "SLP: Decided to vectorize cost=" << Cost << "\n"); 124251607Sdim vectorizeTree(Operands, VF); 125251607Sdim i += VF - 1; 126251607Sdim Changed = true; 127251607Sdim } 128251607Sdim } 129251607Sdim 130251607Sdim return Changed; 131251607Sdim} 132251607Sdim 133251607Sdimbool BoUpSLP::vectorizeStores(ArrayRef<StoreInst *> Stores, int costThreshold) { 134251607Sdim ValueSet Heads, Tails; 135251607Sdim SmallDenseMap<Value*, Value*> ConsecutiveChain; 136251607Sdim 137251607Sdim // We may run into multiple chains that merge into a single chain. We mark the 138251607Sdim // stores that we vectorized so that we don't visit the same store twice. 139251607Sdim ValueSet VectorizedStores; 140251607Sdim bool Changed = false; 141251607Sdim 142251607Sdim // Do a quadratic search on all of the given stores and find 143251607Sdim // all of the pairs of loads that follow each other. 144251607Sdim for (unsigned i = 0, e = Stores.size(); i < e; ++i) 145251607Sdim for (unsigned j = 0; j < e; ++j) { 146251607Sdim if (i == j) continue; 147251607Sdim if (isConsecutiveAccess(Stores[i], Stores[j])) { 148251607Sdim Tails.insert(Stores[j]); 149251607Sdim Heads.insert(Stores[i]); 150251607Sdim ConsecutiveChain[Stores[i]] = Stores[j]; 151251607Sdim } 152251607Sdim } 153251607Sdim 154251607Sdim // For stores that start but don't end a link in the chain: 155251607Sdim for (ValueSet::iterator it = Heads.begin(), e = Heads.end();it != e; ++it) { 156251607Sdim if (Tails.count(*it)) continue; 157251607Sdim 158251607Sdim // We found a store instr that starts a chain. Now follow the chain and try 159251607Sdim // to vectorize it. 160251607Sdim ValueList Operands; 161251607Sdim Value *I = *it; 162251607Sdim // Collect the chain into a list. 163251607Sdim while (Tails.count(I) || Heads.count(I)) { 164251607Sdim if (VectorizedStores.count(I)) break; 165251607Sdim Operands.push_back(I); 166251607Sdim // Move to the next value in the chain. 167251607Sdim I = ConsecutiveChain[I]; 168251607Sdim } 169251607Sdim 170251607Sdim bool Vectorized = vectorizeStoreChain(Operands, costThreshold); 171251607Sdim 172251607Sdim // Mark the vectorized stores so that we don't vectorize them again. 173251607Sdim if (Vectorized) 174251607Sdim VectorizedStores.insert(Operands.begin(), Operands.end()); 175251607Sdim Changed |= Vectorized; 176251607Sdim } 177251607Sdim 178251607Sdim return Changed; 179251607Sdim} 180251607Sdim 181251607Sdimint BoUpSLP::getScalarizationCost(ArrayRef<Value *> VL) { 182251607Sdim // Find the type of the operands in VL. 183251607Sdim Type *ScalarTy = VL[0]->getType(); 184251607Sdim if (StoreInst *SI = dyn_cast<StoreInst>(VL[0])) 185251607Sdim ScalarTy = SI->getValueOperand()->getType(); 186251607Sdim VectorType *VecTy = VectorType::get(ScalarTy, VL.size()); 187251607Sdim // Find the cost of inserting/extracting values from the vector. 188251607Sdim return getScalarizationCost(VecTy); 189251607Sdim} 190251607Sdim 191251607Sdimint BoUpSLP::getScalarizationCost(Type *Ty) { 192251607Sdim int Cost = 0; 193251607Sdim for (unsigned i = 0, e = cast<VectorType>(Ty)->getNumElements(); i < e; ++i) 194251607Sdim Cost += TTI->getVectorInstrCost(Instruction::InsertElement, Ty, i); 195251607Sdim return Cost; 196251607Sdim} 197251607Sdim 198251607SdimAliasAnalysis::Location BoUpSLP::getLocation(Instruction *I) { 199251607Sdim if (StoreInst *SI = dyn_cast<StoreInst>(I)) return AA->getLocation(SI); 200251607Sdim if (LoadInst *LI = dyn_cast<LoadInst>(I)) return AA->getLocation(LI); 201251607Sdim return AliasAnalysis::Location(); 202251607Sdim} 203251607Sdim 204251607SdimValue *BoUpSLP::isUnsafeToSink(Instruction *Src, Instruction *Dst) { 205251607Sdim assert(Src->getParent() == Dst->getParent() && "Not the same BB"); 206251607Sdim BasicBlock::iterator I = Src, E = Dst; 207251607Sdim /// Scan all of the instruction from SRC to DST and check if 208251607Sdim /// the source may alias. 209251607Sdim for (++I; I != E; ++I) { 210251607Sdim // Ignore store instructions that are marked as 'ignore'. 211251607Sdim if (MemBarrierIgnoreList.count(I)) continue; 212251607Sdim if (Src->mayWriteToMemory()) /* Write */ { 213251607Sdim if (!I->mayReadOrWriteMemory()) continue; 214251607Sdim } else /* Read */ { 215251607Sdim if (!I->mayWriteToMemory()) continue; 216251607Sdim } 217251607Sdim AliasAnalysis::Location A = getLocation(&*I); 218251607Sdim AliasAnalysis::Location B = getLocation(Src); 219251607Sdim 220251607Sdim if (!A.Ptr || !B.Ptr || AA->alias(A, B)) 221251607Sdim return I; 222251607Sdim } 223251607Sdim return 0; 224251607Sdim} 225251607Sdim 226251607Sdimvoid BoUpSLP::vectorizeArith(ArrayRef<Value *> Operands) { 227251607Sdim Value *Vec = vectorizeTree(Operands, Operands.size()); 228251607Sdim BasicBlock::iterator Loc = cast<Instruction>(Vec); 229251607Sdim IRBuilder<> Builder(++Loc); 230251607Sdim // After vectorizing the operands we need to generate extractelement 231251607Sdim // instructions and replace all of the uses of the scalar values with 232251607Sdim // the values that we extracted from the vectorized tree. 233251607Sdim for (unsigned i = 0, e = Operands.size(); i != e; ++i) { 234251607Sdim Value *S = Builder.CreateExtractElement(Vec, Builder.getInt32(i)); 235251607Sdim Operands[i]->replaceAllUsesWith(S); 236251607Sdim } 237251607Sdim} 238251607Sdim 239251607Sdimint BoUpSLP::getTreeCost(ArrayRef<Value *> VL) { 240251607Sdim // Get rid of the list of stores that were removed, and from the 241251607Sdim // lists of instructions with multiple users. 242251607Sdim MemBarrierIgnoreList.clear(); 243251607Sdim LaneMap.clear(); 244251607Sdim MultiUserVals.clear(); 245251607Sdim MustScalarize.clear(); 246251607Sdim 247251607Sdim // Scan the tree and find which value is used by which lane, and which values 248251607Sdim // must be scalarized. 249251607Sdim getTreeUses_rec(VL, 0); 250251607Sdim 251251607Sdim // Check that instructions with multiple users can be vectorized. Mark unsafe 252251607Sdim // instructions. 253251607Sdim for (ValueSet::iterator it = MultiUserVals.begin(), 254251607Sdim e = MultiUserVals.end(); it != e; ++it) { 255251607Sdim // Check that all of the users of this instr are within the tree 256251607Sdim // and that they are all from the same lane. 257251607Sdim int Lane = -1; 258251607Sdim for (Value::use_iterator I = (*it)->use_begin(), E = (*it)->use_end(); 259251607Sdim I != E; ++I) { 260251607Sdim if (LaneMap.find(*I) == LaneMap.end()) { 261251607Sdim MustScalarize.insert(*it); 262251607Sdim DEBUG(dbgs()<<"SLP: Adding " << **it << 263251607Sdim " to MustScalarize because of an out of tree usage.\n"); 264251607Sdim break; 265251607Sdim } 266251607Sdim if (Lane == -1) Lane = LaneMap[*I]; 267251607Sdim if (Lane != LaneMap[*I]) { 268251607Sdim MustScalarize.insert(*it); 269251607Sdim DEBUG(dbgs()<<"Adding " << **it << 270251607Sdim " to MustScalarize because multiple lane use it: " 271251607Sdim << Lane << " and " << LaneMap[*I] << ".\n"); 272251607Sdim break; 273251607Sdim } 274251607Sdim } 275251607Sdim } 276251607Sdim 277251607Sdim // Now calculate the cost of vectorizing the tree. 278251607Sdim return getTreeCost_rec(VL, 0); 279251607Sdim} 280251607Sdim 281251607Sdimvoid BoUpSLP::getTreeUses_rec(ArrayRef<Value *> VL, unsigned Depth) { 282251607Sdim if (Depth == RecursionMaxDepth) return; 283251607Sdim 284251607Sdim // Don't handle vectors. 285251607Sdim if (VL[0]->getType()->isVectorTy()) return; 286251607Sdim if (StoreInst *SI = dyn_cast<StoreInst>(VL[0])) 287251607Sdim if (SI->getValueOperand()->getType()->isVectorTy()) return; 288251607Sdim 289251607Sdim // Check if all of the operands are constants. 290251607Sdim bool AllConst = true; 291251607Sdim bool AllSameScalar = true; 292251607Sdim for (unsigned i = 0, e = VL.size(); i < e; ++i) { 293251607Sdim AllConst &= isa<Constant>(VL[i]); 294251607Sdim AllSameScalar &= (VL[0] == VL[i]); 295251607Sdim Instruction *I = dyn_cast<Instruction>(VL[i]); 296251607Sdim // If one of the instructions is out of this BB, we need to scalarize all. 297251607Sdim if (I && I->getParent() != BB) return; 298251607Sdim } 299251607Sdim 300251607Sdim // If all of the operands are identical or constant we have a simple solution. 301251607Sdim if (AllConst || AllSameScalar) return; 302251607Sdim 303251607Sdim // Scalarize unknown structures. 304251607Sdim Instruction *VL0 = dyn_cast<Instruction>(VL[0]); 305251607Sdim if (!VL0) return; 306251607Sdim 307251607Sdim unsigned Opcode = VL0->getOpcode(); 308251607Sdim for (unsigned i = 0, e = VL.size(); i < e; ++i) { 309251607Sdim Instruction *I = dyn_cast<Instruction>(VL[i]); 310251607Sdim // If not all of the instructions are identical then we have to scalarize. 311251607Sdim if (!I || Opcode != I->getOpcode()) return; 312251607Sdim } 313251607Sdim 314251607Sdim // Mark instructions with multiple users. 315251607Sdim for (unsigned i = 0, e = VL.size(); i < e; ++i) { 316251607Sdim Instruction *I = dyn_cast<Instruction>(VL[i]); 317251607Sdim // Remember to check if all of the users of this instr are vectorized 318251607Sdim // within our tree. 319251607Sdim if (I && I->getNumUses() > 1) MultiUserVals.insert(I); 320251607Sdim } 321251607Sdim 322251607Sdim for (int i = 0, e = VL.size(); i < e; ++i) { 323251607Sdim // Check that the instruction is only used within 324251607Sdim // one lane. 325251607Sdim if (LaneMap.count(VL[i]) && LaneMap[VL[i]] != i) return; 326251607Sdim // Make this instruction as 'seen' and remember the lane. 327251607Sdim LaneMap[VL[i]] = i; 328251607Sdim } 329251607Sdim 330251607Sdim switch (Opcode) { 331251607Sdim case Instruction::ZExt: 332251607Sdim case Instruction::SExt: 333251607Sdim case Instruction::FPToUI: 334251607Sdim case Instruction::FPToSI: 335251607Sdim case Instruction::FPExt: 336251607Sdim case Instruction::PtrToInt: 337251607Sdim case Instruction::IntToPtr: 338251607Sdim case Instruction::SIToFP: 339251607Sdim case Instruction::UIToFP: 340251607Sdim case Instruction::Trunc: 341251607Sdim case Instruction::FPTrunc: 342251607Sdim case Instruction::BitCast: 343251607Sdim case Instruction::Add: 344251607Sdim case Instruction::FAdd: 345251607Sdim case Instruction::Sub: 346251607Sdim case Instruction::FSub: 347251607Sdim case Instruction::Mul: 348251607Sdim case Instruction::FMul: 349251607Sdim case Instruction::UDiv: 350251607Sdim case Instruction::SDiv: 351251607Sdim case Instruction::FDiv: 352251607Sdim case Instruction::URem: 353251607Sdim case Instruction::SRem: 354251607Sdim case Instruction::FRem: 355251607Sdim case Instruction::Shl: 356251607Sdim case Instruction::LShr: 357251607Sdim case Instruction::AShr: 358251607Sdim case Instruction::And: 359251607Sdim case Instruction::Or: 360251607Sdim case Instruction::Xor: { 361251607Sdim for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) { 362251607Sdim ValueList Operands; 363251607Sdim // Prepare the operand vector. 364251607Sdim for (unsigned j = 0; j < VL.size(); ++j) 365251607Sdim Operands.push_back(cast<Instruction>(VL[j])->getOperand(i)); 366251607Sdim 367251607Sdim getTreeUses_rec(Operands, Depth+1); 368251607Sdim } 369251607Sdim return; 370251607Sdim } 371251607Sdim case Instruction::Store: { 372251607Sdim ValueList Operands; 373251607Sdim for (unsigned j = 0; j < VL.size(); ++j) 374251607Sdim Operands.push_back(cast<Instruction>(VL[j])->getOperand(0)); 375251607Sdim getTreeUses_rec(Operands, Depth+1); 376251607Sdim return; 377251607Sdim } 378251607Sdim default: 379251607Sdim return; 380251607Sdim } 381251607Sdim} 382251607Sdim 383251607Sdimint BoUpSLP::getTreeCost_rec(ArrayRef<Value *> VL, unsigned Depth) { 384251607Sdim Type *ScalarTy = VL[0]->getType(); 385251607Sdim 386251607Sdim if (StoreInst *SI = dyn_cast<StoreInst>(VL[0])) 387251607Sdim ScalarTy = SI->getValueOperand()->getType(); 388251607Sdim 389251607Sdim /// Don't mess with vectors. 390251607Sdim if (ScalarTy->isVectorTy()) return max_cost; 391251607Sdim VectorType *VecTy = VectorType::get(ScalarTy, VL.size()); 392251607Sdim 393251607Sdim if (Depth == RecursionMaxDepth) return getScalarizationCost(VecTy); 394251607Sdim 395251607Sdim // Check if all of the operands are constants. 396251607Sdim bool AllConst = true; 397251607Sdim bool AllSameScalar = true; 398251607Sdim bool MustScalarizeFlag = false; 399251607Sdim for (unsigned i = 0, e = VL.size(); i < e; ++i) { 400251607Sdim AllConst &= isa<Constant>(VL[i]); 401251607Sdim AllSameScalar &= (VL[0] == VL[i]); 402251607Sdim // Must have a single use. 403251607Sdim Instruction *I = dyn_cast<Instruction>(VL[i]); 404251607Sdim MustScalarizeFlag |= MustScalarize.count(VL[i]); 405251607Sdim // This instruction is outside the basic block. 406251607Sdim if (I && I->getParent() != BB) 407251607Sdim return getScalarizationCost(VecTy); 408251607Sdim } 409251607Sdim 410251607Sdim // Is this a simple vector constant. 411251607Sdim if (AllConst) return 0; 412251607Sdim 413251607Sdim // If all of the operands are identical we can broadcast them. 414251607Sdim Instruction *VL0 = dyn_cast<Instruction>(VL[0]); 415251607Sdim if (AllSameScalar) { 416251607Sdim // If we are in a loop, and this is not an instruction (e.g. constant or 417251607Sdim // argument) or the instruction is defined outside the loop then assume 418251607Sdim // that the cost is zero. 419251607Sdim if (L && (!VL0 || !L->contains(VL0))) 420251607Sdim return 0; 421251607Sdim 422251607Sdim // We need to broadcast the scalar. 423251607Sdim return TTI->getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy, 0); 424251607Sdim } 425251607Sdim 426251607Sdim // If this is not a constant, or a scalar from outside the loop then we 427251607Sdim // need to scalarize it. 428251607Sdim if (MustScalarizeFlag) 429251607Sdim return getScalarizationCost(VecTy); 430251607Sdim 431251607Sdim if (!VL0) return getScalarizationCost(VecTy); 432251607Sdim assert(VL0->getParent() == BB && "Wrong BB"); 433251607Sdim 434251607Sdim unsigned Opcode = VL0->getOpcode(); 435251607Sdim for (unsigned i = 0, e = VL.size(); i < e; ++i) { 436251607Sdim Instruction *I = dyn_cast<Instruction>(VL[i]); 437251607Sdim // If not all of the instructions are identical then we have to scalarize. 438251607Sdim if (!I || Opcode != I->getOpcode()) return getScalarizationCost(VecTy); 439251607Sdim } 440251607Sdim 441251607Sdim // Check if it is safe to sink the loads or the stores. 442251607Sdim if (Opcode == Instruction::Load || Opcode == Instruction::Store) { 443251607Sdim int MaxIdx = InstrIdx[VL0]; 444251607Sdim for (unsigned i = 1, e = VL.size(); i < e; ++i ) 445251607Sdim MaxIdx = std::max(MaxIdx, InstrIdx[VL[i]]); 446251607Sdim 447251607Sdim Instruction *Last = InstrVec[MaxIdx]; 448251607Sdim for (unsigned i = 0, e = VL.size(); i < e; ++i ) { 449251607Sdim if (VL[i] == Last) continue; 450251607Sdim Value *Barrier = isUnsafeToSink(cast<Instruction>(VL[i]), Last); 451251607Sdim if (Barrier) { 452251607Sdim DEBUG(dbgs() << "SLP: Can't sink " << *VL[i] << "\n down to " << 453251607Sdim *Last << "\n because of " << *Barrier << "\n"); 454251607Sdim return max_cost; 455251607Sdim } 456251607Sdim } 457251607Sdim } 458251607Sdim 459251607Sdim switch (Opcode) { 460251607Sdim case Instruction::ZExt: 461251607Sdim case Instruction::SExt: 462251607Sdim case Instruction::FPToUI: 463251607Sdim case Instruction::FPToSI: 464251607Sdim case Instruction::FPExt: 465251607Sdim case Instruction::PtrToInt: 466251607Sdim case Instruction::IntToPtr: 467251607Sdim case Instruction::SIToFP: 468251607Sdim case Instruction::UIToFP: 469251607Sdim case Instruction::Trunc: 470251607Sdim case Instruction::FPTrunc: 471251607Sdim case Instruction::BitCast: { 472251607Sdim int Cost = 0; 473251607Sdim ValueList Operands; 474251607Sdim Type *SrcTy = VL0->getOperand(0)->getType(); 475251607Sdim // Prepare the operand vector. 476251607Sdim for (unsigned j = 0; j < VL.size(); ++j) { 477251607Sdim Operands.push_back(cast<Instruction>(VL[j])->getOperand(0)); 478251607Sdim // Check that the casted type is the same for all users. 479251607Sdim if (cast<Instruction>(VL[j])->getOperand(0)->getType() != SrcTy) 480251607Sdim return getScalarizationCost(VecTy); 481251607Sdim } 482251607Sdim 483251607Sdim Cost += getTreeCost_rec(Operands, Depth+1); 484251607Sdim if (Cost >= max_cost) return max_cost; 485251607Sdim 486251607Sdim // Calculate the cost of this instruction. 487251607Sdim int ScalarCost = VL.size() * TTI->getCastInstrCost(VL0->getOpcode(), 488251607Sdim VL0->getType(), SrcTy); 489251607Sdim 490251607Sdim VectorType *SrcVecTy = VectorType::get(SrcTy, VL.size()); 491251607Sdim int VecCost = TTI->getCastInstrCost(VL0->getOpcode(), VecTy, SrcVecTy); 492251607Sdim Cost += (VecCost - ScalarCost); 493251607Sdim return Cost; 494251607Sdim } 495251607Sdim case Instruction::Add: 496251607Sdim case Instruction::FAdd: 497251607Sdim case Instruction::Sub: 498251607Sdim case Instruction::FSub: 499251607Sdim case Instruction::Mul: 500251607Sdim case Instruction::FMul: 501251607Sdim case Instruction::UDiv: 502251607Sdim case Instruction::SDiv: 503251607Sdim case Instruction::FDiv: 504251607Sdim case Instruction::URem: 505251607Sdim case Instruction::SRem: 506251607Sdim case Instruction::FRem: 507251607Sdim case Instruction::Shl: 508251607Sdim case Instruction::LShr: 509251607Sdim case Instruction::AShr: 510251607Sdim case Instruction::And: 511251607Sdim case Instruction::Or: 512251607Sdim case Instruction::Xor: { 513251607Sdim int Cost = 0; 514251607Sdim // Calculate the cost of all of the operands. 515251607Sdim for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) { 516251607Sdim ValueList Operands; 517251607Sdim // Prepare the operand vector. 518251607Sdim for (unsigned j = 0; j < VL.size(); ++j) 519251607Sdim Operands.push_back(cast<Instruction>(VL[j])->getOperand(i)); 520251607Sdim 521251607Sdim Cost += getTreeCost_rec(Operands, Depth+1); 522251607Sdim if (Cost >= max_cost) return max_cost; 523251607Sdim } 524251607Sdim 525251607Sdim // Calculate the cost of this instruction. 526251607Sdim int ScalarCost = VecTy->getNumElements() * 527251607Sdim TTI->getArithmeticInstrCost(Opcode, ScalarTy); 528251607Sdim 529251607Sdim int VecCost = TTI->getArithmeticInstrCost(Opcode, VecTy); 530251607Sdim Cost += (VecCost - ScalarCost); 531251607Sdim return Cost; 532251607Sdim } 533251607Sdim case Instruction::Load: { 534251607Sdim // If we are scalarize the loads, add the cost of forming the vector. 535251607Sdim for (unsigned i = 0, e = VL.size()-1; i < e; ++i) 536251607Sdim if (!isConsecutiveAccess(VL[i], VL[i+1])) 537251607Sdim return getScalarizationCost(VecTy); 538251607Sdim 539251607Sdim // Cost of wide load - cost of scalar loads. 540251607Sdim int ScalarLdCost = VecTy->getNumElements() * 541251607Sdim TTI->getMemoryOpCost(Instruction::Load, ScalarTy, 1, 0); 542251607Sdim int VecLdCost = TTI->getMemoryOpCost(Instruction::Load, ScalarTy, 1, 0); 543251607Sdim return VecLdCost - ScalarLdCost; 544251607Sdim } 545251607Sdim case Instruction::Store: { 546251607Sdim // We know that we can merge the stores. Calculate the cost. 547251607Sdim int ScalarStCost = VecTy->getNumElements() * 548251607Sdim TTI->getMemoryOpCost(Instruction::Store, ScalarTy, 1, 0); 549251607Sdim int VecStCost = TTI->getMemoryOpCost(Instruction::Store, ScalarTy, 1,0); 550251607Sdim int StoreCost = VecStCost - ScalarStCost; 551251607Sdim 552251607Sdim ValueList Operands; 553251607Sdim for (unsigned j = 0; j < VL.size(); ++j) { 554251607Sdim Operands.push_back(cast<Instruction>(VL[j])->getOperand(0)); 555251607Sdim MemBarrierIgnoreList.insert(VL[j]); 556251607Sdim } 557251607Sdim 558251607Sdim int TotalCost = StoreCost + getTreeCost_rec(Operands, Depth + 1); 559251607Sdim return TotalCost; 560251607Sdim } 561251607Sdim default: 562251607Sdim // Unable to vectorize unknown instructions. 563251607Sdim return getScalarizationCost(VecTy); 564251607Sdim } 565251607Sdim} 566251607Sdim 567251607SdimInstruction *BoUpSLP::GetLastInstr(ArrayRef<Value *> VL, unsigned VF) { 568251607Sdim int MaxIdx = InstrIdx[BB->getFirstNonPHI()]; 569251607Sdim for (unsigned i = 0; i < VF; ++i ) 570251607Sdim MaxIdx = std::max(MaxIdx, InstrIdx[VL[i]]); 571251607Sdim return InstrVec[MaxIdx + 1]; 572251607Sdim} 573251607Sdim 574251607SdimValue *BoUpSLP::Scalarize(ArrayRef<Value *> VL, VectorType *Ty) { 575251607Sdim IRBuilder<> Builder(GetLastInstr(VL, Ty->getNumElements())); 576251607Sdim Value *Vec = UndefValue::get(Ty); 577251607Sdim for (unsigned i=0; i < Ty->getNumElements(); ++i) { 578251607Sdim // Generate the 'InsertElement' instruction. 579251607Sdim Vec = Builder.CreateInsertElement(Vec, VL[i], Builder.getInt32(i)); 580251607Sdim // Remember that this instruction is used as part of a 'gather' sequence. 581251607Sdim // The caller of the bottom-up slp vectorizer can try to hoist the sequence 582251607Sdim // if the users are outside of the basic block. 583251607Sdim GatherInstructions.push_back(Vec); 584251607Sdim } 585251607Sdim 586251607Sdim return Vec; 587251607Sdim} 588251607Sdim 589251607SdimValue *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL, int VF) { 590251607Sdim Value *V = vectorizeTree_rec(VL, VF); 591251607Sdim // We moved some instructions around. We have to number them again 592251607Sdim // before we can do any analysis. 593251607Sdim numberInstructions(); 594251607Sdim MustScalarize.clear(); 595251607Sdim return V; 596251607Sdim} 597251607Sdim 598251607SdimValue *BoUpSLP::vectorizeTree_rec(ArrayRef<Value *> VL, int VF) { 599251607Sdim Type *ScalarTy = VL[0]->getType(); 600251607Sdim if (StoreInst *SI = dyn_cast<StoreInst>(VL[0])) 601251607Sdim ScalarTy = SI->getValueOperand()->getType(); 602251607Sdim VectorType *VecTy = VectorType::get(ScalarTy, VF); 603251607Sdim 604251607Sdim // Check if all of the operands are constants or identical. 605251607Sdim bool AllConst = true; 606251607Sdim bool AllSameScalar = true; 607251607Sdim for (unsigned i = 0, e = VF; i < e; ++i) { 608251607Sdim AllConst &= isa<Constant>(VL[i]); 609251607Sdim AllSameScalar &= (VL[0] == VL[i]); 610251607Sdim // The instruction must be in the same BB, and it must be vectorizable. 611251607Sdim Instruction *I = dyn_cast<Instruction>(VL[i]); 612251607Sdim if (MustScalarize.count(VL[i]) || (I && I->getParent() != BB)) 613251607Sdim return Scalarize(VL, VecTy); 614251607Sdim } 615251607Sdim 616251607Sdim // Check that this is a simple vector constant. 617251607Sdim if (AllConst || AllSameScalar) return Scalarize(VL, VecTy); 618251607Sdim 619251607Sdim // Scalarize unknown structures. 620251607Sdim Instruction *VL0 = dyn_cast<Instruction>(VL[0]); 621251607Sdim if (!VL0) return Scalarize(VL, VecTy); 622251607Sdim 623251607Sdim if (VectorizedValues.count(VL0)) return VectorizedValues[VL0]; 624251607Sdim 625251607Sdim unsigned Opcode = VL0->getOpcode(); 626251607Sdim for (unsigned i = 0, e = VF; i < e; ++i) { 627251607Sdim Instruction *I = dyn_cast<Instruction>(VL[i]); 628251607Sdim // If not all of the instructions are identical then we have to scalarize. 629251607Sdim if (!I || Opcode != I->getOpcode()) return Scalarize(VL, VecTy); 630251607Sdim } 631251607Sdim 632251607Sdim switch (Opcode) { 633251607Sdim case Instruction::ZExt: 634251607Sdim case Instruction::SExt: 635251607Sdim case Instruction::FPToUI: 636251607Sdim case Instruction::FPToSI: 637251607Sdim case Instruction::FPExt: 638251607Sdim case Instruction::PtrToInt: 639251607Sdim case Instruction::IntToPtr: 640251607Sdim case Instruction::SIToFP: 641251607Sdim case Instruction::UIToFP: 642251607Sdim case Instruction::Trunc: 643251607Sdim case Instruction::FPTrunc: 644251607Sdim case Instruction::BitCast: { 645251607Sdim ValueList INVL; 646251607Sdim for (int i = 0; i < VF; ++i) 647251607Sdim INVL.push_back(cast<Instruction>(VL[i])->getOperand(0)); 648251607Sdim Value *InVec = vectorizeTree_rec(INVL, VF); 649251607Sdim IRBuilder<> Builder(GetLastInstr(VL, VF)); 650251607Sdim CastInst *CI = dyn_cast<CastInst>(VL0); 651251607Sdim Value *V = Builder.CreateCast(CI->getOpcode(), InVec, VecTy); 652251607Sdim VectorizedValues[VL0] = V; 653251607Sdim return V; 654251607Sdim } 655251607Sdim case Instruction::Add: 656251607Sdim case Instruction::FAdd: 657251607Sdim case Instruction::Sub: 658251607Sdim case Instruction::FSub: 659251607Sdim case Instruction::Mul: 660251607Sdim case Instruction::FMul: 661251607Sdim case Instruction::UDiv: 662251607Sdim case Instruction::SDiv: 663251607Sdim case Instruction::FDiv: 664251607Sdim case Instruction::URem: 665251607Sdim case Instruction::SRem: 666251607Sdim case Instruction::FRem: 667251607Sdim case Instruction::Shl: 668251607Sdim case Instruction::LShr: 669251607Sdim case Instruction::AShr: 670251607Sdim case Instruction::And: 671251607Sdim case Instruction::Or: 672251607Sdim case Instruction::Xor: { 673251607Sdim ValueList LHSVL, RHSVL; 674251607Sdim for (int i = 0; i < VF; ++i) { 675251607Sdim RHSVL.push_back(cast<Instruction>(VL[i])->getOperand(0)); 676251607Sdim LHSVL.push_back(cast<Instruction>(VL[i])->getOperand(1)); 677251607Sdim } 678251607Sdim 679251607Sdim Value *RHS = vectorizeTree_rec(RHSVL, VF); 680251607Sdim Value *LHS = vectorizeTree_rec(LHSVL, VF); 681251607Sdim IRBuilder<> Builder(GetLastInstr(VL, VF)); 682251607Sdim BinaryOperator *BinOp = cast<BinaryOperator>(VL0); 683251607Sdim Value *V = Builder.CreateBinOp(BinOp->getOpcode(), RHS,LHS); 684251607Sdim VectorizedValues[VL0] = V; 685251607Sdim return V; 686251607Sdim } 687251607Sdim case Instruction::Load: { 688251607Sdim LoadInst *LI = cast<LoadInst>(VL0); 689251607Sdim unsigned Alignment = LI->getAlignment(); 690251607Sdim 691251607Sdim // Check if all of the loads are consecutive. 692251607Sdim for (unsigned i = 1, e = VF; i < e; ++i) 693251607Sdim if (!isConsecutiveAccess(VL[i-1], VL[i])) 694251607Sdim return Scalarize(VL, VecTy); 695251607Sdim 696251607Sdim IRBuilder<> Builder(GetLastInstr(VL, VF)); 697251607Sdim Value *VecPtr = Builder.CreateBitCast(LI->getPointerOperand(), 698251607Sdim VecTy->getPointerTo()); 699251607Sdim LI = Builder.CreateLoad(VecPtr); 700251607Sdim LI->setAlignment(Alignment); 701251607Sdim VectorizedValues[VL0] = LI; 702251607Sdim return LI; 703251607Sdim } 704251607Sdim case Instruction::Store: { 705251607Sdim StoreInst *SI = cast<StoreInst>(VL0); 706251607Sdim unsigned Alignment = SI->getAlignment(); 707251607Sdim 708251607Sdim ValueList ValueOp; 709251607Sdim for (int i = 0; i < VF; ++i) 710251607Sdim ValueOp.push_back(cast<StoreInst>(VL[i])->getValueOperand()); 711251607Sdim 712251607Sdim Value *VecValue = vectorizeTree_rec(ValueOp, VF); 713251607Sdim 714251607Sdim IRBuilder<> Builder(GetLastInstr(VL, VF)); 715251607Sdim Value *VecPtr = Builder.CreateBitCast(SI->getPointerOperand(), 716251607Sdim VecTy->getPointerTo()); 717251607Sdim Builder.CreateStore(VecValue, VecPtr)->setAlignment(Alignment); 718251607Sdim 719251607Sdim for (int i = 0; i < VF; ++i) 720251607Sdim cast<Instruction>(VL[i])->eraseFromParent(); 721251607Sdim return 0; 722251607Sdim } 723251607Sdim default: 724251607Sdim Value *S = Scalarize(VL, VecTy); 725251607Sdim VectorizedValues[VL0] = S; 726251607Sdim return S; 727251607Sdim } 728251607Sdim} 729251607Sdim 730251607Sdim} // end of namespace 731