VecUtils.cpp revision 251607
//===- VecUtils.cpp --- Vectorization Utilities ---------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "SLP"

#include "VecUtils.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/Verifier.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
#include <map>

using namespace llvm;

static const unsigned MinVecRegSize = 128;

static const unsigned RecursionMaxDepth = 6;

namespace llvm {

BoUpSLP::BoUpSLP(BasicBlock *Bb, ScalarEvolution *S, DataLayout *Dl,
                 TargetTransformInfo *Tti, AliasAnalysis *Aa, Loop *Lp) :
  BB(Bb), SE(S), DL(Dl), TTI(Tti), AA(Aa), L(Lp) {
  numberInstructions();
}

void BoUpSLP::numberInstructions() {
  int Loc = 0;
  InstrIdx.clear();
  InstrVec.clear();
  // Number the instructions in the block.
  for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
    InstrIdx[it] = Loc++;
    InstrVec.push_back(it);
    assert(InstrVec[InstrIdx[it]] == it && "Invalid allocation");
  }
}

Value *BoUpSLP::getPointerOperand(Value *I) {
  if (LoadInst *LI = dyn_cast<LoadInst>(I)) return LI->getPointerOperand();
  if (StoreInst *SI = dyn_cast<StoreInst>(I)) return SI->getPointerOperand();
  return 0;
}

unsigned BoUpSLP::getAddressSpaceOperand(Value *I) {
  if (LoadInst *L = dyn_cast<LoadInst>(I)) return L->getPointerAddressSpace();
  if (StoreInst *S = dyn_cast<StoreInst>(I)) return S->getPointerAddressSpace();
  return -1;
}

bool BoUpSLP::isConsecutiveAccess(Value *A, Value *B) {
  Value *PtrA = getPointerOperand(A);
  Value *PtrB = getPointerOperand(B);
  unsigned ASA = getAddressSpaceOperand(A);
  unsigned ASB = getAddressSpaceOperand(B);

  // Check that the address spaces match and that the pointers are valid.
  if (!PtrA || !PtrB || (ASA != ASB)) return false;

  // Check that A and B are of the same type.
  if (PtrA->getType() != PtrB->getType()) return false;

  // Calculate the distance.
  const SCEV *PtrSCEVA = SE->getSCEV(PtrA);
  const SCEV *PtrSCEVB = SE->getSCEV(PtrB);
  const SCEV *OffsetSCEV = SE->getMinusSCEV(PtrSCEVA, PtrSCEVB);
  const SCEVConstant *ConstOffSCEV = dyn_cast<SCEVConstant>(OffsetSCEV);

  // Non-constant distance.
  if (!ConstOffSCEV) return false;

  int64_t Offset = ConstOffSCEV->getValue()->getSExtValue();
  Type *Ty = cast<PointerType>(PtrA->getType())->getElementType();
  // The instructions are consecutive if the size of the first load/store is
  // the same as the offset.
  int64_t Sz = DL->getTypeStoreSize(Ty);
  return ((-Offset) == Sz);
}

bool BoUpSLP::vectorizeStoreChain(ArrayRef<Value *> Chain, int CostThreshold) {
  Type *StoreTy = cast<StoreInst>(Chain[0])->getValueOperand()->getType();
  unsigned Sz = DL->getTypeSizeInBits(StoreTy);
  unsigned VF = MinVecRegSize / Sz;

  if (!isPowerOf2_32(Sz) || VF < 2) return false;

  bool Changed = false;
  // Look for profitable vectorizable trees at all offsets, starting at zero.
  for (unsigned i = 0, e = Chain.size(); i < e; ++i) {
    if (i + VF > e) return Changed;
    DEBUG(dbgs() << "SLP: Analyzing " << VF << " stores at offset " << i << "\n");
    ArrayRef<Value *> Operands = Chain.slice(i, VF);

    int Cost = getTreeCost(Operands);
    DEBUG(dbgs() << "SLP: Found cost=" << Cost << " for VF=" << VF << "\n");
    if (Cost < CostThreshold) {
      DEBUG(dbgs() << "SLP: Decided to vectorize cost=" << Cost << "\n");
      vectorizeTree(Operands, VF);
      i += VF - 1;
      Changed = true;
    }
  }

  return Changed;
}

bool BoUpSLP::vectorizeStores(ArrayRef<StoreInst *> Stores, int costThreshold) {
  ValueSet Heads, Tails;
  SmallDenseMap<Value*, Value*> ConsecutiveChain;

  // We may run into multiple chains that merge into a single chain. We mark
  // the stores that we vectorized so that we don't visit the same store twice.
  ValueSet VectorizedStores;
  bool Changed = false;

  // Do a quadratic search on all of the given stores and find all of the
  // pairs of stores that follow each other.
  for (unsigned i = 0, e = Stores.size(); i < e; ++i)
    for (unsigned j = 0; j < e; ++j) {
      if (i == j) continue;
      if (isConsecutiveAccess(Stores[i], Stores[j])) {
        Tails.insert(Stores[j]);
        Heads.insert(Stores[i]);
        ConsecutiveChain[Stores[i]] = Stores[j];
      }
    }

  // For stores that start but don't end a link in the chain:
  for (ValueSet::iterator it = Heads.begin(), e = Heads.end(); it != e; ++it) {
    if (Tails.count(*it)) continue;

    // We found a store instr that starts a chain. Now follow the chain and try
    // to vectorize it.
    ValueList Operands;
    Value *I = *it;
    // Collect the chain into a list.
    while (Tails.count(I) || Heads.count(I)) {
      if (VectorizedStores.count(I)) break;
      Operands.push_back(I);
      // Move to the next value in the chain.
      I = ConsecutiveChain[I];
    }

    bool Vectorized = vectorizeStoreChain(Operands, costThreshold);

    // Mark the vectorized stores so that we don't vectorize them again.
    if (Vectorized)
      VectorizedStores.insert(Operands.begin(), Operands.end());
    Changed |= Vectorized;
  }

  return Changed;
}

int BoUpSLP::getScalarizationCost(ArrayRef<Value *> VL) {
  // Find the type of the operands in VL.
  Type *ScalarTy = VL[0]->getType();
  if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
    ScalarTy = SI->getValueOperand()->getType();
  VectorType *VecTy = VectorType::get(ScalarTy, VL.size());
  // Find the cost of inserting/extracting values from the vector.
  return getScalarizationCost(VecTy);
}

int BoUpSLP::getScalarizationCost(Type *Ty) {
  int Cost = 0;
  for (unsigned i = 0, e = cast<VectorType>(Ty)->getNumElements(); i < e; ++i)
    Cost += TTI->getVectorInstrCost(Instruction::InsertElement, Ty, i);
  return Cost;
}

AliasAnalysis::Location BoUpSLP::getLocation(Instruction *I) {
  if (StoreInst *SI = dyn_cast<StoreInst>(I)) return AA->getLocation(SI);
  if (LoadInst *LI = dyn_cast<LoadInst>(I)) return AA->getLocation(LI);
  return AliasAnalysis::Location();
}

Value *BoUpSLP::isUnsafeToSink(Instruction *Src, Instruction *Dst) {
  assert(Src->getParent() == Dst->getParent() && "Not the same BB");
  BasicBlock::iterator I = Src, E = Dst;
  // Scan all of the instructions from Src to Dst and check whether any of
  // them may alias the source.
  for (++I; I != E; ++I) {
    // Ignore store instructions that are marked as 'ignore'.
    if (MemBarrierIgnoreList.count(I)) continue;
    if (Src->mayWriteToMemory()) /* Write */ {
      if (!I->mayReadOrWriteMemory()) continue;
    } else /* Read */ {
      if (!I->mayWriteToMemory()) continue;
    }
    AliasAnalysis::Location A = getLocation(&*I);
    AliasAnalysis::Location B = getLocation(Src);

    if (!A.Ptr || !B.Ptr || AA->alias(A, B))
      return I;
  }
  return 0;
}

void BoUpSLP::vectorizeArith(ArrayRef<Value *> Operands) {
  Value *Vec = vectorizeTree(Operands, Operands.size());
  BasicBlock::iterator Loc = cast<Instruction>(Vec);
  IRBuilder<> Builder(++Loc);
  // After vectorizing the operands we need to generate extractelement
  // instructions and replace all of the uses of the scalar values with
  // the values that we extracted from the vectorized tree.
  for (unsigned i = 0, e = Operands.size(); i != e; ++i) {
    Value *S = Builder.CreateExtractElement(Vec, Builder.getInt32(i));
    Operands[i]->replaceAllUsesWith(S);
  }
}

int BoUpSLP::getTreeCost(ArrayRef<Value *> VL) {
  // Get rid of the list of stores that were removed and of the lists of
  // instructions with multiple users.
  MemBarrierIgnoreList.clear();
  LaneMap.clear();
  MultiUserVals.clear();
  MustScalarize.clear();

  // Scan the tree and find which value is used by which lane, and which
  // values must be scalarized.
  getTreeUses_rec(VL, 0);

  // Check that instructions with multiple users can be vectorized. Mark
  // unsafe instructions.
  for (ValueSet::iterator it = MultiUserVals.begin(),
       e = MultiUserVals.end(); it != e; ++it) {
    // Check that all of the users of this instr are within the tree
    // and that they are all from the same lane.
    int Lane = -1;
    for (Value::use_iterator I = (*it)->use_begin(), E = (*it)->use_end();
         I != E; ++I) {
      if (LaneMap.find(*I) == LaneMap.end()) {
        MustScalarize.insert(*it);
        DEBUG(dbgs() << "SLP: Adding " << **it <<
              " to MustScalarize because of an out-of-tree usage.\n");
        break;
      }
      if (Lane == -1) Lane = LaneMap[*I];
      if (Lane != LaneMap[*I]) {
        MustScalarize.insert(*it);
        DEBUG(dbgs() << "SLP: Adding " << **it <<
              " to MustScalarize because multiple lanes use it: "
              << Lane << " and " << LaneMap[*I] << ".\n");
        break;
      }
    }
  }

  // Now calculate the cost of vectorizing the tree.
  return getTreeCost_rec(VL, 0);
}

void BoUpSLP::getTreeUses_rec(ArrayRef<Value *> VL, unsigned Depth) {
  if (Depth == RecursionMaxDepth) return;

  // Don't handle vectors.
  if (VL[0]->getType()->isVectorTy()) return;
  if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
    if (SI->getValueOperand()->getType()->isVectorTy()) return;

  // Check if all of the operands are constants.
  bool AllConst = true;
  bool AllSameScalar = true;
  for (unsigned i = 0, e = VL.size(); i < e; ++i) {
    AllConst &= isa<Constant>(VL[i]);
    AllSameScalar &= (VL[0] == VL[i]);
    Instruction *I = dyn_cast<Instruction>(VL[i]);
    // If one of the instructions is out of this BB, we need to scalarize all.
    if (I && I->getParent() != BB) return;
  }

  // If all of the operands are identical or constant we have a simple
  // solution.
  if (AllConst || AllSameScalar) return;

  // Scalarize unknown structures.
  Instruction *VL0 = dyn_cast<Instruction>(VL[0]);
  if (!VL0) return;

  unsigned Opcode = VL0->getOpcode();
  for (unsigned i = 0, e = VL.size(); i < e; ++i) {
    Instruction *I = dyn_cast<Instruction>(VL[i]);
    // If not all of the instructions are identical then we have to scalarize.
    if (!I || Opcode != I->getOpcode()) return;
  }

  // Mark instructions with multiple users.
  for (unsigned i = 0, e = VL.size(); i < e; ++i) {
    Instruction *I = dyn_cast<Instruction>(VL[i]);
    // Remember to check if all of the users of this instr are vectorized
    // within our tree.
    if (I && I->getNumUses() > 1) MultiUserVals.insert(I);
  }

  for (int i = 0, e = VL.size(); i < e; ++i) {
    // Check that the instruction is only used within one lane.
    if (LaneMap.count(VL[i]) && LaneMap[VL[i]] != i) return;
    // Mark this instruction as 'seen' and remember the lane.
    LaneMap[VL[i]] = i;
  }

  switch (Opcode) {
  case Instruction::ZExt:
  case Instruction::SExt:
  case Instruction::FPToUI:
  case Instruction::FPToSI:
  case Instruction::FPExt:
  case Instruction::PtrToInt:
  case Instruction::IntToPtr:
  case Instruction::SIToFP:
  case Instruction::UIToFP:
  case Instruction::Trunc:
  case Instruction::FPTrunc:
  case Instruction::BitCast:
  case Instruction::Add:
  case Instruction::FAdd:
  case Instruction::Sub:
  case Instruction::FSub:
  case Instruction::Mul:
  case Instruction::FMul:
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::FDiv:
  case Instruction::URem:
  case Instruction::SRem:
  case Instruction::FRem:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor: {
    for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
      ValueList Operands;
      // Prepare the operand vector.
      for (unsigned j = 0; j < VL.size(); ++j)
        Operands.push_back(cast<Instruction>(VL[j])->getOperand(i));

      getTreeUses_rec(Operands, Depth+1);
    }
    return;
  }
  case Instruction::Store: {
    ValueList Operands;
    for (unsigned j = 0; j < VL.size(); ++j)
      Operands.push_back(cast<Instruction>(VL[j])->getOperand(0));
    getTreeUses_rec(Operands, Depth+1);
    return;
  }
  default:
    return;
  }
}

int BoUpSLP::getTreeCost_rec(ArrayRef<Value *> VL, unsigned Depth) {
  Type *ScalarTy = VL[0]->getType();

  if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
    ScalarTy = SI->getValueOperand()->getType();

  // Don't mess with vectors.
  if (ScalarTy->isVectorTy()) return max_cost;
  VectorType *VecTy = VectorType::get(ScalarTy, VL.size());

  if (Depth == RecursionMaxDepth) return getScalarizationCost(VecTy);

  // Check if all of the operands are constants.
  bool AllConst = true;
  bool AllSameScalar = true;
  bool MustScalarizeFlag = false;
  for (unsigned i = 0, e = VL.size(); i < e; ++i) {
    AllConst &= isa<Constant>(VL[i]);
    AllSameScalar &= (VL[0] == VL[i]);
    // Must have a single use.
    Instruction *I = dyn_cast<Instruction>(VL[i]);
    MustScalarizeFlag |= MustScalarize.count(VL[i]);
    // This instruction is outside the basic block.
    if (I && I->getParent() != BB)
      return getScalarizationCost(VecTy);
  }

  // Is this a simple vector constant?
  if (AllConst) return 0;

  // If all of the operands are identical we can broadcast them.
  Instruction *VL0 = dyn_cast<Instruction>(VL[0]);
  if (AllSameScalar) {
    // If we are in a loop, and this is not an instruction (e.g. a constant or
    // an argument), or the instruction is defined outside the loop, then
    // assume that the cost is zero.
    if (L && (!VL0 || !L->contains(VL0)))
      return 0;

    // We need to broadcast the scalar.
    return TTI->getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy, 0);
  }

  // If this is not a constant, or a scalar from outside the loop, then we
  // need to scalarize it.
  if (MustScalarizeFlag)
    return getScalarizationCost(VecTy);

  if (!VL0) return getScalarizationCost(VecTy);
  assert(VL0->getParent() == BB && "Wrong BB");

  unsigned Opcode = VL0->getOpcode();
  for (unsigned i = 0, e = VL.size(); i < e; ++i) {
    Instruction *I = dyn_cast<Instruction>(VL[i]);
    // If not all of the instructions are identical then we have to scalarize.
    if (!I || Opcode != I->getOpcode()) return getScalarizationCost(VecTy);
  }

  // Check if it is safe to sink the loads or the stores.
  if (Opcode == Instruction::Load || Opcode == Instruction::Store) {
    int MaxIdx = InstrIdx[VL0];
    for (unsigned i = 1, e = VL.size(); i < e; ++i)
      MaxIdx = std::max(MaxIdx, InstrIdx[VL[i]]);

    Instruction *Last = InstrVec[MaxIdx];
    for (unsigned i = 0, e = VL.size(); i < e; ++i) {
      if (VL[i] == Last) continue;
      Value *Barrier = isUnsafeToSink(cast<Instruction>(VL[i]), Last);
      if (Barrier) {
        DEBUG(dbgs() << "SLP: Can't sink " << *VL[i] << "\n down to " <<
              *Last << "\n because of " << *Barrier << "\n");
        return max_cost;
      }
    }
  }

  switch (Opcode) {
  case Instruction::ZExt:
  case Instruction::SExt:
  case Instruction::FPToUI:
  case Instruction::FPToSI:
  case Instruction::FPExt:
  case Instruction::PtrToInt:
  case Instruction::IntToPtr:
  case Instruction::SIToFP:
  case Instruction::UIToFP:
  case Instruction::Trunc:
  case Instruction::FPTrunc:
  case Instruction::BitCast: {
    int Cost = 0;
    ValueList Operands;
    Type *SrcTy = VL0->getOperand(0)->getType();
    // Prepare the operand vector.
    for (unsigned j = 0; j < VL.size(); ++j) {
      Operands.push_back(cast<Instruction>(VL[j])->getOperand(0));
      // Check that the casted type is the same for all users.
      if (cast<Instruction>(VL[j])->getOperand(0)->getType() != SrcTy)
        return getScalarizationCost(VecTy);
    }

    Cost += getTreeCost_rec(Operands, Depth+1);
    if (Cost >= max_cost) return max_cost;

    // Calculate the cost of this instruction.
    int ScalarCost = VL.size() * TTI->getCastInstrCost(VL0->getOpcode(),
                                                       VL0->getType(), SrcTy);

    VectorType *SrcVecTy = VectorType::get(SrcTy, VL.size());
    int VecCost = TTI->getCastInstrCost(VL0->getOpcode(), VecTy, SrcVecTy);
    Cost += (VecCost - ScalarCost);
    return Cost;
  }
  case Instruction::Add:
  case Instruction::FAdd:
  case Instruction::Sub:
  case Instruction::FSub:
  case Instruction::Mul:
  case Instruction::FMul:
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::FDiv:
  case Instruction::URem:
  case Instruction::SRem:
  case Instruction::FRem:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor: {
    int Cost = 0;
    // Calculate the cost of all of the operands.
    for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
      ValueList Operands;
      // Prepare the operand vector.
      for (unsigned j = 0; j < VL.size(); ++j)
        Operands.push_back(cast<Instruction>(VL[j])->getOperand(i));

      Cost += getTreeCost_rec(Operands, Depth+1);
      if (Cost >= max_cost) return max_cost;
    }

    // Calculate the cost of this instruction.
    int ScalarCost = VecTy->getNumElements() *
      TTI->getArithmeticInstrCost(Opcode, ScalarTy);

    int VecCost = TTI->getArithmeticInstrCost(Opcode, VecTy);
    Cost += (VecCost - ScalarCost);
    return Cost;
  }
  case Instruction::Load: {
    // If we need to scalarize the loads, add the cost of forming the vector.
    for (unsigned i = 0, e = VL.size()-1; i < e; ++i)
      if (!isConsecutiveAccess(VL[i], VL[i+1]))
        return getScalarizationCost(VecTy);

    // Cost of wide load - cost of scalar loads.
    int ScalarLdCost = VecTy->getNumElements() *
      TTI->getMemoryOpCost(Instruction::Load, ScalarTy, 1, 0);
    int VecLdCost = TTI->getMemoryOpCost(Instruction::Load, VecTy, 1, 0);
    return VecLdCost - ScalarLdCost;
  }
  case Instruction::Store: {
    // We know that we can merge the stores. Calculate the cost.
    int ScalarStCost = VecTy->getNumElements() *
      TTI->getMemoryOpCost(Instruction::Store, ScalarTy, 1, 0);
    int VecStCost = TTI->getMemoryOpCost(Instruction::Store, VecTy, 1, 0);
    int StoreCost = VecStCost - ScalarStCost;

    ValueList Operands;
    for (unsigned j = 0; j < VL.size(); ++j) {
      Operands.push_back(cast<Instruction>(VL[j])->getOperand(0));
      MemBarrierIgnoreList.insert(VL[j]);
    }

    int TotalCost = StoreCost + getTreeCost_rec(Operands, Depth + 1);
    return TotalCost;
  }
  default:
    // Unable to vectorize unknown instructions.
    return getScalarizationCost(VecTy);
  }
}

Instruction *BoUpSLP::GetLastInstr(ArrayRef<Value *> VL, unsigned VF) {
  int MaxIdx = InstrIdx[BB->getFirstNonPHI()];
  for (unsigned i = 0; i < VF; ++i)
    MaxIdx = std::max(MaxIdx, InstrIdx[VL[i]]);
  return InstrVec[MaxIdx + 1];
}

Value *BoUpSLP::Scalarize(ArrayRef<Value *> VL, VectorType *Ty) {
  IRBuilder<> Builder(GetLastInstr(VL, Ty->getNumElements()));
  Value *Vec = UndefValue::get(Ty);
  for (unsigned i = 0; i < Ty->getNumElements(); ++i) {
    // Generate the 'InsertElement' instruction.
    Vec = Builder.CreateInsertElement(Vec, VL[i], Builder.getInt32(i));
    // Remember that this instruction is used as part of a 'gather' sequence.
    // The caller of the bottom-up SLP vectorizer can try to hoist the
    // sequence if the users are outside of the basic block.
    GatherInstructions.push_back(Vec);
  }

  return Vec;
}

Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL, int VF) {
  Value *V = vectorizeTree_rec(VL, VF);
  // We moved some instructions around. We have to number them again
  // before we can do any analysis.
  numberInstructions();
  MustScalarize.clear();
  return V;
}

Value *BoUpSLP::vectorizeTree_rec(ArrayRef<Value *> VL, int VF) {
  Type *ScalarTy = VL[0]->getType();
  if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
    ScalarTy = SI->getValueOperand()->getType();
  VectorType *VecTy = VectorType::get(ScalarTy, VF);

  // Check if all of the operands are constants or identical.
  bool AllConst = true;
  bool AllSameScalar = true;
  for (unsigned i = 0, e = VF; i < e; ++i) {
    AllConst &= isa<Constant>(VL[i]);
    AllSameScalar &= (VL[0] == VL[i]);
    // The instruction must be in the same BB, and it must be vectorizable.
    Instruction *I = dyn_cast<Instruction>(VL[i]);
    if (MustScalarize.count(VL[i]) || (I && I->getParent() != BB))
      return Scalarize(VL, VecTy);
  }

  // Check that this is a simple vector constant.
  if (AllConst || AllSameScalar) return Scalarize(VL, VecTy);

  // Scalarize unknown structures.
  Instruction *VL0 = dyn_cast<Instruction>(VL[0]);
  if (!VL0) return Scalarize(VL, VecTy);

  if (VectorizedValues.count(VL0)) return VectorizedValues[VL0];

  unsigned Opcode = VL0->getOpcode();
  for (unsigned i = 0, e = VF; i < e; ++i) {
    Instruction *I = dyn_cast<Instruction>(VL[i]);
    // If not all of the instructions are identical then we have to scalarize.
    if (!I || Opcode != I->getOpcode()) return Scalarize(VL, VecTy);
  }

  switch (Opcode) {
  case Instruction::ZExt:
  case Instruction::SExt:
  case Instruction::FPToUI:
  case Instruction::FPToSI:
  case Instruction::FPExt:
  case Instruction::PtrToInt:
  case Instruction::IntToPtr:
  case Instruction::SIToFP:
  case Instruction::UIToFP:
  case Instruction::Trunc:
  case Instruction::FPTrunc:
  case Instruction::BitCast: {
    ValueList INVL;
    for (int i = 0; i < VF; ++i)
      INVL.push_back(cast<Instruction>(VL[i])->getOperand(0));
    Value *InVec = vectorizeTree_rec(INVL, VF);
    IRBuilder<> Builder(GetLastInstr(VL, VF));
    CastInst *CI = dyn_cast<CastInst>(VL0);
    Value *V = Builder.CreateCast(CI->getOpcode(), InVec, VecTy);
    VectorizedValues[VL0] = V;
    return V;
  }
  case Instruction::Add:
  case Instruction::FAdd:
  case Instruction::Sub:
  case Instruction::FSub:
  case Instruction::Mul:
  case Instruction::FMul:
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::FDiv:
  case Instruction::URem:
  case Instruction::SRem:
  case Instruction::FRem:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor: {
    ValueList LHSVL, RHSVL;
    for (int i = 0; i < VF; ++i) {
      RHSVL.push_back(cast<Instruction>(VL[i])->getOperand(0));
      LHSVL.push_back(cast<Instruction>(VL[i])->getOperand(1));
    }

    Value *RHS = vectorizeTree_rec(RHSVL, VF);
    Value *LHS = vectorizeTree_rec(LHSVL, VF);
    IRBuilder<> Builder(GetLastInstr(VL, VF));
    BinaryOperator *BinOp = cast<BinaryOperator>(VL0);
    Value *V = Builder.CreateBinOp(BinOp->getOpcode(), RHS, LHS);
    VectorizedValues[VL0] = V;
    return V;
  }
  case Instruction::Load: {
    LoadInst *LI = cast<LoadInst>(VL0);
    unsigned Alignment = LI->getAlignment();

    // Check if all of the loads are consecutive.
    for (unsigned i = 1, e = VF; i < e; ++i)
      if (!isConsecutiveAccess(VL[i-1], VL[i]))
        return Scalarize(VL, VecTy);

    IRBuilder<> Builder(GetLastInstr(VL, VF));
    Value *VecPtr = Builder.CreateBitCast(LI->getPointerOperand(),
                                          VecTy->getPointerTo());
    LI = Builder.CreateLoad(VecPtr);
    LI->setAlignment(Alignment);
    VectorizedValues[VL0] = LI;
    return LI;
  }
  case Instruction::Store: {
    StoreInst *SI = cast<StoreInst>(VL0);
    unsigned Alignment = SI->getAlignment();

    ValueList ValueOp;
    for (int i = 0; i < VF; ++i)
      ValueOp.push_back(cast<StoreInst>(VL[i])->getValueOperand());

    Value *VecValue = vectorizeTree_rec(ValueOp, VF);

    IRBuilder<> Builder(GetLastInstr(VL, VF));
    Value *VecPtr = Builder.CreateBitCast(SI->getPointerOperand(),
                                          VecTy->getPointerTo());
    Builder.CreateStore(VecValue, VecPtr)->setAlignment(Alignment);

    for (int i = 0; i < VF; ++i)
      cast<Instruction>(VL[i])->eraseFromParent();
    return 0;
  }
  default:
    Value *S = Scalarize(VL, VecTy);
    VectorizedValues[VL0] = S;
    return S;
  }
}

} // end of namespace llvm
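
// Usage sketch (illustrative only, not part of the original file): the BoUpSLP
// helper above is intended to be driven per basic block by a caller such as
// the SLP vectorizer pass, which collects the stores in a block and passes
// them to vectorizeStores() together with a cost threshold. The analysis
// pointers (SE, DL, TTI, AA, LI) and the threshold value used below are
// assumptions for this example; the real driver lives outside this file.
//
//   BoUpSLP R(&BB, SE, DL, TTI, AA, LI ? LI->getLoopFor(&BB) : 0);
//   SmallVector<StoreInst *, 16> Stores;
//   for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I)
//     if (StoreInst *SI = dyn_cast<StoreInst>(I))
//       Stores.push_back(SI);
//   // A negative threshold means we only vectorize when the vector tree is
//   // estimated to be strictly cheaper than the scalar code it replaces.
//   bool Changed = R.vectorizeStores(Stores, -3);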