1//===- SROA.cpp - Scalar Replacement Of Aggregates ------------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9/// \file 10/// This transformation implements the well known scalar replacement of 11/// aggregates transformation. It tries to identify promotable elements of an 12/// aggregate alloca, and promote them to registers. It will also try to 13/// convert uses of an element (or set of elements) of an alloca into a vector 14/// or bitfield-style integer scalar if appropriate. 15/// 16/// It works to do this with minimal slicing of the alloca so that regions 17/// which are merely transferred in and out of external memory remain unchanged 18/// and are not decomposed to scalar code. 19/// 20/// Because this also performs alloca promotion, it can be thought of as also 21/// serving the purpose of SSA formation. The algorithm iterates on the 22/// function until all opportunities for promotion have been realized. 23/// 24//===----------------------------------------------------------------------===// 25 26#define DEBUG_TYPE "sroa" 27#include "llvm/Transforms/Scalar.h" 28#include "llvm/Constants.h" 29#include "llvm/DIBuilder.h" 30#include "llvm/DebugInfo.h" 31#include "llvm/DerivedTypes.h" 32#include "llvm/Function.h" 33#include "llvm/IRBuilder.h" 34#include "llvm/Instructions.h" 35#include "llvm/IntrinsicInst.h" 36#include "llvm/LLVMContext.h" 37#include "llvm/Module.h" 38#include "llvm/Operator.h" 39#include "llvm/Pass.h" 40#include "llvm/ADT/SetVector.h" 41#include "llvm/ADT/SmallVector.h" 42#include "llvm/ADT/Statistic.h" 43#include "llvm/ADT/STLExtras.h" 44#include "llvm/Analysis/Dominators.h" 45#include "llvm/Analysis/Loads.h" 46#include "llvm/Analysis/ValueTracking.h" 47#include "llvm/Support/CommandLine.h" 48#include "llvm/Support/Debug.h" 49#include "llvm/Support/ErrorHandling.h" 50#include "llvm/Support/GetElementPtrTypeIterator.h" 51#include "llvm/Support/InstVisitor.h" 52#include "llvm/Support/MathExtras.h" 53#include "llvm/Support/raw_ostream.h" 54#include "llvm/Target/TargetData.h" 55#include "llvm/Transforms/Utils/Local.h" 56#include "llvm/Transforms/Utils/PromoteMemToReg.h" 57#include "llvm/Transforms/Utils/SSAUpdater.h" 58using namespace llvm; 59 60STATISTIC(NumAllocasAnalyzed, "Number of allocas analyzed for replacement"); 61STATISTIC(NumNewAllocas, "Number of new, smaller allocas introduced"); 62STATISTIC(NumPromoted, "Number of allocas promoted to SSA values"); 63STATISTIC(NumLoadsSpeculated, "Number of loads speculated to allow promotion"); 64STATISTIC(NumDeleted, "Number of instructions deleted"); 65STATISTIC(NumVectorized, "Number of vectorized aggregates"); 66 67/// Hidden option to force the pass to not use DomTree and mem2reg, instead 68/// forming SSA values through the SSAUpdater infrastructure. 69static cl::opt<bool> 70ForceSSAUpdater("force-ssa-updater", cl::init(false), cl::Hidden); 71 72namespace { 73/// \brief Alloca partitioning representation. 74/// 75/// This class represents a partitioning of an alloca into slices, and 76/// information about the nature of uses of each slice of the alloca. The goal 77/// is that this information is sufficient to decide if and how to split the 78/// alloca apart and replace slices with scalars. 
It is also intended that this 79/// structure can capture the relevant information needed both to decide about 80/// and to enact these transformations. 81class AllocaPartitioning { 82public: 83 /// \brief A common base class for representing a half-open byte range. 84 struct ByteRange { 85 /// \brief The beginning offset of the range. 86 uint64_t BeginOffset; 87 88 /// \brief The ending offset, not included in the range. 89 uint64_t EndOffset; 90 91 ByteRange() : BeginOffset(), EndOffset() {} 92 ByteRange(uint64_t BeginOffset, uint64_t EndOffset) 93 : BeginOffset(BeginOffset), EndOffset(EndOffset) {} 94 95 /// \brief Support for ordering ranges. 96 /// 97 /// This provides an ordering over ranges such that start offsets are 98 /// always increasing, and within equal start offsets, the end offsets are 99 /// decreasing. Thus the spanning range comes first in a cluster with the 100 /// same start position. 101 bool operator<(const ByteRange &RHS) const { 102 if (BeginOffset < RHS.BeginOffset) return true; 103 if (BeginOffset > RHS.BeginOffset) return false; 104 if (EndOffset > RHS.EndOffset) return true; 105 return false; 106 } 107 108 /// \brief Support comparison with a single offset to allow binary searches. 109 friend bool operator<(const ByteRange &LHS, uint64_t RHSOffset) { 110 return LHS.BeginOffset < RHSOffset; 111 } 112 113 friend LLVM_ATTRIBUTE_UNUSED bool operator<(uint64_t LHSOffset, 114 const ByteRange &RHS) { 115 return LHSOffset < RHS.BeginOffset; 116 } 117 118 bool operator==(const ByteRange &RHS) const { 119 return BeginOffset == RHS.BeginOffset && EndOffset == RHS.EndOffset; 120 } 121 bool operator!=(const ByteRange &RHS) const { return !operator==(RHS); } 122 }; 123 124 /// \brief A partition of an alloca. 125 /// 126 /// This structure represents a contiguous partition of the alloca. These are 127 /// formed by examining the uses of the alloca. During formation, they may 128 /// overlap but once an AllocaPartitioning is built, the Partitions within it 129 /// are all disjoint. 130 struct Partition : public ByteRange { 131 /// \brief Whether this partition is splittable into smaller partitions. 132 /// 133 /// We flag partitions as splittable when they are formed entirely due to 134 /// accesses by trivially splittable operations such as memset and memcpy. 135 /// 136 /// FIXME: At some point we should consider loads and stores of FCAs to be 137 /// splittable and eagerly split them into scalar values. 138 bool IsSplittable; 139 140 Partition() : ByteRange(), IsSplittable() {} 141 Partition(uint64_t BeginOffset, uint64_t EndOffset, bool IsSplittable) 142 : ByteRange(BeginOffset, EndOffset), IsSplittable(IsSplittable) {} 143 }; 144 145 /// \brief A particular use of a partition of the alloca. 146 /// 147 /// This structure is used to associate uses of a partition with it. They 148 /// mark the range of bytes which are referenced by a particular instruction, 149 /// and includes a handle to the user itself and the pointer value in use. 150 /// The bounds of these uses are determined by intersecting the bounds of the 151 /// memory use itself with a particular partition. As a consequence there is 152 /// intentionally overlap between various uses of the same partition. 153 struct PartitionUse : public ByteRange { 154 /// \brief The use in question. Provides access to both user and used value. 
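    ///
    /// For example, with illustrative numbers: a single memset covering bytes
    /// [0,16) of an alloca whose partitions are [0,8) and [8,16) is recorded
    /// as two PartitionUses, [0,8) and [8,16), both holding the same Use.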
    Use *U;

    PartitionUse() : ByteRange(), U() {}
    PartitionUse(uint64_t BeginOffset, uint64_t EndOffset, Use *U)
        : ByteRange(BeginOffset, EndOffset), U(U) {}
  };

  /// \brief Construct a partitioning of a particular alloca.
  ///
  /// Construction does most of the work for partitioning the alloca. This
  /// performs the necessary walks of users and builds a partitioning from it.
  AllocaPartitioning(const TargetData &TD, AllocaInst &AI);

  /// \brief Test whether a pointer to the allocation escapes our analysis.
  ///
  /// If this is true, the partitioning is never fully built and should be
  /// ignored.
  bool isEscaped() const { return PointerEscapingInstr; }

  /// \brief Support for iterating over the partitions.
  /// @{
  typedef SmallVectorImpl<Partition>::iterator iterator;
  iterator begin() { return Partitions.begin(); }
  iterator end() { return Partitions.end(); }

  typedef SmallVectorImpl<Partition>::const_iterator const_iterator;
  const_iterator begin() const { return Partitions.begin(); }
  const_iterator end() const { return Partitions.end(); }
  /// @}

  /// \brief Support for iterating over and manipulating a particular
  /// partition's uses.
  ///
  /// The iteration support provided for uses is more limited, but also
  /// includes some manipulation routines to support rewriting the uses of
  /// partitions during SROA.
  /// @{
  typedef SmallVectorImpl<PartitionUse>::iterator use_iterator;
  use_iterator use_begin(unsigned Idx) { return Uses[Idx].begin(); }
  use_iterator use_begin(const_iterator I) { return Uses[I - begin()].begin(); }
  use_iterator use_end(unsigned Idx) { return Uses[Idx].end(); }
  use_iterator use_end(const_iterator I) { return Uses[I - begin()].end(); }
  void use_push_back(unsigned Idx, const PartitionUse &PU) {
    Uses[Idx].push_back(PU);
  }
  void use_push_back(const_iterator I, const PartitionUse &PU) {
    Uses[I - begin()].push_back(PU);
  }
  void use_erase(unsigned Idx, use_iterator UI) { Uses[Idx].erase(UI); }
  void use_erase(const_iterator I, use_iterator UI) {
    Uses[I - begin()].erase(UI);
  }

  typedef SmallVectorImpl<PartitionUse>::const_iterator const_use_iterator;
  const_use_iterator use_begin(unsigned Idx) const { return Uses[Idx].begin(); }
  const_use_iterator use_begin(const_iterator I) const {
    return Uses[I - begin()].begin();
  }
  const_use_iterator use_end(unsigned Idx) const { return Uses[Idx].end(); }
  const_use_iterator use_end(const_iterator I) const {
    return Uses[I - begin()].end();
  }
  /// @}

  /// \brief Allow iterating the dead users for this alloca.
  ///
  /// These are instructions which will never actually use the alloca as they
  /// are outside the allocated range. They are safe to replace with undef and
  /// delete.
  /// @{
  typedef SmallVectorImpl<Instruction *>::const_iterator dead_user_iterator;
  dead_user_iterator dead_user_begin() const { return DeadUsers.begin(); }
  dead_user_iterator dead_user_end() const { return DeadUsers.end(); }
  /// @}

  /// \brief Allow iterating the dead expressions referring to this alloca.
  ///
  /// These are operands which cannot actually be used to refer to the
  /// alloca as they are outside its range and the user doesn't correct for
  /// that. These mostly consist of PHI node inputs and the like which we just
  /// need to replace with undef.
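  ///
  /// For example, a hypothetical source-level pattern that can give rise to
  /// such an operand is a select (or PHI) mixing an in-bounds pointer with
  /// one formed past the end of the object:
  /// \code
  ///   int Buf[4];
  ///   int *P = Cond ? &Buf[1] : &Buf[100]; // &Buf[100] is past the end
  /// \endcode
  /// Only the out-of-bounds operand is recorded here; the select or PHI
  /// itself stays live through its in-bounds input.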
  /// @{
  typedef SmallVectorImpl<Use *>::const_iterator dead_op_iterator;
  dead_op_iterator dead_op_begin() const { return DeadOperands.begin(); }
  dead_op_iterator dead_op_end() const { return DeadOperands.end(); }
  /// @}

  /// \brief MemTransferInst auxiliary data.
  /// This struct provides some auxiliary data about memory transfer
  /// intrinsics such as memcpy and memmove. These intrinsics can use two
  /// different ranges within the same alloca, and provide other challenges to
  /// correctly represent. We stash extra data to help us untangle this
  /// after the partitioning is complete.
  struct MemTransferOffsets {
    uint64_t DestBegin, DestEnd;
    uint64_t SourceBegin, SourceEnd;
    bool IsSplittable;
  };
  MemTransferOffsets getMemTransferOffsets(MemTransferInst &II) const {
    return MemTransferInstData.lookup(&II);
  }

  /// \brief Map from a PHI or select operand back to a partition.
  ///
  /// When manipulating PHI nodes or selects, they can use more than one
  /// partition of an alloca. We store a special mapping to allow finding the
  /// partition referenced by each of these operands, if any.
  iterator findPartitionForPHIOrSelectOperand(Use *U) {
    SmallDenseMap<Use *, std::pair<unsigned, unsigned> >::const_iterator MapIt
      = PHIOrSelectOpMap.find(U);
    if (MapIt == PHIOrSelectOpMap.end())
      return end();

    return begin() + MapIt->second.first;
  }

  /// \brief Map from a PHI or select operand back to the specific use of
  /// a partition.
  ///
  /// Similar to mapping these operands back to the partitions, this maps
  /// directly to the use structure of that partition.
  use_iterator findPartitionUseForPHIOrSelectOperand(Use *U) {
    SmallDenseMap<Use *, std::pair<unsigned, unsigned> >::const_iterator MapIt
      = PHIOrSelectOpMap.find(U);
    assert(MapIt != PHIOrSelectOpMap.end());
    return Uses[MapIt->second.first].begin() + MapIt->second.second;
  }

  /// \brief Compute a common type among the uses of a particular partition.
  ///
  /// This routine walks all of the uses of a particular partition and tries
  /// to find a common type between them. Untyped operations such as memset and
  /// memcpy are ignored.
  Type *getCommonType(iterator I) const;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  void print(raw_ostream &OS, const_iterator I, StringRef Indent = "  ") const;
  void printUsers(raw_ostream &OS, const_iterator I,
                  StringRef Indent = "  ") const;
  void print(raw_ostream &OS) const;
  void LLVM_ATTRIBUTE_NOINLINE LLVM_ATTRIBUTE_USED dump(const_iterator I) const;
  void LLVM_ATTRIBUTE_NOINLINE LLVM_ATTRIBUTE_USED dump() const;
#endif

private:
  template <typename DerivedT, typename RetT = void> class BuilderBase;
  class PartitionBuilder;
  friend class AllocaPartitioning::PartitionBuilder;
  class UseBuilder;
  friend class AllocaPartitioning::UseBuilder;

#ifndef NDEBUG
  /// \brief Handle to alloca instruction to simplify method interfaces.
  AllocaInst &AI;
#endif

  /// \brief The instruction responsible for this alloca having no partitioning.
  ///
  /// When an instruction (potentially) escapes the pointer to the alloca, we
  /// store a pointer to that here and abort trying to partition the alloca.
  /// This will be null if the alloca is partitioned successfully.
  Instruction *PointerEscapingInstr;

  /// \brief The partitions of the alloca.
319 /// 320 /// We store a vector of the partitions over the alloca here. This vector is 321 /// sorted by increasing begin offset, and then by decreasing end offset. See 322 /// the Partition inner class for more details. Initially (during 323 /// construction) there are overlaps, but we form a disjoint sequence of 324 /// partitions while finishing construction and a fully constructed object is 325 /// expected to always have this as a disjoint space. 326 SmallVector<Partition, 8> Partitions; 327 328 /// \brief The uses of the partitions. 329 /// 330 /// This is essentially a mapping from each partition to a list of uses of 331 /// that partition. The mapping is done with a Uses vector that has the exact 332 /// same number of entries as the partition vector. Each entry is itself 333 /// a vector of the uses. 334 SmallVector<SmallVector<PartitionUse, 2>, 8> Uses; 335 336 /// \brief Instructions which will become dead if we rewrite the alloca. 337 /// 338 /// Note that these are not separated by partition. This is because we expect 339 /// a partitioned alloca to be completely rewritten or not rewritten at all. 340 /// If rewritten, all these instructions can simply be removed and replaced 341 /// with undef as they come from outside of the allocated space. 342 SmallVector<Instruction *, 8> DeadUsers; 343 344 /// \brief Operands which will become dead if we rewrite the alloca. 345 /// 346 /// These are operands that in their particular use can be replaced with 347 /// undef when we rewrite the alloca. These show up in out-of-bounds inputs 348 /// to PHI nodes and the like. They aren't entirely dead (there might be 349 /// a GEP back into the bounds using it elsewhere) and nor is the PHI, but we 350 /// want to swap this particular input for undef to simplify the use lists of 351 /// the alloca. 352 SmallVector<Use *, 8> DeadOperands; 353 354 /// \brief The underlying storage for auxiliary memcpy and memset info. 355 SmallDenseMap<MemTransferInst *, MemTransferOffsets, 4> MemTransferInstData; 356 357 /// \brief A side datastructure used when building up the partitions and uses. 358 /// 359 /// This mapping is only really used during the initial building of the 360 /// partitioning so that we can retain information about PHI and select nodes 361 /// processed. 362 SmallDenseMap<Instruction *, std::pair<uint64_t, bool> > PHIOrSelectSizes; 363 364 /// \brief Auxiliary information for particular PHI or select operands. 365 SmallDenseMap<Use *, std::pair<unsigned, unsigned>, 4> PHIOrSelectOpMap; 366 367 /// \brief A utility routine called from the constructor. 368 /// 369 /// This does what it says on the tin. It is the key of the alloca partition 370 /// splitting and merging. After it is called we have the desired disjoint 371 /// collection of partitions. 372 void splitAndMergePartitions(); 373}; 374} 375 376template <typename DerivedT, typename RetT> 377class AllocaPartitioning::BuilderBase 378 : public InstVisitor<DerivedT, RetT> { 379public: 380 BuilderBase(const TargetData &TD, AllocaInst &AI, AllocaPartitioning &P) 381 : TD(TD), 382 AllocSize(TD.getTypeAllocSize(AI.getAllocatedType())), 383 P(P) { 384 enqueueUsers(AI, 0); 385 } 386 387protected: 388 const TargetData &TD; 389 const uint64_t AllocSize; 390 AllocaPartitioning &P; 391 392 SmallPtrSet<Use *, 8> VisitedUses; 393 394 struct OffsetUse { 395 Use *U; 396 int64_t Offset; 397 }; 398 SmallVector<OffsetUse, 8> Queue; 399 400 // The active offset and use while visiting. 
401 Use *U; 402 int64_t Offset; 403 404 void enqueueUsers(Instruction &I, int64_t UserOffset) { 405 for (Value::use_iterator UI = I.use_begin(), UE = I.use_end(); 406 UI != UE; ++UI) { 407 if (VisitedUses.insert(&UI.getUse())) { 408 OffsetUse OU = { &UI.getUse(), UserOffset }; 409 Queue.push_back(OU); 410 } 411 } 412 } 413 414 bool computeConstantGEPOffset(GetElementPtrInst &GEPI, int64_t &GEPOffset) { 415 GEPOffset = Offset; 416 for (gep_type_iterator GTI = gep_type_begin(GEPI), GTE = gep_type_end(GEPI); 417 GTI != GTE; ++GTI) { 418 ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand()); 419 if (!OpC) 420 return false; 421 if (OpC->isZero()) 422 continue; 423 424 // Handle a struct index, which adds its field offset to the pointer. 425 if (StructType *STy = dyn_cast<StructType>(*GTI)) { 426 unsigned ElementIdx = OpC->getZExtValue(); 427 const StructLayout *SL = TD.getStructLayout(STy); 428 uint64_t ElementOffset = SL->getElementOffset(ElementIdx); 429 // Check that we can continue to model this GEP in a signed 64-bit offset. 430 if (ElementOffset > INT64_MAX || 431 (GEPOffset >= 0 && 432 ((uint64_t)GEPOffset + ElementOffset) > INT64_MAX)) { 433 DEBUG(dbgs() << "WARNING: Encountered a cumulative offset exceeding " 434 << "what can be represented in an int64_t!\n" 435 << " alloca: " << P.AI << "\n"); 436 return false; 437 } 438 if (GEPOffset < 0) 439 GEPOffset = ElementOffset + (uint64_t)-GEPOffset; 440 else 441 GEPOffset += ElementOffset; 442 continue; 443 } 444 445 APInt Index = OpC->getValue().sextOrTrunc(TD.getPointerSizeInBits()); 446 Index *= APInt(Index.getBitWidth(), 447 TD.getTypeAllocSize(GTI.getIndexedType())); 448 Index += APInt(Index.getBitWidth(), (uint64_t)GEPOffset, 449 /*isSigned*/true); 450 // Check if the result can be stored in our int64_t offset. 451 if (!Index.isSignedIntN(sizeof(GEPOffset) * 8)) { 452 DEBUG(dbgs() << "WARNING: Encountered a cumulative offset exceeding " 453 << "what can be represented in an int64_t!\n" 454 << " alloca: " << P.AI << "\n"); 455 return false; 456 } 457 458 GEPOffset = Index.getSExtValue(); 459 } 460 return true; 461 } 462 463 Value *foldSelectInst(SelectInst &SI) { 464 // If the condition being selected on is a constant or the same value is 465 // being selected between, fold the select. Yes this does (rarely) happen 466 // early on. 467 if (ConstantInt *CI = dyn_cast<ConstantInt>(SI.getCondition())) 468 return SI.getOperand(1+CI->isZero()); 469 if (SI.getOperand(1) == SI.getOperand(2)) { 470 assert(*U == SI.getOperand(1)); 471 return SI.getOperand(1); 472 } 473 return 0; 474 } 475}; 476 477/// \brief Builder for the alloca partitioning. 478/// 479/// This class builds an alloca partitioning by recursively visiting the uses 480/// of an alloca and splitting the partitions for each load and store at each 481/// offset. 482class AllocaPartitioning::PartitionBuilder 483 : public BuilderBase<PartitionBuilder, bool> { 484 friend class InstVisitor<PartitionBuilder, bool>; 485 486 SmallDenseMap<Instruction *, unsigned> MemTransferPartitionMap; 487 488public: 489 PartitionBuilder(const TargetData &TD, AllocaInst &AI, AllocaPartitioning &P) 490 : BuilderBase<PartitionBuilder, bool>(TD, AI, P) {} 491 492 /// \brief Run the builder over the allocation. 493 bool operator()() { 494 // Note that we have to re-evaluate size on each trip through the loop as 495 // the queue grows at the tail. 
    for (unsigned Idx = 0; Idx < Queue.size(); ++Idx) {
      U = Queue[Idx].U;
      Offset = Queue[Idx].Offset;
      if (!visit(cast<Instruction>(U->getUser())))
        return false;
    }
    return true;
  }

private:
  bool markAsEscaping(Instruction &I) {
    P.PointerEscapingInstr = &I;
    return false;
  }

  void insertUse(Instruction &I, int64_t Offset, uint64_t Size,
                 bool IsSplittable = false) {
    // Completely skip uses which have a zero size or don't overlap the
    // allocation.
    if (Size == 0 ||
        (Offset >= 0 && (uint64_t)Offset >= AllocSize) ||
        (Offset < 0 && (uint64_t)-Offset >= Size)) {
      DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte use @" << Offset
                   << " which starts past the end of the " << AllocSize
                   << " byte alloca:\n"
                   << "    alloca: " << P.AI << "\n"
                   << "       use: " << I << "\n");
      return;
    }

    // Clamp the start to the beginning of the allocation.
    if (Offset < 0) {
      DEBUG(dbgs() << "WARNING: Clamping a " << Size << " byte use @" << Offset
                   << " to start at the beginning of the alloca:\n"
                   << "    alloca: " << P.AI << "\n"
                   << "       use: " << I << "\n");
      Size -= (uint64_t)-Offset;
      Offset = 0;
    }

    uint64_t BeginOffset = Offset, EndOffset = BeginOffset + Size;

    // Clamp the end offset to the end of the allocation. Note that this is
    // formulated to handle even the case where "BeginOffset + Size" overflows.
    assert(AllocSize >= BeginOffset); // Established above.
    if (Size > AllocSize - BeginOffset) {
      DEBUG(dbgs() << "WARNING: Clamping a " << Size << " byte use @" << Offset
                   << " to remain within the " << AllocSize << " byte alloca:\n"
                   << "    alloca: " << P.AI << "\n"
                   << "       use: " << I << "\n");
      EndOffset = AllocSize;
    }

    // See if we can just add a user onto the last slot currently occupied.
    if (!P.Partitions.empty() &&
        P.Partitions.back().BeginOffset == BeginOffset &&
        P.Partitions.back().EndOffset == EndOffset) {
      P.Partitions.back().IsSplittable &= IsSplittable;
      return;
    }

    Partition New(BeginOffset, EndOffset, IsSplittable);
    P.Partitions.push_back(New);
  }

  bool handleLoadOrStore(Type *Ty, Instruction &I, int64_t Offset) {
    uint64_t Size = TD.getTypeStoreSize(Ty);

    // If this memory access can be shown to *statically* extend outside the
    // bounds of the allocation, its behavior is undefined, so simply
    // ignore it. Note that this is more strict than the generic clamping
    // behavior of insertUse. We also try to handle cases which might run the
    // risk of overflow.
    // FIXME: We should instead consider the pointer to have escaped if this
    // function is being instrumented for addressing bugs or race conditions.
    if (Offset < 0 || (uint64_t)Offset >= AllocSize ||
        Size > (AllocSize - (uint64_t)Offset)) {
      DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte "
                   << (isa<LoadInst>(I) ? "load" : "store") << " @" << Offset
"load" : "store") << " @" << Offset 575 << " which extends past the end of the " << AllocSize 576 << " byte alloca:\n" 577 << " alloca: " << P.AI << "\n" 578 << " use: " << I << "\n"); 579 return true; 580 } 581 582 insertUse(I, Offset, Size); 583 return true; 584 } 585 586 bool visitBitCastInst(BitCastInst &BC) { 587 enqueueUsers(BC, Offset); 588 return true; 589 } 590 591 bool visitGetElementPtrInst(GetElementPtrInst &GEPI) { 592 int64_t GEPOffset; 593 if (!computeConstantGEPOffset(GEPI, GEPOffset)) 594 return markAsEscaping(GEPI); 595 596 enqueueUsers(GEPI, GEPOffset); 597 return true; 598 } 599 600 bool visitLoadInst(LoadInst &LI) { 601 assert((!LI.isSimple() || LI.getType()->isSingleValueType()) && 602 "All simple FCA loads should have been pre-split"); 603 return handleLoadOrStore(LI.getType(), LI, Offset); 604 } 605 606 bool visitStoreInst(StoreInst &SI) { 607 Value *ValOp = SI.getValueOperand(); 608 if (ValOp == *U) 609 return markAsEscaping(SI); 610 611 assert((!SI.isSimple() || ValOp->getType()->isSingleValueType()) && 612 "All simple FCA stores should have been pre-split"); 613 return handleLoadOrStore(ValOp->getType(), SI, Offset); 614 } 615 616 617 bool visitMemSetInst(MemSetInst &II) { 618 assert(II.getRawDest() == *U && "Pointer use is not the destination?"); 619 ConstantInt *Length = dyn_cast<ConstantInt>(II.getLength()); 620 uint64_t Size = Length ? Length->getZExtValue() : AllocSize - Offset; 621 insertUse(II, Offset, Size, Length); 622 return true; 623 } 624 625 bool visitMemTransferInst(MemTransferInst &II) { 626 ConstantInt *Length = dyn_cast<ConstantInt>(II.getLength()); 627 uint64_t Size = Length ? Length->getZExtValue() : AllocSize - Offset; 628 if (!Size) 629 // Zero-length mem transfer intrinsics can be ignored entirely. 630 return true; 631 632 MemTransferOffsets &Offsets = P.MemTransferInstData[&II]; 633 634 // Only intrinsics with a constant length can be split. 635 Offsets.IsSplittable = Length; 636 637 if (*U != II.getRawDest()) { 638 assert(*U == II.getRawSource()); 639 Offsets.SourceBegin = Offset; 640 Offsets.SourceEnd = Offset + Size; 641 } else { 642 Offsets.DestBegin = Offset; 643 Offsets.DestEnd = Offset + Size; 644 } 645 646 insertUse(II, Offset, Size, Offsets.IsSplittable); 647 unsigned NewIdx = P.Partitions.size() - 1; 648 649 SmallDenseMap<Instruction *, unsigned>::const_iterator PMI; 650 bool Inserted = false; 651 llvm::tie(PMI, Inserted) 652 = MemTransferPartitionMap.insert(std::make_pair(&II, NewIdx)); 653 if (Offsets.IsSplittable && 654 (!Inserted || II.getRawSource() == II.getRawDest())) { 655 // We've found a memory transfer intrinsic which refers to the alloca as 656 // both a source and dest. This is detected either by direct equality of 657 // the operand values, or when we visit the intrinsic twice due to two 658 // different chains of values leading to it. We refuse to split these to 659 // simplify splitting logic. If possible, SROA will still split them into 660 // separate allocas and then re-analyze. 661 Offsets.IsSplittable = false; 662 P.Partitions[PMI->second].IsSplittable = false; 663 P.Partitions[NewIdx].IsSplittable = false; 664 } 665 666 return true; 667 } 668 669 // Disable SRoA for any intrinsics except for lifetime invariants. 670 // FIXME: What about debug instrinsics? This matches old behavior, but 671 // doesn't make sense. 
672 bool visitIntrinsicInst(IntrinsicInst &II) { 673 if (II.getIntrinsicID() == Intrinsic::lifetime_start || 674 II.getIntrinsicID() == Intrinsic::lifetime_end) { 675 ConstantInt *Length = cast<ConstantInt>(II.getArgOperand(0)); 676 uint64_t Size = std::min(AllocSize - Offset, Length->getLimitedValue()); 677 insertUse(II, Offset, Size, true); 678 return true; 679 } 680 681 return markAsEscaping(II); 682 } 683 684 Instruction *hasUnsafePHIOrSelectUse(Instruction *Root, uint64_t &Size) { 685 // We consider any PHI or select that results in a direct load or store of 686 // the same offset to be a viable use for partitioning purposes. These uses 687 // are considered unsplittable and the size is the maximum loaded or stored 688 // size. 689 SmallPtrSet<Instruction *, 4> Visited; 690 SmallVector<std::pair<Instruction *, Instruction *>, 4> Uses; 691 Visited.insert(Root); 692 Uses.push_back(std::make_pair(cast<Instruction>(*U), Root)); 693 // If there are no loads or stores, the access is dead. We mark that as 694 // a size zero access. 695 Size = 0; 696 do { 697 Instruction *I, *UsedI; 698 llvm::tie(UsedI, I) = Uses.pop_back_val(); 699 700 if (LoadInst *LI = dyn_cast<LoadInst>(I)) { 701 Size = std::max(Size, TD.getTypeStoreSize(LI->getType())); 702 continue; 703 } 704 if (StoreInst *SI = dyn_cast<StoreInst>(I)) { 705 Value *Op = SI->getOperand(0); 706 if (Op == UsedI) 707 return SI; 708 Size = std::max(Size, TD.getTypeStoreSize(Op->getType())); 709 continue; 710 } 711 712 if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) { 713 if (!GEP->hasAllZeroIndices()) 714 return GEP; 715 } else if (!isa<BitCastInst>(I) && !isa<PHINode>(I) && 716 !isa<SelectInst>(I)) { 717 return I; 718 } 719 720 for (Value::use_iterator UI = I->use_begin(), UE = I->use_end(); UI != UE; 721 ++UI) 722 if (Visited.insert(cast<Instruction>(*UI))) 723 Uses.push_back(std::make_pair(I, cast<Instruction>(*UI))); 724 } while (!Uses.empty()); 725 726 return 0; 727 } 728 729 bool visitPHINode(PHINode &PN) { 730 // See if we already have computed info on this node. 731 std::pair<uint64_t, bool> &PHIInfo = P.PHIOrSelectSizes[&PN]; 732 if (PHIInfo.first) { 733 PHIInfo.second = true; 734 insertUse(PN, Offset, PHIInfo.first); 735 return true; 736 } 737 738 // Check for an unsafe use of the PHI node. 739 if (Instruction *EscapingI = hasUnsafePHIOrSelectUse(&PN, PHIInfo.first)) 740 return markAsEscaping(*EscapingI); 741 742 insertUse(PN, Offset, PHIInfo.first); 743 return true; 744 } 745 746 bool visitSelectInst(SelectInst &SI) { 747 if (Value *Result = foldSelectInst(SI)) { 748 if (Result == *U) 749 // If the result of the constant fold will be the pointer, recurse 750 // through the select as if we had RAUW'ed it. 751 enqueueUsers(SI, Offset); 752 753 return true; 754 } 755 756 // See if we already have computed info on this node. 757 std::pair<uint64_t, bool> &SelectInfo = P.PHIOrSelectSizes[&SI]; 758 if (SelectInfo.first) { 759 SelectInfo.second = true; 760 insertUse(SI, Offset, SelectInfo.first); 761 return true; 762 } 763 764 // Check for an unsafe use of the PHI node. 765 if (Instruction *EscapingI = hasUnsafePHIOrSelectUse(&SI, SelectInfo.first)) 766 return markAsEscaping(*EscapingI); 767 768 insertUse(SI, Offset, SelectInfo.first); 769 return true; 770 } 771 772 /// \brief Disable SROA entirely if there are unhandled users of the alloca. 773 bool visitInstruction(Instruction &I) { return markAsEscaping(I); } 774}; 775 776 777/// \brief Use adder for the alloca partitioning. 
778/// 779/// This class adds the uses of an alloca to all of the partitions which they 780/// use. For splittable partitions, this can end up doing essentially a linear 781/// walk of the partitions, but the number of steps remains bounded by the 782/// total result instruction size: 783/// - The number of partitions is a result of the number unsplittable 784/// instructions using the alloca. 785/// - The number of users of each partition is at worst the total number of 786/// splittable instructions using the alloca. 787/// Thus we will produce N * M instructions in the end, where N are the number 788/// of unsplittable uses and M are the number of splittable. This visitor does 789/// the exact same number of updates to the partitioning. 790/// 791/// In the more common case, this visitor will leverage the fact that the 792/// partition space is pre-sorted, and do a logarithmic search for the 793/// partition needed, making the total visit a classical ((N + M) * log(N)) 794/// complexity operation. 795class AllocaPartitioning::UseBuilder : public BuilderBase<UseBuilder> { 796 friend class InstVisitor<UseBuilder>; 797 798 /// \brief Set to de-duplicate dead instructions found in the use walk. 799 SmallPtrSet<Instruction *, 4> VisitedDeadInsts; 800 801public: 802 UseBuilder(const TargetData &TD, AllocaInst &AI, AllocaPartitioning &P) 803 : BuilderBase<UseBuilder>(TD, AI, P) {} 804 805 /// \brief Run the builder over the allocation. 806 void operator()() { 807 // Note that we have to re-evaluate size on each trip through the loop as 808 // the queue grows at the tail. 809 for (unsigned Idx = 0; Idx < Queue.size(); ++Idx) { 810 U = Queue[Idx].U; 811 Offset = Queue[Idx].Offset; 812 this->visit(cast<Instruction>(U->getUser())); 813 } 814 } 815 816private: 817 void markAsDead(Instruction &I) { 818 if (VisitedDeadInsts.insert(&I)) 819 P.DeadUsers.push_back(&I); 820 } 821 822 void insertUse(Instruction &User, int64_t Offset, uint64_t Size) { 823 // If the use has a zero size or extends outside of the allocation, record 824 // it as a dead use for elimination later. 825 if (Size == 0 || (uint64_t)Offset >= AllocSize || 826 (Offset < 0 && (uint64_t)-Offset >= Size)) 827 return markAsDead(User); 828 829 // Clamp the start to the beginning of the allocation. 830 if (Offset < 0) { 831 Size -= (uint64_t)-Offset; 832 Offset = 0; 833 } 834 835 uint64_t BeginOffset = Offset, EndOffset = BeginOffset + Size; 836 837 // Clamp the end offset to the end of the allocation. Note that this is 838 // formulated to handle even the case where "BeginOffset + Size" overflows. 839 assert(AllocSize >= BeginOffset); // Established above. 840 if (Size > AllocSize - BeginOffset) 841 EndOffset = AllocSize; 842 843 // NB: This only works if we have zero overlapping partitions. 
844 iterator B = std::lower_bound(P.begin(), P.end(), BeginOffset); 845 if (B != P.begin() && llvm::prior(B)->EndOffset > BeginOffset) 846 B = llvm::prior(B); 847 for (iterator I = B, E = P.end(); I != E && I->BeginOffset < EndOffset; 848 ++I) { 849 PartitionUse NewPU(std::max(I->BeginOffset, BeginOffset), 850 std::min(I->EndOffset, EndOffset), U); 851 P.use_push_back(I, NewPU); 852 if (isa<PHINode>(U->getUser()) || isa<SelectInst>(U->getUser())) 853 P.PHIOrSelectOpMap[U] 854 = std::make_pair(I - P.begin(), P.Uses[I - P.begin()].size() - 1); 855 } 856 } 857 858 void handleLoadOrStore(Type *Ty, Instruction &I, int64_t Offset) { 859 uint64_t Size = TD.getTypeStoreSize(Ty); 860 861 // If this memory access can be shown to *statically* extend outside the 862 // bounds of of the allocation, it's behavior is undefined, so simply 863 // ignore it. Note that this is more strict than the generic clamping 864 // behavior of insertUse. 865 if (Offset < 0 || (uint64_t)Offset >= AllocSize || 866 Size > (AllocSize - (uint64_t)Offset)) 867 return markAsDead(I); 868 869 insertUse(I, Offset, Size); 870 } 871 872 void visitBitCastInst(BitCastInst &BC) { 873 if (BC.use_empty()) 874 return markAsDead(BC); 875 876 enqueueUsers(BC, Offset); 877 } 878 879 void visitGetElementPtrInst(GetElementPtrInst &GEPI) { 880 if (GEPI.use_empty()) 881 return markAsDead(GEPI); 882 883 int64_t GEPOffset; 884 if (!computeConstantGEPOffset(GEPI, GEPOffset)) 885 llvm_unreachable("Unable to compute constant offset for use"); 886 887 enqueueUsers(GEPI, GEPOffset); 888 } 889 890 void visitLoadInst(LoadInst &LI) { 891 handleLoadOrStore(LI.getType(), LI, Offset); 892 } 893 894 void visitStoreInst(StoreInst &SI) { 895 handleLoadOrStore(SI.getOperand(0)->getType(), SI, Offset); 896 } 897 898 void visitMemSetInst(MemSetInst &II) { 899 ConstantInt *Length = dyn_cast<ConstantInt>(II.getLength()); 900 uint64_t Size = Length ? Length->getZExtValue() : AllocSize - Offset; 901 insertUse(II, Offset, Size); 902 } 903 904 void visitMemTransferInst(MemTransferInst &II) { 905 ConstantInt *Length = dyn_cast<ConstantInt>(II.getLength()); 906 uint64_t Size = Length ? Length->getZExtValue() : AllocSize - Offset; 907 insertUse(II, Offset, Size); 908 } 909 910 void visitIntrinsicInst(IntrinsicInst &II) { 911 assert(II.getIntrinsicID() == Intrinsic::lifetime_start || 912 II.getIntrinsicID() == Intrinsic::lifetime_end); 913 914 ConstantInt *Length = cast<ConstantInt>(II.getArgOperand(0)); 915 insertUse(II, Offset, 916 std::min(AllocSize - Offset, Length->getLimitedValue())); 917 } 918 919 void insertPHIOrSelect(Instruction &User, uint64_t Offset) { 920 uint64_t Size = P.PHIOrSelectSizes.lookup(&User).first; 921 922 // For PHI and select operands outside the alloca, we can't nuke the entire 923 // phi or select -- the other side might still be relevant, so we special 924 // case them here and use a separate structure to track the operands 925 // themselves which should be replaced with undef. 926 if (Offset >= AllocSize) { 927 P.DeadOperands.push_back(U); 928 return; 929 } 930 931 insertUse(User, Offset, Size); 932 } 933 void visitPHINode(PHINode &PN) { 934 if (PN.use_empty()) 935 return markAsDead(PN); 936 937 insertPHIOrSelect(PN, Offset); 938 } 939 void visitSelectInst(SelectInst &SI) { 940 if (SI.use_empty()) 941 return markAsDead(SI); 942 943 if (Value *Result = foldSelectInst(SI)) { 944 if (Result == *U) 945 // If the result of the constant fold will be the pointer, recurse 946 // through the select as if we had RAUW'ed it. 
947 enqueueUsers(SI, Offset); 948 else 949 // Otherwise the operand to the select is dead, and we can replace it 950 // with undef. 951 P.DeadOperands.push_back(U); 952 953 return; 954 } 955 956 insertPHIOrSelect(SI, Offset); 957 } 958 959 /// \brief Unreachable, we've already visited the alloca once. 960 void visitInstruction(Instruction &I) { 961 llvm_unreachable("Unhandled instruction in use builder."); 962 } 963}; 964 965void AllocaPartitioning::splitAndMergePartitions() { 966 size_t NumDeadPartitions = 0; 967 968 // Track the range of splittable partitions that we pass when accumulating 969 // overlapping unsplittable partitions. 970 uint64_t SplitEndOffset = 0ull; 971 972 Partition New(0ull, 0ull, false); 973 974 for (unsigned i = 0, j = i, e = Partitions.size(); i != e; i = j) { 975 ++j; 976 977 if (!Partitions[i].IsSplittable || New.BeginOffset == New.EndOffset) { 978 assert(New.BeginOffset == New.EndOffset); 979 New = Partitions[i]; 980 } else { 981 assert(New.IsSplittable); 982 New.EndOffset = std::max(New.EndOffset, Partitions[i].EndOffset); 983 } 984 assert(New.BeginOffset != New.EndOffset); 985 986 // Scan the overlapping partitions. 987 while (j != e && New.EndOffset > Partitions[j].BeginOffset) { 988 // If the new partition we are forming is splittable, stop at the first 989 // unsplittable partition. 990 if (New.IsSplittable && !Partitions[j].IsSplittable) 991 break; 992 993 // Grow the new partition to include any equally splittable range. 'j' is 994 // always equally splittable when New is splittable, but when New is not 995 // splittable, we may subsume some (or part of some) splitable partition 996 // without growing the new one. 997 if (New.IsSplittable == Partitions[j].IsSplittable) { 998 New.EndOffset = std::max(New.EndOffset, Partitions[j].EndOffset); 999 } else { 1000 assert(!New.IsSplittable); 1001 assert(Partitions[j].IsSplittable); 1002 SplitEndOffset = std::max(SplitEndOffset, Partitions[j].EndOffset); 1003 } 1004 1005 Partitions[j].BeginOffset = Partitions[j].EndOffset = UINT64_MAX; 1006 ++NumDeadPartitions; 1007 ++j; 1008 } 1009 1010 // If the new partition is splittable, chop off the end as soon as the 1011 // unsplittable subsequent partition starts and ensure we eventually cover 1012 // the splittable area. 1013 if (j != e && New.IsSplittable) { 1014 SplitEndOffset = std::max(SplitEndOffset, New.EndOffset); 1015 New.EndOffset = std::min(New.EndOffset, Partitions[j].BeginOffset); 1016 } 1017 1018 // Add the new partition if it differs from the original one and is 1019 // non-empty. We can end up with an empty partition here if it was 1020 // splittable but there is an unsplittable one that starts at the same 1021 // offset. 1022 if (New != Partitions[i]) { 1023 if (New.BeginOffset != New.EndOffset) 1024 Partitions.push_back(New); 1025 // Mark the old one for removal. 1026 Partitions[i].BeginOffset = Partitions[i].EndOffset = UINT64_MAX; 1027 ++NumDeadPartitions; 1028 } 1029 1030 New.BeginOffset = New.EndOffset; 1031 if (!New.IsSplittable) { 1032 New.EndOffset = std::max(New.EndOffset, SplitEndOffset); 1033 if (j != e && !Partitions[j].IsSplittable) 1034 New.EndOffset = std::min(New.EndOffset, Partitions[j].BeginOffset); 1035 New.IsSplittable = true; 1036 // If there is a trailing splittable partition which won't be fused into 1037 // the next splittable partition go ahead and add it onto the partitions 1038 // list. 
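      // As a worked example with illustrative numbers: a splittable [0,16)
      // partition (say, from a memset) overlapped by an unsplittable [4,8)
      // load is emitted as [0,4) by the loop above, keeps the original
      // [4,8), and gets the trailing splittable tail [8,16) added by the
      // check below.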
1039 if (New.BeginOffset < New.EndOffset && 1040 (j == e || !Partitions[j].IsSplittable || 1041 New.EndOffset < Partitions[j].BeginOffset)) { 1042 Partitions.push_back(New); 1043 New.BeginOffset = New.EndOffset = 0ull; 1044 } 1045 } 1046 } 1047 1048 // Re-sort the partitions now that they have been split and merged into 1049 // disjoint set of partitions. Also remove any of the dead partitions we've 1050 // replaced in the process. 1051 std::sort(Partitions.begin(), Partitions.end()); 1052 if (NumDeadPartitions) { 1053 assert(Partitions.back().BeginOffset == UINT64_MAX); 1054 assert(Partitions.back().EndOffset == UINT64_MAX); 1055 assert((ptrdiff_t)NumDeadPartitions == 1056 std::count(Partitions.begin(), Partitions.end(), Partitions.back())); 1057 } 1058 Partitions.erase(Partitions.end() - NumDeadPartitions, Partitions.end()); 1059} 1060 1061AllocaPartitioning::AllocaPartitioning(const TargetData &TD, AllocaInst &AI) 1062 : 1063#ifndef NDEBUG 1064 AI(AI), 1065#endif 1066 PointerEscapingInstr(0) { 1067 PartitionBuilder PB(TD, AI, *this); 1068 if (!PB()) 1069 return; 1070 1071 if (Partitions.size() > 1) { 1072 // Sort the uses. This arranges for the offsets to be in ascending order, 1073 // and the sizes to be in descending order. 1074 std::sort(Partitions.begin(), Partitions.end()); 1075 1076 // Intersect splittability for all partitions with equal offsets and sizes. 1077 // Then remove all but the first so that we have a sequence of non-equal but 1078 // potentially overlapping partitions. 1079 for (iterator I = Partitions.begin(), J = I, E = Partitions.end(); I != E; 1080 I = J) { 1081 ++J; 1082 while (J != E && *I == *J) { 1083 I->IsSplittable &= J->IsSplittable; 1084 ++J; 1085 } 1086 } 1087 Partitions.erase(std::unique(Partitions.begin(), Partitions.end()), 1088 Partitions.end()); 1089 1090 // Split splittable and merge unsplittable partitions into a disjoint set 1091 // of partitions over the used space of the allocation. 1092 splitAndMergePartitions(); 1093 } 1094 1095 // Now build up the user lists for each of these disjoint partitions by 1096 // re-walking the recursive users of the alloca. 1097 Uses.resize(Partitions.size()); 1098 UseBuilder UB(TD, AI, *this); 1099 UB(); 1100} 1101 1102Type *AllocaPartitioning::getCommonType(iterator I) const { 1103 Type *Ty = 0; 1104 for (const_use_iterator UI = use_begin(I), UE = use_end(I); UI != UE; ++UI) { 1105 if (isa<IntrinsicInst>(*UI->U->getUser())) 1106 continue; 1107 if (UI->BeginOffset != I->BeginOffset || UI->EndOffset != I->EndOffset) 1108 continue; 1109 1110 Type *UserTy = 0; 1111 if (LoadInst *LI = dyn_cast<LoadInst>(UI->U->getUser())) { 1112 UserTy = LI->getType(); 1113 } else if (StoreInst *SI = dyn_cast<StoreInst>(UI->U->getUser())) { 1114 UserTy = SI->getValueOperand()->getType(); 1115 } 1116 1117 if (Ty && Ty != UserTy) 1118 return 0; 1119 1120 Ty = UserTy; 1121 } 1122 return Ty; 1123} 1124 1125#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 1126 1127void AllocaPartitioning::print(raw_ostream &OS, const_iterator I, 1128 StringRef Indent) const { 1129 OS << Indent << "partition #" << (I - begin()) 1130 << " [" << I->BeginOffset << "," << I->EndOffset << ")" 1131 << (I->IsSplittable ? " (splittable)" : "") 1132 << (Uses[I - begin()].empty() ? 
" (zero uses)" : "") 1133 << "\n"; 1134} 1135 1136void AllocaPartitioning::printUsers(raw_ostream &OS, const_iterator I, 1137 StringRef Indent) const { 1138 for (const_use_iterator UI = use_begin(I), UE = use_end(I); 1139 UI != UE; ++UI) { 1140 OS << Indent << " [" << UI->BeginOffset << "," << UI->EndOffset << ") " 1141 << "used by: " << *UI->U->getUser() << "\n"; 1142 if (MemTransferInst *II = dyn_cast<MemTransferInst>(UI->U->getUser())) { 1143 const MemTransferOffsets &MTO = MemTransferInstData.lookup(II); 1144 bool IsDest; 1145 if (!MTO.IsSplittable) 1146 IsDest = UI->BeginOffset == MTO.DestBegin; 1147 else 1148 IsDest = MTO.DestBegin != 0u; 1149 OS << Indent << " (original " << (IsDest ? "dest" : "source") << ": " 1150 << "[" << (IsDest ? MTO.DestBegin : MTO.SourceBegin) 1151 << "," << (IsDest ? MTO.DestEnd : MTO.SourceEnd) << ")\n"; 1152 } 1153 } 1154} 1155 1156void AllocaPartitioning::print(raw_ostream &OS) const { 1157 if (PointerEscapingInstr) { 1158 OS << "No partitioning for alloca: " << AI << "\n" 1159 << " A pointer to this alloca escaped by:\n" 1160 << " " << *PointerEscapingInstr << "\n"; 1161 return; 1162 } 1163 1164 OS << "Partitioning of alloca: " << AI << "\n"; 1165 unsigned Num = 0; 1166 for (const_iterator I = begin(), E = end(); I != E; ++I, ++Num) { 1167 print(OS, I); 1168 printUsers(OS, I); 1169 } 1170} 1171 1172void AllocaPartitioning::dump(const_iterator I) const { print(dbgs(), I); } 1173void AllocaPartitioning::dump() const { print(dbgs()); } 1174 1175#endif // !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 1176 1177 1178namespace { 1179/// \brief Implementation of LoadAndStorePromoter for promoting allocas. 1180/// 1181/// This subclass of LoadAndStorePromoter adds overrides to handle promoting 1182/// the loads and stores of an alloca instruction, as well as updating its 1183/// debug information. This is used when a domtree is unavailable and thus 1184/// mem2reg in its full form can't be used to handle promotion of allocas to 1185/// scalar values. 1186class AllocaPromoter : public LoadAndStorePromoter { 1187 AllocaInst &AI; 1188 DIBuilder &DIB; 1189 1190 SmallVector<DbgDeclareInst *, 4> DDIs; 1191 SmallVector<DbgValueInst *, 4> DVIs; 1192 1193public: 1194 AllocaPromoter(const SmallVectorImpl<Instruction*> &Insts, SSAUpdater &S, 1195 AllocaInst &AI, DIBuilder &DIB) 1196 : LoadAndStorePromoter(Insts, S), AI(AI), DIB(DIB) {} 1197 1198 void run(const SmallVectorImpl<Instruction*> &Insts) { 1199 // Remember which alloca we're promoting (for isInstInList). 
    if (MDNode *DebugNode = MDNode::getIfExists(AI.getContext(), &AI)) {
      for (Value::use_iterator UI = DebugNode->use_begin(),
                               UE = DebugNode->use_end();
           UI != UE; ++UI)
        if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(*UI))
          DDIs.push_back(DDI);
        else if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(*UI))
          DVIs.push_back(DVI);
    }

    LoadAndStorePromoter::run(Insts);
    AI.eraseFromParent();
    while (!DDIs.empty())
      DDIs.pop_back_val()->eraseFromParent();
    while (!DVIs.empty())
      DVIs.pop_back_val()->eraseFromParent();
  }

  virtual bool isInstInList(Instruction *I,
                            const SmallVectorImpl<Instruction*> &Insts) const {
    if (LoadInst *LI = dyn_cast<LoadInst>(I))
      return LI->getOperand(0) == &AI;
    return cast<StoreInst>(I)->getPointerOperand() == &AI;
  }

  virtual void updateDebugInfo(Instruction *Inst) const {
    for (SmallVector<DbgDeclareInst *, 4>::const_iterator I = DDIs.begin(),
           E = DDIs.end(); I != E; ++I) {
      DbgDeclareInst *DDI = *I;
      if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
        ConvertDebugDeclareToDebugValue(DDI, SI, DIB);
      else if (LoadInst *LI = dyn_cast<LoadInst>(Inst))
        ConvertDebugDeclareToDebugValue(DDI, LI, DIB);
    }
    for (SmallVector<DbgValueInst *, 4>::const_iterator I = DVIs.begin(),
           E = DVIs.end(); I != E; ++I) {
      DbgValueInst *DVI = *I;
      Value *Arg = NULL;
      if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
        // If an argument is zero-extended then use the argument directly.
        // The ZExt may be zapped by an optimization pass in the future.
        if (ZExtInst *ZExt = dyn_cast<ZExtInst>(SI->getOperand(0)))
          Arg = dyn_cast<Argument>(ZExt->getOperand(0));
        if (SExtInst *SExt = dyn_cast<SExtInst>(SI->getOperand(0)))
          Arg = dyn_cast<Argument>(SExt->getOperand(0));
        if (!Arg)
          Arg = SI->getOperand(0);
      } else if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
        Arg = LI->getOperand(0);
      } else {
        continue;
      }
      Instruction *DbgVal =
        DIB.insertDbgValueIntrinsic(Arg, 0, DIVariable(DVI->getVariable()),
                                    Inst);
      DbgVal->setDebugLoc(DVI->getDebugLoc());
    }
  }
};
} // end anon namespace


namespace {
/// \brief An optimization pass providing Scalar Replacement of Aggregates.
///
/// This pass takes allocations which can be completely analyzed (that is, they
/// don't escape) and tries to turn them into scalar SSA values. There are
/// a few steps to this process.
///
/// 1) It takes allocations of aggregates and analyzes the ways in which they
///    are used to try to split them into smaller allocations, ideally of
///    a single scalar data type. It will split up memcpy and memset accesses
///    as necessary and try to isolate individual scalar accesses.
/// 2) It will transform accesses into forms which are suitable for SSA value
///    promotion. This can involve replacing a memset with a scalar store of
///    an integer value, or it can involve speculating operations on a PHI or
///    select to be a PHI or select of the results.
/// 3) Finally, this will try to detect a pattern of accesses which map cleanly
///    onto insert and extract operations on a vector value, and convert them
///    to this form. By doing so, it will enable promotion of vector aggregates
///    to SSA vector values.
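///
/// As a small illustrative example (not taken from this file), a local
/// aggregate such as:
/// \code
///   struct Pair { int First; float Second; };
///   Pair P;
///   P.First = 1;
///   P.Second = 2.0f;
///   // ... only P.First and P.Second are ever accessed individually ...
/// \endcode
/// can typically be replaced by an i32 alloca and a float alloca, each of
/// which is then promoted to an SSA value.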
class SROA : public FunctionPass {
  const bool RequiresDomTree;

  LLVMContext *C;
  const TargetData *TD;
  DominatorTree *DT;

  /// \brief Worklist of alloca instructions to simplify.
  ///
  /// Each alloca in the function is added to this. Each new alloca formed gets
  /// added to it as well to recursively simplify unless that alloca can be
  /// directly promoted. Finally, each time we rewrite a use of an alloca other
  /// than the one being actively rewritten, we add it back onto the list if
  /// not already present to ensure it is re-visited.
  SetVector<AllocaInst *, SmallVector<AllocaInst *, 16> > Worklist;

  /// \brief A collection of instructions to delete.
  /// We try to batch deletions to simplify code and make things a bit more
  /// efficient.
  SmallVector<Instruction *, 8> DeadInsts;

  /// \brief A set to prevent repeatedly marking an instruction split into many
  /// uses as dead. Only used to guard insertion into DeadInsts.
  SmallPtrSet<Instruction *, 4> DeadSplitInsts;

  /// \brief A collection of alloca instructions we can directly promote.
  std::vector<AllocaInst *> PromotableAllocas;

public:
  SROA(bool RequiresDomTree = true)
      : FunctionPass(ID), RequiresDomTree(RequiresDomTree),
        C(0), TD(0), DT(0) {
    initializeSROAPass(*PassRegistry::getPassRegistry());
  }
  bool runOnFunction(Function &F);
  void getAnalysisUsage(AnalysisUsage &AU) const;

  const char *getPassName() const { return "SROA"; }
  static char ID;

private:
  friend class PHIOrSelectSpeculator;
  friend class AllocaPartitionRewriter;
  friend class AllocaPartitionVectorRewriter;

  bool rewriteAllocaPartition(AllocaInst &AI,
                              AllocaPartitioning &P,
                              AllocaPartitioning::iterator PI);
  bool splitAlloca(AllocaInst &AI, AllocaPartitioning &P);
  bool runOnAlloca(AllocaInst &AI);
  void deleteDeadInstructions(SmallPtrSet<AllocaInst *, 4> &DeletedAllocas);
  bool promoteAllocas(Function &F);
};
}

char SROA::ID = 0;

FunctionPass *llvm::createSROAPass(bool RequiresDomTree) {
  return new SROA(RequiresDomTree);
}

INITIALIZE_PASS_BEGIN(SROA, "sroa", "Scalar Replacement Of Aggregates",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTree)
INITIALIZE_PASS_END(SROA, "sroa", "Scalar Replacement Of Aggregates",
                    false, false)

/// \brief Accumulate the constant offsets in a GEP into a single APInt offset.
///
/// If the provided GEP is all-constant, the total byte offset formed by the
/// GEP is computed and Offset is set to it. If the GEP has any non-constant
/// operands, the function returns false and the value of Offset is unmodified.
static bool accumulateGEPOffsets(const TargetData &TD, GEPOperator &GEP,
                                 APInt &Offset) {
  APInt GEPOffset(Offset.getBitWidth(), 0);
  for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
       GTI != GTE; ++GTI) {
    ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
    if (!OpC)
      return false;
    if (OpC->isZero()) continue;

    // Handle a struct index, which adds its field offset to the pointer.
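    // For example, assuming a typical 64-bit layout, indexing field 1 of a
    // struct { i32, i64 } contributes a StructLayout offset of 8 bytes,
    // which accounts for the padding inserted after the i32.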
1364 if (StructType *STy = dyn_cast<StructType>(*GTI)) { 1365 unsigned ElementIdx = OpC->getZExtValue(); 1366 const StructLayout *SL = TD.getStructLayout(STy); 1367 GEPOffset += APInt(Offset.getBitWidth(), 1368 SL->getElementOffset(ElementIdx)); 1369 continue; 1370 } 1371 1372 APInt TypeSize(Offset.getBitWidth(), 1373 TD.getTypeAllocSize(GTI.getIndexedType())); 1374 if (VectorType *VTy = dyn_cast<VectorType>(*GTI)) { 1375 assert((VTy->getScalarSizeInBits() % 8) == 0 && 1376 "vector element size is not a multiple of 8, cannot GEP over it"); 1377 TypeSize = VTy->getScalarSizeInBits() / 8; 1378 } 1379 1380 GEPOffset += OpC->getValue().sextOrTrunc(Offset.getBitWidth()) * TypeSize; 1381 } 1382 Offset = GEPOffset; 1383 return true; 1384} 1385 1386/// \brief Build a GEP out of a base pointer and indices. 1387/// 1388/// This will return the BasePtr if that is valid, or build a new GEP 1389/// instruction using the IRBuilder if GEP-ing is needed. 1390static Value *buildGEP(IRBuilder<> &IRB, Value *BasePtr, 1391 SmallVectorImpl<Value *> &Indices, 1392 const Twine &Prefix) { 1393 if (Indices.empty()) 1394 return BasePtr; 1395 1396 // A single zero index is a no-op, so check for this and avoid building a GEP 1397 // in that case. 1398 if (Indices.size() == 1 && cast<ConstantInt>(Indices.back())->isZero()) 1399 return BasePtr; 1400 1401 return IRB.CreateInBoundsGEP(BasePtr, Indices, Prefix + ".idx"); 1402} 1403 1404/// \brief Get a natural GEP off of the BasePtr walking through Ty toward 1405/// TargetTy without changing the offset of the pointer. 1406/// 1407/// This routine assumes we've already established a properly offset GEP with 1408/// Indices, and arrived at the Ty type. The goal is to continue to GEP with 1409/// zero-indices down through type layers until we find one the same as 1410/// TargetTy. If we can't find one with the same type, we at least try to use 1411/// one with the same size. If none of that works, we just produce the GEP as 1412/// indicated by Indices to have the correct offset. 1413static Value *getNaturalGEPWithType(IRBuilder<> &IRB, const TargetData &TD, 1414 Value *BasePtr, Type *Ty, Type *TargetTy, 1415 SmallVectorImpl<Value *> &Indices, 1416 const Twine &Prefix) { 1417 if (Ty == TargetTy) 1418 return buildGEP(IRB, BasePtr, Indices, Prefix); 1419 1420 // See if we can descend into a struct and locate a field with the correct 1421 // type. 1422 unsigned NumLayers = 0; 1423 Type *ElementTy = Ty; 1424 do { 1425 if (ElementTy->isPointerTy()) 1426 break; 1427 if (SequentialType *SeqTy = dyn_cast<SequentialType>(ElementTy)) { 1428 ElementTy = SeqTy->getElementType(); 1429 Indices.push_back(IRB.getInt(APInt(TD.getPointerSizeInBits(), 0))); 1430 } else if (StructType *STy = dyn_cast<StructType>(ElementTy)) { 1431 ElementTy = *STy->element_begin(); 1432 Indices.push_back(IRB.getInt32(0)); 1433 } else { 1434 break; 1435 } 1436 ++NumLayers; 1437 } while (ElementTy != TargetTy); 1438 if (ElementTy != TargetTy) 1439 Indices.erase(Indices.end() - NumLayers, Indices.end()); 1440 1441 return buildGEP(IRB, BasePtr, Indices, Prefix); 1442} 1443 1444/// \brief Recursively compute indices for a natural GEP. 1445/// 1446/// This is the recursive step for getNaturalGEPWithOffset that walks down the 1447/// element types adding appropriate indices for the GEP. 
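///
/// For example, with illustrative numbers: walking an [8 x i32] with a byte
/// offset of 12 pushes the array index 3 (12 / 4) and recurses with a
/// remaining offset of 0, at which point getNaturalGEPWithType finishes off
/// the GEP.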
1448static Value *getNaturalGEPRecursively(IRBuilder<> &IRB, const TargetData &TD, 1449 Value *Ptr, Type *Ty, APInt &Offset, 1450 Type *TargetTy, 1451 SmallVectorImpl<Value *> &Indices, 1452 const Twine &Prefix) { 1453 if (Offset == 0) 1454 return getNaturalGEPWithType(IRB, TD, Ptr, Ty, TargetTy, Indices, Prefix); 1455 1456 // We can't recurse through pointer types. 1457 if (Ty->isPointerTy()) 1458 return 0; 1459 1460 // We try to analyze GEPs over vectors here, but note that these GEPs are 1461 // extremely poorly defined currently. The long-term goal is to remove GEPing 1462 // over a vector from the IR completely. 1463 if (VectorType *VecTy = dyn_cast<VectorType>(Ty)) { 1464 unsigned ElementSizeInBits = VecTy->getScalarSizeInBits(); 1465 if (ElementSizeInBits % 8) 1466 return 0; // GEPs over non-multiple of 8 size vector elements are invalid. 1467 APInt ElementSize(Offset.getBitWidth(), ElementSizeInBits / 8); 1468 APInt NumSkippedElements = Offset.udiv(ElementSize); 1469 if (NumSkippedElements.ugt(VecTy->getNumElements())) 1470 return 0; 1471 Offset -= NumSkippedElements * ElementSize; 1472 Indices.push_back(IRB.getInt(NumSkippedElements)); 1473 return getNaturalGEPRecursively(IRB, TD, Ptr, VecTy->getElementType(), 1474 Offset, TargetTy, Indices, Prefix); 1475 } 1476 1477 if (ArrayType *ArrTy = dyn_cast<ArrayType>(Ty)) { 1478 Type *ElementTy = ArrTy->getElementType(); 1479 APInt ElementSize(Offset.getBitWidth(), TD.getTypeAllocSize(ElementTy)); 1480 APInt NumSkippedElements = Offset.udiv(ElementSize); 1481 if (NumSkippedElements.ugt(ArrTy->getNumElements())) 1482 return 0; 1483 1484 Offset -= NumSkippedElements * ElementSize; 1485 Indices.push_back(IRB.getInt(NumSkippedElements)); 1486 return getNaturalGEPRecursively(IRB, TD, Ptr, ElementTy, Offset, TargetTy, 1487 Indices, Prefix); 1488 } 1489 1490 StructType *STy = dyn_cast<StructType>(Ty); 1491 if (!STy) 1492 return 0; 1493 1494 const StructLayout *SL = TD.getStructLayout(STy); 1495 uint64_t StructOffset = Offset.getZExtValue(); 1496 if (StructOffset >= SL->getSizeInBytes()) 1497 return 0; 1498 unsigned Index = SL->getElementContainingOffset(StructOffset); 1499 Offset -= APInt(Offset.getBitWidth(), SL->getElementOffset(Index)); 1500 Type *ElementTy = STy->getElementType(Index); 1501 if (Offset.uge(TD.getTypeAllocSize(ElementTy))) 1502 return 0; // The offset points into alignment padding. 1503 1504 Indices.push_back(IRB.getInt32(Index)); 1505 return getNaturalGEPRecursively(IRB, TD, Ptr, ElementTy, Offset, TargetTy, 1506 Indices, Prefix); 1507} 1508 1509/// \brief Get a natural GEP from a base pointer to a particular offset and 1510/// resulting in a particular type. 1511/// 1512/// The goal is to produce a "natural" looking GEP that works with the existing 1513/// composite types to arrive at the appropriate offset and element type for 1514/// a pointer. TargetTy is the element type the returned GEP should point-to if 1515/// possible. We recurse by decreasing Offset, adding the appropriate index to 1516/// Indices, and setting Ty to the result subtype. 1517/// 1518/// If no natural GEP can be constructed, this function returns null. 1519static Value *getNaturalGEPWithOffset(IRBuilder<> &IRB, const TargetData &TD, 1520 Value *Ptr, APInt Offset, Type *TargetTy, 1521 SmallVectorImpl<Value *> &Indices, 1522 const Twine &Prefix) { 1523 PointerType *Ty = cast<PointerType>(Ptr->getType()); 1524 1525 // Don't consider any GEPs through an i8* as natural unless the TargetTy is 1526 // an i8. 
  if (Ty == IRB.getInt8PtrTy() && TargetTy->isIntegerTy(8))
    return 0;

  Type *ElementTy = Ty->getElementType();
  if (!ElementTy->isSized())
    return 0; // We can't GEP through an unsized element.
  APInt ElementSize(Offset.getBitWidth(), TD.getTypeAllocSize(ElementTy));
  if (ElementSize == 0)
    return 0; // Zero-length arrays can't help us build a natural GEP.
  APInt NumSkippedElements = Offset.udiv(ElementSize);

  Offset -= NumSkippedElements * ElementSize;
  Indices.push_back(IRB.getInt(NumSkippedElements));
  return getNaturalGEPRecursively(IRB, TD, Ptr, ElementTy, Offset, TargetTy,
                                  Indices, Prefix);
}

/// \brief Compute an adjusted pointer from Ptr by Offset bytes where the
/// resulting pointer has PointerTy.
///
/// This tries very hard to compute a "natural" GEP which arrives at the offset
/// and produces the pointer type desired. Where it cannot, it will try to use
/// the natural GEP to arrive at the offset and bitcast to the type. Where that
/// fails, it will try to use an existing i8* and GEP to the byte offset and
/// bitcast to the type.
///
/// The strategy for finding the more natural GEPs is to peel off layers of the
/// pointer, walking back through bit casts and GEPs, searching for a base
/// pointer from which we can compute a natural GEP with the desired
/// properties. The algorithm tries to fold as many constant indices into
/// a single GEP as possible, thus making each GEP more independent of the
/// surrounding code.
static Value *getAdjustedPtr(IRBuilder<> &IRB, const TargetData &TD,
                             Value *Ptr, APInt Offset, Type *PointerTy,
                             const Twine &Prefix) {
  // Even though we don't look through PHI nodes, we could be called on an
  // instruction in an unreachable block, which may be on a cycle.
  SmallPtrSet<Value *, 4> Visited;
  Visited.insert(Ptr);
  SmallVector<Value *, 4> Indices;

  // We may end up computing an offset pointer that has the wrong type. If we
  // are never able to compute one directly that has the correct type, we'll
  // fall back to it, so keep it around here.
  Value *OffsetPtr = 0;

  // Remember any i8 pointer we come across to re-use if we need to do a raw
  // byte offset.
  Value *Int8Ptr = 0;
  APInt Int8PtrOffset(Offset.getBitWidth(), 0);

  Type *TargetTy = PointerTy->getPointerElementType();

  do {
    // First fold any existing GEPs into the offset.
    while (GEPOperator *GEP = dyn_cast<GEPOperator>(Ptr)) {
      APInt GEPOffset(Offset.getBitWidth(), 0);
      if (!accumulateGEPOffsets(TD, *GEP, GEPOffset))
        break;
      Offset += GEPOffset;
      Ptr = GEP->getPointerOperand();
      if (!Visited.insert(Ptr))
        break;
    }

    // See if we can perform a natural GEP here.
    Indices.clear();
    if (Value *P = getNaturalGEPWithOffset(IRB, TD, Ptr, Offset, TargetTy,
                                           Indices, Prefix)) {
      if (P->getType() == PointerTy) {
        // Zap any offset pointer that we ended up computing in previous
        // rounds.
        if (OffsetPtr && OffsetPtr->use_empty())
          if (Instruction *I = dyn_cast<Instruction>(OffsetPtr))
            I->eraseFromParent();
        return P;
      }
      if (!OffsetPtr) {
        OffsetPtr = P;
      }
    }

    // Stash this pointer if we've found an i8*.
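    // (The raw byte-offset fallback below can then reuse it rather than
    // emitting a fresh bitcast.)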
    if (Ptr->getType() == IRB.getInt8PtrTy()) {
      Int8Ptr = Ptr;
      Int8PtrOffset = Offset;
    }

    // Peel off a layer of the pointer and update the offset appropriately.
    if (Operator::getOpcode(Ptr) == Instruction::BitCast) {
      Ptr = cast<Operator>(Ptr)->getOperand(0);
    } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(Ptr)) {
      if (GA->mayBeOverridden())
        break;
      Ptr = GA->getAliasee();
    } else {
      break;
    }
    assert(Ptr->getType()->isPointerTy() && "Unexpected operand type!");
  } while (Visited.insert(Ptr));

  if (!OffsetPtr) {
    if (!Int8Ptr) {
      Int8Ptr = IRB.CreateBitCast(Ptr, IRB.getInt8PtrTy(),
                                  Prefix + ".raw_cast");
      Int8PtrOffset = Offset;
    }

    OffsetPtr = Int8PtrOffset == 0 ? Int8Ptr :
      IRB.CreateInBoundsGEP(Int8Ptr, IRB.getInt(Int8PtrOffset),
                            Prefix + ".raw_idx");
  }
  Ptr = OffsetPtr;

  // On the off chance we were targeting i8*, guard the bitcast here.
  if (Ptr->getType() != PointerTy)
    Ptr = IRB.CreateBitCast(Ptr, PointerTy, Prefix + ".cast");

  return Ptr;
}

/// \brief Test whether the given alloca partition can be promoted to a vector.
///
/// This is a quick test to check whether we can rewrite a particular alloca
/// partition (and its newly formed alloca) into a vector alloca with only
/// whole-vector loads and stores such that it could be promoted to a vector
/// SSA value. We can only ensure this for a limited set of operations, and we
/// don't want to do the rewrites unless we are confident that the result will
/// be promotable, so we have an early test here.
static bool isVectorPromotionViable(const TargetData &TD,
                                    Type *AllocaTy,
                                    AllocaPartitioning &P,
                                    uint64_t PartitionBeginOffset,
                                    uint64_t PartitionEndOffset,
                                    AllocaPartitioning::const_use_iterator I,
                                    AllocaPartitioning::const_use_iterator E) {
  VectorType *Ty = dyn_cast<VectorType>(AllocaTy);
  if (!Ty)
    return false;

  uint64_t VecSize = TD.getTypeSizeInBits(Ty);
  uint64_t ElementSize = Ty->getScalarSizeInBits();

  // While the definition of LLVM vectors is bitpacked, we don't support
  // element sizes that aren't byte sized.
  if (ElementSize % 8)
    return false;
  assert((VecSize % 8) == 0 && "vector size not a whole number of bytes?");
  VecSize /= 8;
  ElementSize /= 8;

  for (; I != E; ++I) {
    uint64_t BeginOffset = I->BeginOffset - PartitionBeginOffset;
    uint64_t BeginIndex = BeginOffset / ElementSize;
    if (BeginIndex * ElementSize != BeginOffset ||
        BeginIndex >= Ty->getNumElements())
      return false;
    uint64_t EndOffset = I->EndOffset - PartitionBeginOffset;
    uint64_t EndIndex = EndOffset / ElementSize;
    if (EndIndex * ElementSize != EndOffset ||
        EndIndex > Ty->getNumElements())
      return false;

    // FIXME: We should build shuffle vector instructions to handle
    // non-element-sized accesses.
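    // For example (illustrative): with a <4 x float> alloca, a use covering
    // bytes [4,8) maps to element 1 and one covering [0,16) maps to the whole
    // vector, but a use covering [4,12) is rejected here.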
    if ((EndOffset - BeginOffset) != ElementSize &&
        (EndOffset - BeginOffset) != VecSize)
      return false;

    if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I->U->getUser())) {
      if (MI->isVolatile())
        return false;
      if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(I->U->getUser())) {
        const AllocaPartitioning::MemTransferOffsets &MTO
          = P.getMemTransferOffsets(*MTI);
        if (!MTO.IsSplittable)
          return false;
      }
    } else if (I->U->get()->getType()->getPointerElementType()->isStructTy()) {
      // Disable vector promotion when there are loads or stores of an FCA.
      return false;
    } else if (!isa<LoadInst>(I->U->getUser()) &&
               !isa<StoreInst>(I->U->getUser())) {
      return false;
    }
  }
  return true;
}

/// \brief Test whether the given alloca partition can be promoted to an int.
///
/// This is a quick test to check whether we can rewrite a particular alloca
/// partition (and its newly formed alloca) into an integer alloca suitable for
/// promotion to an SSA value. We can only ensure this for a limited set of
/// operations, and we don't want to do the rewrites unless we are confident
/// that the result will be promotable, so we have an early test here.
static bool isIntegerPromotionViable(const TargetData &TD,
                                     Type *AllocaTy,
                                     AllocaPartitioning &P,
                                     AllocaPartitioning::const_use_iterator I,
                                     AllocaPartitioning::const_use_iterator E) {
  IntegerType *Ty = dyn_cast<IntegerType>(AllocaTy);
  if (!Ty)
    return false;

  // Check the uses to ensure the uses are (likely) promotable integer uses.
  // Also ensure that the alloca has a covering load or store. We don't want
  // to promote because of some other unsplittable entry (which we may make
  // splittable later) and then lose the ability to promote each element
  // access.
  bool WholeAllocaOp = false;
  for (; I != E; ++I) {
    if (LoadInst *LI = dyn_cast<LoadInst>(I->U->getUser())) {
      if (LI->isVolatile() || !LI->getType()->isIntegerTy())
        return false;
      if (LI->getType() == Ty)
        WholeAllocaOp = true;
    } else if (StoreInst *SI = dyn_cast<StoreInst>(I->U->getUser())) {
      if (SI->isVolatile() || !SI->getValueOperand()->getType()->isIntegerTy())
        return false;
      if (SI->getValueOperand()->getType() == Ty)
        WholeAllocaOp = true;
    } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I->U->getUser())) {
      if (MI->isVolatile())
        return false;
      if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(I->U->getUser())) {
        const AllocaPartitioning::MemTransferOffsets &MTO
          = P.getMemTransferOffsets(*MTI);
        if (!MTO.IsSplittable)
          return false;
      }
    } else {
      return false;
    }
  }
  return WholeAllocaOp;
}

namespace {
/// \brief Visitor to speculate PHIs and Selects where possible.
class PHIOrSelectSpeculator : public InstVisitor<PHIOrSelectSpeculator> {
  // Befriend the base class so it can delegate to private visit methods.
  friend class llvm::InstVisitor<PHIOrSelectSpeculator>;

  const TargetData &TD;
  AllocaPartitioning &P;
  SROA &Pass;

public:
  PHIOrSelectSpeculator(const TargetData &TD, AllocaPartitioning &P,
                        SROA &Pass)
    : TD(TD), P(P), Pass(Pass) {}

  /// \brief Visit the users of the alloca partition and rewrite them.
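  ///
  /// Only PHIs and selects whose loads can be safely speculated are touched;
  /// every other user falls through the default visitInstruction and is left
  /// in place.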
1778 void visitUsers(AllocaPartitioning::const_use_iterator I, 1779 AllocaPartitioning::const_use_iterator E) { 1780 for (; I != E; ++I) 1781 visit(cast<Instruction>(I->U->getUser())); 1782 } 1783 1784private: 1785 // By default, skip this instruction. 1786 void visitInstruction(Instruction &I) {} 1787 1788 /// PHI instructions that use an alloca and are subsequently loaded can be 1789 /// rewritten to load both input pointers in the pred blocks and then PHI the 1790 /// results, allowing the load of the alloca to be promoted. 1791 /// From this: 1792 /// %P2 = phi [i32* %Alloca, i32* %Other] 1793 /// %V = load i32* %P2 1794 /// to: 1795 /// %V1 = load i32* %Alloca -> will be mem2reg'd 1796 /// ... 1797 /// %V2 = load i32* %Other 1798 /// ... 1799 /// %V = phi [i32 %V1, i32 %V2] 1800 /// 1801 /// We can do this to a select if its only uses are loads and if the operands 1802 /// to the select can be loaded unconditionally. 1803 /// 1804 /// FIXME: This should be hoisted into a generic utility, likely in 1805 /// Transforms/Util/Local.h 1806 bool isSafePHIToSpeculate(PHINode &PN, SmallVectorImpl<LoadInst *> &Loads) { 1807 // For now, we can only do this promotion if the load is in the same block 1808 // as the PHI, and if there are no stores between the phi and load. 1809 // TODO: Allow recursive phi users. 1810 // TODO: Allow stores. 1811 BasicBlock *BB = PN.getParent(); 1812 unsigned MaxAlign = 0; 1813 for (Value::use_iterator UI = PN.use_begin(), UE = PN.use_end(); 1814 UI != UE; ++UI) { 1815 LoadInst *LI = dyn_cast<LoadInst>(*UI); 1816 if (LI == 0 || !LI->isSimple()) return false; 1817 1818 // For now we only allow loads in the same block as the PHI. This is 1819 // a common case that happens when instcombine merges two loads through 1820 // a PHI. 1821 if (LI->getParent() != BB) return false; 1822 1823 // Ensure that there are no instructions between the PHI and the load that 1824 // could store. 1825 for (BasicBlock::iterator BBI = &PN; &*BBI != LI; ++BBI) 1826 if (BBI->mayWriteToMemory()) 1827 return false; 1828 1829 MaxAlign = std::max(MaxAlign, LI->getAlignment()); 1830 Loads.push_back(LI); 1831 } 1832 1833 // We can only transform this if it is safe to push the loads into the 1834 // predecessor blocks. The only thing to watch out for is that we can't put 1835 // a possibly trapping load in the predecessor if it is a critical edge. 1836 for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num; 1837 ++Idx) { 1838 TerminatorInst *TI = PN.getIncomingBlock(Idx)->getTerminator(); 1839 Value *InVal = PN.getIncomingValue(Idx); 1840 1841 // If the value is produced by the terminator of the predecessor (an 1842 // invoke) or it has side-effects, there is no valid place to put a load 1843 // in the predecessor. 1844 if (TI == InVal || TI->mayHaveSideEffects()) 1845 return false; 1846 1847 // If the predecessor has a single successor, then the edge isn't 1848 // critical. 1849 if (TI->getNumSuccessors() == 1) 1850 continue; 1851 1852 // If this pointer is always safe to load, or if we can prove that there 1853 // is already a load in the block, then we can move the load to the pred 1854 // block. 
1855 if (InVal->isDereferenceablePointer() || 1856 isSafeToLoadUnconditionally(InVal, TI, MaxAlign, &TD)) 1857 continue; 1858 1859 return false; 1860 } 1861 1862 return true; 1863 } 1864 1865 void visitPHINode(PHINode &PN) { 1866 DEBUG(dbgs() << " original: " << PN << "\n"); 1867 1868 SmallVector<LoadInst *, 4> Loads; 1869 if (!isSafePHIToSpeculate(PN, Loads)) 1870 return; 1871 1872 assert(!Loads.empty()); 1873 1874 Type *LoadTy = cast<PointerType>(PN.getType())->getElementType(); 1875 IRBuilder<> PHIBuilder(&PN); 1876 PHINode *NewPN = PHIBuilder.CreatePHI(LoadTy, PN.getNumIncomingValues(), 1877 PN.getName() + ".sroa.speculated"); 1878 1879 // Get the TBAA tag and alignment to use from one of the loads. It doesn't 1880 // matter which one we get and if any differ, it doesn't matter. 1881 LoadInst *SomeLoad = cast<LoadInst>(Loads.back()); 1882 MDNode *TBAATag = SomeLoad->getMetadata(LLVMContext::MD_tbaa); 1883 unsigned Align = SomeLoad->getAlignment(); 1884 1885 // Rewrite all loads of the PN to use the new PHI. 1886 do { 1887 LoadInst *LI = Loads.pop_back_val(); 1888 LI->replaceAllUsesWith(NewPN); 1889 Pass.DeadInsts.push_back(LI); 1890 } while (!Loads.empty()); 1891 1892 // Inject loads into all of the pred blocks. 1893 for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num; ++Idx) { 1894 BasicBlock *Pred = PN.getIncomingBlock(Idx); 1895 TerminatorInst *TI = Pred->getTerminator(); 1896 Use *InUse = &PN.getOperandUse(PN.getOperandNumForIncomingValue(Idx)); 1897 Value *InVal = PN.getIncomingValue(Idx); 1898 IRBuilder<> PredBuilder(TI); 1899 1900 LoadInst *Load 1901 = PredBuilder.CreateLoad(InVal, (PN.getName() + ".sroa.speculate.load." + 1902 Pred->getName())); 1903 ++NumLoadsSpeculated; 1904 Load->setAlignment(Align); 1905 if (TBAATag) 1906 Load->setMetadata(LLVMContext::MD_tbaa, TBAATag); 1907 NewPN->addIncoming(Load, Pred); 1908 1909 Instruction *Ptr = dyn_cast<Instruction>(InVal); 1910 if (!Ptr) 1911 // No uses to rewrite. 1912 continue; 1913 1914 // Try to lookup and rewrite any partition uses corresponding to this phi 1915 // input. 1916 AllocaPartitioning::iterator PI 1917 = P.findPartitionForPHIOrSelectOperand(InUse); 1918 if (PI == P.end()) 1919 continue; 1920 1921 // Replace the Use in the PartitionUse for this operand with the Use 1922 // inside the load. 1923 AllocaPartitioning::use_iterator UI 1924 = P.findPartitionUseForPHIOrSelectOperand(InUse); 1925 assert(isa<PHINode>(*UI->U->getUser())); 1926 UI->U = &Load->getOperandUse(Load->getPointerOperandIndex()); 1927 } 1928 DEBUG(dbgs() << " speculated to: " << *NewPN << "\n"); 1929 } 1930 1931 /// Select instructions that use an alloca and are subsequently loaded can be 1932 /// rewritten to load both input pointers and then select between the result, 1933 /// allowing the load of the alloca to be promoted. 1934 /// From this: 1935 /// %P2 = select i1 %cond, i32* %Alloca, i32* %Other 1936 /// %V = load i32* %P2 1937 /// to: 1938 /// %V1 = load i32* %Alloca -> will be mem2reg'd 1939 /// %V2 = load i32* %Other 1940 /// %V = select i1 %cond, i32 %V1, i32 %V2 1941 /// 1942 /// We can do this to a select if its only uses are loads and if the operand 1943 /// to the select can be loaded unconditionally. 
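  ///
  /// Note that both operands end up being loaded unconditionally after the
  /// rewrite, so each must be known dereferenceable (e.g. an alloca) or
  /// provably safe to load at the position of the original load.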
1944 bool isSafeSelectToSpeculate(SelectInst &SI, 1945 SmallVectorImpl<LoadInst *> &Loads) { 1946 Value *TValue = SI.getTrueValue(); 1947 Value *FValue = SI.getFalseValue(); 1948 bool TDerefable = TValue->isDereferenceablePointer(); 1949 bool FDerefable = FValue->isDereferenceablePointer(); 1950 1951 for (Value::use_iterator UI = SI.use_begin(), UE = SI.use_end(); 1952 UI != UE; ++UI) { 1953 LoadInst *LI = dyn_cast<LoadInst>(*UI); 1954 if (LI == 0 || !LI->isSimple()) return false; 1955 1956 // Both operands to the select need to be dereferencable, either 1957 // absolutely (e.g. allocas) or at this point because we can see other 1958 // accesses to it. 1959 if (!TDerefable && !isSafeToLoadUnconditionally(TValue, LI, 1960 LI->getAlignment(), &TD)) 1961 return false; 1962 if (!FDerefable && !isSafeToLoadUnconditionally(FValue, LI, 1963 LI->getAlignment(), &TD)) 1964 return false; 1965 Loads.push_back(LI); 1966 } 1967 1968 return true; 1969 } 1970 1971 void visitSelectInst(SelectInst &SI) { 1972 DEBUG(dbgs() << " original: " << SI << "\n"); 1973 IRBuilder<> IRB(&SI); 1974 1975 // If the select isn't safe to speculate, just use simple logic to emit it. 1976 SmallVector<LoadInst *, 4> Loads; 1977 if (!isSafeSelectToSpeculate(SI, Loads)) 1978 return; 1979 1980 Use *Ops[2] = { &SI.getOperandUse(1), &SI.getOperandUse(2) }; 1981 AllocaPartitioning::iterator PIs[2]; 1982 AllocaPartitioning::PartitionUse PUs[2]; 1983 for (unsigned i = 0, e = 2; i != e; ++i) { 1984 PIs[i] = P.findPartitionForPHIOrSelectOperand(Ops[i]); 1985 if (PIs[i] != P.end()) { 1986 // If the pointer is within the partitioning, remove the select from 1987 // its uses. We'll add in the new loads below. 1988 AllocaPartitioning::use_iterator UI 1989 = P.findPartitionUseForPHIOrSelectOperand(Ops[i]); 1990 PUs[i] = *UI; 1991 P.use_erase(PIs[i], UI); 1992 } 1993 } 1994 1995 Value *TV = SI.getTrueValue(); 1996 Value *FV = SI.getFalseValue(); 1997 // Replace the loads of the select with a select of two loads. 1998 while (!Loads.empty()) { 1999 LoadInst *LI = Loads.pop_back_val(); 2000 2001 IRB.SetInsertPoint(LI); 2002 LoadInst *TL = 2003 IRB.CreateLoad(TV, LI->getName() + ".sroa.speculate.load.true"); 2004 LoadInst *FL = 2005 IRB.CreateLoad(FV, LI->getName() + ".sroa.speculate.load.false"); 2006 NumLoadsSpeculated += 2; 2007 2008 // Transfer alignment and TBAA info if present. 2009 TL->setAlignment(LI->getAlignment()); 2010 FL->setAlignment(LI->getAlignment()); 2011 if (MDNode *Tag = LI->getMetadata(LLVMContext::MD_tbaa)) { 2012 TL->setMetadata(LLVMContext::MD_tbaa, Tag); 2013 FL->setMetadata(LLVMContext::MD_tbaa, Tag); 2014 } 2015 2016 Value *V = IRB.CreateSelect(SI.getCondition(), TL, FL, 2017 LI->getName() + ".sroa.speculated"); 2018 2019 LoadInst *Loads[2] = { TL, FL }; 2020 for (unsigned i = 0, e = 2; i != e; ++i) { 2021 if (PIs[i] != P.end()) { 2022 Use *LoadUse = &Loads[i]->getOperandUse(0); 2023 assert(PUs[i].U->get() == LoadUse->get()); 2024 PUs[i].U = LoadUse; 2025 P.use_push_back(PIs[i], PUs[i]); 2026 } 2027 } 2028 2029 DEBUG(dbgs() << " speculated to: " << *V << "\n"); 2030 LI->replaceAllUsesWith(V); 2031 Pass.DeadInsts.push_back(LI); 2032 } 2033 } 2034}; 2035 2036/// \brief Visitor to rewrite instructions using a partition of an alloca to 2037/// use a new alloca. 2038/// 2039/// Also implements the rewriting to vector-based accesses when the partition 2040/// passes the isVectorPromotionViable predicate. Most of the rewriting logic 2041/// lives here. 
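///
/// For instance (illustrative only): when the new alloca is typed <2 x i64>,
/// element-sized loads and stores are rewritten into extractelement and
/// insertelement operations on whole-vector loads and stores of that alloca.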
class AllocaPartitionRewriter : public InstVisitor<AllocaPartitionRewriter,
                                                   bool> {
  // Befriend the base class so it can delegate to private visit methods.
  friend class llvm::InstVisitor<AllocaPartitionRewriter, bool>;

  const TargetData &TD;
  AllocaPartitioning &P;
  SROA &Pass;
  AllocaInst &OldAI, &NewAI;
  const uint64_t NewAllocaBeginOffset, NewAllocaEndOffset;

  // If we are rewriting an alloca partition which can be written as pure
  // vector operations, we stash extra information here. When VecTy is
  // non-null, we have some strict guarantees about the rewritten alloca:
  //   - The new alloca is exactly the size of the vector type here.
  //   - The accesses all either map to the entire vector or to a single
  //     element.
  //   - The set of accessing instructions is only one of those handled above
  //     in isVectorPromotionViable. Generally these are the same access kinds
  //     which are promotable via mem2reg.
  VectorType *VecTy;
  Type *ElementTy;
  uint64_t ElementSize;

  // This is a convenience and flag variable that will be null unless the new
  // alloca has a promotion-targeted integer type due to passing
  // isIntegerPromotionViable above. If it is non-null, the desired integer
  // type is stored here for easy access during rewriting.
  IntegerType *IntPromotionTy;

  // The offset of the partition user currently being rewritten.
  uint64_t BeginOffset, EndOffset;
  Use *OldUse;
  Instruction *OldPtr;

  // The name prefix to use when rewriting instructions for this alloca.
  std::string NamePrefix;

public:
  AllocaPartitionRewriter(const TargetData &TD, AllocaPartitioning &P,
                          AllocaPartitioning::iterator PI,
                          SROA &Pass, AllocaInst &OldAI, AllocaInst &NewAI,
                          uint64_t NewBeginOffset, uint64_t NewEndOffset)
    : TD(TD), P(P), Pass(Pass),
      OldAI(OldAI), NewAI(NewAI),
      NewAllocaBeginOffset(NewBeginOffset),
      NewAllocaEndOffset(NewEndOffset),
      VecTy(), ElementTy(), ElementSize(), IntPromotionTy(),
      BeginOffset(), EndOffset() {
  }

  /// \brief Visit the users of the alloca partition and rewrite them.
  bool visitUsers(AllocaPartitioning::const_use_iterator I,
                  AllocaPartitioning::const_use_iterator E) {
    if (isVectorPromotionViable(TD, NewAI.getAllocatedType(), P,
                                NewAllocaBeginOffset, NewAllocaEndOffset,
                                I, E)) {
      ++NumVectorized;
      VecTy = cast<VectorType>(NewAI.getAllocatedType());
      ElementTy = VecTy->getElementType();
      assert((VecTy->getScalarSizeInBits() % 8) == 0 &&
             "Only multiple-of-8 sized vector elements are viable");
      ElementSize = VecTy->getScalarSizeInBits() / 8;
    } else if (isIntegerPromotionViable(TD, NewAI.getAllocatedType(),
                                        P, I, E)) {
      IntPromotionTy = cast<IntegerType>(NewAI.getAllocatedType());
    }
    bool CanSROA = true;
    for (; I != E; ++I) {
      BeginOffset = I->BeginOffset;
      EndOffset = I->EndOffset;
      OldUse = I->U;
      OldPtr = cast<Instruction>(I->U->get());
      NamePrefix = (Twine(NewAI.getName()) + "." + Twine(BeginOffset)).str();
      CanSROA &= visit(cast<Instruction>(I->U->getUser()));
    }
    if (VecTy) {
      assert(CanSROA);
      VecTy = 0;
      ElementTy = 0;
      ElementSize = 0;
    }
    return CanSROA;
  }

private:
  // Every instruction which can end up as a user must have a rewrite rule.
2129 bool visitInstruction(Instruction &I) { 2130 DEBUG(dbgs() << " !!!! Cannot rewrite: " << I << "\n"); 2131 llvm_unreachable("No rewrite rule for this instruction!"); 2132 } 2133 2134 Twine getName(const Twine &Suffix) { 2135 return NamePrefix + Suffix; 2136 } 2137 2138 Value *getAdjustedAllocaPtr(IRBuilder<> &IRB, Type *PointerTy) { 2139 assert(BeginOffset >= NewAllocaBeginOffset); 2140 APInt Offset(TD.getPointerSizeInBits(), BeginOffset - NewAllocaBeginOffset); 2141 return getAdjustedPtr(IRB, TD, &NewAI, Offset, PointerTy, getName("")); 2142 } 2143 2144 unsigned getAdjustedAlign(uint64_t Offset) { 2145 unsigned NewAIAlign = NewAI.getAlignment(); 2146 if (!NewAIAlign) 2147 NewAIAlign = TD.getABITypeAlignment(NewAI.getAllocatedType()); 2148 return MinAlign(NewAIAlign, Offset); 2149 } 2150 unsigned getAdjustedAlign() { 2151 return getAdjustedAlign(BeginOffset - NewAllocaBeginOffset); 2152 } 2153 2154 bool isTypeAlignSufficient(Type *Ty) { 2155 return TD.getABITypeAlignment(Ty) >= getAdjustedAlign(); 2156 } 2157 2158 ConstantInt *getIndex(IRBuilder<> &IRB, uint64_t Offset) { 2159 assert(VecTy && "Can only call getIndex when rewriting a vector"); 2160 uint64_t RelOffset = Offset - NewAllocaBeginOffset; 2161 assert(RelOffset / ElementSize < UINT32_MAX && "Index out of bounds"); 2162 uint32_t Index = RelOffset / ElementSize; 2163 assert(Index * ElementSize == RelOffset); 2164 return IRB.getInt32(Index); 2165 } 2166 2167 Value *extractInteger(IRBuilder<> &IRB, IntegerType *TargetTy, 2168 uint64_t Offset) { 2169 assert(IntPromotionTy && "Alloca is not an integer we can extract from"); 2170 Value *V = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), 2171 getName(".load")); 2172 assert(Offset >= NewAllocaBeginOffset && "Out of bounds offset"); 2173 uint64_t RelOffset = Offset - NewAllocaBeginOffset; 2174 if (RelOffset) 2175 V = IRB.CreateLShr(V, RelOffset*8, getName(".shift")); 2176 if (TargetTy != IntPromotionTy) { 2177 assert(TargetTy->getBitWidth() < IntPromotionTy->getBitWidth() && 2178 "Cannot extract to a larger integer!"); 2179 V = IRB.CreateTrunc(V, TargetTy, getName(".trunc")); 2180 } 2181 return V; 2182 } 2183 2184 StoreInst *insertInteger(IRBuilder<> &IRB, Value *V, uint64_t Offset) { 2185 IntegerType *Ty = cast<IntegerType>(V->getType()); 2186 if (Ty == IntPromotionTy) 2187 return IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment()); 2188 2189 assert(Ty->getBitWidth() < IntPromotionTy->getBitWidth() && 2190 "Cannot insert a larger integer!"); 2191 V = IRB.CreateZExt(V, IntPromotionTy, getName(".ext")); 2192 assert(Offset >= NewAllocaBeginOffset && "Out of bounds offset"); 2193 uint64_t RelOffset = Offset - NewAllocaBeginOffset; 2194 if (RelOffset) 2195 V = IRB.CreateShl(V, RelOffset*8, getName(".shift")); 2196 2197 APInt Mask = ~Ty->getMask().zext(IntPromotionTy->getBitWidth()) 2198 .shl(RelOffset*8); 2199 Value *Old = IRB.CreateAnd(IRB.CreateAlignedLoad(&NewAI, 2200 NewAI.getAlignment(), 2201 getName(".oldload")), 2202 Mask, getName(".mask")); 2203 return IRB.CreateAlignedStore(IRB.CreateOr(Old, V, getName(".insert")), 2204 &NewAI, NewAI.getAlignment()); 2205 } 2206 2207 void deleteIfTriviallyDead(Value *V) { 2208 Instruction *I = cast<Instruction>(V); 2209 if (isInstructionTriviallyDead(I)) 2210 Pass.DeadInsts.push_back(I); 2211 } 2212 2213 Value *getValueCast(IRBuilder<> &IRB, Value *V, Type *Ty) { 2214 if (V->getType()->isIntegerTy() && Ty->isPointerTy()) 2215 return IRB.CreateIntToPtr(V, Ty); 2216 if (V->getType()->isPointerTy() && Ty->isIntegerTy()) 2217 return 
IRB.CreatePtrToInt(V, Ty); 2218 2219 return IRB.CreateBitCast(V, Ty); 2220 } 2221 2222 bool rewriteVectorizedLoadInst(IRBuilder<> &IRB, LoadInst &LI, Value *OldOp) { 2223 Value *Result; 2224 if (LI.getType() == VecTy->getElementType() || 2225 BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset) { 2226 Result = IRB.CreateExtractElement( 2227 IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), getName(".load")), 2228 getIndex(IRB, BeginOffset), getName(".extract")); 2229 } else { 2230 Result = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), 2231 getName(".load")); 2232 } 2233 if (Result->getType() != LI.getType()) 2234 Result = getValueCast(IRB, Result, LI.getType()); 2235 LI.replaceAllUsesWith(Result); 2236 Pass.DeadInsts.push_back(&LI); 2237 2238 DEBUG(dbgs() << " to: " << *Result << "\n"); 2239 return true; 2240 } 2241 2242 bool rewriteIntegerLoad(IRBuilder<> &IRB, LoadInst &LI) { 2243 assert(!LI.isVolatile()); 2244 Value *Result = extractInteger(IRB, cast<IntegerType>(LI.getType()), 2245 BeginOffset); 2246 LI.replaceAllUsesWith(Result); 2247 Pass.DeadInsts.push_back(&LI); 2248 DEBUG(dbgs() << " to: " << *Result << "\n"); 2249 return true; 2250 } 2251 2252 bool visitLoadInst(LoadInst &LI) { 2253 DEBUG(dbgs() << " original: " << LI << "\n"); 2254 Value *OldOp = LI.getOperand(0); 2255 assert(OldOp == OldPtr); 2256 IRBuilder<> IRB(&LI); 2257 2258 if (VecTy) 2259 return rewriteVectorizedLoadInst(IRB, LI, OldOp); 2260 if (IntPromotionTy) 2261 return rewriteIntegerLoad(IRB, LI); 2262 2263 Value *NewPtr = getAdjustedAllocaPtr(IRB, 2264 LI.getPointerOperand()->getType()); 2265 LI.setOperand(0, NewPtr); 2266 if (LI.getAlignment() || !isTypeAlignSufficient(LI.getType())) 2267 LI.setAlignment(getAdjustedAlign()); 2268 DEBUG(dbgs() << " to: " << LI << "\n"); 2269 2270 deleteIfTriviallyDead(OldOp); 2271 return NewPtr == &NewAI && !LI.isVolatile(); 2272 } 2273 2274 bool rewriteVectorizedStoreInst(IRBuilder<> &IRB, StoreInst &SI, 2275 Value *OldOp) { 2276 Value *V = SI.getValueOperand(); 2277 if (V->getType() == ElementTy || 2278 BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset) { 2279 if (V->getType() != ElementTy) 2280 V = getValueCast(IRB, V, ElementTy); 2281 LoadInst *LI = IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), 2282 getName(".load")); 2283 V = IRB.CreateInsertElement(LI, V, getIndex(IRB, BeginOffset), 2284 getName(".insert")); 2285 } else if (V->getType() != VecTy) { 2286 V = getValueCast(IRB, V, VecTy); 2287 } 2288 StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment()); 2289 Pass.DeadInsts.push_back(&SI); 2290 2291 (void)Store; 2292 DEBUG(dbgs() << " to: " << *Store << "\n"); 2293 return true; 2294 } 2295 2296 bool rewriteIntegerStore(IRBuilder<> &IRB, StoreInst &SI) { 2297 assert(!SI.isVolatile()); 2298 StoreInst *Store = insertInteger(IRB, SI.getValueOperand(), BeginOffset); 2299 Pass.DeadInsts.push_back(&SI); 2300 (void)Store; 2301 DEBUG(dbgs() << " to: " << *Store << "\n"); 2302 return true; 2303 } 2304 2305 bool visitStoreInst(StoreInst &SI) { 2306 DEBUG(dbgs() << " original: " << SI << "\n"); 2307 Value *OldOp = SI.getOperand(1); 2308 assert(OldOp == OldPtr); 2309 IRBuilder<> IRB(&SI); 2310 2311 if (VecTy) 2312 return rewriteVectorizedStoreInst(IRB, SI, OldOp); 2313 if (IntPromotionTy) 2314 return rewriteIntegerStore(IRB, SI); 2315 2316 Value *NewPtr = getAdjustedAllocaPtr(IRB, 2317 SI.getPointerOperand()->getType()); 2318 SI.setOperand(1, NewPtr); 2319 if (SI.getAlignment() || 2320 
!isTypeAlignSufficient(SI.getValueOperand()->getType())) 2321 SI.setAlignment(getAdjustedAlign()); 2322 if (SI.getAlignment()) 2323 SI.setAlignment(MinAlign(NewAI.getAlignment(), 2324 BeginOffset - NewAllocaBeginOffset)); 2325 DEBUG(dbgs() << " to: " << SI << "\n"); 2326 2327 deleteIfTriviallyDead(OldOp); 2328 return NewPtr == &NewAI && !SI.isVolatile(); 2329 } 2330 2331 bool visitMemSetInst(MemSetInst &II) { 2332 DEBUG(dbgs() << " original: " << II << "\n"); 2333 IRBuilder<> IRB(&II); 2334 assert(II.getRawDest() == OldPtr); 2335 2336 // If the memset has a variable size, it cannot be split, just adjust the 2337 // pointer to the new alloca. 2338 if (!isa<Constant>(II.getLength())) { 2339 II.setDest(getAdjustedAllocaPtr(IRB, II.getRawDest()->getType())); 2340 Type *CstTy = II.getAlignmentCst()->getType(); 2341 II.setAlignment(ConstantInt::get(CstTy, getAdjustedAlign())); 2342 2343 deleteIfTriviallyDead(OldPtr); 2344 return false; 2345 } 2346 2347 // Record this instruction for deletion. 2348 if (Pass.DeadSplitInsts.insert(&II)) 2349 Pass.DeadInsts.push_back(&II); 2350 2351 Type *AllocaTy = NewAI.getAllocatedType(); 2352 Type *ScalarTy = AllocaTy->getScalarType(); 2353 2354 // If this doesn't map cleanly onto the alloca type, and that type isn't 2355 // a single value type, just emit a memset. 2356 if (!VecTy && (BeginOffset != NewAllocaBeginOffset || 2357 EndOffset != NewAllocaEndOffset || 2358 !AllocaTy->isSingleValueType() || 2359 !TD.isLegalInteger(TD.getTypeSizeInBits(ScalarTy)))) { 2360 Type *SizeTy = II.getLength()->getType(); 2361 Constant *Size = ConstantInt::get(SizeTy, EndOffset - BeginOffset); 2362 CallInst *New 2363 = IRB.CreateMemSet(getAdjustedAllocaPtr(IRB, 2364 II.getRawDest()->getType()), 2365 II.getValue(), Size, getAdjustedAlign(), 2366 II.isVolatile()); 2367 (void)New; 2368 DEBUG(dbgs() << " to: " << *New << "\n"); 2369 return false; 2370 } 2371 2372 // If we can represent this as a simple value, we have to build the actual 2373 // value to store, which requires expanding the byte present in memset to 2374 // a sensible representation for the alloca type. This is essentially 2375 // splatting the byte to a sufficiently wide integer, bitcasting to the 2376 // desired scalar type, and splatting it across any desired vector type. 2377 Value *V = II.getValue(); 2378 IntegerType *VTy = cast<IntegerType>(V->getType()); 2379 Type *IntTy = Type::getIntNTy(VTy->getContext(), 2380 TD.getTypeSizeInBits(ScalarTy)); 2381 if (TD.getTypeSizeInBits(ScalarTy) > VTy->getBitWidth()) 2382 V = IRB.CreateMul(IRB.CreateZExt(V, IntTy, getName(".zext")), 2383 ConstantExpr::getUDiv( 2384 Constant::getAllOnesValue(IntTy), 2385 ConstantExpr::getZExt( 2386 Constant::getAllOnesValue(V->getType()), 2387 IntTy)), 2388 getName(".isplat")); 2389 if (V->getType() != ScalarTy) { 2390 if (ScalarTy->isPointerTy()) 2391 V = IRB.CreateIntToPtr(V, ScalarTy); 2392 else if (ScalarTy->isPrimitiveType() || ScalarTy->isVectorTy()) 2393 V = IRB.CreateBitCast(V, ScalarTy); 2394 else if (ScalarTy->isIntegerTy()) 2395 llvm_unreachable("Computed different integer types with equal widths"); 2396 else 2397 llvm_unreachable("Invalid scalar type"); 2398 } 2399 2400 // If this is an element-wide memset of a vectorizable alloca, insert it. 
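    // (We load the current vector value, insert the splatted scalar into the
    // single element this memset covers, and store the whole vector back.)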
2401 if (VecTy && (BeginOffset > NewAllocaBeginOffset || 2402 EndOffset < NewAllocaEndOffset)) { 2403 StoreInst *Store = IRB.CreateAlignedStore( 2404 IRB.CreateInsertElement(IRB.CreateAlignedLoad(&NewAI, 2405 NewAI.getAlignment(), 2406 getName(".load")), 2407 V, getIndex(IRB, BeginOffset), 2408 getName(".insert")), 2409 &NewAI, NewAI.getAlignment()); 2410 (void)Store; 2411 DEBUG(dbgs() << " to: " << *Store << "\n"); 2412 return true; 2413 } 2414 2415 // Splat to a vector if needed. 2416 if (VectorType *VecTy = dyn_cast<VectorType>(AllocaTy)) { 2417 VectorType *SplatSourceTy = VectorType::get(V->getType(), 1); 2418 V = IRB.CreateShuffleVector( 2419 IRB.CreateInsertElement(UndefValue::get(SplatSourceTy), V, 2420 IRB.getInt32(0), getName(".vsplat.insert")), 2421 UndefValue::get(SplatSourceTy), 2422 ConstantVector::getSplat(VecTy->getNumElements(), IRB.getInt32(0)), 2423 getName(".vsplat.shuffle")); 2424 assert(V->getType() == VecTy); 2425 } 2426 2427 Value *New = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment(), 2428 II.isVolatile()); 2429 (void)New; 2430 DEBUG(dbgs() << " to: " << *New << "\n"); 2431 return !II.isVolatile(); 2432 } 2433 2434 bool visitMemTransferInst(MemTransferInst &II) { 2435 // Rewriting of memory transfer instructions can be a bit tricky. We break 2436 // them into two categories: split intrinsics and unsplit intrinsics. 2437 2438 DEBUG(dbgs() << " original: " << II << "\n"); 2439 IRBuilder<> IRB(&II); 2440 2441 assert(II.getRawSource() == OldPtr || II.getRawDest() == OldPtr); 2442 bool IsDest = II.getRawDest() == OldPtr; 2443 2444 const AllocaPartitioning::MemTransferOffsets &MTO 2445 = P.getMemTransferOffsets(II); 2446 2447 // Compute the relative offset within the transfer. 2448 unsigned IntPtrWidth = TD.getPointerSizeInBits(); 2449 APInt RelOffset(IntPtrWidth, BeginOffset - (IsDest ? MTO.DestBegin 2450 : MTO.SourceBegin)); 2451 2452 unsigned Align = II.getAlignment(); 2453 if (Align > 1) 2454 Align = MinAlign(RelOffset.zextOrTrunc(64).getZExtValue(), 2455 MinAlign(II.getAlignment(), getAdjustedAlign())); 2456 2457 // For unsplit intrinsics, we simply modify the source and destination 2458 // pointers in place. This isn't just an optimization, it is a matter of 2459 // correctness. With unsplit intrinsics we may be dealing with transfers 2460 // within a single alloca before SROA ran, or with transfers that have 2461 // a variable length. We may also be dealing with memmove instead of 2462 // memcpy, and so simply updating the pointers is the necessary for us to 2463 // update both source and dest of a single call. 2464 if (!MTO.IsSplittable) { 2465 Value *OldOp = IsDest ? II.getRawDest() : II.getRawSource(); 2466 if (IsDest) 2467 II.setDest(getAdjustedAllocaPtr(IRB, II.getRawDest()->getType())); 2468 else 2469 II.setSource(getAdjustedAllocaPtr(IRB, II.getRawSource()->getType())); 2470 2471 Type *CstTy = II.getAlignmentCst()->getType(); 2472 II.setAlignment(ConstantInt::get(CstTy, Align)); 2473 2474 DEBUG(dbgs() << " to: " << II << "\n"); 2475 deleteIfTriviallyDead(OldOp); 2476 return false; 2477 } 2478 // For split transfer intrinsics we have an incredibly useful assurance: 2479 // the source and destination do not reside within the same alloca, and at 2480 // least one of them does not escape. This means that we can replace 2481 // memmove with memcpy, and we don't need to worry about all manner of 2482 // downsides to splitting and transforming the operations. 
2483 2484 // If this doesn't map cleanly onto the alloca type, and that type isn't 2485 // a single value type, just emit a memcpy. 2486 bool EmitMemCpy 2487 = !VecTy && (BeginOffset != NewAllocaBeginOffset || 2488 EndOffset != NewAllocaEndOffset || 2489 !NewAI.getAllocatedType()->isSingleValueType()); 2490 2491 // If we're just going to emit a memcpy, the alloca hasn't changed, and the 2492 // size hasn't been shrunk based on analysis of the viable range, this is 2493 // a no-op. 2494 if (EmitMemCpy && &OldAI == &NewAI) { 2495 uint64_t OrigBegin = IsDest ? MTO.DestBegin : MTO.SourceBegin; 2496 uint64_t OrigEnd = IsDest ? MTO.DestEnd : MTO.SourceEnd; 2497 // Ensure the start lines up. 2498 assert(BeginOffset == OrigBegin); 2499 (void)OrigBegin; 2500 2501 // Rewrite the size as needed. 2502 if (EndOffset != OrigEnd) 2503 II.setLength(ConstantInt::get(II.getLength()->getType(), 2504 EndOffset - BeginOffset)); 2505 return false; 2506 } 2507 // Record this instruction for deletion. 2508 if (Pass.DeadSplitInsts.insert(&II)) 2509 Pass.DeadInsts.push_back(&II); 2510 2511 bool IsVectorElement = VecTy && (BeginOffset > NewAllocaBeginOffset || 2512 EndOffset < NewAllocaEndOffset); 2513 2514 Type *OtherPtrTy = IsDest ? II.getRawSource()->getType() 2515 : II.getRawDest()->getType(); 2516 if (!EmitMemCpy) 2517 OtherPtrTy = IsVectorElement ? VecTy->getElementType()->getPointerTo() 2518 : NewAI.getType(); 2519 2520 // Compute the other pointer, folding as much as possible to produce 2521 // a single, simple GEP in most cases. 2522 Value *OtherPtr = IsDest ? II.getRawSource() : II.getRawDest(); 2523 OtherPtr = getAdjustedPtr(IRB, TD, OtherPtr, RelOffset, OtherPtrTy, 2524 getName("." + OtherPtr->getName())); 2525 2526 // Strip all inbounds GEPs and pointer casts to try to dig out any root 2527 // alloca that should be re-examined after rewriting this instruction. 2528 if (AllocaInst *AI 2529 = dyn_cast<AllocaInst>(OtherPtr->stripInBoundsOffsets())) 2530 Pass.Worklist.insert(AI); 2531 2532 if (EmitMemCpy) { 2533 Value *OurPtr 2534 = getAdjustedAllocaPtr(IRB, IsDest ? II.getRawDest()->getType() 2535 : II.getRawSource()->getType()); 2536 Type *SizeTy = II.getLength()->getType(); 2537 Constant *Size = ConstantInt::get(SizeTy, EndOffset - BeginOffset); 2538 2539 CallInst *New = IRB.CreateMemCpy(IsDest ? OurPtr : OtherPtr, 2540 IsDest ? OtherPtr : OurPtr, 2541 Size, Align, II.isVolatile()); 2542 (void)New; 2543 DEBUG(dbgs() << " to: " << *New << "\n"); 2544 return false; 2545 } 2546 2547 Value *SrcPtr = OtherPtr; 2548 Value *DstPtr = &NewAI; 2549 if (!IsDest) 2550 std::swap(SrcPtr, DstPtr); 2551 2552 Value *Src; 2553 if (IsVectorElement && !IsDest) { 2554 // We have to extract rather than load. 2555 Src = IRB.CreateExtractElement( 2556 IRB.CreateAlignedLoad(SrcPtr, Align, getName(".copyload")), 2557 getIndex(IRB, BeginOffset), 2558 getName(".copyextract")); 2559 } else { 2560 Src = IRB.CreateAlignedLoad(SrcPtr, Align, II.isVolatile(), 2561 getName(".copyload")); 2562 } 2563 2564 if (IsVectorElement && IsDest) { 2565 // We have to insert into a loaded copy before storing. 
2566 Src = IRB.CreateInsertElement( 2567 IRB.CreateAlignedLoad(&NewAI, NewAI.getAlignment(), getName(".load")), 2568 Src, getIndex(IRB, BeginOffset), 2569 getName(".insert")); 2570 } 2571 2572 StoreInst *Store = cast<StoreInst>( 2573 IRB.CreateAlignedStore(Src, DstPtr, Align, II.isVolatile())); 2574 (void)Store; 2575 DEBUG(dbgs() << " to: " << *Store << "\n"); 2576 return !II.isVolatile(); 2577 } 2578 2579 bool visitIntrinsicInst(IntrinsicInst &II) { 2580 assert(II.getIntrinsicID() == Intrinsic::lifetime_start || 2581 II.getIntrinsicID() == Intrinsic::lifetime_end); 2582 DEBUG(dbgs() << " original: " << II << "\n"); 2583 IRBuilder<> IRB(&II); 2584 assert(II.getArgOperand(1) == OldPtr); 2585 2586 // Record this instruction for deletion. 2587 if (Pass.DeadSplitInsts.insert(&II)) 2588 Pass.DeadInsts.push_back(&II); 2589 2590 ConstantInt *Size 2591 = ConstantInt::get(cast<IntegerType>(II.getArgOperand(0)->getType()), 2592 EndOffset - BeginOffset); 2593 Value *Ptr = getAdjustedAllocaPtr(IRB, II.getArgOperand(1)->getType()); 2594 Value *New; 2595 if (II.getIntrinsicID() == Intrinsic::lifetime_start) 2596 New = IRB.CreateLifetimeStart(Ptr, Size); 2597 else 2598 New = IRB.CreateLifetimeEnd(Ptr, Size); 2599 2600 DEBUG(dbgs() << " to: " << *New << "\n"); 2601 return true; 2602 } 2603 2604 bool visitPHINode(PHINode &PN) { 2605 DEBUG(dbgs() << " original: " << PN << "\n"); 2606 2607 // We would like to compute a new pointer in only one place, but have it be 2608 // as local as possible to the PHI. To do that, we re-use the location of 2609 // the old pointer, which necessarily must be in the right position to 2610 // dominate the PHI. 2611 IRBuilder<> PtrBuilder(cast<Instruction>(OldPtr)); 2612 2613 Value *NewPtr = getAdjustedAllocaPtr(PtrBuilder, OldPtr->getType()); 2614 // Replace the operands which were using the old pointer. 2615 User::op_iterator OI = PN.op_begin(), OE = PN.op_end(); 2616 for (; OI != OE; ++OI) 2617 if (*OI == OldPtr) 2618 *OI = NewPtr; 2619 2620 DEBUG(dbgs() << " to: " << PN << "\n"); 2621 deleteIfTriviallyDead(OldPtr); 2622 return false; 2623 } 2624 2625 bool visitSelectInst(SelectInst &SI) { 2626 DEBUG(dbgs() << " original: " << SI << "\n"); 2627 IRBuilder<> IRB(&SI); 2628 2629 // Find the operand we need to rewrite here. 2630 bool IsTrueVal = SI.getTrueValue() == OldPtr; 2631 if (IsTrueVal) 2632 assert(SI.getFalseValue() != OldPtr && "Pointer is both operands!"); 2633 else 2634 assert(SI.getFalseValue() == OldPtr && "Pointer isn't an operand!"); 2635 2636 Value *NewPtr = getAdjustedAllocaPtr(IRB, OldPtr->getType()); 2637 SI.setOperand(IsTrueVal ? 1 : 2, NewPtr); 2638 DEBUG(dbgs() << " to: " << SI << "\n"); 2639 deleteIfTriviallyDead(OldPtr); 2640 return false; 2641 } 2642 2643}; 2644} 2645 2646namespace { 2647/// \brief Visitor to rewrite aggregate loads and stores as scalar. 2648/// 2649/// This pass aggressively rewrites all aggregate loads and stores on 2650/// a particular pointer (or any pointer derived from it which we can identify) 2651/// with scalar loads and stores. 2652class AggLoadStoreRewriter : public InstVisitor<AggLoadStoreRewriter, bool> { 2653 // Befriend the base class so it can delegate to private visit methods. 2654 friend class llvm::InstVisitor<AggLoadStoreRewriter, bool>; 2655 2656 const TargetData &TD; 2657 2658 /// Queue of pointer uses to analyze and potentially rewrite. 2659 SmallVector<Use *, 8> Queue; 2660 2661 /// Set to prevent us from cycling with phi nodes and loops. 
2662 SmallPtrSet<User *, 8> Visited; 2663 2664 /// The current pointer use being rewritten. This is used to dig up the used 2665 /// value (as opposed to the user). 2666 Use *U; 2667 2668public: 2669 AggLoadStoreRewriter(const TargetData &TD) : TD(TD) {} 2670 2671 /// Rewrite loads and stores through a pointer and all pointers derived from 2672 /// it. 2673 bool rewrite(Instruction &I) { 2674 DEBUG(dbgs() << " Rewriting FCA loads and stores...\n"); 2675 enqueueUsers(I); 2676 bool Changed = false; 2677 while (!Queue.empty()) { 2678 U = Queue.pop_back_val(); 2679 Changed |= visit(cast<Instruction>(U->getUser())); 2680 } 2681 return Changed; 2682 } 2683 2684private: 2685 /// Enqueue all the users of the given instruction for further processing. 2686 /// This uses a set to de-duplicate users. 2687 void enqueueUsers(Instruction &I) { 2688 for (Value::use_iterator UI = I.use_begin(), UE = I.use_end(); UI != UE; 2689 ++UI) 2690 if (Visited.insert(*UI)) 2691 Queue.push_back(&UI.getUse()); 2692 } 2693 2694 // Conservative default is to not rewrite anything. 2695 bool visitInstruction(Instruction &I) { return false; } 2696 2697 /// \brief Generic recursive split emission class. 2698 template <typename Derived> 2699 class OpSplitter { 2700 protected: 2701 /// The builder used to form new instructions. 2702 IRBuilder<> IRB; 2703 /// The indices which to be used with insert- or extractvalue to select the 2704 /// appropriate value within the aggregate. 2705 SmallVector<unsigned, 4> Indices; 2706 /// The indices to a GEP instruction which will move Ptr to the correct slot 2707 /// within the aggregate. 2708 SmallVector<Value *, 4> GEPIndices; 2709 /// The base pointer of the original op, used as a base for GEPing the 2710 /// split operations. 2711 Value *Ptr; 2712 2713 /// Initialize the splitter with an insertion point, Ptr and start with a 2714 /// single zero GEP index. 2715 OpSplitter(Instruction *InsertionPoint, Value *Ptr) 2716 : IRB(InsertionPoint), GEPIndices(1, IRB.getInt32(0)), Ptr(Ptr) {} 2717 2718 public: 2719 /// \brief Generic recursive split emission routine. 2720 /// 2721 /// This method recursively splits an aggregate op (load or store) into 2722 /// scalar or vector ops. It splits recursively until it hits a single value 2723 /// and emits that single value operation via the template argument. 2724 /// 2725 /// The logic of this routine relies on GEPs and insertvalue and 2726 /// extractvalue all operating with the same fundamental index list, merely 2727 /// formatted differently (GEPs need actual values). 2728 /// 2729 /// \param Ty The type being split recursively into smaller ops. 2730 /// \param Agg The aggregate value being built up or stored, depending on 2731 /// whether this is splitting a load or a store respectively. 2732 void emitSplitOps(Type *Ty, Value *&Agg, const Twine &Name) { 2733 if (Ty->isSingleValueType()) 2734 return static_cast<Derived *>(this)->emitFunc(Ty, Agg, Name); 2735 2736 if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { 2737 unsigned OldSize = Indices.size(); 2738 (void)OldSize; 2739 for (unsigned Idx = 0, Size = ATy->getNumElements(); Idx != Size; 2740 ++Idx) { 2741 assert(Indices.size() == OldSize && "Did not return to the old size"); 2742 Indices.push_back(Idx); 2743 GEPIndices.push_back(IRB.getInt32(Idx)); 2744 emitSplitOps(ATy->getElementType(), Agg, Name + "." 
+ Twine(Idx)); 2745 GEPIndices.pop_back(); 2746 Indices.pop_back(); 2747 } 2748 return; 2749 } 2750 2751 if (StructType *STy = dyn_cast<StructType>(Ty)) { 2752 unsigned OldSize = Indices.size(); 2753 (void)OldSize; 2754 for (unsigned Idx = 0, Size = STy->getNumElements(); Idx != Size; 2755 ++Idx) { 2756 assert(Indices.size() == OldSize && "Did not return to the old size"); 2757 Indices.push_back(Idx); 2758 GEPIndices.push_back(IRB.getInt32(Idx)); 2759 emitSplitOps(STy->getElementType(Idx), Agg, Name + "." + Twine(Idx)); 2760 GEPIndices.pop_back(); 2761 Indices.pop_back(); 2762 } 2763 return; 2764 } 2765 2766 llvm_unreachable("Only arrays and structs are aggregate loadable types"); 2767 } 2768 }; 2769 2770 struct LoadOpSplitter : public OpSplitter<LoadOpSplitter> { 2771 LoadOpSplitter(Instruction *InsertionPoint, Value *Ptr) 2772 : OpSplitter<LoadOpSplitter>(InsertionPoint, Ptr) {} 2773 2774 /// Emit a leaf load of a single value. This is called at the leaves of the 2775 /// recursive emission to actually load values. 2776 void emitFunc(Type *Ty, Value *&Agg, const Twine &Name) { 2777 assert(Ty->isSingleValueType()); 2778 // Load the single value and insert it using the indices. 2779 Value *Load = IRB.CreateLoad(IRB.CreateInBoundsGEP(Ptr, GEPIndices, 2780 Name + ".gep"), 2781 Name + ".load"); 2782 Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name + ".insert"); 2783 DEBUG(dbgs() << " to: " << *Load << "\n"); 2784 } 2785 }; 2786 2787 bool visitLoadInst(LoadInst &LI) { 2788 assert(LI.getPointerOperand() == *U); 2789 if (!LI.isSimple() || LI.getType()->isSingleValueType()) 2790 return false; 2791 2792 // We have an aggregate being loaded, split it apart. 2793 DEBUG(dbgs() << " original: " << LI << "\n"); 2794 LoadOpSplitter Splitter(&LI, *U); 2795 Value *V = UndefValue::get(LI.getType()); 2796 Splitter.emitSplitOps(LI.getType(), V, LI.getName() + ".fca"); 2797 LI.replaceAllUsesWith(V); 2798 LI.eraseFromParent(); 2799 return true; 2800 } 2801 2802 struct StoreOpSplitter : public OpSplitter<StoreOpSplitter> { 2803 StoreOpSplitter(Instruction *InsertionPoint, Value *Ptr) 2804 : OpSplitter<StoreOpSplitter>(InsertionPoint, Ptr) {} 2805 2806 /// Emit a leaf store of a single value. This is called at the leaves of the 2807 /// recursive emission to actually produce stores. 2808 void emitFunc(Type *Ty, Value *&Agg, const Twine &Name) { 2809 assert(Ty->isSingleValueType()); 2810 // Extract the single value and store it using the indices. 2811 Value *Store = IRB.CreateStore( 2812 IRB.CreateExtractValue(Agg, Indices, Name + ".extract"), 2813 IRB.CreateInBoundsGEP(Ptr, GEPIndices, Name + ".gep")); 2814 (void)Store; 2815 DEBUG(dbgs() << " to: " << *Store << "\n"); 2816 } 2817 }; 2818 2819 bool visitStoreInst(StoreInst &SI) { 2820 if (!SI.isSimple() || SI.getPointerOperand() != *U) 2821 return false; 2822 Value *V = SI.getValueOperand(); 2823 if (V->getType()->isSingleValueType()) 2824 return false; 2825 2826 // We have an aggregate being stored, split it apart. 
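    // (For example, a store of an { i32, float } value becomes one
    // extractvalue+GEP+store triple per leaf field, mirroring the load
    // splitting above.)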
2827 DEBUG(dbgs() << " original: " << SI << "\n"); 2828 StoreOpSplitter Splitter(&SI, *U); 2829 Splitter.emitSplitOps(V->getType(), V, V->getName() + ".fca"); 2830 SI.eraseFromParent(); 2831 return true; 2832 } 2833 2834 bool visitBitCastInst(BitCastInst &BC) { 2835 enqueueUsers(BC); 2836 return false; 2837 } 2838 2839 bool visitGetElementPtrInst(GetElementPtrInst &GEPI) { 2840 enqueueUsers(GEPI); 2841 return false; 2842 } 2843 2844 bool visitPHINode(PHINode &PN) { 2845 enqueueUsers(PN); 2846 return false; 2847 } 2848 2849 bool visitSelectInst(SelectInst &SI) { 2850 enqueueUsers(SI); 2851 return false; 2852 } 2853}; 2854} 2855 2856/// \brief Try to find a partition of the aggregate type passed in for a given 2857/// offset and size. 2858/// 2859/// This recurses through the aggregate type and tries to compute a subtype 2860/// based on the offset and size. When the offset and size span a sub-section 2861/// of an array, it will even compute a new array type for that sub-section, 2862/// and the same for structs. 2863/// 2864/// Note that this routine is very strict and tries to find a partition of the 2865/// type which produces the *exact* right offset and size. It is not forgiving 2866/// when the size or offset cause either end of type-based partition to be off. 2867/// Also, this is a best-effort routine. It is reasonable to give up and not 2868/// return a type if necessary. 2869static Type *getTypePartition(const TargetData &TD, Type *Ty, 2870 uint64_t Offset, uint64_t Size) { 2871 if (Offset == 0 && TD.getTypeAllocSize(Ty) == Size) 2872 return Ty; 2873 2874 if (SequentialType *SeqTy = dyn_cast<SequentialType>(Ty)) { 2875 // We can't partition pointers... 2876 if (SeqTy->isPointerTy()) 2877 return 0; 2878 2879 Type *ElementTy = SeqTy->getElementType(); 2880 uint64_t ElementSize = TD.getTypeAllocSize(ElementTy); 2881 uint64_t NumSkippedElements = Offset / ElementSize; 2882 if (ArrayType *ArrTy = dyn_cast<ArrayType>(SeqTy)) 2883 if (NumSkippedElements >= ArrTy->getNumElements()) 2884 return 0; 2885 if (VectorType *VecTy = dyn_cast<VectorType>(SeqTy)) 2886 if (NumSkippedElements >= VecTy->getNumElements()) 2887 return 0; 2888 Offset -= NumSkippedElements * ElementSize; 2889 2890 // First check if we need to recurse. 2891 if (Offset > 0 || Size < ElementSize) { 2892 // Bail if the partition ends in a different array element. 2893 if ((Offset + Size) > ElementSize) 2894 return 0; 2895 // Recurse through the element type trying to peel off offset bytes. 2896 return getTypePartition(TD, ElementTy, Offset, Size); 2897 } 2898 assert(Offset == 0); 2899 2900 if (Size == ElementSize) 2901 return ElementTy; 2902 assert(Size > ElementSize); 2903 uint64_t NumElements = Size / ElementSize; 2904 if (NumElements * ElementSize != Size) 2905 return 0; 2906 return ArrayType::get(ElementTy, NumElements); 2907 } 2908 2909 StructType *STy = dyn_cast<StructType>(Ty); 2910 if (!STy) 2911 return 0; 2912 2913 const StructLayout *SL = TD.getStructLayout(STy); 2914 if (Offset >= SL->getSizeInBytes()) 2915 return 0; 2916 uint64_t EndOffset = Offset + Size; 2917 if (EndOffset > SL->getSizeInBytes()) 2918 return 0; 2919 2920 unsigned Index = SL->getElementContainingOffset(Offset); 2921 Offset -= SL->getElementOffset(Index); 2922 2923 Type *ElementTy = STy->getElementType(Index); 2924 uint64_t ElementSize = TD.getTypeAllocSize(ElementTy); 2925 if (Offset >= ElementSize) 2926 return 0; // The offset points into alignment padding. 2927 2928 // See if any partition must be contained by the element. 
2929 if (Offset > 0 || Size < ElementSize) { 2930 if ((Offset + Size) > ElementSize) 2931 return 0; 2932 return getTypePartition(TD, ElementTy, Offset, Size); 2933 } 2934 assert(Offset == 0); 2935 2936 if (Size == ElementSize) 2937 return ElementTy; 2938 2939 StructType::element_iterator EI = STy->element_begin() + Index, 2940 EE = STy->element_end(); 2941 if (EndOffset < SL->getSizeInBytes()) { 2942 unsigned EndIndex = SL->getElementContainingOffset(EndOffset); 2943 if (Index == EndIndex) 2944 return 0; // Within a single element and its padding. 2945 2946 // Don't try to form "natural" types if the elements don't line up with the 2947 // expected size. 2948 // FIXME: We could potentially recurse down through the last element in the 2949 // sub-struct to find a natural end point. 2950 if (SL->getElementOffset(EndIndex) != EndOffset) 2951 return 0; 2952 2953 assert(Index < EndIndex); 2954 EE = STy->element_begin() + EndIndex; 2955 } 2956 2957 // Try to build up a sub-structure. 2958 SmallVector<Type *, 4> ElementTys; 2959 do { 2960 ElementTys.push_back(*EI++); 2961 } while (EI != EE); 2962 StructType *SubTy = StructType::get(STy->getContext(), ElementTys, 2963 STy->isPacked()); 2964 const StructLayout *SubSL = TD.getStructLayout(SubTy); 2965 if (Size != SubSL->getSizeInBytes()) 2966 return 0; // The sub-struct doesn't have quite the size needed. 2967 2968 return SubTy; 2969} 2970 2971/// \brief Rewrite an alloca partition's users. 2972/// 2973/// This routine drives both of the rewriting goals of the SROA pass. It tries 2974/// to rewrite uses of an alloca partition to be conducive for SSA value 2975/// promotion. If the partition needs a new, more refined alloca, this will 2976/// build that new alloca, preserving as much type information as possible, and 2977/// rewrite the uses of the old alloca to point at the new one and have the 2978/// appropriate new offsets. It also evaluates how successful the rewrite was 2979/// at enabling promotion and if it was successful queues the alloca to be 2980/// promoted. 2981bool SROA::rewriteAllocaPartition(AllocaInst &AI, 2982 AllocaPartitioning &P, 2983 AllocaPartitioning::iterator PI) { 2984 uint64_t AllocaSize = PI->EndOffset - PI->BeginOffset; 2985 if (P.use_begin(PI) == P.use_end(PI)) 2986 return false; // No live uses left of this partition. 2987 2988 DEBUG(dbgs() << "Speculating PHIs and selects in partition " 2989 << "[" << PI->BeginOffset << "," << PI->EndOffset << ")\n"); 2990 2991 PHIOrSelectSpeculator Speculator(*TD, P, *this); 2992 DEBUG(dbgs() << " speculating "); 2993 DEBUG(P.print(dbgs(), PI, "")); 2994 Speculator.visitUsers(P.use_begin(PI), P.use_end(PI)); 2995 2996 // Try to compute a friendly type for this partition of the alloca. This 2997 // won't always succeed, in which case we fall back to a legal integer type 2998 // or an i8 array of an appropriate size. 
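  // For example (illustrative): a 4-byte partition of an { i32, i32 } alloca
  // picks up the i32 sub-type, while an 8-byte partition with no common type
  // becomes an i64 where that is a legal integer and an [8 x i8] otherwise.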
2999 Type *AllocaTy = 0; 3000 if (Type *PartitionTy = P.getCommonType(PI)) 3001 if (TD->getTypeAllocSize(PartitionTy) >= AllocaSize) 3002 AllocaTy = PartitionTy; 3003 if (!AllocaTy) 3004 if (Type *PartitionTy = getTypePartition(*TD, AI.getAllocatedType(), 3005 PI->BeginOffset, AllocaSize)) 3006 AllocaTy = PartitionTy; 3007 if ((!AllocaTy || 3008 (AllocaTy->isArrayTy() && 3009 AllocaTy->getArrayElementType()->isIntegerTy())) && 3010 TD->isLegalInteger(AllocaSize * 8)) 3011 AllocaTy = Type::getIntNTy(*C, AllocaSize * 8); 3012 if (!AllocaTy) 3013 AllocaTy = ArrayType::get(Type::getInt8Ty(*C), AllocaSize); 3014 assert(TD->getTypeAllocSize(AllocaTy) >= AllocaSize); 3015 3016 // Check for the case where we're going to rewrite to a new alloca of the 3017 // exact same type as the original, and with the same access offsets. In that 3018 // case, re-use the existing alloca, but still run through the rewriter to 3019 // performe phi and select speculation. 3020 AllocaInst *NewAI; 3021 if (AllocaTy == AI.getAllocatedType()) { 3022 assert(PI->BeginOffset == 0 && 3023 "Non-zero begin offset but same alloca type"); 3024 assert(PI == P.begin() && "Begin offset is zero on later partition"); 3025 NewAI = &AI; 3026 } else { 3027 unsigned Alignment = AI.getAlignment(); 3028 if (!Alignment) { 3029 // The minimum alignment which users can rely on when the explicit 3030 // alignment is omitted or zero is that required by the ABI for this 3031 // type. 3032 Alignment = TD->getABITypeAlignment(AI.getAllocatedType()); 3033 } 3034 Alignment = MinAlign(Alignment, PI->BeginOffset); 3035 // If we will get at least this much alignment from the type alone, leave 3036 // the alloca's alignment unconstrained. 3037 if (Alignment <= TD->getABITypeAlignment(AllocaTy)) 3038 Alignment = 0; 3039 NewAI = new AllocaInst(AllocaTy, 0, Alignment, 3040 AI.getName() + ".sroa." + Twine(PI - P.begin()), 3041 &AI); 3042 ++NumNewAllocas; 3043 } 3044 3045 DEBUG(dbgs() << "Rewriting alloca partition " 3046 << "[" << PI->BeginOffset << "," << PI->EndOffset << ") to: " 3047 << *NewAI << "\n"); 3048 3049 AllocaPartitionRewriter Rewriter(*TD, P, PI, *this, AI, *NewAI, 3050 PI->BeginOffset, PI->EndOffset); 3051 DEBUG(dbgs() << " rewriting "); 3052 DEBUG(P.print(dbgs(), PI, "")); 3053 if (Rewriter.visitUsers(P.use_begin(PI), P.use_end(PI))) { 3054 DEBUG(dbgs() << " and queuing for promotion\n"); 3055 PromotableAllocas.push_back(NewAI); 3056 } else if (NewAI != &AI) { 3057 // If we can't promote the alloca, iterate on it to check for new 3058 // refinements exposed by splitting the current alloca. Don't iterate on an 3059 // alloca which didn't actually change and didn't get promoted. 3060 Worklist.insert(NewAI); 3061 } 3062 return true; 3063} 3064 3065/// \brief Walks the partitioning of an alloca rewriting uses of each partition. 3066bool SROA::splitAlloca(AllocaInst &AI, AllocaPartitioning &P) { 3067 bool Changed = false; 3068 for (AllocaPartitioning::iterator PI = P.begin(), PE = P.end(); PI != PE; 3069 ++PI) 3070 Changed |= rewriteAllocaPartition(AI, P, PI); 3071 3072 return Changed; 3073} 3074 3075/// \brief Analyze an alloca for SROA. 3076/// 3077/// This analyzes the alloca to ensure we can reason about it, builds 3078/// a partitioning of the alloca, and then hands it off to be split and 3079/// rewritten as needed. 3080bool SROA::runOnAlloca(AllocaInst &AI) { 3081 DEBUG(dbgs() << "SROA alloca: " << AI << "\n"); 3082 ++NumAllocasAnalyzed; 3083 3084 // Special case dead allocas, as they're trivial. 
  if (AI.use_empty()) {
    AI.eraseFromParent();
    return true;
  }

  // Skip alloca forms that this analysis can't handle.
  if (AI.isArrayAllocation() || !AI.getAllocatedType()->isSized() ||
      TD->getTypeAllocSize(AI.getAllocatedType()) == 0)
    return false;

  // First check if this is a non-aggregate type that we should simply promote.
  if (!AI.getAllocatedType()->isAggregateType() && isAllocaPromotable(&AI)) {
    DEBUG(dbgs() << " Trivially scalar type, queuing for promotion...\n");
    PromotableAllocas.push_back(&AI);
    return false;
  }

  bool Changed = false;

  // First, split any FCA loads and stores touching this alloca to promote
  // better splitting and promotion opportunities.
  AggLoadStoreRewriter AggRewriter(*TD);
  Changed |= AggRewriter.rewrite(AI);

  // Build the partition set using a recursive instruction-visiting builder.
  AllocaPartitioning P(*TD, AI);
  DEBUG(P.print(dbgs()));
  if (P.isEscaped())
    return Changed;

  // No partitions to split. Leave the dead alloca for a later pass to clean up.
  if (P.begin() == P.end())
    return Changed;

  // Delete all the dead users of this alloca before splitting and rewriting it.
  for (AllocaPartitioning::dead_user_iterator DI = P.dead_user_begin(),
                                              DE = P.dead_user_end();
       DI != DE; ++DI) {
    Changed = true;
    (*DI)->replaceAllUsesWith(UndefValue::get((*DI)->getType()));
    DeadInsts.push_back(*DI);
  }
  for (AllocaPartitioning::dead_op_iterator DO = P.dead_op_begin(),
                                            DE = P.dead_op_end();
       DO != DE; ++DO) {
    Value *OldV = **DO;
    // Clobber the use with an undef value.
    **DO = UndefValue::get(OldV->getType());
    if (Instruction *OldI = dyn_cast<Instruction>(OldV))
      if (isInstructionTriviallyDead(OldI)) {
        Changed = true;
        DeadInsts.push_back(OldI);
      }
  }

  return splitAlloca(AI, P) || Changed;
}

/// \brief Delete the dead instructions accumulated in this run.
///
/// Recursively deletes the dead instructions we've accumulated. This is done
/// at the very end to maximize locality of the recursive delete and to
/// minimize the problems of invalidated instruction pointers as such pointers
/// are used heavily in the intermediate stages of the algorithm.
///
/// We also record the alloca instructions deleted here so that they aren't
/// subsequently handed to mem2reg to promote.
void SROA::deleteDeadInstructions(SmallPtrSet<AllocaInst*, 4> &DeletedAllocas) {
  DeadSplitInsts.clear();
  while (!DeadInsts.empty()) {
    Instruction *I = DeadInsts.pop_back_val();
    DEBUG(dbgs() << "Deleting dead instruction: " << *I << "\n");

    for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI)
      if (Instruction *U = dyn_cast<Instruction>(*OI)) {
        // Zero out the operand and see if it becomes trivially dead.
        *OI = 0;
        if (isInstructionTriviallyDead(U))
          DeadInsts.push_back(U);
      }

    if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
      DeletedAllocas.insert(AI);

    ++NumDeleted;
    I->eraseFromParent();
  }
}

/// \brief Promote the allocas, using the best available technique.
///
/// This attempts to promote whatever allocas have been identified as viable in
/// the PromotableAllocas list. If that list is empty, there is nothing to do.
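/// Allocas land on that list either directly from runOnAlloca (trivially
/// promotable scalars) or from rewriteAllocaPartition once a partition's uses
/// have been successfully rewritten.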
/// If there is a domtree available, we attempt to promote using the full power
/// of mem2reg. Otherwise, we build and use the AllocaPromoter above, which is
/// based on the SSAUpdater utilities. This function returns whether any
/// promotion occurred.
bool SROA::promoteAllocas(Function &F) {
  if (PromotableAllocas.empty())
    return false;

  NumPromoted += PromotableAllocas.size();

  if (DT && !ForceSSAUpdater) {
    DEBUG(dbgs() << "Promoting allocas with mem2reg...\n");
    PromoteMemToReg(PromotableAllocas, *DT);
    PromotableAllocas.clear();
    return true;
  }

  DEBUG(dbgs() << "Promoting allocas with SSAUpdater...\n");
  SSAUpdater SSA;
  DIBuilder DIB(*F.getParent());
  SmallVector<Instruction*, 64> Insts;

  for (unsigned Idx = 0, Size = PromotableAllocas.size(); Idx != Size; ++Idx) {
    AllocaInst *AI = PromotableAllocas[Idx];
    for (Value::use_iterator UI = AI->use_begin(), UE = AI->use_end();
         UI != UE;) {
      Instruction *I = cast<Instruction>(*UI++);
      // FIXME: Currently the SSAUpdater infrastructure doesn't reason about
      // lifetime intrinsics and so we strip them (and the bitcasts+GEPs
      // leading to them) here. Eventually it should use them to optimize the
      // scalar values produced.
      if (isa<BitCastInst>(I) || isa<GetElementPtrInst>(I)) {
        assert(onlyUsedByLifetimeMarkers(I) &&
               "Found a bitcast used outside of a lifetime marker.");
        while (!I->use_empty())
          cast<Instruction>(*I->use_begin())->eraseFromParent();
        I->eraseFromParent();
        continue;
      }
      if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
        assert(II->getIntrinsicID() == Intrinsic::lifetime_start ||
               II->getIntrinsicID() == Intrinsic::lifetime_end);
        II->eraseFromParent();
        continue;
      }

      Insts.push_back(I);
    }
    AllocaPromoter(Insts, SSA, *AI, DIB).run(Insts);
    Insts.clear();
  }

  PromotableAllocas.clear();
  return true;
}

namespace {
  /// \brief A predicate to test whether an alloca belongs to a set.
  class IsAllocaInSet {
    typedef SmallPtrSet<AllocaInst *, 4> SetType;
    const SetType &Set;

  public:
    IsAllocaInSet(const SetType &Set) : Set(Set) {}
    bool operator()(AllocaInst *AI) { return Set.count(AI); }
  };
}

bool SROA::runOnFunction(Function &F) {
  DEBUG(dbgs() << "SROA function: " << F.getName() << "\n");
  C = &F.getContext();
  TD = getAnalysisIfAvailable<TargetData>();
  if (!TD) {
    DEBUG(dbgs() << " Skipping SROA -- no target data!\n");
    return false;
  }
  DT = getAnalysisIfAvailable<DominatorTree>();

  BasicBlock &EntryBB = F.getEntryBlock();
  for (BasicBlock::iterator I = EntryBB.begin(), E = llvm::prior(EntryBB.end());
       I != E; ++I)
    if (AllocaInst *AI = dyn_cast<AllocaInst>(I))
      Worklist.insert(AI);

  bool Changed = false;
  // A set of deleted alloca instruction pointers which should be removed from
  // the list of promotable allocas.
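  // deleteDeadInstructions fills this set in; purging those entries below
  // keeps the promotion step from ever seeing a freed alloca.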
  SmallPtrSet<AllocaInst *, 4> DeletedAllocas;

  while (!Worklist.empty()) {
    Changed |= runOnAlloca(*Worklist.pop_back_val());
    deleteDeadInstructions(DeletedAllocas);
    if (!DeletedAllocas.empty()) {
      PromotableAllocas.erase(std::remove_if(PromotableAllocas.begin(),
                                             PromotableAllocas.end(),
                                             IsAllocaInSet(DeletedAllocas)),
                              PromotableAllocas.end());
      DeletedAllocas.clear();
    }
  }

  Changed |= promoteAllocas(F);

  return Changed;
}

void SROA::getAnalysisUsage(AnalysisUsage &AU) const {
  if (RequiresDomTree)
    AU.addRequired<DominatorTree>();
  AU.setPreservesCFG();
}
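
// A minimal sketch of the pass's end-to-end effect (illustrative IR, not taken
// from an actual test case): given something like
//
//   %a = alloca { i32, i32 }
//   %p = getelementptr inbounds { i32, i32 }* %a, i32 0, i32 1
//   store i32 7, i32* %p
//   %v = load i32* %p
//
// the alloca is partitioned into per-field byte ranges, the load and store are
// rewritten against the slice covering the second field, and that slice is
// then promoted so %v becomes the stored value directly and the alloca is
// deleted. Promotion normally goes through mem2reg; the hidden
// -force-ssa-updater flag consulted in promoteAllocas above switches it to the
// SSAUpdater-based AllocaPromoter path instead.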