// PromoteMemoryToRegister.cpp revision 218893
//===- PromoteMemoryToRegister.cpp - Convert allocas to registers ---------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file promotes memory references to be register references.  It promotes
// alloca instructions which only have loads and stores as uses.  An alloca is
// transformed by using iterated dominator frontiers to place PHI nodes, then
// traversing the function in depth-first order to rewrite loads and stores as
// appropriate.
//
// The algorithm used here is based on:
//
//   Sreedhar and Gao. A linear time algorithm for placing phi-nodes.
//   In Proceedings of the 22nd ACM SIGPLAN-SIGACT Symposium on Principles of
//   Programming Languages
//   POPL '95. ACM, New York, NY, 62-73.
//
// It has been modified to not explicitly use the DJ graph data structure and to
// directly compute pruned SSA using per-variable liveness information.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "mem2reg"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Metadata.h"
#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/DebugInfo.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/CFG.h"
#include <algorithm>
#include <map>
#include <queue>
using namespace llvm;

// Pass-wide statistics, printed by -stats.
STATISTIC(NumLocalPromoted, "Number of alloca's promoted within one block");
STATISTIC(NumSingleStore, "Number of alloca's promoted with a single store");
STATISTIC(NumDeadAlloca, "Number of dead alloca's removed");
STATISTIC(NumPHIInsert, "Number of PHI nodes inserted");

namespace llvm {
// Teach DenseMap how to key on a (BasicBlock*, alloca index) pair; this keys
// the NewPhiNodes map below.
template<>
struct DenseMapInfo<std::pair<BasicBlock*, unsigned> > {
  typedef std::pair<BasicBlock*, unsigned> EltTy;
  static inline EltTy getEmptyKey() {
    return EltTy(reinterpret_cast<BasicBlock*>(-1), ~0U);
  }
  static inline EltTy getTombstoneKey() {
    return EltTy(reinterpret_cast<BasicBlock*>(-2), 0U);
  }
  static unsigned getHashValue(const std::pair<BasicBlock*, unsigned> &Val) {
    return DenseMapInfo<void*>::getHashValue(Val.first) + Val.second*2;
  }
  static bool isEqual(const EltTy &LHS, const EltTy &RHS) {
    return LHS == RHS;
  }
};
}

/// isAllocaPromotable - Return true if this alloca is legal for promotion.
/// This is true if there are only loads and stores to the alloca.
///
bool llvm::isAllocaPromotable(const AllocaInst *AI) {
  // FIXME: If the memory unit is of pointer or integer type, we can permit
  // assignments to subsections of the memory unit.

  // Only allow direct and non-volatile loads and stores...
  for (Value::const_use_iterator UI = AI->use_begin(), UE = AI->use_end();
       UI != UE; ++UI) {   // Loop over all of the uses of the alloca
    const User *U = *UI;
    if (const LoadInst *LI = dyn_cast<LoadInst>(U)) {
      if (LI->isVolatile())
        return false;
    } else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) {
      if (SI->getOperand(0) == AI)
        return false;   // Don't allow a store OF the AI, only INTO the AI.
      if (SI->isVolatile())
        return false;
    } else {
      // Any other user (GEP, bitcast, call, ...) makes the alloca address
      // escape, so it cannot be promoted.
      return false;
    }
  }

  return true;
}

/// FindAllocaDbgDeclare - Finds the llvm.dbg.declare intrinsic describing the
/// alloca 'V', if any.
static DbgDeclareInst *FindAllocaDbgDeclare(Value *V) {
  // dbg.declare references the alloca through an MDNode wrapper; if such a
  // wrapper already exists, scan its users for the declare intrinsic.
  if (MDNode *DebugNode = MDNode::getIfExists(V->getContext(), &V, 1))
    for (Value::use_iterator UI = DebugNode->use_begin(),
         E = DebugNode->use_end(); UI != E; ++UI)
      if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(*UI))
        return DDI;

  return 0;
}

namespace {
  struct AllocaInfo;

  // Data package used by RenamePass(): the block to process, the predecessor
  // we arrived from, and the live value of each alloca on that edge.
  class RenamePassData {
  public:
    typedef std::vector<Value *> ValVector;

    RenamePassData() : BB(NULL), Pred(NULL), Values() {}
    RenamePassData(BasicBlock *B, BasicBlock *P,
                   const ValVector &V) : BB(B), Pred(P), Values(V) {}
    BasicBlock *BB;
    BasicBlock *Pred;
    ValVector Values;

    void swap(RenamePassData &RHS) {
      std::swap(BB, RHS.BB);
      std::swap(Pred, RHS.Pred);
      Values.swap(RHS.Values);
    }
  };

  /// LargeBlockInfo - This assigns and keeps a per-bb relative ordering of
  /// load/store instructions in the block that directly load or store an alloca.
  ///
  /// This functionality is important because it avoids scanning large basic
  /// blocks multiple times when promoting many allocas in the same block.
  class LargeBlockInfo {
    /// InstNumbers - For each instruction that we track, keep the index of the
    /// instruction.  The index starts out as the number of the instruction from
    /// the start of the block.
    DenseMap<const Instruction *, unsigned> InstNumbers;
  public:

    /// isInterestingInstruction - This code only looks at accesses to allocas.
    static bool isInterestingInstruction(const Instruction *I) {
      return (isa<LoadInst>(I) && isa<AllocaInst>(I->getOperand(0))) ||
             (isa<StoreInst>(I) && isa<AllocaInst>(I->getOperand(1)));
    }

    /// getInstructionIndex - Get or calculate the index of the specified
    /// instruction.
    unsigned getInstructionIndex(const Instruction *I) {
      assert(isInterestingInstruction(I) &&
             "Not a load/store to/from an alloca?");

      // If we already have this instruction number, return it.
      DenseMap<const Instruction *, unsigned>::iterator It = InstNumbers.find(I);
      if (It != InstNumbers.end()) return It->second;

      // Scan the whole block to get the instruction.  This accumulates
      // information for every interesting instruction in the block, in order to
      // avoid gratuitous rescans.
      const BasicBlock *BB = I->getParent();
      unsigned InstNo = 0;
      for (BasicBlock::const_iterator BBI = BB->begin(), E = BB->end();
           BBI != E; ++BBI)
        if (isInterestingInstruction(BBI))
          InstNumbers[BBI] = InstNo++;
      It = InstNumbers.find(I);

      assert(It != InstNumbers.end() && "Didn't insert instruction?");
      return It->second;
    }

    // Forget the number of a (deleted) instruction.
    void deleteValue(const Instruction *I) {
      InstNumbers.erase(I);
    }

    void clear() {
      InstNumbers.clear();
    }
  };

  // Implementation class carrying all the state for one promotion run.
  struct PromoteMem2Reg {
    /// Allocas - The alloca instructions being promoted.
    ///
    std::vector<AllocaInst*> Allocas;
    DominatorTree &DT;
    DIFactory *DIF;   // Lazily created in ConvertDebugDeclareToDebugValue.

    /// AST - An AliasSetTracker object to update.  If null, don't update it.
    ///
    AliasSetTracker *AST;

    /// AllocaLookup - Reverse mapping of Allocas.
    ///
    DenseMap<AllocaInst*, unsigned> AllocaLookup;

    /// NewPhiNodes - The PhiNodes we're adding, keyed on (block, alloca index).
    ///
    DenseMap<std::pair<BasicBlock*, unsigned>, PHINode*> NewPhiNodes;

    /// PhiToAllocaMap - For each PHI node, keep track of which entry in Allocas
    /// it corresponds to.
    DenseMap<PHINode*, unsigned> PhiToAllocaMap;

    /// PointerAllocaValues - If we are updating an AliasSetTracker, then for
    /// each alloca that is of pointer type, we keep track of what to copyValue
    /// to the inserted PHI nodes here.
    ///
    std::vector<Value*> PointerAllocaValues;

    /// AllocaDbgDeclares - For each alloca, we keep track of the dbg.declare
    /// intrinsic that describes it, if any, so that we can convert it to a
    /// dbg.value intrinsic if the alloca gets promoted.
    SmallVector<DbgDeclareInst*, 8> AllocaDbgDeclares;

    /// Visited - The set of basic blocks the renamer has already visited.
    ///
    SmallPtrSet<BasicBlock*, 16> Visited;

    /// BBNumbers - Contains a stable numbering of basic blocks to avoid
    /// non-deterministic behavior.
    DenseMap<BasicBlock*, unsigned> BBNumbers;

    /// DomLevels - Maps DomTreeNodes to their level in the dominator tree.
    DenseMap<DomTreeNode*, unsigned> DomLevels;

    /// BBNumPreds - Lazily compute the number of predecessors a block has.
    DenseMap<const BasicBlock*, unsigned> BBNumPreds;
  public:
    PromoteMem2Reg(const std::vector<AllocaInst*> &A, DominatorTree &dt,
                   AliasSetTracker *ast)
      : Allocas(A), DT(dt), DIF(0), AST(ast) {}
    ~PromoteMem2Reg() {
      delete DIF;
    }

    void run();

    /// dominates - Return true if BB1 dominates BB2 using the DominatorTree.
    ///
    bool dominates(BasicBlock *BB1, BasicBlock *BB2) const {
      return DT.dominates(BB1, BB2);
    }

  private:
    // Swap-with-back removal from the Allocas list; decrements the index so
    // the caller's loop revisits the slot now holding the moved element.
    void RemoveFromAllocasList(unsigned &AllocaIdx) {
      Allocas[AllocaIdx] = Allocas.back();
      Allocas.pop_back();
      --AllocaIdx;
    }

    // Cached predecessor count.  The cached value is biased by one so that a
    // zero in the map means "not computed yet".
    unsigned getNumPreds(const BasicBlock *BB) {
      unsigned &NP = BBNumPreds[BB];
      if (NP == 0)
        NP = std::distance(pred_begin(BB), pred_end(BB))+1;
      return NP-1;
    }

    void DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,
                                 AllocaInfo &Info);
    void ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info,
                             const SmallPtrSet<BasicBlock*, 32> &DefBlocks,
                             SmallPtrSet<BasicBlock*, 32> &LiveInBlocks);

    void RewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info,
                                  LargeBlockInfo &LBI);
    void PromoteSingleBlockAlloca(AllocaInst *AI, AllocaInfo &Info,
                                  LargeBlockInfo &LBI);
    void ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, StoreInst *SI);


    void RenamePass(BasicBlock *BB, BasicBlock *Pred,
                    RenamePassData::ValVector &IncVals,
                    std::vector<RenamePassData> &Worklist);
    bool QueuePhiNode(BasicBlock *BB, unsigned AllocaIdx, unsigned &Version);
  };

  // Per-alloca scan results used to pick a promotion strategy.
  struct AllocaInfo {
    SmallVector<BasicBlock*, 32> DefiningBlocks; // Blocks containing a store.
    SmallVector<BasicBlock*, 32> UsingBlocks;    // Blocks containing a load.

    StoreInst *OnlyStore;     // Last store seen; *the* store if only one exists.
    BasicBlock *OnlyBlock;
    bool OnlyUsedInOneBlock;

    Value *AllocaPointerVal;
    DbgDeclareInst *DbgDeclare;

    void clear() {
      DefiningBlocks.clear();
      UsingBlocks.clear();
      OnlyStore = 0;
      OnlyBlock = 0;
      OnlyUsedInOneBlock = true;
      AllocaPointerVal = 0;
      DbgDeclare = 0;
    }

    /// AnalyzeAlloca - Scan the uses of the specified alloca, filling in our
    /// ivars.
    void AnalyzeAlloca(AllocaInst *AI) {
      clear();

      // As we scan the uses of the alloca instruction, keep track of stores,
      // and decide whether all of the loads and stores to the alloca are within
      // the same basic block.
      for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
           UI != E;) {
        Instruction *User = cast<Instruction>(*UI++);

        if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
          // Remember the basic blocks which define new values for the alloca
          DefiningBlocks.push_back(SI->getParent());
          AllocaPointerVal = SI->getOperand(0);
          OnlyStore = SI;
        } else {
          LoadInst *LI = cast<LoadInst>(User);
          // Otherwise it must be a load instruction, keep track of variable
          // reads.
          UsingBlocks.push_back(LI->getParent());
          AllocaPointerVal = LI;
        }

        if (OnlyUsedInOneBlock) {
          if (OnlyBlock == 0)
            OnlyBlock = User->getParent();
          else if (OnlyBlock != User->getParent())
            OnlyUsedInOneBlock = false;
        }
      }

      DbgDeclare = FindAllocaDbgDeclare(AI);
    }
  };

  typedef std::pair<DomTreeNode*, unsigned> DomTreeNodePair;

  // Orders (node, level) pairs by dominator-tree level, for the priority
  // queue used in DetermineInsertionPoint.
  struct DomTreeNodeCompare {
    bool operator()(const DomTreeNodePair &LHS, const DomTreeNodePair &RHS) {
      return LHS.second < RHS.second;
    }
  };
}  // end of anonymous namespace


void PromoteMem2Reg::run() {
  Function &F = *DT.getRoot()->getParent();

  if (AST) PointerAllocaValues.resize(Allocas.size());
  AllocaDbgDeclares.resize(Allocas.size());

  AllocaInfo Info;
  LargeBlockInfo LBI;

  for (unsigned AllocaNum = 0; AllocaNum != Allocas.size(); ++AllocaNum) {
    AllocaInst *AI = Allocas[AllocaNum];

    assert(isAllocaPromotable(AI) &&
           "Cannot promote non-promotable alloca!");
    assert(AI->getParent()->getParent() == &F &&
           "All allocas should be in the same function, which is same as DF!");

    if (AI->use_empty()) {
      // If there are no uses of the alloca, just delete it now.
      if (AST) AST->deleteValue(AI);
      AI->eraseFromParent();

      // Remove the alloca from the Allocas list, since it has been processed
      RemoveFromAllocasList(AllocaNum);
      ++NumDeadAlloca;
      continue;
    }

    // Calculate the set of read and write-locations for each alloca.  This is
    // analogous to finding the 'uses' and 'definitions' of each variable.
    Info.AnalyzeAlloca(AI);

    // If there is only a single store to this value, replace any loads of
    // it that are directly dominated by the definition with the value stored.
    if (Info.DefiningBlocks.size() == 1) {
      RewriteSingleStoreAlloca(AI, Info, LBI);

      // Finally, after the scan, check to see if the store is all that is left.
      if (Info.UsingBlocks.empty()) {
        // Record debuginfo for the store and remove the declaration's debuginfo.
        if (DbgDeclareInst *DDI = Info.DbgDeclare) {
          ConvertDebugDeclareToDebugValue(DDI, Info.OnlyStore);
          DDI->eraseFromParent();
        }
        // Remove the (now dead) store and alloca.
        Info.OnlyStore->eraseFromParent();
        LBI.deleteValue(Info.OnlyStore);

        if (AST) AST->deleteValue(AI);
        AI->eraseFromParent();
        LBI.deleteValue(AI);

        // The alloca has been processed, move on.
        RemoveFromAllocasList(AllocaNum);

        ++NumSingleStore;
        continue;
      }
    }

    // If the alloca is only read and written in one basic block, just perform a
    // linear sweep over the block to eliminate it.
    if (Info.OnlyUsedInOneBlock) {
      PromoteSingleBlockAlloca(AI, Info, LBI);

      // Finally, after the scan, check to see if the stores are all that is
      // left.
      if (Info.UsingBlocks.empty()) {

        // Remove the (now dead) stores and alloca.
        while (!AI->use_empty()) {
          StoreInst *SI = cast<StoreInst>(AI->use_back());
          // Record debuginfo for the store before removing it.
          if (DbgDeclareInst *DDI = Info.DbgDeclare)
            ConvertDebugDeclareToDebugValue(DDI, SI);
          SI->eraseFromParent();
          LBI.deleteValue(SI);
        }

        if (AST) AST->deleteValue(AI);
        AI->eraseFromParent();
        LBI.deleteValue(AI);

        // The alloca has been processed, move on.
        RemoveFromAllocasList(AllocaNum);

        // The alloca's debuginfo can be removed as well.
        if (DbgDeclareInst *DDI = Info.DbgDeclare)
          DDI->eraseFromParent();

        ++NumLocalPromoted;
        continue;
      }
    }

    // If we haven't computed dominator tree levels, do so now.  Levels are
    // computed once with a DFS over the dominator tree and reused for every
    // alloca promoted through the IDF path.
    if (DomLevels.empty()) {
      SmallVector<DomTreeNode*, 32> Worklist;

      DomTreeNode *Root = DT.getRootNode();
      DomLevels[Root] = 0;
      Worklist.push_back(Root);

      while (!Worklist.empty()) {
        DomTreeNode *Node = Worklist.pop_back_val();
        unsigned ChildLevel = DomLevels[Node] + 1;
        for (DomTreeNode::iterator CI = Node->begin(), CE = Node->end();
             CI != CE; ++CI) {
          DomLevels[*CI] = ChildLevel;
          Worklist.push_back(*CI);
        }
      }
    }

    // If we haven't computed a numbering for the BB's in the function, do so
    // now.
    if (BBNumbers.empty()) {
      unsigned ID = 0;
      for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
        BBNumbers[I] = ID++;
    }

    // If we have an AST to keep updated, remember some pointer value that is
    // stored into the alloca.
    if (AST)
      PointerAllocaValues[AllocaNum] = Info.AllocaPointerVal;

    // Remember the dbg.declare intrinsic describing this alloca, if any.
    if (Info.DbgDeclare) AllocaDbgDeclares[AllocaNum] = Info.DbgDeclare;

    // Keep the reverse mapping of the 'Allocas' array for the rename pass.
    AllocaLookup[Allocas[AllocaNum]] = AllocaNum;

    // At this point, we're committed to promoting the alloca using IDF's, and
    // the standard SSA construction algorithm.  Determine which blocks need PHI
    // nodes and see if we can optimize out some work by avoiding insertion of
    // dead phi nodes.
    DetermineInsertionPoint(AI, AllocaNum, Info);
  }

  if (Allocas.empty())
    return; // All of the allocas must have been trivial!

  LBI.clear();


  // Set the incoming values for the basic block to be null values for all of
  // the alloca's.  We do this in case there is a load of a value that has not
  // been stored yet.  In this case, it will get this null value.
  //
  RenamePassData::ValVector Values(Allocas.size());
  for (unsigned i = 0, e = Allocas.size(); i != e; ++i)
    Values[i] = UndefValue::get(Allocas[i]->getAllocatedType());

  // Walks all basic blocks in the function performing the SSA rename algorithm
  // and inserting the phi nodes we marked as necessary
  //
  std::vector<RenamePassData> RenamePassWorkList;
  RenamePassWorkList.push_back(RenamePassData(F.begin(), 0, Values));
  do {
    RenamePassData RPD;
    RPD.swap(RenamePassWorkList.back());
    RenamePassWorkList.pop_back();
    // RenamePass may add new worklist entries.
    RenamePass(RPD.BB, RPD.Pred, RPD.Values, RenamePassWorkList);
  } while (!RenamePassWorkList.empty());

  // The renamer uses the Visited set to avoid infinite loops.  Clear it now.
  Visited.clear();

  // Remove the allocas themselves from the function.
  for (unsigned i = 0, e = Allocas.size(); i != e; ++i) {
    Instruction *A = Allocas[i];

    // If there are any uses of the alloca instructions left, they must be in
    // unreachable basic blocks that were not processed by walking the dominator
    // tree.  Just delete the users now.
    if (!A->use_empty())
      A->replaceAllUsesWith(UndefValue::get(A->getType()));
    if (AST) AST->deleteValue(A);
    A->eraseFromParent();
  }

  // Remove alloca's dbg.declare intrinsics from the function.
  for (unsigned i = 0, e = AllocaDbgDeclares.size(); i != e; ++i)
    if (DbgDeclareInst *DDI = AllocaDbgDeclares[i])
      DDI->eraseFromParent();

  // Loop over all of the PHI nodes and see if there are any that we can get
  // rid of because they merge all of the same incoming values.  This can
  // happen due to undef values coming into the PHI nodes.  This process is
  // iterative, because eliminating one PHI node can cause others to be removed.
  bool EliminatedAPHI = true;
  while (EliminatedAPHI) {
    EliminatedAPHI = false;

    for (DenseMap<std::pair<BasicBlock*, unsigned>, PHINode*>::iterator I =
           NewPhiNodes.begin(), E = NewPhiNodes.end(); I != E;) {
      PHINode *PN = I->second;

      // If this PHI node merges one value and/or undefs, get the value.
      if (Value *V = SimplifyInstruction(PN, 0, &DT)) {
        if (AST && PN->getType()->isPointerTy())
          AST->deleteValue(PN);
        PN->replaceAllUsesWith(V);
        PN->eraseFromParent();
        // Post-increment erase keeps the iterator valid while removing.
        NewPhiNodes.erase(I++);
        EliminatedAPHI = true;
        continue;
      }
      ++I;
    }
  }

  // At this point, the renamer has added entries to PHI nodes for all reachable
  // code.  Unfortunately, there may be unreachable blocks which the renamer
  // hasn't traversed.  If this is the case, the PHI nodes may not
  // have incoming values for all predecessors.  Loop over all PHI nodes we have
  // created, inserting undef values if they are missing any incoming values.
  //
  for (DenseMap<std::pair<BasicBlock*, unsigned>, PHINode*>::iterator I =
         NewPhiNodes.begin(), E = NewPhiNodes.end(); I != E; ++I) {
    // We want to do this once per basic block.  As such, only process a block
    // when we find the PHI that is the first entry in the block.
    PHINode *SomePHI = I->second;
    BasicBlock *BB = SomePHI->getParent();
    if (&BB->front() != SomePHI)
      continue;

    // Only do work here if the PHI nodes are missing incoming values.  We
    // know that all PHI nodes that were inserted in a block will have the same
    // number of incoming values, so we can just check any of them.
    if (SomePHI->getNumIncomingValues() == getNumPreds(BB))
      continue;

    // Get the preds for BB.
    SmallVector<BasicBlock*, 16> Preds(pred_begin(BB), pred_end(BB));

    // Ok, now we know that all of the PHI nodes are missing entries for some
    // basic blocks.  Start by sorting the incoming predecessors for efficient
    // access.
    std::sort(Preds.begin(), Preds.end());

    // Now we loop through all BB's which have entries in SomePHI and remove
    // them from the Preds list.
    for (unsigned i = 0, e = SomePHI->getNumIncomingValues(); i != e; ++i) {
      // Do a log(n) search of the Preds list for the entry we want.
      SmallVector<BasicBlock*, 16>::iterator EntIt =
        std::lower_bound(Preds.begin(), Preds.end(),
                         SomePHI->getIncomingBlock(i));
      assert(EntIt != Preds.end() && *EntIt == SomePHI->getIncomingBlock(i)&&
             "PHI node has entry for a block which is not a predecessor!");

      // Remove the entry
      Preds.erase(EntIt);
    }

    // At this point, the blocks left in the preds list must have dummy
    // entries inserted into every PHI nodes for the block.  Update all the phi
    // nodes in this block that we are inserting (there could be phis before
    // mem2reg runs).
    unsigned NumBadPreds = SomePHI->getNumIncomingValues();
    BasicBlock::iterator BBI = BB->begin();
    while ((SomePHI = dyn_cast<PHINode>(BBI++)) &&
           SomePHI->getNumIncomingValues() == NumBadPreds) {
      Value *UndefVal = UndefValue::get(SomePHI->getType());
      for (unsigned pred = 0, e = Preds.size(); pred != e; ++pred)
        SomePHI->addIncoming(UndefVal, Preds[pred]);
    }
  }

  NewPhiNodes.clear();
}


/// ComputeLiveInBlocks - Determine which blocks the value is live in.  These
/// are blocks which lead to uses.  Knowing this allows us to avoid inserting
/// PHI nodes into blocks which don't lead to uses (thus, the inserted phi nodes
/// would be dead).
void PromoteMem2Reg::
ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info,
                    const SmallPtrSet<BasicBlock*, 32> &DefBlocks,
                    SmallPtrSet<BasicBlock*, 32> &LiveInBlocks) {

  // To determine liveness, we must iterate through the predecessors of blocks
  // where the def is live.  Blocks are added to the worklist if we need to
  // check their predecessors.  Start with all the using blocks.
  SmallVector<BasicBlock*, 64> LiveInBlockWorklist(Info.UsingBlocks.begin(),
                                                   Info.UsingBlocks.end());

  // If any of the using blocks is also a definition block, check to see if the
  // definition occurs before or after the use.  If it happens before the use,
  // the value isn't really live-in.
  for (unsigned i = 0, e = LiveInBlockWorklist.size(); i != e; ++i) {
    BasicBlock *BB = LiveInBlockWorklist[i];
    if (!DefBlocks.count(BB)) continue;

    // Okay, this is a block that both uses and defines the value.  If the first
    // reference to the alloca is a def (store), then we know it isn't live-in.
    for (BasicBlock::iterator I = BB->begin(); ; ++I) {
      if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
        if (SI->getOperand(1) != AI) continue;

        // We found a store to the alloca before a load.  The alloca is not
        // actually live-in here.  Remove it with swap-with-back, adjusting the
        // loop counters so the moved element is revisited.
        LiveInBlockWorklist[i] = LiveInBlockWorklist.back();
        LiveInBlockWorklist.pop_back();
        --i, --e;
        break;
      }

      if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
        if (LI->getOperand(0) != AI) continue;

        // Okay, we found a load before a store to the alloca.  It is actually
        // live into this block.
        break;
      }
    }
  }

  // Now that we have a set of blocks where the phi is live-in, recursively add
  // their predecessors until we find the full region the value is live.
  while (!LiveInBlockWorklist.empty()) {
    BasicBlock *BB = LiveInBlockWorklist.pop_back_val();

    // The block really is live in here, insert it into the set.  If already in
    // the set, then it has already been processed.
    if (!LiveInBlocks.insert(BB))
      continue;

    // Since the value is live into BB, it is either defined in a predecessor or
    // live into it too.  Add the preds to the worklist unless they are a
    // defining block.
    for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
      BasicBlock *P = *PI;

      // The value is not live into a predecessor if it defines the value.
      if (DefBlocks.count(P))
        continue;

      // Otherwise it is, add to the worklist.
      LiveInBlockWorklist.push_back(P);
    }
  }
}

/// DetermineInsertionPoint - At this point, we're committed to promoting the
/// alloca using IDF's, and the standard SSA construction algorithm.  Determine
/// which blocks need phi nodes and see if we can optimize out some work by
/// avoiding insertion of dead phi nodes.
void PromoteMem2Reg::DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum,
                                             AllocaInfo &Info) {
  // Unique the set of defining blocks for efficient lookup.
  SmallPtrSet<BasicBlock*, 32> DefBlocks;
  DefBlocks.insert(Info.DefiningBlocks.begin(), Info.DefiningBlocks.end());

  // Determine which blocks the value is live in.  These are blocks which lead
  // to uses.
  SmallPtrSet<BasicBlock*, 32> LiveInBlocks;
  ComputeLiveInBlocks(AI, Info, DefBlocks, LiveInBlocks);

  // Use a priority queue keyed on dominator tree level so that inserted nodes
  // are handled from the bottom of the dominator tree upwards.
  typedef std::priority_queue<DomTreeNodePair, SmallVector<DomTreeNodePair, 32>,
                              DomTreeNodeCompare> IDFPriorityQueue;
  IDFPriorityQueue PQ;

  // Seed the queue with the defining blocks that exist in the dominator tree
  // (blocks in unreachable code have no DomTreeNode and are skipped).
  for (SmallPtrSet<BasicBlock*, 32>::const_iterator I = DefBlocks.begin(),
       E = DefBlocks.end(); I != E; ++I) {
    if (DomTreeNode *Node = DT.getNode(*I))
      PQ.push(std::make_pair(Node, DomLevels[Node]));
  }

  SmallVector<std::pair<unsigned, BasicBlock*>, 32> DFBlocks;
  SmallPtrSet<DomTreeNode*, 32> Visited;
  SmallVector<DomTreeNode*, 32> Worklist;
  while (!PQ.empty()) {
    DomTreeNodePair RootPair = PQ.top();
    PQ.pop();
    DomTreeNode *Root = RootPair.first;
    unsigned RootLevel = RootPair.second;

    // Walk all dominator tree children of Root, inspecting their CFG edges with
    // targets elsewhere on the dominator tree.  Only targets whose level is at
    // most Root's level are added to the iterated dominance frontier of the
    // definition set.

    Worklist.clear();
    Worklist.push_back(Root);

    while (!Worklist.empty()) {
      DomTreeNode *Node = Worklist.pop_back_val();
      BasicBlock *BB = Node->getBlock();

      for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); SI != SE;
           ++SI) {
        DomTreeNode *SuccNode = DT.getNode(*SI);

        // Quickly skip all CFG edges that are also dominator tree edges instead
        // of catching them below.
        if (SuccNode->getIDom() == Node)
          continue;

        unsigned SuccLevel = DomLevels[SuccNode];
        if (SuccLevel > RootLevel)
          continue;

        if (!Visited.insert(SuccNode))
          continue;

        // Prune PHIs that would be dead: skip blocks the value is not
        // live into.
        BasicBlock *SuccBB = SuccNode->getBlock();
        if (!LiveInBlocks.count(SuccBB))
          continue;

        DFBlocks.push_back(std::make_pair(BBNumbers[SuccBB], SuccBB));
        if (!DefBlocks.count(SuccBB))
          PQ.push(std::make_pair(SuccNode, SuccLevel));
      }

      for (DomTreeNode::iterator CI = Node->begin(), CE = Node->end(); CI != CE;
           ++CI) {
        if (!Visited.count(*CI))
          Worklist.push_back(*CI);
      }
    }
  }

  // Sort by the stable block numbering so insertion order is deterministic.
  if (DFBlocks.size() > 1)
    std::sort(DFBlocks.begin(), DFBlocks.end());

  unsigned CurrentVersion = 0;
  for (unsigned i = 0, e = DFBlocks.size(); i != e; ++i)
    QueuePhiNode(DFBlocks[i].second, AllocaNum, CurrentVersion);
}

/// RewriteSingleStoreAlloca - If there is only a single store to this value,
/// replace any loads of it that are directly dominated by the definition with
/// the value stored.  Loads that are not dominated by the store are recorded
/// in Info.UsingBlocks so the caller falls back to the general algorithm.
void PromoteMem2Reg::RewriteSingleStoreAlloca(AllocaInst *AI,
                                              AllocaInfo &Info,
                                              LargeBlockInfo &LBI) {
  StoreInst *OnlyStore = Info.OnlyStore;
  bool StoringGlobalVal = !isa<Instruction>(OnlyStore->getOperand(0));
  BasicBlock *StoreBB = OnlyStore->getParent();
  int StoreIndex = -1;   // Lazily computed index of the store within its block.

  // Clear out UsingBlocks.  We will reconstruct it here if needed.
  Info.UsingBlocks.clear();

  for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E; ) {
    Instruction *UserInst = cast<Instruction>(*UI++);
    if (!isa<LoadInst>(UserInst)) {
      assert(UserInst == OnlyStore && "Should only have load/stores");
      continue;
    }
    LoadInst *LI = cast<LoadInst>(UserInst);

    // Okay, if we have a load from the alloca, we want to replace it with the
    // only value stored to the alloca.  We can do this if the value is
    // dominated by the store.  If not, we use the rest of the mem2reg machinery
    // to insert the phi nodes as needed.
    if (!StoringGlobalVal) {  // Non-instructions are always dominated.
      if (LI->getParent() == StoreBB) {
        // If we have a use that is in the same block as the store, compare the
        // indices of the two instructions to see which one came first.  If the
        // load came before the store, we can't handle it.
        if (StoreIndex == -1)
          StoreIndex = LBI.getInstructionIndex(OnlyStore);

        if (unsigned(StoreIndex) > LBI.getInstructionIndex(LI)) {
          // Can't handle this load, bail out.
          Info.UsingBlocks.push_back(StoreBB);
          continue;
        }

      } else if (LI->getParent() != StoreBB &&
                 !dominates(StoreBB, LI->getParent())) {
        // If the load and store are in different blocks, use BB dominance to
        // check their relationships.  If the store doesn't dom the use, bail
        // out.
        Info.UsingBlocks.push_back(LI->getParent());
        continue;
      }
    }

    // Otherwise, we *can* safely rewrite this load.
    Value *ReplVal = OnlyStore->getOperand(0);
    // If the replacement value is the load, this must occur in unreachable
    // code.
    if (ReplVal == LI)
      ReplVal = UndefValue::get(LI->getType());
    LI->replaceAllUsesWith(ReplVal);
    if (AST && LI->getType()->isPointerTy())
      AST->deleteValue(LI);
    LI->eraseFromParent();
    LBI.deleteValue(LI);
  }
}

namespace {

/// StoreIndexSearchPredicate - This is a helper predicate used to search by the
/// first element of a pair.
struct StoreIndexSearchPredicate {
  bool operator()(const std::pair<unsigned, StoreInst*> &LHS,
                  const std::pair<unsigned, StoreInst*> &RHS) {
    return LHS.first < RHS.first;
  }
};

}

/// PromoteSingleBlockAlloca - Many allocas are only used within a single basic
/// block.  If this is the case, avoid traversing the CFG and inserting a lot of
/// potentially useless PHI nodes by just performing a single linear pass over
/// the basic block using the Alloca.
///
/// If we cannot promote this alloca (because it is read before it is written),
/// the offending block is recorded in Info.UsingBlocks so the caller falls
/// back to the general machinery.  This is necessary in cases where, due to
/// control flow, the alloca is potentially undefined on some control flow
/// paths.  e.g. code like this is potentially correct:
///
///   for (...) { if (c) { A = undef; undef = B; } }
///
/// ... so long as A is not used before undef is set.
///
void PromoteMem2Reg::PromoteSingleBlockAlloca(AllocaInst *AI, AllocaInfo &Info,
                                              LargeBlockInfo &LBI) {
  // The trickiest case to handle is when we have large blocks. Because of this,
  // this code is optimized assuming that large blocks happen.  This does not
  // significantly pessimize the small block case.  This uses LargeBlockInfo to
  // make it efficient to get the index of various operations in the block.

  // Clear out UsingBlocks.  We will reconstruct it here if needed.
  Info.UsingBlocks.clear();

  // Walk the use-def list of the alloca, getting the locations of all stores.
  typedef SmallVector<std::pair<unsigned, StoreInst*>, 64> StoresByIndexTy;
  StoresByIndexTy StoresByIndex;

  for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end();
       UI != E; ++UI)
    if (StoreInst *SI = dyn_cast<StoreInst>(*UI))
      StoresByIndex.push_back(std::make_pair(LBI.getInstructionIndex(SI), SI));

  // If there are no stores to the alloca, just replace any loads with undef.
899 if (StoresByIndex.empty()) { 900 for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;) 901 if (LoadInst *LI = dyn_cast<LoadInst>(*UI++)) { 902 LI->replaceAllUsesWith(UndefValue::get(LI->getType())); 903 if (AST && LI->getType()->isPointerTy()) 904 AST->deleteValue(LI); 905 LBI.deleteValue(LI); 906 LI->eraseFromParent(); 907 } 908 return; 909 } 910 911 // Sort the stores by their index, making it efficient to do a lookup with a 912 // binary search. 913 std::sort(StoresByIndex.begin(), StoresByIndex.end()); 914 915 // Walk all of the loads from this alloca, replacing them with the nearest 916 // store above them, if any. 917 for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;) { 918 LoadInst *LI = dyn_cast<LoadInst>(*UI++); 919 if (!LI) continue; 920 921 unsigned LoadIdx = LBI.getInstructionIndex(LI); 922 923 // Find the nearest store that has a lower than this load. 924 StoresByIndexTy::iterator I = 925 std::lower_bound(StoresByIndex.begin(), StoresByIndex.end(), 926 std::pair<unsigned, StoreInst*>(LoadIdx, static_cast<StoreInst*>(0)), 927 StoreIndexSearchPredicate()); 928 929 // If there is no store before this load, then we can't promote this load. 930 if (I == StoresByIndex.begin()) { 931 // Can't handle this load, bail out. 932 Info.UsingBlocks.push_back(LI->getParent()); 933 continue; 934 } 935 936 // Otherwise, there was a store before this load, the load takes its value. 937 --I; 938 LI->replaceAllUsesWith(I->second->getOperand(0)); 939 if (AST && LI->getType()->isPointerTy()) 940 AST->deleteValue(LI); 941 LI->eraseFromParent(); 942 LBI.deleteValue(LI); 943 } 944} 945 946// Inserts a llvm.dbg.value instrinsic before the stores to an alloca'd value 947// that has an associated llvm.dbg.decl intrinsic. 
948void PromoteMem2Reg::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, 949 StoreInst *SI) { 950 DIVariable DIVar(DDI->getVariable()); 951 if (!DIVar.Verify()) 952 return; 953 954 if (!DIF) 955 DIF = new DIFactory(*SI->getParent()->getParent()->getParent()); 956 Instruction *DbgVal = DIF->InsertDbgValueIntrinsic(SI->getOperand(0), 0, 957 DIVar, SI); 958 959 // Propagate any debug metadata from the store onto the dbg.value. 960 DebugLoc SIDL = SI->getDebugLoc(); 961 if (!SIDL.isUnknown()) 962 DbgVal->setDebugLoc(SIDL); 963 // Otherwise propagate debug metadata from dbg.declare. 964 else 965 DbgVal->setDebugLoc(DDI->getDebugLoc()); 966} 967 968// QueuePhiNode - queues a phi-node to be added to a basic-block for a specific 969// Alloca returns true if there wasn't already a phi-node for that variable 970// 971bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo, 972 unsigned &Version) { 973 // Look up the basic-block in question. 974 PHINode *&PN = NewPhiNodes[std::make_pair(BB, AllocaNo)]; 975 976 // If the BB already has a phi node added for the i'th alloca then we're done! 977 if (PN) return false; 978 979 // Create a PhiNode using the dereferenced type... and add the phi-node to the 980 // BasicBlock. 981 PN = PHINode::Create(Allocas[AllocaNo]->getAllocatedType(), 982 Allocas[AllocaNo]->getName() + "." + Twine(Version++), 983 BB->begin()); 984 ++NumPHIInsert; 985 PhiToAllocaMap[PN] = AllocaNo; 986 PN->reserveOperandSpace(getNumPreds(BB)); 987 988 if (AST && PN->getType()->isPointerTy()) 989 AST->copyValue(PointerAllocaValues[AllocaNo], PN); 990 991 return true; 992} 993 994// RenamePass - Recursively traverse the CFG of the function, renaming loads and 995// stores to the allocas which we are promoting. IncomingVals indicates what 996// value each Alloca contains on exit from the predecessor block Pred. 
//
void PromoteMem2Reg::RenamePass(BasicBlock *BB, BasicBlock *Pred,
                                RenamePassData::ValVector &IncomingVals,
                                std::vector<RenamePassData> &Worklist) {
NextIteration:
  // If we are inserting any phi nodes into this BB, they will already be in
  // the block (QueuePhiNode placed them at BB->begin()).
  if (PHINode *APN = dyn_cast<PHINode>(BB->begin())) {
    // If we have PHI nodes to update, compute the number of edges from Pred to
    // BB.
    if (PhiToAllocaMap.count(APN)) {
      // We want to be able to distinguish between PHI nodes being inserted by
      // this invocation of mem2reg from those phi nodes that already existed
      // in the IR before mem2reg was run.  We determine that APN is being
      // inserted because it is missing incoming edges.  All other PHI nodes
      // being inserted by this pass of mem2reg will have the same number of
      // incoming operands so far.  Remember this count.
      unsigned NewPHINumOperands = APN->getNumOperands();

      // Count the Pred->BB edges; a switch terminator can produce several,
      // and the phi needs one incoming entry per edge.
      unsigned NumEdges = 0;
      for (succ_iterator I = succ_begin(Pred), E = succ_end(Pred); I != E; ++I)
        if (*I == BB)
          ++NumEdges;
      assert(NumEdges && "Must be at least one edge from Pred to BB!");

      // Add entries for all the phis.
      BasicBlock::iterator PNI = BB->begin();
      do {
        unsigned AllocaNo = PhiToAllocaMap[APN];

        // Add N incoming values to the PHI node, one per Pred->BB edge.
        for (unsigned i = 0; i != NumEdges; ++i)
          APN->addIncoming(IncomingVals[AllocaNo], Pred);

        // The currently active variable for this block is now the PHI.
        IncomingVals[AllocaNo] = APN;

        // Get the next phi node.
        ++PNI;
        APN = dyn_cast<PHINode>(PNI);
        if (APN == 0) break;

        // Verify that it is missing entries.  If not, it is not being
        // inserted by this mem2reg invocation so we want to ignore it.
      } while (APN->getNumOperands() == NewPHINumOperands);
    }
  }

  // Don't revisit blocks.  (insert returns false when BB was already there;
  // the phi-filling above still had to run for this incoming edge.)
  if (!Visited.insert(BB)) return;

  // Rewrite loads/stores of promoted allocas in this block.  I is taken
  // before II is advanced so erasing I does not invalidate the iterator.
  for (BasicBlock::iterator II = BB->begin(); !isa<TerminatorInst>(II); ) {
    Instruction *I = II++; // get the instruction, increment iterator

    if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
      AllocaInst *Src = dyn_cast<AllocaInst>(LI->getPointerOperand());
      if (!Src) continue;

      // Only touch loads of allocas we are actually promoting.
      DenseMap<AllocaInst*, unsigned>::iterator AI = AllocaLookup.find(Src);
      if (AI == AllocaLookup.end()) continue;

      Value *V = IncomingVals[AI->second];

      // Anything using the load now uses the current value.
      LI->replaceAllUsesWith(V);
      if (AST && LI->getType()->isPointerTy())
        AST->deleteValue(LI);
      BB->getInstList().erase(LI);
    } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
      // Delete this instruction and mark the name as the current holder of
      // the value.
      AllocaInst *Dest = dyn_cast<AllocaInst>(SI->getPointerOperand());
      if (!Dest) continue;

      DenseMap<AllocaInst *, unsigned>::iterator ai = AllocaLookup.find(Dest);
      if (ai == AllocaLookup.end())
        continue;

      // what value were we writing?
      IncomingVals[ai->second] = SI->getOperand(0);
      // Record debuginfo for the store before removing it.
      if (DbgDeclareInst *DDI = AllocaDbgDeclares[ai->second])
        ConvertDebugDeclareToDebugValue(DDI, SI);
      BB->getInstList().erase(SI);
    }
  }

  // 'Recurse' to our successors.
  succ_iterator I = succ_begin(BB), E = succ_end(BB);
  if (I == E) return;

  // Keep track of the successors so we don't visit the same successor twice.
  SmallPtrSet<BasicBlock*, 8> VisitedSuccs;

  // Handle the first successor without using the worklist: instead of pushing
  // a RenamePassData and returning, update Pred/BB in place and loop via the
  // goto below (manual tail-"recursion" to avoid copying IncomingVals).
  VisitedSuccs.insert(*I);
  Pred = BB;
  BB = *I;
  ++I;

  for (; I != E; ++I)
    if (VisitedSuccs.insert(*I))
      Worklist.push_back(RenamePassData(*I, Pred, IncomingVals));

  goto NextIteration;
}

/// PromoteMemToReg - Promote the specified list of alloca instructions into
/// scalar registers, inserting PHI nodes as appropriate.  This function does
/// not modify the CFG of the function at all.  All allocas must be from the
/// same function.
///
/// If AST is specified, the specified tracker is updated to reflect changes
/// made to the IR.
///
void llvm::PromoteMemToReg(const std::vector<AllocaInst*> &Allocas,
                           DominatorTree &DT, AliasSetTracker *AST) {
  // If there is nothing to do, bail out...
  if (Allocas.empty()) return;

  PromoteMem2Reg(Allocas, DT, AST).run();
}