PromoteMemoryToRegister.cpp revision 210299
159415Sobrien//===- PromoteMemoryToRegister.cpp - Convert allocas to registers ---------===// 259243Sobrien// 359243Sobrien// The LLVM Compiler Infrastructure 459243Sobrien// 559243Sobrien// This file is distributed under the University of Illinois Open Source 659243Sobrien// License. See LICENSE.TXT for details. 759243Sobrien// 859243Sobrien//===----------------------------------------------------------------------===// 959243Sobrien// 1059243Sobrien// This file promotes memory references to be register references. It promotes 1159243Sobrien// alloca instructions which only have loads and stores as uses. An alloca is 1259243Sobrien// transformed by using dominator frontiers to place PHI nodes, then traversing 1359243Sobrien// the function in depth-first order to rewrite loads and stores as appropriate. 1459243Sobrien// This is just the standard SSA construction algorithm to construct "pruned" 1559243Sobrien// SSA form. 1659243Sobrien// 1759243Sobrien//===----------------------------------------------------------------------===// 1859243Sobrien 1959243Sobrien#define DEBUG_TYPE "mem2reg" 2059243Sobrien#include "llvm/Transforms/Utils/PromoteMemToReg.h" 2159243Sobrien#include "llvm/Constants.h" 2259243Sobrien#include "llvm/DerivedTypes.h" 2359243Sobrien#include "llvm/Function.h" 2459243Sobrien#include "llvm/Instructions.h" 2559243Sobrien#include "llvm/IntrinsicInst.h" 2659243Sobrien#include "llvm/Metadata.h" 2759243Sobrien#include "llvm/Analysis/DebugInfo.h" 2859243Sobrien#include "llvm/Analysis/Dominators.h" 2959243Sobrien#include "llvm/Analysis/AliasSetTracker.h" 3059243Sobrien#include "llvm/ADT/DenseMap.h" 3159243Sobrien#include "llvm/ADT/SmallPtrSet.h" 3259243Sobrien#include "llvm/ADT/SmallVector.h" 3359243Sobrien#include "llvm/ADT/Statistic.h" 3459243Sobrien#include "llvm/ADT/STLExtras.h" 3559243Sobrien#include "llvm/Support/CFG.h" 3659243Sobrien#include <algorithm> 3759243Sobrienusing namespace llvm; 3859243Sobrien 3959243SobrienSTATISTIC(NumLocalPromoted, "Number of alloca's promoted within one block"); 4059243SobrienSTATISTIC(NumSingleStore, "Number of alloca's promoted with a single store"); 4159243SobrienSTATISTIC(NumDeadAlloca, "Number of dead alloca's removed"); 4259415SobrienSTATISTIC(NumPHIInsert, "Number of PHI nodes inserted"); 4359243Sobrien 4459243Sobriennamespace llvm { 4559243Sobrientemplate<> 4659243Sobrienstruct DenseMapInfo<std::pair<BasicBlock*, unsigned> > { 4759243Sobrien typedef std::pair<BasicBlock*, unsigned> EltTy; 4859243Sobrien static inline EltTy getEmptyKey() { 4959243Sobrien return EltTy(reinterpret_cast<BasicBlock*>(-1), ~0U); 5059243Sobrien } 5159243Sobrien static inline EltTy getTombstoneKey() { 5259243Sobrien return EltTy(reinterpret_cast<BasicBlock*>(-2), 0U); 5359243Sobrien } 5459243Sobrien static unsigned getHashValue(const std::pair<BasicBlock*, unsigned> &Val) { 5559243Sobrien return DenseMapInfo<void*>::getHashValue(Val.first) + Val.second*2; 5659243Sobrien } 5759243Sobrien static bool isEqual(const EltTy &LHS, const EltTy &RHS) { 5859243Sobrien return LHS == RHS; 5959243Sobrien } 6059243Sobrien}; 6159243Sobrien} 6259243Sobrien 6359243Sobrien/// isAllocaPromotable - Return true if this alloca is legal for promotion. 6459243Sobrien/// This is true if there are only loads and stores to the alloca. 6559243Sobrien/// 6659243Sobrienbool llvm::isAllocaPromotable(const AllocaInst *AI) { 6759243Sobrien // FIXME: If the memory unit is of pointer or integer type, we can permit 6859243Sobrien // assignments to subsections of the memory unit. 6959243Sobrien 7059243Sobrien // Only allow direct and non-volatile loads and stores... 7159243Sobrien for (Value::const_use_iterator UI = AI->use_begin(), UE = AI->use_end(); 7259243Sobrien UI != UE; ++UI) { // Loop over all of the uses of the alloca 7359243Sobrien const User *U = *UI; 7459243Sobrien if (const LoadInst *LI = dyn_cast<LoadInst>(U)) { 7559243Sobrien if (LI->isVolatile()) 7659243Sobrien return false; 7759243Sobrien } else if (const StoreInst *SI = dyn_cast<StoreInst>(U)) { 7859243Sobrien if (SI->getOperand(0) == AI) 7959243Sobrien return false; // Don't allow a store OF the AI, only INTO the AI. 8059243Sobrien if (SI->isVolatile()) 8159243Sobrien return false; 8259243Sobrien } else { 8359243Sobrien return false; 8459243Sobrien } 8559243Sobrien } 8659243Sobrien 8759243Sobrien return true; 8859243Sobrien} 8959243Sobrien 9059243Sobrien/// FindAllocaDbgDeclare - Finds the llvm.dbg.declare intrinsic describing the 9159243Sobrien/// alloca 'V', if any. 9259243Sobrienstatic DbgDeclareInst *FindAllocaDbgDeclare(Value *V) { 9359243Sobrien if (MDNode *DebugNode = MDNode::getIfExists(V->getContext(), &V, 1)) 9459243Sobrien for (Value::use_iterator UI = DebugNode->use_begin(), 9559243Sobrien E = DebugNode->use_end(); UI != E; ++UI) 9659243Sobrien if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(*UI)) 9759243Sobrien return DDI; 9859243Sobrien 9959243Sobrien return 0; 10059243Sobrien} 10159243Sobrien 10259243Sobriennamespace { 10359243Sobrien struct AllocaInfo; 10459243Sobrien 10559243Sobrien // Data package used by RenamePass() 10659243Sobrien class RenamePassData { 10759243Sobrien public: 10859243Sobrien typedef std::vector<Value *> ValVector; 10959243Sobrien 11059243Sobrien RenamePassData() : BB(NULL), Pred(NULL), Values() {} 11159243Sobrien RenamePassData(BasicBlock *B, BasicBlock *P, 11259243Sobrien const ValVector &V) : BB(B), Pred(P), Values(V) {} 11359243Sobrien BasicBlock *BB; 11459243Sobrien BasicBlock *Pred; 11559243Sobrien ValVector Values; 11659243Sobrien 11759243Sobrien void swap(RenamePassData &RHS) { 11859243Sobrien std::swap(BB, RHS.BB); 11959243Sobrien std::swap(Pred, RHS.Pred); 12059243Sobrien Values.swap(RHS.Values); 12159243Sobrien } 12259243Sobrien }; 12359243Sobrien 12459243Sobrien /// LargeBlockInfo - This assigns and keeps a per-bb relative ordering of 12559243Sobrien /// load/store instructions in the block that directly load or store an alloca. 12659243Sobrien /// 12759243Sobrien /// This functionality is important because it avoids scanning large basic 12859243Sobrien /// blocks multiple times when promoting many allocas in the same block. 12959243Sobrien class LargeBlockInfo { 13059243Sobrien /// InstNumbers - For each instruction that we track, keep the index of the 13159243Sobrien /// instruction. The index starts out as the number of the instruction from 13259243Sobrien /// the start of the block. 13359243Sobrien DenseMap<const Instruction *, unsigned> InstNumbers; 13459243Sobrien public: 13559243Sobrien 13659243Sobrien /// isInterestingInstruction - This code only looks at accesses to allocas. 13759243Sobrien static bool isInterestingInstruction(const Instruction *I) { 13859243Sobrien return (isa<LoadInst>(I) && isa<AllocaInst>(I->getOperand(0))) || 13959243Sobrien (isa<StoreInst>(I) && isa<AllocaInst>(I->getOperand(1))); 14059243Sobrien } 14159243Sobrien 14259243Sobrien /// getInstructionIndex - Get or calculate the index of the specified 14359243Sobrien /// instruction. 14459243Sobrien unsigned getInstructionIndex(const Instruction *I) { 14559243Sobrien assert(isInterestingInstruction(I) && 14659243Sobrien "Not a load/store to/from an alloca?"); 14759243Sobrien 14859243Sobrien // If we already have this instruction number, return it. 14959243Sobrien DenseMap<const Instruction *, unsigned>::iterator It = InstNumbers.find(I); 15059243Sobrien if (It != InstNumbers.end()) return It->second; 15159243Sobrien 15259243Sobrien // Scan the whole block to get the instruction. This accumulates 15359243Sobrien // information for every interesting instruction in the block, in order to 15459243Sobrien // avoid gratuitus rescans. 15559415Sobrien const BasicBlock *BB = I->getParent(); 15659243Sobrien unsigned InstNo = 0; 15759243Sobrien for (BasicBlock::const_iterator BBI = BB->begin(), E = BB->end(); 15859243Sobrien BBI != E; ++BBI) 15959243Sobrien if (isInterestingInstruction(BBI)) 16059243Sobrien InstNumbers[BBI] = InstNo++; 16159243Sobrien It = InstNumbers.find(I); 16259243Sobrien 16359243Sobrien assert(It != InstNumbers.end() && "Didn't insert instruction?"); 16459243Sobrien return It->second; 16559243Sobrien } 16659243Sobrien 16759243Sobrien void deleteValue(const Instruction *I) { 16859243Sobrien InstNumbers.erase(I); 16959243Sobrien } 17059243Sobrien 17159243Sobrien void clear() { 17259243Sobrien InstNumbers.clear(); 17359243Sobrien } 17459243Sobrien }; 17559243Sobrien 17659243Sobrien struct PromoteMem2Reg { 17759243Sobrien /// Allocas - The alloca instructions being promoted. 17859243Sobrien /// 17959243Sobrien std::vector<AllocaInst*> Allocas; 18059243Sobrien DominatorTree &DT; 18159243Sobrien DominanceFrontier &DF; 18259243Sobrien DIFactory *DIF; 18359243Sobrien 18459243Sobrien /// AST - An AliasSetTracker object to update. If null, don't update it. 18559243Sobrien /// 18659243Sobrien AliasSetTracker *AST; 18759243Sobrien 18859243Sobrien /// AllocaLookup - Reverse mapping of Allocas. 18959243Sobrien /// 19059243Sobrien std::map<AllocaInst*, unsigned> AllocaLookup; 19159243Sobrien 19259243Sobrien /// NewPhiNodes - The PhiNodes we're adding. 19359243Sobrien /// 19459243Sobrien DenseMap<std::pair<BasicBlock*, unsigned>, PHINode*> NewPhiNodes; 19559243Sobrien 19659243Sobrien /// PhiToAllocaMap - For each PHI node, keep track of which entry in Allocas 19759243Sobrien /// it corresponds to. 19859243Sobrien DenseMap<PHINode*, unsigned> PhiToAllocaMap; 19959243Sobrien 20059243Sobrien /// PointerAllocaValues - If we are updating an AliasSetTracker, then for 20159243Sobrien /// each alloca that is of pointer type, we keep track of what to copyValue 20259243Sobrien /// to the inserted PHI nodes here. 20359243Sobrien /// 20459243Sobrien std::vector<Value*> PointerAllocaValues; 20559243Sobrien 20659243Sobrien /// AllocaDbgDeclares - For each alloca, we keep track of the dbg.declare 20759243Sobrien /// intrinsic that describes it, if any, so that we can convert it to a 208 /// dbg.value intrinsic if the alloca gets promoted. 209 SmallVector<DbgDeclareInst*, 8> AllocaDbgDeclares; 210 211 /// Visited - The set of basic blocks the renamer has already visited. 212 /// 213 SmallPtrSet<BasicBlock*, 16> Visited; 214 215 /// BBNumbers - Contains a stable numbering of basic blocks to avoid 216 /// non-determinstic behavior. 217 DenseMap<BasicBlock*, unsigned> BBNumbers; 218 219 /// BBNumPreds - Lazily compute the number of predecessors a block has. 220 DenseMap<const BasicBlock*, unsigned> BBNumPreds; 221 public: 222 PromoteMem2Reg(const std::vector<AllocaInst*> &A, DominatorTree &dt, 223 DominanceFrontier &df, AliasSetTracker *ast) 224 : Allocas(A), DT(dt), DF(df), DIF(0), AST(ast) {} 225 ~PromoteMem2Reg() { 226 delete DIF; 227 } 228 229 void run(); 230 231 /// properlyDominates - Return true if I1 properly dominates I2. 232 /// 233 bool properlyDominates(Instruction *I1, Instruction *I2) const { 234 if (InvokeInst *II = dyn_cast<InvokeInst>(I1)) 235 I1 = II->getNormalDest()->begin(); 236 return DT.properlyDominates(I1->getParent(), I2->getParent()); 237 } 238 239 /// dominates - Return true if BB1 dominates BB2 using the DominatorTree. 240 /// 241 bool dominates(BasicBlock *BB1, BasicBlock *BB2) const { 242 return DT.dominates(BB1, BB2); 243 } 244 245 private: 246 void RemoveFromAllocasList(unsigned &AllocaIdx) { 247 Allocas[AllocaIdx] = Allocas.back(); 248 Allocas.pop_back(); 249 --AllocaIdx; 250 } 251 252 unsigned getNumPreds(const BasicBlock *BB) { 253 unsigned &NP = BBNumPreds[BB]; 254 if (NP == 0) 255 NP = std::distance(pred_begin(BB), pred_end(BB))+1; 256 return NP-1; 257 } 258 259 void DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum, 260 AllocaInfo &Info); 261 void ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info, 262 const SmallPtrSet<BasicBlock*, 32> &DefBlocks, 263 SmallPtrSet<BasicBlock*, 32> &LiveInBlocks); 264 265 void RewriteSingleStoreAlloca(AllocaInst *AI, AllocaInfo &Info, 266 LargeBlockInfo &LBI); 267 void PromoteSingleBlockAlloca(AllocaInst *AI, AllocaInfo &Info, 268 LargeBlockInfo &LBI); 269 void ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, StoreInst *SI); 270 271 272 void RenamePass(BasicBlock *BB, BasicBlock *Pred, 273 RenamePassData::ValVector &IncVals, 274 std::vector<RenamePassData> &Worklist); 275 bool QueuePhiNode(BasicBlock *BB, unsigned AllocaIdx, unsigned &Version, 276 SmallPtrSet<PHINode*, 16> &InsertedPHINodes); 277 }; 278 279 struct AllocaInfo { 280 std::vector<BasicBlock*> DefiningBlocks; 281 std::vector<BasicBlock*> UsingBlocks; 282 283 StoreInst *OnlyStore; 284 BasicBlock *OnlyBlock; 285 bool OnlyUsedInOneBlock; 286 287 Value *AllocaPointerVal; 288 DbgDeclareInst *DbgDeclare; 289 290 void clear() { 291 DefiningBlocks.clear(); 292 UsingBlocks.clear(); 293 OnlyStore = 0; 294 OnlyBlock = 0; 295 OnlyUsedInOneBlock = true; 296 AllocaPointerVal = 0; 297 DbgDeclare = 0; 298 } 299 300 /// AnalyzeAlloca - Scan the uses of the specified alloca, filling in our 301 /// ivars. 302 void AnalyzeAlloca(AllocaInst *AI) { 303 clear(); 304 305 // As we scan the uses of the alloca instruction, keep track of stores, 306 // and decide whether all of the loads and stores to the alloca are within 307 // the same basic block. 308 for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); 309 UI != E;) { 310 Instruction *User = cast<Instruction>(*UI++); 311 312 if (StoreInst *SI = dyn_cast<StoreInst>(User)) { 313 // Remember the basic blocks which define new values for the alloca 314 DefiningBlocks.push_back(SI->getParent()); 315 AllocaPointerVal = SI->getOperand(0); 316 OnlyStore = SI; 317 } else { 318 LoadInst *LI = cast<LoadInst>(User); 319 // Otherwise it must be a load instruction, keep track of variable 320 // reads. 321 UsingBlocks.push_back(LI->getParent()); 322 AllocaPointerVal = LI; 323 } 324 325 if (OnlyUsedInOneBlock) { 326 if (OnlyBlock == 0) 327 OnlyBlock = User->getParent(); 328 else if (OnlyBlock != User->getParent()) 329 OnlyUsedInOneBlock = false; 330 } 331 } 332 333 DbgDeclare = FindAllocaDbgDeclare(AI); 334 } 335 }; 336} // end of anonymous namespace 337 338 339void PromoteMem2Reg::run() { 340 Function &F = *DF.getRoot()->getParent(); 341 342 if (AST) PointerAllocaValues.resize(Allocas.size()); 343 AllocaDbgDeclares.resize(Allocas.size()); 344 345 AllocaInfo Info; 346 LargeBlockInfo LBI; 347 348 for (unsigned AllocaNum = 0; AllocaNum != Allocas.size(); ++AllocaNum) { 349 AllocaInst *AI = Allocas[AllocaNum]; 350 351 assert(isAllocaPromotable(AI) && 352 "Cannot promote non-promotable alloca!"); 353 assert(AI->getParent()->getParent() == &F && 354 "All allocas should be in the same function, which is same as DF!"); 355 356 if (AI->use_empty()) { 357 // If there are no uses of the alloca, just delete it now. 358 if (AST) AST->deleteValue(AI); 359 AI->eraseFromParent(); 360 361 // Remove the alloca from the Allocas list, since it has been processed 362 RemoveFromAllocasList(AllocaNum); 363 ++NumDeadAlloca; 364 continue; 365 } 366 367 // Calculate the set of read and write-locations for each alloca. This is 368 // analogous to finding the 'uses' and 'definitions' of each variable. 369 Info.AnalyzeAlloca(AI); 370 371 // If there is only a single store to this value, replace any loads of 372 // it that are directly dominated by the definition with the value stored. 373 if (Info.DefiningBlocks.size() == 1) { 374 RewriteSingleStoreAlloca(AI, Info, LBI); 375 376 // Finally, after the scan, check to see if the store is all that is left. 377 if (Info.UsingBlocks.empty()) { 378 // Record debuginfo for the store and remove the declaration's debuginfo. 379 if (DbgDeclareInst *DDI = Info.DbgDeclare) { 380 ConvertDebugDeclareToDebugValue(DDI, Info.OnlyStore); 381 DDI->eraseFromParent(); 382 } 383 // Remove the (now dead) store and alloca. 384 Info.OnlyStore->eraseFromParent(); 385 LBI.deleteValue(Info.OnlyStore); 386 387 if (AST) AST->deleteValue(AI); 388 AI->eraseFromParent(); 389 LBI.deleteValue(AI); 390 391 // The alloca has been processed, move on. 392 RemoveFromAllocasList(AllocaNum); 393 394 ++NumSingleStore; 395 continue; 396 } 397 } 398 399 // If the alloca is only read and written in one basic block, just perform a 400 // linear sweep over the block to eliminate it. 401 if (Info.OnlyUsedInOneBlock) { 402 PromoteSingleBlockAlloca(AI, Info, LBI); 403 404 // Finally, after the scan, check to see if the stores are all that is 405 // left. 406 if (Info.UsingBlocks.empty()) { 407 408 // Remove the (now dead) stores and alloca. 409 while (!AI->use_empty()) { 410 StoreInst *SI = cast<StoreInst>(AI->use_back()); 411 // Record debuginfo for the store before removing it. 412 if (DbgDeclareInst *DDI = Info.DbgDeclare) 413 ConvertDebugDeclareToDebugValue(DDI, SI); 414 SI->eraseFromParent(); 415 LBI.deleteValue(SI); 416 } 417 418 if (AST) AST->deleteValue(AI); 419 AI->eraseFromParent(); 420 LBI.deleteValue(AI); 421 422 // The alloca has been processed, move on. 423 RemoveFromAllocasList(AllocaNum); 424 425 // The alloca's debuginfo can be removed as well. 426 if (DbgDeclareInst *DDI = Info.DbgDeclare) 427 DDI->eraseFromParent(); 428 429 ++NumLocalPromoted; 430 continue; 431 } 432 } 433 434 // If we haven't computed a numbering for the BB's in the function, do so 435 // now. 436 if (BBNumbers.empty()) { 437 unsigned ID = 0; 438 for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) 439 BBNumbers[I] = ID++; 440 } 441 442 // If we have an AST to keep updated, remember some pointer value that is 443 // stored into the alloca. 444 if (AST) 445 PointerAllocaValues[AllocaNum] = Info.AllocaPointerVal; 446 447 // Remember the dbg.declare intrinsic describing this alloca, if any. 448 if (Info.DbgDeclare) AllocaDbgDeclares[AllocaNum] = Info.DbgDeclare; 449 450 // Keep the reverse mapping of the 'Allocas' array for the rename pass. 451 AllocaLookup[Allocas[AllocaNum]] = AllocaNum; 452 453 // At this point, we're committed to promoting the alloca using IDF's, and 454 // the standard SSA construction algorithm. Determine which blocks need PHI 455 // nodes and see if we can optimize out some work by avoiding insertion of 456 // dead phi nodes. 457 DetermineInsertionPoint(AI, AllocaNum, Info); 458 } 459 460 if (Allocas.empty()) 461 return; // All of the allocas must have been trivial! 462 463 LBI.clear(); 464 465 466 // Set the incoming values for the basic block to be null values for all of 467 // the alloca's. We do this in case there is a load of a value that has not 468 // been stored yet. In this case, it will get this null value. 469 // 470 RenamePassData::ValVector Values(Allocas.size()); 471 for (unsigned i = 0, e = Allocas.size(); i != e; ++i) 472 Values[i] = UndefValue::get(Allocas[i]->getAllocatedType()); 473 474 // Walks all basic blocks in the function performing the SSA rename algorithm 475 // and inserting the phi nodes we marked as necessary 476 // 477 std::vector<RenamePassData> RenamePassWorkList; 478 RenamePassWorkList.push_back(RenamePassData(F.begin(), 0, Values)); 479 do { 480 RenamePassData RPD; 481 RPD.swap(RenamePassWorkList.back()); 482 RenamePassWorkList.pop_back(); 483 // RenamePass may add new worklist entries. 484 RenamePass(RPD.BB, RPD.Pred, RPD.Values, RenamePassWorkList); 485 } while (!RenamePassWorkList.empty()); 486 487 // The renamer uses the Visited set to avoid infinite loops. Clear it now. 488 Visited.clear(); 489 490 // Remove the allocas themselves from the function. 491 for (unsigned i = 0, e = Allocas.size(); i != e; ++i) { 492 Instruction *A = Allocas[i]; 493 494 // If there are any uses of the alloca instructions left, they must be in 495 // sections of dead code that were not processed on the dominance frontier. 496 // Just delete the users now. 497 // 498 if (!A->use_empty()) 499 A->replaceAllUsesWith(UndefValue::get(A->getType())); 500 if (AST) AST->deleteValue(A); 501 A->eraseFromParent(); 502 } 503 504 // Remove alloca's dbg.declare instrinsics from the function. 505 for (unsigned i = 0, e = AllocaDbgDeclares.size(); i != e; ++i) 506 if (DbgDeclareInst *DDI = AllocaDbgDeclares[i]) 507 DDI->eraseFromParent(); 508 509 // Loop over all of the PHI nodes and see if there are any that we can get 510 // rid of because they merge all of the same incoming values. This can 511 // happen due to undef values coming into the PHI nodes. This process is 512 // iterative, because eliminating one PHI node can cause others to be removed. 513 bool EliminatedAPHI = true; 514 while (EliminatedAPHI) { 515 EliminatedAPHI = false; 516 517 for (DenseMap<std::pair<BasicBlock*, unsigned>, PHINode*>::iterator I = 518 NewPhiNodes.begin(), E = NewPhiNodes.end(); I != E;) { 519 PHINode *PN = I->second; 520 521 // If this PHI node merges one value and/or undefs, get the value. 522 if (Value *V = PN->hasConstantValue(&DT)) { 523 if (AST && PN->getType()->isPointerTy()) 524 AST->deleteValue(PN); 525 PN->replaceAllUsesWith(V); 526 PN->eraseFromParent(); 527 NewPhiNodes.erase(I++); 528 EliminatedAPHI = true; 529 continue; 530 } 531 ++I; 532 } 533 } 534 535 // At this point, the renamer has added entries to PHI nodes for all reachable 536 // code. Unfortunately, there may be unreachable blocks which the renamer 537 // hasn't traversed. If this is the case, the PHI nodes may not 538 // have incoming values for all predecessors. Loop over all PHI nodes we have 539 // created, inserting undef values if they are missing any incoming values. 540 // 541 for (DenseMap<std::pair<BasicBlock*, unsigned>, PHINode*>::iterator I = 542 NewPhiNodes.begin(), E = NewPhiNodes.end(); I != E; ++I) { 543 // We want to do this once per basic block. As such, only process a block 544 // when we find the PHI that is the first entry in the block. 545 PHINode *SomePHI = I->second; 546 BasicBlock *BB = SomePHI->getParent(); 547 if (&BB->front() != SomePHI) 548 continue; 549 550 // Only do work here if there the PHI nodes are missing incoming values. We 551 // know that all PHI nodes that were inserted in a block will have the same 552 // number of incoming values, so we can just check any of them. 553 if (SomePHI->getNumIncomingValues() == getNumPreds(BB)) 554 continue; 555 556 // Get the preds for BB. 557 SmallVector<BasicBlock*, 16> Preds(pred_begin(BB), pred_end(BB)); 558 559 // Ok, now we know that all of the PHI nodes are missing entries for some 560 // basic blocks. Start by sorting the incoming predecessors for efficient 561 // access. 562 std::sort(Preds.begin(), Preds.end()); 563 564 // Now we loop through all BB's which have entries in SomePHI and remove 565 // them from the Preds list. 566 for (unsigned i = 0, e = SomePHI->getNumIncomingValues(); i != e; ++i) { 567 // Do a log(n) search of the Preds list for the entry we want. 568 SmallVector<BasicBlock*, 16>::iterator EntIt = 569 std::lower_bound(Preds.begin(), Preds.end(), 570 SomePHI->getIncomingBlock(i)); 571 assert(EntIt != Preds.end() && *EntIt == SomePHI->getIncomingBlock(i)&& 572 "PHI node has entry for a block which is not a predecessor!"); 573 574 // Remove the entry 575 Preds.erase(EntIt); 576 } 577 578 // At this point, the blocks left in the preds list must have dummy 579 // entries inserted into every PHI nodes for the block. Update all the phi 580 // nodes in this block that we are inserting (there could be phis before 581 // mem2reg runs). 582 unsigned NumBadPreds = SomePHI->getNumIncomingValues(); 583 BasicBlock::iterator BBI = BB->begin(); 584 while ((SomePHI = dyn_cast<PHINode>(BBI++)) && 585 SomePHI->getNumIncomingValues() == NumBadPreds) { 586 Value *UndefVal = UndefValue::get(SomePHI->getType()); 587 for (unsigned pred = 0, e = Preds.size(); pred != e; ++pred) 588 SomePHI->addIncoming(UndefVal, Preds[pred]); 589 } 590 } 591 592 NewPhiNodes.clear(); 593} 594 595 596/// ComputeLiveInBlocks - Determine which blocks the value is live in. These 597/// are blocks which lead to uses. Knowing this allows us to avoid inserting 598/// PHI nodes into blocks which don't lead to uses (thus, the inserted phi nodes 599/// would be dead). 600void PromoteMem2Reg:: 601ComputeLiveInBlocks(AllocaInst *AI, AllocaInfo &Info, 602 const SmallPtrSet<BasicBlock*, 32> &DefBlocks, 603 SmallPtrSet<BasicBlock*, 32> &LiveInBlocks) { 604 605 // To determine liveness, we must iterate through the predecessors of blocks 606 // where the def is live. Blocks are added to the worklist if we need to 607 // check their predecessors. Start with all the using blocks. 608 SmallVector<BasicBlock*, 64> LiveInBlockWorklist(Info.UsingBlocks.begin(), 609 Info.UsingBlocks.end()); 610 611 // If any of the using blocks is also a definition block, check to see if the 612 // definition occurs before or after the use. If it happens before the use, 613 // the value isn't really live-in. 614 for (unsigned i = 0, e = LiveInBlockWorklist.size(); i != e; ++i) { 615 BasicBlock *BB = LiveInBlockWorklist[i]; 616 if (!DefBlocks.count(BB)) continue; 617 618 // Okay, this is a block that both uses and defines the value. If the first 619 // reference to the alloca is a def (store), then we know it isn't live-in. 620 for (BasicBlock::iterator I = BB->begin(); ; ++I) { 621 if (StoreInst *SI = dyn_cast<StoreInst>(I)) { 622 if (SI->getOperand(1) != AI) continue; 623 624 // We found a store to the alloca before a load. The alloca is not 625 // actually live-in here. 626 LiveInBlockWorklist[i] = LiveInBlockWorklist.back(); 627 LiveInBlockWorklist.pop_back(); 628 --i, --e; 629 break; 630 } 631 632 if (LoadInst *LI = dyn_cast<LoadInst>(I)) { 633 if (LI->getOperand(0) != AI) continue; 634 635 // Okay, we found a load before a store to the alloca. It is actually 636 // live into this block. 637 break; 638 } 639 } 640 } 641 642 // Now that we have a set of blocks where the phi is live-in, recursively add 643 // their predecessors until we find the full region the value is live. 644 while (!LiveInBlockWorklist.empty()) { 645 BasicBlock *BB = LiveInBlockWorklist.pop_back_val(); 646 647 // The block really is live in here, insert it into the set. If already in 648 // the set, then it has already been processed. 649 if (!LiveInBlocks.insert(BB)) 650 continue; 651 652 // Since the value is live into BB, it is either defined in a predecessor or 653 // live into it to. Add the preds to the worklist unless they are a 654 // defining block. 655 for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) { 656 BasicBlock *P = *PI; 657 658 // The value is not live into a predecessor if it defines the value. 659 if (DefBlocks.count(P)) 660 continue; 661 662 // Otherwise it is, add to the worklist. 663 LiveInBlockWorklist.push_back(P); 664 } 665 } 666} 667 668/// DetermineInsertionPoint - At this point, we're committed to promoting the 669/// alloca using IDF's, and the standard SSA construction algorithm. Determine 670/// which blocks need phi nodes and see if we can optimize out some work by 671/// avoiding insertion of dead phi nodes. 672void PromoteMem2Reg::DetermineInsertionPoint(AllocaInst *AI, unsigned AllocaNum, 673 AllocaInfo &Info) { 674 675 // Unique the set of defining blocks for efficient lookup. 676 SmallPtrSet<BasicBlock*, 32> DefBlocks; 677 DefBlocks.insert(Info.DefiningBlocks.begin(), Info.DefiningBlocks.end()); 678 679 // Determine which blocks the value is live in. These are blocks which lead 680 // to uses. 681 SmallPtrSet<BasicBlock*, 32> LiveInBlocks; 682 ComputeLiveInBlocks(AI, Info, DefBlocks, LiveInBlocks); 683 684 // Compute the locations where PhiNodes need to be inserted. Look at the 685 // dominance frontier of EACH basic-block we have a write in. 686 unsigned CurrentVersion = 0; 687 SmallPtrSet<PHINode*, 16> InsertedPHINodes; 688 std::vector<std::pair<unsigned, BasicBlock*> > DFBlocks; 689 while (!Info.DefiningBlocks.empty()) { 690 BasicBlock *BB = Info.DefiningBlocks.back(); 691 Info.DefiningBlocks.pop_back(); 692 693 // Look up the DF for this write, add it to defining blocks. 694 DominanceFrontier::const_iterator it = DF.find(BB); 695 if (it == DF.end()) continue; 696 697 const DominanceFrontier::DomSetType &S = it->second; 698 699 // In theory we don't need the indirection through the DFBlocks vector. 700 // In practice, the order of calling QueuePhiNode would depend on the 701 // (unspecified) ordering of basic blocks in the dominance frontier, 702 // which would give PHI nodes non-determinstic subscripts. Fix this by 703 // processing blocks in order of the occurance in the function. 704 for (DominanceFrontier::DomSetType::const_iterator P = S.begin(), 705 PE = S.end(); P != PE; ++P) { 706 // If the frontier block is not in the live-in set for the alloca, don't 707 // bother processing it. 708 if (!LiveInBlocks.count(*P)) 709 continue; 710 711 DFBlocks.push_back(std::make_pair(BBNumbers[*P], *P)); 712 } 713 714 // Sort by which the block ordering in the function. 715 if (DFBlocks.size() > 1) 716 std::sort(DFBlocks.begin(), DFBlocks.end()); 717 718 for (unsigned i = 0, e = DFBlocks.size(); i != e; ++i) { 719 BasicBlock *BB = DFBlocks[i].second; 720 if (QueuePhiNode(BB, AllocaNum, CurrentVersion, InsertedPHINodes)) 721 Info.DefiningBlocks.push_back(BB); 722 } 723 DFBlocks.clear(); 724 } 725} 726 727/// RewriteSingleStoreAlloca - If there is only a single store to this value, 728/// replace any loads of it that are directly dominated by the definition with 729/// the value stored. 730void PromoteMem2Reg::RewriteSingleStoreAlloca(AllocaInst *AI, 731 AllocaInfo &Info, 732 LargeBlockInfo &LBI) { 733 StoreInst *OnlyStore = Info.OnlyStore; 734 bool StoringGlobalVal = !isa<Instruction>(OnlyStore->getOperand(0)); 735 BasicBlock *StoreBB = OnlyStore->getParent(); 736 int StoreIndex = -1; 737 738 // Clear out UsingBlocks. We will reconstruct it here if needed. 739 Info.UsingBlocks.clear(); 740 741 for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E; ) { 742 Instruction *UserInst = cast<Instruction>(*UI++); 743 if (!isa<LoadInst>(UserInst)) { 744 assert(UserInst == OnlyStore && "Should only have load/stores"); 745 continue; 746 } 747 LoadInst *LI = cast<LoadInst>(UserInst); 748 749 // Okay, if we have a load from the alloca, we want to replace it with the 750 // only value stored to the alloca. We can do this if the value is 751 // dominated by the store. If not, we use the rest of the mem2reg machinery 752 // to insert the phi nodes as needed. 753 if (!StoringGlobalVal) { // Non-instructions are always dominated. 754 if (LI->getParent() == StoreBB) { 755 // If we have a use that is in the same block as the store, compare the 756 // indices of the two instructions to see which one came first. If the 757 // load came before the store, we can't handle it. 758 if (StoreIndex == -1) 759 StoreIndex = LBI.getInstructionIndex(OnlyStore); 760 761 if (unsigned(StoreIndex) > LBI.getInstructionIndex(LI)) { 762 // Can't handle this load, bail out. 763 Info.UsingBlocks.push_back(StoreBB); 764 continue; 765 } 766 767 } else if (LI->getParent() != StoreBB && 768 !dominates(StoreBB, LI->getParent())) { 769 // If the load and store are in different blocks, use BB dominance to 770 // check their relationships. If the store doesn't dom the use, bail 771 // out. 772 Info.UsingBlocks.push_back(LI->getParent()); 773 continue; 774 } 775 } 776 777 // Otherwise, we *can* safely rewrite this load. 778 Value *ReplVal = OnlyStore->getOperand(0); 779 // If the replacement value is the load, this must occur in unreachable 780 // code. 781 if (ReplVal == LI) 782 ReplVal = UndefValue::get(LI->getType()); 783 LI->replaceAllUsesWith(ReplVal); 784 if (AST && LI->getType()->isPointerTy()) 785 AST->deleteValue(LI); 786 LI->eraseFromParent(); 787 LBI.deleteValue(LI); 788 } 789} 790 791namespace { 792 793/// StoreIndexSearchPredicate - This is a helper predicate used to search by the 794/// first element of a pair. 795struct StoreIndexSearchPredicate { 796 bool operator()(const std::pair<unsigned, StoreInst*> &LHS, 797 const std::pair<unsigned, StoreInst*> &RHS) { 798 return LHS.first < RHS.first; 799 } 800}; 801 802} 803 804/// PromoteSingleBlockAlloca - Many allocas are only used within a single basic 805/// block. If this is the case, avoid traversing the CFG and inserting a lot of 806/// potentially useless PHI nodes by just performing a single linear pass over 807/// the basic block using the Alloca. 808/// 809/// If we cannot promote this alloca (because it is read before it is written), 810/// return true. This is necessary in cases where, due to control flow, the 811/// alloca is potentially undefined on some control flow paths. e.g. code like 812/// this is potentially correct: 813/// 814/// for (...) { if (c) { A = undef; undef = B; } } 815/// 816/// ... so long as A is not used before undef is set. 817/// 818void PromoteMem2Reg::PromoteSingleBlockAlloca(AllocaInst *AI, AllocaInfo &Info, 819 LargeBlockInfo &LBI) { 820 // The trickiest case to handle is when we have large blocks. Because of this, 821 // this code is optimized assuming that large blocks happen. This does not 822 // significantly pessimize the small block case. This uses LargeBlockInfo to 823 // make it efficient to get the index of various operations in the block. 824 825 // Clear out UsingBlocks. We will reconstruct it here if needed. 826 Info.UsingBlocks.clear(); 827 828 // Walk the use-def list of the alloca, getting the locations of all stores. 829 typedef SmallVector<std::pair<unsigned, StoreInst*>, 64> StoresByIndexTy; 830 StoresByIndexTy StoresByIndex; 831 832 for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); 833 UI != E; ++UI) 834 if (StoreInst *SI = dyn_cast<StoreInst>(*UI)) 835 StoresByIndex.push_back(std::make_pair(LBI.getInstructionIndex(SI), SI)); 836 837 // If there are no stores to the alloca, just replace any loads with undef. 838 if (StoresByIndex.empty()) { 839 for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;) 840 if (LoadInst *LI = dyn_cast<LoadInst>(*UI++)) { 841 LI->replaceAllUsesWith(UndefValue::get(LI->getType())); 842 if (AST && LI->getType()->isPointerTy()) 843 AST->deleteValue(LI); 844 LBI.deleteValue(LI); 845 LI->eraseFromParent(); 846 } 847 return; 848 } 849 850 // Sort the stores by their index, making it efficient to do a lookup with a 851 // binary search. 852 std::sort(StoresByIndex.begin(), StoresByIndex.end()); 853 854 // Walk all of the loads from this alloca, replacing them with the nearest 855 // store above them, if any. 856 for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;) { 857 LoadInst *LI = dyn_cast<LoadInst>(*UI++); 858 if (!LI) continue; 859 860 unsigned LoadIdx = LBI.getInstructionIndex(LI); 861 862 // Find the nearest store that has a lower than this load. 863 StoresByIndexTy::iterator I = 864 std::lower_bound(StoresByIndex.begin(), StoresByIndex.end(), 865 std::pair<unsigned, StoreInst*>(LoadIdx, static_cast<StoreInst*>(0)), 866 StoreIndexSearchPredicate()); 867 868 // If there is no store before this load, then we can't promote this load. 869 if (I == StoresByIndex.begin()) { 870 // Can't handle this load, bail out. 871 Info.UsingBlocks.push_back(LI->getParent()); 872 continue; 873 } 874 875 // Otherwise, there was a store before this load, the load takes its value. 876 --I; 877 LI->replaceAllUsesWith(I->second->getOperand(0)); 878 if (AST && LI->getType()->isPointerTy()) 879 AST->deleteValue(LI); 880 LI->eraseFromParent(); 881 LBI.deleteValue(LI); 882 } 883} 884 885// Inserts a llvm.dbg.value instrinsic before the stores to an alloca'd value 886// that has an associated llvm.dbg.decl intrinsic. 887void PromoteMem2Reg::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI, 888 StoreInst *SI) { 889 DIVariable DIVar(DDI->getVariable()); 890 if (!DIVar.Verify()) 891 return; 892 893 if (!DIF) 894 DIF = new DIFactory(*SI->getParent()->getParent()->getParent()); 895 Instruction *DbgVal = DIF->InsertDbgValueIntrinsic(SI->getOperand(0), 0, 896 DIVar, SI); 897 898 // Propagate any debug metadata from the store onto the dbg.value. 899 if (MDNode *SIMD = SI->getMetadata("dbg")) 900 DbgVal->setMetadata("dbg", SIMD); 901 // Otherwise propagate debug metadata from dbg.declare. 902 else if (MDNode *MD = DDI->getMetadata("dbg")) 903 DbgVal->setMetadata("dbg", MD); 904} 905 906// QueuePhiNode - queues a phi-node to be added to a basic-block for a specific 907// Alloca returns true if there wasn't already a phi-node for that variable 908// 909bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo, 910 unsigned &Version, 911 SmallPtrSet<PHINode*, 16> &InsertedPHINodes) { 912 // Look up the basic-block in question. 913 PHINode *&PN = NewPhiNodes[std::make_pair(BB, AllocaNo)]; 914 915 // If the BB already has a phi node added for the i'th alloca then we're done! 916 if (PN) return false; 917 918 // Create a PhiNode using the dereferenced type... and add the phi-node to the 919 // BasicBlock. 920 PN = PHINode::Create(Allocas[AllocaNo]->getAllocatedType(), 921 Allocas[AllocaNo]->getName() + "." + Twine(Version++), 922 BB->begin()); 923 ++NumPHIInsert; 924 PhiToAllocaMap[PN] = AllocaNo; 925 PN->reserveOperandSpace(getNumPreds(BB)); 926 927 InsertedPHINodes.insert(PN); 928 929 if (AST && PN->getType()->isPointerTy()) 930 AST->copyValue(PointerAllocaValues[AllocaNo], PN); 931 932 return true; 933} 934 935// RenamePass - Recursively traverse the CFG of the function, renaming loads and 936// stores to the allocas which we are promoting. IncomingVals indicates what 937// value each Alloca contains on exit from the predecessor block Pred. 938// 939void PromoteMem2Reg::RenamePass(BasicBlock *BB, BasicBlock *Pred, 940 RenamePassData::ValVector &IncomingVals, 941 std::vector<RenamePassData> &Worklist) { 942NextIteration: 943 // If we are inserting any phi nodes into this BB, they will already be in the 944 // block. 945 if (PHINode *APN = dyn_cast<PHINode>(BB->begin())) { 946 // If we have PHI nodes to update, compute the number of edges from Pred to 947 // BB. 948 if (PhiToAllocaMap.count(APN)) { 949 // We want to be able to distinguish between PHI nodes being inserted by 950 // this invocation of mem2reg from those phi nodes that already existed in 951 // the IR before mem2reg was run. We determine that APN is being inserted 952 // because it is missing incoming edges. All other PHI nodes being 953 // inserted by this pass of mem2reg will have the same number of incoming 954 // operands so far. Remember this count. 955 unsigned NewPHINumOperands = APN->getNumOperands(); 956 957 unsigned NumEdges = 0; 958 for (succ_iterator I = succ_begin(Pred), E = succ_end(Pred); I != E; ++I) 959 if (*I == BB) 960 ++NumEdges; 961 assert(NumEdges && "Must be at least one edge from Pred to BB!"); 962 963 // Add entries for all the phis. 964 BasicBlock::iterator PNI = BB->begin(); 965 do { 966 unsigned AllocaNo = PhiToAllocaMap[APN]; 967 968 // Add N incoming values to the PHI node. 969 for (unsigned i = 0; i != NumEdges; ++i) 970 APN->addIncoming(IncomingVals[AllocaNo], Pred); 971 972 // The currently active variable for this block is now the PHI. 973 IncomingVals[AllocaNo] = APN; 974 975 // Get the next phi node. 976 ++PNI; 977 APN = dyn_cast<PHINode>(PNI); 978 if (APN == 0) break; 979 980 // Verify that it is missing entries. If not, it is not being inserted 981 // by this mem2reg invocation so we want to ignore it. 982 } while (APN->getNumOperands() == NewPHINumOperands); 983 } 984 } 985 986 // Don't revisit blocks. 987 if (!Visited.insert(BB)) return; 988 989 for (BasicBlock::iterator II = BB->begin(); !isa<TerminatorInst>(II); ) { 990 Instruction *I = II++; // get the instruction, increment iterator 991 992 if (LoadInst *LI = dyn_cast<LoadInst>(I)) { 993 AllocaInst *Src = dyn_cast<AllocaInst>(LI->getPointerOperand()); 994 if (!Src) continue; 995 996 std::map<AllocaInst*, unsigned>::iterator AI = AllocaLookup.find(Src); 997 if (AI == AllocaLookup.end()) continue; 998 999 Value *V = IncomingVals[AI->second]; 1000 1001 // Anything using the load now uses the current value. 1002 LI->replaceAllUsesWith(V); 1003 if (AST && LI->getType()->isPointerTy()) 1004 AST->deleteValue(LI); 1005 BB->getInstList().erase(LI); 1006 } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) { 1007 // Delete this instruction and mark the name as the current holder of the 1008 // value 1009 AllocaInst *Dest = dyn_cast<AllocaInst>(SI->getPointerOperand()); 1010 if (!Dest) continue; 1011 1012 std::map<AllocaInst *, unsigned>::iterator ai = AllocaLookup.find(Dest); 1013 if (ai == AllocaLookup.end()) 1014 continue; 1015 1016 // what value were we writing? 1017 IncomingVals[ai->second] = SI->getOperand(0); 1018 // Record debuginfo for the store before removing it. 1019 if (DbgDeclareInst *DDI = AllocaDbgDeclares[ai->second]) 1020 ConvertDebugDeclareToDebugValue(DDI, SI); 1021 BB->getInstList().erase(SI); 1022 } 1023 } 1024 1025 // 'Recurse' to our successors. 1026 succ_iterator I = succ_begin(BB), E = succ_end(BB); 1027 if (I == E) return; 1028 1029 // Keep track of the successors so we don't visit the same successor twice 1030 SmallPtrSet<BasicBlock*, 8> VisitedSuccs; 1031 1032 // Handle the first successor without using the worklist. 1033 VisitedSuccs.insert(*I); 1034 Pred = BB; 1035 BB = *I; 1036 ++I; 1037 1038 for (; I != E; ++I) 1039 if (VisitedSuccs.insert(*I)) 1040 Worklist.push_back(RenamePassData(*I, Pred, IncomingVals)); 1041 1042 goto NextIteration; 1043} 1044 1045/// PromoteMemToReg - Promote the specified list of alloca instructions into 1046/// scalar registers, inserting PHI nodes as appropriate. This function makes 1047/// use of DominanceFrontier information. This function does not modify the CFG 1048/// of the function at all. All allocas must be from the same function. 1049/// 1050/// If AST is specified, the specified tracker is updated to reflect changes 1051/// made to the IR. 1052/// 1053void llvm::PromoteMemToReg(const std::vector<AllocaInst*> &Allocas, 1054 DominatorTree &DT, DominanceFrontier &DF, 1055 AliasSetTracker *AST) { 1056 // If there is nothing to do, bail out... 1057 if (Allocas.empty()) return; 1058 1059 PromoteMem2Reg(Allocas, DT, DF, AST).run(); 1060} 1061