LoopRotation.cpp revision 198892
1193323Sed//===- LoopRotation.cpp - Loop Rotation Pass ------------------------------===// 2193323Sed// 3193323Sed// The LLVM Compiler Infrastructure 4193323Sed// 5193323Sed// This file is distributed under the University of Illinois Open Source 6193323Sed// License. See LICENSE.TXT for details. 7193323Sed// 8193323Sed//===----------------------------------------------------------------------===// 9193323Sed// 10193323Sed// This file implements Loop Rotation Pass. 11193323Sed// 12193323Sed//===----------------------------------------------------------------------===// 13193323Sed 14193323Sed#define DEBUG_TYPE "loop-rotate" 15193323Sed#include "llvm/Transforms/Scalar.h" 16193323Sed#include "llvm/Function.h" 17193323Sed#include "llvm/IntrinsicInst.h" 18193323Sed#include "llvm/Analysis/LoopInfo.h" 19193323Sed#include "llvm/Analysis/LoopPass.h" 20193323Sed#include "llvm/Analysis/Dominators.h" 21193323Sed#include "llvm/Analysis/ScalarEvolution.h" 22193323Sed#include "llvm/Transforms/Utils/Local.h" 23193323Sed#include "llvm/Transforms/Utils/BasicBlockUtils.h" 24198892Srdivacky#include "llvm/Transforms/Utils/SSAUpdater.h" 25193323Sed#include "llvm/Support/CommandLine.h" 26193323Sed#include "llvm/Support/Debug.h" 27193323Sed#include "llvm/ADT/Statistic.h" 28193323Sed#include "llvm/ADT/SmallVector.h" 29193323Sedusing namespace llvm; 30193323Sed 31193323Sed#define MAX_HEADER_SIZE 16 32193323Sed 33193323SedSTATISTIC(NumRotated, "Number of loops rotated"); 34193323Sednamespace { 35193323Sed 36198090Srdivacky class LoopRotate : public LoopPass { 37193323Sed public: 38193323Sed static char ID; // Pass ID, replacement for typeid 39193323Sed LoopRotate() : LoopPass(&ID) {} 40193323Sed 41193323Sed // Rotate Loop L as many times as possible. Return true if 42193323Sed // loop is rotated at least once. 43193323Sed bool runOnLoop(Loop *L, LPPassManager &LPM); 44193323Sed 45193323Sed // LCSSA form makes instruction renaming easier. 46193323Sed virtual void getAnalysisUsage(AnalysisUsage &AU) const { 47193323Sed AU.addRequiredID(LoopSimplifyID); 48193323Sed AU.addPreservedID(LoopSimplifyID); 49193323Sed AU.addRequiredID(LCSSAID); 50193323Sed AU.addPreservedID(LCSSAID); 51193323Sed AU.addPreserved<ScalarEvolution>(); 52193323Sed AU.addPreserved<LoopInfo>(); 53193323Sed AU.addPreserved<DominatorTree>(); 54193323Sed AU.addPreserved<DominanceFrontier>(); 55193323Sed } 56193323Sed 57193323Sed // Helper functions 58193323Sed 59193323Sed /// Do actual work 60193323Sed bool rotateLoop(Loop *L, LPPassManager &LPM); 61193323Sed 62193323Sed /// Initialize local data 63193323Sed void initialize(); 64193323Sed 65193323Sed /// After loop rotation, loop pre-header has multiple sucessors. 66193323Sed /// Insert one forwarding basic block to ensure that loop pre-header 67193323Sed /// has only one successor. 68193323Sed void preserveCanonicalLoopForm(LPPassManager &LPM); 69193323Sed 70193323Sed private: 71193323Sed Loop *L; 72193323Sed BasicBlock *OrigHeader; 73193323Sed BasicBlock *OrigPreHeader; 74193323Sed BasicBlock *OrigLatch; 75193323Sed BasicBlock *NewHeader; 76193323Sed BasicBlock *Exit; 77193323Sed LPPassManager *LPM_Ptr; 78193323Sed }; 79193323Sed} 80193323Sed 81193323Sedchar LoopRotate::ID = 0; 82193323Sedstatic RegisterPass<LoopRotate> X("loop-rotate", "Rotate Loops"); 83193323Sed 84193323SedPass *llvm::createLoopRotatePass() { return new LoopRotate(); } 85193323Sed 86193323Sed/// Rotate Loop L as many times as possible. Return true if 87195098Sed/// the loop is rotated at least once. 88193323Sedbool LoopRotate::runOnLoop(Loop *Lp, LPPassManager &LPM) { 89193323Sed 90193323Sed bool RotatedOneLoop = false; 91193323Sed initialize(); 92193323Sed LPM_Ptr = &LPM; 93193323Sed 94193323Sed // One loop can be rotated multiple times. 95193323Sed while (rotateLoop(Lp,LPM)) { 96193323Sed RotatedOneLoop = true; 97193323Sed initialize(); 98193323Sed } 99193323Sed 100193323Sed return RotatedOneLoop; 101193323Sed} 102193323Sed 103193323Sed/// Rotate loop LP. Return true if the loop is rotated. 104193323Sedbool LoopRotate::rotateLoop(Loop *Lp, LPPassManager &LPM) { 105193323Sed L = Lp; 106193323Sed 107193323Sed OrigHeader = L->getHeader(); 108193323Sed OrigPreHeader = L->getLoopPreheader(); 109193323Sed OrigLatch = L->getLoopLatch(); 110193323Sed 111195098Sed // If the loop has only one block then there is not much to rotate. 112193323Sed if (L->getBlocks().size() == 1) 113193323Sed return false; 114193323Sed 115193323Sed assert(OrigHeader && OrigLatch && OrigPreHeader && 116193323Sed "Loop is not in canonical form"); 117193323Sed 118195098Sed // If the loop header is not one of the loop exiting blocks then 119195098Sed // either this loop is already rotated or it is not 120193323Sed // suitable for loop rotation transformations. 121198892Srdivacky if (!L->isLoopExiting(OrigHeader)) 122193323Sed return false; 123193323Sed 124193323Sed BranchInst *BI = dyn_cast<BranchInst>(OrigHeader->getTerminator()); 125193323Sed if (!BI) 126193323Sed return false; 127193323Sed assert(BI->isConditional() && "Branch Instruction is not conditional"); 128193323Sed 129193323Sed // Updating PHInodes in loops with multiple exits adds complexity. 130193323Sed // Keep it simple, and restrict loop rotation to loops with one exit only. 131193323Sed // In future, lift this restriction and support for multiple exits if 132193323Sed // required. 133193323Sed SmallVector<BasicBlock*, 8> ExitBlocks; 134193323Sed L->getExitBlocks(ExitBlocks); 135193323Sed if (ExitBlocks.size() > 1) 136193323Sed return false; 137193323Sed 138193323Sed // Check size of original header and reject 139193323Sed // loop if it is very big. 140193323Sed unsigned Size = 0; 141193323Sed 142193323Sed // FIXME: Use common api to estimate size. 143193323Sed for (BasicBlock::const_iterator OI = OrigHeader->begin(), 144193323Sed OE = OrigHeader->end(); OI != OE; ++OI) { 145193323Sed if (isa<PHINode>(OI)) 146193323Sed continue; // PHI nodes don't count. 147193323Sed if (isa<DbgInfoIntrinsic>(OI)) 148193323Sed continue; // Debug intrinsics don't count as size. 149193323Sed Size++; 150193323Sed } 151193323Sed 152193323Sed if (Size > MAX_HEADER_SIZE) 153193323Sed return false; 154193323Sed 155193323Sed // Now, this loop is suitable for rotation. 156193323Sed 157198090Srdivacky // Anything ScalarEvolution may know about this loop or the PHI nodes 158198090Srdivacky // in its header will soon be invalidated. 159198090Srdivacky if (ScalarEvolution *SE = getAnalysisIfAvailable<ScalarEvolution>()) 160198892Srdivacky SE->forgetLoop(L); 161198090Srdivacky 162193323Sed // Find new Loop header. NewHeader is a Header's one and only successor 163193323Sed // that is inside loop. Header's other successor is outside the 164193323Sed // loop. Otherwise loop is not suitable for rotation. 165193323Sed Exit = BI->getSuccessor(0); 166193323Sed NewHeader = BI->getSuccessor(1); 167193323Sed if (L->contains(Exit)) 168193323Sed std::swap(Exit, NewHeader); 169193323Sed assert(NewHeader && "Unable to determine new loop header"); 170193323Sed assert(L->contains(NewHeader) && !L->contains(Exit) && 171193323Sed "Unable to determine loop header and exit blocks"); 172193323Sed 173195098Sed // This code assumes that the new header has exactly one predecessor. 174195098Sed // Remove any single-entry PHI nodes in it. 175193323Sed assert(NewHeader->getSinglePredecessor() && 176193323Sed "New header doesn't have one pred!"); 177193323Sed FoldSingleEntryPHINodes(NewHeader); 178193323Sed 179198892Srdivacky // Begin by walking OrigHeader and populating ValueMap with an entry for 180198892Srdivacky // each Instruction. 181193323Sed BasicBlock::iterator I = OrigHeader->begin(), E = OrigHeader->end(); 182198892Srdivacky DenseMap<const Value *, Value *> ValueMap; 183193323Sed 184198892Srdivacky // For PHI nodes, the value available in OldPreHeader is just the 185198892Srdivacky // incoming value from OldPreHeader. 186198892Srdivacky for (; PHINode *PN = dyn_cast<PHINode>(I); ++I) 187198892Srdivacky ValueMap[PN] = PN->getIncomingValue(PN->getBasicBlockIndex(OrigPreHeader)); 188193323Sed 189198892Srdivacky // For the rest of the instructions, create a clone in the OldPreHeader. 190198892Srdivacky TerminatorInst *LoopEntryBranch = OrigPreHeader->getTerminator(); 191193323Sed for (; I != E; ++I) { 192198892Srdivacky Instruction *C = I->clone(); 193198892Srdivacky C->setName(I->getName()); 194198892Srdivacky C->insertBefore(LoopEntryBranch); 195198892Srdivacky ValueMap[I] = C; 196198892Srdivacky } 197193323Sed 198198892Srdivacky // Along with all the other instructions, we just cloned OrigHeader's 199198892Srdivacky // terminator into OrigPreHeader. Fix up the PHI nodes in each of OrigHeader's 200198892Srdivacky // successors by duplicating their incoming values for OrigHeader. 201198892Srdivacky TerminatorInst *TI = OrigHeader->getTerminator(); 202198892Srdivacky for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) 203198892Srdivacky for (BasicBlock::iterator BI = TI->getSuccessor(i)->begin(); 204198892Srdivacky PHINode *PN = dyn_cast<PHINode>(BI); ++BI) 205198892Srdivacky PN->addIncoming(PN->getIncomingValueForBlock(OrigHeader), OrigPreHeader); 206193323Sed 207198892Srdivacky // Now that OrigPreHeader has a clone of OrigHeader's terminator, remove 208198892Srdivacky // OrigPreHeader's old terminator (the original branch into the loop), and 209198892Srdivacky // remove the corresponding incoming values from the PHI nodes in OrigHeader. 210198892Srdivacky LoopEntryBranch->eraseFromParent(); 211198892Srdivacky for (I = OrigHeader->begin(); PHINode *PN = dyn_cast<PHINode>(I); ++I) 212198892Srdivacky PN->removeIncomingValue(PN->getBasicBlockIndex(OrigPreHeader)); 213193323Sed 214198892Srdivacky // Now fix up users of the instructions in OrigHeader, inserting PHI nodes 215198892Srdivacky // as necessary. 216198892Srdivacky SSAUpdater SSA; 217198892Srdivacky for (I = OrigHeader->begin(); I != E; ++I) { 218198892Srdivacky Value *OrigHeaderVal = I; 219198892Srdivacky Value *OrigPreHeaderVal = ValueMap[OrigHeaderVal]; 220193323Sed 221198892Srdivacky // The value now exits in two versions: the initial value in the preheader 222198892Srdivacky // and the loop "next" value in the original header. 223198892Srdivacky SSA.Initialize(OrigHeaderVal); 224198892Srdivacky SSA.AddAvailableValue(OrigHeader, OrigHeaderVal); 225198892Srdivacky SSA.AddAvailableValue(OrigPreHeader, OrigPreHeaderVal); 226193323Sed 227198892Srdivacky // Visit each use of the OrigHeader instruction. 228198892Srdivacky for (Value::use_iterator UI = OrigHeaderVal->use_begin(), 229198892Srdivacky UE = OrigHeaderVal->use_end(); UI != UE; ) { 230198892Srdivacky // Grab the use before incrementing the iterator. 231198892Srdivacky Use &U = UI.getUse(); 232193323Sed 233198892Srdivacky // Increment the iterator before removing the use from the list. 234198892Srdivacky ++UI; 235193323Sed 236198892Srdivacky // SSAUpdater can't handle a non-PHI use in the same block as an 237198892Srdivacky // earlier def. We can easily handle those cases manually. 238198892Srdivacky Instruction *UserInst = cast<Instruction>(U.getUser()); 239198892Srdivacky if (!isa<PHINode>(UserInst)) { 240198892Srdivacky BasicBlock *UserBB = UserInst->getParent(); 241193323Sed 242198892Srdivacky // The original users in the OrigHeader are already using the 243198892Srdivacky // original definitions. 244198892Srdivacky if (UserBB == OrigHeader) 245193323Sed continue; 246193323Sed 247198892Srdivacky // Users in the OrigPreHeader need to use the value to which the 248198892Srdivacky // original definitions are mapped. 249198892Srdivacky if (UserBB == OrigPreHeader) { 250198892Srdivacky U = OrigPreHeaderVal; 251193323Sed continue; 252198892Srdivacky } 253193323Sed } 254193323Sed 255198892Srdivacky // Anything else can be handled by SSAUpdater. 256198892Srdivacky SSA.RewriteUse(U); 257193323Sed } 258193323Sed } 259193323Sed 260198892Srdivacky // NewHeader is now the header of the loop. 261193323Sed L->moveToHeader(NewHeader); 262193323Sed 263193323Sed preserveCanonicalLoopForm(LPM); 264193323Sed 265193323Sed NumRotated++; 266193323Sed return true; 267193323Sed} 268193323Sed 269193323Sed/// Initialize local data 270193323Sedvoid LoopRotate::initialize() { 271193323Sed L = NULL; 272193323Sed OrigHeader = NULL; 273193323Sed OrigPreHeader = NULL; 274193323Sed NewHeader = NULL; 275193323Sed Exit = NULL; 276193323Sed} 277193323Sed 278193323Sed/// After loop rotation, loop pre-header has multiple sucessors. 279193323Sed/// Insert one forwarding basic block to ensure that loop pre-header 280193323Sed/// has only one successor. 281193323Sedvoid LoopRotate::preserveCanonicalLoopForm(LPPassManager &LPM) { 282193323Sed 283193323Sed // Right now original pre-header has two successors, new header and 284193323Sed // exit block. Insert new block between original pre-header and 285193323Sed // new header such that loop's new pre-header has only one successor. 286198090Srdivacky BasicBlock *NewPreHeader = BasicBlock::Create(OrigHeader->getContext(), 287198090Srdivacky "bb.nph", 288193323Sed OrigHeader->getParent(), 289193323Sed NewHeader); 290193323Sed LoopInfo &LI = LPM.getAnalysis<LoopInfo>(); 291193323Sed if (Loop *PL = LI.getLoopFor(OrigPreHeader)) 292193323Sed PL->addBasicBlockToLoop(NewPreHeader, LI.getBase()); 293193323Sed BranchInst::Create(NewHeader, NewPreHeader); 294193323Sed 295193323Sed BranchInst *OrigPH_BI = cast<BranchInst>(OrigPreHeader->getTerminator()); 296193323Sed if (OrigPH_BI->getSuccessor(0) == NewHeader) 297193323Sed OrigPH_BI->setSuccessor(0, NewPreHeader); 298193323Sed else { 299193323Sed assert(OrigPH_BI->getSuccessor(1) == NewHeader && 300193323Sed "Unexpected original pre-header terminator"); 301193323Sed OrigPH_BI->setSuccessor(1, NewPreHeader); 302193323Sed } 303193323Sed 304195340Sed PHINode *PN; 305195340Sed for (BasicBlock::iterator I = NewHeader->begin(); 306195340Sed (PN = dyn_cast<PHINode>(I)); ++I) { 307193323Sed int index = PN->getBasicBlockIndex(OrigPreHeader); 308193323Sed assert(index != -1 && "Expected incoming value from Original PreHeader"); 309193323Sed PN->setIncomingBlock(index, NewPreHeader); 310193323Sed assert(PN->getBasicBlockIndex(OrigPreHeader) == -1 && 311193323Sed "Expected only one incoming value from Original PreHeader"); 312193323Sed } 313193323Sed 314193323Sed if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>()) { 315193323Sed DT->addNewBlock(NewPreHeader, OrigPreHeader); 316193323Sed DT->changeImmediateDominator(L->getHeader(), NewPreHeader); 317193323Sed DT->changeImmediateDominator(Exit, OrigPreHeader); 318193323Sed for (Loop::block_iterator BI = L->block_begin(), BE = L->block_end(); 319193323Sed BI != BE; ++BI) { 320193323Sed BasicBlock *B = *BI; 321193323Sed if (L->getHeader() != B) { 322193323Sed DomTreeNode *Node = DT->getNode(B); 323193323Sed if (Node && Node->getBlock() == OrigHeader) 324193323Sed DT->changeImmediateDominator(*BI, L->getHeader()); 325193323Sed } 326193323Sed } 327193323Sed DT->changeImmediateDominator(OrigHeader, OrigLatch); 328193323Sed } 329193323Sed 330193323Sed if (DominanceFrontier *DF = getAnalysisIfAvailable<DominanceFrontier>()) { 331193323Sed // New Preheader's dominance frontier is Exit block. 332193323Sed DominanceFrontier::DomSetType NewPHSet; 333193323Sed NewPHSet.insert(Exit); 334193323Sed DF->addBasicBlock(NewPreHeader, NewPHSet); 335193323Sed 336193323Sed // New Header's dominance frontier now includes itself and Exit block 337193323Sed DominanceFrontier::iterator HeadI = DF->find(L->getHeader()); 338193323Sed if (HeadI != DF->end()) { 339193323Sed DominanceFrontier::DomSetType & HeaderSet = HeadI->second; 340193323Sed HeaderSet.clear(); 341193323Sed HeaderSet.insert(L->getHeader()); 342193323Sed HeaderSet.insert(Exit); 343193323Sed } else { 344193323Sed DominanceFrontier::DomSetType HeaderSet; 345193323Sed HeaderSet.insert(L->getHeader()); 346193323Sed HeaderSet.insert(Exit); 347193323Sed DF->addBasicBlock(L->getHeader(), HeaderSet); 348193323Sed } 349193323Sed 350193323Sed // Original header (new Loop Latch)'s dominance frontier is Exit. 351193323Sed DominanceFrontier::iterator LatchI = DF->find(L->getLoopLatch()); 352193323Sed if (LatchI != DF->end()) { 353193323Sed DominanceFrontier::DomSetType &LatchSet = LatchI->second; 354193323Sed LatchSet = LatchI->second; 355193323Sed LatchSet.clear(); 356193323Sed LatchSet.insert(Exit); 357193323Sed } else { 358193323Sed DominanceFrontier::DomSetType LatchSet; 359193323Sed LatchSet.insert(Exit); 360193323Sed DF->addBasicBlock(L->getHeader(), LatchSet); 361193323Sed } 362193323Sed 363198090Srdivacky // If a loop block dominates new loop latch then add to its frontiers 364198090Srdivacky // new header and Exit and remove new latch (which is equal to original 365198090Srdivacky // header). 366193323Sed BasicBlock *NewLatch = L->getLoopLatch(); 367198090Srdivacky 368198090Srdivacky assert(NewLatch == OrigHeader && "NewLatch is inequal to OrigHeader"); 369198090Srdivacky 370198090Srdivacky if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>()) { 371198090Srdivacky for (Loop::block_iterator BI = L->block_begin(), BE = L->block_end(); 372198090Srdivacky BI != BE; ++BI) { 373198090Srdivacky BasicBlock *B = *BI; 374198090Srdivacky if (DT->dominates(B, NewLatch)) { 375198090Srdivacky DominanceFrontier::iterator BDFI = DF->find(B); 376198090Srdivacky if (BDFI != DF->end()) { 377198090Srdivacky DominanceFrontier::DomSetType &BSet = BDFI->second; 378198090Srdivacky BSet.erase(NewLatch); 379198090Srdivacky BSet.insert(L->getHeader()); 380198090Srdivacky BSet.insert(Exit); 381198090Srdivacky } else { 382198090Srdivacky DominanceFrontier::DomSetType BSet; 383198090Srdivacky BSet.insert(L->getHeader()); 384198090Srdivacky BSet.insert(Exit); 385198090Srdivacky DF->addBasicBlock(B, BSet); 386198090Srdivacky } 387193323Sed } 388193323Sed } 389193323Sed } 390193323Sed } 391193323Sed 392193323Sed // Preserve canonical loop form, which means Exit block should 393193323Sed // have only one predecessor. 394198090Srdivacky SplitEdge(L->getLoopLatch(), Exit, this); 395193323Sed 396193323Sed assert(NewHeader && L->getHeader() == NewHeader && 397193323Sed "Invalid loop header after loop rotation"); 398193323Sed assert(NewPreHeader && L->getLoopPreheader() == NewPreHeader && 399193323Sed "Invalid loop preheader after loop rotation"); 400193323Sed assert(L->getLoopLatch() && 401193323Sed "Invalid loop latch after loop rotation"); 402193323Sed} 403