1193323Sed//===- LoopRotation.cpp - Loop Rotation Pass ------------------------------===// 2193323Sed// 3193323Sed// The LLVM Compiler Infrastructure 4193323Sed// 5193323Sed// This file is distributed under the University of Illinois Open Source 6193323Sed// License. See LICENSE.TXT for details. 7193323Sed// 8193323Sed//===----------------------------------------------------------------------===// 9193323Sed// 10193323Sed// This file implements Loop Rotation Pass. 11193323Sed// 12193323Sed//===----------------------------------------------------------------------===// 13193323Sed 14193323Sed#include "llvm/Transforms/Scalar.h" 15249423Sdim#include "llvm/ADT/Statistic.h" 16296417Sdim#include "llvm/Analysis/AliasAnalysis.h" 17296417Sdim#include "llvm/Analysis/BasicAliasAnalysis.h" 18280031Sdim#include "llvm/Analysis/AssumptionCache.h" 19218893Sdim#include "llvm/Analysis/CodeMetrics.h" 20249423Sdim#include "llvm/Analysis/InstructionSimplify.h" 21296417Sdim#include "llvm/Analysis/GlobalsModRef.h" 22193323Sed#include "llvm/Analysis/LoopPass.h" 23193323Sed#include "llvm/Analysis/ScalarEvolution.h" 24296417Sdim#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" 25249423Sdim#include "llvm/Analysis/TargetTransformInfo.h" 26234353Sdim#include "llvm/Analysis/ValueTracking.h" 27276479Sdim#include "llvm/IR/CFG.h" 28276479Sdim#include "llvm/IR/Dominators.h" 29249423Sdim#include "llvm/IR/Function.h" 30249423Sdim#include "llvm/IR/IntrinsicInst.h" 31288943Sdim#include "llvm/IR/Module.h" 32276479Sdim#include "llvm/Support/CommandLine.h" 33249423Sdim#include "llvm/Support/Debug.h" 34288943Sdim#include "llvm/Support/raw_ostream.h" 35249423Sdim#include "llvm/Transforms/Utils/BasicBlockUtils.h" 36193323Sed#include "llvm/Transforms/Utils/Local.h" 37198892Srdivacky#include "llvm/Transforms/Utils/SSAUpdater.h" 38218893Sdim#include "llvm/Transforms/Utils/ValueMapper.h" 39193323Sedusing namespace llvm; 40193323Sed 41276479Sdim#define DEBUG_TYPE "loop-rotate" 42193323Sed 43276479Sdimstatic cl::opt<unsigned> 44276479SdimDefaultRotationThreshold("rotation-max-header-size", cl::init(16), cl::Hidden, 45276479Sdim cl::desc("The default maximum header size for automatic loop rotation")); 46276479Sdim 47193323SedSTATISTIC(NumRotated, "Number of loops rotated"); 48193323Sed 49218893Sdim/// RewriteUsesOfClonedInstructions - We just cloned the instructions from the 50218893Sdim/// old header into the preheader. If there were uses of the values produced by 51218893Sdim/// these instruction that were outside of the loop, we have to insert PHI nodes 52218893Sdim/// to merge the two values. Do this now. 53218893Sdimstatic void RewriteUsesOfClonedInstructions(BasicBlock *OrigHeader, 54218893Sdim BasicBlock *OrigPreheader, 55218893Sdim ValueToValueMapTy &ValueMap) { 56218893Sdim // Remove PHI node entries that are no longer live. 57218893Sdim BasicBlock::iterator I, E = OrigHeader->end(); 58218893Sdim for (I = OrigHeader->begin(); PHINode *PN = dyn_cast<PHINode>(I); ++I) 59218893Sdim PN->removeIncomingValue(PN->getBasicBlockIndex(OrigPreheader)); 60234353Sdim 61218893Sdim // Now fix up users of the instructions in OrigHeader, inserting PHI nodes 62218893Sdim // as necessary. 63218893Sdim SSAUpdater SSA; 64218893Sdim for (I = OrigHeader->begin(); I != E; ++I) { 65296417Sdim Value *OrigHeaderVal = &*I; 66234353Sdim 67218893Sdim // If there are no uses of the value (e.g. because it returns void), there 68218893Sdim // is nothing to rewrite. 69218893Sdim if (OrigHeaderVal->use_empty()) 70218893Sdim continue; 71234353Sdim 72218893Sdim Value *OrigPreHeaderVal = ValueMap[OrigHeaderVal]; 73193323Sed 74218893Sdim // The value now exits in two versions: the initial value in the preheader 75218893Sdim // and the loop "next" value in the original header. 76218893Sdim SSA.Initialize(OrigHeaderVal->getType(), OrigHeaderVal->getName()); 77218893Sdim SSA.AddAvailableValue(OrigHeader, OrigHeaderVal); 78218893Sdim SSA.AddAvailableValue(OrigPreheader, OrigPreHeaderVal); 79234353Sdim 80218893Sdim // Visit each use of the OrigHeader instruction. 81218893Sdim for (Value::use_iterator UI = OrigHeaderVal->use_begin(), 82218893Sdim UE = OrigHeaderVal->use_end(); UI != UE; ) { 83218893Sdim // Grab the use before incrementing the iterator. 84276479Sdim Use &U = *UI; 85234353Sdim 86218893Sdim // Increment the iterator before removing the use from the list. 87218893Sdim ++UI; 88234353Sdim 89218893Sdim // SSAUpdater can't handle a non-PHI use in the same block as an 90218893Sdim // earlier def. We can easily handle those cases manually. 91218893Sdim Instruction *UserInst = cast<Instruction>(U.getUser()); 92218893Sdim if (!isa<PHINode>(UserInst)) { 93218893Sdim BasicBlock *UserBB = UserInst->getParent(); 94234353Sdim 95218893Sdim // The original users in the OrigHeader are already using the 96218893Sdim // original definitions. 97218893Sdim if (UserBB == OrigHeader) 98218893Sdim continue; 99234353Sdim 100218893Sdim // Users in the OrigPreHeader need to use the value to which the 101218893Sdim // original definitions are mapped. 102218893Sdim if (UserBB == OrigPreheader) { 103218893Sdim U = OrigPreHeaderVal; 104218893Sdim continue; 105218893Sdim } 106218893Sdim } 107234353Sdim 108218893Sdim // Anything else can be handled by SSAUpdater. 109218893Sdim SSA.RewriteUse(U); 110218893Sdim } 111218893Sdim } 112234353Sdim} 113199481Srdivacky 114218893Sdim/// Rotate loop LP. Return true if the loop is rotated. 115251662Sdim/// 116251662Sdim/// \param SimplifiedLatch is true if the latch was just folded into the final 117251662Sdim/// loop exit. In this case we may want to rotate even though the new latch is 118251662Sdim/// now an exiting branch. This rotation would have happened had the latch not 119251662Sdim/// been simplified. However, if SimplifiedLatch is false, then we avoid 120251662Sdim/// rotating loops in which the latch exits to avoid excessive or endless 121251662Sdim/// rotation. LoopRotate should be repeatable and converge to a canonical 122251662Sdim/// form. This property is satisfied because simplifying the loop latch can only 123251662Sdim/// happen once across multiple invocations of the LoopRotate pass. 124296417Sdimstatic bool rotateLoop(Loop *L, unsigned MaxHeaderSize, LoopInfo *LI, 125296417Sdim const TargetTransformInfo *TTI, AssumptionCache *AC, 126296417Sdim DominatorTree *DT, ScalarEvolution *SE, 127296417Sdim bool SimplifiedLatch) { 128195098Sed // If the loop has only one block then there is not much to rotate. 129193323Sed if (L->getBlocks().size() == 1) 130193323Sed return false; 131234353Sdim 132218893Sdim BasicBlock *OrigHeader = L->getHeader(); 133243830Sdim BasicBlock *OrigLatch = L->getLoopLatch(); 134234353Sdim 135218893Sdim BranchInst *BI = dyn_cast<BranchInst>(OrigHeader->getTerminator()); 136276479Sdim if (!BI || BI->isUnconditional()) 137218893Sdim return false; 138234353Sdim 139195098Sed // If the loop header is not one of the loop exiting blocks then 140195098Sed // either this loop is already rotated or it is not 141193323Sed // suitable for loop rotation transformations. 142198892Srdivacky if (!L->isLoopExiting(OrigHeader)) 143193323Sed return false; 144193323Sed 145243830Sdim // If the loop latch already contains a branch that leaves the loop then the 146243830Sdim // loop is already rotated. 147276479Sdim if (!OrigLatch) 148193323Sed return false; 149193323Sed 150251662Sdim // Rotate if either the loop latch does *not* exit the loop, or if the loop 151251662Sdim // latch was just simplified. 152251662Sdim if (L->isLoopExiting(OrigLatch) && !SimplifiedLatch) 153251662Sdim return false; 154251662Sdim 155249423Sdim // Check size of original header and reject loop if it is very big or we can't 156249423Sdim // duplicate blocks inside it. 157218893Sdim { 158280031Sdim SmallPtrSet<const Value *, 32> EphValues; 159280031Sdim CodeMetrics::collectEphemeralValues(L, AC, EphValues); 160280031Sdim 161218893Sdim CodeMetrics Metrics; 162280031Sdim Metrics.analyzeBasicBlock(OrigHeader, *TTI, EphValues); 163249423Sdim if (Metrics.notDuplicatable) { 164276479Sdim DEBUG(dbgs() << "LoopRotation: NOT rotating - contains non-duplicatable" 165249423Sdim << " instructions: "; L->dump()); 166249423Sdim return false; 167249423Sdim } 168276479Sdim if (Metrics.NumInsts > MaxHeaderSize) 169218893Sdim return false; 170193323Sed } 171193323Sed 172193323Sed // Now, this loop is suitable for rotation. 173218893Sdim BasicBlock *OrigPreheader = L->getLoopPreheader(); 174234353Sdim 175221345Sdim // If the loop could not be converted to canonical form, it must have an 176221345Sdim // indirectbr in it, just give up. 177276479Sdim if (!OrigPreheader) 178221345Sdim return false; 179193323Sed 180198090Srdivacky // Anything ScalarEvolution may know about this loop or the PHI nodes 181198090Srdivacky // in its header will soon be invalidated. 182296417Sdim if (SE) 183198892Srdivacky SE->forgetLoop(L); 184198090Srdivacky 185243830Sdim DEBUG(dbgs() << "LoopRotation: rotating "; L->dump()); 186243830Sdim 187193323Sed // Find new Loop header. NewHeader is a Header's one and only successor 188193323Sed // that is inside loop. Header's other successor is outside the 189193323Sed // loop. Otherwise loop is not suitable for rotation. 190218893Sdim BasicBlock *Exit = BI->getSuccessor(0); 191218893Sdim BasicBlock *NewHeader = BI->getSuccessor(1); 192193323Sed if (L->contains(Exit)) 193193323Sed std::swap(Exit, NewHeader); 194193323Sed assert(NewHeader && "Unable to determine new loop header"); 195234353Sdim assert(L->contains(NewHeader) && !L->contains(Exit) && 196193323Sed "Unable to determine loop header and exit blocks"); 197234353Sdim 198195098Sed // This code assumes that the new header has exactly one predecessor. 199195098Sed // Remove any single-entry PHI nodes in it. 200193323Sed assert(NewHeader->getSinglePredecessor() && 201193323Sed "New header doesn't have one pred!"); 202193323Sed FoldSingleEntryPHINodes(NewHeader); 203193323Sed 204198892Srdivacky // Begin by walking OrigHeader and populating ValueMap with an entry for 205198892Srdivacky // each Instruction. 206193323Sed BasicBlock::iterator I = OrigHeader->begin(), E = OrigHeader->end(); 207218893Sdim ValueToValueMapTy ValueMap; 208193323Sed 209198892Srdivacky // For PHI nodes, the value available in OldPreHeader is just the 210198892Srdivacky // incoming value from OldPreHeader. 211198892Srdivacky for (; PHINode *PN = dyn_cast<PHINode>(I); ++I) 212224145Sdim ValueMap[PN] = PN->getIncomingValueForBlock(OrigPreheader); 213193323Sed 214288943Sdim const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); 215288943Sdim 216218893Sdim // For the rest of the instructions, either hoist to the OrigPreheader if 217218893Sdim // possible or create a clone in the OldPreHeader if not. 218218893Sdim TerminatorInst *LoopEntryBranch = OrigPreheader->getTerminator(); 219218893Sdim while (I != E) { 220296417Sdim Instruction *Inst = &*I++; 221234353Sdim 222218893Sdim // If the instruction's operands are invariant and it doesn't read or write 223218893Sdim // memory, then it is safe to hoist. Doing this doesn't change the order of 224218893Sdim // execution in the preheader, but does prevent the instruction from 225218893Sdim // executing in each iteration of the loop. This means it is safe to hoist 226218893Sdim // something that might trap, but isn't safe to hoist something that reads 227218893Sdim // memory (without proving that the loop doesn't write). 228218893Sdim if (L->hasLoopInvariantOperands(Inst) && 229218893Sdim !Inst->mayReadFromMemory() && !Inst->mayWriteToMemory() && 230234353Sdim !isa<TerminatorInst>(Inst) && !isa<DbgInfoIntrinsic>(Inst) && 231234353Sdim !isa<AllocaInst>(Inst)) { 232218893Sdim Inst->moveBefore(LoopEntryBranch); 233218893Sdim continue; 234218893Sdim } 235234353Sdim 236218893Sdim // Otherwise, create a duplicate of the instruction. 237218893Sdim Instruction *C = Inst->clone(); 238234353Sdim 239218893Sdim // Eagerly remap the operands of the instruction. 240218893Sdim RemapInstruction(C, ValueMap, 241218893Sdim RF_NoModuleLevelChanges|RF_IgnoreMissingEntries); 242234353Sdim 243218893Sdim // With the operands remapped, see if the instruction constant folds or is 244218893Sdim // otherwise simplifyable. This commonly occurs because the entry from PHI 245218893Sdim // nodes allows icmps and other instructions to fold. 246288943Sdim // FIXME: Provide TLI, DT, AC to SimplifyInstruction. 247288943Sdim Value *V = SimplifyInstruction(C, DL); 248218893Sdim if (V && LI->replacementPreservesLCSSAForm(C, V)) { 249218893Sdim // If so, then delete the temporary instruction and stick the folded value 250218893Sdim // in the map. 251218893Sdim delete C; 252218893Sdim ValueMap[Inst] = V; 253218893Sdim } else { 254218893Sdim // Otherwise, stick the new instruction into the new block! 255218893Sdim C->setName(Inst->getName()); 256218893Sdim C->insertBefore(LoopEntryBranch); 257218893Sdim ValueMap[Inst] = C; 258218893Sdim } 259198892Srdivacky } 260193323Sed 261198892Srdivacky // Along with all the other instructions, we just cloned OrigHeader's 262198892Srdivacky // terminator into OrigPreHeader. Fix up the PHI nodes in each of OrigHeader's 263198892Srdivacky // successors by duplicating their incoming values for OrigHeader. 264198892Srdivacky TerminatorInst *TI = OrigHeader->getTerminator(); 265296417Sdim for (BasicBlock *SuccBB : TI->successors()) 266296417Sdim for (BasicBlock::iterator BI = SuccBB->begin(); 267198892Srdivacky PHINode *PN = dyn_cast<PHINode>(BI); ++BI) 268218893Sdim PN->addIncoming(PN->getIncomingValueForBlock(OrigHeader), OrigPreheader); 269193323Sed 270198892Srdivacky // Now that OrigPreHeader has a clone of OrigHeader's terminator, remove 271198892Srdivacky // OrigPreHeader's old terminator (the original branch into the loop), and 272198892Srdivacky // remove the corresponding incoming values from the PHI nodes in OrigHeader. 273198892Srdivacky LoopEntryBranch->eraseFromParent(); 274193323Sed 275218893Sdim // If there were any uses of instructions in the duplicated block outside the 276218893Sdim // loop, update them, inserting PHI nodes as required 277218893Sdim RewriteUsesOfClonedInstructions(OrigHeader, OrigPreheader, ValueMap); 278193323Sed 279198892Srdivacky // NewHeader is now the header of the loop. 280193323Sed L->moveToHeader(NewHeader); 281218893Sdim assert(L->getHeader() == NewHeader && "Latch block is our new header"); 282193323Sed 283234353Sdim 284218893Sdim // At this point, we've finished our major CFG changes. As part of cloning 285218893Sdim // the loop into the preheader we've simplified instructions and the 286218893Sdim // duplicated conditional branch may now be branching on a constant. If it is 287218893Sdim // branching on a constant and if that constant means that we enter the loop, 288218893Sdim // then we fold away the cond branch to an uncond branch. This simplifies the 289218893Sdim // loop in cases important for nested loops, and it also means we don't have 290218893Sdim // to split as many edges. 291218893Sdim BranchInst *PHBI = cast<BranchInst>(OrigPreheader->getTerminator()); 292218893Sdim assert(PHBI->isConditional() && "Should be clone of BI condbr!"); 293218893Sdim if (!isa<ConstantInt>(PHBI->getCondition()) || 294218893Sdim PHBI->getSuccessor(cast<ConstantInt>(PHBI->getCondition())->isZero()) 295218893Sdim != NewHeader) { 296218893Sdim // The conditional branch can't be folded, handle the general case. 297218893Sdim // Update DominatorTree to reflect the CFG change we just made. Then split 298218893Sdim // edges as necessary to preserve LoopSimplify form. 299288943Sdim if (DT) { 300243830Sdim // Everything that was dominated by the old loop header is now dominated 301243830Sdim // by the original loop preheader. Conceptually the header was merged 302243830Sdim // into the preheader, even though we reuse the actual block as a new 303243830Sdim // loop latch. 304288943Sdim DomTreeNode *OrigHeaderNode = DT->getNode(OrigHeader); 305243830Sdim SmallVector<DomTreeNode *, 8> HeaderChildren(OrigHeaderNode->begin(), 306243830Sdim OrigHeaderNode->end()); 307288943Sdim DomTreeNode *OrigPreheaderNode = DT->getNode(OrigPreheader); 308243830Sdim for (unsigned I = 0, E = HeaderChildren.size(); I != E; ++I) 309288943Sdim DT->changeImmediateDominator(HeaderChildren[I], OrigPreheaderNode); 310234353Sdim 311288943Sdim assert(DT->getNode(Exit)->getIDom() == OrigPreheaderNode); 312288943Sdim assert(DT->getNode(NewHeader)->getIDom() == OrigPreheaderNode); 313243830Sdim 314218893Sdim // Update OrigHeader to be dominated by the new header block. 315288943Sdim DT->changeImmediateDominator(OrigHeader, OrigLatch); 316193323Sed } 317234353Sdim 318218893Sdim // Right now OrigPreHeader has two successors, NewHeader and ExitBlock, and 319239462Sdim // thus is not a preheader anymore. 320239462Sdim // Split the edge to form a real preheader. 321288943Sdim BasicBlock *NewPH = SplitCriticalEdge( 322288943Sdim OrigPreheader, NewHeader, 323288943Sdim CriticalEdgeSplittingOptions(DT, LI).setPreserveLCSSA()); 324218893Sdim NewPH->setName(NewHeader->getName() + ".lr.ph"); 325234353Sdim 326239462Sdim // Preserve canonical loop form, which means that 'Exit' should have only 327276479Sdim // one predecessor. Note that Exit could be an exit block for multiple 328276479Sdim // nested loops, causing both of the edges to now be critical and need to 329276479Sdim // be split. 330276479Sdim SmallVector<BasicBlock *, 4> ExitPreds(pred_begin(Exit), pred_end(Exit)); 331276479Sdim bool SplitLatchEdge = false; 332276479Sdim for (SmallVectorImpl<BasicBlock *>::iterator PI = ExitPreds.begin(), 333276479Sdim PE = ExitPreds.end(); 334276479Sdim PI != PE; ++PI) { 335276479Sdim // We only need to split loop exit edges. 336276479Sdim Loop *PredLoop = LI->getLoopFor(*PI); 337276479Sdim if (!PredLoop || PredLoop->contains(Exit)) 338276479Sdim continue; 339279161Sdim if (isa<IndirectBrInst>((*PI)->getTerminator())) 340279161Sdim continue; 341276479Sdim SplitLatchEdge |= L->getLoopLatch() == *PI; 342288943Sdim BasicBlock *ExitSplit = SplitCriticalEdge( 343288943Sdim *PI, Exit, CriticalEdgeSplittingOptions(DT, LI).setPreserveLCSSA()); 344276479Sdim ExitSplit->moveBefore(Exit); 345276479Sdim } 346276479Sdim assert(SplitLatchEdge && 347276479Sdim "Despite splitting all preds, failed to split latch exit?"); 348218893Sdim } else { 349218893Sdim // We can fold the conditional branch in the preheader, this makes things 350218893Sdim // simpler. The first step is to remove the extra edge to the Exit block. 351218893Sdim Exit->removePredecessor(OrigPreheader, true /*preserve LCSSA*/); 352221345Sdim BranchInst *NewBI = BranchInst::Create(NewHeader, PHBI); 353221345Sdim NewBI->setDebugLoc(PHBI->getDebugLoc()); 354218893Sdim PHBI->eraseFromParent(); 355234353Sdim 356218893Sdim // With our CFG finalized, update DomTree if it is available. 357288943Sdim if (DT) { 358218893Sdim // Update OrigHeader to be dominated by the new header block. 359288943Sdim DT->changeImmediateDominator(NewHeader, OrigPreheader); 360288943Sdim DT->changeImmediateDominator(OrigHeader, OrigLatch); 361243830Sdim 362243830Sdim // Brute force incremental dominator tree update. Call 363243830Sdim // findNearestCommonDominator on all CFG predecessors of each child of the 364243830Sdim // original header. 365288943Sdim DomTreeNode *OrigHeaderNode = DT->getNode(OrigHeader); 366243830Sdim SmallVector<DomTreeNode *, 8> HeaderChildren(OrigHeaderNode->begin(), 367243830Sdim OrigHeaderNode->end()); 368243830Sdim bool Changed; 369243830Sdim do { 370243830Sdim Changed = false; 371243830Sdim for (unsigned I = 0, E = HeaderChildren.size(); I != E; ++I) { 372243830Sdim DomTreeNode *Node = HeaderChildren[I]; 373243830Sdim BasicBlock *BB = Node->getBlock(); 374243830Sdim 375243830Sdim pred_iterator PI = pred_begin(BB); 376243830Sdim BasicBlock *NearestDom = *PI; 377243830Sdim for (pred_iterator PE = pred_end(BB); PI != PE; ++PI) 378288943Sdim NearestDom = DT->findNearestCommonDominator(NearestDom, *PI); 379243830Sdim 380243830Sdim // Remember if this changes the DomTree. 381243830Sdim if (Node->getIDom()->getBlock() != NearestDom) { 382288943Sdim DT->changeImmediateDominator(BB, NearestDom); 383243830Sdim Changed = true; 384243830Sdim } 385243830Sdim } 386243830Sdim 387243830Sdim // If the dominator changed, this may have an effect on other 388243830Sdim // predecessors, continue until we reach a fixpoint. 389243830Sdim } while (Changed); 390193323Sed } 391193323Sed } 392234353Sdim 393218893Sdim assert(L->getLoopPreheader() && "Invalid loop preheader after loop rotation"); 394218893Sdim assert(L->getLoopLatch() && "Invalid loop latch after loop rotation"); 395193323Sed 396218893Sdim // Now that the CFG and DomTree are in a consistent state again, try to merge 397218893Sdim // the OrigHeader block into OrigLatch. This will succeed if they are 398218893Sdim // connected by an unconditional branch. This is just a cleanup so the 399218893Sdim // emitted code isn't too gross in this common case. 400288943Sdim MergeBlockIntoPredecessor(OrigHeader, DT, LI); 401234353Sdim 402243830Sdim DEBUG(dbgs() << "LoopRotation: into "; L->dump()); 403243830Sdim 404218893Sdim ++NumRotated; 405218893Sdim return true; 406218893Sdim} 407296417Sdim 408296417Sdim/// Determine whether the instructions in this range may be safely and cheaply 409296417Sdim/// speculated. This is not an important enough situation to develop complex 410296417Sdim/// heuristics. We handle a single arithmetic instruction along with any type 411296417Sdim/// conversions. 412296417Sdimstatic bool shouldSpeculateInstrs(BasicBlock::iterator Begin, 413296417Sdim BasicBlock::iterator End, Loop *L) { 414296417Sdim bool seenIncrement = false; 415296417Sdim bool MultiExitLoop = false; 416296417Sdim 417296417Sdim if (!L->getExitingBlock()) 418296417Sdim MultiExitLoop = true; 419296417Sdim 420296417Sdim for (BasicBlock::iterator I = Begin; I != End; ++I) { 421296417Sdim 422296417Sdim if (!isSafeToSpeculativelyExecute(&*I)) 423296417Sdim return false; 424296417Sdim 425296417Sdim if (isa<DbgInfoIntrinsic>(I)) 426296417Sdim continue; 427296417Sdim 428296417Sdim switch (I->getOpcode()) { 429296417Sdim default: 430296417Sdim return false; 431296417Sdim case Instruction::GetElementPtr: 432296417Sdim // GEPs are cheap if all indices are constant. 433296417Sdim if (!cast<GEPOperator>(I)->hasAllConstantIndices()) 434296417Sdim return false; 435296417Sdim // fall-thru to increment case 436296417Sdim case Instruction::Add: 437296417Sdim case Instruction::Sub: 438296417Sdim case Instruction::And: 439296417Sdim case Instruction::Or: 440296417Sdim case Instruction::Xor: 441296417Sdim case Instruction::Shl: 442296417Sdim case Instruction::LShr: 443296417Sdim case Instruction::AShr: { 444296417Sdim Value *IVOpnd = !isa<Constant>(I->getOperand(0)) 445296417Sdim ? I->getOperand(0) 446296417Sdim : !isa<Constant>(I->getOperand(1)) 447296417Sdim ? I->getOperand(1) 448296417Sdim : nullptr; 449296417Sdim if (!IVOpnd) 450296417Sdim return false; 451296417Sdim 452296417Sdim // If increment operand is used outside of the loop, this speculation 453296417Sdim // could cause extra live range interference. 454296417Sdim if (MultiExitLoop) { 455296417Sdim for (User *UseI : IVOpnd->users()) { 456296417Sdim auto *UserInst = cast<Instruction>(UseI); 457296417Sdim if (!L->contains(UserInst)) 458296417Sdim return false; 459296417Sdim } 460296417Sdim } 461296417Sdim 462296417Sdim if (seenIncrement) 463296417Sdim return false; 464296417Sdim seenIncrement = true; 465296417Sdim break; 466296417Sdim } 467296417Sdim case Instruction::Trunc: 468296417Sdim case Instruction::ZExt: 469296417Sdim case Instruction::SExt: 470296417Sdim // ignore type conversions 471296417Sdim break; 472296417Sdim } 473296417Sdim } 474296417Sdim return true; 475296417Sdim} 476296417Sdim 477296417Sdim/// Fold the loop tail into the loop exit by speculating the loop tail 478296417Sdim/// instructions. Typically, this is a single post-increment. In the case of a 479296417Sdim/// simple 2-block loop, hoisting the increment can be much better than 480296417Sdim/// duplicating the entire loop header. In the case of loops with early exits, 481296417Sdim/// rotation will not work anyway, but simplifyLoopLatch will put the loop in 482296417Sdim/// canonical form so downstream passes can handle it. 483296417Sdim/// 484296417Sdim/// I don't believe this invalidates SCEV. 485296417Sdimstatic bool simplifyLoopLatch(Loop *L, LoopInfo *LI, DominatorTree *DT) { 486296417Sdim BasicBlock *Latch = L->getLoopLatch(); 487296417Sdim if (!Latch || Latch->hasAddressTaken()) 488296417Sdim return false; 489296417Sdim 490296417Sdim BranchInst *Jmp = dyn_cast<BranchInst>(Latch->getTerminator()); 491296417Sdim if (!Jmp || !Jmp->isUnconditional()) 492296417Sdim return false; 493296417Sdim 494296417Sdim BasicBlock *LastExit = Latch->getSinglePredecessor(); 495296417Sdim if (!LastExit || !L->isLoopExiting(LastExit)) 496296417Sdim return false; 497296417Sdim 498296417Sdim BranchInst *BI = dyn_cast<BranchInst>(LastExit->getTerminator()); 499296417Sdim if (!BI) 500296417Sdim return false; 501296417Sdim 502296417Sdim if (!shouldSpeculateInstrs(Latch->begin(), Jmp->getIterator(), L)) 503296417Sdim return false; 504296417Sdim 505296417Sdim DEBUG(dbgs() << "Folding loop latch " << Latch->getName() << " into " 506296417Sdim << LastExit->getName() << "\n"); 507296417Sdim 508296417Sdim // Hoist the instructions from Latch into LastExit. 509296417Sdim LastExit->getInstList().splice(BI->getIterator(), Latch->getInstList(), 510296417Sdim Latch->begin(), Jmp->getIterator()); 511296417Sdim 512296417Sdim unsigned FallThruPath = BI->getSuccessor(0) == Latch ? 0 : 1; 513296417Sdim BasicBlock *Header = Jmp->getSuccessor(0); 514296417Sdim assert(Header == L->getHeader() && "expected a backward branch"); 515296417Sdim 516296417Sdim // Remove Latch from the CFG so that LastExit becomes the new Latch. 517296417Sdim BI->setSuccessor(FallThruPath, Header); 518296417Sdim Latch->replaceSuccessorsPhiUsesWith(LastExit); 519296417Sdim Jmp->eraseFromParent(); 520296417Sdim 521296417Sdim // Nuke the Latch block. 522296417Sdim assert(Latch->empty() && "unable to evacuate Latch"); 523296417Sdim LI->removeBlock(Latch); 524296417Sdim if (DT) 525296417Sdim DT->eraseNode(Latch); 526296417Sdim Latch->eraseFromParent(); 527296417Sdim return true; 528296417Sdim} 529296417Sdim 530296417Sdim/// Rotate \c L as many times as possible. Return true if the loop is rotated 531296417Sdim/// at least once. 532296417Sdimstatic bool iterativelyRotateLoop(Loop *L, unsigned MaxHeaderSize, LoopInfo *LI, 533296417Sdim const TargetTransformInfo *TTI, 534296417Sdim AssumptionCache *AC, DominatorTree *DT, 535296417Sdim ScalarEvolution *SE) { 536296417Sdim // Save the loop metadata. 537296417Sdim MDNode *LoopMD = L->getLoopID(); 538296417Sdim 539296417Sdim // Simplify the loop latch before attempting to rotate the header 540296417Sdim // upward. Rotation may not be needed if the loop tail can be folded into the 541296417Sdim // loop exit. 542296417Sdim bool SimplifiedLatch = simplifyLoopLatch(L, LI, DT); 543296417Sdim 544296417Sdim // One loop can be rotated multiple times. 545296417Sdim bool MadeChange = false; 546296417Sdim while (rotateLoop(L, MaxHeaderSize, LI, TTI, AC, DT, SE, SimplifiedLatch)) { 547296417Sdim MadeChange = true; 548296417Sdim SimplifiedLatch = false; 549296417Sdim } 550296417Sdim 551296417Sdim // Restore the loop metadata. 552296417Sdim // NB! We presume LoopRotation DOESN'T ADD its own metadata. 553296417Sdim if ((MadeChange || SimplifiedLatch) && LoopMD) 554296417Sdim L->setLoopID(LoopMD); 555296417Sdim 556296417Sdim return MadeChange; 557296417Sdim} 558296417Sdim 559296417Sdimnamespace { 560296417Sdim 561296417Sdimclass LoopRotate : public LoopPass { 562296417Sdim unsigned MaxHeaderSize; 563296417Sdim 564296417Sdimpublic: 565296417Sdim static char ID; // Pass ID, replacement for typeid 566296417Sdim LoopRotate(int SpecifiedMaxHeaderSize = -1) : LoopPass(ID) { 567296417Sdim initializeLoopRotatePass(*PassRegistry::getPassRegistry()); 568296417Sdim if (SpecifiedMaxHeaderSize == -1) 569296417Sdim MaxHeaderSize = DefaultRotationThreshold; 570296417Sdim else 571296417Sdim MaxHeaderSize = unsigned(SpecifiedMaxHeaderSize); 572296417Sdim } 573296417Sdim 574296417Sdim // LCSSA form makes instruction renaming easier. 575296417Sdim void getAnalysisUsage(AnalysisUsage &AU) const override { 576296417Sdim AU.addPreserved<AAResultsWrapperPass>(); 577296417Sdim AU.addRequired<AssumptionCacheTracker>(); 578296417Sdim AU.addPreserved<DominatorTreeWrapperPass>(); 579296417Sdim AU.addRequired<LoopInfoWrapperPass>(); 580296417Sdim AU.addPreserved<LoopInfoWrapperPass>(); 581296417Sdim AU.addRequiredID(LoopSimplifyID); 582296417Sdim AU.addPreservedID(LoopSimplifyID); 583296417Sdim AU.addRequiredID(LCSSAID); 584296417Sdim AU.addPreservedID(LCSSAID); 585296417Sdim AU.addPreserved<ScalarEvolutionWrapperPass>(); 586296417Sdim AU.addPreserved<SCEVAAWrapperPass>(); 587296417Sdim AU.addRequired<TargetTransformInfoWrapperPass>(); 588296417Sdim AU.addPreserved<BasicAAWrapperPass>(); 589296417Sdim AU.addPreserved<GlobalsAAWrapperPass>(); 590296417Sdim } 591296417Sdim 592296417Sdim bool runOnLoop(Loop *L, LPPassManager &LPM) override { 593296417Sdim if (skipOptnoneFunction(L)) 594296417Sdim return false; 595296417Sdim Function &F = *L->getHeader()->getParent(); 596296417Sdim 597296417Sdim auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 598296417Sdim const auto *TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); 599296417Sdim auto *AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); 600296417Sdim auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>(); 601296417Sdim auto *DT = DTWP ? &DTWP->getDomTree() : nullptr; 602296417Sdim auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>(); 603296417Sdim auto *SE = SEWP ? &SEWP->getSE() : nullptr; 604296417Sdim 605296417Sdim return iterativelyRotateLoop(L, MaxHeaderSize, LI, TTI, AC, DT, SE); 606296417Sdim } 607296417Sdim}; 608296417Sdim} 609296417Sdim 610296417Sdimchar LoopRotate::ID = 0; 611296417SdimINITIALIZE_PASS_BEGIN(LoopRotate, "loop-rotate", "Rotate Loops", false, false) 612296417SdimINITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) 613296417SdimINITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) 614296417SdimINITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) 615296417SdimINITIALIZE_PASS_DEPENDENCY(LoopSimplify) 616296417SdimINITIALIZE_PASS_DEPENDENCY(LCSSA) 617296417SdimINITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass) 618296417SdimINITIALIZE_PASS_DEPENDENCY(BasicAAWrapperPass) 619296417SdimINITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) 620296417SdimINITIALIZE_PASS_END(LoopRotate, "loop-rotate", "Rotate Loops", false, false) 621296417Sdim 622296417SdimPass *llvm::createLoopRotatePass(int MaxHeaderSize) { 623296417Sdim return new LoopRotate(MaxHeaderSize); 624296417Sdim} 625