LoopRotation.cpp revision 239462
1193323Sed//===- LoopRotation.cpp - Loop Rotation Pass ------------------------------===//
2193323Sed//
3193323Sed//                     The LLVM Compiler Infrastructure
4193323Sed//
5193323Sed// This file is distributed under the University of Illinois Open Source
6193323Sed// License. See LICENSE.TXT for details.
7193323Sed//
8193323Sed//===----------------------------------------------------------------------===//
9193323Sed//
10193323Sed// This file implements Loop Rotation Pass.
11193323Sed//
12193323Sed//===----------------------------------------------------------------------===//
13193323Sed
14193323Sed#define DEBUG_TYPE "loop-rotate"
15193323Sed#include "llvm/Transforms/Scalar.h"
16193323Sed#include "llvm/Function.h"
17193323Sed#include "llvm/IntrinsicInst.h"
18218893Sdim#include "llvm/Analysis/CodeMetrics.h"
19193323Sed#include "llvm/Analysis/LoopPass.h"
20218893Sdim#include "llvm/Analysis/InstructionSimplify.h"
21193323Sed#include "llvm/Analysis/ScalarEvolution.h"
22234353Sdim#include "llvm/Analysis/ValueTracking.h"
23193323Sed#include "llvm/Transforms/Utils/Local.h"
24193323Sed#include "llvm/Transforms/Utils/BasicBlockUtils.h"
25198892Srdivacky#include "llvm/Transforms/Utils/SSAUpdater.h"
26218893Sdim#include "llvm/Transforms/Utils/ValueMapper.h"
27193323Sed#include "llvm/Support/Debug.h"
28193323Sed#include "llvm/ADT/Statistic.h"
29193323Sedusing namespace llvm;
30193323Sed
31193323Sed#define MAX_HEADER_SIZE 16
32193323Sed
33193323SedSTATISTIC(NumRotated, "Number of loops rotated");
34193323Sednamespace {
35193323Sed
36198090Srdivacky  class LoopRotate : public LoopPass {
37193323Sed  public:
38193323Sed    static char ID; // Pass ID, replacement for typeid
39218893Sdim    LoopRotate() : LoopPass(ID) {
40218893Sdim      initializeLoopRotatePass(*PassRegistry::getPassRegistry());
41218893Sdim    }
42193323Sed
43193323Sed    // LCSSA form makes instruction renaming easier.
44193323Sed    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
45212904Sdim      AU.addPreserved<DominatorTree>();
46212904Sdim      AU.addRequired<LoopInfo>();
47212904Sdim      AU.addPreserved<LoopInfo>();
48193323Sed      AU.addRequiredID(LoopSimplifyID);
49193323Sed      AU.addPreservedID(LoopSimplifyID);
50193323Sed      AU.addRequiredID(LCSSAID);
51193323Sed      AU.addPreservedID(LCSSAID);
52193323Sed      AU.addPreserved<ScalarEvolution>();
53193323Sed    }
54193323Sed
55218893Sdim    bool runOnLoop(Loop *L, LPPassManager &LPM);
56234353Sdim    void simplifyLoopLatch(Loop *L);
57218893Sdim    bool rotateLoop(Loop *L);
58234353Sdim
59193323Sed  private:
60218893Sdim    LoopInfo *LI;
61193323Sed  };
62193323Sed}
63234353Sdim
64193323Sedchar LoopRotate::ID = 0;
65218893SdimINITIALIZE_PASS_BEGIN(LoopRotate, "loop-rotate", "Rotate Loops", false, false)
66218893SdimINITIALIZE_PASS_DEPENDENCY(LoopInfo)
67218893SdimINITIALIZE_PASS_DEPENDENCY(LoopSimplify)
68218893SdimINITIALIZE_PASS_DEPENDENCY(LCSSA)
69218893SdimINITIALIZE_PASS_END(LoopRotate, "loop-rotate", "Rotate Loops", false, false)
70193323Sed
71193323SedPass *llvm::createLoopRotatePass() { return new LoopRotate(); }
72193323Sed
73193323Sed/// Rotate Loop L as many times as possible. Return true if
74195098Sed/// the loop is rotated at least once.
75218893Sdimbool LoopRotate::runOnLoop(Loop *L, LPPassManager &LPM) {
76218893Sdim  LI = &getAnalysis<LoopInfo>();
77193323Sed
78234353Sdim  // Simplify the loop latch before attempting to rotate the header
79234353Sdim  // upward. Rotation may not be needed if the loop tail can be folded into the
80234353Sdim  // loop exit.
81234353Sdim  simplifyLoopLatch(L);
82234353Sdim
83193323Sed  // One loop can be rotated multiple times.
84218893Sdim  bool MadeChange = false;
85218893Sdim  while (rotateLoop(L))
86218893Sdim    MadeChange = true;
87193323Sed
88218893Sdim  return MadeChange;
89193323Sed}
90193323Sed
91218893Sdim/// RewriteUsesOfClonedInstructions - We just cloned the instructions from the
92218893Sdim/// old header into the preheader.  If there were uses of the values produced by
93218893Sdim/// these instruction that were outside of the loop, we have to insert PHI nodes
94218893Sdim/// to merge the two values.  Do this now.
95218893Sdimstatic void RewriteUsesOfClonedInstructions(BasicBlock *OrigHeader,
96218893Sdim                                            BasicBlock *OrigPreheader,
97218893Sdim                                            ValueToValueMapTy &ValueMap) {
98218893Sdim  // Remove PHI node entries that are no longer live.
99218893Sdim  BasicBlock::iterator I, E = OrigHeader->end();
100218893Sdim  for (I = OrigHeader->begin(); PHINode *PN = dyn_cast<PHINode>(I); ++I)
101218893Sdim    PN->removeIncomingValue(PN->getBasicBlockIndex(OrigPreheader));
102234353Sdim
103218893Sdim  // Now fix up users of the instructions in OrigHeader, inserting PHI nodes
104218893Sdim  // as necessary.
105218893Sdim  SSAUpdater SSA;
106218893Sdim  for (I = OrigHeader->begin(); I != E; ++I) {
107218893Sdim    Value *OrigHeaderVal = I;
108234353Sdim
109218893Sdim    // If there are no uses of the value (e.g. because it returns void), there
110218893Sdim    // is nothing to rewrite.
111218893Sdim    if (OrigHeaderVal->use_empty())
112218893Sdim      continue;
113234353Sdim
114218893Sdim    Value *OrigPreHeaderVal = ValueMap[OrigHeaderVal];
115193323Sed
116218893Sdim    // The value now exits in two versions: the initial value in the preheader
117218893Sdim    // and the loop "next" value in the original header.
118218893Sdim    SSA.Initialize(OrigHeaderVal->getType(), OrigHeaderVal->getName());
119218893Sdim    SSA.AddAvailableValue(OrigHeader, OrigHeaderVal);
120218893Sdim    SSA.AddAvailableValue(OrigPreheader, OrigPreHeaderVal);
121234353Sdim
122218893Sdim    // Visit each use of the OrigHeader instruction.
123218893Sdim    for (Value::use_iterator UI = OrigHeaderVal->use_begin(),
124218893Sdim         UE = OrigHeaderVal->use_end(); UI != UE; ) {
125218893Sdim      // Grab the use before incrementing the iterator.
126218893Sdim      Use &U = UI.getUse();
127234353Sdim
128218893Sdim      // Increment the iterator before removing the use from the list.
129218893Sdim      ++UI;
130234353Sdim
131218893Sdim      // SSAUpdater can't handle a non-PHI use in the same block as an
132218893Sdim      // earlier def. We can easily handle those cases manually.
133218893Sdim      Instruction *UserInst = cast<Instruction>(U.getUser());
134218893Sdim      if (!isa<PHINode>(UserInst)) {
135218893Sdim        BasicBlock *UserBB = UserInst->getParent();
136234353Sdim
137218893Sdim        // The original users in the OrigHeader are already using the
138218893Sdim        // original definitions.
139218893Sdim        if (UserBB == OrigHeader)
140218893Sdim          continue;
141234353Sdim
142218893Sdim        // Users in the OrigPreHeader need to use the value to which the
143218893Sdim        // original definitions are mapped.
144218893Sdim        if (UserBB == OrigPreheader) {
145218893Sdim          U = OrigPreHeaderVal;
146218893Sdim          continue;
147218893Sdim        }
148218893Sdim      }
149234353Sdim
150218893Sdim      // Anything else can be handled by SSAUpdater.
151218893Sdim      SSA.RewriteUse(U);
152218893Sdim    }
153218893Sdim  }
154234353Sdim}
155199481Srdivacky
156234353Sdim/// Determine whether the instructions in this range my be safely and cheaply
157234353Sdim/// speculated. This is not an important enough situation to develop complex
158234353Sdim/// heuristics. We handle a single arithmetic instruction along with any type
159234353Sdim/// conversions.
160234353Sdimstatic bool shouldSpeculateInstrs(BasicBlock::iterator Begin,
161234353Sdim                                  BasicBlock::iterator End) {
162234353Sdim  bool seenIncrement = false;
163234353Sdim  for (BasicBlock::iterator I = Begin; I != End; ++I) {
164234353Sdim
165234353Sdim    if (!isSafeToSpeculativelyExecute(I))
166234353Sdim      return false;
167234353Sdim
168234353Sdim    if (isa<DbgInfoIntrinsic>(I))
169234353Sdim      continue;
170234353Sdim
171234353Sdim    switch (I->getOpcode()) {
172234353Sdim    default:
173234353Sdim      return false;
174234353Sdim    case Instruction::GetElementPtr:
175234353Sdim      // GEPs are cheap if all indices are constant.
176234353Sdim      if (!cast<GEPOperator>(I)->hasAllConstantIndices())
177234353Sdim        return false;
178234353Sdim      // fall-thru to increment case
179234353Sdim    case Instruction::Add:
180234353Sdim    case Instruction::Sub:
181234353Sdim    case Instruction::And:
182234353Sdim    case Instruction::Or:
183234353Sdim    case Instruction::Xor:
184234353Sdim    case Instruction::Shl:
185234353Sdim    case Instruction::LShr:
186234353Sdim    case Instruction::AShr:
187234353Sdim      if (seenIncrement)
188234353Sdim        return false;
189234353Sdim      seenIncrement = true;
190234353Sdim      break;
191234353Sdim    case Instruction::Trunc:
192234353Sdim    case Instruction::ZExt:
193234353Sdim    case Instruction::SExt:
194234353Sdim      // ignore type conversions
195234353Sdim      break;
196234353Sdim    }
197234353Sdim  }
198234353Sdim  return true;
199234353Sdim}
200234353Sdim
201234353Sdim/// Fold the loop tail into the loop exit by speculating the loop tail
202234353Sdim/// instructions. Typically, this is a single post-increment. In the case of a
203234353Sdim/// simple 2-block loop, hoisting the increment can be much better than
204234353Sdim/// duplicating the entire loop header. In the cast of loops with early exits,
205234353Sdim/// rotation will not work anyway, but simplifyLoopLatch will put the loop in
206234353Sdim/// canonical form so downstream passes can handle it.
207234353Sdim///
208234353Sdim/// I don't believe this invalidates SCEV.
209234353Sdimvoid LoopRotate::simplifyLoopLatch(Loop *L) {
210234353Sdim  BasicBlock *Latch = L->getLoopLatch();
211234353Sdim  if (!Latch || Latch->hasAddressTaken())
212234353Sdim    return;
213234353Sdim
214234353Sdim  BranchInst *Jmp = dyn_cast<BranchInst>(Latch->getTerminator());
215234353Sdim  if (!Jmp || !Jmp->isUnconditional())
216234353Sdim    return;
217234353Sdim
218234353Sdim  BasicBlock *LastExit = Latch->getSinglePredecessor();
219234353Sdim  if (!LastExit || !L->isLoopExiting(LastExit))
220234353Sdim    return;
221234353Sdim
222234353Sdim  BranchInst *BI = dyn_cast<BranchInst>(LastExit->getTerminator());
223234353Sdim  if (!BI)
224234353Sdim    return;
225234353Sdim
226234353Sdim  if (!shouldSpeculateInstrs(Latch->begin(), Jmp))
227234353Sdim    return;
228234353Sdim
229234353Sdim  DEBUG(dbgs() << "Folding loop latch " << Latch->getName() << " into "
230234353Sdim        << LastExit->getName() << "\n");
231234353Sdim
232234353Sdim  // Hoist the instructions from Latch into LastExit.
233234353Sdim  LastExit->getInstList().splice(BI, Latch->getInstList(), Latch->begin(), Jmp);
234234353Sdim
235234353Sdim  unsigned FallThruPath = BI->getSuccessor(0) == Latch ? 0 : 1;
236234353Sdim  BasicBlock *Header = Jmp->getSuccessor(0);
237234353Sdim  assert(Header == L->getHeader() && "expected a backward branch");
238234353Sdim
239234353Sdim  // Remove Latch from the CFG so that LastExit becomes the new Latch.
240234353Sdim  BI->setSuccessor(FallThruPath, Header);
241234353Sdim  Latch->replaceSuccessorsPhiUsesWith(LastExit);
242234353Sdim  Jmp->eraseFromParent();
243234353Sdim
244234353Sdim  // Nuke the Latch block.
245234353Sdim  assert(Latch->empty() && "unable to evacuate Latch");
246234353Sdim  LI->removeBlock(Latch);
247234353Sdim  if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>())
248234353Sdim    DT->eraseNode(Latch);
249234353Sdim  Latch->eraseFromParent();
250234353Sdim}
251234353Sdim
252218893Sdim/// Rotate loop LP. Return true if the loop is rotated.
253218893Sdimbool LoopRotate::rotateLoop(Loop *L) {
254195098Sed  // If the loop has only one block then there is not much to rotate.
255193323Sed  if (L->getBlocks().size() == 1)
256193323Sed    return false;
257234353Sdim
258218893Sdim  BasicBlock *OrigHeader = L->getHeader();
259234353Sdim
260218893Sdim  BranchInst *BI = dyn_cast<BranchInst>(OrigHeader->getTerminator());
261218893Sdim  if (BI == 0 || BI->isUnconditional())
262218893Sdim    return false;
263234353Sdim
264195098Sed  // If the loop header is not one of the loop exiting blocks then
265195098Sed  // either this loop is already rotated or it is not
266193323Sed  // suitable for loop rotation transformations.
267198892Srdivacky  if (!L->isLoopExiting(OrigHeader))
268193323Sed    return false;
269193323Sed
270234353Sdim  // Updating PHInodes in loops with multiple exits adds complexity.
271193323Sed  // Keep it simple, and restrict loop rotation to loops with one exit only.
272193323Sed  // In future, lift this restriction and support for multiple exits if
273193323Sed  // required.
274193323Sed  SmallVector<BasicBlock*, 8> ExitBlocks;
275193323Sed  L->getExitBlocks(ExitBlocks);
276193323Sed  if (ExitBlocks.size() > 1)
277193323Sed    return false;
278193323Sed
279218893Sdim  // Check size of original header and reject loop if it is very big.
280218893Sdim  {
281218893Sdim    CodeMetrics Metrics;
282218893Sdim    Metrics.analyzeBasicBlock(OrigHeader);
283218893Sdim    if (Metrics.NumInsts > MAX_HEADER_SIZE)
284218893Sdim      return false;
285193323Sed  }
286193323Sed
287193323Sed  // Now, this loop is suitable for rotation.
288218893Sdim  BasicBlock *OrigPreheader = L->getLoopPreheader();
289218893Sdim  BasicBlock *OrigLatch = L->getLoopLatch();
290234353Sdim
291221345Sdim  // If the loop could not be converted to canonical form, it must have an
292221345Sdim  // indirectbr in it, just give up.
293221345Sdim  if (OrigPreheader == 0 || OrigLatch == 0)
294221345Sdim    return false;
295193323Sed
296198090Srdivacky  // Anything ScalarEvolution may know about this loop or the PHI nodes
297198090Srdivacky  // in its header will soon be invalidated.
298198090Srdivacky  if (ScalarEvolution *SE = getAnalysisIfAvailable<ScalarEvolution>())
299198892Srdivacky    SE->forgetLoop(L);
300198090Srdivacky
301193323Sed  // Find new Loop header. NewHeader is a Header's one and only successor
302193323Sed  // that is inside loop.  Header's other successor is outside the
303193323Sed  // loop.  Otherwise loop is not suitable for rotation.
304218893Sdim  BasicBlock *Exit = BI->getSuccessor(0);
305218893Sdim  BasicBlock *NewHeader = BI->getSuccessor(1);
306193323Sed  if (L->contains(Exit))
307193323Sed    std::swap(Exit, NewHeader);
308193323Sed  assert(NewHeader && "Unable to determine new loop header");
309234353Sdim  assert(L->contains(NewHeader) && !L->contains(Exit) &&
310193323Sed         "Unable to determine loop header and exit blocks");
311234353Sdim
312195098Sed  // This code assumes that the new header has exactly one predecessor.
313195098Sed  // Remove any single-entry PHI nodes in it.
314193323Sed  assert(NewHeader->getSinglePredecessor() &&
315193323Sed         "New header doesn't have one pred!");
316193323Sed  FoldSingleEntryPHINodes(NewHeader);
317193323Sed
318198892Srdivacky  // Begin by walking OrigHeader and populating ValueMap with an entry for
319198892Srdivacky  // each Instruction.
320193323Sed  BasicBlock::iterator I = OrigHeader->begin(), E = OrigHeader->end();
321218893Sdim  ValueToValueMapTy ValueMap;
322193323Sed
323198892Srdivacky  // For PHI nodes, the value available in OldPreHeader is just the
324198892Srdivacky  // incoming value from OldPreHeader.
325198892Srdivacky  for (; PHINode *PN = dyn_cast<PHINode>(I); ++I)
326224145Sdim    ValueMap[PN] = PN->getIncomingValueForBlock(OrigPreheader);
327193323Sed
328218893Sdim  // For the rest of the instructions, either hoist to the OrigPreheader if
329218893Sdim  // possible or create a clone in the OldPreHeader if not.
330218893Sdim  TerminatorInst *LoopEntryBranch = OrigPreheader->getTerminator();
331218893Sdim  while (I != E) {
332218893Sdim    Instruction *Inst = I++;
333234353Sdim
334218893Sdim    // If the instruction's operands are invariant and it doesn't read or write
335218893Sdim    // memory, then it is safe to hoist.  Doing this doesn't change the order of
336218893Sdim    // execution in the preheader, but does prevent the instruction from
337218893Sdim    // executing in each iteration of the loop.  This means it is safe to hoist
338218893Sdim    // something that might trap, but isn't safe to hoist something that reads
339218893Sdim    // memory (without proving that the loop doesn't write).
340218893Sdim    if (L->hasLoopInvariantOperands(Inst) &&
341218893Sdim        !Inst->mayReadFromMemory() && !Inst->mayWriteToMemory() &&
342234353Sdim        !isa<TerminatorInst>(Inst) && !isa<DbgInfoIntrinsic>(Inst) &&
343234353Sdim        !isa<AllocaInst>(Inst)) {
344218893Sdim      Inst->moveBefore(LoopEntryBranch);
345218893Sdim      continue;
346218893Sdim    }
347234353Sdim
348218893Sdim    // Otherwise, create a duplicate of the instruction.
349218893Sdim    Instruction *C = Inst->clone();
350234353Sdim
351218893Sdim    // Eagerly remap the operands of the instruction.
352218893Sdim    RemapInstruction(C, ValueMap,
353218893Sdim                     RF_NoModuleLevelChanges|RF_IgnoreMissingEntries);
354234353Sdim
355218893Sdim    // With the operands remapped, see if the instruction constant folds or is
356218893Sdim    // otherwise simplifyable.  This commonly occurs because the entry from PHI
357218893Sdim    // nodes allows icmps and other instructions to fold.
358218893Sdim    Value *V = SimplifyInstruction(C);
359218893Sdim    if (V && LI->replacementPreservesLCSSAForm(C, V)) {
360218893Sdim      // If so, then delete the temporary instruction and stick the folded value
361218893Sdim      // in the map.
362218893Sdim      delete C;
363218893Sdim      ValueMap[Inst] = V;
364218893Sdim    } else {
365218893Sdim      // Otherwise, stick the new instruction into the new block!
366218893Sdim      C->setName(Inst->getName());
367218893Sdim      C->insertBefore(LoopEntryBranch);
368218893Sdim      ValueMap[Inst] = C;
369218893Sdim    }
370198892Srdivacky  }
371193323Sed
372198892Srdivacky  // Along with all the other instructions, we just cloned OrigHeader's
373198892Srdivacky  // terminator into OrigPreHeader. Fix up the PHI nodes in each of OrigHeader's
374198892Srdivacky  // successors by duplicating their incoming values for OrigHeader.
375198892Srdivacky  TerminatorInst *TI = OrigHeader->getTerminator();
376198892Srdivacky  for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i)
377198892Srdivacky    for (BasicBlock::iterator BI = TI->getSuccessor(i)->begin();
378198892Srdivacky         PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
379218893Sdim      PN->addIncoming(PN->getIncomingValueForBlock(OrigHeader), OrigPreheader);
380193323Sed
381198892Srdivacky  // Now that OrigPreHeader has a clone of OrigHeader's terminator, remove
382198892Srdivacky  // OrigPreHeader's old terminator (the original branch into the loop), and
383198892Srdivacky  // remove the corresponding incoming values from the PHI nodes in OrigHeader.
384198892Srdivacky  LoopEntryBranch->eraseFromParent();
385193323Sed
386218893Sdim  // If there were any uses of instructions in the duplicated block outside the
387218893Sdim  // loop, update them, inserting PHI nodes as required
388218893Sdim  RewriteUsesOfClonedInstructions(OrigHeader, OrigPreheader, ValueMap);
389193323Sed
390198892Srdivacky  // NewHeader is now the header of the loop.
391193323Sed  L->moveToHeader(NewHeader);
392218893Sdim  assert(L->getHeader() == NewHeader && "Latch block is our new header");
393193323Sed
394234353Sdim
395218893Sdim  // At this point, we've finished our major CFG changes.  As part of cloning
396218893Sdim  // the loop into the preheader we've simplified instructions and the
397218893Sdim  // duplicated conditional branch may now be branching on a constant.  If it is
398218893Sdim  // branching on a constant and if that constant means that we enter the loop,
399218893Sdim  // then we fold away the cond branch to an uncond branch.  This simplifies the
400218893Sdim  // loop in cases important for nested loops, and it also means we don't have
401218893Sdim  // to split as many edges.
402218893Sdim  BranchInst *PHBI = cast<BranchInst>(OrigPreheader->getTerminator());
403218893Sdim  assert(PHBI->isConditional() && "Should be clone of BI condbr!");
404218893Sdim  if (!isa<ConstantInt>(PHBI->getCondition()) ||
405218893Sdim      PHBI->getSuccessor(cast<ConstantInt>(PHBI->getCondition())->isZero())
406218893Sdim          != NewHeader) {
407218893Sdim    // The conditional branch can't be folded, handle the general case.
408218893Sdim    // Update DominatorTree to reflect the CFG change we just made.  Then split
409218893Sdim    // edges as necessary to preserve LoopSimplify form.
410218893Sdim    if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>()) {
411218893Sdim      // Since OrigPreheader now has the conditional branch to Exit block, it is
412218893Sdim      // the dominator of Exit.
413218893Sdim      DT->changeImmediateDominator(Exit, OrigPreheader);
414218893Sdim      DT->changeImmediateDominator(NewHeader, OrigPreheader);
415234353Sdim
416218893Sdim      // Update OrigHeader to be dominated by the new header block.
417218893Sdim      DT->changeImmediateDominator(OrigHeader, OrigLatch);
418193323Sed    }
419234353Sdim
420218893Sdim    // Right now OrigPreHeader has two successors, NewHeader and ExitBlock, and
421239462Sdim    // thus is not a preheader anymore.
422239462Sdim    // Split the edge to form a real preheader.
423218893Sdim    BasicBlock *NewPH = SplitCriticalEdge(OrigPreheader, NewHeader, this);
424218893Sdim    NewPH->setName(NewHeader->getName() + ".lr.ph");
425234353Sdim
426239462Sdim    // Preserve canonical loop form, which means that 'Exit' should have only
427239462Sdim    // one predecessor.
428218893Sdim    BasicBlock *ExitSplit = SplitCriticalEdge(L->getLoopLatch(), Exit, this);
429218893Sdim    ExitSplit->moveBefore(Exit);
430218893Sdim  } else {
431218893Sdim    // We can fold the conditional branch in the preheader, this makes things
432218893Sdim    // simpler. The first step is to remove the extra edge to the Exit block.
433218893Sdim    Exit->removePredecessor(OrigPreheader, true /*preserve LCSSA*/);
434221345Sdim    BranchInst *NewBI = BranchInst::Create(NewHeader, PHBI);
435221345Sdim    NewBI->setDebugLoc(PHBI->getDebugLoc());
436218893Sdim    PHBI->eraseFromParent();
437234353Sdim
438218893Sdim    // With our CFG finalized, update DomTree if it is available.
439198090Srdivacky    if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>()) {
440218893Sdim      // Update OrigHeader to be dominated by the new header block.
441218893Sdim      DT->changeImmediateDominator(NewHeader, OrigPreheader);
442218893Sdim      DT->changeImmediateDominator(OrigHeader, OrigLatch);
443193323Sed    }
444193323Sed  }
445234353Sdim
446218893Sdim  assert(L->getLoopPreheader() && "Invalid loop preheader after loop rotation");
447218893Sdim  assert(L->getLoopLatch() && "Invalid loop latch after loop rotation");
448193323Sed
449218893Sdim  // Now that the CFG and DomTree are in a consistent state again, try to merge
450218893Sdim  // the OrigHeader block into OrigLatch.  This will succeed if they are
451218893Sdim  // connected by an unconditional branch.  This is just a cleanup so the
452218893Sdim  // emitted code isn't too gross in this common case.
453218893Sdim  MergeBlockIntoPredecessor(OrigHeader, this);
454234353Sdim
455218893Sdim  ++NumRotated;
456218893Sdim  return true;
457218893Sdim}
458193323Sed
459