1193323Sed//===- LoopRotation.cpp - Loop Rotation Pass ------------------------------===//
2193323Sed//
3193323Sed//                     The LLVM Compiler Infrastructure
4193323Sed//
5193323Sed// This file is distributed under the University of Illinois Open Source
6193323Sed// License. See LICENSE.TXT for details.
7193323Sed//
8193323Sed//===----------------------------------------------------------------------===//
9193323Sed//
10193323Sed// This file implements Loop Rotation Pass.
11193323Sed//
12193323Sed//===----------------------------------------------------------------------===//
13193323Sed
14193323Sed#include "llvm/Transforms/Scalar.h"
15249423Sdim#include "llvm/ADT/Statistic.h"
16296417Sdim#include "llvm/Analysis/AliasAnalysis.h"
17296417Sdim#include "llvm/Analysis/BasicAliasAnalysis.h"
18280031Sdim#include "llvm/Analysis/AssumptionCache.h"
19218893Sdim#include "llvm/Analysis/CodeMetrics.h"
20249423Sdim#include "llvm/Analysis/InstructionSimplify.h"
21296417Sdim#include "llvm/Analysis/GlobalsModRef.h"
22193323Sed#include "llvm/Analysis/LoopPass.h"
23193323Sed#include "llvm/Analysis/ScalarEvolution.h"
24296417Sdim#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
25249423Sdim#include "llvm/Analysis/TargetTransformInfo.h"
26234353Sdim#include "llvm/Analysis/ValueTracking.h"
27276479Sdim#include "llvm/IR/CFG.h"
28276479Sdim#include "llvm/IR/Dominators.h"
29249423Sdim#include "llvm/IR/Function.h"
30249423Sdim#include "llvm/IR/IntrinsicInst.h"
31288943Sdim#include "llvm/IR/Module.h"
32276479Sdim#include "llvm/Support/CommandLine.h"
33249423Sdim#include "llvm/Support/Debug.h"
34288943Sdim#include "llvm/Support/raw_ostream.h"
35249423Sdim#include "llvm/Transforms/Utils/BasicBlockUtils.h"
36193323Sed#include "llvm/Transforms/Utils/Local.h"
37198892Srdivacky#include "llvm/Transforms/Utils/SSAUpdater.h"
38218893Sdim#include "llvm/Transforms/Utils/ValueMapper.h"
39193323Sedusing namespace llvm;
40193323Sed
41276479Sdim#define DEBUG_TYPE "loop-rotate"
42193323Sed
43276479Sdimstatic cl::opt<unsigned>
44276479SdimDefaultRotationThreshold("rotation-max-header-size", cl::init(16), cl::Hidden,
45276479Sdim       cl::desc("The default maximum header size for automatic loop rotation"));
46276479Sdim
47193323SedSTATISTIC(NumRotated, "Number of loops rotated");
48193323Sed
49218893Sdim/// RewriteUsesOfClonedInstructions - We just cloned the instructions from the
50218893Sdim/// old header into the preheader.  If there were uses of the values produced by
51218893Sdim/// these instruction that were outside of the loop, we have to insert PHI nodes
52218893Sdim/// to merge the two values.  Do this now.
53218893Sdimstatic void RewriteUsesOfClonedInstructions(BasicBlock *OrigHeader,
54218893Sdim                                            BasicBlock *OrigPreheader,
55218893Sdim                                            ValueToValueMapTy &ValueMap) {
56218893Sdim  // Remove PHI node entries that are no longer live.
57218893Sdim  BasicBlock::iterator I, E = OrigHeader->end();
58218893Sdim  for (I = OrigHeader->begin(); PHINode *PN = dyn_cast<PHINode>(I); ++I)
59218893Sdim    PN->removeIncomingValue(PN->getBasicBlockIndex(OrigPreheader));
60234353Sdim
61218893Sdim  // Now fix up users of the instructions in OrigHeader, inserting PHI nodes
62218893Sdim  // as necessary.
63218893Sdim  SSAUpdater SSA;
64218893Sdim  for (I = OrigHeader->begin(); I != E; ++I) {
65296417Sdim    Value *OrigHeaderVal = &*I;
66234353Sdim
67218893Sdim    // If there are no uses of the value (e.g. because it returns void), there
68218893Sdim    // is nothing to rewrite.
69218893Sdim    if (OrigHeaderVal->use_empty())
70218893Sdim      continue;
71234353Sdim
72218893Sdim    Value *OrigPreHeaderVal = ValueMap[OrigHeaderVal];
73193323Sed
74218893Sdim    // The value now exits in two versions: the initial value in the preheader
75218893Sdim    // and the loop "next" value in the original header.
76218893Sdim    SSA.Initialize(OrigHeaderVal->getType(), OrigHeaderVal->getName());
77218893Sdim    SSA.AddAvailableValue(OrigHeader, OrigHeaderVal);
78218893Sdim    SSA.AddAvailableValue(OrigPreheader, OrigPreHeaderVal);
79234353Sdim
80218893Sdim    // Visit each use of the OrigHeader instruction.
81218893Sdim    for (Value::use_iterator UI = OrigHeaderVal->use_begin(),
82218893Sdim         UE = OrigHeaderVal->use_end(); UI != UE; ) {
83218893Sdim      // Grab the use before incrementing the iterator.
84276479Sdim      Use &U = *UI;
85234353Sdim
86218893Sdim      // Increment the iterator before removing the use from the list.
87218893Sdim      ++UI;
88234353Sdim
89218893Sdim      // SSAUpdater can't handle a non-PHI use in the same block as an
90218893Sdim      // earlier def. We can easily handle those cases manually.
91218893Sdim      Instruction *UserInst = cast<Instruction>(U.getUser());
92218893Sdim      if (!isa<PHINode>(UserInst)) {
93218893Sdim        BasicBlock *UserBB = UserInst->getParent();
94234353Sdim
95218893Sdim        // The original users in the OrigHeader are already using the
96218893Sdim        // original definitions.
97218893Sdim        if (UserBB == OrigHeader)
98218893Sdim          continue;
99234353Sdim
100218893Sdim        // Users in the OrigPreHeader need to use the value to which the
101218893Sdim        // original definitions are mapped.
102218893Sdim        if (UserBB == OrigPreheader) {
103218893Sdim          U = OrigPreHeaderVal;
104218893Sdim          continue;
105218893Sdim        }
106218893Sdim      }
107234353Sdim
108218893Sdim      // Anything else can be handled by SSAUpdater.
109218893Sdim      SSA.RewriteUse(U);
110218893Sdim    }
111218893Sdim  }
112234353Sdim}
113199481Srdivacky
114218893Sdim/// Rotate loop LP. Return true if the loop is rotated.
115251662Sdim///
116251662Sdim/// \param SimplifiedLatch is true if the latch was just folded into the final
117251662Sdim/// loop exit. In this case we may want to rotate even though the new latch is
118251662Sdim/// now an exiting branch. This rotation would have happened had the latch not
119251662Sdim/// been simplified. However, if SimplifiedLatch is false, then we avoid
120251662Sdim/// rotating loops in which the latch exits to avoid excessive or endless
121251662Sdim/// rotation. LoopRotate should be repeatable and converge to a canonical
122251662Sdim/// form. This property is satisfied because simplifying the loop latch can only
123251662Sdim/// happen once across multiple invocations of the LoopRotate pass.
124296417Sdimstatic bool rotateLoop(Loop *L, unsigned MaxHeaderSize, LoopInfo *LI,
125296417Sdim                       const TargetTransformInfo *TTI, AssumptionCache *AC,
126296417Sdim                       DominatorTree *DT, ScalarEvolution *SE,
127296417Sdim                       bool SimplifiedLatch) {
128195098Sed  // If the loop has only one block then there is not much to rotate.
129193323Sed  if (L->getBlocks().size() == 1)
130193323Sed    return false;
131234353Sdim
132218893Sdim  BasicBlock *OrigHeader = L->getHeader();
133243830Sdim  BasicBlock *OrigLatch = L->getLoopLatch();
134234353Sdim
135218893Sdim  BranchInst *BI = dyn_cast<BranchInst>(OrigHeader->getTerminator());
136276479Sdim  if (!BI || BI->isUnconditional())
137218893Sdim    return false;
138234353Sdim
139195098Sed  // If the loop header is not one of the loop exiting blocks then
140195098Sed  // either this loop is already rotated or it is not
141193323Sed  // suitable for loop rotation transformations.
142198892Srdivacky  if (!L->isLoopExiting(OrigHeader))
143193323Sed    return false;
144193323Sed
145243830Sdim  // If the loop latch already contains a branch that leaves the loop then the
146243830Sdim  // loop is already rotated.
147276479Sdim  if (!OrigLatch)
148193323Sed    return false;
149193323Sed
150251662Sdim  // Rotate if either the loop latch does *not* exit the loop, or if the loop
151251662Sdim  // latch was just simplified.
152251662Sdim  if (L->isLoopExiting(OrigLatch) && !SimplifiedLatch)
153251662Sdim    return false;
154251662Sdim
155249423Sdim  // Check size of original header and reject loop if it is very big or we can't
156249423Sdim  // duplicate blocks inside it.
157218893Sdim  {
158280031Sdim    SmallPtrSet<const Value *, 32> EphValues;
159280031Sdim    CodeMetrics::collectEphemeralValues(L, AC, EphValues);
160280031Sdim
161218893Sdim    CodeMetrics Metrics;
162280031Sdim    Metrics.analyzeBasicBlock(OrigHeader, *TTI, EphValues);
163249423Sdim    if (Metrics.notDuplicatable) {
164276479Sdim      DEBUG(dbgs() << "LoopRotation: NOT rotating - contains non-duplicatable"
165249423Sdim            << " instructions: "; L->dump());
166249423Sdim      return false;
167249423Sdim    }
168276479Sdim    if (Metrics.NumInsts > MaxHeaderSize)
169218893Sdim      return false;
170193323Sed  }
171193323Sed
172193323Sed  // Now, this loop is suitable for rotation.
173218893Sdim  BasicBlock *OrigPreheader = L->getLoopPreheader();
174234353Sdim
175221345Sdim  // If the loop could not be converted to canonical form, it must have an
176221345Sdim  // indirectbr in it, just give up.
177276479Sdim  if (!OrigPreheader)
178221345Sdim    return false;
179193323Sed
180198090Srdivacky  // Anything ScalarEvolution may know about this loop or the PHI nodes
181198090Srdivacky  // in its header will soon be invalidated.
182296417Sdim  if (SE)
183198892Srdivacky    SE->forgetLoop(L);
184198090Srdivacky
185243830Sdim  DEBUG(dbgs() << "LoopRotation: rotating "; L->dump());
186243830Sdim
187193323Sed  // Find new Loop header. NewHeader is a Header's one and only successor
188193323Sed  // that is inside loop.  Header's other successor is outside the
189193323Sed  // loop.  Otherwise loop is not suitable for rotation.
190218893Sdim  BasicBlock *Exit = BI->getSuccessor(0);
191218893Sdim  BasicBlock *NewHeader = BI->getSuccessor(1);
192193323Sed  if (L->contains(Exit))
193193323Sed    std::swap(Exit, NewHeader);
194193323Sed  assert(NewHeader && "Unable to determine new loop header");
195234353Sdim  assert(L->contains(NewHeader) && !L->contains(Exit) &&
196193323Sed         "Unable to determine loop header and exit blocks");
197234353Sdim
198195098Sed  // This code assumes that the new header has exactly one predecessor.
199195098Sed  // Remove any single-entry PHI nodes in it.
200193323Sed  assert(NewHeader->getSinglePredecessor() &&
201193323Sed         "New header doesn't have one pred!");
202193323Sed  FoldSingleEntryPHINodes(NewHeader);
203193323Sed
204198892Srdivacky  // Begin by walking OrigHeader and populating ValueMap with an entry for
205198892Srdivacky  // each Instruction.
206193323Sed  BasicBlock::iterator I = OrigHeader->begin(), E = OrigHeader->end();
207218893Sdim  ValueToValueMapTy ValueMap;
208193323Sed
209198892Srdivacky  // For PHI nodes, the value available in OldPreHeader is just the
210198892Srdivacky  // incoming value from OldPreHeader.
211198892Srdivacky  for (; PHINode *PN = dyn_cast<PHINode>(I); ++I)
212224145Sdim    ValueMap[PN] = PN->getIncomingValueForBlock(OrigPreheader);
213193323Sed
214288943Sdim  const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
215288943Sdim
216218893Sdim  // For the rest of the instructions, either hoist to the OrigPreheader if
217218893Sdim  // possible or create a clone in the OldPreHeader if not.
218218893Sdim  TerminatorInst *LoopEntryBranch = OrigPreheader->getTerminator();
219218893Sdim  while (I != E) {
220296417Sdim    Instruction *Inst = &*I++;
221234353Sdim
222218893Sdim    // If the instruction's operands are invariant and it doesn't read or write
223218893Sdim    // memory, then it is safe to hoist.  Doing this doesn't change the order of
224218893Sdim    // execution in the preheader, but does prevent the instruction from
225218893Sdim    // executing in each iteration of the loop.  This means it is safe to hoist
226218893Sdim    // something that might trap, but isn't safe to hoist something that reads
227218893Sdim    // memory (without proving that the loop doesn't write).
228218893Sdim    if (L->hasLoopInvariantOperands(Inst) &&
229218893Sdim        !Inst->mayReadFromMemory() && !Inst->mayWriteToMemory() &&
230234353Sdim        !isa<TerminatorInst>(Inst) && !isa<DbgInfoIntrinsic>(Inst) &&
231234353Sdim        !isa<AllocaInst>(Inst)) {
232218893Sdim      Inst->moveBefore(LoopEntryBranch);
233218893Sdim      continue;
234218893Sdim    }
235234353Sdim
236218893Sdim    // Otherwise, create a duplicate of the instruction.
237218893Sdim    Instruction *C = Inst->clone();
238234353Sdim
239218893Sdim    // Eagerly remap the operands of the instruction.
240218893Sdim    RemapInstruction(C, ValueMap,
241218893Sdim                     RF_NoModuleLevelChanges|RF_IgnoreMissingEntries);
242234353Sdim
243218893Sdim    // With the operands remapped, see if the instruction constant folds or is
244218893Sdim    // otherwise simplifyable.  This commonly occurs because the entry from PHI
245218893Sdim    // nodes allows icmps and other instructions to fold.
246288943Sdim    // FIXME: Provide TLI, DT, AC to SimplifyInstruction.
247288943Sdim    Value *V = SimplifyInstruction(C, DL);
248218893Sdim    if (V && LI->replacementPreservesLCSSAForm(C, V)) {
249218893Sdim      // If so, then delete the temporary instruction and stick the folded value
250218893Sdim      // in the map.
251218893Sdim      delete C;
252218893Sdim      ValueMap[Inst] = V;
253218893Sdim    } else {
254218893Sdim      // Otherwise, stick the new instruction into the new block!
255218893Sdim      C->setName(Inst->getName());
256218893Sdim      C->insertBefore(LoopEntryBranch);
257218893Sdim      ValueMap[Inst] = C;
258218893Sdim    }
259198892Srdivacky  }
260193323Sed
261198892Srdivacky  // Along with all the other instructions, we just cloned OrigHeader's
262198892Srdivacky  // terminator into OrigPreHeader. Fix up the PHI nodes in each of OrigHeader's
263198892Srdivacky  // successors by duplicating their incoming values for OrigHeader.
264198892Srdivacky  TerminatorInst *TI = OrigHeader->getTerminator();
265296417Sdim  for (BasicBlock *SuccBB : TI->successors())
266296417Sdim    for (BasicBlock::iterator BI = SuccBB->begin();
267198892Srdivacky         PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
268218893Sdim      PN->addIncoming(PN->getIncomingValueForBlock(OrigHeader), OrigPreheader);
269193323Sed
270198892Srdivacky  // Now that OrigPreHeader has a clone of OrigHeader's terminator, remove
271198892Srdivacky  // OrigPreHeader's old terminator (the original branch into the loop), and
272198892Srdivacky  // remove the corresponding incoming values from the PHI nodes in OrigHeader.
273198892Srdivacky  LoopEntryBranch->eraseFromParent();
274193323Sed
275218893Sdim  // If there were any uses of instructions in the duplicated block outside the
276218893Sdim  // loop, update them, inserting PHI nodes as required
277218893Sdim  RewriteUsesOfClonedInstructions(OrigHeader, OrigPreheader, ValueMap);
278193323Sed
279198892Srdivacky  // NewHeader is now the header of the loop.
280193323Sed  L->moveToHeader(NewHeader);
281218893Sdim  assert(L->getHeader() == NewHeader && "Latch block is our new header");
282193323Sed
283234353Sdim
284218893Sdim  // At this point, we've finished our major CFG changes.  As part of cloning
285218893Sdim  // the loop into the preheader we've simplified instructions and the
286218893Sdim  // duplicated conditional branch may now be branching on a constant.  If it is
287218893Sdim  // branching on a constant and if that constant means that we enter the loop,
288218893Sdim  // then we fold away the cond branch to an uncond branch.  This simplifies the
289218893Sdim  // loop in cases important for nested loops, and it also means we don't have
290218893Sdim  // to split as many edges.
291218893Sdim  BranchInst *PHBI = cast<BranchInst>(OrigPreheader->getTerminator());
292218893Sdim  assert(PHBI->isConditional() && "Should be clone of BI condbr!");
293218893Sdim  if (!isa<ConstantInt>(PHBI->getCondition()) ||
294218893Sdim      PHBI->getSuccessor(cast<ConstantInt>(PHBI->getCondition())->isZero())
295218893Sdim          != NewHeader) {
296218893Sdim    // The conditional branch can't be folded, handle the general case.
297218893Sdim    // Update DominatorTree to reflect the CFG change we just made.  Then split
298218893Sdim    // edges as necessary to preserve LoopSimplify form.
299288943Sdim    if (DT) {
300243830Sdim      // Everything that was dominated by the old loop header is now dominated
301243830Sdim      // by the original loop preheader. Conceptually the header was merged
302243830Sdim      // into the preheader, even though we reuse the actual block as a new
303243830Sdim      // loop latch.
304288943Sdim      DomTreeNode *OrigHeaderNode = DT->getNode(OrigHeader);
305243830Sdim      SmallVector<DomTreeNode *, 8> HeaderChildren(OrigHeaderNode->begin(),
306243830Sdim                                                   OrigHeaderNode->end());
307288943Sdim      DomTreeNode *OrigPreheaderNode = DT->getNode(OrigPreheader);
308243830Sdim      for (unsigned I = 0, E = HeaderChildren.size(); I != E; ++I)
309288943Sdim        DT->changeImmediateDominator(HeaderChildren[I], OrigPreheaderNode);
310234353Sdim
311288943Sdim      assert(DT->getNode(Exit)->getIDom() == OrigPreheaderNode);
312288943Sdim      assert(DT->getNode(NewHeader)->getIDom() == OrigPreheaderNode);
313243830Sdim
314218893Sdim      // Update OrigHeader to be dominated by the new header block.
315288943Sdim      DT->changeImmediateDominator(OrigHeader, OrigLatch);
316193323Sed    }
317234353Sdim
318218893Sdim    // Right now OrigPreHeader has two successors, NewHeader and ExitBlock, and
319239462Sdim    // thus is not a preheader anymore.
320239462Sdim    // Split the edge to form a real preheader.
321288943Sdim    BasicBlock *NewPH = SplitCriticalEdge(
322288943Sdim        OrigPreheader, NewHeader,
323288943Sdim        CriticalEdgeSplittingOptions(DT, LI).setPreserveLCSSA());
324218893Sdim    NewPH->setName(NewHeader->getName() + ".lr.ph");
325234353Sdim
326239462Sdim    // Preserve canonical loop form, which means that 'Exit' should have only
327276479Sdim    // one predecessor. Note that Exit could be an exit block for multiple
328276479Sdim    // nested loops, causing both of the edges to now be critical and need to
329276479Sdim    // be split.
330276479Sdim    SmallVector<BasicBlock *, 4> ExitPreds(pred_begin(Exit), pred_end(Exit));
331276479Sdim    bool SplitLatchEdge = false;
332276479Sdim    for (SmallVectorImpl<BasicBlock *>::iterator PI = ExitPreds.begin(),
333276479Sdim                                                 PE = ExitPreds.end();
334276479Sdim         PI != PE; ++PI) {
335276479Sdim      // We only need to split loop exit edges.
336276479Sdim      Loop *PredLoop = LI->getLoopFor(*PI);
337276479Sdim      if (!PredLoop || PredLoop->contains(Exit))
338276479Sdim        continue;
339279161Sdim      if (isa<IndirectBrInst>((*PI)->getTerminator()))
340279161Sdim        continue;
341276479Sdim      SplitLatchEdge |= L->getLoopLatch() == *PI;
342288943Sdim      BasicBlock *ExitSplit = SplitCriticalEdge(
343288943Sdim          *PI, Exit, CriticalEdgeSplittingOptions(DT, LI).setPreserveLCSSA());
344276479Sdim      ExitSplit->moveBefore(Exit);
345276479Sdim    }
346276479Sdim    assert(SplitLatchEdge &&
347276479Sdim           "Despite splitting all preds, failed to split latch exit?");
348218893Sdim  } else {
349218893Sdim    // We can fold the conditional branch in the preheader, this makes things
350218893Sdim    // simpler. The first step is to remove the extra edge to the Exit block.
351218893Sdim    Exit->removePredecessor(OrigPreheader, true /*preserve LCSSA*/);
352221345Sdim    BranchInst *NewBI = BranchInst::Create(NewHeader, PHBI);
353221345Sdim    NewBI->setDebugLoc(PHBI->getDebugLoc());
354218893Sdim    PHBI->eraseFromParent();
355234353Sdim
356218893Sdim    // With our CFG finalized, update DomTree if it is available.
357288943Sdim    if (DT) {
358218893Sdim      // Update OrigHeader to be dominated by the new header block.
359288943Sdim      DT->changeImmediateDominator(NewHeader, OrigPreheader);
360288943Sdim      DT->changeImmediateDominator(OrigHeader, OrigLatch);
361243830Sdim
362243830Sdim      // Brute force incremental dominator tree update. Call
363243830Sdim      // findNearestCommonDominator on all CFG predecessors of each child of the
364243830Sdim      // original header.
365288943Sdim      DomTreeNode *OrigHeaderNode = DT->getNode(OrigHeader);
366243830Sdim      SmallVector<DomTreeNode *, 8> HeaderChildren(OrigHeaderNode->begin(),
367243830Sdim                                                   OrigHeaderNode->end());
368243830Sdim      bool Changed;
369243830Sdim      do {
370243830Sdim        Changed = false;
371243830Sdim        for (unsigned I = 0, E = HeaderChildren.size(); I != E; ++I) {
372243830Sdim          DomTreeNode *Node = HeaderChildren[I];
373243830Sdim          BasicBlock *BB = Node->getBlock();
374243830Sdim
375243830Sdim          pred_iterator PI = pred_begin(BB);
376243830Sdim          BasicBlock *NearestDom = *PI;
377243830Sdim          for (pred_iterator PE = pred_end(BB); PI != PE; ++PI)
378288943Sdim            NearestDom = DT->findNearestCommonDominator(NearestDom, *PI);
379243830Sdim
380243830Sdim          // Remember if this changes the DomTree.
381243830Sdim          if (Node->getIDom()->getBlock() != NearestDom) {
382288943Sdim            DT->changeImmediateDominator(BB, NearestDom);
383243830Sdim            Changed = true;
384243830Sdim          }
385243830Sdim        }
386243830Sdim
387243830Sdim      // If the dominator changed, this may have an effect on other
388243830Sdim      // predecessors, continue until we reach a fixpoint.
389243830Sdim      } while (Changed);
390193323Sed    }
391193323Sed  }
392234353Sdim
393218893Sdim  assert(L->getLoopPreheader() && "Invalid loop preheader after loop rotation");
394218893Sdim  assert(L->getLoopLatch() && "Invalid loop latch after loop rotation");
395193323Sed
396218893Sdim  // Now that the CFG and DomTree are in a consistent state again, try to merge
397218893Sdim  // the OrigHeader block into OrigLatch.  This will succeed if they are
398218893Sdim  // connected by an unconditional branch.  This is just a cleanup so the
399218893Sdim  // emitted code isn't too gross in this common case.
400288943Sdim  MergeBlockIntoPredecessor(OrigHeader, DT, LI);
401234353Sdim
402243830Sdim  DEBUG(dbgs() << "LoopRotation: into "; L->dump());
403243830Sdim
404218893Sdim  ++NumRotated;
405218893Sdim  return true;
406218893Sdim}
407296417Sdim
408296417Sdim/// Determine whether the instructions in this range may be safely and cheaply
409296417Sdim/// speculated. This is not an important enough situation to develop complex
410296417Sdim/// heuristics. We handle a single arithmetic instruction along with any type
411296417Sdim/// conversions.
412296417Sdimstatic bool shouldSpeculateInstrs(BasicBlock::iterator Begin,
413296417Sdim                                  BasicBlock::iterator End, Loop *L) {
414296417Sdim  bool seenIncrement = false;
415296417Sdim  bool MultiExitLoop = false;
416296417Sdim
417296417Sdim  if (!L->getExitingBlock())
418296417Sdim    MultiExitLoop = true;
419296417Sdim
420296417Sdim  for (BasicBlock::iterator I = Begin; I != End; ++I) {
421296417Sdim
422296417Sdim    if (!isSafeToSpeculativelyExecute(&*I))
423296417Sdim      return false;
424296417Sdim
425296417Sdim    if (isa<DbgInfoIntrinsic>(I))
426296417Sdim      continue;
427296417Sdim
428296417Sdim    switch (I->getOpcode()) {
429296417Sdim    default:
430296417Sdim      return false;
431296417Sdim    case Instruction::GetElementPtr:
432296417Sdim      // GEPs are cheap if all indices are constant.
433296417Sdim      if (!cast<GEPOperator>(I)->hasAllConstantIndices())
434296417Sdim        return false;
435296417Sdim      // fall-thru to increment case
436296417Sdim    case Instruction::Add:
437296417Sdim    case Instruction::Sub:
438296417Sdim    case Instruction::And:
439296417Sdim    case Instruction::Or:
440296417Sdim    case Instruction::Xor:
441296417Sdim    case Instruction::Shl:
442296417Sdim    case Instruction::LShr:
443296417Sdim    case Instruction::AShr: {
444296417Sdim      Value *IVOpnd = !isa<Constant>(I->getOperand(0))
445296417Sdim                          ? I->getOperand(0)
446296417Sdim                          : !isa<Constant>(I->getOperand(1))
447296417Sdim                                ? I->getOperand(1)
448296417Sdim                                : nullptr;
449296417Sdim      if (!IVOpnd)
450296417Sdim        return false;
451296417Sdim
452296417Sdim      // If increment operand is used outside of the loop, this speculation
453296417Sdim      // could cause extra live range interference.
454296417Sdim      if (MultiExitLoop) {
455296417Sdim        for (User *UseI : IVOpnd->users()) {
456296417Sdim          auto *UserInst = cast<Instruction>(UseI);
457296417Sdim          if (!L->contains(UserInst))
458296417Sdim            return false;
459296417Sdim        }
460296417Sdim      }
461296417Sdim
462296417Sdim      if (seenIncrement)
463296417Sdim        return false;
464296417Sdim      seenIncrement = true;
465296417Sdim      break;
466296417Sdim    }
467296417Sdim    case Instruction::Trunc:
468296417Sdim    case Instruction::ZExt:
469296417Sdim    case Instruction::SExt:
470296417Sdim      // ignore type conversions
471296417Sdim      break;
472296417Sdim    }
473296417Sdim  }
474296417Sdim  return true;
475296417Sdim}
476296417Sdim
477296417Sdim/// Fold the loop tail into the loop exit by speculating the loop tail
478296417Sdim/// instructions. Typically, this is a single post-increment. In the case of a
479296417Sdim/// simple 2-block loop, hoisting the increment can be much better than
480296417Sdim/// duplicating the entire loop header. In the case of loops with early exits,
481296417Sdim/// rotation will not work anyway, but simplifyLoopLatch will put the loop in
482296417Sdim/// canonical form so downstream passes can handle it.
483296417Sdim///
484296417Sdim/// I don't believe this invalidates SCEV.
485296417Sdimstatic bool simplifyLoopLatch(Loop *L, LoopInfo *LI, DominatorTree *DT) {
486296417Sdim  BasicBlock *Latch = L->getLoopLatch();
487296417Sdim  if (!Latch || Latch->hasAddressTaken())
488296417Sdim    return false;
489296417Sdim
490296417Sdim  BranchInst *Jmp = dyn_cast<BranchInst>(Latch->getTerminator());
491296417Sdim  if (!Jmp || !Jmp->isUnconditional())
492296417Sdim    return false;
493296417Sdim
494296417Sdim  BasicBlock *LastExit = Latch->getSinglePredecessor();
495296417Sdim  if (!LastExit || !L->isLoopExiting(LastExit))
496296417Sdim    return false;
497296417Sdim
498296417Sdim  BranchInst *BI = dyn_cast<BranchInst>(LastExit->getTerminator());
499296417Sdim  if (!BI)
500296417Sdim    return false;
501296417Sdim
502296417Sdim  if (!shouldSpeculateInstrs(Latch->begin(), Jmp->getIterator(), L))
503296417Sdim    return false;
504296417Sdim
505296417Sdim  DEBUG(dbgs() << "Folding loop latch " << Latch->getName() << " into "
506296417Sdim        << LastExit->getName() << "\n");
507296417Sdim
508296417Sdim  // Hoist the instructions from Latch into LastExit.
509296417Sdim  LastExit->getInstList().splice(BI->getIterator(), Latch->getInstList(),
510296417Sdim                                 Latch->begin(), Jmp->getIterator());
511296417Sdim
512296417Sdim  unsigned FallThruPath = BI->getSuccessor(0) == Latch ? 0 : 1;
513296417Sdim  BasicBlock *Header = Jmp->getSuccessor(0);
514296417Sdim  assert(Header == L->getHeader() && "expected a backward branch");
515296417Sdim
516296417Sdim  // Remove Latch from the CFG so that LastExit becomes the new Latch.
517296417Sdim  BI->setSuccessor(FallThruPath, Header);
518296417Sdim  Latch->replaceSuccessorsPhiUsesWith(LastExit);
519296417Sdim  Jmp->eraseFromParent();
520296417Sdim
521296417Sdim  // Nuke the Latch block.
522296417Sdim  assert(Latch->empty() && "unable to evacuate Latch");
523296417Sdim  LI->removeBlock(Latch);
524296417Sdim  if (DT)
525296417Sdim    DT->eraseNode(Latch);
526296417Sdim  Latch->eraseFromParent();
527296417Sdim  return true;
528296417Sdim}
529296417Sdim
530296417Sdim/// Rotate \c L as many times as possible. Return true if the loop is rotated
531296417Sdim/// at least once.
532296417Sdimstatic bool iterativelyRotateLoop(Loop *L, unsigned MaxHeaderSize, LoopInfo *LI,
533296417Sdim                                  const TargetTransformInfo *TTI,
534296417Sdim                                  AssumptionCache *AC, DominatorTree *DT,
535296417Sdim                                  ScalarEvolution *SE) {
536296417Sdim  // Save the loop metadata.
537296417Sdim  MDNode *LoopMD = L->getLoopID();
538296417Sdim
539296417Sdim  // Simplify the loop latch before attempting to rotate the header
540296417Sdim  // upward. Rotation may not be needed if the loop tail can be folded into the
541296417Sdim  // loop exit.
542296417Sdim  bool SimplifiedLatch = simplifyLoopLatch(L, LI, DT);
543296417Sdim
544296417Sdim  // One loop can be rotated multiple times.
545296417Sdim  bool MadeChange = false;
546296417Sdim  while (rotateLoop(L, MaxHeaderSize, LI, TTI, AC, DT, SE, SimplifiedLatch)) {
547296417Sdim    MadeChange = true;
548296417Sdim    SimplifiedLatch = false;
549296417Sdim  }
550296417Sdim
551296417Sdim  // Restore the loop metadata.
552296417Sdim  // NB! We presume LoopRotation DOESN'T ADD its own metadata.
553296417Sdim  if ((MadeChange || SimplifiedLatch) && LoopMD)
554296417Sdim    L->setLoopID(LoopMD);
555296417Sdim
556296417Sdim  return MadeChange;
557296417Sdim}
558296417Sdim
559296417Sdimnamespace {
560296417Sdim
561296417Sdimclass LoopRotate : public LoopPass {
562296417Sdim  unsigned MaxHeaderSize;
563296417Sdim
564296417Sdimpublic:
565296417Sdim  static char ID; // Pass ID, replacement for typeid
566296417Sdim  LoopRotate(int SpecifiedMaxHeaderSize = -1) : LoopPass(ID) {
567296417Sdim    initializeLoopRotatePass(*PassRegistry::getPassRegistry());
568296417Sdim    if (SpecifiedMaxHeaderSize == -1)
569296417Sdim      MaxHeaderSize = DefaultRotationThreshold;
570296417Sdim    else
571296417Sdim      MaxHeaderSize = unsigned(SpecifiedMaxHeaderSize);
572296417Sdim  }
573296417Sdim
574296417Sdim  // LCSSA form makes instruction renaming easier.
575296417Sdim  void getAnalysisUsage(AnalysisUsage &AU) const override {
576296417Sdim    AU.addPreserved<AAResultsWrapperPass>();
577296417Sdim    AU.addRequired<AssumptionCacheTracker>();
578296417Sdim    AU.addPreserved<DominatorTreeWrapperPass>();
579296417Sdim    AU.addRequired<LoopInfoWrapperPass>();
580296417Sdim    AU.addPreserved<LoopInfoWrapperPass>();
581296417Sdim    AU.addRequiredID(LoopSimplifyID);
582296417Sdim    AU.addPreservedID(LoopSimplifyID);
583296417Sdim    AU.addRequiredID(LCSSAID);
584296417Sdim    AU.addPreservedID(LCSSAID);
585296417Sdim    AU.addPreserved<ScalarEvolutionWrapperPass>();
586296417Sdim    AU.addPreserved<SCEVAAWrapperPass>();
587296417Sdim    AU.addRequired<TargetTransformInfoWrapperPass>();
588296417Sdim    AU.addPreserved<BasicAAWrapperPass>();
589296417Sdim    AU.addPreserved<GlobalsAAWrapperPass>();
590296417Sdim  }
591296417Sdim
592296417Sdim  bool runOnLoop(Loop *L, LPPassManager &LPM) override {
593296417Sdim    if (skipOptnoneFunction(L))
594296417Sdim      return false;
595296417Sdim    Function &F = *L->getHeader()->getParent();
596296417Sdim
597296417Sdim    auto *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
598296417Sdim    const auto *TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
599296417Sdim    auto *AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
600296417Sdim    auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
601296417Sdim    auto *DT = DTWP ? &DTWP->getDomTree() : nullptr;
602296417Sdim    auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
603296417Sdim    auto *SE = SEWP ? &SEWP->getSE() : nullptr;
604296417Sdim
605296417Sdim    return iterativelyRotateLoop(L, MaxHeaderSize, LI, TTI, AC, DT, SE);
606296417Sdim  }
607296417Sdim};
608296417Sdim}
609296417Sdim
610296417Sdimchar LoopRotate::ID = 0;
611296417SdimINITIALIZE_PASS_BEGIN(LoopRotate, "loop-rotate", "Rotate Loops", false, false)
612296417SdimINITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
613296417SdimINITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
614296417SdimINITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
615296417SdimINITIALIZE_PASS_DEPENDENCY(LoopSimplify)
616296417SdimINITIALIZE_PASS_DEPENDENCY(LCSSA)
617296417SdimINITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass)
618296417SdimINITIALIZE_PASS_DEPENDENCY(BasicAAWrapperPass)
619296417SdimINITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
620296417SdimINITIALIZE_PASS_END(LoopRotate, "loop-rotate", "Rotate Loops", false, false)
621296417Sdim
622296417SdimPass *llvm::createLoopRotatePass(int MaxHeaderSize) {
623296417Sdim  return new LoopRotate(MaxHeaderSize);
624296417Sdim}
625