1//===----------------- LoopRotationUtils.cpp -----------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file provides utilities to convert a loop into a loop with bottom test.
10//
11//===----------------------------------------------------------------------===//
12
13#include "llvm/Transforms/Utils/LoopRotationUtils.h"
14#include "llvm/ADT/Statistic.h"
15#include "llvm/Analysis/AliasAnalysis.h"
16#include "llvm/Analysis/AssumptionCache.h"
17#include "llvm/Analysis/BasicAliasAnalysis.h"
18#include "llvm/Analysis/CodeMetrics.h"
19#include "llvm/Analysis/DomTreeUpdater.h"
20#include "llvm/Analysis/GlobalsModRef.h"
21#include "llvm/Analysis/InstructionSimplify.h"
22#include "llvm/Analysis/LoopPass.h"
23#include "llvm/Analysis/MemorySSA.h"
24#include "llvm/Analysis/MemorySSAUpdater.h"
25#include "llvm/Analysis/ScalarEvolution.h"
26#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
27#include "llvm/Analysis/TargetTransformInfo.h"
28#include "llvm/Analysis/ValueTracking.h"
29#include "llvm/IR/CFG.h"
30#include "llvm/IR/DebugInfoMetadata.h"
31#include "llvm/IR/Dominators.h"
32#include "llvm/IR/Function.h"
33#include "llvm/IR/IntrinsicInst.h"
34#include "llvm/IR/Module.h"
35#include "llvm/Support/CommandLine.h"
36#include "llvm/Support/Debug.h"
37#include "llvm/Support/raw_ostream.h"
38#include "llvm/Transforms/Utils/BasicBlockUtils.h"
39#include "llvm/Transforms/Utils/Local.h"
40#include "llvm/Transforms/Utils/LoopUtils.h"
41#include "llvm/Transforms/Utils/SSAUpdater.h"
42#include "llvm/Transforms/Utils/ValueMapper.h"
43using namespace llvm;
44
45#define DEBUG_TYPE "loop-rotate"
46
47STATISTIC(NumRotated, "Number of loops rotated");
48
49namespace {
50/// A simple loop rotation transformation.
51class LoopRotate {
52  const unsigned MaxHeaderSize;
53  LoopInfo *LI;
54  const TargetTransformInfo *TTI;
55  AssumptionCache *AC;
56  DominatorTree *DT;
57  ScalarEvolution *SE;
58  MemorySSAUpdater *MSSAU;
59  const SimplifyQuery &SQ;
60  bool RotationOnly;
61  bool IsUtilMode;
62
63public:
64  LoopRotate(unsigned MaxHeaderSize, LoopInfo *LI,
65             const TargetTransformInfo *TTI, AssumptionCache *AC,
66             DominatorTree *DT, ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
67             const SimplifyQuery &SQ, bool RotationOnly, bool IsUtilMode)
68      : MaxHeaderSize(MaxHeaderSize), LI(LI), TTI(TTI), AC(AC), DT(DT), SE(SE),
69        MSSAU(MSSAU), SQ(SQ), RotationOnly(RotationOnly),
70        IsUtilMode(IsUtilMode) {}
71  bool processLoop(Loop *L);
72
73private:
74  bool rotateLoop(Loop *L, bool SimplifiedLatch);
75  bool simplifyLoopLatch(Loop *L);
76};
77} // end anonymous namespace
78
79/// Insert (K, V) pair into the ValueToValueMap, and verify the key did not
80/// previously exist in the map, and the value was inserted.
81static void InsertNewValueIntoMap(ValueToValueMapTy &VM, Value *K, Value *V) {
82  bool Inserted = VM.insert({K, V}).second;
83  assert(Inserted);
84  (void)Inserted;
85}
86/// RewriteUsesOfClonedInstructions - We just cloned the instructions from the
87/// old header into the preheader.  If there were uses of the values produced by
88/// these instruction that were outside of the loop, we have to insert PHI nodes
89/// to merge the two values.  Do this now.
90static void RewriteUsesOfClonedInstructions(BasicBlock *OrigHeader,
91                                            BasicBlock *OrigPreheader,
92                                            ValueToValueMapTy &ValueMap,
93                                SmallVectorImpl<PHINode*> *InsertedPHIs) {
94  // Remove PHI node entries that are no longer live.
95  BasicBlock::iterator I, E = OrigHeader->end();
96  for (I = OrigHeader->begin(); PHINode *PN = dyn_cast<PHINode>(I); ++I)
97    PN->removeIncomingValue(PN->getBasicBlockIndex(OrigPreheader));
98
99  // Now fix up users of the instructions in OrigHeader, inserting PHI nodes
100  // as necessary.
101  SSAUpdater SSA(InsertedPHIs);
102  for (I = OrigHeader->begin(); I != E; ++I) {
103    Value *OrigHeaderVal = &*I;
104
105    // If there are no uses of the value (e.g. because it returns void), there
106    // is nothing to rewrite.
107    if (OrigHeaderVal->use_empty())
108      continue;
109
110    Value *OrigPreHeaderVal = ValueMap.lookup(OrigHeaderVal);
111
112    // The value now exits in two versions: the initial value in the preheader
113    // and the loop "next" value in the original header.
114    SSA.Initialize(OrigHeaderVal->getType(), OrigHeaderVal->getName());
115    SSA.AddAvailableValue(OrigHeader, OrigHeaderVal);
116    SSA.AddAvailableValue(OrigPreheader, OrigPreHeaderVal);
117
118    // Visit each use of the OrigHeader instruction.
119    for (Value::use_iterator UI = OrigHeaderVal->use_begin(),
120                             UE = OrigHeaderVal->use_end();
121         UI != UE;) {
122      // Grab the use before incrementing the iterator.
123      Use &U = *UI;
124
125      // Increment the iterator before removing the use from the list.
126      ++UI;
127
128      // SSAUpdater can't handle a non-PHI use in the same block as an
129      // earlier def. We can easily handle those cases manually.
130      Instruction *UserInst = cast<Instruction>(U.getUser());
131      if (!isa<PHINode>(UserInst)) {
132        BasicBlock *UserBB = UserInst->getParent();
133
134        // The original users in the OrigHeader are already using the
135        // original definitions.
136        if (UserBB == OrigHeader)
137          continue;
138
139        // Users in the OrigPreHeader need to use the value to which the
140        // original definitions are mapped.
141        if (UserBB == OrigPreheader) {
142          U = OrigPreHeaderVal;
143          continue;
144        }
145      }
146
147      // Anything else can be handled by SSAUpdater.
148      SSA.RewriteUse(U);
149    }
150
151    // Replace MetadataAsValue(ValueAsMetadata(OrigHeaderVal)) uses in debug
152    // intrinsics.
153    SmallVector<DbgValueInst *, 1> DbgValues;
154    llvm::findDbgValues(DbgValues, OrigHeaderVal);
155    for (auto &DbgValue : DbgValues) {
156      // The original users in the OrigHeader are already using the original
157      // definitions.
158      BasicBlock *UserBB = DbgValue->getParent();
159      if (UserBB == OrigHeader)
160        continue;
161
162      // Users in the OrigPreHeader need to use the value to which the
163      // original definitions are mapped and anything else can be handled by
164      // the SSAUpdater. To avoid adding PHINodes, check if the value is
165      // available in UserBB, if not substitute undef.
166      Value *NewVal;
167      if (UserBB == OrigPreheader)
168        NewVal = OrigPreHeaderVal;
169      else if (SSA.HasValueForBlock(UserBB))
170        NewVal = SSA.GetValueInMiddleOfBlock(UserBB);
171      else
172        NewVal = UndefValue::get(OrigHeaderVal->getType());
173      DbgValue->setOperand(0,
174                           MetadataAsValue::get(OrigHeaderVal->getContext(),
175                                                ValueAsMetadata::get(NewVal)));
176    }
177  }
178}
179
180// Look for a phi which is only used outside the loop (via a LCSSA phi)
181// in the exit from the header. This means that rotating the loop can
182// remove the phi.
183static bool shouldRotateLoopExitingLatch(Loop *L) {
184  BasicBlock *Header = L->getHeader();
185  BasicBlock *HeaderExit = Header->getTerminator()->getSuccessor(0);
186  if (L->contains(HeaderExit))
187    HeaderExit = Header->getTerminator()->getSuccessor(1);
188
189  for (auto &Phi : Header->phis()) {
190    // Look for uses of this phi in the loop/via exits other than the header.
191    if (llvm::any_of(Phi.users(), [HeaderExit](const User *U) {
192          return cast<Instruction>(U)->getParent() != HeaderExit;
193        }))
194      continue;
195    return true;
196  }
197
198  return false;
199}
200
201/// Rotate loop LP. Return true if the loop is rotated.
202///
203/// \param SimplifiedLatch is true if the latch was just folded into the final
204/// loop exit. In this case we may want to rotate even though the new latch is
205/// now an exiting branch. This rotation would have happened had the latch not
206/// been simplified. However, if SimplifiedLatch is false, then we avoid
207/// rotating loops in which the latch exits to avoid excessive or endless
208/// rotation. LoopRotate should be repeatable and converge to a canonical
209/// form. This property is satisfied because simplifying the loop latch can only
210/// happen once across multiple invocations of the LoopRotate pass.
211bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
212  // If the loop has only one block then there is not much to rotate.
213  if (L->getBlocks().size() == 1)
214    return false;
215
216  BasicBlock *OrigHeader = L->getHeader();
217  BasicBlock *OrigLatch = L->getLoopLatch();
218
219  BranchInst *BI = dyn_cast<BranchInst>(OrigHeader->getTerminator());
220  if (!BI || BI->isUnconditional())
221    return false;
222
223  // If the loop header is not one of the loop exiting blocks then
224  // either this loop is already rotated or it is not
225  // suitable for loop rotation transformations.
226  if (!L->isLoopExiting(OrigHeader))
227    return false;
228
229  // If the loop latch already contains a branch that leaves the loop then the
230  // loop is already rotated.
231  if (!OrigLatch)
232    return false;
233
234  // Rotate if either the loop latch does *not* exit the loop, or if the loop
235  // latch was just simplified. Or if we think it will be profitable.
236  if (L->isLoopExiting(OrigLatch) && !SimplifiedLatch && IsUtilMode == false &&
237      !shouldRotateLoopExitingLatch(L))
238    return false;
239
240  // Check size of original header and reject loop if it is very big or we can't
241  // duplicate blocks inside it.
242  {
243    SmallPtrSet<const Value *, 32> EphValues;
244    CodeMetrics::collectEphemeralValues(L, AC, EphValues);
245
246    CodeMetrics Metrics;
247    Metrics.analyzeBasicBlock(OrigHeader, *TTI, EphValues);
248    if (Metrics.notDuplicatable) {
249      LLVM_DEBUG(
250          dbgs() << "LoopRotation: NOT rotating - contains non-duplicatable"
251                 << " instructions: ";
252          L->dump());
253      return false;
254    }
255    if (Metrics.convergent) {
256      LLVM_DEBUG(dbgs() << "LoopRotation: NOT rotating - contains convergent "
257                           "instructions: ";
258                 L->dump());
259      return false;
260    }
261    if (Metrics.NumInsts > MaxHeaderSize)
262      return false;
263  }
264
265  // Now, this loop is suitable for rotation.
266  BasicBlock *OrigPreheader = L->getLoopPreheader();
267
268  // If the loop could not be converted to canonical form, it must have an
269  // indirectbr in it, just give up.
270  if (!OrigPreheader || !L->hasDedicatedExits())
271    return false;
272
273  // Anything ScalarEvolution may know about this loop or the PHI nodes
274  // in its header will soon be invalidated. We should also invalidate
275  // all outer loops because insertion and deletion of blocks that happens
276  // during the rotation may violate invariants related to backedge taken
277  // infos in them.
278  if (SE)
279    SE->forgetTopmostLoop(L);
280
281  LLVM_DEBUG(dbgs() << "LoopRotation: rotating "; L->dump());
282  if (MSSAU && VerifyMemorySSA)
283    MSSAU->getMemorySSA()->verifyMemorySSA();
284
285  // Find new Loop header. NewHeader is a Header's one and only successor
286  // that is inside loop.  Header's other successor is outside the
287  // loop.  Otherwise loop is not suitable for rotation.
288  BasicBlock *Exit = BI->getSuccessor(0);
289  BasicBlock *NewHeader = BI->getSuccessor(1);
290  if (L->contains(Exit))
291    std::swap(Exit, NewHeader);
292  assert(NewHeader && "Unable to determine new loop header");
293  assert(L->contains(NewHeader) && !L->contains(Exit) &&
294         "Unable to determine loop header and exit blocks");
295
296  // This code assumes that the new header has exactly one predecessor.
297  // Remove any single-entry PHI nodes in it.
298  assert(NewHeader->getSinglePredecessor() &&
299         "New header doesn't have one pred!");
300  FoldSingleEntryPHINodes(NewHeader);
301
302  // Begin by walking OrigHeader and populating ValueMap with an entry for
303  // each Instruction.
304  BasicBlock::iterator I = OrigHeader->begin(), E = OrigHeader->end();
305  ValueToValueMapTy ValueMap, ValueMapMSSA;
306
307  // For PHI nodes, the value available in OldPreHeader is just the
308  // incoming value from OldPreHeader.
309  for (; PHINode *PN = dyn_cast<PHINode>(I); ++I)
310    InsertNewValueIntoMap(ValueMap, PN,
311                          PN->getIncomingValueForBlock(OrigPreheader));
312
313  // For the rest of the instructions, either hoist to the OrigPreheader if
314  // possible or create a clone in the OldPreHeader if not.
315  Instruction *LoopEntryBranch = OrigPreheader->getTerminator();
316
317  // Record all debug intrinsics preceding LoopEntryBranch to avoid duplication.
318  using DbgIntrinsicHash =
319      std::pair<std::pair<Value *, DILocalVariable *>, DIExpression *>;
320  auto makeHash = [](DbgVariableIntrinsic *D) -> DbgIntrinsicHash {
321    return {{D->getVariableLocation(), D->getVariable()}, D->getExpression()};
322  };
323  SmallDenseSet<DbgIntrinsicHash, 8> DbgIntrinsics;
324  for (auto I = std::next(OrigPreheader->rbegin()), E = OrigPreheader->rend();
325       I != E; ++I) {
326    if (auto *DII = dyn_cast<DbgVariableIntrinsic>(&*I))
327      DbgIntrinsics.insert(makeHash(DII));
328    else
329      break;
330  }
331
332  while (I != E) {
333    Instruction *Inst = &*I++;
334
335    // If the instruction's operands are invariant and it doesn't read or write
336    // memory, then it is safe to hoist.  Doing this doesn't change the order of
337    // execution in the preheader, but does prevent the instruction from
338    // executing in each iteration of the loop.  This means it is safe to hoist
339    // something that might trap, but isn't safe to hoist something that reads
340    // memory (without proving that the loop doesn't write).
341    if (L->hasLoopInvariantOperands(Inst) && !Inst->mayReadFromMemory() &&
342        !Inst->mayWriteToMemory() && !Inst->isTerminator() &&
343        !isa<DbgInfoIntrinsic>(Inst) && !isa<AllocaInst>(Inst)) {
344      Inst->moveBefore(LoopEntryBranch);
345      continue;
346    }
347
348    // Otherwise, create a duplicate of the instruction.
349    Instruction *C = Inst->clone();
350
351    // Eagerly remap the operands of the instruction.
352    RemapInstruction(C, ValueMap,
353                     RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
354
355    // Avoid inserting the same intrinsic twice.
356    if (auto *DII = dyn_cast<DbgVariableIntrinsic>(C))
357      if (DbgIntrinsics.count(makeHash(DII))) {
358        C->deleteValue();
359        continue;
360      }
361
362    // With the operands remapped, see if the instruction constant folds or is
363    // otherwise simplifyable.  This commonly occurs because the entry from PHI
364    // nodes allows icmps and other instructions to fold.
365    Value *V = SimplifyInstruction(C, SQ);
366    if (V && LI->replacementPreservesLCSSAForm(C, V)) {
367      // If so, then delete the temporary instruction and stick the folded value
368      // in the map.
369      InsertNewValueIntoMap(ValueMap, Inst, V);
370      if (!C->mayHaveSideEffects()) {
371        C->deleteValue();
372        C = nullptr;
373      }
374    } else {
375      InsertNewValueIntoMap(ValueMap, Inst, C);
376    }
377    if (C) {
378      // Otherwise, stick the new instruction into the new block!
379      C->setName(Inst->getName());
380      C->insertBefore(LoopEntryBranch);
381
382      if (auto *II = dyn_cast<IntrinsicInst>(C))
383        if (II->getIntrinsicID() == Intrinsic::assume)
384          AC->registerAssumption(II);
385      // MemorySSA cares whether the cloned instruction was inserted or not, and
386      // not whether it can be remapped to a simplified value.
387      if (MSSAU)
388        InsertNewValueIntoMap(ValueMapMSSA, Inst, C);
389    }
390  }
391
392  // Along with all the other instructions, we just cloned OrigHeader's
393  // terminator into OrigPreHeader. Fix up the PHI nodes in each of OrigHeader's
394  // successors by duplicating their incoming values for OrigHeader.
395  for (BasicBlock *SuccBB : successors(OrigHeader))
396    for (BasicBlock::iterator BI = SuccBB->begin();
397         PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
398      PN->addIncoming(PN->getIncomingValueForBlock(OrigHeader), OrigPreheader);
399
400  // Now that OrigPreHeader has a clone of OrigHeader's terminator, remove
401  // OrigPreHeader's old terminator (the original branch into the loop), and
402  // remove the corresponding incoming values from the PHI nodes in OrigHeader.
403  LoopEntryBranch->eraseFromParent();
404
405  // Update MemorySSA before the rewrite call below changes the 1:1
406  // instruction:cloned_instruction_or_value mapping.
407  if (MSSAU) {
408    InsertNewValueIntoMap(ValueMapMSSA, OrigHeader, OrigPreheader);
409    MSSAU->updateForClonedBlockIntoPred(OrigHeader, OrigPreheader,
410                                        ValueMapMSSA);
411  }
412
413  SmallVector<PHINode*, 2> InsertedPHIs;
414  // If there were any uses of instructions in the duplicated block outside the
415  // loop, update them, inserting PHI nodes as required
416  RewriteUsesOfClonedInstructions(OrigHeader, OrigPreheader, ValueMap,
417                                  &InsertedPHIs);
418
419  // Attach dbg.value intrinsics to the new phis if that phi uses a value that
420  // previously had debug metadata attached. This keeps the debug info
421  // up-to-date in the loop body.
422  if (!InsertedPHIs.empty())
423    insertDebugValuesForPHIs(OrigHeader, InsertedPHIs);
424
425  // NewHeader is now the header of the loop.
426  L->moveToHeader(NewHeader);
427  assert(L->getHeader() == NewHeader && "Latch block is our new header");
428
429  // Inform DT about changes to the CFG.
430  if (DT) {
431    // The OrigPreheader branches to the NewHeader and Exit now. Then, inform
432    // the DT about the removed edge to the OrigHeader (that got removed).
433    SmallVector<DominatorTree::UpdateType, 3> Updates;
434    Updates.push_back({DominatorTree::Insert, OrigPreheader, Exit});
435    Updates.push_back({DominatorTree::Insert, OrigPreheader, NewHeader});
436    Updates.push_back({DominatorTree::Delete, OrigPreheader, OrigHeader});
437    DT->applyUpdates(Updates);
438
439    if (MSSAU) {
440      MSSAU->applyUpdates(Updates, *DT);
441      if (VerifyMemorySSA)
442        MSSAU->getMemorySSA()->verifyMemorySSA();
443    }
444  }
445
446  // At this point, we've finished our major CFG changes.  As part of cloning
447  // the loop into the preheader we've simplified instructions and the
448  // duplicated conditional branch may now be branching on a constant.  If it is
449  // branching on a constant and if that constant means that we enter the loop,
450  // then we fold away the cond branch to an uncond branch.  This simplifies the
451  // loop in cases important for nested loops, and it also means we don't have
452  // to split as many edges.
453  BranchInst *PHBI = cast<BranchInst>(OrigPreheader->getTerminator());
454  assert(PHBI->isConditional() && "Should be clone of BI condbr!");
455  if (!isa<ConstantInt>(PHBI->getCondition()) ||
456      PHBI->getSuccessor(cast<ConstantInt>(PHBI->getCondition())->isZero()) !=
457          NewHeader) {
458    // The conditional branch can't be folded, handle the general case.
459    // Split edges as necessary to preserve LoopSimplify form.
460
461    // Right now OrigPreHeader has two successors, NewHeader and ExitBlock, and
462    // thus is not a preheader anymore.
463    // Split the edge to form a real preheader.
464    BasicBlock *NewPH = SplitCriticalEdge(
465        OrigPreheader, NewHeader,
466        CriticalEdgeSplittingOptions(DT, LI, MSSAU).setPreserveLCSSA());
467    NewPH->setName(NewHeader->getName() + ".lr.ph");
468
469    // Preserve canonical loop form, which means that 'Exit' should have only
470    // one predecessor. Note that Exit could be an exit block for multiple
471    // nested loops, causing both of the edges to now be critical and need to
472    // be split.
473    SmallVector<BasicBlock *, 4> ExitPreds(pred_begin(Exit), pred_end(Exit));
474    bool SplitLatchEdge = false;
475    for (BasicBlock *ExitPred : ExitPreds) {
476      // We only need to split loop exit edges.
477      Loop *PredLoop = LI->getLoopFor(ExitPred);
478      if (!PredLoop || PredLoop->contains(Exit) ||
479          ExitPred->getTerminator()->isIndirectTerminator())
480        continue;
481      SplitLatchEdge |= L->getLoopLatch() == ExitPred;
482      BasicBlock *ExitSplit = SplitCriticalEdge(
483          ExitPred, Exit,
484          CriticalEdgeSplittingOptions(DT, LI, MSSAU).setPreserveLCSSA());
485      ExitSplit->moveBefore(Exit);
486    }
487    assert(SplitLatchEdge &&
488           "Despite splitting all preds, failed to split latch exit?");
489  } else {
490    // We can fold the conditional branch in the preheader, this makes things
491    // simpler. The first step is to remove the extra edge to the Exit block.
492    Exit->removePredecessor(OrigPreheader, true /*preserve LCSSA*/);
493    BranchInst *NewBI = BranchInst::Create(NewHeader, PHBI);
494    NewBI->setDebugLoc(PHBI->getDebugLoc());
495    PHBI->eraseFromParent();
496
497    // With our CFG finalized, update DomTree if it is available.
498    if (DT) DT->deleteEdge(OrigPreheader, Exit);
499
500    // Update MSSA too, if available.
501    if (MSSAU)
502      MSSAU->removeEdge(OrigPreheader, Exit);
503  }
504
505  assert(L->getLoopPreheader() && "Invalid loop preheader after loop rotation");
506  assert(L->getLoopLatch() && "Invalid loop latch after loop rotation");
507
508  if (MSSAU && VerifyMemorySSA)
509    MSSAU->getMemorySSA()->verifyMemorySSA();
510
511  // Now that the CFG and DomTree are in a consistent state again, try to merge
512  // the OrigHeader block into OrigLatch.  This will succeed if they are
513  // connected by an unconditional branch.  This is just a cleanup so the
514  // emitted code isn't too gross in this common case.
515  DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
516  MergeBlockIntoPredecessor(OrigHeader, &DTU, LI, MSSAU);
517
518  if (MSSAU && VerifyMemorySSA)
519    MSSAU->getMemorySSA()->verifyMemorySSA();
520
521  LLVM_DEBUG(dbgs() << "LoopRotation: into "; L->dump());
522
523  ++NumRotated;
524  return true;
525}
526
527/// Determine whether the instructions in this range may be safely and cheaply
528/// speculated. This is not an important enough situation to develop complex
529/// heuristics. We handle a single arithmetic instruction along with any type
530/// conversions.
531static bool shouldSpeculateInstrs(BasicBlock::iterator Begin,
532                                  BasicBlock::iterator End, Loop *L) {
533  bool seenIncrement = false;
534  bool MultiExitLoop = false;
535
536  if (!L->getExitingBlock())
537    MultiExitLoop = true;
538
539  for (BasicBlock::iterator I = Begin; I != End; ++I) {
540
541    if (!isSafeToSpeculativelyExecute(&*I))
542      return false;
543
544    if (isa<DbgInfoIntrinsic>(I))
545      continue;
546
547    switch (I->getOpcode()) {
548    default:
549      return false;
550    case Instruction::GetElementPtr:
551      // GEPs are cheap if all indices are constant.
552      if (!cast<GEPOperator>(I)->hasAllConstantIndices())
553        return false;
554      // fall-thru to increment case
555      LLVM_FALLTHROUGH;
556    case Instruction::Add:
557    case Instruction::Sub:
558    case Instruction::And:
559    case Instruction::Or:
560    case Instruction::Xor:
561    case Instruction::Shl:
562    case Instruction::LShr:
563    case Instruction::AShr: {
564      Value *IVOpnd =
565          !isa<Constant>(I->getOperand(0))
566              ? I->getOperand(0)
567              : !isa<Constant>(I->getOperand(1)) ? I->getOperand(1) : nullptr;
568      if (!IVOpnd)
569        return false;
570
571      // If increment operand is used outside of the loop, this speculation
572      // could cause extra live range interference.
573      if (MultiExitLoop) {
574        for (User *UseI : IVOpnd->users()) {
575          auto *UserInst = cast<Instruction>(UseI);
576          if (!L->contains(UserInst))
577            return false;
578        }
579      }
580
581      if (seenIncrement)
582        return false;
583      seenIncrement = true;
584      break;
585    }
586    case Instruction::Trunc:
587    case Instruction::ZExt:
588    case Instruction::SExt:
589      // ignore type conversions
590      break;
591    }
592  }
593  return true;
594}
595
596/// Fold the loop tail into the loop exit by speculating the loop tail
597/// instructions. Typically, this is a single post-increment. In the case of a
598/// simple 2-block loop, hoisting the increment can be much better than
599/// duplicating the entire loop header. In the case of loops with early exits,
600/// rotation will not work anyway, but simplifyLoopLatch will put the loop in
601/// canonical form so downstream passes can handle it.
602///
603/// I don't believe this invalidates SCEV.
604bool LoopRotate::simplifyLoopLatch(Loop *L) {
605  BasicBlock *Latch = L->getLoopLatch();
606  if (!Latch || Latch->hasAddressTaken())
607    return false;
608
609  BranchInst *Jmp = dyn_cast<BranchInst>(Latch->getTerminator());
610  if (!Jmp || !Jmp->isUnconditional())
611    return false;
612
613  BasicBlock *LastExit = Latch->getSinglePredecessor();
614  if (!LastExit || !L->isLoopExiting(LastExit))
615    return false;
616
617  BranchInst *BI = dyn_cast<BranchInst>(LastExit->getTerminator());
618  if (!BI)
619    return false;
620
621  if (!shouldSpeculateInstrs(Latch->begin(), Jmp->getIterator(), L))
622    return false;
623
624  LLVM_DEBUG(dbgs() << "Folding loop latch " << Latch->getName() << " into "
625                    << LastExit->getName() << "\n");
626
627  DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
628  MergeBlockIntoPredecessor(Latch, &DTU, LI, MSSAU, nullptr,
629                            /*PredecessorWithTwoSuccessors=*/true);
630
631  if (MSSAU && VerifyMemorySSA)
632    MSSAU->getMemorySSA()->verifyMemorySSA();
633
634  return true;
635}
636
637/// Rotate \c L, and return true if any modification was made.
638bool LoopRotate::processLoop(Loop *L) {
639  // Save the loop metadata.
640  MDNode *LoopMD = L->getLoopID();
641
642  bool SimplifiedLatch = false;
643
644  // Simplify the loop latch before attempting to rotate the header
645  // upward. Rotation may not be needed if the loop tail can be folded into the
646  // loop exit.
647  if (!RotationOnly)
648    SimplifiedLatch = simplifyLoopLatch(L);
649
650  bool MadeChange = rotateLoop(L, SimplifiedLatch);
651  assert((!MadeChange || L->isLoopExiting(L->getLoopLatch())) &&
652         "Loop latch should be exiting after loop-rotate.");
653
654  // Restore the loop metadata.
655  // NB! We presume LoopRotation DOESN'T ADD its own metadata.
656  if ((MadeChange || SimplifiedLatch) && LoopMD)
657    L->setLoopID(LoopMD);
658
659  return MadeChange || SimplifiedLatch;
660}
661
662
663/// The utility to convert a loop into a loop with bottom test.
664bool llvm::LoopRotation(Loop *L, LoopInfo *LI, const TargetTransformInfo *TTI,
665                        AssumptionCache *AC, DominatorTree *DT,
666                        ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
667                        const SimplifyQuery &SQ, bool RotationOnly = true,
668                        unsigned Threshold = unsigned(-1),
669                        bool IsUtilMode = true) {
670  if (MSSAU && VerifyMemorySSA)
671    MSSAU->getMemorySSA()->verifyMemorySSA();
672  LoopRotate LR(Threshold, LI, TTI, AC, DT, SE, MSSAU, SQ, RotationOnly,
673                IsUtilMode);
674  if (MSSAU && VerifyMemorySSA)
675    MSSAU->getMemorySSA()->verifyMemorySSA();
676
677  return LR.processLoop(L);
678}
679