1327952Sdim//===- BypassSlowDivision.cpp - Bypass slow division ----------------------===//
2243789Sdim//
3353358Sdim// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4353358Sdim// See https://llvm.org/LICENSE.txt for license information.
5353358Sdim// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6243789Sdim//
7243789Sdim//===----------------------------------------------------------------------===//
8243789Sdim//
9243789Sdim// This file contains an optimization for div and rem on architectures that
10243789Sdim// execute short instructions significantly faster than longer instructions.
11243789Sdim// For example, on Intel Atom 32-bit divides are slow enough that during
12243789Sdim// runtime it is profitable to check the value of the operands, and if they are
13243789Sdim// positive and less than 256 use an unsigned 8-bit divide.
14243789Sdim//
15243789Sdim//===----------------------------------------------------------------------===//
16243789Sdim
17249423Sdim#include "llvm/Transforms/Utils/BypassSlowDivision.h"
18243789Sdim#include "llvm/ADT/DenseMap.h"
19327952Sdim#include "llvm/ADT/None.h"
20327952Sdim#include "llvm/ADT/Optional.h"
21327952Sdim#include "llvm/ADT/STLExtras.h"
22321369Sdim#include "llvm/ADT/SmallPtrSet.h"
23341825Sdim#include "llvm/Transforms/Utils/Local.h"
24321369Sdim#include "llvm/Analysis/ValueTracking.h"
25327952Sdim#include "llvm/IR/BasicBlock.h"
26327952Sdim#include "llvm/IR/Constants.h"
27327952Sdim#include "llvm/IR/DerivedTypes.h"
28249423Sdim#include "llvm/IR/Function.h"
29249423Sdim#include "llvm/IR/IRBuilder.h"
30327952Sdim#include "llvm/IR/Instruction.h"
31249423Sdim#include "llvm/IR/Instructions.h"
32327952Sdim#include "llvm/IR/Module.h"
33327952Sdim#include "llvm/IR/Type.h"
34327952Sdim#include "llvm/IR/Value.h"
35327952Sdim#include "llvm/Support/Casting.h"
36321369Sdim#include "llvm/Support/KnownBits.h"
37327952Sdim#include <cassert>
38327952Sdim#include <cstdint>
39243789Sdim
40243789Sdimusing namespace llvm;
41243789Sdim
42276479Sdim#define DEBUG_TYPE "bypass-slow-division"
43276479Sdim
44243789Sdimnamespace {
45243789Sdim
46321369Sdim  struct QuotRemPair {
47321369Sdim    Value *Quotient;
48321369Sdim    Value *Remainder;
49243789Sdim
50321369Sdim    QuotRemPair(Value *InQuotient, Value *InRemainder)
51321369Sdim        : Quotient(InQuotient), Remainder(InRemainder) {}
52243789Sdim  };
53321369Sdim
54321369Sdim  /// A quotient and remainder, plus a BB from which they logically "originate".
55321369Sdim  /// If you use Quotient or Remainder in a Phi node, you should use BB as its
56321369Sdim  /// corresponding predecessor.
57321369Sdim  struct QuotRemWithBB {
58321369Sdim    BasicBlock *BB = nullptr;
59321369Sdim    Value *Quotient = nullptr;
60321369Sdim    Value *Remainder = nullptr;
61321369Sdim  };
62243789Sdim
63327952Sdimusing DivCacheTy = DenseMap<DivRemMapKey, QuotRemPair>;
64327952Sdimusing BypassWidthsTy = DenseMap<unsigned, unsigned>;
65327952Sdimusing VisitedSetTy = SmallPtrSet<Instruction *, 4>;
66243789Sdim
67321369Sdimenum ValueRange {
68321369Sdim  /// Operand definitely fits into BypassType. No runtime checks are needed.
69321369Sdim  VALRNG_KNOWN_SHORT,
70321369Sdim  /// A runtime check is required, as value range is unknown.
71321369Sdim  VALRNG_UNKNOWN,
72321369Sdim  /// Operand is unlikely to fit into BypassType. The bypassing should be
73321369Sdim  /// disabled.
74321369Sdim  VALRNG_LIKELY_LONG
75321369Sdim};
76243789Sdim
77321369Sdimclass FastDivInsertionTask {
78321369Sdim  bool IsValidTask = false;
79321369Sdim  Instruction *SlowDivOrRem = nullptr;
80321369Sdim  IntegerType *BypassType = nullptr;
81321369Sdim  BasicBlock *MainBB = nullptr;
82321369Sdim
83321369Sdim  bool isHashLikeValue(Value *V, VisitedSetTy &Visited);
84321369Sdim  ValueRange getValueRange(Value *Op, VisitedSetTy &Visited);
85321369Sdim  QuotRemWithBB createSlowBB(BasicBlock *Successor);
86321369Sdim  QuotRemWithBB createFastBB(BasicBlock *Successor);
87321369Sdim  QuotRemPair createDivRemPhiNodes(QuotRemWithBB &LHS, QuotRemWithBB &RHS,
88321369Sdim                                   BasicBlock *PhiBB);
89321369Sdim  Value *insertOperandRuntimeCheck(Value *Op1, Value *Op2);
90321369Sdim  Optional<QuotRemPair> insertFastDivAndRem();
91321369Sdim
92321369Sdim  bool isSignedOp() {
93321369Sdim    return SlowDivOrRem->getOpcode() == Instruction::SDiv ||
94321369Sdim           SlowDivOrRem->getOpcode() == Instruction::SRem;
95321369Sdim  }
96327952Sdim
97321369Sdim  bool isDivisionOp() {
98321369Sdim    return SlowDivOrRem->getOpcode() == Instruction::SDiv ||
99321369Sdim           SlowDivOrRem->getOpcode() == Instruction::UDiv;
100321369Sdim  }
101327952Sdim
102321369Sdim  Type *getSlowType() { return SlowDivOrRem->getType(); }
103321369Sdim
104321369Sdimpublic:
105321369Sdim  FastDivInsertionTask(Instruction *I, const BypassWidthsTy &BypassWidths);
106327952Sdim
107321369Sdim  Value *getReplacement(DivCacheTy &Cache);
108321369Sdim};
109321369Sdim
110327952Sdim} // end anonymous namespace
111327952Sdim
112321369SdimFastDivInsertionTask::FastDivInsertionTask(Instruction *I,
113321369Sdim                                           const BypassWidthsTy &BypassWidths) {
114321369Sdim  switch (I->getOpcode()) {
115321369Sdim  case Instruction::UDiv:
116321369Sdim  case Instruction::SDiv:
117321369Sdim  case Instruction::URem:
118321369Sdim  case Instruction::SRem:
119321369Sdim    SlowDivOrRem = I;
120321369Sdim    break;
121321369Sdim  default:
122321369Sdim    // I is not a div/rem operation.
123321369Sdim    return;
124321369Sdim  }
125321369Sdim
126321369Sdim  // Skip division on vector types. Only optimize integer instructions.
127321369Sdim  IntegerType *SlowType = dyn_cast<IntegerType>(SlowDivOrRem->getType());
128321369Sdim  if (!SlowType)
129321369Sdim    return;
130321369Sdim
131321369Sdim  // Skip if this bitwidth is not bypassed.
132321369Sdim  auto BI = BypassWidths.find(SlowType->getBitWidth());
133321369Sdim  if (BI == BypassWidths.end())
134321369Sdim    return;
135321369Sdim
136321369Sdim  // Get type for div/rem instruction with bypass bitwidth.
137321369Sdim  IntegerType *BT = IntegerType::get(I->getContext(), BI->second);
138321369Sdim  BypassType = BT;
139321369Sdim
140321369Sdim  // The original basic block.
141321369Sdim  MainBB = I->getParent();
142321369Sdim
143321369Sdim  // The instruction is indeed a slow div or rem operation.
144321369Sdim  IsValidTask = true;
145321369Sdim}
146321369Sdim
147321369Sdim/// Reuses previously-computed dividend or remainder from the current BB if
148321369Sdim/// operands and operation are identical. Otherwise calls insertFastDivAndRem to
149321369Sdim/// perform the optimization and caches the resulting dividend and remainder.
150321369Sdim/// If no replacement can be generated, nullptr is returned.
151321369SdimValue *FastDivInsertionTask::getReplacement(DivCacheTy &Cache) {
152321369Sdim  // First, make sure that the task is valid.
153321369Sdim  if (!IsValidTask)
154321369Sdim    return nullptr;
155321369Sdim
156321369Sdim  // Then, look for a value in Cache.
157321369Sdim  Value *Dividend = SlowDivOrRem->getOperand(0);
158321369Sdim  Value *Divisor = SlowDivOrRem->getOperand(1);
159327952Sdim  DivRemMapKey Key(isSignedOp(), Dividend, Divisor);
160321369Sdim  auto CacheI = Cache.find(Key);
161321369Sdim
162321369Sdim  if (CacheI == Cache.end()) {
163321369Sdim    // If previous instance does not exist, try to insert fast div.
164321369Sdim    Optional<QuotRemPair> OptResult = insertFastDivAndRem();
165321369Sdim    // Bail out if insertFastDivAndRem has failed.
166321369Sdim    if (!OptResult)
167321369Sdim      return nullptr;
168321369Sdim    CacheI = Cache.insert({Key, *OptResult}).first;
169321369Sdim  }
170321369Sdim
171321369Sdim  QuotRemPair &Value = CacheI->second;
172321369Sdim  return isDivisionOp() ? Value.Quotient : Value.Remainder;
173321369Sdim}
174321369Sdim
175341825Sdim/// Check if a value looks like a hash.
176321369Sdim///
177321369Sdim/// The routine is expected to detect values computed using the most common hash
178321369Sdim/// algorithms. Typically, hash computations end with one of the following
179321369Sdim/// instructions:
180321369Sdim///
181321369Sdim/// 1) MUL with a constant wider than BypassType
182321369Sdim/// 2) XOR instruction
183321369Sdim///
184321369Sdim/// And even if we are wrong and the value is not a hash, it is still quite
185321369Sdim/// unlikely that such values will fit into BypassType.
186321369Sdim///
187321369Sdim/// To detect string hash algorithms like FNV we have to look through PHI-nodes.
188321369Sdim/// It is implemented as a depth-first search for values that look neither long
189321369Sdim/// nor hash-like.
190321369Sdimbool FastDivInsertionTask::isHashLikeValue(Value *V, VisitedSetTy &Visited) {
191321369Sdim  Instruction *I = dyn_cast<Instruction>(V);
192321369Sdim  if (!I)
193243789Sdim    return false;
194321369Sdim
195321369Sdim  switch (I->getOpcode()) {
196321369Sdim  case Instruction::Xor:
197321369Sdim    return true;
198321369Sdim  case Instruction::Mul: {
199321369Sdim    // After Constant Hoisting pass, long constants may be represented as
200321369Sdim    // bitcast instructions. As a result, some constants may look like an
201321369Sdim    // instruction at first, and an additional check is necessary to find out if
202321369Sdim    // an operand is actually a constant.
203321369Sdim    Value *Op1 = I->getOperand(1);
204321369Sdim    ConstantInt *C = dyn_cast<ConstantInt>(Op1);
205321369Sdim    if (!C && isa<BitCastInst>(Op1))
206321369Sdim      C = dyn_cast<ConstantInt>(cast<BitCastInst>(Op1)->getOperand(0));
207321369Sdim    return C && C->getValue().getMinSignedBits() > BypassType->getBitWidth();
208243789Sdim  }
209327952Sdim  case Instruction::PHI:
210321369Sdim    // Stop IR traversal in case of a crazy input code. This limits recursion
211321369Sdim    // depth.
212321369Sdim    if (Visited.size() >= 16)
213314564Sdim      return false;
214321369Sdim    // Do not visit nodes that have been visited already. We return true because
215321369Sdim    // it means that we couldn't find any value that doesn't look hash-like.
216321369Sdim    if (Visited.find(I) != Visited.end())
217321369Sdim      return true;
218321369Sdim    Visited.insert(I);
219321369Sdim    return llvm::all_of(cast<PHINode>(I)->incoming_values(), [&](Value *V) {
220321369Sdim      // Ignore undef values as they probably don't affect the division
221321369Sdim      // operands.
222321369Sdim      return getValueRange(V, Visited) == VALRNG_LIKELY_LONG ||
223321369Sdim             isa<UndefValue>(V);
224321369Sdim    });
225321369Sdim  default:
226321369Sdim    return false;
227321369Sdim  }
228321369Sdim}
229314564Sdim
230321369Sdim/// Check if an integer value fits into our bypass type.
231321369SdimValueRange FastDivInsertionTask::getValueRange(Value *V,
232321369Sdim                                               VisitedSetTy &Visited) {
233321369Sdim  unsigned ShortLen = BypassType->getBitWidth();
234321369Sdim  unsigned LongLen = V->getType()->getIntegerBitWidth();
235243789Sdim
236321369Sdim  assert(LongLen > ShortLen && "Value type must be wider than BypassType");
237321369Sdim  unsigned HiBits = LongLen - ShortLen;
238321369Sdim
239321369Sdim  const DataLayout &DL = SlowDivOrRem->getModule()->getDataLayout();
240321369Sdim  KnownBits Known(LongLen);
241321369Sdim
242321369Sdim  computeKnownBits(V, Known, DL);
243321369Sdim
244321369Sdim  if (Known.countMinLeadingZeros() >= HiBits)
245321369Sdim    return VALRNG_KNOWN_SHORT;
246321369Sdim
247321369Sdim  if (Known.countMaxLeadingZeros() < HiBits)
248321369Sdim    return VALRNG_LIKELY_LONG;
249321369Sdim
250321369Sdim  // Long integer divisions are often used in hashtable implementations. It's
251321369Sdim  // not worth bypassing such divisions because hash values are extremely
252321369Sdim  // unlikely to have enough leading zeros. The call below tries to detect
253321369Sdim  // values that are unlikely to fit BypassType (including hashes).
254321369Sdim  if (isHashLikeValue(V, Visited))
255321369Sdim    return VALRNG_LIKELY_LONG;
256321369Sdim
257321369Sdim  return VALRNG_UNKNOWN;
258321369Sdim}
259321369Sdim
260321369Sdim/// Add new basic block for slow div and rem operations and put it before
261321369Sdim/// SuccessorBB.
262321369SdimQuotRemWithBB FastDivInsertionTask::createSlowBB(BasicBlock *SuccessorBB) {
263321369Sdim  QuotRemWithBB DivRemPair;
264321369Sdim  DivRemPair.BB = BasicBlock::Create(MainBB->getParent()->getContext(), "",
265321369Sdim                                     MainBB->getParent(), SuccessorBB);
266321369Sdim  IRBuilder<> Builder(DivRemPair.BB, DivRemPair.BB->begin());
267321369Sdim
268321369Sdim  Value *Dividend = SlowDivOrRem->getOperand(0);
269321369Sdim  Value *Divisor = SlowDivOrRem->getOperand(1);
270321369Sdim
271321369Sdim  if (isSignedOp()) {
272321369Sdim    DivRemPair.Quotient = Builder.CreateSDiv(Dividend, Divisor);
273321369Sdim    DivRemPair.Remainder = Builder.CreateSRem(Dividend, Divisor);
274243789Sdim  } else {
275321369Sdim    DivRemPair.Quotient = Builder.CreateUDiv(Dividend, Divisor);
276321369Sdim    DivRemPair.Remainder = Builder.CreateURem(Dividend, Divisor);
277243789Sdim  }
278243789Sdim
279321369Sdim  Builder.CreateBr(SuccessorBB);
280321369Sdim  return DivRemPair;
281321369Sdim}
282243789Sdim
283321369Sdim/// Add new basic block for fast div and rem operations and put it before
284321369Sdim/// SuccessorBB.
285321369SdimQuotRemWithBB FastDivInsertionTask::createFastBB(BasicBlock *SuccessorBB) {
286321369Sdim  QuotRemWithBB DivRemPair;
287321369Sdim  DivRemPair.BB = BasicBlock::Create(MainBB->getParent()->getContext(), "",
288321369Sdim                                     MainBB->getParent(), SuccessorBB);
289321369Sdim  IRBuilder<> Builder(DivRemPair.BB, DivRemPair.BB->begin());
290243789Sdim
291321369Sdim  Value *Dividend = SlowDivOrRem->getOperand(0);
292321369Sdim  Value *Divisor = SlowDivOrRem->getOperand(1);
293321369Sdim  Value *ShortDivisorV =
294321369Sdim      Builder.CreateCast(Instruction::Trunc, Divisor, BypassType);
295321369Sdim  Value *ShortDividendV =
296321369Sdim      Builder.CreateCast(Instruction::Trunc, Dividend, BypassType);
297243789Sdim
298321369Sdim  // udiv/urem because this optimization only handles positive numbers.
299321369Sdim  Value *ShortQV = Builder.CreateUDiv(ShortDividendV, ShortDivisorV);
300321369Sdim  Value *ShortRV = Builder.CreateURem(ShortDividendV, ShortDivisorV);
301321369Sdim  DivRemPair.Quotient =
302321369Sdim      Builder.CreateCast(Instruction::ZExt, ShortQV, getSlowType());
303321369Sdim  DivRemPair.Remainder =
304321369Sdim      Builder.CreateCast(Instruction::ZExt, ShortRV, getSlowType());
305321369Sdim  Builder.CreateBr(SuccessorBB);
306243789Sdim
307321369Sdim  return DivRemPair;
308321369Sdim}
309243789Sdim
310321369Sdim/// Creates Phi nodes for result of Div and Rem.
311321369SdimQuotRemPair FastDivInsertionTask::createDivRemPhiNodes(QuotRemWithBB &LHS,
312321369Sdim                                                       QuotRemWithBB &RHS,
313321369Sdim                                                       BasicBlock *PhiBB) {
314321369Sdim  IRBuilder<> Builder(PhiBB, PhiBB->begin());
315321369Sdim  PHINode *QuoPhi = Builder.CreatePHI(getSlowType(), 2);
316321369Sdim  QuoPhi->addIncoming(LHS.Quotient, LHS.BB);
317321369Sdim  QuoPhi->addIncoming(RHS.Quotient, RHS.BB);
318321369Sdim  PHINode *RemPhi = Builder.CreatePHI(getSlowType(), 2);
319321369Sdim  RemPhi->addIncoming(LHS.Remainder, LHS.BB);
320321369Sdim  RemPhi->addIncoming(RHS.Remainder, RHS.BB);
321321369Sdim  return QuotRemPair(QuoPhi, RemPhi);
322321369Sdim}
323314564Sdim
324321369Sdim/// Creates a runtime check to test whether both the divisor and dividend fit
325321369Sdim/// into BypassType. The check is inserted at the end of MainBB. True return
326321369Sdim/// value means that the operands fit. Either of the operands may be NULL if it
327321369Sdim/// doesn't need a runtime check.
328321369SdimValue *FastDivInsertionTask::insertOperandRuntimeCheck(Value *Op1, Value *Op2) {
329321369Sdim  assert((Op1 || Op2) && "Nothing to check");
330321369Sdim  IRBuilder<> Builder(MainBB, MainBB->end());
331321369Sdim
332314564Sdim  Value *OrV;
333321369Sdim  if (Op1 && Op2)
334321369Sdim    OrV = Builder.CreateOr(Op1, Op2);
335314564Sdim  else
336321369Sdim    OrV = Op1 ? Op1 : Op2;
337314564Sdim
338243789Sdim  // BitMask is inverted to check if the operands are
339243789Sdim  // larger than the bypass type
340243789Sdim  uint64_t BitMask = ~BypassType->getBitMask();
341321369Sdim  Value *AndV = Builder.CreateAnd(OrV, BitMask);
342243789Sdim
343321369Sdim  // Compare operand values
344321369Sdim  Value *ZeroV = ConstantInt::getSigned(getSlowType(), 0);
345321369Sdim  return Builder.CreateICmpEQ(AndV, ZeroV);
346243789Sdim}
347243789Sdim
348321369Sdim/// Substitutes the div/rem instruction with code that checks the value of the
349321369Sdim/// operands and uses a shorter-faster div/rem instruction when possible.
350321369SdimOptional<QuotRemPair> FastDivInsertionTask::insertFastDivAndRem() {
351321369Sdim  Value *Dividend = SlowDivOrRem->getOperand(0);
352321369Sdim  Value *Divisor = SlowDivOrRem->getOperand(1);
353243789Sdim
354321369Sdim  VisitedSetTy SetL;
355321369Sdim  ValueRange DividendRange = getValueRange(Dividend, SetL);
356321369Sdim  if (DividendRange == VALRNG_LIKELY_LONG)
357321369Sdim    return None;
358321369Sdim
359321369Sdim  VisitedSetTy SetR;
360321369Sdim  ValueRange DivisorRange = getValueRange(Divisor, SetR);
361321369Sdim  if (DivisorRange == VALRNG_LIKELY_LONG)
362321369Sdim    return None;
363321369Sdim
364321369Sdim  bool DividendShort = (DividendRange == VALRNG_KNOWN_SHORT);
365321369Sdim  bool DivisorShort = (DivisorRange == VALRNG_KNOWN_SHORT);
366321369Sdim
367321369Sdim  if (DividendShort && DivisorShort) {
368321369Sdim    // If both operands are known to be short then just replace the long
369327952Sdim    // division with a short one in-place.  Since we're not introducing control
370327952Sdim    // flow in this case, narrowing the division is always a win, even if the
371327952Sdim    // divisor is a constant (and will later get replaced by a multiplication).
372321369Sdim
373321369Sdim    IRBuilder<> Builder(SlowDivOrRem);
374321369Sdim    Value *TruncDividend = Builder.CreateTrunc(Dividend, BypassType);
375321369Sdim    Value *TruncDivisor = Builder.CreateTrunc(Divisor, BypassType);
376321369Sdim    Value *TruncDiv = Builder.CreateUDiv(TruncDividend, TruncDivisor);
377321369Sdim    Value *TruncRem = Builder.CreateURem(TruncDividend, TruncDivisor);
378321369Sdim    Value *ExtDiv = Builder.CreateZExt(TruncDiv, getSlowType());
379321369Sdim    Value *ExtRem = Builder.CreateZExt(TruncRem, getSlowType());
380321369Sdim    return QuotRemPair(ExtDiv, ExtRem);
381327952Sdim  }
382327952Sdim
383327952Sdim  if (isa<ConstantInt>(Divisor)) {
384327952Sdim    // If the divisor is not a constant, DAGCombiner will convert it to a
385327952Sdim    // multiplication by a magic constant.  It isn't clear if it is worth
386327952Sdim    // introducing control flow to get a narrower multiply.
387327952Sdim    return None;
388327952Sdim  }
389327952Sdim
390341825Sdim  // After Constant Hoisting pass, long constants may be represented as
391341825Sdim  // bitcast instructions. As a result, some constants may look like an
392341825Sdim  // instruction at first, and an additional check is necessary to find out if
393341825Sdim  // an operand is actually a constant.
394341825Sdim  if (auto *BCI = dyn_cast<BitCastInst>(Divisor))
395341825Sdim    if (BCI->getParent() == SlowDivOrRem->getParent() &&
396341825Sdim        isa<ConstantInt>(BCI->getOperand(0)))
397341825Sdim      return None;
398341825Sdim
399327952Sdim  if (DividendShort && !isSignedOp()) {
400321369Sdim    // If the division is unsigned and Dividend is known to be short, then
401321369Sdim    // either
402321369Sdim    // 1) Divisor is less or equal to Dividend, and the result can be computed
403321369Sdim    //    with a short division.
404321369Sdim    // 2) Divisor is greater than Dividend. In this case, no division is needed
405321369Sdim    //    at all: The quotient is 0 and the remainder is equal to Dividend.
406321369Sdim    //
407321369Sdim    // So instead of checking at runtime whether Divisor fits into BypassType,
408321369Sdim    // we emit a runtime check to differentiate between these two cases. This
409321369Sdim    // lets us entirely avoid a long div.
410321369Sdim
411321369Sdim    // Split the basic block before the div/rem.
412321369Sdim    BasicBlock *SuccessorBB = MainBB->splitBasicBlock(SlowDivOrRem);
413321369Sdim    // Remove the unconditional branch from MainBB to SuccessorBB.
414321369Sdim    MainBB->getInstList().back().eraseFromParent();
415321369Sdim    QuotRemWithBB Long;
416321369Sdim    Long.BB = MainBB;
417321369Sdim    Long.Quotient = ConstantInt::get(getSlowType(), 0);
418321369Sdim    Long.Remainder = Dividend;
419321369Sdim    QuotRemWithBB Fast = createFastBB(SuccessorBB);
420321369Sdim    QuotRemPair Result = createDivRemPhiNodes(Fast, Long, SuccessorBB);
421321369Sdim    IRBuilder<> Builder(MainBB, MainBB->end());
422321369Sdim    Value *CmpV = Builder.CreateICmpUGE(Dividend, Divisor);
423321369Sdim    Builder.CreateCondBr(CmpV, Fast.BB, SuccessorBB);
424321369Sdim    return Result;
425243789Sdim  } else {
426321369Sdim    // General case. Create both slow and fast div/rem pairs and choose one of
427321369Sdim    // them at runtime.
428321369Sdim
429321369Sdim    // Split the basic block before the div/rem.
430321369Sdim    BasicBlock *SuccessorBB = MainBB->splitBasicBlock(SlowDivOrRem);
431321369Sdim    // Remove the unconditional branch from MainBB to SuccessorBB.
432321369Sdim    MainBB->getInstList().back().eraseFromParent();
433321369Sdim    QuotRemWithBB Fast = createFastBB(SuccessorBB);
434321369Sdim    QuotRemWithBB Slow = createSlowBB(SuccessorBB);
435321369Sdim    QuotRemPair Result = createDivRemPhiNodes(Fast, Slow, SuccessorBB);
436321369Sdim    Value *CmpV = insertOperandRuntimeCheck(DividendShort ? nullptr : Dividend,
437321369Sdim                                            DivisorShort ? nullptr : Divisor);
438321369Sdim    IRBuilder<> Builder(MainBB, MainBB->end());
439321369Sdim    Builder.CreateCondBr(CmpV, Fast.BB, Slow.BB);
440321369Sdim    return Result;
441243789Sdim  }
442243789Sdim}
443243789Sdim
444321369Sdim/// This optimization identifies DIV/REM instructions in a BB that can be
445321369Sdim/// profitably bypassed and carried out with a shorter, faster divide.
446321369Sdimbool llvm::bypassSlowDivision(BasicBlock *BB,
447321369Sdim                              const BypassWidthsTy &BypassWidths) {
448321369Sdim  DivCacheTy PerBBDivCache;
449243789Sdim
450243789Sdim  bool MadeChange = false;
451360784Sdim  Instruction *Next = &*BB->begin();
452296417Sdim  while (Next != nullptr) {
453296417Sdim    // We may add instructions immediately after I, but we want to skip over
454296417Sdim    // them.
455360784Sdim    Instruction *I = Next;
456296417Sdim    Next = Next->getNextNode();
457243789Sdim
458360784Sdim    // Ignore dead code to save time and avoid bugs.
459360784Sdim    if (I->hasNUses(0))
460360784Sdim      continue;
461360784Sdim
462321369Sdim    FastDivInsertionTask Task(I, BypassWidths);
463321369Sdim    if (Value *Replacement = Task.getReplacement(PerBBDivCache)) {
464321369Sdim      I->replaceAllUsesWith(Replacement);
465321369Sdim      I->eraseFromParent();
466321369Sdim      MadeChange = true;
467321369Sdim    }
468243789Sdim  }
469243789Sdim
470314564Sdim  // Above we eagerly create divs and rems, as pairs, so that we can efficiently
471314564Sdim  // create divrem machine instructions.  Now erase any unused divs / rems so we
472314564Sdim  // don't leave extra instructions sitting around.
473321369Sdim  for (auto &KV : PerBBDivCache)
474321369Sdim    for (Value *V : {KV.second.Quotient, KV.second.Remainder})
475321369Sdim      RecursivelyDeleteTriviallyDeadInstructions(V);
476314564Sdim
477243789Sdim  return MadeChange;
478243789Sdim}
479