1327952Sdim//===- BypassSlowDivision.cpp - Bypass slow division ----------------------===// 2243789Sdim// 3353358Sdim// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4353358Sdim// See https://llvm.org/LICENSE.txt for license information. 5353358Sdim// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6243789Sdim// 7243789Sdim//===----------------------------------------------------------------------===// 8243789Sdim// 9243789Sdim// This file contains an optimization for div and rem on architectures that 10243789Sdim// execute short instructions significantly faster than longer instructions. 11243789Sdim// For example, on Intel Atom 32-bit divides are slow enough that during 12243789Sdim// runtime it is profitable to check the value of the operands, and if they are 13243789Sdim// positive and less than 256 use an unsigned 8-bit divide. 14243789Sdim// 15243789Sdim//===----------------------------------------------------------------------===// 16243789Sdim 17249423Sdim#include "llvm/Transforms/Utils/BypassSlowDivision.h" 18243789Sdim#include "llvm/ADT/DenseMap.h" 19327952Sdim#include "llvm/ADT/None.h" 20327952Sdim#include "llvm/ADT/Optional.h" 21327952Sdim#include "llvm/ADT/STLExtras.h" 22321369Sdim#include "llvm/ADT/SmallPtrSet.h" 23341825Sdim#include "llvm/Transforms/Utils/Local.h" 24321369Sdim#include "llvm/Analysis/ValueTracking.h" 25327952Sdim#include "llvm/IR/BasicBlock.h" 26327952Sdim#include "llvm/IR/Constants.h" 27327952Sdim#include "llvm/IR/DerivedTypes.h" 28249423Sdim#include "llvm/IR/Function.h" 29249423Sdim#include "llvm/IR/IRBuilder.h" 30327952Sdim#include "llvm/IR/Instruction.h" 31249423Sdim#include "llvm/IR/Instructions.h" 32327952Sdim#include "llvm/IR/Module.h" 33327952Sdim#include "llvm/IR/Type.h" 34327952Sdim#include "llvm/IR/Value.h" 35327952Sdim#include "llvm/Support/Casting.h" 36321369Sdim#include "llvm/Support/KnownBits.h" 37327952Sdim#include <cassert> 38327952Sdim#include <cstdint> 39243789Sdim 40243789Sdimusing namespace llvm; 41243789Sdim 42276479Sdim#define DEBUG_TYPE "bypass-slow-division" 43276479Sdim 44243789Sdimnamespace { 45243789Sdim 46321369Sdim struct QuotRemPair { 47321369Sdim Value *Quotient; 48321369Sdim Value *Remainder; 49243789Sdim 50321369Sdim QuotRemPair(Value *InQuotient, Value *InRemainder) 51321369Sdim : Quotient(InQuotient), Remainder(InRemainder) {} 52243789Sdim }; 53321369Sdim 54321369Sdim /// A quotient and remainder, plus a BB from which they logically "originate". 55321369Sdim /// If you use Quotient or Remainder in a Phi node, you should use BB as its 56321369Sdim /// corresponding predecessor. 57321369Sdim struct QuotRemWithBB { 58321369Sdim BasicBlock *BB = nullptr; 59321369Sdim Value *Quotient = nullptr; 60321369Sdim Value *Remainder = nullptr; 61321369Sdim }; 62243789Sdim 63327952Sdimusing DivCacheTy = DenseMap<DivRemMapKey, QuotRemPair>; 64327952Sdimusing BypassWidthsTy = DenseMap<unsigned, unsigned>; 65327952Sdimusing VisitedSetTy = SmallPtrSet<Instruction *, 4>; 66243789Sdim 67321369Sdimenum ValueRange { 68321369Sdim /// Operand definitely fits into BypassType. No runtime checks are needed. 69321369Sdim VALRNG_KNOWN_SHORT, 70321369Sdim /// A runtime check is required, as value range is unknown. 71321369Sdim VALRNG_UNKNOWN, 72321369Sdim /// Operand is unlikely to fit into BypassType. The bypassing should be 73321369Sdim /// disabled. 74321369Sdim VALRNG_LIKELY_LONG 75321369Sdim}; 76243789Sdim 77321369Sdimclass FastDivInsertionTask { 78321369Sdim bool IsValidTask = false; 79321369Sdim Instruction *SlowDivOrRem = nullptr; 80321369Sdim IntegerType *BypassType = nullptr; 81321369Sdim BasicBlock *MainBB = nullptr; 82321369Sdim 83321369Sdim bool isHashLikeValue(Value *V, VisitedSetTy &Visited); 84321369Sdim ValueRange getValueRange(Value *Op, VisitedSetTy &Visited); 85321369Sdim QuotRemWithBB createSlowBB(BasicBlock *Successor); 86321369Sdim QuotRemWithBB createFastBB(BasicBlock *Successor); 87321369Sdim QuotRemPair createDivRemPhiNodes(QuotRemWithBB &LHS, QuotRemWithBB &RHS, 88321369Sdim BasicBlock *PhiBB); 89321369Sdim Value *insertOperandRuntimeCheck(Value *Op1, Value *Op2); 90321369Sdim Optional<QuotRemPair> insertFastDivAndRem(); 91321369Sdim 92321369Sdim bool isSignedOp() { 93321369Sdim return SlowDivOrRem->getOpcode() == Instruction::SDiv || 94321369Sdim SlowDivOrRem->getOpcode() == Instruction::SRem; 95321369Sdim } 96327952Sdim 97321369Sdim bool isDivisionOp() { 98321369Sdim return SlowDivOrRem->getOpcode() == Instruction::SDiv || 99321369Sdim SlowDivOrRem->getOpcode() == Instruction::UDiv; 100321369Sdim } 101327952Sdim 102321369Sdim Type *getSlowType() { return SlowDivOrRem->getType(); } 103321369Sdim 104321369Sdimpublic: 105321369Sdim FastDivInsertionTask(Instruction *I, const BypassWidthsTy &BypassWidths); 106327952Sdim 107321369Sdim Value *getReplacement(DivCacheTy &Cache); 108321369Sdim}; 109321369Sdim 110327952Sdim} // end anonymous namespace 111327952Sdim 112321369SdimFastDivInsertionTask::FastDivInsertionTask(Instruction *I, 113321369Sdim const BypassWidthsTy &BypassWidths) { 114321369Sdim switch (I->getOpcode()) { 115321369Sdim case Instruction::UDiv: 116321369Sdim case Instruction::SDiv: 117321369Sdim case Instruction::URem: 118321369Sdim case Instruction::SRem: 119321369Sdim SlowDivOrRem = I; 120321369Sdim break; 121321369Sdim default: 122321369Sdim // I is not a div/rem operation. 123321369Sdim return; 124321369Sdim } 125321369Sdim 126321369Sdim // Skip division on vector types. Only optimize integer instructions. 127321369Sdim IntegerType *SlowType = dyn_cast<IntegerType>(SlowDivOrRem->getType()); 128321369Sdim if (!SlowType) 129321369Sdim return; 130321369Sdim 131321369Sdim // Skip if this bitwidth is not bypassed. 132321369Sdim auto BI = BypassWidths.find(SlowType->getBitWidth()); 133321369Sdim if (BI == BypassWidths.end()) 134321369Sdim return; 135321369Sdim 136321369Sdim // Get type for div/rem instruction with bypass bitwidth. 137321369Sdim IntegerType *BT = IntegerType::get(I->getContext(), BI->second); 138321369Sdim BypassType = BT; 139321369Sdim 140321369Sdim // The original basic block. 141321369Sdim MainBB = I->getParent(); 142321369Sdim 143321369Sdim // The instruction is indeed a slow div or rem operation. 144321369Sdim IsValidTask = true; 145321369Sdim} 146321369Sdim 147321369Sdim/// Reuses previously-computed dividend or remainder from the current BB if 148321369Sdim/// operands and operation are identical. Otherwise calls insertFastDivAndRem to 149321369Sdim/// perform the optimization and caches the resulting dividend and remainder. 150321369Sdim/// If no replacement can be generated, nullptr is returned. 151321369SdimValue *FastDivInsertionTask::getReplacement(DivCacheTy &Cache) { 152321369Sdim // First, make sure that the task is valid. 153321369Sdim if (!IsValidTask) 154321369Sdim return nullptr; 155321369Sdim 156321369Sdim // Then, look for a value in Cache. 157321369Sdim Value *Dividend = SlowDivOrRem->getOperand(0); 158321369Sdim Value *Divisor = SlowDivOrRem->getOperand(1); 159327952Sdim DivRemMapKey Key(isSignedOp(), Dividend, Divisor); 160321369Sdim auto CacheI = Cache.find(Key); 161321369Sdim 162321369Sdim if (CacheI == Cache.end()) { 163321369Sdim // If previous instance does not exist, try to insert fast div. 164321369Sdim Optional<QuotRemPair> OptResult = insertFastDivAndRem(); 165321369Sdim // Bail out if insertFastDivAndRem has failed. 166321369Sdim if (!OptResult) 167321369Sdim return nullptr; 168321369Sdim CacheI = Cache.insert({Key, *OptResult}).first; 169321369Sdim } 170321369Sdim 171321369Sdim QuotRemPair &Value = CacheI->second; 172321369Sdim return isDivisionOp() ? Value.Quotient : Value.Remainder; 173321369Sdim} 174321369Sdim 175341825Sdim/// Check if a value looks like a hash. 176321369Sdim/// 177321369Sdim/// The routine is expected to detect values computed using the most common hash 178321369Sdim/// algorithms. Typically, hash computations end with one of the following 179321369Sdim/// instructions: 180321369Sdim/// 181321369Sdim/// 1) MUL with a constant wider than BypassType 182321369Sdim/// 2) XOR instruction 183321369Sdim/// 184321369Sdim/// And even if we are wrong and the value is not a hash, it is still quite 185321369Sdim/// unlikely that such values will fit into BypassType. 186321369Sdim/// 187321369Sdim/// To detect string hash algorithms like FNV we have to look through PHI-nodes. 188321369Sdim/// It is implemented as a depth-first search for values that look neither long 189321369Sdim/// nor hash-like. 190321369Sdimbool FastDivInsertionTask::isHashLikeValue(Value *V, VisitedSetTy &Visited) { 191321369Sdim Instruction *I = dyn_cast<Instruction>(V); 192321369Sdim if (!I) 193243789Sdim return false; 194321369Sdim 195321369Sdim switch (I->getOpcode()) { 196321369Sdim case Instruction::Xor: 197321369Sdim return true; 198321369Sdim case Instruction::Mul: { 199321369Sdim // After Constant Hoisting pass, long constants may be represented as 200321369Sdim // bitcast instructions. As a result, some constants may look like an 201321369Sdim // instruction at first, and an additional check is necessary to find out if 202321369Sdim // an operand is actually a constant. 203321369Sdim Value *Op1 = I->getOperand(1); 204321369Sdim ConstantInt *C = dyn_cast<ConstantInt>(Op1); 205321369Sdim if (!C && isa<BitCastInst>(Op1)) 206321369Sdim C = dyn_cast<ConstantInt>(cast<BitCastInst>(Op1)->getOperand(0)); 207321369Sdim return C && C->getValue().getMinSignedBits() > BypassType->getBitWidth(); 208243789Sdim } 209327952Sdim case Instruction::PHI: 210321369Sdim // Stop IR traversal in case of a crazy input code. This limits recursion 211321369Sdim // depth. 212321369Sdim if (Visited.size() >= 16) 213314564Sdim return false; 214321369Sdim // Do not visit nodes that have been visited already. We return true because 215321369Sdim // it means that we couldn't find any value that doesn't look hash-like. 216321369Sdim if (Visited.find(I) != Visited.end()) 217321369Sdim return true; 218321369Sdim Visited.insert(I); 219321369Sdim return llvm::all_of(cast<PHINode>(I)->incoming_values(), [&](Value *V) { 220321369Sdim // Ignore undef values as they probably don't affect the division 221321369Sdim // operands. 222321369Sdim return getValueRange(V, Visited) == VALRNG_LIKELY_LONG || 223321369Sdim isa<UndefValue>(V); 224321369Sdim }); 225321369Sdim default: 226321369Sdim return false; 227321369Sdim } 228321369Sdim} 229314564Sdim 230321369Sdim/// Check if an integer value fits into our bypass type. 231321369SdimValueRange FastDivInsertionTask::getValueRange(Value *V, 232321369Sdim VisitedSetTy &Visited) { 233321369Sdim unsigned ShortLen = BypassType->getBitWidth(); 234321369Sdim unsigned LongLen = V->getType()->getIntegerBitWidth(); 235243789Sdim 236321369Sdim assert(LongLen > ShortLen && "Value type must be wider than BypassType"); 237321369Sdim unsigned HiBits = LongLen - ShortLen; 238321369Sdim 239321369Sdim const DataLayout &DL = SlowDivOrRem->getModule()->getDataLayout(); 240321369Sdim KnownBits Known(LongLen); 241321369Sdim 242321369Sdim computeKnownBits(V, Known, DL); 243321369Sdim 244321369Sdim if (Known.countMinLeadingZeros() >= HiBits) 245321369Sdim return VALRNG_KNOWN_SHORT; 246321369Sdim 247321369Sdim if (Known.countMaxLeadingZeros() < HiBits) 248321369Sdim return VALRNG_LIKELY_LONG; 249321369Sdim 250321369Sdim // Long integer divisions are often used in hashtable implementations. It's 251321369Sdim // not worth bypassing such divisions because hash values are extremely 252321369Sdim // unlikely to have enough leading zeros. The call below tries to detect 253321369Sdim // values that are unlikely to fit BypassType (including hashes). 254321369Sdim if (isHashLikeValue(V, Visited)) 255321369Sdim return VALRNG_LIKELY_LONG; 256321369Sdim 257321369Sdim return VALRNG_UNKNOWN; 258321369Sdim} 259321369Sdim 260321369Sdim/// Add new basic block for slow div and rem operations and put it before 261321369Sdim/// SuccessorBB. 262321369SdimQuotRemWithBB FastDivInsertionTask::createSlowBB(BasicBlock *SuccessorBB) { 263321369Sdim QuotRemWithBB DivRemPair; 264321369Sdim DivRemPair.BB = BasicBlock::Create(MainBB->getParent()->getContext(), "", 265321369Sdim MainBB->getParent(), SuccessorBB); 266321369Sdim IRBuilder<> Builder(DivRemPair.BB, DivRemPair.BB->begin()); 267321369Sdim 268321369Sdim Value *Dividend = SlowDivOrRem->getOperand(0); 269321369Sdim Value *Divisor = SlowDivOrRem->getOperand(1); 270321369Sdim 271321369Sdim if (isSignedOp()) { 272321369Sdim DivRemPair.Quotient = Builder.CreateSDiv(Dividend, Divisor); 273321369Sdim DivRemPair.Remainder = Builder.CreateSRem(Dividend, Divisor); 274243789Sdim } else { 275321369Sdim DivRemPair.Quotient = Builder.CreateUDiv(Dividend, Divisor); 276321369Sdim DivRemPair.Remainder = Builder.CreateURem(Dividend, Divisor); 277243789Sdim } 278243789Sdim 279321369Sdim Builder.CreateBr(SuccessorBB); 280321369Sdim return DivRemPair; 281321369Sdim} 282243789Sdim 283321369Sdim/// Add new basic block for fast div and rem operations and put it before 284321369Sdim/// SuccessorBB. 285321369SdimQuotRemWithBB FastDivInsertionTask::createFastBB(BasicBlock *SuccessorBB) { 286321369Sdim QuotRemWithBB DivRemPair; 287321369Sdim DivRemPair.BB = BasicBlock::Create(MainBB->getParent()->getContext(), "", 288321369Sdim MainBB->getParent(), SuccessorBB); 289321369Sdim IRBuilder<> Builder(DivRemPair.BB, DivRemPair.BB->begin()); 290243789Sdim 291321369Sdim Value *Dividend = SlowDivOrRem->getOperand(0); 292321369Sdim Value *Divisor = SlowDivOrRem->getOperand(1); 293321369Sdim Value *ShortDivisorV = 294321369Sdim Builder.CreateCast(Instruction::Trunc, Divisor, BypassType); 295321369Sdim Value *ShortDividendV = 296321369Sdim Builder.CreateCast(Instruction::Trunc, Dividend, BypassType); 297243789Sdim 298321369Sdim // udiv/urem because this optimization only handles positive numbers. 299321369Sdim Value *ShortQV = Builder.CreateUDiv(ShortDividendV, ShortDivisorV); 300321369Sdim Value *ShortRV = Builder.CreateURem(ShortDividendV, ShortDivisorV); 301321369Sdim DivRemPair.Quotient = 302321369Sdim Builder.CreateCast(Instruction::ZExt, ShortQV, getSlowType()); 303321369Sdim DivRemPair.Remainder = 304321369Sdim Builder.CreateCast(Instruction::ZExt, ShortRV, getSlowType()); 305321369Sdim Builder.CreateBr(SuccessorBB); 306243789Sdim 307321369Sdim return DivRemPair; 308321369Sdim} 309243789Sdim 310321369Sdim/// Creates Phi nodes for result of Div and Rem. 311321369SdimQuotRemPair FastDivInsertionTask::createDivRemPhiNodes(QuotRemWithBB &LHS, 312321369Sdim QuotRemWithBB &RHS, 313321369Sdim BasicBlock *PhiBB) { 314321369Sdim IRBuilder<> Builder(PhiBB, PhiBB->begin()); 315321369Sdim PHINode *QuoPhi = Builder.CreatePHI(getSlowType(), 2); 316321369Sdim QuoPhi->addIncoming(LHS.Quotient, LHS.BB); 317321369Sdim QuoPhi->addIncoming(RHS.Quotient, RHS.BB); 318321369Sdim PHINode *RemPhi = Builder.CreatePHI(getSlowType(), 2); 319321369Sdim RemPhi->addIncoming(LHS.Remainder, LHS.BB); 320321369Sdim RemPhi->addIncoming(RHS.Remainder, RHS.BB); 321321369Sdim return QuotRemPair(QuoPhi, RemPhi); 322321369Sdim} 323314564Sdim 324321369Sdim/// Creates a runtime check to test whether both the divisor and dividend fit 325321369Sdim/// into BypassType. The check is inserted at the end of MainBB. True return 326321369Sdim/// value means that the operands fit. Either of the operands may be NULL if it 327321369Sdim/// doesn't need a runtime check. 328321369SdimValue *FastDivInsertionTask::insertOperandRuntimeCheck(Value *Op1, Value *Op2) { 329321369Sdim assert((Op1 || Op2) && "Nothing to check"); 330321369Sdim IRBuilder<> Builder(MainBB, MainBB->end()); 331321369Sdim 332314564Sdim Value *OrV; 333321369Sdim if (Op1 && Op2) 334321369Sdim OrV = Builder.CreateOr(Op1, Op2); 335314564Sdim else 336321369Sdim OrV = Op1 ? Op1 : Op2; 337314564Sdim 338243789Sdim // BitMask is inverted to check if the operands are 339243789Sdim // larger than the bypass type 340243789Sdim uint64_t BitMask = ~BypassType->getBitMask(); 341321369Sdim Value *AndV = Builder.CreateAnd(OrV, BitMask); 342243789Sdim 343321369Sdim // Compare operand values 344321369Sdim Value *ZeroV = ConstantInt::getSigned(getSlowType(), 0); 345321369Sdim return Builder.CreateICmpEQ(AndV, ZeroV); 346243789Sdim} 347243789Sdim 348321369Sdim/// Substitutes the div/rem instruction with code that checks the value of the 349321369Sdim/// operands and uses a shorter-faster div/rem instruction when possible. 350321369SdimOptional<QuotRemPair> FastDivInsertionTask::insertFastDivAndRem() { 351321369Sdim Value *Dividend = SlowDivOrRem->getOperand(0); 352321369Sdim Value *Divisor = SlowDivOrRem->getOperand(1); 353243789Sdim 354321369Sdim VisitedSetTy SetL; 355321369Sdim ValueRange DividendRange = getValueRange(Dividend, SetL); 356321369Sdim if (DividendRange == VALRNG_LIKELY_LONG) 357321369Sdim return None; 358321369Sdim 359321369Sdim VisitedSetTy SetR; 360321369Sdim ValueRange DivisorRange = getValueRange(Divisor, SetR); 361321369Sdim if (DivisorRange == VALRNG_LIKELY_LONG) 362321369Sdim return None; 363321369Sdim 364321369Sdim bool DividendShort = (DividendRange == VALRNG_KNOWN_SHORT); 365321369Sdim bool DivisorShort = (DivisorRange == VALRNG_KNOWN_SHORT); 366321369Sdim 367321369Sdim if (DividendShort && DivisorShort) { 368321369Sdim // If both operands are known to be short then just replace the long 369327952Sdim // division with a short one in-place. Since we're not introducing control 370327952Sdim // flow in this case, narrowing the division is always a win, even if the 371327952Sdim // divisor is a constant (and will later get replaced by a multiplication). 372321369Sdim 373321369Sdim IRBuilder<> Builder(SlowDivOrRem); 374321369Sdim Value *TruncDividend = Builder.CreateTrunc(Dividend, BypassType); 375321369Sdim Value *TruncDivisor = Builder.CreateTrunc(Divisor, BypassType); 376321369Sdim Value *TruncDiv = Builder.CreateUDiv(TruncDividend, TruncDivisor); 377321369Sdim Value *TruncRem = Builder.CreateURem(TruncDividend, TruncDivisor); 378321369Sdim Value *ExtDiv = Builder.CreateZExt(TruncDiv, getSlowType()); 379321369Sdim Value *ExtRem = Builder.CreateZExt(TruncRem, getSlowType()); 380321369Sdim return QuotRemPair(ExtDiv, ExtRem); 381327952Sdim } 382327952Sdim 383327952Sdim if (isa<ConstantInt>(Divisor)) { 384327952Sdim // If the divisor is not a constant, DAGCombiner will convert it to a 385327952Sdim // multiplication by a magic constant. It isn't clear if it is worth 386327952Sdim // introducing control flow to get a narrower multiply. 387327952Sdim return None; 388327952Sdim } 389327952Sdim 390341825Sdim // After Constant Hoisting pass, long constants may be represented as 391341825Sdim // bitcast instructions. As a result, some constants may look like an 392341825Sdim // instruction at first, and an additional check is necessary to find out if 393341825Sdim // an operand is actually a constant. 394341825Sdim if (auto *BCI = dyn_cast<BitCastInst>(Divisor)) 395341825Sdim if (BCI->getParent() == SlowDivOrRem->getParent() && 396341825Sdim isa<ConstantInt>(BCI->getOperand(0))) 397341825Sdim return None; 398341825Sdim 399327952Sdim if (DividendShort && !isSignedOp()) { 400321369Sdim // If the division is unsigned and Dividend is known to be short, then 401321369Sdim // either 402321369Sdim // 1) Divisor is less or equal to Dividend, and the result can be computed 403321369Sdim // with a short division. 404321369Sdim // 2) Divisor is greater than Dividend. In this case, no division is needed 405321369Sdim // at all: The quotient is 0 and the remainder is equal to Dividend. 406321369Sdim // 407321369Sdim // So instead of checking at runtime whether Divisor fits into BypassType, 408321369Sdim // we emit a runtime check to differentiate between these two cases. This 409321369Sdim // lets us entirely avoid a long div. 410321369Sdim 411321369Sdim // Split the basic block before the div/rem. 412321369Sdim BasicBlock *SuccessorBB = MainBB->splitBasicBlock(SlowDivOrRem); 413321369Sdim // Remove the unconditional branch from MainBB to SuccessorBB. 414321369Sdim MainBB->getInstList().back().eraseFromParent(); 415321369Sdim QuotRemWithBB Long; 416321369Sdim Long.BB = MainBB; 417321369Sdim Long.Quotient = ConstantInt::get(getSlowType(), 0); 418321369Sdim Long.Remainder = Dividend; 419321369Sdim QuotRemWithBB Fast = createFastBB(SuccessorBB); 420321369Sdim QuotRemPair Result = createDivRemPhiNodes(Fast, Long, SuccessorBB); 421321369Sdim IRBuilder<> Builder(MainBB, MainBB->end()); 422321369Sdim Value *CmpV = Builder.CreateICmpUGE(Dividend, Divisor); 423321369Sdim Builder.CreateCondBr(CmpV, Fast.BB, SuccessorBB); 424321369Sdim return Result; 425243789Sdim } else { 426321369Sdim // General case. Create both slow and fast div/rem pairs and choose one of 427321369Sdim // them at runtime. 428321369Sdim 429321369Sdim // Split the basic block before the div/rem. 430321369Sdim BasicBlock *SuccessorBB = MainBB->splitBasicBlock(SlowDivOrRem); 431321369Sdim // Remove the unconditional branch from MainBB to SuccessorBB. 432321369Sdim MainBB->getInstList().back().eraseFromParent(); 433321369Sdim QuotRemWithBB Fast = createFastBB(SuccessorBB); 434321369Sdim QuotRemWithBB Slow = createSlowBB(SuccessorBB); 435321369Sdim QuotRemPair Result = createDivRemPhiNodes(Fast, Slow, SuccessorBB); 436321369Sdim Value *CmpV = insertOperandRuntimeCheck(DividendShort ? nullptr : Dividend, 437321369Sdim DivisorShort ? nullptr : Divisor); 438321369Sdim IRBuilder<> Builder(MainBB, MainBB->end()); 439321369Sdim Builder.CreateCondBr(CmpV, Fast.BB, Slow.BB); 440321369Sdim return Result; 441243789Sdim } 442243789Sdim} 443243789Sdim 444321369Sdim/// This optimization identifies DIV/REM instructions in a BB that can be 445321369Sdim/// profitably bypassed and carried out with a shorter, faster divide. 446321369Sdimbool llvm::bypassSlowDivision(BasicBlock *BB, 447321369Sdim const BypassWidthsTy &BypassWidths) { 448321369Sdim DivCacheTy PerBBDivCache; 449243789Sdim 450243789Sdim bool MadeChange = false; 451360784Sdim Instruction *Next = &*BB->begin(); 452296417Sdim while (Next != nullptr) { 453296417Sdim // We may add instructions immediately after I, but we want to skip over 454296417Sdim // them. 455360784Sdim Instruction *I = Next; 456296417Sdim Next = Next->getNextNode(); 457243789Sdim 458360784Sdim // Ignore dead code to save time and avoid bugs. 459360784Sdim if (I->hasNUses(0)) 460360784Sdim continue; 461360784Sdim 462321369Sdim FastDivInsertionTask Task(I, BypassWidths); 463321369Sdim if (Value *Replacement = Task.getReplacement(PerBBDivCache)) { 464321369Sdim I->replaceAllUsesWith(Replacement); 465321369Sdim I->eraseFromParent(); 466321369Sdim MadeChange = true; 467321369Sdim } 468243789Sdim } 469243789Sdim 470314564Sdim // Above we eagerly create divs and rems, as pairs, so that we can efficiently 471314564Sdim // create divrem machine instructions. Now erase any unused divs / rems so we 472314564Sdim // don't leave extra instructions sitting around. 473321369Sdim for (auto &KV : PerBBDivCache) 474321369Sdim for (Value *V : {KV.second.Quotient, KV.second.Remainder}) 475321369Sdim RecursivelyDeleteTriviallyDeadInstructions(V); 476314564Sdim 477243789Sdim return MadeChange; 478243789Sdim} 479