1283625Sdim//===- SpeculativeExecution.cpp ---------------------------------*- C++ -*-===// 2283625Sdim// 3283625Sdim// The LLVM Compiler Infrastructure 4283625Sdim// 5283625Sdim// This file is distributed under the University of Illinois Open Source 6283625Sdim// License. See LICENSE.TXT for details. 7283625Sdim// 8283625Sdim//===----------------------------------------------------------------------===// 9283625Sdim// 10283625Sdim// This pass hoists instructions to enable speculative execution on 11283625Sdim// targets where branches are expensive. This is aimed at GPUs. It 12283625Sdim// currently works on simple if-then and if-then-else 13283625Sdim// patterns. 14283625Sdim// 15283625Sdim// Removing branches is not the only motivation for this 16283625Sdim// pass. E.g. consider this code and assume that there is no 17283625Sdim// addressing mode for multiplying by sizeof(*a): 18283625Sdim// 19283625Sdim// if (b > 0) 20283625Sdim// c = a[i + 1] 21283625Sdim// if (d > 0) 22283625Sdim// e = a[i + 2] 23283625Sdim// 24283625Sdim// turns into 25283625Sdim// 26283625Sdim// p = &a[i + 1]; 27283625Sdim// if (b > 0) 28283625Sdim// c = *p; 29283625Sdim// q = &a[i + 2]; 30283625Sdim// if (d > 0) 31283625Sdim// e = *q; 32283625Sdim// 33283625Sdim// which could later be optimized to 34283625Sdim// 35283625Sdim// r = &a[i]; 36283625Sdim// if (b > 0) 37283625Sdim// c = r[1]; 38283625Sdim// if (d > 0) 39283625Sdim// e = r[2]; 40283625Sdim// 41283625Sdim// Later passes sink back much of the speculated code that did not enable 42283625Sdim// further optimization. 43283625Sdim// 44283625Sdim// This pass is more aggressive than the function SpeculativeyExecuteBB in 45283625Sdim// SimplifyCFG. SimplifyCFG will not speculate if no selects are introduced and 46283625Sdim// it will speculate at most one instruction. It also will not speculate if 47283625Sdim// there is a value defined in the if-block that is only used in the then-block. 48283625Sdim// These restrictions make sense since the speculation in SimplifyCFG seems 49283625Sdim// aimed at introducing cheap selects, while this pass is intended to do more 50283625Sdim// aggressive speculation while counting on later passes to either capitalize on 51283625Sdim// that or clean it up. 52283625Sdim// 53283625Sdim//===----------------------------------------------------------------------===// 54283625Sdim 55283625Sdim#include "llvm/ADT/SmallSet.h" 56283625Sdim#include "llvm/Analysis/TargetTransformInfo.h" 57283625Sdim#include "llvm/Analysis/ValueTracking.h" 58283625Sdim#include "llvm/IR/Instructions.h" 59283625Sdim#include "llvm/IR/Module.h" 60283625Sdim#include "llvm/IR/Operator.h" 61283625Sdim#include "llvm/Support/CommandLine.h" 62283625Sdim#include "llvm/Support/Debug.h" 63283625Sdim 64283625Sdimusing namespace llvm; 65283625Sdim 66283625Sdim#define DEBUG_TYPE "speculative-execution" 67283625Sdim 68283625Sdim// The risk that speculation will not pay off increases with the 69283625Sdim// number of instructions speculated, so we put a limit on that. 70283625Sdimstatic cl::opt<unsigned> SpecExecMaxSpeculationCost( 71283625Sdim "spec-exec-max-speculation-cost", cl::init(7), cl::Hidden, 72283625Sdim cl::desc("Speculative execution is not applied to basic blocks where " 73283625Sdim "the cost of the instructions to speculatively execute " 74283625Sdim "exceeds this limit.")); 75283625Sdim 76283625Sdim// Speculating just a few instructions from a larger block tends not 77283625Sdim// to be profitable and this limit prevents that. A reason for that is 78283625Sdim// that small basic blocks are more likely to be candidates for 79283625Sdim// further optimization. 80283625Sdimstatic cl::opt<unsigned> SpecExecMaxNotHoisted( 81283625Sdim "spec-exec-max-not-hoisted", cl::init(5), cl::Hidden, 82283625Sdim cl::desc("Speculative execution is not applied to basic blocks where the " 83283625Sdim "number of instructions that would not be speculatively executed " 84283625Sdim "exceeds this limit.")); 85283625Sdim 86283625Sdimnamespace { 87283625Sdimclass SpeculativeExecution : public FunctionPass { 88283625Sdim public: 89283625Sdim static char ID; 90283625Sdim SpeculativeExecution(): FunctionPass(ID) {} 91283625Sdim 92283625Sdim void getAnalysisUsage(AnalysisUsage &AU) const override; 93283625Sdim bool runOnFunction(Function &F) override; 94283625Sdim 95283625Sdim private: 96283625Sdim bool runOnBasicBlock(BasicBlock &B); 97283625Sdim bool considerHoistingFromTo(BasicBlock &FromBlock, BasicBlock &ToBlock); 98283625Sdim 99283625Sdim const TargetTransformInfo *TTI = nullptr; 100283625Sdim}; 101283625Sdim} // namespace 102283625Sdim 103283625Sdimchar SpeculativeExecution::ID = 0; 104283625SdimINITIALIZE_PASS_BEGIN(SpeculativeExecution, "speculative-execution", 105283625Sdim "Speculatively execute instructions", false, false) 106283625SdimINITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) 107283625SdimINITIALIZE_PASS_END(SpeculativeExecution, "speculative-execution", 108283625Sdim "Speculatively execute instructions", false, false) 109283625Sdim 110283625Sdimvoid SpeculativeExecution::getAnalysisUsage(AnalysisUsage &AU) const { 111283625Sdim AU.addRequired<TargetTransformInfoWrapperPass>(); 112283625Sdim} 113283625Sdim 114283625Sdimbool SpeculativeExecution::runOnFunction(Function &F) { 115283625Sdim if (skipOptnoneFunction(F)) 116283625Sdim return false; 117283625Sdim 118283625Sdim TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); 119283625Sdim 120283625Sdim bool Changed = false; 121283625Sdim for (auto& B : F) { 122283625Sdim Changed |= runOnBasicBlock(B); 123283625Sdim } 124283625Sdim return Changed; 125283625Sdim} 126283625Sdim 127283625Sdimbool SpeculativeExecution::runOnBasicBlock(BasicBlock &B) { 128283625Sdim BranchInst *BI = dyn_cast<BranchInst>(B.getTerminator()); 129283625Sdim if (BI == nullptr) 130283625Sdim return false; 131283625Sdim 132283625Sdim if (BI->getNumSuccessors() != 2) 133283625Sdim return false; 134283625Sdim BasicBlock &Succ0 = *BI->getSuccessor(0); 135283625Sdim BasicBlock &Succ1 = *BI->getSuccessor(1); 136283625Sdim 137283625Sdim if (&B == &Succ0 || &B == &Succ1 || &Succ0 == &Succ1) { 138283625Sdim return false; 139283625Sdim } 140283625Sdim 141283625Sdim // Hoist from if-then (triangle). 142283625Sdim if (Succ0.getSinglePredecessor() != nullptr && 143283625Sdim Succ0.getSingleSuccessor() == &Succ1) { 144283625Sdim return considerHoistingFromTo(Succ0, B); 145283625Sdim } 146283625Sdim 147283625Sdim // Hoist from if-else (triangle). 148283625Sdim if (Succ1.getSinglePredecessor() != nullptr && 149283625Sdim Succ1.getSingleSuccessor() == &Succ0) { 150283625Sdim return considerHoistingFromTo(Succ1, B); 151283625Sdim } 152283625Sdim 153283625Sdim // Hoist from if-then-else (diamond), but only if it is equivalent to 154283625Sdim // an if-else or if-then due to one of the branches doing nothing. 155283625Sdim if (Succ0.getSinglePredecessor() != nullptr && 156283625Sdim Succ1.getSinglePredecessor() != nullptr && 157283625Sdim Succ1.getSingleSuccessor() != nullptr && 158283625Sdim Succ1.getSingleSuccessor() != &B && 159283625Sdim Succ1.getSingleSuccessor() == Succ0.getSingleSuccessor()) { 160283625Sdim // If a block has only one instruction, then that is a terminator 161283625Sdim // instruction so that the block does nothing. This does happen. 162283625Sdim if (Succ1.size() == 1) // equivalent to if-then 163283625Sdim return considerHoistingFromTo(Succ0, B); 164283625Sdim if (Succ0.size() == 1) // equivalent to if-else 165283625Sdim return considerHoistingFromTo(Succ1, B); 166283625Sdim } 167283625Sdim 168283625Sdim return false; 169283625Sdim} 170283625Sdim 171283625Sdimstatic unsigned ComputeSpeculationCost(const Instruction *I, 172283625Sdim const TargetTransformInfo &TTI) { 173283625Sdim switch (Operator::getOpcode(I)) { 174283625Sdim case Instruction::GetElementPtr: 175283625Sdim case Instruction::Add: 176283625Sdim case Instruction::Mul: 177283625Sdim case Instruction::And: 178283625Sdim case Instruction::Or: 179283625Sdim case Instruction::Select: 180283625Sdim case Instruction::Shl: 181283625Sdim case Instruction::Sub: 182283625Sdim case Instruction::LShr: 183283625Sdim case Instruction::AShr: 184283625Sdim case Instruction::Xor: 185283625Sdim case Instruction::ZExt: 186283625Sdim case Instruction::SExt: 187283625Sdim return TTI.getUserCost(I); 188283625Sdim 189283625Sdim default: 190283625Sdim return UINT_MAX; // Disallow anything not whitelisted. 191283625Sdim } 192283625Sdim} 193283625Sdim 194283625Sdimbool SpeculativeExecution::considerHoistingFromTo(BasicBlock &FromBlock, 195283625Sdim BasicBlock &ToBlock) { 196283625Sdim SmallSet<const Instruction *, 8> NotHoisted; 197283625Sdim const auto AllPrecedingUsesFromBlockHoisted = [&NotHoisted](User *U) { 198283625Sdim for (Value* V : U->operand_values()) { 199283625Sdim if (Instruction *I = dyn_cast<Instruction>(V)) { 200283625Sdim if (NotHoisted.count(I) > 0) 201283625Sdim return false; 202283625Sdim } 203283625Sdim } 204283625Sdim return true; 205283625Sdim }; 206283625Sdim 207283625Sdim unsigned TotalSpeculationCost = 0; 208283625Sdim for (auto& I : FromBlock) { 209283625Sdim const unsigned Cost = ComputeSpeculationCost(&I, *TTI); 210283625Sdim if (Cost != UINT_MAX && isSafeToSpeculativelyExecute(&I) && 211283625Sdim AllPrecedingUsesFromBlockHoisted(&I)) { 212283625Sdim TotalSpeculationCost += Cost; 213283625Sdim if (TotalSpeculationCost > SpecExecMaxSpeculationCost) 214283625Sdim return false; // too much to hoist 215283625Sdim } else { 216283625Sdim NotHoisted.insert(&I); 217283625Sdim if (NotHoisted.size() > SpecExecMaxNotHoisted) 218283625Sdim return false; // too much left behind 219283625Sdim } 220283625Sdim } 221283625Sdim 222283625Sdim if (TotalSpeculationCost == 0) 223283625Sdim return false; // nothing to hoist 224283625Sdim 225283625Sdim for (auto I = FromBlock.begin(); I != FromBlock.end();) { 226283625Sdim // We have to increment I before moving Current as moving Current 227283625Sdim // changes the list that I is iterating through. 228283625Sdim auto Current = I; 229283625Sdim ++I; 230296417Sdim if (!NotHoisted.count(&*Current)) { 231283625Sdim Current->moveBefore(ToBlock.getTerminator()); 232283625Sdim } 233283625Sdim } 234283625Sdim return true; 235283625Sdim} 236283625Sdim 237283625Sdimnamespace llvm { 238283625Sdim 239283625SdimFunctionPass *createSpeculativeExecutionPass() { 240283625Sdim return new SpeculativeExecution(); 241283625Sdim} 242283625Sdim 243283625Sdim} // namespace llvm 244