CodeExtractor.cpp revision 341825
1193323Sed//===- CodeExtractor.cpp - Pull code region into a new function -----------===// 2193323Sed// 3193323Sed// The LLVM Compiler Infrastructure 4193323Sed// 5193323Sed// This file is distributed under the University of Illinois Open Source 6193323Sed// License. See LICENSE.TXT for details. 7193323Sed// 8193323Sed//===----------------------------------------------------------------------===// 9193323Sed// 10193323Sed// This file implements the interface to tear out a code region, such as an 11193323Sed// individual loop or a parallel section, into a new function, replacing it with 12193323Sed// a call to the new function. 13193323Sed// 14193323Sed//===----------------------------------------------------------------------===// 15193323Sed 16239462Sdim#include "llvm/Transforms/Utils/CodeExtractor.h" 17327952Sdim#include "llvm/ADT/ArrayRef.h" 18327952Sdim#include "llvm/ADT/DenseMap.h" 19327952Sdim#include "llvm/ADT/Optional.h" 20276479Sdim#include "llvm/ADT/STLExtras.h" 21249423Sdim#include "llvm/ADT/SetVector.h" 22327952Sdim#include "llvm/ADT/SmallPtrSet.h" 23327952Sdim#include "llvm/ADT/SmallVector.h" 24314564Sdim#include "llvm/Analysis/BlockFrequencyInfo.h" 25314564Sdim#include "llvm/Analysis/BlockFrequencyInfoImpl.h" 26314564Sdim#include "llvm/Analysis/BranchProbabilityInfo.h" 27193323Sed#include "llvm/Analysis/LoopInfo.h" 28327952Sdim#include "llvm/IR/Argument.h" 29327952Sdim#include "llvm/IR/Attributes.h" 30327952Sdim#include "llvm/IR/BasicBlock.h" 31327952Sdim#include "llvm/IR/CFG.h" 32327952Sdim#include "llvm/IR/Constant.h" 33249423Sdim#include "llvm/IR/Constants.h" 34327952Sdim#include "llvm/IR/DataLayout.h" 35249423Sdim#include "llvm/IR/DerivedTypes.h" 36276479Sdim#include "llvm/IR/Dominators.h" 37327952Sdim#include "llvm/IR/Function.h" 38327952Sdim#include "llvm/IR/GlobalValue.h" 39327952Sdim#include "llvm/IR/InstrTypes.h" 40327952Sdim#include "llvm/IR/Instruction.h" 41249423Sdim#include "llvm/IR/Instructions.h" 42321369Sdim#include "llvm/IR/IntrinsicInst.h" 43249423Sdim#include "llvm/IR/Intrinsics.h" 44249423Sdim#include "llvm/IR/LLVMContext.h" 45314564Sdim#include "llvm/IR/MDBuilder.h" 46249423Sdim#include "llvm/IR/Module.h" 47327952Sdim#include "llvm/IR/Type.h" 48327952Sdim#include "llvm/IR/User.h" 49327952Sdim#include "llvm/IR/Value.h" 50276479Sdim#include "llvm/IR/Verifier.h" 51249423Sdim#include "llvm/Pass.h" 52314564Sdim#include "llvm/Support/BlockFrequency.h" 53327952Sdim#include "llvm/Support/BranchProbability.h" 54327952Sdim#include "llvm/Support/Casting.h" 55193323Sed#include "llvm/Support/CommandLine.h" 56193323Sed#include "llvm/Support/Debug.h" 57198090Srdivacky#include "llvm/Support/ErrorHandling.h" 58198090Srdivacky#include "llvm/Support/raw_ostream.h" 59249423Sdim#include "llvm/Transforms/Utils/BasicBlockUtils.h" 60327952Sdim#include <cassert> 61327952Sdim#include <cstdint> 62327952Sdim#include <iterator> 63327952Sdim#include <map> 64193323Sed#include <set> 65327952Sdim#include <utility> 66327952Sdim#include <vector> 67327952Sdim 68193323Sedusing namespace llvm; 69341825Sdimusing ProfileCount = Function::ProfileCount; 70193323Sed 71276479Sdim#define DEBUG_TYPE "code-extractor" 72276479Sdim 73193323Sed// Provide a command-line option to aggregate function arguments into a struct 74193323Sed// for functions produced by the code extractor. This is useful when converting 75193323Sed// extracted functions to pthread-based code, as only one argument (void*) can 76193323Sed// be passed in to pthread_create(). 77193323Sedstatic cl::opt<bool> 78193323SedAggregateArgsOpt("aggregate-extracted-args", cl::Hidden, 79193323Sed cl::desc("Aggregate arguments to code-extracted functions")); 80193323Sed 81341825Sdim/// Test whether a block is valid for extraction. 82341825Sdimstatic bool isBlockValidForExtraction(const BasicBlock &BB, 83341825Sdim const SetVector<BasicBlock *> &Result, 84341825Sdim bool AllowVarArgs, bool AllowAlloca) { 85321369Sdim // taking the address of a basic block moved to another function is illegal 86321369Sdim if (BB.hasAddressTaken()) 87321369Sdim return false; 88193323Sed 89321369Sdim // don't hoist code that uses another basicblock address, as it's likely to 90321369Sdim // lead to unexpected behavior, like cross-function jumps 91321369Sdim SmallPtrSet<User const *, 16> Visited; 92321369Sdim SmallVector<User const *, 16> ToVisit; 93321369Sdim 94321369Sdim for (Instruction const &Inst : BB) 95321369Sdim ToVisit.push_back(&Inst); 96321369Sdim 97321369Sdim while (!ToVisit.empty()) { 98321369Sdim User const *Curr = ToVisit.pop_back_val(); 99321369Sdim if (!Visited.insert(Curr).second) 100321369Sdim continue; 101321369Sdim if (isa<BlockAddress const>(Curr)) 102321369Sdim return false; // even a reference to self is likely to be not compatible 103321369Sdim 104321369Sdim if (isa<Instruction>(Curr) && cast<Instruction>(Curr)->getParent() != &BB) 105321369Sdim continue; 106321369Sdim 107321369Sdim for (auto const &U : Curr->operands()) { 108321369Sdim if (auto *UU = dyn_cast<User>(U)) 109321369Sdim ToVisit.push_back(UU); 110321369Sdim } 111321369Sdim } 112321369Sdim 113341825Sdim // If explicitly requested, allow vastart and alloca. For invoke instructions 114341825Sdim // verify that extraction is valid. 115239462Sdim for (BasicBlock::const_iterator I = BB.begin(), E = BB.end(); I != E; ++I) { 116341825Sdim if (isa<AllocaInst>(I)) { 117341825Sdim if (!AllowAlloca) 118341825Sdim return false; 119341825Sdim continue; 120341825Sdim } 121341825Sdim 122341825Sdim if (const auto *II = dyn_cast<InvokeInst>(I)) { 123341825Sdim // Unwind destination (either a landingpad, catchswitch, or cleanuppad) 124341825Sdim // must be a part of the subgraph which is being extracted. 125341825Sdim if (auto *UBB = II->getUnwindDest()) 126341825Sdim if (!Result.count(UBB)) 127341825Sdim return false; 128341825Sdim continue; 129341825Sdim } 130341825Sdim 131341825Sdim // All catch handlers of a catchswitch instruction as well as the unwind 132341825Sdim // destination must be in the subgraph. 133341825Sdim if (const auto *CSI = dyn_cast<CatchSwitchInst>(I)) { 134341825Sdim if (auto *UBB = CSI->getUnwindDest()) 135341825Sdim if (!Result.count(UBB)) 136341825Sdim return false; 137341825Sdim for (auto *HBB : CSI->handlers()) 138341825Sdim if (!Result.count(const_cast<BasicBlock*>(HBB))) 139341825Sdim return false; 140341825Sdim continue; 141341825Sdim } 142341825Sdim 143341825Sdim // Make sure that entire catch handler is within subgraph. It is sufficient 144341825Sdim // to check that catch return's block is in the list. 145341825Sdim if (const auto *CPI = dyn_cast<CatchPadInst>(I)) { 146341825Sdim for (const auto *U : CPI->users()) 147341825Sdim if (const auto *CRI = dyn_cast<CatchReturnInst>(U)) 148341825Sdim if (!Result.count(const_cast<BasicBlock*>(CRI->getParent()))) 149341825Sdim return false; 150341825Sdim continue; 151341825Sdim } 152341825Sdim 153341825Sdim // And do similar checks for cleanup handler - the entire handler must be 154341825Sdim // in subgraph which is going to be extracted. For cleanup return should 155341825Sdim // additionally check that the unwind destination is also in the subgraph. 156341825Sdim if (const auto *CPI = dyn_cast<CleanupPadInst>(I)) { 157341825Sdim for (const auto *U : CPI->users()) 158341825Sdim if (const auto *CRI = dyn_cast<CleanupReturnInst>(U)) 159341825Sdim if (!Result.count(const_cast<BasicBlock*>(CRI->getParent()))) 160341825Sdim return false; 161341825Sdim continue; 162341825Sdim } 163341825Sdim if (const auto *CRI = dyn_cast<CleanupReturnInst>(I)) { 164341825Sdim if (auto *UBB = CRI->getUnwindDest()) 165341825Sdim if (!Result.count(UBB)) 166341825Sdim return false; 167341825Sdim continue; 168341825Sdim } 169341825Sdim 170239462Sdim if (const CallInst *CI = dyn_cast<CallInst>(I)) 171239462Sdim if (const Function *F = CI->getCalledFunction()) 172327952Sdim if (F->getIntrinsicID() == Intrinsic::vastart) { 173327952Sdim if (AllowVarArgs) 174327952Sdim continue; 175327952Sdim else 176327952Sdim return false; 177327952Sdim } 178239462Sdim } 179193323Sed 180239462Sdim return true; 181239462Sdim} 182193323Sed 183341825Sdim/// Build a set of blocks to extract if the input blocks are viable. 184321369Sdimstatic SetVector<BasicBlock *> 185327952SdimbuildExtractionBlockSet(ArrayRef<BasicBlock *> BBs, DominatorTree *DT, 186341825Sdim bool AllowVarArgs, bool AllowAlloca) { 187321369Sdim assert(!BBs.empty() && "The set of blocks to extract must be non-empty"); 188239462Sdim SetVector<BasicBlock *> Result; 189193323Sed 190239462Sdim // Loop over the blocks, adding them to our set-vector, and aborting with an 191239462Sdim // empty set if we encounter invalid blocks. 192321369Sdim for (BasicBlock *BB : BBs) { 193321369Sdim // If this block is dead, don't process it. 194321369Sdim if (DT && !DT->isReachableFromEntry(BB)) 195321369Sdim continue; 196321369Sdim 197321369Sdim if (!Result.insert(BB)) 198239462Sdim llvm_unreachable("Repeated basic blocks in extraction input"); 199341825Sdim } 200341825Sdim 201341825Sdim for (auto *BB : Result) { 202341825Sdim if (!isBlockValidForExtraction(*BB, Result, AllowVarArgs, AllowAlloca)) 203341825Sdim return {}; 204341825Sdim 205341825Sdim // Make sure that the first block is not a landing pad. 206341825Sdim if (BB == Result.front()) { 207341825Sdim if (BB->isEHPad()) { 208341825Sdim LLVM_DEBUG(dbgs() << "The first block cannot be an unwind block\n"); 209341825Sdim return {}; 210341825Sdim } 211341825Sdim continue; 212193323Sed } 213341825Sdim 214341825Sdim // All blocks other than the first must not have predecessors outside of 215341825Sdim // the subgraph which is being extracted. 216341825Sdim for (auto *PBB : predecessors(BB)) 217341825Sdim if (!Result.count(PBB)) { 218341825Sdim LLVM_DEBUG( 219341825Sdim dbgs() << "No blocks in this region may have entries from " 220341825Sdim "outside the region except for the first block!\n"); 221341825Sdim return {}; 222341825Sdim } 223321369Sdim } 224193323Sed 225239462Sdim return Result; 226239462Sdim} 227193323Sed 228239462SdimCodeExtractor::CodeExtractor(ArrayRef<BasicBlock *> BBs, DominatorTree *DT, 229314564Sdim bool AggregateArgs, BlockFrequencyInfo *BFI, 230341825Sdim BranchProbabilityInfo *BPI, bool AllowVarArgs, 231341825Sdim bool AllowAlloca) 232314564Sdim : DT(DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI), 233327952Sdim BPI(BPI), AllowVarArgs(AllowVarArgs), 234341825Sdim Blocks(buildExtractionBlockSet(BBs, DT, AllowVarArgs, AllowAlloca)) {} 235239462Sdim 236314564SdimCodeExtractor::CodeExtractor(DominatorTree &DT, Loop &L, bool AggregateArgs, 237314564Sdim BlockFrequencyInfo *BFI, 238314564Sdim BranchProbabilityInfo *BPI) 239314564Sdim : DT(&DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI), 240327952Sdim BPI(BPI), AllowVarArgs(false), 241327952Sdim Blocks(buildExtractionBlockSet(L.getBlocks(), &DT, 242341825Sdim /* AllowVarArgs */ false, 243341825Sdim /* AllowAlloca */ false)) {} 244239462Sdim 245239462Sdim/// definedInRegion - Return true if the specified value is defined in the 246239462Sdim/// extracted region. 247239462Sdimstatic bool definedInRegion(const SetVector<BasicBlock *> &Blocks, Value *V) { 248239462Sdim if (Instruction *I = dyn_cast<Instruction>(V)) 249239462Sdim if (Blocks.count(I->getParent())) 250239462Sdim return true; 251239462Sdim return false; 252239462Sdim} 253239462Sdim 254239462Sdim/// definedInCaller - Return true if the specified value is defined in the 255239462Sdim/// function being code extracted, but not in the region being extracted. 256239462Sdim/// These values must be passed in as live-ins to the function. 257239462Sdimstatic bool definedInCaller(const SetVector<BasicBlock *> &Blocks, Value *V) { 258239462Sdim if (isa<Argument>(V)) return true; 259239462Sdim if (Instruction *I = dyn_cast<Instruction>(V)) 260239462Sdim if (!Blocks.count(I->getParent())) 261239462Sdim return true; 262239462Sdim return false; 263239462Sdim} 264239462Sdim 265321369Sdimstatic BasicBlock *getCommonExitBlock(const SetVector<BasicBlock *> &Blocks) { 266321369Sdim BasicBlock *CommonExitBlock = nullptr; 267321369Sdim auto hasNonCommonExitSucc = [&](BasicBlock *Block) { 268321369Sdim for (auto *Succ : successors(Block)) { 269321369Sdim // Internal edges, ok. 270321369Sdim if (Blocks.count(Succ)) 271321369Sdim continue; 272321369Sdim if (!CommonExitBlock) { 273321369Sdim CommonExitBlock = Succ; 274321369Sdim continue; 275321369Sdim } 276321369Sdim if (CommonExitBlock == Succ) 277321369Sdim continue; 278321369Sdim 279321369Sdim return true; 280321369Sdim } 281321369Sdim return false; 282321369Sdim }; 283321369Sdim 284321369Sdim if (any_of(Blocks, hasNonCommonExitSucc)) 285321369Sdim return nullptr; 286321369Sdim 287321369Sdim return CommonExitBlock; 288321369Sdim} 289321369Sdim 290321369Sdimbool CodeExtractor::isLegalToShrinkwrapLifetimeMarkers( 291321369Sdim Instruction *Addr) const { 292321369Sdim AllocaInst *AI = cast<AllocaInst>(Addr->stripInBoundsConstantOffsets()); 293321369Sdim Function *Func = (*Blocks.begin())->getParent(); 294321369Sdim for (BasicBlock &BB : *Func) { 295321369Sdim if (Blocks.count(&BB)) 296321369Sdim continue; 297321369Sdim for (Instruction &II : BB) { 298321369Sdim if (isa<DbgInfoIntrinsic>(II)) 299321369Sdim continue; 300321369Sdim 301321369Sdim unsigned Opcode = II.getOpcode(); 302321369Sdim Value *MemAddr = nullptr; 303321369Sdim switch (Opcode) { 304321369Sdim case Instruction::Store: 305321369Sdim case Instruction::Load: { 306321369Sdim if (Opcode == Instruction::Store) { 307321369Sdim StoreInst *SI = cast<StoreInst>(&II); 308321369Sdim MemAddr = SI->getPointerOperand(); 309321369Sdim } else { 310321369Sdim LoadInst *LI = cast<LoadInst>(&II); 311321369Sdim MemAddr = LI->getPointerOperand(); 312321369Sdim } 313321369Sdim // Global variable can not be aliased with locals. 314321369Sdim if (dyn_cast<Constant>(MemAddr)) 315321369Sdim break; 316321369Sdim Value *Base = MemAddr->stripInBoundsConstantOffsets(); 317321369Sdim if (!dyn_cast<AllocaInst>(Base) || Base == AI) 318321369Sdim return false; 319321369Sdim break; 320321369Sdim } 321321369Sdim default: { 322321369Sdim IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(&II); 323321369Sdim if (IntrInst) { 324321369Sdim if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_start || 325321369Sdim IntrInst->getIntrinsicID() == Intrinsic::lifetime_end) 326321369Sdim break; 327321369Sdim return false; 328321369Sdim } 329321369Sdim // Treat all the other cases conservatively if it has side effects. 330321369Sdim if (II.mayHaveSideEffects()) 331321369Sdim return false; 332321369Sdim } 333321369Sdim } 334321369Sdim } 335321369Sdim } 336321369Sdim 337321369Sdim return true; 338321369Sdim} 339321369Sdim 340321369SdimBasicBlock * 341321369SdimCodeExtractor::findOrCreateBlockForHoisting(BasicBlock *CommonExitBlock) { 342321369Sdim BasicBlock *SinglePredFromOutlineRegion = nullptr; 343321369Sdim assert(!Blocks.count(CommonExitBlock) && 344321369Sdim "Expect a block outside the region!"); 345321369Sdim for (auto *Pred : predecessors(CommonExitBlock)) { 346321369Sdim if (!Blocks.count(Pred)) 347321369Sdim continue; 348321369Sdim if (!SinglePredFromOutlineRegion) { 349321369Sdim SinglePredFromOutlineRegion = Pred; 350321369Sdim } else if (SinglePredFromOutlineRegion != Pred) { 351321369Sdim SinglePredFromOutlineRegion = nullptr; 352321369Sdim break; 353321369Sdim } 354321369Sdim } 355321369Sdim 356321369Sdim if (SinglePredFromOutlineRegion) 357321369Sdim return SinglePredFromOutlineRegion; 358321369Sdim 359321369Sdim#ifndef NDEBUG 360321369Sdim auto getFirstPHI = [](BasicBlock *BB) { 361321369Sdim BasicBlock::iterator I = BB->begin(); 362321369Sdim PHINode *FirstPhi = nullptr; 363321369Sdim while (I != BB->end()) { 364321369Sdim PHINode *Phi = dyn_cast<PHINode>(I); 365321369Sdim if (!Phi) 366321369Sdim break; 367321369Sdim if (!FirstPhi) { 368321369Sdim FirstPhi = Phi; 369321369Sdim break; 370321369Sdim } 371321369Sdim } 372321369Sdim return FirstPhi; 373321369Sdim }; 374321369Sdim // If there are any phi nodes, the single pred either exists or has already 375321369Sdim // be created before code extraction. 376321369Sdim assert(!getFirstPHI(CommonExitBlock) && "Phi not expected"); 377321369Sdim#endif 378321369Sdim 379321369Sdim BasicBlock *NewExitBlock = CommonExitBlock->splitBasicBlock( 380321369Sdim CommonExitBlock->getFirstNonPHI()->getIterator()); 381321369Sdim 382327952Sdim for (auto PI = pred_begin(CommonExitBlock), PE = pred_end(CommonExitBlock); 383327952Sdim PI != PE;) { 384327952Sdim BasicBlock *Pred = *PI++; 385321369Sdim if (Blocks.count(Pred)) 386321369Sdim continue; 387321369Sdim Pred->getTerminator()->replaceUsesOfWith(CommonExitBlock, NewExitBlock); 388321369Sdim } 389321369Sdim // Now add the old exit block to the outline region. 390321369Sdim Blocks.insert(CommonExitBlock); 391321369Sdim return CommonExitBlock; 392321369Sdim} 393321369Sdim 394321369Sdimvoid CodeExtractor::findAllocas(ValueSet &SinkCands, ValueSet &HoistCands, 395321369Sdim BasicBlock *&ExitBlock) const { 396321369Sdim Function *Func = (*Blocks.begin())->getParent(); 397321369Sdim ExitBlock = getCommonExitBlock(Blocks); 398321369Sdim 399321369Sdim for (BasicBlock &BB : *Func) { 400321369Sdim if (Blocks.count(&BB)) 401321369Sdim continue; 402321369Sdim for (Instruction &II : BB) { 403321369Sdim auto *AI = dyn_cast<AllocaInst>(&II); 404321369Sdim if (!AI) 405321369Sdim continue; 406321369Sdim 407321369Sdim // Find the pair of life time markers for address 'Addr' that are either 408321369Sdim // defined inside the outline region or can legally be shrinkwrapped into 409321369Sdim // the outline region. If there are not other untracked uses of the 410321369Sdim // address, return the pair of markers if found; otherwise return a pair 411321369Sdim // of nullptr. 412321369Sdim auto GetLifeTimeMarkers = 413321369Sdim [&](Instruction *Addr, bool &SinkLifeStart, 414321369Sdim bool &HoistLifeEnd) -> std::pair<Instruction *, Instruction *> { 415321369Sdim Instruction *LifeStart = nullptr, *LifeEnd = nullptr; 416321369Sdim 417321369Sdim for (User *U : Addr->users()) { 418321369Sdim IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(U); 419321369Sdim if (IntrInst) { 420321369Sdim if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_start) { 421321369Sdim // Do not handle the case where AI has multiple start markers. 422321369Sdim if (LifeStart) 423321369Sdim return std::make_pair<Instruction *>(nullptr, nullptr); 424321369Sdim LifeStart = IntrInst; 425321369Sdim } 426321369Sdim if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_end) { 427321369Sdim if (LifeEnd) 428321369Sdim return std::make_pair<Instruction *>(nullptr, nullptr); 429321369Sdim LifeEnd = IntrInst; 430321369Sdim } 431321369Sdim continue; 432321369Sdim } 433321369Sdim // Find untracked uses of the address, bail. 434321369Sdim if (!definedInRegion(Blocks, U)) 435321369Sdim return std::make_pair<Instruction *>(nullptr, nullptr); 436321369Sdim } 437321369Sdim 438321369Sdim if (!LifeStart || !LifeEnd) 439321369Sdim return std::make_pair<Instruction *>(nullptr, nullptr); 440321369Sdim 441321369Sdim SinkLifeStart = !definedInRegion(Blocks, LifeStart); 442321369Sdim HoistLifeEnd = !definedInRegion(Blocks, LifeEnd); 443321369Sdim // Do legality Check. 444321369Sdim if ((SinkLifeStart || HoistLifeEnd) && 445321369Sdim !isLegalToShrinkwrapLifetimeMarkers(Addr)) 446321369Sdim return std::make_pair<Instruction *>(nullptr, nullptr); 447321369Sdim 448321369Sdim // Check to see if we have a place to do hoisting, if not, bail. 449321369Sdim if (HoistLifeEnd && !ExitBlock) 450321369Sdim return std::make_pair<Instruction *>(nullptr, nullptr); 451321369Sdim 452321369Sdim return std::make_pair(LifeStart, LifeEnd); 453321369Sdim }; 454321369Sdim 455321369Sdim bool SinkLifeStart = false, HoistLifeEnd = false; 456321369Sdim auto Markers = GetLifeTimeMarkers(AI, SinkLifeStart, HoistLifeEnd); 457321369Sdim 458321369Sdim if (Markers.first) { 459321369Sdim if (SinkLifeStart) 460321369Sdim SinkCands.insert(Markers.first); 461321369Sdim SinkCands.insert(AI); 462321369Sdim if (HoistLifeEnd) 463321369Sdim HoistCands.insert(Markers.second); 464321369Sdim continue; 465321369Sdim } 466321369Sdim 467321369Sdim // Follow the bitcast. 468321369Sdim Instruction *MarkerAddr = nullptr; 469321369Sdim for (User *U : AI->users()) { 470321369Sdim if (U->stripInBoundsConstantOffsets() == AI) { 471321369Sdim SinkLifeStart = false; 472321369Sdim HoistLifeEnd = false; 473321369Sdim Instruction *Bitcast = cast<Instruction>(U); 474321369Sdim Markers = GetLifeTimeMarkers(Bitcast, SinkLifeStart, HoistLifeEnd); 475321369Sdim if (Markers.first) { 476321369Sdim MarkerAddr = Bitcast; 477321369Sdim continue; 478321369Sdim } 479321369Sdim } 480321369Sdim 481321369Sdim // Found unknown use of AI. 482321369Sdim if (!definedInRegion(Blocks, U)) { 483321369Sdim MarkerAddr = nullptr; 484321369Sdim break; 485321369Sdim } 486321369Sdim } 487321369Sdim 488321369Sdim if (MarkerAddr) { 489321369Sdim if (SinkLifeStart) 490321369Sdim SinkCands.insert(Markers.first); 491321369Sdim if (!definedInRegion(Blocks, MarkerAddr)) 492321369Sdim SinkCands.insert(MarkerAddr); 493321369Sdim SinkCands.insert(AI); 494321369Sdim if (HoistLifeEnd) 495321369Sdim HoistCands.insert(Markers.second); 496321369Sdim } 497321369Sdim } 498321369Sdim } 499321369Sdim} 500321369Sdim 501321369Sdimvoid CodeExtractor::findInputsOutputs(ValueSet &Inputs, ValueSet &Outputs, 502321369Sdim const ValueSet &SinkCands) const { 503309124Sdim for (BasicBlock *BB : Blocks) { 504239462Sdim // If a used value is defined outside the region, it's an input. If an 505239462Sdim // instruction is used outside the region, it's an output. 506309124Sdim for (Instruction &II : *BB) { 507309124Sdim for (User::op_iterator OI = II.op_begin(), OE = II.op_end(); OI != OE; 508321369Sdim ++OI) { 509321369Sdim Value *V = *OI; 510321369Sdim if (!SinkCands.count(V) && definedInCaller(Blocks, V)) 511321369Sdim Inputs.insert(V); 512321369Sdim } 513239462Sdim 514309124Sdim for (User *U : II.users()) 515276479Sdim if (!definedInRegion(Blocks, U)) { 516309124Sdim Outputs.insert(&II); 517239462Sdim break; 518239462Sdim } 519239462Sdim } 520239462Sdim } 521239462Sdim} 522239462Sdim 523193323Sed/// severSplitPHINodes - If a PHI node has multiple inputs from outside of the 524193323Sed/// region, we need to split the entry block of the region so that the PHI node 525193323Sed/// is easier to deal with. 526193323Sedvoid CodeExtractor::severSplitPHINodes(BasicBlock *&Header) { 527221345Sdim unsigned NumPredsFromRegion = 0; 528193323Sed unsigned NumPredsOutsideRegion = 0; 529193323Sed 530193323Sed if (Header != &Header->getParent()->getEntryBlock()) { 531193323Sed PHINode *PN = dyn_cast<PHINode>(Header->begin()); 532193323Sed if (!PN) return; // No PHI nodes. 533193323Sed 534193323Sed // If the header node contains any PHI nodes, check to see if there is more 535193323Sed // than one entry from outside the region. If so, we need to sever the 536193323Sed // header block into two. 537193323Sed for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) 538239462Sdim if (Blocks.count(PN->getIncomingBlock(i))) 539221345Sdim ++NumPredsFromRegion; 540193323Sed else 541193323Sed ++NumPredsOutsideRegion; 542193323Sed 543193323Sed // If there is one (or fewer) predecessor from outside the region, we don't 544193323Sed // need to do anything special. 545193323Sed if (NumPredsOutsideRegion <= 1) return; 546193323Sed } 547193323Sed 548193323Sed // Otherwise, we need to split the header block into two pieces: one 549193323Sed // containing PHI nodes merging values from outside of the region, and a 550193323Sed // second that contains all of the code for the block and merges back any 551193323Sed // incoming values from inside of the region. 552327952Sdim BasicBlock *NewBB = SplitBlock(Header, Header->getFirstNonPHI(), DT); 553193323Sed 554193323Sed // We only want to code extract the second block now, and it becomes the new 555193323Sed // header of the region. 556193323Sed BasicBlock *OldPred = Header; 557239462Sdim Blocks.remove(OldPred); 558239462Sdim Blocks.insert(NewBB); 559193323Sed Header = NewBB; 560193323Sed 561193323Sed // Okay, now we need to adjust the PHI nodes and any branches from within the 562193323Sed // region to go to the new header block instead of the old header block. 563221345Sdim if (NumPredsFromRegion) { 564193323Sed PHINode *PN = cast<PHINode>(OldPred->begin()); 565193323Sed // Loop over all of the predecessors of OldPred that are in the region, 566193323Sed // changing them to branch to NewBB instead. 567193323Sed for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) 568239462Sdim if (Blocks.count(PN->getIncomingBlock(i))) { 569193323Sed TerminatorInst *TI = PN->getIncomingBlock(i)->getTerminator(); 570193323Sed TI->replaceUsesOfWith(OldPred, NewBB); 571193323Sed } 572193323Sed 573221345Sdim // Okay, everything within the region is now branching to the right block, we 574193323Sed // just have to update the PHI nodes now, inserting PHI nodes into NewBB. 575321369Sdim BasicBlock::iterator AfterPHIs; 576193323Sed for (AfterPHIs = OldPred->begin(); isa<PHINode>(AfterPHIs); ++AfterPHIs) { 577193323Sed PHINode *PN = cast<PHINode>(AfterPHIs); 578193323Sed // Create a new PHI node in the new region, which has an incoming value 579193323Sed // from OldPred of PN. 580221345Sdim PHINode *NewPN = PHINode::Create(PN->getType(), 1 + NumPredsFromRegion, 581296417Sdim PN->getName() + ".ce", &NewBB->front()); 582321369Sdim PN->replaceAllUsesWith(NewPN); 583193323Sed NewPN->addIncoming(PN, OldPred); 584193323Sed 585193323Sed // Loop over all of the incoming value in PN, moving them to NewPN if they 586193323Sed // are from the extracted region. 587193323Sed for (unsigned i = 0; i != PN->getNumIncomingValues(); ++i) { 588239462Sdim if (Blocks.count(PN->getIncomingBlock(i))) { 589193323Sed NewPN->addIncoming(PN->getIncomingValue(i), PN->getIncomingBlock(i)); 590193323Sed PN->removeIncomingValue(i); 591193323Sed --i; 592193323Sed } 593193323Sed } 594193323Sed } 595193323Sed } 596193323Sed} 597193323Sed 598193323Sedvoid CodeExtractor::splitReturnBlocks() { 599309124Sdim for (BasicBlock *Block : Blocks) 600309124Sdim if (ReturnInst *RI = dyn_cast<ReturnInst>(Block->getTerminator())) { 601296417Sdim BasicBlock *New = 602309124Sdim Block->splitBasicBlock(RI->getIterator(), Block->getName() + ".ret"); 603198090Srdivacky if (DT) { 604218893Sdim // Old dominates New. New node dominates all other nodes dominated 605218893Sdim // by Old. 606309124Sdim DomTreeNode *OldNode = DT->getNode(Block); 607309124Sdim SmallVector<DomTreeNode *, 8> Children(OldNode->begin(), 608309124Sdim OldNode->end()); 609198090Srdivacky 610309124Sdim DomTreeNode *NewNode = DT->addNewBlock(New, Block); 611198090Srdivacky 612309124Sdim for (DomTreeNode *I : Children) 613309124Sdim DT->changeImmediateDominator(I, NewNode); 614198090Srdivacky } 615198090Srdivacky } 616193323Sed} 617193323Sed 618193323Sed/// constructFunction - make a function based on inputs and outputs, as follows: 619193323Sed/// f(in0, ..., inN, out0, ..., outN) 620239462SdimFunction *CodeExtractor::constructFunction(const ValueSet &inputs, 621239462Sdim const ValueSet &outputs, 622193323Sed BasicBlock *header, 623193323Sed BasicBlock *newRootNode, 624193323Sed BasicBlock *newHeader, 625193323Sed Function *oldFunction, 626193323Sed Module *M) { 627341825Sdim LLVM_DEBUG(dbgs() << "inputs: " << inputs.size() << "\n"); 628341825Sdim LLVM_DEBUG(dbgs() << "outputs: " << outputs.size() << "\n"); 629193323Sed 630193323Sed // This function returns unsigned, outputs will go back by reference. 631193323Sed switch (NumExitBlocks) { 632193323Sed case 0: 633198090Srdivacky case 1: RetTy = Type::getVoidTy(header->getContext()); break; 634198090Srdivacky case 2: RetTy = Type::getInt1Ty(header->getContext()); break; 635198090Srdivacky default: RetTy = Type::getInt16Ty(header->getContext()); break; 636193323Sed } 637193323Sed 638327952Sdim std::vector<Type *> paramTy; 639193323Sed 640193323Sed // Add the types of the input values to the function's argument list 641309124Sdim for (Value *value : inputs) { 642341825Sdim LLVM_DEBUG(dbgs() << "value used in func: " << *value << "\n"); 643193323Sed paramTy.push_back(value->getType()); 644193323Sed } 645193323Sed 646193323Sed // Add the types of the output values to the function's argument list. 647309124Sdim for (Value *output : outputs) { 648341825Sdim LLVM_DEBUG(dbgs() << "instr used in func: " << *output << "\n"); 649193323Sed if (AggregateArgs) 650309124Sdim paramTy.push_back(output->getType()); 651193323Sed else 652309124Sdim paramTy.push_back(PointerType::getUnqual(output->getType())); 653193323Sed } 654193323Sed 655341825Sdim LLVM_DEBUG({ 656309124Sdim dbgs() << "Function type: " << *RetTy << " f("; 657309124Sdim for (Type *i : paramTy) 658309124Sdim dbgs() << *i << ", "; 659309124Sdim dbgs() << ")\n"; 660309124Sdim }); 661193323Sed 662288943Sdim StructType *StructTy; 663193323Sed if (AggregateArgs && (inputs.size() + outputs.size() > 0)) { 664288943Sdim StructTy = StructType::get(M->getContext(), paramTy); 665193323Sed paramTy.clear(); 666288943Sdim paramTy.push_back(PointerType::getUnqual(StructTy)); 667193323Sed } 668226633Sdim FunctionType *funcType = 669327952Sdim FunctionType::get(RetTy, paramTy, 670327952Sdim AllowVarArgs && oldFunction->isVarArg()); 671193323Sed 672193323Sed // Create the new function 673193323Sed Function *newFunction = Function::Create(funcType, 674193323Sed GlobalValue::InternalLinkage, 675193323Sed oldFunction->getName() + "_" + 676193323Sed header->getName(), M); 677193323Sed // If the old function is no-throw, so is the new one. 678193323Sed if (oldFunction->doesNotThrow()) 679243830Sdim newFunction->setDoesNotThrow(); 680314564Sdim 681314564Sdim // Inherit the uwtable attribute if we need to. 682314564Sdim if (oldFunction->hasUWTable()) 683314564Sdim newFunction->setHasUWTable(); 684314564Sdim 685341825Sdim // Inherit all of the target dependent attributes and white-listed 686341825Sdim // target independent attributes. 687314564Sdim // (e.g. If the extracted region contains a call to an x86.sse 688314564Sdim // instruction we need to make sure that the extracted region has the 689314564Sdim // "target-features" attribute allowing it to be lowered. 690314564Sdim // FIXME: This should be changed to check to see if a specific 691314564Sdim // attribute can not be inherited. 692341825Sdim for (const auto &Attr : oldFunction->getAttributes().getFnAttributes()) { 693341825Sdim if (Attr.isStringAttribute()) { 694341825Sdim if (Attr.getKindAsString() == "thunk") 695341825Sdim continue; 696341825Sdim } else 697341825Sdim switch (Attr.getKindAsEnum()) { 698341825Sdim // Those attributes cannot be propagated safely. Explicitly list them 699341825Sdim // here so we get a warning if new attributes are added. This list also 700341825Sdim // includes non-function attributes. 701341825Sdim case Attribute::Alignment: 702341825Sdim case Attribute::AllocSize: 703341825Sdim case Attribute::ArgMemOnly: 704341825Sdim case Attribute::Builtin: 705341825Sdim case Attribute::ByVal: 706341825Sdim case Attribute::Convergent: 707341825Sdim case Attribute::Dereferenceable: 708341825Sdim case Attribute::DereferenceableOrNull: 709341825Sdim case Attribute::InAlloca: 710341825Sdim case Attribute::InReg: 711341825Sdim case Attribute::InaccessibleMemOnly: 712341825Sdim case Attribute::InaccessibleMemOrArgMemOnly: 713341825Sdim case Attribute::JumpTable: 714341825Sdim case Attribute::Naked: 715341825Sdim case Attribute::Nest: 716341825Sdim case Attribute::NoAlias: 717341825Sdim case Attribute::NoBuiltin: 718341825Sdim case Attribute::NoCapture: 719341825Sdim case Attribute::NoReturn: 720341825Sdim case Attribute::None: 721341825Sdim case Attribute::NonNull: 722341825Sdim case Attribute::ReadNone: 723341825Sdim case Attribute::ReadOnly: 724341825Sdim case Attribute::Returned: 725341825Sdim case Attribute::ReturnsTwice: 726341825Sdim case Attribute::SExt: 727341825Sdim case Attribute::Speculatable: 728341825Sdim case Attribute::StackAlignment: 729341825Sdim case Attribute::StructRet: 730341825Sdim case Attribute::SwiftError: 731341825Sdim case Attribute::SwiftSelf: 732341825Sdim case Attribute::WriteOnly: 733341825Sdim case Attribute::ZExt: 734341825Sdim case Attribute::EndAttrKinds: 735341825Sdim continue; 736341825Sdim // Those attributes should be safe to propagate to the extracted function. 737341825Sdim case Attribute::AlwaysInline: 738341825Sdim case Attribute::Cold: 739341825Sdim case Attribute::NoRecurse: 740341825Sdim case Attribute::InlineHint: 741341825Sdim case Attribute::MinSize: 742341825Sdim case Attribute::NoDuplicate: 743341825Sdim case Attribute::NoImplicitFloat: 744341825Sdim case Attribute::NoInline: 745341825Sdim case Attribute::NonLazyBind: 746341825Sdim case Attribute::NoRedZone: 747341825Sdim case Attribute::NoUnwind: 748341825Sdim case Attribute::OptForFuzzing: 749341825Sdim case Attribute::OptimizeNone: 750341825Sdim case Attribute::OptimizeForSize: 751341825Sdim case Attribute::SafeStack: 752341825Sdim case Attribute::ShadowCallStack: 753341825Sdim case Attribute::SanitizeAddress: 754341825Sdim case Attribute::SanitizeMemory: 755341825Sdim case Attribute::SanitizeThread: 756341825Sdim case Attribute::SanitizeHWAddress: 757341825Sdim case Attribute::StackProtect: 758341825Sdim case Attribute::StackProtectReq: 759341825Sdim case Attribute::StackProtectStrong: 760341825Sdim case Attribute::StrictFP: 761341825Sdim case Attribute::UWTable: 762341825Sdim case Attribute::NoCfCheck: 763341825Sdim break; 764341825Sdim } 765314564Sdim 766341825Sdim newFunction->addFnAttr(Attr); 767341825Sdim } 768193323Sed newFunction->getBasicBlockList().push_back(newRootNode); 769193323Sed 770193323Sed // Create an iterator to name all of the arguments we inserted. 771193323Sed Function::arg_iterator AI = newFunction->arg_begin(); 772193323Sed 773193323Sed // Rewrite all users of the inputs in the extracted region to use the 774193323Sed // arguments (or appropriate addressing into struct) instead. 775193323Sed for (unsigned i = 0, e = inputs.size(); i != e; ++i) { 776193323Sed Value *RewriteVal; 777193323Sed if (AggregateArgs) { 778193323Sed Value *Idx[2]; 779198090Srdivacky Idx[0] = Constant::getNullValue(Type::getInt32Ty(header->getContext())); 780198090Srdivacky Idx[1] = ConstantInt::get(Type::getInt32Ty(header->getContext()), i); 781193323Sed TerminatorInst *TI = newFunction->begin()->getTerminator(); 782288943Sdim GetElementPtrInst *GEP = GetElementPtrInst::Create( 783296417Sdim StructTy, &*AI, Idx, "gep_" + inputs[i]->getName(), TI); 784198090Srdivacky RewriteVal = new LoadInst(GEP, "loadgep_" + inputs[i]->getName(), TI); 785193323Sed } else 786296417Sdim RewriteVal = &*AI++; 787193323Sed 788327952Sdim std::vector<User *> Users(inputs[i]->user_begin(), inputs[i]->user_end()); 789309124Sdim for (User *use : Users) 790309124Sdim if (Instruction *inst = dyn_cast<Instruction>(use)) 791239462Sdim if (Blocks.count(inst->getParent())) 792193323Sed inst->replaceUsesOfWith(inputs[i], RewriteVal); 793193323Sed } 794193323Sed 795193323Sed // Set names for input and output arguments. 796193323Sed if (!AggregateArgs) { 797193323Sed AI = newFunction->arg_begin(); 798193323Sed for (unsigned i = 0, e = inputs.size(); i != e; ++i, ++AI) 799193323Sed AI->setName(inputs[i]->getName()); 800193323Sed for (unsigned i = 0, e = outputs.size(); i != e; ++i, ++AI) 801193323Sed AI->setName(outputs[i]->getName()+".out"); 802193323Sed } 803193323Sed 804193323Sed // Rewrite branches to basic blocks outside of the loop to new dummy blocks 805193323Sed // within the new function. This must be done before we lose track of which 806193323Sed // blocks were originally in the code region. 807327952Sdim std::vector<User *> Users(header->user_begin(), header->user_end()); 808193323Sed for (unsigned i = 0, e = Users.size(); i != e; ++i) 809193323Sed // The BasicBlock which contains the branch is not in the region 810193323Sed // modify the branch target to a new block 811193323Sed if (TerminatorInst *TI = dyn_cast<TerminatorInst>(Users[i])) 812239462Sdim if (!Blocks.count(TI->getParent()) && 813193323Sed TI->getParent()->getParent() == oldFunction) 814193323Sed TI->replaceUsesOfWith(header, newHeader); 815193323Sed 816193323Sed return newFunction; 817193323Sed} 818193323Sed 819193323Sed/// emitCallAndSwitchStatement - This method sets up the caller side by adding 820193323Sed/// the call instruction, splitting any PHI nodes in the header block as 821193323Sed/// necessary. 822193323Sedvoid CodeExtractor:: 823193323SedemitCallAndSwitchStatement(Function *newFunction, BasicBlock *codeReplacer, 824239462Sdim ValueSet &inputs, ValueSet &outputs) { 825193323Sed // Emit a call to the new function, passing in: *pointer to struct (if 826193323Sed // aggregating parameters), or plan inputs and allocated memory for outputs 827327952Sdim std::vector<Value *> params, StructValues, ReloadOutputs, Reloads; 828193323Sed 829321369Sdim Module *M = newFunction->getParent(); 830321369Sdim LLVMContext &Context = M->getContext(); 831321369Sdim const DataLayout &DL = M->getDataLayout(); 832321369Sdim 833193323Sed // Add inputs as params, or to be filled into the struct 834309124Sdim for (Value *input : inputs) 835193323Sed if (AggregateArgs) 836309124Sdim StructValues.push_back(input); 837193323Sed else 838309124Sdim params.push_back(input); 839193323Sed 840193323Sed // Create allocas for the outputs 841309124Sdim for (Value *output : outputs) { 842193323Sed if (AggregateArgs) { 843309124Sdim StructValues.push_back(output); 844193323Sed } else { 845193323Sed AllocaInst *alloca = 846321369Sdim new AllocaInst(output->getType(), DL.getAllocaAddrSpace(), 847321369Sdim nullptr, output->getName() + ".loc", 848321369Sdim &codeReplacer->getParent()->front().front()); 849193323Sed ReloadOutputs.push_back(alloca); 850193323Sed params.push_back(alloca); 851193323Sed } 852193323Sed } 853193323Sed 854288943Sdim StructType *StructArgTy = nullptr; 855276479Sdim AllocaInst *Struct = nullptr; 856193323Sed if (AggregateArgs && (inputs.size() + outputs.size() > 0)) { 857327952Sdim std::vector<Type *> ArgTypes; 858239462Sdim for (ValueSet::iterator v = StructValues.begin(), 859193323Sed ve = StructValues.end(); v != ve; ++v) 860193323Sed ArgTypes.push_back((*v)->getType()); 861193323Sed 862193323Sed // Allocate a struct at the beginning of this function 863288943Sdim StructArgTy = StructType::get(newFunction->getContext(), ArgTypes); 864321369Sdim Struct = new AllocaInst(StructArgTy, DL.getAllocaAddrSpace(), nullptr, 865321369Sdim "structArg", 866296417Sdim &codeReplacer->getParent()->front().front()); 867193323Sed params.push_back(Struct); 868193323Sed 869193323Sed for (unsigned i = 0, e = inputs.size(); i != e; ++i) { 870193323Sed Value *Idx[2]; 871198090Srdivacky Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); 872198090Srdivacky Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), i); 873288943Sdim GetElementPtrInst *GEP = GetElementPtrInst::Create( 874288943Sdim StructArgTy, Struct, Idx, "gep_" + StructValues[i]->getName()); 875193323Sed codeReplacer->getInstList().push_back(GEP); 876193323Sed StoreInst *SI = new StoreInst(StructValues[i], GEP); 877193323Sed codeReplacer->getInstList().push_back(SI); 878193323Sed } 879193323Sed } 880193323Sed 881193323Sed // Emit the call to the function 882224145Sdim CallInst *call = CallInst::Create(newFunction, params, 883193323Sed NumExitBlocks > 1 ? "targetBlock" : ""); 884327952Sdim // Add debug location to the new call, if the original function has debug 885327952Sdim // info. In that case, the terminator of the entry block of the extracted 886327952Sdim // function contains the first debug location of the extracted function, 887327952Sdim // set in extractCodeRegion. 888327952Sdim if (codeReplacer->getParent()->getSubprogram()) { 889327952Sdim if (auto DL = newFunction->getEntryBlock().getTerminator()->getDebugLoc()) 890327952Sdim call->setDebugLoc(DL); 891327952Sdim } 892193323Sed codeReplacer->getInstList().push_back(call); 893193323Sed 894193323Sed Function::arg_iterator OutputArgBegin = newFunction->arg_begin(); 895193323Sed unsigned FirstOut = inputs.size(); 896193323Sed if (!AggregateArgs) 897193323Sed std::advance(OutputArgBegin, inputs.size()); 898193323Sed 899327952Sdim // Reload the outputs passed in by reference. 900327952Sdim Function::arg_iterator OAI = OutputArgBegin; 901193323Sed for (unsigned i = 0, e = outputs.size(); i != e; ++i) { 902276479Sdim Value *Output = nullptr; 903193323Sed if (AggregateArgs) { 904193323Sed Value *Idx[2]; 905198090Srdivacky Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); 906198090Srdivacky Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i); 907288943Sdim GetElementPtrInst *GEP = GetElementPtrInst::Create( 908288943Sdim StructArgTy, Struct, Idx, "gep_reload_" + outputs[i]->getName()); 909193323Sed codeReplacer->getInstList().push_back(GEP); 910193323Sed Output = GEP; 911193323Sed } else { 912193323Sed Output = ReloadOutputs[i]; 913193323Sed } 914193323Sed LoadInst *load = new LoadInst(Output, outputs[i]->getName()+".reload"); 915198090Srdivacky Reloads.push_back(load); 916193323Sed codeReplacer->getInstList().push_back(load); 917327952Sdim std::vector<User *> Users(outputs[i]->user_begin(), outputs[i]->user_end()); 918193323Sed for (unsigned u = 0, e = Users.size(); u != e; ++u) { 919193323Sed Instruction *inst = cast<Instruction>(Users[u]); 920239462Sdim if (!Blocks.count(inst->getParent())) 921193323Sed inst->replaceUsesOfWith(outputs[i], load); 922193323Sed } 923327952Sdim 924327952Sdim // Store to argument right after the definition of output value. 925327952Sdim auto *OutI = dyn_cast<Instruction>(outputs[i]); 926327952Sdim if (!OutI) 927327952Sdim continue; 928327952Sdim // Find proper insertion point. 929327952Sdim Instruction *InsertPt = OutI->getNextNode(); 930327952Sdim // Let's assume that there is no other guy interleave non-PHI in PHIs. 931327952Sdim if (isa<PHINode>(InsertPt)) 932327952Sdim InsertPt = InsertPt->getParent()->getFirstNonPHI(); 933327952Sdim 934327952Sdim assert(OAI != newFunction->arg_end() && 935327952Sdim "Number of output arguments should match " 936327952Sdim "the amount of defined values"); 937327952Sdim if (AggregateArgs) { 938327952Sdim Value *Idx[2]; 939327952Sdim Idx[0] = Constant::getNullValue(Type::getInt32Ty(Context)); 940327952Sdim Idx[1] = ConstantInt::get(Type::getInt32Ty(Context), FirstOut + i); 941327952Sdim GetElementPtrInst *GEP = GetElementPtrInst::Create( 942327952Sdim StructArgTy, &*OAI, Idx, "gep_" + outputs[i]->getName(), InsertPt); 943327952Sdim new StoreInst(outputs[i], GEP, InsertPt); 944327952Sdim // Since there should be only one struct argument aggregating 945327952Sdim // all the output values, we shouldn't increment OAI, which always 946327952Sdim // points to the struct argument, in this case. 947327952Sdim } else { 948327952Sdim new StoreInst(outputs[i], &*OAI, InsertPt); 949327952Sdim ++OAI; 950327952Sdim } 951193323Sed } 952193323Sed 953193323Sed // Now we can emit a switch statement using the call as a value. 954193323Sed SwitchInst *TheSwitch = 955198090Srdivacky SwitchInst::Create(Constant::getNullValue(Type::getInt16Ty(Context)), 956193323Sed codeReplacer, 0, codeReplacer); 957193323Sed 958193323Sed // Since there may be multiple exits from the original region, make the new 959193323Sed // function return an unsigned, switch on that number. This loop iterates 960193323Sed // over all of the blocks in the extracted region, updating any terminator 961193323Sed // instructions in the to-be-extracted region that branch to blocks that are 962193323Sed // not in the region to be extracted. 963327952Sdim std::map<BasicBlock *, BasicBlock *> ExitBlockMap; 964193323Sed 965193323Sed unsigned switchVal = 0; 966309124Sdim for (BasicBlock *Block : Blocks) { 967309124Sdim TerminatorInst *TI = Block->getTerminator(); 968193323Sed for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) 969239462Sdim if (!Blocks.count(TI->getSuccessor(i))) { 970193323Sed BasicBlock *OldTarget = TI->getSuccessor(i); 971193323Sed // add a new basic block which returns the appropriate value 972193323Sed BasicBlock *&NewTarget = ExitBlockMap[OldTarget]; 973193323Sed if (!NewTarget) { 974193323Sed // If we don't already have an exit stub for this non-extracted 975193323Sed // destination, create one now! 976198090Srdivacky NewTarget = BasicBlock::Create(Context, 977198090Srdivacky OldTarget->getName() + ".exitStub", 978193323Sed newFunction); 979193323Sed unsigned SuccNum = switchVal++; 980193323Sed 981276479Sdim Value *brVal = nullptr; 982193323Sed switch (NumExitBlocks) { 983193323Sed case 0: 984193323Sed case 1: break; // No value needed. 985193323Sed case 2: // Conditional branch, return a bool 986198090Srdivacky brVal = ConstantInt::get(Type::getInt1Ty(Context), !SuccNum); 987193323Sed break; 988193323Sed default: 989198090Srdivacky brVal = ConstantInt::get(Type::getInt16Ty(Context), SuccNum); 990193323Sed break; 991193323Sed } 992193323Sed 993327952Sdim ReturnInst::Create(Context, brVal, NewTarget); 994193323Sed 995193323Sed // Update the switch instruction. 996198090Srdivacky TheSwitch->addCase(ConstantInt::get(Type::getInt16Ty(Context), 997198090Srdivacky SuccNum), 998193323Sed OldTarget); 999193323Sed } 1000193323Sed 1001193323Sed // rewrite the original branch instruction with this new target 1002193323Sed TI->setSuccessor(i, NewTarget); 1003193323Sed } 1004193323Sed } 1005193323Sed 1006193323Sed // Now that we've done the deed, simplify the switch instruction. 1007226633Sdim Type *OldFnRetTy = TheSwitch->getParent()->getParent()->getReturnType(); 1008193323Sed switch (NumExitBlocks) { 1009193323Sed case 0: 1010193323Sed // There are no successors (the block containing the switch itself), which 1011193323Sed // means that previously this was the last part of the function, and hence 1012193323Sed // this should be rewritten as a `ret' 1013193323Sed 1014193323Sed // Check if the function should return a value 1015202375Srdivacky if (OldFnRetTy->isVoidTy()) { 1016276479Sdim ReturnInst::Create(Context, nullptr, TheSwitch); // Return void 1017193323Sed } else if (OldFnRetTy == TheSwitch->getCondition()->getType()) { 1018193323Sed // return what we have 1019198090Srdivacky ReturnInst::Create(Context, TheSwitch->getCondition(), TheSwitch); 1020193323Sed } else { 1021193323Sed // Otherwise we must have code extracted an unwind or something, just 1022193323Sed // return whatever we want. 1023341825Sdim ReturnInst::Create(Context, 1024198090Srdivacky Constant::getNullValue(OldFnRetTy), TheSwitch); 1025193323Sed } 1026193323Sed 1027193323Sed TheSwitch->eraseFromParent(); 1028193323Sed break; 1029193323Sed case 1: 1030193323Sed // Only a single destination, change the switch into an unconditional 1031193323Sed // branch. 1032193323Sed BranchInst::Create(TheSwitch->getSuccessor(1), TheSwitch); 1033193323Sed TheSwitch->eraseFromParent(); 1034193323Sed break; 1035193323Sed case 2: 1036193323Sed BranchInst::Create(TheSwitch->getSuccessor(1), TheSwitch->getSuccessor(2), 1037193323Sed call, TheSwitch); 1038193323Sed TheSwitch->eraseFromParent(); 1039193323Sed break; 1040193323Sed default: 1041193323Sed // Otherwise, make the default destination of the switch instruction be one 1042193323Sed // of the other successors. 1043234353Sdim TheSwitch->setCondition(call); 1044234353Sdim TheSwitch->setDefaultDest(TheSwitch->getSuccessor(NumExitBlocks)); 1045234353Sdim // Remove redundant case 1046261991Sdim TheSwitch->removeCase(SwitchInst::CaseIt(TheSwitch, NumExitBlocks-1)); 1047193323Sed break; 1048193323Sed } 1049193323Sed} 1050193323Sed 1051193323Sedvoid CodeExtractor::moveCodeToFunction(Function *newFunction) { 1052239462Sdim Function *oldFunc = (*Blocks.begin())->getParent(); 1053193323Sed Function::BasicBlockListType &oldBlocks = oldFunc->getBasicBlockList(); 1054193323Sed Function::BasicBlockListType &newBlocks = newFunction->getBasicBlockList(); 1055193323Sed 1056309124Sdim for (BasicBlock *Block : Blocks) { 1057193323Sed // Delete the basic block from the old function, and the list of blocks 1058309124Sdim oldBlocks.remove(Block); 1059193323Sed 1060193323Sed // Insert this basic block into the new function 1061309124Sdim newBlocks.push_back(Block); 1062193323Sed } 1063193323Sed} 1064193323Sed 1065314564Sdimvoid CodeExtractor::calculateNewCallTerminatorWeights( 1066314564Sdim BasicBlock *CodeReplacer, 1067314564Sdim DenseMap<BasicBlock *, BlockFrequency> &ExitWeights, 1068314564Sdim BranchProbabilityInfo *BPI) { 1069327952Sdim using Distribution = BlockFrequencyInfoImplBase::Distribution; 1070327952Sdim using BlockNode = BlockFrequencyInfoImplBase::BlockNode; 1071314564Sdim 1072314564Sdim // Update the branch weights for the exit block. 1073314564Sdim TerminatorInst *TI = CodeReplacer->getTerminator(); 1074314564Sdim SmallVector<unsigned, 8> BranchWeights(TI->getNumSuccessors(), 0); 1075314564Sdim 1076314564Sdim // Block Frequency distribution with dummy node. 1077314564Sdim Distribution BranchDist; 1078314564Sdim 1079314564Sdim // Add each of the frequencies of the successors. 1080314564Sdim for (unsigned i = 0, e = TI->getNumSuccessors(); i < e; ++i) { 1081314564Sdim BlockNode ExitNode(i); 1082314564Sdim uint64_t ExitFreq = ExitWeights[TI->getSuccessor(i)].getFrequency(); 1083314564Sdim if (ExitFreq != 0) 1084314564Sdim BranchDist.addExit(ExitNode, ExitFreq); 1085314564Sdim else 1086314564Sdim BPI->setEdgeProbability(CodeReplacer, i, BranchProbability::getZero()); 1087314564Sdim } 1088314564Sdim 1089314564Sdim // Check for no total weight. 1090314564Sdim if (BranchDist.Total == 0) 1091314564Sdim return; 1092314564Sdim 1093314564Sdim // Normalize the distribution so that they can fit in unsigned. 1094314564Sdim BranchDist.normalize(); 1095314564Sdim 1096314564Sdim // Create normalized branch weights and set the metadata. 1097314564Sdim for (unsigned I = 0, E = BranchDist.Weights.size(); I < E; ++I) { 1098314564Sdim const auto &Weight = BranchDist.Weights[I]; 1099314564Sdim 1100314564Sdim // Get the weight and update the current BFI. 1101314564Sdim BranchWeights[Weight.TargetNode.Index] = Weight.Amount; 1102314564Sdim BranchProbability BP(Weight.Amount, BranchDist.Total); 1103314564Sdim BPI->setEdgeProbability(CodeReplacer, Weight.TargetNode.Index, BP); 1104314564Sdim } 1105314564Sdim TI->setMetadata( 1106314564Sdim LLVMContext::MD_prof, 1107314564Sdim MDBuilder(TI->getContext()).createBranchWeights(BranchWeights)); 1108314564Sdim} 1109314564Sdim 1110239462SdimFunction *CodeExtractor::extractCodeRegion() { 1111239462Sdim if (!isEligible()) 1112276479Sdim return nullptr; 1113193323Sed 1114193323Sed // Assumption: this is a single-entry code region, and the header is the first 1115193323Sed // block in the region. 1116239462Sdim BasicBlock *header = *Blocks.begin(); 1117327952Sdim Function *oldFunction = header->getParent(); 1118193323Sed 1119327952Sdim // For functions with varargs, check that varargs handling is only done in the 1120327952Sdim // outlined function, i.e vastart and vaend are only used in outlined blocks. 1121327952Sdim if (AllowVarArgs && oldFunction->getFunctionType()->isVarArg()) { 1122327952Sdim auto containsVarArgIntrinsic = [](Instruction &I) { 1123327952Sdim if (const CallInst *CI = dyn_cast<CallInst>(&I)) 1124327952Sdim if (const Function *F = CI->getCalledFunction()) 1125327952Sdim return F->getIntrinsicID() == Intrinsic::vastart || 1126327952Sdim F->getIntrinsicID() == Intrinsic::vaend; 1127327952Sdim return false; 1128327952Sdim }; 1129327952Sdim 1130327952Sdim for (auto &BB : *oldFunction) { 1131327952Sdim if (Blocks.count(&BB)) 1132327952Sdim continue; 1133327952Sdim if (llvm::any_of(BB, containsVarArgIntrinsic)) 1134327952Sdim return nullptr; 1135327952Sdim } 1136327952Sdim } 1137327952Sdim ValueSet inputs, outputs, SinkingCands, HoistingCands; 1138327952Sdim BasicBlock *CommonExit = nullptr; 1139327952Sdim 1140314564Sdim // Calculate the entry frequency of the new function before we change the root 1141314564Sdim // block. 1142314564Sdim BlockFrequency EntryFreq; 1143314564Sdim if (BFI) { 1144314564Sdim assert(BPI && "Both BPI and BFI are required to preserve profile info"); 1145314564Sdim for (BasicBlock *Pred : predecessors(header)) { 1146314564Sdim if (Blocks.count(Pred)) 1147314564Sdim continue; 1148314564Sdim EntryFreq += 1149314564Sdim BFI->getBlockFreq(Pred) * BPI->getEdgeProbability(Pred, header); 1150314564Sdim } 1151314564Sdim } 1152314564Sdim 1153193323Sed // If we have to split PHI nodes or the entry block, do so now. 1154193323Sed severSplitPHINodes(header); 1155193323Sed 1156193323Sed // If we have any return instructions in the region, split those blocks so 1157193323Sed // that the return is not in the region. 1158193323Sed splitReturnBlocks(); 1159193323Sed 1160193323Sed // This takes place of the original loop 1161341825Sdim BasicBlock *codeReplacer = BasicBlock::Create(header->getContext(), 1162198090Srdivacky "codeRepl", oldFunction, 1163193323Sed header); 1164193323Sed 1165193323Sed // The new function needs a root node because other nodes can branch to the 1166193323Sed // head of the region, but the entry node of a function cannot have preds. 1167341825Sdim BasicBlock *newFuncRoot = BasicBlock::Create(header->getContext(), 1168198090Srdivacky "newFuncRoot"); 1169327952Sdim auto *BranchI = BranchInst::Create(header); 1170327952Sdim // If the original function has debug info, we have to add a debug location 1171327952Sdim // to the new branch instruction from the artificial entry block. 1172327952Sdim // We use the debug location of the first instruction in the extracted 1173327952Sdim // blocks, as there is no other equivalent line in the source code. 1174327952Sdim if (oldFunction->getSubprogram()) { 1175327952Sdim any_of(Blocks, [&BranchI](const BasicBlock *BB) { 1176327952Sdim return any_of(*BB, [&BranchI](const Instruction &I) { 1177327952Sdim if (!I.getDebugLoc()) 1178327952Sdim return false; 1179327952Sdim BranchI->setDebugLoc(I.getDebugLoc()); 1180327952Sdim return true; 1181327952Sdim }); 1182327952Sdim }); 1183327952Sdim } 1184327952Sdim newFuncRoot->getInstList().push_back(BranchI); 1185193323Sed 1186321369Sdim findAllocas(SinkingCands, HoistingCands, CommonExit); 1187321369Sdim assert(HoistingCands.empty() || CommonExit); 1188321369Sdim 1189193323Sed // Find inputs to, outputs from the code region. 1190321369Sdim findInputsOutputs(inputs, outputs, SinkingCands); 1191193323Sed 1192321369Sdim // Now sink all instructions which only have non-phi uses inside the region 1193321369Sdim for (auto *II : SinkingCands) 1194321369Sdim cast<Instruction>(II)->moveBefore(*newFuncRoot, 1195321369Sdim newFuncRoot->getFirstInsertionPt()); 1196321369Sdim 1197321369Sdim if (!HoistingCands.empty()) { 1198321369Sdim auto *HoistToBlock = findOrCreateBlockForHoisting(CommonExit); 1199321369Sdim Instruction *TI = HoistToBlock->getTerminator(); 1200321369Sdim for (auto *II : HoistingCands) 1201321369Sdim cast<Instruction>(II)->moveBefore(TI); 1202321369Sdim } 1203321369Sdim 1204314564Sdim // Calculate the exit blocks for the extracted region and the total exit 1205327952Sdim // weights for each of those blocks. 1206314564Sdim DenseMap<BasicBlock *, BlockFrequency> ExitWeights; 1207239462Sdim SmallPtrSet<BasicBlock *, 1> ExitBlocks; 1208314564Sdim for (BasicBlock *Block : Blocks) { 1209309124Sdim for (succ_iterator SI = succ_begin(Block), SE = succ_end(Block); SI != SE; 1210314564Sdim ++SI) { 1211314564Sdim if (!Blocks.count(*SI)) { 1212314564Sdim // Update the branch weight for this successor. 1213314564Sdim if (BFI) { 1214314564Sdim BlockFrequency &BF = ExitWeights[*SI]; 1215314564Sdim BF += BFI->getBlockFreq(Block) * BPI->getEdgeProbability(Block, *SI); 1216314564Sdim } 1217239462Sdim ExitBlocks.insert(*SI); 1218314564Sdim } 1219314564Sdim } 1220314564Sdim } 1221239462Sdim NumExitBlocks = ExitBlocks.size(); 1222239462Sdim 1223193323Sed // Construct new function based on inputs/outputs & add allocas for all defs. 1224193323Sed Function *newFunction = constructFunction(inputs, outputs, header, 1225193323Sed newFuncRoot, 1226193323Sed codeReplacer, oldFunction, 1227193323Sed oldFunction->getParent()); 1228193323Sed 1229314564Sdim // Update the entry count of the function. 1230314564Sdim if (BFI) { 1231341825Sdim auto Count = BFI->getProfileCountFromFreq(EntryFreq.getFrequency()); 1232341825Sdim if (Count.hasValue()) 1233341825Sdim newFunction->setEntryCount( 1234341825Sdim ProfileCount(Count.getValue(), Function::PCT_Real)); // FIXME 1235314564Sdim BFI->setBlockFreq(codeReplacer, EntryFreq.getFrequency()); 1236314564Sdim } 1237314564Sdim 1238193323Sed emitCallAndSwitchStatement(newFunction, codeReplacer, inputs, outputs); 1239193323Sed 1240193323Sed moveCodeToFunction(newFunction); 1241193323Sed 1242341825Sdim // Propagate personality info to the new function if there is one. 1243341825Sdim if (oldFunction->hasPersonalityFn()) 1244341825Sdim newFunction->setPersonalityFn(oldFunction->getPersonalityFn()); 1245341825Sdim 1246314564Sdim // Update the branch weights for the exit block. 1247314564Sdim if (BFI && NumExitBlocks > 1) 1248314564Sdim calculateNewCallTerminatorWeights(codeReplacer, ExitWeights, BPI); 1249314564Sdim 1250193323Sed // Loop over all of the PHI nodes in the header block, and change any 1251193323Sed // references to the old incoming edge to be the new incoming edge. 1252193323Sed for (BasicBlock::iterator I = header->begin(); isa<PHINode>(I); ++I) { 1253193323Sed PHINode *PN = cast<PHINode>(I); 1254193323Sed for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) 1255239462Sdim if (!Blocks.count(PN->getIncomingBlock(i))) 1256193323Sed PN->setIncomingBlock(i, newFuncRoot); 1257193323Sed } 1258193323Sed 1259193323Sed // Look at all successors of the codeReplacer block. If any of these blocks 1260193323Sed // had PHI nodes in them, we need to update the "from" block to be the code 1261193323Sed // replacer, not the original block in the extracted region. 1262327952Sdim std::vector<BasicBlock *> Succs(succ_begin(codeReplacer), 1263327952Sdim succ_end(codeReplacer)); 1264193323Sed for (unsigned i = 0, e = Succs.size(); i != e; ++i) 1265193323Sed for (BasicBlock::iterator I = Succs[i]->begin(); isa<PHINode>(I); ++I) { 1266193323Sed PHINode *PN = cast<PHINode>(I); 1267193323Sed std::set<BasicBlock*> ProcessedPreds; 1268193323Sed for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) 1269239462Sdim if (Blocks.count(PN->getIncomingBlock(i))) { 1270193323Sed if (ProcessedPreds.insert(PN->getIncomingBlock(i)).second) 1271193323Sed PN->setIncomingBlock(i, codeReplacer); 1272193323Sed else { 1273193323Sed // There were multiple entries in the PHI for this block, now there 1274193323Sed // is only one, so remove the duplicated entries. 1275193323Sed PN->removeIncomingValue(i, false); 1276193323Sed --i; --e; 1277193323Sed } 1278193323Sed } 1279193323Sed } 1280193323Sed 1281341825Sdim LLVM_DEBUG(if (verifyFunction(*newFunction)) 1282341825Sdim report_fatal_error("verifyFunction failed!")); 1283193323Sed return newFunction; 1284193323Sed} 1285