//===- CodeGenPrepare.cpp - Prepare a function for code generation -------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass munges the code in the input function to better prepare it for
// SelectionDAG-based code generation. This works around limitations in its
// basic-block-at-a-time approach. It should eventually be removed.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "codegenprepare"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/ValueMap.h"
#include "llvm/Analysis/DominatorInternals.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/ProfileInfo.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Pass.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/PatternMatch.h"
#include "llvm/Support/ValueHandle.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/Transforms/Utils/BypassSlowDivision.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
using namespace llvm::PatternMatch;

STATISTIC(NumBlocksElim, "Number of blocks eliminated");
STATISTIC(NumPHIsElim,   "Number of trivial PHIs eliminated");
STATISTIC(NumGEPsElim,   "Number of GEPs converted to casts");
STATISTIC(NumCmpUses, "Number of uses of Cmp expressions replaced with uses of "
                      "sunken Cmps");
STATISTIC(NumCastUses, "Number of uses of Cast expressions replaced with uses "
                       "of sunken Casts");
STATISTIC(NumMemoryInsts, "Number of memory instructions whose address "
                          "computations were sunk");
STATISTIC(NumExtsMoved,  "Number of [s|z]ext instructions combined with loads");
STATISTIC(NumExtUses,    "Number of uses of [s|z]ext instructions optimized");
STATISTIC(NumRetsDup,    "Number of return instructions duplicated");
STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved");
STATISTIC(NumSelectsExpanded, "Number of selects turned into branches");

static cl::opt<bool> DisableBranchOpts(
  "disable-cgp-branch-opts", cl::Hidden, cl::init(false),
  cl::desc("Disable branch optimizations in CodeGenPrepare"));

static cl::opt<bool> DisableSelectToBranch(
  "disable-cgp-select2branch", cl::Hidden, cl::init(false),
  cl::desc("Disable select to branch conversion."));

namespace {
  class CodeGenPrepare : public FunctionPass {
    /// TLI - Keep a pointer to a TargetLowering to consult for determining
    /// transformation profitability.
    const TargetLowering *TLI;
    const TargetLibraryInfo *TLInfo;
    DominatorTree *DT;
    ProfileInfo *PFI;

    /// CurInstIterator - As we scan instructions optimizing them, this is the
    /// next instruction to optimize. Xforms that can invalidate this should
    /// update it.
    BasicBlock::iterator CurInstIterator;

    /// Keeps track of non-local addresses that have been sunk into a block.
    /// This allows us to avoid inserting duplicate code for blocks with
    /// multiple load/stores of the same address.
    ValueMap<Value*, Value*> SunkAddrs;

    /// ModifiedDT - If the CFG is modified in any way, the dominator tree may
    /// need to be updated.
    bool ModifiedDT;

    /// OptSize - True if optimizing for size.
    bool OptSize;

  public:
    static char ID; // Pass identification, replacement for typeid
    explicit CodeGenPrepare(const TargetLowering *tli = 0)
      : FunctionPass(ID), TLI(tli) {
        initializeCodeGenPreparePass(*PassRegistry::getPassRegistry());
      }
    bool runOnFunction(Function &F);

    const char *getPassName() const { return "CodeGen Prepare"; }

    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.addPreserved<DominatorTree>();
      AU.addPreserved<ProfileInfo>();
      AU.addRequired<TargetLibraryInfo>();
    }

  private:
    bool EliminateFallThrough(Function &F);
    bool EliminateMostlyEmptyBlocks(Function &F);
    bool CanMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
    void EliminateMostlyEmptyBlock(BasicBlock *BB);
    bool OptimizeBlock(BasicBlock &BB);
    bool OptimizeInst(Instruction *I);
    bool OptimizeMemoryInst(Instruction *I, Value *Addr, Type *AccessTy);
    bool OptimizeInlineAsmInst(CallInst *CS);
    bool OptimizeCallInst(CallInst *CI);
    bool MoveExtToFormExtLoad(Instruction *I);
    bool OptimizeExtUses(Instruction *I);
    bool OptimizeSelectInst(SelectInst *SI);
    bool DupRetToEnableTailCallOpts(BasicBlock *BB);
    bool PlaceDbgValues(Function &F);
  };
}

char CodeGenPrepare::ID = 0;
INITIALIZE_PASS_BEGIN(CodeGenPrepare, "codegenprepare",
                "Optimize for code generation", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_PASS_END(CodeGenPrepare, "codegenprepare",
                "Optimize for code generation", false, false)

FunctionPass *llvm::createCodeGenPreparePass(const TargetLowering *TLI) {
  return new CodeGenPrepare(TLI);
}

bool CodeGenPrepare::runOnFunction(Function &F) {
  bool EverMadeChange = false;

  ModifiedDT = false;
  TLInfo = &getAnalysis<TargetLibraryInfo>();
  DT = getAnalysisIfAvailable<DominatorTree>();
  PFI = getAnalysisIfAvailable<ProfileInfo>();
  OptSize = F.getAttributes().hasAttribute(AttributeSet::FunctionIndex,
                                           Attribute::OptimizeForSize);

  /// This optimization identifies DIV instructions that can be
  /// profitably bypassed and carried out with a shorter, faster divide.
  if (!OptSize && TLI && TLI->isSlowDivBypassed()) {
    const DenseMap<unsigned int, unsigned int> &BypassWidths =
       TLI->getBypassSlowDivWidths();
    for (Function::iterator I = F.begin(); I != F.end(); I++)
      EverMadeChange |= bypassSlowDivision(F, I, BypassWidths);
  }

  // Eliminate blocks that contain only PHI nodes and an
  // unconditional branch.
  EverMadeChange |= EliminateMostlyEmptyBlocks(F);

  // If llvm.dbg.value is far away from the value, then ISel may not be able
  // to handle it properly. ISel will drop llvm.dbg.value if it can not
  // find a node corresponding to the value.
  EverMadeChange |= PlaceDbgValues(F);

  bool MadeChange = true;
  while (MadeChange) {
    MadeChange = false;
    for (Function::iterator I = F.begin(); I != F.end(); ) {
      BasicBlock *BB = I++;
      MadeChange |= OptimizeBlock(*BB);
    }
    EverMadeChange |= MadeChange;
  }

  SunkAddrs.clear();

  if (!DisableBranchOpts) {
    MadeChange = false;
    SmallPtrSet<BasicBlock*, 8> WorkList;
    for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
      SmallVector<BasicBlock*, 2> Successors(succ_begin(BB), succ_end(BB));
      MadeChange |= ConstantFoldTerminator(BB, true);
      if (!MadeChange) continue;

      for (SmallVectorImpl<BasicBlock*>::iterator
             II = Successors.begin(), IE = Successors.end(); II != IE; ++II)
        if (pred_begin(*II) == pred_end(*II))
          WorkList.insert(*II);
    }

    // Delete the dead blocks and any of their dead successors.
    MadeChange |= !WorkList.empty();
    while (!WorkList.empty()) {
      BasicBlock *BB = *WorkList.begin();
      WorkList.erase(BB);
      SmallVector<BasicBlock*, 2> Successors(succ_begin(BB), succ_end(BB));

      DeleteDeadBlock(BB);

      for (SmallVectorImpl<BasicBlock*>::iterator
             II = Successors.begin(), IE = Successors.end(); II != IE; ++II)
        if (pred_begin(*II) == pred_end(*II))
          WorkList.insert(*II);
    }

    // Merge pairs of basic blocks with unconditional branches, connected by
    // a single edge.
    if (EverMadeChange || MadeChange)
      MadeChange |= EliminateFallThrough(F);

    if (MadeChange)
      ModifiedDT = true;
    EverMadeChange |= MadeChange;
  }

  if (ModifiedDT && DT)
    DT->DT->recalculate(F);

  return EverMadeChange;
}

/// EliminateFallThrough - Merge basic blocks which are connected
/// by a single edge, where one of the basic blocks has a single successor
/// pointing to the other basic block, which has a single predecessor.
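/// For illustration, a hypothetical before/after (block names are made up):
/// @code
///   bb0:
///     br label %bb1          ; single successor, single edge
///   bb1:                     ; single predecessor
///     %x = add i32 %a, %b
///     ret i32 %x
/// @endcode
/// becomes
/// @code
///   bb0:
///     %x = add i32 %a, %b
///     ret i32 %x
/// @endcode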
bool CodeGenPrepare::EliminateFallThrough(Function &F) {
  bool Changed = false;
  // Scan all of the blocks in the function, except for the entry block.
  for (Function::iterator I = ++F.begin(), E = F.end(); I != E; ) {
    BasicBlock *BB = I++;
    // If the destination block has a single pred, then this is a trivial
    // edge, just collapse it.
    BasicBlock *SinglePred = BB->getSinglePredecessor();

    // Don't merge if BB's address is taken.
    if (!SinglePred || SinglePred == BB || BB->hasAddressTaken()) continue;

    BranchInst *Term = dyn_cast<BranchInst>(SinglePred->getTerminator());
    if (Term && !Term->isConditional()) {
      Changed = true;
      DEBUG(dbgs() << "To merge:\n"<< *SinglePred << "\n\n\n");
      // Remember if SinglePred was the entry block of the function.
      // If so, we will need to move BB back to the entry position.
      bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
      MergeBasicBlockIntoOnlyPred(BB, this);

      if (isEntry && BB != &BB->getParent()->getEntryBlock())
        BB->moveBefore(&BB->getParent()->getEntryBlock());

      // We have erased a block. Update the iterator.
      I = BB;
    }
  }
  return Changed;
}

/// EliminateMostlyEmptyBlocks - eliminate blocks that contain only PHI nodes,
/// debug info directives, and an unconditional branch. Passes before isel
/// (e.g. LSR/loopsimplify) often split edges in ways that are non-optimal for
/// isel. Start by eliminating these blocks so we can split them the way we
/// want them.
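///
/// A minimal sketch of the kind of block this removes (hypothetical IR):
/// @code
///   bb:                              ; preds = %a, %b
///     %p = phi i32 [ %x, %a ], [ %y, %b ]
///     br label %dest
/// @endcode
/// The PHI is merged into the PHIs of %dest and bb is deleted.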
bool CodeGenPrepare::EliminateMostlyEmptyBlocks(Function &F) {
  bool MadeChange = false;
  // Note that this intentionally skips the entry block.
  for (Function::iterator I = ++F.begin(), E = F.end(); I != E; ) {
    BasicBlock *BB = I++;

    // If this block doesn't end with an uncond branch, ignore it.
    BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
    if (!BI || !BI->isUnconditional())
      continue;

    // If the instruction before the branch (skipping debug info) isn't a phi
    // node, then other stuff is happening here.
    BasicBlock::iterator BBI = BI;
    if (BBI != BB->begin()) {
      --BBI;
      while (isa<DbgInfoIntrinsic>(BBI)) {
        if (BBI == BB->begin())
          break;
        --BBI;
      }
      if (!isa<DbgInfoIntrinsic>(BBI) && !isa<PHINode>(BBI))
        continue;
    }

    // Do not break infinite loops.
    BasicBlock *DestBB = BI->getSuccessor(0);
    if (DestBB == BB)
      continue;

    if (!CanMergeBlocks(BB, DestBB))
      continue;

    EliminateMostlyEmptyBlock(BB);
    MadeChange = true;
  }
  return MadeChange;
}

/// CanMergeBlocks - Return true if we can merge BB into DestBB if there is a
/// single uncond branch between them, and BB contains no other non-phi
/// instructions.
bool CodeGenPrepare::CanMergeBlocks(const BasicBlock *BB,
                                    const BasicBlock *DestBB) const {
  // We only want to eliminate blocks whose phi nodes are used by phi nodes in
  // the successor.  If there are more complex conditions (e.g. preheaders),
  // don't mess around with them.
  BasicBlock::const_iterator BBI = BB->begin();
  while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) {
    for (Value::const_use_iterator UI = PN->use_begin(), E = PN->use_end();
         UI != E; ++UI) {
      const Instruction *User = cast<Instruction>(*UI);
      if (User->getParent() != DestBB || !isa<PHINode>(User))
        return false;
      // If User is inside DestBB block and it is a PHINode then check
      // incoming value. If incoming value is not from BB then this is
      // a complex condition (e.g. preheaders) we want to avoid here.
      if (User->getParent() == DestBB) {
        if (const PHINode *UPN = dyn_cast<PHINode>(User))
          for (unsigned I = 0, E = UPN->getNumIncomingValues(); I != E; ++I) {
            Instruction *Insn = dyn_cast<Instruction>(UPN->getIncomingValue(I));
            if (Insn && Insn->getParent() == BB &&
                Insn->getParent() != UPN->getIncomingBlock(I))
              return false;
          }
      }
    }
  }

  // If BB and DestBB contain any common predecessors, then the phi nodes in BB
  // and DestBB may have conflicting incoming values for the block.  If so, we
  // can't merge the block.
  const PHINode *DestBBPN = dyn_cast<PHINode>(DestBB->begin());
  if (!DestBBPN) return true;  // no conflict.

  // Collect the preds of BB.
  SmallPtrSet<const BasicBlock*, 16> BBPreds;
  if (const PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
    // It is faster to get preds from a PHI than with pred_iterator.
    for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
      BBPreds.insert(BBPN->getIncomingBlock(i));
  } else {
    BBPreds.insert(pred_begin(BB), pred_end(BB));
  }

  // Walk the preds of DestBB.
  for (unsigned i = 0, e = DestBBPN->getNumIncomingValues(); i != e; ++i) {
    BasicBlock *Pred = DestBBPN->getIncomingBlock(i);
    if (BBPreds.count(Pred)) {   // Common predecessor?
      BBI = DestBB->begin();
      while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) {
        const Value *V1 = PN->getIncomingValueForBlock(Pred);
        const Value *V2 = PN->getIncomingValueForBlock(BB);

        // If V2 is a phi node in BB, look up what the mapped value will be.
        if (const PHINode *V2PN = dyn_cast<PHINode>(V2))
          if (V2PN->getParent() == BB)
            V2 = V2PN->getIncomingValueForBlock(Pred);

        // If there is a conflict, bail out.
        if (V1 != V2) return false;
      }
    }
  }

  return true;
}


/// EliminateMostlyEmptyBlock - Eliminate a basic block that has only phi's and
/// an unconditional branch in it.
void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) {
  BranchInst *BI = cast<BranchInst>(BB->getTerminator());
  BasicBlock *DestBB = BI->getSuccessor(0);

  DEBUG(dbgs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n" << *BB << *DestBB);

  // If the destination block has a single pred, then this is a trivial edge,
  // just collapse it.
  if (BasicBlock *SinglePred = DestBB->getSinglePredecessor()) {
    if (SinglePred != DestBB) {
      // Remember if SinglePred was the entry block of the function.  If so, we
      // will need to move BB back to the entry position.
      bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
      MergeBasicBlockIntoOnlyPred(DestBB, this);

      if (isEntry && BB != &BB->getParent()->getEntryBlock())
        BB->moveBefore(&BB->getParent()->getEntryBlock());

      DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
      return;
    }
  }

  // Otherwise, we have multiple predecessors of BB.  Update the PHIs in DestBB
  // to handle the new incoming edges it is about to have.
  PHINode *PN;
  for (BasicBlock::iterator BBI = DestBB->begin();
       (PN = dyn_cast<PHINode>(BBI)); ++BBI) {
    // Remove the incoming value for BB, and remember it.
    Value *InVal = PN->removeIncomingValue(BB, false);

    // Two options: either the InVal is a phi node defined in BB or it is some
    // value that dominates BB.
    PHINode *InValPhi = dyn_cast<PHINode>(InVal);
    if (InValPhi && InValPhi->getParent() == BB) {
      // Add all of the input values of the input PHI as inputs of this phi.
      for (unsigned i = 0, e = InValPhi->getNumIncomingValues(); i != e; ++i)
        PN->addIncoming(InValPhi->getIncomingValue(i),
                        InValPhi->getIncomingBlock(i));
    } else {
      // Otherwise, add one instance of the dominating value for each edge that
      // we will be adding.
      if (PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
        for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
          PN->addIncoming(InVal, BBPN->getIncomingBlock(i));
      } else {
        for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
          PN->addIncoming(InVal, *PI);
      }
    }
  }

  // The PHIs are now updated, change everything that refers to BB to use
  // DestBB and remove BB.
  BB->replaceAllUsesWith(DestBB);
  if (DT && !ModifiedDT) {
    BasicBlock *BBIDom  = DT->getNode(BB)->getIDom()->getBlock();
    BasicBlock *DestBBIDom = DT->getNode(DestBB)->getIDom()->getBlock();
    BasicBlock *NewIDom = DT->findNearestCommonDominator(BBIDom, DestBBIDom);
    DT->changeImmediateDominator(DestBB, NewIDom);
    DT->eraseNode(BB);
  }
  if (PFI) {
    PFI->replaceAllUses(BB, DestBB);
    PFI->removeEdge(ProfileInfo::getEdge(BB, DestBB));
  }
  BB->eraseFromParent();
  ++NumBlocksElim;

  DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
}

/// OptimizeNoopCopyExpression - If the specified cast instruction is a noop
/// copy (e.g. it's casting from one pointer type to another, i32->i8 on PPC),
/// sink it into user blocks to reduce the number of virtual
/// registers that must be created and coalesced.
///
/// Return true if any changes are made.
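///
/// A rough sketch of the effect (hypothetical IR; assumes a target where the
/// cast is a noop):
/// @code
///   def:
///     %c = bitcast i8* %p to i32*
///     br i1 %cond, label %use1, label %use2
/// @endcode
/// A copy of the cast is inserted into %use1 and %use2 and each use is
/// rewritten to the local copy, so no virtual register carrying the noop
/// cast has to stay live across the branch.
///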
static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){
  // If this is a noop copy,
  EVT SrcVT = TLI.getValueType(CI->getOperand(0)->getType());
  EVT DstVT = TLI.getValueType(CI->getType());

  // This is an fp<->int conversion?
  if (SrcVT.isInteger() != DstVT.isInteger())
    return false;

  // If this is an extension, it will be a zero or sign extension, which
  // isn't a noop.
  if (SrcVT.bitsLT(DstVT)) return false;

  // If these values will be promoted, find out what they will be promoted
  // to.  This helps us consider truncates on PPC as noop copies when they
  // are.
  if (TLI.getTypeAction(CI->getContext(), SrcVT) ==
      TargetLowering::TypePromoteInteger)
    SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT);
  if (TLI.getTypeAction(CI->getContext(), DstVT) ==
      TargetLowering::TypePromoteInteger)
    DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT);

  // If, after promotion, these are the same types, this is a noop copy.
  if (SrcVT != DstVT)
    return false;

  BasicBlock *DefBB = CI->getParent();

  /// InsertedCasts - Only insert a cast in each block once.
  DenseMap<BasicBlock*, CastInst*> InsertedCasts;

  bool MadeChange = false;
  for (Value::use_iterator UI = CI->use_begin(), E = CI->use_end();
       UI != E; ) {
    Use &TheUse = UI.getUse();
    Instruction *User = cast<Instruction>(*UI);

    // Figure out which BB this cast is used in.  For PHI's this is the
    // appropriate predecessor block.
    BasicBlock *UserBB = User->getParent();
    if (PHINode *PN = dyn_cast<PHINode>(User)) {
      UserBB = PN->getIncomingBlock(UI);
    }

    // Preincrement use iterator so we don't invalidate it.
    ++UI;

    // If this user is in the same block as the cast, don't change the cast.
    if (UserBB == DefBB) continue;

    // If we have already inserted a cast into this block, use it.
    CastInst *&InsertedCast = InsertedCasts[UserBB];

    if (!InsertedCast) {
      BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
      InsertedCast =
        CastInst::Create(CI->getOpcode(), CI->getOperand(0), CI->getType(), "",
                         InsertPt);
      MadeChange = true;
    }

    // Replace a use of the cast with a use of the new cast.
    TheUse = InsertedCast;
    ++NumCastUses;
  }

  // If we removed all uses, nuke the cast.
  if (CI->use_empty()) {
    CI->eraseFromParent();
    MadeChange = true;
  }

  return MadeChange;
}

/// OptimizeCmpExpression - sink the given CmpInst into user blocks to reduce
/// the number of virtual registers that must be created and coalesced.  This is
/// a clear win except on targets with multiple condition code registers
/// (PowerPC), where it might lose; some adjustment may be wanted there.
///
/// Return true if any changes are made.
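///
/// The shape of the transformation mirrors OptimizeNoopCopyExpression: a
/// hypothetical `%c = icmp eq i32 %a, %b` defined in one block and branched
/// on in another is re-materialized in each user block, so the predicate
/// value need not be live across blocks.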
static bool OptimizeCmpExpression(CmpInst *CI) {
  BasicBlock *DefBB = CI->getParent();

  /// InsertedCmp - Only insert a cmp in each block once.
  DenseMap<BasicBlock*, CmpInst*> InsertedCmps;

  bool MadeChange = false;
  for (Value::use_iterator UI = CI->use_begin(), E = CI->use_end();
       UI != E; ) {
    Use &TheUse = UI.getUse();
    Instruction *User = cast<Instruction>(*UI);

    // Preincrement use iterator so we don't invalidate it.
    ++UI;

    // Don't bother for PHI nodes.
    if (isa<PHINode>(User))
      continue;

    // Figure out which BB this cmp is used in.
    BasicBlock *UserBB = User->getParent();

    // If this user is in the same block as the cmp, don't change the cmp.
    if (UserBB == DefBB) continue;

    // If we have already inserted a cmp into this block, use it.
    CmpInst *&InsertedCmp = InsertedCmps[UserBB];

    if (!InsertedCmp) {
      BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
      InsertedCmp =
        CmpInst::Create(CI->getOpcode(),
                        CI->getPredicate(),  CI->getOperand(0),
                        CI->getOperand(1), "", InsertPt);
      MadeChange = true;
    }

    // Replace a use of the cmp with a use of the new cmp.
    TheUse = InsertedCmp;
    ++NumCmpUses;
  }

  // If we removed all uses, nuke the cmp.
  if (CI->use_empty())
    CI->eraseFromParent();

  return MadeChange;
}

namespace {
class CodeGenPrepareFortifiedLibCalls : public SimplifyFortifiedLibCalls {
protected:
  void replaceCall(Value *With) {
    CI->replaceAllUsesWith(With);
    CI->eraseFromParent();
  }
  bool isFoldable(unsigned SizeCIOp, unsigned, bool) const {
    if (ConstantInt *SizeCI =
                             dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp)))
      return SizeCI->isAllOnesValue();
    return false;
  }
};
} // end anonymous namespace

bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
  BasicBlock *BB = CI->getParent();

  // Lower inline assembly if we can.
  // If we found an inline asm expression, and if the target knows how to
  // lower it to normal LLVM code, do so now.
  if (TLI && isa<InlineAsm>(CI->getCalledValue())) {
    if (TLI->ExpandInlineAsm(CI)) {
      // Avoid invalidating the iterator.
      CurInstIterator = BB->begin();
      // Avoid processing instructions out of order, which could cause
      // reuse before a value is defined.
      SunkAddrs.clear();
      return true;
    }
    // Sink address computing for memory operands into the block.
    if (OptimizeInlineAsmInst(CI))
      return true;
  }

  // Lower all uses of llvm.objectsize.*
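  // For example (hypothetical IR), with a "min" argument of false:
  //   %sz = call i32 @llvm.objectsize.i32(i8* %p, i1 false)
  // is replaced here by the conservative "don't know" answer i32 -1
  // (or 0 when the min bit is set).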
  IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
  if (II && II->getIntrinsicID() == Intrinsic::objectsize) {
    bool Min = (cast<ConstantInt>(II->getArgOperand(1))->getZExtValue() == 1);
    Type *ReturnTy = CI->getType();
    Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL);

    // Substituting this can cause recursive simplifications, which can
    // invalidate our iterator.  Use a WeakVH to hold onto it in case this
    // happens.
    WeakVH IterHandle(CurInstIterator);

    replaceAndRecursivelySimplify(CI, RetVal, TLI ? TLI->getDataLayout() : 0,
                                  TLInfo, ModifiedDT ? 0 : DT);

    // If the iterator instruction was recursively deleted, start over at the
    // start of the block.
    if (IterHandle != CurInstIterator) {
      CurInstIterator = BB->begin();
      SunkAddrs.clear();
    }
    return true;
  }

  if (II && TLI) {
    SmallVector<Value*, 2> PtrOps;
    Type *AccessTy;
    if (TLI->GetAddrModeArguments(II, PtrOps, AccessTy))
      while (!PtrOps.empty())
        if (OptimizeMemoryInst(II, PtrOps.pop_back_val(), AccessTy))
          return true;
  }

  // From here on out we're working with named functions.
  if (CI->getCalledFunction() == 0) return false;

  // We'll need DataLayout from here on out.
  const DataLayout *TD = TLI ? TLI->getDataLayout() : 0;
  if (!TD) return false;

  // Lower all default uses of _chk calls.  This is very similar
  // to what InstCombineCalls does, but here we are only lowering calls
  // that have the default "don't know" as the objectsize.  Anything else
  // should be left alone.
  CodeGenPrepareFortifiedLibCalls Simplifier;
  return Simplifier.fold(CI, TD, TLInfo);
}

/// DupRetToEnableTailCallOpts - Look for opportunities to duplicate return
/// instructions to the predecessor to enable tail call optimizations. The
/// case it is currently looking for is:
/// @code
/// bb0:
///   %tmp0 = tail call i32 @f0()
///   br label %return
/// bb1:
///   %tmp1 = tail call i32 @f1()
///   br label %return
/// bb2:
///   %tmp2 = tail call i32 @f2()
///   br label %return
/// return:
///   %retval = phi i32 [ %tmp0, %bb0 ], [ %tmp1, %bb1 ], [ %tmp2, %bb2 ]
///   ret i32 %retval
/// @endcode
///
/// =>
///
/// @code
/// bb0:
///   %tmp0 = tail call i32 @f0()
///   ret i32 %tmp0
/// bb1:
///   %tmp1 = tail call i32 @f1()
///   ret i32 %tmp1
/// bb2:
///   %tmp2 = tail call i32 @f2()
///   ret i32 %tmp2
/// @endcode
bool CodeGenPrepare::DupRetToEnableTailCallOpts(BasicBlock *BB) {
  if (!TLI)
    return false;

  ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator());
  if (!RI)
    return false;

  PHINode *PN = 0;
  BitCastInst *BCI = 0;
  Value *V = RI->getReturnValue();
  if (V) {
    BCI = dyn_cast<BitCastInst>(V);
    if (BCI)
      V = BCI->getOperand(0);

    PN = dyn_cast<PHINode>(V);
    if (!PN)
      return false;
  }

  if (PN && PN->getParent() != BB)
    return false;

  // It's not safe to eliminate the sign / zero extension of the return value.
  // See llvm::isInTailCallPosition().
  const Function *F = BB->getParent();
  AttributeSet CallerAttrs = F->getAttributes();
  if (CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt) ||
      CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt))
    return false;

  // Make sure there are no instructions between the PHI and return, or that
  // the return is the first instruction in the block.
  if (PN) {
    BasicBlock::iterator BI = BB->begin();
    do { ++BI; } while (isa<DbgInfoIntrinsic>(BI));
    if (&*BI == BCI)
      // Also skip over the bitcast.
      ++BI;
    if (&*BI != RI)
      return false;
  } else {
    BasicBlock::iterator BI = BB->begin();
    while (isa<DbgInfoIntrinsic>(BI)) ++BI;
    if (&*BI != RI)
      return false;
  }

  /// Only dup the ReturnInst if the CallInst is likely to be emitted as a tail
  /// call.
  SmallVector<CallInst*, 4> TailCalls;
  if (PN) {
    for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) {
      CallInst *CI = dyn_cast<CallInst>(PN->getIncomingValue(I));
      // Make sure the phi value is indeed produced by the tail call.
      if (CI && CI->hasOneUse() && CI->getParent() == PN->getIncomingBlock(I) &&
          TLI->mayBeEmittedAsTailCall(CI))
        TailCalls.push_back(CI);
    }
  } else {
    SmallPtrSet<BasicBlock*, 4> VisitedBBs;
    for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) {
      if (!VisitedBBs.insert(*PI))
        continue;

      BasicBlock::InstListType &InstList = (*PI)->getInstList();
      BasicBlock::InstListType::reverse_iterator RI = InstList.rbegin();
      BasicBlock::InstListType::reverse_iterator RE = InstList.rend();
      do { ++RI; } while (RI != RE && isa<DbgInfoIntrinsic>(&*RI));
      if (RI == RE)
        continue;

      CallInst *CI = dyn_cast<CallInst>(&*RI);
      if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI))
        TailCalls.push_back(CI);
    }
  }

  bool Changed = false;
  for (unsigned i = 0, e = TailCalls.size(); i != e; ++i) {
    CallInst *CI = TailCalls[i];
    CallSite CS(CI);

    // Conservatively require the attributes of the call to match those of the
    // return. Ignore noalias because it doesn't affect the call sequence.
    AttributeSet CalleeAttrs = CS.getAttributes();
    if (AttrBuilder(CalleeAttrs, AttributeSet::ReturnIndex).
          removeAttribute(Attribute::NoAlias) !=
        AttrBuilder(CalleeAttrs, AttributeSet::ReturnIndex).
          removeAttribute(Attribute::NoAlias))
      continue;

    // Make sure the call instruction is followed by an unconditional branch to
    // the return block.
    BasicBlock *CallBB = CI->getParent();
    BranchInst *BI = dyn_cast<BranchInst>(CallBB->getTerminator());
    if (!BI || !BI->isUnconditional() || BI->getSuccessor(0) != BB)
      continue;

    // Duplicate the return into CallBB.
    (void)FoldReturnIntoUncondBranch(RI, BB, CallBB);
    ModifiedDT = Changed = true;
    ++NumRetsDup;
  }

  // If we eliminated all predecessors of the block, delete the block now.
  if (Changed && !BB->hasAddressTaken() && pred_begin(BB) == pred_end(BB))
    BB->eraseFromParent();

  return Changed;
}

//===----------------------------------------------------------------------===//
// Memory Optimization
//===----------------------------------------------------------------------===//

namespace {

/// ExtAddrMode - This is an extended version of TargetLowering::AddrMode
/// which holds actual Value*'s for register values.
struct ExtAddrMode : public TargetLowering::AddrMode {
  Value *BaseReg;
  Value *ScaledReg;
  ExtAddrMode() : BaseReg(0), ScaledReg(0) {}
  void print(raw_ostream &OS) const;
  void dump() const;

  bool operator==(const ExtAddrMode& O) const {
    return (BaseReg == O.BaseReg) && (ScaledReg == O.ScaledReg) &&
           (BaseGV == O.BaseGV) && (BaseOffs == O.BaseOffs) &&
           (HasBaseReg == O.HasBaseReg) && (Scale == O.Scale);
  }
};

static inline raw_ostream &operator<<(raw_ostream &OS, const ExtAddrMode &AM) {
  AM.print(OS);
  return OS;
}

void ExtAddrMode::print(raw_ostream &OS) const {
  bool NeedPlus = false;
  OS << "[";
  if (BaseGV) {
    OS << (NeedPlus ? " + " : "")
       << "GV:";
    WriteAsOperand(OS, BaseGV, /*PrintType=*/false);
    NeedPlus = true;
  }

  if (BaseOffs)
    OS << (NeedPlus ? " + " : "") << BaseOffs, NeedPlus = true;

  if (BaseReg) {
    OS << (NeedPlus ? " + " : "")
       << "Base:";
    WriteAsOperand(OS, BaseReg, /*PrintType=*/false);
    NeedPlus = true;
  }
  if (Scale) {
    OS << (NeedPlus ? " + " : "")
       << Scale << "*";
    WriteAsOperand(OS, ScaledReg, /*PrintType=*/false);
    NeedPlus = true;
  }

  OS << ']';
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void ExtAddrMode::dump() const {
  print(dbgs());
  dbgs() << '\n';
}
#endif


/// \brief A helper class for matching addressing modes.
///
/// This encapsulates the logic for matching the target-legal addressing modes.
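/// An addressing mode is, conceptually, BaseGV + BaseOffs + BaseReg +
/// Scale*ScaledReg; for a hypothetical x86 access this might print as
/// [GV:@g + 16 + Base:%base + 4*%idx], with each component proven legal
/// via TargetLowering::isLegalAddressingMode as it is added.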
class AddressingModeMatcher {
  SmallVectorImpl<Instruction*> &AddrModeInsts;
  const TargetLowering &TLI;

  /// AccessTy/MemoryInst - This is the type for the access (e.g. double) and
  /// the memory instruction that we're computing this address for.
  Type *AccessTy;
  Instruction *MemoryInst;

  /// AddrMode - This is the addressing mode that we're building up.  This is
  /// part of the return value of this addressing mode matching stuff.
  ExtAddrMode &AddrMode;

  /// IgnoreProfitability - This is set to true when we should not do
  /// profitability checks.  When true, IsProfitableToFoldIntoAddressingMode
  /// always returns true.
  bool IgnoreProfitability;

  AddressingModeMatcher(SmallVectorImpl<Instruction*> &AMI,
                        const TargetLowering &T, Type *AT,
                        Instruction *MI, ExtAddrMode &AM)
    : AddrModeInsts(AMI), TLI(T), AccessTy(AT), MemoryInst(MI), AddrMode(AM) {
    IgnoreProfitability = false;
  }
public:

  /// Match - Find the maximal addressing mode that a load/store of V can fold,
  /// given an access type of AccessTy.  This returns a list of involved
  /// instructions in AddrModeInsts.
  static ExtAddrMode Match(Value *V, Type *AccessTy,
                           Instruction *MemoryInst,
                           SmallVectorImpl<Instruction*> &AddrModeInsts,
                           const TargetLowering &TLI) {
    ExtAddrMode Result;

    bool Success =
      AddressingModeMatcher(AddrModeInsts, TLI, AccessTy,
                            MemoryInst, Result).MatchAddr(V, 0);
    (void)Success; assert(Success && "Couldn't select *anything*?");
    return Result;
  }
private:
  bool MatchScaledValue(Value *ScaleReg, int64_t Scale, unsigned Depth);
  bool MatchAddr(Value *V, unsigned Depth);
  bool MatchOperationAddr(User *Operation, unsigned Opcode, unsigned Depth);
  bool IsProfitableToFoldIntoAddressingMode(Instruction *I,
                                            ExtAddrMode &AMBefore,
                                            ExtAddrMode &AMAfter);
  bool ValueAlreadyLiveAtInst(Value *Val, Value *KnownLive1, Value *KnownLive2);
};

/// MatchScaledValue - Try adding ScaleReg*Scale to the current addressing mode.
/// Return true and update AddrMode if this addr mode is legal for the target,
/// false if not.
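///
/// Besides the straightforward case, this also folds ScaleReg = X + C into
/// X*Scale + C*Scale when legal; e.g. for a hypothetical
/// @code
///   %sum = add i64 %x, 4
///   ... getelementptr i32* %p, i64 %sum
/// @endcode
/// the matcher can produce BaseOffs += 16 with ScaledReg = %x and Scale = 4,
/// assuming the target accepts that mode.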
bool AddressingModeMatcher::MatchScaledValue(Value *ScaleReg, int64_t Scale,
                                             unsigned Depth) {
  // If Scale is 1, then this is the same as adding ScaleReg to the addressing
  // mode.  Just process that directly.
  if (Scale == 1)
    return MatchAddr(ScaleReg, Depth);

  // If the scale is 0, it takes nothing to add this.
  if (Scale == 0)
    return true;

  // If we already have a scale of this value, we can add to it, otherwise, we
  // need an available scale field.
  if (AddrMode.Scale != 0 && AddrMode.ScaledReg != ScaleReg)
    return false;

  ExtAddrMode TestAddrMode = AddrMode;

  // Add scale to turn X*4+X*3 -> X*7.  This could also do things like
  // [A+B + A*7] -> [B+A*8].
  TestAddrMode.Scale += Scale;
  TestAddrMode.ScaledReg = ScaleReg;

  // If the new address isn't legal, bail out.
  if (!TLI.isLegalAddressingMode(TestAddrMode, AccessTy))
    return false;

  // It was legal, so commit it.
  AddrMode = TestAddrMode;

  // Okay, we decided that we can add ScaleReg+Scale to AddrMode.  Check now
  // to see if ScaleReg is actually X+C.  If so, we can turn this into adding
  // X*Scale + C*Scale to addr mode.
  ConstantInt *CI = 0; Value *AddLHS = 0;
  if (isa<Instruction>(ScaleReg) &&  // not a constant expr.
      match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI)))) {
    TestAddrMode.ScaledReg = AddLHS;
    TestAddrMode.BaseOffs += CI->getSExtValue()*TestAddrMode.Scale;

    // If this addressing mode is legal, commit it and remember that we folded
    // this instruction.
    if (TLI.isLegalAddressingMode(TestAddrMode, AccessTy)) {
      AddrModeInsts.push_back(cast<Instruction>(ScaleReg));
      AddrMode = TestAddrMode;
      return true;
    }
  }

  // Otherwise, not (x+c)*scale, just return what we have.
  return true;
}

/// MightBeFoldableInst - This is a little filter, which returns true if an
/// addressing computation involving I might be folded into a load/store
/// accessing it.  This doesn't need to be perfect, but needs to accept at least
/// the set of instructions that MatchOperationAddr can.
static bool MightBeFoldableInst(Instruction *I) {
  switch (I->getOpcode()) {
  case Instruction::BitCast:
    // Don't touch identity bitcasts.
    if (I->getType() == I->getOperand(0)->getType())
      return false;
    return I->getType()->isPointerTy() || I->getType()->isIntegerTy();
  case Instruction::PtrToInt:
    // PtrToInt is always a noop, as we know that the int type is pointer sized.
    return true;
  case Instruction::IntToPtr:
    // We know the input is intptr_t, so this is foldable.
    return true;
  case Instruction::Add:
    return true;
  case Instruction::Mul:
  case Instruction::Shl:
    // Can only handle X*C and X << C.
    return isa<ConstantInt>(I->getOperand(1));
  case Instruction::GetElementPtr:
    return true;
  default:
    return false;
  }
}

/// MatchOperationAddr - Given an instruction or constant expr, see if we can
/// fold the operation into the addressing mode.  If so, update the addressing
/// mode and return true, otherwise return false without modifying AddrMode.
bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
                                               unsigned Depth) {
  // Avoid exponential behavior on extremely deep expression trees.
  if (Depth >= 5) return false;

  switch (Opcode) {
  case Instruction::PtrToInt:
    // PtrToInt is always a noop, as we know that the int type is pointer sized.
    return MatchAddr(AddrInst->getOperand(0), Depth);
  case Instruction::IntToPtr:
    // This inttoptr is a no-op if the integer type is pointer sized.
    if (TLI.getValueType(AddrInst->getOperand(0)->getType()) ==
        TLI.getPointerTy())
      return MatchAddr(AddrInst->getOperand(0), Depth);
    return false;
  case Instruction::BitCast:
    // BitCast is always a noop, and we can handle it as long as it is
    // int->int or pointer->pointer (we don't want int<->fp or something).
    if ((AddrInst->getOperand(0)->getType()->isPointerTy() ||
         AddrInst->getOperand(0)->getType()->isIntegerTy()) &&
        // Don't touch identity bitcasts.  These were probably put here by LSR,
        // and we don't want to mess around with them.  Assume it knows what it
        // is doing.
        AddrInst->getOperand(0)->getType() != AddrInst->getType())
      return MatchAddr(AddrInst->getOperand(0), Depth);
    return false;
  case Instruction::Add: {
    // Check to see if we can merge in the RHS then the LHS.  If so, we win.
    ExtAddrMode BackupAddrMode = AddrMode;
    unsigned OldSize = AddrModeInsts.size();
    if (MatchAddr(AddrInst->getOperand(1), Depth+1) &&
        MatchAddr(AddrInst->getOperand(0), Depth+1))
      return true;

    // Restore the old addr mode info.
    AddrMode = BackupAddrMode;
    AddrModeInsts.resize(OldSize);

    // Otherwise this was over-aggressive.  Try merging in the LHS then the RHS.
    if (MatchAddr(AddrInst->getOperand(0), Depth+1) &&
        MatchAddr(AddrInst->getOperand(1), Depth+1))
      return true;

    // Otherwise we definitely can't merge the ADD in.
    AddrMode = BackupAddrMode;
    AddrModeInsts.resize(OldSize);
    break;
  }
  //case Instruction::Or:
  // TODO: We can handle "Or Val, Imm" iff this OR is equivalent to an ADD.
  //break;
  case Instruction::Mul:
  case Instruction::Shl: {
    // Can only handle X*C and X << C.
    ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1));
    if (!RHS) return false;
    int64_t Scale = RHS->getSExtValue();
    if (Opcode == Instruction::Shl)
      Scale = 1LL << Scale;

    return MatchScaledValue(AddrInst->getOperand(0), Scale, Depth);
  }
  case Instruction::GetElementPtr: {
    // Scan the GEP.  We check whether it contains constant offsets and at most
    // one variable offset.
    int VariableOperand = -1;
    unsigned VariableScale = 0;

    int64_t ConstantOffset = 0;
    const DataLayout *TD = TLI.getDataLayout();
    gep_type_iterator GTI = gep_type_begin(AddrInst);
    for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) {
      if (StructType *STy = dyn_cast<StructType>(*GTI)) {
        const StructLayout *SL = TD->getStructLayout(STy);
        unsigned Idx =
          cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue();
        ConstantOffset += SL->getElementOffset(Idx);
      } else {
        uint64_t TypeSize = TD->getTypeAllocSize(GTI.getIndexedType());
        if (ConstantInt *CI = dyn_cast<ConstantInt>(AddrInst->getOperand(i))) {
          ConstantOffset += CI->getSExtValue()*TypeSize;
        } else if (TypeSize) {  // Scales of zero don't do anything.
          // We only allow one variable index at the moment.
          if (VariableOperand != -1)
            return false;

          // Remember the variable index.
          VariableOperand = i;
          VariableScale = TypeSize;
        }
      }
    }

    // A common case is for the GEP to only do a constant offset.  In this case,
    // just add it to the disp field and check validity.
    if (VariableOperand == -1) {
      AddrMode.BaseOffs += ConstantOffset;
      if (ConstantOffset == 0 || TLI.isLegalAddressingMode(AddrMode, AccessTy)){
        // Check to see if we can fold the base pointer in too.
        if (MatchAddr(AddrInst->getOperand(0), Depth+1))
          return true;
      }
      AddrMode.BaseOffs -= ConstantOffset;
      return false;
    }

    // Save the valid addressing mode in case we can't match.
    ExtAddrMode BackupAddrMode = AddrMode;
    unsigned OldSize = AddrModeInsts.size();

    // See if the scale and offset amount is valid for this target.
    AddrMode.BaseOffs += ConstantOffset;

    // Match the base operand of the GEP.
    if (!MatchAddr(AddrInst->getOperand(0), Depth+1)) {
      // If it couldn't be matched, just stuff the value in a register.
      if (AddrMode.HasBaseReg) {
        AddrMode = BackupAddrMode;
        AddrModeInsts.resize(OldSize);
        return false;
      }
      AddrMode.HasBaseReg = true;
      AddrMode.BaseReg = AddrInst->getOperand(0);
    }

    // Match the remaining variable portion of the GEP.
    if (!MatchScaledValue(AddrInst->getOperand(VariableOperand), VariableScale,
                          Depth)) {
      // If it couldn't be matched, try stuffing the base into a register
      // instead of matching it, and retrying the match of the scale.
      AddrMode = BackupAddrMode;
      AddrModeInsts.resize(OldSize);
      if (AddrMode.HasBaseReg)
        return false;
      AddrMode.HasBaseReg = true;
      AddrMode.BaseReg = AddrInst->getOperand(0);
      AddrMode.BaseOffs += ConstantOffset;
      if (!MatchScaledValue(AddrInst->getOperand(VariableOperand),
                            VariableScale, Depth)) {
        // If even that didn't work, bail.
        AddrMode = BackupAddrMode;
        AddrModeInsts.resize(OldSize);
        return false;
      }
    }

    return true;
  }
  }
  return false;
}

/// MatchAddr - If we can, try to add the value of 'Addr' into the current
/// addressing mode.  If Addr can't be added to AddrMode this returns false and
/// leaves AddrMode unmodified.  This assumes that Addr is either a pointer type
/// or intptr_t for the target.
///
bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) {
  if (ConstantInt *CI = dyn_cast<ConstantInt>(Addr)) {
    // Fold in immediates if legal for the target.
    AddrMode.BaseOffs += CI->getSExtValue();
    if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
      return true;
    AddrMode.BaseOffs -= CI->getSExtValue();
  } else if (GlobalValue *GV = dyn_cast<GlobalValue>(Addr)) {
    // If this is a global variable, try to fold it into the addressing mode.
    if (AddrMode.BaseGV == 0) {
      AddrMode.BaseGV = GV;
      if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
        return true;
      AddrMode.BaseGV = 0;
    }
  } else if (Instruction *I = dyn_cast<Instruction>(Addr)) {
    ExtAddrMode BackupAddrMode = AddrMode;
    unsigned OldSize = AddrModeInsts.size();

    // Check to see if it is possible to fold this operation.
    if (MatchOperationAddr(I, I->getOpcode(), Depth)) {
      // Okay, it's possible to fold this.  Check to see if it is actually
      // *profitable* to do so.  We use a simple cost model to avoid increasing
      // register pressure too much.
      if (I->hasOneUse() ||
          IsProfitableToFoldIntoAddressingMode(I, BackupAddrMode, AddrMode)) {
        AddrModeInsts.push_back(I);
        return true;
      }

      // It isn't profitable to do this, roll back.
      //cerr << "NOT FOLDING: " << *I;
      AddrMode = BackupAddrMode;
      AddrModeInsts.resize(OldSize);
    }
  } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) {
    if (MatchOperationAddr(CE, CE->getOpcode(), Depth))
      return true;
  } else if (isa<ConstantPointerNull>(Addr)) {
    // Null pointer gets folded without affecting the addressing mode.
    return true;
  }

  // Worst case, the target should support [reg] addressing modes. :)
  if (!AddrMode.HasBaseReg) {
    AddrMode.HasBaseReg = true;
    AddrMode.BaseReg = Addr;
    // Still check for legality in case the target supports [imm] but not [i+r].
    if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
      return true;
    AddrMode.HasBaseReg = false;
    AddrMode.BaseReg = 0;
  }

  // If the base register is already taken, see if we can do [r+r].
  if (AddrMode.Scale == 0) {
    AddrMode.Scale = 1;
    AddrMode.ScaledReg = Addr;
    if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
      return true;
    AddrMode.Scale = 0;
    AddrMode.ScaledReg = 0;
  }
  // Couldn't match.
  return false;
}

/// IsOperandAMemoryOperand - Check to see if all uses of OpVal by the specified
/// inline asm call are due to memory operands.  If so, return true, otherwise
/// return false.
static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
                                    const TargetLowering &TLI) {
  TargetLowering::AsmOperandInfoVector TargetConstraints =
    TLI.ParseConstraints(ImmutableCallSite(CI));
  for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
    TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];

    // Compute the constraint code and ConstraintType to use.
    TLI.ComputeConstraintToUse(OpInfo, SDValue());

    // If this asm operand is our Value*, and if it isn't an indirect memory
    // operand, we can't fold it!
    if (OpInfo.CallOperandVal == OpVal &&
        (OpInfo.ConstraintType != TargetLowering::C_Memory ||
         !OpInfo.isIndirect))
      return false;
  }

  return true;
}

/// FindAllMemoryUses - Recursively walk all the uses of I until we find a
/// memory use.  If we find an obviously non-foldable instruction, return true.
/// Add the ultimately found memory instructions to MemoryUses.
static bool FindAllMemoryUses(Instruction *I,
                SmallVectorImpl<std::pair<Instruction*,unsigned> > &MemoryUses,
                              SmallPtrSet<Instruction*, 16> &ConsideredInsts,
                              const TargetLowering &TLI) {
  // If we already considered this instruction, we're done.
  if (!ConsideredInsts.insert(I))
    return false;

  // If this is an obviously unfoldable instruction, bail out.
  if (!MightBeFoldableInst(I))
    return true;

  // Loop over all the uses, recursively processing them.
  for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
       UI != E; ++UI) {
    User *U = *UI;

    if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
      MemoryUses.push_back(std::make_pair(LI, UI.getOperandNo()));
      continue;
    }

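    // (Illustrative: in "store i32* %p, i32** %q", a pointer %p appearing as
    //  operand 0 is the *stored value* -- it escapes and cannot be folded.
    //  Only operand 1, the store's address, is a true memory use.)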
    if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
      unsigned opNo = UI.getOperandNo();
      if (opNo == 0) return true; // Storing addr, not into addr.
      MemoryUses.push_back(std::make_pair(SI, opNo));
      continue;
    }

    if (CallInst *CI = dyn_cast<CallInst>(U)) {
      InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue());
      if (!IA) return true;

      // If this is a memory operand, we're cool, otherwise bail out.
      if (!IsOperandAMemoryOperand(CI, IA, I, TLI))
        return true;
      continue;
    }

    if (FindAllMemoryUses(cast<Instruction>(U), MemoryUses, ConsideredInsts,
                          TLI))
      return true;
  }

  return false;
}

/// ValueAlreadyLiveAtInst - Return true if Val is already known to be live at
/// the use site that we're folding it into.  If so, there is no cost to
/// include it in the addressing mode.  KnownLive1 and KnownLive2 are two values
/// that we know are live at the instruction already.
bool AddressingModeMatcher::ValueAlreadyLiveAtInst(Value *Val, Value *KnownLive1,
                                                   Value *KnownLive2) {
  // If Val is either of the known-live values, we know it is live!
  if (Val == 0 || Val == KnownLive1 || Val == KnownLive2)
    return true;

  // All values other than instructions and arguments (e.g. constants) are live.
  if (!isa<Instruction>(Val) && !isa<Argument>(Val)) return true;

  // If Val is a constant sized alloca in the entry block, it is live; this is
  // true because it is just a reference to the stack/frame pointer, which is
  // live for the whole function.
  if (AllocaInst *AI = dyn_cast<AllocaInst>(Val))
    if (AI->isStaticAlloca())
      return true;

  // Check to see if this value is already used in the memory instruction's
  // block.  If so, it's already live into the block at the very least, so we
  // can reasonably fold it.
  return Val->isUsedInBasicBlock(MemoryInst->getParent());
}

/// IsProfitableToFoldIntoAddressingMode - It is possible for the addressing
/// mode of the machine to fold the specified instruction into a load or store
/// that ultimately uses it.  However, the specified instruction has multiple
/// uses.  Given this, it may actually increase register pressure to fold it
/// into the load.  For example, consider this code:
///
///     X = ...
///     Y = X+1
///     use(Y)   -> nonload/store
///     Z = Y+1
///     load Z
///
/// In this case, Y has multiple uses, and can be folded into the load of Z
/// (yielding load [X+2]).  However, doing this will cause both "X" and "X+1" to
/// be live at the use(Y) line.  If we don't fold Y into load Z, we use one
/// fewer register.  Since Y can't be folded into "use(Y)" we don't increase the
/// number of computations either.
///
/// Note that this (like most of CodeGenPrepare) is just a rough heuristic.
/// If X was live across 'load Z' for other reasons, we actually *would* want
/// to fold the addressing mode in the Z case.  This would make Y die earlier.
bool AddressingModeMatcher::
IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
                                     ExtAddrMode &AMAfter) {
  if (IgnoreProfitability) return true;

  // AMBefore is the addressing mode before this instruction was folded into it,
  // and AMAfter is the addressing mode after the instruction was folded.  Get
  // the set of registers referenced by AMAfter and subtract out those
  // referenced by AMBefore: this is the set of values which folding in this
  // address extends the lifetime of.
  //
  // Note that there are only two potential values being referenced here,
  // BaseReg and ScaleReg (global addresses are always available, as are any
  // folded immediates).
  Value *BaseReg = AMAfter.BaseReg, *ScaledReg = AMAfter.ScaledReg;

  // If the BaseReg or ScaledReg was referenced by the previous addrmode, their
  // lifetime wasn't extended by adding this instruction.
  if (ValueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg))
    BaseReg = 0;
  if (ValueAlreadyLiveAtInst(ScaledReg, AMBefore.BaseReg, AMBefore.ScaledReg))
    ScaledReg = 0;

  // If folding this instruction (and its subexpressions) didn't extend any
  // live ranges, we're ok with it.
  if (BaseReg == 0 && ScaledReg == 0)
    return true;

  // If all uses of this instruction are ultimately loads, stores, or inline
  // asm memory operands, check to see if their addressing modes will include
  // this instruction.  If so, we can fold it into all uses, so it doesn't
  // matter if it has multiple uses.
  SmallVector<std::pair<Instruction*,unsigned>, 16> MemoryUses;
  SmallPtrSet<Instruction*, 16> ConsideredInsts;
  if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI))
    return false;  // Has a non-memory, non-foldable use!

  // Now that we know that all uses of this instruction are part of a chain of
  // computation involving only operations that could theoretically be folded
  // into a memory use, loop over each of these uses and see if they could
  // *actually* fold the instruction.
  SmallVector<Instruction*, 32> MatchedAddrModeInsts;
  for (unsigned i = 0, e = MemoryUses.size(); i != e; ++i) {
    Instruction *User = MemoryUses[i].first;
    unsigned OpNo = MemoryUses[i].second;

    // Get the access type of this use.  If the use isn't a pointer, we don't
    // know what it accesses.
    Value *Address = User->getOperand(OpNo);
    if (!Address->getType()->isPointerTy())
      return false;
    Type *AddressAccessTy =
      cast<PointerType>(Address->getType())->getElementType();

    // Do a match against the root of this address, ignoring profitability.
    // This will tell us if the addressing mode for the memory operation will
    // *actually* cover the shared instruction.
    ExtAddrMode Result;
    AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI, AddressAccessTy,
                                  MemoryInst, Result);
    Matcher.IgnoreProfitability = true;
    bool Success = Matcher.MatchAddr(Address, 0);
    (void)Success; assert(Success && "Couldn't select *anything*?");

    // If the match didn't cover I, then it won't be shared by it.
    if (std::find(MatchedAddrModeInsts.begin(), MatchedAddrModeInsts.end(),
                  I) == MatchedAddrModeInsts.end())
      return false;

    MatchedAddrModeInsts.clear();
  }

  return true;
}

} // end anonymous namespace

/// IsNonLocalValue - Return true if the specified value is defined in a
/// different basic block than BB.
static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
  if (Instruction *I = dyn_cast<Instruction>(V))
    return I->getParent() != BB;
  return false;
}

/// OptimizeMemoryInst - Load and Store Instructions often have
/// addressing modes that can do significant amounts of computation.  As such,
/// instruction selection will try to get the load or store to do as much
/// computation as possible for the program.  The problem is that isel can only
/// see within a single block.  As such, we sink as much legal addressing mode
/// stuff into the block as possible.
///
/// This method is used to optimize both load/store and inline asms with memory
/// operands.
bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
                                        Type *AccessTy) {
  Value *Repl = Addr;

  // Try to collapse single-value PHI nodes.  This is necessary to undo
  // unprofitable PRE transformations.
  SmallVector<Value*, 8> worklist;
  SmallPtrSet<Value*, 16> Visited;
  worklist.push_back(Addr);

  // Use a worklist to iteratively look through PHI nodes, and ensure that
  // the addressing modes obtained from the non-PHI roots of the graph
  // are equivalent.
  Value *Consensus = 0;
  unsigned NumUsesConsensus = 0;
  bool IsNumUsesConsensusValid = false;
  SmallVector<Instruction*, 16> AddrModeInsts;
  ExtAddrMode AddrMode;
  while (!worklist.empty()) {
    Value *V = worklist.back();
    worklist.pop_back();

    // Break use-def graph loops.
    if (!Visited.insert(V)) {
      Consensus = 0;
      break;
    }

    // For a PHI node, push all of its incoming values.
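    // (Illustrative: given "%a = phi i32* [ %g1, %bb1 ], [ %g2, %bb2 ]", both
    //  %g1 and %g2 end up on the worklist, and the transformation only
    //  proceeds if they produce the same ExtAddrMode.)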
    if (PHINode *P = dyn_cast<PHINode>(V)) {
      for (unsigned i = 0, e = P->getNumIncomingValues(); i != e; ++i)
        worklist.push_back(P->getIncomingValue(i));
      continue;
    }

    // For non-PHIs, determine the addressing mode being computed.
    SmallVector<Instruction*, 16> NewAddrModeInsts;
    ExtAddrMode NewAddrMode =
      AddressingModeMatcher::Match(V, AccessTy, MemoryInst,
                                   NewAddrModeInsts, *TLI);

    // This check is broken into two cases with very similar code to avoid using
    // getNumUses() as much as possible.  Some values have a lot of uses, so
    // calling getNumUses() unconditionally caused a significant compile-time
    // regression.
    if (!Consensus) {
      Consensus = V;
      AddrMode = NewAddrMode;
      AddrModeInsts = NewAddrModeInsts;
      continue;
    } else if (NewAddrMode == AddrMode) {
      if (!IsNumUsesConsensusValid) {
        NumUsesConsensus = Consensus->getNumUses();
        IsNumUsesConsensusValid = true;
      }

      // Ensure that the obtained addressing mode is equivalent to that obtained
      // for all other roots of the PHI traversal.  Also, when choosing one
      // such root as representative, select the one with the most uses in order
      // to keep the cost modeling heuristics in AddressingModeMatcher
      // applicable.
      unsigned NumUses = V->getNumUses();
      if (NumUses > NumUsesConsensus) {
        Consensus = V;
        NumUsesConsensus = NumUses;
        AddrModeInsts = NewAddrModeInsts;
      }
      continue;
    }

    Consensus = 0;
    break;
  }

  // If the addressing mode couldn't be determined, or if multiple different
  // ones were determined, bail out now.
  if (!Consensus) return false;

  // Check to see if any of the instructions subsumed by this addr mode are
  // non-local to I's BB.
  bool AnyNonLocal = false;
  for (unsigned i = 0, e = AddrModeInsts.size(); i != e; ++i) {
    if (IsNonLocalValue(AddrModeInsts[i], MemoryInst->getParent())) {
      AnyNonLocal = true;
      break;
    }
  }

  // If all the instructions matched are already in this BB, don't do anything.
  if (!AnyNonLocal) {
    DEBUG(dbgs() << "CGP: Found local addrmode: " << AddrMode << "\n");
    return false;
  }

  // Insert this computation right after this user.  Since our caller is
  // scanning from the top of the BB to the bottom, reuses of the expr are
  // guaranteed to happen later.
  IRBuilder<> Builder(MemoryInst);

  // Now that we have determined the addressing expression we want to use, and
  // know that we have to sink it into this block, check to see if we have
  // already done this for some other load/store instr in this block.  If so,
  // reuse the computation.
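  // (Illustrative: for AddrMode = {BaseReg=%p, Scale=4, ScaledReg=%i,
  //  BaseOffs=16}, the code below materializes roughly
  //    %sunkaddr = inttoptr(ptrtoint(%p) + 4*sext(%i) + 16)
  //  directly in front of the load or store, so isel can fold it.)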
  Value *&SunkAddr = SunkAddrs[Addr];
  if (SunkAddr) {
    DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode << " for "
                 << *MemoryInst);
    if (SunkAddr->getType() != Addr->getType())
      SunkAddr = Builder.CreateBitCast(SunkAddr, Addr->getType());
  } else {
    DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "
                 << *MemoryInst);
    Type *IntPtrTy =
          TLI->getDataLayout()->getIntPtrType(AccessTy->getContext());

    Value *Result = 0;

    // Start with the base register. Do this first so that subsequent address
    // matching finds it last, which will prevent it from trying to match it
    // as the scaled value in case it happens to be a mul. That would be
    // problematic if we've sunk a different mul for the scale, because then
    // we'd end up sinking both muls.
    if (AddrMode.BaseReg) {
      Value *V = AddrMode.BaseReg;
      if (V->getType()->isPointerTy())
        V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
      if (V->getType() != IntPtrTy)
        V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr");
      Result = V;
    }

    // Add the scale value.
    if (AddrMode.Scale) {
      Value *V = AddrMode.ScaledReg;
      if (V->getType() == IntPtrTy) {
        // done.
      } else if (V->getType()->isPointerTy()) {
        V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
      } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() <
                 cast<IntegerType>(V->getType())->getBitWidth()) {
        V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
      } else {
        V = Builder.CreateSExt(V, IntPtrTy, "sunkaddr");
      }
      if (AddrMode.Scale != 1)
        V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale),
                              "sunkaddr");
      if (Result)
        Result = Builder.CreateAdd(Result, V, "sunkaddr");
      else
        Result = V;
    }

    // Add in the BaseGV if present.
    if (AddrMode.BaseGV) {
      Value *V = Builder.CreatePtrToInt(AddrMode.BaseGV, IntPtrTy, "sunkaddr");
      if (Result)
        Result = Builder.CreateAdd(Result, V, "sunkaddr");
      else
        Result = V;
    }

    // Add in the Base Offset if present.
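    // (Illustrative: when the whole address folded to a constant, Result is
    //  still null here, and the constant offset built below becomes the
    //  entire integer expression fed to the final inttoptr.)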
    if (AddrMode.BaseOffs) {
      Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
      if (Result)
        Result = Builder.CreateAdd(Result, V, "sunkaddr");
      else
        Result = V;
    }

    if (Result == 0)
      SunkAddr = Constant::getNullValue(Addr->getType());
    else
      SunkAddr = Builder.CreateIntToPtr(Result, Addr->getType(), "sunkaddr");
  }

  MemoryInst->replaceUsesOfWith(Repl, SunkAddr);

  // If we have no uses, recursively delete the value and all dead instructions
  // using it.
  if (Repl->use_empty()) {
    // This can cause recursive deletion, which can invalidate our iterator.
    // Use a WeakVH to hold onto it in case this happens.
    WeakVH IterHandle(CurInstIterator);
    BasicBlock *BB = CurInstIterator->getParent();

    RecursivelyDeleteTriviallyDeadInstructions(Repl, TLInfo);

    if (IterHandle != CurInstIterator) {
      // If the iterator instruction was recursively deleted, start over at the
      // start of the block.
      CurInstIterator = BB->begin();
      SunkAddrs.clear();
    }
  }
  ++NumMemoryInsts;
  return true;
}

/// OptimizeInlineAsmInst - If there are any memory operands, use
/// OptimizeMemoryInst to sink their address computation into the block when
/// possible / profitable.
bool CodeGenPrepare::OptimizeInlineAsmInst(CallInst *CS) {
  bool MadeChange = false;

  TargetLowering::AsmOperandInfoVector
    TargetConstraints = TLI->ParseConstraints(CS);
  unsigned ArgNo = 0;
  for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
    TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];

    // Compute the constraint code and ConstraintType to use.
    TLI->ComputeConstraintToUse(OpInfo, SDValue());

    if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
        OpInfo.isIndirect) {
      Value *OpVal = CS->getArgOperand(ArgNo++);
      MadeChange |= OptimizeMemoryInst(CS, OpVal, OpVal->getType());
    } else if (OpInfo.Type == InlineAsm::isInput)
      ArgNo++;
  }

  return MadeChange;
}

/// MoveExtToFormExtLoad - Move a zext or sext fed by a load into the same
/// basic block as the load, unless conditions are unfavorable.  This allows
/// SelectionDAG to fold the extend into the load.
///
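/// For example (illustrative IR only):
///   bb1:  %x = load i16* %p
///   ...
///   bb7:  %y = sext i16 %x to i32
/// Moving the sext up next to the load lets isel select a single extending
/// load (SEXTLOAD) when the target supports it.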
bool CodeGenPrepare::MoveExtToFormExtLoad(Instruction *I) {
  // Look for a load being extended.
  LoadInst *LI = dyn_cast<LoadInst>(I->getOperand(0));
  if (!LI) return false;

  // If they're already in the same block, there's nothing to do.
  if (LI->getParent() == I->getParent())
    return false;

  // If the load has other users and the truncate is not free, this probably
  // isn't worthwhile.
  if (!LI->hasOneUse() &&
      TLI && (TLI->isTypeLegal(TLI->getValueType(LI->getType())) ||
              !TLI->isTypeLegal(TLI->getValueType(I->getType()))) &&
      !TLI->isTruncateFree(I->getType(), LI->getType()))
    return false;

  // Check whether the target supports casts folded into loads.
  unsigned LType;
  if (isa<ZExtInst>(I))
    LType = ISD::ZEXTLOAD;
  else {
    assert(isa<SExtInst>(I) && "Unexpected ext type!");
    LType = ISD::SEXTLOAD;
  }
  if (TLI && !TLI->isLoadExtLegal(LType, TLI->getValueType(LI->getType())))
    return false;

  // Move the extend into the same block as the load, so that SelectionDAG
  // can fold it.
  I->removeFromParent();
  I->insertAfter(LI);
  ++NumExtsMoved;
  return true;
}

bool CodeGenPrepare::OptimizeExtUses(Instruction *I) {
  BasicBlock *DefBB = I->getParent();

  // If the result of a {s|z}ext and its source are both live out, rewrite all
  // other uses of the source with the result of the extension.
  Value *Src = I->getOperand(0);
  if (Src->hasOneUse())
    return false;

  // Only do this xform if truncating is free.
  if (TLI && !TLI->isTruncateFree(I->getType(), Src->getType()))
    return false;

  // Only safe to perform the optimization if the source is also defined in
  // this block.
  if (!isa<Instruction>(Src) || DefBB != cast<Instruction>(Src)->getParent())
    return false;

  bool DefIsLiveOut = false;
  for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
       UI != E; ++UI) {
    Instruction *User = cast<Instruction>(*UI);

    // Figure out which BB this ext is used in.
    BasicBlock *UserBB = User->getParent();
    if (UserBB == DefBB) continue;
    DefIsLiveOut = true;
    break;
  }
  if (!DefIsLiveOut)
    return false;

  // Make sure none of the uses are PHI nodes.
  for (Value::use_iterator UI = Src->use_begin(), E = Src->use_end();
       UI != E; ++UI) {
    Instruction *User = cast<Instruction>(*UI);
    BasicBlock *UserBB = User->getParent();
    if (UserBB == DefBB) continue;
    // Be conservative. We don't want this xform to end up introducing
    // reloads just before load / store instructions.
    if (isa<PHINode>(User) || isa<LoadInst>(User) || isa<StoreInst>(User))
      return false;
  }

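  // (Illustrative: if "%e = zext i16 %s to i32" and %s is also used in another
  //  block, we insert "%t = trunc i32 %e to i16" in that block and rewrite the
  //  use of %s to %t, so only %e remains live out.)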
  // InsertedTruncs - Only insert one trunc in each block once.
  DenseMap<BasicBlock*, Instruction*> InsertedTruncs;

  bool MadeChange = false;
  for (Value::use_iterator UI = Src->use_begin(), E = Src->use_end();
       UI != E; ++UI) {
    Use &TheUse = UI.getUse();
    Instruction *User = cast<Instruction>(*UI);

    // Figure out which BB this ext is used in.
    BasicBlock *UserBB = User->getParent();
    if (UserBB == DefBB) continue;

    // Both src and def are live in this block. Rewrite the use.
    Instruction *&InsertedTrunc = InsertedTruncs[UserBB];

    if (!InsertedTrunc) {
      BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
      InsertedTrunc = new TruncInst(I, Src->getType(), "", InsertPt);
    }

    // Replace a use of the {s|z}ext source with a use of the result.
    TheUse = InsertedTrunc;
    ++NumExtUses;
    MadeChange = true;
  }

  return MadeChange;
}

/// isFormingBranchFromSelectProfitable - Returns true if a SelectInst should be
/// turned into an explicit branch.
static bool isFormingBranchFromSelectProfitable(SelectInst *SI) {
  // FIXME: This should use the same heuristics as IfConversion to determine
  // whether a select is better represented as a branch.  This requires that
  // branch probability metadata is preserved for the select, which is not the
  // case currently.

  CmpInst *Cmp = dyn_cast<CmpInst>(SI->getCondition());

  // If the branch is predicted right, an out-of-order CPU can avoid blocking on
  // the compare.  Emit cmovs on compares with a memory operand as branches to
  // avoid stalls on the load from memory.  If the compare has more than one use
  // there's probably another cmov or setcc around so it's not worth emitting a
  // branch.
  if (!Cmp)
    return false;

  Value *CmpOp0 = Cmp->getOperand(0);
  Value *CmpOp1 = Cmp->getOperand(1);

  // We check that the memory operand has one use to avoid uses of the loaded
  // value directly after the compare, making branches unprofitable.
  return Cmp->hasOneUse() &&
         ((isa<LoadInst>(CmpOp0) && CmpOp0->hasOneUse()) ||
          (isa<LoadInst>(CmpOp1) && CmpOp1->hasOneUse()));
}


/// If we have a SelectInst that will likely profit from branch prediction,
/// turn it into a branch.
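/// For example (illustrative IR only), a select fed by a compare of a loaded
/// value:
///   %v = load i32* %p
///   %c = icmp eq i32 %v, 0
///   %r = select i1 %c, i32 %a, i32 %b
/// is rewritten below into a conditional branch over a small "select.mid"
/// block, with %r rebuilt as a PHI in "select.end".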
bool CodeGenPrepare::OptimizeSelectInst(SelectInst *SI) {
  bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1);

  // Can we convert the 'select' into a conditional branch?
  if (DisableSelectToBranch || OptSize || !TLI || VectorCond)
    return false;

  TargetLowering::SelectSupportKind SelectKind;
  if (VectorCond)
    SelectKind = TargetLowering::VectorMaskSelect;
  else if (SI->getType()->isVectorTy())
    SelectKind = TargetLowering::ScalarCondVectorVal;
  else
    SelectKind = TargetLowering::ScalarValSelect;

  // Do we have efficient codegen support for this kind of 'select'?
  if (TLI->isSelectSupported(SelectKind)) {
    // We have efficient codegen support for the select instruction.
    // Check if it is profitable to keep this 'select'.
    if (!TLI->isPredictableSelectExpensive() ||
        !isFormingBranchFromSelectProfitable(SI))
      return false;
  }

  ModifiedDT = true;

  // First, we split the block containing the select into 2 blocks.
  BasicBlock *StartBlock = SI->getParent();
  BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(SI));
  BasicBlock *NextBlock = StartBlock->splitBasicBlock(SplitPt, "select.end");

  // Create a new block serving as the landing pad for the branch.
  BasicBlock *SmallBlock = BasicBlock::Create(SI->getContext(), "select.mid",
                                              NextBlock->getParent(),
                                              NextBlock);

  // Move the unconditional branch from the block with the select in it into our
  // landing pad block.
  StartBlock->getTerminator()->eraseFromParent();
  BranchInst::Create(NextBlock, SmallBlock);

  // Insert the real conditional branch based on the original condition.
  BranchInst::Create(NextBlock, SmallBlock, SI->getCondition(), SI);

  // The select itself is replaced with a PHI Node.
  PHINode *PN = PHINode::Create(SI->getType(), 2, "", NextBlock->begin());
  PN->takeName(SI);
  PN->addIncoming(SI->getTrueValue(), StartBlock);
  PN->addIncoming(SI->getFalseValue(), SmallBlock);
  SI->replaceAllUsesWith(PN);
  SI->eraseFromParent();

  // Instruct OptimizeBlock to skip to the next block.
  CurInstIterator = StartBlock->end();
  ++NumSelectsExpanded;
  return true;
}

bool CodeGenPrepare::OptimizeInst(Instruction *I) {
  if (PHINode *P = dyn_cast<PHINode>(I)) {
    // It is possible for very late stage optimizations (such as SimplifyCFG)
    // to introduce PHI nodes too late to be cleaned up.  If we detect such a
    // trivial PHI, go ahead and zap it here.
    if (Value *V = SimplifyInstruction(P)) {
      P->replaceAllUsesWith(V);
      P->eraseFromParent();
      ++NumPHIsElim;
      return true;
    }
    return false;
  }

  if (CastInst *CI = dyn_cast<CastInst>(I)) {
    // If the source of the cast is a constant, then this should have
    // already been constant folded.  The only reason NOT to constant fold
    // it is if something (e.g. LSR) was careful to place the constant
    // evaluation in a block other than the one that uses it (e.g. to hoist
    // the address of globals out of a loop).  If this is the case, we don't
    // want to forward-subst the cast.
    if (isa<Constant>(CI->getOperand(0)))
      return false;

    if (TLI && OptimizeNoopCopyExpression(CI, *TLI))
      return true;

    if (isa<ZExtInst>(I) || isa<SExtInst>(I)) {
      bool MadeChange = MoveExtToFormExtLoad(I);
      return MadeChange | OptimizeExtUses(I);
    }
    return false;
  }

  if (CmpInst *CI = dyn_cast<CmpInst>(I))
    return OptimizeCmpExpression(CI);

  if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
    if (TLI)
      return OptimizeMemoryInst(I, I->getOperand(0), LI->getType());
    return false;
  }

  if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
    if (TLI)
      return OptimizeMemoryInst(I, SI->getOperand(1),
                                SI->getOperand(0)->getType());
    return false;
  }

  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
    if (GEPI->hasAllZeroIndices()) {
      /// The GEP operand must be a pointer, so must its result -> BitCast
      Instruction *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(),
                                        GEPI->getName(), GEPI);
      GEPI->replaceAllUsesWith(NC);
      GEPI->eraseFromParent();
      ++NumGEPsElim;
      OptimizeInst(NC);
      return true;
    }
    return false;
  }

  if (CallInst *CI = dyn_cast<CallInst>(I))
    return OptimizeCallInst(CI);

  if (SelectInst *SI = dyn_cast<SelectInst>(I))
    return OptimizeSelectInst(SI);

  return false;
}

// In this pass we look for GEP and cast instructions that are used
// across basic blocks and rewrite them to improve basic-block-at-a-time
// selection.
bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) {
  SunkAddrs.clear();
  bool MadeChange = false;

  CurInstIterator = BB.begin();
  while (CurInstIterator != BB.end())
    MadeChange |= OptimizeInst(CurInstIterator++);

  MadeChange |= DupRetToEnableTailCallOpts(&BB);

  return MadeChange;
}

// If llvm.dbg.value is far away from the value, then iSel may not be able to
// handle it properly.  iSel will drop llvm.dbg.value if it cannot find a node
// corresponding to the value.
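// (Illustrative IR; !10 is a placeholder variable descriptor, not from a real
//  test case:
//    %v = add i32 %a, %b
//    ...many instructions...
//    call void @llvm.dbg.value(metadata !{i32 %v}, i64 0, metadata !10)
//  the dbg.value call is reinserted immediately after the definition of %v.)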
bool CodeGenPrepare::PlaceDbgValues(Function &F) {
  bool MadeChange = false;
  for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
    Instruction *PrevNonDbgInst = NULL;
    for (BasicBlock::iterator BI = I->begin(), BE = I->end(); BI != BE;) {
      Instruction *Insn = BI; ++BI;
      DbgValueInst *DVI = dyn_cast<DbgValueInst>(Insn);
      if (!DVI) {
        PrevNonDbgInst = Insn;
        continue;
      }

      Instruction *VI = dyn_cast_or_null<Instruction>(DVI->getValue());
      if (VI && VI != PrevNonDbgInst && !VI->isTerminator()) {
        DEBUG(dbgs() << "Moving Debug Value before :\n" << *DVI << ' ' << *VI);
        DVI->removeFromParent();
        if (isa<PHINode>(VI))
          DVI->insertBefore(VI->getParent()->getFirstInsertionPt());
        else
          DVI->insertAfter(VI);
        MadeChange = true;
        ++NumDbgValueMoved;
      }
    }
  }
  return MadeChange;
}