//===- CodeGenPrepare.cpp - Prepare a function for code generation --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass munges the code in the input function to better prepare it for
// SelectionDAG-based code generation. This works around limitations in its
// basic-block-at-a-time approach. It should eventually be removed.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "codegenprepare"
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/ValueMap.h"
#include "llvm/Analysis/DominatorInternals.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Pass.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/PatternMatch.h"
#include "llvm/Support/ValueHandle.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/Transforms/Utils/BypassSlowDivision.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
using namespace llvm::PatternMatch;

STATISTIC(NumBlocksElim, "Number of blocks eliminated");
STATISTIC(NumPHIsElim, "Number of trivial PHIs eliminated");
STATISTIC(NumGEPsElim, "Number of GEPs converted to casts");
STATISTIC(NumCmpUses, "Number of uses of Cmp expressions replaced with uses of "
                      "sunken Cmps");
STATISTIC(NumCastUses, "Number of uses of Cast expressions replaced with uses "
                       "of sunken Casts");
STATISTIC(NumMemoryInsts, "Number of memory instructions whose address "
                          "computations were sunk");
STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads");
STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized");
STATISTIC(NumRetsDup, "Number of return instructions duplicated");
STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved");
STATISTIC(NumSelectsExpanded, "Number of selects turned into branches");

static cl::opt<bool> DisableBranchOpts(
  "disable-cgp-branch-opts", cl::Hidden, cl::init(false),
  cl::desc("Disable branch optimizations in CodeGenPrepare"));

static cl::opt<bool> DisableSelectToBranch(
  "disable-cgp-select2branch", cl::Hidden, cl::init(false),
  cl::desc("Disable select to branch conversion."));

namespace {
  class CodeGenPrepare : public FunctionPass {
    /// TLI - Keep a pointer to a TargetLowering to consult for determining
    /// transformation profitability.
    const TargetMachine *TM;
    const TargetLowering *TLI;
    const TargetLibraryInfo *TLInfo;
    DominatorTree *DT;

    /// CurInstIterator - As we scan instructions optimizing them, this is the
    /// next instruction to optimize. Xforms that can invalidate this should
    /// update it.
    BasicBlock::iterator CurInstIterator;

    /// Keeps track of non-local addresses that have been sunk into a block.
    /// This allows us to avoid inserting duplicate code for blocks with
    /// multiple load/stores of the same address.
    ValueMap<Value*, Value*> SunkAddrs;

    /// ModifiedDT - If the CFG is modified in any way, the dominator tree may
    /// need to be updated.
    bool ModifiedDT;

    /// OptSize - True if optimizing for size.
    bool OptSize;

  public:
    static char ID; // Pass identification, replacement for typeid
    explicit CodeGenPrepare(const TargetMachine *TM = 0)
      : FunctionPass(ID), TM(TM), TLI(0) {
      initializeCodeGenPreparePass(*PassRegistry::getPassRegistry());
    }
    bool runOnFunction(Function &F);

    const char *getPassName() const { return "CodeGen Prepare"; }

    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.addPreserved<DominatorTree>();
      AU.addRequired<TargetLibraryInfo>();
    }

  private:
    bool EliminateFallThrough(Function &F);
    bool EliminateMostlyEmptyBlocks(Function &F);
    bool CanMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
    void EliminateMostlyEmptyBlock(BasicBlock *BB);
    bool OptimizeBlock(BasicBlock &BB);
    bool OptimizeInst(Instruction *I);
    bool OptimizeMemoryInst(Instruction *I, Value *Addr, Type *AccessTy);
    bool OptimizeInlineAsmInst(CallInst *CS);
    bool OptimizeCallInst(CallInst *CI);
    bool MoveExtToFormExtLoad(Instruction *I);
    bool OptimizeExtUses(Instruction *I);
    bool OptimizeSelectInst(SelectInst *SI);
    bool DupRetToEnableTailCallOpts(BasicBlock *BB);
    bool PlaceDbgValues(Function &F);
  };
}

char CodeGenPrepare::ID = 0;
INITIALIZE_PASS_BEGIN(CodeGenPrepare, "codegenprepare",
                      "Optimize for code generation", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_PASS_END(CodeGenPrepare, "codegenprepare",
                    "Optimize for code generation", false, false)

FunctionPass *llvm::createCodeGenPreparePass(const TargetMachine *TM) {
  return new CodeGenPrepare(TM);
}

bool CodeGenPrepare::runOnFunction(Function &F) {
  bool EverMadeChange = false;

  ModifiedDT = false;
  if (TM) TLI = TM->getTargetLowering();
  TLInfo = &getAnalysis<TargetLibraryInfo>();
  DT = getAnalysisIfAvailable<DominatorTree>();
  OptSize = F.getAttributes().hasAttribute(AttributeSet::FunctionIndex,
                                           Attribute::OptimizeForSize);

  /// This optimization identifies DIV instructions that can be
  /// profitably bypassed and carried out with a shorter, faster divide.
  if (!OptSize && TLI && TLI->isSlowDivBypassed()) {
    const DenseMap<unsigned int, unsigned int> &BypassWidths =
      TLI->getBypassSlowDivWidths();
    for (Function::iterator I = F.begin(); I != F.end(); I++)
      EverMadeChange |= bypassSlowDivision(F, I, BypassWidths);
  }

  // Eliminate blocks that contain only PHI nodes and an
  // unconditional branch.
  EverMadeChange |= EliminateMostlyEmptyBlocks(F);

  // If llvm.dbg.value is far away from the value, then iSel may not be able
  // to handle it properly. iSel will drop llvm.dbg.value if it cannot
  // find a node corresponding to the value.
  EverMadeChange |= PlaceDbgValues(F);

  bool MadeChange = true;
  while (MadeChange) {
    MadeChange = false;
    for (Function::iterator I = F.begin(); I != F.end(); ) {
      BasicBlock *BB = I++;
      MadeChange |= OptimizeBlock(*BB);
    }
    EverMadeChange |= MadeChange;
  }

  SunkAddrs.clear();

  if (!DisableBranchOpts) {
    MadeChange = false;
    SmallPtrSet<BasicBlock*, 8> WorkList;
    for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
      SmallVector<BasicBlock*, 2> Successors(succ_begin(BB), succ_end(BB));
      MadeChange |= ConstantFoldTerminator(BB, true);
      if (!MadeChange) continue;

      for (SmallVectorImpl<BasicBlock*>::iterator
             II = Successors.begin(), IE = Successors.end(); II != IE; ++II)
        if (pred_begin(*II) == pred_end(*II))
          WorkList.insert(*II);
    }

    // Delete the dead blocks and any of their dead successors.
    MadeChange |= !WorkList.empty();
    while (!WorkList.empty()) {
      BasicBlock *BB = *WorkList.begin();
      WorkList.erase(BB);
      SmallVector<BasicBlock*, 2> Successors(succ_begin(BB), succ_end(BB));

      DeleteDeadBlock(BB);

      for (SmallVectorImpl<BasicBlock*>::iterator
             II = Successors.begin(), IE = Successors.end(); II != IE; ++II)
        if (pred_begin(*II) == pred_end(*II))
          WorkList.insert(*II);
    }

    // Merge pairs of basic blocks with unconditional branches, connected by
    // a single edge.
    if (EverMadeChange || MadeChange)
      MadeChange |= EliminateFallThrough(F);

    if (MadeChange)
      ModifiedDT = true;
    EverMadeChange |= MadeChange;
  }

  if (ModifiedDT && DT)
    DT->DT->recalculate(F);

  return EverMadeChange;
}

/// EliminateFallThrough - Merge basic blocks which are connected
/// by a single edge, where one of the basic blocks has a single successor
/// pointing to the other basic block, which has a single predecessor.
bool CodeGenPrepare::EliminateFallThrough(Function &F) {
  bool Changed = false;
  // Scan all of the blocks in the function, except for the entry block.
  for (Function::iterator I = ++F.begin(), E = F.end(); I != E; ) {
    BasicBlock *BB = I++;
    // If the destination block has a single pred, then this is a trivial
    // edge, just collapse it.
    BasicBlock *SinglePred = BB->getSinglePredecessor();

    // Don't merge if BB's address is taken.
    if (!SinglePred || SinglePred == BB || BB->hasAddressTaken()) continue;

    BranchInst *Term = dyn_cast<BranchInst>(SinglePred->getTerminator());
    if (Term && !Term->isConditional()) {
      Changed = true;
      DEBUG(dbgs() << "To merge:\n"<< *SinglePred << "\n\n\n");
      // Remember if SinglePred was the entry block of the function.
      // If so, we will need to move BB back to the entry position.
      bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
      MergeBasicBlockIntoOnlyPred(BB, this);

      if (isEntry && BB != &BB->getParent()->getEntryBlock())
        BB->moveBefore(&BB->getParent()->getEntryBlock());

      // We have erased a block. Update the iterator.
      I = BB;
    }
  }
  return Changed;
}

/// EliminateMostlyEmptyBlocks - eliminate blocks that contain only PHI nodes,
/// debug info directives, and an unconditional branch. Passes before isel
/// (e.g. LSR/loopsimplify) often split edges in ways that are non-optimal for
/// isel. Start by eliminating these blocks so we can split them the way we
/// want them.
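///
/// For example (illustrative IR), a block of the form
/// @code
/// bb:                                   ; preds = %entry
///   %p = phi i32 [ 0, %entry ]
///   br label %dest
/// @endcode
/// can, when CanMergeBlocks allows it, be folded into %dest so that %dest's
/// PHI nodes take their incoming values directly from %entry.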
bool CodeGenPrepare::EliminateMostlyEmptyBlocks(Function &F) {
  bool MadeChange = false;
  // Note that this intentionally skips the entry block.
  for (Function::iterator I = ++F.begin(), E = F.end(); I != E; ) {
    BasicBlock *BB = I++;

    // If this block doesn't end with an uncond branch, ignore it.
    BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
    if (!BI || !BI->isUnconditional())
      continue;

    // If the instruction before the branch (skipping debug info) isn't a phi
    // node, then other stuff is happening here.
    BasicBlock::iterator BBI = BI;
    if (BBI != BB->begin()) {
      --BBI;
      while (isa<DbgInfoIntrinsic>(BBI)) {
        if (BBI == BB->begin())
          break;
        --BBI;
      }
      if (!isa<DbgInfoIntrinsic>(BBI) && !isa<PHINode>(BBI))
        continue;
    }

    // Do not break infinite loops.
    BasicBlock *DestBB = BI->getSuccessor(0);
    if (DestBB == BB)
      continue;

    if (!CanMergeBlocks(BB, DestBB))
      continue;

    EliminateMostlyEmptyBlock(BB);
    MadeChange = true;
  }
  return MadeChange;
}

/// CanMergeBlocks - Return true if we can merge BB into DestBB if there is a
/// single uncond branch between them, and BB contains no other non-phi
/// instructions.
bool CodeGenPrepare::CanMergeBlocks(const BasicBlock *BB,
                                    const BasicBlock *DestBB) const {
  // We only want to eliminate blocks whose phi nodes are used by phi nodes in
  // the successor. If there are more complex conditions (e.g. preheaders),
  // don't mess around with them.
  BasicBlock::const_iterator BBI = BB->begin();
  while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) {
    for (Value::const_use_iterator UI = PN->use_begin(), E = PN->use_end();
         UI != E; ++UI) {
      const Instruction *User = cast<Instruction>(*UI);
      if (User->getParent() != DestBB || !isa<PHINode>(User))
        return false;
      // If User is inside DestBB block and it is a PHINode then check
      // incoming value. If incoming value is not from BB then this is
      // a complex condition (e.g. preheaders) we want to avoid here.
      if (User->getParent() == DestBB) {
        if (const PHINode *UPN = dyn_cast<PHINode>(User))
          for (unsigned I = 0, E = UPN->getNumIncomingValues(); I != E; ++I) {
            Instruction *Insn = dyn_cast<Instruction>(UPN->getIncomingValue(I));
            if (Insn && Insn->getParent() == BB &&
                Insn->getParent() != UPN->getIncomingBlock(I))
              return false;
          }
      }
    }
  }

  // If BB and DestBB contain any common predecessors, then the phi nodes in BB
  // and DestBB may have conflicting incoming values for the block. If so, we
  // can't merge the block.
  const PHINode *DestBBPN = dyn_cast<PHINode>(DestBB->begin());
  if (!DestBBPN) return true;  // no conflict.

  // Collect the preds of BB.
  SmallPtrSet<const BasicBlock*, 16> BBPreds;
  if (const PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
    // It is faster to get preds from a PHI than with pred_iterator.
    for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
      BBPreds.insert(BBPN->getIncomingBlock(i));
  } else {
    BBPreds.insert(pred_begin(BB), pred_end(BB));
  }

  // Walk the preds of DestBB.
  for (unsigned i = 0, e = DestBBPN->getNumIncomingValues(); i != e; ++i) {
    BasicBlock *Pred = DestBBPN->getIncomingBlock(i);
    if (BBPreds.count(Pred)) {   // Common predecessor?
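      // BB and DestBB share this predecessor, so after merging each PHI in
      // DestBB would receive two incoming values for Pred; only proceed if
      // those values are provably identical.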
      BBI = DestBB->begin();
      while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) {
        const Value *V1 = PN->getIncomingValueForBlock(Pred);
        const Value *V2 = PN->getIncomingValueForBlock(BB);

        // If V2 is a phi node in BB, look up what the mapped value will be.
        if (const PHINode *V2PN = dyn_cast<PHINode>(V2))
          if (V2PN->getParent() == BB)
            V2 = V2PN->getIncomingValueForBlock(Pred);

        // If there is a conflict, bail out.
        if (V1 != V2) return false;
      }
    }
  }

  return true;
}


/// EliminateMostlyEmptyBlock - Eliminate a basic block that has only phi nodes
/// and an unconditional branch in it.
void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) {
  BranchInst *BI = cast<BranchInst>(BB->getTerminator());
  BasicBlock *DestBB = BI->getSuccessor(0);

  DEBUG(dbgs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n" << *BB << *DestBB);

  // If the destination block has a single pred, then this is a trivial edge,
  // just collapse it.
  if (BasicBlock *SinglePred = DestBB->getSinglePredecessor()) {
    if (SinglePred != DestBB) {
      // Remember if SinglePred was the entry block of the function. If so, we
      // will need to move BB back to the entry position.
      bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
      MergeBasicBlockIntoOnlyPred(DestBB, this);

      if (isEntry && BB != &BB->getParent()->getEntryBlock())
        BB->moveBefore(&BB->getParent()->getEntryBlock());

      DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
      return;
    }
  }

  // Otherwise, we have multiple predecessors of BB. Update the PHIs in DestBB
  // to handle the new incoming edges it is about to have.
  PHINode *PN;
  for (BasicBlock::iterator BBI = DestBB->begin();
       (PN = dyn_cast<PHINode>(BBI)); ++BBI) {
    // Remove the incoming value for BB, and remember it.
    Value *InVal = PN->removeIncomingValue(BB, false);

    // Two options: either the InVal is a phi node defined in BB or it is some
    // value that dominates BB.
    PHINode *InValPhi = dyn_cast<PHINode>(InVal);
    if (InValPhi && InValPhi->getParent() == BB) {
      // Add all of the input values of the input PHI as inputs of this phi.
      for (unsigned i = 0, e = InValPhi->getNumIncomingValues(); i != e; ++i)
        PN->addIncoming(InValPhi->getIncomingValue(i),
                        InValPhi->getIncomingBlock(i));
    } else {
      // Otherwise, add one instance of the dominating value for each edge that
      // we will be adding.
      if (PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
        for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
          PN->addIncoming(InVal, BBPN->getIncomingBlock(i));
      } else {
        for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
          PN->addIncoming(InVal, *PI);
      }
    }
  }

  // The PHIs are now updated, change everything that refers to BB to use
  // DestBB and remove BB.
  BB->replaceAllUsesWith(DestBB);
  if (DT && !ModifiedDT) {
    BasicBlock *BBIDom = DT->getNode(BB)->getIDom()->getBlock();
    BasicBlock *DestBBIDom = DT->getNode(DestBB)->getIDom()->getBlock();
    BasicBlock *NewIDom = DT->findNearestCommonDominator(BBIDom, DestBBIDom);
    DT->changeImmediateDominator(DestBB, NewIDom);
    DT->eraseNode(BB);
  }
  BB->eraseFromParent();
  ++NumBlocksElim;

  DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
}

/// OptimizeNoopCopyExpression - If the specified cast instruction is a noop
/// copy (e.g. it's casting from one pointer type to another, i32->i8 on PPC),
/// sink it into user blocks to reduce the number of virtual
/// registers that must be created and coalesced.
///
/// Return true if any changes are made.
///
static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){
  // If this is a noop copy,
  EVT SrcVT = TLI.getValueType(CI->getOperand(0)->getType());
  EVT DstVT = TLI.getValueType(CI->getType());

  // If this is an fp<->int conversion, it is not a noop copy.
  if (SrcVT.isInteger() != DstVT.isInteger())
    return false;

  // If this is an extension, it will be a zero or sign extension, which
  // isn't a noop.
  if (SrcVT.bitsLT(DstVT)) return false;

  // If these values will be promoted, find out what they will be promoted
  // to. This helps us consider truncates on PPC as noop copies when they
  // are.
  if (TLI.getTypeAction(CI->getContext(), SrcVT) ==
      TargetLowering::TypePromoteInteger)
    SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT);
  if (TLI.getTypeAction(CI->getContext(), DstVT) ==
      TargetLowering::TypePromoteInteger)
    DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT);

  // If, after promotion, these are the same types, this is a noop copy.
  if (SrcVT != DstVT)
    return false;

  BasicBlock *DefBB = CI->getParent();

  /// InsertedCasts - Only insert a cast in each block once.
  DenseMap<BasicBlock*, CastInst*> InsertedCasts;

  bool MadeChange = false;
  for (Value::use_iterator UI = CI->use_begin(), E = CI->use_end();
       UI != E; ) {
    Use &TheUse = UI.getUse();
    Instruction *User = cast<Instruction>(*UI);

    // Figure out which BB this cast is used in. For PHI's this is the
    // appropriate predecessor block.
    BasicBlock *UserBB = User->getParent();
    if (PHINode *PN = dyn_cast<PHINode>(User)) {
      UserBB = PN->getIncomingBlock(UI);
    }

    // Preincrement use iterator so we don't invalidate it.
    ++UI;

    // If this user is in the same block as the cast, don't change the cast.
    if (UserBB == DefBB) continue;

    // If we have already inserted a cast into this block, use it.
    CastInst *&InsertedCast = InsertedCasts[UserBB];

    if (!InsertedCast) {
      BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
      InsertedCast =
        CastInst::Create(CI->getOpcode(), CI->getOperand(0), CI->getType(), "",
                         InsertPt);
      MadeChange = true;
    }

    // Replace a use of the cast with a use of the new cast.
    TheUse = InsertedCast;
    ++NumCastUses;
  }

  // If we removed all uses, nuke the cast.
  if (CI->use_empty()) {
    CI->eraseFromParent();
    MadeChange = true;
  }

  return MadeChange;
}

/// OptimizeCmpExpression - sink the given CmpInst into user blocks to reduce
/// the number of virtual registers that must be created and coalesced. This is
/// a clear win except on targets with multiple condition code registers
/// (PowerPC), where it might lose; some adjustment may be wanted there.
///
/// Return true if any changes are made.
static bool OptimizeCmpExpression(CmpInst *CI) {
  BasicBlock *DefBB = CI->getParent();

  /// InsertedCmp - Only insert a cmp in each block once.
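  /// Maps a user block to the single sunken copy of the cmp created for it.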
  DenseMap<BasicBlock*, CmpInst*> InsertedCmps;

  bool MadeChange = false;
  for (Value::use_iterator UI = CI->use_begin(), E = CI->use_end();
       UI != E; ) {
    Use &TheUse = UI.getUse();
    Instruction *User = cast<Instruction>(*UI);

    // Preincrement use iterator so we don't invalidate it.
    ++UI;

    // Don't bother for PHI nodes.
    if (isa<PHINode>(User))
      continue;

    // Figure out which BB this cmp is used in.
    BasicBlock *UserBB = User->getParent();

    // If this user is in the same block as the cmp, don't change the cmp.
    if (UserBB == DefBB) continue;

    // If we have already inserted a cmp into this block, use it.
    CmpInst *&InsertedCmp = InsertedCmps[UserBB];

    if (!InsertedCmp) {
      BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
      InsertedCmp =
        CmpInst::Create(CI->getOpcode(),
                        CI->getPredicate(), CI->getOperand(0),
                        CI->getOperand(1), "", InsertPt);
      MadeChange = true;
    }

    // Replace a use of the cmp with a use of the new cmp.
    TheUse = InsertedCmp;
    ++NumCmpUses;
  }

  // If we removed all uses, nuke the cmp.
  if (CI->use_empty())
    CI->eraseFromParent();

  return MadeChange;
}

namespace {
class CodeGenPrepareFortifiedLibCalls : public SimplifyFortifiedLibCalls {
protected:
  void replaceCall(Value *With) {
    CI->replaceAllUsesWith(With);
    CI->eraseFromParent();
  }
  bool isFoldable(unsigned SizeCIOp, unsigned, bool) const {
    if (ConstantInt *SizeCI =
          dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp)))
      return SizeCI->isAllOnesValue();
    return false;
  }
};
} // end anonymous namespace

bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
  BasicBlock *BB = CI->getParent();

  // Lower inline assembly if we can.
  // If we found an inline asm expression, and if the target knows how to
  // lower it to normal LLVM code, do so now.
  if (TLI && isa<InlineAsm>(CI->getCalledValue())) {
    if (TLI->ExpandInlineAsm(CI)) {
      // Avoid invalidating the iterator.
      CurInstIterator = BB->begin();
      // Avoid processing instructions out of order, which could cause
      // reuse before a value is defined.
      SunkAddrs.clear();
      return true;
    }
    // Sink address computing for memory operands into the block.
    if (OptimizeInlineAsmInst(CI))
      return true;
  }

  // Lower all uses of llvm.objectsize.*
  IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
  if (II && II->getIntrinsicID() == Intrinsic::objectsize) {
    bool Min = (cast<ConstantInt>(II->getArgOperand(1))->getZExtValue() == 1);
    Type *ReturnTy = CI->getType();
    Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL);

    // Substituting this can cause recursive simplifications, which can
    // invalidate our iterator. Use a WeakVH to hold onto it in case this
    // happens.
    WeakVH IterHandle(CurInstIterator);

    replaceAndRecursivelySimplify(CI, RetVal, TLI ? TLI->getDataLayout() : 0,
                                  TLInfo, ModifiedDT ? 0 : DT);

    // If the iterator instruction was recursively deleted, start over at the
    // start of the block.
    if (IterHandle != CurInstIterator) {
      CurInstIterator = BB->begin();
      SunkAddrs.clear();
    }
    return true;
  }

  if (II && TLI) {
    SmallVector<Value*, 2> PtrOps;
    Type *AccessTy;
    if (TLI->GetAddrModeArguments(II, PtrOps, AccessTy))
      while (!PtrOps.empty())
        if (OptimizeMemoryInst(II, PtrOps.pop_back_val(), AccessTy))
          return true;
  }

  // From here on out we're working with named functions.
  if (CI->getCalledFunction() == 0) return false;

  // We'll need DataLayout from here on out.
  const DataLayout *TD = TLI ? TLI->getDataLayout() : 0;
  if (!TD) return false;

  // Lower all default uses of _chk calls. This is very similar
  // to what InstCombineCalls does, but here we are only lowering calls
  // that have the default "don't know" as the objectsize. Anything else
  // should be left alone.
  CodeGenPrepareFortifiedLibCalls Simplifier;
  return Simplifier.fold(CI, TD, TLInfo);
}

/// DupRetToEnableTailCallOpts - Look for opportunities to duplicate return
/// instructions to the predecessor to enable tail call optimizations. The
/// case it is currently looking for is:
/// @code
/// bb0:
///   %tmp0 = tail call i32 @f0()
///   br label %return
/// bb1:
///   %tmp1 = tail call i32 @f1()
///   br label %return
/// bb2:
///   %tmp2 = tail call i32 @f2()
///   br label %return
/// return:
///   %retval = phi i32 [ %tmp0, %bb0 ], [ %tmp1, %bb1 ], [ %tmp2, %bb2 ]
///   ret i32 %retval
/// @endcode
///
/// =>
///
/// @code
/// bb0:
///   %tmp0 = tail call i32 @f0()
///   ret i32 %tmp0
/// bb1:
///   %tmp1 = tail call i32 @f1()
///   ret i32 %tmp1
/// bb2:
///   %tmp2 = tail call i32 @f2()
///   ret i32 %tmp2
/// @endcode
bool CodeGenPrepare::DupRetToEnableTailCallOpts(BasicBlock *BB) {
  if (!TLI)
    return false;

  ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator());
  if (!RI)
    return false;

  PHINode *PN = 0;
  BitCastInst *BCI = 0;
  Value *V = RI->getReturnValue();
  if (V) {
    BCI = dyn_cast<BitCastInst>(V);
    if (BCI)
      V = BCI->getOperand(0);

    PN = dyn_cast<PHINode>(V);
    if (!PN)
      return false;
  }

  if (PN && PN->getParent() != BB)
    return false;

  // It's not safe to eliminate the sign / zero extension of the return value.
  // See llvm::isInTailCallPosition().
  const Function *F = BB->getParent();
  AttributeSet CallerAttrs = F->getAttributes();
  if (CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt) ||
      CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt))
    return false;

  // Make sure there are no instructions between the PHI and return, or that the
  // return is the first instruction in the block.
  if (PN) {
    BasicBlock::iterator BI = BB->begin();
    do { ++BI; } while (isa<DbgInfoIntrinsic>(BI));
    if (&*BI == BCI)
      // Also skip over the bitcast.
      ++BI;
    if (&*BI != RI)
      return false;
  } else {
    BasicBlock::iterator BI = BB->begin();
    while (isa<DbgInfoIntrinsic>(BI)) ++BI;
    if (&*BI != RI)
      return false;
  }

  /// Only dup the ReturnInst if the CallInst is likely to be emitted as a tail
  /// call.
  SmallVector<CallInst*, 4> TailCalls;
  if (PN) {
    for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) {
      CallInst *CI = dyn_cast<CallInst>(PN->getIncomingValue(I));
      // Make sure the phi value is indeed produced by the tail call.
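      // That is, the call must live in the predecessor block feeding this PHI
      // operand and have no use other than the PHI itself.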
      if (CI && CI->hasOneUse() && CI->getParent() == PN->getIncomingBlock(I) &&
          TLI->mayBeEmittedAsTailCall(CI))
        TailCalls.push_back(CI);
    }
  } else {
    SmallPtrSet<BasicBlock*, 4> VisitedBBs;
    for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) {
      if (!VisitedBBs.insert(*PI))
        continue;

      BasicBlock::InstListType &InstList = (*PI)->getInstList();
      BasicBlock::InstListType::reverse_iterator RI = InstList.rbegin();
      BasicBlock::InstListType::reverse_iterator RE = InstList.rend();
      do { ++RI; } while (RI != RE && isa<DbgInfoIntrinsic>(&*RI));
      if (RI == RE)
        continue;

      CallInst *CI = dyn_cast<CallInst>(&*RI);
      if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI))
        TailCalls.push_back(CI);
    }
  }

  bool Changed = false;
  for (unsigned i = 0, e = TailCalls.size(); i != e; ++i) {
    CallInst *CI = TailCalls[i];
    CallSite CS(CI);

    // Conservatively require the attributes of the call to match those of the
    // return. Ignore noalias because it doesn't affect the call sequence.
    AttributeSet CalleeAttrs = CS.getAttributes();
    if (AttrBuilder(CalleeAttrs, AttributeSet::ReturnIndex).
          removeAttribute(Attribute::NoAlias) !=
        AttrBuilder(CallerAttrs, AttributeSet::ReturnIndex).
          removeAttribute(Attribute::NoAlias))
      continue;

    // Make sure the call instruction is followed by an unconditional branch to
    // the return block.
    BasicBlock *CallBB = CI->getParent();
    BranchInst *BI = dyn_cast<BranchInst>(CallBB->getTerminator());
    if (!BI || !BI->isUnconditional() || BI->getSuccessor(0) != BB)
      continue;

    // Duplicate the return into CallBB.
    (void)FoldReturnIntoUncondBranch(RI, BB, CallBB);
    ModifiedDT = Changed = true;
    ++NumRetsDup;
  }

  // If we eliminated all predecessors of the block, delete the block now.
  if (Changed && !BB->hasAddressTaken() && pred_begin(BB) == pred_end(BB))
    BB->eraseFromParent();

  return Changed;
}

//===----------------------------------------------------------------------===//
// Memory Optimization
//===----------------------------------------------------------------------===//

namespace {

/// ExtAddrMode - This is an extended version of TargetLowering::AddrMode
/// which holds actual Value*'s for register values.
struct ExtAddrMode : public TargetLowering::AddrMode {
  Value *BaseReg;
  Value *ScaledReg;
  ExtAddrMode() : BaseReg(0), ScaledReg(0) {}
  void print(raw_ostream &OS) const;
  void dump() const;

  bool operator==(const ExtAddrMode& O) const {
    return (BaseReg == O.BaseReg) && (ScaledReg == O.ScaledReg) &&
           (BaseGV == O.BaseGV) && (BaseOffs == O.BaseOffs) &&
           (HasBaseReg == O.HasBaseReg) && (Scale == O.Scale);
  }
};

#ifndef NDEBUG
static inline raw_ostream &operator<<(raw_ostream &OS, const ExtAddrMode &AM) {
  AM.print(OS);
  return OS;
}
#endif

void ExtAddrMode::print(raw_ostream &OS) const {
  bool NeedPlus = false;
  OS << "[";
  if (BaseGV) {
    OS << (NeedPlus ? " + " : "")
       << "GV:";
    WriteAsOperand(OS, BaseGV, /*PrintType=*/false);
    NeedPlus = true;
  }

  if (BaseOffs)
    OS << (NeedPlus ? " + " : "") << BaseOffs, NeedPlus = true;

  if (BaseReg) {
    OS << (NeedPlus ? " + " : "")
       << "Base:";
    WriteAsOperand(OS, BaseReg, /*PrintType=*/false);
    NeedPlus = true;
  }
  if (Scale) {
    OS << (NeedPlus ? " + " : "")
       << Scale << "*";
    WriteAsOperand(OS, ScaledReg, /*PrintType=*/false);
  }

  OS << ']';
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void ExtAddrMode::dump() const {
  print(dbgs());
  dbgs() << '\n';
}
#endif


/// \brief A helper class for matching addressing modes.
///
/// This encapsulates the logic for matching the target-legal addressing modes.
class AddressingModeMatcher {
  SmallVectorImpl<Instruction*> &AddrModeInsts;
  const TargetLowering &TLI;

  /// AccessTy/MemoryInst - This is the type for the access (e.g. double) and
  /// the memory instruction that we're computing this address for.
  Type *AccessTy;
  Instruction *MemoryInst;

  /// AddrMode - This is the addressing mode that we're building up. This is
  /// part of the return value of this addressing mode matching stuff.
  ExtAddrMode &AddrMode;

  /// IgnoreProfitability - This is set to true when we should not do
  /// profitability checks. When true, IsProfitableToFoldIntoAddressingMode
  /// always returns true.
  bool IgnoreProfitability;

  AddressingModeMatcher(SmallVectorImpl<Instruction*> &AMI,
                        const TargetLowering &T, Type *AT,
                        Instruction *MI, ExtAddrMode &AM)
    : AddrModeInsts(AMI), TLI(T), AccessTy(AT), MemoryInst(MI), AddrMode(AM) {
    IgnoreProfitability = false;
  }
public:

  /// Match - Find the maximal addressing mode that a load/store of V can fold,
  /// given an access type of AccessTy. This returns a list of involved
  /// instructions in AddrModeInsts.
  static ExtAddrMode Match(Value *V, Type *AccessTy,
                           Instruction *MemoryInst,
                           SmallVectorImpl<Instruction*> &AddrModeInsts,
                           const TargetLowering &TLI) {
    ExtAddrMode Result;

    bool Success =
      AddressingModeMatcher(AddrModeInsts, TLI, AccessTy,
                            MemoryInst, Result).MatchAddr(V, 0);
    (void)Success; assert(Success && "Couldn't select *anything*?");
    return Result;
  }
private:
  bool MatchScaledValue(Value *ScaleReg, int64_t Scale, unsigned Depth);
  bool MatchAddr(Value *V, unsigned Depth);
  bool MatchOperationAddr(User *Operation, unsigned Opcode, unsigned Depth);
  bool IsProfitableToFoldIntoAddressingMode(Instruction *I,
                                            ExtAddrMode &AMBefore,
                                            ExtAddrMode &AMAfter);
  bool ValueAlreadyLiveAtInst(Value *Val, Value *KnownLive1, Value *KnownLive2);
};

/// MatchScaledValue - Try adding ScaleReg*Scale to the current addressing mode.
/// Return true and update AddrMode if this addr mode is legal for the target,
/// false if not.
bool AddressingModeMatcher::MatchScaledValue(Value *ScaleReg, int64_t Scale,
                                             unsigned Depth) {
  // If Scale is 1, then this is the same as adding ScaleReg to the addressing
  // mode. Just process that directly.
  if (Scale == 1)
    return MatchAddr(ScaleReg, Depth);

  // If the scale is 0, it takes nothing to add this.
  if (Scale == 0)
    return true;

  // If we already have a scale of this value, we can add to it, otherwise, we
  // need an available scale field.
  if (AddrMode.Scale != 0 && AddrMode.ScaledReg != ScaleReg)
    return false;

  ExtAddrMode TestAddrMode = AddrMode;

  // Add scale to turn X*4+X*3 -> X*7. This could also do things like
  // [A+B + A*7] -> [B+A*8].
  TestAddrMode.Scale += Scale;
  TestAddrMode.ScaledReg = ScaleReg;

  // If the new address isn't legal, bail out.
  if (!TLI.isLegalAddressingMode(TestAddrMode, AccessTy))
    return false;

  // It was legal, so commit it.
  AddrMode = TestAddrMode;

  // Okay, we decided that we can add ScaleReg+Scale to AddrMode. Check now
  // to see if ScaleReg is actually X+C. If so, we can turn this into adding
  // X*Scale + C*Scale to addr mode.
  ConstantInt *CI = 0; Value *AddLHS = 0;
  if (isa<Instruction>(ScaleReg) &&  // not a constant expr.
      match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI)))) {
    TestAddrMode.ScaledReg = AddLHS;
    TestAddrMode.BaseOffs += CI->getSExtValue()*TestAddrMode.Scale;

    // If this addressing mode is legal, commit it and remember that we folded
    // this instruction.
    if (TLI.isLegalAddressingMode(TestAddrMode, AccessTy)) {
      AddrModeInsts.push_back(cast<Instruction>(ScaleReg));
      AddrMode = TestAddrMode;
      return true;
    }
  }

  // Otherwise, not (x+c)*scale, just return what we have.
  return true;
}

/// MightBeFoldableInst - This is a little filter, which returns true if an
/// addressing computation involving I might be folded into a load/store
/// accessing it. This doesn't need to be perfect, but needs to accept at least
/// the set of instructions that MatchOperationAddr can.
static bool MightBeFoldableInst(Instruction *I) {
  switch (I->getOpcode()) {
  case Instruction::BitCast:
    // Don't touch identity bitcasts.
    if (I->getType() == I->getOperand(0)->getType())
      return false;
    return I->getType()->isPointerTy() || I->getType()->isIntegerTy();
  case Instruction::PtrToInt:
    // PtrToInt is always a noop, as we know that the int type is pointer sized.
    return true;
  case Instruction::IntToPtr:
    // We know the input is intptr_t, so this is foldable.
    return true;
  case Instruction::Add:
    return true;
  case Instruction::Mul:
  case Instruction::Shl:
    // Can only handle X*C and X << C.
    return isa<ConstantInt>(I->getOperand(1));
  case Instruction::GetElementPtr:
    return true;
  default:
    return false;
  }
}

/// MatchOperationAddr - Given an instruction or constant expr, see if we can
/// fold the operation into the addressing mode. If so, update the addressing
/// mode and return true, otherwise return false without modifying AddrMode.
bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
                                               unsigned Depth) {
  // Avoid exponential behavior on extremely deep expression trees.
  if (Depth >= 5) return false;

  switch (Opcode) {
  case Instruction::PtrToInt:
    // PtrToInt is always a noop, as we know that the int type is pointer sized.
    return MatchAddr(AddrInst->getOperand(0), Depth);
  case Instruction::IntToPtr:
    // This inttoptr is a no-op if the integer type is pointer sized.
    if (TLI.getValueType(AddrInst->getOperand(0)->getType()) ==
        TLI.getPointerTy(AddrInst->getType()->getPointerAddressSpace()))
      return MatchAddr(AddrInst->getOperand(0), Depth);
    return false;
  case Instruction::BitCast:
    // BitCast is always a noop, and we can handle it as long as it is
    // int->int or pointer->pointer (we don't want int<->fp or something).
    if ((AddrInst->getOperand(0)->getType()->isPointerTy() ||
         AddrInst->getOperand(0)->getType()->isIntegerTy()) &&
        // Don't touch identity bitcasts. These were probably put here by LSR,
        // and we don't want to mess around with them. Assume it knows what it
        // is doing.
        AddrInst->getOperand(0)->getType() != AddrInst->getType())
      return MatchAddr(AddrInst->getOperand(0), Depth);
    return false;
  case Instruction::Add: {
    // Check to see if we can merge in the RHS then the LHS. If so, we win.
    ExtAddrMode BackupAddrMode = AddrMode;
    unsigned OldSize = AddrModeInsts.size();
    if (MatchAddr(AddrInst->getOperand(1), Depth+1) &&
        MatchAddr(AddrInst->getOperand(0), Depth+1))
      return true;

    // Restore the old addr mode info.
    AddrMode = BackupAddrMode;
    AddrModeInsts.resize(OldSize);

    // Otherwise this was over-aggressive. Try merging in the LHS then the RHS.
    if (MatchAddr(AddrInst->getOperand(0), Depth+1) &&
        MatchAddr(AddrInst->getOperand(1), Depth+1))
      return true;

    // Otherwise we definitely can't merge the ADD in.
    AddrMode = BackupAddrMode;
    AddrModeInsts.resize(OldSize);
    break;
  }
  //case Instruction::Or:
  // TODO: We can handle "Or Val, Imm" iff this OR is equivalent to an ADD.
  //break;
  case Instruction::Mul:
  case Instruction::Shl: {
    // Can only handle X*C and X << C.
    ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1));
    if (!RHS) return false;
    int64_t Scale = RHS->getSExtValue();
    if (Opcode == Instruction::Shl)
      Scale = 1LL << Scale;

    return MatchScaledValue(AddrInst->getOperand(0), Scale, Depth);
  }
  case Instruction::GetElementPtr: {
    // Scan the GEP, checking that it contains only constant offsets and at
    // most one variable offset.
    int VariableOperand = -1;
    unsigned VariableScale = 0;

    int64_t ConstantOffset = 0;
    const DataLayout *TD = TLI.getDataLayout();
    gep_type_iterator GTI = gep_type_begin(AddrInst);
    for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) {
      if (StructType *STy = dyn_cast<StructType>(*GTI)) {
        const StructLayout *SL = TD->getStructLayout(STy);
        unsigned Idx =
          cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue();
        ConstantOffset += SL->getElementOffset(Idx);
      } else {
        uint64_t TypeSize = TD->getTypeAllocSize(GTI.getIndexedType());
        if (ConstantInt *CI = dyn_cast<ConstantInt>(AddrInst->getOperand(i))) {
          ConstantOffset += CI->getSExtValue()*TypeSize;
        } else if (TypeSize) {  // Scales of zero don't do anything.
          // We only allow one variable index at the moment.
          if (VariableOperand != -1)
            return false;

          // Remember the variable index.
          VariableOperand = i;
          VariableScale = TypeSize;
        }
      }
    }

    // A common case is for the GEP to only do a constant offset. In this case,
    // just add it to the disp field and check validity.
    if (VariableOperand == -1) {
      AddrMode.BaseOffs += ConstantOffset;
      if (ConstantOffset == 0 || TLI.isLegalAddressingMode(AddrMode, AccessTy)){
        // Check to see if we can fold the base pointer in too.
        if (MatchAddr(AddrInst->getOperand(0), Depth+1))
          return true;
      }
      AddrMode.BaseOffs -= ConstantOffset;
      return false;
    }

    // Save the valid addressing mode in case we can't match.
    ExtAddrMode BackupAddrMode = AddrMode;
    unsigned OldSize = AddrModeInsts.size();

    // See if the scale and offset amount is valid for this target.
    AddrMode.BaseOffs += ConstantOffset;

    // Match the base operand of the GEP.
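    // If the base fails to match but no base register has been chosen yet,
    // fall back to using the GEP's pointer operand as the base register.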
    if (!MatchAddr(AddrInst->getOperand(0), Depth+1)) {
      // If it couldn't be matched, just stuff the value in a register.
      if (AddrMode.HasBaseReg) {
        AddrMode = BackupAddrMode;
        AddrModeInsts.resize(OldSize);
        return false;
      }
      AddrMode.HasBaseReg = true;
      AddrMode.BaseReg = AddrInst->getOperand(0);
    }

    // Match the remaining variable portion of the GEP.
    if (!MatchScaledValue(AddrInst->getOperand(VariableOperand), VariableScale,
                          Depth)) {
      // If it couldn't be matched, try stuffing the base into a register
      // instead of matching it, and retrying the match of the scale.
      AddrMode = BackupAddrMode;
      AddrModeInsts.resize(OldSize);
      if (AddrMode.HasBaseReg)
        return false;
      AddrMode.HasBaseReg = true;
      AddrMode.BaseReg = AddrInst->getOperand(0);
      AddrMode.BaseOffs += ConstantOffset;
      if (!MatchScaledValue(AddrInst->getOperand(VariableOperand),
                            VariableScale, Depth)) {
        // If even that didn't work, bail.
        AddrMode = BackupAddrMode;
        AddrModeInsts.resize(OldSize);
        return false;
      }
    }

    return true;
  }
  }
  return false;
}

/// MatchAddr - If we can, try to add the value of 'Addr' into the current
/// addressing mode. If Addr can't be added to AddrMode this returns false and
/// leaves AddrMode unmodified. This assumes that Addr is either a pointer type
/// or intptr_t for the target.
///
bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) {
  if (ConstantInt *CI = dyn_cast<ConstantInt>(Addr)) {
    // Fold in immediates if legal for the target.
    AddrMode.BaseOffs += CI->getSExtValue();
    if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
      return true;
    AddrMode.BaseOffs -= CI->getSExtValue();
  } else if (GlobalValue *GV = dyn_cast<GlobalValue>(Addr)) {
    // If this is a global variable, try to fold it into the addressing mode.
    if (AddrMode.BaseGV == 0) {
      AddrMode.BaseGV = GV;
      if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
        return true;
      AddrMode.BaseGV = 0;
    }
  } else if (Instruction *I = dyn_cast<Instruction>(Addr)) {
    ExtAddrMode BackupAddrMode = AddrMode;
    unsigned OldSize = AddrModeInsts.size();

    // Check to see if it is possible to fold this operation.
    if (MatchOperationAddr(I, I->getOpcode(), Depth)) {
      // Okay, it's possible to fold this. Check to see if it is actually
      // *profitable* to do so. We use a simple cost model to avoid increasing
      // register pressure too much.
      if (I->hasOneUse() ||
          IsProfitableToFoldIntoAddressingMode(I, BackupAddrMode, AddrMode)) {
        AddrModeInsts.push_back(I);
        return true;
      }

      // It isn't profitable to do this, roll back.
      //cerr << "NOT FOLDING: " << *I;
      AddrMode = BackupAddrMode;
      AddrModeInsts.resize(OldSize);
    }
  } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) {
    if (MatchOperationAddr(CE, CE->getOpcode(), Depth))
      return true;
  } else if (isa<ConstantPointerNull>(Addr)) {
    // Null pointer gets folded without affecting the addressing mode.
    return true;
  }

  // Worst case, the target should support [reg] addressing modes. :)
  if (!AddrMode.HasBaseReg) {
    AddrMode.HasBaseReg = true;
    AddrMode.BaseReg = Addr;
    // Still check for legality in case the target supports [imm] but not [i+r].
    if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
      return true;
    AddrMode.HasBaseReg = false;
    AddrMode.BaseReg = 0;
  }

  // If the base register is already taken, see if we can do [r+r].
  if (AddrMode.Scale == 0) {
    AddrMode.Scale = 1;
    AddrMode.ScaledReg = Addr;
    if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
      return true;
    AddrMode.Scale = 0;
    AddrMode.ScaledReg = 0;
  }
  // Couldn't match.
  return false;
}

/// IsOperandAMemoryOperand - Check to see if all uses of OpVal by the specified
/// inline asm call are due to memory operands. If so, return true, otherwise
/// return false.
static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
                                    const TargetLowering &TLI) {
  TargetLowering::AsmOperandInfoVector TargetConstraints =
    TLI.ParseConstraints(ImmutableCallSite(CI));
  for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
    TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];

    // Compute the constraint code and ConstraintType to use.
    TLI.ComputeConstraintToUse(OpInfo, SDValue());

    // If this asm operand is our Value*, and if it isn't an indirect memory
    // operand, we can't fold it!
    if (OpInfo.CallOperandVal == OpVal &&
        (OpInfo.ConstraintType != TargetLowering::C_Memory ||
         !OpInfo.isIndirect))
      return false;
  }

  return true;
}

/// FindAllMemoryUses - Recursively walk all the uses of I until we find a
/// memory use. If we find an obviously non-foldable instruction, return true.
/// Add the ultimately found memory instructions to MemoryUses.
static bool FindAllMemoryUses(Instruction *I,
                SmallVectorImpl<std::pair<Instruction*,unsigned> > &MemoryUses,
                              SmallPtrSet<Instruction*, 16> &ConsideredInsts,
                              const TargetLowering &TLI) {
  // If we already considered this instruction, we're done.
  if (!ConsideredInsts.insert(I))
    return false;

  // If this is an obviously unfoldable instruction, bail out.
  if (!MightBeFoldableInst(I))
    return true;

  // Loop over all the uses, recursively processing them.
  for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
       UI != E; ++UI) {
    User *U = *UI;

    if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
      MemoryUses.push_back(std::make_pair(LI, UI.getOperandNo()));
      continue;
    }

    if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
      unsigned opNo = UI.getOperandNo();
      if (opNo == 0) return true; // Storing addr, not into addr.
      MemoryUses.push_back(std::make_pair(SI, opNo));
      continue;
    }

    if (CallInst *CI = dyn_cast<CallInst>(U)) {
      InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue());
      if (!IA) return true;

      // If this is a memory operand, we're cool, otherwise bail out.
      if (!IsOperandAMemoryOperand(CI, IA, I, TLI))
        return true;
      continue;
    }

    if (FindAllMemoryUses(cast<Instruction>(U), MemoryUses, ConsideredInsts,
                          TLI))
      return true;
  }

  return false;
}

/// ValueAlreadyLiveAtInst - Return true if Val is already known to be live at
/// the use site that we're folding it into. If so, there is no cost to
/// include it in the addressing mode. KnownLive1 and KnownLive2 are two values
/// that we know are live at the instruction already.
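/// (Constants, static allocas in the entry block, and values already used in
/// MemoryInst's block are all treated as free to include.)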
bool AddressingModeMatcher::ValueAlreadyLiveAtInst(Value *Val,Value *KnownLive1,
                                                   Value *KnownLive2) {
  // If Val is either of the known-live values, we know it is live!
  if (Val == 0 || Val == KnownLive1 || Val == KnownLive2)
    return true;

  // All values other than instructions and arguments (e.g. constants) are live.
  if (!isa<Instruction>(Val) && !isa<Argument>(Val)) return true;

  // If Val is a constant sized alloca in the entry block, it is live; this is
  // true because it is just a reference to the stack/frame pointer, which is
  // live for the whole function.
  if (AllocaInst *AI = dyn_cast<AllocaInst>(Val))
    if (AI->isStaticAlloca())
      return true;

  // Check to see if this value is already used in the memory instruction's
  // block. If so, it's already live into the block at the very least, so we
  // can reasonably fold it.
  return Val->isUsedInBasicBlock(MemoryInst->getParent());
}

/// IsProfitableToFoldIntoAddressingMode - It is possible for the addressing
/// mode of the machine to fold the specified instruction into a load or store
/// that ultimately uses it. However, the specified instruction has multiple
/// uses. Given this, it may actually increase register pressure to fold it
/// into the load. For example, consider this code:
///
///     X = ...
///     Y = X+1
///     use(Y)   -> nonload/store
///     Z = Y+1
///     load Z
///
/// In this case, Y has multiple uses, and can be folded into the load of Z
/// (yielding load [X+2]). However, doing this will cause both "X" and "X+1" to
/// be live at the use(Y) line. If we don't fold Y into load Z, we use one
/// fewer register. Since Y can't be folded into "use(Y)" we don't increase the
/// number of computations either.
///
/// Note that this (like most of CodeGenPrepare) is just a rough heuristic. If
/// X was live across 'load Z' for other reasons, we actually *would* want to
/// fold the addressing mode in the Z case. This would make Y die earlier.
bool AddressingModeMatcher::
IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
                                     ExtAddrMode &AMAfter) {
  if (IgnoreProfitability) return true;

  // AMBefore is the addressing mode before this instruction was folded into it,
  // and AMAfter is the addressing mode after the instruction was folded. Get
  // the set of registers referenced by AMAfter and subtract out those
  // referenced by AMBefore: this is the set of values which folding in this
  // address extends the lifetime of.
  //
  // Note that there are only two potential values being referenced here,
  // BaseReg and ScaleReg (global addresses are always available, as are any
  // folded immediates).
  Value *BaseReg = AMAfter.BaseReg, *ScaledReg = AMAfter.ScaledReg;

  // If the BaseReg or ScaledReg was referenced by the previous addrmode, their
  // lifetime wasn't extended by adding this instruction.
  if (ValueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg))
    BaseReg = 0;
  if (ValueAlreadyLiveAtInst(ScaledReg, AMBefore.BaseReg, AMBefore.ScaledReg))
    ScaledReg = 0;

  // If folding this instruction (and its subexprs) didn't extend any live
  // ranges, we're ok with it.
  if (BaseReg == 0 && ScaledReg == 0)
    return true;

  // If all uses of this instruction are ultimately load/store/inlineasm's,
  // check to see if their addressing modes will include this instruction. If
  // so, we can fold it into all uses, so it doesn't matter if it has multiple
  // uses.
  SmallVector<std::pair<Instruction*,unsigned>, 16> MemoryUses;
  SmallPtrSet<Instruction*, 16> ConsideredInsts;
  if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI))
    return false;  // Has a non-memory, non-foldable use!

  // Now that we know that all uses of this instruction are part of a chain of
  // computation involving only operations that could theoretically be folded
  // into a memory use, loop over each of these uses and see if they could
  // *actually* fold the instruction.
  SmallVector<Instruction*, 32> MatchedAddrModeInsts;
  for (unsigned i = 0, e = MemoryUses.size(); i != e; ++i) {
    Instruction *User = MemoryUses[i].first;
    unsigned OpNo = MemoryUses[i].second;

    // Get the access type of this use. If the use isn't a pointer, we don't
    // know what it accesses.
    Value *Address = User->getOperand(OpNo);
    if (!Address->getType()->isPointerTy())
      return false;
    Type *AddressAccessTy = Address->getType()->getPointerElementType();

    // Do a match against the root of this address, ignoring profitability. This
    // will tell us if the addressing mode for the memory operation will
    // *actually* cover the shared instruction.
    ExtAddrMode Result;
    AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI, AddressAccessTy,
                                  MemoryInst, Result);
    Matcher.IgnoreProfitability = true;
    bool Success = Matcher.MatchAddr(Address, 0);
    (void)Success; assert(Success && "Couldn't select *anything*?");

    // If the match didn't cover I, then it won't be shared by it.
    if (std::find(MatchedAddrModeInsts.begin(), MatchedAddrModeInsts.end(),
                  I) == MatchedAddrModeInsts.end())
      return false;

    MatchedAddrModeInsts.clear();
  }

  return true;
}

} // end anonymous namespace

/// IsNonLocalValue - Return true if the specified value is defined in a
/// different basic block than BB.
static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
  if (Instruction *I = dyn_cast<Instruction>(V))
    return I->getParent() != BB;
  return false;
}

/// OptimizeMemoryInst - Load and Store Instructions often have
/// addressing modes that can do significant amounts of computation. As such,
/// instruction selection will try to get the load or store to do as much
/// computation as possible for the program. The problem is that isel can only
/// see within a single block. As such, we sink as much legal addressing mode
/// stuff into the block as possible.
///
/// This method is used to optimize both load/store and inline asms with memory
/// operands.
bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
                                        Type *AccessTy) {
  Value *Repl = Addr;

  // Try to collapse single-value PHI nodes. This is necessary to undo
  // unprofitable PRE transformations.
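  // Addr may be a PHI (or a tree of PHIs); walk through them and require that
  // every non-PHI root yields the same addressing mode before sinking anything.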
  SmallVector<Value*, 8> worklist;
  SmallPtrSet<Value*, 16> Visited;
  worklist.push_back(Addr);

  // Use a worklist to iteratively look through PHI nodes, and ensure that
  // the addressing modes obtained from the non-PHI roots of the graph
  // are equivalent.
  Value *Consensus = 0;
  unsigned NumUsesConsensus = 0;
  bool IsNumUsesConsensusValid = false;
  SmallVector<Instruction*, 16> AddrModeInsts;
  ExtAddrMode AddrMode;
  while (!worklist.empty()) {
    Value *V = worklist.back();
    worklist.pop_back();

    // Break use-def graph loops.
    if (!Visited.insert(V)) {
      Consensus = 0;
      break;
    }

    // For a PHI node, push all of its incoming values.
    if (PHINode *P = dyn_cast<PHINode>(V)) {
      for (unsigned i = 0, e = P->getNumIncomingValues(); i != e; ++i)
        worklist.push_back(P->getIncomingValue(i));
      continue;
    }

    // For non-PHIs, determine the addressing mode being computed.
    SmallVector<Instruction*, 16> NewAddrModeInsts;
    ExtAddrMode NewAddrMode =
      AddressingModeMatcher::Match(V, AccessTy, MemoryInst,
                                   NewAddrModeInsts, *TLI);

    // This check is broken into two cases with very similar code to avoid using
    // getNumUses() as much as possible. Some values have a lot of uses, so
    // calling getNumUses() unconditionally caused a significant compile-time
    // regression.
    if (!Consensus) {
      Consensus = V;
      AddrMode = NewAddrMode;
      AddrModeInsts = NewAddrModeInsts;
      continue;
    } else if (NewAddrMode == AddrMode) {
      if (!IsNumUsesConsensusValid) {
        NumUsesConsensus = Consensus->getNumUses();
        IsNumUsesConsensusValid = true;
      }

      // Ensure that the obtained addressing mode is equivalent to that obtained
      // for all other roots of the PHI traversal. Also, when choosing one
      // such root as representative, select the one with the most uses in order
      // to keep the cost modeling heuristics in AddressingModeMatcher
      // applicable.
      unsigned NumUses = V->getNumUses();
      if (NumUses > NumUsesConsensus) {
        Consensus = V;
        NumUsesConsensus = NumUses;
        AddrModeInsts = NewAddrModeInsts;
      }
      continue;
    }

    Consensus = 0;
    break;
  }

  // If the addressing mode couldn't be determined, or if multiple different
  // ones were determined, bail out now.
  if (!Consensus) return false;

  // Check to see if any of the instructions subsumed by this addr mode are
  // non-local to I's BB.
  bool AnyNonLocal = false;
  for (unsigned i = 0, e = AddrModeInsts.size(); i != e; ++i) {
    if (IsNonLocalValue(AddrModeInsts[i], MemoryInst->getParent())) {
      AnyNonLocal = true;
      break;
    }
  }

  // If all the instructions matched are already in this BB, don't do anything.
  if (!AnyNonLocal) {
    DEBUG(dbgs() << "CGP: Found local addrmode: " << AddrMode << "\n");
    return false;
  }

  // Insert this computation right after this user. Since our caller is
  // scanning from the top of the BB to the bottom, reuse of the expr is
  // guaranteed to happen later.
  IRBuilder<> Builder(MemoryInst);

  // Now that we have determined the addressing expression we want to use, and
  // know that we have to sink it into this block, check to see if we have
  // already done this for some other load/store instr in this block. If so,
  // reuse the computation.
  Value *&SunkAddr = SunkAddrs[Addr];
  if (SunkAddr) {
    DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode << " for "
                 << *MemoryInst);
    if (SunkAddr->getType() != Addr->getType())
      SunkAddr = Builder.CreateBitCast(SunkAddr, Addr->getType());
  } else {
    DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "
                 << *MemoryInst);
    Type *IntPtrTy = TLI->getDataLayout()->getIntPtrType(Addr->getType());
    Value *Result = 0;

    // Start with the base register.  Do this first so that subsequent address
    // matching finds it last, which will prevent it from trying to match it
    // as the scaled value in case it happens to be a mul.  That would be
    // problematic if we've sunk a different mul for the scale, because then
    // we'd end up sinking both muls.
    if (AddrMode.BaseReg) {
      Value *V = AddrMode.BaseReg;
      if (V->getType()->isPointerTy())
        V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
      if (V->getType() != IntPtrTy)
        V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr");
      Result = V;
    }

    // Add the scale value.
    if (AddrMode.Scale) {
      Value *V = AddrMode.ScaledReg;
      if (V->getType() == IntPtrTy) {
        // done.
      } else if (V->getType()->isPointerTy()) {
        V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
      } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() <
                 cast<IntegerType>(V->getType())->getBitWidth()) {
        V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
      } else {
        V = Builder.CreateSExt(V, IntPtrTy, "sunkaddr");
      }
      if (AddrMode.Scale != 1)
        V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale),
                              "sunkaddr");
      if (Result)
        Result = Builder.CreateAdd(Result, V, "sunkaddr");
      else
        Result = V;
    }

    // Add in the BaseGV if present.
    if (AddrMode.BaseGV) {
      Value *V = Builder.CreatePtrToInt(AddrMode.BaseGV, IntPtrTy, "sunkaddr");
      if (Result)
        Result = Builder.CreateAdd(Result, V, "sunkaddr");
      else
        Result = V;
    }

    // Add in the Base Offset if present.
    if (AddrMode.BaseOffs) {
      Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
      if (Result)
        Result = Builder.CreateAdd(Result, V, "sunkaddr");
      else
        Result = V;
    }

    if (Result == 0)
      SunkAddr = Constant::getNullValue(Addr->getType());
    else
      SunkAddr = Builder.CreateIntToPtr(Result, Addr->getType(), "sunkaddr");
  }

  MemoryInst->replaceUsesOfWith(Repl, SunkAddr);

  // If the replaced address value is now dead, recursively delete it and any
  // instructions feeding it that also become dead.
  if (Repl->use_empty()) {
    // This can cause recursive deletion, which can invalidate our iterator.
    // Use a WeakVH to hold onto it in case this happens.
    WeakVH IterHandle(CurInstIterator);
    BasicBlock *BB = CurInstIterator->getParent();

    RecursivelyDeleteTriviallyDeadInstructions(Repl, TLInfo);

    if (IterHandle != CurInstIterator) {
      // If the iterator instruction was recursively deleted, start over at the
      // start of the block.
      CurInstIterator = BB->begin();
      SunkAddrs.clear();
    }
  }
  ++NumMemoryInsts;
  return true;
}

/// OptimizeInlineAsmInst - If there are any memory operands, use
/// OptimizeMemoryInst to sink their address computation into the block when
/// possible / profitable.
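///
/// For example (an illustrative sketch only; the names are made up), in
///     call void asm "movl $1, $0", "=*m,r"(i32* %ptr, i32 %val)
/// the "=*m" constraint is an indirect memory operand, so %ptr is a candidate
/// address to sink via OptimizeMemoryInst.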
bool CodeGenPrepare::OptimizeInlineAsmInst(CallInst *CS) {
  bool MadeChange = false;

  TargetLowering::AsmOperandInfoVector
    TargetConstraints = TLI->ParseConstraints(CS);
  unsigned ArgNo = 0;
  for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
    TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];

    // Compute the constraint code and ConstraintType to use.
    TLI->ComputeConstraintToUse(OpInfo, SDValue());

    if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
        OpInfo.isIndirect) {
      Value *OpVal = CS->getArgOperand(ArgNo++);
      MadeChange |= OptimizeMemoryInst(CS, OpVal, OpVal->getType());
    } else if (OpInfo.Type == InlineAsm::isInput)
      ArgNo++;
  }

  return MadeChange;
}

/// MoveExtToFormExtLoad - Move a zext or sext fed by a load into the same
/// basic block as the load, unless conditions are unfavorable.  This allows
/// SelectionDAG to fold the extend into the load.
///
bool CodeGenPrepare::MoveExtToFormExtLoad(Instruction *I) {
  // Look for a load being extended.
  LoadInst *LI = dyn_cast<LoadInst>(I->getOperand(0));
  if (!LI) return false;

  // If they're already in the same block, there's nothing to do.
  if (LI->getParent() == I->getParent())
    return false;

  // If the load has other users and the truncate is not free, this probably
  // isn't worthwhile.
  if (!LI->hasOneUse() &&
      TLI && (TLI->isTypeLegal(TLI->getValueType(LI->getType())) ||
              !TLI->isTypeLegal(TLI->getValueType(I->getType()))) &&
      !TLI->isTruncateFree(I->getType(), LI->getType()))
    return false;

  // Check whether the target supports casts folded into loads.
  unsigned LType;
  if (isa<ZExtInst>(I))
    LType = ISD::ZEXTLOAD;
  else {
    assert(isa<SExtInst>(I) && "Unexpected ext type!");
    LType = ISD::SEXTLOAD;
  }
  if (TLI && !TLI->isLoadExtLegal(LType, TLI->getValueType(LI->getType())))
    return false;

  // Move the extend into the same block as the load, so that SelectionDAG
  // can fold it.
  I->removeFromParent();
  I->insertAfter(LI);
  ++NumExtsMoved;
  return true;
}

bool CodeGenPrepare::OptimizeExtUses(Instruction *I) {
  BasicBlock *DefBB = I->getParent();

  // If the result of a {s|z}ext and its source are both live out, rewrite all
  // other uses of the source with the result of the extension.
  Value *Src = I->getOperand(0);
  if (Src->hasOneUse())
    return false;

  // Only do this xform if truncating is free.
  if (TLI && !TLI->isTruncateFree(I->getType(), Src->getType()))
    return false;

  // Only safe to perform the optimization if the source is also defined in
  // this block.
  if (!isa<Instruction>(Src) || DefBB != cast<Instruction>(Src)->getParent())
    return false;

  bool DefIsLiveOut = false;
  for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
       UI != E; ++UI) {
    Instruction *User = cast<Instruction>(*UI);

    // Figure out which BB this ext is used in.
    BasicBlock *UserBB = User->getParent();
    if (UserBB == DefBB) continue;
    DefIsLiveOut = true;
    break;
  }
  if (!DefIsLiveOut)
    return false;

  // Make sure none of the uses are PHI nodes.
  for (Value::use_iterator UI = Src->use_begin(), E = Src->use_end();
       UI != E; ++UI) {
    Instruction *User = cast<Instruction>(*UI);
    BasicBlock *UserBB = User->getParent();
    if (UserBB == DefBB) continue;
    // Be conservative.  We don't want this xform to end up introducing
    // reloads just before load / store instructions.
    if (isa<PHINode>(User) || isa<LoadInst>(User) || isa<StoreInst>(User))
      return false;
  }

  // InsertedTruncs - Only insert one trunc in each block.
  DenseMap<BasicBlock*, Instruction*> InsertedTruncs;

  bool MadeChange = false;
  for (Value::use_iterator UI = Src->use_begin(), E = Src->use_end();
       UI != E; ++UI) {
    Use &TheUse = UI.getUse();
    Instruction *User = cast<Instruction>(*UI);

    // Figure out which BB this ext is used in.
    BasicBlock *UserBB = User->getParent();
    if (UserBB == DefBB) continue;

    // Both src and def are live in this block.  Rewrite the use.
    Instruction *&InsertedTrunc = InsertedTruncs[UserBB];

    if (!InsertedTrunc) {
      BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
      InsertedTrunc = new TruncInst(I, Src->getType(), "", InsertPt);
    }

    // Replace a use of the {s|z}ext source with a use of the result.
    TheUse = InsertedTrunc;
    ++NumExtUses;
    MadeChange = true;
  }

  return MadeChange;
}

/// isFormingBranchFromSelectProfitable - Returns true if a SelectInst should
/// be turned into an explicit branch.
static bool isFormingBranchFromSelectProfitable(SelectInst *SI) {
  // FIXME: This should use the same heuristics as IfConversion to determine
  // whether a select is better represented as a branch.  This requires that
  // branch probability metadata is preserved for the select, which is not the
  // case currently.

  CmpInst *Cmp = dyn_cast<CmpInst>(SI->getCondition());

  // If the branch is predicted right, an out-of-order CPU can avoid blocking
  // on the compare.  For compares with a memory operand, emit a branch rather
  // than a cmov to avoid stalling on the load from memory.  If the compare has
  // more than one use, there's probably another cmov or setcc around, so it's
  // not worth emitting a branch.
  if (!Cmp)
    return false;

  Value *CmpOp0 = Cmp->getOperand(0);
  Value *CmpOp1 = Cmp->getOperand(1);

  // We check that the memory operand has one use to avoid uses of the loaded
  // value directly after the compare, making branches unprofitable.
  return Cmp->hasOneUse() &&
    ((isa<LoadInst>(CmpOp0) && CmpOp0->hasOneUse()) ||
     (isa<LoadInst>(CmpOp1) && CmpOp1->hasOneUse()));
}


/// If we have a SelectInst that will likely profit from branch prediction,
/// turn it into a branch.
bool CodeGenPrepare::OptimizeSelectInst(SelectInst *SI) {
  bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1);

  // Can we convert the 'select' into control flow?
  if (DisableSelectToBranch || OptSize || !TLI || VectorCond)
    return false;

  TargetLowering::SelectSupportKind SelectKind;
  if (VectorCond)
    SelectKind = TargetLowering::VectorMaskSelect;
  else if (SI->getType()->isVectorTy())
    SelectKind = TargetLowering::ScalarCondVectorVal;
  else
    SelectKind = TargetLowering::ScalarValSelect;

  // Do we have efficient codegen support for this kind of select?
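  //
  // If not, or if a mispredicted select would be expensive on this target,
  // the code below rewrites the select into explicit control flow.  Roughly
  // (an illustrative sketch of the resulting IR; block names other than
  // select.mid/select.end are made up):
  //     %res = select i1 %cmp, i32 %tval, i32 %fval
  // becomes
  //       br i1 %cmp, label %select.end, label %select.mid
  //     select.mid:
  //       br label %select.end
  //     select.end:
  //       %res = phi i32 [ %tval, %entry ], [ %fval, %select.mid ]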
  if (TLI->isSelectSupported(SelectKind)) {
    // We have efficient codegen support for the select instruction.
    // Check if it is profitable to keep this 'select'.
    if (!TLI->isPredictableSelectExpensive() ||
        !isFormingBranchFromSelectProfitable(SI))
      return false;
  }

  ModifiedDT = true;

  // First, we split the block containing the select into 2 blocks.
  BasicBlock *StartBlock = SI->getParent();
  BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(SI));
  BasicBlock *NextBlock = StartBlock->splitBasicBlock(SplitPt, "select.end");

  // Create a new block serving as the landing pad for the branch.
  BasicBlock *SmallBlock = BasicBlock::Create(SI->getContext(), "select.mid",
                                              NextBlock->getParent(),
                                              NextBlock);

  // Move the unconditional branch from the block containing the select into
  // our landing-pad block.
  StartBlock->getTerminator()->eraseFromParent();
  BranchInst::Create(NextBlock, SmallBlock);

  // Insert the real conditional branch based on the original condition.
  BranchInst::Create(NextBlock, SmallBlock, SI->getCondition(), SI);

  // The select itself is replaced with a PHI node.
  PHINode *PN = PHINode::Create(SI->getType(), 2, "", NextBlock->begin());
  PN->takeName(SI);
  PN->addIncoming(SI->getTrueValue(), StartBlock);
  PN->addIncoming(SI->getFalseValue(), SmallBlock);
  SI->replaceAllUsesWith(PN);
  SI->eraseFromParent();

  // Instruct OptimizeBlock to skip to the next block.
  CurInstIterator = StartBlock->end();
  ++NumSelectsExpanded;
  return true;
}

bool CodeGenPrepare::OptimizeInst(Instruction *I) {
  if (PHINode *P = dyn_cast<PHINode>(I)) {
    // It is possible for very late stage optimizations (such as SimplifyCFG)
    // to introduce PHI nodes too late to be cleaned up.  If we detect such a
    // trivial PHI, go ahead and zap it here.
    if (Value *V = SimplifyInstruction(P, TLI ? TLI->getDataLayout() : 0,
                                       TLInfo, DT)) {
      P->replaceAllUsesWith(V);
      P->eraseFromParent();
      ++NumPHIsElim;
      return true;
    }
    return false;
  }

  if (CastInst *CI = dyn_cast<CastInst>(I)) {
    // If the source of the cast is a constant, then this should have
    // already been constant folded.  The only reason NOT to constant fold
    // it is if something (e.g. LSR) was careful to place the constant
    // evaluation in a block other than the one that uses it (e.g. to hoist
    // the address of globals out of a loop).  If this is the case, we don't
    // want to forward-subst the cast.
    if (isa<Constant>(CI->getOperand(0)))
      return false;

    if (TLI && OptimizeNoopCopyExpression(CI, *TLI))
      return true;

    if (isa<ZExtInst>(I) || isa<SExtInst>(I)) {
      bool MadeChange = MoveExtToFormExtLoad(I);
      return MadeChange | OptimizeExtUses(I);
    }
    return false;
  }

  if (CmpInst *CI = dyn_cast<CmpInst>(I))
    return OptimizeCmpExpression(CI);

  if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
    if (TLI)
      return OptimizeMemoryInst(I, I->getOperand(0), LI->getType());
    return false;
  }

  if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
    if (TLI)
      return OptimizeMemoryInst(I, SI->getOperand(1),
                                SI->getOperand(0)->getType());
    return false;
  }

  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
    if (GEPI->hasAllZeroIndices()) {
      // The GEP operand must be a pointer, so its result must be too; replace
      // the all-zero-index GEP with a BitCast.
      Instruction *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(),
                                        GEPI->getName(), GEPI);
      GEPI->replaceAllUsesWith(NC);
      GEPI->eraseFromParent();
      ++NumGEPsElim;
      OptimizeInst(NC);
      return true;
    }
    return false;
  }

  if (CallInst *CI = dyn_cast<CallInst>(I))
    return OptimizeCallInst(CI);

  if (SelectInst *SI = dyn_cast<SelectInst>(I))
    return OptimizeSelectInst(SI);

  return false;
}

// In this pass we look for GEP and cast instructions that are used
// across basic blocks and rewrite them to improve basic-block-at-a-time
// selection.
bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) {
  SunkAddrs.clear();
  bool MadeChange = false;

  CurInstIterator = BB.begin();
  while (CurInstIterator != BB.end())
    MadeChange |= OptimizeInst(CurInstIterator++);

  MadeChange |= DupRetToEnableTailCallOpts(&BB);

  return MadeChange;
}

// If llvm.dbg.value is far away from the value, then ISel may not be able to
// handle it properly.  ISel will drop the llvm.dbg.value if it cannot find a
// node corresponding to the value.
bool CodeGenPrepare::PlaceDbgValues(Function &F) {
  bool MadeChange = false;
  for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
    Instruction *PrevNonDbgInst = NULL;
    for (BasicBlock::iterator BI = I->begin(), BE = I->end(); BI != BE;) {
      Instruction *Insn = BI; ++BI;
      DbgValueInst *DVI = dyn_cast<DbgValueInst>(Insn);
      if (!DVI) {
        PrevNonDbgInst = Insn;
        continue;
      }

      Instruction *VI = dyn_cast_or_null<Instruction>(DVI->getValue());
      if (VI && VI != PrevNonDbgInst && !VI->isTerminator()) {
        DEBUG(dbgs() << "Moving Debug Value before :\n" << *DVI << ' ' << *VI);
        DVI->removeFromParent();
        if (isa<PHINode>(VI))
          DVI->insertBefore(VI->getParent()->getFirstInsertionPt());
        else
          DVI->insertAfter(VI);
        MadeChange = true;
        ++NumDbgValueMoved;
      }
    }
  }
  return MadeChange;
}