//===- CodeGenPrepare.cpp - Prepare a function for code generation --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass munges the code in the input function to better prepare it for
// SelectionDAG-based code generation. This works around limitations in its
// basic-block-at-a-time approach. It should eventually be removed.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "codegenprepare"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/GlobalVariable.h"
#include "llvm/IRBuilder.h"
#include "llvm/InlineAsm.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Module.h"
#include "llvm/Pass.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/ValueMap.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/ProfileInfo.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/PatternMatch.h"
#include "llvm/Support/ValueHandle.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetLibraryInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Transforms/Utils/AddrModeMatcher.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/Transforms/Utils/BypassSlowDivision.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
using namespace llvm::PatternMatch;

STATISTIC(NumBlocksElim, "Number of blocks eliminated");
STATISTIC(NumPHIsElim, "Number of trivial PHIs eliminated");
STATISTIC(NumGEPsElim, "Number of GEPs converted to casts");
STATISTIC(NumCmpUses, "Number of uses of Cmp expressions replaced with uses of "
                      "sunken Cmps");
STATISTIC(NumCastUses, "Number of uses of Cast expressions replaced with uses "
                       "of sunken Casts");
STATISTIC(NumMemoryInsts, "Number of memory instructions whose address "
                          "computations were sunk");
STATISTIC(NumExtsMoved, "Number of [s|z]ext instructions combined with loads");
STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized");
STATISTIC(NumRetsDup, "Number of return instructions duplicated");
STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved");
STATISTIC(NumSelectsExpanded, "Number of selects turned into branches");

static cl::opt<bool> DisableBranchOpts(
  "disable-cgp-branch-opts", cl::Hidden, cl::init(false),
  cl::desc("Disable branch optimizations in CodeGenPrepare"));

static cl::opt<bool> DisableSelectToBranch(
  "disable-cgp-select2branch", cl::Hidden, cl::init(false),
  cl::desc("Disable select to branch conversion."));

namespace {
  class CodeGenPrepare : public FunctionPass {
    /// TLI - Keep a pointer of a TargetLowering to consult for determining
    /// transformation profitability.
    const TargetLowering *TLI;
    const TargetLibraryInfo *TLInfo;
    DominatorTree *DT;
    ProfileInfo *PFI;

    /// CurInstIterator - As we scan instructions optimizing them, this is the
    /// next instruction to optimize. Xforms that can invalidate this should
    /// update it.
    BasicBlock::iterator CurInstIterator;

    /// Keeps track of non-local addresses that have been sunk into a block.
    /// This allows us to avoid inserting duplicate code for blocks with
    /// multiple load/stores of the same address.
    ValueMap<Value*, Value*> SunkAddrs;

    /// ModifiedDT - If the CFG is modified in any way, the dominator tree may
    /// need to be updated.
    bool ModifiedDT;

    /// OptSize - True if optimizing for size.
    bool OptSize;

  public:
    static char ID; // Pass identification, replacement for typeid
    explicit CodeGenPrepare(const TargetLowering *tli = 0)
      : FunctionPass(ID), TLI(tli) {
        initializeCodeGenPreparePass(*PassRegistry::getPassRegistry());
      }
    bool runOnFunction(Function &F);

    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
      AU.addPreserved<DominatorTree>();
      AU.addPreserved<ProfileInfo>();
      AU.addRequired<TargetLibraryInfo>();
    }

  private:
    bool EliminateFallThrough(Function &F);
    bool EliminateMostlyEmptyBlocks(Function &F);
    bool CanMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
    void EliminateMostlyEmptyBlock(BasicBlock *BB);
    bool OptimizeBlock(BasicBlock &BB);
    bool OptimizeInst(Instruction *I);
    bool OptimizeMemoryInst(Instruction *I, Value *Addr, Type *AccessTy);
    bool OptimizeInlineAsmInst(CallInst *CS);
    bool OptimizeCallInst(CallInst *CI);
    bool MoveExtToFormExtLoad(Instruction *I);
    bool OptimizeExtUses(Instruction *I);
    bool OptimizeSelectInst(SelectInst *SI);
    bool DupRetToEnableTailCallOpts(ReturnInst *RI);
    bool PlaceDbgValues(Function &F);
    bool ConvertLoadToSwitch(LoadInst *LI);
  };
}

char CodeGenPrepare::ID = 0;
INITIALIZE_PASS_BEGIN(CodeGenPrepare, "codegenprepare",
                      "Optimize for code generation", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
INITIALIZE_PASS_END(CodeGenPrepare, "codegenprepare",
                    "Optimize for code generation", false, false)

FunctionPass *llvm::createCodeGenPreparePass(const TargetLowering *TLI) {
  return new CodeGenPrepare(TLI);
}

bool CodeGenPrepare::runOnFunction(Function &F) {
  bool EverMadeChange = false;

  ModifiedDT = false;
  TLInfo = &getAnalysis<TargetLibraryInfo>();
  DT = getAnalysisIfAvailable<DominatorTree>();
  PFI = getAnalysisIfAvailable<ProfileInfo>();
  OptSize = F.getFnAttributes().hasOptimizeForSizeAttr();

  /// This optimization identifies DIV instructions that can be
  /// profitably bypassed and carried out with a shorter, faster divide.
  if (TLI && TLI->isSlowDivBypassed()) {
    const DenseMap<Type*, Type*> &BypassTypeMap = TLI->getBypassSlowDivTypes();
    for (Function::iterator I = F.begin(); I != F.end(); I++)
      EverMadeChange |= bypassSlowDivision(F, I, BypassTypeMap);
  }

  // Eliminate blocks that contain only PHI nodes and an
  // unconditional branch.
  EverMadeChange |= EliminateMostlyEmptyBlocks(F);

  // If llvm.dbg.value is far away from the value, then isel may not be able
  // to handle it properly. isel will drop llvm.dbg.value if it cannot find a
  // node corresponding to the value.
  EverMadeChange |= PlaceDbgValues(F);

  bool MadeChange = true;
  while (MadeChange) {
    MadeChange = false;
    for (Function::iterator I = F.begin(); I != F.end(); ) {
      BasicBlock *BB = I++;
      MadeChange |= OptimizeBlock(*BB);
    }
    EverMadeChange |= MadeChange;
  }

  SunkAddrs.clear();

  if (!DisableBranchOpts) {
    MadeChange = false;
    SmallPtrSet<BasicBlock*, 8> WorkList;
    SmallPtrSet<BasicBlock*, 8> LPadList;
    SmallVector<BasicBlock*, 8> ReturnList;
    for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
      SmallVector<BasicBlock*, 2> Successors(succ_begin(BB), succ_end(BB));
      if (BB->isLandingPad()) LPadList.insert(BB);
      if (isa<ReturnInst>(BB->getTerminator())) ReturnList.push_back(BB);
      MadeChange |= ConstantFoldTerminator(BB, true);
      if (!MadeChange) continue;

      for (SmallVectorImpl<BasicBlock*>::iterator
             II = Successors.begin(), IE = Successors.end(); II != IE; ++II)
        if (pred_begin(*II) == pred_end(*II))
          WorkList.insert(*II);
    }

    // Delete the dead blocks and any of their dead successors.
    bool HadLPads = !LPadList.empty();
    while (!WorkList.empty()) {
      BasicBlock *BB = *WorkList.begin();
      WorkList.erase(BB);
      LPadList.erase(BB);
      SmallVector<BasicBlock*, 2> Successors(succ_begin(BB), succ_end(BB));

      DeleteDeadBlock(BB);

      for (SmallVectorImpl<BasicBlock*>::iterator
             II = Successors.begin(), IE = Successors.end(); II != IE; ++II)
        if (pred_begin(*II) == pred_end(*II))
          WorkList.insert(*II);
    }

    if (HadLPads && LPadList.empty()) {
      // All of the landing pads were removed. Get rid of the SjLj EH context
      // code.
      Module *M = F.getParent();

      // These functions must exist if we have SjLj EH code to clean up.
      Constant *RegisterFn = M->getFunction("_Unwind_SjLj_Register");
      Constant *UnregisterFn = M->getFunction("_Unwind_SjLj_Unregister");

      if (RegisterFn) {
        Constant *LSDAAddrFn =
          Intrinsic::getDeclaration(M, Intrinsic::eh_sjlj_lsda);
        Constant *FrameAddrFn =
          Intrinsic::getDeclaration(M, Intrinsic::frameaddress);
        Constant *StackAddrFn =
          Intrinsic::getDeclaration(M, Intrinsic::stacksave);
        Constant *BuiltinSetjmpFn =
          Intrinsic::getDeclaration(M, Intrinsic::eh_sjlj_setjmp);
        Constant *FuncCtxFn =
          Intrinsic::getDeclaration(M, Intrinsic::eh_sjlj_functioncontext);

        BasicBlock &Entry = F.getEntryBlock();
        SmallVector<Instruction*, 8> DeadInsts;
        for (BasicBlock::iterator I = Entry.begin(), E = Entry.end();
             I != E; ++I) {
          if (CallInst *CI = dyn_cast<CallInst>(I)) {
            Value *Callee = CI->getCalledValue();
            bool IsDead = true;
            if (Callee != LSDAAddrFn && Callee != FrameAddrFn &&
                Callee != StackAddrFn && Callee != BuiltinSetjmpFn &&
                Callee != FuncCtxFn && Callee != RegisterFn)
              IsDead = false;

            if (IsDead) {
              Type *Ty = CI->getType();
              if (!Ty->isVoidTy())
                CI->replaceAllUsesWith(UndefValue::get(Ty));
              DeadInsts.push_back(CI);
            }
          }
        }

        // Find and remove the unregister calls.
        for (SmallVectorImpl<BasicBlock*>::iterator I = ReturnList.begin(),
               E = ReturnList.end(); I != E; ++I) {
          BasicBlock *BB = *I;
          typedef BasicBlock::InstListType::reverse_iterator reverse_iterator;

          for (reverse_iterator II = BB->getInstList().rbegin(),
                 IE = BB->getInstList().rend(); II != IE; ++II) {
            if (CallInst *CI = dyn_cast<CallInst>(&*II)) {
              Value *Callee = CI->getCalledValue();

              if (Callee == UnregisterFn) {
                DeadInsts.push_back(CI);
                break;
              }
            }
          }
        }

        // Kill the dead instructions.
        for (SmallVectorImpl<Instruction*>::iterator I = DeadInsts.begin(),
               E = DeadInsts.end(); I != E; ++I)
          (*I)->eraseFromParent();
      }
    }

    // Merge pairs of basic blocks with unconditional branches, connected by
    // a single edge.
    if (EverMadeChange || MadeChange)
      MadeChange |= EliminateFallThrough(F);

    if (MadeChange)
      ModifiedDT = true;
    EverMadeChange |= MadeChange;
  }

  if (ModifiedDT && DT)
    DT->DT->recalculate(F);

  return EverMadeChange;
}

/// EliminateFallThrough - Merge basic blocks which are connected
/// by a single edge, where one of the basic blocks has a single successor
/// pointing to the other basic block, which has a single predecessor.
bool CodeGenPrepare::EliminateFallThrough(Function &F) {
  bool Changed = false;
  // Scan all of the blocks in the function, except for the entry block.
  for (Function::iterator I = ++F.begin(), E = F.end(); I != E; ) {
    BasicBlock *BB = I++;
    // If the destination block has a single pred, then this is a trivial
    // edge, just collapse it.
    BasicBlock *SinglePred = BB->getSinglePredecessor();

    // Don't merge if BB's address is taken.
    if (!SinglePred || SinglePred == BB || BB->hasAddressTaken()) continue;

    BranchInst *Term = dyn_cast<BranchInst>(SinglePred->getTerminator());
    if (Term && !Term->isConditional()) {
      Changed = true;
      DEBUG(dbgs() << "To merge:\n" << *SinglePred << "\n\n\n");
      // Remember if SinglePred was the entry block of the function.
      // If so, we will need to move BB back to the entry position.
      bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
      MergeBasicBlockIntoOnlyPred(BB, this);

      if (isEntry && BB != &BB->getParent()->getEntryBlock())
        BB->moveBefore(&BB->getParent()->getEntryBlock());

      // We have erased a block. Update the iterator.
      I = BB;
    }
  }
  return Changed;
}

/// EliminateMostlyEmptyBlocks - Eliminate blocks that contain only PHI nodes,
/// debug info directives, and an unconditional branch. Passes before isel
/// (e.g. LSR/loopsimplify) often split edges in ways that are non-optimal for
/// isel. Start by eliminating these blocks so we can split them the way we
/// want them.
bool CodeGenPrepare::EliminateMostlyEmptyBlocks(Function &F) {
  bool MadeChange = false;
  // Note that this intentionally skips the entry block.
  for (Function::iterator I = ++F.begin(), E = F.end(); I != E; ) {
    BasicBlock *BB = I++;

    // If this block doesn't end with an uncond branch, ignore it.
    BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
    if (!BI || !BI->isUnconditional())
      continue;

    // If the instruction before the branch (skipping debug info) isn't a phi
    // node, then other stuff is happening here.
    BasicBlock::iterator BBI = BI;
    if (BBI != BB->begin()) {
      --BBI;
      while (isa<DbgInfoIntrinsic>(BBI)) {
        if (BBI == BB->begin())
          break;
        --BBI;
      }
      if (!isa<DbgInfoIntrinsic>(BBI) && !isa<PHINode>(BBI))
        continue;
    }

    // Do not break infinite loops.
    BasicBlock *DestBB = BI->getSuccessor(0);
    if (DestBB == BB)
      continue;

    if (!CanMergeBlocks(BB, DestBB))
      continue;

    EliminateMostlyEmptyBlock(BB);
    MadeChange = true;
  }
  return MadeChange;
}

/// CanMergeBlocks - Return true if we can merge BB into DestBB if there is a
/// single uncond branch between them, and BB contains no other non-phi
/// instructions.
bool CodeGenPrepare::CanMergeBlocks(const BasicBlock *BB,
                                    const BasicBlock *DestBB) const {
  // We only want to eliminate blocks whose phi nodes are used by phi nodes in
  // the successor. If there are more complex conditions (e.g. preheaders),
  // don't mess around with them.
  BasicBlock::const_iterator BBI = BB->begin();
  while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) {
    for (Value::const_use_iterator UI = PN->use_begin(), E = PN->use_end();
         UI != E; ++UI) {
      const Instruction *User = cast<Instruction>(*UI);
      if (User->getParent() != DestBB || !isa<PHINode>(User))
        return false;
      // If User is inside DestBB block and it is a PHINode then check
      // incoming value. If incoming value is not from BB then this is
      // a complex condition (e.g. preheaders) we want to avoid here.
      if (User->getParent() == DestBB) {
        if (const PHINode *UPN = dyn_cast<PHINode>(User))
          for (unsigned I = 0, E = UPN->getNumIncomingValues(); I != E; ++I) {
            Instruction *Insn = dyn_cast<Instruction>(UPN->getIncomingValue(I));
            if (Insn && Insn->getParent() == BB &&
                Insn->getParent() != UPN->getIncomingBlock(I))
              return false;
          }
      }
    }
  }

  // If BB and DestBB contain any common predecessors, then the phi nodes in BB
  // and DestBB may have conflicting incoming values for the block. If so, we
  // can't merge the block.
  const PHINode *DestBBPN = dyn_cast<PHINode>(DestBB->begin());
  if (!DestBBPN) return true;  // no conflict.

  // Collect the preds of BB.
  SmallPtrSet<const BasicBlock*, 16> BBPreds;
  if (const PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
    // It is faster to get preds from a PHI than with pred_iterator.
    for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
      BBPreds.insert(BBPN->getIncomingBlock(i));
  } else {
    BBPreds.insert(pred_begin(BB), pred_end(BB));
  }

  // Walk the preds of DestBB.
  for (unsigned i = 0, e = DestBBPN->getNumIncomingValues(); i != e; ++i) {
    BasicBlock *Pred = DestBBPN->getIncomingBlock(i);
    if (BBPreds.count(Pred)) {  // Common predecessor?
      BBI = DestBB->begin();
      while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) {
        const Value *V1 = PN->getIncomingValueForBlock(Pred);
        const Value *V2 = PN->getIncomingValueForBlock(BB);

        // If V2 is a phi node in BB, look up what the mapped value will be.
        if (const PHINode *V2PN = dyn_cast<PHINode>(V2))
          if (V2PN->getParent() == BB)
            V2 = V2PN->getIncomingValueForBlock(Pred);

        // If there is a conflict, bail out.
        if (V1 != V2) return false;
      }
    }
  }

  return true;
}


/// EliminateMostlyEmptyBlock - Eliminate a basic block that has only PHIs and
/// an unconditional branch in it.
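///
/// For illustration (the block and value names here are hypothetical), a
/// block such as:
/// @code
/// bb:                                    ; preds = %entry
///   %p = phi i32 [ %x, %entry ]
///   br label %dest
/// @endcode
/// is removed by rewiring %entry directly to %dest and updating the PHIs in
/// %dest to take %x from %entry.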
void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) {
  BranchInst *BI = cast<BranchInst>(BB->getTerminator());
  BasicBlock *DestBB = BI->getSuccessor(0);

  DEBUG(dbgs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n" << *BB << *DestBB);

  // If the destination block has a single pred, then this is a trivial edge,
  // just collapse it.
  if (BasicBlock *SinglePred = DestBB->getSinglePredecessor()) {
    if (SinglePred != DestBB) {
      // Remember if SinglePred was the entry block of the function. If so, we
      // will need to move BB back to the entry position.
      bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
      MergeBasicBlockIntoOnlyPred(DestBB, this);

      if (isEntry && BB != &BB->getParent()->getEntryBlock())
        BB->moveBefore(&BB->getParent()->getEntryBlock());

      DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
      return;
    }
  }

  // Otherwise, we have multiple predecessors of BB. Update the PHIs in DestBB
  // to handle the new incoming edges it is about to have.
  PHINode *PN;
  for (BasicBlock::iterator BBI = DestBB->begin();
       (PN = dyn_cast<PHINode>(BBI)); ++BBI) {
    // Remove the incoming value for BB, and remember it.
    Value *InVal = PN->removeIncomingValue(BB, false);

    // Two options: either the InVal is a phi node defined in BB or it is some
    // value that dominates BB.
    PHINode *InValPhi = dyn_cast<PHINode>(InVal);
    if (InValPhi && InValPhi->getParent() == BB) {
      // Add all of the input values of the input PHI as inputs of this phi.
      for (unsigned i = 0, e = InValPhi->getNumIncomingValues(); i != e; ++i)
        PN->addIncoming(InValPhi->getIncomingValue(i),
                        InValPhi->getIncomingBlock(i));
    } else {
      // Otherwise, add one instance of the dominating value for each edge that
      // we will be adding.
      if (PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
        for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
          PN->addIncoming(InVal, BBPN->getIncomingBlock(i));
      } else {
        for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
          PN->addIncoming(InVal, *PI);
      }
    }
  }

  // The PHIs are now updated, change everything that refers to BB to use
  // DestBB and remove BB.
  BB->replaceAllUsesWith(DestBB);
  if (DT && !ModifiedDT) {
    BasicBlock *BBIDom = DT->getNode(BB)->getIDom()->getBlock();
    BasicBlock *DestBBIDom = DT->getNode(DestBB)->getIDom()->getBlock();
    BasicBlock *NewIDom = DT->findNearestCommonDominator(BBIDom, DestBBIDom);
    DT->changeImmediateDominator(DestBB, NewIDom);
    DT->eraseNode(BB);
  }
  if (PFI) {
    PFI->replaceAllUses(BB, DestBB);
    PFI->removeEdge(ProfileInfo::getEdge(BB, DestBB));
  }
  BB->eraseFromParent();
  ++NumBlocksElim;

  DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
}

/// OptimizeNoopCopyExpression - If the specified cast instruction is a noop
/// copy (e.g. it's casting from one pointer type to another, i32->i8 on PPC),
/// sink it into user blocks to reduce the number of virtual
/// registers that must be created and coalesced.
///
/// Return true if any changes are made.
///
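/// For example (an illustrative sketch; the names are hypothetical), a
/// target-noop trunc used in another block:
/// @code
/// def:
///   %t = trunc i32 %x to i8
///   br label %use
/// use:                                   ; preds = %def
///   call void @g(i8 %t)
/// @endcode
/// gets a copy of the trunc emitted in %use, so no cross-block virtual
/// register copy is needed for %t.
///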
static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){
  // If this is a noop copy,
  EVT SrcVT = TLI.getValueType(CI->getOperand(0)->getType());
  EVT DstVT = TLI.getValueType(CI->getType());

  // If this is an fp<->int conversion, it is not a noop.
  if (SrcVT.isInteger() != DstVT.isInteger())
    return false;

  // If this is an extension, it will be a zero or sign extension, which
  // isn't a noop.
  if (SrcVT.bitsLT(DstVT)) return false;

  // If these values will be promoted, find out what they will be promoted
  // to. This helps us consider truncates on PPC as noop copies when they
  // are.
  if (TLI.getTypeAction(CI->getContext(), SrcVT) ==
      TargetLowering::TypePromoteInteger)
    SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT);
  if (TLI.getTypeAction(CI->getContext(), DstVT) ==
      TargetLowering::TypePromoteInteger)
    DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT);

  // If, after promotion, these are the same types, this is a noop copy.
  if (SrcVT != DstVT)
    return false;

  BasicBlock *DefBB = CI->getParent();

  /// InsertedCasts - Only insert a cast in each block once.
  DenseMap<BasicBlock*, CastInst*> InsertedCasts;

  bool MadeChange = false;
  for (Value::use_iterator UI = CI->use_begin(), E = CI->use_end();
       UI != E; ) {
    Use &TheUse = UI.getUse();
    Instruction *User = cast<Instruction>(*UI);

    // Figure out which BB this cast is used in. For PHI's this is the
    // appropriate predecessor block.
    BasicBlock *UserBB = User->getParent();
    if (PHINode *PN = dyn_cast<PHINode>(User)) {
      UserBB = PN->getIncomingBlock(UI);
    }

    // Preincrement use iterator so we don't invalidate it.
    ++UI;

    // If this user is in the same block as the cast, don't change the cast.
    if (UserBB == DefBB) continue;

    // If we have already inserted a cast into this block, use it.
    CastInst *&InsertedCast = InsertedCasts[UserBB];

    if (!InsertedCast) {
      BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
      InsertedCast =
        CastInst::Create(CI->getOpcode(), CI->getOperand(0), CI->getType(), "",
                         InsertPt);
      MadeChange = true;
    }

    // Replace a use of the cast with a use of the new cast.
    TheUse = InsertedCast;
    ++NumCastUses;
  }

  // If we removed all uses, nuke the cast.
  if (CI->use_empty()) {
    CI->eraseFromParent();
    MadeChange = true;
  }

  return MadeChange;
}

/// OptimizeCmpExpression - Sink the given CmpInst into user blocks to reduce
/// the number of virtual registers that must be created and coalesced. This is
/// a clear win except on targets with multiple condition code registers
/// (PowerPC), where it might lose; some adjustment may be wanted there.
///
/// Return true if any changes are made.
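///
/// For example (an illustrative sketch; the names are hypothetical):
/// @code
/// def:
///   %c = icmp eq i32 %x, 0
///   br label %other
/// other:                                 ; preds = %def
///   br i1 %c, label %t, label %f
/// @endcode
/// A copy of the icmp is emitted in %other, so the compare and the branch
/// that consumes it are selected in the same block.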
static bool OptimizeCmpExpression(CmpInst *CI) {
  BasicBlock *DefBB = CI->getParent();

  /// InsertedCmps - Only insert a cmp in each block once.
  DenseMap<BasicBlock*, CmpInst*> InsertedCmps;

  bool MadeChange = false;
  for (Value::use_iterator UI = CI->use_begin(), E = CI->use_end();
       UI != E; ) {
    Use &TheUse = UI.getUse();
    Instruction *User = cast<Instruction>(*UI);

    // Preincrement use iterator so we don't invalidate it.
    ++UI;

    // Don't bother for PHI nodes.
    if (isa<PHINode>(User))
      continue;

    // Figure out which BB this cmp is used in.
    BasicBlock *UserBB = User->getParent();

    // If this user is in the same block as the cmp, don't change the cmp.
    if (UserBB == DefBB) continue;

    // If we have already inserted a cmp into this block, use it.
    CmpInst *&InsertedCmp = InsertedCmps[UserBB];

    if (!InsertedCmp) {
      BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
      InsertedCmp =
        CmpInst::Create(CI->getOpcode(),
                        CI->getPredicate(), CI->getOperand(0),
                        CI->getOperand(1), "", InsertPt);
      MadeChange = true;
    }

    // Replace a use of the cmp with a use of the new cmp.
    TheUse = InsertedCmp;
    ++NumCmpUses;
  }

  // If we removed all uses, nuke the cmp.
  if (CI->use_empty())
    CI->eraseFromParent();

  return MadeChange;
}

namespace {
class CodeGenPrepareFortifiedLibCalls : public SimplifyFortifiedLibCalls {
protected:
  void replaceCall(Value *With) {
    CI->replaceAllUsesWith(With);
    CI->eraseFromParent();
  }
  bool isFoldable(unsigned SizeCIOp, unsigned, bool) const {
    if (ConstantInt *SizeCI =
          dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp)))
      return SizeCI->isAllOnesValue();
    return false;
  }
};
} // end anonymous namespace

bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
  BasicBlock *BB = CI->getParent();

  // Lower inline assembly if we can.
  // If we found an inline asm expression, and if the target knows how to
  // lower it to normal LLVM code, do so now.
  if (TLI && isa<InlineAsm>(CI->getCalledValue())) {
    if (TLI->ExpandInlineAsm(CI)) {
      // Avoid invalidating the iterator.
      CurInstIterator = BB->begin();
      // Avoid processing instructions out of order, which could cause
      // reuse before a value is defined.
      SunkAddrs.clear();
      return true;
    }
    // Sink address computation for memory operands into the block.
    if (OptimizeInlineAsmInst(CI))
      return true;
  }

  // Lower all uses of llvm.objectsize.*
  IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
  if (II && II->getIntrinsicID() == Intrinsic::objectsize) {
    bool Min = (cast<ConstantInt>(II->getArgOperand(1))->getZExtValue() == 1);
    Type *ReturnTy = CI->getType();
    Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL);

    // Substituting this can cause recursive simplifications, which can
    // invalidate our iterator. Use a WeakVH to hold onto it in case this
    // happens.
    WeakVH IterHandle(CurInstIterator);

    replaceAndRecursivelySimplify(CI, RetVal, TLI ? TLI->getTargetData() : 0,
                                  TLInfo, ModifiedDT ? 0 : DT);

    // If the iterator instruction was recursively deleted, start over at the
    // start of the block.
    if (IterHandle != CurInstIterator) {
      CurInstIterator = BB->begin();
      SunkAddrs.clear();
    }
    return true;
  }

  if (II && TLI) {
    SmallVector<Value*, 2> PtrOps;
    Type *AccessTy;
    if (TLI->GetAddrModeArguments(II, PtrOps, AccessTy))
      while (!PtrOps.empty())
        if (OptimizeMemoryInst(II, PtrOps.pop_back_val(), AccessTy))
          return true;
  }

  // From here on out we're working with named functions.
  if (CI->getCalledFunction() == 0) return false;

  // We'll need TargetData from here on out.
  const TargetData *TD = TLI ? TLI->getTargetData() : 0;
  if (!TD) return false;

  // Lower all default uses of _chk calls. This is very similar
  // to what InstCombineCalls does, but here we are only lowering calls
  // that have the default "don't know" as the objectsize. Anything else
  // should be left alone.
  CodeGenPrepareFortifiedLibCalls Simplifier;
  return Simplifier.fold(CI, TD, TLInfo);
}

/// DupRetToEnableTailCallOpts - Look for opportunities to duplicate return
/// instructions to the predecessor to enable tail call optimizations. The
/// case it is currently looking for is:
/// @code
/// bb0:
///   %tmp0 = tail call i32 @f0()
///   br label %return
/// bb1:
///   %tmp1 = tail call i32 @f1()
///   br label %return
/// bb2:
///   %tmp2 = tail call i32 @f2()
///   br label %return
/// return:
///   %retval = phi i32 [ %tmp0, %bb0 ], [ %tmp1, %bb1 ], [ %tmp2, %bb2 ]
///   ret i32 %retval
/// @endcode
///
/// =>
///
/// @code
/// bb0:
///   %tmp0 = tail call i32 @f0()
///   ret i32 %tmp0
/// bb1:
///   %tmp1 = tail call i32 @f1()
///   ret i32 %tmp1
/// bb2:
///   %tmp2 = tail call i32 @f2()
///   ret i32 %tmp2
/// @endcode
bool CodeGenPrepare::DupRetToEnableTailCallOpts(ReturnInst *RI) {
  if (!TLI)
    return false;

  PHINode *PN = 0;
  BitCastInst *BCI = 0;
  Value *V = RI->getReturnValue();
  if (V) {
    BCI = dyn_cast<BitCastInst>(V);
    if (BCI)
      V = BCI->getOperand(0);

    PN = dyn_cast<PHINode>(V);
    if (!PN)
      return false;
  }

  BasicBlock *BB = RI->getParent();
  if (PN && PN->getParent() != BB)
    return false;

  // It's not safe to eliminate the sign / zero extension of the return value.
  // See llvm::isInTailCallPosition().
  const Function *F = BB->getParent();
  Attributes CallerRetAttr = F->getAttributes().getRetAttributes();
  if ((CallerRetAttr & Attribute::ZExt) || (CallerRetAttr & Attribute::SExt))
    return false;

  // Make sure there are no instructions between the PHI and return, or that
  // the return is the first instruction in the block.
  if (PN) {
    BasicBlock::iterator BI = BB->begin();
    do { ++BI; } while (isa<DbgInfoIntrinsic>(BI));
    if (&*BI == BCI)
      // Also skip over the bitcast.
      ++BI;
    if (&*BI != RI)
      return false;
  } else {
    BasicBlock::iterator BI = BB->begin();
    while (isa<DbgInfoIntrinsic>(BI)) ++BI;
    if (&*BI != RI)
      return false;
  }

  /// Only dup the ReturnInst if the CallInst is likely to be emitted as a tail
  /// call.
  SmallVector<CallInst*, 4> TailCalls;
  if (PN) {
    for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) {
      CallInst *CI = dyn_cast<CallInst>(PN->getIncomingValue(I));
      // Make sure the phi value is indeed produced by the tail call.
      if (CI && CI->hasOneUse() && CI->getParent() == PN->getIncomingBlock(I) &&
          TLI->mayBeEmittedAsTailCall(CI))
        TailCalls.push_back(CI);
    }
  } else {
    SmallPtrSet<BasicBlock*, 4> VisitedBBs;
    for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) {
      if (!VisitedBBs.insert(*PI))
        continue;

      BasicBlock::InstListType &InstList = (*PI)->getInstList();
      BasicBlock::InstListType::reverse_iterator RI = InstList.rbegin();
      BasicBlock::InstListType::reverse_iterator RE = InstList.rend();
      do { ++RI; } while (RI != RE && isa<DbgInfoIntrinsic>(&*RI));
      if (RI == RE)
        continue;

      CallInst *CI = dyn_cast<CallInst>(&*RI);
      if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI))
        TailCalls.push_back(CI);
    }
  }

  bool Changed = false;
  for (unsigned i = 0, e = TailCalls.size(); i != e; ++i) {
    CallInst *CI = TailCalls[i];
    CallSite CS(CI);

    // Conservatively require the attributes of the call to match those of the
    // return. Ignore noalias because it doesn't affect the call sequence.
    Attributes CalleeRetAttr = CS.getAttributes().getRetAttributes();
    if ((CalleeRetAttr ^ CallerRetAttr) & ~Attribute::NoAlias)
      continue;

    // Make sure the call instruction is followed by an unconditional branch to
    // the return block.
    BasicBlock *CallBB = CI->getParent();
    BranchInst *BI = dyn_cast<BranchInst>(CallBB->getTerminator());
    if (!BI || !BI->isUnconditional() || BI->getSuccessor(0) != BB)
      continue;

    // Duplicate the return into CallBB.
    (void)FoldReturnIntoUncondBranch(RI, BB, CallBB);
    ModifiedDT = Changed = true;
    ++NumRetsDup;
  }

  // If we eliminated all predecessors of the block, delete the block now.
  if (Changed && !BB->hasAddressTaken() && pred_begin(BB) == pred_end(BB))
    BB->eraseFromParent();

  return Changed;
}

//===----------------------------------------------------------------------===//
// Memory Optimization
//===----------------------------------------------------------------------===//

/// IsNonLocalValue - Return true if the specified value is defined in a
/// different basic block than BB.
static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
  if (Instruction *I = dyn_cast<Instruction>(V))
    return I->getParent() != BB;
  return false;
}

/// OptimizeMemoryInst - Load and Store Instructions often have
/// addressing modes that can do significant amounts of computation. As such,
/// instruction selection will try to get the load or store to do as much
/// computation as possible for the program. The problem is that isel can only
/// see within a single block. As such, we sink as much legal addressing mode
/// stuff into the block as possible.
///
/// This method is used to optimize both load/store and inline asms with memory
/// operands.
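///
/// For example (an illustrative sketch; the names are hypothetical), given an
/// address computed in another block:
/// @code
/// entry:
///   %addr = getelementptr i32* %base, i64 %idx
///   br label %use
/// use:                                   ; preds = %entry
///   %v = load i32* %addr
/// @endcode
/// the addressing computation is rematerialized in %use (as a
/// ptrtoint/add/inttoptr sequence named "sunkaddr"), so isel can fold it into
/// the load's addressing mode.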
bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
                                        Type *AccessTy) {
  Value *Repl = Addr;

  // Try to collapse single-value PHI nodes. This is necessary to undo
  // unprofitable PRE transformations.
  SmallVector<Value*, 8> worklist;
  SmallPtrSet<Value*, 16> Visited;
  worklist.push_back(Addr);

  // Use a worklist to iteratively look through PHI nodes, and ensure that
  // the addressing modes obtained from the non-PHI roots of the graph
  // are all equivalent.
  Value *Consensus = 0;
  unsigned NumUsesConsensus = 0;
  bool IsNumUsesConsensusValid = false;
  SmallVector<Instruction*, 16> AddrModeInsts;
  ExtAddrMode AddrMode;
  while (!worklist.empty()) {
    Value *V = worklist.back();
    worklist.pop_back();

    // Break use-def graph loops.
    if (!Visited.insert(V)) {
      Consensus = 0;
      break;
    }

    // For a PHI node, push all of its incoming values.
    if (PHINode *P = dyn_cast<PHINode>(V)) {
      for (unsigned i = 0, e = P->getNumIncomingValues(); i != e; ++i)
        worklist.push_back(P->getIncomingValue(i));
      continue;
    }

    // For non-PHIs, determine the addressing mode being computed.
    SmallVector<Instruction*, 16> NewAddrModeInsts;
    ExtAddrMode NewAddrMode =
      AddressingModeMatcher::Match(V, AccessTy, MemoryInst,
                                   NewAddrModeInsts, *TLI);

    // This check is broken into two cases with very similar code to avoid
    // using getNumUses() as much as possible. Some values have a lot of uses,
    // so calling getNumUses() unconditionally caused a significant
    // compile-time regression.
    if (!Consensus) {
      Consensus = V;
      AddrMode = NewAddrMode;
      AddrModeInsts = NewAddrModeInsts;
      continue;
    } else if (NewAddrMode == AddrMode) {
      if (!IsNumUsesConsensusValid) {
        NumUsesConsensus = Consensus->getNumUses();
        IsNumUsesConsensusValid = true;
      }

      // Ensure that the obtained addressing mode is equivalent to that
      // obtained for all other roots of the PHI traversal. Also, when
      // choosing one such root as representative, select the one with the
      // most uses in order to keep the cost modeling heuristics in
      // AddressingModeMatcher applicable.
      unsigned NumUses = V->getNumUses();
      if (NumUses > NumUsesConsensus) {
        Consensus = V;
        NumUsesConsensus = NumUses;
        AddrModeInsts = NewAddrModeInsts;
      }
      continue;
    }

    Consensus = 0;
    break;
  }

  // If the addressing mode couldn't be determined, or if multiple different
  // ones were determined, bail out now.
  if (!Consensus) return false;

  // Check to see if any of the instructions subsumed by this addr mode are
  // non-local to I's BB.
  bool AnyNonLocal = false;
  for (unsigned i = 0, e = AddrModeInsts.size(); i != e; ++i) {
    if (IsNonLocalValue(AddrModeInsts[i], MemoryInst->getParent())) {
      AnyNonLocal = true;
      break;
    }
  }

  // If all the instructions matched are already in this BB, don't do anything.
  if (!AnyNonLocal) {
    DEBUG(dbgs() << "CGP: Found local addrmode: " << AddrMode << "\n");
    return false;
  }

  // Insert this computation right after this user. Since our caller is
  // scanning from the top of the BB to the bottom, reuses of the expr are
  // guaranteed to happen later.
  IRBuilder<> Builder(MemoryInst);

  // Now that we've determined the addressing expression we want to use and
  // know that we have to sink it into this block, check to see if we have
  // already done this for some other load/store instr in this block. If so,
  // reuse the computation.
  Value *&SunkAddr = SunkAddrs[Addr];
  if (SunkAddr) {
    DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode << " for "
                 << *MemoryInst);
    if (SunkAddr->getType() != Addr->getType())
      SunkAddr = Builder.CreateBitCast(SunkAddr, Addr->getType());
  } else {
    DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "
                 << *MemoryInst);
    Type *IntPtrTy =
      TLI->getTargetData()->getIntPtrType(AccessTy->getContext());

    Value *Result = 0;

    // Start with the base register. Do this first so that subsequent address
    // matching finds it last, which will prevent it from trying to match it
    // as the scaled value in case it happens to be a mul. That would be
    // problematic if we've sunk a different mul for the scale, because then
    // we'd end up sinking both muls.
    if (AddrMode.BaseReg) {
      Value *V = AddrMode.BaseReg;
      if (V->getType()->isPointerTy())
        V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
      if (V->getType() != IntPtrTy)
        V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr");
      Result = V;
    }

    // Add the scale value.
    if (AddrMode.Scale) {
      Value *V = AddrMode.ScaledReg;
      if (V->getType() == IntPtrTy) {
        // done.
      } else if (V->getType()->isPointerTy()) {
        V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
      } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() <
                 cast<IntegerType>(V->getType())->getBitWidth()) {
        V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
      } else {
        V = Builder.CreateSExt(V, IntPtrTy, "sunkaddr");
      }
      if (AddrMode.Scale != 1)
        V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale),
                              "sunkaddr");
      if (Result)
        Result = Builder.CreateAdd(Result, V, "sunkaddr");
      else
        Result = V;
    }

    // Add in the BaseGV if present.
    if (AddrMode.BaseGV) {
      Value *V = Builder.CreatePtrToInt(AddrMode.BaseGV, IntPtrTy, "sunkaddr");
      if (Result)
        Result = Builder.CreateAdd(Result, V, "sunkaddr");
      else
        Result = V;
    }

    // Add in the Base Offset if present.
    if (AddrMode.BaseOffs) {
      Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
      if (Result)
        Result = Builder.CreateAdd(Result, V, "sunkaddr");
      else
        Result = V;
    }

    if (Result == 0)
      SunkAddr = Constant::getNullValue(Addr->getType());
    else
      SunkAddr = Builder.CreateIntToPtr(Result, Addr->getType(), "sunkaddr");
  }

  MemoryInst->replaceUsesOfWith(Repl, SunkAddr);

  // If we have no uses, recursively delete the value and all dead instructions
  // using it.
  if (Repl->use_empty()) {
    // This can cause recursive deletion, which can invalidate our iterator.
    // Use a WeakVH to hold onto it in case this happens.
    WeakVH IterHandle(CurInstIterator);
    BasicBlock *BB = CurInstIterator->getParent();

    RecursivelyDeleteTriviallyDeadInstructions(Repl, TLInfo);

    if (IterHandle != CurInstIterator) {
      // If the iterator instruction was recursively deleted, start over at the
      // start of the block.
      CurInstIterator = BB->begin();
      SunkAddrs.clear();
    }
  }
  ++NumMemoryInsts;
  return true;
}

/// OptimizeInlineAsmInst - If there are any memory operands, use
/// OptimizeMemoryInst to sink their address computations into the block when
/// possible / profitable.
bool CodeGenPrepare::OptimizeInlineAsmInst(CallInst *CS) {
  bool MadeChange = false;

  TargetLowering::AsmOperandInfoVector
    TargetConstraints = TLI->ParseConstraints(CS);
  unsigned ArgNo = 0;
  for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
    TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];

    // Compute the constraint code and ConstraintType to use.
    TLI->ComputeConstraintToUse(OpInfo, SDValue());

    if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
        OpInfo.isIndirect) {
      Value *OpVal = CS->getArgOperand(ArgNo++);
      MadeChange |= OptimizeMemoryInst(CS, OpVal, OpVal->getType());
    } else if (OpInfo.Type == InlineAsm::isInput)
      ArgNo++;
  }

  return MadeChange;
}

/// MoveExtToFormExtLoad - Move a zext or sext fed by a load into the same
/// basic block as the load, unless conditions are unfavorable. This allows
/// SelectionDAG to fold the extend into the load.
///
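/// For example (an illustrative sketch; the names are hypothetical):
/// @code
/// bb0:
///   %l = load i8* %p
///   br label %bb1
/// bb1:                                   ; preds = %bb0
///   %e = zext i8 %l to i32
/// @endcode
/// The zext is moved into bb0 immediately after the load, so isel can select
/// a single extending load.
///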
bool CodeGenPrepare::MoveExtToFormExtLoad(Instruction *I) {
  // Look for a load being extended.
  LoadInst *LI = dyn_cast<LoadInst>(I->getOperand(0));
  if (!LI) return false;

  // If they're already in the same block, there's nothing to do.
  if (LI->getParent() == I->getParent())
    return false;

  // If the load has other users and the truncate is not free, this probably
  // isn't worthwhile.
  if (!LI->hasOneUse() &&
      TLI && (TLI->isTypeLegal(TLI->getValueType(LI->getType())) ||
              !TLI->isTypeLegal(TLI->getValueType(I->getType()))) &&
      !TLI->isTruncateFree(I->getType(), LI->getType()))
    return false;

  // Check whether the target supports casts folded into loads.
  unsigned LType;
  if (isa<ZExtInst>(I))
    LType = ISD::ZEXTLOAD;
  else {
    assert(isa<SExtInst>(I) && "Unexpected ext type!");
    LType = ISD::SEXTLOAD;
  }
  if (TLI && !TLI->isLoadExtLegal(LType, TLI->getValueType(LI->getType())))
    return false;

  // Move the extend into the same block as the load, so that SelectionDAG
  // can fold it.
  I->removeFromParent();
  I->insertAfter(LI);
  ++NumExtsMoved;
  return true;
}

bool CodeGenPrepare::OptimizeExtUses(Instruction *I) {
  BasicBlock *DefBB = I->getParent();

  // If the result of a {s|z}ext and its source are both live out, rewrite all
  // other uses of the source with the result of the extension.
  Value *Src = I->getOperand(0);
  if (Src->hasOneUse())
    return false;

  // Only do this xform if truncating is free.
  if (TLI && !TLI->isTruncateFree(I->getType(), Src->getType()))
    return false;

  // Only safe to perform the optimization if the source is also defined in
  // this block.
  if (!isa<Instruction>(Src) || DefBB != cast<Instruction>(Src)->getParent())
    return false;

  bool DefIsLiveOut = false;
  for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
       UI != E; ++UI) {
    Instruction *User = cast<Instruction>(*UI);

    // Figure out which BB this ext is used in.
    BasicBlock *UserBB = User->getParent();
    if (UserBB == DefBB) continue;
    DefIsLiveOut = true;
    break;
  }
  if (!DefIsLiveOut)
    return false;

  // Make sure none of the uses are PHI nodes.
  for (Value::use_iterator UI = Src->use_begin(), E = Src->use_end();
       UI != E; ++UI) {
    Instruction *User = cast<Instruction>(*UI);
    BasicBlock *UserBB = User->getParent();
    if (UserBB == DefBB) continue;
    // Be conservative. We don't want this xform to end up introducing
    // reloads just before load / store instructions.
    if (isa<PHINode>(User) || isa<LoadInst>(User) || isa<StoreInst>(User))
      return false;
  }

  // InsertedTruncs - Only insert one trunc in each block.
  DenseMap<BasicBlock*, Instruction*> InsertedTruncs;

  bool MadeChange = false;
  for (Value::use_iterator UI = Src->use_begin(), E = Src->use_end();
       UI != E; ++UI) {
    Use &TheUse = UI.getUse();
    Instruction *User = cast<Instruction>(*UI);

    // Figure out which BB this ext is used in.
    BasicBlock *UserBB = User->getParent();
    if (UserBB == DefBB) continue;

    // Both src and def are live in this block. Rewrite the use.
    Instruction *&InsertedTrunc = InsertedTruncs[UserBB];

    if (!InsertedTrunc) {
      BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
      InsertedTrunc = new TruncInst(I, Src->getType(), "", InsertPt);
    }

    // Replace a use of the {s|z}ext source with a use of the result.
    TheUse = InsertedTrunc;
    ++NumExtUses;
    MadeChange = true;
  }

  return MadeChange;
}

/// isFormingBranchFromSelectProfitable - Returns true if a SelectInst should be
/// turned into an explicit branch.
static bool isFormingBranchFromSelectProfitable(SelectInst *SI) {
  // FIXME: This should use the same heuristics as IfConversion to determine
  // whether a select is better represented as a branch. This requires that
  // branch probability metadata is preserved for the select, which is not the
  // case currently.

  CmpInst *Cmp = dyn_cast<CmpInst>(SI->getCondition());

  // If the branch is predicted right, an out-of-order CPU can avoid blocking
  // on the compare. Emit cmovs on compares with a memory operand as branches
  // to avoid stalls on the load from memory. If the compare has more than one
  // use there's probably another cmov or setcc around, so it's not worth
  // emitting a branch.
  if (!Cmp)
    return false;

  Value *CmpOp0 = Cmp->getOperand(0);
  Value *CmpOp1 = Cmp->getOperand(1);

  // We check that the memory operand has one use to avoid uses of the loaded
  // value directly after the compare, making branches unprofitable.
  return Cmp->hasOneUse() &&
         ((isa<LoadInst>(CmpOp0) && CmpOp0->hasOneUse()) ||
          (isa<LoadInst>(CmpOp1) && CmpOp1->hasOneUse()));
}


/// If we have a SelectInst that will likely profit from branch prediction,
/// turn it into a branch.
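///
/// For example (an illustrative sketch; the block name %start is
/// hypothetical):
/// @code
///   %v = select i1 %cmp, i32 %a, i32 %b
/// @endcode
/// becomes
/// @code
/// start:
///   br i1 %cmp, label %select.end, label %select.mid
/// select.mid:
///   br label %select.end
/// select.end:
///   %v = phi i32 [ %a, %start ], [ %b, %select.mid ]
/// @endcode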
bool CodeGenPrepare::OptimizeSelectInst(SelectInst *SI) {
  bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1);

  // Can we convert the 'select' to a conditional branch?
  if (DisableSelectToBranch || OptSize || !TLI || VectorCond)
    return false;

  TargetLowering::SelectSupportKind SelectKind;
  if (VectorCond)
    SelectKind = TargetLowering::VectorMaskSelect;
  else if (SI->getType()->isVectorTy())
    SelectKind = TargetLowering::ScalarCondVectorVal;
  else
    SelectKind = TargetLowering::ScalarValSelect;

  // Do we have efficient codegen support for this kind of 'select'?
  if (TLI->isSelectSupported(SelectKind)) {
    // We have efficient codegen support for the select instruction.
    // Check if it is profitable to keep this 'select'.
    if (!TLI->isPredictableSelectExpensive() ||
        !isFormingBranchFromSelectProfitable(SI))
      return false;
  }

  ModifiedDT = true;

  // First, we split the block containing the select into 2 blocks.
  BasicBlock *StartBlock = SI->getParent();
  BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(SI));
  BasicBlock *NextBlock = StartBlock->splitBasicBlock(SplitPt, "select.end");

  // Create a new block serving as the landing pad for the branch.
  BasicBlock *SmallBlock = BasicBlock::Create(SI->getContext(), "select.mid",
                                              NextBlock->getParent(),
                                              NextBlock);

  // Move the unconditional branch from the block with the select in it into
  // our landing pad block.
  StartBlock->getTerminator()->eraseFromParent();
  BranchInst::Create(NextBlock, SmallBlock);

  // Insert the real conditional branch based on the original condition.
  BranchInst::Create(NextBlock, SmallBlock, SI->getCondition(), SI);

  // The select itself is replaced with a PHI Node.
  PHINode *PN = PHINode::Create(SI->getType(), 2, "", NextBlock->begin());
  PN->takeName(SI);
  PN->addIncoming(SI->getTrueValue(), StartBlock);
  PN->addIncoming(SI->getFalseValue(), SmallBlock);
  SI->replaceAllUsesWith(PN);
  SI->eraseFromParent();

  // Instruct OptimizeBlock to skip to the next block.
  CurInstIterator = StartBlock->end();
  ++NumSelectsExpanded;
  return true;
}

bool CodeGenPrepare::OptimizeInst(Instruction *I) {
  if (PHINode *P = dyn_cast<PHINode>(I)) {
    // It is possible for very late stage optimizations (such as SimplifyCFG)
    // to introduce PHI nodes too late to be cleaned up. If we detect such a
    // trivial PHI, go ahead and zap it here.
    if (Value *V = SimplifyInstruction(P)) {
      P->replaceAllUsesWith(V);
      P->eraseFromParent();
      ++NumPHIsElim;
      return true;
    }
    return false;
  }

  if (CastInst *CI = dyn_cast<CastInst>(I)) {
    // If the source of the cast is a constant, then this should have
    // already been constant folded. The only reason NOT to constant fold
    // it is if something (e.g. LSR) was careful to place the constant
    // evaluation in a block other than the one that uses it (e.g. to hoist
    // the address of globals out of a loop). If this is the case, we don't
    // want to forward-subst the cast.
    if (isa<Constant>(CI->getOperand(0)))
      return false;

    if (TLI && OptimizeNoopCopyExpression(CI, *TLI))
      return true;

    if (isa<ZExtInst>(I) || isa<SExtInst>(I)) {
      bool MadeChange = MoveExtToFormExtLoad(I);
      return MadeChange | OptimizeExtUses(I);
    }
    return false;
  }

  if (CmpInst *CI = dyn_cast<CmpInst>(I))
    return OptimizeCmpExpression(CI);

  if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
    bool Changed = false;
    if (TLI)
      Changed |= OptimizeMemoryInst(I, I->getOperand(0), LI->getType());
    Changed |= ConvertLoadToSwitch(LI);
    return Changed;
  }

  if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
    if (TLI)
      return OptimizeMemoryInst(I, SI->getOperand(1),
                                SI->getOperand(0)->getType());
    return false;
  }

  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
    if (GEPI->hasAllZeroIndices()) {
      /// The GEP operand must be a pointer, so must its result -> BitCast
      Instruction *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(),
                                        GEPI->getName(), GEPI);
      GEPI->replaceAllUsesWith(NC);
      GEPI->eraseFromParent();
      ++NumGEPsElim;
      OptimizeInst(NC);
      return true;
    }
    return false;
  }

  if (CallInst *CI = dyn_cast<CallInst>(I))
    return OptimizeCallInst(CI);

  if (ReturnInst *RI = dyn_cast<ReturnInst>(I))
    return DupRetToEnableTailCallOpts(RI);

  if (SelectInst *SI = dyn_cast<SelectInst>(I))
    return OptimizeSelectInst(SI);

  return false;
}

// In this pass we look for GEP and cast instructions that are used
// across basic blocks and rewrite them to improve basic-block-at-a-time
// selection.
bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) {
  SunkAddrs.clear();
  bool MadeChange = false;

  CurInstIterator = BB.begin();
  while (CurInstIterator != BB.end())
    MadeChange |= OptimizeInst(CurInstIterator++);

  return MadeChange;
}

// If llvm.dbg.value is far away from the value, then isel may not be able
// to handle it properly. isel will drop llvm.dbg.value if it cannot find a
// node corresponding to the value.
bool CodeGenPrepare::PlaceDbgValues(Function &F) {
  bool MadeChange = false;
  for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
    Instruction *PrevNonDbgInst = NULL;
    for (BasicBlock::iterator BI = I->begin(), BE = I->end(); BI != BE;) {
      Instruction *Insn = BI; ++BI;
      DbgValueInst *DVI = dyn_cast<DbgValueInst>(Insn);
      if (!DVI) {
        PrevNonDbgInst = Insn;
        continue;
      }

      Instruction *VI = dyn_cast_or_null<Instruction>(DVI->getValue());
      if (VI && VI != PrevNonDbgInst && !VI->isTerminator()) {
        DEBUG(dbgs() << "Moving Debug Value before :\n" << *DVI << ' ' << *VI);
        DVI->removeFromParent();
        if (isa<PHINode>(VI))
          DVI->insertBefore(VI->getParent()->getFirstInsertionPt());
        else
          DVI->insertAfter(VI);
        MadeChange = true;
        ++NumDbgValueMoved;
      }
    }
  }
  return MadeChange;
}

static bool TargetSupportsJumpTables(const TargetLowering &TLI) {
  return TLI.supportJumpTables() &&
      (TLI.isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
       TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other));
}

/// ConvertLoadToSwitch - Convert loads from constant lookup tables into
/// switches. This undoes the switch-to-lookup-table transformation in
/// SimplifyCFG for targets where that is unprofitable.
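///
/// For example (an illustrative sketch; @table and %idx are hypothetical):
/// @code
///   %gep = getelementptr inbounds [4 x i32]* @table, i32 0, i32 %idx
///   %val = load i32* %gep
/// @endcode
/// becomes a switch on %idx whose cases branch to small blocks that feed a
/// PHI with the table's constant elements, with the most frequent element
/// used as the default case.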
bool CodeGenPrepare::ConvertLoadToSwitch(LoadInst *LI) {
  // This only applies to targets that don't support jump tables.
  if (!TLI || TargetSupportsJumpTables(*TLI))
    return false;

  // FIXME: In the future, it would be desirable to have enough target
  // information in SimplifyCFG, so we could decide at that stage whether to
  // transform the switch to a lookup table or not, and this
  // reverse-transformation could be removed.

  GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(LI->getPointerOperand());
  if (!GEP || !GEP->isInBounds() || GEP->getPointerAddressSpace())
    return false;
  if (GEP->getNumIndices() != 2)
    return false;
  Value *FirstIndex = GEP->idx_begin()[0];
  ConstantInt *FirstIndexInt = dyn_cast<ConstantInt>(FirstIndex);
  if (!FirstIndexInt || !FirstIndexInt->isZero())
    return false;

  Value *TableIndex = GEP->idx_begin()[1];
  IntegerType *TableIndexTy = cast<IntegerType>(TableIndex->getType());

  GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getPointerOperand());
  if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer())
    return false;

  Constant *Arr = GV->getInitializer();
  uint64_t NumElements;
  if (ConstantArray *CA = dyn_cast<ConstantArray>(Arr))
    NumElements = CA->getType()->getNumElements();
  else if (ConstantDataArray *CDA = dyn_cast<ConstantDataArray>(Arr))
    NumElements = CDA->getNumElements();
  else
    return false;
  if (NumElements < 2)
    return false;

  // Split the block.
  BasicBlock *OriginalBB = LI->getParent();
  BasicBlock *PostSwitchBB = OriginalBB->splitBasicBlock(LI);

  // Replace OriginalBB's terminator with a switch.
  IRBuilder<> Builder(OriginalBB->getTerminator());
  SwitchInst *Switch = Builder.CreateSwitch(TableIndex, PostSwitchBB,
                                            NumElements - 1);
  OriginalBB->getTerminator()->eraseFromParent();

  // Count the frequency of each value to decide which to use as default.
  SmallDenseMap<Constant*, uint64_t> ValueFreq;
  for (uint64_t I = 0; I < NumElements; ++I)
    ++ValueFreq[Arr->getAggregateElement(I)];
  uint64_t MaxCount = 0;
  Constant *DefaultValue = NULL;
  for (SmallDenseMap<Constant*, uint64_t>::iterator I = ValueFreq.begin(),
       E = ValueFreq.end(); I != E; ++I) {
    if (I->second > MaxCount) {
      MaxCount = I->second;
      DefaultValue = I->first;
    }
  }
  assert(DefaultValue && "No values in the array?");

  // Create the phi node in PostSwitchBB, which will replace the load.
  Builder.SetInsertPoint(PostSwitchBB->begin());
  PHINode *PHI = Builder.CreatePHI(LI->getType(), NumElements);
  PHI->addIncoming(DefaultValue, OriginalBB);

  // Build basic blocks to target with the switch.
  for (uint64_t I = 0; I < NumElements; ++I) {
    Constant *C = Arr->getAggregateElement(I);
    if (C == DefaultValue) continue; // Already covered by the default case.

    BasicBlock *BB = BasicBlock::Create(PostSwitchBB->getContext(),
                                        "lookup.bb",
                                        PostSwitchBB->getParent(),
                                        PostSwitchBB);
    Switch->addCase(ConstantInt::get(TableIndexTy, I), BB);
    Builder.SetInsertPoint(BB);
    Builder.CreateBr(PostSwitchBB);
    PHI->addIncoming(C, BB);
  }

  // Remove the load.
  LI->replaceAllUsesWith(PHI);
  LI->eraseFromParent();

  // Clean up.
  if (GEP->use_empty())
    GEP->eraseFromParent();
  if (GV->hasUnnamedAddr() && GV->hasPrivateLinkage() && GV->use_empty())
    GV->eraseFromParent();

  CurInstIterator = Switch;
  return true;
}