1//===- CodeGenPrepare.cpp - Prepare a function for code generation --------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This pass munges the code in the input function to better prepare it for
// SelectionDAG-based code generation. This works around limitations in its
12// basic-block-at-a-time approach. It should eventually be removed.
13//
14//===----------------------------------------------------------------------===//
15
16#define DEBUG_TYPE "codegenprepare"
17#include "llvm/Transforms/Scalar.h"
18#include "llvm/ADT/DenseMap.h"
19#include "llvm/ADT/SmallSet.h"
20#include "llvm/ADT/Statistic.h"
21#include "llvm/ADT/ValueMap.h"
22#include "llvm/Analysis/DominatorInternals.h"
23#include "llvm/Analysis/Dominators.h"
24#include "llvm/Analysis/InstructionSimplify.h"
25#include "llvm/Assembly/Writer.h"
26#include "llvm/IR/Constants.h"
27#include "llvm/IR/DataLayout.h"
28#include "llvm/IR/DerivedTypes.h"
29#include "llvm/IR/Function.h"
30#include "llvm/IR/IRBuilder.h"
31#include "llvm/IR/InlineAsm.h"
32#include "llvm/IR/Instructions.h"
33#include "llvm/IR/IntrinsicInst.h"
34#include "llvm/Pass.h"
35#include "llvm/Support/CallSite.h"
36#include "llvm/Support/CommandLine.h"
37#include "llvm/Support/Debug.h"
38#include "llvm/Support/GetElementPtrTypeIterator.h"
39#include "llvm/Support/PatternMatch.h"
40#include "llvm/Support/ValueHandle.h"
41#include "llvm/Support/raw_ostream.h"
42#include "llvm/Target/TargetLibraryInfo.h"
43#include "llvm/Target/TargetLowering.h"
44#include "llvm/Transforms/Utils/BasicBlockUtils.h"
45#include "llvm/Transforms/Utils/BuildLibCalls.h"
46#include "llvm/Transforms/Utils/BypassSlowDivision.h"
47#include "llvm/Transforms/Utils/Local.h"
48using namespace llvm;
49using namespace llvm::PatternMatch;
50
51STATISTIC(NumBlocksElim, "Number of blocks eliminated");
52STATISTIC(NumPHIsElim,   "Number of trivial PHIs eliminated");
53STATISTIC(NumGEPsElim,   "Number of GEPs converted to casts");
54STATISTIC(NumCmpUses, "Number of uses of Cmp expressions replaced with uses of "
55                      "sunken Cmps");
56STATISTIC(NumCastUses, "Number of uses of Cast expressions replaced with uses "
57                       "of sunken Casts");
58STATISTIC(NumMemoryInsts, "Number of memory instructions whose address "
59                          "computations were sunk");
60STATISTIC(NumExtsMoved,  "Number of [s|z]ext instructions combined with loads");
61STATISTIC(NumExtUses,    "Number of uses of [s|z]ext instructions optimized");
62STATISTIC(NumRetsDup,    "Number of return instructions duplicated");
63STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved");
64STATISTIC(NumSelectsExpanded, "Number of selects turned into branches");
65
66static cl::opt<bool> DisableBranchOpts(
67  "disable-cgp-branch-opts", cl::Hidden, cl::init(false),
68  cl::desc("Disable branch optimizations in CodeGenPrepare"));
69
70static cl::opt<bool> DisableSelectToBranch(
71  "disable-cgp-select2branch", cl::Hidden, cl::init(false),
72  cl::desc("Disable select to branch conversion."));
73
74namespace {
75  class CodeGenPrepare : public FunctionPass {
    /// TLI - Keep a pointer to a TargetLowering to consult for determining
77    /// transformation profitability.
78    const TargetMachine *TM;
79    const TargetLowering *TLI;
80    const TargetLibraryInfo *TLInfo;
81    DominatorTree *DT;
82
83    /// CurInstIterator - As we scan instructions optimizing them, this is the
84    /// next instruction to optimize.  Xforms that can invalidate this should
85    /// update it.
86    BasicBlock::iterator CurInstIterator;
87
88    /// Keeps track of non-local addresses that have been sunk into a block.
89    /// This allows us to avoid inserting duplicate code for blocks with
90    /// multiple load/stores of the same address.
91    ValueMap<Value*, Value*> SunkAddrs;
92
    /// ModifiedDT - If the CFG is modified in any way, the dominator tree may
    /// need to be updated.
95    bool ModifiedDT;
96
97    /// OptSize - True if optimizing for size.
98    bool OptSize;
99
100  public:
101    static char ID; // Pass identification, replacement for typeid
102    explicit CodeGenPrepare(const TargetMachine *TM = 0)
103      : FunctionPass(ID), TM(TM), TLI(0) {
104        initializeCodeGenPreparePass(*PassRegistry::getPassRegistry());
105      }
106    bool runOnFunction(Function &F);
107
108    const char *getPassName() const { return "CodeGen Prepare"; }
109
110    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
111      AU.addPreserved<DominatorTree>();
112      AU.addRequired<TargetLibraryInfo>();
113    }
114
115  private:
116    bool EliminateFallThrough(Function &F);
117    bool EliminateMostlyEmptyBlocks(Function &F);
118    bool CanMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
119    void EliminateMostlyEmptyBlock(BasicBlock *BB);
120    bool OptimizeBlock(BasicBlock &BB);
121    bool OptimizeInst(Instruction *I);
122    bool OptimizeMemoryInst(Instruction *I, Value *Addr, Type *AccessTy);
123    bool OptimizeInlineAsmInst(CallInst *CS);
124    bool OptimizeCallInst(CallInst *CI);
125    bool MoveExtToFormExtLoad(Instruction *I);
126    bool OptimizeExtUses(Instruction *I);
127    bool OptimizeSelectInst(SelectInst *SI);
128    bool DupRetToEnableTailCallOpts(BasicBlock *BB);
129    bool PlaceDbgValues(Function &F);
130  };
131}
132
133char CodeGenPrepare::ID = 0;
134INITIALIZE_PASS_BEGIN(CodeGenPrepare, "codegenprepare",
135                "Optimize for code generation", false, false)
136INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfo)
137INITIALIZE_PASS_END(CodeGenPrepare, "codegenprepare",
138                "Optimize for code generation", false, false)
139
140FunctionPass *llvm::createCodeGenPreparePass(const TargetMachine *TM) {
141  return new CodeGenPrepare(TM);
142}
143
144bool CodeGenPrepare::runOnFunction(Function &F) {
145  bool EverMadeChange = false;
146
147  ModifiedDT = false;
148  if (TM) TLI = TM->getTargetLowering();
149  TLInfo = &getAnalysis<TargetLibraryInfo>();
150  DT = getAnalysisIfAvailable<DominatorTree>();
151  OptSize = F.getAttributes().hasAttribute(AttributeSet::FunctionIndex,
152                                           Attribute::OptimizeForSize);
153
154  /// This optimization identifies DIV instructions that can be
155  /// profitably bypassed and carried out with a shorter, faster divide.
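  ///
  /// As a rough sketch (not target-specific): on a target that bypasses
  /// 64-bit divides with 32-bit ones, bypassSlowDivision rewrites
  ///   %res = udiv i64 %a, %b
  /// into a runtime check of whether both operands fit in 32 bits, selecting
  /// between a narrow 32-bit divide (extended back to i64) on the fast path
  /// and the original 64-bit divide on the slow path.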
156  if (!OptSize && TLI && TLI->isSlowDivBypassed()) {
157    const DenseMap<unsigned int, unsigned int> &BypassWidths =
158       TLI->getBypassSlowDivWidths();
    for (Function::iterator I = F.begin(); I != F.end(); ++I)
160      EverMadeChange |= bypassSlowDivision(F, I, BypassWidths);
161  }
162
163  // Eliminate blocks that contain only PHI nodes and an
164  // unconditional branch.
165  EverMadeChange |= EliminateMostlyEmptyBlocks(F);
166
  // If llvm.dbg.value is far away from the value then iSel may not be able
  // to handle it properly. iSel will drop llvm.dbg.value if it cannot
  // find a node corresponding to the value.
170  EverMadeChange |= PlaceDbgValues(F);
171
172  bool MadeChange = true;
173  while (MadeChange) {
174    MadeChange = false;
175    for (Function::iterator I = F.begin(); I != F.end(); ) {
176      BasicBlock *BB = I++;
177      MadeChange |= OptimizeBlock(*BB);
178    }
179    EverMadeChange |= MadeChange;
180  }
181
182  SunkAddrs.clear();
183
184  if (!DisableBranchOpts) {
185    MadeChange = false;
186    SmallPtrSet<BasicBlock*, 8> WorkList;
187    for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) {
188      SmallVector<BasicBlock*, 2> Successors(succ_begin(BB), succ_end(BB));
189      MadeChange |= ConstantFoldTerminator(BB, true);
190      if (!MadeChange) continue;
191
192      for (SmallVectorImpl<BasicBlock*>::iterator
193             II = Successors.begin(), IE = Successors.end(); II != IE; ++II)
194        if (pred_begin(*II) == pred_end(*II))
195          WorkList.insert(*II);
196    }
197
198    // Delete the dead blocks and any of their dead successors.
199    MadeChange |= !WorkList.empty();
200    while (!WorkList.empty()) {
201      BasicBlock *BB = *WorkList.begin();
202      WorkList.erase(BB);
203      SmallVector<BasicBlock*, 2> Successors(succ_begin(BB), succ_end(BB));
204
205      DeleteDeadBlock(BB);
206
207      for (SmallVectorImpl<BasicBlock*>::iterator
208             II = Successors.begin(), IE = Successors.end(); II != IE; ++II)
209        if (pred_begin(*II) == pred_end(*II))
210          WorkList.insert(*II);
211    }
212
213    // Merge pairs of basic blocks with unconditional branches, connected by
214    // a single edge.
215    if (EverMadeChange || MadeChange)
216      MadeChange |= EliminateFallThrough(F);
217
218    if (MadeChange)
219      ModifiedDT = true;
220    EverMadeChange |= MadeChange;
221  }
222
223  if (ModifiedDT && DT)
224    DT->DT->recalculate(F);
225
226  return EverMadeChange;
227}
228
229/// EliminateFallThrough - Merge basic blocks which are connected
230/// by a single edge, where one of the basic blocks has a single successor
231/// pointing to the other basic block, which has a single predecessor.
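///
/// For illustration (a sketch, not taken from a real test case):
/// @code
/// bb0:
///   br label %bb1
/// bb1:                                   ; preds = %bb0
///   %x = add i32 %a, 1
///   ret i32 %x
/// @endcode
/// Here the two blocks are merged into a single block containing the add and
/// the ret.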
232bool CodeGenPrepare::EliminateFallThrough(Function &F) {
233  bool Changed = false;
234  // Scan all of the blocks in the function, except for the entry block.
235  for (Function::iterator I = ++F.begin(), E = F.end(); I != E; ) {
236    BasicBlock *BB = I++;
237    // If the destination block has a single pred, then this is a trivial
238    // edge, just collapse it.
239    BasicBlock *SinglePred = BB->getSinglePredecessor();
240
241    // Don't merge if BB's address is taken.
242    if (!SinglePred || SinglePred == BB || BB->hasAddressTaken()) continue;
243
244    BranchInst *Term = dyn_cast<BranchInst>(SinglePred->getTerminator());
245    if (Term && !Term->isConditional()) {
246      Changed = true;
247      DEBUG(dbgs() << "To merge:\n"<< *SinglePred << "\n\n\n");
248      // Remember if SinglePred was the entry block of the function.
249      // If so, we will need to move BB back to the entry position.
250      bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
251      MergeBasicBlockIntoOnlyPred(BB, this);
252
253      if (isEntry && BB != &BB->getParent()->getEntryBlock())
254        BB->moveBefore(&BB->getParent()->getEntryBlock());
255
256      // We have erased a block. Update the iterator.
257      I = BB;
258    }
259  }
260  return Changed;
261}
262
263/// EliminateMostlyEmptyBlocks - eliminate blocks that contain only PHI nodes,
264/// debug info directives, and an unconditional branch.  Passes before isel
265/// (e.g. LSR/loopsimplify) often split edges in ways that are non-optimal for
266/// isel.  Start by eliminating these blocks so we can split them the way we
267/// want them.
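///
/// A typical candidate looks roughly like (illustrative only):
/// @code
/// critedge:                              ; preds = %entry
///   %p = phi i32 [ %v, %entry ]
///   br label %merge
/// @endcode
/// where the block contains nothing but PHI nodes (and possibly debug
/// intrinsics) followed by an unconditional branch, and can be folded into
/// its successor.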
268bool CodeGenPrepare::EliminateMostlyEmptyBlocks(Function &F) {
269  bool MadeChange = false;
270  // Note that this intentionally skips the entry block.
271  for (Function::iterator I = ++F.begin(), E = F.end(); I != E; ) {
272    BasicBlock *BB = I++;
273
274    // If this block doesn't end with an uncond branch, ignore it.
275    BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
276    if (!BI || !BI->isUnconditional())
277      continue;
278
279    // If the instruction before the branch (skipping debug info) isn't a phi
280    // node, then other stuff is happening here.
281    BasicBlock::iterator BBI = BI;
282    if (BBI != BB->begin()) {
283      --BBI;
284      while (isa<DbgInfoIntrinsic>(BBI)) {
285        if (BBI == BB->begin())
286          break;
287        --BBI;
288      }
289      if (!isa<DbgInfoIntrinsic>(BBI) && !isa<PHINode>(BBI))
290        continue;
291    }
292
293    // Do not break infinite loops.
294    BasicBlock *DestBB = BI->getSuccessor(0);
295    if (DestBB == BB)
296      continue;
297
298    if (!CanMergeBlocks(BB, DestBB))
299      continue;
300
301    EliminateMostlyEmptyBlock(BB);
302    MadeChange = true;
303  }
304  return MadeChange;
305}
306
307/// CanMergeBlocks - Return true if we can merge BB into DestBB if there is a
308/// single uncond branch between them, and BB contains no other non-phi
309/// instructions.
310bool CodeGenPrepare::CanMergeBlocks(const BasicBlock *BB,
311                                    const BasicBlock *DestBB) const {
312  // We only want to eliminate blocks whose phi nodes are used by phi nodes in
  // the successor.  If there are more complex conditions (e.g. preheaders),
314  // don't mess around with them.
315  BasicBlock::const_iterator BBI = BB->begin();
316  while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) {
317    for (Value::const_use_iterator UI = PN->use_begin(), E = PN->use_end();
318         UI != E; ++UI) {
319      const Instruction *User = cast<Instruction>(*UI);
320      if (User->getParent() != DestBB || !isa<PHINode>(User))
321        return false;
      // If User is a PHINode inside DestBB then check its incoming values.
      // If an incoming value is not from BB then this is a complex condition
      // (e.g. preheaders) that we want to avoid here.
325      if (User->getParent() == DestBB) {
326        if (const PHINode *UPN = dyn_cast<PHINode>(User))
327          for (unsigned I = 0, E = UPN->getNumIncomingValues(); I != E; ++I) {
328            Instruction *Insn = dyn_cast<Instruction>(UPN->getIncomingValue(I));
329            if (Insn && Insn->getParent() == BB &&
330                Insn->getParent() != UPN->getIncomingBlock(I))
331              return false;
332          }
333      }
334    }
335  }
336
337  // If BB and DestBB contain any common predecessors, then the phi nodes in BB
338  // and DestBB may have conflicting incoming values for the block.  If so, we
339  // can't merge the block.
340  const PHINode *DestBBPN = dyn_cast<PHINode>(DestBB->begin());
341  if (!DestBBPN) return true;  // no conflict.
342
343  // Collect the preds of BB.
344  SmallPtrSet<const BasicBlock*, 16> BBPreds;
345  if (const PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
346    // It is faster to get preds from a PHI than with pred_iterator.
347    for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
348      BBPreds.insert(BBPN->getIncomingBlock(i));
349  } else {
350    BBPreds.insert(pred_begin(BB), pred_end(BB));
351  }
352
353  // Walk the preds of DestBB.
354  for (unsigned i = 0, e = DestBBPN->getNumIncomingValues(); i != e; ++i) {
355    BasicBlock *Pred = DestBBPN->getIncomingBlock(i);
356    if (BBPreds.count(Pred)) {   // Common predecessor?
357      BBI = DestBB->begin();
358      while (const PHINode *PN = dyn_cast<PHINode>(BBI++)) {
359        const Value *V1 = PN->getIncomingValueForBlock(Pred);
360        const Value *V2 = PN->getIncomingValueForBlock(BB);
361
362        // If V2 is a phi node in BB, look up what the mapped value will be.
363        if (const PHINode *V2PN = dyn_cast<PHINode>(V2))
364          if (V2PN->getParent() == BB)
365            V2 = V2PN->getIncomingValueForBlock(Pred);
366
367        // If there is a conflict, bail out.
368        if (V1 != V2) return false;
369      }
370    }
371  }
372
373  return true;
374}
375
376
/// EliminateMostlyEmptyBlock - Eliminate a basic block that has only phi nodes
/// and an unconditional branch in it.
379void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) {
380  BranchInst *BI = cast<BranchInst>(BB->getTerminator());
381  BasicBlock *DestBB = BI->getSuccessor(0);
382
383  DEBUG(dbgs() << "MERGING MOSTLY EMPTY BLOCKS - BEFORE:\n" << *BB << *DestBB);
384
385  // If the destination block has a single pred, then this is a trivial edge,
386  // just collapse it.
387  if (BasicBlock *SinglePred = DestBB->getSinglePredecessor()) {
388    if (SinglePred != DestBB) {
389      // Remember if SinglePred was the entry block of the function.  If so, we
390      // will need to move BB back to the entry position.
391      bool isEntry = SinglePred == &SinglePred->getParent()->getEntryBlock();
392      MergeBasicBlockIntoOnlyPred(DestBB, this);
393
394      if (isEntry && BB != &BB->getParent()->getEntryBlock())
395        BB->moveBefore(&BB->getParent()->getEntryBlock());
396
397      DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
398      return;
399    }
400  }
401
402  // Otherwise, we have multiple predecessors of BB.  Update the PHIs in DestBB
403  // to handle the new incoming edges it is about to have.
404  PHINode *PN;
405  for (BasicBlock::iterator BBI = DestBB->begin();
406       (PN = dyn_cast<PHINode>(BBI)); ++BBI) {
407    // Remove the incoming value for BB, and remember it.
408    Value *InVal = PN->removeIncomingValue(BB, false);
409
410    // Two options: either the InVal is a phi node defined in BB or it is some
411    // value that dominates BB.
412    PHINode *InValPhi = dyn_cast<PHINode>(InVal);
413    if (InValPhi && InValPhi->getParent() == BB) {
414      // Add all of the input values of the input PHI as inputs of this phi.
415      for (unsigned i = 0, e = InValPhi->getNumIncomingValues(); i != e; ++i)
416        PN->addIncoming(InValPhi->getIncomingValue(i),
417                        InValPhi->getIncomingBlock(i));
418    } else {
419      // Otherwise, add one instance of the dominating value for each edge that
420      // we will be adding.
421      if (PHINode *BBPN = dyn_cast<PHINode>(BB->begin())) {
422        for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
423          PN->addIncoming(InVal, BBPN->getIncomingBlock(i));
424      } else {
425        for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
426          PN->addIncoming(InVal, *PI);
427      }
428    }
429  }
430
431  // The PHIs are now updated, change everything that refers to BB to use
432  // DestBB and remove BB.
433  BB->replaceAllUsesWith(DestBB);
434  if (DT && !ModifiedDT) {
435    BasicBlock *BBIDom  = DT->getNode(BB)->getIDom()->getBlock();
436    BasicBlock *DestBBIDom = DT->getNode(DestBB)->getIDom()->getBlock();
437    BasicBlock *NewIDom = DT->findNearestCommonDominator(BBIDom, DestBBIDom);
438    DT->changeImmediateDominator(DestBB, NewIDom);
439    DT->eraseNode(BB);
440  }
441  BB->eraseFromParent();
442  ++NumBlocksElim;
443
444  DEBUG(dbgs() << "AFTER:\n" << *DestBB << "\n\n\n");
445}
446
447/// OptimizeNoopCopyExpression - If the specified cast instruction is a noop
448/// copy (e.g. it's casting from one pointer type to another, i32->i8 on PPC),
449/// sink it into user blocks to reduce the number of virtual
450/// registers that must be created and coalesced.
451///
452/// Return true if any changes are made.
453///
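/// As an illustrative sketch, assume a target where i8 is promoted to i32, so
/// the truncate below becomes a no-op copy after type legalization:
/// @code
/// bb0:
///   %t = trunc i32 %x to i8
///   br label %bb1
/// bb1:
///   %u = add i8 %t, 1
/// @endcode
/// A copy of the trunc is inserted into bb1 and the use there is rewritten to
/// it, so %t no longer needs to live across the block boundary.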
454static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI){
455  // If this is a noop copy,
456  EVT SrcVT = TLI.getValueType(CI->getOperand(0)->getType());
457  EVT DstVT = TLI.getValueType(CI->getType());
458
  // If this is an fp<->int conversion, it is not a noop copy.
460  if (SrcVT.isInteger() != DstVT.isInteger())
461    return false;
462
463  // If this is an extension, it will be a zero or sign extension, which
464  // isn't a noop.
465  if (SrcVT.bitsLT(DstVT)) return false;
466
467  // If these values will be promoted, find out what they will be promoted
468  // to.  This helps us consider truncates on PPC as noop copies when they
469  // are.
470  if (TLI.getTypeAction(CI->getContext(), SrcVT) ==
471      TargetLowering::TypePromoteInteger)
472    SrcVT = TLI.getTypeToTransformTo(CI->getContext(), SrcVT);
473  if (TLI.getTypeAction(CI->getContext(), DstVT) ==
474      TargetLowering::TypePromoteInteger)
475    DstVT = TLI.getTypeToTransformTo(CI->getContext(), DstVT);
476
477  // If, after promotion, these are the same types, this is a noop copy.
478  if (SrcVT != DstVT)
479    return false;
480
481  BasicBlock *DefBB = CI->getParent();
482
483  /// InsertedCasts - Only insert a cast in each block once.
484  DenseMap<BasicBlock*, CastInst*> InsertedCasts;
485
486  bool MadeChange = false;
487  for (Value::use_iterator UI = CI->use_begin(), E = CI->use_end();
488       UI != E; ) {
489    Use &TheUse = UI.getUse();
490    Instruction *User = cast<Instruction>(*UI);
491
492    // Figure out which BB this cast is used in.  For PHI's this is the
493    // appropriate predecessor block.
494    BasicBlock *UserBB = User->getParent();
495    if (PHINode *PN = dyn_cast<PHINode>(User)) {
496      UserBB = PN->getIncomingBlock(UI);
497    }
498
499    // Preincrement use iterator so we don't invalidate it.
500    ++UI;
501
502    // If this user is in the same block as the cast, don't change the cast.
503    if (UserBB == DefBB) continue;
504
505    // If we have already inserted a cast into this block, use it.
506    CastInst *&InsertedCast = InsertedCasts[UserBB];
507
508    if (!InsertedCast) {
509      BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
510      InsertedCast =
511        CastInst::Create(CI->getOpcode(), CI->getOperand(0), CI->getType(), "",
512                         InsertPt);
513      MadeChange = true;
514    }
515
516    // Replace a use of the cast with a use of the new cast.
517    TheUse = InsertedCast;
518    ++NumCastUses;
519  }
520
521  // If we removed all uses, nuke the cast.
522  if (CI->use_empty()) {
523    CI->eraseFromParent();
524    MadeChange = true;
525  }
526
527  return MadeChange;
528}
529
530/// OptimizeCmpExpression - sink the given CmpInst into user blocks to reduce
531/// the number of virtual registers that must be created and coalesced.  This is
532/// a clear win except on targets with multiple condition code registers
533///  (PowerPC), where it might lose; some adjustment may be wanted there.
534///
535/// Return true if any changes are made.
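///
/// For example (a sketch), a compare defined in one block but branched on in
/// another:
/// @code
/// bb0:
///   %c = icmp eq i32 %a, %b
///   br label %bb1
/// bb1:
///   br i1 %c, label %t, label %f
/// @endcode
/// gets a duplicate icmp inserted into bb1, so the branch can use a condition
/// defined in its own block.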
536static bool OptimizeCmpExpression(CmpInst *CI) {
537  BasicBlock *DefBB = CI->getParent();
538
539  /// InsertedCmp - Only insert a cmp in each block once.
540  DenseMap<BasicBlock*, CmpInst*> InsertedCmps;
541
542  bool MadeChange = false;
543  for (Value::use_iterator UI = CI->use_begin(), E = CI->use_end();
544       UI != E; ) {
545    Use &TheUse = UI.getUse();
546    Instruction *User = cast<Instruction>(*UI);
547
548    // Preincrement use iterator so we don't invalidate it.
549    ++UI;
550
551    // Don't bother for PHI nodes.
552    if (isa<PHINode>(User))
553      continue;
554
555    // Figure out which BB this cmp is used in.
556    BasicBlock *UserBB = User->getParent();
557
558    // If this user is in the same block as the cmp, don't change the cmp.
559    if (UserBB == DefBB) continue;
560
561    // If we have already inserted a cmp into this block, use it.
562    CmpInst *&InsertedCmp = InsertedCmps[UserBB];
563
564    if (!InsertedCmp) {
565      BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
566      InsertedCmp =
567        CmpInst::Create(CI->getOpcode(),
568                        CI->getPredicate(),  CI->getOperand(0),
569                        CI->getOperand(1), "", InsertPt);
570      MadeChange = true;
571    }
572
573    // Replace a use of the cmp with a use of the new cmp.
574    TheUse = InsertedCmp;
575    ++NumCmpUses;
576  }
577
578  // If we removed all uses, nuke the cmp.
579  if (CI->use_empty())
580    CI->eraseFromParent();
581
582  return MadeChange;
583}
584
585namespace {
586class CodeGenPrepareFortifiedLibCalls : public SimplifyFortifiedLibCalls {
587protected:
588  void replaceCall(Value *With) {
589    CI->replaceAllUsesWith(With);
590    CI->eraseFromParent();
591  }
592  bool isFoldable(unsigned SizeCIOp, unsigned, bool) const {
    if (ConstantInt *SizeCI =
          dyn_cast<ConstantInt>(CI->getArgOperand(SizeCIOp)))
      return SizeCI->isAllOnesValue();
    return false;
597  }
598};
599} // end anonymous namespace
600
601bool CodeGenPrepare::OptimizeCallInst(CallInst *CI) {
602  BasicBlock *BB = CI->getParent();
603
604  // Lower inline assembly if we can.
  // If we found an inline asm expression, and if the target knows how to
606  // lower it to normal LLVM code, do so now.
607  if (TLI && isa<InlineAsm>(CI->getCalledValue())) {
608    if (TLI->ExpandInlineAsm(CI)) {
609      // Avoid invalidating the iterator.
610      CurInstIterator = BB->begin();
611      // Avoid processing instructions out of order, which could cause
612      // reuse before a value is defined.
613      SunkAddrs.clear();
614      return true;
615    }
616    // Sink address computing for memory operands into the block.
617    if (OptimizeInlineAsmInst(CI))
618      return true;
619  }
620
621  // Lower all uses of llvm.objectsize.*
622  IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI);
623  if (II && II->getIntrinsicID() == Intrinsic::objectsize) {
624    bool Min = (cast<ConstantInt>(II->getArgOperand(1))->getZExtValue() == 1);
625    Type *ReturnTy = CI->getType();
626    Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL);
627
628    // Substituting this can cause recursive simplifications, which can
629    // invalidate our iterator.  Use a WeakVH to hold onto it in case this
630    // happens.
631    WeakVH IterHandle(CurInstIterator);
632
633    replaceAndRecursivelySimplify(CI, RetVal, TLI ? TLI->getDataLayout() : 0,
634                                  TLInfo, ModifiedDT ? 0 : DT);
635
636    // If the iterator instruction was recursively deleted, start over at the
637    // start of the block.
638    if (IterHandle != CurInstIterator) {
639      CurInstIterator = BB->begin();
640      SunkAddrs.clear();
641    }
642    return true;
643  }
644
645  if (II && TLI) {
646    SmallVector<Value*, 2> PtrOps;
647    Type *AccessTy;
648    if (TLI->GetAddrModeArguments(II, PtrOps, AccessTy))
649      while (!PtrOps.empty())
650        if (OptimizeMemoryInst(II, PtrOps.pop_back_val(), AccessTy))
651          return true;
652  }
653
654  // From here on out we're working with named functions.
655  if (CI->getCalledFunction() == 0) return false;
656
657  // We'll need DataLayout from here on out.
658  const DataLayout *TD = TLI ? TLI->getDataLayout() : 0;
659  if (!TD) return false;
660
661  // Lower all default uses of _chk calls.  This is very similar
662  // to what InstCombineCalls does, but here we are only lowering calls
663  // that have the default "don't know" as the objectsize.  Anything else
664  // should be left alone.
665  CodeGenPrepareFortifiedLibCalls Simplifier;
666  return Simplifier.fold(CI, TD, TLInfo);
667}
668
669/// DupRetToEnableTailCallOpts - Look for opportunities to duplicate return
670/// instructions to the predecessor to enable tail call optimizations. The
671/// case it is currently looking for is:
672/// @code
673/// bb0:
674///   %tmp0 = tail call i32 @f0()
675///   br label %return
676/// bb1:
677///   %tmp1 = tail call i32 @f1()
678///   br label %return
679/// bb2:
680///   %tmp2 = tail call i32 @f2()
681///   br label %return
682/// return:
683///   %retval = phi i32 [ %tmp0, %bb0 ], [ %tmp1, %bb1 ], [ %tmp2, %bb2 ]
684///   ret i32 %retval
685/// @endcode
686///
687/// =>
688///
689/// @code
690/// bb0:
691///   %tmp0 = tail call i32 @f0()
692///   ret i32 %tmp0
693/// bb1:
694///   %tmp1 = tail call i32 @f1()
695///   ret i32 %tmp1
696/// bb2:
697///   %tmp2 = tail call i32 @f2()
698///   ret i32 %tmp2
699/// @endcode
700bool CodeGenPrepare::DupRetToEnableTailCallOpts(BasicBlock *BB) {
701  if (!TLI)
702    return false;
703
704  ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator());
705  if (!RI)
706    return false;
707
708  PHINode *PN = 0;
709  BitCastInst *BCI = 0;
710  Value *V = RI->getReturnValue();
711  if (V) {
712    BCI = dyn_cast<BitCastInst>(V);
713    if (BCI)
714      V = BCI->getOperand(0);
715
716    PN = dyn_cast<PHINode>(V);
717    if (!PN)
718      return false;
719  }
720
721  if (PN && PN->getParent() != BB)
722    return false;
723
724  // It's not safe to eliminate the sign / zero extension of the return value.
725  // See llvm::isInTailCallPosition().
726  const Function *F = BB->getParent();
727  AttributeSet CallerAttrs = F->getAttributes();
728  if (CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt) ||
729      CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt))
730    return false;
731
732  // Make sure there are no instructions between the PHI and return, or that the
733  // return is the first instruction in the block.
734  if (PN) {
735    BasicBlock::iterator BI = BB->begin();
736    do { ++BI; } while (isa<DbgInfoIntrinsic>(BI));
737    if (&*BI == BCI)
738      // Also skip over the bitcast.
739      ++BI;
740    if (&*BI != RI)
741      return false;
742  } else {
743    BasicBlock::iterator BI = BB->begin();
744    while (isa<DbgInfoIntrinsic>(BI)) ++BI;
745    if (&*BI != RI)
746      return false;
747  }
748
749  /// Only dup the ReturnInst if the CallInst is likely to be emitted as a tail
750  /// call.
751  SmallVector<CallInst*, 4> TailCalls;
752  if (PN) {
753    for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) {
754      CallInst *CI = dyn_cast<CallInst>(PN->getIncomingValue(I));
755      // Make sure the phi value is indeed produced by the tail call.
756      if (CI && CI->hasOneUse() && CI->getParent() == PN->getIncomingBlock(I) &&
757          TLI->mayBeEmittedAsTailCall(CI))
758        TailCalls.push_back(CI);
759    }
760  } else {
761    SmallPtrSet<BasicBlock*, 4> VisitedBBs;
762    for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) {
763      if (!VisitedBBs.insert(*PI))
764        continue;
765
766      BasicBlock::InstListType &InstList = (*PI)->getInstList();
767      BasicBlock::InstListType::reverse_iterator RI = InstList.rbegin();
768      BasicBlock::InstListType::reverse_iterator RE = InstList.rend();
769      do { ++RI; } while (RI != RE && isa<DbgInfoIntrinsic>(&*RI));
770      if (RI == RE)
771        continue;
772
773      CallInst *CI = dyn_cast<CallInst>(&*RI);
774      if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI))
775        TailCalls.push_back(CI);
776    }
777  }
778
779  bool Changed = false;
780  for (unsigned i = 0, e = TailCalls.size(); i != e; ++i) {
781    CallInst *CI = TailCalls[i];
782    CallSite CS(CI);
783
784    // Conservatively require the attributes of the call to match those of the
785    // return. Ignore noalias because it doesn't affect the call sequence.
786    AttributeSet CalleeAttrs = CS.getAttributes();
787    if (AttrBuilder(CalleeAttrs, AttributeSet::ReturnIndex).
788          removeAttribute(Attribute::NoAlias) !=
789        AttrBuilder(CalleeAttrs, AttributeSet::ReturnIndex).
790          removeAttribute(Attribute::NoAlias))
791      continue;
792
793    // Make sure the call instruction is followed by an unconditional branch to
794    // the return block.
795    BasicBlock *CallBB = CI->getParent();
796    BranchInst *BI = dyn_cast<BranchInst>(CallBB->getTerminator());
797    if (!BI || !BI->isUnconditional() || BI->getSuccessor(0) != BB)
798      continue;
799
800    // Duplicate the return into CallBB.
801    (void)FoldReturnIntoUncondBranch(RI, BB, CallBB);
802    ModifiedDT = Changed = true;
803    ++NumRetsDup;
804  }
805
806  // If we eliminated all predecessors of the block, delete the block now.
807  if (Changed && !BB->hasAddressTaken() && pred_begin(BB) == pred_end(BB))
808    BB->eraseFromParent();
809
810  return Changed;
811}
812
813//===----------------------------------------------------------------------===//
814// Memory Optimization
815//===----------------------------------------------------------------------===//
816
817namespace {
818
819/// ExtAddrMode - This is an extended version of TargetLowering::AddrMode
820/// which holds actual Value*'s for register values.
821struct ExtAddrMode : public TargetLowering::AddrMode {
822  Value *BaseReg;
823  Value *ScaledReg;
824  ExtAddrMode() : BaseReg(0), ScaledReg(0) {}
825  void print(raw_ostream &OS) const;
826  void dump() const;
827
828  bool operator==(const ExtAddrMode& O) const {
829    return (BaseReg == O.BaseReg) && (ScaledReg == O.ScaledReg) &&
830           (BaseGV == O.BaseGV) && (BaseOffs == O.BaseOffs) &&
831           (HasBaseReg == O.HasBaseReg) && (Scale == O.Scale);
832  }
833};
834
835#ifndef NDEBUG
836static inline raw_ostream &operator<<(raw_ostream &OS, const ExtAddrMode &AM) {
837  AM.print(OS);
838  return OS;
839}
840#endif
841
842void ExtAddrMode::print(raw_ostream &OS) const {
843  bool NeedPlus = false;
844  OS << "[";
845  if (BaseGV) {
846    OS << (NeedPlus ? " + " : "")
847       << "GV:";
848    WriteAsOperand(OS, BaseGV, /*PrintType=*/false);
849    NeedPlus = true;
850  }
851
  if (BaseOffs) {
    OS << (NeedPlus ? " + " : "") << BaseOffs;
    NeedPlus = true;
  }
854
855  if (BaseReg) {
856    OS << (NeedPlus ? " + " : "")
857       << "Base:";
858    WriteAsOperand(OS, BaseReg, /*PrintType=*/false);
859    NeedPlus = true;
860  }
861  if (Scale) {
862    OS << (NeedPlus ? " + " : "")
863       << Scale << "*";
864    WriteAsOperand(OS, ScaledReg, /*PrintType=*/false);
865  }
866
867  OS << ']';
868}
869
870#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
871void ExtAddrMode::dump() const {
872  print(dbgs());
873  dbgs() << '\n';
874}
875#endif
876
877
878/// \brief A helper class for matching addressing modes.
879///
880/// This encapsulates the logic for matching the target-legal addressing modes.
881class AddressingModeMatcher {
882  SmallVectorImpl<Instruction*> &AddrModeInsts;
883  const TargetLowering &TLI;
884
885  /// AccessTy/MemoryInst - This is the type for the access (e.g. double) and
886  /// the memory instruction that we're computing this address for.
887  Type *AccessTy;
888  Instruction *MemoryInst;
889
890  /// AddrMode - This is the addressing mode that we're building up.  This is
891  /// part of the return value of this addressing mode matching stuff.
892  ExtAddrMode &AddrMode;
893
894  /// IgnoreProfitability - This is set to true when we should not do
895  /// profitability checks.  When true, IsProfitableToFoldIntoAddressingMode
896  /// always returns true.
897  bool IgnoreProfitability;
898
899  AddressingModeMatcher(SmallVectorImpl<Instruction*> &AMI,
900                        const TargetLowering &T, Type *AT,
901                        Instruction *MI, ExtAddrMode &AM)
902    : AddrModeInsts(AMI), TLI(T), AccessTy(AT), MemoryInst(MI), AddrMode(AM) {
903    IgnoreProfitability = false;
904  }
905public:
906
907  /// Match - Find the maximal addressing mode that a load/store of V can fold,
  /// given an access type of AccessTy.  This returns a list of involved
909  /// instructions in AddrModeInsts.
910  static ExtAddrMode Match(Value *V, Type *AccessTy,
911                           Instruction *MemoryInst,
912                           SmallVectorImpl<Instruction*> &AddrModeInsts,
913                           const TargetLowering &TLI) {
914    ExtAddrMode Result;
915
916    bool Success =
917      AddressingModeMatcher(AddrModeInsts, TLI, AccessTy,
918                            MemoryInst, Result).MatchAddr(V, 0);
919    (void)Success; assert(Success && "Couldn't select *anything*?");
920    return Result;
921  }
922private:
923  bool MatchScaledValue(Value *ScaleReg, int64_t Scale, unsigned Depth);
924  bool MatchAddr(Value *V, unsigned Depth);
925  bool MatchOperationAddr(User *Operation, unsigned Opcode, unsigned Depth);
926  bool IsProfitableToFoldIntoAddressingMode(Instruction *I,
927                                            ExtAddrMode &AMBefore,
928                                            ExtAddrMode &AMAfter);
929  bool ValueAlreadyLiveAtInst(Value *Val, Value *KnownLive1, Value *KnownLive2);
930};
931
932/// MatchScaledValue - Try adding ScaleReg*Scale to the current addressing mode.
933/// Return true and update AddrMode if this addr mode is legal for the target,
934/// false if not.
935bool AddressingModeMatcher::MatchScaledValue(Value *ScaleReg, int64_t Scale,
936                                             unsigned Depth) {
937  // If Scale is 1, then this is the same as adding ScaleReg to the addressing
938  // mode.  Just process that directly.
939  if (Scale == 1)
940    return MatchAddr(ScaleReg, Depth);
941
942  // If the scale is 0, it takes nothing to add this.
943  if (Scale == 0)
944    return true;
945
946  // If we already have a scale of this value, we can add to it, otherwise, we
947  // need an available scale field.
948  if (AddrMode.Scale != 0 && AddrMode.ScaledReg != ScaleReg)
949    return false;
950
951  ExtAddrMode TestAddrMode = AddrMode;
952
953  // Add scale to turn X*4+X*3 -> X*7.  This could also do things like
954  // [A+B + A*7] -> [B+A*8].
955  TestAddrMode.Scale += Scale;
956  TestAddrMode.ScaledReg = ScaleReg;
957
958  // If the new address isn't legal, bail out.
959  if (!TLI.isLegalAddressingMode(TestAddrMode, AccessTy))
960    return false;
961
962  // It was legal, so commit it.
963  AddrMode = TestAddrMode;
964
965  // Okay, we decided that we can add ScaleReg+Scale to AddrMode.  Check now
966  // to see if ScaleReg is actually X+C.  If so, we can turn this into adding
967  // X*Scale + C*Scale to addr mode.
968  ConstantInt *CI = 0; Value *AddLHS = 0;
969  if (isa<Instruction>(ScaleReg) &&  // not a constant expr.
970      match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI)))) {
971    TestAddrMode.ScaledReg = AddLHS;
972    TestAddrMode.BaseOffs += CI->getSExtValue()*TestAddrMode.Scale;
973
974    // If this addressing mode is legal, commit it and remember that we folded
975    // this instruction.
976    if (TLI.isLegalAddressingMode(TestAddrMode, AccessTy)) {
977      AddrModeInsts.push_back(cast<Instruction>(ScaleReg));
978      AddrMode = TestAddrMode;
979      return true;
980    }
981  }
982
983  // Otherwise, not (x+c)*scale, just return what we have.
984  return true;
985}
986
987/// MightBeFoldableInst - This is a little filter, which returns true if an
988/// addressing computation involving I might be folded into a load/store
989/// accessing it.  This doesn't need to be perfect, but needs to accept at least
990/// the set of instructions that MatchOperationAddr can.
991static bool MightBeFoldableInst(Instruction *I) {
992  switch (I->getOpcode()) {
993  case Instruction::BitCast:
994    // Don't touch identity bitcasts.
995    if (I->getType() == I->getOperand(0)->getType())
996      return false;
997    return I->getType()->isPointerTy() || I->getType()->isIntegerTy();
998  case Instruction::PtrToInt:
999    // PtrToInt is always a noop, as we know that the int type is pointer sized.
1000    return true;
1001  case Instruction::IntToPtr:
1002    // We know the input is intptr_t, so this is foldable.
1003    return true;
1004  case Instruction::Add:
1005    return true;
1006  case Instruction::Mul:
1007  case Instruction::Shl:
1008    // Can only handle X*C and X << C.
1009    return isa<ConstantInt>(I->getOperand(1));
1010  case Instruction::GetElementPtr:
1011    return true;
1012  default:
1013    return false;
1014  }
1015}
1016
1017/// MatchOperationAddr - Given an instruction or constant expr, see if we can
1018/// fold the operation into the addressing mode.  If so, update the addressing
1019/// mode and return true, otherwise return false without modifying AddrMode.
1020bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
1021                                               unsigned Depth) {
1022  // Avoid exponential behavior on extremely deep expression trees.
1023  if (Depth >= 5) return false;
1024
1025  switch (Opcode) {
1026  case Instruction::PtrToInt:
1027    // PtrToInt is always a noop, as we know that the int type is pointer sized.
1028    return MatchAddr(AddrInst->getOperand(0), Depth);
1029  case Instruction::IntToPtr:
1030    // This inttoptr is a no-op if the integer type is pointer sized.
1031    if (TLI.getValueType(AddrInst->getOperand(0)->getType()) ==
1032        TLI.getPointerTy(AddrInst->getType()->getPointerAddressSpace()))
1033      return MatchAddr(AddrInst->getOperand(0), Depth);
1034    return false;
1035  case Instruction::BitCast:
1036    // BitCast is always a noop, and we can handle it as long as it is
1037    // int->int or pointer->pointer (we don't want int<->fp or something).
1038    if ((AddrInst->getOperand(0)->getType()->isPointerTy() ||
1039         AddrInst->getOperand(0)->getType()->isIntegerTy()) &&
1040        // Don't touch identity bitcasts.  These were probably put here by LSR,
1041        // and we don't want to mess around with them.  Assume it knows what it
1042        // is doing.
1043        AddrInst->getOperand(0)->getType() != AddrInst->getType())
1044      return MatchAddr(AddrInst->getOperand(0), Depth);
1045    return false;
1046  case Instruction::Add: {
1047    // Check to see if we can merge in the RHS then the LHS.  If so, we win.
1048    ExtAddrMode BackupAddrMode = AddrMode;
1049    unsigned OldSize = AddrModeInsts.size();
1050    if (MatchAddr(AddrInst->getOperand(1), Depth+1) &&
1051        MatchAddr(AddrInst->getOperand(0), Depth+1))
1052      return true;
1053
1054    // Restore the old addr mode info.
1055    AddrMode = BackupAddrMode;
1056    AddrModeInsts.resize(OldSize);
1057
1058    // Otherwise this was over-aggressive.  Try merging in the LHS then the RHS.
1059    if (MatchAddr(AddrInst->getOperand(0), Depth+1) &&
1060        MatchAddr(AddrInst->getOperand(1), Depth+1))
1061      return true;
1062
1063    // Otherwise we definitely can't merge the ADD in.
1064    AddrMode = BackupAddrMode;
1065    AddrModeInsts.resize(OldSize);
1066    break;
1067  }
1068  //case Instruction::Or:
1069  // TODO: We can handle "Or Val, Imm" iff this OR is equivalent to an ADD.
1070  //break;
1071  case Instruction::Mul:
1072  case Instruction::Shl: {
1073    // Can only handle X*C and X << C.
1074    ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1));
1075    if (!RHS) return false;
1076    int64_t Scale = RHS->getSExtValue();
1077    if (Opcode == Instruction::Shl)
1078      Scale = 1LL << Scale;
1079
1080    return MatchScaledValue(AddrInst->getOperand(0), Scale, Depth);
1081  }
1082  case Instruction::GetElementPtr: {
    // Scan the GEP.  We check whether it contains constant offsets and at most
    // one variable offset.
1085    int VariableOperand = -1;
1086    unsigned VariableScale = 0;
1087
1088    int64_t ConstantOffset = 0;
1089    const DataLayout *TD = TLI.getDataLayout();
1090    gep_type_iterator GTI = gep_type_begin(AddrInst);
1091    for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) {
1092      if (StructType *STy = dyn_cast<StructType>(*GTI)) {
1093        const StructLayout *SL = TD->getStructLayout(STy);
1094        unsigned Idx =
1095          cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue();
1096        ConstantOffset += SL->getElementOffset(Idx);
1097      } else {
1098        uint64_t TypeSize = TD->getTypeAllocSize(GTI.getIndexedType());
1099        if (ConstantInt *CI = dyn_cast<ConstantInt>(AddrInst->getOperand(i))) {
1100          ConstantOffset += CI->getSExtValue()*TypeSize;
1101        } else if (TypeSize) {  // Scales of zero don't do anything.
1102          // We only allow one variable index at the moment.
1103          if (VariableOperand != -1)
1104            return false;
1105
1106          // Remember the variable index.
1107          VariableOperand = i;
1108          VariableScale = TypeSize;
1109        }
1110      }
1111    }
1112
1113    // A common case is for the GEP to only do a constant offset.  In this case,
1114    // just add it to the disp field and check validity.
1115    if (VariableOperand == -1) {
1116      AddrMode.BaseOffs += ConstantOffset;
1117      if (ConstantOffset == 0 || TLI.isLegalAddressingMode(AddrMode, AccessTy)){
1118        // Check to see if we can fold the base pointer in too.
1119        if (MatchAddr(AddrInst->getOperand(0), Depth+1))
1120          return true;
1121      }
1122      AddrMode.BaseOffs -= ConstantOffset;
1123      return false;
1124    }
1125
1126    // Save the valid addressing mode in case we can't match.
1127    ExtAddrMode BackupAddrMode = AddrMode;
1128    unsigned OldSize = AddrModeInsts.size();
1129
1130    // See if the scale and offset amount is valid for this target.
1131    AddrMode.BaseOffs += ConstantOffset;
1132
1133    // Match the base operand of the GEP.
1134    if (!MatchAddr(AddrInst->getOperand(0), Depth+1)) {
1135      // If it couldn't be matched, just stuff the value in a register.
1136      if (AddrMode.HasBaseReg) {
1137        AddrMode = BackupAddrMode;
1138        AddrModeInsts.resize(OldSize);
1139        return false;
1140      }
1141      AddrMode.HasBaseReg = true;
1142      AddrMode.BaseReg = AddrInst->getOperand(0);
1143    }
1144
1145    // Match the remaining variable portion of the GEP.
1146    if (!MatchScaledValue(AddrInst->getOperand(VariableOperand), VariableScale,
1147                          Depth)) {
1148      // If it couldn't be matched, try stuffing the base into a register
1149      // instead of matching it, and retrying the match of the scale.
1150      AddrMode = BackupAddrMode;
1151      AddrModeInsts.resize(OldSize);
1152      if (AddrMode.HasBaseReg)
1153        return false;
1154      AddrMode.HasBaseReg = true;
1155      AddrMode.BaseReg = AddrInst->getOperand(0);
1156      AddrMode.BaseOffs += ConstantOffset;
1157      if (!MatchScaledValue(AddrInst->getOperand(VariableOperand),
1158                            VariableScale, Depth)) {
1159        // If even that didn't work, bail.
1160        AddrMode = BackupAddrMode;
1161        AddrModeInsts.resize(OldSize);
1162        return false;
1163      }
1164    }
1165
1166    return true;
1167  }
1168  }
1169  return false;
1170}
1171
1172/// MatchAddr - If we can, try to add the value of 'Addr' into the current
1173/// addressing mode.  If Addr can't be added to AddrMode this returns false and
1174/// leaves AddrMode unmodified.  This assumes that Addr is either a pointer type
1175/// or intptr_t for the target.
1176///
1177bool AddressingModeMatcher::MatchAddr(Value *Addr, unsigned Depth) {
1178  if (ConstantInt *CI = dyn_cast<ConstantInt>(Addr)) {
1179    // Fold in immediates if legal for the target.
1180    AddrMode.BaseOffs += CI->getSExtValue();
1181    if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
1182      return true;
1183    AddrMode.BaseOffs -= CI->getSExtValue();
1184  } else if (GlobalValue *GV = dyn_cast<GlobalValue>(Addr)) {
1185    // If this is a global variable, try to fold it into the addressing mode.
1186    if (AddrMode.BaseGV == 0) {
1187      AddrMode.BaseGV = GV;
1188      if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
1189        return true;
1190      AddrMode.BaseGV = 0;
1191    }
1192  } else if (Instruction *I = dyn_cast<Instruction>(Addr)) {
1193    ExtAddrMode BackupAddrMode = AddrMode;
1194    unsigned OldSize = AddrModeInsts.size();
1195
1196    // Check to see if it is possible to fold this operation.
1197    if (MatchOperationAddr(I, I->getOpcode(), Depth)) {
1198      // Okay, it's possible to fold this.  Check to see if it is actually
1199      // *profitable* to do so.  We use a simple cost model to avoid increasing
1200      // register pressure too much.
1201      if (I->hasOneUse() ||
1202          IsProfitableToFoldIntoAddressingMode(I, BackupAddrMode, AddrMode)) {
1203        AddrModeInsts.push_back(I);
1204        return true;
1205      }
1206
1207      // It isn't profitable to do this, roll back.
1208      //cerr << "NOT FOLDING: " << *I;
1209      AddrMode = BackupAddrMode;
1210      AddrModeInsts.resize(OldSize);
1211    }
1212  } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Addr)) {
1213    if (MatchOperationAddr(CE, CE->getOpcode(), Depth))
1214      return true;
1215  } else if (isa<ConstantPointerNull>(Addr)) {
1216    // Null pointer gets folded without affecting the addressing mode.
1217    return true;
1218  }
1219
  // Worst case, the target should support [reg] addressing modes. :)
1221  if (!AddrMode.HasBaseReg) {
1222    AddrMode.HasBaseReg = true;
1223    AddrMode.BaseReg = Addr;
1224    // Still check for legality in case the target supports [imm] but not [i+r].
1225    if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
1226      return true;
1227    AddrMode.HasBaseReg = false;
1228    AddrMode.BaseReg = 0;
1229  }
1230
1231  // If the base register is already taken, see if we can do [r+r].
1232  if (AddrMode.Scale == 0) {
1233    AddrMode.Scale = 1;
1234    AddrMode.ScaledReg = Addr;
1235    if (TLI.isLegalAddressingMode(AddrMode, AccessTy))
1236      return true;
1237    AddrMode.Scale = 0;
1238    AddrMode.ScaledReg = 0;
1239  }
1240  // Couldn't match.
1241  return false;
1242}
1243
1244/// IsOperandAMemoryOperand - Check to see if all uses of OpVal by the specified
1245/// inline asm call are due to memory operands.  If so, return true, otherwise
1246/// return false.
1247static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
1248                                    const TargetLowering &TLI) {
  TargetLowering::AsmOperandInfoVector TargetConstraints =
    TLI.ParseConstraints(ImmutableCallSite(CI));
1250  for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
1251    TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
1252
1253    // Compute the constraint code and ConstraintType to use.
1254    TLI.ComputeConstraintToUse(OpInfo, SDValue());
1255
1256    // If this asm operand is our Value*, and if it isn't an indirect memory
1257    // operand, we can't fold it!
1258    if (OpInfo.CallOperandVal == OpVal &&
1259        (OpInfo.ConstraintType != TargetLowering::C_Memory ||
1260         !OpInfo.isIndirect))
1261      return false;
1262  }
1263
1264  return true;
1265}
1266
1267/// FindAllMemoryUses - Recursively walk all the uses of I until we find a
1268/// memory use.  If we find an obviously non-foldable instruction, return true.
1269/// Add the ultimately found memory instructions to MemoryUses.
1270static bool FindAllMemoryUses(Instruction *I,
1271                SmallVectorImpl<std::pair<Instruction*,unsigned> > &MemoryUses,
1272                              SmallPtrSet<Instruction*, 16> &ConsideredInsts,
1273                              const TargetLowering &TLI) {
1274  // If we already considered this instruction, we're done.
1275  if (!ConsideredInsts.insert(I))
1276    return false;
1277
1278  // If this is an obviously unfoldable instruction, bail out.
1279  if (!MightBeFoldableInst(I))
1280    return true;
1281
1282  // Loop over all the uses, recursively processing them.
1283  for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
1284       UI != E; ++UI) {
1285    User *U = *UI;
1286
1287    if (LoadInst *LI = dyn_cast<LoadInst>(U)) {
1288      MemoryUses.push_back(std::make_pair(LI, UI.getOperandNo()));
1289      continue;
1290    }
1291
1292    if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
1293      unsigned opNo = UI.getOperandNo();
1294      if (opNo == 0) return true; // Storing addr, not into addr.
1295      MemoryUses.push_back(std::make_pair(SI, opNo));
1296      continue;
1297    }
1298
1299    if (CallInst *CI = dyn_cast<CallInst>(U)) {
1300      InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue());
1301      if (!IA) return true;
1302
1303      // If this is a memory operand, we're cool, otherwise bail out.
1304      if (!IsOperandAMemoryOperand(CI, IA, I, TLI))
1305        return true;
1306      continue;
1307    }
1308
1309    if (FindAllMemoryUses(cast<Instruction>(U), MemoryUses, ConsideredInsts,
1310                          TLI))
1311      return true;
1312  }
1313
1314  return false;
1315}
1316
/// ValueAlreadyLiveAtInst - Return true if Val is already known to be live at
1318/// the use site that we're folding it into.  If so, there is no cost to
1319/// include it in the addressing mode.  KnownLive1 and KnownLive2 are two values
1320/// that we know are live at the instruction already.
1321bool AddressingModeMatcher::ValueAlreadyLiveAtInst(Value *Val,Value *KnownLive1,
1322                                                   Value *KnownLive2) {
1323  // If Val is either of the known-live values, we know it is live!
1324  if (Val == 0 || Val == KnownLive1 || Val == KnownLive2)
1325    return true;
1326
1327  // All values other than instructions and arguments (e.g. constants) are live.
1328  if (!isa<Instruction>(Val) && !isa<Argument>(Val)) return true;
1329
  // If Val is a constant sized alloca in the entry block, it is live; this is
  // because it is just a reference to the stack/frame pointer, which is live
  // for the whole function.
1333  if (AllocaInst *AI = dyn_cast<AllocaInst>(Val))
1334    if (AI->isStaticAlloca())
1335      return true;
1336
1337  // Check to see if this value is already used in the memory instruction's
1338  // block.  If so, it's already live into the block at the very least, so we
1339  // can reasonably fold it.
1340  return Val->isUsedInBasicBlock(MemoryInst->getParent());
1341}
1342
1343/// IsProfitableToFoldIntoAddressingMode - It is possible for the addressing
1344/// mode of the machine to fold the specified instruction into a load or store
1345/// that ultimately uses it.  However, the specified instruction has multiple
1346/// uses.  Given this, it may actually increase register pressure to fold it
1347/// into the load.  For example, consider this code:
1348///
1349///     X = ...
1350///     Y = X+1
1351///     use(Y)   -> nonload/store
1352///     Z = Y+1
1353///     load Z
1354///
1355/// In this case, Y has multiple uses, and can be folded into the load of Z
1356/// (yielding load [X+2]).  However, doing this will cause both "X" and "X+1" to
1357/// be live at the use(Y) line.  If we don't fold Y into load Z, we use one
1358/// fewer register.  Since Y can't be folded into "use(Y)" we don't increase the
1359/// number of computations either.
1360///
1361/// Note that this (like most of CodeGenPrepare) is just a rough heuristic.  If
1362/// X was live across 'load Z' for other reasons, we actually *would* want to
1363/// fold the addressing mode in the Z case.  This would make Y die earlier.
1364bool AddressingModeMatcher::
1365IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
1366                                     ExtAddrMode &AMAfter) {
1367  if (IgnoreProfitability) return true;
1368
1369  // AMBefore is the addressing mode before this instruction was folded into it,
1370  // and AMAfter is the addressing mode after the instruction was folded.  Get
1371  // the set of registers referenced by AMAfter and subtract out those
1372  // referenced by AMBefore: this is the set of values which folding in this
1373  // address extends the lifetime of.
1374  //
1375  // Note that there are only two potential values being referenced here,
1376  // BaseReg and ScaleReg (global addresses are always available, as are any
1377  // folded immediates).
1378  Value *BaseReg = AMAfter.BaseReg, *ScaledReg = AMAfter.ScaledReg;
1379
1380  // If the BaseReg or ScaledReg was referenced by the previous addrmode, their
1381  // lifetime wasn't extended by adding this instruction.
1382  if (ValueAlreadyLiveAtInst(BaseReg, AMBefore.BaseReg, AMBefore.ScaledReg))
1383    BaseReg = 0;
1384  if (ValueAlreadyLiveAtInst(ScaledReg, AMBefore.BaseReg, AMBefore.ScaledReg))
1385    ScaledReg = 0;
1386
  // If folding this instruction (and its subexprs) didn't extend any live
1388  // ranges, we're ok with it.
1389  if (BaseReg == 0 && ScaledReg == 0)
1390    return true;
1391
1392  // If all uses of this instruction are ultimately load/store/inlineasm's,
1393  // check to see if their addressing modes will include this instruction.  If
1394  // so, we can fold it into all uses, so it doesn't matter if it has multiple
1395  // uses.
1396  SmallVector<std::pair<Instruction*,unsigned>, 16> MemoryUses;
1397  SmallPtrSet<Instruction*, 16> ConsideredInsts;
1398  if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI))
1399    return false;  // Has a non-memory, non-foldable use!
1400
1401  // Now that we know that all uses of this instruction are part of a chain of
1402  // computation involving only operations that could theoretically be folded
1403  // into a memory use, loop over each of these uses and see if they could
1404  // *actually* fold the instruction.
1405  SmallVector<Instruction*, 32> MatchedAddrModeInsts;
1406  for (unsigned i = 0, e = MemoryUses.size(); i != e; ++i) {
1407    Instruction *User = MemoryUses[i].first;
1408    unsigned OpNo = MemoryUses[i].second;
1409
1410    // Get the access type of this use.  If the use isn't a pointer, we don't
1411    // know what it accesses.
1412    Value *Address = User->getOperand(OpNo);
1413    if (!Address->getType()->isPointerTy())
1414      return false;
1415    Type *AddressAccessTy = Address->getType()->getPointerElementType();
1416
1417    // Do a match against the root of this address, ignoring profitability. This
1418    // will tell us if the addressing mode for the memory operation will
1419    // *actually* cover the shared instruction.
1420    ExtAddrMode Result;
1421    AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI, AddressAccessTy,
1422                                  MemoryInst, Result);
1423    Matcher.IgnoreProfitability = true;
1424    bool Success = Matcher.MatchAddr(Address, 0);
1425    (void)Success; assert(Success && "Couldn't select *anything*?");
1426
1427    // If the match didn't cover I, then it won't be shared by it.
1428    if (std::find(MatchedAddrModeInsts.begin(), MatchedAddrModeInsts.end(),
1429                  I) == MatchedAddrModeInsts.end())
1430      return false;
1431
1432    MatchedAddrModeInsts.clear();
1433  }
1434
1435  return true;
1436}
1437
1438} // end anonymous namespace
1439
/// IsNonLocalValue - Return true if the specified value is defined in a
/// different basic block than BB.
1442static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
1443  if (Instruction *I = dyn_cast<Instruction>(V))
1444    return I->getParent() != BB;
1445  return false;
1446}
1447
/// OptimizeMemoryInst - Load and store instructions often have
/// addressing modes that can do significant amounts of computation.  As such,
/// instruction selection will try to get the load or store to do as much
/// computation as possible for the program.  The problem is that isel can only
/// see within a single block.  Therefore, we sink as much legal addressing-mode
/// computation into the block as possible.
1454///
1455/// This method is used to optimize both load/store and inline asms with memory
1456/// operands.
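///
/// For example (a sketch; what is actually foldable depends on the target's
/// addressing modes), an address computed in another block:
///
///     %addr = getelementptr i32* %base, i64 %idx
///     ...
///   bb:
///     %v = load i32* %addr
///
/// is rebuilt as an equivalent ptrtoint/mul/add/inttoptr sequence (named
/// "sunkaddr") immediately before the load in 'bb', so instruction selection
/// can fold it into the load's addressing mode.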
1457bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
1458                                        Type *AccessTy) {
1459  Value *Repl = Addr;
1460
1461  // Try to collapse single-value PHI nodes.  This is necessary to undo
1462  // unprofitable PRE transformations.
1463  SmallVector<Value*, 8> worklist;
1464  SmallPtrSet<Value*, 16> Visited;
1465  worklist.push_back(Addr);
1466
  // Use a worklist to iteratively look through PHI nodes, and ensure that
  // the addressing modes obtained from the non-PHI roots of the graph
  // are equivalent.
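  //
  // For example (a sketch of the single-value case this undoes), after an
  // unprofitable PRE we may see:
  //
  //     %p = phi i32* [ %gep1, %bb1 ], [ %gep2, %bb2 ]
  //     %v = load i32* %p
  //
  // If %gep1 and %gep2 yield the same addressing mode, looking through the
  // PHI lets that mode be folded into the load.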
1470  Value *Consensus = 0;
1471  unsigned NumUsesConsensus = 0;
1472  bool IsNumUsesConsensusValid = false;
1473  SmallVector<Instruction*, 16> AddrModeInsts;
1474  ExtAddrMode AddrMode;
1475  while (!worklist.empty()) {
1476    Value *V = worklist.back();
1477    worklist.pop_back();
1478
1479    // Break use-def graph loops.
1480    if (!Visited.insert(V)) {
1481      Consensus = 0;
1482      break;
1483    }
1484
1485    // For a PHI node, push all of its incoming values.
1486    if (PHINode *P = dyn_cast<PHINode>(V)) {
1487      for (unsigned i = 0, e = P->getNumIncomingValues(); i != e; ++i)
1488        worklist.push_back(P->getIncomingValue(i));
1489      continue;
1490    }
1491
1492    // For non-PHIs, determine the addressing mode being computed.
1493    SmallVector<Instruction*, 16> NewAddrModeInsts;
1494    ExtAddrMode NewAddrMode =
1495      AddressingModeMatcher::Match(V, AccessTy, MemoryInst,
1496                                   NewAddrModeInsts, *TLI);
1497
1498    // This check is broken into two cases with very similar code to avoid using
1499    // getNumUses() as much as possible. Some values have a lot of uses, so
1500    // calling getNumUses() unconditionally caused a significant compile-time
1501    // regression.
1502    if (!Consensus) {
1503      Consensus = V;
1504      AddrMode = NewAddrMode;
1505      AddrModeInsts = NewAddrModeInsts;
1506      continue;
1507    } else if (NewAddrMode == AddrMode) {
1508      if (!IsNumUsesConsensusValid) {
1509        NumUsesConsensus = Consensus->getNumUses();
1510        IsNumUsesConsensusValid = true;
1511      }
1512
1513      // Ensure that the obtained addressing mode is equivalent to that obtained
1514      // for all other roots of the PHI traversal.  Also, when choosing one
1515      // such root as representative, select the one with the most uses in order
1516      // to keep the cost modeling heuristics in AddressingModeMatcher
1517      // applicable.
1518      unsigned NumUses = V->getNumUses();
1519      if (NumUses > NumUsesConsensus) {
1520        Consensus = V;
1521        NumUsesConsensus = NumUses;
1522        AddrModeInsts = NewAddrModeInsts;
1523      }
1524      continue;
1525    }
1526
1527    Consensus = 0;
1528    break;
1529  }
1530
1531  // If the addressing mode couldn't be determined, or if multiple different
1532  // ones were determined, bail out now.
1533  if (!Consensus) return false;
1534
  // Check to see if any of the instructions subsumed by this addr mode are
  // non-local to MemoryInst's BB.
1537  bool AnyNonLocal = false;
1538  for (unsigned i = 0, e = AddrModeInsts.size(); i != e; ++i) {
1539    if (IsNonLocalValue(AddrModeInsts[i], MemoryInst->getParent())) {
1540      AnyNonLocal = true;
1541      break;
1542    }
1543  }
1544
1545  // If all the instructions matched are already in this BB, don't do anything.
1546  if (!AnyNonLocal) {
1547    DEBUG(dbgs() << "CGP: Found      local addrmode: " << AddrMode << "\n");
1548    return false;
1549  }
1550
  // Insert this computation right after this user.  Since our caller is
  // scanning from the top of the BB to the bottom, any reuse of the expr is
  // guaranteed to happen later.
1554  IRBuilder<> Builder(MemoryInst);
1555
  // Now that we have determined the addressing expression we want to use and
  // know that we have to sink it into this block, check to see if we have
  // already done this for some other load/store instr in this block.  If so,
  // reuse the computation.
1560  Value *&SunkAddr = SunkAddrs[Addr];
1561  if (SunkAddr) {
1562    DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode << " for "
1563                 << *MemoryInst);
1564    if (SunkAddr->getType() != Addr->getType())
1565      SunkAddr = Builder.CreateBitCast(SunkAddr, Addr->getType());
1566  } else {
1567    DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "
1568                 << *MemoryInst);
1569    Type *IntPtrTy = TLI->getDataLayout()->getIntPtrType(Addr->getType());
1570    Value *Result = 0;
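    // The address is rebuilt below term by term as, conceptually:
    //   sunkaddr = (intptr)BaseReg + (intptr)ScaledReg * Scale
    //            + (intptr)BaseGV + BaseOffs
    // with any term that is absent from AddrMode simply omitted.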
1571
1572    // Start with the base register. Do this first so that subsequent address
1573    // matching finds it last, which will prevent it from trying to match it
1574    // as the scaled value in case it happens to be a mul. That would be
1575    // problematic if we've sunk a different mul for the scale, because then
1576    // we'd end up sinking both muls.
1577    if (AddrMode.BaseReg) {
1578      Value *V = AddrMode.BaseReg;
1579      if (V->getType()->isPointerTy())
1580        V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
1581      if (V->getType() != IntPtrTy)
1582        V = Builder.CreateIntCast(V, IntPtrTy, /*isSigned=*/true, "sunkaddr");
1583      Result = V;
1584    }
1585
1586    // Add the scale value.
1587    if (AddrMode.Scale) {
1588      Value *V = AddrMode.ScaledReg;
1589      if (V->getType() == IntPtrTy) {
1590        // done.
1591      } else if (V->getType()->isPointerTy()) {
1592        V = Builder.CreatePtrToInt(V, IntPtrTy, "sunkaddr");
1593      } else if (cast<IntegerType>(IntPtrTy)->getBitWidth() <
1594                 cast<IntegerType>(V->getType())->getBitWidth()) {
1595        V = Builder.CreateTrunc(V, IntPtrTy, "sunkaddr");
1596      } else {
1597        V = Builder.CreateSExt(V, IntPtrTy, "sunkaddr");
1598      }
1599      if (AddrMode.Scale != 1)
1600        V = Builder.CreateMul(V, ConstantInt::get(IntPtrTy, AddrMode.Scale),
1601                              "sunkaddr");
1602      if (Result)
1603        Result = Builder.CreateAdd(Result, V, "sunkaddr");
1604      else
1605        Result = V;
1606    }
1607
1608    // Add in the BaseGV if present.
1609    if (AddrMode.BaseGV) {
1610      Value *V = Builder.CreatePtrToInt(AddrMode.BaseGV, IntPtrTy, "sunkaddr");
1611      if (Result)
1612        Result = Builder.CreateAdd(Result, V, "sunkaddr");
1613      else
1614        Result = V;
1615    }
1616
1617    // Add in the Base Offset if present.
1618    if (AddrMode.BaseOffs) {
1619      Value *V = ConstantInt::get(IntPtrTy, AddrMode.BaseOffs);
1620      if (Result)
1621        Result = Builder.CreateAdd(Result, V, "sunkaddr");
1622      else
1623        Result = V;
1624    }
1625
1626    if (Result == 0)
1627      SunkAddr = Constant::getNullValue(Addr->getType());
1628    else
1629      SunkAddr = Builder.CreateIntToPtr(Result, Addr->getType(), "sunkaddr");
1630  }
1631
1632  MemoryInst->replaceUsesOfWith(Repl, SunkAddr);
1633
  // If the original address computation has no remaining uses, recursively
  // delete it along with any operands that become trivially dead.
1636  if (Repl->use_empty()) {
1637    // This can cause recursive deletion, which can invalidate our iterator.
1638    // Use a WeakVH to hold onto it in case this happens.
1639    WeakVH IterHandle(CurInstIterator);
1640    BasicBlock *BB = CurInstIterator->getParent();
1641
1642    RecursivelyDeleteTriviallyDeadInstructions(Repl, TLInfo);
1643
1644    if (IterHandle != CurInstIterator) {
1645      // If the iterator instruction was recursively deleted, start over at the
1646      // start of the block.
1647      CurInstIterator = BB->begin();
1648      SunkAddrs.clear();
1649    }
1650  }
1651  ++NumMemoryInsts;
1652  return true;
1653}
1654
/// OptimizeInlineAsmInst - If there are any memory operands, use
/// OptimizeMemoryInst to sink their address computation into the block when
/// possible / profitable.
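///
/// For example (a sketch; constraint strings vary by target), the pointer
/// argument of an indirect memory constraint such as
///
///     call void asm sideeffect "prefetcht0 $0", "*m"(i8* %ptr)
///
/// is treated like a load/store address, and its computation may be sunk into
/// this block.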
1658bool CodeGenPrepare::OptimizeInlineAsmInst(CallInst *CS) {
1659  bool MadeChange = false;
1660
1661  TargetLowering::AsmOperandInfoVector
1662    TargetConstraints = TLI->ParseConstraints(CS);
1663  unsigned ArgNo = 0;
1664  for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
1665    TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
1666
1667    // Compute the constraint code and ConstraintType to use.
1668    TLI->ComputeConstraintToUse(OpInfo, SDValue());
1669
1670    if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
1671        OpInfo.isIndirect) {
1672      Value *OpVal = CS->getArgOperand(ArgNo++);
1673      MadeChange |= OptimizeMemoryInst(CS, OpVal, OpVal->getType());
1674    } else if (OpInfo.Type == InlineAsm::isInput)
1675      ArgNo++;
1676  }
1677
1678  return MadeChange;
1679}
1680
1681/// MoveExtToFormExtLoad - Move a zext or sext fed by a load into the same
1682/// basic block as the load, unless conditions are unfavorable. This allows
1683/// SelectionDAG to fold the extend into the load.
1684///
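/// For example (a sketch), with the load and the extension initially in
/// different blocks:
///
///   bb1:
///     %x = load i16* %p
///     br label %bb2
///   bb2:
///     %y = sext i16 %x to i32
///
/// the sext is moved to just after the load in bb1, letting isel emit a single
/// sign-extending load.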
1685bool CodeGenPrepare::MoveExtToFormExtLoad(Instruction *I) {
1686  // Look for a load being extended.
1687  LoadInst *LI = dyn_cast<LoadInst>(I->getOperand(0));
1688  if (!LI) return false;
1689
1690  // If they're already in the same block, there's nothing to do.
1691  if (LI->getParent() == I->getParent())
1692    return false;
1693
1694  // If the load has other users and the truncate is not free, this probably
1695  // isn't worthwhile.
1696  if (!LI->hasOneUse() &&
1697      TLI && (TLI->isTypeLegal(TLI->getValueType(LI->getType())) ||
1698              !TLI->isTypeLegal(TLI->getValueType(I->getType()))) &&
1699      !TLI->isTruncateFree(I->getType(), LI->getType()))
1700    return false;
1701
1702  // Check whether the target supports casts folded into loads.
1703  unsigned LType;
1704  if (isa<ZExtInst>(I))
1705    LType = ISD::ZEXTLOAD;
1706  else {
1707    assert(isa<SExtInst>(I) && "Unexpected ext type!");
1708    LType = ISD::SEXTLOAD;
1709  }
1710  if (TLI && !TLI->isLoadExtLegal(LType, TLI->getValueType(LI->getType())))
1711    return false;
1712
1713  // Move the extend into the same block as the load, so that SelectionDAG
1714  // can fold it.
1715  I->removeFromParent();
1716  I->insertAfter(LI);
1717  ++NumExtsMoved;
1718  return true;
1719}
1720
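/// OptimizeExtUses - If both a {s|z}ext and its source value are live out of
/// the block that defines them, rewrite the source's uses in other blocks to
/// use a (free) trunc of the extended value instead, so only the wider value
/// needs to be live across the block boundary.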
1721bool CodeGenPrepare::OptimizeExtUses(Instruction *I) {
1722  BasicBlock *DefBB = I->getParent();
1723
  // If the result of a {s|z}ext and its source are both live out, rewrite all
  // other uses of the source with the result of the extension.
1726  Value *Src = I->getOperand(0);
1727  if (Src->hasOneUse())
1728    return false;
1729
1730  // Only do this xform if truncating is free.
1731  if (TLI && !TLI->isTruncateFree(I->getType(), Src->getType()))
1732    return false;
1733
1734  // Only safe to perform the optimization if the source is also defined in
1735  // this block.
1736  if (!isa<Instruction>(Src) || DefBB != cast<Instruction>(Src)->getParent())
1737    return false;
1738
1739  bool DefIsLiveOut = false;
1740  for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
1741       UI != E; ++UI) {
1742    Instruction *User = cast<Instruction>(*UI);
1743
1744    // Figure out which BB this ext is used in.
1745    BasicBlock *UserBB = User->getParent();
1746    if (UserBB == DefBB) continue;
1747    DefIsLiveOut = true;
1748    break;
1749  }
1750  if (!DefIsLiveOut)
1751    return false;
1752
  // Make sure none of the out-of-block uses are PHI nodes, loads, or stores.
1754  for (Value::use_iterator UI = Src->use_begin(), E = Src->use_end();
1755       UI != E; ++UI) {
1756    Instruction *User = cast<Instruction>(*UI);
1757    BasicBlock *UserBB = User->getParent();
1758    if (UserBB == DefBB) continue;
1759    // Be conservative. We don't want this xform to end up introducing
1760    // reloads just before load / store instructions.
1761    if (isa<PHINode>(User) || isa<LoadInst>(User) || isa<StoreInst>(User))
1762      return false;
1763  }
1764
  // InsertedTruncs - Only insert one trunc in each block.
1766  DenseMap<BasicBlock*, Instruction*> InsertedTruncs;
1767
1768  bool MadeChange = false;
1769  for (Value::use_iterator UI = Src->use_begin(), E = Src->use_end();
1770       UI != E; ++UI) {
1771    Use &TheUse = UI.getUse();
1772    Instruction *User = cast<Instruction>(*UI);
1773
1774    // Figure out which BB this ext is used in.
1775    BasicBlock *UserBB = User->getParent();
1776    if (UserBB == DefBB) continue;
1777
1778    // Both src and def are live in this block. Rewrite the use.
1779    Instruction *&InsertedTrunc = InsertedTruncs[UserBB];
1780
1781    if (!InsertedTrunc) {
1782      BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
1783      InsertedTrunc = new TruncInst(I, Src->getType(), "", InsertPt);
1784    }
1785
1786    // Replace a use of the {s|z}ext source with a use of the result.
1787    TheUse = InsertedTrunc;
1788    ++NumExtUses;
1789    MadeChange = true;
1790  }
1791
1792  return MadeChange;
1793}
1794
1795/// isFormingBranchFromSelectProfitable - Returns true if a SelectInst should be
1796/// turned into an explicit branch.
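///
/// For example (a sketch), a pattern this treats as profitable to branch on:
///
///     %x = load i32* %p
///     %c = icmp eq i32 %x, 0       ; %x and %c each have exactly one use
///     %r = select i1 %c, i32 %a, i32 %b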
1797static bool isFormingBranchFromSelectProfitable(SelectInst *SI) {
1798  // FIXME: This should use the same heuristics as IfConversion to determine
1799  // whether a select is better represented as a branch.  This requires that
1800  // branch probability metadata is preserved for the select, which is not the
1801  // case currently.
1802
1803  CmpInst *Cmp = dyn_cast<CmpInst>(SI->getCondition());
1804
  // If the branch is predicted right, an out-of-order CPU can avoid blocking on
  // the compare.  For compares with a memory operand, emit a branch instead of
  // a cmov to avoid stalling on the load from memory.  If the compare has more
  // than one use there's probably another cmov or setcc around, so it's not
  // worth emitting a branch.
1810  if (!Cmp)
1811    return false;
1812
1813  Value *CmpOp0 = Cmp->getOperand(0);
1814  Value *CmpOp1 = Cmp->getOperand(1);
1815
1816  // We check that the memory operand has one use to avoid uses of the loaded
1817  // value directly after the compare, making branches unprofitable.
1818  return Cmp->hasOneUse() &&
1819         ((isa<LoadInst>(CmpOp0) && CmpOp0->hasOneUse()) ||
1820          (isa<LoadInst>(CmpOp1) && CmpOp1->hasOneUse()));
1821}
1822
1823
1824/// If we have a SelectInst that will likely profit from branch prediction,
1825/// turn it into a branch.
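///
/// The rewrite produces (a sketch with illustrative value names; select.mid
/// and select.end match the block names created below):
///
///   entry:
///     ...
///     br i1 %cond, label %select.end, label %select.mid
///   select.mid:
///     br label %select.end
///   select.end:
///     %res = phi i32 [ %tv, %entry ], [ %fv, %select.mid ]
///
/// where %tv and %fv are the select's true and false values.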
1826bool CodeGenPrepare::OptimizeSelectInst(SelectInst *SI) {
1827  bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1);
1828
  // Can we convert the 'select' into a branch?
1830  if (DisableSelectToBranch || OptSize || !TLI || VectorCond)
1831    return false;
1832
1833  TargetLowering::SelectSupportKind SelectKind;
1834  if (VectorCond)
1835    SelectKind = TargetLowering::VectorMaskSelect;
1836  else if (SI->getType()->isVectorTy())
1837    SelectKind = TargetLowering::ScalarCondVectorVal;
1838  else
1839    SelectKind = TargetLowering::ScalarValSelect;
1840
  // Do we have efficient codegen support for this kind of 'select'?
1842  if (TLI->isSelectSupported(SelectKind)) {
1843    // We have efficient codegen support for the select instruction.
1844    // Check if it is profitable to keep this 'select'.
1845    if (!TLI->isPredictableSelectExpensive() ||
1846        !isFormingBranchFromSelectProfitable(SI))
1847      return false;
1848  }
1849
1850  ModifiedDT = true;
1851
1852  // First, we split the block containing the select into 2 blocks.
1853  BasicBlock *StartBlock = SI->getParent();
1854  BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(SI));
1855  BasicBlock *NextBlock = StartBlock->splitBasicBlock(SplitPt, "select.end");
1856
1857  // Create a new block serving as the landing pad for the branch.
1858  BasicBlock *SmallBlock = BasicBlock::Create(SI->getContext(), "select.mid",
1859                                             NextBlock->getParent(), NextBlock);
1860
  // Move the unconditional branch from the block containing the select into
  // our landing pad block.
1863  StartBlock->getTerminator()->eraseFromParent();
1864  BranchInst::Create(NextBlock, SmallBlock);
1865
1866  // Insert the real conditional branch based on the original condition.
1867  BranchInst::Create(NextBlock, SmallBlock, SI->getCondition(), SI);
1868
1869  // The select itself is replaced with a PHI Node.
1870  PHINode *PN = PHINode::Create(SI->getType(), 2, "", NextBlock->begin());
1871  PN->takeName(SI);
1872  PN->addIncoming(SI->getTrueValue(), StartBlock);
1873  PN->addIncoming(SI->getFalseValue(), SmallBlock);
1874  SI->replaceAllUsesWith(PN);
1875  SI->eraseFromParent();
1876
1877  // Instruct OptimizeBlock to skip to the next block.
1878  CurInstIterator = StartBlock->end();
1879  ++NumSelectsExpanded;
1880  return true;
1881}
1882
1883bool CodeGenPrepare::OptimizeInst(Instruction *I) {
1884  if (PHINode *P = dyn_cast<PHINode>(I)) {
1885    // It is possible for very late stage optimizations (such as SimplifyCFG)
1886    // to introduce PHI nodes too late to be cleaned up.  If we detect such a
1887    // trivial PHI, go ahead and zap it here.
1888    if (Value *V = SimplifyInstruction(P, TLI ? TLI->getDataLayout() : 0,
1889                                       TLInfo, DT)) {
1890      P->replaceAllUsesWith(V);
1891      P->eraseFromParent();
1892      ++NumPHIsElim;
1893      return true;
1894    }
1895    return false;
1896  }
1897
1898  if (CastInst *CI = dyn_cast<CastInst>(I)) {
1899    // If the source of the cast is a constant, then this should have
1900    // already been constant folded.  The only reason NOT to constant fold
1901    // it is if something (e.g. LSR) was careful to place the constant
    // evaluation in a block other than the one that uses it (e.g. to hoist
1903    // the address of globals out of a loop).  If this is the case, we don't
1904    // want to forward-subst the cast.
1905    if (isa<Constant>(CI->getOperand(0)))
1906      return false;
1907
1908    if (TLI && OptimizeNoopCopyExpression(CI, *TLI))
1909      return true;
1910
1911    if (isa<ZExtInst>(I) || isa<SExtInst>(I)) {
1912      bool MadeChange = MoveExtToFormExtLoad(I);
1913      return MadeChange | OptimizeExtUses(I);
1914    }
1915    return false;
1916  }
1917
1918  if (CmpInst *CI = dyn_cast<CmpInst>(I))
1919    return OptimizeCmpExpression(CI);
1920
1921  if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
1922    if (TLI)
1923      return OptimizeMemoryInst(I, I->getOperand(0), LI->getType());
1924    return false;
1925  }
1926
1927  if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
1928    if (TLI)
1929      return OptimizeMemoryInst(I, SI->getOperand(1),
1930                                SI->getOperand(0)->getType());
1931    return false;
1932  }
1933
1934  if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
1935    if (GEPI->hasAllZeroIndices()) {
      // The GEP operand must be a pointer, so its result must be a pointer
      // too; replace the GEP with an equivalent bitcast.
1937      Instruction *NC = new BitCastInst(GEPI->getOperand(0), GEPI->getType(),
1938                                        GEPI->getName(), GEPI);
1939      GEPI->replaceAllUsesWith(NC);
1940      GEPI->eraseFromParent();
1941      ++NumGEPsElim;
1942      OptimizeInst(NC);
1943      return true;
1944    }
1945    return false;
1946  }
1947
1948  if (CallInst *CI = dyn_cast<CallInst>(I))
1949    return OptimizeCallInst(CI);
1950
1951  if (SelectInst *SI = dyn_cast<SelectInst>(I))
1952    return OptimizeSelectInst(SI);
1953
1954  return false;
1955}
1956
1957// In this pass we look for GEP and cast instructions that are used
1958// across basic blocks and rewrite them to improve basic-block-at-a-time
1959// selection.
1960bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) {
1961  SunkAddrs.clear();
1962  bool MadeChange = false;
1963
1964  CurInstIterator = BB.begin();
1965  while (CurInstIterator != BB.end())
1966    MadeChange |= OptimizeInst(CurInstIterator++);
1967
1968  MadeChange |= DupRetToEnableTailCallOpts(&BB);
1969
1970  return MadeChange;
1971}
1972
// If an llvm.dbg.value is far away from the value it describes, isel may not
// be able to handle it properly.  isel will drop the llvm.dbg.value if it
// cannot find a node corresponding to the value.
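//
// For example (a sketch; debug metadata details elided):
//
//     %v = add i32 %a, %b
//     ...                    ; many unrelated instructions
//     call void @llvm.dbg.value(metadata !{i32 %v}, i64 0, metadata !10)
//
// The llvm.dbg.value call is moved to just after the definition of %v.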
1976bool CodeGenPrepare::PlaceDbgValues(Function &F) {
1977  bool MadeChange = false;
1978  for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) {
1979    Instruction *PrevNonDbgInst = NULL;
1980    for (BasicBlock::iterator BI = I->begin(), BE = I->end(); BI != BE;) {
1981      Instruction *Insn = BI; ++BI;
1982      DbgValueInst *DVI = dyn_cast<DbgValueInst>(Insn);
1983      if (!DVI) {
1984        PrevNonDbgInst = Insn;
1985        continue;
1986      }
1987
1988      Instruction *VI = dyn_cast_or_null<Instruction>(DVI->getValue());
1989      if (VI && VI != PrevNonDbgInst && !VI->isTerminator()) {
1990        DEBUG(dbgs() << "Moving Debug Value before :\n" << *DVI << ' ' << *VI);
1991        DVI->removeFromParent();
1992        if (isa<PHINode>(VI))
1993          DVI->insertBefore(VI->getParent()->getFirstInsertionPt());
1994        else
1995          DVI->insertAfter(VI);
1996        MadeChange = true;
1997        ++NumDbgValueMoved;
1998      }
1999    }
2000  }
2001  return MadeChange;
2002}
2003