X86FastISel.cpp revision 223017
1//===-- X86FastISel.cpp - X86 FastISel implementation ---------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines the X86-specific support for the FastISel class. Much
11// of the target-specific code is generated by tablegen in the file
12// X86GenFastISel.inc, which is #included here.
13//
14//===----------------------------------------------------------------------===//
15
16#include "X86.h"
17#include "X86InstrBuilder.h"
18#include "X86RegisterInfo.h"
19#include "X86Subtarget.h"
20#include "X86TargetMachine.h"
21#include "llvm/CallingConv.h"
22#include "llvm/DerivedTypes.h"
23#include "llvm/GlobalVariable.h"
24#include "llvm/Instructions.h"
25#include "llvm/IntrinsicInst.h"
26#include "llvm/Operator.h"
27#include "llvm/CodeGen/Analysis.h"
28#include "llvm/CodeGen/FastISel.h"
29#include "llvm/CodeGen/FunctionLoweringInfo.h"
30#include "llvm/CodeGen/MachineConstantPool.h"
31#include "llvm/CodeGen/MachineFrameInfo.h"
32#include "llvm/CodeGen/MachineRegisterInfo.h"
33#include "llvm/Support/CallSite.h"
34#include "llvm/Support/ErrorHandling.h"
35#include "llvm/Support/GetElementPtrTypeIterator.h"
36#include "llvm/Target/TargetOptions.h"
37using namespace llvm;
38
39namespace {
40
41class X86FastISel : public FastISel {
42  /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
43  /// make the right decision when generating code for different targets.
44  const X86Subtarget *Subtarget;
45
46  /// StackPtr - Register used as the stack pointer.
47  ///
48  unsigned StackPtr;
49
50  /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87
51  /// floating point ops.
52  /// When SSE is available, use it for f32 operations.
53  /// When SSE2 is available, use it for f64 operations.
54  bool X86ScalarSSEf64;
55  bool X86ScalarSSEf32;
56
57public:
58  explicit X86FastISel(FunctionLoweringInfo &funcInfo) : FastISel(funcInfo) {
59    Subtarget = &TM.getSubtarget<X86Subtarget>();
60    StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;
61    X86ScalarSSEf64 = Subtarget->hasSSE2();
62    X86ScalarSSEf32 = Subtarget->hasSSE1();
63  }
64
65  virtual bool TargetSelectInstruction(const Instruction *I);
66
67  /// TryToFoldLoad - The specified machine instr operand is a vreg, and that
68  /// vreg is being provided by the specified load instruction.  If possible,
69  /// try to fold the load as an operand to the instruction, returning true on
70  /// success.
71  virtual bool TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
72                             const LoadInst *LI);
73
74#include "X86GenFastISel.inc"
75
76private:
77  bool X86FastEmitCompare(const Value *LHS, const Value *RHS, EVT VT);
78
79  bool X86FastEmitLoad(EVT VT, const X86AddressMode &AM, unsigned &RR);
80
81  bool X86FastEmitStore(EVT VT, const Value *Val, const X86AddressMode &AM);
82  bool X86FastEmitStore(EVT VT, unsigned Val, const X86AddressMode &AM);
83
84  bool X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT,
85                         unsigned &ResultReg);
86
87  bool X86SelectAddress(const Value *V, X86AddressMode &AM);
88  bool X86SelectCallAddress(const Value *V, X86AddressMode &AM);
89
90  bool X86SelectLoad(const Instruction *I);
91
92  bool X86SelectStore(const Instruction *I);
93
94  bool X86SelectRet(const Instruction *I);
95
96  bool X86SelectCmp(const Instruction *I);
97
98  bool X86SelectZExt(const Instruction *I);
99
100  bool X86SelectBranch(const Instruction *I);
101
102  bool X86SelectShift(const Instruction *I);
103
104  bool X86SelectSelect(const Instruction *I);
105
106  bool X86SelectTrunc(const Instruction *I);
107
108  bool X86SelectFPExt(const Instruction *I);
109  bool X86SelectFPTrunc(const Instruction *I);
110
111  bool X86VisitIntrinsicCall(const IntrinsicInst &I);
112  bool X86SelectCall(const Instruction *I);
113
114  bool DoSelectCall(const Instruction *I, const char *MemIntName);
115
116  const X86InstrInfo *getInstrInfo() const {
117    return getTargetMachine()->getInstrInfo();
118  }
119  const X86TargetMachine *getTargetMachine() const {
120    return static_cast<const X86TargetMachine *>(&TM);
121  }
122
123  unsigned TargetMaterializeConstant(const Constant *C);
124
125  unsigned TargetMaterializeAlloca(const AllocaInst *C);
126
127  unsigned TargetMaterializeFloatZero(const ConstantFP *CF);
128
129  /// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is
130  /// computed in an SSE register, not on the X87 floating point stack.
131  bool isScalarFPTypeInSSEReg(EVT VT) const {
132    return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 when SSE2 is available
133      (VT == MVT::f32 && X86ScalarSSEf32);   // f32 when SSE1 is available
134  }
135
136  bool isTypeLegal(const Type *Ty, MVT &VT, bool AllowI1 = false);
137
138  bool IsMemcpySmall(uint64_t Len);
139
140  bool TryEmitSmallMemcpy(X86AddressMode DestAM,
141                          X86AddressMode SrcAM, uint64_t Len);
142};
143
144} // end anonymous namespace.
145
146bool X86FastISel::isTypeLegal(const Type *Ty, MVT &VT, bool AllowI1) {
147  EVT evt = TLI.getValueType(Ty, /*HandleUnknown=*/true);
148  if (evt == MVT::Other || !evt.isSimple())
149    // Unhandled type. Halt "fast" selection and bail.
150    return false;
151
152  VT = evt.getSimpleVT();
153  // For now, require SSE/SSE2 for performing floating-point operations,
154  // since x87 requires additional work.
155  if (VT == MVT::f64 && !X86ScalarSSEf64)
156     return false;
157  if (VT == MVT::f32 && !X86ScalarSSEf32)
158     return false;
159  // Similarly, no f80 support yet.
160  if (VT == MVT::f80)
161    return false;
162  // We only handle legal types. For example, on x86-32 the instruction
163  // selector contains all of the 64-bit instructions from x86-64,
164  // under the assumption that i64 won't be used if the target doesn't
165  // support it.
166  return (AllowI1 && VT == MVT::i1) || TLI.isTypeLegal(VT);
167}
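// For illustration: on x86-64 with SSE2, isTypeLegal above accepts scalar types
// such as i8, i16, i32, i64, f32 and f64 (plus i1 when AllowI1 is set), while
// f80 and any type TLI reports as illegal make the selector bail out so the
// instruction is handled by SelectionDAG instead.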
168
169#include "X86GenCallingConv.inc"
170
171/// X86FastEmitLoad - Emit a machine instruction to load a value of type VT.
172/// The address is given by the pre-computed X86AddressMode AM.
173/// Return true and the result register by reference if it is possible.
174bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM,
175                                  unsigned &ResultReg) {
176  // Get opcode and regclass of the output for the given load instruction.
177  unsigned Opc = 0;
178  const TargetRegisterClass *RC = NULL;
179  switch (VT.getSimpleVT().SimpleTy) {
180  default: return false;
181  case MVT::i1:
182  case MVT::i8:
183    Opc = X86::MOV8rm;
184    RC  = X86::GR8RegisterClass;
185    break;
186  case MVT::i16:
187    Opc = X86::MOV16rm;
188    RC  = X86::GR16RegisterClass;
189    break;
190  case MVT::i32:
191    Opc = X86::MOV32rm;
192    RC  = X86::GR32RegisterClass;
193    break;
194  case MVT::i64:
195    // Must be in x86-64 mode.
196    Opc = X86::MOV64rm;
197    RC  = X86::GR64RegisterClass;
198    break;
199  case MVT::f32:
200    if (Subtarget->hasSSE1()) {
201      Opc = X86::MOVSSrm;
202      RC  = X86::FR32RegisterClass;
203    } else {
204      Opc = X86::LD_Fp32m;
205      RC  = X86::RFP32RegisterClass;
206    }
207    break;
208  case MVT::f64:
209    if (Subtarget->hasSSE2()) {
210      Opc = X86::MOVSDrm;
211      RC  = X86::FR64RegisterClass;
212    } else {
213      Opc = X86::LD_Fp64m;
214      RC  = X86::RFP64RegisterClass;
215    }
216    break;
217  case MVT::f80:
218    // No f80 support yet.
219    return false;
220  }
221
222  ResultReg = createResultReg(RC);
223  addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
224                         DL, TII.get(Opc), ResultReg), AM);
225  return true;
226}
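// For illustration, the table above maps "%v = load i32* %p" to MOV32rm with
// the address folded into the instruction, and an f64 load to MOVSDrm when
// SSE2 is available (LD_Fp64m on x87-only subtargets).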
227
228/// X86FastEmitStore - Emit a machine instruction to store a value Val of
229/// type VT. The address is pre-computed in the X86AddressMode AM, consisting
230/// of a base pointer or frame index, an optional scaled index, a displacement
231/// offset, and an optional global value. Return true if it is possible.
232bool
233X86FastISel::X86FastEmitStore(EVT VT, unsigned Val, const X86AddressMode &AM) {
234  // Get opcode and regclass of the output for the given store instruction.
235  unsigned Opc = 0;
236  switch (VT.getSimpleVT().SimpleTy) {
237  case MVT::f80: // No f80 support yet.
238  default: return false;
239  case MVT::i1: {
240    // Mask out all but lowest bit.
241    unsigned AndResult = createResultReg(X86::GR8RegisterClass);
242    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
243            TII.get(X86::AND8ri), AndResult).addReg(Val).addImm(1);
244    Val = AndResult;
245  }
246  // FALLTHROUGH, handling i1 as i8.
247  case MVT::i8:  Opc = X86::MOV8mr;  break;
248  case MVT::i16: Opc = X86::MOV16mr; break;
249  case MVT::i32: Opc = X86::MOV32mr; break;
250  case MVT::i64: Opc = X86::MOV64mr; break; // Must be in x86-64 mode.
251  case MVT::f32:
252    Opc = Subtarget->hasSSE1() ? X86::MOVSSmr : X86::ST_Fp32m;
253    break;
254  case MVT::f64:
255    Opc = Subtarget->hasSSE2() ? X86::MOVSDmr : X86::ST_Fp64m;
256    break;
257  }
258
259  addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
260                         DL, TII.get(Opc)), AM).addReg(Val);
261  return true;
262}
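// For illustration: storing an i1 register goes through the AND8ri mask above
// and is then emitted as MOV8mr, while an f32 store becomes MOVSSmr with SSE1
// and ST_Fp32m otherwise.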
263
264bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
265                                   const X86AddressMode &AM) {
266  // Handle 'null' like i32/i64 0.
267  if (isa<ConstantPointerNull>(Val))
268    Val = Constant::getNullValue(TD.getIntPtrType(Val->getContext()));
269
270  // If this is a store of a simple constant, fold the constant into the store.
271  if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
272    unsigned Opc = 0;
273    bool Signed = true;
274    switch (VT.getSimpleVT().SimpleTy) {
275    default: break;
276    case MVT::i1:  Signed = false;     // FALLTHROUGH to handle as i8.
277    case MVT::i8:  Opc = X86::MOV8mi;  break;
278    case MVT::i16: Opc = X86::MOV16mi; break;
279    case MVT::i32: Opc = X86::MOV32mi; break;
280    case MVT::i64:
281      // Must be a 32-bit sign extended value.
282      if ((int)CI->getSExtValue() == CI->getSExtValue())
283        Opc = X86::MOV64mi32;
284      break;
285    }
286
287    if (Opc) {
288      addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
289                             DL, TII.get(Opc)), AM)
290                             .addImm(Signed ? (uint64_t) CI->getSExtValue() :
291                                              CI->getZExtValue());
292      return true;
293    }
294  }
295
296  unsigned ValReg = getRegForValue(Val);
297  if (ValReg == 0)
298    return false;
299
300  return X86FastEmitStore(VT, ValReg, AM);
301}
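// For illustration, "store i32 42, i32* %p" is emitted directly as a MOV32mi
// with the immediate folded into the store, and a store of a null pointer is
// treated as a store of integer zero; anything else falls back to
// materializing the value and using the register form above.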
302
303/// X86FastEmitExtend - Emit a machine instruction to extend a value Src of
304/// type SrcVT to type DstVT using the specified extension opcode Opc (e.g.
305/// ISD::SIGN_EXTEND).
306bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT,
307                                    unsigned Src, EVT SrcVT,
308                                    unsigned &ResultReg) {
309  unsigned RR = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc,
310                           Src, /*TODO: Kill=*/false);
311
312  if (RR != 0) {
313    ResultReg = RR;
314    return true;
315  } else
316    return false;
317}
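// For illustration, X86FastEmitExtend(ISD::ZERO_EXTEND, MVT::i32, Reg, MVT::i8,
// ResultReg) asks the tablegen'd FastEmit_r to pick a zero-extending move
// (a MOVZX32rr8-style instruction) and reports failure via the bool result.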
318
319/// X86SelectAddress - Attempt to fill in an address from the given value.
320///
321bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
322  const User *U = NULL;
323  unsigned Opcode = Instruction::UserOp1;
324  if (const Instruction *I = dyn_cast<Instruction>(V)) {
325    // Don't walk into other basic blocks; it's possible we haven't
326    // visited them yet, so the instructions may not yet be assigned
327    // virtual registers.
328    if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(V)) ||
329        FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
330      Opcode = I->getOpcode();
331      U = I;
332    }
333  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
334    Opcode = C->getOpcode();
335    U = C;
336  }
337
338  if (const PointerType *Ty = dyn_cast<PointerType>(V->getType()))
339    if (Ty->getAddressSpace() > 255)
340      // Fast instruction selection doesn't support the special
341      // address spaces.
342      return false;
343
344  switch (Opcode) {
345  default: break;
346  case Instruction::BitCast:
347    // Look past bitcasts.
348    return X86SelectAddress(U->getOperand(0), AM);
349
350  case Instruction::IntToPtr:
351    // Look past no-op inttoptrs.
352    if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
353      return X86SelectAddress(U->getOperand(0), AM);
354    break;
355
356  case Instruction::PtrToInt:
357    // Look past no-op ptrtoints.
358    if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
359      return X86SelectAddress(U->getOperand(0), AM);
360    break;
361
362  case Instruction::Alloca: {
363    // Do static allocas.
364    const AllocaInst *A = cast<AllocaInst>(V);
365    DenseMap<const AllocaInst*, int>::iterator SI =
366      FuncInfo.StaticAllocaMap.find(A);
367    if (SI != FuncInfo.StaticAllocaMap.end()) {
368      AM.BaseType = X86AddressMode::FrameIndexBase;
369      AM.Base.FrameIndex = SI->second;
370      return true;
371    }
372    break;
373  }
374
375  case Instruction::Add: {
376    // Adds of constants are common and easy enough.
377    if (const ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
378      uint64_t Disp = (int32_t)AM.Disp + (uint64_t)CI->getSExtValue();
379      // They have to fit in the 32-bit signed displacement field though.
380      if (isInt<32>(Disp)) {
381        AM.Disp = (uint32_t)Disp;
382        return X86SelectAddress(U->getOperand(0), AM);
383      }
384    }
385    break;
386  }
387
388  case Instruction::GetElementPtr: {
389    X86AddressMode SavedAM = AM;
390
391    // Pattern-match simple GEPs.
392    uint64_t Disp = (int32_t)AM.Disp;
393    unsigned IndexReg = AM.IndexReg;
394    unsigned Scale = AM.Scale;
395    gep_type_iterator GTI = gep_type_begin(U);
396    // Iterate through the indices, folding what we can. Constants can be
397    // folded, and one dynamic index can be handled, if the scale is supported.
398    for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end();
399         i != e; ++i, ++GTI) {
400      const Value *Op = *i;
401      if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
402        const StructLayout *SL = TD.getStructLayout(STy);
403        Disp += SL->getElementOffset(cast<ConstantInt>(Op)->getZExtValue());
404        continue;
405      }
406
407      // An array/variable index is always of the form i*S where S is the
408      // constant scale size.  See if we can push the scale into immediates.
409      uint64_t S = TD.getTypeAllocSize(GTI.getIndexedType());
410      for (;;) {
411        if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
412          // Constant-offset addressing.
413          Disp += CI->getSExtValue() * S;
414          break;
415        }
416        if (isa<AddOperator>(Op) &&
417            (!isa<Instruction>(Op) ||
418             FuncInfo.MBBMap[cast<Instruction>(Op)->getParent()]
419               == FuncInfo.MBB) &&
420            isa<ConstantInt>(cast<AddOperator>(Op)->getOperand(1))) {
421          // An add (in the same block) with a constant operand. Fold the
422          // constant.
423          ConstantInt *CI =
424            cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
425          Disp += CI->getSExtValue() * S;
426          // Iterate on the other operand.
427          Op = cast<AddOperator>(Op)->getOperand(0);
428          continue;
429        }
430        if (IndexReg == 0 &&
431            (!AM.GV || !Subtarget->isPICStyleRIPRel()) &&
432            (S == 1 || S == 2 || S == 4 || S == 8)) {
433          // Scaled-index addressing.
434          Scale = S;
435          IndexReg = getRegForGEPIndex(Op).first;
436          if (IndexReg == 0)
437            return false;
438          break;
439        }
440        // Unsupported.
441        goto unsupported_gep;
442      }
443    }
444    // Check for displacement overflow.
445    if (!isInt<32>(Disp))
446      break;
447    // Ok, the GEP indices were covered by constant-offset and scaled-index
448    // addressing. Update the address state and move on to examining the base.
449    AM.IndexReg = IndexReg;
450    AM.Scale = Scale;
451    AM.Disp = (uint32_t)Disp;
452    if (X86SelectAddress(U->getOperand(0), AM))
453      return true;
454
455    // If we couldn't merge the gep value into this addr mode, revert back to
456    // our address and just match the value instead of completely failing.
457    AM = SavedAM;
458    break;
459  unsupported_gep:
460    // Ok, the GEP indices weren't all covered.
461    break;
462  }
463  }
464
465  // Handle constant address.
466  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
467    // Can't handle alternate code models or TLS yet.
468    if (TM.getCodeModel() != CodeModel::Small)
469      return false;
470
471    if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
472      if (GVar->isThreadLocal())
473        return false;
474
475    // RIP-relative addresses can't have additional register operands, so if
476    // we've already folded stuff into the addressing mode, just force the
477    // global value into its own register, which we can use as the basereg.
478    if (!Subtarget->isPICStyleRIPRel() ||
479        (AM.Base.Reg == 0 && AM.IndexReg == 0)) {
480      // Okay, we've committed to selecting this global. Set up the address.
481      AM.GV = GV;
482
483      // Allow the subtarget to classify the global.
484      unsigned char GVFlags = Subtarget->ClassifyGlobalReference(GV, TM);
485
486      // If this reference is relative to the pic base, set it now.
487      if (isGlobalRelativeToPICBase(GVFlags)) {
488        // FIXME: How do we know Base.Reg is free??
489        AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
490      }
491
492      // Unless the ABI requires an extra load, return a direct reference to
493      // the global.
494      if (!isGlobalStubReference(GVFlags)) {
495        if (Subtarget->isPICStyleRIPRel()) {
496          // Use rip-relative addressing if we can.  Above we verified that the
497          // base and index registers are unused.
498          assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
499          AM.Base.Reg = X86::RIP;
500        }
501        AM.GVOpFlags = GVFlags;
502        return true;
503      }
504
505      // Ok, we need to do a load from a stub.  If we've already loaded from
506      // this stub, reuse the loaded pointer, otherwise emit the load now.
507      DenseMap<const Value*, unsigned>::iterator I = LocalValueMap.find(V);
508      unsigned LoadReg;
509      if (I != LocalValueMap.end() && I->second != 0) {
510        LoadReg = I->second;
511      } else {
512        // Issue load from stub.
513        unsigned Opc = 0;
514        const TargetRegisterClass *RC = NULL;
515        X86AddressMode StubAM;
516        StubAM.Base.Reg = AM.Base.Reg;
517        StubAM.GV = GV;
518        StubAM.GVOpFlags = GVFlags;
519
520        // Prepare for inserting code in the local-value area.
521        SavePoint SaveInsertPt = enterLocalValueArea();
522
523        if (TLI.getPointerTy() == MVT::i64) {
524          Opc = X86::MOV64rm;
525          RC  = X86::GR64RegisterClass;
526
527          if (Subtarget->isPICStyleRIPRel())
528            StubAM.Base.Reg = X86::RIP;
529        } else {
530          Opc = X86::MOV32rm;
531          RC  = X86::GR32RegisterClass;
532        }
533
534        LoadReg = createResultReg(RC);
535        MachineInstrBuilder LoadMI =
536          BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), LoadReg);
537        addFullAddress(LoadMI, StubAM);
538
539        // Ok, back to normal mode.
540        leaveLocalValueArea(SaveInsertPt);
541
542        // Prevent loading GV stub multiple times in same MBB.
543        LocalValueMap[V] = LoadReg;
544      }
545
546      // Now construct the final address. Note that the Disp, Scale,
547      // and Index values may already be set here.
548      AM.Base.Reg = LoadReg;
549      AM.GV = 0;
550      return true;
551    }
552  }
553
554  // If all else fails, try to materialize the value in a register.
555  if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
556    if (AM.Base.Reg == 0) {
557      AM.Base.Reg = getRegForValue(V);
558      return AM.Base.Reg != 0;
559    }
560    if (AM.IndexReg == 0) {
561      assert(AM.Scale == 1 && "Scale with no index!");
562      AM.IndexReg = getRegForValue(V);
563      return AM.IndexReg != 0;
564    }
565  }
566
567  return false;
568}
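// For illustration, given same-block code such as
//   %a = getelementptr [8 x i32]* %buf, i32 0, i32 %i
//   %v = load i32* %a
// the GEP case above folds %i into AM.IndexReg with Scale = 4, so the load can
// be selected as a single scaled-index MOV32rm.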
569
570/// X86SelectCallAddress - Attempt to fill in an address from the given value.
571///
572bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) {
573  const User *U = NULL;
574  unsigned Opcode = Instruction::UserOp1;
575  if (const Instruction *I = dyn_cast<Instruction>(V)) {
576    Opcode = I->getOpcode();
577    U = I;
578  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
579    Opcode = C->getOpcode();
580    U = C;
581  }
582
583  switch (Opcode) {
584  default: break;
585  case Instruction::BitCast:
586    // Look past bitcasts.
587    return X86SelectCallAddress(U->getOperand(0), AM);
588
589  case Instruction::IntToPtr:
590    // Look past no-op inttoptrs.
591    if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
592      return X86SelectCallAddress(U->getOperand(0), AM);
593    break;
594
595  case Instruction::PtrToInt:
596    // Look past no-op ptrtoints.
597    if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
598      return X86SelectCallAddress(U->getOperand(0), AM);
599    break;
600  }
601
602  // Handle constant address.
603  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
604    // Can't handle alternate code models yet.
605    if (TM.getCodeModel() != CodeModel::Small)
606      return false;
607
608    // RIP-relative addresses can't have additional register operands.
609    if (Subtarget->isPICStyleRIPRel() &&
610        (AM.Base.Reg != 0 || AM.IndexReg != 0))
611      return false;
612
613    // Can't handle DLLImport.
614    if (GV->hasDLLImportLinkage())
615      return false;
616
617    // Can't handle TLS.
618    if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
619      if (GVar->isThreadLocal())
620        return false;
621
622    // Okay, we've committed to selecting this global. Set up the basic address.
623    AM.GV = GV;
624
625    // No ABI requires an extra load for anything other than DLLImport, which
626    // we rejected above. Return a direct reference to the global.
627    if (Subtarget->isPICStyleRIPRel()) {
628      // Use rip-relative addressing if we can.  Above we verified that the
629      // base and index registers are unused.
630      assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
631      AM.Base.Reg = X86::RIP;
632    } else if (Subtarget->isPICStyleStubPIC()) {
633      AM.GVOpFlags = X86II::MO_PIC_BASE_OFFSET;
634    } else if (Subtarget->isPICStyleGOT()) {
635      AM.GVOpFlags = X86II::MO_GOTOFF;
636    }
637
638    return true;
639  }
640
641  // If all else fails, try to materialize the value in a register.
642  if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
643    if (AM.Base.Reg == 0) {
644      AM.Base.Reg = getRegForValue(V);
645      return AM.Base.Reg != 0;
646    }
647    if (AM.IndexReg == 0) {
648      assert(AM.Scale == 1 && "Scale with no index!");
649      AM.IndexReg = getRegForValue(V);
650      return AM.IndexReg != 0;
651    }
652  }
653
654  return false;
655}
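// For illustration, a direct call to a known function in RIP-relative PIC code
// sets AM.GV to the callee with RIP as the base register, while a call through
// a function pointer falls back to materializing the pointer into AM.Base.Reg.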
656
657
658/// X86SelectStore - Select and emit code to implement store instructions.
659bool X86FastISel::X86SelectStore(const Instruction *I) {
660  MVT VT;
661  if (!isTypeLegal(I->getOperand(0)->getType(), VT, /*AllowI1=*/true))
662    return false;
663
664  X86AddressMode AM;
665  if (!X86SelectAddress(I->getOperand(1), AM))
666    return false;
667
668  return X86FastEmitStore(VT, I->getOperand(0), AM);
669}
670
671/// X86SelectRet - Select and emit code to implement ret instructions.
672bool X86FastISel::X86SelectRet(const Instruction *I) {
673  const ReturnInst *Ret = cast<ReturnInst>(I);
674  const Function &F = *I->getParent()->getParent();
675
676  if (!FuncInfo.CanLowerReturn)
677    return false;
678
679  CallingConv::ID CC = F.getCallingConv();
680  if (CC != CallingConv::C &&
681      CC != CallingConv::Fast &&
682      CC != CallingConv::X86_FastCall)
683    return false;
684
685  if (Subtarget->isTargetWin64())
686    return false;
687
688  // Don't handle popping bytes on return for now.
689  if (FuncInfo.MF->getInfo<X86MachineFunctionInfo>()
690        ->getBytesToPopOnReturn() != 0)
691        return false;
692
693  // fastcc with -tailcallopt is intended to provide a guaranteed
694  // tail call optimization. Fastisel doesn't know how to do that.
695  if (CC == CallingConv::Fast && GuaranteedTailCallOpt)
696    return false;
697
698  // Let SDISel handle vararg functions.
699  if (F.isVarArg())
700    return false;
701
702  if (Ret->getNumOperands() > 0) {
703    SmallVector<ISD::OutputArg, 4> Outs;
704    GetReturnInfo(F.getReturnType(), F.getAttributes().getRetAttributes(),
705                  Outs, TLI);
706
707    // Analyze operands of the call, assigning locations to each operand.
708    SmallVector<CCValAssign, 16> ValLocs;
709    CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, TM, ValLocs,
710                   I->getContext());
711    CCInfo.AnalyzeReturn(Outs, RetCC_X86);
712
713    const Value *RV = Ret->getOperand(0);
714    unsigned Reg = getRegForValue(RV);
715    if (Reg == 0)
716      return false;
717
718    // Only handle a single return value for now.
719    if (ValLocs.size() != 1)
720      return false;
721
722    CCValAssign &VA = ValLocs[0];
723
724    // Don't bother handling odd stuff for now.
725    if (VA.getLocInfo() != CCValAssign::Full)
726      return false;
727    // Only handle register returns for now.
728    if (!VA.isRegLoc())
729      return false;
730
731    // The calling-convention tables for x87 returns don't tell
732    // the whole story.
733    if (VA.getLocReg() == X86::ST0 || VA.getLocReg() == X86::ST1)
734      return false;
735
736    unsigned SrcReg = Reg + VA.getValNo();
737    EVT SrcVT = TLI.getValueType(RV->getType());
738    EVT DstVT = VA.getValVT();
739    // Special handling for extended integers.
740    if (SrcVT != DstVT) {
741      if (SrcVT != MVT::i1 && SrcVT != MVT::i8 && SrcVT != MVT::i16)
742        return false;
743
744      if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
745        return false;
746
747      assert(DstVT == MVT::i32 && "X86 should always ext to i32");
748
749      if (SrcVT == MVT::i1) {
750        if (Outs[0].Flags.isSExt())
751          return false;
752        SrcReg = FastEmitZExtFromI1(MVT::i8, SrcReg, /*TODO: Kill=*/false);
753        SrcVT = MVT::i8;
754      }
755      unsigned Op = Outs[0].Flags.isZExt() ? ISD::ZERO_EXTEND :
756                                             ISD::SIGN_EXTEND;
757      SrcReg = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Op,
758                          SrcReg, /*TODO: Kill=*/false);
759    }
760
761    // Make the copy.
762    unsigned DstReg = VA.getLocReg();
763    const TargetRegisterClass* SrcRC = MRI.getRegClass(SrcReg);
764    // Avoid a cross-class copy. This is very unlikely.
765    if (!SrcRC->contains(DstReg))
766      return false;
767    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
768            DstReg).addReg(SrcReg);
769
770    // Mark the register as live out of the function.
771    MRI.addLiveOut(VA.getLocReg());
772  }
773
774  // Now emit the RET.
775  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::RET));
776  return true;
777}
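// For illustration, "ret i8 %v" from a function whose return value is
// zero-extended to i32 emits the extension above, copies the result into the
// location register chosen by RetCC_X86 (typically EAX), and ends with RET.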
778
779/// X86SelectLoad - Select and emit code to implement load instructions.
780///
781bool X86FastISel::X86SelectLoad(const Instruction *I)  {
782  MVT VT;
783  if (!isTypeLegal(I->getType(), VT, /*AllowI1=*/true))
784    return false;
785
786  X86AddressMode AM;
787  if (!X86SelectAddress(I->getOperand(0), AM))
788    return false;
789
790  unsigned ResultReg = 0;
791  if (X86FastEmitLoad(VT, AM, ResultReg)) {
792    UpdateValueMap(I, ResultReg);
793    return true;
794  }
795  return false;
796}
797
798static unsigned X86ChooseCmpOpcode(EVT VT, const X86Subtarget *Subtarget) {
799  switch (VT.getSimpleVT().SimpleTy) {
800  default:       return 0;
801  case MVT::i8:  return X86::CMP8rr;
802  case MVT::i16: return X86::CMP16rr;
803  case MVT::i32: return X86::CMP32rr;
804  case MVT::i64: return X86::CMP64rr;
805  case MVT::f32: return Subtarget->hasSSE1() ? X86::UCOMISSrr : 0;
806  case MVT::f64: return Subtarget->hasSSE2() ? X86::UCOMISDrr : 0;
807  }
808}
809
810/// X86ChooseCmpImmediateOpcode - If we have a comparison whose RHS is the
811/// constant RHSC, return an opcode that can fold the immediate into the
812/// compare (e.g. CMP32ri); otherwise return 0.
813static unsigned X86ChooseCmpImmediateOpcode(EVT VT, const ConstantInt *RHSC) {
814  switch (VT.getSimpleVT().SimpleTy) {
815  // Otherwise, we can't fold the immediate into this comparison.
816  default: return 0;
817  case MVT::i8: return X86::CMP8ri;
818  case MVT::i16: return X86::CMP16ri;
819  case MVT::i32: return X86::CMP32ri;
820  case MVT::i64:
821    // 64-bit comparisons are only valid if the immediate fits in a 32-bit sext
822    // field.
823    if ((int)RHSC->getSExtValue() == RHSC->getSExtValue())
824      return X86::CMP64ri32;
825    return 0;
826  }
827}
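// For illustration, "icmp ult i32 %x, 7" can fold the constant and use
// CMP32ri, whereas an i64 compare against a constant that does not fit in a
// sign-extended 32-bit immediate returns 0 here and is emitted as CMP64rr
// with the constant materialized in a register.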
828
829bool X86FastISel::X86FastEmitCompare(const Value *Op0, const Value *Op1,
830                                     EVT VT) {
831  unsigned Op0Reg = getRegForValue(Op0);
832  if (Op0Reg == 0) return false;
833
834  // Handle 'null' like i32/i64 0.
835  if (isa<ConstantPointerNull>(Op1))
836    Op1 = Constant::getNullValue(TD.getIntPtrType(Op0->getContext()));
837
838  // We have two options: compare with register or immediate.  If the RHS of
839  // the compare is an immediate that we can fold into this compare, use
840  // CMPri, otherwise use CMPrr.
841  if (const ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
842    if (unsigned CompareImmOpc = X86ChooseCmpImmediateOpcode(VT, Op1C)) {
843      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CompareImmOpc))
844        .addReg(Op0Reg)
845        .addImm(Op1C->getSExtValue());
846      return true;
847    }
848  }
849
850  unsigned CompareOpc = X86ChooseCmpOpcode(VT, Subtarget);
851  if (CompareOpc == 0) return false;
852
853  unsigned Op1Reg = getRegForValue(Op1);
854  if (Op1Reg == 0) return false;
855  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CompareOpc))
856    .addReg(Op0Reg)
857    .addReg(Op1Reg);
858
859  return true;
860}
861
862bool X86FastISel::X86SelectCmp(const Instruction *I) {
863  const CmpInst *CI = cast<CmpInst>(I);
864
865  MVT VT;
866  if (!isTypeLegal(I->getOperand(0)->getType(), VT))
867    return false;
868
869  unsigned ResultReg = createResultReg(&X86::GR8RegClass);
870  unsigned SetCCOpc;
871  bool SwapArgs;  // false -> compare Op0, Op1.  true -> compare Op1, Op0.
872  switch (CI->getPredicate()) {
873  case CmpInst::FCMP_OEQ: {
874    if (!X86FastEmitCompare(CI->getOperand(0), CI->getOperand(1), VT))
875      return false;
876
877    unsigned EReg = createResultReg(&X86::GR8RegClass);
878    unsigned NPReg = createResultReg(&X86::GR8RegClass);
879    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::SETEr), EReg);
880    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
881            TII.get(X86::SETNPr), NPReg);
882    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
883            TII.get(X86::AND8rr), ResultReg).addReg(NPReg).addReg(EReg);
884    UpdateValueMap(I, ResultReg);
885    return true;
886  }
887  case CmpInst::FCMP_UNE: {
888    if (!X86FastEmitCompare(CI->getOperand(0), CI->getOperand(1), VT))
889      return false;
890
891    unsigned NEReg = createResultReg(&X86::GR8RegClass);
892    unsigned PReg = createResultReg(&X86::GR8RegClass);
893    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::SETNEr), NEReg);
894    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::SETPr), PReg);
895    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::OR8rr),ResultReg)
896      .addReg(PReg).addReg(NEReg);
897    UpdateValueMap(I, ResultReg);
898    return true;
899  }
900  case CmpInst::FCMP_OGT: SwapArgs = false; SetCCOpc = X86::SETAr;  break;
901  case CmpInst::FCMP_OGE: SwapArgs = false; SetCCOpc = X86::SETAEr; break;
902  case CmpInst::FCMP_OLT: SwapArgs = true;  SetCCOpc = X86::SETAr;  break;
903  case CmpInst::FCMP_OLE: SwapArgs = true;  SetCCOpc = X86::SETAEr; break;
904  case CmpInst::FCMP_ONE: SwapArgs = false; SetCCOpc = X86::SETNEr; break;
905  case CmpInst::FCMP_ORD: SwapArgs = false; SetCCOpc = X86::SETNPr; break;
906  case CmpInst::FCMP_UNO: SwapArgs = false; SetCCOpc = X86::SETPr;  break;
907  case CmpInst::FCMP_UEQ: SwapArgs = false; SetCCOpc = X86::SETEr;  break;
908  case CmpInst::FCMP_UGT: SwapArgs = true;  SetCCOpc = X86::SETBr;  break;
909  case CmpInst::FCMP_UGE: SwapArgs = true;  SetCCOpc = X86::SETBEr; break;
910  case CmpInst::FCMP_ULT: SwapArgs = false; SetCCOpc = X86::SETBr;  break;
911  case CmpInst::FCMP_ULE: SwapArgs = false; SetCCOpc = X86::SETBEr; break;
912
913  case CmpInst::ICMP_EQ:  SwapArgs = false; SetCCOpc = X86::SETEr;  break;
914  case CmpInst::ICMP_NE:  SwapArgs = false; SetCCOpc = X86::SETNEr; break;
915  case CmpInst::ICMP_UGT: SwapArgs = false; SetCCOpc = X86::SETAr;  break;
916  case CmpInst::ICMP_UGE: SwapArgs = false; SetCCOpc = X86::SETAEr; break;
917  case CmpInst::ICMP_ULT: SwapArgs = false; SetCCOpc = X86::SETBr;  break;
918  case CmpInst::ICMP_ULE: SwapArgs = false; SetCCOpc = X86::SETBEr; break;
919  case CmpInst::ICMP_SGT: SwapArgs = false; SetCCOpc = X86::SETGr;  break;
920  case CmpInst::ICMP_SGE: SwapArgs = false; SetCCOpc = X86::SETGEr; break;
921  case CmpInst::ICMP_SLT: SwapArgs = false; SetCCOpc = X86::SETLr;  break;
922  case CmpInst::ICMP_SLE: SwapArgs = false; SetCCOpc = X86::SETLEr; break;
923  default:
924    return false;
925  }
926
927  const Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1);
928  if (SwapArgs)
929    std::swap(Op0, Op1);
930
931  // Emit a compare of Op0/Op1.
932  if (!X86FastEmitCompare(Op0, Op1, VT))
933    return false;
934
935  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(SetCCOpc), ResultReg);
936  UpdateValueMap(I, ResultReg);
937  return true;
938}
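// For illustration, "fcmp oeq double %a, %b" is selected above as UCOMISDrr
// followed by SETEr and SETNPr combined with AND8rr, since ZF alone cannot
// distinguish "equal" from "unordered" after a UCOMIS compare.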
939
940bool X86FastISel::X86SelectZExt(const Instruction *I) {
941  // Handle zero-extension from i1 to i8, which is common.
942  if (!I->getOperand(0)->getType()->isIntegerTy(1))
943    return false;
944
945  EVT DstVT = TLI.getValueType(I->getType());
946  if (!TLI.isTypeLegal(DstVT))
947    return false;
948
949  unsigned ResultReg = getRegForValue(I->getOperand(0));
950  if (ResultReg == 0)
951    return false;
952
953  // Set the high bits to zero.
954  ResultReg = FastEmitZExtFromI1(MVT::i8, ResultReg, /*TODO: Kill=*/false);
955  if (ResultReg == 0)
956    return false;
957
958  if (DstVT != MVT::i8) {
959    ResultReg = FastEmit_r(MVT::i8, DstVT.getSimpleVT(), ISD::ZERO_EXTEND,
960                           ResultReg, /*Kill=*/true);
961    if (ResultReg == 0)
962      return false;
963  }
964
965  UpdateValueMap(I, ResultReg);
966  return true;
967}
968
969
970bool X86FastISel::X86SelectBranch(const Instruction *I) {
971  // Unconditional branches are selected by tablegen-generated code.
972  // Handle a conditional branch.
973  const BranchInst *BI = cast<BranchInst>(I);
974  MachineBasicBlock *TrueMBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
975  MachineBasicBlock *FalseMBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
976
977  // Fold the common case of a conditional branch with a comparison
978  // in the same block (values defined on other blocks may not have
979  // initialized registers).
980  if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
981    if (CI->hasOneUse() && CI->getParent() == I->getParent()) {
982      EVT VT = TLI.getValueType(CI->getOperand(0)->getType());
983
984      // Try to take advantage of fallthrough opportunities.
985      CmpInst::Predicate Predicate = CI->getPredicate();
986      if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) {
987        std::swap(TrueMBB, FalseMBB);
988        Predicate = CmpInst::getInversePredicate(Predicate);
989      }
990
991      bool SwapArgs;  // false -> compare Op0, Op1.  true -> compare Op1, Op0.
992      unsigned BranchOpc; // Opcode to jump on, e.g. "X86::JA"
993
994      switch (Predicate) {
995      case CmpInst::FCMP_OEQ:
996        std::swap(TrueMBB, FalseMBB);
997        Predicate = CmpInst::FCMP_UNE;
998        // FALL THROUGH
999      case CmpInst::FCMP_UNE: SwapArgs = false; BranchOpc = X86::JNE_4; break;
1000      case CmpInst::FCMP_OGT: SwapArgs = false; BranchOpc = X86::JA_4;  break;
1001      case CmpInst::FCMP_OGE: SwapArgs = false; BranchOpc = X86::JAE_4; break;
1002      case CmpInst::FCMP_OLT: SwapArgs = true;  BranchOpc = X86::JA_4;  break;
1003      case CmpInst::FCMP_OLE: SwapArgs = true;  BranchOpc = X86::JAE_4; break;
1004      case CmpInst::FCMP_ONE: SwapArgs = false; BranchOpc = X86::JNE_4; break;
1005      case CmpInst::FCMP_ORD: SwapArgs = false; BranchOpc = X86::JNP_4; break;
1006      case CmpInst::FCMP_UNO: SwapArgs = false; BranchOpc = X86::JP_4;  break;
1007      case CmpInst::FCMP_UEQ: SwapArgs = false; BranchOpc = X86::JE_4;  break;
1008      case CmpInst::FCMP_UGT: SwapArgs = true;  BranchOpc = X86::JB_4;  break;
1009      case CmpInst::FCMP_UGE: SwapArgs = true;  BranchOpc = X86::JBE_4; break;
1010      case CmpInst::FCMP_ULT: SwapArgs = false; BranchOpc = X86::JB_4;  break;
1011      case CmpInst::FCMP_ULE: SwapArgs = false; BranchOpc = X86::JBE_4; break;
1012
1013      case CmpInst::ICMP_EQ:  SwapArgs = false; BranchOpc = X86::JE_4;  break;
1014      case CmpInst::ICMP_NE:  SwapArgs = false; BranchOpc = X86::JNE_4; break;
1015      case CmpInst::ICMP_UGT: SwapArgs = false; BranchOpc = X86::JA_4;  break;
1016      case CmpInst::ICMP_UGE: SwapArgs = false; BranchOpc = X86::JAE_4; break;
1017      case CmpInst::ICMP_ULT: SwapArgs = false; BranchOpc = X86::JB_4;  break;
1018      case CmpInst::ICMP_ULE: SwapArgs = false; BranchOpc = X86::JBE_4; break;
1019      case CmpInst::ICMP_SGT: SwapArgs = false; BranchOpc = X86::JG_4;  break;
1020      case CmpInst::ICMP_SGE: SwapArgs = false; BranchOpc = X86::JGE_4; break;
1021      case CmpInst::ICMP_SLT: SwapArgs = false; BranchOpc = X86::JL_4;  break;
1022      case CmpInst::ICMP_SLE: SwapArgs = false; BranchOpc = X86::JLE_4; break;
1023      default:
1024        return false;
1025      }
1026
1027      const Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1);
1028      if (SwapArgs)
1029        std::swap(Op0, Op1);
1030
1031      // Emit a compare of the LHS and RHS, setting the flags.
1032      if (!X86FastEmitCompare(Op0, Op1, VT))
1033        return false;
1034
1035      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BranchOpc))
1036        .addMBB(TrueMBB);
1037
1038      if (Predicate == CmpInst::FCMP_UNE) {
1039        // X86 requires a second branch to handle UNE (and OEQ,
1040        // which is mapped to UNE above).
1041        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::JP_4))
1042          .addMBB(TrueMBB);
1043      }
1044
1045      FastEmitBranch(FalseMBB, DL);
1046      FuncInfo.MBB->addSuccessor(TrueMBB);
1047      return true;
1048    }
1049  } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
1050    // Handle things like "%cond = trunc i32 %X to i1 / br i1 %cond", which
1051    // typically happen for _Bool and C++ bools.
1052    MVT SourceVT;
1053    if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
1054        isTypeLegal(TI->getOperand(0)->getType(), SourceVT)) {
1055      unsigned TestOpc = 0;
1056      switch (SourceVT.SimpleTy) {
1057      default: break;
1058      case MVT::i8:  TestOpc = X86::TEST8ri; break;
1059      case MVT::i16: TestOpc = X86::TEST16ri; break;
1060      case MVT::i32: TestOpc = X86::TEST32ri; break;
1061      case MVT::i64: TestOpc = X86::TEST64ri32; break;
1062      }
1063      if (TestOpc) {
1064        unsigned OpReg = getRegForValue(TI->getOperand(0));
1065        if (OpReg == 0) return false;
1066        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TestOpc))
1067          .addReg(OpReg).addImm(1);
1068
1069        unsigned JmpOpc = X86::JNE_4;
1070        if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) {
1071          std::swap(TrueMBB, FalseMBB);
1072          JmpOpc = X86::JE_4;
1073        }
1074
1075        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(JmpOpc))
1076          .addMBB(TrueMBB);
1077        FastEmitBranch(FalseMBB, DL);
1078        FuncInfo.MBB->addSuccessor(TrueMBB);
1079        return true;
1080      }
1081    }
1082  }
1083
1084  // Otherwise do a clumsy setcc and re-test it.
1085  // Note that i1 essentially gets ANY_EXTEND'ed to i8 where it isn't used
1086  // in an explicit cast, so make sure to handle that correctly.
1087  unsigned OpReg = getRegForValue(BI->getCondition());
1088  if (OpReg == 0) return false;
1089
1090  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::TEST8ri))
1091    .addReg(OpReg).addImm(1);
1092  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::JNE_4))
1093    .addMBB(TrueMBB);
1094  FastEmitBranch(FalseMBB, DL);
1095  FuncInfo.MBB->addSuccessor(TrueMBB);
1096  return true;
1097}
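// For illustration, an "icmp slt i32 %a, %b" feeding a "br i1" in the same
// block is folded above into CMP32rr followed by JL_4 to the true successor,
// with FastEmitBranch supplying the fall-through or JMP to the false one.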
1098
1099bool X86FastISel::X86SelectShift(const Instruction *I) {
1100  unsigned CReg = 0, OpReg = 0;
1101  const TargetRegisterClass *RC = NULL;
1102  if (I->getType()->isIntegerTy(8)) {
1103    CReg = X86::CL;
1104    RC = &X86::GR8RegClass;
1105    switch (I->getOpcode()) {
1106    case Instruction::LShr: OpReg = X86::SHR8rCL; break;
1107    case Instruction::AShr: OpReg = X86::SAR8rCL; break;
1108    case Instruction::Shl:  OpReg = X86::SHL8rCL; break;
1109    default: return false;
1110    }
1111  } else if (I->getType()->isIntegerTy(16)) {
1112    CReg = X86::CX;
1113    RC = &X86::GR16RegClass;
1114    switch (I->getOpcode()) {
1115    case Instruction::LShr: OpReg = X86::SHR16rCL; break;
1116    case Instruction::AShr: OpReg = X86::SAR16rCL; break;
1117    case Instruction::Shl:  OpReg = X86::SHL16rCL; break;
1118    default: return false;
1119    }
1120  } else if (I->getType()->isIntegerTy(32)) {
1121    CReg = X86::ECX;
1122    RC = &X86::GR32RegClass;
1123    switch (I->getOpcode()) {
1124    case Instruction::LShr: OpReg = X86::SHR32rCL; break;
1125    case Instruction::AShr: OpReg = X86::SAR32rCL; break;
1126    case Instruction::Shl:  OpReg = X86::SHL32rCL; break;
1127    default: return false;
1128    }
1129  } else if (I->getType()->isIntegerTy(64)) {
1130    CReg = X86::RCX;
1131    RC = &X86::GR64RegClass;
1132    switch (I->getOpcode()) {
1133    case Instruction::LShr: OpReg = X86::SHR64rCL; break;
1134    case Instruction::AShr: OpReg = X86::SAR64rCL; break;
1135    case Instruction::Shl:  OpReg = X86::SHL64rCL; break;
1136    default: return false;
1137    }
1138  } else {
1139    return false;
1140  }
1141
1142  MVT VT;
1143  if (!isTypeLegal(I->getType(), VT))
1144    return false;
1145
1146  unsigned Op0Reg = getRegForValue(I->getOperand(0));
1147  if (Op0Reg == 0) return false;
1148
1149  unsigned Op1Reg = getRegForValue(I->getOperand(1));
1150  if (Op1Reg == 0) return false;
1151  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
1152          CReg).addReg(Op1Reg);
1153
1154  // The shift instruction uses X86::CL. If we defined a super-register
1155  // of X86::CL, emit a subreg KILL to precisely describe what we're doing here.
1156  if (CReg != X86::CL)
1157    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
1158            TII.get(TargetOpcode::KILL), X86::CL)
1159      .addReg(CReg, RegState::Kill);
1160
1161  unsigned ResultReg = createResultReg(RC);
1162  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(OpReg), ResultReg)
1163    .addReg(Op0Reg);
1164  UpdateValueMap(I, ResultReg);
1165  return true;
1166}
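// For illustration, "%r = shl i32 %x, %n" above copies %n into ECX, emits a
// KILL so that only CL is regarded as live, and then issues SHL32rCL on %x.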
1167
1168bool X86FastISel::X86SelectSelect(const Instruction *I) {
1169  MVT VT;
1170  if (!isTypeLegal(I->getType(), VT))
1171    return false;
1172
1173  // We only use cmov here, if we don't have a cmov instruction bail.
1174  if (!Subtarget->hasCMov()) return false;
1175
1176  unsigned Opc = 0;
1177  const TargetRegisterClass *RC = NULL;
1178  if (VT == MVT::i16) {
1179    Opc = X86::CMOVE16rr;
1180    RC = &X86::GR16RegClass;
1181  } else if (VT == MVT::i32) {
1182    Opc = X86::CMOVE32rr;
1183    RC = &X86::GR32RegClass;
1184  } else if (VT == MVT::i64) {
1185    Opc = X86::CMOVE64rr;
1186    RC = &X86::GR64RegClass;
1187  } else {
1188    return false;
1189  }
1190
1191  unsigned Op0Reg = getRegForValue(I->getOperand(0));
1192  if (Op0Reg == 0) return false;
1193  unsigned Op1Reg = getRegForValue(I->getOperand(1));
1194  if (Op1Reg == 0) return false;
1195  unsigned Op2Reg = getRegForValue(I->getOperand(2));
1196  if (Op2Reg == 0) return false;
1197
1198  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::TEST8rr))
1199    .addReg(Op0Reg).addReg(Op0Reg);
1200  unsigned ResultReg = createResultReg(RC);
1201  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg)
1202    .addReg(Op1Reg).addReg(Op2Reg);
1203  UpdateValueMap(I, ResultReg);
1204  return true;
1205}
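// For illustration, "%r = select i1 %c, i32 %a, i32 %b" becomes TEST8rr on the
// condition followed by CMOVE32rr, which leaves %a in the result when %c is
// nonzero and moves %b in when %c is zero.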
1206
1207bool X86FastISel::X86SelectFPExt(const Instruction *I) {
1208  // fpext from float to double.
1209  if (Subtarget->hasSSE2() &&
1210      I->getType()->isDoubleTy()) {
1211    const Value *V = I->getOperand(0);
1212    if (V->getType()->isFloatTy()) {
1213      unsigned OpReg = getRegForValue(V);
1214      if (OpReg == 0) return false;
1215      unsigned ResultReg = createResultReg(X86::FR64RegisterClass);
1216      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
1217              TII.get(X86::CVTSS2SDrr), ResultReg)
1218        .addReg(OpReg);
1219      UpdateValueMap(I, ResultReg);
1220      return true;
1221    }
1222  }
1223
1224  return false;
1225}
1226
1227bool X86FastISel::X86SelectFPTrunc(const Instruction *I) {
1228  if (Subtarget->hasSSE2()) {
1229    if (I->getType()->isFloatTy()) {
1230      const Value *V = I->getOperand(0);
1231      if (V->getType()->isDoubleTy()) {
1232        unsigned OpReg = getRegForValue(V);
1233        if (OpReg == 0) return false;
1234        unsigned ResultReg = createResultReg(X86::FR32RegisterClass);
1235        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
1236                TII.get(X86::CVTSD2SSrr), ResultReg)
1237          .addReg(OpReg);
1238        UpdateValueMap(I, ResultReg);
1239        return true;
1240      }
1241    }
1242  }
1243
1244  return false;
1245}
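// For illustration, "fpext float %x to double" is selected above as
// CVTSS2SDrr and "fptrunc double %y to float" as CVTSD2SSrr; both paths
// require SSE2 and bail out on x87-only subtargets.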
1246
1247bool X86FastISel::X86SelectTrunc(const Instruction *I) {
1248  EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
1249  EVT DstVT = TLI.getValueType(I->getType());
1250
1251  // This code only handles truncation to byte.
1252  if (DstVT != MVT::i8 && DstVT != MVT::i1)
1253    return false;
1254  if (!TLI.isTypeLegal(SrcVT))
1255    return false;
1256
1257  unsigned InputReg = getRegForValue(I->getOperand(0));
1258  if (!InputReg)
1259    // Unhandled operand.  Halt "fast" selection and bail.
1260    return false;
1261
1262  if (SrcVT == MVT::i8) {
1263    // Truncate from i8 to i1; no code needed.
1264    UpdateValueMap(I, InputReg);
1265    return true;
1266  }
1267
1268  if (!Subtarget->is64Bit()) {
1269    // If we're on x86-32, we can't extract an i8 from a general register.
1270    // First issue a copy to GR16_ABCD or GR32_ABCD.
1271    const TargetRegisterClass *CopyRC = (SrcVT == MVT::i16)
1272      ? X86::GR16_ABCDRegisterClass : X86::GR32_ABCDRegisterClass;
1273    unsigned CopyReg = createResultReg(CopyRC);
1274    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
1275            CopyReg).addReg(InputReg);
1276    InputReg = CopyReg;
1277  }
1278
1279  // Issue an extract_subreg.
1280  unsigned ResultReg = FastEmitInst_extractsubreg(MVT::i8,
1281                                                  InputReg, /*Kill=*/true,
1282                                                  X86::sub_8bit);
1283  if (!ResultReg)
1284    return false;
1285
1286  UpdateValueMap(I, ResultReg);
1287  return true;
1288}
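// For illustration, on x86-32 "trunc i32 %x to i8" first copies %x into a
// GR32_ABCD register and then extracts sub_8bit, since only AL/BL/CL/DL are
// addressable as 8-bit subregisters there; on x86-64 the extra copy is
// unnecessary.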
1289
1290bool X86FastISel::IsMemcpySmall(uint64_t Len) {
1291  return Len <= (Subtarget->is64Bit() ? 32 : 16);
1292}
1293
1294bool X86FastISel::TryEmitSmallMemcpy(X86AddressMode DestAM,
1295                                     X86AddressMode SrcAM, uint64_t Len) {
1296
1297  // Make sure we don't bloat code by inlining very large memcpy's.
1298  if (!IsMemcpySmall(Len))
1299    return false;
1300
1301  bool i64Legal = Subtarget->is64Bit();
1302
1303  // We don't care about alignment here since we just emit integer accesses.
1304  while (Len) {
1305    MVT VT;
1306    if (Len >= 8 && i64Legal)
1307      VT = MVT::i64;
1308    else if (Len >= 4)
1309      VT = MVT::i32;
1310    else if (Len >= 2)
1311      VT = MVT::i16;
1312    else {
1313      assert(Len == 1);
1314      VT = MVT::i8;
1315    }
1316
1317    unsigned Reg;
1318    bool RV = X86FastEmitLoad(VT, SrcAM, Reg);
1319    RV &= X86FastEmitStore(VT, Reg, DestAM);
1320    assert(RV && "Failed to emit load or store??");
1321
1322    unsigned Size = VT.getSizeInBits()/8;
1323    Len -= Size;
1324    DestAM.Disp += Size;
1325    SrcAM.Disp += Size;
1326  }
1327
1328  return true;
1329}
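// For illustration, a 13-byte memcpy on x86-64 is unrolled above into an i64,
// an i32 and an i8 load/store pair, bumping the displacement of both address
// modes by the access size after each step.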
1330
1331bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
1332  // FIXME: Handle more intrinsics.
1333  switch (I.getIntrinsicID()) {
1334  default: return false;
1335  case Intrinsic::memcpy: {
1336    const MemCpyInst &MCI = cast<MemCpyInst>(I);
1337    // Don't handle volatile or variable length memcpys.
1338    if (MCI.isVolatile())
1339      return false;
1340
1341    if (isa<ConstantInt>(MCI.getLength())) {
1342      // Small memcpy's are common enough that we want to do them
1343      // without a call if possible.
1344      uint64_t Len = cast<ConstantInt>(MCI.getLength())->getZExtValue();
1345      if (IsMemcpySmall(Len)) {
1346        X86AddressMode DestAM, SrcAM;
1347        if (!X86SelectAddress(MCI.getRawDest(), DestAM) ||
1348            !X86SelectAddress(MCI.getRawSource(), SrcAM))
1349          return false;
1350        TryEmitSmallMemcpy(DestAM, SrcAM, Len);
1351        return true;
1352      }
1353    }
1354
1355    unsigned SizeWidth = Subtarget->is64Bit() ? 64 : 32;
1356    if (!MCI.getLength()->getType()->isIntegerTy(SizeWidth))
1357      return false;
1358
1359    if (MCI.getSourceAddressSpace() > 255 || MCI.getDestAddressSpace() > 255)
1360      return false;
1361
1362    return DoSelectCall(&I, "memcpy");
1363  }
1364  case Intrinsic::memset: {
1365    const MemSetInst &MSI = cast<MemSetInst>(I);
1366
1367    unsigned SizeWidth = Subtarget->is64Bit() ? 64 : 32;
1368    if (!MSI.getLength()->getType()->isIntegerTy(SizeWidth))
1369      return false;
1370
1371    if (MSI.getDestAddressSpace() > 255)
1372      return false;
1373
1374    return DoSelectCall(&I, "memset");
1375  }
1376  case Intrinsic::stackprotector: {
1377    // Emit inline code to store the stack guard onto the stack.
1378    EVT PtrTy = TLI.getPointerTy();
1379
1380    const Value *Op1 = I.getArgOperand(0); // The guard's value.
1381    const AllocaInst *Slot = cast<AllocaInst>(I.getArgOperand(1));
1382
1383    // Grab the frame index.
1384    X86AddressMode AM;
1385    if (!X86SelectAddress(Slot, AM)) return false;
1386    if (!X86FastEmitStore(PtrTy, Op1, AM)) return false;
1387    return true;
1388  }
1389  case Intrinsic::dbg_declare: {
1390    const DbgDeclareInst *DI = cast<DbgDeclareInst>(&I);
1391    X86AddressMode AM;
1392    assert(DI->getAddress() && "Null address should be checked earlier!");
1393    if (!X86SelectAddress(DI->getAddress(), AM))
1394      return false;
1395    const TargetInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE);
1396    // FIXME may need to add RegState::Debug to any registers produced,
1397    // although ESP/EBP should be the only ones at the moment.
1398    addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II), AM).
1399      addImm(0).addMetadata(DI->getVariable());
1400    return true;
1401  }
1402  case Intrinsic::trap: {
1403    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::TRAP));
1404    return true;
1405  }
1406  case Intrinsic::sadd_with_overflow:
1407  case Intrinsic::uadd_with_overflow: {
1408    // FIXME: Should fold immediates.
1409
1410    // Replace "add with overflow" intrinsics with an "add" instruction followed
1411    // by a seto/setc instruction.
1412    const Function *Callee = I.getCalledFunction();
1413    const Type *RetTy =
1414      cast<StructType>(Callee->getReturnType())->getTypeAtIndex(unsigned(0));
1415
1416    MVT VT;
1417    if (!isTypeLegal(RetTy, VT))
1418      return false;
1419
1420    const Value *Op1 = I.getArgOperand(0);
1421    const Value *Op2 = I.getArgOperand(1);
1422    unsigned Reg1 = getRegForValue(Op1);
1423    unsigned Reg2 = getRegForValue(Op2);
1424
1425    if (Reg1 == 0 || Reg2 == 0)
1426      // FIXME: Handle values *not* in registers.
1427      return false;
1428
1429    unsigned OpC = 0;
1430    if (VT == MVT::i32)
1431      OpC = X86::ADD32rr;
1432    else if (VT == MVT::i64)
1433      OpC = X86::ADD64rr;
1434    else
1435      return false;
1436
1437    // The call to CreateRegs builds two sequential registers to store both of
1438    // the returned values.
1439    unsigned ResultReg = FuncInfo.CreateRegs(I.getType());
1440    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(OpC), ResultReg)
1441      .addReg(Reg1).addReg(Reg2);
1442
1443    unsigned Opc = X86::SETBr;
1444    if (I.getIntrinsicID() == Intrinsic::sadd_with_overflow)
1445      Opc = X86::SETOr;
1446    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg+1);
1447
1448    UpdateValueMap(&I, ResultReg, 2);
1449    return true;
1450  }
1451  }
1452}
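// For illustration, "call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a,
// i32 %b)" is selected above as ADD32rr into the first result register and
// SETBr into the second, so the carry flag supplies the overflow bit.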
1453
1454bool X86FastISel::X86SelectCall(const Instruction *I) {
1455  const CallInst *CI = cast<CallInst>(I);
1456  const Value *Callee = CI->getCalledValue();
1457
1458  // Can't handle inline asm yet.
1459  if (isa<InlineAsm>(Callee))
1460    return false;
1461
1462  // Handle intrinsic calls.
1463  if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI))
1464    return X86VisitIntrinsicCall(*II);
1465
1466  return DoSelectCall(I, 0);
1467}
1468
1469// Select either a call, or an llvm.memcpy/memmove/memset intrinsic
1470bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
1471  const CallInst *CI = cast<CallInst>(I);
1472  const Value *Callee = CI->getCalledValue();
1473
1474  // Handle only C and fastcc calling conventions for now.
1475  ImmutableCallSite CS(CI);
1476  CallingConv::ID CC = CS.getCallingConv();
1477  if (CC != CallingConv::C && CC != CallingConv::Fast &&
1478      CC != CallingConv::X86_FastCall)
1479    return false;
1480
1481  // fastcc with -tailcallopt is intended to provide a guaranteed
1482  // tail call optimization. Fastisel doesn't know how to do that.
1483  if (CC == CallingConv::Fast && GuaranteedTailCallOpt)
1484    return false;
1485
1486  const PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
1487  const FunctionType *FTy = cast<FunctionType>(PT->getElementType());
1488  bool isVarArg = FTy->isVarArg();
1489
1490  // Don't know how to handle Win64 varargs yet.  Nothing special needed for
1491  // x86-32.  Special handling for x86-64 is implemented.
1492  if (isVarArg && Subtarget->isTargetWin64())
1493    return false;
1494
1495  // Fast-isel doesn't know about callee-pop yet.
1496  if (Subtarget->IsCalleePop(isVarArg, CC))
1497    return false;
1498
1499  // Check whether the function can return without sret-demotion.
1500  SmallVector<ISD::OutputArg, 4> Outs;
1501  SmallVector<uint64_t, 4> Offsets;
1502  GetReturnInfo(I->getType(), CS.getAttributes().getRetAttributes(),
1503                Outs, TLI, &Offsets);
1504  bool CanLowerReturn = TLI.CanLowerReturn(CS.getCallingConv(),
1505                                           *FuncInfo.MF, FTy->isVarArg(),
1506                                           Outs, FTy->getContext());
1507  if (!CanLowerReturn)
1508    return false;
1509
1510  // Materialize callee address in a register. FIXME: GV address can be
1511  // handled with a CALLpcrel32 instead.
1512  X86AddressMode CalleeAM;
1513  if (!X86SelectCallAddress(Callee, CalleeAM))
1514    return false;
1515  unsigned CalleeOp = 0;
1516  const GlobalValue *GV = 0;
1517  if (CalleeAM.GV != 0) {
1518    GV = CalleeAM.GV;
1519  } else if (CalleeAM.Base.Reg != 0) {
1520    CalleeOp = CalleeAM.Base.Reg;
1521  } else
1522    return false;
1523
1524  // Deal with call operands first.
1525  SmallVector<const Value *, 8> ArgVals;
1526  SmallVector<unsigned, 8> Args;
1527  SmallVector<MVT, 8> ArgVTs;
1528  SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
1529  Args.reserve(CS.arg_size());
1530  ArgVals.reserve(CS.arg_size());
1531  ArgVTs.reserve(CS.arg_size());
1532  ArgFlags.reserve(CS.arg_size());
1533  for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
1534       i != e; ++i) {
1535    // If we're lowering a mem intrinsic instead of a regular call, skip the
1536    // last two arguments, which are not passed to the underlying function.
1537    if (MemIntName && e-i <= 2)
1538      break;
1539    Value *ArgVal = *i;
1540    ISD::ArgFlagsTy Flags;
1541    unsigned AttrInd = i - CS.arg_begin() + 1;
1542    if (CS.paramHasAttr(AttrInd, Attribute::SExt))
1543      Flags.setSExt();
1544    if (CS.paramHasAttr(AttrInd, Attribute::ZExt))
1545      Flags.setZExt();
1546
1547    if (CS.paramHasAttr(AttrInd, Attribute::ByVal)) {
1548      const PointerType *Ty = cast<PointerType>(ArgVal->getType());
1549      const Type *ElementTy = Ty->getElementType();
1550      unsigned FrameSize = TD.getTypeAllocSize(ElementTy);
1551      unsigned FrameAlign = CS.getParamAlignment(AttrInd);
1552      if (!FrameAlign)
1553        FrameAlign = TLI.getByValTypeAlignment(ElementTy);
1554      Flags.setByVal();
1555      Flags.setByValSize(FrameSize);
1556      Flags.setByValAlign(FrameAlign);
1557      if (!IsMemcpySmall(FrameSize))
1558        return false;
1559    }
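    // Small byval aggregates are copied further below with an inline memcpy
    // onto the outgoing argument area; anything over the small-memcpy
    // threshold was rejected just above.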
1560
1561    if (CS.paramHasAttr(AttrInd, Attribute::InReg))
1562      Flags.setInReg();
1563    if (CS.paramHasAttr(AttrInd, Attribute::Nest))
1564      Flags.setNest();
1565
1566    // If this is an i1/i8/i16 argument, promote to i32 to avoid an extra
1567    // instruction.  This is safe because it is common to all calling
1568    // conventions supported by fast-isel on x86.
1569    if (ConstantInt *CI = dyn_cast<ConstantInt>(ArgVal)) {
1570      if (CI->getBitWidth() == 1 || CI->getBitWidth() == 8 ||
1571          CI->getBitWidth() == 16) {
1572        if (Flags.isSExt())
1573          ArgVal = ConstantExpr::getSExt(CI,Type::getInt32Ty(CI->getContext()));
1574        else
1575          ArgVal = ConstantExpr::getZExt(CI,Type::getInt32Ty(CI->getContext()));
1576      }
1577    }
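    // For example (sketch): an "i8 5" constant argument becomes the i32
    // constant 5 here (sign- or zero-extended per the parameter attributes),
    // so no extension instruction is needed when it is materialized.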
1578
1579    unsigned ArgReg;
1580
1581    // Passing bools around ends up doing a trunc to i1 and passing it.
1582    // Codegen this as an argument + "and 1".
1583    if (ArgVal->getType()->isIntegerTy(1) && isa<TruncInst>(ArgVal) &&
1584        cast<TruncInst>(ArgVal)->getParent() == I->getParent() &&
1585        ArgVal->hasOneUse()) {
1586      ArgVal = cast<TruncInst>(ArgVal)->getOperand(0);
1587      ArgReg = getRegForValue(ArgVal);
1588      if (ArgReg == 0) return false;
1589
1590      MVT ArgVT;
1591      if (!isTypeLegal(ArgVal->getType(), ArgVT)) return false;
1592
1593      ArgReg = FastEmit_ri(ArgVT, ArgVT, ISD::AND, ArgReg,
1594                           ArgVal->hasOneUse(), 1);
1595    } else {
1596      ArgReg = getRegForValue(ArgVal);
1597    }
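    // For example (sketch): given "%b = trunc i32 %x to i1" feeding only this
    // call, the argument register is taken from %x directly and masked with an
    // AND of 1 instead of emitting a separate truncation.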
1598
1599    if (ArgReg == 0) return false;
1600
1601    const Type *ArgTy = ArgVal->getType();
1602    MVT ArgVT;
1603    if (!isTypeLegal(ArgTy, ArgVT))
1604      return false;
1605    if (ArgVT == MVT::x86mmx)
1606      return false;
1607    unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy);
1608    Flags.setOrigAlign(OriginalAlignment);
1609
1610    Args.push_back(ArgReg);
1611    ArgVals.push_back(ArgVal);
1612    ArgVTs.push_back(ArgVT);
1613    ArgFlags.push_back(Flags);
1614  }
1615
1616  // Analyze operands of the call, assigning locations to each operand.
1617  SmallVector<CCValAssign, 16> ArgLocs;
1618  CCState CCInfo(CC, isVarArg, *FuncInfo.MF, TM, ArgLocs,
1619                 I->getParent()->getContext());
1620
1621  // Allocate shadow area for Win64
1622  if (Subtarget->isTargetWin64())
1623    CCInfo.AllocateStack(32, 8);
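  // The Win64 calling convention reserves a 32-byte shadow (home) area for
  // the first four parameters regardless of how many are actually passed, so
  // it is accounted for before analyzing the call operands.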
1624
1625  CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CC_X86);
1626
1627  // Get a count of how many bytes are to be pushed on the stack.
1628  unsigned NumBytes = CCInfo.getNextStackOffset();
1629
1630  // Issue CALLSEQ_START
1631  unsigned AdjStackDown = TM.getRegisterInfo()->getCallFrameSetupOpcode();
1632  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(AdjStackDown))
1633    .addImm(NumBytes);
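  // This emits the target's call-frame-setup pseudo (ADJCALLSTACKDOWN32/64 on
  // x86), which is later lowered to the stack-pointer adjustment for the
  // outgoing arguments.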
1634
1635  // Process the arguments: walk the register/memloc assignments, inserting
1636  // copies / loads.
1637  SmallVector<unsigned, 4> RegArgs;
1638  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
1639    CCValAssign &VA = ArgLocs[i];
1640    unsigned Arg = Args[VA.getValNo()];
1641    EVT ArgVT = ArgVTs[VA.getValNo()];
1642
1643    // Promote the value if needed.
1644    switch (VA.getLocInfo()) {
1645    default: llvm_unreachable("Unknown loc info!");
1646    case CCValAssign::Full: break;
1647    case CCValAssign::SExt: {
1648      assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
1649             "Unexpected extend");
1650      bool Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(),
1651                                       Arg, ArgVT, Arg);
1652      assert(Emitted && "Failed to emit a sext!"); (void)Emitted;
1653      ArgVT = VA.getLocVT();
1654      break;
1655    }
1656    case CCValAssign::ZExt: {
1657      assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
1658             "Unexpected extend");
1659      bool Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(),
1660                                       Arg, ArgVT, Arg);
1661      assert(Emitted && "Failed to emit a zext!"); (void)Emitted;
1662      ArgVT = VA.getLocVT();
1663      break;
1664    }
1665    case CCValAssign::AExt: {
1666      assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
1667             "Unexpected extend");
1668      bool Emitted = X86FastEmitExtend(ISD::ANY_EXTEND, VA.getLocVT(),
1669                                       Arg, ArgVT, Arg);
1670      if (!Emitted)
1671        Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(),
1672                                    Arg, ArgVT, Arg);
1673      if (!Emitted)
1674        Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(),
1675                                    Arg, ArgVT, Arg);
1676
1677      assert(Emitted && "Failed to emit an aext!"); (void)Emitted;
1678      ArgVT = VA.getLocVT();
1679      break;
1680    }
1681    case CCValAssign::BCvt: {
1682      unsigned BC = FastEmit_r(ArgVT.getSimpleVT(), VA.getLocVT(),
1683                               ISD::BITCAST, Arg, /*TODO: Kill=*/false);
1684      assert(BC != 0 && "Failed to emit a bitcast!");
1685      Arg = BC;
1686      ArgVT = VA.getLocVT();
1687      break;
1688    }
1689    }
1690
1691    if (VA.isRegLoc()) {
1692      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
1693              VA.getLocReg()).addReg(Arg);
1694      RegArgs.push_back(VA.getLocReg());
1695    } else {
1696      unsigned LocMemOffset = VA.getLocMemOffset();
1697      X86AddressMode AM;
1698      AM.Base.Reg = StackPtr;
1699      AM.Disp = LocMemOffset;
1700      const Value *ArgVal = ArgVals[VA.getValNo()];
1701      ISD::ArgFlagsTy Flags = ArgFlags[VA.getValNo()];
1702
1703      if (Flags.isByVal()) {
1704        X86AddressMode SrcAM;
1705        SrcAM.Base.Reg = Arg;
1706        bool Res = TryEmitSmallMemcpy(AM, SrcAM, Flags.getByValSize());
1707        assert(Res && "memcpy length already checked!"); (void)Res;
1708      } else if (isa<ConstantInt>(ArgVal) || isa<ConstantPointerNull>(ArgVal)) {
1709        // If this is a really simple value, emit this with the Value* version
1710        // of X86FastEmitStore.  If it isn't simple, we don't want to do this,
1711        // as it can cause us to reevaluate the argument.
1712        X86FastEmitStore(ArgVT, ArgVal, AM);
1713      } else {
1714        X86FastEmitStore(ArgVT, Arg, AM);
1715      }
1716    }
1717  }
1718
1719  // ELF / PIC requires the GOT pointer to be in the EBX register before
1720  // making function calls through the PLT.
1721  if (Subtarget->isPICStyleGOT()) {
1722    unsigned Base = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
1723    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
1724            X86::EBX).addReg(Base);
1725  }
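  // Illustrative note: i386 ELF PLT stubs locate the GOT through EBX, so the
  // copy above (together with the implicit EBX use added to the call below)
  // maintains that invariant for PIC calls.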
1726
1727  if (Subtarget->is64Bit() && isVarArg && !Subtarget->isTargetWin64()) {
1728    // Count the number of XMM registers allocated.
1729    static const unsigned XMMArgRegs[] = {
1730      X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
1731      X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
1732    };
1733    unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);
1734    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::MOV8ri),
1735            X86::AL).addImm(NumXMMRegs);
1736  }
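  // The x86-64 SysV vararg convention expects AL to hold an upper bound on
  // the number of XMM registers used by the call; the callee's va_start code
  // uses it to decide how many vector registers to save.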
1737
1738  // Issue the call.
1739  MachineInstrBuilder MIB;
1740  if (CalleeOp) {
1741    // Register-indirect call.
1742    unsigned CallOpc;
1743    if (Subtarget->isTargetWin64())
1744      CallOpc = X86::WINCALL64r;
1745    else if (Subtarget->is64Bit())
1746      CallOpc = X86::CALL64r;
1747    else
1748      CallOpc = X86::CALL32r;
1749    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc))
1750      .addReg(CalleeOp);
1751
1752  } else {
1753    // Direct call.
1754    assert(GV && "Not a direct call");
1755    unsigned CallOpc;
1756    if (Subtarget->isTargetWin64())
1757      CallOpc = X86::WINCALL64pcrel32;
1758    else if (Subtarget->is64Bit())
1759      CallOpc = X86::CALL64pcrel32;
1760    else
1761      CallOpc = X86::CALLpcrel32;
1762
1763    // See if we need any target-specific flags on the GV operand.
1764    unsigned char OpFlags = 0;
1765
1766    // On ELF targets, in both X86-64 and X86-32 mode, direct calls to
1767  // external symbols must go through the PLT in PIC mode.  If the symbol
1768    // has hidden or protected visibility, or if it is static or local, then
1769    // we don't need to use the PLT - we can directly call it.
1770    if (Subtarget->isTargetELF() &&
1771        TM.getRelocationModel() == Reloc::PIC_ &&
1772        GV->hasDefaultVisibility() && !GV->hasLocalLinkage()) {
1773      OpFlags = X86II::MO_PLT;
1774    } else if (Subtarget->isPICStyleStubAny() &&
1775               (GV->isDeclaration() || GV->isWeakForLinker()) &&
1776               (!Subtarget->getTargetTriple().isMacOSX() ||
1777                Subtarget->getTargetTriple().isMacOSXVersionLT(10, 5))) {
1778      // PC-relative references to external symbols should go through $stub,
1779      // unless we're building with the leopard linker or later, which
1780      // automatically synthesizes these stubs.
1781      OpFlags = X86II::MO_DARWIN_STUB;
1782    }
1783
1784
1785    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc));
1786    if (MemIntName)
1787      MIB.addExternalSymbol(MemIntName, OpFlags);
1788    else
1789      MIB.addGlobalAddress(GV, 0, OpFlags);
1790  }
1791
1792  // Add an implicit use of the GOT pointer in EBX.
1793  if (Subtarget->isPICStyleGOT())
1794    MIB.addReg(X86::EBX);
1795
1796  if (Subtarget->is64Bit() && isVarArg && !Subtarget->isTargetWin64())
1797    MIB.addReg(X86::AL);
1798
1799  // Add implicit physical register uses to the call.
1800  for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
1801    MIB.addReg(RegArgs[i]);
1802
1803  // Issue CALLSEQ_END
1804  unsigned AdjStackUp = TM.getRegisterInfo()->getCallFrameDestroyOpcode();
1805  unsigned NumBytesCallee = 0;
1806  if (!Subtarget->is64Bit() && CS.paramHasAttr(1, Attribute::StructRet))
1807    NumBytesCallee = 4;
1808  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(AdjStackUp))
1809    .addImm(NumBytes).addImm(NumBytesCallee);
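  // On 32-bit targets a callee taking an sret argument pops the hidden
  // pointer itself (4 bytes), which is what NumBytesCallee reports to the
  // frame-destroy pseudo above.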
1810
1811  // Build info for return calling conv lowering code.
1812  // FIXME: This is practically a copy-paste from TargetLowering::LowerCallTo.
1813  SmallVector<ISD::InputArg, 32> Ins;
1814  SmallVector<EVT, 4> RetTys;
1815  ComputeValueVTs(TLI, I->getType(), RetTys);
1816  for (unsigned i = 0, e = RetTys.size(); i != e; ++i) {
1817    EVT VT = RetTys[i];
1818    EVT RegisterVT = TLI.getRegisterType(I->getParent()->getContext(), VT);
1819    unsigned NumRegs = TLI.getNumRegisters(I->getParent()->getContext(), VT);
1820    for (unsigned j = 0; j != NumRegs; ++j) {
1821      ISD::InputArg MyFlags;
1822      MyFlags.VT = RegisterVT.getSimpleVT();
1823      MyFlags.Used = !CS.getInstruction()->use_empty();
1824      if (CS.paramHasAttr(0, Attribute::SExt))
1825        MyFlags.Flags.setSExt();
1826      if (CS.paramHasAttr(0, Attribute::ZExt))
1827        MyFlags.Flags.setZExt();
1828      if (CS.paramHasAttr(0, Attribute::InReg))
1829        MyFlags.Flags.setInReg();
1830      Ins.push_back(MyFlags);
1831    }
1832  }
1833
1834  // Now handle call return values.
1835  SmallVector<unsigned, 4> UsedRegs;
1836  SmallVector<CCValAssign, 16> RVLocs;
1837  CCState CCRetInfo(CC, false, *FuncInfo.MF, TM, RVLocs,
1838                    I->getParent()->getContext());
1839  unsigned ResultReg = FuncInfo.CreateRegs(I->getType());
1840  CCRetInfo.AnalyzeCallResult(Ins, RetCC_X86);
1841  for (unsigned i = 0; i != RVLocs.size(); ++i) {
1842    EVT CopyVT = RVLocs[i].getValVT();
1843    unsigned CopyReg = ResultReg + i;
1844
1845    // If this is a call to a function that returns an fp value on the x87 fp
1846    // stack, but where we prefer to use the value in xmm registers, copy it
1847    // out as F80 and use a truncate to move it from fp stack reg to xmm reg.
1848    if ((RVLocs[i].getLocReg() == X86::ST0 ||
1849         RVLocs[i].getLocReg() == X86::ST1) &&
1850        isScalarFPTypeInSSEReg(RVLocs[0].getValVT())) {
1851      CopyVT = MVT::f80;
1852      CopyReg = createResultReg(X86::RFP80RegisterClass);
1853    }
1854
1855    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
1856            CopyReg).addReg(RVLocs[i].getLocReg());
1857    UsedRegs.push_back(RVLocs[i].getLocReg());
1858
1859    if (CopyVT != RVLocs[i].getValVT()) {
1860      // Round the F80 to the right size, which also moves it to the
1861      // appropriate xmm register. This is accomplished by storing the F80
1862      // value in memory and then loading it back. Ewww...
1863      EVT ResVT = RVLocs[i].getValVT();
1864      unsigned Opc = ResVT == MVT::f32 ? X86::ST_Fp80m32 : X86::ST_Fp80m64;
1865      unsigned MemSize = ResVT.getSizeInBits()/8;
1866      int FI = MFI.CreateStackObject(MemSize, MemSize, false);
1867      addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
1868                                TII.get(Opc)), FI)
1869        .addReg(CopyReg);
1870      Opc = ResVT == MVT::f32 ? X86::MOVSSrm : X86::MOVSDrm;
1871      addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
1872                                TII.get(Opc), ResultReg + i), FI);
1873    }
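    // Sketch of the sequence emitted above for an f64 result:
    //   ST_Fp80m64 <stack slot>, <x87 copy>   ; store the value as a double
    //   MOVSDrm <result>, <stack slot>        ; reload it into an XMM register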
1874  }
1875
1876  if (RVLocs.size())
1877    UpdateValueMap(I, ResultReg, RVLocs.size());
1878
1879  // Set all unused physreg defs as dead.
1880  static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);
1881
1882  return true;
1883}
1884
1885
1886bool
1887X86FastISel::TargetSelectInstruction(const Instruction *I) {
1888  switch (I->getOpcode()) {
1889  default: break;
1890  case Instruction::Load:
1891    return X86SelectLoad(I);
1892  case Instruction::Store:
1893    return X86SelectStore(I);
1894  case Instruction::Ret:
1895    return X86SelectRet(I);
1896  case Instruction::ICmp:
1897  case Instruction::FCmp:
1898    return X86SelectCmp(I);
1899  case Instruction::ZExt:
1900    return X86SelectZExt(I);
1901  case Instruction::Br:
1902    return X86SelectBranch(I);
1903  case Instruction::Call:
1904    return X86SelectCall(I);
1905  case Instruction::LShr:
1906  case Instruction::AShr:
1907  case Instruction::Shl:
1908    return X86SelectShift(I);
1909  case Instruction::Select:
1910    return X86SelectSelect(I);
1911  case Instruction::Trunc:
1912    return X86SelectTrunc(I);
1913  case Instruction::FPExt:
1914    return X86SelectFPExt(I);
1915  case Instruction::FPTrunc:
1916    return X86SelectFPTrunc(I);
1917  case Instruction::IntToPtr: // Deliberate fall-through.
1918  case Instruction::PtrToInt: {
1919    EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
1920    EVT DstVT = TLI.getValueType(I->getType());
1921    if (DstVT.bitsGT(SrcVT))
1922      return X86SelectZExt(I);
1923    if (DstVT.bitsLT(SrcVT))
1924      return X86SelectTrunc(I);
1925    unsigned Reg = getRegForValue(I->getOperand(0));
1926    if (Reg == 0) return false;
1927    UpdateValueMap(I, Reg);
1928    return true;
1929  }
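  // For example (sketch): on x86-64 a ptrtoint from i8* to i64 is
  // size-preserving, so the pointer's existing vreg is simply remapped to the
  // result and no instruction is emitted.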
1930  }
1931
1932  return false;
1933}
1934
1935unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) {
1936  MVT VT;
1937  if (!isTypeLegal(C->getType(), VT))
1938    return 0;
1939
1940  // Get opcode and regclass of the output for the given load instruction.
1941  unsigned Opc = 0;
1942  const TargetRegisterClass *RC = NULL;
1943  switch (VT.SimpleTy) {
1944  default: return 0;
1945  case MVT::i8:
1946    Opc = X86::MOV8rm;
1947    RC  = X86::GR8RegisterClass;
1948    break;
1949  case MVT::i16:
1950    Opc = X86::MOV16rm;
1951    RC  = X86::GR16RegisterClass;
1952    break;
1953  case MVT::i32:
1954    Opc = X86::MOV32rm;
1955    RC  = X86::GR32RegisterClass;
1956    break;
1957  case MVT::i64:
1958    // Must be in x86-64 mode.
1959    Opc = X86::MOV64rm;
1960    RC  = X86::GR64RegisterClass;
1961    break;
1962  case MVT::f32:
1963    if (Subtarget->hasSSE1()) {
1964      Opc = X86::MOVSSrm;
1965      RC  = X86::FR32RegisterClass;
1966    } else {
1967      Opc = X86::LD_Fp32m;
1968      RC  = X86::RFP32RegisterClass;
1969    }
1970    break;
1971  case MVT::f64:
1972    if (Subtarget->hasSSE2()) {
1973      Opc = X86::MOVSDrm;
1974      RC  = X86::FR64RegisterClass;
1975    } else {
1976      Opc = X86::LD_Fp64m;
1977      RC  = X86::RFP64RegisterClass;
1978    }
1979    break;
1980  case MVT::f80:
1981    // No f80 support yet.
1982    return 0;
1983  }
1984
1985  // Materialize addresses with LEA instructions.
1986  if (isa<GlobalValue>(C)) {
1987    X86AddressMode AM;
1988    if (X86SelectAddress(C, AM)) {
1989      // If the expression is just a basereg, then we're done; otherwise we
1990      // need to emit an LEA.
1991      if (AM.BaseType == X86AddressMode::RegBase &&
1992          AM.IndexReg == 0 && AM.Disp == 0 && AM.GV == 0)
1993        return AM.Base.Reg;
1994
1995      Opc = TLI.getPointerTy() == MVT::i32 ? X86::LEA32r : X86::LEA64r;
1996      unsigned ResultReg = createResultReg(RC);
1997      addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
1998                             TII.get(Opc), ResultReg), AM);
1999      return ResultReg;
2000    }
2001    return 0;
2002  }
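  // For example (sketch): a global whose address folds into the addressing
  // mode is typically materialized with a single LEA, e.g.
  // "leaq g(%rip), %reg" in the 64-bit small code model.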
2003
2004  // MachineConstantPool wants an explicit alignment.
2005  unsigned Align = TD.getPrefTypeAlignment(C->getType());
2006  if (Align == 0) {
2007    // Alignment of vector types.  FIXME!
2008    Align = TD.getTypeAllocSize(C->getType());
2009  }
2010
2011  // x86-32 PIC requires a PIC base register for constant pools.
2012  unsigned PICBase = 0;
2013  unsigned char OpFlag = 0;
2014  if (Subtarget->isPICStyleStubPIC()) { // Not dynamic-no-pic
2015    OpFlag = X86II::MO_PIC_BASE_OFFSET;
2016    PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
2017  } else if (Subtarget->isPICStyleGOT()) {
2018    OpFlag = X86II::MO_GOTOFF;
2019    PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
2020  } else if (Subtarget->isPICStyleRIPRel() &&
2021             TM.getCodeModel() == CodeModel::Small) {
2022    PICBase = X86::RIP;
2023  }
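  // Illustrative note: the load below is constant-pool relative, e.g.
  // "movsd .LCPI0_0(%rip), %xmm0" in the 64-bit small code model, or a
  // PIC-base/GOTOFF form on 32-bit PIC targets.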
2024
2025  // Create the load from the constant pool.
2026  unsigned MCPOffset = MCP.getConstantPoolIndex(C, Align);
2027  unsigned ResultReg = createResultReg(RC);
2028  addConstantPoolReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
2029                                   TII.get(Opc), ResultReg),
2030                           MCPOffset, PICBase, OpFlag);
2031
2032  return ResultReg;
2033}
2034
2035unsigned X86FastISel::TargetMaterializeAlloca(const AllocaInst *C) {
2036  // Fail on dynamic allocas. At this point, getRegForValue has already
2037  // checked its CSE maps, so if we're here trying to handle a dynamic
2038  // alloca, we're not going to succeed. X86SelectAddress has a
2039  // check for dynamic allocas, because it's called directly from
2040  // various places, but TargetMaterializeAlloca also needs a check
2041  // in order to avoid recursion between getRegForValue,
2042  // X86SelectAddress, and TargetMaterializeAlloca.
2043  if (!FuncInfo.StaticAllocaMap.count(C))
2044    return 0;
2045
2046  X86AddressMode AM;
2047  if (!X86SelectAddress(C, AM))
2048    return 0;
2049  unsigned Opc = Subtarget->is64Bit() ? X86::LEA64r : X86::LEA32r;
2050  TargetRegisterClass *RC = TLI.getRegClassFor(TLI.getPointerTy());
2051  unsigned ResultReg = createResultReg(RC);
2052  addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
2053                         TII.get(Opc), ResultReg), AM);
2054  return ResultReg;
2055}
2056
2057unsigned X86FastISel::TargetMaterializeFloatZero(const ConstantFP *CF) {
2058  MVT VT;
2059  if (!isTypeLegal(CF->getType(), VT))
2060    return 0;
2061
2062  // Get opcode and regclass for the given zero.
2063  unsigned Opc = 0;
2064  const TargetRegisterClass *RC = NULL;
2065  switch (VT.SimpleTy) {
2066    default: return 0;
2067    case MVT::f32:
2068      if (Subtarget->hasSSE1()) {
2069        Opc = X86::FsFLD0SS;
2070        RC  = X86::FR32RegisterClass;
2071      } else {
2072        Opc = X86::LD_Fp032;
2073        RC  = X86::RFP32RegisterClass;
2074      }
2075      break;
2076    case MVT::f64:
2077      if (Subtarget->hasSSE2()) {
2078        Opc = X86::FsFLD0SD;
2079        RC  = X86::FR64RegisterClass;
2080      } else {
2081        Opc = X86::LD_Fp064;
2082        RC  = X86::RFP64RegisterClass;
2083      }
2084      break;
2085    case MVT::f80:
2086      // No f80 support yet.
2087      return 0;
2088  }
2089
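  // FsFLD0SS / FsFLD0SD are pseudos that are later expanded to an SSE
  // xor-zeroing idiom (e.g. an xorps of the register with itself), which is
  // cheaper than loading +0.0 from the constant pool.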
2090  unsigned ResultReg = createResultReg(RC);
2091  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg);
2092  return ResultReg;
2093}
2094
2095
2096/// TryToFoldLoad - The specified machine instr operand is a vreg, and that
2097/// vreg is being provided by the specified load instruction.  If possible,
2098/// try to fold the load as an operand to the instruction, returning true on
2099/// success.
2100bool X86FastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo,
2101                                const LoadInst *LI) {
2102  X86AddressMode AM;
2103  if (!X86SelectAddress(LI->getOperand(0), AM))
2104    return false;
2105
2106  X86InstrInfo &XII = (X86InstrInfo&)TII;
2107
2108  unsigned Size = TD.getTypeAllocSize(LI->getType());
2109  unsigned Alignment = LI->getAlignment();
2110
2111  SmallVector<MachineOperand, 8> AddrOps;
2112  AM.getFullAddress(AddrOps);
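  // For example (sketch): if MI is a CMP32rr whose OpNo operand is defined by
  // this load, foldMemoryOperandImpl can rewrite it to a CMP32rm that reads
  // memory directly; the folded instruction then replaces MI below.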
2113
2114  MachineInstr *Result =
2115    XII.foldMemoryOperandImpl(*FuncInfo.MF, MI, OpNo, AddrOps, Size, Alignment);
2116  if (Result == 0) return false;
2117
2118  FuncInfo.MBB->insert(FuncInfo.InsertPt, Result);
2119  MI->eraseFromParent();
2120  return true;
2121}
2122
2123
2124namespace llvm {
2125  llvm::FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo) {
2126    return new X86FastISel(funcInfo);
2127  }
2128}
2129