X86FastISel.cpp revision 205218
//===-- X86FastISel.cpp - X86 FastISel implementation ---------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the X86-specific support for the FastISel class. Much
// of the target-specific code is generated by tablegen in the file
// X86GenFastISel.inc, which is #included here.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86ISelLowering.h"
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/CallingConv.h"
#include "llvm/DerivedTypes.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;

namespace {

class X86FastISel : public FastISel {
  /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const X86Subtarget *Subtarget;

  /// StackPtr - Register used as the stack pointer.
  ///
  unsigned StackPtr;

  /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87
  /// floating point ops.
  /// When SSE is available, use it for f32 operations.
  /// When SSE2 is available, use it for f64 operations.
  bool X86ScalarSSEf64;
  bool X86ScalarSSEf32;

public:
  explicit X86FastISel(MachineFunction &mf,
                       MachineModuleInfo *mmi,
                       DwarfWriter *dw,
                       DenseMap<const Value *, unsigned> &vm,
                       DenseMap<const BasicBlock *, MachineBasicBlock *> &bm,
                       DenseMap<const AllocaInst *, int> &am
#ifndef NDEBUG
                       , SmallSet<Instruction*, 8> &cil
#endif
                       )
    : FastISel(mf, mmi, dw, vm, bm, am
#ifndef NDEBUG
               , cil
#endif
               ) {
    Subtarget = &TM.getSubtarget<X86Subtarget>();
    StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;
    X86ScalarSSEf64 = Subtarget->hasSSE2();
    X86ScalarSSEf32 = Subtarget->hasSSE1();
  }

  virtual bool TargetSelectInstruction(Instruction *I);

#include "X86GenFastISel.inc"

private:
  bool X86FastEmitCompare(Value *LHS, Value *RHS, EVT VT);

  bool X86FastEmitLoad(EVT VT, const X86AddressMode &AM, unsigned &RR);

  bool X86FastEmitStore(EVT VT, Value *Val,
                        const X86AddressMode &AM);
  bool X86FastEmitStore(EVT VT, unsigned Val,
                        const X86AddressMode &AM);

  bool X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT,
                         unsigned &ResultReg);

  bool X86SelectAddress(Value *V, X86AddressMode &AM);
  bool X86SelectCallAddress(Value *V, X86AddressMode &AM);

  bool X86SelectLoad(Instruction *I);

  bool X86SelectStore(Instruction *I);

  bool X86SelectCmp(Instruction *I);

  bool X86SelectZExt(Instruction *I);

  bool X86SelectBranch(Instruction *I);

  bool X86SelectShift(Instruction *I);

  bool X86SelectSelect(Instruction *I);

  bool X86SelectTrunc(Instruction *I);

  bool X86SelectFPExt(Instruction *I);
  bool X86SelectFPTrunc(Instruction *I);

  bool X86SelectExtractValue(Instruction *I);

  bool X86VisitIntrinsicCall(IntrinsicInst &I);
  bool X86SelectCall(Instruction *I);

  CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool isTailCall = false);

  const X86InstrInfo *getInstrInfo() const {
    return getTargetMachine()->getInstrInfo();
  }
  const X86TargetMachine *getTargetMachine() const {
    return static_cast<const X86TargetMachine *>(&TM);
  }

  unsigned TargetMaterializeConstant(Constant *C);

  unsigned TargetMaterializeAlloca(AllocaInst *C);

  /// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is
  /// computed in an SSE register, not on the X87 floating point stack.
  bool isScalarFPTypeInSSEReg(EVT VT) const {
    return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is legal if SSE2
      (VT == MVT::f32 && X86ScalarSSEf32);   // f32 is legal if SSE1
  }

  bool isTypeLegal(const Type *Ty, EVT &VT, bool AllowI1 = false);
};

} // end anonymous namespace.

bool X86FastISel::isTypeLegal(const Type *Ty, EVT &VT, bool AllowI1) {
  VT = TLI.getValueType(Ty, /*HandleUnknown=*/true);
  if (VT == MVT::Other || !VT.isSimple())
    // Unhandled type. Halt "fast" selection and bail.
    return false;

  // For now, require SSE/SSE2 for performing floating-point operations,
  // since x87 requires additional work.
  if (VT == MVT::f64 && !X86ScalarSSEf64)
     return false;
  if (VT == MVT::f32 && !X86ScalarSSEf32)
     return false;
  // Similarly, no f80 support yet.
  if (VT == MVT::f80)
    return false;
  // We only handle legal types. For example, on x86-32 the instruction
  // selector contains all of the 64-bit instructions from x86-64,
  // under the assumption that i64 won't be used if the target doesn't
  // support it.
  return (AllowI1 && VT == MVT::i1) || TLI.isTypeLegal(VT);
}

#include "X86GenCallingConv.inc"

/// CCAssignFnForCall - Selects the correct CCAssignFn for a given calling
/// convention.
CCAssignFn *X86FastISel::CCAssignFnForCall(CallingConv::ID CC,
                                           bool isTailCall) {
  if (Subtarget->is64Bit()) {
    if (CC == CallingConv::GHC)
      return CC_X86_64_GHC;
    else if (Subtarget->isTargetWin64())
      return CC_X86_Win64_C;
    else
      return CC_X86_64_C;
  }

  if (CC == CallingConv::X86_FastCall)
    return CC_X86_32_FastCall;
  else if (CC == CallingConv::Fast)
    return CC_X86_32_FastCC;
  else if (CC == CallingConv::GHC)
    return CC_X86_32_GHC;
  else
    return CC_X86_32_C;
}

/// X86FastEmitLoad - Emit a machine instruction to load a value of type VT
/// from the address described by AM. Return true and set the result register
/// by reference if it is possible.
bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM,
                                  unsigned &ResultReg) {
  // Get opcode and regclass of the output for the given load instruction.
  unsigned Opc = 0;
  const TargetRegisterClass *RC = NULL;
  switch (VT.getSimpleVT().SimpleTy) {
  default: return false;
  case MVT::i1:
  case MVT::i8:
    Opc = X86::MOV8rm;
    RC  = X86::GR8RegisterClass;
    break;
  case MVT::i16:
    Opc = X86::MOV16rm;
    RC  = X86::GR16RegisterClass;
    break;
  case MVT::i32:
    Opc = X86::MOV32rm;
    RC  = X86::GR32RegisterClass;
    break;
  case MVT::i64:
    // Must be in x86-64 mode.
    Opc = X86::MOV64rm;
    RC  = X86::GR64RegisterClass;
    break;
  case MVT::f32:
    if (Subtarget->hasSSE1()) {
      Opc = X86::MOVSSrm;
      RC  = X86::FR32RegisterClass;
    } else {
      Opc = X86::LD_Fp32m;
      RC  = X86::RFP32RegisterClass;
    }
    break;
  case MVT::f64:
    if (Subtarget->hasSSE2()) {
      Opc = X86::MOVSDrm;
      RC  = X86::FR64RegisterClass;
    } else {
      Opc = X86::LD_Fp64m;
      RC  = X86::RFP64RegisterClass;
    }
    break;
  case MVT::f80:
    // No f80 support yet.
    return false;
  }

  ResultReg = createResultReg(RC);
  addFullAddress(BuildMI(MBB, DL, TII.get(Opc), ResultReg), AM);
  return true;
}

/// X86FastEmitStore - Emit a machine instruction to store a value Val of
/// type VT into the address described by AM, which consists of a base
/// pointer, an optional scaled index, and a displacement (or a
/// GlobalAddress). Return true if it is possible.
bool
X86FastISel::X86FastEmitStore(EVT VT, unsigned Val,
                              const X86AddressMode &AM) {
  // Get opcode and regclass of the output for the given store instruction.
  unsigned Opc = 0;
  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::f80: // No f80 support yet.
  default: return false;
  case MVT::i1: {
    // Mask out all but lowest bit.
    unsigned AndResult = createResultReg(X86::GR8RegisterClass);
    BuildMI(MBB, DL,
            TII.get(X86::AND8ri), AndResult).addReg(Val).addImm(1);
    Val = AndResult;
  }
  // FALLTHROUGH, handling i1 as i8.
  case MVT::i8:  Opc = X86::MOV8mr;  break;
  case MVT::i16: Opc = X86::MOV16mr; break;
  case MVT::i32: Opc = X86::MOV32mr; break;
  case MVT::i64: Opc = X86::MOV64mr; break; // Must be in x86-64 mode.
  case MVT::f32:
    Opc = Subtarget->hasSSE1() ? X86::MOVSSmr : X86::ST_Fp32m;
    break;
  case MVT::f64:
    Opc = Subtarget->hasSSE2() ? X86::MOVSDmr : X86::ST_Fp64m;
    break;
  }

  addFullAddress(BuildMI(MBB, DL, TII.get(Opc)), AM).addReg(Val);
  return true;
}

bool X86FastISel::X86FastEmitStore(EVT VT, Value *Val,
                                   const X86AddressMode &AM) {
  // Handle 'null' like i32/i64 0.
  if (isa<ConstantPointerNull>(Val))
    Val = Constant::getNullValue(TD.getIntPtrType(Val->getContext()));

  // If this is a store of a simple constant, fold the constant into the store.
  if (ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
    unsigned Opc = 0;
    bool Signed = true;
    switch (VT.getSimpleVT().SimpleTy) {
    default: break;
    case MVT::i1:  Signed = false;     // FALLTHROUGH to handle as i8.
    case MVT::i8:  Opc = X86::MOV8mi;  break;
    case MVT::i16: Opc = X86::MOV16mi; break;
    case MVT::i32: Opc = X86::MOV32mi; break;
    case MVT::i64:
      // Must be a 32-bit sign extended value.
      if ((int)CI->getSExtValue() == CI->getSExtValue())
        Opc = X86::MOV64mi32;
      break;
    }

    if (Opc) {
      addFullAddress(BuildMI(MBB, DL, TII.get(Opc)), AM)
                             .addImm(Signed ? CI->getSExtValue() :
                                              CI->getZExtValue());
      return true;
    }
  }

  unsigned ValReg = getRegForValue(Val);
  if (ValReg == 0)
    return false;

  return X86FastEmitStore(VT, ValReg, AM);
}

/// X86FastEmitExtend - Emit a machine instruction to extend a value Src of
/// type SrcVT to type DstVT using the specified extension opcode Opc (e.g.
/// ISD::SIGN_EXTEND).
bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT,
                                    unsigned Src, EVT SrcVT,
                                    unsigned &ResultReg) {
  unsigned RR = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc, Src);

  if (RR != 0) {
    ResultReg = RR;
    return true;
  } else
    return false;
}

/// X86SelectAddress - Attempt to fill in an address from the given value.
///
bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM) {
  User *U = NULL;
  unsigned Opcode = Instruction::UserOp1;
  if (Instruction *I = dyn_cast<Instruction>(V)) {
    Opcode = I->getOpcode();
    U = I;
  } else if (ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
    Opcode = C->getOpcode();
    U = C;
  }

  switch (Opcode) {
  default: break;
  case Instruction::BitCast:
    // Look past bitcasts.
    return X86SelectAddress(U->getOperand(0), AM);

  case Instruction::IntToPtr:
    // Look past no-op inttoptrs.
    if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
      return X86SelectAddress(U->getOperand(0), AM);
    break;

  case Instruction::PtrToInt:
    // Look past no-op ptrtoints.
    if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
      return X86SelectAddress(U->getOperand(0), AM);
    break;

  case Instruction::Alloca: {
    // Do static allocas.
    const AllocaInst *A = cast<AllocaInst>(V);
    DenseMap<const AllocaInst*, int>::iterator SI = StaticAllocaMap.find(A);
    if (SI != StaticAllocaMap.end()) {
      AM.BaseType = X86AddressMode::FrameIndexBase;
      AM.Base.FrameIndex = SI->second;
      return true;
    }
    break;
  }

  case Instruction::Add: {
    // Adds of constants are common and easy enough.
    if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
      uint64_t Disp = (int32_t)AM.Disp + (uint64_t)CI->getSExtValue();
      // They have to fit in the 32-bit signed displacement field though.
      if (isInt32(Disp)) {
        AM.Disp = (uint32_t)Disp;
        return X86SelectAddress(U->getOperand(0), AM);
      }
    }
    break;
  }

  case Instruction::GetElementPtr: {
    X86AddressMode SavedAM = AM;

    // Pattern-match simple GEPs.
    uint64_t Disp = (int32_t)AM.Disp;
    unsigned IndexReg = AM.IndexReg;
    unsigned Scale = AM.Scale;
    gep_type_iterator GTI = gep_type_begin(U);
    // Iterate through the indices, folding what we can. Constants can be
    // folded, and one dynamic index can be handled, if the scale is supported.
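    // For example, a GEP like "getelementptr i32* %p, i32 %i" folds as
    // IndexReg = %i with Scale = 4, while constant struct/array indices are
    // accumulated directly into Disp.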
    for (User::op_iterator i = U->op_begin() + 1, e = U->op_end();
         i != e; ++i, ++GTI) {
      Value *Op = *i;
      if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
        const StructLayout *SL = TD.getStructLayout(STy);
        unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
        Disp += SL->getElementOffset(Idx);
      } else {
        uint64_t S = TD.getTypeAllocSize(GTI.getIndexedType());
        if (ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
          // Constant-offset addressing.
          Disp += CI->getSExtValue() * S;
        } else if (IndexReg == 0 &&
                   (!AM.GV || !Subtarget->isPICStyleRIPRel()) &&
                   (S == 1 || S == 2 || S == 4 || S == 8)) {
          // Scaled-index addressing.
          Scale = S;
          IndexReg = getRegForGEPIndex(Op);
          if (IndexReg == 0)
            return false;
        } else
          // Unsupported.
          goto unsupported_gep;
      }
    }
    // Check for displacement overflow.
    if (!isInt32(Disp))
      break;
    // Ok, the GEP indices were covered by constant-offset and scaled-index
    // addressing. Update the address state and move on to examining the base.
    AM.IndexReg = IndexReg;
    AM.Scale = Scale;
    AM.Disp = (uint32_t)Disp;
    if (X86SelectAddress(U->getOperand(0), AM))
      return true;

    // If we couldn't merge the sub value into this addr mode, revert back to
    // our address and just match the value instead of completely failing.
    AM = SavedAM;
    break;
  unsupported_gep:
    // Ok, the GEP indices weren't all covered.
    break;
  }
  }

  // Handle constant address.
  if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
    // Can't handle alternate code models yet.
    if (TM.getCodeModel() != CodeModel::Small)
      return false;

    // RIP-relative addresses can't have additional register operands.
    if (Subtarget->isPICStyleRIPRel() &&
        (AM.Base.Reg != 0 || AM.IndexReg != 0))
      return false;

    // Can't handle TLS yet.
    if (GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
      if (GVar->isThreadLocal())
        return false;

    // Okay, we've committed to selecting this global. Set up the basic address.
    AM.GV = GV;

    // Allow the subtarget to classify the global.
    unsigned char GVFlags = Subtarget->ClassifyGlobalReference(GV, TM);

    // If this reference is relative to the pic base, set it now.
    if (isGlobalRelativeToPICBase(GVFlags)) {
      // FIXME: How do we know Base.Reg is free??
      AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(&MF);
    }

    // Unless the ABI requires an extra load, return a direct reference to
    // the global.
    if (!isGlobalStubReference(GVFlags)) {
      if (Subtarget->isPICStyleRIPRel()) {
        // Use rip-relative addressing if we can.  Above we verified that the
        // base and index registers are unused.
        assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
        AM.Base.Reg = X86::RIP;
      }
      AM.GVOpFlags = GVFlags;
      return true;
    }

    // Ok, we need to do a load from a stub.  If we've already loaded from this
    // stub, reuse the loaded pointer, otherwise emit the load now.
    DenseMap<const Value*, unsigned>::iterator I = LocalValueMap.find(V);
    unsigned LoadReg;
    if (I != LocalValueMap.end() && I->second != 0) {
      LoadReg = I->second;
    } else {
      // Issue load from stub.
      unsigned Opc = 0;
      const TargetRegisterClass *RC = NULL;
      X86AddressMode StubAM;
      StubAM.Base.Reg = AM.Base.Reg;
      StubAM.GV = GV;
      StubAM.GVOpFlags = GVFlags;

      if (TLI.getPointerTy() == MVT::i64) {
        Opc = X86::MOV64rm;
        RC  = X86::GR64RegisterClass;

        if (Subtarget->isPICStyleRIPRel())
          StubAM.Base.Reg = X86::RIP;
      } else {
        Opc = X86::MOV32rm;
        RC  = X86::GR32RegisterClass;
      }

      LoadReg = createResultReg(RC);
      addFullAddress(BuildMI(MBB, DL, TII.get(Opc), LoadReg), StubAM);

      // Prevent loading GV stub multiple times in same MBB.
      LocalValueMap[V] = LoadReg;
    }

    // Now construct the final address. Note that the Disp, Scale,
    // and Index values may already be set here.
    AM.Base.Reg = LoadReg;
    AM.GV = 0;
    return true;
  }

  // If all else fails, try to materialize the value in a register.
  if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
    if (AM.Base.Reg == 0) {
      AM.Base.Reg = getRegForValue(V);
      return AM.Base.Reg != 0;
    }
    if (AM.IndexReg == 0) {
      assert(AM.Scale == 1 && "Scale with no index!");
      AM.IndexReg = getRegForValue(V);
      return AM.IndexReg != 0;
    }
  }

  return false;
}

/// X86SelectCallAddress - Attempt to fill in an address from the given value.
///
bool X86FastISel::X86SelectCallAddress(Value *V, X86AddressMode &AM) {
  User *U = NULL;
  unsigned Opcode = Instruction::UserOp1;
  if (Instruction *I = dyn_cast<Instruction>(V)) {
    Opcode = I->getOpcode();
    U = I;
  } else if (ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
    Opcode = C->getOpcode();
    U = C;
  }

  switch (Opcode) {
  default: break;
  case Instruction::BitCast:
    // Look past bitcasts.
    return X86SelectCallAddress(U->getOperand(0), AM);

  case Instruction::IntToPtr:
    // Look past no-op inttoptrs.
    if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
      return X86SelectCallAddress(U->getOperand(0), AM);
    break;

  case Instruction::PtrToInt:
    // Look past no-op ptrtoints.
    if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
      return X86SelectCallAddress(U->getOperand(0), AM);
    break;
  }

  // Handle constant address.
  if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
    // Can't handle alternate code models yet.
    if (TM.getCodeModel() != CodeModel::Small)
      return false;

    // RIP-relative addresses can't have additional register operands.
    if (Subtarget->isPICStyleRIPRel() &&
        (AM.Base.Reg != 0 || AM.IndexReg != 0))
      return false;

    // Can't handle TLS or DLLImport.
    if (GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
      if (GVar->isThreadLocal() || GVar->hasDLLImportLinkage())
        return false;

    // Okay, we've committed to selecting this global. Set up the basic address.
    AM.GV = GV;

    // No ABI requires an extra load for anything other than DLLImport, which
    // we rejected above. Return a direct reference to the global.
    if (Subtarget->isPICStyleRIPRel()) {
      // Use rip-relative addressing if we can.  Above we verified that the
      // base and index registers are unused.
      assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
      AM.Base.Reg = X86::RIP;
    } else if (Subtarget->isPICStyleStubPIC()) {
      AM.GVOpFlags = X86II::MO_PIC_BASE_OFFSET;
    } else if (Subtarget->isPICStyleGOT()) {
      AM.GVOpFlags = X86II::MO_GOTOFF;
    }

    return true;
  }

  // If all else fails, try to materialize the value in a register.
  if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
    if (AM.Base.Reg == 0) {
      AM.Base.Reg = getRegForValue(V);
      return AM.Base.Reg != 0;
    }
    if (AM.IndexReg == 0) {
      assert(AM.Scale == 1 && "Scale with no index!");
      AM.IndexReg = getRegForValue(V);
      return AM.IndexReg != 0;
    }
  }

  return false;
}


/// X86SelectStore - Select and emit code to implement store instructions.
bool X86FastISel::X86SelectStore(Instruction* I) {
  EVT VT;
  if (!isTypeLegal(I->getOperand(0)->getType(), VT, /*AllowI1=*/true))
    return false;

  X86AddressMode AM;
  if (!X86SelectAddress(I->getOperand(1), AM))
    return false;

  return X86FastEmitStore(VT, I->getOperand(0), AM);
}

/// X86SelectLoad - Select and emit code to implement load instructions.
///
bool X86FastISel::X86SelectLoad(Instruction *I)  {
  EVT VT;
  if (!isTypeLegal(I->getType(), VT, /*AllowI1=*/true))
    return false;

  X86AddressMode AM;
  if (!X86SelectAddress(I->getOperand(0), AM))
    return false;

  unsigned ResultReg = 0;
  if (X86FastEmitLoad(VT, AM, ResultReg)) {
    UpdateValueMap(I, ResultReg);
    return true;
  }
  return false;
}

static unsigned X86ChooseCmpOpcode(EVT VT) {
  switch (VT.getSimpleVT().SimpleTy) {
  default:       return 0;
  case MVT::i8:  return X86::CMP8rr;
  case MVT::i16: return X86::CMP16rr;
  case MVT::i32: return X86::CMP32rr;
  case MVT::i64: return X86::CMP64rr;
  case MVT::f32: return X86::UCOMISSrr;
  case MVT::f64: return X86::UCOMISDrr;
  }
}

/// X86ChooseCmpImmediateOpcode - If we have a comparison whose right-hand
/// side is the constant RHSC, return an opcode that can fold the immediate
/// into the compare (e.g. CMP32ri); otherwise return 0.
static unsigned X86ChooseCmpImmediateOpcode(EVT VT, ConstantInt *RHSC) {
  switch (VT.getSimpleVT().SimpleTy) {
  // Otherwise, we can't fold the immediate into this comparison.
  default: return 0;
  case MVT::i8: return X86::CMP8ri;
  case MVT::i16: return X86::CMP16ri;
  case MVT::i32: return X86::CMP32ri;
  case MVT::i64:
    // 64-bit comparisons are only valid if the immediate fits in a 32-bit sext
    // field.
    if ((int)RHSC->getSExtValue() == RHSC->getSExtValue())
      return X86::CMP64ri32;
    return 0;
  }
}

bool X86FastISel::X86FastEmitCompare(Value *Op0, Value *Op1, EVT VT) {
  unsigned Op0Reg = getRegForValue(Op0);
  if (Op0Reg == 0) return false;

  // Handle 'null' like i32/i64 0.
  if (isa<ConstantPointerNull>(Op1))
    Op1 = Constant::getNullValue(TD.getIntPtrType(Op0->getContext()));

  // We have two options: compare with register or immediate.  If the RHS of
  // the compare is an immediate that we can fold into this compare, use
  // CMPri, otherwise use CMPrr.
  if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
    if (unsigned CompareImmOpc = X86ChooseCmpImmediateOpcode(VT, Op1C)) {
      BuildMI(MBB, DL, TII.get(CompareImmOpc)).addReg(Op0Reg)
                                          .addImm(Op1C->getSExtValue());
      return true;
    }
  }

  unsigned CompareOpc = X86ChooseCmpOpcode(VT);
  if (CompareOpc == 0) return false;

  unsigned Op1Reg = getRegForValue(Op1);
  if (Op1Reg == 0) return false;
  BuildMI(MBB, DL, TII.get(CompareOpc)).addReg(Op0Reg).addReg(Op1Reg);

  return true;
}

bool X86FastISel::X86SelectCmp(Instruction *I) {
  CmpInst *CI = cast<CmpInst>(I);

  EVT VT;
  if (!isTypeLegal(I->getOperand(0)->getType(), VT))
    return false;

  unsigned ResultReg = createResultReg(&X86::GR8RegClass);
  unsigned SetCCOpc;
  bool SwapArgs;  // false -> compare Op0, Op1.  true -> compare Op1, Op0.
  switch (CI->getPredicate()) {
  case CmpInst::FCMP_OEQ: {
    if (!X86FastEmitCompare(CI->getOperand(0), CI->getOperand(1), VT))
      return false;

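    // After UCOMISS/UCOMISD, ZF is set for "equal or unordered" and PF is
    // set only for "unordered", so an ordered-equal result is the AND of
    // SETE and SETNP.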
    unsigned EReg = createResultReg(&X86::GR8RegClass);
    unsigned NPReg = createResultReg(&X86::GR8RegClass);
    BuildMI(MBB, DL, TII.get(X86::SETEr), EReg);
    BuildMI(MBB, DL, TII.get(X86::SETNPr), NPReg);
    BuildMI(MBB, DL,
            TII.get(X86::AND8rr), ResultReg).addReg(NPReg).addReg(EReg);
    UpdateValueMap(I, ResultReg);
    return true;
  }
  case CmpInst::FCMP_UNE: {
    if (!X86FastEmitCompare(CI->getOperand(0), CI->getOperand(1), VT))
      return false;

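    // UNE ("unordered or not equal") is the complement of OEQ, so it is the
    // OR of SETNE and SETP after the same compare.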
    unsigned NEReg = createResultReg(&X86::GR8RegClass);
    unsigned PReg = createResultReg(&X86::GR8RegClass);
    BuildMI(MBB, DL, TII.get(X86::SETNEr), NEReg);
    BuildMI(MBB, DL, TII.get(X86::SETPr), PReg);
    BuildMI(MBB, DL, TII.get(X86::OR8rr), ResultReg).addReg(PReg).addReg(NEReg);
    UpdateValueMap(I, ResultReg);
    return true;
  }
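  // The remaining FP predicates use the unsigned condition codes (A/AE/B/BE)
  // because UCOMISS/UCOMISD report their result through CF/ZF/PF rather than
  // the signed flags; some predicates also require swapping the operands.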
  case CmpInst::FCMP_OGT: SwapArgs = false; SetCCOpc = X86::SETAr;  break;
  case CmpInst::FCMP_OGE: SwapArgs = false; SetCCOpc = X86::SETAEr; break;
  case CmpInst::FCMP_OLT: SwapArgs = true;  SetCCOpc = X86::SETAr;  break;
  case CmpInst::FCMP_OLE: SwapArgs = true;  SetCCOpc = X86::SETAEr; break;
  case CmpInst::FCMP_ONE: SwapArgs = false; SetCCOpc = X86::SETNEr; break;
  case CmpInst::FCMP_ORD: SwapArgs = false; SetCCOpc = X86::SETNPr; break;
  case CmpInst::FCMP_UNO: SwapArgs = false; SetCCOpc = X86::SETPr;  break;
  case CmpInst::FCMP_UEQ: SwapArgs = false; SetCCOpc = X86::SETEr;  break;
  case CmpInst::FCMP_UGT: SwapArgs = true;  SetCCOpc = X86::SETBr;  break;
  case CmpInst::FCMP_UGE: SwapArgs = true;  SetCCOpc = X86::SETBEr; break;
  case CmpInst::FCMP_ULT: SwapArgs = false; SetCCOpc = X86::SETBr;  break;
  case CmpInst::FCMP_ULE: SwapArgs = false; SetCCOpc = X86::SETBEr; break;

  case CmpInst::ICMP_EQ:  SwapArgs = false; SetCCOpc = X86::SETEr;  break;
  case CmpInst::ICMP_NE:  SwapArgs = false; SetCCOpc = X86::SETNEr; break;
  case CmpInst::ICMP_UGT: SwapArgs = false; SetCCOpc = X86::SETAr;  break;
  case CmpInst::ICMP_UGE: SwapArgs = false; SetCCOpc = X86::SETAEr; break;
  case CmpInst::ICMP_ULT: SwapArgs = false; SetCCOpc = X86::SETBr;  break;
  case CmpInst::ICMP_ULE: SwapArgs = false; SetCCOpc = X86::SETBEr; break;
  case CmpInst::ICMP_SGT: SwapArgs = false; SetCCOpc = X86::SETGr;  break;
  case CmpInst::ICMP_SGE: SwapArgs = false; SetCCOpc = X86::SETGEr; break;
  case CmpInst::ICMP_SLT: SwapArgs = false; SetCCOpc = X86::SETLr;  break;
  case CmpInst::ICMP_SLE: SwapArgs = false; SetCCOpc = X86::SETLEr; break;
  default:
    return false;
  }

  Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1);
  if (SwapArgs)
    std::swap(Op0, Op1);

  // Emit a compare of Op0/Op1.
  if (!X86FastEmitCompare(Op0, Op1, VT))
    return false;

  BuildMI(MBB, DL, TII.get(SetCCOpc), ResultReg);
  UpdateValueMap(I, ResultReg);
  return true;
}

bool X86FastISel::X86SelectZExt(Instruction *I) {
  // Handle zero-extension from i1 to i8, which is common.
  if (I->getType()->isIntegerTy(8) &&
      I->getOperand(0)->getType()->isIntegerTy(1)) {
    unsigned ResultReg = getRegForValue(I->getOperand(0));
    if (ResultReg == 0) return false;
    // Set the high bits to zero.
    ResultReg = FastEmitZExtFromI1(MVT::i8, ResultReg);
    if (ResultReg == 0) return false;
    UpdateValueMap(I, ResultReg);
    return true;
  }

  return false;
}


bool X86FastISel::X86SelectBranch(Instruction *I) {
  // Unconditional branches are selected by tablegen-generated code.
  // Handle a conditional branch.
  BranchInst *BI = cast<BranchInst>(I);
  MachineBasicBlock *TrueMBB = MBBMap[BI->getSuccessor(0)];
  MachineBasicBlock *FalseMBB = MBBMap[BI->getSuccessor(1)];

  // Fold the common case of a conditional branch with a comparison.
  if (CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
    if (CI->hasOneUse()) {
      EVT VT = TLI.getValueType(CI->getOperand(0)->getType());

      // Try to take advantage of fallthrough opportunities.
      CmpInst::Predicate Predicate = CI->getPredicate();
      if (MBB->isLayoutSuccessor(TrueMBB)) {
        std::swap(TrueMBB, FalseMBB);
        Predicate = CmpInst::getInversePredicate(Predicate);
      }
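      // (If the true block immediately follows this one, branch on the
      // inverted predicate to the false block and let the true case fall
      // through.)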

      bool SwapArgs;  // false -> compare Op0, Op1.  true -> compare Op1, Op0.
      unsigned BranchOpc; // Opcode to jump on, e.g. "X86::JA"

      switch (Predicate) {
      case CmpInst::FCMP_OEQ:
        std::swap(TrueMBB, FalseMBB);
        Predicate = CmpInst::FCMP_UNE;
        // FALL THROUGH
      case CmpInst::FCMP_UNE: SwapArgs = false; BranchOpc = X86::JNE_4; break;
      case CmpInst::FCMP_OGT: SwapArgs = false; BranchOpc = X86::JA_4;  break;
      case CmpInst::FCMP_OGE: SwapArgs = false; BranchOpc = X86::JAE_4; break;
      case CmpInst::FCMP_OLT: SwapArgs = true;  BranchOpc = X86::JA_4;  break;
      case CmpInst::FCMP_OLE: SwapArgs = true;  BranchOpc = X86::JAE_4; break;
      case CmpInst::FCMP_ONE: SwapArgs = false; BranchOpc = X86::JNE_4; break;
      case CmpInst::FCMP_ORD: SwapArgs = false; BranchOpc = X86::JNP_4; break;
      case CmpInst::FCMP_UNO: SwapArgs = false; BranchOpc = X86::JP_4;  break;
      case CmpInst::FCMP_UEQ: SwapArgs = false; BranchOpc = X86::JE_4;  break;
      case CmpInst::FCMP_UGT: SwapArgs = true;  BranchOpc = X86::JB_4;  break;
      case CmpInst::FCMP_UGE: SwapArgs = true;  BranchOpc = X86::JBE_4; break;
      case CmpInst::FCMP_ULT: SwapArgs = false; BranchOpc = X86::JB_4;  break;
      case CmpInst::FCMP_ULE: SwapArgs = false; BranchOpc = X86::JBE_4; break;

      case CmpInst::ICMP_EQ:  SwapArgs = false; BranchOpc = X86::JE_4;  break;
      case CmpInst::ICMP_NE:  SwapArgs = false; BranchOpc = X86::JNE_4; break;
      case CmpInst::ICMP_UGT: SwapArgs = false; BranchOpc = X86::JA_4;  break;
      case CmpInst::ICMP_UGE: SwapArgs = false; BranchOpc = X86::JAE_4; break;
      case CmpInst::ICMP_ULT: SwapArgs = false; BranchOpc = X86::JB_4;  break;
      case CmpInst::ICMP_ULE: SwapArgs = false; BranchOpc = X86::JBE_4; break;
      case CmpInst::ICMP_SGT: SwapArgs = false; BranchOpc = X86::JG_4;  break;
      case CmpInst::ICMP_SGE: SwapArgs = false; BranchOpc = X86::JGE_4; break;
      case CmpInst::ICMP_SLT: SwapArgs = false; BranchOpc = X86::JL_4;  break;
      case CmpInst::ICMP_SLE: SwapArgs = false; BranchOpc = X86::JLE_4; break;
      default:
        return false;
      }

      Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1);
      if (SwapArgs)
        std::swap(Op0, Op1);

      // Emit a compare of the LHS and RHS, setting the flags.
      if (!X86FastEmitCompare(Op0, Op1, VT))
        return false;

      BuildMI(MBB, DL, TII.get(BranchOpc)).addMBB(TrueMBB);

      if (Predicate == CmpInst::FCMP_UNE) {
        // X86 requires a second branch to handle UNE (and OEQ,
        // which is mapped to UNE above).
        BuildMI(MBB, DL, TII.get(X86::JP_4)).addMBB(TrueMBB);
      }

      FastEmitBranch(FalseMBB);
      MBB->addSuccessor(TrueMBB);
      return true;
    }
  } else if (ExtractValueInst *EI =
             dyn_cast<ExtractValueInst>(BI->getCondition())) {
    // Check to see if the branch instruction is from an "arithmetic with
    // overflow" intrinsic. The main way these intrinsics are used is:
    //
    //   %t = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %v1, i32 %v2)
    //   %sum = extractvalue { i32, i1 } %t, 0
    //   %obit = extractvalue { i32, i1 } %t, 1
    //   br i1 %obit, label %overflow, label %normal
    //
    // The %sum and %obit are converted into an ADD and a SETO/SETB before
    // reaching the branch. Therefore, we search backwards through the MBB
    // looking for the SETO/SETB instruction. If an instruction modifies the
    // EFLAGS register before we reach the SETO/SETB instruction, then we can't
    // convert the branch into a JO/JB instruction.
    if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(EI->getAggregateOperand())){
      if (CI->getIntrinsicID() == Intrinsic::sadd_with_overflow ||
          CI->getIntrinsicID() == Intrinsic::uadd_with_overflow) {
        const MachineInstr *SetMI = 0;
        unsigned Reg = lookUpRegForValue(EI);

        for (MachineBasicBlock::const_reverse_iterator
               RI = MBB->rbegin(), RE = MBB->rend(); RI != RE; ++RI) {
          const MachineInstr &MI = *RI;

          if (MI.modifiesRegister(Reg)) {
            unsigned Src, Dst, SrcSR, DstSR;

            if (getInstrInfo()->isMoveInstr(MI, Src, Dst, SrcSR, DstSR)) {
              Reg = Src;
              continue;
            }

            SetMI = &MI;
            break;
          }

          const TargetInstrDesc &TID = MI.getDesc();
          if (TID.hasUnmodeledSideEffects() ||
              TID.hasImplicitDefOfPhysReg(X86::EFLAGS))
            break;
        }

        if (SetMI) {
          unsigned OpCode = SetMI->getOpcode();

          if (OpCode == X86::SETOr || OpCode == X86::SETBr) {
            BuildMI(MBB, DL, TII.get(OpCode == X86::SETOr ?
                                        X86::JO_4 : X86::JB_4))
              .addMBB(TrueMBB);
            FastEmitBranch(FalseMBB);
            MBB->addSuccessor(TrueMBB);
            return true;
          }
        }
      }
    }
  }

  // Otherwise do a clumsy setcc and re-test it.
  unsigned OpReg = getRegForValue(BI->getCondition());
  if (OpReg == 0) return false;

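  // The i1 condition lives in an 8-bit register; TEST sets ZF when it is
  // zero, so JNE branches to the true block only when the condition is set.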
  BuildMI(MBB, DL, TII.get(X86::TEST8rr)).addReg(OpReg).addReg(OpReg);
  BuildMI(MBB, DL, TII.get(X86::JNE_4)).addMBB(TrueMBB);
  FastEmitBranch(FalseMBB);
  MBB->addSuccessor(TrueMBB);
  return true;
}

bool X86FastISel::X86SelectShift(Instruction *I) {
  unsigned CReg = 0, OpReg = 0, OpImm = 0;
  const TargetRegisterClass *RC = NULL;
  if (I->getType()->isIntegerTy(8)) {
    CReg = X86::CL;
    RC = &X86::GR8RegClass;
    switch (I->getOpcode()) {
    case Instruction::LShr: OpReg = X86::SHR8rCL; OpImm = X86::SHR8ri; break;
    case Instruction::AShr: OpReg = X86::SAR8rCL; OpImm = X86::SAR8ri; break;
    case Instruction::Shl:  OpReg = X86::SHL8rCL; OpImm = X86::SHL8ri; break;
    default: return false;
    }
  } else if (I->getType()->isIntegerTy(16)) {
    CReg = X86::CX;
    RC = &X86::GR16RegClass;
    switch (I->getOpcode()) {
    case Instruction::LShr: OpReg = X86::SHR16rCL; OpImm = X86::SHR16ri; break;
    case Instruction::AShr: OpReg = X86::SAR16rCL; OpImm = X86::SAR16ri; break;
    case Instruction::Shl:  OpReg = X86::SHL16rCL; OpImm = X86::SHL16ri; break;
    default: return false;
    }
  } else if (I->getType()->isIntegerTy(32)) {
    CReg = X86::ECX;
    RC = &X86::GR32RegClass;
    switch (I->getOpcode()) {
    case Instruction::LShr: OpReg = X86::SHR32rCL; OpImm = X86::SHR32ri; break;
    case Instruction::AShr: OpReg = X86::SAR32rCL; OpImm = X86::SAR32ri; break;
    case Instruction::Shl:  OpReg = X86::SHL32rCL; OpImm = X86::SHL32ri; break;
    default: return false;
    }
  } else if (I->getType()->isIntegerTy(64)) {
    CReg = X86::RCX;
    RC = &X86::GR64RegClass;
    switch (I->getOpcode()) {
    case Instruction::LShr: OpReg = X86::SHR64rCL; OpImm = X86::SHR64ri; break;
    case Instruction::AShr: OpReg = X86::SAR64rCL; OpImm = X86::SAR64ri; break;
    case Instruction::Shl:  OpReg = X86::SHL64rCL; OpImm = X86::SHL64ri; break;
    default: return false;
    }
  } else {
    return false;
  }

  EVT VT = TLI.getValueType(I->getType(), /*HandleUnknown=*/true);
  if (VT == MVT::Other || !isTypeLegal(I->getType(), VT))
    return false;

  unsigned Op0Reg = getRegForValue(I->getOperand(0));
  if (Op0Reg == 0) return false;

  // Fold immediate in shl(x,3).
  if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
    unsigned ResultReg = createResultReg(RC);
    BuildMI(MBB, DL, TII.get(OpImm),
            ResultReg).addReg(Op0Reg).addImm(CI->getZExtValue() & 0xff);
    UpdateValueMap(I, ResultReg);
    return true;
  }

  unsigned Op1Reg = getRegForValue(I->getOperand(1));
  if (Op1Reg == 0) return false;
  TII.copyRegToReg(*MBB, MBB->end(), CReg, Op1Reg, RC, RC);

  // The shift instruction uses X86::CL. If we defined a super-register
  // of X86::CL, emit an EXTRACT_SUBREG to precisely describe what
  // we're doing here.
  if (CReg != X86::CL)
    BuildMI(MBB, DL, TII.get(TargetOpcode::EXTRACT_SUBREG), X86::CL)
      .addReg(CReg).addImm(X86::SUBREG_8BIT);

  unsigned ResultReg = createResultReg(RC);
  BuildMI(MBB, DL, TII.get(OpReg), ResultReg).addReg(Op0Reg);
  UpdateValueMap(I, ResultReg);
  return true;
}

bool X86FastISel::X86SelectSelect(Instruction *I) {
  EVT VT = TLI.getValueType(I->getType(), /*HandleUnknown=*/true);
  if (VT == MVT::Other || !isTypeLegal(I->getType(), VT))
    return false;

  unsigned Opc = 0;
  const TargetRegisterClass *RC = NULL;
  if (VT.getSimpleVT() == MVT::i16) {
    Opc = X86::CMOVE16rr;
    RC = &X86::GR16RegClass;
  } else if (VT.getSimpleVT() == MVT::i32) {
    Opc = X86::CMOVE32rr;
    RC = &X86::GR32RegClass;
  } else if (VT.getSimpleVT() == MVT::i64) {
    Opc = X86::CMOVE64rr;
    RC = &X86::GR64RegClass;
  } else {
    return false;
  }

  unsigned Op0Reg = getRegForValue(I->getOperand(0));
  if (Op0Reg == 0) return false;
  unsigned Op1Reg = getRegForValue(I->getOperand(1));
  if (Op1Reg == 0) return false;
  unsigned Op2Reg = getRegForValue(I->getOperand(2));
  if (Op2Reg == 0) return false;

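  // TEST sets ZF when the condition is zero; the CMOVE result defaults to
  // Op1 (the true value) and is replaced by Op2 (the false value) when ZF is
  // set.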
  BuildMI(MBB, DL, TII.get(X86::TEST8rr)).addReg(Op0Reg).addReg(Op0Reg);
  unsigned ResultReg = createResultReg(RC);
  BuildMI(MBB, DL, TII.get(Opc), ResultReg).addReg(Op1Reg).addReg(Op2Reg);
  UpdateValueMap(I, ResultReg);
  return true;
}

bool X86FastISel::X86SelectFPExt(Instruction *I) {
  // fpext from float to double.
  if (Subtarget->hasSSE2() &&
      I->getType()->isDoubleTy()) {
    Value *V = I->getOperand(0);
    if (V->getType()->isFloatTy()) {
      unsigned OpReg = getRegForValue(V);
      if (OpReg == 0) return false;
      unsigned ResultReg = createResultReg(X86::FR64RegisterClass);
      BuildMI(MBB, DL, TII.get(X86::CVTSS2SDrr), ResultReg).addReg(OpReg);
      UpdateValueMap(I, ResultReg);
      return true;
    }
  }

  return false;
}

bool X86FastISel::X86SelectFPTrunc(Instruction *I) {
  if (Subtarget->hasSSE2()) {
    if (I->getType()->isFloatTy()) {
      Value *V = I->getOperand(0);
      if (V->getType()->isDoubleTy()) {
        unsigned OpReg = getRegForValue(V);
        if (OpReg == 0) return false;
        unsigned ResultReg = createResultReg(X86::FR32RegisterClass);
        BuildMI(MBB, DL, TII.get(X86::CVTSD2SSrr), ResultReg).addReg(OpReg);
        UpdateValueMap(I, ResultReg);
        return true;
      }
    }
  }

  return false;
}

bool X86FastISel::X86SelectTrunc(Instruction *I) {
  if (Subtarget->is64Bit())
    // All other cases should be handled by the tblgen generated code.
    return false;
  EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
  EVT DstVT = TLI.getValueType(I->getType());

  // This code only handles truncation to byte right now.
  if (DstVT != MVT::i8 && DstVT != MVT::i1)
    // All other cases should be handled by the tblgen generated code.
    return false;
  if (SrcVT != MVT::i16 && SrcVT != MVT::i32)
    // All other cases should be handled by the tblgen generated code.
    return false;

  unsigned InputReg = getRegForValue(I->getOperand(0));
  if (!InputReg)
    // Unhandled operand.  Halt "fast" selection and bail.
    return false;

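  // In 32-bit mode only EAX, EBX, ECX and EDX have an addressable low 8-bit
  // subregister, so constrain the input to the ABCD register classes.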
  // First issue a copy to GR16_ABCD or GR32_ABCD.
  unsigned CopyOpc = (SrcVT == MVT::i16) ? X86::MOV16rr : X86::MOV32rr;
  const TargetRegisterClass *CopyRC = (SrcVT == MVT::i16)
    ? X86::GR16_ABCDRegisterClass : X86::GR32_ABCDRegisterClass;
  unsigned CopyReg = createResultReg(CopyRC);
  BuildMI(MBB, DL, TII.get(CopyOpc), CopyReg).addReg(InputReg);

  // Then issue an extract_subreg.
  unsigned ResultReg = FastEmitInst_extractsubreg(MVT::i8,
                                                  CopyReg, X86::SUBREG_8BIT);
  if (!ResultReg)
    return false;

  UpdateValueMap(I, ResultReg);
  return true;
}

bool X86FastISel::X86SelectExtractValue(Instruction *I) {
  ExtractValueInst *EI = cast<ExtractValueInst>(I);
  Value *Agg = EI->getAggregateOperand();

  if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(Agg)) {
    switch (CI->getIntrinsicID()) {
    default: break;
    case Intrinsic::sadd_with_overflow:
    case Intrinsic::uadd_with_overflow:
      // Cheat a little. We know that the registers for "add" and "seto" are
      // allocated sequentially. However, we only keep track of the register
      // for "add" in the value map. Use extractvalue's index to get the
      // correct register for "seto".
      UpdateValueMap(I, lookUpRegForValue(Agg) + *EI->idx_begin());
      return true;
    }
  }

  return false;
}

bool X86FastISel::X86VisitIntrinsicCall(IntrinsicInst &I) {
  // FIXME: Handle more intrinsics.
  switch (I.getIntrinsicID()) {
  default: return false;
  case Intrinsic::objectsize: {
    ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(2));
    const Type *Ty = I.getCalledFunction()->getReturnType();

    assert(CI && "Non-constant type in Intrinsic::objectsize?");

    EVT VT;
    if (!isTypeLegal(Ty, VT))
      return false;

    unsigned OpC = 0;
    if (VT == MVT::i32)
      OpC = X86::MOV32ri;
    else if (VT == MVT::i64)
      OpC = X86::MOV64ri;
    else
      return false;

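    // When the object size is unknown, llvm.objectsize folds to -1 if its
    // second argument (the "min" flag) is false and to 0 if it is true.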
    unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
    BuildMI(MBB, DL, TII.get(OpC), ResultReg).
                                  addImm(CI->getZExtValue() == 0 ? -1ULL : 0);
    UpdateValueMap(&I, ResultReg);
    return true;
  }
  case Intrinsic::dbg_declare: {
    DbgDeclareInst *DI = cast<DbgDeclareInst>(&I);
    X86AddressMode AM;
    assert(DI->getAddress() && "Null address should be checked earlier!");
    if (!X86SelectAddress(DI->getAddress(), AM))
      return false;
    const TargetInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE);
    // FIXME may need to add RegState::Debug to any registers produced,
    // although ESP/EBP should be the only ones at the moment.
    addFullAddress(BuildMI(MBB, DL, II), AM).addImm(0).
                                        addMetadata(DI->getVariable());
    return true;
  }
  case Intrinsic::trap: {
    BuildMI(MBB, DL, TII.get(X86::TRAP));
    return true;
  }
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::uadd_with_overflow: {
    // Replace "add with overflow" intrinsics with an "add" instruction followed
    // by a seto/setc instruction. Later on, when the "extractvalue"
    // instructions are encountered, we use the fact that two registers were
    // created sequentially to get the correct registers for the "sum" and the
    // "overflow bit".
    const Function *Callee = I.getCalledFunction();
    const Type *RetTy =
      cast<StructType>(Callee->getReturnType())->getTypeAtIndex(unsigned(0));

    EVT VT;
    if (!isTypeLegal(RetTy, VT))
      return false;

    Value *Op1 = I.getOperand(1);
    Value *Op2 = I.getOperand(2);
    unsigned Reg1 = getRegForValue(Op1);
    unsigned Reg2 = getRegForValue(Op2);

    if (Reg1 == 0 || Reg2 == 0)
      // FIXME: Handle values *not* in registers.
      return false;

    unsigned OpC = 0;
    if (VT == MVT::i32)
      OpC = X86::ADD32rr;
    else if (VT == MVT::i64)
      OpC = X86::ADD64rr;
    else
      return false;

    unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
    BuildMI(MBB, DL, TII.get(OpC), ResultReg).addReg(Reg1).addReg(Reg2);
    unsigned DestReg1 = UpdateValueMap(&I, ResultReg);

    // If the add with overflow is an intra-block value then we just want to
    // create temporaries for it like normal.  If it is a cross-block value then
    // UpdateValueMap will return the cross-block register used.  Since we
    // *really* want the value to be live in the register pair known by
    // UpdateValueMap, we have to use DestReg1+1 as the destination register in
    // the cross block case.  In the non-cross-block case, we should just make
    // another register for the value.
    if (DestReg1 != ResultReg)
      ResultReg = DestReg1+1;
    else
      ResultReg = createResultReg(TLI.getRegClassFor(MVT::i8));

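    // Unsigned overflow is reported in CF (SETB); signed overflow in OF
    // (SETO).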
    unsigned Opc = X86::SETBr;
    if (I.getIntrinsicID() == Intrinsic::sadd_with_overflow)
      Opc = X86::SETOr;
    BuildMI(MBB, DL, TII.get(Opc), ResultReg);
    return true;
  }
  }
}

bool X86FastISel::X86SelectCall(Instruction *I) {
  CallInst *CI = cast<CallInst>(I);
  Value *Callee = I->getOperand(0);

  // Can't handle inline asm yet.
  if (isa<InlineAsm>(Callee))
    return false;

  // Handle intrinsic calls.
  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI))
    return X86VisitIntrinsicCall(*II);

  // Handle only C and fastcc calling conventions for now.
  CallSite CS(CI);
  CallingConv::ID CC = CS.getCallingConv();
  if (CC != CallingConv::C &&
      CC != CallingConv::Fast &&
      CC != CallingConv::X86_FastCall)
    return false;

  // fastcc with -tailcallopt is intended to provide a guaranteed
  // tail call optimization. Fastisel doesn't know how to do that.
  if (CC == CallingConv::Fast && GuaranteedTailCallOpt)
    return false;

  // Let SDISel handle vararg functions.
  const PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
  const FunctionType *FTy = cast<FunctionType>(PT->getElementType());
  if (FTy->isVarArg())
    return false;

  // Handle *simple* calls for now.
  const Type *RetTy = CS.getType();
  EVT RetVT;
  if (RetTy->isVoidTy())
    RetVT = MVT::isVoid;
  else if (!isTypeLegal(RetTy, RetVT, true))
    return false;

  // Materialize callee address in a register. FIXME: GV address can be
  // handled with a CALLpcrel32 instead.
  X86AddressMode CalleeAM;
  if (!X86SelectCallAddress(Callee, CalleeAM))
    return false;
  unsigned CalleeOp = 0;
  GlobalValue *GV = 0;
  if (CalleeAM.GV != 0) {
    GV = CalleeAM.GV;
  } else if (CalleeAM.Base.Reg != 0) {
    CalleeOp = CalleeAM.Base.Reg;
  } else
    return false;

  // Allow calls which produce i1 results.
  bool AndToI1 = false;
  if (RetVT == MVT::i1) {
    RetVT = MVT::i8;
    AndToI1 = true;
  }

  // Deal with call operands first.
  SmallVector<Value*, 8> ArgVals;
  SmallVector<unsigned, 8> Args;
  SmallVector<EVT, 8> ArgVTs;
  SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
  Args.reserve(CS.arg_size());
  ArgVals.reserve(CS.arg_size());
  ArgVTs.reserve(CS.arg_size());
  ArgFlags.reserve(CS.arg_size());
  for (CallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
       i != e; ++i) {
    unsigned Arg = getRegForValue(*i);
    if (Arg == 0)
      return false;
    ISD::ArgFlagsTy Flags;
    unsigned AttrInd = i - CS.arg_begin() + 1;
    if (CS.paramHasAttr(AttrInd, Attribute::SExt))
      Flags.setSExt();
    if (CS.paramHasAttr(AttrInd, Attribute::ZExt))
      Flags.setZExt();

    // FIXME: Only handle *easy* calls for now.
    if (CS.paramHasAttr(AttrInd, Attribute::InReg) ||
        CS.paramHasAttr(AttrInd, Attribute::StructRet) ||
        CS.paramHasAttr(AttrInd, Attribute::Nest) ||
        CS.paramHasAttr(AttrInd, Attribute::ByVal))
      return false;

    const Type *ArgTy = (*i)->getType();
    EVT ArgVT;
    if (!isTypeLegal(ArgTy, ArgVT))
      return false;
    unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy);
    Flags.setOrigAlign(OriginalAlignment);

    Args.push_back(Arg);
    ArgVals.push_back(*i);
    ArgVTs.push_back(ArgVT);
    ArgFlags.push_back(Flags);
  }

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, false, TM, ArgLocs, I->getParent()->getContext());
  CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CCAssignFnForCall(CC));

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = CCInfo.getNextStackOffset();

  // Issue CALLSEQ_START
  unsigned AdjStackDown = TM.getRegisterInfo()->getCallFrameSetupOpcode();
  BuildMI(MBB, DL, TII.get(AdjStackDown)).addImm(NumBytes);

  // Process argument: walk the register/memloc assignments, inserting
  // copies / loads.
  SmallVector<unsigned, 4> RegArgs;
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    unsigned Arg = Args[VA.getValNo()];
    EVT ArgVT = ArgVTs[VA.getValNo()];

    // Promote the value if needed.
    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::SExt: {
      bool Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(),
                                       Arg, ArgVT, Arg);
      assert(Emitted && "Failed to emit a sext!"); Emitted=Emitted;
      Emitted = true;
      ArgVT = VA.getLocVT();
      break;
    }
    case CCValAssign::ZExt: {
      bool Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(),
                                       Arg, ArgVT, Arg);
      assert(Emitted && "Failed to emit a zext!"); Emitted=Emitted;
      Emitted = true;
      ArgVT = VA.getLocVT();
      break;
    }
    case CCValAssign::AExt: {
      bool Emitted = X86FastEmitExtend(ISD::ANY_EXTEND, VA.getLocVT(),
                                       Arg, ArgVT, Arg);
      if (!Emitted)
        Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(),
                                    Arg, ArgVT, Arg);
      if (!Emitted)
        Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(),
                                    Arg, ArgVT, Arg);

      assert(Emitted && "Failed to emit an aext!"); Emitted=Emitted;
      ArgVT = VA.getLocVT();
      break;
    }
    case CCValAssign::BCvt: {
      unsigned BC = FastEmit_r(ArgVT.getSimpleVT(), VA.getLocVT().getSimpleVT(),
                               ISD::BIT_CONVERT, Arg);
      assert(BC != 0 && "Failed to emit a bitcast!");
      Arg = BC;
      ArgVT = VA.getLocVT();
      break;
    }
    }

    if (VA.isRegLoc()) {
      TargetRegisterClass* RC = TLI.getRegClassFor(ArgVT);
      bool Emitted = TII.copyRegToReg(*MBB, MBB->end(), VA.getLocReg(),
                                      Arg, RC, RC);
      assert(Emitted && "Failed to emit a copy instruction!"); Emitted=Emitted;
      Emitted = true;
      RegArgs.push_back(VA.getLocReg());
    } else {
      unsigned LocMemOffset = VA.getLocMemOffset();
      X86AddressMode AM;
      AM.Base.Reg = StackPtr;
      AM.Disp = LocMemOffset;
      Value *ArgVal = ArgVals[VA.getValNo()];

      // If this is a really simple value, emit this with the Value* version of
      // X86FastEmitStore.  If it isn't simple, we don't want to do this, as it
      // can cause us to reevaluate the argument.
      if (isa<ConstantInt>(ArgVal) || isa<ConstantPointerNull>(ArgVal))
        X86FastEmitStore(ArgVT, ArgVal, AM);
      else
        X86FastEmitStore(ArgVT, Arg, AM);
    }
  }

  // ELF / PIC requires the GOT pointer to be in the EBX register before
  // making function calls via the PLT.
  if (Subtarget->isPICStyleGOT()) {
    TargetRegisterClass *RC = X86::GR32RegisterClass;
    unsigned Base = getInstrInfo()->getGlobalBaseReg(&MF);
    bool Emitted = TII.copyRegToReg(*MBB, MBB->end(), X86::EBX, Base, RC, RC);
    assert(Emitted && "Failed to emit a copy instruction!"); Emitted=Emitted;
    Emitted = true;
  }

  // Issue the call.
  MachineInstrBuilder MIB;
  if (CalleeOp) {
    // Register-indirect call.
    unsigned CallOpc = Subtarget->is64Bit() ? X86::CALL64r : X86::CALL32r;
    MIB = BuildMI(MBB, DL, TII.get(CallOpc)).addReg(CalleeOp);

  } else {
    // Direct call.
    assert(GV && "Not a direct call");
    unsigned CallOpc =
      Subtarget->is64Bit() ? X86::CALL64pcrel32 : X86::CALLpcrel32;

    // See if we need any target-specific flags on the GV operand.
    unsigned char OpFlags = 0;

    // On ELF targets, in both X86-64 and X86-32 mode, direct calls to
1484    // has hidden or protected visibility, or if it is static or local, then
1485    // we don't need to use the PLT - we can directly call it.
1486    if (Subtarget->isTargetELF() &&
1487        TM.getRelocationModel() == Reloc::PIC_ &&
1488        GV->hasDefaultVisibility() && !GV->hasLocalLinkage()) {
1489      OpFlags = X86II::MO_PLT;
1490    } else if (Subtarget->isPICStyleStubAny() &&
1491               (GV->isDeclaration() || GV->isWeakForLinker()) &&
1492               Subtarget->getDarwinVers() < 9) {
1493      // PC-relative references to external symbols should go through $stub,
1494      // unless we're building with the leopard linker or later, which
1495      // automatically synthesizes these stubs.
1496      OpFlags = X86II::MO_DARWIN_STUB;
1497    }
1498
1499
1500    MIB = BuildMI(MBB, DL, TII.get(CallOpc)).addGlobalAddress(GV, 0, OpFlags);
1501  }
1502
1503  // Add an implicit use of the GOT pointer in EBX.
1504  if (Subtarget->isPICStyleGOT())
1505    MIB.addReg(X86::EBX);
1506
1507  // Add implicit physical register uses to the call.
1508  for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
1509    MIB.addReg(RegArgs[i]);
1510
1511  // Issue CALLSEQ_END
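  // The first immediate is the size of the call frame being torn down; the
  // second (always 0 here) is the number of bytes the callee itself pops for
  // callee-pop calling conventions.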
1512  unsigned AdjStackUp = TM.getRegisterInfo()->getCallFrameDestroyOpcode();
1513  BuildMI(MBB, DL, TII.get(AdjStackUp)).addImm(NumBytes).addImm(0);
1514
1515  // Now handle call return value (if any).
1516  if (RetVT.getSimpleVT().SimpleTy != MVT::isVoid) {
1517    SmallVector<CCValAssign, 16> RVLocs;
1518    CCState CCInfo(CC, false, TM, RVLocs, I->getParent()->getContext());
1519    CCInfo.AnalyzeCallResult(RetVT, RetCC_X86);
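    // AnalyzeCallResult computes where the return value lives, typically
    // EAX/RAX for integers and XMM0 or ST0 for floating point, depending on
    // the calling convention and subtarget.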
1520
1521    // Copy all of the result registers out of their specified physreg.
1522    assert(RVLocs.size() == 1 && "Can't handle multi-value calls!");
1523    EVT CopyVT = RVLocs[0].getValVT();
1524    TargetRegisterClass* DstRC = TLI.getRegClassFor(CopyVT);
1525    TargetRegisterClass *SrcRC = DstRC;
1526
1527    // If this is a call to a function that returns an fp value on the x87 fp
1528    // stack, but where we prefer to use the value in xmm registers, copy it
1529    // out as F80 and use a truncate to move it from fp stack reg to xmm reg.
1530    if ((RVLocs[0].getLocReg() == X86::ST0 ||
1531         RVLocs[0].getLocReg() == X86::ST1) &&
1532        isScalarFPTypeInSSEReg(RVLocs[0].getValVT())) {
1533      CopyVT = MVT::f80;
1534      SrcRC = X86::RSTRegisterClass;
1535      DstRC = X86::RFP80RegisterClass;
1536    }
1537
1538    unsigned ResultReg = createResultReg(DstRC);
1539    bool Emitted = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
1540                                    RVLocs[0].getLocReg(), DstRC, SrcRC);
1541    assert(Emitted && "Failed to emit a copy instruction!");
1542    (void)Emitted; // Suppress unused-variable warnings in -Asserts builds.
1543    if (CopyVT != RVLocs[0].getValVT()) {
1544      // Round the F80 value to the right size, which also moves it to the
1545      // appropriate xmm register. This is accomplished by storing the F80
1546      // value in memory and then loading it back. Ewww...
1547      EVT ResVT = RVLocs[0].getValVT();
1548      unsigned Opc = ResVT == MVT::f32 ? X86::ST_Fp80m32 : X86::ST_Fp80m64;
1549      unsigned MemSize = ResVT.getSizeInBits()/8;
1550      int FI = MFI.CreateStackObject(MemSize, MemSize, false);
1551      addFrameReference(BuildMI(MBB, DL, TII.get(Opc)), FI).addReg(ResultReg);
1552      DstRC = ResVT == MVT::f32
1553        ? X86::FR32RegisterClass : X86::FR64RegisterClass;
1554      Opc = ResVT == MVT::f32 ? X86::MOVSSrm : X86::MOVSDrm;
1555      ResultReg = createResultReg(DstRC);
1556      addFrameReference(BuildMI(MBB, DL, TII.get(Opc), ResultReg), FI);
1557    }
1558
1559    if (AndToI1) {
1560      // Mask out all but the lowest bit, since the call produces an i1 result.
1561      unsigned AndResult = createResultReg(X86::GR8RegisterClass);
1562      BuildMI(MBB, DL,
1563              TII.get(X86::AND8ri), AndResult).addReg(ResultReg).addImm(1);
1564      ResultReg = AndResult;
1565    }
1566
1567    UpdateValueMap(I, ResultReg);
1568  }
1569
1570  return true;
1571}
1572
1573
1574bool
1575X86FastISel::TargetSelectInstruction(Instruction *I) {
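  // Dispatch on the IR opcode.  Returning false indicates that fast
  // instruction selection failed for this instruction and the caller should
  // fall back to the default selector.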
1576  switch (I->getOpcode()) {
1577  default: break;
1578  case Instruction::Load:
1579    return X86SelectLoad(I);
1580  case Instruction::Store:
1581    return X86SelectStore(I);
1582  case Instruction::ICmp:
1583  case Instruction::FCmp:
1584    return X86SelectCmp(I);
1585  case Instruction::ZExt:
1586    return X86SelectZExt(I);
1587  case Instruction::Br:
1588    return X86SelectBranch(I);
1589  case Instruction::Call:
1590    return X86SelectCall(I);
1591  case Instruction::LShr:
1592  case Instruction::AShr:
1593  case Instruction::Shl:
1594    return X86SelectShift(I);
1595  case Instruction::Select:
1596    return X86SelectSelect(I);
1597  case Instruction::Trunc:
1598    return X86SelectTrunc(I);
1599  case Instruction::FPExt:
1600    return X86SelectFPExt(I);
1601  case Instruction::FPTrunc:
1602    return X86SelectFPTrunc(I);
1603  case Instruction::ExtractValue:
1604    return X86SelectExtractValue(I);
1605  case Instruction::IntToPtr: // Deliberate fall-through.
1606  case Instruction::PtrToInt: {
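    // Pointer/integer casts are no-ops when the bit widths match; otherwise
    // they reduce to a zero-extension or a truncation of the source value.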
1607    EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
1608    EVT DstVT = TLI.getValueType(I->getType());
1609    if (DstVT.bitsGT(SrcVT))
1610      return X86SelectZExt(I);
1611    if (DstVT.bitsLT(SrcVT))
1612      return X86SelectTrunc(I);
1613    unsigned Reg = getRegForValue(I->getOperand(0));
1614    if (Reg == 0) return false;
1615    UpdateValueMap(I, Reg);
1616    return true;
1617  }
1618  }
1619
1620  return false;
1621}
1622
1623unsigned X86FastISel::TargetMaterializeConstant(Constant *C) {
1624  EVT VT;
1625  if (!isTypeLegal(C->getType(), VT))
1626    return 0;
1627
1628  // Get opcode and regclass of the output for the given load instruction.
1629  unsigned Opc = 0;
1630  const TargetRegisterClass *RC = NULL;
1631  switch (VT.getSimpleVT().SimpleTy) {
1632  default: return 0;
1633  case MVT::i8:
1634    Opc = X86::MOV8rm;
1635    RC  = X86::GR8RegisterClass;
1636    break;
1637  case MVT::i16:
1638    Opc = X86::MOV16rm;
1639    RC  = X86::GR16RegisterClass;
1640    break;
1641  case MVT::i32:
1642    Opc = X86::MOV32rm;
1643    RC  = X86::GR32RegisterClass;
1644    break;
1645  case MVT::i64:
1646    // Must be in x86-64 mode.
1647    Opc = X86::MOV64rm;
1648    RC  = X86::GR64RegisterClass;
1649    break;
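  // For floating point, prefer an SSE load when the subtarget supports it and
  // fall back to an x87 stack load otherwise.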
1650  case MVT::f32:
1651    if (Subtarget->hasSSE1()) {
1652      Opc = X86::MOVSSrm;
1653      RC  = X86::FR32RegisterClass;
1654    } else {
1655      Opc = X86::LD_Fp32m;
1656      RC  = X86::RFP32RegisterClass;
1657    }
1658    break;
1659  case MVT::f64:
1660    if (Subtarget->hasSSE2()) {
1661      Opc = X86::MOVSDrm;
1662      RC  = X86::FR64RegisterClass;
1663    } else {
1664      Opc = X86::LD_Fp64m;
1665      RC  = X86::RFP64RegisterClass;
1666    }
1667    break;
1668  case MVT::f80:
1669    // No f80 support yet.
1670    return 0;
1671  }
1672
1673  // Materialize addresses with LEA instructions.
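  // A GlobalValue's address needs no constant-pool entry: if X86SelectAddress
  // can express it as an address mode, an LEA (RIP-relative where applicable)
  // computes it directly into a register.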
1674  if (isa<GlobalValue>(C)) {
1675    X86AddressMode AM;
1676    if (X86SelectAddress(C, AM)) {
1677      if (TLI.getPointerTy() == MVT::i32)
1678        Opc = X86::LEA32r;
1679      else
1680        Opc = X86::LEA64r;
1681      unsigned ResultReg = createResultReg(RC);
1682      addLeaAddress(BuildMI(MBB, DL, TII.get(Opc), ResultReg), AM);
1683      return ResultReg;
1684    }
1685    return 0;
1686  }
1687
1688  // MachineConstantPool wants an explicit alignment.
1689  unsigned Align = TD.getPrefTypeAlignment(C->getType());
1690  if (Align == 0) {
1691    // Alignment of vector types.  FIXME!
1692    Align = TD.getTypeAllocSize(C->getType());
1693  }
1694
1695  // x86-32 PIC requires a PIC base register for constant pools.
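  // On x86-64 with the small code model, RIP-relative addressing serves the
  // same purpose, so RIP is used as the base there instead.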
1696  unsigned PICBase = 0;
1697  unsigned char OpFlag = 0;
1698  if (Subtarget->isPICStyleStubPIC()) { // Not dynamic-no-pic
1699    OpFlag = X86II::MO_PIC_BASE_OFFSET;
1700    PICBase = getInstrInfo()->getGlobalBaseReg(&MF);
1701  } else if (Subtarget->isPICStyleGOT()) {
1702    OpFlag = X86II::MO_GOTOFF;
1703    PICBase = getInstrInfo()->getGlobalBaseReg(&MF);
1704  } else if (Subtarget->isPICStyleRIPRel() &&
1705             TM.getCodeModel() == CodeModel::Small) {
1706    PICBase = X86::RIP;
1707  }
1708
1709  // Create the load from the constant pool.
1710  unsigned MCPOffset = MCP.getConstantPoolIndex(C, Align);
1711  unsigned ResultReg = createResultReg(RC);
1712  addConstantPoolReference(BuildMI(MBB, DL, TII.get(Opc), ResultReg),
1713                           MCPOffset, PICBase, OpFlag);
1714
1715  return ResultReg;
1716}
1717
1718unsigned X86FastISel::TargetMaterializeAlloca(AllocaInst *C) {
1719  // Fail on dynamic allocas. At this point, getRegForValue has already
1720  // checked its CSE maps, so if we're here trying to handle a dynamic
1721  // alloca, we're not going to succeed. X86SelectAddress has a
1722  // check for dynamic allocas, because it's called directly from
1723  // various places, but TargetMaterializeAlloca also needs a check
1724  // in order to avoid recursion between getRegForValue,
1725  // X86SelectAddress, and TargetMaterializeAlloca.
1726  if (!StaticAllocaMap.count(C))
1727    return 0;
1728
1729  X86AddressMode AM;
1730  if (!X86SelectAddress(C, AM))
1731    return 0;
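  // Emit an LEA that materializes the alloca's frame address into a register.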
1732  unsigned Opc = Subtarget->is64Bit() ? X86::LEA64r : X86::LEA32r;
1733  TargetRegisterClass* RC = TLI.getRegClassFor(TLI.getPointerTy());
1734  unsigned ResultReg = createResultReg(RC);
1735  addLeaAddress(BuildMI(MBB, DL, TII.get(Opc), ResultReg), AM);
1736  return ResultReg;
1737}
1738
1739namespace llvm {
1740  llvm::FastISel *X86::createFastISel(MachineFunction &mf,
1741                        MachineModuleInfo *mmi,
1742                        DwarfWriter *dw,
1743                        DenseMap<const Value *, unsigned> &vm,
1744                        DenseMap<const BasicBlock *, MachineBasicBlock *> &bm,
1745                        DenseMap<const AllocaInst *, int> &am
1746#ifndef NDEBUG
1747                        , SmallSet<Instruction*, 8> &cil
1748#endif
1749                        ) {
1750    return new X86FastISel(mf, mmi, dw, vm, bm, am
1751#ifndef NDEBUG
1752                           , cil
1753#endif
1754                           );
1755  }
1756}
1757