X86FastISel.cpp revision 251662
//===-- X86FastISel.cpp - X86 FastISel implementation ---------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the X86-specific support for the FastISel class. Much
// of the target-specific code is generated by tablegen in the file
// X86GenFastISel.inc, which is #included here.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86ISelLowering.h"
#include "X86InstrBuilder.h"
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Operator.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;

namespace {

class X86FastISel : public FastISel {
  /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const X86Subtarget *Subtarget;

  /// RegInfo - X86 register info.
  ///
  const X86RegisterInfo *RegInfo;

  /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87
  /// floating point ops.
  /// When SSE is available, use it for f32 operations.
  /// When SSE2 is available, use it for f64 operations.
  bool X86ScalarSSEf64;
  bool X86ScalarSSEf32;

public:
  explicit X86FastISel(FunctionLoweringInfo &funcInfo,
                       const TargetLibraryInfo *libInfo)
    : FastISel(funcInfo, libInfo) {
    Subtarget = &TM.getSubtarget<X86Subtarget>();
    X86ScalarSSEf64 = Subtarget->hasSSE2();
    X86ScalarSSEf32 = Subtarget->hasSSE1();
    RegInfo = static_cast<const X86RegisterInfo*>(TM.getRegisterInfo());
  }

  virtual bool TargetSelectInstruction(const Instruction *I);

  /// \brief The specified machine instr operand is a vreg, and that
  /// vreg is being provided by the specified load instruction.  If possible,
  /// try to fold the load as an operand to the instruction, returning true
  /// on success.
  virtual bool tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
                                   const LoadInst *LI);

  virtual bool FastLowerArguments();

#include "X86GenFastISel.inc"

private:
  bool X86FastEmitCompare(const Value *LHS, const Value *RHS, EVT VT);

  bool X86FastEmitLoad(EVT VT, const X86AddressMode &AM, unsigned &RR);

  bool X86FastEmitStore(EVT VT, const Value *Val, const X86AddressMode &AM);
  bool X86FastEmitStore(EVT VT, unsigned Val, const X86AddressMode &AM);

  bool X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT,
                         unsigned &ResultReg);

  bool X86SelectAddress(const Value *V, X86AddressMode &AM);
  bool X86SelectCallAddress(const Value *V, X86AddressMode &AM);

  bool X86SelectLoad(const Instruction *I);

  bool X86SelectStore(const Instruction *I);

  bool X86SelectRet(const Instruction *I);

  bool X86SelectCmp(const Instruction *I);

  bool X86SelectZExt(const Instruction *I);

  bool X86SelectBranch(const Instruction *I);

  bool X86SelectShift(const Instruction *I);

  bool X86SelectDivRem(const Instruction *I);

  bool X86SelectSelect(const Instruction *I);

  bool X86SelectTrunc(const Instruction *I);

  bool X86SelectFPExt(const Instruction *I);
  bool X86SelectFPTrunc(const Instruction *I);

  bool X86VisitIntrinsicCall(const IntrinsicInst &I);
  bool X86SelectCall(const Instruction *I);

  bool DoSelectCall(const Instruction *I, const char *MemIntName);

  const X86InstrInfo *getInstrInfo() const {
    return getTargetMachine()->getInstrInfo();
  }
  const X86TargetMachine *getTargetMachine() const {
    return static_cast<const X86TargetMachine *>(&TM);
  }

  unsigned TargetMaterializeConstant(const Constant *C);

  unsigned TargetMaterializeAlloca(const AllocaInst *C);

  unsigned TargetMaterializeFloatZero(const ConstantFP *CF);

  /// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is
  /// computed in an SSE register, not on the X87 floating point stack.
  bool isScalarFPTypeInSSEReg(EVT VT) const {
    return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is legal if SSE2
      (VT == MVT::f32 && X86ScalarSSEf32);   // f32 is legal if SSE1
  }

  bool isTypeLegal(Type *Ty, MVT &VT, bool AllowI1 = false);

  bool IsMemcpySmall(uint64_t Len);

  bool TryEmitSmallMemcpy(X86AddressMode DestAM,
                          X86AddressMode SrcAM, uint64_t Len);
};

} // end anonymous namespace.

bool X86FastISel::isTypeLegal(Type *Ty, MVT &VT, bool AllowI1) {
  EVT evt = TLI.getValueType(Ty, /*HandleUnknown=*/true);
  if (evt == MVT::Other || !evt.isSimple())
    // Unhandled type. Halt "fast" selection and bail.
    return false;

  VT = evt.getSimpleVT();
  // For now, require SSE/SSE2 for performing floating-point operations,
  // since x87 requires additional work.
  if (VT == MVT::f64 && !X86ScalarSSEf64)
    return false;
  if (VT == MVT::f32 && !X86ScalarSSEf32)
    return false;
  // Similarly, no f80 support yet.
  if (VT == MVT::f80)
    return false;
  // We only handle legal types. For example, on x86-32 the instruction
  // selector contains all of the 64-bit instructions from x86-64,
  // under the assumption that i64 won't be used if the target doesn't
  // support it.
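  // For instance, on a 32-bit-only subtarget an i64 value fails this check,
  // so the instruction is left to the SelectionDAG-based selector.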
  return (AllowI1 && VT == MVT::i1) || TLI.isTypeLegal(VT);
}

#include "X86GenCallingConv.inc"

/// X86FastEmitLoad - Emit a machine instruction to load a value of type VT.
/// The address is either pre-computed, i.e. Ptr, or a GlobalAddress, i.e. GV.
/// Return true and the result register by reference if it is possible.
bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM,
                                  unsigned &ResultReg) {
  // Get opcode and regclass of the output for the given load instruction.
  unsigned Opc = 0;
  const TargetRegisterClass *RC = NULL;
  switch (VT.getSimpleVT().SimpleTy) {
  default: return false;
  case MVT::i1:
  case MVT::i8:
    Opc = X86::MOV8rm;
    RC  = &X86::GR8RegClass;
    break;
  case MVT::i16:
    Opc = X86::MOV16rm;
    RC  = &X86::GR16RegClass;
    break;
  case MVT::i32:
    Opc = X86::MOV32rm;
    RC  = &X86::GR32RegClass;
    break;
  case MVT::i64:
    // Must be in x86-64 mode.
    Opc = X86::MOV64rm;
    RC  = &X86::GR64RegClass;
    break;
  case MVT::f32:
    if (X86ScalarSSEf32) {
      Opc = Subtarget->hasAVX() ? X86::VMOVSSrm : X86::MOVSSrm;
      RC  = &X86::FR32RegClass;
    } else {
      Opc = X86::LD_Fp32m;
      RC  = &X86::RFP32RegClass;
    }
    break;
  case MVT::f64:
    if (X86ScalarSSEf64) {
      Opc = Subtarget->hasAVX() ? X86::VMOVSDrm : X86::MOVSDrm;
      RC  = &X86::FR64RegClass;
    } else {
      Opc = X86::LD_Fp64m;
      RC  = &X86::RFP64RegClass;
    }
    break;
  case MVT::f80:
    // No f80 support yet.
    return false;
  }

  ResultReg = createResultReg(RC);
  addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
                         DL, TII.get(Opc), ResultReg), AM);
  return true;
}

/// X86FastEmitStore - Emit a machine instruction to store a value Val of
/// type VT. The address is either pre-computed, consisting of a base pointer
/// Ptr and a displacement offset, or a GlobalAddress, i.e. V.
/// Return true if it is possible.
bool
X86FastISel::X86FastEmitStore(EVT VT, unsigned Val, const X86AddressMode &AM) {
  // Get opcode and regclass of the output for the given store instruction.
  unsigned Opc = 0;
  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::f80: // No f80 support yet.
  default: return false;
  case MVT::i1: {
    // Mask out all but lowest bit.
    unsigned AndResult = createResultReg(&X86::GR8RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
            TII.get(X86::AND8ri), AndResult).addReg(Val).addImm(1);
    Val = AndResult;
  }
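  // The AND above keeps only bit 0, so an i1 'true' reaches memory as the
  // byte 0x01 via the MOV8mr selected below.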
  // FALLTHROUGH, handling i1 as i8.
  case MVT::i8:  Opc = X86::MOV8mr;  break;
  case MVT::i16: Opc = X86::MOV16mr; break;
  case MVT::i32: Opc = X86::MOV32mr; break;
  case MVT::i64: Opc = X86::MOV64mr; break; // Must be in x86-64 mode.
  case MVT::f32:
    Opc = X86ScalarSSEf32 ?
          (Subtarget->hasAVX() ? X86::VMOVSSmr : X86::MOVSSmr) : X86::ST_Fp32m;
    break;
  case MVT::f64:
    Opc = X86ScalarSSEf64 ?
          (Subtarget->hasAVX() ? X86::VMOVSDmr : X86::MOVSDmr) : X86::ST_Fp64m;
    break;
  case MVT::v4f32:
    Opc = X86::MOVAPSmr;
    break;
  case MVT::v2f64:
    Opc = X86::MOVAPDmr;
    break;
  case MVT::v4i32:
  case MVT::v2i64:
  case MVT::v8i16:
  case MVT::v16i8:
    Opc = X86::MOVDQAmr;
    break;
  }

  addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
                         DL, TII.get(Opc)), AM).addReg(Val);
  return true;
}

bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
                                   const X86AddressMode &AM) {
  // Handle 'null' like i32/i64 0.
  if (isa<ConstantPointerNull>(Val))
    Val = Constant::getNullValue(TD.getIntPtrType(Val->getContext()));

  // If this is a store of a simple constant, fold the constant into the store.
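  // For example, "store i32 42, i32* %p" becomes a single MOV32mi with the
  // immediate 42; no register is materialized for the constant.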
  if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
    unsigned Opc = 0;
    bool Signed = true;
    switch (VT.getSimpleVT().SimpleTy) {
    default: break;
    case MVT::i1:  Signed = false;     // FALLTHROUGH to handle as i8.
    case MVT::i8:  Opc = X86::MOV8mi;  break;
    case MVT::i16: Opc = X86::MOV16mi; break;
    case MVT::i32: Opc = X86::MOV32mi; break;
    case MVT::i64:
      // Must be a 32-bit sign extended value.
      if (isInt<32>(CI->getSExtValue()))
        Opc = X86::MOV64mi32;
      break;
    }

    if (Opc) {
      addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
                             DL, TII.get(Opc)), AM)
                             .addImm(Signed ? (uint64_t) CI->getSExtValue() :
                                              CI->getZExtValue());
      return true;
    }
  }

  unsigned ValReg = getRegForValue(Val);
  if (ValReg == 0)
    return false;

  return X86FastEmitStore(VT, ValReg, AM);
}

/// X86FastEmitExtend - Emit a machine instruction to extend a value Src of
/// type SrcVT to type DstVT using the specified extension opcode Opc (e.g.
/// ISD::SIGN_EXTEND).
bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT,
                                    unsigned Src, EVT SrcVT,
                                    unsigned &ResultReg) {
  unsigned RR = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc,
                           Src, /*TODO: Kill=*/false);
  if (RR == 0)
    return false;

  ResultReg = RR;
  return true;
}

/// X86SelectAddress - Attempt to fill in an address from the given value.
///
bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
  const User *U = NULL;
  unsigned Opcode = Instruction::UserOp1;
  if (const Instruction *I = dyn_cast<Instruction>(V)) {
    // Don't walk into other basic blocks; it's possible we haven't
    // visited them yet, so the instructions may not yet be assigned
    // virtual registers.
    if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(V)) ||
        FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
      Opcode = I->getOpcode();
      U = I;
    }
  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
    Opcode = C->getOpcode();
    U = C;
  }

  if (PointerType *Ty = dyn_cast<PointerType>(V->getType()))
    if (Ty->getAddressSpace() > 255)
      // Fast instruction selection doesn't support the special
      // address spaces.
      return false;

  switch (Opcode) {
  default: break;
  case Instruction::BitCast:
    // Look past bitcasts.
    return X86SelectAddress(U->getOperand(0), AM);

  case Instruction::IntToPtr:
    // Look past no-op inttoptrs.
    if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
      return X86SelectAddress(U->getOperand(0), AM);
    break;

  case Instruction::PtrToInt:
    // Look past no-op ptrtoints.
    if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
      return X86SelectAddress(U->getOperand(0), AM);
    break;

  case Instruction::Alloca: {
    // Do static allocas.
    const AllocaInst *A = cast<AllocaInst>(V);
    DenseMap<const AllocaInst*, int>::iterator SI =
      FuncInfo.StaticAllocaMap.find(A);
    if (SI != FuncInfo.StaticAllocaMap.end()) {
      AM.BaseType = X86AddressMode::FrameIndexBase;
      AM.Base.FrameIndex = SI->second;
      return true;
    }
    break;
  }

  case Instruction::Add: {
    // Adds of constants are common and easy enough.
    if (const ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
      uint64_t Disp = (int32_t)AM.Disp + (uint64_t)CI->getSExtValue();
      // They have to fit in the 32-bit signed displacement field though.
      if (isInt<32>(Disp)) {
        AM.Disp = (uint32_t)Disp;
        return X86SelectAddress(U->getOperand(0), AM);
      }
    }
    break;
  }

  case Instruction::GetElementPtr: {
    X86AddressMode SavedAM = AM;

    // Pattern-match simple GEPs.
    uint64_t Disp = (int32_t)AM.Disp;
    unsigned IndexReg = AM.IndexReg;
    unsigned Scale = AM.Scale;
    gep_type_iterator GTI = gep_type_begin(U);
    // Iterate through the indices, folding what we can. Constants can be
    // folded, and one dynamic index can be handled, if the scale is supported.
    for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end();
         i != e; ++i, ++GTI) {
      const Value *Op = *i;
      if (StructType *STy = dyn_cast<StructType>(*GTI)) {
        const StructLayout *SL = TD.getStructLayout(STy);
        Disp += SL->getElementOffset(cast<ConstantInt>(Op)->getZExtValue());
        continue;
      }

      // An array/variable index is always of the form i*S where S is the
      // constant scale size.  See if we can push the scale into immediates.
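      // For example, indexing an array of i32 gives S == 4, a legal SIB
      // scale, so a non-constant index can be folded as [base + idx*4]; an
      // unsupported scale (say, 12) with a non-constant index falls through
      // to the unsupported_gep path below.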
      uint64_t S = TD.getTypeAllocSize(GTI.getIndexedType());
      for (;;) {
        if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
          // Constant-offset addressing.
          Disp += CI->getSExtValue() * S;
          break;
        }
        if (isa<AddOperator>(Op) &&
            (!isa<Instruction>(Op) ||
             FuncInfo.MBBMap[cast<Instruction>(Op)->getParent()]
               == FuncInfo.MBB) &&
            isa<ConstantInt>(cast<AddOperator>(Op)->getOperand(1))) {
          // An add (in the same block) with a constant operand. Fold the
          // constant.
          ConstantInt *CI =
            cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
          Disp += CI->getSExtValue() * S;
          // Iterate on the other operand.
          Op = cast<AddOperator>(Op)->getOperand(0);
          continue;
        }
        if (IndexReg == 0 &&
            (!AM.GV || !Subtarget->isPICStyleRIPRel()) &&
            (S == 1 || S == 2 || S == 4 || S == 8)) {
          // Scaled-index addressing.
          Scale = S;
          IndexReg = getRegForGEPIndex(Op).first;
          if (IndexReg == 0)
            return false;
          break;
        }
        // Unsupported.
        goto unsupported_gep;
      }
    }
    // Check for displacement overflow.
    if (!isInt<32>(Disp))
      break;
    // Ok, the GEP indices were covered by constant-offset and scaled-index
    // addressing. Update the address state and move on to examining the base.
    AM.IndexReg = IndexReg;
    AM.Scale = Scale;
    AM.Disp = (uint32_t)Disp;
    if (X86SelectAddress(U->getOperand(0), AM))
      return true;

    // If we couldn't merge the gep value into this addr mode, revert back to
    // our address and just match the value instead of completely failing.
    AM = SavedAM;
    break;
  unsupported_gep:
    // Ok, the GEP indices weren't all covered.
    break;
  }
  }

  // Handle constant address.
  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
    // Can't handle alternate code models yet.
    if (TM.getCodeModel() != CodeModel::Small)
      return false;

    // Can't handle TLS yet.
    if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
      if (GVar->isThreadLocal())
        return false;

    // Can't handle TLS yet, part 2 (this is slightly crazy, but this is how
    // it works...).
    if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
      if (const GlobalVariable *GVar =
            dyn_cast_or_null<GlobalVariable>(GA->resolveAliasedGlobal(false)))
        if (GVar->isThreadLocal())
          return false;

    // RIP-relative addresses can't have additional register operands, so if
    // we've already folded stuff into the addressing mode, just force the
    // global value into its own register, which we can use as the basereg.
    if (!Subtarget->isPICStyleRIPRel() ||
        (AM.Base.Reg == 0 && AM.IndexReg == 0)) {
      // Okay, we've committed to selecting this global. Set up the address.
      AM.GV = GV;

      // Allow the subtarget to classify the global.
      unsigned char GVFlags = Subtarget->ClassifyGlobalReference(GV, TM);

      // If this reference is relative to the pic base, set it now.
      if (isGlobalRelativeToPICBase(GVFlags)) {
        // FIXME: How do we know Base.Reg is free??
        AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
      }

      // Unless the ABI requires an extra load, return a direct reference to
      // the global.
      if (!isGlobalStubReference(GVFlags)) {
        if (Subtarget->isPICStyleRIPRel()) {
          // Use rip-relative addressing if we can.  Above we verified that the
          // base and index registers are unused.
          assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
          AM.Base.Reg = X86::RIP;
        }
        AM.GVOpFlags = GVFlags;
        return true;
      }

      // Ok, we need to do a load from a stub.  If we've already loaded from
      // this stub, reuse the loaded pointer, otherwise emit the load now.
      DenseMap<const Value*, unsigned>::iterator I = LocalValueMap.find(V);
      unsigned LoadReg;
      if (I != LocalValueMap.end() && I->second != 0) {
        LoadReg = I->second;
      } else {
        // Issue load from stub.
        unsigned Opc = 0;
        const TargetRegisterClass *RC = NULL;
        X86AddressMode StubAM;
        StubAM.Base.Reg = AM.Base.Reg;
        StubAM.GV = GV;
        StubAM.GVOpFlags = GVFlags;

        // Prepare for inserting code in the local-value area.
        SavePoint SaveInsertPt = enterLocalValueArea();

        if (TLI.getPointerTy() == MVT::i64) {
          Opc = X86::MOV64rm;
          RC  = &X86::GR64RegClass;

          if (Subtarget->isPICStyleRIPRel())
            StubAM.Base.Reg = X86::RIP;
        } else {
          Opc = X86::MOV32rm;
          RC  = &X86::GR32RegClass;
        }

        LoadReg = createResultReg(RC);
        MachineInstrBuilder LoadMI =
          BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), LoadReg);
        addFullAddress(LoadMI, StubAM);

        // Ok, back to normal mode.
        leaveLocalValueArea(SaveInsertPt);

        // Prevent loading GV stub multiple times in same MBB.
        LocalValueMap[V] = LoadReg;
      }

      // Now construct the final address. Note that the Disp, Scale,
      // and Index values may already be set here.
      AM.Base.Reg = LoadReg;
      AM.GV = 0;
      return true;
    }
  }

  // If all else fails, try to materialize the value in a register.
  if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
    if (AM.Base.Reg == 0) {
      AM.Base.Reg = getRegForValue(V);
      return AM.Base.Reg != 0;
    }
    if (AM.IndexReg == 0) {
      assert(AM.Scale == 1 && "Scale with no index!");
      AM.IndexReg = getRegForValue(V);
      return AM.IndexReg != 0;
    }
  }

  return false;
}

/// X86SelectCallAddress - Attempt to fill in an address from the given value.
///
bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) {
  const User *U = NULL;
  unsigned Opcode = Instruction::UserOp1;
  if (const Instruction *I = dyn_cast<Instruction>(V)) {
    Opcode = I->getOpcode();
    U = I;
  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
    Opcode = C->getOpcode();
    U = C;
  }

  switch (Opcode) {
  default: break;
  case Instruction::BitCast:
    // Look past bitcasts.
    return X86SelectCallAddress(U->getOperand(0), AM);

  case Instruction::IntToPtr:
    // Look past no-op inttoptrs.
    if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
      return X86SelectCallAddress(U->getOperand(0), AM);
    break;

  case Instruction::PtrToInt:
    // Look past no-op ptrtoints.
    if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
      return X86SelectCallAddress(U->getOperand(0), AM);
    break;
  }

  // Handle constant address.
  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
    // Can't handle alternate code models yet.
    if (TM.getCodeModel() != CodeModel::Small)
      return false;

    // RIP-relative addresses can't have additional register operands.
    if (Subtarget->isPICStyleRIPRel() &&
        (AM.Base.Reg != 0 || AM.IndexReg != 0))
      return false;

    // Can't handle DLLImport.
    if (GV->hasDLLImportLinkage())
      return false;

    // Can't handle TLS.
    if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
      if (GVar->isThreadLocal())
        return false;

    // Okay, we've committed to selecting this global. Set up the basic address.
    AM.GV = GV;

    // No ABI requires an extra load for anything other than DLLImport, which
    // we rejected above. Return a direct reference to the global.
    if (Subtarget->isPICStyleRIPRel()) {
      // Use rip-relative addressing if we can.  Above we verified that the
      // base and index registers are unused.
      assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
      AM.Base.Reg = X86::RIP;
    } else if (Subtarget->isPICStyleStubPIC()) {
      AM.GVOpFlags = X86II::MO_PIC_BASE_OFFSET;
    } else if (Subtarget->isPICStyleGOT()) {
      AM.GVOpFlags = X86II::MO_GOTOFF;
    }

    return true;
  }

  // If all else fails, try to materialize the value in a register.
  if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
    if (AM.Base.Reg == 0) {
      AM.Base.Reg = getRegForValue(V);
      return AM.Base.Reg != 0;
    }
    if (AM.IndexReg == 0) {
      assert(AM.Scale == 1 && "Scale with no index!");
      AM.IndexReg = getRegForValue(V);
      return AM.IndexReg != 0;
    }
  }

  return false;
}


/// X86SelectStore - Select and emit code to implement store instructions.
bool X86FastISel::X86SelectStore(const Instruction *I) {
  // Atomic stores need special handling.
  const StoreInst *S = cast<StoreInst>(I);

  if (S->isAtomic())
    return false;

  MVT VT;
  if (!isTypeLegal(I->getOperand(0)->getType(), VT, /*AllowI1=*/true))
    return false;

  X86AddressMode AM;
  if (!X86SelectAddress(I->getOperand(1), AM))
    return false;

  return X86FastEmitStore(VT, I->getOperand(0), AM);
}

/// X86SelectRet - Select and emit code to implement ret instructions.
bool X86FastISel::X86SelectRet(const Instruction *I) {
  const ReturnInst *Ret = cast<ReturnInst>(I);
  const Function &F = *I->getParent()->getParent();
  const X86MachineFunctionInfo *X86MFInfo =
      FuncInfo.MF->getInfo<X86MachineFunctionInfo>();

  if (!FuncInfo.CanLowerReturn)
    return false;

  CallingConv::ID CC = F.getCallingConv();
  if (CC != CallingConv::C &&
      CC != CallingConv::Fast &&
      CC != CallingConv::X86_FastCall)
    return false;

  if (Subtarget->isTargetWin64())
    return false;

  // Don't handle popping bytes on return for now.
  if (X86MFInfo->getBytesToPopOnReturn() != 0)
    return false;

  // fastcc with -tailcallopt is intended to provide a guaranteed
  // tail call optimization. Fastisel doesn't know how to do that.
  if (CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt)
    return false;

  // Let SDISel handle vararg functions.
  if (F.isVarArg())
    return false;

  // Build a list of return value registers.
  SmallVector<unsigned, 4> RetRegs;

  if (Ret->getNumOperands() > 0) {
    SmallVector<ISD::OutputArg, 4> Outs;
    GetReturnInfo(F.getReturnType(), F.getAttributes(), Outs, TLI);

    // Analyze operands of the call, assigning locations to each operand.
    SmallVector<CCValAssign, 16> ValLocs;
    CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, TM, ValLocs,
                   I->getContext());
    CCInfo.AnalyzeReturn(Outs, RetCC_X86);

    const Value *RV = Ret->getOperand(0);
    unsigned Reg = getRegForValue(RV);
    if (Reg == 0)
      return false;

    // Only handle a single return value for now.
    if (ValLocs.size() != 1)
      return false;

    CCValAssign &VA = ValLocs[0];

    // Don't bother handling odd stuff for now.
    if (VA.getLocInfo() != CCValAssign::Full)
      return false;
    // Only handle register returns for now.
    if (!VA.isRegLoc())
      return false;

    // The calling-convention tables for x87 returns don't tell
    // the whole story.
    if (VA.getLocReg() == X86::ST0 || VA.getLocReg() == X86::ST1)
      return false;

    unsigned SrcReg = Reg + VA.getValNo();
    EVT SrcVT = TLI.getValueType(RV->getType());
    EVT DstVT = VA.getValVT();
    // Special handling for extended integers.
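    // For example, an i8 return value under the C calling convention is
    // widened to i32 here (zext or sext, per the return attribute) before
    // being copied into the return register.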
    if (SrcVT != DstVT) {
      if (SrcVT != MVT::i1 && SrcVT != MVT::i8 && SrcVT != MVT::i16)
        return false;

      if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt())
        return false;

      assert(DstVT == MVT::i32 && "X86 should always ext to i32");

      if (SrcVT == MVT::i1) {
        if (Outs[0].Flags.isSExt())
          return false;
        SrcReg = FastEmitZExtFromI1(MVT::i8, SrcReg, /*TODO: Kill=*/false);
        SrcVT = MVT::i8;
      }
      unsigned Op = Outs[0].Flags.isZExt() ? ISD::ZERO_EXTEND :
                                             ISD::SIGN_EXTEND;
      SrcReg = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Op,
                          SrcReg, /*TODO: Kill=*/false);
    }

    // Make the copy.
    unsigned DstReg = VA.getLocReg();
    const TargetRegisterClass* SrcRC = MRI.getRegClass(SrcReg);
    // Avoid a cross-class copy. This is very unlikely.
    if (!SrcRC->contains(DstReg))
      return false;
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
            DstReg).addReg(SrcReg);

    // Add register to return instruction.
    RetRegs.push_back(VA.getLocReg());
  }

  // The x86-64 ABI for returning structs by value requires that we copy
  // the sret argument into %rax for the return. We saved the argument into
  // a virtual register in the entry block, so now we copy the value out
  // and into %rax. We also do the same with %eax for Win32.
  if (F.hasStructRetAttr() &&
      (Subtarget->is64Bit() || Subtarget->isTargetWindows())) {
    unsigned Reg = X86MFInfo->getSRetReturnReg();
    assert(Reg &&
           "SRetReturnReg should have been set in LowerFormalArguments()!");
    unsigned RetReg = Subtarget->is64Bit() ? X86::RAX : X86::EAX;
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
            RetReg).addReg(Reg);
    RetRegs.push_back(RetReg);
  }

  // Now emit the RET.
  MachineInstrBuilder MIB =
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::RET));
  for (unsigned i = 0, e = RetRegs.size(); i != e; ++i)
    MIB.addReg(RetRegs[i], RegState::Implicit);
  return true;
}

/// X86SelectLoad - Select and emit code to implement load instructions.
///
bool X86FastISel::X86SelectLoad(const Instruction *I)  {
  // Atomic loads need special handling.
  if (cast<LoadInst>(I)->isAtomic())
    return false;

  MVT VT;
  if (!isTypeLegal(I->getType(), VT, /*AllowI1=*/true))
    return false;

  X86AddressMode AM;
  if (!X86SelectAddress(I->getOperand(0), AM))
    return false;

  unsigned ResultReg = 0;
  if (X86FastEmitLoad(VT, AM, ResultReg)) {
    UpdateValueMap(I, ResultReg);
    return true;
  }
  return false;
}

static unsigned X86ChooseCmpOpcode(EVT VT, const X86Subtarget *Subtarget) {
  bool HasAVX = Subtarget->hasAVX();
  bool X86ScalarSSEf32 = Subtarget->hasSSE1();
  bool X86ScalarSSEf64 = Subtarget->hasSSE2();

  switch (VT.getSimpleVT().SimpleTy) {
  default:       return 0;
  case MVT::i8:  return X86::CMP8rr;
  case MVT::i16: return X86::CMP16rr;
  case MVT::i32: return X86::CMP32rr;
  case MVT::i64: return X86::CMP64rr;
  case MVT::f32:
    return X86ScalarSSEf32 ? (HasAVX ? X86::VUCOMISSrr : X86::UCOMISSrr) : 0;
  case MVT::f64:
    return X86ScalarSSEf64 ? (HasAVX ? X86::VUCOMISDrr : X86::UCOMISDrr) : 0;
  }
}

/// X86ChooseCmpImmediateOpcode - If the comparison has the constant RHSC as
/// its RHS, return an opcode that folds the immediate into the compare
/// (e.g. CMP32ri); otherwise return 0.
static unsigned X86ChooseCmpImmediateOpcode(EVT VT, const ConstantInt *RHSC) {
  switch (VT.getSimpleVT().SimpleTy) {
  // Otherwise, we can't fold the immediate into this comparison.
  default: return 0;
  case MVT::i8: return X86::CMP8ri;
  case MVT::i16: return X86::CMP16ri;
  case MVT::i32: return X86::CMP32ri;
  case MVT::i64:
    // 64-bit comparisons are only valid if the immediate fits in a 32-bit sext
    // field.
    if ((int)RHSC->getSExtValue() == RHSC->getSExtValue())
      return X86::CMP64ri32;
    return 0;
  }
}

bool X86FastISel::X86FastEmitCompare(const Value *Op0, const Value *Op1,
                                     EVT VT) {
  unsigned Op0Reg = getRegForValue(Op0);
  if (Op0Reg == 0) return false;

  // Handle 'null' like i32/i64 0.
  if (isa<ConstantPointerNull>(Op1))
    Op1 = Constant::getNullValue(TD.getIntPtrType(Op0->getContext()));

  // We have two options: compare with register or immediate.  If the RHS of
  // the compare is an immediate that we can fold into this compare, use
  // CMPri, otherwise use CMPrr.
  if (const ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
    if (unsigned CompareImmOpc = X86ChooseCmpImmediateOpcode(VT, Op1C)) {
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CompareImmOpc))
        .addReg(Op0Reg)
        .addImm(Op1C->getSExtValue());
      return true;
    }
  }

  unsigned CompareOpc = X86ChooseCmpOpcode(VT, Subtarget);
  if (CompareOpc == 0) return false;

  unsigned Op1Reg = getRegForValue(Op1);
  if (Op1Reg == 0) return false;
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CompareOpc))
    .addReg(Op0Reg)
    .addReg(Op1Reg);

  return true;
}

bool X86FastISel::X86SelectCmp(const Instruction *I) {
  const CmpInst *CI = cast<CmpInst>(I);

  MVT VT;
  if (!isTypeLegal(I->getOperand(0)->getType(), VT))
    return false;

  unsigned ResultReg = createResultReg(&X86::GR8RegClass);
  unsigned SetCCOpc;
  bool SwapArgs;  // false -> compare Op0, Op1.  true -> compare Op1, Op0.
  switch (CI->getPredicate()) {
  case CmpInst::FCMP_OEQ: {
    if (!X86FastEmitCompare(CI->getOperand(0), CI->getOperand(1), VT))
      return false;

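    // UCOMISS/UCOMISD set ZF on equality and PF on an unordered result, so
    // OEQ ("equal and ordered") is materialized as SETE ANDed with SETNP.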
    unsigned EReg = createResultReg(&X86::GR8RegClass);
    unsigned NPReg = createResultReg(&X86::GR8RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::SETEr), EReg);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
            TII.get(X86::SETNPr), NPReg);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
            TII.get(X86::AND8rr), ResultReg).addReg(NPReg).addReg(EReg);
    UpdateValueMap(I, ResultReg);
    return true;
  }
  case CmpInst::FCMP_UNE: {
    if (!X86FastEmitCompare(CI->getOperand(0), CI->getOperand(1), VT))
      return false;

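    // UNE ("unordered or not equal") is the complement: SETNE ORed with SETP.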
    unsigned NEReg = createResultReg(&X86::GR8RegClass);
    unsigned PReg = createResultReg(&X86::GR8RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::SETNEr), NEReg);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::SETPr), PReg);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::OR8rr),ResultReg)
      .addReg(PReg).addReg(NEReg);
    UpdateValueMap(I, ResultReg);
    return true;
  }
  case CmpInst::FCMP_OGT: SwapArgs = false; SetCCOpc = X86::SETAr;  break;
  case CmpInst::FCMP_OGE: SwapArgs = false; SetCCOpc = X86::SETAEr; break;
  case CmpInst::FCMP_OLT: SwapArgs = true;  SetCCOpc = X86::SETAr;  break;
  case CmpInst::FCMP_OLE: SwapArgs = true;  SetCCOpc = X86::SETAEr; break;
  case CmpInst::FCMP_ONE: SwapArgs = false; SetCCOpc = X86::SETNEr; break;
  case CmpInst::FCMP_ORD: SwapArgs = false; SetCCOpc = X86::SETNPr; break;
  case CmpInst::FCMP_UNO: SwapArgs = false; SetCCOpc = X86::SETPr;  break;
  case CmpInst::FCMP_UEQ: SwapArgs = false; SetCCOpc = X86::SETEr;  break;
  case CmpInst::FCMP_UGT: SwapArgs = true;  SetCCOpc = X86::SETBr;  break;
  case CmpInst::FCMP_UGE: SwapArgs = true;  SetCCOpc = X86::SETBEr; break;
  case CmpInst::FCMP_ULT: SwapArgs = false; SetCCOpc = X86::SETBr;  break;
  case CmpInst::FCMP_ULE: SwapArgs = false; SetCCOpc = X86::SETBEr; break;

  case CmpInst::ICMP_EQ:  SwapArgs = false; SetCCOpc = X86::SETEr;  break;
  case CmpInst::ICMP_NE:  SwapArgs = false; SetCCOpc = X86::SETNEr; break;
  case CmpInst::ICMP_UGT: SwapArgs = false; SetCCOpc = X86::SETAr;  break;
  case CmpInst::ICMP_UGE: SwapArgs = false; SetCCOpc = X86::SETAEr; break;
  case CmpInst::ICMP_ULT: SwapArgs = false; SetCCOpc = X86::SETBr;  break;
  case CmpInst::ICMP_ULE: SwapArgs = false; SetCCOpc = X86::SETBEr; break;
  case CmpInst::ICMP_SGT: SwapArgs = false; SetCCOpc = X86::SETGr;  break;
  case CmpInst::ICMP_SGE: SwapArgs = false; SetCCOpc = X86::SETGEr; break;
  case CmpInst::ICMP_SLT: SwapArgs = false; SetCCOpc = X86::SETLr;  break;
  case CmpInst::ICMP_SLE: SwapArgs = false; SetCCOpc = X86::SETLEr; break;
  default:
    return false;
  }

  const Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1);
  if (SwapArgs)
    std::swap(Op0, Op1);

  // Emit a compare of Op0/Op1.
  if (!X86FastEmitCompare(Op0, Op1, VT))
    return false;

  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(SetCCOpc), ResultReg);
  UpdateValueMap(I, ResultReg);
  return true;
}

bool X86FastISel::X86SelectZExt(const Instruction *I) {
  // Handle zero-extension from i1 to i8, which is common.
  if (!I->getOperand(0)->getType()->isIntegerTy(1))
    return false;

  EVT DstVT = TLI.getValueType(I->getType());
  if (!TLI.isTypeLegal(DstVT))
    return false;

  unsigned ResultReg = getRegForValue(I->getOperand(0));
  if (ResultReg == 0)
    return false;

  // Set the high bits to zero.
  ResultReg = FastEmitZExtFromI1(MVT::i8, ResultReg, /*TODO: Kill=*/false);
  if (ResultReg == 0)
    return false;

  if (DstVT != MVT::i8) {
    ResultReg = FastEmit_r(MVT::i8, DstVT.getSimpleVT(), ISD::ZERO_EXTEND,
                           ResultReg, /*Kill=*/true);
    if (ResultReg == 0)
      return false;
  }

  UpdateValueMap(I, ResultReg);
  return true;
}


bool X86FastISel::X86SelectBranch(const Instruction *I) {
  // Unconditional branches are selected by tablegen-generated code.
  // Handle a conditional branch.
  const BranchInst *BI = cast<BranchInst>(I);
  MachineBasicBlock *TrueMBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
  MachineBasicBlock *FalseMBB = FuncInfo.MBBMap[BI->getSuccessor(1)];

  // Fold the common case of a conditional branch with a comparison
  // in the same block (values defined on other blocks may not have
  // initialized registers).
  if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
    if (CI->hasOneUse() && CI->getParent() == I->getParent()) {
      EVT VT = TLI.getValueType(CI->getOperand(0)->getType());

      // Try to take advantage of fallthrough opportunities.
      CmpInst::Predicate Predicate = CI->getPredicate();
      if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) {
        std::swap(TrueMBB, FalseMBB);
        Predicate = CmpInst::getInversePredicate(Predicate);
      }

      bool SwapArgs;  // false -> compare Op0, Op1.  true -> compare Op1, Op0.
      unsigned BranchOpc; // Opcode to jump on, e.g. "X86::JA"

      switch (Predicate) {
      case CmpInst::FCMP_OEQ:
        std::swap(TrueMBB, FalseMBB);
        Predicate = CmpInst::FCMP_UNE;
        // FALL THROUGH
      case CmpInst::FCMP_UNE: SwapArgs = false; BranchOpc = X86::JNE_4; break;
      case CmpInst::FCMP_OGT: SwapArgs = false; BranchOpc = X86::JA_4;  break;
      case CmpInst::FCMP_OGE: SwapArgs = false; BranchOpc = X86::JAE_4; break;
      case CmpInst::FCMP_OLT: SwapArgs = true;  BranchOpc = X86::JA_4;  break;
      case CmpInst::FCMP_OLE: SwapArgs = true;  BranchOpc = X86::JAE_4; break;
      case CmpInst::FCMP_ONE: SwapArgs = false; BranchOpc = X86::JNE_4; break;
      case CmpInst::FCMP_ORD: SwapArgs = false; BranchOpc = X86::JNP_4; break;
      case CmpInst::FCMP_UNO: SwapArgs = false; BranchOpc = X86::JP_4;  break;
      case CmpInst::FCMP_UEQ: SwapArgs = false; BranchOpc = X86::JE_4;  break;
      case CmpInst::FCMP_UGT: SwapArgs = true;  BranchOpc = X86::JB_4;  break;
      case CmpInst::FCMP_UGE: SwapArgs = true;  BranchOpc = X86::JBE_4; break;
      case CmpInst::FCMP_ULT: SwapArgs = false; BranchOpc = X86::JB_4;  break;
      case CmpInst::FCMP_ULE: SwapArgs = false; BranchOpc = X86::JBE_4; break;

      case CmpInst::ICMP_EQ:  SwapArgs = false; BranchOpc = X86::JE_4;  break;
      case CmpInst::ICMP_NE:  SwapArgs = false; BranchOpc = X86::JNE_4; break;
      case CmpInst::ICMP_UGT: SwapArgs = false; BranchOpc = X86::JA_4;  break;
      case CmpInst::ICMP_UGE: SwapArgs = false; BranchOpc = X86::JAE_4; break;
      case CmpInst::ICMP_ULT: SwapArgs = false; BranchOpc = X86::JB_4;  break;
      case CmpInst::ICMP_ULE: SwapArgs = false; BranchOpc = X86::JBE_4; break;
      case CmpInst::ICMP_SGT: SwapArgs = false; BranchOpc = X86::JG_4;  break;
      case CmpInst::ICMP_SGE: SwapArgs = false; BranchOpc = X86::JGE_4; break;
      case CmpInst::ICMP_SLT: SwapArgs = false; BranchOpc = X86::JL_4;  break;
      case CmpInst::ICMP_SLE: SwapArgs = false; BranchOpc = X86::JLE_4; break;
      default:
        return false;
      }

      const Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1);
      if (SwapArgs)
        std::swap(Op0, Op1);

      // Emit a compare of the LHS and RHS, setting the flags.
      if (!X86FastEmitCompare(Op0, Op1, VT))
        return false;

      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BranchOpc))
        .addMBB(TrueMBB);

      if (Predicate == CmpInst::FCMP_UNE) {
        // X86 requires a second branch to handle UNE (and OEQ,
        // which is mapped to UNE above).
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::JP_4))
          .addMBB(TrueMBB);
      }

      FastEmitBranch(FalseMBB, DL);
      FuncInfo.MBB->addSuccessor(TrueMBB);
      return true;
    }
  } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
    // Handle things like "%cond = trunc i32 %X to i1 / br i1 %cond", which
    // typically happen for _Bool and C++ bools.
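    // Only bit 0 of the truncated value matters, so the sequence below is a
    // TESTri against 1 followed by a conditional jump.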
    MVT SourceVT;
    if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
        isTypeLegal(TI->getOperand(0)->getType(), SourceVT)) {
      unsigned TestOpc = 0;
      switch (SourceVT.SimpleTy) {
      default: break;
      case MVT::i8:  TestOpc = X86::TEST8ri; break;
      case MVT::i16: TestOpc = X86::TEST16ri; break;
      case MVT::i32: TestOpc = X86::TEST32ri; break;
      case MVT::i64: TestOpc = X86::TEST64ri32; break;
      }
      if (TestOpc) {
        unsigned OpReg = getRegForValue(TI->getOperand(0));
        if (OpReg == 0) return false;
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TestOpc))
          .addReg(OpReg).addImm(1);

        unsigned JmpOpc = X86::JNE_4;
        if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) {
          std::swap(TrueMBB, FalseMBB);
          JmpOpc = X86::JE_4;
        }

        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(JmpOpc))
          .addMBB(TrueMBB);
        FastEmitBranch(FalseMBB, DL);
        FuncInfo.MBB->addSuccessor(TrueMBB);
        return true;
      }
    }
  }

  // Otherwise do a clumsy setcc and re-test it.
  // Note that i1 essentially gets ANY_EXTEND'ed to i8 where it isn't used
  // in an explicit cast, so make sure to handle that correctly.
  unsigned OpReg = getRegForValue(BI->getCondition());
  if (OpReg == 0) return false;

  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::TEST8ri))
    .addReg(OpReg).addImm(1);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::JNE_4))
    .addMBB(TrueMBB);
  FastEmitBranch(FalseMBB, DL);
  FuncInfo.MBB->addSuccessor(TrueMBB);
  return true;
}

bool X86FastISel::X86SelectShift(const Instruction *I) {
  unsigned CReg = 0, OpReg = 0;
  const TargetRegisterClass *RC = NULL;
  if (I->getType()->isIntegerTy(8)) {
    CReg = X86::CL;
    RC = &X86::GR8RegClass;
    switch (I->getOpcode()) {
    case Instruction::LShr: OpReg = X86::SHR8rCL; break;
    case Instruction::AShr: OpReg = X86::SAR8rCL; break;
    case Instruction::Shl:  OpReg = X86::SHL8rCL; break;
    default: return false;
    }
  } else if (I->getType()->isIntegerTy(16)) {
    CReg = X86::CX;
    RC = &X86::GR16RegClass;
    switch (I->getOpcode()) {
    case Instruction::LShr: OpReg = X86::SHR16rCL; break;
    case Instruction::AShr: OpReg = X86::SAR16rCL; break;
    case Instruction::Shl:  OpReg = X86::SHL16rCL; break;
    default: return false;
    }
  } else if (I->getType()->isIntegerTy(32)) {
    CReg = X86::ECX;
    RC = &X86::GR32RegClass;
    switch (I->getOpcode()) {
    case Instruction::LShr: OpReg = X86::SHR32rCL; break;
    case Instruction::AShr: OpReg = X86::SAR32rCL; break;
    case Instruction::Shl:  OpReg = X86::SHL32rCL; break;
    default: return false;
    }
  } else if (I->getType()->isIntegerTy(64)) {
    CReg = X86::RCX;
    RC = &X86::GR64RegClass;
    switch (I->getOpcode()) {
    case Instruction::LShr: OpReg = X86::SHR64rCL; break;
    case Instruction::AShr: OpReg = X86::SAR64rCL; break;
    case Instruction::Shl:  OpReg = X86::SHL64rCL; break;
    default: return false;
    }
  } else {
    return false;
  }

  MVT VT;
  if (!isTypeLegal(I->getType(), VT))
    return false;

  unsigned Op0Reg = getRegForValue(I->getOperand(0));
  if (Op0Reg == 0) return false;

  unsigned Op1Reg = getRegForValue(I->getOperand(1));
  if (Op1Reg == 0) return false;
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
          CReg).addReg(Op1Reg);

  // The shift instruction uses X86::CL. If we defined a super-register
  // of X86::CL, emit a subreg KILL to precisely describe what we're doing here.
  if (CReg != X86::CL)
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
            TII.get(TargetOpcode::KILL), X86::CL)
      .addReg(CReg, RegState::Kill);

  unsigned ResultReg = createResultReg(RC);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(OpReg), ResultReg)
    .addReg(Op0Reg);
  UpdateValueMap(I, ResultReg);
  return true;
}

bool X86FastISel::X86SelectDivRem(const Instruction *I) {
  const static unsigned NumTypes = 4; // i8, i16, i32, i64
  const static unsigned NumOps   = 4; // SDiv, SRem, UDiv, URem
  const static bool S = true;  // IsSigned
  const static bool U = false; // !IsSigned
  const static unsigned Copy = TargetOpcode::COPY;
  // For the X86 DIV/IDIV instruction, in most cases the dividend
  // (numerator) must be in a specific register pair highreg:lowreg,
  // producing the quotient in lowreg and the remainder in highreg.
  // For most data types, to set up the instruction, the dividend is
  // copied into lowreg, and lowreg is sign-extended or zero-extended
  // into highreg.  The exception is i8, where the dividend is defined
  // as a single register rather than a register pair, and we
  // therefore directly sign-extend or zero-extend the dividend into
  // lowreg, instead of copying, and ignore the highreg.
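  // For example, an i32 SDiv is emitted roughly as:
  //   COPY    EAX <- %op0
  //   CDQ              ; sign-extend EAX into EDX
  //   IDIV32r %op1
  //   COPY    %result <- EAX   (EDX instead for SRem)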
1250  const static struct DivRemEntry {
1251    // The following portion depends only on the data type.
1252    const TargetRegisterClass *RC;
1253    unsigned LowInReg;  // low part of the register pair
1254    unsigned HighInReg; // high part of the register pair
1255    // The following portion depends on both the data type and the operation.
1256    struct DivRemResult {
1257    unsigned OpDivRem;        // The specific DIV/IDIV opcode to use.
1258    unsigned OpSignExtend;    // Opcode for sign-extending lowreg into
1259                              // highreg, or copying a zero into highreg.
1260    unsigned OpCopy;          // Opcode for copying dividend into lowreg, or
1261                              // zero/sign-extending into lowreg for i8.
1262    unsigned DivRemResultReg; // Register containing the desired result.
1263    bool IsOpSigned;          // Whether to use signed or unsigned form.
1264    } ResultTable[NumOps];
1265  } OpTable[NumTypes] = {
1266    { &X86::GR8RegClass,  X86::AX,  0, {
1267        { X86::IDIV8r,  0,            X86::MOVSX16rr8, X86::AL,  S }, // SDiv
1268        { X86::IDIV8r,  0,            X86::MOVSX16rr8, X86::AH,  S }, // SRem
1269        { X86::DIV8r,   0,            X86::MOVZX16rr8, X86::AL,  U }, // UDiv
1270        { X86::DIV8r,   0,            X86::MOVZX16rr8, X86::AH,  U }, // URem
1271      }
1272    }, // i8
1273    { &X86::GR16RegClass, X86::AX,  X86::DX, {
1274        { X86::IDIV16r, X86::CWD,     Copy,            X86::AX,  S }, // SDiv
1275        { X86::IDIV16r, X86::CWD,     Copy,            X86::DX,  S }, // SRem
1276        { X86::DIV16r,  X86::MOV16r0, Copy,            X86::AX,  U }, // UDiv
1277        { X86::DIV16r,  X86::MOV16r0, Copy,            X86::DX,  U }, // URem
1278      }
1279    }, // i16
1280    { &X86::GR32RegClass, X86::EAX, X86::EDX, {
1281        { X86::IDIV32r, X86::CDQ,     Copy,            X86::EAX, S }, // SDiv
1282        { X86::IDIV32r, X86::CDQ,     Copy,            X86::EDX, S }, // SRem
1283        { X86::DIV32r,  X86::MOV32r0, Copy,            X86::EAX, U }, // UDiv
1284        { X86::DIV32r,  X86::MOV32r0, Copy,            X86::EDX, U }, // URem
1285      }
1286    }, // i32
1287    { &X86::GR64RegClass, X86::RAX, X86::RDX, {
1288        { X86::IDIV64r, X86::CQO,     Copy,            X86::RAX, S }, // SDiv
1289        { X86::IDIV64r, X86::CQO,     Copy,            X86::RDX, S }, // SRem
1290        { X86::DIV64r,  X86::MOV64r0, Copy,            X86::RAX, U }, // UDiv
1291        { X86::DIV64r,  X86::MOV64r0, Copy,            X86::RDX, U }, // URem
1292      }
1293    }, // i64
1294  };
1295
1296  MVT VT;
1297  if (!isTypeLegal(I->getType(), VT))
1298    return false;
1299
1300  unsigned TypeIndex, OpIndex;
1301  switch (VT.SimpleTy) {
1302  default: return false;
1303  case MVT::i8:  TypeIndex = 0; break;
1304  case MVT::i16: TypeIndex = 1; break;
1305  case MVT::i32: TypeIndex = 2; break;
1306  case MVT::i64: TypeIndex = 3;
1307    if (!Subtarget->is64Bit())
1308      return false;
1309    break;
1310  }
1311
1312  switch (I->getOpcode()) {
1313  default: llvm_unreachable("Unexpected div/rem opcode");
1314  case Instruction::SDiv: OpIndex = 0; break;
1315  case Instruction::SRem: OpIndex = 1; break;
1316  case Instruction::UDiv: OpIndex = 2; break;
1317  case Instruction::URem: OpIndex = 3; break;
1318  }
1319
1320  const DivRemEntry &TypeEntry = OpTable[TypeIndex];
1321  const DivRemEntry::DivRemResult &OpEntry = TypeEntry.ResultTable[OpIndex];
1322  unsigned Op0Reg = getRegForValue(I->getOperand(0));
1323  if (Op0Reg == 0)
1324    return false;
1325  unsigned Op1Reg = getRegForValue(I->getOperand(1));
1326  if (Op1Reg == 0)
1327    return false;
1328
1329  // Move op0 into low-order input register.
1330  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
1331          TII.get(OpEntry.OpCopy), TypeEntry.LowInReg).addReg(Op0Reg);
1332  // Zero-extend or sign-extend into high-order input register.
1333  if (OpEntry.OpSignExtend) {
1334    if (OpEntry.IsOpSigned)
1335      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
1336              TII.get(OpEntry.OpSignExtend));
1337    else
1338      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
1339              TII.get(OpEntry.OpSignExtend), TypeEntry.HighInReg);
1340  }
1341  // Generate the DIV/IDIV instruction.
1342  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
1343          TII.get(OpEntry.OpDivRem)).addReg(Op1Reg);
1344  // Copy output register into result register.
1345  unsigned ResultReg = createResultReg(TypeEntry.RC);
1346  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
1347          TII.get(Copy), ResultReg).addReg(OpEntry.DivRemResultReg);
1348  UpdateValueMap(I, ResultReg);
1349
1350  return true;
1351}
1352
1353bool X86FastISel::X86SelectSelect(const Instruction *I) {
1354  MVT VT;
1355  if (!isTypeLegal(I->getType(), VT))
1356    return false;
1357
1358  // We only use cmov here, if we don't have a cmov instruction bail.
1359  if (!Subtarget->hasCMov()) return false;
1360
1361  unsigned Opc = 0;
1362  const TargetRegisterClass *RC = NULL;
1363  if (VT == MVT::i16) {
1364    Opc = X86::CMOVE16rr;
1365    RC = &X86::GR16RegClass;
1366  } else if (VT == MVT::i32) {
1367    Opc = X86::CMOVE32rr;
1368    RC = &X86::GR32RegClass;
1369  } else if (VT == MVT::i64) {
1370    Opc = X86::CMOVE64rr;
1371    RC = &X86::GR64RegClass;
1372  } else {
1373    return false;
1374  }
1375
1376  unsigned Op0Reg = getRegForValue(I->getOperand(0));
1377  if (Op0Reg == 0) return false;
1378  unsigned Op1Reg = getRegForValue(I->getOperand(1));
1379  if (Op1Reg == 0) return false;
1380  unsigned Op2Reg = getRegForValue(I->getOperand(2));
1381  if (Op2Reg == 0) return false;
1382
1383  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::TEST8rr))
1384    .addReg(Op0Reg).addReg(Op0Reg);
1385  unsigned ResultReg = createResultReg(RC);
1386  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg)
1387    .addReg(Op1Reg).addReg(Op2Reg);
1388  UpdateValueMap(I, ResultReg);
1389  return true;
1390}
1391
1392bool X86FastISel::X86SelectFPExt(const Instruction *I) {
1393  // fpext from float to double.
1394  if (X86ScalarSSEf64 &&
1395      I->getType()->isDoubleTy()) {
1396    const Value *V = I->getOperand(0);
1397    if (V->getType()->isFloatTy()) {
1398      unsigned OpReg = getRegForValue(V);
1399      if (OpReg == 0) return false;
1400      unsigned ResultReg = createResultReg(&X86::FR64RegClass);
1401      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
1402              TII.get(X86::CVTSS2SDrr), ResultReg)
1403        .addReg(OpReg);
1404      UpdateValueMap(I, ResultReg);
1405      return true;
1406    }
1407  }
1408
1409  return false;
1410}
1411
1412bool X86FastISel::X86SelectFPTrunc(const Instruction *I) {
1413  if (X86ScalarSSEf64) {
1414    if (I->getType()->isFloatTy()) {
1415      const Value *V = I->getOperand(0);
1416      if (V->getType()->isDoubleTy()) {
1417        unsigned OpReg = getRegForValue(V);
1418        if (OpReg == 0) return false;
1419        unsigned ResultReg = createResultReg(&X86::FR32RegClass);
1420        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
1421                TII.get(X86::CVTSD2SSrr), ResultReg)
1422          .addReg(OpReg);
1423        UpdateValueMap(I, ResultReg);
1424        return true;
1425      }
1426    }
1427  }
1428
1429  return false;
1430}
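// Symmetrically, %f = fptrunc double %d to float becomes a single
//   CVTSD2SSrr %f, %d
// under the same SSE2 requirement.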
1431
1432bool X86FastISel::X86SelectTrunc(const Instruction *I) {
1433  EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
1434  EVT DstVT = TLI.getValueType(I->getType());
1435
1436  // This code only handles truncation to byte.
1437  if (DstVT != MVT::i8 && DstVT != MVT::i1)
1438    return false;
1439  if (!TLI.isTypeLegal(SrcVT))
1440    return false;
1441
1442  unsigned InputReg = getRegForValue(I->getOperand(0));
1443  if (!InputReg)
1444    // Unhandled operand.  Halt "fast" selection and bail.
1445    return false;
1446
1447  if (SrcVT == MVT::i8) {
1448    // Truncate from i8 to i1; no code needed.
1449    UpdateValueMap(I, InputReg);
1450    return true;
1451  }
1452
1453  if (!Subtarget->is64Bit()) {
1454    // If we're on x86-32, we can't extract an i8 from a general register.
1455    // First issue a copy to GR16_ABCD or GR32_ABCD.
1456    const TargetRegisterClass *CopyRC = (SrcVT == MVT::i16) ?
1457      (const TargetRegisterClass*)&X86::GR16_ABCDRegClass :
1458      (const TargetRegisterClass*)&X86::GR32_ABCDRegClass;
1459    unsigned CopyReg = createResultReg(CopyRC);
1460    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
1461            CopyReg).addReg(InputReg);
1462    InputReg = CopyReg;
1463  }
1464
1465  // Issue an extract_subreg.
1466  unsigned ResultReg = FastEmitInst_extractsubreg(MVT::i8,
1467                                                  InputReg, /*Kill=*/true,
1468                                                  X86::sub_8bit);
1469  if (!ResultReg)
1470    return false;
1471
1472  UpdateValueMap(I, ResultReg);
1473  return true;
1474}
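// For example, %b = trunc i32 %x to i8 needs no ALU work: on x86-64 it is
// just a sub_8bit subregister extraction, while on x86-32 the value is first
// copied into a GR32_ABCD register (one that actually has an 8-bit
// subregister, i.e. EAX/EBX/ECX/EDX) before the extraction.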
1475
1476bool X86FastISel::IsMemcpySmall(uint64_t Len) {
1477  return Len <= (Subtarget->is64Bit() ? 32 : 16);
1478}
1479
1480bool X86FastISel::TryEmitSmallMemcpy(X86AddressMode DestAM,
1481                                     X86AddressMode SrcAM, uint64_t Len) {
1482
1483  // Make sure we don't bloat code by inlining very large memcpys.
1484  if (!IsMemcpySmall(Len))
1485    return false;
1486
1487  bool i64Legal = Subtarget->is64Bit();
1488
1489  // We don't care about alignment here since we just emit integer accesses.
1490  while (Len) {
1491    MVT VT;
1492    if (Len >= 8 && i64Legal)
1493      VT = MVT::i64;
1494    else if (Len >= 4)
1495      VT = MVT::i32;
1496    else if (Len >= 2)
1497      VT = MVT::i16;
1498    else {
1499      VT = MVT::i8;
1500    }
1501
1502    unsigned Reg;
1503    bool RV = X86FastEmitLoad(VT, SrcAM, Reg);
1504    RV &= X86FastEmitStore(VT, Reg, DestAM);
1505    assert(RV && "Failed to emit load or store??");
1506
1507    unsigned Size = VT.getSizeInBits()/8;
1508    Len -= Size;
1509    DestAM.Disp += Size;
1510    SrcAM.Disp += Size;
1511  }
1512
1513  return true;
1514}
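// For example, a 13-byte copy on x86-64 is emitted as three load/store pairs:
// one i64 at offset 0, one i32 at offset 8, and one i8 at offset 12, with the
// displacements on DestAM/SrcAM advancing by the access size each iteration.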
1515
1516bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
1517  // FIXME: Handle more intrinsics.
1518  switch (I.getIntrinsicID()) {
1519  default: return false;
1520  case Intrinsic::memcpy: {
1521    const MemCpyInst &MCI = cast<MemCpyInst>(I);
1522    // Don't handle volatile memcpys; non-constant lengths become a libcall below.
1523    if (MCI.isVolatile())
1524      return false;
1525
1526    if (isa<ConstantInt>(MCI.getLength())) {
1527      // Small memcpys are common enough that we want to do them
1528      // without a call if possible.
1529      uint64_t Len = cast<ConstantInt>(MCI.getLength())->getZExtValue();
1530      if (IsMemcpySmall(Len)) {
1531        X86AddressMode DestAM, SrcAM;
1532        if (!X86SelectAddress(MCI.getRawDest(), DestAM) ||
1533            !X86SelectAddress(MCI.getRawSource(), SrcAM))
1534          return false;
1535        TryEmitSmallMemcpy(DestAM, SrcAM, Len);
1536        return true;
1537      }
1538    }
1539
1540    unsigned SizeWidth = Subtarget->is64Bit() ? 64 : 32;
1541    if (!MCI.getLength()->getType()->isIntegerTy(SizeWidth))
1542      return false;
1543
1544    if (MCI.getSourceAddressSpace() > 255 || MCI.getDestAddressSpace() > 255)
1545      return false;
1546
1547    return DoSelectCall(&I, "memcpy");
1548  }
1549  case Intrinsic::memset: {
1550    const MemSetInst &MSI = cast<MemSetInst>(I);
1551
1552    if (MSI.isVolatile())
1553      return false;
1554
1555    unsigned SizeWidth = Subtarget->is64Bit() ? 64 : 32;
1556    if (!MSI.getLength()->getType()->isIntegerTy(SizeWidth))
1557      return false;
1558
1559    if (MSI.getDestAddressSpace() > 255)
1560      return false;
1561
1562    return DoSelectCall(&I, "memset");
1563  }
1564  case Intrinsic::stackprotector: {
1565    // Emit code to store the stack guard onto the stack.
1566    EVT PtrTy = TLI.getPointerTy();
1567
1568    const Value *Op1 = I.getArgOperand(0); // The guard's value.
1569    const AllocaInst *Slot = cast<AllocaInst>(I.getArgOperand(1));
1570
1571    // Grab the frame index.
1572    X86AddressMode AM;
1573    if (!X86SelectAddress(Slot, AM)) return false;
1574    if (!X86FastEmitStore(PtrTy, Op1, AM)) return false;
1575    return true;
1576  }
1577  case Intrinsic::dbg_declare: {
1578    const DbgDeclareInst *DI = cast<DbgDeclareInst>(&I);
1579    X86AddressMode AM;
1580    assert(DI->getAddress() && "Null address should be checked earlier!");
1581    if (!X86SelectAddress(DI->getAddress(), AM))
1582      return false;
1583    const MCInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE);
1584    // FIXME may need to add RegState::Debug to any registers produced,
1585    // although ESP/EBP should be the only ones at the moment.
1586    addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II), AM).
1587      addImm(0).addMetadata(DI->getVariable());
1588    return true;
1589  }
1590  case Intrinsic::trap: {
1591    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::TRAP));
1592    return true;
1593  }
1594  case Intrinsic::sadd_with_overflow:
1595  case Intrinsic::uadd_with_overflow: {
1596    // FIXME: Should fold immediates.
1597
1598    // Replace "add with overflow" intrinsics with an "add" instruction followed
1599    // by a seto/setc instruction.
1600    const Function *Callee = I.getCalledFunction();
1601    Type *RetTy =
1602      cast<StructType>(Callee->getReturnType())->getTypeAtIndex(unsigned(0));
1603
1604    MVT VT;
1605    if (!isTypeLegal(RetTy, VT))
1606      return false;
1607
1608    const Value *Op1 = I.getArgOperand(0);
1609    const Value *Op2 = I.getArgOperand(1);
1610    unsigned Reg1 = getRegForValue(Op1);
1611    unsigned Reg2 = getRegForValue(Op2);
1612
1613    if (Reg1 == 0 || Reg2 == 0)
1614      // FIXME: Handle values *not* in registers.
1615      return false;
1616
1617    unsigned OpC = 0;
1618    if (VT == MVT::i32)
1619      OpC = X86::ADD32rr;
1620    else if (VT == MVT::i64)
1621      OpC = X86::ADD64rr;
1622    else
1623      return false;
1624
1625    // The call to CreateRegs builds two sequential registers to store
1626    // both of the returned values.
1627    unsigned ResultReg = FuncInfo.CreateRegs(I.getType());
1628    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(OpC), ResultReg)
1629      .addReg(Reg1).addReg(Reg2);
1630
1631    unsigned Opc = X86::SETBr;
1632    if (I.getIntrinsicID() == Intrinsic::sadd_with_overflow)
1633      Opc = X86::SETOr;
1634    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg+1);
1635
1636    UpdateValueMap(&I, ResultReg, 2);
1637    return true;
1638  }
1639  }
1640}
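// For example,
//   %res = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
// is selected above roughly as
//   ADD32rr %sum, %a, %b
//   SETOr   %ovf          ; SETBr (carry) for the unsigned variant
// where %sum and %ovf are the two sequential registers created for the
// struct result.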
1641
1642bool X86FastISel::FastLowerArguments() {
1643  if (!FuncInfo.CanLowerReturn)
1644    return false;
1645
1646  if (Subtarget->isTargetWin64())
1647    return false;
1648
1649  const Function *F = FuncInfo.Fn;
1650  if (F->isVarArg())
1651    return false;
1652
1653  CallingConv::ID CC = F->getCallingConv();
1654  if (CC != CallingConv::C)
1655    return false;
1656
1657  if (!Subtarget->is64Bit())
1658    return false;
1659
1660  // Only handle simple cases, i.e. up to 6 i32/i64 scalar arguments.
1661  unsigned Idx = 1;
1662  for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
1663       I != E; ++I, ++Idx) {
1664    if (Idx > 6)
1665      return false;
1666
1667    if (F->getAttributes().hasAttribute(Idx, Attribute::ByVal) ||
1668        F->getAttributes().hasAttribute(Idx, Attribute::InReg) ||
1669        F->getAttributes().hasAttribute(Idx, Attribute::StructRet) ||
1670        F->getAttributes().hasAttribute(Idx, Attribute::Nest))
1671      return false;
1672
1673    Type *ArgTy = I->getType();
1674    if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy())
1675      return false;
1676
1677    EVT ArgVT = TLI.getValueType(ArgTy);
1678    if (!ArgVT.isSimple()) return false;
1679    switch (ArgVT.getSimpleVT().SimpleTy) {
1680    case MVT::i32:
1681    case MVT::i64:
1682      break;
1683    default:
1684      return false;
1685    }
1686  }
1687
1688  static const uint16_t GPR32ArgRegs[] = {
1689    X86::EDI, X86::ESI, X86::EDX, X86::ECX, X86::R8D, X86::R9D
1690  };
1691  static const uint16_t GPR64ArgRegs[] = {
1692    X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8 , X86::R9
1693  };
1694
1695  Idx = 0;
1696  const TargetRegisterClass *RC32 = TLI.getRegClassFor(MVT::i32);
1697  const TargetRegisterClass *RC64 = TLI.getRegClassFor(MVT::i64);
1698  for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
1699       I != E; ++I, ++Idx) {
1700    if (I->use_empty())
1701      continue;
1702    bool is32Bit = TLI.getValueType(I->getType()) == MVT::i32;
1703    const TargetRegisterClass *RC = is32Bit ? RC32 : RC64;
1704    unsigned SrcReg = is32Bit ? GPR32ArgRegs[Idx] : GPR64ArgRegs[Idx];
1705    unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
1706    // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
1707    // Without this, EmitLiveInCopies may eliminate the livein if its only
1708    // use is a bitcast (which isn't turned into an instruction).
1709    unsigned ResultReg = createResultReg(RC);
1710    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
1711            ResultReg).addReg(DstReg, getKillRegState(true));
1712    UpdateValueMap(I, ResultReg);
1713  }
1714  return true;
1715}
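// For example, for "define i64 @f(i64 %a, i32 %b)" with the C calling
// convention on x86-64, %a is taken from RDI and %b from ESI; each incoming
// register is added as a live-in and then copied into a fresh virtual
// register, as explained in the FIXME above.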
1716
1717bool X86FastISel::X86SelectCall(const Instruction *I) {
1718  const CallInst *CI = cast<CallInst>(I);
1719  const Value *Callee = CI->getCalledValue();
1720
1721  // Can't handle inline asm yet.
1722  if (isa<InlineAsm>(Callee))
1723    return false;
1724
1725  // Handle intrinsic calls.
1726  if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI))
1727    return X86VisitIntrinsicCall(*II);
1728
1729  // Allow SelectionDAG isel to handle tail calls.
1730  if (cast<CallInst>(I)->isTailCall())
1731    return false;
1732
1733  return DoSelectCall(I, 0);
1734}
1735
1736static unsigned computeBytesPoppedByCallee(const X86Subtarget &Subtarget,
1737                                           const ImmutableCallSite &CS) {
1738  if (Subtarget.is64Bit())
1739    return 0;
1740  if (Subtarget.isTargetWindows())
1741    return 0;
1742  CallingConv::ID CC = CS.getCallingConv();
1743  if (CC == CallingConv::Fast || CC == CallingConv::GHC)
1744    return 0;
1745  if (!CS.paramHasAttr(1, Attribute::StructRet))
1746    return 0;
1747  if (CS.paramHasAttr(1, Attribute::InReg))
1748    return 0;
1749  return 4;
1750}
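// For example, on 32-bit Linux a call to a function returning a struct by
// value passes a hidden sret pointer that the callee pops on return, so this
// returns 4, which becomes the callee-popped byte count on CALLSEQ_END.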
1751
1752// Select either a call or an llvm.memcpy/memmove/memset intrinsic.
1753bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
1754  const CallInst *CI = cast<CallInst>(I);
1755  const Value *Callee = CI->getCalledValue();
1756
1757  // Handle only C and fastcc calling conventions for now.
1758  ImmutableCallSite CS(CI);
1759  CallingConv::ID CC = CS.getCallingConv();
1760  if (CC != CallingConv::C && CC != CallingConv::Fast &&
1761      CC != CallingConv::X86_FastCall)
1762    return false;
1763
1764  // fastcc with -tailcallopt is intended to provide a guaranteed
1765  // tail call optimization. Fastisel doesn't know how to do that.
1766  if (CC == CallingConv::Fast && TM.Options.GuaranteedTailCallOpt)
1767    return false;
1768
1769  PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
1770  FunctionType *FTy = cast<FunctionType>(PT->getElementType());
1771  bool isVarArg = FTy->isVarArg();
1772
1773  // Don't know how to handle Win64 varargs yet.  Nothing special needed for
1774  // x86-32.  Special handling for x86-64 is implemented.
1775  if (isVarArg && Subtarget->isTargetWin64())
1776    return false;
1777
1778  // Fast-isel doesn't know about callee-pop yet.
1779  if (X86::isCalleePop(CC, Subtarget->is64Bit(), isVarArg,
1780                       TM.Options.GuaranteedTailCallOpt))
1781    return false;
1782
1783  // Check whether the function can return without sret-demotion.
1784  SmallVector<ISD::OutputArg, 4> Outs;
1785  GetReturnInfo(I->getType(), CS.getAttributes(), Outs, TLI);
1786  bool CanLowerReturn = TLI.CanLowerReturn(CS.getCallingConv(),
1787                                           *FuncInfo.MF, FTy->isVarArg(),
1788                                           Outs, FTy->getContext());
1789  if (!CanLowerReturn)
1790    return false;
1791
1792  // Materialize callee address in a register. FIXME: GV address can be
1793  // handled with a CALLpcrel32 instead.
1794  X86AddressMode CalleeAM;
1795  if (!X86SelectCallAddress(Callee, CalleeAM))
1796    return false;
1797  unsigned CalleeOp = 0;
1798  const GlobalValue *GV = 0;
1799  if (CalleeAM.GV != 0) {
1800    GV = CalleeAM.GV;
1801  } else if (CalleeAM.Base.Reg != 0) {
1802    CalleeOp = CalleeAM.Base.Reg;
1803  } else
1804    return false;
1805
1806  // Deal with call operands first.
1807  SmallVector<const Value *, 8> ArgVals;
1808  SmallVector<unsigned, 8> Args;
1809  SmallVector<MVT, 8> ArgVTs;
1810  SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
1811  unsigned arg_size = CS.arg_size();
1812  Args.reserve(arg_size);
1813  ArgVals.reserve(arg_size);
1814  ArgVTs.reserve(arg_size);
1815  ArgFlags.reserve(arg_size);
1816  for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
1817       i != e; ++i) {
1818    // If we're lowering a mem intrinsic instead of a regular call, skip the
1819    // last two arguments, which should not be passed to the underlying functions.
1820    if (MemIntName && e-i <= 2)
1821      break;
1822    Value *ArgVal = *i;
1823    ISD::ArgFlagsTy Flags;
1824    unsigned AttrInd = i - CS.arg_begin() + 1;
1825    if (CS.paramHasAttr(AttrInd, Attribute::SExt))
1826      Flags.setSExt();
1827    if (CS.paramHasAttr(AttrInd, Attribute::ZExt))
1828      Flags.setZExt();
1829
1830    if (CS.paramHasAttr(AttrInd, Attribute::ByVal)) {
1831      PointerType *Ty = cast<PointerType>(ArgVal->getType());
1832      Type *ElementTy = Ty->getElementType();
1833      unsigned FrameSize = TD.getTypeAllocSize(ElementTy);
1834      unsigned FrameAlign = CS.getParamAlignment(AttrInd);
1835      if (!FrameAlign)
1836        FrameAlign = TLI.getByValTypeAlignment(ElementTy);
1837      Flags.setByVal();
1838      Flags.setByValSize(FrameSize);
1839      Flags.setByValAlign(FrameAlign);
1840      if (!IsMemcpySmall(FrameSize))
1841        return false;
1842    }
1843
1844    if (CS.paramHasAttr(AttrInd, Attribute::InReg))
1845      Flags.setInReg();
1846    if (CS.paramHasAttr(AttrInd, Attribute::Nest))
1847      Flags.setNest();
1848
1849    // If this is an i1/i8/i16 argument, promote to i32 to avoid an extra
1850    // instruction.  This is safe because it is common to all fastisel supported
1851    // calling conventions on x86.
1852    if (ConstantInt *CI = dyn_cast<ConstantInt>(ArgVal)) {
1853      if (CI->getBitWidth() == 1 || CI->getBitWidth() == 8 ||
1854          CI->getBitWidth() == 16) {
1855        if (Flags.isSExt())
1856          ArgVal = ConstantExpr::getSExt(CI,Type::getInt32Ty(CI->getContext()));
1857        else
1858          ArgVal = ConstantExpr::getZExt(CI,Type::getInt32Ty(CI->getContext()));
1859      }
1860    }
1861
1862    unsigned ArgReg;
1863
1864    // Passing bools around ends up doing a trunc to i1 and passing it.
1865    // Codegen this as an argument + "and 1".
1866    if (ArgVal->getType()->isIntegerTy(1) && isa<TruncInst>(ArgVal) &&
1867        cast<TruncInst>(ArgVal)->getParent() == I->getParent() &&
1868        ArgVal->hasOneUse()) {
1869      ArgVal = cast<TruncInst>(ArgVal)->getOperand(0);
1870      ArgReg = getRegForValue(ArgVal);
1871      if (ArgReg == 0) return false;
1872
1873      MVT ArgVT;
1874      if (!isTypeLegal(ArgVal->getType(), ArgVT)) return false;
1875
1876      ArgReg = FastEmit_ri(ArgVT, ArgVT, ISD::AND, ArgReg,
1877                           ArgVal->hasOneUse(), 1);
1878    } else {
1879      ArgReg = getRegForValue(ArgVal);
1880    }
1881
1882    if (ArgReg == 0) return false;
1883
1884    Type *ArgTy = ArgVal->getType();
1885    MVT ArgVT;
1886    if (!isTypeLegal(ArgTy, ArgVT))
1887      return false;
1888    if (ArgVT == MVT::x86mmx)
1889      return false;
1890    unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy);
1891    Flags.setOrigAlign(OriginalAlignment);
1892
1893    Args.push_back(ArgReg);
1894    ArgVals.push_back(ArgVal);
1895    ArgVTs.push_back(ArgVT);
1896    ArgFlags.push_back(Flags);
1897  }
1898
1899  // Analyze operands of the call, assigning locations to each operand.
1900  SmallVector<CCValAssign, 16> ArgLocs;
1901  CCState CCInfo(CC, isVarArg, *FuncInfo.MF, TM, ArgLocs,
1902                 I->getParent()->getContext());
1903
1904  // Allocate shadow area for Win64
1905  if (Subtarget->isTargetWin64())
1906    CCInfo.AllocateStack(32, 8);
1907
1908  CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CC_X86);
1909
1910  // Get a count of how many bytes are to be pushed on the stack.
1911  unsigned NumBytes = CCInfo.getNextStackOffset();
1912
1913  // Issue CALLSEQ_START
1914  unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
1915  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(AdjStackDown))
1916    .addImm(NumBytes);
1917
1918  // Process arguments: walk the register/memloc assignments, inserting
1919  // copies / loads.
1920  SmallVector<unsigned, 4> RegArgs;
1921  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
1922    CCValAssign &VA = ArgLocs[i];
1923    unsigned Arg = Args[VA.getValNo()];
1924    EVT ArgVT = ArgVTs[VA.getValNo()];
1925
1926    // Promote the value if needed.
1927    switch (VA.getLocInfo()) {
1928    case CCValAssign::Full: break;
1929    case CCValAssign::SExt: {
1930      assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
1931             "Unexpected extend");
1932      bool Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(),
1933                                       Arg, ArgVT, Arg);
1934      assert(Emitted && "Failed to emit a sext!"); (void)Emitted;
1935      ArgVT = VA.getLocVT();
1936      break;
1937    }
1938    case CCValAssign::ZExt: {
1939      assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
1940             "Unexpected extend");
1941      bool Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(),
1942                                       Arg, ArgVT, Arg);
1943      assert(Emitted && "Failed to emit a zext!"); (void)Emitted;
1944      ArgVT = VA.getLocVT();
1945      break;
1946    }
1947    case CCValAssign::AExt: {
1948      assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
1949             "Unexpected extend");
1950      bool Emitted = X86FastEmitExtend(ISD::ANY_EXTEND, VA.getLocVT(),
1951                                       Arg, ArgVT, Arg);
1952      if (!Emitted)
1953        Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(),
1954                                    Arg, ArgVT, Arg);
1955      if (!Emitted)
1956        Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(),
1957                                    Arg, ArgVT, Arg);
1958
1959      assert(Emitted && "Failed to emit an aext!"); (void)Emitted;
1960      ArgVT = VA.getLocVT();
1961      break;
1962    }
1963    case CCValAssign::BCvt: {
1964      unsigned BC = FastEmit_r(ArgVT.getSimpleVT(), VA.getLocVT(),
1965                               ISD::BITCAST, Arg, /*TODO: Kill=*/false);
1966      assert(BC != 0 && "Failed to emit a bitcast!");
1967      Arg = BC;
1968      ArgVT = VA.getLocVT();
1969      break;
1970    }
1971    case CCValAssign::VExt:
1972      // VExt has not been implemented, so this should be impossible to reach
1973      // for now.  However, fallback to Selection DAG isel once implemented.
1974      return false;
1975    case CCValAssign::Indirect:
1976      // FIXME: Indirect doesn't need extending, but fast-isel doesn't fully
1977      // support this.
1978      return false;
1979    }
1980
1981    if (VA.isRegLoc()) {
1982      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
1983              VA.getLocReg()).addReg(Arg);
1984      RegArgs.push_back(VA.getLocReg());
1985    } else {
1986      unsigned LocMemOffset = VA.getLocMemOffset();
1987      X86AddressMode AM;
1988      AM.Base.Reg = RegInfo->getStackRegister();
1989      AM.Disp = LocMemOffset;
1990      const Value *ArgVal = ArgVals[VA.getValNo()];
1991      ISD::ArgFlagsTy Flags = ArgFlags[VA.getValNo()];
1992
1993      if (Flags.isByVal()) {
1994        X86AddressMode SrcAM;
1995        SrcAM.Base.Reg = Arg;
1996        bool Res = TryEmitSmallMemcpy(AM, SrcAM, Flags.getByValSize());
1997        assert(Res && "memcpy length already checked!"); (void)Res;
1998      } else if (isa<ConstantInt>(ArgVal) || isa<ConstantPointerNull>(ArgVal)) {
1999        // If this is a really simple value, emit this with the Value* version
2000        // of X86FastEmitStore.  If it isn't simple, we don't want to do this,
2001        // as it can cause us to reevaluate the argument.
2002        if (!X86FastEmitStore(ArgVT, ArgVal, AM))
2003          return false;
2004      } else {
2005        if (!X86FastEmitStore(ArgVT, Arg, AM))
2006          return false;
2007      }
2008    }
2009  }
2010
2011  // ELF/PIC requires the GOT pointer to live in EBX before any function
2012  // call made through the PLT.
2013  if (Subtarget->isPICStyleGOT()) {
2014    unsigned Base = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
2015    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
2016            X86::EBX).addReg(Base);
2017  }
2018
2019  if (Subtarget->is64Bit() && isVarArg && !Subtarget->isTargetWin64()) {
2020    // Count the number of XMM registers allocated.
2021    static const uint16_t XMMArgRegs[] = {
2022      X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
2023      X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
2024    };
2025    unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs, 8);
2026    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::MOV8ri),
2027            X86::AL).addImm(NumXMMRegs);
2028  }
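  // For example, a SysV x86-64 variadic call that passes one double in XMM0
  // (such as printf("%f", x)) gets AL = 1 here; AL tells the variadic callee
  // how many vector registers carry arguments.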
2029
2030  // Issue the call.
2031  MachineInstrBuilder MIB;
2032  if (CalleeOp) {
2033    // Register-indirect call.
2034    unsigned CallOpc;
2035    if (Subtarget->is64Bit())
2036      CallOpc = X86::CALL64r;
2037    else
2038      CallOpc = X86::CALL32r;
2039    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc))
2040      .addReg(CalleeOp);
2041
2042  } else {
2043    // Direct call.
2044    assert(GV && "Not a direct call");
2045    unsigned CallOpc;
2046    if (Subtarget->is64Bit())
2047      CallOpc = X86::CALL64pcrel32;
2048    else
2049      CallOpc = X86::CALLpcrel32;
2050
2051    // See if we need any target-specific flags on the GV operand.
2052    unsigned char OpFlags = 0;
2053
2054    // On ELF targets, in both X86-64 and X86-32 mode, direct calls to
2055    // external symbols must go through the PLT in PIC mode.  If the symbol
2056    // has hidden or protected visibility, or if it is static or local, then
2057    // we don't need to use the PLT - we can directly call it.
2058    if (Subtarget->isTargetELF() &&
2059        TM.getRelocationModel() == Reloc::PIC_ &&
2060        GV->hasDefaultVisibility() && !GV->hasLocalLinkage()) {
2061      OpFlags = X86II::MO_PLT;
2062    } else if (Subtarget->isPICStyleStubAny() &&
2063               (GV->isDeclaration() || GV->isWeakForLinker()) &&
2064               (!Subtarget->getTargetTriple().isMacOSX() ||
2065                Subtarget->getTargetTriple().isMacOSXVersionLT(10, 5))) {
2066      // PC-relative references to external symbols should go through $stub,
2067      // unless we're building with the Leopard linker or later, which
2068      // automatically synthesizes these stubs.
2069      OpFlags = X86II::MO_DARWIN_STUB;
2070    }
2071
2072
2073    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc));
2074    if (MemIntName)
2075      MIB.addExternalSymbol(MemIntName, OpFlags);
2076    else
2077      MIB.addGlobalAddress(GV, 0, OpFlags);
2078  }
2079
2080  // Add a register mask with the call-preserved registers.
2081  // Proper defs for return values will be added by setPhysRegsDeadExcept().
2082  MIB.addRegMask(TRI.getCallPreservedMask(CS.getCallingConv()));
2083
2084  // Add an implicit use GOT pointer in EBX.
2085  if (Subtarget->isPICStyleGOT())
2086    MIB.addReg(X86::EBX, RegState::Implicit);
2087
2088  if (Subtarget->is64Bit() && isVarArg && !Subtarget->isTargetWin64())
2089    MIB.addReg(X86::AL, RegState::Implicit);
2090
2091  // Add implicit physical register uses to the call.
2092  for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
2093    MIB.addReg(RegArgs[i], RegState::Implicit);
2094
2095  // Issue CALLSEQ_END
2096  unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
2097  const unsigned NumBytesCallee = computeBytesPoppedByCallee(*Subtarget, CS);
2098  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(AdjStackUp))
2099    .addImm(NumBytes).addImm(NumBytesCallee);
2100
2101  // Build info for return calling conv lowering code.
2102  // FIXME: This is practically a copy-paste from TargetLowering::LowerCallTo.
2103  SmallVector<ISD::InputArg, 32> Ins;
2104  SmallVector<EVT, 4> RetTys;
2105  ComputeValueVTs(TLI, I->getType(), RetTys);
2106  for (unsigned i = 0, e = RetTys.size(); i != e; ++i) {
2107    EVT VT = RetTys[i];
2108    MVT RegisterVT = TLI.getRegisterType(I->getParent()->getContext(), VT);
2109    unsigned NumRegs = TLI.getNumRegisters(I->getParent()->getContext(), VT);
2110    for (unsigned j = 0; j != NumRegs; ++j) {
2111      ISD::InputArg MyFlags;
2112      MyFlags.VT = RegisterVT;
2113      MyFlags.Used = !CS.getInstruction()->use_empty();
2114      if (CS.paramHasAttr(0, Attribute::SExt))
2115        MyFlags.Flags.setSExt();
2116      if (CS.paramHasAttr(0, Attribute::ZExt))
2117        MyFlags.Flags.setZExt();
2118      if (CS.paramHasAttr(0, Attribute::InReg))
2119        MyFlags.Flags.setInReg();
2120      Ins.push_back(MyFlags);
2121    }
2122  }
2123
2124  // Now handle call return values.
2125  SmallVector<unsigned, 4> UsedRegs;
2126  SmallVector<CCValAssign, 16> RVLocs;
2127  CCState CCRetInfo(CC, false, *FuncInfo.MF, TM, RVLocs,
2128                    I->getParent()->getContext());
2129  unsigned ResultReg = FuncInfo.CreateRegs(I->getType());
2130  CCRetInfo.AnalyzeCallResult(Ins, RetCC_X86);
2131  for (unsigned i = 0; i != RVLocs.size(); ++i) {
2132    EVT CopyVT = RVLocs[i].getValVT();
2133    unsigned CopyReg = ResultReg + i;
2134
2135    // If this is a call to a function that returns an fp value on the x87 fp
2136    // stack, but where we prefer to use the value in xmm registers, copy it
2137    // out as F80 and use a truncate to move it from fp stack reg to xmm reg.
2138    if ((RVLocs[i].getLocReg() == X86::ST0 ||
2139         RVLocs[i].getLocReg() == X86::ST1)) {
2140      if (isScalarFPTypeInSSEReg(RVLocs[i].getValVT())) {
2141        CopyVT = MVT::f80;
2142        CopyReg = createResultReg(&X86::RFP80RegClass);
2143      }
2144      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::FpPOP_RETVAL),
2145              CopyReg);
2146    } else {
2147      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
2148              CopyReg).addReg(RVLocs[i].getLocReg());
2149      UsedRegs.push_back(RVLocs[i].getLocReg());
2150    }
2151
2152    if (CopyVT != RVLocs[i].getValVT()) {
2153      // Round the F80 to the right size, which also moves it to the
2154      // appropriate xmm register. This is accomplished by storing the F80
2155      // value in memory and then loading it back. Ewww...
2156      EVT ResVT = RVLocs[i].getValVT();
2157      unsigned Opc = ResVT == MVT::f32 ? X86::ST_Fp80m32 : X86::ST_Fp80m64;
2158      unsigned MemSize = ResVT.getSizeInBits()/8;
2159      int FI = MFI.CreateStackObject(MemSize, MemSize, false);
2160      addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
2161                                TII.get(Opc)), FI)
2162        .addReg(CopyReg);
2163      Opc = ResVT == MVT::f32 ? X86::MOVSSrm : X86::MOVSDrm;
2164      addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
2165                                TII.get(Opc), ResultReg + i), FI);
2166    }
2167  }
2168
2169  if (RVLocs.size())
2170    UpdateValueMap(I, ResultReg, RVLocs.size());
2171
2172  // Set all unused physreg defs as dead.
2173  static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);
2174
2175  return true;
2176}
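// Taken together, a simple call such as %r = call i32 @foo(i32 %x) on x86-64
// is selected roughly as: CALLSEQ_START, COPY EDI <- %x, CALL64pcrel32 @foo
// (plus the register mask and implicit register uses), CALLSEQ_END, and a
// COPY of EAX into the result register.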
2177
2178
2179bool
2180X86FastISel::TargetSelectInstruction(const Instruction *I) {
2181  switch (I->getOpcode()) {
2182  default: break;
2183  case Instruction::Load:
2184    return X86SelectLoad(I);
2185  case Instruction::Store:
2186    return X86SelectStore(I);
2187  case Instruction::Ret:
2188    return X86SelectRet(I);
2189  case Instruction::ICmp:
2190  case Instruction::FCmp:
2191    return X86SelectCmp(I);
2192  case Instruction::ZExt:
2193    return X86SelectZExt(I);
2194  case Instruction::Br:
2195    return X86SelectBranch(I);
2196  case Instruction::Call:
2197    return X86SelectCall(I);
2198  case Instruction::LShr:
2199  case Instruction::AShr:
2200  case Instruction::Shl:
2201    return X86SelectShift(I);
2202  case Instruction::SDiv:
2203  case Instruction::UDiv:
2204  case Instruction::SRem:
2205  case Instruction::URem:
2206    return X86SelectDivRem(I);
2207  case Instruction::Select:
2208    return X86SelectSelect(I);
2209  case Instruction::Trunc:
2210    return X86SelectTrunc(I);
2211  case Instruction::FPExt:
2212    return X86SelectFPExt(I);
2213  case Instruction::FPTrunc:
2214    return X86SelectFPTrunc(I);
2215  case Instruction::IntToPtr: // Deliberate fall-through.
2216  case Instruction::PtrToInt: {
2217    EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
2218    EVT DstVT = TLI.getValueType(I->getType());
2219    if (DstVT.bitsGT(SrcVT))
2220      return X86SelectZExt(I);
2221    if (DstVT.bitsLT(SrcVT))
2222      return X86SelectTrunc(I);
2223    unsigned Reg = getRegForValue(I->getOperand(0));
2224    if (Reg == 0) return false;
2225    UpdateValueMap(I, Reg);
2226    return true;
2227  }
2228  }
2229
2230  return false;
2231}
2232
2233unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) {
2234  MVT VT;
2235  if (!isTypeLegal(C->getType(), VT))
2236    return 0;
2237
2238  // Can't handle alternate code models yet.
2239  if (TM.getCodeModel() != CodeModel::Small)
2240    return 0;
2241
2242  // Get opcode and regclass of the output for the given load instruction.
2243  unsigned Opc = 0;
2244  const TargetRegisterClass *RC = NULL;
2245  switch (VT.SimpleTy) {
2246  default: return 0;
2247  case MVT::i8:
2248    Opc = X86::MOV8rm;
2249    RC  = &X86::GR8RegClass;
2250    break;
2251  case MVT::i16:
2252    Opc = X86::MOV16rm;
2253    RC  = &X86::GR16RegClass;
2254    break;
2255  case MVT::i32:
2256    Opc = X86::MOV32rm;
2257    RC  = &X86::GR32RegClass;
2258    break;
2259  case MVT::i64:
2260    // Must be in x86-64 mode.
2261    Opc = X86::MOV64rm;
2262    RC  = &X86::GR64RegClass;
2263    break;
2264  case MVT::f32:
2265    if (X86ScalarSSEf32) {
2266      Opc = Subtarget->hasAVX() ? X86::VMOVSSrm : X86::MOVSSrm;
2267      RC  = &X86::FR32RegClass;
2268    } else {
2269      Opc = X86::LD_Fp32m;
2270      RC  = &X86::RFP32RegClass;
2271    }
2272    break;
2273  case MVT::f64:
2274    if (X86ScalarSSEf64) {
2275      Opc = Subtarget->hasAVX() ? X86::VMOVSDrm : X86::MOVSDrm;
2276      RC  = &X86::FR64RegClass;
2277    } else {
2278      Opc = X86::LD_Fp64m;
2279      RC  = &X86::RFP64RegClass;
2280    }
2281    break;
2282  case MVT::f80:
2283    // No f80 support yet.
2284    return 0;
2285  }
2286
2287  // Materialize addresses with LEA instructions.
2288  if (isa<GlobalValue>(C)) {
2289    X86AddressMode AM;
2290    if (X86SelectAddress(C, AM)) {
2291      // If the expression is just a basereg, then we're done; otherwise we need
2292      // to emit an LEA.
2293      if (AM.BaseType == X86AddressMode::RegBase &&
2294          AM.IndexReg == 0 && AM.Disp == 0 && AM.GV == 0)
2295        return AM.Base.Reg;
2296
2297      Opc = TLI.getPointerTy() == MVT::i32 ? X86::LEA32r : X86::LEA64r;
2298      unsigned ResultReg = createResultReg(RC);
2299      addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
2300                             TII.get(Opc), ResultReg), AM);
2301      return ResultReg;
2302    }
2303    return 0;
2304  }
2305
2306  // MachineConstantPool wants an explicit alignment.
2307  unsigned Align = TD.getPrefTypeAlignment(C->getType());
2308  if (Align == 0) {
2309    // Alignment of vector types.  FIXME!
2310    Align = TD.getTypeAllocSize(C->getType());
2311  }
2312
2313  // x86-32 PIC requires a PIC base register for constant pools.
2314  unsigned PICBase = 0;
2315  unsigned char OpFlag = 0;
2316  if (Subtarget->isPICStyleStubPIC()) { // Not dynamic-no-pic
2317    OpFlag = X86II::MO_PIC_BASE_OFFSET;
2318    PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
2319  } else if (Subtarget->isPICStyleGOT()) {
2320    OpFlag = X86II::MO_GOTOFF;
2321    PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
2322  } else if (Subtarget->isPICStyleRIPRel() &&
2323             TM.getCodeModel() == CodeModel::Small) {
2324    PICBase = X86::RIP;
2325  }
2326
2327  // Create the load from the constant pool.
2328  unsigned MCPOffset = MCP.getConstantPoolIndex(C, Align);
2329  unsigned ResultReg = createResultReg(RC);
2330  addConstantPoolReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
2331                                   TII.get(Opc), ResultReg),
2332                           MCPOffset, PICBase, OpFlag);
2333
2334  return ResultReg;
2335}
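// For example, a double constant with SSE2 becomes a MOVSDrm (VMOVSDrm with
// AVX) load from the constant pool, addressed RIP-relatively when the
// subtarget uses RIP-relative PIC, while a GlobalValue address is
// materialized with LEA32r/LEA64r unless it is already sitting in a plain
// base register.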
2336
2337unsigned X86FastISel::TargetMaterializeAlloca(const AllocaInst *C) {
2338  // Fail on dynamic allocas. At this point, getRegForValue has already
2339  // checked its CSE maps, so if we're here trying to handle a dynamic
2340  // alloca, we're not going to succeed. X86SelectAddress has a
2341  // check for dynamic allocas, because it's called directly from
2342  // various places, but TargetMaterializeAlloca also needs a check
2343  // in order to avoid recursion between getRegForValue,
2344  // X86SelectAddress, and TargetMaterializeAlloca.
2345  if (!FuncInfo.StaticAllocaMap.count(C))
2346    return 0;
2347
2348  X86AddressMode AM;
2349  if (!X86SelectAddress(C, AM))
2350    return 0;
2351  unsigned Opc = Subtarget->is64Bit() ? X86::LEA64r : X86::LEA32r;
2352  const TargetRegisterClass* RC = TLI.getRegClassFor(TLI.getPointerTy());
2353  unsigned ResultReg = createResultReg(RC);
2354  addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
2355                         TII.get(Opc), ResultReg), AM);
2356  return ResultReg;
2357}
2358
2359unsigned X86FastISel::TargetMaterializeFloatZero(const ConstantFP *CF) {
2360  MVT VT;
2361  if (!isTypeLegal(CF->getType(), VT))
2362    return 0;
2363
2364  // Get opcode and regclass for the given zero.
2365  unsigned Opc = 0;
2366  const TargetRegisterClass *RC = NULL;
2367  switch (VT.SimpleTy) {
2368  default: return 0;
2369  case MVT::f32:
2370    if (X86ScalarSSEf32) {
2371      Opc = X86::FsFLD0SS;
2372      RC  = &X86::FR32RegClass;
2373    } else {
2374      Opc = X86::LD_Fp032;
2375      RC  = &X86::RFP32RegClass;
2376    }
2377    break;
2378  case MVT::f64:
2379    if (X86ScalarSSEf64) {
2380      Opc = X86::FsFLD0SD;
2381      RC  = &X86::FR64RegClass;
2382    } else {
2383      Opc = X86::LD_Fp064;
2384      RC  = &X86::RFP64RegClass;
2385    }
2386    break;
2387  case MVT::f80:
2388    // No f80 support yet.
2389    return 0;
2390  }
2391
2392  unsigned ResultReg = createResultReg(RC);
2393  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg);
2394  return ResultReg;
2395}
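// For example, a "float 0.0" operand with SSE1 is materialized with FsFLD0SS
// (a pseudo that expands to an xorps-style zeroing idiom) rather than a
// constant-pool load; without SSE the x87 load-zero form (LD_Fp032) is used.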
2396
2397
2398bool X86FastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
2399                                      const LoadInst *LI) {
2400  X86AddressMode AM;
2401  if (!X86SelectAddress(LI->getOperand(0), AM))
2402    return false;
2403
2404  const X86InstrInfo &XII = (const X86InstrInfo&)TII;
2405
2406  unsigned Size = TD.getTypeAllocSize(LI->getType());
2407  unsigned Alignment = LI->getAlignment();
2408
2409  SmallVector<MachineOperand, 8> AddrOps;
2410  AM.getFullAddress(AddrOps);
2411
2412  MachineInstr *Result =
2413    XII.foldMemoryOperandImpl(*FuncInfo.MF, MI, OpNo, AddrOps, Size, Alignment);
2414  if (Result == 0) return false;
2415
2416  FuncInfo.MBB->insert(FuncInfo.InsertPt, Result);
2417  MI->eraseFromParent();
2418  return true;
2419}
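// For example, a MOV32rm load whose only use is an ADD32rr can be folded into
// a single ADD32rm: foldMemoryOperandImpl builds the memory form, which is
// inserted in place of MI, and the original MI is erased.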
2420
2421
2422namespace llvm {
2423  FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo,
2424                                const TargetLibraryInfo *libInfo) {
2425    return new X86FastISel(funcInfo, libInfo);
2426  }
2427}
2428
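// X86::createFastISel is the factory that the target's createFastISel hook
// (in X86ISelLowering) is expected to forward to, so the common FastISel
// driver picks up this implementation whenever fast instruction selection is
// enabled, e.g. at -O0.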