X86ISelLowering.h revision 239462
1//===-- X86ISelLowering.h - X86 DAG Lowering Interface ----------*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file defines the interfaces that X86 uses to lower LLVM code into a
11// selection DAG.
12//
13//===----------------------------------------------------------------------===//
14
15#ifndef X86ISELLOWERING_H
16#define X86ISELLOWERING_H
17
18#include "X86Subtarget.h"
19#include "X86RegisterInfo.h"
20#include "X86MachineFunctionInfo.h"
21#include "llvm/Target/TargetLowering.h"
22#include "llvm/Target/TargetOptions.h"
23#include "llvm/CodeGen/FastISel.h"
24#include "llvm/CodeGen/SelectionDAG.h"
25#include "llvm/CodeGen/CallingConvLower.h"
26
27namespace llvm {
  namespace X86ISD {
    // X86 Specific DAG Nodes
    enum NodeType {
      // Start the numbering where the builtin ops leave off.
      FIRST_NUMBER = ISD::BUILTIN_OP_END,

      /// BSF - Bit scan forward.
      /// BSR - Bit scan reverse.
      BSF,
      BSR,

      /// SHLD, SHRD - Double shift instructions. These correspond to
      /// X86::SHLDxx and X86::SHRDxx instructions.
      SHLD,
      SHRD,

      /// FAND - Bitwise logical AND of floating point values. This corresponds
      /// to X86::ANDPS or X86::ANDPD.
      FAND,

      /// FOR - Bitwise logical OR of floating point values. This corresponds
      /// to X86::ORPS or X86::ORPD.
      FOR,

      /// FXOR - Bitwise logical XOR of floating point values. This corresponds
      /// to X86::XORPS or X86::XORPD.
      FXOR,

      /// FSRL - Bitwise logical right shift of floating point values. This
      /// corresponds to X86::PSRLDQ.
      FSRL,

      /// CALL - These operations represent an abstract X86 call
      /// instruction, which includes a bunch of information.  In particular the
      /// operands of these nodes are:
      ///
      ///     #0 - The incoming token chain
      ///     #1 - The callee
      ///     #2 - The number of arg bytes the caller pushes on the stack.
      ///     #3 - The number of arg bytes the callee pops off the stack.
      ///     #4 - The value to pass in AL/AX/EAX (optional)
      ///     #5 - The value to pass in DL/DX/EDX (optional)
      ///
      /// The result values of these nodes are:
      ///
      ///     #0 - The outgoing token chain
      ///     #1 - The first register result value (optional)
      ///     #2 - The second register result value (optional)
      ///
      CALL,

      /// RDTSC_DAG - This operation implements the lowering for
      /// readcyclecounter.
      RDTSC_DAG,

      /// X86 compare and logical compare instructions.
      CMP, COMI, UCOMI,

      /// X86 bit-test instructions.
      BT,

      /// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS
      /// operand, usually produced by a CMP instruction.
      SETCC,

      /// SETCC_CARRY - Same as SETCC except it's materialized with a sbb and
      /// the value is all one's or all zero's.
      SETCC_CARRY,  // R = carry_bit ? ~0 : 0

      /// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD.
      /// Operands are two FP values to compare; result is a mask of
      /// 0s or 1s.  Generally DTRT for C/C++ with NaNs.
      FSETCCss, FSETCCsd,

      /// X86 MOVMSK{pd|ps}, extracts sign bits of two or four FP values,
      /// result in an integer GPR.  Needs masking for scalar result.
      FGETSIGNx86,

      /// X86 conditional moves. Operand 0 and operand 1 are the two values
      /// to select from. Operand 2 is the condition code, and operand 3 is the
      /// flag operand produced by a CMP or TEST instruction. It also writes a
      /// flag result.
      CMOV,

      /// X86 conditional branches. Operand 0 is the chain operand, operand 1
      /// is the block to branch if condition is true, operand 2 is the
      /// condition code, and operand 3 is the flag operand produced by a CMP
      /// or TEST instruction.
      BRCOND,

      /// Return with a flag operand. Operand 0 is the chain operand, operand
      /// 1 is the number of bytes of stack to pop.
      RET_FLAG,

      /// REP_STOS - Repeat fill, corresponds to X86::REP_STOSx.
      REP_STOS,

      /// REP_MOVS - Repeat move, corresponds to X86::REP_MOVSx.
      REP_MOVS,

      /// GlobalBaseReg - On Darwin, this node represents the result of the popl
      /// at function entry, used for PIC code.
      GlobalBaseReg,

      /// Wrapper - A wrapper node for TargetConstantPool,
      /// TargetExternalSymbol, and TargetGlobalAddress.
      Wrapper,

      /// WrapperRIP - Special wrapper used under X86-64 PIC mode for RIP
      /// relative displacements.
      WrapperRIP,

      /// MOVDQ2Q - Copies a 64-bit value from the low word of an XMM vector
      /// to an MMX vector.  If you think this is too close to the previous
      /// mnemonic, so do I; blame Intel.
      MOVDQ2Q,

      /// PEXTRB - Extract an 8-bit value from a vector and zero extend it to
      /// i32, corresponds to X86::PEXTRB.
      PEXTRB,

      /// PEXTRW - Extract a 16-bit value from a vector and zero extend it to
      /// i32, corresponds to X86::PEXTRW.
      PEXTRW,

      /// INSERTPS - Insert any element of a 4 x float vector into any element
      /// of a destination 4 x float vector.
      INSERTPS,

      /// PINSRB - Insert the lower 8-bits of a 32-bit value to a vector,
      /// corresponds to X86::PINSRB.
      PINSRB,

      /// PINSRW - Insert the lower 16-bits of a 32-bit value to a vector,
      /// corresponds to X86::PINSRW.
      PINSRW, MMX_PINSRW,

      /// PSHUFB - Shuffle 16 8-bit values within a vector.
      PSHUFB,

      /// ANDNP - Bitwise Logical AND NOT of Packed FP values.
      ANDNP,

      /// PSIGN - Copy integer sign.
      PSIGN,

      /// BLENDV - Blend where the selector is an XMM.
      BLENDV,

      /// BLENDxx - Blend where the selector is an immediate.
      BLENDPW,
      BLENDPS,
      BLENDPD,

      /// HADD - Integer horizontal add.
      HADD,

      /// HSUB - Integer horizontal sub.
      HSUB,

      /// FHADD - Floating point horizontal add.
      FHADD,

      /// FHSUB - Floating point horizontal sub.
      FHSUB,

      /// FMAX, FMIN - Floating point max and min.
      ///
      FMAX, FMIN,

      /// FRSQRT, FRCP - Floating point reciprocal-sqrt and reciprocal
      /// approximation.  Note that these typically require refinement
      /// in order to obtain suitable precision.
      FRSQRT, FRCP,

      // TLSADDR - Thread Local Storage.
      TLSADDR,

      // TLSBASEADDR - Thread Local Storage. A call to get the start address
      // of the TLS block for the current module.
      TLSBASEADDR,

      // TLSCALL - Thread Local Storage.  When calling to an OS provided
      // thunk at the address from an earlier relocation.
      TLSCALL,

      // EH_RETURN - Exception Handling helpers.
      EH_RETURN,

      /// TC_RETURN - Tail call return.
      ///   operand #0 chain
      ///   operand #1 callee (register or absolute)
      ///   operand #2 stack adjustment
      ///   operand #3 optional in flag
      TC_RETURN,

      // VZEXT_MOVL - Vector move low and zero extend.
      VZEXT_MOVL,

      // VSEXT_MOVL - Vector move low and sign extend.
      VSEXT_MOVL,

      // VFPEXT - Vector FP extend.
      VFPEXT,

      // VSHLDQ, VSRLDQ - 128-bit vector logical left / right shift.
      VSHLDQ, VSRLDQ,

      // VSHL, VSRL, VSRA - Vector shift elements.
      VSHL, VSRL, VSRA,

      // VSHLI, VSRLI, VSRAI - Vector shift elements by immediate.
      VSHLI, VSRLI, VSRAI,

      // CMPP - Vector packed double/float comparison.
      CMPP,

      // PCMP* - Vector integer comparisons.
      PCMPEQ, PCMPGT,

      // ADD, SUB, SMUL, etc. - Arithmetic operations with FLAGS results.
      ADD, SUB, ADC, SBB, SMUL,
      INC, DEC, OR, XOR, AND,

      ANDN, // ANDN - Bitwise AND NOT with FLAGS results.

      BLSI,   // BLSI - Extract lowest set isolated bit
      BLSMSK, // BLSMSK - Get mask up to lowest set bit
      BLSR,   // BLSR - Reset lowest set bit

      UMUL, // LOW, HI, FLAGS = umul LHS, RHS

      // MUL_IMM - X86 specific multiply by immediate.
      MUL_IMM,

      // PTEST - Vector bitwise comparisons.
      PTEST,

      // TESTP - Vector packed fp sign bitwise comparisons.
      TESTP,

      // Several flavors of instructions with vector shuffle behaviors.
      PALIGN,
      PSHUFD,
      PSHUFHW,
      PSHUFLW,
      SHUFP,
      MOVDDUP,
      MOVSHDUP,
      MOVSLDUP,
      MOVLHPS,
      MOVLHPD,
      MOVHLPS,
      MOVLPS,
      MOVLPD,
      MOVSD,
      MOVSS,
      UNPCKL,
      UNPCKH,
      VPERMILP,
      VPERMV,
      VPERMI,
      VPERM2X128,
      VBROADCAST,

      // PMULUDQ - Vector multiply packed unsigned doubleword integers.
      PMULUDQ,

      // FMA nodes.
      FMADD,
      FNMADD,
      FMSUB,
      FNMSUB,
      FMADDSUB,
      FMSUBADD,

      // VASTART_SAVE_XMM_REGS - Save xmm argument registers to the stack,
      // according to %al. An operator is needed so that this can be expanded
      // with control flow.
      VASTART_SAVE_XMM_REGS,

      // WIN_ALLOCA - Windows's _chkstk call to do stack probing.
      WIN_ALLOCA,

      // SEG_ALLOCA - For allocating variable amounts of stack space when using
      // segmented stacks. Check if the current stacklet has enough space, and
      // falls back to heap allocation if not.
      SEG_ALLOCA,

      // WIN_FTOL - Windows's _ftol2 runtime routine to do fptoui.
      WIN_FTOL,

      // Memory barrier.
      MEMBARRIER,
      MFENCE,
      SFENCE,
      LFENCE,

      // FNSTSW16r - Store FP status word into i16 register.
      FNSTSW16r,

      // SAHF - Store contents of %ah into %eflags.
      SAHF,

      // RDRAND - Get a random integer and indicate whether it is valid in CF.
      RDRAND,

      // PCMP*STRI
      PCMPISTRI,
      PCMPESTRI,

      // ATOMADD64_DAG, ATOMSUB64_DAG, ATOMOR64_DAG, ATOMAND64_DAG,
      // ATOMXOR64_DAG, ATOMNAND64_DAG, ATOMSWAP64_DAG -
      // Atomic 64-bit binary operations.
      ATOMADD64_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
      ATOMSUB64_DAG,
      ATOMOR64_DAG,
      ATOMXOR64_DAG,
      ATOMAND64_DAG,
      ATOMNAND64_DAG,
      ATOMSWAP64_DAG,

      // LCMPXCHG_DAG, LCMPXCHG8_DAG, LCMPXCHG16_DAG - Compare and swap.
      LCMPXCHG_DAG,
      LCMPXCHG8_DAG,
      LCMPXCHG16_DAG,

      // VZEXT_LOAD - Load, scalar_to_vector, and zero extend.
      VZEXT_LOAD,

      // FNSTCW16m - Store FP control word into i16 memory.
      FNSTCW16m,

      /// FP_TO_INT*_IN_MEM - This instruction implements FP_TO_SINT with the
      /// integer destination in memory and a FP reg source.  This corresponds
      /// to the X86::FIST*m instructions and the rounding mode change stuff. It
      /// has two inputs (token chain and address) and two outputs (int value
      /// and token chain).
      FP_TO_INT16_IN_MEM,
      FP_TO_INT32_IN_MEM,
      FP_TO_INT64_IN_MEM,

      /// FILD, FILD_FLAG - This instruction implements SINT_TO_FP with the
      /// integer source in memory and FP reg result.  This corresponds to the
      /// X86::FILD*m instructions. It has three inputs (token chain, address,
      /// and source type) and two outputs (FP value and token chain).
      /// FILD_FLAG also produces a flag.
      FILD,
      FILD_FLAG,

      /// FLD - This instruction implements an extending load to FP stack slots.
      /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain
      /// operand, ptr to load from, and a ValueType node indicating the type
      /// to load to.
      FLD,

      /// FST - This instruction implements a truncating store to FP stack
      /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a
      /// chain operand, value to store, address, and a ValueType to store it
      /// as.
      FST,

      /// VAARG_64 - This instruction grabs the address of the next argument
      /// from a va_list. (reads and modifies the va_list in memory)
      VAARG_64

      // WARNING: Do not add anything in the end unless you want the node to
      // have memop! In fact, starting from ATOMADD64_DAG all opcodes will be
      // thought as target memory ops!
    };
  }
399
400  /// Define some predicates that are used for node matching.
  namespace X86 {
    /// isVEXTRACTF128Index - Return true if the specified
    /// EXTRACT_SUBVECTOR operand specifies a vector extract that is
    /// suitable for input to VEXTRACTF128.
    bool isVEXTRACTF128Index(SDNode *N);

    /// isVINSERTF128Index - Return true if the specified
    /// INSERT_SUBVECTOR operand specifies a subvector insert that is
    /// suitable for input to VINSERTF128.
    bool isVINSERTF128Index(SDNode *N);

    /// getExtractVEXTRACTF128Immediate - Return the appropriate
    /// immediate to extract the specified EXTRACT_SUBVECTOR index
    /// with VEXTRACTF128 instructions.
    unsigned getExtractVEXTRACTF128Immediate(SDNode *N);

    /// getInsertVINSERTF128Immediate - Return the appropriate
    /// immediate to insert at the specified INSERT_SUBVECTOR index
    /// with VINSERTF128 instructions.
    unsigned getInsertVINSERTF128Immediate(SDNode *N);

    /// isZeroNode - Returns true if Elt is a constant zero or a floating point
    /// constant +0.0.
    bool isZeroNode(SDValue Elt);

    /// isOffsetSuitableForCodeModel - Returns true if the given offset can
    /// fit into the displacement field of the instruction.
    bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
                                      bool hasSymbolicDisplacement = true);


    /// isCalleePop - Determines whether the callee is required to pop its
    /// own arguments. Callee pop is necessary to support tail calls.
    bool isCalleePop(CallingConv::ID CallingConv,
                     bool is64Bit, bool IsVarArg, bool TailCallOpt);
  }
437
438  //===--------------------------------------------------------------------===//
439  //  X86TargetLowering - X86 Implementation of the TargetLowering interface
440  class X86TargetLowering : public TargetLowering {
441  public:
442    explicit X86TargetLowering(X86TargetMachine &TM);
443
444    virtual unsigned getJumpTableEncoding() const;
445
    // Shift amounts on x86 are always i8, independent of the operand type.
    virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i8; }
447
448    virtual const MCExpr *
449    LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
450                              const MachineBasicBlock *MBB, unsigned uid,
451                              MCContext &Ctx) const;
452
    /// getPICJumpTableRelocBase - Returns relocation base for the given PIC
    /// jumptable.
455    virtual SDValue getPICJumpTableRelocBase(SDValue Table,
456                                             SelectionDAG &DAG) const;
457    virtual const MCExpr *
458    getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
459                                 unsigned JTI, MCContext &Ctx) const;
460
461    /// getStackPtrReg - Return the stack pointer register we are using: either
462    /// ESP or RSP.
    // Simple accessor for the cached X86StackPtr member (ESP or RSP; see above).
    unsigned getStackPtrReg() const { return X86StackPtr; }
464
    /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
    /// function arguments in the caller parameter area. For X86, aggregates
    /// that contain SSE vectors are placed at 16-byte boundaries while the
    /// rest are at 4-byte boundaries.
469    virtual unsigned getByValTypeAlignment(Type *Ty) const;
470
    /// getOptimalMemOpType - Returns the target specific optimal type for load
    /// and store operations as a result of memset, memcpy, and memmove
    /// lowering. If DstAlign is zero, that means the destination alignment can
    /// satisfy any constraint. Similarly, if SrcAlign is zero it
    /// means there isn't a need to check it against alignment requirement,
    /// probably because the source does not need to be loaded. If
    /// 'IsZeroVal' is true, that means it's safe to return a
    /// non-scalar-integer type, e.g. empty string source, constant, or loaded
    /// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is
    /// constant so it does not need to be loaded.
    /// It returns EVT::Other if the type should be determined using generic
    /// target-independent logic.
483    virtual EVT
484    getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
485                        bool IsZeroVal, bool MemcpyStrSrc,
486                        MachineFunction &MF) const;
487
    /// allowsUnalignedMemoryAccesses - Returns true if the target allows
    /// unaligned memory accesses of the specified type.
    virtual bool allowsUnalignedMemoryAccesses(EVT VT) const {
      // x86 reports unaligned accesses as allowed for every type.
      return true;
    }
493
494    /// LowerOperation - Provide custom lowering hooks for some operations.
495    ///
496    virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
497
498    /// ReplaceNodeResults - Replace the results of node with an illegal result
499    /// type with new values built out of custom code.
500    ///
501    virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
502                                    SelectionDAG &DAG) const;
503
504
505    virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
506
507    /// isTypeDesirableForOp - Return true if the target has native support for
508    /// the specified value type and it is 'desirable' to use the type for the
509    /// given node type. e.g. On x86 i16 is legal, but undesirable since i16
510    /// instruction encodings are longer and some i16 instructions are slow.
511    virtual bool isTypeDesirableForOp(unsigned Opc, EVT VT) const;
512
    /// IsDesirableToPromoteOp - Return true if the target has native support
    /// for the specified value type and it is 'desirable' to use the type.
    /// e.g. On x86 i16 is legal, but undesirable since i16 instruction
    /// encodings are longer and some i16 instructions are slow.
517    virtual bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const;
518
519    virtual MachineBasicBlock *
520      EmitInstrWithCustomInserter(MachineInstr *MI,
521                                  MachineBasicBlock *MBB) const;
522
523
524    /// getTargetNodeName - This method returns the name of a target specific
525    /// DAG node.
526    virtual const char *getTargetNodeName(unsigned Opcode) const;
527
528    /// getSetCCResultType - Return the value type to use for ISD::SETCC.
529    virtual EVT getSetCCResultType(EVT VT) const;
530
531    /// computeMaskedBitsForTargetNode - Determine which of the bits specified
532    /// in Mask are known to be either zero or one and return them in the
533    /// KnownZero/KnownOne bitsets.
534    virtual void computeMaskedBitsForTargetNode(const SDValue Op,
535                                                APInt &KnownZero,
536                                                APInt &KnownOne,
537                                                const SelectionDAG &DAG,
538                                                unsigned Depth = 0) const;
539
540    // ComputeNumSignBitsForTargetNode - Determine the number of bits in the
541    // operation that are sign bits.
542    virtual unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
543                                                     unsigned Depth) const;
544
545    virtual bool
546    isGAPlusOffset(SDNode *N, const GlobalValue* &GA, int64_t &Offset) const;
547
548    SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;
549
550    virtual bool ExpandInlineAsm(CallInst *CI) const;
551
552    ConstraintType getConstraintType(const std::string &Constraint) const;
553
554    /// Examine constraint string and operand type and determine a weight value.
555    /// The operand object must already have been set up with the operand type.
556    virtual ConstraintWeight getSingleConstraintMatchWeight(
557      AsmOperandInfo &info, const char *constraint) const;
558
559    virtual const char *LowerXConstraint(EVT ConstraintVT) const;
560
561    /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
562    /// vector.  If it is invalid, don't add anything to Ops. If hasMemory is
563    /// true it means one of the asm constraint of the inline asm instruction
564    /// being processed is 'm'.
565    virtual void LowerAsmOperandForConstraint(SDValue Op,
566                                              std::string &Constraint,
567                                              std::vector<SDValue> &Ops,
568                                              SelectionDAG &DAG) const;
569
570    /// getRegForInlineAsmConstraint - Given a physical register constraint
571    /// (e.g. {edx}), return the register number and the register class for the
572    /// register.  This should only be used for C_Register constraints.  On
573    /// error, this returns a register number of 0.
574    std::pair<unsigned, const TargetRegisterClass*>
575      getRegForInlineAsmConstraint(const std::string &Constraint,
576                                   EVT VT) const;
577
578    /// isLegalAddressingMode - Return true if the addressing mode represented
579    /// by AM is legal for this target, for a load/store of the specified type.
580    virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty)const;
581
582    /// isLegalICmpImmediate - Return true if the specified immediate is legal
583    /// icmp immediate, that is the target has icmp instructions which can
584    /// compare a register against the immediate without having to materialize
585    /// the immediate into a register.
586    virtual bool isLegalICmpImmediate(int64_t Imm) const;
587
588    /// isLegalAddImmediate - Return true if the specified immediate is legal
589    /// add immediate, that is the target has add instructions which can
590    /// add a register and the immediate without having to materialize
591    /// the immediate into a register.
592    virtual bool isLegalAddImmediate(int64_t Imm) const;
593
594    /// isTruncateFree - Return true if it's free to truncate a value of
595    /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate a i32 value in
596    /// register EAX to i16 by referencing its sub-register AX.
597    virtual bool isTruncateFree(Type *Ty1, Type *Ty2) const;
598    virtual bool isTruncateFree(EVT VT1, EVT VT2) const;
599
600    /// isZExtFree - Return true if any actual instruction that defines a
601    /// value of type Ty1 implicit zero-extends the value to Ty2 in the result
602    /// register. This does not necessarily include registers defined in
603    /// unknown ways, such as incoming arguments, or copies from unknown
604    /// virtual registers. Also, if isTruncateFree(Ty2, Ty1) is true, this
605    /// does not necessarily apply to truncate instructions. e.g. on x86-64,
606    /// all instructions that define 32-bit values implicit zero-extend the
607    /// result out to 64 bits.
608    virtual bool isZExtFree(Type *Ty1, Type *Ty2) const;
609    virtual bool isZExtFree(EVT VT1, EVT VT2) const;
610
611    /// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than
612    /// a pair of mul and add instructions. fmuladd intrinsics will be expanded to
613    /// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd
614    /// is expanded to mul + add.
    // Unconditionally true: FMA formation is additionally gated on FMA legality.
    virtual bool isFMAFasterThanMulAndAdd(EVT) const { return true; }
616
617    /// isNarrowingProfitable - Return true if it's profitable to narrow
618    /// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow
619    /// from i32 to i8 but not from i32 to i16.
620    virtual bool isNarrowingProfitable(EVT VT1, EVT VT2) const;
621
622    /// isFPImmLegal - Returns true if the target can instruction select the
623    /// specified FP immediate natively. If false, the legalizer will
624    /// materialize the FP immediate as a load from a constant pool.
625    virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const;
626
627    /// isShuffleMaskLegal - Targets can use this to indicate that they only
628    /// support *some* VECTOR_SHUFFLE operations, those with specific masks.
629    /// By default, if a target supports the VECTOR_SHUFFLE node, all mask
630    /// values are assumed to be legal.
631    virtual bool isShuffleMaskLegal(const SmallVectorImpl<int> &Mask,
632                                    EVT VT) const;
633
    /// isVectorClearMaskLegal - Similar to isShuffleMaskLegal. Targets can use
    /// this to indicate if there is a suitable VECTOR_SHUFFLE that can be used
    /// to replace a VAND with a constant pool entry.
638    virtual bool isVectorClearMaskLegal(const SmallVectorImpl<int> &Mask,
639                                        EVT VT) const;
640
641    /// ShouldShrinkFPConstant - If true, then instruction selection should
642    /// seek to shrink the FP constant of the specified type to a smaller type
643    /// in order to save space and / or reduce runtime.
644    virtual bool ShouldShrinkFPConstant(EVT VT) const {
645      // Don't shrink FP constpool if SSE2 is available since cvtss2sd is more
646      // expensive than a straight movsd. On the other hand, it's important to
647      // shrink long double fp constant since fldt is very slow.
648      return !X86ScalarSSEf64 || VT == MVT::f80;
649    }
650
    // Accessor for the cached X86Subtarget pointer.
    const X86Subtarget* getSubtarget() const {
      return Subtarget;
    }
654
655    /// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is
656    /// computed in an SSE register, not on the X87 floating point stack.
657    bool isScalarFPTypeInSSEReg(EVT VT) const {
658      return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is when SSE2
659      (VT == MVT::f32 && X86ScalarSSEf32);   // f32 is when SSE1
660    }
661
662    /// isTargetFTOL - Return true if the target uses the MSVC _ftol2 routine
663    /// for fptoui.
664    bool isTargetFTOL() const {
665      return Subtarget->isTargetWindows() && !Subtarget->is64Bit();
666    }
667
668    /// isIntegerTypeFTOL - Return true if the MSVC _ftol2 routine should be
669    /// used for fptoui to the given type.
670    bool isIntegerTypeFTOL(EVT VT) const {
671      return isTargetFTOL() && VT == MVT::i64;
672    }
673
674    /// createFastISel - This method returns a target specific FastISel object,
675    /// or null if the target does not support "fast" ISel.
676    virtual FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
677                                     const TargetLibraryInfo *libInfo) const;
678
679    /// getStackCookieLocation - Return true if the target stores stack
680    /// protector cookies at a fixed offset in some non-standard address
681    /// space, and populates the address space and offset as
682    /// appropriate.
683    virtual bool getStackCookieLocation(unsigned &AddressSpace, unsigned &Offset) const;
684
685    SDValue BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue StackSlot,
686                      SelectionDAG &DAG) const;
687
688  protected:
689    std::pair<const TargetRegisterClass*, uint8_t>
690    findRepresentativeClass(EVT VT) const;
691
692  private:
693    /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
694    /// make the right decision when generating code for different targets.
695    const X86Subtarget *Subtarget;
696    const X86RegisterInfo *RegInfo;
697    const TargetData *TD;
698
699    /// X86StackPtr - X86 physical register used as stack ptr.
700    unsigned X86StackPtr;
701
702    /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87
703    /// floating point ops.
704    /// When SSE is available, use it for f32 operations.
705    /// When SSE2 is available, use it for f64 operations.
706    bool X86ScalarSSEf32;
707    bool X86ScalarSSEf64;
708
709    /// LegalFPImmediates - A list of legal fp immediates.
710    std::vector<APFloat> LegalFPImmediates;
711
    /// addLegalFPImmediate - Indicate that this x86 target can instruction
    /// select the specified FP immediate natively, by appending it to the
    /// LegalFPImmediates list.
    void addLegalFPImmediate(const APFloat& Imm) {
      LegalFPImmediates.push_back(Imm);
    }
717
718    SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
719                            CallingConv::ID CallConv, bool isVarArg,
720                            const SmallVectorImpl<ISD::InputArg> &Ins,
721                            DebugLoc dl, SelectionDAG &DAG,
722                            SmallVectorImpl<SDValue> &InVals) const;
723    SDValue LowerMemArgument(SDValue Chain,
724                             CallingConv::ID CallConv,
725                             const SmallVectorImpl<ISD::InputArg> &ArgInfo,
726                             DebugLoc dl, SelectionDAG &DAG,
727                             const CCValAssign &VA,  MachineFrameInfo *MFI,
728                              unsigned i) const;
729    SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg,
730                             DebugLoc dl, SelectionDAG &DAG,
731                             const CCValAssign &VA,
732                             ISD::ArgFlagsTy Flags) const;
733
734    // Call lowering helpers.
735
736    /// IsEligibleForTailCallOptimization - Check whether the call is eligible
737    /// for tail call optimization. Targets which want to do tail call
738    /// optimization should implement this function.
739    bool IsEligibleForTailCallOptimization(SDValue Callee,
740                                           CallingConv::ID CalleeCC,
741                                           bool isVarArg,
742                                           bool isCalleeStructRet,
743                                           bool isCallerStructRet,
744                                    const SmallVectorImpl<ISD::OutputArg> &Outs,
745                                    const SmallVectorImpl<SDValue> &OutVals,
746                                    const SmallVectorImpl<ISD::InputArg> &Ins,
747                                           SelectionDAG& DAG) const;
748    bool IsCalleePop(bool isVarArg, CallingConv::ID CallConv) const;
749    SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr,
750                                SDValue Chain, bool IsTailCall, bool Is64Bit,
751                                int FPDiff, DebugLoc dl) const;
752
753    unsigned GetAlignedArgumentStackSize(unsigned StackSize,
754                                         SelectionDAG &DAG) const;
755
    /// FP_TO_INTHelper - Shared helper for the FP_TO_SINT / FP_TO_UINT
    /// lowerings (isSigned selects which). Returns a pair of SDValues;
    /// the meaning of isReplace is not visible from this declaration —
    /// see the definition in X86ISelLowering.cpp.
    std::pair<SDValue,SDValue> FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
                                               bool isSigned,
                                               bool isReplace) const;
759
    // Per-node lowering routines: each LowerXXX below produces the custom
    // DAG expansion for the correspondingly named ISD node on X86.  They are
    // presumably dispatched from LowerOperation / ReplaceNodeResults in
    // X86ISelLowering.cpp.
    SDValue LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl,
                                   SelectionDAG &DAG) const;
    SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
    // _SSE4 variants are the forms used when SSE4.1 is available.
    SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
    // Overload taking the GlobalValue directly, with an addend Offset folded
    // into the address computation.
    SDValue LowerGlobalAddress(const GlobalValue *GV, DebugLoc dl,
                               int64_t Offset, SelectionDAG &DAG) const;
    SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerBITCAST(SDValue op, SelectionDAG &DAG) const;
    SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
    // Width-specific UINT_TO_FP expansions.
    SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFABS(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFNEG(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFGETSIGN(SDValue Op, SelectionDAG &DAG) const;
    // LowerToBT - Try to turn an AND-based test into an X86 BT (bit test)
    // node for the given condition code.
    SDValue LowerToBT(SDValue And, ISD::CondCode CC,
                      DebugLoc dl, SelectionDAG &DAG) const;
    SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerMEMSET(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerCTLZ(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerCTLZ_ZERO_UNDEF(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerADD(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSUB(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerShift(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) const;

    // Atomic / synchronization node lowerings.
    SDValue LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerREADCYCLECOUNTER(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
    // PerformTruncateCombine - DAG-combine hook (note: takes an SDNode and a
    // DAGCombinerInfo, unlike the LowerXXX routines above).
    SDValue PerformTruncateCombine(SDNode* N, SelectionDAG &DAG, DAGCombinerInfo &DCI) const;
828
    // Utility functions to help LowerVECTOR_SHUFFLE

    /// LowerVECTOR_SHUFFLEv8i16 - Specialized shuffle lowering for v8i16.
    SDValue LowerVECTOR_SHUFFLEv8i16(SDValue Op, SelectionDAG &DAG) const;
    // NOTE(review): Op is taken by non-const reference here and in
    // LowerVectorFpExtend below, unlike every other LowerXXX declaration —
    // presumably so the implementation can update Op; confirm in the .cpp.
    SDValue LowerVectorBroadcast(SDValue &Op, SelectionDAG &DAG) const;
    /// NormalizeVectorShuffle - Canonicalize a shuffle before the main
    /// shuffle-lowering logic runs.
    SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const;

    SDValue LowerVectorFpExtend(SDValue &Op, SelectionDAG &DAG) const;
835
    // Overrides of the TargetLowering hooks for formal arguments, calls and
    // returns; these must keep the exact signatures declared in the base
    // class.

    /// LowerFormalArguments - Lower the incoming (formal) arguments of the
    /// function being compiled, appending one SDValue per argument to InVals.
    virtual SDValue
      LowerFormalArguments(SDValue Chain,
                           CallingConv::ID CallConv, bool isVarArg,
                           const SmallVectorImpl<ISD::InputArg> &Ins,
                           DebugLoc dl, SelectionDAG &DAG,
                           SmallVectorImpl<SDValue> &InVals) const;
    /// LowerCall - Lower an outgoing call described by CLI, appending the
    /// call's results to InVals.
    virtual SDValue
      LowerCall(CallLoweringInfo &CLI,
                SmallVectorImpl<SDValue> &InVals) const;

    /// LowerReturn - Lower the return of the given outgoing values from the
    /// current function.
    virtual SDValue
      LowerReturn(SDValue Chain,
                  CallingConv::ID CallConv, bool isVarArg,
                  const SmallVectorImpl<ISD::OutputArg> &Outs,
                  const SmallVectorImpl<SDValue> &OutVals,
                  DebugLoc dl, SelectionDAG &DAG) const;

    /// isUsedByReturnOnly - Return true if N's only use is a return node
    /// (used to enable tail-call-style optimizations).
    virtual bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const;

    /// mayBeEmittedAsTailCall - Return true if the call may be emitted as a
    /// tail call.
    virtual bool mayBeEmittedAsTailCall(CallInst *CI) const;

    /// getTypeForExtArgOrReturn - Return the EVT to which a small integer
    /// argument or return value of type VT is extended, per ExtendKind.
    virtual EVT
    getTypeForExtArgOrReturn(LLVMContext &Context, EVT VT,
                             ISD::NodeType ExtendKind) const;

    /// CanLowerReturn - Return true if the given outgoing values can be
    /// returned in registers for this calling convention (otherwise sret
    /// demotion is required).
    virtual bool
    CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
                   bool isVarArg,
                   const SmallVectorImpl<ISD::OutputArg> &Outs,
                   LLVMContext &Context) const;
866
    /// Utility function to emit string processing sse4.2 instructions
    /// that return in xmm0.
    /// This takes the instruction to expand, the associated machine basic
    /// block, the number of args, and whether or not the second arg is
    /// in memory or not.
    MachineBasicBlock *EmitPCMP(MachineInstr *BInstr, MachineBasicBlock *BB,
                                unsigned argNum, bool inMem) const;

    /// Utility functions to emit monitor and mwait instructions. These
    /// need to make sure that the arguments to the intrinsic are in the
    /// correct registers.
    MachineBasicBlock *EmitMonitor(MachineInstr *MI,
                                   MachineBasicBlock *BB) const;
    MachineBasicBlock *EmitMwait(MachineInstr *MI, MachineBasicBlock *BB) const;

    /// Utility function to emit atomic bitwise operations (and, or, xor).
    /// It takes the bitwise instruction to expand, the associated machine basic
    /// block, and the associated X86 opcodes for reg/reg and reg/imm.
    MachineBasicBlock *EmitAtomicBitwiseWithCustomInserter(
                                                    MachineInstr *BInstr,
                                                    MachineBasicBlock *BB,
                                                    unsigned regOpc,
                                                    unsigned immOpc,
                                                    unsigned loadOpc,
                                                    unsigned cxchgOpc,
                                                    unsigned notOpc,
                                                    unsigned EAXreg,
                                              const TargetRegisterClass *RC,
                                                    bool Invert = false) const;

    /// EmitAtomicBit6432WithCustomInserter - Like the above, but for 64-bit
    /// atomics on 32-bit targets: separate reg/imm opcodes are supplied for
    /// the low (L) and high (H) halves of the operation.
    MachineBasicBlock *EmitAtomicBit6432WithCustomInserter(
                                                    MachineInstr *BInstr,
                                                    MachineBasicBlock *BB,
                                                    unsigned regOpcL,
                                                    unsigned regOpcH,
                                                    unsigned immOpcL,
                                                    unsigned immOpcH,
                                                    bool Invert = false) const;

    /// Utility function to emit atomic min and max.  It takes the min/max
    /// instruction to expand, the associated basic block, and the associated
    /// cmov opcode for moving the min or max value.
    MachineBasicBlock *EmitAtomicMinMaxWithCustomInserter(MachineInstr *BInstr,
                                                          MachineBasicBlock *BB,
                                                        unsigned cmovOpc) const;

    // Utility function to emit the low-level va_arg code for X86-64.
    MachineBasicBlock *EmitVAARG64WithCustomInserter(
                       MachineInstr *MI,
                       MachineBasicBlock *MBB) const;

    /// Utility function to emit the xmm reg save portion of va_start.
    MachineBasicBlock *EmitVAStartSaveXMMRegsWithCustomInserter(
                                                   MachineInstr *BInstr,
                                                   MachineBasicBlock *BB) const;

    /// EmitLoweredSelect - Custom inserter for SELECT-style pseudo
    /// instructions (expanded using control flow / cmov).
    MachineBasicBlock *EmitLoweredSelect(MachineInstr *I,
                                         MachineBasicBlock *BB) const;

    /// EmitLoweredWinAlloca - Custom inserter for the Windows stack-probe
    /// alloca pseudo instruction.
    MachineBasicBlock *EmitLoweredWinAlloca(MachineInstr *MI,
                                              MachineBasicBlock *BB) const;

    /// EmitLoweredSegAlloca - Custom inserter for segmented-stack alloca;
    /// Is64Bit selects the 64-bit expansion.
    MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr *MI,
                                            MachineBasicBlock *BB,
                                            bool Is64Bit) const;

    /// EmitLoweredTLSCall - Custom inserter for the TLS call pseudo
    /// instruction.
    MachineBasicBlock *EmitLoweredTLSCall(MachineInstr *MI,
                                          MachineBasicBlock *BB) const;

    // NOTE(review): lowercase 'emit' is inconsistent with the EmitLowered*
    // family above; renaming would require touching the out-of-line
    // definition, so it is only flagged here.
    MachineBasicBlock *emitLoweredTLSAddr(MachineInstr *MI,
                                          MachineBasicBlock *BB) const;

    /// Emit nodes that will be selected as "test Op0,Op0", or something
    /// equivalent, for use with the given x86 condition code.
    SDValue EmitTest(SDValue Op0, unsigned X86CC, SelectionDAG &DAG) const;

    /// Emit nodes that will be selected as "cmp Op0,Op1", or something
    /// equivalent, for use with the given x86 condition code.
    SDValue EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,
                    SelectionDAG &DAG) const;

    /// Convert a comparison if required by the subtarget.
    SDValue ConvertCmpIfNecessary(SDValue Cmp, SelectionDAG &DAG) const;
950  };
951
  namespace X86 {
    /// createFastISel - Construct the X86-specific FastISel instruction
    /// selector for the given function state and library info.
    FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                             const TargetLibraryInfo *libInfo);
  }
956}
957
958#endif    // X86ISELLOWERING_H
959