// X86ISelLowering.h — LLVM revision 239462
//===-- X86ISelLowering.h - X86 DAG Lowering Interface ----------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that X86 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#ifndef X86ISELLOWERING_H
#define X86ISELLOWERING_H

#include "X86Subtarget.h"
#include "X86RegisterInfo.h"
#include "X86MachineFunctionInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/CallingConvLower.h"

namespace llvm {
  namespace X86ISD {
    // X86 Specific DAG Nodes. Opcodes numbered from ATOMADD64_DAG onward are
    // treated as target memory operations (see the WARNING before the closing
    // brace below).
    enum NodeType {
      // Start the numbering where the builtin ops leave off.
      FIRST_NUMBER = ISD::BUILTIN_OP_END,

      /// BSF - Bit scan forward.
      /// BSR - Bit scan reverse.
      BSF,
      BSR,

      /// SHLD, SHRD - Double shift instructions. These correspond to
      /// X86::SHLDxx and X86::SHRDxx instructions.
      SHLD,
      SHRD,

      /// FAND - Bitwise logical AND of floating point values. This corresponds
      /// to X86::ANDPS or X86::ANDPD.
      FAND,

      /// FOR - Bitwise logical OR of floating point values. This corresponds
      /// to X86::ORPS or X86::ORPD.
      FOR,

      /// FXOR - Bitwise logical XOR of floating point values. This corresponds
      /// to X86::XORPS or X86::XORPD.
      FXOR,

      /// FSRL - Bitwise logical right shift of floating point values. This
      /// corresponds to X86::PSRLDQ.
      FSRL,

      /// CALL - These operations represent an abstract X86 call
      /// instruction, which includes a bunch of information.  In particular the
      /// operands of these node are:
      ///
      ///     #0 - The incoming token chain
      ///     #1 - The callee
      ///     #2 - The number of arg bytes the caller pushes on the stack.
      ///     #3 - The number of arg bytes the callee pops off the stack.
      ///     #4 - The value to pass in AL/AX/EAX (optional)
      ///     #5 - The value to pass in DL/DX/EDX (optional)
      ///
      /// The result values of these nodes are:
      ///
      ///     #0 - The outgoing token chain
      ///     #1 - The first register result value (optional)
      ///     #2 - The second register result value (optional)
      ///
      CALL,

      /// RDTSC_DAG - This operation implements the lowering for
      /// readcyclecounter
      RDTSC_DAG,

      /// X86 compare and logical compare instructions.
      CMP, COMI, UCOMI,

      /// X86 bit-test instructions.
      BT,

      /// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS
      /// operand, usually produced by a CMP instruction.
      SETCC,

      // Same as SETCC except it's materialized with a sbb and the value is all
      // one's or all zero's.
      SETCC_CARRY,  // R = carry_bit ? ~0 : 0

      /// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD.
      /// Operands are two FP values to compare; result is a mask of
      /// 0s or 1s.  Generally DTRT for C/C++ with NaNs.
      FSETCCss, FSETCCsd,

      /// X86 MOVMSK{pd|ps}, extracts sign bits of two or four FP values,
      /// result in an integer GPR.  Needs masking for scalar result.
      FGETSIGNx86,

      /// X86 conditional moves. Operand 0 and operand 1 are the two values
      /// to select from. Operand 2 is the condition code, and operand 3 is the
      /// flag operand produced by a CMP or TEST instruction. It also writes a
      /// flag result.
      CMOV,

      /// X86 conditional branches. Operand 0 is the chain operand, operand 1
      /// is the block to branch if condition is true, operand 2 is the
      /// condition code, and operand 3 is the flag operand produced by a CMP
      /// or TEST instruction.
      BRCOND,

      /// Return with a flag operand. Operand 0 is the chain operand, operand
      /// 1 is the number of bytes of stack to pop.
      RET_FLAG,

      /// REP_STOS - Repeat fill, corresponds to X86::REP_STOSx.
      REP_STOS,

      /// REP_MOVS - Repeat move, corresponds to X86::REP_MOVSx.
      REP_MOVS,

      /// GlobalBaseReg - On Darwin, this node represents the result of the popl
      /// at function entry, used for PIC code.
      GlobalBaseReg,

      /// Wrapper - A wrapper node for TargetConstantPool,
      /// TargetExternalSymbol, and TargetGlobalAddress.
      Wrapper,

      /// WrapperRIP - Special wrapper used under X86-64 PIC mode for RIP
      /// relative displacements.
      WrapperRIP,

      /// MOVDQ2Q - Copies a 64-bit value from the low word of an XMM vector
      /// to an MMX vector.  If you think this is too close to the previous
      /// mnemonic, so do I; blame Intel.
      MOVDQ2Q,

      /// PEXTRB - Extract an 8-bit value from a vector and zero extend it to
      /// i32, corresponds to X86::PEXTRB.
      PEXTRB,

      /// PEXTRW - Extract a 16-bit value from a vector and zero extend it to
      /// i32, corresponds to X86::PEXTRW.
      PEXTRW,

      /// INSERTPS - Insert any element of a 4 x float vector into any element
      /// of a destination 4 x float vector.
      INSERTPS,

      /// PINSRB - Insert the lower 8-bits of a 32-bit value to a vector,
      /// corresponds to X86::PINSRB.
      PINSRB,

      /// PINSRW - Insert the lower 16-bits of a 32-bit value to a vector,
      /// corresponds to X86::PINSRW.
      PINSRW, MMX_PINSRW,

      /// PSHUFB - Shuffle 16 8-bit values within a vector.
      PSHUFB,

      /// ANDNP - Bitwise Logical AND NOT of Packed FP values.
      ANDNP,

      /// PSIGN - Copy integer sign.
      PSIGN,

      /// BLENDV - Blend where the selector is an XMM.
      BLENDV,

      /// BLENDxx - Blend where the selector is an immediate.
      BLENDPW,
      BLENDPS,
      BLENDPD,

      /// HADD - Integer horizontal add.
      HADD,

      /// HSUB - Integer horizontal sub.
      HSUB,

      /// FHADD - Floating point horizontal add.
      FHADD,

      /// FHSUB - Floating point horizontal sub.
      FHSUB,

      /// FMAX, FMIN - Floating point max and min.
      ///
      FMAX, FMIN,

      /// FRSQRT, FRCP - Floating point reciprocal-sqrt and reciprocal
      /// approximation.  Note that these typically require refinement
      /// in order to obtain suitable precision.
      FRSQRT, FRCP,

      // TLSADDR - Thread Local Storage.
      TLSADDR,

      // TLSBASEADDR - Thread Local Storage. A call to get the start address
      // of the TLS block for the current module.
      TLSBASEADDR,

      // TLSCALL - Thread Local Storage.  When calling to an OS provided
      // thunk at the address from an earlier relocation.
      TLSCALL,

      // EH_RETURN - Exception Handling helpers.
      EH_RETURN,

      /// TC_RETURN - Tail call return.
      ///   operand #0 chain
      ///   operand #1 callee (register or absolute)
      ///   operand #2 stack adjustment
      ///   operand #3 optional in flag
      TC_RETURN,

      // VZEXT_MOVL - Vector move low and zero extend.
      VZEXT_MOVL,

      // VSEXT_MOVL - Vector move low and sign extend.
      VSEXT_MOVL,

      // VFPEXT - Vector FP extend.
      VFPEXT,

      // VSHL, VSRL - 128-bit vector logical left / right shift
      VSHLDQ, VSRLDQ,

      // VSHL, VSRL, VSRA - Vector shift elements
      VSHL, VSRL, VSRA,

      // VSHLI, VSRLI, VSRAI - Vector shift elements by immediate
      VSHLI, VSRLI, VSRAI,

      // CMPP - Vector packed double/float comparison.
      CMPP,

      // PCMP* - Vector integer comparisons.
      PCMPEQ, PCMPGT,

      // ADD, SUB, SMUL, etc. - Arithmetic operations with FLAGS results.
      ADD, SUB, ADC, SBB, SMUL,
      INC, DEC, OR, XOR, AND,

      ANDN, // ANDN - Bitwise AND NOT with FLAGS results.

      BLSI,   // BLSI - Extract lowest set isolated bit
      BLSMSK, // BLSMSK - Get mask up to lowest set bit
      BLSR,   // BLSR - Reset lowest set bit

      UMUL, // LOW, HI, FLAGS = umul LHS, RHS

      // MUL_IMM - X86 specific multiply by immediate.
      MUL_IMM,

      // PTEST - Vector bitwise comparisons
      PTEST,

      // TESTP - Vector packed fp sign bitwise comparisons
      TESTP,

      // Several flavors of instructions with vector shuffle behaviors.
      PALIGN,
      PSHUFD,
      PSHUFHW,
      PSHUFLW,
      SHUFP,
      MOVDDUP,
      MOVSHDUP,
      MOVSLDUP,
      MOVLHPS,
      MOVLHPD,
      MOVHLPS,
      MOVLPS,
      MOVLPD,
      MOVSD,
      MOVSS,
      UNPCKL,
      UNPCKH,
      VPERMILP,
      VPERMV,
      VPERMI,
      VPERM2X128,
      VBROADCAST,

      // PMULUDQ - Vector multiply packed unsigned doubleword integers
      PMULUDQ,

      // FMA nodes
      FMADD,
      FNMADD,
      FMSUB,
      FNMSUB,
      FMADDSUB,
      FMSUBADD,

      // VASTART_SAVE_XMM_REGS - Save xmm argument registers to the stack,
      // according to %al. An operator is needed so that this can be expanded
      // with control flow.
      VASTART_SAVE_XMM_REGS,

      // WIN_ALLOCA - Windows's _chkstk call to do stack probing.
      WIN_ALLOCA,

      // SEG_ALLOCA - For allocating variable amounts of stack space when using
      // segmented stacks. Check if the current stacklet has enough space, and
      // falls back to heap allocation if not.
      SEG_ALLOCA,

      // WIN_FTOL - Windows's _ftol2 runtime routine to do fptoui.
      WIN_FTOL,

      // Memory barrier
      MEMBARRIER,
      MFENCE,
      SFENCE,
      LFENCE,

      // FNSTSW16r - Store FP status word into i16 register.
      FNSTSW16r,

      // SAHF - Store contents of %ah into %eflags.
      SAHF,

      // RDRAND - Get a random integer and indicate whether it is valid in CF.
      RDRAND,

      // PCMP*STRI
      PCMPISTRI,
      PCMPESTRI,

      // ATOMADD64_DAG, ATOMSUB64_DAG, ATOMOR64_DAG, ATOMAND64_DAG,
      // ATOMXOR64_DAG, ATOMNAND64_DAG, ATOMSWAP64_DAG -
      // Atomic 64-bit binary operations.
      ATOMADD64_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
      ATOMSUB64_DAG,
      ATOMOR64_DAG,
      ATOMXOR64_DAG,
      ATOMAND64_DAG,
      ATOMNAND64_DAG,
      ATOMSWAP64_DAG,

      // LCMPXCHG_DAG, LCMPXCHG8_DAG, LCMPXCHG16_DAG - Compare and swap.
      LCMPXCHG_DAG,
      LCMPXCHG8_DAG,
      LCMPXCHG16_DAG,

      // VZEXT_LOAD - Load, scalar_to_vector, and zero extend.
      VZEXT_LOAD,

      // FNSTCW16m - Store FP control word into i16 memory.
      FNSTCW16m,

      /// FP_TO_INT*_IN_MEM - This instruction implements FP_TO_SINT with the
      /// integer destination in memory and a FP reg source.  This corresponds
      /// to the X86::FIST*m instructions and the rounding mode change stuff. It
      /// has two inputs (token chain and address) and two outputs (int value
      /// and token chain).
      FP_TO_INT16_IN_MEM,
      FP_TO_INT32_IN_MEM,
      FP_TO_INT64_IN_MEM,

      /// FILD, FILD_FLAG - This instruction implements SINT_TO_FP with the
      /// integer source in memory and FP reg result.  This corresponds to the
      /// X86::FILD*m instructions. It has three inputs (token chain, address,
      /// and source type) and two outputs (FP value and token chain). FILD_FLAG
      /// also produces a flag).
      FILD,
      FILD_FLAG,

      /// FLD - This instruction implements an extending load to FP stack slots.
      /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain
      /// operand, ptr to load from, and a ValueType node indicating the type
      /// to load to.
      FLD,

      /// FST - This instruction implements a truncating store to FP stack
      /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a
      /// chain operand, value to store, address, and a ValueType to store it
      /// as.
      FST,

      /// VAARG_64 - This instruction grabs the address of the next argument
      /// from a va_list. (reads and modifies the va_list in memory)
      VAARG_64

      // WARNING: Do not add anything in the end unless you want the node to
      // have memop! In fact, starting from ATOMADD64_DAG all opcodes will be
      // thought as target memory ops!
    };
  }

  /// Define some predicates that are used for node matching.
  namespace X86 {
    /// isVEXTRACTF128Index - Return true if the specified
    /// EXTRACT_SUBVECTOR operand specifies a vector extract that is
    /// suitable for input to VEXTRACTF128.
    bool isVEXTRACTF128Index(SDNode *N);

    /// isVINSERTF128Index - Return true if the specified
    /// INSERT_SUBVECTOR operand specifies a subvector insert that is
    /// suitable for input to VINSERTF128.
    bool isVINSERTF128Index(SDNode *N);

    /// getExtractVEXTRACTF128Immediate - Return the appropriate
    /// immediate to extract the specified EXTRACT_SUBVECTOR index
    /// with VEXTRACTF128 instructions.
    unsigned getExtractVEXTRACTF128Immediate(SDNode *N);

    /// getInsertVINSERTF128Immediate - Return the appropriate
    /// immediate to insert at the specified INSERT_SUBVECTOR index
    /// with VINSERTF128 instructions.
    unsigned getInsertVINSERTF128Immediate(SDNode *N);

    /// isZeroNode - Returns true if Elt is a constant zero or a floating point
    /// constant +0.0.
    bool isZeroNode(SDValue Elt);

    /// isOffsetSuitableForCodeModel - Returns true if the given offset can
    /// fit into the displacement field of the instruction.
    bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
                                      bool hasSymbolicDisplacement = true);


    /// isCalleePop - Determines whether the callee is required to pop its
    /// own arguments. Callee pop is necessary to support tail calls.
434 bool isCalleePop(CallingConv::ID CallingConv, 435 bool is64Bit, bool IsVarArg, bool TailCallOpt); 436 } 437 438 //===--------------------------------------------------------------------===// 439 // X86TargetLowering - X86 Implementation of the TargetLowering interface 440 class X86TargetLowering : public TargetLowering { 441 public: 442 explicit X86TargetLowering(X86TargetMachine &TM); 443 444 virtual unsigned getJumpTableEncoding() const; 445 446 virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i8; } 447 448 virtual const MCExpr * 449 LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, 450 const MachineBasicBlock *MBB, unsigned uid, 451 MCContext &Ctx) const; 452 453 /// getPICJumpTableRelocaBase - Returns relocation base for the given PIC 454 /// jumptable. 455 virtual SDValue getPICJumpTableRelocBase(SDValue Table, 456 SelectionDAG &DAG) const; 457 virtual const MCExpr * 458 getPICJumpTableRelocBaseExpr(const MachineFunction *MF, 459 unsigned JTI, MCContext &Ctx) const; 460 461 /// getStackPtrReg - Return the stack pointer register we are using: either 462 /// ESP or RSP. 463 unsigned getStackPtrReg() const { return X86StackPtr; } 464 465 /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate 466 /// function arguments in the caller parameter area. For X86, aggregates 467 /// that contains are placed at 16-byte boundaries while the rest are at 468 /// 4-byte boundaries. 469 virtual unsigned getByValTypeAlignment(Type *Ty) const; 470 471 /// getOptimalMemOpType - Returns the target specific optimal type for load 472 /// and store operations as a result of memset, memcpy, and memmove 473 /// lowering. If DstAlign is zero that means it's safe to destination 474 /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it 475 /// means there isn't a need to check it against alignment requirement, 476 /// probably because the source does not need to be loaded. 
If 477 /// 'IsZeroVal' is true, that means it's safe to return a 478 /// non-scalar-integer type, e.g. empty string source, constant, or loaded 479 /// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is 480 /// constant so it does not need to be loaded. 481 /// It returns EVT::Other if the type should be determined using generic 482 /// target-independent logic. 483 virtual EVT 484 getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, 485 bool IsZeroVal, bool MemcpyStrSrc, 486 MachineFunction &MF) const; 487 488 /// allowsUnalignedMemoryAccesses - Returns true if the target allows 489 /// unaligned memory accesses. of the specified type. 490 virtual bool allowsUnalignedMemoryAccesses(EVT VT) const { 491 return true; 492 } 493 494 /// LowerOperation - Provide custom lowering hooks for some operations. 495 /// 496 virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; 497 498 /// ReplaceNodeResults - Replace the results of node with an illegal result 499 /// type with new values built out of custom code. 500 /// 501 virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results, 502 SelectionDAG &DAG) const; 503 504 505 virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; 506 507 /// isTypeDesirableForOp - Return true if the target has native support for 508 /// the specified value type and it is 'desirable' to use the type for the 509 /// given node type. e.g. On x86 i16 is legal, but undesirable since i16 510 /// instruction encodings are longer and some i16 instructions are slow. 511 virtual bool isTypeDesirableForOp(unsigned Opc, EVT VT) const; 512 513 /// isTypeDesirable - Return true if the target has native support for the 514 /// specified value type and it is 'desirable' to use the type. e.g. On x86 515 /// i16 is legal, but undesirable since i16 instruction encodings are longer 516 /// and some i16 instructions are slow. 
517 virtual bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const; 518 519 virtual MachineBasicBlock * 520 EmitInstrWithCustomInserter(MachineInstr *MI, 521 MachineBasicBlock *MBB) const; 522 523 524 /// getTargetNodeName - This method returns the name of a target specific 525 /// DAG node. 526 virtual const char *getTargetNodeName(unsigned Opcode) const; 527 528 /// getSetCCResultType - Return the value type to use for ISD::SETCC. 529 virtual EVT getSetCCResultType(EVT VT) const; 530 531 /// computeMaskedBitsForTargetNode - Determine which of the bits specified 532 /// in Mask are known to be either zero or one and return them in the 533 /// KnownZero/KnownOne bitsets. 534 virtual void computeMaskedBitsForTargetNode(const SDValue Op, 535 APInt &KnownZero, 536 APInt &KnownOne, 537 const SelectionDAG &DAG, 538 unsigned Depth = 0) const; 539 540 // ComputeNumSignBitsForTargetNode - Determine the number of bits in the 541 // operation that are sign bits. 542 virtual unsigned ComputeNumSignBitsForTargetNode(SDValue Op, 543 unsigned Depth) const; 544 545 virtual bool 546 isGAPlusOffset(SDNode *N, const GlobalValue* &GA, int64_t &Offset) const; 547 548 SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const; 549 550 virtual bool ExpandInlineAsm(CallInst *CI) const; 551 552 ConstraintType getConstraintType(const std::string &Constraint) const; 553 554 /// Examine constraint string and operand type and determine a weight value. 555 /// The operand object must already have been set up with the operand type. 556 virtual ConstraintWeight getSingleConstraintMatchWeight( 557 AsmOperandInfo &info, const char *constraint) const; 558 559 virtual const char *LowerXConstraint(EVT ConstraintVT) const; 560 561 /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops 562 /// vector. If it is invalid, don't add anything to Ops. If hasMemory is 563 /// true it means one of the asm constraint of the inline asm instruction 564 /// being processed is 'm'. 
565 virtual void LowerAsmOperandForConstraint(SDValue Op, 566 std::string &Constraint, 567 std::vector<SDValue> &Ops, 568 SelectionDAG &DAG) const; 569 570 /// getRegForInlineAsmConstraint - Given a physical register constraint 571 /// (e.g. {edx}), return the register number and the register class for the 572 /// register. This should only be used for C_Register constraints. On 573 /// error, this returns a register number of 0. 574 std::pair<unsigned, const TargetRegisterClass*> 575 getRegForInlineAsmConstraint(const std::string &Constraint, 576 EVT VT) const; 577 578 /// isLegalAddressingMode - Return true if the addressing mode represented 579 /// by AM is legal for this target, for a load/store of the specified type. 580 virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty)const; 581 582 /// isLegalICmpImmediate - Return true if the specified immediate is legal 583 /// icmp immediate, that is the target has icmp instructions which can 584 /// compare a register against the immediate without having to materialize 585 /// the immediate into a register. 586 virtual bool isLegalICmpImmediate(int64_t Imm) const; 587 588 /// isLegalAddImmediate - Return true if the specified immediate is legal 589 /// add immediate, that is the target has add instructions which can 590 /// add a register and the immediate without having to materialize 591 /// the immediate into a register. 592 virtual bool isLegalAddImmediate(int64_t Imm) const; 593 594 /// isTruncateFree - Return true if it's free to truncate a value of 595 /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate a i32 value in 596 /// register EAX to i16 by referencing its sub-register AX. 597 virtual bool isTruncateFree(Type *Ty1, Type *Ty2) const; 598 virtual bool isTruncateFree(EVT VT1, EVT VT2) const; 599 600 /// isZExtFree - Return true if any actual instruction that defines a 601 /// value of type Ty1 implicit zero-extends the value to Ty2 in the result 602 /// register. 
This does not necessarily include registers defined in 603 /// unknown ways, such as incoming arguments, or copies from unknown 604 /// virtual registers. Also, if isTruncateFree(Ty2, Ty1) is true, this 605 /// does not necessarily apply to truncate instructions. e.g. on x86-64, 606 /// all instructions that define 32-bit values implicit zero-extend the 607 /// result out to 64 bits. 608 virtual bool isZExtFree(Type *Ty1, Type *Ty2) const; 609 virtual bool isZExtFree(EVT VT1, EVT VT2) const; 610 611 /// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than 612 /// a pair of mul and add instructions. fmuladd intrinsics will be expanded to 613 /// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd 614 /// is expanded to mul + add. 615 virtual bool isFMAFasterThanMulAndAdd(EVT) const { return true; } 616 617 /// isNarrowingProfitable - Return true if it's profitable to narrow 618 /// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow 619 /// from i32 to i8 but not from i32 to i16. 620 virtual bool isNarrowingProfitable(EVT VT1, EVT VT2) const; 621 622 /// isFPImmLegal - Returns true if the target can instruction select the 623 /// specified FP immediate natively. If false, the legalizer will 624 /// materialize the FP immediate as a load from a constant pool. 625 virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const; 626 627 /// isShuffleMaskLegal - Targets can use this to indicate that they only 628 /// support *some* VECTOR_SHUFFLE operations, those with specific masks. 629 /// By default, if a target supports the VECTOR_SHUFFLE node, all mask 630 /// values are assumed to be legal. 631 virtual bool isShuffleMaskLegal(const SmallVectorImpl<int> &Mask, 632 EVT VT) const; 633 634 /// isVectorClearMaskLegal - Similar to isShuffleMaskLegal. 
This is 635 /// used by Targets can use this to indicate if there is a suitable 636 /// VECTOR_SHUFFLE that can be used to replace a VAND with a constant 637 /// pool entry. 638 virtual bool isVectorClearMaskLegal(const SmallVectorImpl<int> &Mask, 639 EVT VT) const; 640 641 /// ShouldShrinkFPConstant - If true, then instruction selection should 642 /// seek to shrink the FP constant of the specified type to a smaller type 643 /// in order to save space and / or reduce runtime. 644 virtual bool ShouldShrinkFPConstant(EVT VT) const { 645 // Don't shrink FP constpool if SSE2 is available since cvtss2sd is more 646 // expensive than a straight movsd. On the other hand, it's important to 647 // shrink long double fp constant since fldt is very slow. 648 return !X86ScalarSSEf64 || VT == MVT::f80; 649 } 650 651 const X86Subtarget* getSubtarget() const { 652 return Subtarget; 653 } 654 655 /// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is 656 /// computed in an SSE register, not on the X87 floating point stack. 657 bool isScalarFPTypeInSSEReg(EVT VT) const { 658 return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is when SSE2 659 (VT == MVT::f32 && X86ScalarSSEf32); // f32 is when SSE1 660 } 661 662 /// isTargetFTOL - Return true if the target uses the MSVC _ftol2 routine 663 /// for fptoui. 664 bool isTargetFTOL() const { 665 return Subtarget->isTargetWindows() && !Subtarget->is64Bit(); 666 } 667 668 /// isIntegerTypeFTOL - Return true if the MSVC _ftol2 routine should be 669 /// used for fptoui to the given type. 670 bool isIntegerTypeFTOL(EVT VT) const { 671 return isTargetFTOL() && VT == MVT::i64; 672 } 673 674 /// createFastISel - This method returns a target specific FastISel object, 675 /// or null if the target does not support "fast" ISel. 
676 virtual FastISel *createFastISel(FunctionLoweringInfo &funcInfo, 677 const TargetLibraryInfo *libInfo) const; 678 679 /// getStackCookieLocation - Return true if the target stores stack 680 /// protector cookies at a fixed offset in some non-standard address 681 /// space, and populates the address space and offset as 682 /// appropriate. 683 virtual bool getStackCookieLocation(unsigned &AddressSpace, unsigned &Offset) const; 684 685 SDValue BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue StackSlot, 686 SelectionDAG &DAG) const; 687 688 protected: 689 std::pair<const TargetRegisterClass*, uint8_t> 690 findRepresentativeClass(EVT VT) const; 691 692 private: 693 /// Subtarget - Keep a pointer to the X86Subtarget around so that we can 694 /// make the right decision when generating code for different targets. 695 const X86Subtarget *Subtarget; 696 const X86RegisterInfo *RegInfo; 697 const TargetData *TD; 698 699 /// X86StackPtr - X86 physical register used as stack ptr. 700 unsigned X86StackPtr; 701 702 /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87 703 /// floating point ops. 704 /// When SSE is available, use it for f32 operations. 705 /// When SSE2 is available, use it for f64 operations. 706 bool X86ScalarSSEf32; 707 bool X86ScalarSSEf64; 708 709 /// LegalFPImmediates - A list of legal fp immediates. 710 std::vector<APFloat> LegalFPImmediates; 711 712 /// addLegalFPImmediate - Indicate that this x86 target can instruction 713 /// select the specified FP immediate natively. 
714 void addLegalFPImmediate(const APFloat& Imm) { 715 LegalFPImmediates.push_back(Imm); 716 } 717 718 SDValue LowerCallResult(SDValue Chain, SDValue InFlag, 719 CallingConv::ID CallConv, bool isVarArg, 720 const SmallVectorImpl<ISD::InputArg> &Ins, 721 DebugLoc dl, SelectionDAG &DAG, 722 SmallVectorImpl<SDValue> &InVals) const; 723 SDValue LowerMemArgument(SDValue Chain, 724 CallingConv::ID CallConv, 725 const SmallVectorImpl<ISD::InputArg> &ArgInfo, 726 DebugLoc dl, SelectionDAG &DAG, 727 const CCValAssign &VA, MachineFrameInfo *MFI, 728 unsigned i) const; 729 SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg, 730 DebugLoc dl, SelectionDAG &DAG, 731 const CCValAssign &VA, 732 ISD::ArgFlagsTy Flags) const; 733 734 // Call lowering helpers. 735 736 /// IsEligibleForTailCallOptimization - Check whether the call is eligible 737 /// for tail call optimization. Targets which want to do tail call 738 /// optimization should implement this function. 739 bool IsEligibleForTailCallOptimization(SDValue Callee, 740 CallingConv::ID CalleeCC, 741 bool isVarArg, 742 bool isCalleeStructRet, 743 bool isCallerStructRet, 744 const SmallVectorImpl<ISD::OutputArg> &Outs, 745 const SmallVectorImpl<SDValue> &OutVals, 746 const SmallVectorImpl<ISD::InputArg> &Ins, 747 SelectionDAG& DAG) const; 748 bool IsCalleePop(bool isVarArg, CallingConv::ID CallConv) const; 749 SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr, 750 SDValue Chain, bool IsTailCall, bool Is64Bit, 751 int FPDiff, DebugLoc dl) const; 752 753 unsigned GetAlignedArgumentStackSize(unsigned StackSize, 754 SelectionDAG &DAG) const; 755 756 std::pair<SDValue,SDValue> FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, 757 bool isSigned, 758 bool isReplace) const; 759 760 SDValue LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl, 761 SelectionDAG &DAG) const; 762 SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; 763 SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG 
&DAG) const;

    // Per-operation custom lowering entry points. Each takes the SDValue for
    // the node being lowered and returns the lowered replacement value.
    // (The dispatch site is not visible in this chunk of the header.)
    SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerINSERT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
    // Overload taking the GlobalValue directly plus an addend folded into
    // the address computation; the SDValue overload below is the generic
    // lowering hook.
    SDValue LowerGlobalAddress(const GlobalValue *GV, DebugLoc dl,
                               int64_t Offset, SelectionDAG &DAG) const;
    SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerBITCAST(SDValue op, SelectionDAG &DAG) const;
    SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFABS(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFNEG(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFGETSIGN(SDValue Op, SelectionDAG &DAG) const;
    // Helper that tries to rewrite an AND-based condition as an X86 bit-test
    // (BT) sequence for the given condition code.
    SDValue LowerToBT(SDValue And, ISD::CondCode CC,
                      DebugLoc dl, SelectionDAG &DAG) const;
    SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerMEMSET(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerCTLZ(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerCTLZ_ZERO_UNDEF(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerADD(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSUB(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerShift(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) const;

    // Atomic / synchronization-related lowering hooks.
    SDValue LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerLOAD_SUB(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerREADCYCLECOUNTER(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const;
    // DAG-combine helper for truncates; also receives the combiner state.
    SDValue PerformTruncateCombine(SDNode* N, SelectionDAG &DAG,
                                   DAGCombinerInfo &DCI) const;

    // Utility functions to help LowerVECTOR_SHUFFLE.
    SDValue LowerVECTOR_SHUFFLEv8i16(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVectorBroadcast(SDValue &Op, SelectionDAG &DAG) const;
    SDValue NormalizeVectorShuffle(SDValue Op, SelectionDAG &DAG) const;

    SDValue LowerVectorFpExtend(SDValue &Op, SelectionDAG &DAG) const;

    // Calling-convention hooks overridden from TargetLowering: lower the
    // incoming formal arguments, outgoing calls, and function returns.
    virtual SDValue
      LowerFormalArguments(SDValue Chain,
                           CallingConv::ID CallConv, bool isVarArg,
                           const SmallVectorImpl<ISD::InputArg> &Ins,
                           DebugLoc dl, SelectionDAG &DAG,
                           SmallVectorImpl<SDValue> &InVals) const;
    virtual SDValue
      LowerCall(CallLoweringInfo &CLI,
                SmallVectorImpl<SDValue> &InVals) const;

    virtual SDValue
      LowerReturn(SDValue Chain,
                  CallingConv::ID CallConv, bool isVarArg,
                  const SmallVectorImpl<ISD::OutputArg> &Outs,
                  const SmallVectorImpl<SDValue> &OutVals,
                  DebugLoc dl, SelectionDAG &DAG) const;

    virtual bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const;

    virtual bool mayBeEmittedAsTailCall(CallInst *CI) const;

    virtual EVT
    getTypeForExtArgOrReturn(LLVMContext &Context, EVT VT,
                             ISD::NodeType ExtendKind) const;

    virtual bool
    CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
                   bool isVarArg,
                   const SmallVectorImpl<ISD::OutputArg> &Outs,
                   LLVMContext &Context) const;

    /// Utility function to emit string processing sse4.2 instructions
    /// that return in xmm0.
    /// This takes the instruction to expand, the associated machine basic
    /// block, the number of args, and whether or not the second arg is
    /// in memory or not.
    MachineBasicBlock *EmitPCMP(MachineInstr *BInstr, MachineBasicBlock *BB,
                                unsigned argNum, bool inMem) const;

    /// Utility functions to emit monitor and mwait instructions. These
    /// need to make sure that the arguments to the intrinsic are in the
    /// correct registers.
    MachineBasicBlock *EmitMonitor(MachineInstr *MI,
                                   MachineBasicBlock *BB) const;
    MachineBasicBlock *EmitMwait(MachineInstr *MI,
                                 MachineBasicBlock *BB) const;

    /// Utility function to emit atomic bitwise operations (and, or, xor).
    /// It takes the bitwise instruction to expand, the associated machine
    /// basic block, and the associated X86 opcodes for reg/reg and reg/imm.
    MachineBasicBlock *EmitAtomicBitwiseWithCustomInserter(
                                                  MachineInstr *BInstr,
                                                  MachineBasicBlock *BB,
                                                  unsigned regOpc,
                                                  unsigned immOpc,
                                                  unsigned loadOpc,
                                                  unsigned cxchgOpc,
                                                  unsigned notOpc,
                                                  unsigned EAXreg,
                                                  const TargetRegisterClass *RC,
                                                  bool Invert = false) const;

    // Variant for 64-bit atomics split across a register pair: separate
    // reg/imm opcodes are supplied for the low (L) and high (H) halves.
    MachineBasicBlock *EmitAtomicBit6432WithCustomInserter(
                                                  MachineInstr *BInstr,
                                                  MachineBasicBlock *BB,
                                                  unsigned regOpcL,
                                                  unsigned regOpcH,
                                                  unsigned immOpcL,
                                                  unsigned immOpcH,
                                                  bool Invert = false) const;

    /// Utility function to emit atomic min and max. It takes the min/max
    /// instruction to expand, the associated basic block, and the associated
    /// cmov opcode for moving the min or max value.
    MachineBasicBlock *EmitAtomicMinMaxWithCustomInserter(
                                                  MachineInstr *BInstr,
                                                  MachineBasicBlock *BB,
                                                  unsigned cmovOpc) const;

    /// Utility function to emit the low-level va_arg code for X86-64.
    MachineBasicBlock *EmitVAARG64WithCustomInserter(
                       MachineInstr *MI,
                       MachineBasicBlock *MBB) const;

    /// Utility function to emit the xmm reg save portion of va_start.
    MachineBasicBlock *EmitVAStartSaveXMMRegsWithCustomInserter(
                                                   MachineInstr *BInstr,
                                                   MachineBasicBlock *BB) const;

    // Custom-inserter expansions: each rewrites the given pseudo-instruction
    // inside (or around) the given machine basic block and returns the block
    // in which subsequent instructions should be inserted.
    MachineBasicBlock *EmitLoweredSelect(MachineInstr *I,
                                         MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredWinAlloca(MachineInstr *MI,
                                            MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr *MI,
                                            MachineBasicBlock *BB,
                                            bool Is64Bit) const;

    MachineBasicBlock *EmitLoweredTLSCall(MachineInstr *MI,
                                          MachineBasicBlock *BB) const;

    // NOTE(review): lowercase 'emit' here is inconsistent with the other
    // Emit* members above; kept as-is since it matches this revision.
    MachineBasicBlock *emitLoweredTLSAddr(MachineInstr *MI,
                                          MachineBasicBlock *BB) const;

    /// Emit nodes that will be selected as "test Op0,Op0", or something
    /// equivalent, for use with the given x86 condition code.
    SDValue EmitTest(SDValue Op0, unsigned X86CC, SelectionDAG &DAG) const;

    /// Emit nodes that will be selected as "cmp Op0,Op1", or something
    /// equivalent, for use with the given x86 condition code.
    SDValue EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,
                    SelectionDAG &DAG) const;

    /// Convert a comparison if required by the subtarget.
    SDValue ConvertCmpIfNecessary(SDValue Cmp, SelectionDAG &DAG) const;
  };

  namespace X86 {
    /// createFastISel - Construct the X86-specific FastISel instruction
    /// selector for the function described by funcInfo.
    FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                             const TargetLibraryInfo *libInfo);
  }
}

#endif    // X86ISELLOWERING_H