//===-- X86ISelLowering.h - X86 DAG Lowering Interface ----------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that X86 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
#define LLVM_LIB_TARGET_X86_X86ISELLOWERING_H

#include "llvm/CodeGen/TargetLowering.h"

namespace llvm {
  class X86Subtarget;
  class X86TargetMachine;

  namespace X86ISD {
    // X86 Specific DAG Nodes
  enum NodeType : unsigned {
    // Start the numbering where the builtin ops leave off.
    FIRST_NUMBER = ISD::BUILTIN_OP_END,

    /// Bit scan forward.
    BSF,
    /// Bit scan reverse.
    BSR,

    /// X86 funnel/double shift i16 instructions. These correspond to
    /// the X86::SHLDW and X86::SHRDW instructions, which have different
    /// shift-amount modulo rules from generic funnel shifts.
    /// NOTE: The operand order matches ISD::FSHL/FSHR, not SHLD/SHRD.
    FSHL,
    FSHR,
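
    // A hedged sketch of the documented operand order (Hi, Lo and Amt are
    // placeholder SDValues; the first operand supplies the high bits, as with
    // ISD::FSHL):
    //   SDValue Res = DAG.getNode(X86ISD::FSHL, DL, MVT::i16, Hi, Lo, Amt);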

    /// Bitwise logical AND of floating point values. This corresponds
    /// to X86::ANDPS or X86::ANDPD.
    FAND,

    /// Bitwise logical OR of floating point values. This corresponds
    /// to X86::ORPS or X86::ORPD.
    FOR,

    /// Bitwise logical XOR of floating point values. This corresponds
    /// to X86::XORPS or X86::XORPD.
    FXOR,

    /// Bitwise logical ANDNOT of floating point values. This
    /// corresponds to X86::ANDNPS or X86::ANDNPD.
    FANDN,

    /// These operations represent an abstract X86 call
    /// instruction, which includes a bunch of information.  In particular the
    /// operands of these nodes are:
    ///
    ///     #0 - The incoming token chain
    ///     #1 - The callee
    ///     #2 - The number of arg bytes the caller pushes on the stack.
    ///     #3 - The number of arg bytes the callee pops off the stack.
    ///     #4 - The value to pass in AL/AX/EAX (optional)
    ///     #5 - The value to pass in DL/DX/EDX (optional)
    ///
    /// The result values of these nodes are:
    ///
    ///     #0 - The outgoing token chain
    ///     #1 - The first register result value (optional)
    ///     #2 - The second register result value (optional)
    ///
    CALL,
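
    // A hedged sketch of the documented node shape, with the optional
    // operands and results omitted:
    //   (outchain, ...) = X86ISD::CALL(inchain, callee,
    //                                  bytes_pushed, bytes_popped)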

    /// Same as call except it adds the NoTrack prefix.
    NT_CALL,

    // Pseudo for an ObjC call that gets emitted together with a special
    // marker instruction.
    CALL_RVMARKER,

    /// X86 compare and logical compare instructions.
    CMP,
    FCMP,
    COMI,
    UCOMI,

    /// X86 bit-test instructions.
    BT,

    /// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS
    /// operand, usually produced by a CMP instruction.
    SETCC,
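
    // A hedged sketch of how a SETCC node is typically built (LHS/RHS are
    // placeholder SDValues; X86ISD::CMP produces its flag result as MVT::i32):
    //   SDValue Flags = DAG.getNode(X86ISD::CMP, DL, MVT::i32, LHS, RHS);
    //   SDValue IsEq  = DAG.getNode(X86ISD::SETCC, DL, MVT::i8,
    //                               DAG.getTargetConstant(X86::COND_E, DL,
    //                                                     MVT::i8),
    //                               Flags);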

    /// X86 Select
    SELECTS,

    // Same as SETCC except it's materialized with a SBB and the value is
    // all ones or all zeros.
    SETCC_CARRY, // R = carry_bit ? ~0 : 0

    /// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD.
    /// Operands are two FP values to compare; result is a mask of
    /// 0s or 1s.  Generally DTRT for C/C++ with NaNs.
    FSETCC,

    /// X86 FP SETCC, similar to above, but with output as an i1 mask, and
    /// a version with SAE.
    FSETCCM,
    FSETCCM_SAE,

    /// X86 conditional moves. Operand 0 and operand 1 are the two values
    /// to select from. Operand 2 is the condition code, and operand 3 is the
    /// flag operand produced by a CMP or TEST instruction.
    CMOV,
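
    // A hedged sketch of the documented operand order (Val0/Val1 are the two
    // placeholder values to select from, CC is an i8 target constant condition
    // code, and EFLAGS is the MVT::i32 flag value):
    //   SDValue Ops[] = {Val0, Val1, CC, EFLAGS};
    //   SDValue Sel = DAG.getNode(X86ISD::CMOV, DL, VT, Ops);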

    /// X86 conditional branches. Operand 0 is the chain operand, operand 1
    /// is the block to branch to if the condition is true, operand 2 is the
    /// condition code, and operand 3 is the flag operand produced by a CMP
    /// or TEST instruction.
    BRCOND,

    /// BRIND node with NoTrack prefix. Operand 0 is the chain operand and
    /// operand 1 is the target address.
    NT_BRIND,

    /// Return with a flag operand. Operand 0 is the chain operand, operand
    /// 1 is the number of bytes of stack to pop.
    RET_FLAG,

    /// Return from interrupt. Operand 0 is the number of bytes to pop.
    IRET,

    /// Repeat fill, corresponds to X86::REP_STOSx.
    REP_STOS,

    /// Repeat move, corresponds to X86::REP_MOVSx.
    REP_MOVS,

    /// On Darwin, this node represents the result of the popl
    /// at function entry, used for PIC code.
    GlobalBaseReg,

    /// A wrapper node for TargetConstantPool, TargetJumpTable,
    /// TargetExternalSymbol, TargetGlobalAddress, TargetGlobalTLSAddress,
    /// MCSymbol and TargetBlockAddress.
    Wrapper,

    /// Special wrapper used under X86-64 PIC mode for RIP
    /// relative displacements.
    WrapperRIP,
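
    // A hedged sketch of typical use: wrap a target global address so that a
    // RIP-relative access can be selected in 64-bit PIC code (GV and PtrVT are
    // placeholders).
    //   SDValue TGA  = DAG.getTargetGlobalAddress(GV, DL, PtrVT);
    //   SDValue Addr = DAG.getNode(X86ISD::WrapperRIP, DL, PtrVT, TGA);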

    /// Copies a 64-bit value from an MMX vector to the low word
    /// of an XMM vector, with the high word zero filled.
    MOVQ2DQ,

    /// Copies a 64-bit value from the low word of an XMM vector
    /// to an MMX vector.
    MOVDQ2Q,

    /// Copies a 32-bit value from the low word of an MMX
    /// vector to a GPR.
    MMX_MOVD2W,

    /// Copies a GPR into the low 32-bit word of an MMX vector
    /// and zeroes out the high word.
    MMX_MOVW2D,

    /// Extract an 8-bit value from a vector and zero extend it to
    /// i32, corresponds to X86::PEXTRB.
    PEXTRB,

    /// Extract a 16-bit value from a vector and zero extend it to
    /// i32, corresponds to X86::PEXTRW.
    PEXTRW,

    /// Insert any element of a 4 x float vector into any element
    /// of a destination 4 x float vector.
    INSERTPS,

    /// Insert the lower 8 bits of a 32-bit value into a vector,
    /// corresponds to X86::PINSRB.
    PINSRB,

    /// Insert the lower 16 bits of a 32-bit value into a vector,
    /// corresponds to X86::PINSRW.
    PINSRW,

    /// Shuffle 16 8-bit values within a vector.
    PSHUFB,

    /// Compute Sum of Absolute Differences.
    PSADBW,
    /// Compute Double Block Packed Sum-Absolute-Differences
    DBPSADBW,

    /// Bitwise Logical AND NOT of Packed FP values.
    ANDNP,

    /// Blend where the selector is an immediate.
    BLENDI,

    /// Dynamic (non-constant condition) vector blend where only the sign bits
    /// of the condition elements are used. This is used to enforce that the
    /// condition mask is not valid for generic VSELECT optimizations. This
    /// is also used to implement the intrinsics.
    /// Operands are in VSELECT order: MASK, TRUE, FALSE
    BLENDV,
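
    // A hedged sketch of the documented (VSELECT-style) operand order, using
    // placeholder values:
    //   SDValue Blend = DAG.getNode(X86ISD::BLENDV, DL, VT,
    //                               Mask, TrueVal, FalseVal);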

    /// Combined add and sub on an FP vector.
    ADDSUB,

    // FP vector ops with rounding mode.
    FADD_RND,
    FADDS,
    FADDS_RND,
    FSUB_RND,
    FSUBS,
    FSUBS_RND,
    FMUL_RND,
    FMULS,
    FMULS_RND,
    FDIV_RND,
    FDIVS,
    FDIVS_RND,
    FMAX_SAE,
    FMAXS_SAE,
    FMIN_SAE,
    FMINS_SAE,
    FSQRT_RND,
    FSQRTS,
    FSQRTS_RND,

    // FP vector get exponent.
    FGETEXP,
    FGETEXP_SAE,
    FGETEXPS,
    FGETEXPS_SAE,
    // Extract Normalized Mantissas.
    VGETMANT,
    VGETMANT_SAE,
    VGETMANTS,
    VGETMANTS_SAE,
    // FP Scale.
    SCALEF,
    SCALEF_RND,
    SCALEFS,
    SCALEFS_RND,

    // Unsigned Integer average.
    AVG,

    /// Integer horizontal add/sub.
    HADD,
    HSUB,

    /// Floating point horizontal add/sub.
    FHADD,
    FHSUB,

    // Detect Conflicts Within a Vector
    CONFLICT,

    /// Floating point max and min.
    FMAX,
    FMIN,

    /// Commutative FMIN and FMAX.
    FMAXC,
    FMINC,

    /// Scalar intrinsic floating point max and min.
    FMAXS,
    FMINS,

    /// Floating point reciprocal-sqrt and reciprocal approximation.
    /// Note that these typically require refinement
    /// in order to obtain suitable precision.
    FRSQRT,
    FRCP,

    // AVX-512 reciprocal approximations with a little more precision.
    RSQRT14,
    RSQRT14S,
    RCP14,
    RCP14S,

    // Thread Local Storage.
    TLSADDR,

    // Thread Local Storage. A call to get the start address
    // of the TLS block for the current module.
    TLSBASEADDR,

    // Thread Local Storage. A call to an OS-provided thunk at the
    // address from an earlier relocation.
    TLSCALL,

    // Exception Handling helpers.
    EH_RETURN,

    // SjLj exception handling setjmp.
    EH_SJLJ_SETJMP,

    // SjLj exception handling longjmp.
    EH_SJLJ_LONGJMP,

    // SjLj exception handling dispatch.
    EH_SJLJ_SETUP_DISPATCH,

    /// Tail call return. See X86TargetLowering::LowerCall for
    /// the list of operands.
    TC_RETURN,

    // Vector move to low scalar and zero higher vector elements.
    VZEXT_MOVL,

    // Vector integer truncate.
    VTRUNC,
    // Vector integer truncate with unsigned/signed saturation.
    VTRUNCUS,
    VTRUNCS,

    // Masked version of the above. Used when less than a 128-bit result is
    // produced since the mask only applies to the lower elements and can't
    // be represented by a select.
    // SRC, PASSTHRU, MASK
    VMTRUNC,
    VMTRUNCUS,
    VMTRUNCS,

    // Vector FP extend.
    VFPEXT,
    VFPEXT_SAE,
    VFPEXTS,
    VFPEXTS_SAE,

    // Vector FP round.
    VFPROUND,
    VFPROUND_RND,
    VFPROUNDS,
    VFPROUNDS_RND,

    // Masked version of above. Used for v2f64->v4f32.
    // SRC, PASSTHRU, MASK
    VMFPROUND,

    // 128-bit vector logical left / right shift
    VSHLDQ,
    VSRLDQ,

    // Vector shift elements
    VSHL,
    VSRL,
    VSRA,

    // Vector variable shift
    VSHLV,
    VSRLV,
    VSRAV,

    // Vector shift elements by immediate
    VSHLI,
    VSRLI,
    VSRAI,

    // Shifts of mask registers.
    KSHIFTL,
    KSHIFTR,

    // Bit rotate by immediate
    VROTLI,
    VROTRI,

    // Vector packed double/float comparison.
    CMPP,

    // Vector integer comparisons.
    PCMPEQ,
    PCMPGT,

    // v8i16 Horizontal minimum and position.
    PHMINPOS,

    MULTISHIFT,

    /// Vector comparison generating mask bits for fp and
    /// integer signed and unsigned data types.
    CMPM,
    // Vector mask comparison generating mask bits for FP values.
    CMPMM,
    // Vector mask comparison with SAE for FP values.
    CMPMM_SAE,

    // Arithmetic operations with FLAGS results.
    ADD,
    SUB,
    ADC,
    SBB,
    SMUL,
    UMUL,
    OR,
    XOR,
    AND,

    // Bit field extract.
    BEXTR,
    BEXTRI,

    // Zero High Bits Starting with Specified Bit Position.
    BZHI,

    // Parallel extract and deposit.
    PDEP,
    PEXT,

    // X86-specific multiply by immediate.
    MUL_IMM,

    // Vector sign bit extraction.
    MOVMSK,

    // Vector bitwise comparisons.
    PTEST,

    // Vector packed fp sign bitwise comparisons.
    TESTP,

    // OR/AND test for masks.
    KORTEST,
    KTEST,

    // ADD for masks.
    KADD,

    // Several flavors of instructions with vector shuffle behaviors.
    // Saturated signed/unsigned packing.
    PACKSS,
    PACKUS,
    // Intra-lane alignr.
    PALIGNR,
    // AVX512 inter-lane alignr.
    VALIGN,
    PSHUFD,
    PSHUFHW,
    PSHUFLW,
    SHUFP,
    // VBMI2 Concat & Shift.
    VSHLD,
    VSHRD,
    VSHLDV,
    VSHRDV,
    // Shuffle Packed Values at 128-bit granularity.
    SHUF128,
    MOVDDUP,
    MOVSHDUP,
    MOVSLDUP,
    MOVLHPS,
    MOVHLPS,
    MOVSD,
    MOVSS,
    UNPCKL,
    UNPCKH,
    VPERMILPV,
    VPERMILPI,
    VPERMI,
    VPERM2X128,

    // Variable Permute (VPERM).
    // Res = VPERMV MaskV, V0
    VPERMV,

    // 3-op Variable Permute (VPERMT2).
    // Res = VPERMV3 V0, MaskV, V1
    VPERMV3,

    // Bitwise ternary logic.
    VPTERNLOG,
    // Fix Up Special Packed Float32/64 values.
    VFIXUPIMM,
    VFIXUPIMM_SAE,
    VFIXUPIMMS,
    VFIXUPIMMS_SAE,
    // Range Restriction Calculation For Packed Pairs of Float32/64 values.
    VRANGE,
    VRANGE_SAE,
    VRANGES,
    VRANGES_SAE,
    // Reduce - Perform Reduction Transformation on scalar/packed FP.
    VREDUCE,
    VREDUCE_SAE,
    VREDUCES,
    VREDUCES_SAE,
    // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
    // Also used by the legacy (V)ROUND intrinsics where we mask out the
    // scaling part of the immediate.
    VRNDSCALE,
    VRNDSCALE_SAE,
    VRNDSCALES,
    VRNDSCALES_SAE,
    // Tests types of packed FP values.
    VFPCLASS,
    // Tests types of scalar FP values.
    VFPCLASSS,

    // Broadcast (splat) scalar or element 0 of a vector. If the operand is
    // a vector, this node may change the vector length as part of the splat.
    VBROADCAST,
    // Broadcast mask to vector.
    VBROADCASTM,

    /// SSE4A Extraction and Insertion.
    EXTRQI,
    INSERTQI,

    // XOP arithmetic/logical shifts.
    VPSHA,
    VPSHL,
    // XOP signed/unsigned integer comparisons.
    VPCOM,
    VPCOMU,
    // XOP packed permute bytes.
    VPPERM,
    // XOP two source permutation.
    VPERMIL2,

    // Vector multiply packed unsigned doubleword integers.
    PMULUDQ,
    // Vector multiply packed signed doubleword integers.
    PMULDQ,
    // Vector Multiply Packed Integers with Round and Scale.
    MULHRS,

    // Multiply and Add Packed Integers.
    VPMADDUBSW,
    VPMADDWD,

    // AVX512IFMA multiply and add.
    // NOTE: These are different from the instruction and perform
    // op0 x op1 + op2.
    VPMADD52L,
    VPMADD52H,

    // VNNI
    VPDPBUSD,
    VPDPBUSDS,
    VPDPWSSD,
    VPDPWSSDS,

    // FMA nodes.
    // We use the target independent ISD::FMA for the non-inverted case.
    FNMADD,
    FMSUB,
    FNMSUB,
    FMADDSUB,
    FMSUBADD,

    // FMA with rounding mode.
    FMADD_RND,
    FNMADD_RND,
    FMSUB_RND,
    FNMSUB_RND,
    FMADDSUB_RND,
    FMSUBADD_RND,

    // Compress and expand.
    COMPRESS,
    EXPAND,

    // Bits shuffle
    VPSHUFBITQMB,

    // Convert Unsigned/Signed Integer to Floating-Point Value with rounding
    // mode.
    SINT_TO_FP_RND,
    UINT_TO_FP_RND,
    SCALAR_SINT_TO_FP,
    SCALAR_UINT_TO_FP,
    SCALAR_SINT_TO_FP_RND,
    SCALAR_UINT_TO_FP_RND,

    // Vector float/double to signed/unsigned integer.
    CVTP2SI,
    CVTP2UI,
    CVTP2SI_RND,
    CVTP2UI_RND,
    // Scalar float/double to signed/unsigned integer.
    CVTS2SI,
    CVTS2UI,
    CVTS2SI_RND,
    CVTS2UI_RND,

    // Vector float/double to signed/unsigned integer with truncation.
    CVTTP2SI,
    CVTTP2UI,
    CVTTP2SI_SAE,
    CVTTP2UI_SAE,
    // Scalar float/double to signed/unsigned integer with truncation.
    CVTTS2SI,
    CVTTS2UI,
    CVTTS2SI_SAE,
    CVTTS2UI_SAE,

    // Vector signed/unsigned integer to float/double.
    CVTSI2P,
    CVTUI2P,

    // Masked versions of above. Used for v2f64->v4f32.
    // SRC, PASSTHRU, MASK
    MCVTP2SI,
    MCVTP2UI,
    MCVTTP2SI,
    MCVTTP2UI,
    MCVTSI2P,
    MCVTUI2P,

    // Vector float to bfloat16.
    // Convert TWO packed single data to one packed BF16 data
    CVTNE2PS2BF16,
    // Convert packed single data to packed BF16 data
    CVTNEPS2BF16,
    // Masked version of above.
    // SRC, PASSTHRU, MASK
    MCVTNEPS2BF16,

    // Dot product of BF16 pairs accumulated into
    // packed single precision.
    DPBF16PS,

    // Save xmm argument registers to the stack, according to %al. An operator
    // is needed so that this can be expanded with control flow.
    VASTART_SAVE_XMM_REGS,

    // Windows' _chkstk call to do stack probing.
    WIN_ALLOCA,

    // For allocating variable amounts of stack space when using
    // segmented stacks. Checks if the current stacklet has enough space, and
    // falls back to heap allocation if not.
    SEG_ALLOCA,

    // For allocating stack space when using stack clash protector.
    // Allocation is performed by block, and each block is probed.
    PROBED_ALLOCA,

    // Memory barriers.
    MEMBARRIER,
    MFENCE,

    // Get a random integer and indicate whether it is valid in CF.
    RDRAND,

    // Get a NIST SP800-90B & C compliant random integer and
    // indicate whether it is valid in CF.
    RDSEED,

    // Protection keys
    // RDPKRU - Operand 0 is chain. Operand 1 is value for ECX.
    // WRPKRU - Operand 0 is chain. Operand 1 is value for EDX. Operand 2 is
    // value for ECX.
    RDPKRU,
    WRPKRU,

    // SSE42 string comparisons.
    // These nodes produce 3 results: index, mask, and flags. X86ISelDAGToDAG
    // will emit one or two instructions based on which results are used. If
    // both the flags and the index/mask are used, this still allows a single
    // instruction, since we won't have to pick an opcode for the flags
    // separately. Instead we can rely on the DAG to CSE everything and decide
    // at isel.
    PCMPISTR,
    PCMPESTR,

    // Test if in transactional execution.
    XTEST,

    // ERI instructions.
    RSQRT28,
    RSQRT28_SAE,
    RSQRT28S,
    RSQRT28S_SAE,
    RCP28,
    RCP28_SAE,
    RCP28S,
    RCP28S_SAE,
    EXP2,
    EXP2_SAE,

    // Conversions between float and half-float.
    CVTPS2PH,
    CVTPH2PS,
    CVTPH2PS_SAE,

    // Masked version of above.
    // SRC, RND, PASSTHRU, MASK
    MCVTPS2PH,

    // Galois Field Arithmetic Instructions
    GF2P8AFFINEINVQB,
    GF2P8AFFINEQB,
    GF2P8MULB,

    // LWP insert record.
    LWPINS,

    // User level wait
    UMWAIT,
    TPAUSE,

    // Enqueue Stores Instructions
    ENQCMD,
    ENQCMDS,

    // For avx512-vp2intersect
    VP2INTERSECT,

    // User level interrupts - testui
    TESTUI,

    /// X86 strict FP compare instructions.
    STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE,
    STRICT_FCMPS,

    // Vector packed double/float comparison.
    STRICT_CMPP,

    /// Vector comparison generating mask bits for fp and
    /// integer signed and unsigned data types.
    STRICT_CMPM,

    // Vector float/double to signed/unsigned integer with truncation.
    STRICT_CVTTP2SI,
    STRICT_CVTTP2UI,

    // Vector FP extend.
    STRICT_VFPEXT,

    // Vector FP round.
    STRICT_VFPROUND,

    // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
    // Also used by the legacy (V)ROUND intrinsics where we mask out the
    // scaling part of the immediate.
    STRICT_VRNDSCALE,

    // Vector signed/unsigned integer to float/double.
    STRICT_CVTSI2P,
    STRICT_CVTUI2P,

    // Strict FMA nodes.
    STRICT_FNMADD,
    STRICT_FMSUB,
    STRICT_FNMSUB,

    // Conversions between float and half-float.
    STRICT_CVTPS2PH,
    STRICT_CVTPH2PS,

    // WARNING: Only add nodes here if they are strict FP nodes. Non-memory and
    // non-strict FP nodes should be above FIRST_TARGET_STRICTFP_OPCODE.

    // Compare and swap.
    LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
    LCMPXCHG8_DAG,
    LCMPXCHG16_DAG,
    LCMPXCHG16_SAVE_RBX_DAG,

    /// LOCK-prefixed arithmetic read-modify-write instructions.
    /// EFLAGS, OUTCHAIN = LADD(INCHAIN, PTR, RHS)
    LADD,
    LSUB,
    LOR,
    LXOR,
    LAND,

    // Load, scalar_to_vector, and zero extend.
    VZEXT_LOAD,

    // extract_vector_elt, store.
    VEXTRACT_STORE,

    // scalar broadcast from memory.
    VBROADCAST_LOAD,

    // subvector broadcast from memory.
    SUBV_BROADCAST_LOAD,

    // Store FP control word into i16 memory.
    FNSTCW16m,

    // Load FP control word from i16 memory.
    FLDCW16m,

    /// This instruction implements FP_TO_SINT with the
    /// integer destination in memory and a FP reg source.  This corresponds
    /// to the X86::FIST*m instructions and the rounding mode change stuff. It
    /// has two inputs (token chain and address) and two outputs (int value
    /// and token chain). Memory VT specifies the type to store to.
    FP_TO_INT_IN_MEM,

    /// This instruction implements SINT_TO_FP with the
    /// integer source in memory and FP reg result.  This corresponds to the
    /// X86::FILD*m instructions. It has two inputs (token chain and address)
    /// and two outputs (FP value and token chain). The integer source type is
    /// specified by the memory VT.
    FILD,

    /// This instruction implements a fp->int store from FP stack
    /// slots. This corresponds to the fist instruction. It takes a
    /// chain operand, value to store, address, and glue. The memory VT
    /// specifies the type to store as.
    FIST,

    /// This instruction implements an extending load to FP stack slots.
    /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain
    /// operand, and ptr to load from. The memory VT specifies the type to
    /// load from.
    FLD,

    /// This instruction implements a truncating store from FP stack
    /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a
    /// chain operand, value to store, address, and glue. The memory VT
    /// specifies the type to store as.
    FST,

    /// These instructions grab the address of the next argument
    /// from a va_list. (reads and modifies the va_list in memory)
    VAARG_64,
    VAARG_X32,

    // Vector truncating store with unsigned/signed saturation
    VTRUNCSTOREUS,
    VTRUNCSTORES,
    // Vector truncating masked store with unsigned/signed saturation
    VMTRUNCSTOREUS,
    VMTRUNCSTORES,

    // X86 specific gather and scatter
    MGATHER,
    MSCATTER,

    // Key locker nodes that produce flags.
    AESENC128KL,
    AESDEC128KL,
    AESENC256KL,
    AESDEC256KL,
    AESENCWIDE128KL,
    AESDECWIDE128KL,
    AESENCWIDE256KL,
    AESDECWIDE256KL,

    // WARNING: Do not add anything at the end unless you want the node to
    // have a memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE all
    // opcodes will be treated as target memory ops!
  };
  } // end namespace X86ISD

  namespace X86 {
    /// The current rounding mode is represented in bits 11:10 of FPSR. These
    /// values are the same as the corresponding constants for the rounding
    /// mode used in glibc.
    enum RoundingMode {
      rmToNearest   = 0,        // FE_TONEAREST
      rmDownward    = 1 << 10,  // FE_DOWNWARD
      rmUpward      = 2 << 10,  // FE_UPWARD
      rmTowardZero  = 3 << 10,  // FE_TOWARDZERO
      rmMask        = 3 << 10   // Bit mask selecting rounding mode
    };
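
    // A hedged usage sketch (FPSRValue is a placeholder for a value read from
    // the FP control register): mask with rmMask, then compare against one of
    // the enumerators above.
    //   unsigned RM = FPSRValue & X86::rmMask;
    //   bool RoundsToNearest = (RM == X86::rmToNearest);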
  }

  /// Define some predicates that are used for node matching.
  namespace X86 {
    /// Returns true if Elt is a constant zero or floating point constant +0.0.
    bool isZeroNode(SDValue Elt);

    /// Returns true if the given offset can fit into the displacement field
    /// of the instruction.
    bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
                                      bool hasSymbolicDisplacement);
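
    // A hedged usage sketch: check whether an offset can be folded into the
    // instruction's displacement field (displacements are signed 32-bit
    // fields, so very large offsets are rejected).
    //   if (X86::isOffsetSuitableForCodeModel(Offset, CodeModel::Small,
    //                                         /*hasSymbolicDisplacement=*/true))
    //     ... fold Offset into the addressing mode ...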

    /// Determines whether the callee is required to pop its
    /// own arguments. Callee pop is necessary to support tail calls.
    bool isCalleePop(CallingConv::ID CallingConv,
                     bool is64Bit, bool IsVarArg, bool GuaranteeTCO);

    /// If Op is a constant whose elements are all the same constant or
    /// undefined, return true and return the constant value in \p SplatVal.
    /// If we have undef bits that don't cover an entire element, we treat these
    /// as zero if AllowPartialUndefs is set, else we fail and return false.
    bool isConstantSplat(SDValue Op, APInt &SplatVal,
                         bool AllowPartialUndefs = true);
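
    // A hedged usage sketch: detect a build vector whose lanes all hold the
    // same constant (Op is a placeholder SDValue).
    //   APInt SplatVal;
    //   if (X86::isConstantSplat(Op, SplatVal) && SplatVal.isSignMask())
    //     ... Op is a splat of the per-element sign-bit mask ...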
  } // end namespace X86

  //===--------------------------------------------------------------------===//
  //  X86 Implementation of the TargetLowering interface
  class X86TargetLowering final : public TargetLowering {
  public:
    explicit X86TargetLowering(const X86TargetMachine &TM,
                               const X86Subtarget &STI);

    unsigned getJumpTableEncoding() const override;
    bool useSoftFloat() const override;

    void markLibCallAttributes(MachineFunction *MF, unsigned CC,
                               ArgListTy &Args) const override;

    MVT getScalarShiftAmountTy(const DataLayout &, EVT VT) const override {
      return MVT::i8;
    }

    const MCExpr *
    LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
                              const MachineBasicBlock *MBB, unsigned uid,
                              MCContext &Ctx) const override;

    /// Returns relocation base for the given PIC jumptable.
    SDValue getPICJumpTableRelocBase(SDValue Table,
                                     SelectionDAG &DAG) const override;
    const MCExpr *
    getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
                                 unsigned JTI, MCContext &Ctx) const override;

    /// Return the desired alignment for ByVal aggregate
    /// function arguments in the caller parameter area. For X86, aggregates
    /// that contain SSE vectors are placed at 16-byte boundaries while the
    /// rest are at 4-byte boundaries.
    unsigned getByValTypeAlignment(Type *Ty,
                                   const DataLayout &DL) const override;

    EVT getOptimalMemOpType(const MemOp &Op,
                            const AttributeList &FuncAttributes) const override;

    /// Returns true if it's safe to use load / store of the
    /// specified type to expand memcpy / memset inline. This is mostly true
    /// for all types except for some special cases. For example, on X86
    /// targets without SSE2 f64 load / store are done with fldl / fstpl which
    /// also does type conversion. Note the specified type doesn't have to be
    /// legal as the hook is used before type legalization.
    bool isSafeMemOpType(MVT VT) const override;

    /// Returns true if the target allows unaligned memory accesses of the
    /// specified type. Returns whether it is "fast" in the last argument.
    bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align Alignment,
                                        MachineMemOperand::Flags Flags,
                                        bool *Fast) const override;

    /// Provide custom lowering hooks for some operations.
    ///
    SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;

    /// Replace the results of a node with an illegal result
    /// type with new values built out of custom code.
    ///
    void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                            SelectionDAG &DAG) const override;

    SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;

    /// Return true if the target has native support for
    /// the specified value type and it is 'desirable' to use the type for the
    /// given node type. e.g. On x86 i16 is legal, but undesirable since i16
    /// instruction encodings are longer and some i16 instructions are slow.
    bool isTypeDesirableForOp(unsigned Opc, EVT VT) const override;

    /// Return true if the target has native support for the
    /// specified value type and it is 'desirable' to use the type. e.g. On x86
    /// i16 is legal, but undesirable since i16 instruction encodings are longer
    /// and some i16 instructions are slow.
    bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const override;

    /// Return the newly negated expression if the cost is not expensive, and
    /// set the cost in \p Cost to indicate whether it is cheaper or neutral to
    /// do the negation.
    SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG,
                                 bool LegalOperations, bool ForCodeSize,
                                 NegatibleCost &Cost,
                                 unsigned Depth) const override;

    MachineBasicBlock *
    EmitInstrWithCustomInserter(MachineInstr &MI,
                                MachineBasicBlock *MBB) const override;

    /// This method returns the name of a target specific DAG node.
    const char *getTargetNodeName(unsigned Opcode) const override;

    /// Do not merge vector stores after legalization because that may conflict
    /// with x86-specific store splitting optimizations.
    bool mergeStoresAfterLegalization(EVT MemVT) const override {
      return !MemVT.isVector();
    }

    bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
                          const SelectionDAG &DAG) const override;

    bool isCheapToSpeculateCttz() const override;

    bool isCheapToSpeculateCtlz() const override;

    bool isCtlzFast() const override;

    bool hasBitPreservingFPLogic(EVT VT) const override {
      return VT == MVT::f32 || VT == MVT::f64 || VT.isVector();
    }

    bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const override {
      // If the pair to store is a mixture of float and int values, we will
      // save two bitwise instructions and one float-to-int instruction and
      // add one extra store instruction. There is potentially a more
      // significant benefit because it avoids the float->int domain switch
      // for the input value, so it is more likely a win.
      if ((LTy.isFloatingPoint() && HTy.isInteger()) ||
          (LTy.isInteger() && HTy.isFloatingPoint()))
        return true;
      // If the pair only contains int values, we will save two bitwise
      // instructions and add one extra store instruction (costing one more
      // store buffer). Since the benefit is less clear, we leave such pairs
      // out until we have a test case proving it is a win.
      return false;
    }

    bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;

    bool hasAndNotCompare(SDValue Y) const override;

    bool hasAndNot(SDValue Y) const override;

    bool hasBitTest(SDValue X, SDValue Y) const override;

    bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
        SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
        unsigned OldShiftOpcode, unsigned NewShiftOpcode,
        SelectionDAG &DAG) const override;

    bool shouldFoldConstantShiftPairToMask(const SDNode *N,
                                           CombineLevel Level) const override;

    bool shouldFoldMaskToVariableShiftPair(SDValue Y) const override;

    bool
    shouldTransformSignedTruncationCheck(EVT XVT,
                                         unsigned KeptBits) const override {
      // For vectors, we don't have a preference.
      if (XVT.isVector())
        return false;

      auto VTIsOk = [](EVT VT) -> bool {
        return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
               VT == MVT::i64;
      };

      // We are ok with KeptBitsVT being byte/word/dword, what MOVS supports.
      // XVT will be larger than KeptBitsVT.
      MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
      return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
    }

    bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override;

    bool shouldSplatInsEltVarIndex(EVT VT) const override;

    bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
      return VT.isScalarInteger();
    }

    /// Vector-sized comparisons are fast using PCMPEQ + PMOVMSK or PTEST.
    MVT hasFastEqualityCompare(unsigned NumBits) const override;
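
    // A hedged illustration of the intent: a 16-byte equality comparison can
    // be done with a single vector compare plus a movemask test rather than
    // two 8-byte integer compares, e.g. (pseudocode)
    //   %eq = pcmpeqb %a, %b ; pmovmskb %eq ; cmp $0xffff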

    /// Return the value type to use for ISD::SETCC.
    EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
                           EVT VT) const override;

    bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
                                      const APInt &DemandedElts,
                                      TargetLoweringOpt &TLO) const override;

    /// Determine which of the bits specified in Mask are known to be either
    /// zero or one and return them in the KnownZero/KnownOne bitsets.
    void computeKnownBitsForTargetNode(const SDValue Op,
                                       KnownBits &Known,
                                       const APInt &DemandedElts,
                                       const SelectionDAG &DAG,
                                       unsigned Depth = 0) const override;

    /// Determine the number of bits in the operation that are sign bits.
    unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
                                             const APInt &DemandedElts,
                                             const SelectionDAG &DAG,
                                             unsigned Depth) const override;

    bool SimplifyDemandedVectorEltsForTargetNode(SDValue Op,
                                                 const APInt &DemandedElts,
                                                 APInt &KnownUndef,
                                                 APInt &KnownZero,
                                                 TargetLoweringOpt &TLO,
                                                 unsigned Depth) const override;

    bool SimplifyDemandedVectorEltsForTargetShuffle(SDValue Op,
                                                    const APInt &DemandedElts,
                                                    unsigned MaskIndex,
                                                    TargetLoweringOpt &TLO,
                                                    unsigned Depth) const;

    bool SimplifyDemandedBitsForTargetNode(SDValue Op,
                                           const APInt &DemandedBits,
                                           const APInt &DemandedElts,
                                           KnownBits &Known,
                                           TargetLoweringOpt &TLO,
                                           unsigned Depth) const override;

    SDValue SimplifyMultipleUseDemandedBitsForTargetNode(
        SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
        SelectionDAG &DAG, unsigned Depth) const override;

    const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const override;

    SDValue unwrapAddress(SDValue N) const override;

    SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;

    bool ExpandInlineAsm(CallInst *CI) const override;

    ConstraintType getConstraintType(StringRef Constraint) const override;

    /// Examine constraint string and operand type and determine a weight value.
    /// The operand object must already have been set up with the operand type.
    ConstraintWeight
      getSingleConstraintMatchWeight(AsmOperandInfo &info,
                                     const char *constraint) const override;

    const char *LowerXConstraint(EVT ConstraintVT) const override;

    /// Lower the specified operand into the Ops vector. If it is invalid, don't
    /// add anything to Ops. If hasMemory is true, it means one of the asm
    /// constraints of the inline asm instruction being processed is 'm'.
    void LowerAsmOperandForConstraint(SDValue Op,
                                      std::string &Constraint,
                                      std::vector<SDValue> &Ops,
                                      SelectionDAG &DAG) const override;

    unsigned
    getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
      if (ConstraintCode == "v")
        return InlineAsm::Constraint_v;
      return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
    }

    /// Handle Lowering flag assembly outputs.
    SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag,
                                        const SDLoc &DL,
                                        const AsmOperandInfo &Constraint,
                                        SelectionDAG &DAG) const override;

    /// Given a physical register constraint
    /// (e.g. {edx}), return the register number and the register class for the
    /// register.  This should only be used for C_Register constraints.  On
    /// error, this returns a register number of 0.
    std::pair<unsigned, const TargetRegisterClass *>
    getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                 StringRef Constraint, MVT VT) const override;

    /// Return true if the addressing mode represented
    /// by AM is legal for this target, for a load/store of the specified type.
    bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
                               Type *Ty, unsigned AS,
                               Instruction *I = nullptr) const override;

    /// Return true if the specified immediate is a legal
    /// icmp immediate, that is, the target has icmp instructions which can
    /// compare a register against the immediate without having to materialize
    /// the immediate into a register.
    bool isLegalICmpImmediate(int64_t Imm) const override;

    /// Return true if the specified immediate is a legal
    /// add immediate, that is, the target has add instructions which can
    /// add a register and the immediate without having to materialize
    /// the immediate into a register.
    bool isLegalAddImmediate(int64_t Imm) const override;

    bool isLegalStoreImmediate(int64_t Imm) const override;

    /// Return the cost of the scaling factor used in the addressing
    /// mode represented by AM for this target, for a load/store
    /// of the specified type.
    /// If the AM is supported, the return value must be >= 0.
    /// If the AM is not supported, it returns a negative value.
    InstructionCost getScalingFactorCost(const DataLayout &DL,
                                         const AddrMode &AM, Type *Ty,
                                         unsigned AS) const override;

    /// This is used to enable splatted operand transforms for vector shifts
    /// and vector funnel shifts.
    bool isVectorShiftByScalarCheap(Type *Ty) const override;

    /// Add x86-specific opcodes to the default list.
    bool isBinOp(unsigned Opcode) const override;

    /// Returns true if the opcode is a commutative binary operation.
    bool isCommutativeBinOp(unsigned Opcode) const override;

    /// Return true if it's free to truncate a value of
    /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate an i32 value in
    /// register EAX to i16 by referencing its sub-register AX.
    bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
    bool isTruncateFree(EVT VT1, EVT VT2) const override;

    bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;

    /// Return true if any actual instruction that defines a
    /// value of type Ty1 implicitly zero-extends the value to Ty2 in the result
    /// register. This does not necessarily include registers defined in
    /// unknown ways, such as incoming arguments, or copies from unknown
    /// virtual registers. Also, if isTruncateFree(Ty2, Ty1) is true, this
    /// does not necessarily apply to truncate instructions. e.g. on x86-64,
    /// all instructions that define 32-bit values implicitly zero-extend the
    /// result out to 64 bits.
    bool isZExtFree(Type *Ty1, Type *Ty2) const override;
    bool isZExtFree(EVT VT1, EVT VT2) const override;
    bool isZExtFree(SDValue Val, EVT VT2) const override;

    bool shouldSinkOperands(Instruction *I,
                            SmallVectorImpl<Use *> &Ops) const override;
    bool shouldConvertPhiType(Type *From, Type *To) const override;

    /// Return true if folding a vector load into ExtVal (a sign, zero, or any
    /// extend node) is profitable.
    bool isVectorLoadExtDesirable(SDValue) const override;

    /// Return true if an FMA operation is faster than a pair of fmul and fadd
    /// instructions. fmuladd intrinsics will be expanded to FMAs when this
    /// method returns true, otherwise fmuladd is expanded to fmul + fadd.
    bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
                                    EVT VT) const override;

    /// Return true if it's profitable to narrow
    /// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow
    /// from i32 to i8 but not from i32 to i16.
    bool isNarrowingProfitable(EVT VT1, EVT VT2) const override;

    /// Given an intrinsic, checks if on the target the intrinsic will need to map
    /// to a MemIntrinsicNode (touches memory). If this is the case, it returns
    /// true and stores the intrinsic information into the IntrinsicInfo that was
    /// passed to the function.
    bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
                            MachineFunction &MF,
                            unsigned Intrinsic) const override;

    /// Returns true if the target can instruction select the
    /// specified FP immediate natively. If false, the legalizer will
    /// materialize the FP immediate as a load from a constant pool.
    bool isFPImmLegal(const APFloat &Imm, EVT VT,
                      bool ForCodeSize) const override;

    /// Targets can use this to indicate that they only support *some*
    /// VECTOR_SHUFFLE operations, those with specific masks. By default, if a
    /// target supports the VECTOR_SHUFFLE node, all mask values are assumed to
    /// be legal.
    bool isShuffleMaskLegal(ArrayRef<int> Mask, EVT VT) const override;

    /// Similar to isShuffleMaskLegal. Targets can use this to indicate if there
    /// is a suitable VECTOR_SHUFFLE that can be used to replace a VAND with a
    /// constant pool entry.
    bool isVectorClearMaskLegal(ArrayRef<int> Mask, EVT VT) const override;

    /// Returns true if lowering to a jump table is allowed.
    bool areJTsAllowed(const Function *Fn) const override;

    /// If true, then instruction selection should
    /// seek to shrink the FP constant of the specified type to a smaller type
    /// in order to save space and / or reduce runtime.
    bool ShouldShrinkFPConstant(EVT VT) const override {
      // Don't shrink FP constpool if SSE2 is available since cvtss2sd is more
      // expensive than a straight movsd. On the other hand, it's important to
      // shrink long double fp constant since fldt is very slow.
      return !X86ScalarSSEf64 || VT == MVT::f80;
    }

    /// Return true if we believe it is correct and profitable to reduce the
    /// load node to a smaller type.
    bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
                               EVT NewVT) const override;

    /// Return true if the specified scalar FP type is computed in an SSE
    /// register, not on the X87 floating point stack.
    bool isScalarFPTypeInSSEReg(EVT VT) const {
      return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 requires SSE2
             (VT == MVT::f32 && X86ScalarSSEf32);   // f32 requires SSE1
    }

    /// Returns true if it is beneficial to convert a load of a constant
    /// to just the constant itself.
    bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                           Type *Ty) const override;

    bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const override;

    bool convertSelectOfConstantsToMath(EVT VT) const override;

    bool decomposeMulByConstant(LLVMContext &Context, EVT VT,
                                SDValue C) const override;

    /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
    /// with this index.
    bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
                                 unsigned Index) const override;

    /// Scalar ops always have equal or better analysis/performance/power than
    /// the vector equivalent, so this always makes sense if the scalar op is
    /// supported.
    bool shouldScalarizeBinop(SDValue) const override;

    /// Extract of a scalar FP value from index 0 of a vector is free.
    bool isExtractVecEltCheap(EVT VT, unsigned Index) const override {
      EVT EltVT = VT.getScalarType();
      return (EltVT == MVT::f32 || EltVT == MVT::f64) && Index == 0;
    }

    /// Overflow nodes should get combined/lowered to optimal instructions
    /// (they should allow eliminating explicit compares by getting flags from
    /// math ops).
    bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
                              bool MathUsed) const override;

    bool storeOfVectorConstantIsCheap(EVT MemVT, unsigned NumElem,
                                      unsigned AddrSpace) const override {
      // If we can replace more than 2 scalar stores, there will be a reduction
      // in instructions even after we add a vector constant load.
      return NumElem > 2;
    }

    bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
                                 const SelectionDAG &DAG,
                                 const MachineMemOperand &MMO) const override;

    /// Intel processors have a unified instruction and data cache.
    const char *getClearCacheBuiltinName() const override {
      return nullptr; // nothing to do, move along.
    }

    Register getRegisterByName(const char* RegName, LLT VT,
                               const MachineFunction &MF) const override;

    /// If a physical register, this returns the register that receives the
    /// exception address on entry to an EH pad.
    Register
    getExceptionPointerRegister(const Constant *PersonalityFn) const override;

    /// If a physical register, this returns the register that receives the
    /// exception typeid on entry to a landing pad.
    Register
    getExceptionSelectorRegister(const Constant *PersonalityFn) const override;

    bool needsFixedCatchObjects() const override;

    /// This method returns a target specific FastISel object,
    /// or null if the target does not support "fast" ISel.
    FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                             const TargetLibraryInfo *libInfo) const override;

    /// If the target has a standard location for the stack protector cookie,
    /// returns the address of that location. Otherwise, returns nullptr.
    Value *getIRStackGuard(IRBuilder<> &IRB) const override;

    bool useLoadStackGuardNode() const override;
    bool useStackGuardXorFP() const override;
    void insertSSPDeclarations(Module &M) const override;
    Value *getSDagStackGuard(const Module &M) const override;
    Function *getSSPStackGuardCheck(const Module &M) const override;
    SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
                                const SDLoc &DL) const override;

    /// If the target stores the SafeStack pointer at a fixed offset in some
    /// non-standard address space, return the pointer to that location, with
    /// the address space and offset populated as appropriate.
    Value *getSafeStackPointerLocation(IRBuilder<> &IRB) const override;

    std::pair<SDValue, SDValue> BuildFILD(EVT DstVT, EVT SrcVT, const SDLoc &DL,
                                          SDValue Chain, SDValue Pointer,
                                          MachinePointerInfo PtrInfo,
                                          Align Alignment,
                                          SelectionDAG &DAG) const;

    /// Customize the preferred legalization strategy for certain types.
    LegalizeTypeAction getPreferredVectorAction(MVT VT) const override;

    bool softPromoteHalfType() const override { return true; }

    MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,
                                      EVT VT) const override;

    unsigned getNumRegistersForCallingConv(LLVMContext &Context,
                                           CallingConv::ID CC,
                                           EVT VT) const override;

    unsigned getVectorTypeBreakdownForCallingConv(
        LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
        unsigned &NumIntermediates, MVT &RegisterVT) const override;

    bool isIntDivCheap(EVT VT, AttributeList Attr) const override;

    bool supportSwiftError() const override;

    bool hasStackProbeSymbol(MachineFunction &MF) const override;
    bool hasInlineStackProbe(MachineFunction &MF) const override;
    StringRef getStackProbeSymbolName(MachineFunction &MF) const override;

    unsigned getStackProbeSize(MachineFunction &MF) const;

    bool hasVectorBlend() const override { return true; }

    unsigned getMaxSupportedInterleaveFactor() const override { return 4; }

    /// Lower interleaved load(s) into target specific
    /// instructions/intrinsics.
    bool lowerInterleavedLoad(LoadInst *LI,
                              ArrayRef<ShuffleVectorInst *> Shuffles,
                              ArrayRef<unsigned> Indices,
                              unsigned Factor) const override;

    /// Lower interleaved store(s) into target specific
    /// instructions/intrinsics.
    bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
                               unsigned Factor) const override;

    SDValue expandIndirectJTBranch(const SDLoc& dl, SDValue Value,
                                   SDValue Addr, SelectionDAG &DAG)
                                   const override;

    Align getPrefLoopAlignment(MachineLoop *ML) const override;

  protected:
    std::pair<const TargetRegisterClass *, uint8_t>
    findRepresentativeClass(const TargetRegisterInfo *TRI,
                            MVT VT) const override;

  private:
    /// Keep a reference to the X86Subtarget around so that we can
    /// make the right decision when generating code for different targets.
    const X86Subtarget &Subtarget;

    /// Select between SSE or x87 floating point ops.
    /// When SSE is available, use it for f32 operations.
    /// When SSE2 is available, use it for f64 operations.
    bool X86ScalarSSEf32;
    bool X86ScalarSSEf64;

    /// A list of legal FP immediates.
    std::vector<APFloat> LegalFPImmediates;

    /// Indicate that this x86 target can instruction
    /// select the specified FP immediate natively.
    void addLegalFPImmediate(const APFloat& Imm) {
      LegalFPImmediates.push_back(Imm);
    }

    SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
                            CallingConv::ID CallConv, bool isVarArg,
                            const SmallVectorImpl<ISD::InputArg> &Ins,
                            const SDLoc &dl, SelectionDAG &DAG,
                            SmallVectorImpl<SDValue> &InVals,
                            uint32_t *RegMask) const;
    SDValue LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
                             const SmallVectorImpl<ISD::InputArg> &ArgInfo,
                             const SDLoc &dl, SelectionDAG &DAG,
                             const CCValAssign &VA, MachineFrameInfo &MFI,
                             unsigned i) const;
    SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg,
                             const SDLoc &dl, SelectionDAG &DAG,
                             const CCValAssign &VA,
                             ISD::ArgFlagsTy Flags, bool isByval) const;

    // Call lowering helpers.

    /// Check whether the call is eligible for tail call optimization. Targets
    /// that want to do tail call optimization should implement this function.
    bool IsEligibleForTailCallOptimization(SDValue Callee,
                                           CallingConv::ID CalleeCC,
                                           bool isVarArg,
                                           bool isCalleeStructRet,
                                           bool isCallerStructRet,
                                           Type *RetTy,
                                    const SmallVectorImpl<ISD::OutputArg> &Outs,
                                    const SmallVectorImpl<SDValue> &OutVals,
                                    const SmallVectorImpl<ISD::InputArg> &Ins,
                                           SelectionDAG& DAG) const;
    SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr,
                                    SDValue Chain, bool IsTailCall,
                                    bool Is64Bit, int FPDiff,
                                    const SDLoc &dl) const;

    unsigned GetAlignedArgumentStackSize(unsigned StackSize,
                                         SelectionDAG &DAG) const;

    unsigned getAddressSpace(void) const;

    SDValue FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned,
                            SDValue &Chain) const;
    SDValue LRINT_LLRINTHelper(SDNode *N, SelectionDAG &DAG) const;

    SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;

    unsigned getGlobalWrapperKind(const GlobalValue *GV = nullptr,
                                  const unsigned char OpFlags = 0) const;
    SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;

    /// Creates target global address or external symbol nodes for calls or
    /// other uses.
    SDValue LowerGlobalOrExternal(SDValue Op, SelectionDAG &DAG,
                                  bool ForCall) const;

    SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerLRINT_LLRINT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGC_TRANSITION(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerFaddFsub(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;

    SDValue
    LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
                         const SmallVectorImpl<ISD::InputArg> &Ins,
                         const SDLoc &dl, SelectionDAG &DAG,
                         SmallVectorImpl<SDValue> &InVals) const override;
    SDValue LowerCall(CallLoweringInfo &CLI,
1555                      SmallVectorImpl<SDValue> &InVals) const override;
1556
1557    SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1558                        const SmallVectorImpl<ISD::OutputArg> &Outs,
1559                        const SmallVectorImpl<SDValue> &OutVals,
1560                        const SDLoc &dl, SelectionDAG &DAG) const override;
1561
1562    bool supportSplitCSR(MachineFunction *MF) const override {
1563      return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
1564          MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
1565    }
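    // Illustrative sketch (front-end side, not part of this interface): a
    // function opts into split CSR handling by carrying the fast TLS calling
    // convention together with the nounwind attribute, e.g.
    //   F->setCallingConv(CallingConv::CXX_FAST_TLS);
    //   F->addFnAttr(Attribute::NoUnwind);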
    void initializeSplitCSR(MachineBasicBlock *Entry) const override;
    void insertCopiesSplitCSR(
      MachineBasicBlock *Entry,
      const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;

    bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;

    bool mayBeEmittedAsTailCall(const CallInst *CI) const override;

    EVT getTypeForExtReturn(LLVMContext &Context, EVT VT,
                            ISD::NodeType ExtendKind) const override;

    bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
                        bool isVarArg,
                        const SmallVectorImpl<ISD::OutputArg> &Outs,
                        LLVMContext &Context) const override;

    const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;

    TargetLoweringBase::AtomicExpansionKind
    shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
    bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
    TargetLoweringBase::AtomicExpansionKind
    shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;

    LoadInst *
    lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;

    bool lowerAtomicStoreAsStoreSDNode(const StoreInst &SI) const override;
    bool lowerAtomicLoadAsLoadSDNode(const LoadInst &LI) const override;

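    /// Return true if an atomic operation on \p MemType is wider than the
    /// natively supported cmpxchg width and so must be lowered with the
    /// CMPXCHG8B/CMPXCHG16B instruction pair. (Summary added for reference;
    /// the definition in X86ISelLowering.cpp is authoritative.)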
    bool needsCmpXchgNb(Type *MemType) const;

    void SetupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB,
                                MachineBasicBlock *DispatchBB, int FI) const;

    /// Utility function to emit the low-level va_arg code for X86-64.
    MachineBasicBlock *
    EmitVAARGWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const;

    /// Utility function to lower a pair of cascaded CMOV (select) pseudo
    /// instructions, where the second CMOV consumes the result of the first,
    /// by expanding them into control flow.
    MachineBasicBlock *EmitLoweredCascadedSelect(MachineInstr &MI1,
                                                 MachineInstr &MI2,
                                                 MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredSelect(MachineInstr &I,
                                         MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
                                           MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr &MI,
                                            MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredProbedAlloca(MachineInstr &MI,
                                               MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredTLSAddr(MachineInstr &MI,
                                          MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredTLSCall(MachineInstr &MI,
                                          MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredIndirectThunk(MachineInstr &MI,
                                                MachineBasicBlock *BB) const;

    MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI,
                                        MachineBasicBlock *MBB) const;

    void emitSetJmpShadowStackFix(MachineInstr &MI,
                                  MachineBasicBlock *MBB) const;

    MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI,
                                         MachineBasicBlock *MBB) const;

    MachineBasicBlock *emitLongJmpShadowStackFix(MachineInstr &MI,
                                                 MachineBasicBlock *MBB) const;

    MachineBasicBlock *EmitSjLjDispatchBlock(MachineInstr &MI,
                                             MachineBasicBlock *MBB) const;

    /// Emit flags for the given setcc condition and operands. Also returns the
    /// corresponding X86 condition code constant in X86CC.
    SDValue emitFlagsForSetcc(SDValue Op0, SDValue Op1, ISD::CondCode CC,
                              const SDLoc &dl, SelectionDAG &DAG,
                              SDValue &X86CC) const;
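    // Typical use (a sketch of the pattern in LowerSETCC, not a verbatim
    // excerpt): emit the flag-producing node, then wrap it in an X86ISD::SETCC
    // whose first operand is the returned condition code:
    //   SDValue X86CC;
    //   SDValue EFLAGS = emitFlagsForSetcc(Op0, Op1, CC, dl, DAG, X86CC);
    //   return DAG.getNode(X86ISD::SETCC, dl, MVT::i8, X86CC, EFLAGS);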

    /// Check if replacement of SQRT with RSQRT should be disabled.
    bool isFsqrtCheap(SDValue Op, SelectionDAG &DAG) const override;

    /// Use rsqrt* to speed up sqrt calculations.
    SDValue getSqrtEstimate(SDValue Op, SelectionDAG &DAG, int Enabled,
                            int &RefinementSteps, bool &UseOneConstNR,
                            bool Reciprocal) const override;
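    // The initial RSQRTSS/RSQRTPS estimate is refined by the generic DAG
    // combiner; each refinement step is the usual Newton-Raphson iteration
    // (sketched here for reference, two-constant form):
    //   x1 = x0 * (1.5 - 0.5 * a * x0 * x0)   // converges toward 1/sqrt(a)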

    /// Use rcp* to speed up fdiv calculations.
    SDValue getRecipEstimate(SDValue Op, SelectionDAG &DAG, int Enabled,
                             int &RefinementSteps) const override;
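    // Likewise, the RCPSS/RCPPS estimate of 1/a is refined with Newton-Raphson
    // steps (sketch): x1 = x0 * (2.0 - a * x0), so fdiv b/a becomes b * x1.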

    /// Reassociate floating point divisions into multiply by reciprocal.
    unsigned combineRepeatedFPDivisors() const override;

    SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
                          SmallVectorImpl<SDNode *> &Created) const override;
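    // For reference, the standard power-of-two signed division sequence this
    // hook customizes (a sketch; the X86 override mainly changes how the
    // rounding adjustment is materialized, e.g. with CMOV):
    //   sdiv x, 2^k  ==>  (x + ((x >> (BitWidth - 1)) & (2^k - 1))) >>s k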
  };

  namespace X86 {
    FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                             const TargetLibraryInfo *libInfo);
  } // end namespace X86

  // X86 specific Gather/Scatter nodes.
  // The class has the same order of operands as MaskedGatherScatterSDNode for
  // convenience.
  class X86MaskedGatherScatterSDNode : public MemIntrinsicSDNode {
  public:
    // This is intended as a utility and should never be directly created.
    X86MaskedGatherScatterSDNode() = delete;
    ~X86MaskedGatherScatterSDNode() = delete;

    const SDValue &getBasePtr() const { return getOperand(3); }
    const SDValue &getIndex()   const { return getOperand(4); }
    const SDValue &getMask()    const { return getOperand(2); }
    const SDValue &getScale()   const { return getOperand(5); }

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::MGATHER ||
             N->getOpcode() == X86ISD::MSCATTER;
    }
  };

  class X86MaskedGatherSDNode : public X86MaskedGatherScatterSDNode {
  public:
    const SDValue &getPassThru() const { return getOperand(1); }

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::MGATHER;
    }
  };

  class X86MaskedScatterSDNode : public X86MaskedGatherScatterSDNode {
  public:
    const SDValue &getValue() const { return getOperand(1); }

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::MSCATTER;
    }
  };
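  // The classof() hooks above plug these nodes into LLVM's RTTI, so DAG code
  // can pattern-match them in the usual way. An illustrative sketch (not a
  // verbatim excerpt from a combine):
  //   if (auto *Gather = dyn_cast<X86MaskedGatherSDNode>(N)) {
  //     SDValue Index = Gather->getIndex();
  //     SDValue PassThru = Gather->getPassThru();
  //     // ... rewrite or fold the gather ...
  //   }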

  /// Generate unpacklo/unpackhi shuffle mask.
  void createUnpackShuffleMask(EVT VT, SmallVectorImpl<int> &Mask, bool Lo,
                               bool Unary);
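  // For example (illustrative): for v4i32 with Lo=true this produces
  // <0, 4, 1, 5> in the binary case (elements interleaved from both inputs,
  // matching UNPCKLPS), and <0, 0, 1, 1> when Unary is set.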

  /// Similar to unpacklo/unpackhi, but without the 128-bit lane limitation
  /// imposed by AVX and specific to the unary pattern. Example:
  /// v8iX Lo --> <0, 0, 1, 1, 2, 2, 3, 3>
  /// v8iX Hi --> <4, 4, 5, 5, 6, 6, 7, 7>
  void createSplat2ShuffleMask(MVT VT, SmallVectorImpl<int> &Mask, bool Lo);

} // end namespace llvm

#endif // LLVM_LIB_TARGET_X86_X86ISELLOWERING_H