//===-- X86ISelLowering.h - X86 DAG Lowering Interface ----------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that X86 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
#define LLVM_LIB_TARGET_X86_X86ISELLOWERING_H

#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/TargetLowering.h"

namespace llvm {
  class X86Subtarget;
  class X86TargetMachine;

  namespace X86ISD {
    // X86 Specific DAG Nodes
  enum NodeType : unsigned {
    // Start the numbering where the builtin ops leave off.
    FIRST_NUMBER = ISD::BUILTIN_OP_END,

    /// Bit scan forward.
    BSF,
    /// Bit scan reverse.
    BSR,

    /// X86 funnel/double shift i16 instructions. These correspond to
    /// X86::SHLDW and X86::SHRDW instructions, which have different amount
    /// modulo rules from generic funnel shifts.
    /// NOTE: The operand order matches ISD::FSHL/FSHR, not SHLD/SHRD.
    FSHL,
    FSHR,

    /// Bitwise logical AND of floating point values. This corresponds
    /// to X86::ANDPS or X86::ANDPD.
    FAND,

    /// Bitwise logical OR of floating point values. This corresponds
    /// to X86::ORPS or X86::ORPD.
    FOR,

    /// Bitwise logical XOR of floating point values. This corresponds
    /// to X86::XORPS or X86::XORPD.
    FXOR,

    /// Bitwise logical ANDNOT of floating point values. This
    /// corresponds to X86::ANDNPS or X86::ANDNPD.
    FANDN,

    /// These operations represent an abstract X86 call
    /// instruction, which includes a bunch of information. In particular the
    /// operands of these nodes are:
    ///
    ///     #0 - The incoming token chain
    ///     #1 - The callee
    ///     #2 - The number of arg bytes the caller pushes on the stack.
    ///     #3 - The number of arg bytes the callee pops off the stack.
    ///     #4 - The value to pass in AL/AX/EAX (optional)
    ///     #5 - The value to pass in DL/DX/EDX (optional)
    ///
    /// The result values of these nodes are:
    ///
    ///     #0 - The outgoing token chain
    ///     #1 - The first register result value (optional)
    ///     #2 - The second register result value (optional)
    ///
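    /// A rough sketch of how such a node is built (the operand values here
    /// are illustrative only, not the exact ones produced by
    /// X86TargetLowering::LowerCall):
    /// \code
    ///   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
    ///   SDValue Ops[] = {Chain, Callee, BytesPushed, BytesPopped};
    ///   Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops);
    /// \endcode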
    CALL,

    /// Same as call except it adds the NoTrack prefix.
    NT_CALL,

    // Pseudo for an ObjC call that gets emitted together with a special
    // marker instruction.
    CALL_RVMARKER,

    /// X86 compare and logical compare instructions.
    CMP,
    FCMP,
    COMI,
    UCOMI,

    /// X86 bit-test instructions.
    BT,

    /// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS
    /// operand, usually produced by a CMP instruction.
    SETCC,

    /// X86 Select
    SELECTS,

    // Same as SETCC except it's materialized with an sbb and the value is all
    // ones or all zeros.
    SETCC_CARRY, // R = carry_bit ? ~0 : 0

    /// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD.
    /// Operands are two FP values to compare; result is a mask of
    /// 0s or 1s.  Generally DTRT for C/C++ with NaNs.
    FSETCC,

    /// X86 FP SETCC, similar to above, but with output as an i1 mask and
    /// a version with SAE.
    FSETCCM,
    FSETCCM_SAE,

    /// X86 conditional moves. Operand 0 and operand 1 are the two values
    /// to select from. Operand 2 is the condition code, and operand 3 is the
    /// flag operand produced by a CMP or TEST instruction.
    CMOV,

    /// X86 conditional branches. Operand 0 is the chain operand, operand 1
    /// is the block to branch if condition is true, operand 2 is the
    /// condition code, and operand 3 is the flag operand produced by a CMP
    /// or TEST instruction.
    BRCOND,

    /// BRIND node with NoTrack prefix. Operand 0 is the chain operand and
    /// operand 1 is the target address.
    NT_BRIND,

    /// Return with a glue operand. Operand 0 is the chain operand, operand
    /// 1 is the number of bytes of stack to pop.
    RET_GLUE,

    /// Return from interrupt. Operand 0 is the number of bytes to pop.
    IRET,

    /// Repeat fill, corresponds to X86::REP_STOSx.
    REP_STOS,

    /// Repeat move, corresponds to X86::REP_MOVSx.
    REP_MOVS,

    /// On Darwin, this node represents the result of the popl
    /// at function entry, used for PIC code.
    GlobalBaseReg,

    /// A wrapper node for TargetConstantPool, TargetJumpTable,
    /// TargetExternalSymbol, TargetGlobalAddress, TargetGlobalTLSAddress,
    /// MCSymbol and TargetBlockAddress.
    Wrapper,

    /// Special wrapper used under X86-64 PIC mode for RIP
    /// relative displacements.
    WrapperRIP,

    /// Copies a 64-bit value from an MMX vector to the low word
    /// of an XMM vector, with the high word zero filled.
    MOVQ2DQ,

    /// Copies a 64-bit value from the low word of an XMM vector
    /// to an MMX vector.
    MOVDQ2Q,

    /// Copies a 32-bit value from the low word of an MMX
    /// vector to a GPR.
    MMX_MOVD2W,

    /// Copies a GPR into the low 32-bit word of an MMX vector
    /// and zeroes out the high word.
    MMX_MOVW2D,

    /// Extract an 8-bit value from a vector and zero extend it to
    /// i32, corresponds to X86::PEXTRB.
    PEXTRB,

    /// Extract a 16-bit value from a vector and zero extend it to
    /// i32, corresponds to X86::PEXTRW.
    PEXTRW,

    /// Insert any element of a 4 x float vector into any element
    /// of a destination 4 x float vector.
    INSERTPS,

    /// Insert the lower 8-bits of a 32-bit value to a vector,
    /// corresponds to X86::PINSRB.
    PINSRB,

    /// Insert the lower 16-bits of a 32-bit value to a vector,
    /// corresponds to X86::PINSRW.
    PINSRW,

    /// Shuffle 16 8-bit values within a vector.
    PSHUFB,

    /// Compute Sum of Absolute Differences.
    PSADBW,
    /// Compute Double Block Packed Sum-Absolute-Differences
    DBPSADBW,

    /// Bitwise Logical AND NOT of Packed FP values.
    ANDNP,

    /// Blend where the selector is an immediate.
    BLENDI,

    /// Dynamic (non-constant condition) vector blend where only the sign bits
    /// of the condition elements are used. This is used to enforce that the
    /// condition mask is not valid for generic VSELECT optimizations. This
    /// is also used to implement the intrinsics.
    /// Operands are in VSELECT order: MASK, TRUE, FALSE
    BLENDV,

    /// Combined add and sub on an FP vector.
    ADDSUB,

    // FP vector ops with rounding mode.
    FADD_RND,
    FADDS,
    FADDS_RND,
    FSUB_RND,
    FSUBS,
    FSUBS_RND,
    FMUL_RND,
    FMULS,
    FMULS_RND,
    FDIV_RND,
    FDIVS,
    FDIVS_RND,
    FMAX_SAE,
    FMAXS_SAE,
    FMIN_SAE,
    FMINS_SAE,
    FSQRT_RND,
    FSQRTS,
    FSQRTS_RND,

    // FP vector get exponent.
    FGETEXP,
    FGETEXP_SAE,
    FGETEXPS,
    FGETEXPS_SAE,
    // Extract Normalized Mantissas.
    VGETMANT,
    VGETMANT_SAE,
    VGETMANTS,
    VGETMANTS_SAE,
    // FP Scale.
    SCALEF,
    SCALEF_RND,
    SCALEFS,
    SCALEFS_RND,

    /// Integer horizontal add/sub.
    HADD,
    HSUB,

    /// Floating point horizontal add/sub.
    FHADD,
    FHSUB,

    // Detect Conflicts Within a Vector
    CONFLICT,

    /// Floating point max and min.
    FMAX,
    FMIN,

    /// Commutative FMIN and FMAX.
    FMAXC,
    FMINC,

    /// Scalar intrinsic floating point max and min.
    FMAXS,
    FMINS,

    /// Floating point reciprocal-sqrt and reciprocal approximation.
    /// Note that these typically require refinement
    /// in order to obtain suitable precision.
    FRSQRT,
    FRCP,

    // AVX-512 reciprocal approximations with a little more precision.
    RSQRT14,
    RSQRT14S,
    RCP14,
    RCP14S,

    // Thread Local Storage.
    TLSADDR,

    // Thread Local Storage. A call to get the start address
    // of the TLS block for the current module.
    TLSBASEADDR,

    // Thread Local Storage. A call to an OS-provided thunk at the
    // address from an earlier relocation.
    TLSCALL,

    // Exception Handling helpers.
    EH_RETURN,

    // SjLj exception handling setjmp.
    EH_SJLJ_SETJMP,

    // SjLj exception handling longjmp.
    EH_SJLJ_LONGJMP,

    // SjLj exception handling dispatch.
    EH_SJLJ_SETUP_DISPATCH,

    /// Tail call return. See X86TargetLowering::LowerCall for
    /// the list of operands.
    TC_RETURN,

    // Vector move to low scalar and zero higher vector elements.
    VZEXT_MOVL,

    // Vector integer truncate.
    VTRUNC,
    // Vector integer truncate with unsigned/signed saturation.
    VTRUNCUS,
    VTRUNCS,

    // Masked version of the above. Used when less than a 128-bit result is
    // produced since the mask only applies to the lower elements and can't
    // be represented by a select.
    // SRC, PASSTHRU, MASK
    VMTRUNC,
    VMTRUNCUS,
    VMTRUNCS,

    // Vector FP extend.
    VFPEXT,
    VFPEXT_SAE,
    VFPEXTS,
    VFPEXTS_SAE,

    // Vector FP round.
    VFPROUND,
    VFPROUND_RND,
    VFPROUNDS,
    VFPROUNDS_RND,

    // Masked version of above. Used for v2f64->v4f32.
    // SRC, PASSTHRU, MASK
    VMFPROUND,

    // 128-bit vector logical left / right shift
    VSHLDQ,
    VSRLDQ,

    // Vector shift elements
    VSHL,
    VSRL,
    VSRA,

    // Vector variable shift
    VSHLV,
    VSRLV,
    VSRAV,

    // Vector shift elements by immediate
    VSHLI,
    VSRLI,
    VSRAI,

    // Shifts of mask registers.
    KSHIFTL,
    KSHIFTR,

    // Bit rotate by immediate
    VROTLI,
    VROTRI,

    // Vector packed double/float comparison.
    CMPP,

    // Vector integer comparisons.
    PCMPEQ,
    PCMPGT,

    // v8i16 Horizontal minimum and position.
    PHMINPOS,

    MULTISHIFT,

    /// Vector comparison generating mask bits for fp and
    /// integer signed and unsigned data types.
    CMPM,
    // Vector mask comparison generating mask bits for FP values.
    CMPMM,
    // Vector mask comparison with SAE for FP values.
    CMPMM_SAE,

    // Arithmetic operations with FLAGS results.
    ADD,
    SUB,
    ADC,
    SBB,
    SMUL,
    UMUL,
    OR,
    XOR,
    AND,

    // Bit field extract.
    BEXTR,
    BEXTRI,

    // Zero High Bits Starting with Specified Bit Position.
    BZHI,

    // Parallel extract and deposit.
    PDEP,
    PEXT,

    // X86-specific multiply by immediate.
    MUL_IMM,

    // Vector sign bit extraction.
    MOVMSK,

    // Vector bitwise comparisons.
    PTEST,

    // Vector packed fp sign bitwise comparisons.
    TESTP,

    // OR/AND test for masks.
    KORTEST,
    KTEST,

    // ADD for masks.
    KADD,

    // Several flavors of instructions with vector shuffle behaviors.
    // Saturated signed/unsigned packing.
    PACKSS,
    PACKUS,
    // Intra-lane alignr.
    PALIGNR,
    // AVX512 inter-lane alignr.
    VALIGN,
    PSHUFD,
    PSHUFHW,
    PSHUFLW,
    SHUFP,
    // VBMI2 Concat & Shift.
    VSHLD,
    VSHRD,
    VSHLDV,
    VSHRDV,
    // Shuffle Packed Values at 128-bit granularity.
    SHUF128,
    MOVDDUP,
    MOVSHDUP,
    MOVSLDUP,
    MOVLHPS,
    MOVHLPS,
    MOVSD,
    MOVSS,
    MOVSH,
    UNPCKL,
    UNPCKH,
    VPERMILPV,
    VPERMILPI,
    VPERMI,
    VPERM2X128,

    // Variable Permute (VPERM).
    // Res = VPERMV MaskV, V0
    VPERMV,

    // 3-op Variable Permute (VPERMT2).
    // Res = VPERMV3 V0, MaskV, V1
    VPERMV3,

    // Bitwise ternary logic.
    VPTERNLOG,
    // Fix Up Special Packed Float32/64 values.
    VFIXUPIMM,
    VFIXUPIMM_SAE,
    VFIXUPIMMS,
    VFIXUPIMMS_SAE,
    // Range Restriction Calculation For Packed Pairs of Float32/64 values.
    VRANGE,
    VRANGE_SAE,
    VRANGES,
    VRANGES_SAE,
    // Reduce - Perform Reduction Transformation on scalar/packed FP.
    VREDUCE,
    VREDUCE_SAE,
    VREDUCES,
    VREDUCES_SAE,
    // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
    // Also used by the legacy (V)ROUND intrinsics where we mask out the
    // scaling part of the immediate.
    VRNDSCALE,
    VRNDSCALE_SAE,
    VRNDSCALES,
    VRNDSCALES_SAE,
    // Tests Types Of FP Values for packed types.
    VFPCLASS,
    // Tests Types Of FP Values for scalar types.
    VFPCLASSS,

    // Broadcast (splat) scalar or element 0 of a vector. If the operand is
    // a vector, this node may change the vector length as part of the splat.
    VBROADCAST,
    // Broadcast mask to vector.
    VBROADCASTM,

    /// SSE4A Extraction and Insertion.
    EXTRQI,
    INSERTQI,

    // XOP arithmetic/logical shifts.
    VPSHA,
    VPSHL,
    // XOP signed/unsigned integer comparisons.
    VPCOM,
    VPCOMU,
    // XOP packed permute bytes.
    VPPERM,
    // XOP two source permutation.
    VPERMIL2,

    // Vector multiply packed unsigned doubleword integers.
    PMULUDQ,
    // Vector multiply packed signed doubleword integers.
    PMULDQ,
    // Vector Multiply Packed Unsigned Integers with Round and Scale.
    MULHRS,

    // Multiply and Add Packed Integers.
    VPMADDUBSW,
    VPMADDWD,

    // AVX512IFMA multiply and add.
    // NOTE: These are different from the instructions, and perform
    // op0 x op1 + op2.
    VPMADD52L,
    VPMADD52H,

    // VNNI
    VPDPBUSD,
    VPDPBUSDS,
    VPDPWSSD,
    VPDPWSSDS,

    // FMA nodes.
    // We use the target independent ISD::FMA for the non-inverted case.
    FNMADD,
    FMSUB,
    FNMSUB,
    FMADDSUB,
    FMSUBADD,

    // FMA with rounding mode.
    FMADD_RND,
    FNMADD_RND,
    FMSUB_RND,
    FNMSUB_RND,
    FMADDSUB_RND,
    FMSUBADD_RND,

    // AVX512-FP16 complex addition and multiplication.
    VFMADDC,
    VFMADDC_RND,
    VFCMADDC,
    VFCMADDC_RND,

    VFMULC,
    VFMULC_RND,
    VFCMULC,
    VFCMULC_RND,

    VFMADDCSH,
    VFMADDCSH_RND,
    VFCMADDCSH,
    VFCMADDCSH_RND,

    VFMULCSH,
    VFMULCSH_RND,
    VFCMULCSH,
    VFCMULCSH_RND,

    VPDPBSUD,
    VPDPBSUDS,
    VPDPBUUD,
    VPDPBUUDS,
    VPDPBSSD,
    VPDPBSSDS,

    // Compress and expand.
    COMPRESS,
    EXPAND,

    // Bits shuffle
    VPSHUFBITQMB,

    // Convert Signed/Unsigned Integer to Floating-Point Value with rounding
    // mode.
    SINT_TO_FP_RND,
    UINT_TO_FP_RND,
    SCALAR_SINT_TO_FP,
    SCALAR_UINT_TO_FP,
    SCALAR_SINT_TO_FP_RND,
    SCALAR_UINT_TO_FP_RND,

    // Vector float/double to signed/unsigned integer.
    CVTP2SI,
    CVTP2UI,
    CVTP2SI_RND,
    CVTP2UI_RND,
    // Scalar float/double to signed/unsigned integer.
    CVTS2SI,
    CVTS2UI,
    CVTS2SI_RND,
    CVTS2UI_RND,

    // Vector float/double to signed/unsigned integer with truncation.
    CVTTP2SI,
    CVTTP2UI,
    CVTTP2SI_SAE,
    CVTTP2UI_SAE,
    // Scalar float/double to signed/unsigned integer with truncation.
    CVTTS2SI,
    CVTTS2UI,
    CVTTS2SI_SAE,
    CVTTS2UI_SAE,

    // Vector signed/unsigned integer to float/double.
    CVTSI2P,
    CVTUI2P,

    // Masked versions of above. Used for v2f64->v4f32.
    // SRC, PASSTHRU, MASK
    MCVTP2SI,
    MCVTP2UI,
    MCVTTP2SI,
    MCVTTP2UI,
    MCVTSI2P,
    MCVTUI2P,

    // Vector float to bfloat16.
    // Convert TWO packed single data to one packed BF16 data
    CVTNE2PS2BF16,
    // Convert packed single data to packed BF16 data
    CVTNEPS2BF16,
    // Masked version of above.
    // SRC, PASSTHRU, MASK
    MCVTNEPS2BF16,

    // Dot product of BF16 pairs accumulated into
    // packed single precision.
    DPBF16PS,

    // A stack checking function call. On Windows it's a _chkstk call.
    DYN_ALLOCA,

    // For allocating variable amounts of stack space when using
    // segmented stacks. Checks if the current stacklet has enough space,
    // and falls back to heap allocation if not.
    SEG_ALLOCA,

    // For allocating stack space when using stack clash protector.
    // Allocation is performed by block, and each block is probed.
    PROBED_ALLOCA,

    // Memory barriers.
    MFENCE,

    // Get a random integer and indicate whether it is valid in CF.
    RDRAND,

    // Get a NIST SP800-90B & C compliant random integer and
    // indicate whether it is valid in CF.
    RDSEED,

    // Protection keys
    // RDPKRU - Operand 0 is chain. Operand 1 is value for ECX.
    // WRPKRU - Operand 0 is chain. Operand 1 is value for EDX. Operand 2 is
    // value for ECX.
    RDPKRU,
    WRPKRU,

    // SSE42 string comparisons.
    // These nodes produce 3 results: index, mask, and flags. X86ISelDAGToDAG
    // will emit one or two instructions based on which results are used. If
    // flags and index/mask are both used, this allows us to use a single
    // instruction since we won't have to pick an opcode for flags. Instead we
    // can rely on the DAG to CSE everything and decide at isel.
    PCMPISTR,
    PCMPESTR,

    // Test if in transactional execution.
    XTEST,

    // ERI instructions.
    RSQRT28,
    RSQRT28_SAE,
    RSQRT28S,
    RSQRT28S_SAE,
    RCP28,
    RCP28_SAE,
    RCP28S,
    RCP28S_SAE,
    EXP2,
    EXP2_SAE,

    // Conversions between float and half-float.
    CVTPS2PH,
    CVTPS2PH_SAE,
    CVTPH2PS,
    CVTPH2PS_SAE,

    // Masked version of above.
    // SRC, RND, PASSTHRU, MASK
    MCVTPS2PH,
    MCVTPS2PH_SAE,

    // Galois Field Arithmetic Instructions
    GF2P8AFFINEINVQB,
    GF2P8AFFINEQB,
    GF2P8MULB,

    // LWP insert record.
    LWPINS,

    // User level wait
    UMWAIT,
    TPAUSE,

    // Enqueue Stores Instructions
    ENQCMD,
    ENQCMDS,

    // For avx512-vp2intersect
    VP2INTERSECT,

    // User level interrupts - testui
    TESTUI,

    // Perform an FP80 add after changing precision control in FPCW.
    FP80_ADD,

    /// X86 strict FP compare instructions.
    STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE,
    STRICT_FCMPS,

    // Vector packed double/float comparison.
    STRICT_CMPP,

    /// Vector comparison generating mask bits for fp and
    /// integer signed and unsigned data types.
    STRICT_CMPM,

    // Vector float/double to signed/unsigned integer with truncation.
    STRICT_CVTTP2SI,
    STRICT_CVTTP2UI,

    // Vector FP extend.
    STRICT_VFPEXT,

    // Vector FP round.
    STRICT_VFPROUND,

    // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
    // Also used by the legacy (V)ROUND intrinsics where we mask out the
    // scaling part of the immediate.
    STRICT_VRNDSCALE,

    // Vector signed/unsigned integer to float/double.
    STRICT_CVTSI2P,
    STRICT_CVTUI2P,

    // Strict FMA nodes.
    STRICT_FNMADD,
    STRICT_FMSUB,
    STRICT_FNMSUB,

    // Conversions between float and half-float.
    STRICT_CVTPS2PH,
    STRICT_CVTPH2PS,

    // Perform an FP80 add after changing precision control in FPCW.
    STRICT_FP80_ADD,

    // WARNING: Only add nodes here if they are strict FP nodes. Non-memory and
    // non-strict FP nodes should be above FIRST_TARGET_STRICTFP_OPCODE.

    // Compare and swap.
    LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
    LCMPXCHG8_DAG,
    LCMPXCHG16_DAG,
    LCMPXCHG16_SAVE_RBX_DAG,

    /// LOCK-prefixed arithmetic read-modify-write instructions.
    /// EFLAGS, OUTCHAIN = LADD(INCHAIN, PTR, RHS)
    LADD,
    LSUB,
    LOR,
    LXOR,
    LAND,
    LBTS,
    LBTC,
    LBTR,
    LBTS_RM,
    LBTC_RM,
    LBTR_RM,

    /// RAO arithmetic instructions.
    /// OUTCHAIN = AADD(INCHAIN, PTR, RHS)
    AADD,
    AOR,
    AXOR,
    AAND,

    // Load, scalar_to_vector, and zero extend.
    VZEXT_LOAD,

    // extract_vector_elt, store.
    VEXTRACT_STORE,

    // scalar broadcast from memory.
    VBROADCAST_LOAD,

    // subvector broadcast from memory.
    SUBV_BROADCAST_LOAD,

    // Store FP control word into i16 memory.
    FNSTCW16m,

    // Load FP control word from i16 memory.
    FLDCW16m,

    // Store x87 FPU environment into memory.
    FNSTENVm,

    // Load x87 FPU environment from memory.
    FLDENVm,

    /// This instruction implements FP_TO_SINT with the
    /// integer destination in memory and an FP reg source.  This corresponds
    /// to the X86::FIST*m instructions and the rounding mode change stuff. It
    /// has two inputs (token chain and address) and two outputs (int value
    /// and token chain). Memory VT specifies the type to store to.
    FP_TO_INT_IN_MEM,

    /// This instruction implements SINT_TO_FP with the
    /// integer source in memory and an FP reg result.  This corresponds to
    /// the X86::FILD*m instructions. It has two inputs (token chain and
    /// address) and two outputs (FP value and token chain). The integer
    /// source type is specified by the memory VT.
    FILD,

    /// This instruction implements an fp->int store from FP stack
    /// slots. This corresponds to the fist instruction. It takes a
    /// chain operand, value to store, address, and glue. The memory VT
    /// specifies the type to store as.
    FIST,

    /// This instruction implements an extending load to FP stack slots.
    /// This corresponds to the X86::FLD32m / X86::FLD64m instructions. It
    /// takes a chain operand, and ptr to load from. The memory VT specifies
    /// the type to load from.
    FLD,

    /// This instruction implements a truncating store from FP stack
    /// slots. This corresponds to the X86::FST32m / X86::FST64m
    /// instructions. It takes a chain operand, value to store, address, and
    /// glue. The memory VT specifies the type to store as.
    FST,

    /// These instructions grab the address of the next argument
    /// from a va_list. (reads and modifies the va_list in memory)
    VAARG_64,
    VAARG_X32,

    // Vector truncating store with unsigned/signed saturation
    VTRUNCSTOREUS,
    VTRUNCSTORES,
    // Vector truncating masked store with unsigned/signed saturation
    VMTRUNCSTOREUS,
    VMTRUNCSTORES,

    // X86 specific gather and scatter
    MGATHER,
    MSCATTER,

    // Key locker nodes that produce flags.
    AESENC128KL,
    AESDEC128KL,
    AESENC256KL,
    AESDEC256KL,
    AESENCWIDE128KL,
    AESDECWIDE128KL,
    AESENCWIDE256KL,
    AESDECWIDE256KL,

    /// Compare and Add if Condition is Met. Compares the value in operand 2
    /// with the value in memory at operand 1. If the condition of operand 4
    /// is met, adds the value of operand 3 to the m32 and writes the new
    /// value back to operand 1. Operand 2 is always updated with the
    /// original value from operand 1.
    CMPCCXADD,

    // Save xmm argument registers to the stack, according to %al. An operator
    // is needed so that this can be expanded with control flow.
    VASTART_SAVE_XMM_REGS,

    // WARNING: Do not add anything at the end unless you want the node to
    // have memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE, all
    // opcodes will be treated as target memory ops!
  };
  } // end namespace X86ISD

  namespace X86 {
    /// The current rounding mode is represented in bits 11:10 of FPCW. These
    /// values are the same as the corresponding constants for the rounding
    /// mode used in glibc.
    enum RoundingMode {
      rmToNearest   = 0,        // FE_TONEAREST
      rmDownward    = 1 << 10,  // FE_DOWNWARD
      rmUpward      = 2 << 10,  // FE_UPWARD
      rmTowardZero  = 3 << 10,  // FE_TOWARDZERO
      rmMask        = 3 << 10   // Bit mask selecting rounding mode
    };
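
    // A minimal usage sketch, assuming CW holds a raw x87 control word value
    // (e.g. one stored to memory by FNSTCW); RM and IsTowardZero are
    // illustrative names, not part of this interface:
    //   unsigned RM = CW & X86::rmMask;          // isolate bits 11:10
    //   bool IsTowardZero = RM == X86::rmTowardZero;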
  } // end namespace X86

  /// Define some predicates that are used for node matching.
  namespace X86 {
    /// Returns true if Elt is a constant zero or floating point constant +0.0.
    bool isZeroNode(SDValue Elt);

    /// Returns true if the given offset can fit into the displacement field
    /// of the instruction.
    bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
                                      bool hasSymbolicDisplacement);

    /// Determines whether the callee is required to pop its
    /// own arguments. Callee pop is necessary to support tail calls.
    bool isCalleePop(CallingConv::ID CallingConv,
                     bool is64Bit, bool IsVarArg, bool GuaranteeTCO);

    /// If Op is a constant whose elements are all the same constant or
    /// undefined, return true and return the constant value in \p SplatVal.
    /// If we have undef bits that don't cover an entire element, we treat these
    /// as zero if AllowPartialUndefs is set, else we fail and return false.
    bool isConstantSplat(SDValue Op, APInt &SplatVal,
                         bool AllowPartialUndefs = true);
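
    /// A minimal sketch of the intended usage (the check on SplatVal is
    /// illustrative only):
    /// \code
    ///   APInt SplatVal;
    ///   if (X86::isConstantSplat(Op, SplatVal) && SplatVal.isSignMask())
    ///     ...; // Every defined element of Op is the sign-mask constant.
    /// \endcode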

    /// Check if Op is a load operation that could be folded into some other x86
    /// instruction as a memory operand. Example: vpaddd (%rdi), %xmm0, %xmm0.
    bool mayFoldLoad(SDValue Op, const X86Subtarget &Subtarget,
                     bool AssumeSingleUse = false);

    /// Check if Op is a load operation that could be folded into a vector splat
    /// instruction as a memory operand. Example: vbroadcastss 16(%rdi), %xmm2.
    bool mayFoldLoadIntoBroadcastFromMem(SDValue Op, MVT EltVT,
                                         const X86Subtarget &Subtarget,
                                         bool AssumeSingleUse = false);

    /// Check if Op is a value that could be used to fold a store into some
    /// other x86 instruction as a memory operand. Ex: pextrb $0, %xmm0, (%rdi).
    bool mayFoldIntoStore(SDValue Op);

    /// Check if Op is an operation that could be folded into a zero extend x86
    /// instruction.
    bool mayFoldIntoZeroExtend(SDValue Op);
  } // end namespace X86

  //===--------------------------------------------------------------------===//
  //  X86 Implementation of the TargetLowering interface
  class X86TargetLowering final : public TargetLowering {
  public:
    explicit X86TargetLowering(const X86TargetMachine &TM,
                               const X86Subtarget &STI);

    unsigned getJumpTableEncoding() const override;
    bool useSoftFloat() const override;

    void markLibCallAttributes(MachineFunction *MF, unsigned CC,
                               ArgListTy &Args) const override;

    MVT getScalarShiftAmountTy(const DataLayout &, EVT VT) const override {
      return MVT::i8;
    }

    const MCExpr *
    LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
                              const MachineBasicBlock *MBB, unsigned uid,
                              MCContext &Ctx) const override;

    /// Returns relocation base for the given PIC jumptable.
    SDValue getPICJumpTableRelocBase(SDValue Table,
                                     SelectionDAG &DAG) const override;
    const MCExpr *
    getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
                                 unsigned JTI, MCContext &Ctx) const override;

    /// Return the desired alignment for ByVal aggregate
    /// function arguments in the caller parameter area. For X86, aggregates
    /// that contain SSE vectors are placed at 16-byte boundaries while the
    /// rest are at 4-byte boundaries.
    uint64_t getByValTypeAlignment(Type *Ty,
                                   const DataLayout &DL) const override;

    EVT getOptimalMemOpType(const MemOp &Op,
                            const AttributeList &FuncAttributes) const override;

    /// Returns true if it's safe to use load / store of the
    /// specified type to expand memcpy / memset inline. This is mostly true
    /// for all types except for some special cases. For example, on X86
    /// targets without SSE2 f64 load / store are done with fldl / fstpl which
    /// also does type conversion. Note the specified type doesn't have to be
    /// legal as the hook is used before type legalization.
    bool isSafeMemOpType(MVT VT) const override;

    bool isMemoryAccessFast(EVT VT, Align Alignment) const;

    /// Returns true if the target allows unaligned memory accesses of the
    /// specified type. Returns whether it is "fast" in the last argument.
    bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align Alignment,
                                        MachineMemOperand::Flags Flags,
                                        unsigned *Fast) const override;

    /// This function returns true if the memory access is aligned or if the
    /// target allows this specific unaligned memory access. If the access is
    /// allowed, the optional final parameter returns a relative speed of the
    /// access (as defined by the target).
    bool allowsMemoryAccess(
        LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace,
        Align Alignment,
        MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
        unsigned *Fast = nullptr) const override;

    bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT,
                            const MachineMemOperand &MMO,
                            unsigned *Fast) const {
      return allowsMemoryAccess(Context, DL, VT, MMO.getAddrSpace(),
                                MMO.getAlign(), MMO.getFlags(), Fast);
    }

    /// Provide custom lowering hooks for some operations.
    ///
    SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;

    /// Replace the results of a node with an illegal result
    /// type with new values built out of custom code.
    ///
    void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                            SelectionDAG &DAG) const override;

    SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;

    bool preferABDSToABSWithNSW(EVT VT) const override;

    bool preferSextInRegOfTruncate(EVT TruncVT, EVT VT,
                                   EVT ExtVT) const override;

    bool isXAndYEqZeroPreferableToXAndYEqY(ISD::CondCode Cond,
                                           EVT VT) const override;

    /// Return true if the target has native support for
    /// the specified value type and it is 'desirable' to use the type for the
    /// given node type. e.g. On x86 i16 is legal, but undesirable since i16
    /// instruction encodings are longer and some i16 instructions are slow.
    bool isTypeDesirableForOp(unsigned Opc, EVT VT) const override;

    /// Return true if the target has native support for the
    /// specified value type and it is 'desirable' to use the type. e.g. On x86
    /// i16 is legal, but undesirable since i16 instruction encodings are longer
    /// and some i16 instructions are slow.
    bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const override;

    /// Return the preferred fold type: Abs if this is a vector, AddAnd if it's
    /// an integer, None otherwise.
    TargetLowering::AndOrSETCCFoldKind
    isDesirableToCombineLogicOpOfSETCC(const SDNode *LogicOp,
                                       const SDNode *SETCC0,
                                       const SDNode *SETCC1) const override;

    /// Return the newly negated expression if the cost is not expensive, and
    /// set \p Cost to indicate whether the negation is cheaper or neutral.
    SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG,
                                 bool LegalOperations, bool ForCodeSize,
                                 NegatibleCost &Cost,
                                 unsigned Depth) const override;

    MachineBasicBlock *
    EmitInstrWithCustomInserter(MachineInstr &MI,
                                MachineBasicBlock *MBB) const override;

    /// This method returns the name of a target specific DAG node.
    const char *getTargetNodeName(unsigned Opcode) const override;

    /// Do not merge vector stores after legalization because that may conflict
    /// with x86-specific store splitting optimizations.
    bool mergeStoresAfterLegalization(EVT MemVT) const override {
      return !MemVT.isVector();
    }

    bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
                          const MachineFunction &MF) const override;

    bool isCheapToSpeculateCttz(Type *Ty) const override;

    bool isCheapToSpeculateCtlz(Type *Ty) const override;

    bool isCtlzFast() const override;

    bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const override {
      // If the pair to store is a mixture of float and int values, we will
      // save two bitwise instructions and one float-to-int instruction at
      // the cost of one extra store. There is potentially a more significant
      // benefit because it avoids the float->int domain switch for the input
      // value, so it is more likely a win.
      if ((LTy.isFloatingPoint() && HTy.isInteger()) ||
          (LTy.isInteger() && HTy.isFloatingPoint()))
        return true;
      // If the pair only contains int values, we will save two bitwise
      // instructions at the cost of one extra store (and one more store
      // buffer entry). Since that benefit is less clear, we leave such pairs
      // out until we get a test case proving it is a win.
      return false;
    }

    bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;

    bool hasAndNotCompare(SDValue Y) const override;

    bool hasAndNot(SDValue Y) const override;

    bool hasBitTest(SDValue X, SDValue Y) const override;

    bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
        SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
        unsigned OldShiftOpcode, unsigned NewShiftOpcode,
        SelectionDAG &DAG) const override;

    unsigned preferedOpcodeForCmpEqPiecesOfOperand(
        EVT VT, unsigned ShiftOpc, bool MayTransformRotate,
        const APInt &ShiftOrRotateAmt,
        const std::optional<APInt> &AndMask) const override;

    bool preferScalarizeSplat(SDNode *N) const override;

    bool shouldFoldConstantShiftPairToMask(const SDNode *N,
                                           CombineLevel Level) const override;

    bool shouldFoldMaskToVariableShiftPair(SDValue Y) const override;

    bool
    shouldTransformSignedTruncationCheck(EVT XVT,
                                         unsigned KeptBits) const override {
      // For vectors, we don't have a preference.
      if (XVT.isVector())
        return false;

      auto VTIsOk = [](EVT VT) -> bool {
        return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
               VT == MVT::i64;
      };

      // We are ok with KeptBitsVT being byte/word/dword, which is what MOVSX
      // supports. XVT will be larger than KeptBitsVT.
      MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
      return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
    }
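
    // Worked example: with XVT == MVT::i32 and KeptBits == 8, KeptBitsVT is
    // MVT::i8, both types pass VTIsOk, and the transform is allowed; with
    // KeptBits == 5 there is no plain i5 register type, so this returns false.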

    ShiftLegalizationStrategy
    preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N,
                                       unsigned ExpansionFactor) const override;

    bool shouldSplatInsEltVarIndex(EVT VT) const override;

    bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override {
      // Converting to sat variants holds little benefit on X86 as we will
      // just need to saturate the value back using fp arithmetic.
      return Op != ISD::FP_TO_UINT_SAT && isOperationLegalOrCustom(Op, VT);
    }

    bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
      return VT.isScalarInteger();
    }

    /// Vector-sized comparisons are fast using PCMPEQ + PMOVMSK or PTEST.
    MVT hasFastEqualityCompare(unsigned NumBits) const override;

    /// Return the value type to use for ISD::SETCC.
    EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
                           EVT VT) const override;

    bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
                                      const APInt &DemandedElts,
                                      TargetLoweringOpt &TLO) const override;

    /// Determine which of the bits specified in Mask are known to be either
    /// zero or one and return them in the KnownZero/KnownOne bitsets.
    void computeKnownBitsForTargetNode(const SDValue Op,
                                       KnownBits &Known,
                                       const APInt &DemandedElts,
                                       const SelectionDAG &DAG,
                                       unsigned Depth = 0) const override;

    /// Determine the number of bits in the operation that are sign bits.
    unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
                                             const APInt &DemandedElts,
                                             const SelectionDAG &DAG,
                                             unsigned Depth) const override;

    bool SimplifyDemandedVectorEltsForTargetNode(SDValue Op,
                                                 const APInt &DemandedElts,
                                                 APInt &KnownUndef,
                                                 APInt &KnownZero,
                                                 TargetLoweringOpt &TLO,
                                                 unsigned Depth) const override;

    bool SimplifyDemandedVectorEltsForTargetShuffle(SDValue Op,
                                                    const APInt &DemandedElts,
                                                    unsigned MaskIndex,
                                                    TargetLoweringOpt &TLO,
                                                    unsigned Depth) const;

    bool SimplifyDemandedBitsForTargetNode(SDValue Op,
                                           const APInt &DemandedBits,
                                           const APInt &DemandedElts,
                                           KnownBits &Known,
                                           TargetLoweringOpt &TLO,
                                           unsigned Depth) const override;

    SDValue SimplifyMultipleUseDemandedBitsForTargetNode(
        SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
        SelectionDAG &DAG, unsigned Depth) const override;

    bool isGuaranteedNotToBeUndefOrPoisonForTargetNode(
        SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
        bool PoisonOnly, unsigned Depth) const override;

    bool canCreateUndefOrPoisonForTargetNode(
        SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
        bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const override;

    bool isSplatValueForTargetNode(SDValue Op, const APInt &DemandedElts,
                                   APInt &UndefElts, const SelectionDAG &DAG,
                                   unsigned Depth) const override;

    bool isTargetCanonicalConstantNode(SDValue Op) const override {
      // Peek through bitcasts/extracts/inserts to see if we have a broadcast
      // vector from memory.
      while (Op.getOpcode() == ISD::BITCAST ||
             Op.getOpcode() == ISD::EXTRACT_SUBVECTOR ||
             (Op.getOpcode() == ISD::INSERT_SUBVECTOR &&
              Op.getOperand(0).isUndef()))
        Op = Op.getOperand(Op.getOpcode() == ISD::INSERT_SUBVECTOR ? 1 : 0);

      return Op.getOpcode() == X86ISD::VBROADCAST_LOAD ||
             TargetLowering::isTargetCanonicalConstantNode(Op);
    }
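
    // Example: a bitcast of an extract_subvector of a VBROADCAST_LOAD is
    // still treated as canonical here, since the loop above peels off both
    // wrappers before checking the opcode.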

    const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const override;

    SDValue unwrapAddress(SDValue N) const override;

    SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;

    bool ExpandInlineAsm(CallInst *CI) const override;

    ConstraintType getConstraintType(StringRef Constraint) const override;

    /// Examine constraint string and operand type and determine a weight value.
    /// The operand object must already have been set up with the operand type.
    ConstraintWeight
      getSingleConstraintMatchWeight(AsmOperandInfo &Info,
                                     const char *Constraint) const override;

    const char *LowerXConstraint(EVT ConstraintVT) const override;

    /// Lower the specified operand into the Ops vector. If it is invalid, don't
    /// add anything to Ops. If hasMemory is true, it means one of the asm
    /// constraints of the inline asm instruction being processed is 'm'.
    void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint,
                                      std::vector<SDValue> &Ops,
                                      SelectionDAG &DAG) const override;

    InlineAsm::ConstraintCode
    getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
      if (ConstraintCode == "v")
        return InlineAsm::ConstraintCode::v;
      return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
    }

    /// Handle Lowering flag assembly outputs.
    SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag,
                                        const SDLoc &DL,
                                        const AsmOperandInfo &Constraint,
                                        SelectionDAG &DAG) const override;

    /// Given a physical register constraint
    /// (e.g. {edx}), return the register number and the register class for the
    /// register.  This should only be used for C_Register constraints.  On
    /// error, this returns a register number of 0.
    std::pair<unsigned, const TargetRegisterClass *>
    getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                 StringRef Constraint, MVT VT) const override;

    /// Return true if the addressing mode represented
    /// by AM is legal for this target, for a load/store of the specified type.
    bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
                               Type *Ty, unsigned AS,
                               Instruction *I = nullptr) const override;

    /// Return true if the specified immediate is a legal
    /// icmp immediate, that is, the target has icmp instructions which can
    /// compare a register against the immediate without having to materialize
    /// the immediate into a register.
    bool isLegalICmpImmediate(int64_t Imm) const override;

    /// Return true if the specified immediate is a legal
    /// add immediate, that is, the target has add instructions which can
    /// add a register and the immediate without having to materialize
    /// the immediate into a register.
    bool isLegalAddImmediate(int64_t Imm) const override;

    bool isLegalStoreImmediate(int64_t Imm) const override;

    /// This is used to enable splatted operand transforms for vector shifts
    /// and vector funnel shifts.
    bool isVectorShiftByScalarCheap(Type *Ty) const override;

    /// Add x86-specific opcodes to the default list.
    bool isBinOp(unsigned Opcode) const override;

    /// Returns true if the opcode is a commutative binary operation.
    bool isCommutativeBinOp(unsigned Opcode) const override;

    /// Return true if it's free to truncate a value of
    /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate an i32 value in
    /// register EAX to i16 by referencing its sub-register AX.
    bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
    bool isTruncateFree(EVT VT1, EVT VT2) const override;

    bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;

    /// Return true if any actual instruction that defines a
    /// value of type Ty1 implicitly zero-extends the value to Ty2 in the
    /// result register. This does not necessarily include registers defined in
    /// unknown ways, such as incoming arguments, or copies from unknown
    /// virtual registers. Also, if isTruncateFree(Ty2, Ty1) is true, this
    /// does not necessarily apply to truncate instructions. e.g. on x86-64,
    /// all instructions that define 32-bit values implicitly zero-extend the
    /// result out to 64 bits.
    bool isZExtFree(Type *Ty1, Type *Ty2) const override;
    bool isZExtFree(EVT VT1, EVT VT2) const override;
    bool isZExtFree(SDValue Val, EVT VT2) const override;

    bool shouldSinkOperands(Instruction *I,
                            SmallVectorImpl<Use *> &Ops) const override;
    bool shouldConvertPhiType(Type *From, Type *To) const override;

    /// Return true if folding a vector load into ExtVal (a sign, zero, or any
    /// extend node) is profitable.
    bool isVectorLoadExtDesirable(SDValue) const override;

    /// Return true if an FMA operation is faster than a pair of fmul and fadd
    /// instructions. fmuladd intrinsics will be expanded to FMAs when this
    /// method returns true, otherwise fmuladd is expanded to fmul + fadd.
    bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
                                    EVT VT) const override;

    /// Return true if it's profitable to narrow operations of type SrcVT to
    /// DestVT. e.g. on x86, it's profitable to narrow from i32 to i8 but not
    /// from i32 to i16.
    bool isNarrowingProfitable(EVT SrcVT, EVT DestVT) const override;

    bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode,
                                              EVT VT) const override;

    /// Given an intrinsic, checks if on the target the intrinsic will need to map
    /// to a MemIntrinsicNode (touches memory). If this is the case, it returns
    /// true and stores the intrinsic information into the IntrinsicInfo that was
    /// passed to the function.
    bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
                            MachineFunction &MF,
                            unsigned Intrinsic) const override;

    /// Returns true if the target can instruction select the
    /// specified FP immediate natively. If false, the legalizer will
    /// materialize the FP immediate as a load from a constant pool.
    bool isFPImmLegal(const APFloat &Imm, EVT VT,
                      bool ForCodeSize) const override;

    /// Targets can use this to indicate that they only support *some*
    /// VECTOR_SHUFFLE operations, those with specific masks. By default, if a
    /// target supports the VECTOR_SHUFFLE node, all mask values are assumed to
    /// be legal.
    bool isShuffleMaskLegal(ArrayRef<int> Mask, EVT VT) const override;

    /// Similar to isShuffleMaskLegal. Targets can use this to indicate if there
    /// is a suitable VECTOR_SHUFFLE that can be used to replace a VAND with a
    /// constant pool entry.
    bool isVectorClearMaskLegal(ArrayRef<int> Mask, EVT VT) const override;

    /// Returns true if lowering to a jump table is allowed.
    bool areJTsAllowed(const Function *Fn) const override;

    MVT getPreferredSwitchConditionType(LLVMContext &Context,
                                        EVT ConditionVT) const override;

    /// If true, then instruction selection should
    /// seek to shrink the FP constant of the specified type to a smaller type
    /// in order to save space and/or reduce runtime.
    bool ShouldShrinkFPConstant(EVT VT) const override;

    /// Return true if we believe it is correct and profitable to reduce the
    /// load node to a smaller type.
    bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
                               EVT NewVT) const override;

    /// Return true if the specified scalar FP type is computed in an SSE
    /// register, not on the X87 floating point stack.
    bool isScalarFPTypeInSSEReg(EVT VT) const;

    /// Returns true if it is beneficial to convert a load of a constant
    /// to just the constant itself.
    bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                           Type *Ty) const override;

    bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const override;

    bool convertSelectOfConstantsToMath(EVT VT) const override;

    bool decomposeMulByConstant(LLVMContext &Context, EVT VT,
                                SDValue C) const override;

    /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
    /// with this index.
    bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
                                 unsigned Index) const override;

    /// Scalar ops always have equal or better analysis/performance/power than
    /// the vector equivalent, so this always makes sense if the scalar op is
    /// supported.
    bool shouldScalarizeBinop(SDValue) const override;

    /// Extract of a scalar FP value from index 0 of a vector is free.
    bool isExtractVecEltCheap(EVT VT, unsigned Index) const override {
      EVT EltVT = VT.getScalarType();
      return (EltVT == MVT::f32 || EltVT == MVT::f64) && Index == 0;
    }

    /// Overflow nodes should get combined/lowered to optimal instructions
    /// (they should allow eliminating explicit compares by getting flags from
    /// math ops).
    bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
                              bool MathUsed) const override;

    bool storeOfVectorConstantIsCheap(bool IsZero, EVT MemVT, unsigned NumElem,
                                      unsigned AddrSpace) const override {
      // If we can replace more than 2 scalar stores, there will be a reduction
      // in instructions even after we add a vector constant load.
      return IsZero || NumElem > 2;
    }

    bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
                                 const SelectionDAG &DAG,
                                 const MachineMemOperand &MMO) const override;

    /// Intel processors have a unified instruction and data cache.
    const char *getClearCacheBuiltinName() const override {
      return nullptr; // nothing to do, move along.
    }

    Register getRegisterByName(const char* RegName, LLT VT,
                               const MachineFunction &MF) const override;

    /// If a physical register, this returns the register that receives the
    /// exception address on entry to an EH pad.
    Register
    getExceptionPointerRegister(const Constant *PersonalityFn) const override;

    /// If a physical register, this returns the register that receives the
    /// exception typeid on entry to a landing pad.
    Register
    getExceptionSelectorRegister(const Constant *PersonalityFn) const override;

    bool needsFixedCatchObjects() const override;

    /// This method returns a target specific FastISel object,
    /// or null if the target does not support "fast" ISel.
    FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                             const TargetLibraryInfo *libInfo) const override;

    /// If the target has a standard location for the stack protector cookie,
    /// returns the address of that location. Otherwise, returns nullptr.
    Value *getIRStackGuard(IRBuilderBase &IRB) const override;

    bool useLoadStackGuardNode() const override;
    bool useStackGuardXorFP() const override;
    void insertSSPDeclarations(Module &M) const override;
    Value *getSDagStackGuard(const Module &M) const override;
    Function *getSSPStackGuardCheck(const Module &M) const override;
    SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
                                const SDLoc &DL) const override;

    /// Return the address of the SafeStack pointer, which for X86 is stored
    /// at a fixed offset in a non-standard address space.
    Value *getSafeStackPointerLocation(IRBuilderBase &IRB) const override;
1511
1512    std::pair<SDValue, SDValue> BuildFILD(EVT DstVT, EVT SrcVT, const SDLoc &DL,
1513                                          SDValue Chain, SDValue Pointer,
1514                                          MachinePointerInfo PtrInfo,
1515                                          Align Alignment,
1516                                          SelectionDAG &DAG) const;
1517
1518    /// Customize the preferred legalization strategy for certain types.
1519    LegalizeTypeAction getPreferredVectorAction(MVT VT) const override;
1520
    bool softPromoteHalfType() const override { return true; }

    MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,
                                      EVT VT) const override;

    unsigned getNumRegistersForCallingConv(LLVMContext &Context,
                                           CallingConv::ID CC,
                                           EVT VT) const override;

    unsigned getVectorTypeBreakdownForCallingConv(
        LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
        unsigned &NumIntermediates, MVT &RegisterVT) const override;

    bool isIntDivCheap(EVT VT, AttributeList Attr) const override;

    bool supportSwiftError() const override;

    bool supportKCFIBundles() const override { return true; }

    MachineInstr *EmitKCFICheck(MachineBasicBlock &MBB,
                                MachineBasicBlock::instr_iterator &MBBI,
                                const TargetInstrInfo *TII) const override;

    bool hasStackProbeSymbol(const MachineFunction &MF) const override;
    bool hasInlineStackProbe(const MachineFunction &MF) const override;
    StringRef getStackProbeSymbolName(const MachineFunction &MF) const override;

    unsigned getStackProbeSize(const MachineFunction &MF) const;

    bool hasVectorBlend() const override { return true; }

    unsigned getMaxSupportedInterleaveFactor() const override { return 4; }

    bool isInlineAsmTargetBranch(const SmallVectorImpl<StringRef> &AsmStrs,
                                 unsigned OpNo) const override;

    /// Lower interleaved load(s) into target-specific
    /// instructions/intrinsics.
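    /// E.g. with Factor == 2, a candidate is a wide load followed by strided
    /// shuffles such as:
    ///   %wide = load <8 x i32>, ptr %p
    ///   %v0 = shufflevector <8 x i32> %wide, <8 x i32> poison,
    ///                       <4 x i32> <i32 0, i32 2, i32 4, i32 6>
    ///   %v1 = shufflevector <8 x i32> %wide, <8 x i32> poison,
    ///                       <4 x i32> <i32 1, i32 3, i32 5, i32 7>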
    bool lowerInterleavedLoad(LoadInst *LI,
                              ArrayRef<ShuffleVectorInst *> Shuffles,
                              ArrayRef<unsigned> Indices,
                              unsigned Factor) const override;

    /// Lower interleaved store(s) into target-specific
    /// instructions/intrinsics.
    bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
                               unsigned Factor) const override;

    SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr,
                                   int JTI, SelectionDAG &DAG) const override;

    Align getPrefLoopAlignment(MachineLoop *ML) const override;

    EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const override {
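      // Softened f80 values are represented as 96-bit integers: the 80 value
      // bits rounded up to the next 32-bit multiple, matching the 12-byte
      // f80 store size on 32-bit x86.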
      if (VT == MVT::f80)
        return EVT::getIntegerVT(Context, 96);
      return TargetLoweringBase::getTypeToTransformTo(Context, VT);
    }

  protected:
    std::pair<const TargetRegisterClass *, uint8_t>
    findRepresentativeClass(const TargetRegisterInfo *TRI,
                            MVT VT) const override;

  private:
    /// Keep a reference to the X86Subtarget around so that we can
    /// make the right decision when generating code for different targets.
    const X86Subtarget &Subtarget;

    /// A list of legal FP immediates.
    std::vector<APFloat> LegalFPImmediates;

    /// Indicate that this x86 target can instruction-select the specified
    /// FP immediate natively.
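    /// E.g. +0.0 and +1.0 qualify on x87 because FLDZ/FLD1 materialize them
    /// directly (and SSE produces +0.0 with a self-XOR).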
    void addLegalFPImmediate(const APFloat &Imm) {
      LegalFPImmediates.push_back(Imm);
    }

    SDValue LowerCallResult(SDValue Chain, SDValue InGlue,
                            CallingConv::ID CallConv, bool isVarArg,
                            const SmallVectorImpl<ISD::InputArg> &Ins,
                            const SDLoc &dl, SelectionDAG &DAG,
                            SmallVectorImpl<SDValue> &InVals,
                            uint32_t *RegMask) const;
    SDValue LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
                             const SmallVectorImpl<ISD::InputArg> &ArgInfo,
                             const SDLoc &dl, SelectionDAG &DAG,
                             const CCValAssign &VA, MachineFrameInfo &MFI,
                             unsigned i) const;
    SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg,
                             const SDLoc &dl, SelectionDAG &DAG,
                             const CCValAssign &VA,
                             ISD::ArgFlagsTy Flags, bool isByval) const;

    // Call lowering helpers.

    /// Check whether the call is eligible for tail call optimization. This is
    /// the x86 implementation of the hook that targets supporting tail call
    /// optimization must provide.
    bool IsEligibleForTailCallOptimization(
        SDValue Callee, CallingConv::ID CalleeCC, bool IsCalleeStackStructRet,
        bool isVarArg, Type *RetTy, const SmallVectorImpl<ISD::OutputArg> &Outs,
        const SmallVectorImpl<SDValue> &OutVals,
        const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const;
    SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr,
                                    SDValue Chain, bool IsTailCall,
                                    bool Is64Bit, int FPDiff,
                                    const SDLoc &dl) const;

    unsigned GetAlignedArgumentStackSize(unsigned StackSize,
                                         SelectionDAG &DAG) const;

    unsigned getAddressSpace() const;

    SDValue FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned,
                            SDValue &Chain) const;
    SDValue LRINT_LLRINTHelper(SDNode *N, SelectionDAG &DAG) const;

    SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;

    unsigned getGlobalWrapperKind(const GlobalValue *GV,
                                  const unsigned char OpFlags) const;
    SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;

    /// Creates target global address or external symbol nodes for calls or
    /// other uses.
    SDValue LowerGlobalOrExternal(SDValue Op, SelectionDAG &DAG,
                                  bool ForCall) const;

    SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerLRINT_LLRINT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGET_FPENV_MEM(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerSET_FPENV_MEM(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerRESET_FPENV(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerWin64_FP_TO_INT128(SDValue Op, SelectionDAG &DAG,
                                    SDValue &Chain) const;
    SDValue LowerWin64_INT128_TO_FP(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerGC_TRANSITION(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
    SDValue lowerFaddFsub(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
    SDValue LowerFP_TO_BF16(SDValue Op, SelectionDAG &DAG) const;

    SDValue
    LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
                         const SmallVectorImpl<ISD::InputArg> &Ins,
                         const SDLoc &dl, SelectionDAG &DAG,
                         SmallVectorImpl<SDValue> &InVals) const override;
    SDValue LowerCall(CallLoweringInfo &CLI,
                      SmallVectorImpl<SDValue> &InVals) const override;

    SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
                        const SmallVectorImpl<ISD::OutputArg> &Outs,
                        const SmallVectorImpl<SDValue> &OutVals,
                        const SDLoc &dl, SelectionDAG &DAG) const override;

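    /// Split-CSR replaces the usual prologue/epilogue callee-saved register
    /// spills with copies that later passes can place on only the paths that
    /// need them; x86 enables it solely for non-unwinding CXX_FAST_TLS
    /// functions, as checked below.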
    bool supportSplitCSR(MachineFunction *MF) const override {
      return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
          MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
    }
    void initializeSplitCSR(MachineBasicBlock *Entry) const override;
    void insertCopiesSplitCSR(
      MachineBasicBlock *Entry,
      const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;

    bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;

    bool mayBeEmittedAsTailCall(const CallInst *CI) const override;

    EVT getTypeForExtReturn(LLVMContext &Context, EVT VT,
                            ISD::NodeType ExtendKind) const override;

    bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
                        bool isVarArg,
                        const SmallVectorImpl<ISD::OutputArg> &Outs,
                        LLVMContext &Context) const override;

    const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
    ArrayRef<MCPhysReg> getRoundingControlRegisters() const override;

    TargetLoweringBase::AtomicExpansionKind
    shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
    TargetLoweringBase::AtomicExpansionKind
    shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
    TargetLoweringBase::AtomicExpansionKind
    shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
    TargetLoweringBase::AtomicExpansionKind
    shouldExpandLogicAtomicRMWInIR(AtomicRMWInst *AI) const;
    void emitBitTestAtomicRMWIntrinsic(AtomicRMWInst *AI) const override;
    void emitCmpArithAtomicRMWIntrinsic(AtomicRMWInst *AI) const override;

    LoadInst *
    lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;

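    /// Whether an atomic operation on MemType requires the double-width
    /// CMPXCHG8B/CMPXCHG16B instruction, e.g. i64 atomics on 32-bit x86 or
    /// i128 atomics on x86-64.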
    bool needsCmpXchgNb(Type *MemType) const;

    void SetupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB,
                                MachineBasicBlock *DispatchBB, int FI) const;

    /// Utility function to emit the low-level va_arg code for X86-64.
    MachineBasicBlock *
    EmitVAARGWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const;

    /// Utility function to lower a cascaded pair of select (CMOV) pseudo
    /// instructions into control flow.
    MachineBasicBlock *EmitLoweredCascadedSelect(MachineInstr &MI1,
                                                 MachineInstr &MI2,
                                                 MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredSelect(MachineInstr &I,
                                         MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
                                           MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr &MI,
                                            MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredProbedAlloca(MachineInstr &MI,
                                               MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredTLSAddr(MachineInstr &MI,
                                          MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredTLSCall(MachineInstr &MI,
                                          MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredIndirectThunk(MachineInstr &MI,
                                                MachineBasicBlock *BB) const;

    MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI,
                                        MachineBasicBlock *MBB) const;

    void emitSetJmpShadowStackFix(MachineInstr &MI,
                                  MachineBasicBlock *MBB) const;

    MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI,
                                         MachineBasicBlock *MBB) const;

    MachineBasicBlock *emitLongJmpShadowStackFix(MachineInstr &MI,
                                                 MachineBasicBlock *MBB) const;

    MachineBasicBlock *EmitSjLjDispatchBlock(MachineInstr &MI,
                                             MachineBasicBlock *MBB) const;

    /// Emit the EFLAGS-producing node for the given setcc condition and
    /// operands, and return the corresponding X86 condition code constant
    /// in X86CC.
    SDValue emitFlagsForSetcc(SDValue Op0, SDValue Op1, ISD::CondCode CC,
                              const SDLoc &dl, SelectionDAG &DAG,
                              SDValue &X86CC) const;

    bool optimizeFMulOrFDivAsShiftAddBitcast(SDNode *N, SDValue FPConst,
                                             SDValue IntPow2) const override;

    /// Check if replacement of SQRT with RSQRT should be disabled.
    bool isFsqrtCheap(SDValue Op, SelectionDAG &DAG) const override;

    /// Use rsqrt* to speed up sqrt calculations.
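    /// Each refinement step is a Newton-Raphson iteration on the reciprocal
    /// square root estimate r: r' = r * (1.5 - 0.5 * x * r * r), with sqrt(x)
    /// then recovered as x * r'.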
    SDValue getSqrtEstimate(SDValue Op, SelectionDAG &DAG, int Enabled,
                            int &RefinementSteps, bool &UseOneConstNR,
                            bool Reciprocal) const override;

    /// Use rcp* to speed up fdiv calculations.
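    /// Each refinement step is a Newton-Raphson iteration on the reciprocal
    /// estimate r: r' = r * (2 - x * r).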
    SDValue getRecipEstimate(SDValue Op, SelectionDAG &DAG, int Enabled,
                             int &RefinementSteps) const override;

    /// Reassociate floating point divisions into multiply by reciprocal.
    unsigned combineRepeatedFPDivisors() const override;

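    /// Build a signed divide by a power of two without a division, e.g. for
    /// a divisor of 4 (sketch of the classic pattern):
    ///   t = x + ((x >> 31) u>> 30)  ; bias negative dividends toward zero
    ///   q = t >> 2                  ; arithmetic shift right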
    SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
                          SmallVectorImpl<SDNode *> &Created) const override;

    SDValue getMOVL(SelectionDAG &DAG, const SDLoc &dl, MVT VT, SDValue V1,
                    SDValue V2) const;
  };

  namespace X86 {
    FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                             const TargetLibraryInfo *libInfo);
  } // end namespace X86

  // X86-specific Gather/Scatter nodes.
  // The class has the same order of operands as MaskedGatherScatterSDNode for
  // convenience.
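  // Operand layout, mirrored by the getters below:
  //   0: chain, 1: passthru (gather) / value (scatter), 2: mask,
  //   3: base pointer, 4: index, 5: scale.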
  class X86MaskedGatherScatterSDNode : public MemIntrinsicSDNode {
  public:
    // This is intended as a utility and should never be directly created.
    X86MaskedGatherScatterSDNode() = delete;
    ~X86MaskedGatherScatterSDNode() = delete;

    const SDValue &getBasePtr() const { return getOperand(3); }
    const SDValue &getIndex()   const { return getOperand(4); }
    const SDValue &getMask()    const { return getOperand(2); }
    const SDValue &getScale()   const { return getOperand(5); }

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::MGATHER ||
             N->getOpcode() == X86ISD::MSCATTER;
    }
  };

  class X86MaskedGatherSDNode : public X86MaskedGatherScatterSDNode {
  public:
    const SDValue &getPassThru() const { return getOperand(1); }

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::MGATHER;
    }
  };

  class X86MaskedScatterSDNode : public X86MaskedGatherScatterSDNode {
  public:
    const SDValue &getValue() const { return getOperand(1); }

    static bool classof(const SDNode *N) {
      return N->getOpcode() == X86ISD::MSCATTER;
    }
  };

  /// Generate unpacklo/unpackhi shuffle mask.
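  /// E.g. a binary v8i32 unpacklo mask (applied per 128-bit lane) is
  /// <0, 8, 1, 9, 4, 12, 5, 13>; the unary form draws both inputs from the
  /// first operand: <0, 0, 1, 1, 4, 4, 5, 5>.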
  void createUnpackShuffleMask(EVT VT, SmallVectorImpl<int> &Mask, bool Lo,
                               bool Unary);

  /// Similar to unpacklo/unpackhi, but without the 128-bit lane limitation
  /// imposed by AVX and specific to the unary pattern. Example:
  /// v8iX Lo --> <0, 0, 1, 1, 2, 2, 3, 3>
  /// v8iX Hi --> <4, 4, 5, 5, 6, 6, 7, 7>
  void createSplat2ShuffleMask(MVT VT, SmallVectorImpl<int> &Mask, bool Lo);

} // end namespace llvm

#endif // LLVM_LIB_TARGET_X86_X86ISELLOWERING_H