/*===------------- avx512ifmavlintrin.h - IFMA intrinsics ------------------===
 *
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
10119610Sache#ifndef __IMMINTRIN_H
11119610Sache#error "Never use <avx512ifmavlintrin.h> directly; include <immintrin.h> instead."
12119610Sache#endif
13119610Sache
14119610Sache#ifndef __IFMAVLINTRIN_H
15119610Sache#define __IFMAVLINTRIN_H
16119610Sache
/* Define the default attributes for the functions in this file.
 *
 * Both variants request always-inline, no debug info and the
 * "avx512ifma,avx512vl" target features; they differ only in the
 * __min_vector_width__ value (128 vs. 256), matching the vector width of the
 * functions they are applied to. */
#define __DEFAULT_FN_ATTRS128                                                  \
  __attribute__((__always_inline__, __nodebug__,                               \
                 __target__("avx512ifma,avx512vl"),                            \
                 __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256                                                  \
  __attribute__((__always_inline__, __nodebug__,                               \
                 __target__("avx512ifma,avx512vl"),                            \
                 __min_vector_width__(256)))

/*
 * _mm_madd52hi_epu64(X, Y, Z): 128-bit IFMA "high" multiply-add.
 *
 * Casts each argument to __v2di, passes them in order to
 * __builtin_ia32_vpmadd52huq128 and casts the result to __m128i.
 * Per Intel's description of VPMADD52HUQ (not derivable from this file), the
 * low 52 bits of each 64-bit element of Y and Z are multiplied and the high
 * 52 bits of the 104-bit product are added to the matching element of X.
 *
 * NOTE(review): this is a macro, so the __DEFAULT_FN_ATTRS128 target
 * attributes do not apply to it -- presumably deliberate; confirm before
 * turning it into an inline function.
 */
#define _mm_madd52hi_epu64(X, Y, Z)                                            \
  ((__m128i)__builtin_ia32_vpmadd52huq128((__v2di)(X), (__v2di)(Y),            \
                                          (__v2di)(Z)))

/*
 * _mm256_madd52hi_epu64(X, Y, Z): 256-bit IFMA "high" multiply-add.
 *
 * Casts each argument to __v4di, passes them in order to
 * __builtin_ia32_vpmadd52huq256 and casts the result to __m256i.
 * Per Intel's description of VPMADD52HUQ (not derivable from this file), the
 * low 52 bits of each 64-bit element of Y and Z are multiplied and the high
 * 52 bits of the 104-bit product are added to the matching element of X.
 *
 * NOTE(review): this is a macro, so the __DEFAULT_FN_ATTRS256 target
 * attributes do not apply to it -- presumably deliberate; confirm before
 * turning it into an inline function.
 */
#define _mm256_madd52hi_epu64(X, Y, Z)                                         \
  ((__m256i)__builtin_ia32_vpmadd52huq256((__v4di)(X), (__v4di)(Y),            \
                                          (__v4di)(Z)))

/*
 * _mm_madd52lo_epu64(X, Y, Z): 128-bit IFMA "low" multiply-add.
 *
 * Casts each argument to __v2di, passes them in order to
 * __builtin_ia32_vpmadd52luq128 and casts the result to __m128i.
 * Per Intel's description of VPMADD52LUQ (not derivable from this file), the
 * low 52 bits of each 64-bit element of Y and Z are multiplied and the low
 * 52 bits of the 104-bit product are added to the matching element of X.
 *
 * NOTE(review): this is a macro, so the __DEFAULT_FN_ATTRS128 target
 * attributes do not apply to it -- presumably deliberate; confirm before
 * turning it into an inline function.
 */
#define _mm_madd52lo_epu64(X, Y, Z)                                            \
  ((__m128i)__builtin_ia32_vpmadd52luq128((__v2di)(X), (__v2di)(Y),            \
                                          (__v2di)(Z)))

/*
 * _mm256_madd52lo_epu64(X, Y, Z): 256-bit IFMA "low" multiply-add.
 *
 * Casts each argument to __v4di, passes them in order to
 * __builtin_ia32_vpmadd52luq256 and casts the result to __m256i.
 * Per Intel's description of VPMADD52LUQ (not derivable from this file), the
 * low 52 bits of each 64-bit element of Y and Z are multiplied and the low
 * 52 bits of the 104-bit product are added to the matching element of X.
 *
 * NOTE(review): this is a macro, so the __DEFAULT_FN_ATTRS256 target
 * attributes do not apply to it -- presumably deliberate; confirm before
 * turning it into an inline function.
 */
#define _mm256_madd52lo_epu64(X, Y, Z)                                         \
  ((__m256i)__builtin_ia32_vpmadd52luq256((__v4di)(X), (__v4di)(Y),            \
                                          (__v4di)(Z)))

37119610Sachestatic __inline__ __m128i __DEFAULT_FN_ATTRS128
38119610Sache_mm_mask_madd52hi_epu64 (__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
39119610Sache{
40119610Sache  return (__m128i)__builtin_ia32_selectq_128(__M,
41119610Sache                                      (__v2di)_mm_madd52hi_epu64(__W, __X, __Y),
42119610Sache                                      (__v2di)__W);
43119610Sache}
44119610Sache
45119610Sachestatic __inline__ __m128i __DEFAULT_FN_ATTRS128
46119610Sache_mm_maskz_madd52hi_epu64 (__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z)
47119610Sache{
48119610Sache  return (__m128i)__builtin_ia32_selectq_128(__M,
49119610Sache                                      (__v2di)_mm_madd52hi_epu64(__X, __Y, __Z),
50119610Sache                                      (__v2di)_mm_setzero_si128());
51119610Sache}
52119610Sache
53119610Sachestatic __inline__ __m256i __DEFAULT_FN_ATTRS256
54119610Sache_mm256_mask_madd52hi_epu64 (__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
55119610Sache{
56119610Sache  return (__m256i)__builtin_ia32_selectq_256(__M,
57119610Sache                                   (__v4di)_mm256_madd52hi_epu64(__W, __X, __Y),
58119610Sache                                   (__v4di)__W);
59119610Sache}
60119610Sache
61119610Sachestatic __inline__ __m256i __DEFAULT_FN_ATTRS256
62119610Sache_mm256_maskz_madd52hi_epu64 (__mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z)
63119610Sache{
64119610Sache  return (__m256i)__builtin_ia32_selectq_256(__M,
65119610Sache                                   (__v4di)_mm256_madd52hi_epu64(__X, __Y, __Z),
66119610Sache                                   (__v4di)_mm256_setzero_si256());
67119610Sache}
68119610Sache
69119610Sachestatic __inline__ __m128i __DEFAULT_FN_ATTRS128
70119610Sache_mm_mask_madd52lo_epu64 (__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
71119610Sache{
72119610Sache  return (__m128i)__builtin_ia32_selectq_128(__M,
73119610Sache                                      (__v2di)_mm_madd52lo_epu64(__W, __X, __Y),
74119610Sache                                      (__v2di)__W);
75119610Sache}
76119610Sache
77119610Sachestatic __inline__ __m128i __DEFAULT_FN_ATTRS128
78119610Sache_mm_maskz_madd52lo_epu64 (__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z)
79119610Sache{
80119610Sache  return (__m128i)__builtin_ia32_selectq_128(__M,
81119610Sache                                      (__v2di)_mm_madd52lo_epu64(__X, __Y, __Z),
82119610Sache                                      (__v2di)_mm_setzero_si128());
83119610Sache}
84119610Sache
85119610Sachestatic __inline__ __m256i __DEFAULT_FN_ATTRS256
86119610Sache_mm256_mask_madd52lo_epu64 (__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
87119610Sache{
88119610Sache  return (__m256i)__builtin_ia32_selectq_256(__M,
89119610Sache                                   (__v4di)_mm256_madd52lo_epu64(__W, __X, __Y),
90119610Sache                                   (__v4di)__W);
91119610Sache}
92119610Sache
93119610Sachestatic __inline__ __m256i __DEFAULT_FN_ATTRS256
94119610Sache_mm256_maskz_madd52lo_epu64 (__mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z)
95119610Sache{
96119610Sache  return (__m256i)__builtin_ia32_selectq_256(__M,
97119610Sache                                   (__v4di)_mm256_madd52lo_epu64(__X, __Y, __Z),
98119610Sache                                   (__v4di)_mm256_setzero_si256());
99119610Sache}
100119610Sache
101119610Sache
/* Remove the file-local attribute helper macros so they are no longer defined
 * after this header has been processed. */
#undef __DEFAULT_FN_ATTRS128
#undef __DEFAULT_FN_ATTRS256

105119610Sache#endif
106119610Sache