1234285Sdim/*===------------- avx512ifmavlintrin.h - IFMA intrinsics ------------------=== 2234285Sdim * 3234285Sdim * 4234285Sdim * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 5234285Sdim * See https://llvm.org/LICENSE.txt for license information. 6234285Sdim * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 7234285Sdim * 8234285Sdim *===-----------------------------------------------------------------------=== 9234285Sdim */ 10234285Sdim#ifndef __IMMINTRIN_H 11234285Sdim#error "Never use <avx512ifmavlintrin.h> directly; include <immintrin.h> instead." 12234285Sdim#endif 13234285Sdim 14280031Sdim#ifndef __IFMAVLINTRIN_H 15280031Sdim#define __IFMAVLINTRIN_H 16234285Sdim 17234285Sdim/* Define the default attributes for the functions in this file. */ 18234285Sdim#define __DEFAULT_FN_ATTRS128 \ 19234285Sdim __attribute__((__always_inline__, __nodebug__, \ 20234285Sdim __target__("avx512ifma,avx512vl,no-evex512"), \ 21234285Sdim __min_vector_width__(128))) 22234285Sdim#define __DEFAULT_FN_ATTRS256 \ 23234285Sdim __attribute__((__always_inline__, __nodebug__, \ 24234285Sdim __target__("avx512ifma,avx512vl,no-evex512"), \ 25234285Sdim __min_vector_width__(256))) 26234285Sdim 27234285Sdim#define _mm_madd52hi_epu64(X, Y, Z) \ 28288943Sdim ((__m128i)__builtin_ia32_vpmadd52huq128((__v2di)(X), (__v2di)(Y), \ 29288943Sdim (__v2di)(Z))) 30288943Sdim 31288943Sdim#define _mm256_madd52hi_epu64(X, Y, Z) \ 32288943Sdim ((__m256i)__builtin_ia32_vpmadd52huq256((__v4di)(X), (__v4di)(Y), \ 33288943Sdim (__v4di)(Z))) 34234285Sdim 35234285Sdim#define _mm_madd52lo_epu64(X, Y, Z) \ 36276479Sdim ((__m128i)__builtin_ia32_vpmadd52luq128((__v2di)(X), (__v2di)(Y), \ 37234285Sdim (__v2di)(Z))) 38234285Sdim 39234285Sdim#define _mm256_madd52lo_epu64(X, Y, Z) \ 40276479Sdim ((__m256i)__builtin_ia32_vpmadd52luq256((__v4di)(X), (__v4di)(Y), \ 41276479Sdim (__v4di)(Z))) 42234285Sdim 43276479Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS128 44234285Sdim_mm_mask_madd52hi_epu64 (__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) 45296417Sdim{ 46296417Sdim return (__m128i)__builtin_ia32_selectq_128(__M, 47296417Sdim (__v2di)_mm_madd52hi_epu64(__W, __X, __Y), 48296417Sdim (__v2di)__W); 49234285Sdim} 50234285Sdim 51234285Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS128 52276479Sdim_mm_maskz_madd52hi_epu64 (__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z) 53234285Sdim{ 54234285Sdim return (__m128i)__builtin_ia32_selectq_128(__M, 55276479Sdim (__v2di)_mm_madd52hi_epu64(__X, __Y, __Z), 56234285Sdim (__v2di)_mm_setzero_si128()); 57234285Sdim} 58234285Sdim 59234285Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS256 60234285Sdim_mm256_mask_madd52hi_epu64 (__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) 61234285Sdim{ 62234285Sdim return (__m256i)__builtin_ia32_selectq_256(__M, 63 (__v4di)_mm256_madd52hi_epu64(__W, __X, __Y), 64 (__v4di)__W); 65} 66 67static __inline__ __m256i __DEFAULT_FN_ATTRS256 68_mm256_maskz_madd52hi_epu64 (__mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z) 69{ 70 return (__m256i)__builtin_ia32_selectq_256(__M, 71 (__v4di)_mm256_madd52hi_epu64(__X, __Y, __Z), 72 (__v4di)_mm256_setzero_si256()); 73} 74 75static __inline__ __m128i __DEFAULT_FN_ATTRS128 76_mm_mask_madd52lo_epu64 (__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) 77{ 78 return (__m128i)__builtin_ia32_selectq_128(__M, 79 (__v2di)_mm_madd52lo_epu64(__W, __X, __Y), 80 (__v2di)__W); 81} 82 83static __inline__ __m128i __DEFAULT_FN_ATTRS128 84_mm_maskz_madd52lo_epu64 (__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z) 85{ 86 return (__m128i)__builtin_ia32_selectq_128(__M, 87 (__v2di)_mm_madd52lo_epu64(__X, __Y, __Z), 88 (__v2di)_mm_setzero_si128()); 89} 90 91static __inline__ __m256i __DEFAULT_FN_ATTRS256 92_mm256_mask_madd52lo_epu64 (__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) 93{ 94 return (__m256i)__builtin_ia32_selectq_256(__M, 95 (__v4di)_mm256_madd52lo_epu64(__W, __X, __Y), 96 (__v4di)__W); 97} 98 99static __inline__ __m256i __DEFAULT_FN_ATTRS256 100_mm256_maskz_madd52lo_epu64 (__mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z) 101{ 102 return (__m256i)__builtin_ia32_selectq_256(__M, 103 (__v4di)_mm256_madd52lo_epu64(__X, __Y, __Z), 104 (__v4di)_mm256_setzero_si256()); 105} 106 107 108#undef __DEFAULT_FN_ATTRS128 109#undef __DEFAULT_FN_ATTRS256 110 111#endif 112