1119610Sache/*===------------- avx512ifmavlintrin.h - IFMA intrinsics ------------------=== 2119610Sache * 3119610Sache * 4119610Sache * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 5119610Sache * See https://llvm.org/LICENSE.txt for license information. 6119610Sache * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 7119610Sache * 8119610Sache *===-----------------------------------------------------------------------=== 9119610Sache */ 10119610Sache#ifndef __IMMINTRIN_H 11119610Sache#error "Never use <avx512ifmavlintrin.h> directly; include <immintrin.h> instead." 12119610Sache#endif 13119610Sache 14119610Sache#ifndef __IFMAVLINTRIN_H 15119610Sache#define __IFMAVLINTRIN_H 16119610Sache 17119610Sache/* Define the default attributes for the functions in this file. */ 18119610Sache#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512ifma,avx512vl"), __min_vector_width__(128))) 19119610Sache#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512ifma,avx512vl"), __min_vector_width__(256))) 20119610Sache 21119610Sache#define _mm_madd52hi_epu64(X, Y, Z) \ 22119610Sache ((__m128i)__builtin_ia32_vpmadd52huq128((__v2di)(X), (__v2di)(Y), \ 23119610Sache (__v2di)(Z))) 24119610Sache 25119610Sache#define _mm256_madd52hi_epu64(X, Y, Z) \ 26119610Sache ((__m256i)__builtin_ia32_vpmadd52huq256((__v4di)(X), (__v4di)(Y), \ 27119610Sache (__v4di)(Z))) 28119610Sache 29119610Sache#define _mm_madd52lo_epu64(X, Y, Z) \ 30119610Sache ((__m128i)__builtin_ia32_vpmadd52luq128((__v2di)(X), (__v2di)(Y), \ 31119610Sache (__v2di)(Z))) 32119610Sache 33119610Sache#define _mm256_madd52lo_epu64(X, Y, Z) \ 34119610Sache ((__m256i)__builtin_ia32_vpmadd52luq256((__v4di)(X), (__v4di)(Y), \ 35119610Sache (__v4di)(Z))) 36119610Sache 37119610Sachestatic __inline__ __m128i __DEFAULT_FN_ATTRS128 38119610Sache_mm_mask_madd52hi_epu64 (__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) 39119610Sache{ 40119610Sache return (__m128i)__builtin_ia32_selectq_128(__M, 41119610Sache (__v2di)_mm_madd52hi_epu64(__W, __X, __Y), 42119610Sache (__v2di)__W); 43119610Sache} 44119610Sache 45119610Sachestatic __inline__ __m128i __DEFAULT_FN_ATTRS128 46119610Sache_mm_maskz_madd52hi_epu64 (__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z) 47119610Sache{ 48119610Sache return (__m128i)__builtin_ia32_selectq_128(__M, 49119610Sache (__v2di)_mm_madd52hi_epu64(__X, __Y, __Z), 50119610Sache (__v2di)_mm_setzero_si128()); 51119610Sache} 52119610Sache 53119610Sachestatic __inline__ __m256i __DEFAULT_FN_ATTRS256 54119610Sache_mm256_mask_madd52hi_epu64 (__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) 55119610Sache{ 56119610Sache return (__m256i)__builtin_ia32_selectq_256(__M, 57119610Sache (__v4di)_mm256_madd52hi_epu64(__W, __X, __Y), 58119610Sache (__v4di)__W); 59119610Sache} 60119610Sache 61119610Sachestatic __inline__ __m256i __DEFAULT_FN_ATTRS256 62119610Sache_mm256_maskz_madd52hi_epu64 (__mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z) 63119610Sache{ 64119610Sache return (__m256i)__builtin_ia32_selectq_256(__M, 65119610Sache (__v4di)_mm256_madd52hi_epu64(__X, __Y, __Z), 66119610Sache (__v4di)_mm256_setzero_si256()); 67119610Sache} 68119610Sache 69119610Sachestatic __inline__ __m128i __DEFAULT_FN_ATTRS128 70119610Sache_mm_mask_madd52lo_epu64 (__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) 71119610Sache{ 72119610Sache return (__m128i)__builtin_ia32_selectq_128(__M, 73119610Sache (__v2di)_mm_madd52lo_epu64(__W, __X, __Y), 74119610Sache (__v2di)__W); 75119610Sache} 76119610Sache 77119610Sachestatic __inline__ __m128i __DEFAULT_FN_ATTRS128 78119610Sache_mm_maskz_madd52lo_epu64 (__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z) 79119610Sache{ 80119610Sache return (__m128i)__builtin_ia32_selectq_128(__M, 81119610Sache (__v2di)_mm_madd52lo_epu64(__X, __Y, __Z), 82119610Sache (__v2di)_mm_setzero_si128()); 83119610Sache} 84119610Sache 85119610Sachestatic __inline__ __m256i __DEFAULT_FN_ATTRS256 86119610Sache_mm256_mask_madd52lo_epu64 (__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) 87119610Sache{ 88119610Sache return (__m256i)__builtin_ia32_selectq_256(__M, 89119610Sache (__v4di)_mm256_madd52lo_epu64(__W, __X, __Y), 90119610Sache (__v4di)__W); 91119610Sache} 92119610Sache 93119610Sachestatic __inline__ __m256i __DEFAULT_FN_ATTRS256 94119610Sache_mm256_maskz_madd52lo_epu64 (__mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z) 95119610Sache{ 96119610Sache return (__m256i)__builtin_ia32_selectq_256(__M, 97119610Sache (__v4di)_mm256_madd52lo_epu64(__X, __Y, __Z), 98119610Sache (__v4di)_mm256_setzero_si256()); 99119610Sache} 100119610Sache 101119610Sache 102119610Sache#undef __DEFAULT_FN_ATTRS128 103119610Sache#undef __DEFAULT_FN_ATTRS256 104119610Sache 105119610Sache#endif 106119610Sache