1327302Sdim/*===------------- avx512vlvnniintrin.h - VNNI intrinsics ------------------=== 2327302Sdim * 3327302Sdim * 4353358Sdim * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 5353358Sdim * See https://llvm.org/LICENSE.txt for license information. 6353358Sdim * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 7327302Sdim * 8327302Sdim *===-----------------------------------------------------------------------=== 9327302Sdim */ 10327302Sdim#ifndef __IMMINTRIN_H 11327302Sdim#error "Never use <avx512vlvnniintrin.h> directly; include <immintrin.h> instead." 12327302Sdim#endif 13327302Sdim 14327302Sdim#ifndef __AVX512VLVNNIINTRIN_H 15327302Sdim#define __AVX512VLVNNIINTRIN_H 16327302Sdim 17327302Sdim/* Define the default attributes for the functions in this file. */ 18341825Sdim#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vnni"), __min_vector_width__(128))) 19341825Sdim#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vnni"), __min_vector_width__(256))) 20327302Sdim 21327302Sdim 22341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS256 23341825Sdim_mm256_dpbusd_epi32(__m256i __S, __m256i __A, __m256i __B) 24341825Sdim{ 25341825Sdim return (__m256i)__builtin_ia32_vpdpbusd256((__v8si)__S, (__v8si)__A, 26341825Sdim (__v8si)__B); 27341825Sdim} 28341825Sdim 29341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS256 30327302Sdim_mm256_mask_dpbusd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) 31327302Sdim{ 32341825Sdim return (__m256i)__builtin_ia32_selectd_256(__U, 33341825Sdim (__v8si)_mm256_dpbusd_epi32(__S, __A, __B), 34341825Sdim (__v8si)__S); 35327302Sdim} 36327302Sdim 37341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS256 38327302Sdim_mm256_maskz_dpbusd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) 39327302Sdim{ 40341825Sdim return (__m256i)__builtin_ia32_selectd_256(__U, 41341825Sdim (__v8si)_mm256_dpbusd_epi32(__S, __A, __B), 42341825Sdim (__v8si)_mm256_setzero_si256()); 43327302Sdim} 44327302Sdim 45341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS256 46341825Sdim_mm256_dpbusds_epi32(__m256i __S, __m256i __A, __m256i __B) 47327302Sdim{ 48341825Sdim return (__m256i)__builtin_ia32_vpdpbusds256((__v8si)__S, (__v8si)__A, 49341825Sdim (__v8si)__B); 50327302Sdim} 51327302Sdim 52341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS256 53327302Sdim_mm256_mask_dpbusds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) 54327302Sdim{ 55341825Sdim return (__m256i)__builtin_ia32_selectd_256(__U, 56341825Sdim (__v8si)_mm256_dpbusds_epi32(__S, __A, __B), 57341825Sdim (__v8si)__S); 58327302Sdim} 59327302Sdim 60341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS256 61327302Sdim_mm256_maskz_dpbusds_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) 62327302Sdim{ 63341825Sdim return (__m256i)__builtin_ia32_selectd_256(__U, 64341825Sdim (__v8si)_mm256_dpbusds_epi32(__S, __A, __B), 65341825Sdim (__v8si)_mm256_setzero_si256()); 66327302Sdim} 67327302Sdim 68341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS256 69341825Sdim_mm256_dpwssd_epi32(__m256i __S, __m256i __A, __m256i __B) 70327302Sdim{ 71341825Sdim return (__m256i)__builtin_ia32_vpdpwssd256((__v8si)__S, (__v8si)__A, 72341825Sdim (__v8si)__B); 73327302Sdim} 74327302Sdim 75341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS256 76327302Sdim_mm256_mask_dpwssd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) 77327302Sdim{ 78341825Sdim return (__m256i)__builtin_ia32_selectd_256(__U, 79341825Sdim (__v8si)_mm256_dpwssd_epi32(__S, __A, __B), 80341825Sdim (__v8si)__S); 81327302Sdim} 82327302Sdim 83341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS256 84327302Sdim_mm256_maskz_dpwssd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) 85327302Sdim{ 86341825Sdim return (__m256i)__builtin_ia32_selectd_256(__U, 87341825Sdim (__v8si)_mm256_dpwssd_epi32(__S, __A, __B), 88341825Sdim (__v8si)_mm256_setzero_si256()); 89327302Sdim} 90327302Sdim 91341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS256 92341825Sdim_mm256_dpwssds_epi32(__m256i __S, __m256i __A, __m256i __B) 93327302Sdim{ 94341825Sdim return (__m256i)__builtin_ia32_vpdpwssds256((__v8si)__S, (__v8si)__A, 95341825Sdim (__v8si)__B); 96327302Sdim} 97327302Sdim 98341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS256 99327302Sdim_mm256_mask_dpwssds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) 100327302Sdim{ 101341825Sdim return (__m256i)__builtin_ia32_selectd_256(__U, 102341825Sdim (__v8si)_mm256_dpwssds_epi32(__S, __A, __B), 103341825Sdim (__v8si)__S); 104327302Sdim} 105327302Sdim 106341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS256 107327302Sdim_mm256_maskz_dpwssds_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) 108327302Sdim{ 109341825Sdim return (__m256i)__builtin_ia32_selectd_256(__U, 110341825Sdim (__v8si)_mm256_dpwssds_epi32(__S, __A, __B), 111341825Sdim (__v8si)_mm256_setzero_si256()); 112327302Sdim} 113327302Sdim 114341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS128 115341825Sdim_mm_dpbusd_epi32(__m128i __S, __m128i __A, __m128i __B) 116327302Sdim{ 117341825Sdim return (__m128i)__builtin_ia32_vpdpbusd128((__v4si)__S, (__v4si)__A, 118341825Sdim (__v4si)__B); 119327302Sdim} 120327302Sdim 121341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS128 122335799Sdim_mm_mask_dpbusd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) 123327302Sdim{ 124341825Sdim return (__m128i)__builtin_ia32_selectd_128(__U, 125341825Sdim (__v4si)_mm_dpbusd_epi32(__S, __A, __B), 126341825Sdim (__v4si)__S); 127327302Sdim} 128327302Sdim 129341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS128 130335799Sdim_mm_maskz_dpbusd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) 131327302Sdim{ 132341825Sdim return (__m128i)__builtin_ia32_selectd_128(__U, 133341825Sdim (__v4si)_mm_dpbusd_epi32(__S, __A, __B), 134341825Sdim (__v4si)_mm_setzero_si128()); 135327302Sdim} 136327302Sdim 137341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS128 138341825Sdim_mm_dpbusds_epi32(__m128i __S, __m128i __A, __m128i __B) 139327302Sdim{ 140341825Sdim return (__m128i)__builtin_ia32_vpdpbusds128((__v4si)__S, (__v4si)__A, 141341825Sdim (__v4si)__B); 142327302Sdim} 143327302Sdim 144341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS128 145335799Sdim_mm_mask_dpbusds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) 146327302Sdim{ 147341825Sdim return (__m128i)__builtin_ia32_selectd_128(__U, 148341825Sdim (__v4si)_mm_dpbusds_epi32(__S, __A, __B), 149341825Sdim (__v4si)__S); 150327302Sdim} 151327302Sdim 152341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS128 153335799Sdim_mm_maskz_dpbusds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) 154327302Sdim{ 155341825Sdim return (__m128i)__builtin_ia32_selectd_128(__U, 156341825Sdim (__v4si)_mm_dpbusds_epi32(__S, __A, __B), 157341825Sdim (__v4si)_mm_setzero_si128()); 158327302Sdim} 159327302Sdim 160341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS128 161341825Sdim_mm_dpwssd_epi32(__m128i __S, __m128i __A, __m128i __B) 162327302Sdim{ 163341825Sdim return (__m128i)__builtin_ia32_vpdpwssd128((__v4si)__S, (__v4si)__A, 164341825Sdim (__v4si)__B); 165327302Sdim} 166327302Sdim 167341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS128 168335799Sdim_mm_mask_dpwssd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) 169327302Sdim{ 170341825Sdim return (__m128i)__builtin_ia32_selectd_128(__U, 171341825Sdim (__v4si)_mm_dpwssd_epi32(__S, __A, __B), 172341825Sdim (__v4si)__S); 173327302Sdim} 174327302Sdim 175341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS128 176335799Sdim_mm_maskz_dpwssd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) 177327302Sdim{ 178341825Sdim return (__m128i)__builtin_ia32_selectd_128(__U, 179341825Sdim (__v4si)_mm_dpwssd_epi32(__S, __A, __B), 180341825Sdim (__v4si)_mm_setzero_si128()); 181327302Sdim} 182327302Sdim 183341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS128 184341825Sdim_mm_dpwssds_epi32(__m128i __S, __m128i __A, __m128i __B) 185327302Sdim{ 186341825Sdim return (__m128i)__builtin_ia32_vpdpwssds128((__v4si)__S, (__v4si)__A, 187341825Sdim (__v4si)__B); 188327302Sdim} 189327302Sdim 190341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS128 191335799Sdim_mm_mask_dpwssds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) 192327302Sdim{ 193341825Sdim return (__m128i)__builtin_ia32_selectd_128(__U, 194341825Sdim (__v4si)_mm_dpwssds_epi32(__S, __A, __B), 195341825Sdim (__v4si)__S); 196327302Sdim} 197327302Sdim 198341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS128 199335799Sdim_mm_maskz_dpwssds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) 200327302Sdim{ 201341825Sdim return (__m128i)__builtin_ia32_selectd_128(__U, 202341825Sdim (__v4si)_mm_dpwssds_epi32(__S, __A, __B), 203341825Sdim (__v4si)_mm_setzero_si128()); 204327302Sdim} 205327302Sdim 206341825Sdim#undef __DEFAULT_FN_ATTRS128 207341825Sdim#undef __DEFAULT_FN_ATTRS256 208327302Sdim 209327302Sdim#endif 210