1327302Sdim/*===------------- avx512vlvnniintrin.h - VNNI intrinsics ------------------===
2327302Sdim *
3327302Sdim *
4353358Sdim * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5353358Sdim * See https://llvm.org/LICENSE.txt for license information.
6353358Sdim * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7327302Sdim *
8327302Sdim *===-----------------------------------------------------------------------===
9327302Sdim */
10327302Sdim#ifndef __IMMINTRIN_H
11327302Sdim#error "Never use <avx512vlvnniintrin.h> directly; include <immintrin.h> instead."
12327302Sdim#endif
13327302Sdim
14327302Sdim#ifndef __AVX512VLVNNIINTRIN_H
15327302Sdim#define __AVX512VLVNNIINTRIN_H
16327302Sdim
/* Default attribute sets for the functions in this file.
 *
 * Both macros request always_inline and nodebug and restrict the function to
 * the "avx512vl,avx512vnni" target features.  They differ only in the value
 * given to __min_vector_width__: 128 for the __m128i wrappers and 256 for the
 * __m256i wrappers. */
#define __DEFAULT_FN_ATTRS128                                                  \
  __attribute__((__always_inline__, __nodebug__,                               \
                 __target__("avx512vl,avx512vnni"),                            \
                 __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256                                                  \
  __attribute__((__always_inline__, __nodebug__,                               \
                 __target__("avx512vl,avx512vnni"),                            \
                 __min_vector_width__(256)))
20327302Sdim
21327302Sdim
22341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS256
23341825Sdim_mm256_dpbusd_epi32(__m256i __S, __m256i __A, __m256i __B)
24341825Sdim{
25341825Sdim  return (__m256i)__builtin_ia32_vpdpbusd256((__v8si)__S, (__v8si)__A,
26341825Sdim                                             (__v8si)__B);
27341825Sdim}
28341825Sdim
29341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS256
30327302Sdim_mm256_mask_dpbusd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
31327302Sdim{
32341825Sdim  return (__m256i)__builtin_ia32_selectd_256(__U,
33341825Sdim                                     (__v8si)_mm256_dpbusd_epi32(__S, __A, __B),
34341825Sdim                                     (__v8si)__S);
35327302Sdim}
36327302Sdim
37341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS256
38327302Sdim_mm256_maskz_dpbusd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
39327302Sdim{
40341825Sdim  return (__m256i)__builtin_ia32_selectd_256(__U,
41341825Sdim                                     (__v8si)_mm256_dpbusd_epi32(__S, __A, __B),
42341825Sdim                                     (__v8si)_mm256_setzero_si256());
43327302Sdim}
44327302Sdim
45341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS256
46341825Sdim_mm256_dpbusds_epi32(__m256i __S, __m256i __A, __m256i __B)
47327302Sdim{
48341825Sdim  return (__m256i)__builtin_ia32_vpdpbusds256((__v8si)__S, (__v8si)__A,
49341825Sdim                                              (__v8si)__B);
50327302Sdim}
51327302Sdim
52341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS256
53327302Sdim_mm256_mask_dpbusds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
54327302Sdim{
55341825Sdim  return (__m256i)__builtin_ia32_selectd_256(__U,
56341825Sdim                                    (__v8si)_mm256_dpbusds_epi32(__S, __A, __B),
57341825Sdim                                    (__v8si)__S);
58327302Sdim}
59327302Sdim
60341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS256
61327302Sdim_mm256_maskz_dpbusds_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
62327302Sdim{
63341825Sdim  return (__m256i)__builtin_ia32_selectd_256(__U,
64341825Sdim                                     (__v8si)_mm256_dpbusds_epi32(__S, __A, __B),
65341825Sdim                                     (__v8si)_mm256_setzero_si256());
66327302Sdim}
67327302Sdim
68341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS256
69341825Sdim_mm256_dpwssd_epi32(__m256i __S, __m256i __A, __m256i __B)
70327302Sdim{
71341825Sdim  return (__m256i)__builtin_ia32_vpdpwssd256((__v8si)__S, (__v8si)__A,
72341825Sdim                                             (__v8si)__B);
73327302Sdim}
74327302Sdim
75341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS256
76327302Sdim_mm256_mask_dpwssd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
77327302Sdim{
78341825Sdim  return (__m256i)__builtin_ia32_selectd_256(__U,
79341825Sdim                                     (__v8si)_mm256_dpwssd_epi32(__S, __A, __B),
80341825Sdim                                     (__v8si)__S);
81327302Sdim}
82327302Sdim
83341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS256
84327302Sdim_mm256_maskz_dpwssd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
85327302Sdim{
86341825Sdim  return (__m256i)__builtin_ia32_selectd_256(__U,
87341825Sdim                                     (__v8si)_mm256_dpwssd_epi32(__S, __A, __B),
88341825Sdim                                     (__v8si)_mm256_setzero_si256());
89327302Sdim}
90327302Sdim
91341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS256
92341825Sdim_mm256_dpwssds_epi32(__m256i __S, __m256i __A, __m256i __B)
93327302Sdim{
94341825Sdim  return (__m256i)__builtin_ia32_vpdpwssds256((__v8si)__S, (__v8si)__A,
95341825Sdim                                              (__v8si)__B);
96327302Sdim}
97327302Sdim
98341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS256
99327302Sdim_mm256_mask_dpwssds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
100327302Sdim{
101341825Sdim  return (__m256i)__builtin_ia32_selectd_256(__U,
102341825Sdim                                    (__v8si)_mm256_dpwssds_epi32(__S, __A, __B),
103341825Sdim                                    (__v8si)__S);
104327302Sdim}
105327302Sdim
106341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS256
107327302Sdim_mm256_maskz_dpwssds_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
108327302Sdim{
109341825Sdim  return (__m256i)__builtin_ia32_selectd_256(__U,
110341825Sdim                                    (__v8si)_mm256_dpwssds_epi32(__S, __A, __B),
111341825Sdim                                    (__v8si)_mm256_setzero_si256());
112327302Sdim}
113327302Sdim
114341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS128
115341825Sdim_mm_dpbusd_epi32(__m128i __S, __m128i __A, __m128i __B)
116327302Sdim{
117341825Sdim  return (__m128i)__builtin_ia32_vpdpbusd128((__v4si)__S, (__v4si)__A,
118341825Sdim                                             (__v4si)__B);
119327302Sdim}
120327302Sdim
121341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS128
122335799Sdim_mm_mask_dpbusd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
123327302Sdim{
124341825Sdim  return (__m128i)__builtin_ia32_selectd_128(__U,
125341825Sdim                                        (__v4si)_mm_dpbusd_epi32(__S, __A, __B),
126341825Sdim                                        (__v4si)__S);
127327302Sdim}
128327302Sdim
129341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS128
130335799Sdim_mm_maskz_dpbusd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
131327302Sdim{
132341825Sdim  return (__m128i)__builtin_ia32_selectd_128(__U,
133341825Sdim                                        (__v4si)_mm_dpbusd_epi32(__S, __A, __B),
134341825Sdim                                        (__v4si)_mm_setzero_si128());
135327302Sdim}
136327302Sdim
137341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS128
138341825Sdim_mm_dpbusds_epi32(__m128i __S, __m128i __A, __m128i __B)
139327302Sdim{
140341825Sdim  return (__m128i)__builtin_ia32_vpdpbusds128((__v4si)__S, (__v4si)__A,
141341825Sdim                                              (__v4si)__B);
142327302Sdim}
143327302Sdim
144341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS128
145335799Sdim_mm_mask_dpbusds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
146327302Sdim{
147341825Sdim  return (__m128i)__builtin_ia32_selectd_128(__U,
148341825Sdim                                       (__v4si)_mm_dpbusds_epi32(__S, __A, __B),
149341825Sdim                                       (__v4si)__S);
150327302Sdim}
151327302Sdim
152341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS128
153335799Sdim_mm_maskz_dpbusds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
154327302Sdim{
155341825Sdim  return (__m128i)__builtin_ia32_selectd_128(__U,
156341825Sdim                                       (__v4si)_mm_dpbusds_epi32(__S, __A, __B),
157341825Sdim                                       (__v4si)_mm_setzero_si128());
158327302Sdim}
159327302Sdim
160341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS128
161341825Sdim_mm_dpwssd_epi32(__m128i __S, __m128i __A, __m128i __B)
162327302Sdim{
163341825Sdim  return (__m128i)__builtin_ia32_vpdpwssd128((__v4si)__S, (__v4si)__A,
164341825Sdim                                             (__v4si)__B);
165327302Sdim}
166327302Sdim
167341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS128
168335799Sdim_mm_mask_dpwssd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
169327302Sdim{
170341825Sdim  return (__m128i)__builtin_ia32_selectd_128(__U,
171341825Sdim                                        (__v4si)_mm_dpwssd_epi32(__S, __A, __B),
172341825Sdim                                        (__v4si)__S);
173327302Sdim}
174327302Sdim
175341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS128
176335799Sdim_mm_maskz_dpwssd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
177327302Sdim{
178341825Sdim  return (__m128i)__builtin_ia32_selectd_128(__U,
179341825Sdim                                        (__v4si)_mm_dpwssd_epi32(__S, __A, __B),
180341825Sdim                                        (__v4si)_mm_setzero_si128());
181327302Sdim}
182327302Sdim
183341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS128
184341825Sdim_mm_dpwssds_epi32(__m128i __S, __m128i __A, __m128i __B)
185327302Sdim{
186341825Sdim  return (__m128i)__builtin_ia32_vpdpwssds128((__v4si)__S, (__v4si)__A,
187341825Sdim                                              (__v4si)__B);
188327302Sdim}
189327302Sdim
190341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS128
191335799Sdim_mm_mask_dpwssds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
192327302Sdim{
193341825Sdim  return (__m128i)__builtin_ia32_selectd_128(__U,
194341825Sdim                                       (__v4si)_mm_dpwssds_epi32(__S, __A, __B),
195341825Sdim                                       (__v4si)__S);
196327302Sdim}
197327302Sdim
198341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS128
199335799Sdim_mm_maskz_dpwssds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
200327302Sdim{
201341825Sdim  return (__m128i)__builtin_ia32_selectd_128(__U,
202341825Sdim                                       (__v4si)_mm_dpwssds_epi32(__S, __A, __B),
203341825Sdim                                       (__v4si)_mm_setzero_si128());
204327302Sdim}
205327302Sdim
/* Remove the attribute helper macros defined earlier in this file so they
 * are no longer defined after this header ends. */
#undef __DEFAULT_FN_ATTRS256
#undef __DEFAULT_FN_ATTRS128
208327302Sdim
209327302Sdim#endif
210