1327302Sdim/*===------------- avx512vbmi2intrin.h - VBMI2 intrinsics ------------------===
2327302Sdim *
3327302Sdim *
4353358Sdim * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5353358Sdim * See https://llvm.org/LICENSE.txt for license information.
6353358Sdim * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7327302Sdim *
8327302Sdim *===-----------------------------------------------------------------------===
9327302Sdim */
10327302Sdim#ifndef __IMMINTRIN_H
11327302Sdim#error "Never use <avx512vbmi2intrin.h> directly; include <immintrin.h> instead."
12327302Sdim#endif
13327302Sdim
14327302Sdim#ifndef __AVX512VBMI2INTRIN_H
15327302Sdim#define __AVX512VBMI2INTRIN_H
16327302Sdim
/* Attribute set applied to every intrinsic function defined in this header:
 * forced inlining, no debug info, the "avx512vbmi2" target feature and a
 * minimum vector width of 512. */
#define __DEFAULT_FN_ATTRS                                                    \
  __attribute__((__always_inline__, __nodebug__,                              \
                 __target__("avx512vbmi2"), __min_vector_width__(512)))
19327302Sdim
20327302Sdim
21327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS
22327302Sdim_mm512_mask_compress_epi16(__m512i __S, __mmask32 __U, __m512i __D)
23327302Sdim{
24327302Sdim  return (__m512i) __builtin_ia32_compresshi512_mask ((__v32hi) __D,
25327302Sdim              (__v32hi) __S,
26327302Sdim              __U);
27327302Sdim}
28327302Sdim
29327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS
30327302Sdim_mm512_maskz_compress_epi16(__mmask32 __U, __m512i __D)
31327302Sdim{
32327302Sdim  return (__m512i) __builtin_ia32_compresshi512_mask ((__v32hi) __D,
33341825Sdim              (__v32hi) _mm512_setzero_si512(),
34327302Sdim              __U);
35327302Sdim}
36327302Sdim
37327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS
38327302Sdim_mm512_mask_compress_epi8(__m512i __S, __mmask64 __U, __m512i __D)
39327302Sdim{
40327302Sdim  return (__m512i) __builtin_ia32_compressqi512_mask ((__v64qi) __D,
41327302Sdim              (__v64qi) __S,
42327302Sdim              __U);
43327302Sdim}
44327302Sdim
45327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS
46327302Sdim_mm512_maskz_compress_epi8(__mmask64 __U, __m512i __D)
47327302Sdim{
48327302Sdim  return (__m512i) __builtin_ia32_compressqi512_mask ((__v64qi) __D,
49341825Sdim              (__v64qi) _mm512_setzero_si512(),
50327302Sdim              __U);
51327302Sdim}
52327302Sdim
53327302Sdimstatic __inline__ void __DEFAULT_FN_ATTRS
54327302Sdim_mm512_mask_compressstoreu_epi16(void *__P, __mmask32 __U, __m512i __D)
55327302Sdim{
56327302Sdim  __builtin_ia32_compressstorehi512_mask ((__v32hi *) __P, (__v32hi) __D,
57327302Sdim              __U);
58327302Sdim}
59327302Sdim
60327302Sdimstatic __inline__ void __DEFAULT_FN_ATTRS
61327302Sdim_mm512_mask_compressstoreu_epi8(void *__P, __mmask64 __U, __m512i __D)
62327302Sdim{
63327302Sdim  __builtin_ia32_compressstoreqi512_mask ((__v64qi *) __P, (__v64qi) __D,
64327302Sdim              __U);
65327302Sdim}
66327302Sdim
67327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS
68327302Sdim_mm512_mask_expand_epi16(__m512i __S, __mmask32 __U, __m512i __D)
69327302Sdim{
70327302Sdim  return (__m512i) __builtin_ia32_expandhi512_mask ((__v32hi) __D,
71327302Sdim              (__v32hi) __S,
72327302Sdim              __U);
73327302Sdim}
74327302Sdim
75327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS
76327302Sdim_mm512_maskz_expand_epi16(__mmask32 __U, __m512i __D)
77327302Sdim{
78327302Sdim  return (__m512i) __builtin_ia32_expandhi512_mask ((__v32hi) __D,
79341825Sdim              (__v32hi) _mm512_setzero_si512(),
80327302Sdim              __U);
81327302Sdim}
82327302Sdim
83327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS
84327302Sdim_mm512_mask_expand_epi8(__m512i __S, __mmask64 __U, __m512i __D)
85327302Sdim{
86327302Sdim  return (__m512i) __builtin_ia32_expandqi512_mask ((__v64qi) __D,
87327302Sdim              (__v64qi) __S,
88327302Sdim              __U);
89327302Sdim}
90327302Sdim
91327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS
92327302Sdim_mm512_maskz_expand_epi8(__mmask64 __U, __m512i __D)
93327302Sdim{
94327302Sdim  return (__m512i) __builtin_ia32_expandqi512_mask ((__v64qi) __D,
95341825Sdim              (__v64qi) _mm512_setzero_si512(),
96327302Sdim              __U);
97327302Sdim}
98327302Sdim
99327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS
100327302Sdim_mm512_mask_expandloadu_epi16(__m512i __S, __mmask32 __U, void const *__P)
101327302Sdim{
102327302Sdim  return (__m512i) __builtin_ia32_expandloadhi512_mask ((const __v32hi *)__P,
103327302Sdim              (__v32hi) __S,
104327302Sdim              __U);
105327302Sdim}
106327302Sdim
107327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS
108327302Sdim_mm512_maskz_expandloadu_epi16(__mmask32 __U, void const *__P)
109327302Sdim{
110327302Sdim  return (__m512i) __builtin_ia32_expandloadhi512_mask ((const __v32hi *)__P,
111341825Sdim              (__v32hi) _mm512_setzero_si512(),
112327302Sdim              __U);
113327302Sdim}
114327302Sdim
115327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS
116327302Sdim_mm512_mask_expandloadu_epi8(__m512i __S, __mmask64 __U, void const *__P)
117327302Sdim{
118327302Sdim  return (__m512i) __builtin_ia32_expandloadqi512_mask ((const __v64qi *)__P,
119327302Sdim              (__v64qi) __S,
120327302Sdim              __U);
121327302Sdim}
122327302Sdim
123327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS
124327302Sdim_mm512_maskz_expandloadu_epi8(__mmask64 __U, void const *__P)
125327302Sdim{
126327302Sdim  return (__m512i) __builtin_ia32_expandloadqi512_mask ((const __v64qi *)__P,
127341825Sdim              (__v64qi) _mm512_setzero_si512(),
128327302Sdim              __U);
129327302Sdim}
130327302Sdim
/* Expands to the vpshldq512 builtin applied to A and B (as __v8di) with the
 * immediate I converted to int; the result is cast to __m512i.
 * The whole expansion is parenthesized so the leading cast cannot bind to a
 * postfix operator or sizeof that follows the macro invocation. */
#define _mm512_shldi_epi64(A, B, I) \
  ((__m512i)__builtin_ia32_vpshldq512((__v8di)(__m512i)(A), \
                                      (__v8di)(__m512i)(B), (int)(I)))
134327302Sdim
/* Masked form of _mm512_shldi_epi64: passes the mask U (as __mmask8), the
 * unmasked result for (A, B, I) and S (as __v8di) to the selectq_512 builtin.
 * The whole expansion is parenthesized so the leading cast cannot bind to a
 * postfix operator or sizeof that follows the macro invocation. */
#define _mm512_mask_shldi_epi64(S, U, A, B, I) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                    (__v8di)_mm512_shldi_epi64((A), (B), (I)), \
                                    (__v8di)(__m512i)(S)))
139341825Sdim
/* Zero-masked form of _mm512_shldi_epi64: passes the mask U (as __mmask8),
 * the unmasked result for (A, B, I) and the vector returned by
 * _mm512_setzero_si512() to the selectq_512 builtin.
 * The whole expansion is parenthesized so the leading cast cannot bind to a
 * postfix operator or sizeof that follows the macro invocation. */
#define _mm512_maskz_shldi_epi64(U, A, B, I) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                    (__v8di)_mm512_shldi_epi64((A), (B), (I)), \
                                    (__v8di)_mm512_setzero_si512()))
144327302Sdim
/* Expands to the vpshldd512 builtin applied to A and B (as __v16si) with the
 * immediate I converted to int; the result is cast to __m512i.
 * The whole expansion is parenthesized so the leading cast cannot bind to a
 * postfix operator or sizeof that follows the macro invocation. */
#define _mm512_shldi_epi32(A, B, I) \
  ((__m512i)__builtin_ia32_vpshldd512((__v16si)(__m512i)(A), \
                                      (__v16si)(__m512i)(B), (int)(I)))
148327302Sdim
/* Masked form of _mm512_shldi_epi32: passes the mask U (as __mmask16), the
 * unmasked result for (A, B, I) and S (as __v16si) to the selectd_512 builtin.
 * The whole expansion is parenthesized so the leading cast cannot bind to a
 * postfix operator or sizeof that follows the macro invocation. */
#define _mm512_mask_shldi_epi32(S, U, A, B, I) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                   (__v16si)_mm512_shldi_epi32((A), (B), (I)), \
                                   (__v16si)(__m512i)(S)))
153327302Sdim
/* Zero-masked form of _mm512_shldi_epi32: passes the mask U (as __mmask16),
 * the unmasked result for (A, B, I) and the vector returned by
 * _mm512_setzero_si512() to the selectd_512 builtin.
 * The whole expansion is parenthesized so the leading cast cannot bind to a
 * postfix operator or sizeof that follows the macro invocation. */
#define _mm512_maskz_shldi_epi32(U, A, B, I) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                   (__v16si)_mm512_shldi_epi32((A), (B), (I)), \
                                   (__v16si)_mm512_setzero_si512()))
158327302Sdim
/* Expands to the vpshldw512 builtin applied to A and B (as __v32hi) with the
 * immediate I converted to int; the result is cast to __m512i.
 * The whole expansion is parenthesized so the leading cast cannot bind to a
 * postfix operator or sizeof that follows the macro invocation. */
#define _mm512_shldi_epi16(A, B, I) \
  ((__m512i)__builtin_ia32_vpshldw512((__v32hi)(__m512i)(A), \
                                      (__v32hi)(__m512i)(B), (int)(I)))
162327302Sdim
/* Masked form of _mm512_shldi_epi16: passes the mask U (as __mmask32), the
 * unmasked result for (A, B, I) and S (as __v32hi) to the selectw_512 builtin.
 * The whole expansion is parenthesized so the leading cast cannot bind to a
 * postfix operator or sizeof that follows the macro invocation. */
#define _mm512_mask_shldi_epi16(S, U, A, B, I) \
  ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
                                   (__v32hi)_mm512_shldi_epi16((A), (B), (I)), \
                                   (__v32hi)(__m512i)(S)))
167327302Sdim
/* Zero-masked form of _mm512_shldi_epi16: passes the mask U (as __mmask32),
 * the unmasked result for (A, B, I) and the vector returned by
 * _mm512_setzero_si512() to the selectw_512 builtin.
 * The whole expansion is parenthesized so the leading cast cannot bind to a
 * postfix operator or sizeof that follows the macro invocation. */
#define _mm512_maskz_shldi_epi16(U, A, B, I) \
  ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
                                   (__v32hi)_mm512_shldi_epi16((A), (B), (I)), \
                                   (__v32hi)_mm512_setzero_si512()))
172327302Sdim
/* Expands to the vpshrdq512 builtin applied to A and B (as __v8di) with the
 * immediate I converted to int; the result is cast to __m512i.
 * The whole expansion is parenthesized so the leading cast cannot bind to a
 * postfix operator or sizeof that follows the macro invocation. */
#define _mm512_shrdi_epi64(A, B, I) \
  ((__m512i)__builtin_ia32_vpshrdq512((__v8di)(__m512i)(A), \
                                      (__v8di)(__m512i)(B), (int)(I)))
176327302Sdim
/* Masked form of _mm512_shrdi_epi64: passes the mask U (as __mmask8), the
 * unmasked result for (A, B, I) and S (as __v8di) to the selectq_512 builtin.
 * The whole expansion is parenthesized so the leading cast cannot bind to a
 * postfix operator or sizeof that follows the macro invocation. */
#define _mm512_mask_shrdi_epi64(S, U, A, B, I) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                    (__v8di)_mm512_shrdi_epi64((A), (B), (I)), \
                                    (__v8di)(__m512i)(S)))
181327302Sdim
/* Zero-masked form of _mm512_shrdi_epi64: passes the mask U (as __mmask8),
 * the unmasked result for (A, B, I) and the vector returned by
 * _mm512_setzero_si512() to the selectq_512 builtin.
 * The whole expansion is parenthesized so the leading cast cannot bind to a
 * postfix operator or sizeof that follows the macro invocation. */
#define _mm512_maskz_shrdi_epi64(U, A, B, I) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                    (__v8di)_mm512_shrdi_epi64((A), (B), (I)), \
                                    (__v8di)_mm512_setzero_si512()))
186327302Sdim
/* Expands to the vpshrdd512 builtin applied to A and B (as __v16si) with the
 * immediate I converted to int; the result is cast to __m512i.
 * The whole expansion is parenthesized so the leading cast cannot bind to a
 * postfix operator or sizeof that follows the macro invocation. */
#define _mm512_shrdi_epi32(A, B, I) \
  ((__m512i)__builtin_ia32_vpshrdd512((__v16si)(__m512i)(A), \
                                      (__v16si)(__m512i)(B), (int)(I)))
190327302Sdim
/* Masked form of _mm512_shrdi_epi32: passes the mask U (as __mmask16), the
 * unmasked result for (A, B, I) and S (as __v16si) to the selectd_512 builtin.
 * The whole expansion is parenthesized so the leading cast cannot bind to a
 * postfix operator or sizeof that follows the macro invocation. */
#define _mm512_mask_shrdi_epi32(S, U, A, B, I) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                   (__v16si)_mm512_shrdi_epi32((A), (B), (I)), \
                                   (__v16si)(__m512i)(S)))
195327302Sdim
/* Zero-masked form of _mm512_shrdi_epi32: passes the mask U (as __mmask16),
 * the unmasked result for (A, B, I) and the vector returned by
 * _mm512_setzero_si512() to the selectd_512 builtin.
 * The whole expansion is parenthesized so the leading cast cannot bind to a
 * postfix operator or sizeof that follows the macro invocation. */
#define _mm512_maskz_shrdi_epi32(U, A, B, I) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                   (__v16si)_mm512_shrdi_epi32((A), (B), (I)), \
                                   (__v16si)_mm512_setzero_si512()))
200327302Sdim
/* Expands to the vpshrdw512 builtin applied to A and B (as __v32hi) with the
 * immediate I converted to int; the result is cast to __m512i.
 * The whole expansion is parenthesized so the leading cast cannot bind to a
 * postfix operator or sizeof that follows the macro invocation. */
#define _mm512_shrdi_epi16(A, B, I) \
  ((__m512i)__builtin_ia32_vpshrdw512((__v32hi)(__m512i)(A), \
                                      (__v32hi)(__m512i)(B), (int)(I)))
204327302Sdim
/* Masked form of _mm512_shrdi_epi16: passes the mask U (as __mmask32), the
 * unmasked result for (A, B, I) and S (as __v32hi) to the selectw_512 builtin.
 * The whole expansion is parenthesized so the leading cast cannot bind to a
 * postfix operator or sizeof that follows the macro invocation. */
#define _mm512_mask_shrdi_epi16(S, U, A, B, I) \
  ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
                                   (__v32hi)_mm512_shrdi_epi16((A), (B), (I)), \
                                   (__v32hi)(__m512i)(S)))
209327302Sdim
/* Zero-masked form of _mm512_shrdi_epi16: passes the mask U (as __mmask32),
 * the unmasked result for (A, B, I) and the vector returned by
 * _mm512_setzero_si512() to the selectw_512 builtin.
 * The whole expansion is parenthesized so the leading cast cannot bind to a
 * postfix operator or sizeof that follows the macro invocation. */
#define _mm512_maskz_shrdi_epi16(U, A, B, I) \
  ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
                                   (__v32hi)_mm512_shrdi_epi16((A), (B), (I)), \
                                   (__v32hi)_mm512_setzero_si512()))
214327302Sdim
215327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS
216344779Sdim_mm512_shldv_epi64(__m512i __A, __m512i __B, __m512i __C)
217327302Sdim{
218344779Sdim  return (__m512i)__builtin_ia32_vpshldvq512((__v8di)__A, (__v8di)__B,
219344779Sdim                                             (__v8di)__C);
220327302Sdim}
221327302Sdim
222327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS
223344779Sdim_mm512_mask_shldv_epi64(__m512i __A, __mmask8 __U, __m512i __B, __m512i __C)
224327302Sdim{
225344779Sdim  return (__m512i)__builtin_ia32_selectq_512(__U,
226344779Sdim                                      (__v8di)_mm512_shldv_epi64(__A, __B, __C),
227344779Sdim                                      (__v8di)__A);
228327302Sdim}
229327302Sdim
230327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS
231344779Sdim_mm512_maskz_shldv_epi64(__mmask8 __U, __m512i __A, __m512i __B, __m512i __C)
232327302Sdim{
233344779Sdim  return (__m512i)__builtin_ia32_selectq_512(__U,
234344779Sdim                                      (__v8di)_mm512_shldv_epi64(__A, __B, __C),
235344779Sdim                                      (__v8di)_mm512_setzero_si512());
236327302Sdim}
237327302Sdim
238327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS
239344779Sdim_mm512_shldv_epi32(__m512i __A, __m512i __B, __m512i __C)
240327302Sdim{
241344779Sdim  return (__m512i)__builtin_ia32_vpshldvd512((__v16si)__A, (__v16si)__B,
242344779Sdim                                             (__v16si)__C);
243327302Sdim}
244327302Sdim
245327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS
246344779Sdim_mm512_mask_shldv_epi32(__m512i __A, __mmask16 __U, __m512i __B, __m512i __C)
247327302Sdim{
248344779Sdim  return (__m512i)__builtin_ia32_selectd_512(__U,
249344779Sdim                                     (__v16si)_mm512_shldv_epi32(__A, __B, __C),
250344779Sdim                                     (__v16si)__A);
251327302Sdim}
252327302Sdim
253327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS
254344779Sdim_mm512_maskz_shldv_epi32(__mmask16 __U, __m512i __A, __m512i __B, __m512i __C)
255327302Sdim{
256344779Sdim  return (__m512i)__builtin_ia32_selectd_512(__U,
257344779Sdim                                     (__v16si)_mm512_shldv_epi32(__A, __B, __C),
258344779Sdim                                     (__v16si)_mm512_setzero_si512());
259327302Sdim}
260327302Sdim
261327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS
262344779Sdim_mm512_shldv_epi16(__m512i __A, __m512i __B, __m512i __C)
263327302Sdim{
264344779Sdim  return (__m512i)__builtin_ia32_vpshldvw512((__v32hi)__A, (__v32hi)__B,
265344779Sdim                                             (__v32hi)__C);
266327302Sdim}
267327302Sdim
268327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS
269344779Sdim_mm512_mask_shldv_epi16(__m512i __A, __mmask32 __U, __m512i __B, __m512i __C)
270327302Sdim{
271344779Sdim  return (__m512i)__builtin_ia32_selectw_512(__U,
272344779Sdim                                     (__v32hi)_mm512_shldv_epi16(__A, __B, __C),
273344779Sdim                                     (__v32hi)__A);
274327302Sdim}
275327302Sdim
276327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS
277344779Sdim_mm512_maskz_shldv_epi16(__mmask32 __U, __m512i __A, __m512i __B, __m512i __C)
278327302Sdim{
279344779Sdim  return (__m512i)__builtin_ia32_selectw_512(__U,
280344779Sdim                                     (__v32hi)_mm512_shldv_epi16(__A, __B, __C),
281344779Sdim                                     (__v32hi)_mm512_setzero_si512());
282327302Sdim}
283327302Sdim
284327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS
285344779Sdim_mm512_shrdv_epi64(__m512i __A, __m512i __B, __m512i __C)
286327302Sdim{
287344779Sdim  return (__m512i)__builtin_ia32_vpshrdvq512((__v8di)__A, (__v8di)__B,
288344779Sdim                                             (__v8di)__C);
289327302Sdim}
290327302Sdim
291327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS
292344779Sdim_mm512_mask_shrdv_epi64(__m512i __A, __mmask8 __U, __m512i __B, __m512i __C)
293327302Sdim{
294344779Sdim  return (__m512i)__builtin_ia32_selectq_512(__U,
295344779Sdim                                      (__v8di)_mm512_shrdv_epi64(__A, __B, __C),
296344779Sdim                                      (__v8di)__A);
297327302Sdim}
298327302Sdim
299327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS
300344779Sdim_mm512_maskz_shrdv_epi64(__mmask8 __U, __m512i __A, __m512i __B, __m512i __C)
301327302Sdim{
302344779Sdim  return (__m512i)__builtin_ia32_selectq_512(__U,
303344779Sdim                                      (__v8di)_mm512_shrdv_epi64(__A, __B, __C),
304344779Sdim                                      (__v8di)_mm512_setzero_si512());
305327302Sdim}
306327302Sdim
307327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS
308344779Sdim_mm512_shrdv_epi32(__m512i __A, __m512i __B, __m512i __C)
309327302Sdim{
310344779Sdim  return (__m512i)__builtin_ia32_vpshrdvd512((__v16si)__A, (__v16si)__B,
311344779Sdim                                             (__v16si)__C);
312327302Sdim}
313327302Sdim
314327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS
315344779Sdim_mm512_mask_shrdv_epi32(__m512i __A, __mmask16 __U, __m512i __B, __m512i __C)
316327302Sdim{
317344779Sdim  return (__m512i) __builtin_ia32_selectd_512(__U,
318344779Sdim                                     (__v16si)_mm512_shrdv_epi32(__A, __B, __C),
319344779Sdim                                     (__v16si)__A);
320327302Sdim}
321327302Sdim
322327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS
323344779Sdim_mm512_maskz_shrdv_epi32(__mmask16 __U, __m512i __A, __m512i __B, __m512i __C)
324327302Sdim{
325344779Sdim  return (__m512i) __builtin_ia32_selectd_512(__U,
326344779Sdim                                     (__v16si)_mm512_shrdv_epi32(__A, __B, __C),
327344779Sdim                                     (__v16si)_mm512_setzero_si512());
328327302Sdim}
329327302Sdim
330327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS
331344779Sdim_mm512_shrdv_epi16(__m512i __A, __m512i __B, __m512i __C)
332327302Sdim{
333344779Sdim  return (__m512i)__builtin_ia32_vpshrdvw512((__v32hi)__A, (__v32hi)__B,
334344779Sdim                                             (__v32hi)__C);
335327302Sdim}
336327302Sdim
337327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS
338344779Sdim_mm512_mask_shrdv_epi16(__m512i __A, __mmask32 __U, __m512i __B, __m512i __C)
339327302Sdim{
340344779Sdim  return (__m512i)__builtin_ia32_selectw_512(__U,
341344779Sdim                                     (__v32hi)_mm512_shrdv_epi16(__A, __B, __C),
342344779Sdim                                     (__v32hi)__A);
343327302Sdim}
344327302Sdim
345327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS
346344779Sdim_mm512_maskz_shrdv_epi16(__mmask32 __U, __m512i __A, __m512i __B, __m512i __C)
347327302Sdim{
348344779Sdim  return (__m512i)__builtin_ia32_selectw_512(__U,
349344779Sdim                                     (__v32hi)_mm512_shrdv_epi16(__A, __B, __C),
350344779Sdim                                     (__v32hi)_mm512_setzero_si512());
351327302Sdim}
352327302Sdim
353327302Sdim
354327302Sdim#undef __DEFAULT_FN_ATTRS
355327302Sdim
356327302Sdim#endif
357327302Sdim
358