/* avx512vbmi2intrin.h revision 341825 */
/*===------------- avx512vbmi2intrin.h - VBMI2 intrinsics ------------------===
 *
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */
24327302Sdim#ifndef __IMMINTRIN_H
25327302Sdim#error "Never use <avx512vbmi2intrin.h> directly; include <immintrin.h> instead."
26327302Sdim#endif
27327302Sdim
28327302Sdim#ifndef __AVX512VBMI2INTRIN_H
29327302Sdim#define __AVX512VBMI2INTRIN_H
30327302Sdim
31327302Sdim/* Define the default attributes for the functions in this file. */
32341825Sdim#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi2"), __min_vector_width__(512)))
33327302Sdim
34327302Sdim
35327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS
36327302Sdim_mm512_mask_compress_epi16(__m512i __S, __mmask32 __U, __m512i __D)
37327302Sdim{
38327302Sdim  return (__m512i) __builtin_ia32_compresshi512_mask ((__v32hi) __D,
39327302Sdim              (__v32hi) __S,
40327302Sdim              __U);
41327302Sdim}
42327302Sdim
43327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS
44327302Sdim_mm512_maskz_compress_epi16(__mmask32 __U, __m512i __D)
45327302Sdim{
46327302Sdim  return (__m512i) __builtin_ia32_compresshi512_mask ((__v32hi) __D,
47341825Sdim              (__v32hi) _mm512_setzero_si512(),
48327302Sdim              __U);
49327302Sdim}
50327302Sdim
51327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS
52327302Sdim_mm512_mask_compress_epi8(__m512i __S, __mmask64 __U, __m512i __D)
53327302Sdim{
54327302Sdim  return (__m512i) __builtin_ia32_compressqi512_mask ((__v64qi) __D,
55327302Sdim              (__v64qi) __S,
56327302Sdim              __U);
57327302Sdim}
58327302Sdim
59327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS
60327302Sdim_mm512_maskz_compress_epi8(__mmask64 __U, __m512i __D)
61327302Sdim{
62327302Sdim  return (__m512i) __builtin_ia32_compressqi512_mask ((__v64qi) __D,
63341825Sdim              (__v64qi) _mm512_setzero_si512(),
64327302Sdim              __U);
65327302Sdim}
66327302Sdim
67327302Sdimstatic __inline__ void __DEFAULT_FN_ATTRS
68327302Sdim_mm512_mask_compressstoreu_epi16(void *__P, __mmask32 __U, __m512i __D)
69327302Sdim{
70327302Sdim  __builtin_ia32_compressstorehi512_mask ((__v32hi *) __P, (__v32hi) __D,
71327302Sdim              __U);
72327302Sdim}
73327302Sdim
74327302Sdimstatic __inline__ void __DEFAULT_FN_ATTRS
75327302Sdim_mm512_mask_compressstoreu_epi8(void *__P, __mmask64 __U, __m512i __D)
76327302Sdim{
77327302Sdim  __builtin_ia32_compressstoreqi512_mask ((__v64qi *) __P, (__v64qi) __D,
78327302Sdim              __U);
79327302Sdim}
80327302Sdim
81327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS
82327302Sdim_mm512_mask_expand_epi16(__m512i __S, __mmask32 __U, __m512i __D)
83327302Sdim{
84327302Sdim  return (__m512i) __builtin_ia32_expandhi512_mask ((__v32hi) __D,
85327302Sdim              (__v32hi) __S,
86327302Sdim              __U);
87327302Sdim}
88327302Sdim
89327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS
90327302Sdim_mm512_maskz_expand_epi16(__mmask32 __U, __m512i __D)
91327302Sdim{
92327302Sdim  return (__m512i) __builtin_ia32_expandhi512_mask ((__v32hi) __D,
93341825Sdim              (__v32hi) _mm512_setzero_si512(),
94327302Sdim              __U);
95327302Sdim}
96327302Sdim
97327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS
98327302Sdim_mm512_mask_expand_epi8(__m512i __S, __mmask64 __U, __m512i __D)
99327302Sdim{
100327302Sdim  return (__m512i) __builtin_ia32_expandqi512_mask ((__v64qi) __D,
101327302Sdim              (__v64qi) __S,
102327302Sdim              __U);
103327302Sdim}
104327302Sdim
105327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS
106327302Sdim_mm512_maskz_expand_epi8(__mmask64 __U, __m512i __D)
107327302Sdim{
108327302Sdim  return (__m512i) __builtin_ia32_expandqi512_mask ((__v64qi) __D,
109341825Sdim              (__v64qi) _mm512_setzero_si512(),
110327302Sdim              __U);
111327302Sdim}
112327302Sdim
113327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS
114327302Sdim_mm512_mask_expandloadu_epi16(__m512i __S, __mmask32 __U, void const *__P)
115327302Sdim{
116327302Sdim  return (__m512i) __builtin_ia32_expandloadhi512_mask ((const __v32hi *)__P,
117327302Sdim              (__v32hi) __S,
118327302Sdim              __U);
119327302Sdim}
120327302Sdim
121327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS
122327302Sdim_mm512_maskz_expandloadu_epi16(__mmask32 __U, void const *__P)
123327302Sdim{
124327302Sdim  return (__m512i) __builtin_ia32_expandloadhi512_mask ((const __v32hi *)__P,
125341825Sdim              (__v32hi) _mm512_setzero_si512(),
126327302Sdim              __U);
127327302Sdim}
128327302Sdim
129327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS
130327302Sdim_mm512_mask_expandloadu_epi8(__m512i __S, __mmask64 __U, void const *__P)
131327302Sdim{
132327302Sdim  return (__m512i) __builtin_ia32_expandloadqi512_mask ((const __v64qi *)__P,
133327302Sdim              (__v64qi) __S,
134327302Sdim              __U);
135327302Sdim}
136327302Sdim
137327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS
138327302Sdim_mm512_maskz_expandloadu_epi8(__mmask64 __U, void const *__P)
139327302Sdim{
140327302Sdim  return (__m512i) __builtin_ia32_expandloadqi512_mask ((const __v64qi *)__P,
141341825Sdim              (__v64qi) _mm512_setzero_si512(),
142327302Sdim              __U);
143327302Sdim}
144327302Sdim
/* Immediate-count "shld" on 64-bit lanes: A and B are converted to __m512i,
 * reinterpreted as __v8di and passed with I (as int) to
 * __builtin_ia32_vpshldq512.
 * NOTE(review): presumably a macro because I must be a compile-time
 * constant -- confirm.
 * The expansion is wrapped in parentheses so that a postfix operator written
 * after the macro applies to the __m512i result, not to the builtin call.
 */
#define _mm512_shldi_epi64(A, B, I) \
  ((__m512i)__builtin_ia32_vpshldq512((__v8di)(__m512i)(A), \
                                      (__v8di)(__m512i)(B), (int)(I)))

/* Merge-masked form of _mm512_shldi_epi64: __builtin_ia32_selectq_512 picks,
 * under the 8-bit mask U, between the shifted value (first data operand) and
 * S (second data operand).
 * The expansion is wrapped in parentheses so that a postfix operator written
 * after the macro applies to the __m512i result, not to the builtin call.
 */
#define _mm512_mask_shldi_epi64(S, U, A, B, I) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                     (__v8di)_mm512_shldi_epi64((A), (B), (I)), \
                                     (__v8di)(__m512i)(S)))

/* Zero-masked form of _mm512_shldi_epi64: __builtin_ia32_selectq_512 picks,
 * under the 8-bit mask U, between the shifted value (first data operand) and
 * an all-zero vector (second data operand).
 * The expansion is wrapped in parentheses so that a postfix operator written
 * after the macro applies to the __m512i result, not to the builtin call.
 */
#define _mm512_maskz_shldi_epi64(U, A, B, I) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                     (__v8di)_mm512_shldi_epi64((A), (B), (I)), \
                                     (__v8di)_mm512_setzero_si512()))

/* Immediate-count "shld" on 32-bit lanes: A and B are converted to __m512i,
 * reinterpreted as __v16si and passed with I (as int) to
 * __builtin_ia32_vpshldd512.
 * NOTE(review): presumably a macro because I must be a compile-time
 * constant -- confirm.
 * The expansion is wrapped in parentheses so that a postfix operator written
 * after the macro applies to the __m512i result, not to the builtin call.
 */
#define _mm512_shldi_epi32(A, B, I) \
  ((__m512i)__builtin_ia32_vpshldd512((__v16si)(__m512i)(A), \
                                      (__v16si)(__m512i)(B), (int)(I)))

/* Merge-masked form of _mm512_shldi_epi32: __builtin_ia32_selectd_512 picks,
 * under the 16-bit mask U, between the shifted value (first data operand)
 * and S (second data operand).
 * The expansion is wrapped in parentheses so that a postfix operator written
 * after the macro applies to the __m512i result, not to the builtin call.
 */
#define _mm512_mask_shldi_epi32(S, U, A, B, I) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                    (__v16si)_mm512_shldi_epi32((A), (B), (I)), \
                                    (__v16si)(__m512i)(S)))

/* Zero-masked form of _mm512_shldi_epi32: __builtin_ia32_selectd_512 picks,
 * under the 16-bit mask U, between the shifted value (first data operand)
 * and an all-zero vector (second data operand).
 * The expansion is wrapped in parentheses so that a postfix operator written
 * after the macro applies to the __m512i result, not to the builtin call.
 */
#define _mm512_maskz_shldi_epi32(U, A, B, I) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                    (__v16si)_mm512_shldi_epi32((A), (B), (I)), \
                                    (__v16si)_mm512_setzero_si512()))

/* Immediate-count "shld" on 16-bit lanes: A and B are converted to __m512i,
 * reinterpreted as __v32hi and passed with I (as int) to
 * __builtin_ia32_vpshldw512.
 * NOTE(review): presumably a macro because I must be a compile-time
 * constant -- confirm.
 * The expansion is wrapped in parentheses so that a postfix operator written
 * after the macro applies to the __m512i result, not to the builtin call.
 */
#define _mm512_shldi_epi16(A, B, I) \
  ((__m512i)__builtin_ia32_vpshldw512((__v32hi)(__m512i)(A), \
                                      (__v32hi)(__m512i)(B), (int)(I)))

/* Merge-masked form of _mm512_shldi_epi16: __builtin_ia32_selectw_512 picks,
 * under the 32-bit mask U, between the shifted value (first data operand)
 * and S (second data operand).
 * The expansion is wrapped in parentheses so that a postfix operator written
 * after the macro applies to the __m512i result, not to the builtin call.
 */
#define _mm512_mask_shldi_epi16(S, U, A, B, I) \
  ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
                                    (__v32hi)_mm512_shldi_epi16((A), (B), (I)), \
                                    (__v32hi)(__m512i)(S)))

/* Zero-masked form of _mm512_shldi_epi16: __builtin_ia32_selectw_512 picks,
 * under the 32-bit mask U, between the shifted value (first data operand)
 * and an all-zero vector (second data operand).
 * The expansion is wrapped in parentheses so that a postfix operator written
 * after the macro applies to the __m512i result, not to the builtin call.
 */
#define _mm512_maskz_shldi_epi16(U, A, B, I) \
  ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
                                    (__v32hi)_mm512_shldi_epi16((A), (B), (I)), \
                                    (__v32hi)_mm512_setzero_si512()))

/* Immediate-count "shrd" on 64-bit lanes: A and B are converted to __m512i,
 * reinterpreted as __v8di and passed with I (as int) to
 * __builtin_ia32_vpshrdq512.
 * NOTE(review): presumably a macro because I must be a compile-time
 * constant -- confirm.
 * The expansion is wrapped in parentheses so that a postfix operator written
 * after the macro applies to the __m512i result, not to the builtin call.
 */
#define _mm512_shrdi_epi64(A, B, I) \
  ((__m512i)__builtin_ia32_vpshrdq512((__v8di)(__m512i)(A), \
                                      (__v8di)(__m512i)(B), (int)(I)))

/* Merge-masked form of _mm512_shrdi_epi64: __builtin_ia32_selectq_512 picks,
 * under the 8-bit mask U, between the shifted value (first data operand) and
 * S (second data operand).
 * The expansion is wrapped in parentheses so that a postfix operator written
 * after the macro applies to the __m512i result, not to the builtin call.
 */
#define _mm512_mask_shrdi_epi64(S, U, A, B, I) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                     (__v8di)_mm512_shrdi_epi64((A), (B), (I)), \
                                     (__v8di)(__m512i)(S)))

/* Zero-masked form of _mm512_shrdi_epi64: __builtin_ia32_selectq_512 picks,
 * under the 8-bit mask U, between the shifted value (first data operand) and
 * an all-zero vector (second data operand).
 * The expansion is wrapped in parentheses so that a postfix operator written
 * after the macro applies to the __m512i result, not to the builtin call.
 */
#define _mm512_maskz_shrdi_epi64(U, A, B, I) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                     (__v8di)_mm512_shrdi_epi64((A), (B), (I)), \
                                     (__v8di)_mm512_setzero_si512()))

/* Immediate-count "shrd" on 32-bit lanes: A and B are converted to __m512i,
 * reinterpreted as __v16si and passed with I (as int) to
 * __builtin_ia32_vpshrdd512.
 * NOTE(review): presumably a macro because I must be a compile-time
 * constant -- confirm.
 * The expansion is wrapped in parentheses so that a postfix operator written
 * after the macro applies to the __m512i result, not to the builtin call.
 */
#define _mm512_shrdi_epi32(A, B, I) \
  ((__m512i)__builtin_ia32_vpshrdd512((__v16si)(__m512i)(A), \
                                      (__v16si)(__m512i)(B), (int)(I)))

/* Merge-masked form of _mm512_shrdi_epi32: __builtin_ia32_selectd_512 picks,
 * under the 16-bit mask U, between the shifted value (first data operand)
 * and S (second data operand).
 * The expansion is wrapped in parentheses so that a postfix operator written
 * after the macro applies to the __m512i result, not to the builtin call.
 */
#define _mm512_mask_shrdi_epi32(S, U, A, B, I) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                    (__v16si)_mm512_shrdi_epi32((A), (B), (I)), \
                                    (__v16si)(__m512i)(S)))

/* Zero-masked form of _mm512_shrdi_epi32: __builtin_ia32_selectd_512 picks,
 * under the 16-bit mask U, between the shifted value (first data operand)
 * and an all-zero vector (second data operand).
 * The expansion is wrapped in parentheses so that a postfix operator written
 * after the macro applies to the __m512i result, not to the builtin call.
 */
#define _mm512_maskz_shrdi_epi32(U, A, B, I) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                    (__v16si)_mm512_shrdi_epi32((A), (B), (I)), \
                                    (__v16si)_mm512_setzero_si512()))

/* Immediate-count "shrd" on 16-bit lanes: A and B are converted to __m512i,
 * reinterpreted as __v32hi and passed with I (as int) to
 * __builtin_ia32_vpshrdw512.
 * NOTE(review): presumably a macro because I must be a compile-time
 * constant -- confirm.
 * The expansion is wrapped in parentheses so that a postfix operator written
 * after the macro applies to the __m512i result, not to the builtin call.
 */
#define _mm512_shrdi_epi16(A, B, I) \
  ((__m512i)__builtin_ia32_vpshrdw512((__v32hi)(__m512i)(A), \
                                      (__v32hi)(__m512i)(B), (int)(I)))

/* Merge-masked form of _mm512_shrdi_epi16: __builtin_ia32_selectw_512 picks,
 * under the 32-bit mask U, between the shifted value (first data operand)
 * and S (second data operand).
 * The expansion is wrapped in parentheses so that a postfix operator written
 * after the macro applies to the __m512i result, not to the builtin call.
 */
#define _mm512_mask_shrdi_epi16(S, U, A, B, I) \
  ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
                                    (__v32hi)_mm512_shrdi_epi16((A), (B), (I)), \
                                    (__v32hi)(__m512i)(S)))

/* Zero-masked form of _mm512_shrdi_epi16: __builtin_ia32_selectw_512 picks,
 * under the 32-bit mask U, between the shifted value (first data operand)
 * and an all-zero vector (second data operand).
 * The expansion is wrapped in parentheses so that a postfix operator written
 * after the macro applies to the __m512i result, not to the builtin call.
 */
#define _mm512_maskz_shrdi_epi16(U, A, B, I) \
  ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
                                    (__v32hi)_mm512_shrdi_epi16((A), (B), (I)), \
                                    (__v32hi)_mm512_setzero_si512()))

229327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS
230327302Sdim_mm512_mask_shldv_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m512i __B)
231327302Sdim{
232327302Sdim  return (__m512i) __builtin_ia32_vpshldvq512_mask ((__v8di) __S,
233327302Sdim              (__v8di) __A,
234327302Sdim              (__v8di) __B,
235327302Sdim              __U);
236327302Sdim}
237327302Sdim
238327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS
239327302Sdim_mm512_maskz_shldv_epi64(__mmask8 __U, __m512i __S, __m512i __A, __m512i __B)
240327302Sdim{
241327302Sdim  return (__m512i) __builtin_ia32_vpshldvq512_maskz ((__v8di) __S,
242327302Sdim              (__v8di) __A,
243327302Sdim              (__v8di) __B,
244327302Sdim              __U);
245327302Sdim}
246327302Sdim
247327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS
248327302Sdim_mm512_shldv_epi64(__m512i __S, __m512i __A, __m512i __B)
249327302Sdim{
250327302Sdim  return (__m512i) __builtin_ia32_vpshldvq512_mask ((__v8di) __S,
251327302Sdim              (__v8di) __A,
252327302Sdim              (__v8di) __B,
253327302Sdim              (__mmask8) -1);
254327302Sdim}
255327302Sdim
256327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS
257327302Sdim_mm512_mask_shldv_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B)
258327302Sdim{
259327302Sdim  return (__m512i) __builtin_ia32_vpshldvd512_mask ((__v16si) __S,
260327302Sdim              (__v16si) __A,
261327302Sdim              (__v16si) __B,
262327302Sdim              __U);
263327302Sdim}
264327302Sdim
265327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS
266327302Sdim_mm512_maskz_shldv_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B)
267327302Sdim{
268327302Sdim  return (__m512i) __builtin_ia32_vpshldvd512_maskz ((__v16si) __S,
269327302Sdim              (__v16si) __A,
270327302Sdim              (__v16si) __B,
271327302Sdim              __U);
272327302Sdim}
273327302Sdim
274327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS
275327302Sdim_mm512_shldv_epi32(__m512i __S, __m512i __A, __m512i __B)
276327302Sdim{
277327302Sdim  return (__m512i) __builtin_ia32_vpshldvd512_mask ((__v16si) __S,
278327302Sdim              (__v16si) __A,
279327302Sdim              (__v16si) __B,
280327302Sdim              (__mmask16) -1);
281327302Sdim}
282327302Sdim
283327302Sdim
284327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS
285327302Sdim_mm512_mask_shldv_epi16(__m512i __S, __mmask32 __U, __m512i __A, __m512i __B)
286327302Sdim{
287327302Sdim  return (__m512i) __builtin_ia32_vpshldvw512_mask ((__v32hi) __S,
288327302Sdim              (__v32hi) __A,
289327302Sdim              (__v32hi) __B,
290327302Sdim              __U);
291327302Sdim}
292327302Sdim
293327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS
294327302Sdim_mm512_maskz_shldv_epi16(__mmask32 __U, __m512i __S, __m512i __A, __m512i __B)
295327302Sdim{
296327302Sdim  return (__m512i) __builtin_ia32_vpshldvw512_maskz ((__v32hi) __S,
297327302Sdim              (__v32hi) __A,
298327302Sdim              (__v32hi) __B,
299327302Sdim              __U);
300327302Sdim}
301327302Sdim
302327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS
303327302Sdim_mm512_shldv_epi16(__m512i __S, __m512i __A, __m512i __B)
304327302Sdim{
305327302Sdim  return (__m512i) __builtin_ia32_vpshldvw512_mask ((__v32hi) __S,
306327302Sdim              (__v32hi) __A,
307327302Sdim              (__v32hi) __B,
308327302Sdim              (__mmask32) -1);
309327302Sdim}
310327302Sdim
311327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS
312327302Sdim_mm512_mask_shrdv_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m512i __B)
313327302Sdim{
314327302Sdim  return (__m512i) __builtin_ia32_vpshrdvq512_mask ((__v8di) __S,
315327302Sdim              (__v8di) __A,
316327302Sdim              (__v8di) __B,
317327302Sdim              __U);
318327302Sdim}
319327302Sdim
320327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS
321327302Sdim_mm512_maskz_shrdv_epi64(__mmask8 __U, __m512i __S, __m512i __A, __m512i __B)
322327302Sdim{
323327302Sdim  return (__m512i) __builtin_ia32_vpshrdvq512_maskz ((__v8di) __S,
324327302Sdim              (__v8di) __A,
325327302Sdim              (__v8di) __B,
326327302Sdim              __U);
327327302Sdim}
328327302Sdim
329327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS
330327302Sdim_mm512_shrdv_epi64(__m512i __S, __m512i __A, __m512i __B)
331327302Sdim{
332327302Sdim  return (__m512i) __builtin_ia32_vpshrdvq512_mask ((__v8di) __S,
333327302Sdim              (__v8di) __A,
334327302Sdim              (__v8di) __B,
335327302Sdim              (__mmask8) -1);
336327302Sdim}
337327302Sdim
338327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS
339327302Sdim_mm512_mask_shrdv_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B)
340327302Sdim{
341327302Sdim  return (__m512i) __builtin_ia32_vpshrdvd512_mask ((__v16si) __S,
342327302Sdim              (__v16si) __A,
343327302Sdim              (__v16si) __B,
344327302Sdim              __U);
345327302Sdim}
346327302Sdim
347327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS
348327302Sdim_mm512_maskz_shrdv_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B)
349327302Sdim{
350327302Sdim  return (__m512i) __builtin_ia32_vpshrdvd512_maskz ((__v16si) __S,
351327302Sdim              (__v16si) __A,
352327302Sdim              (__v16si) __B,
353327302Sdim              __U);
354327302Sdim}
355327302Sdim
356327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS
357327302Sdim_mm512_shrdv_epi32(__m512i __S, __m512i __A, __m512i __B)
358327302Sdim{
359327302Sdim  return (__m512i) __builtin_ia32_vpshrdvd512_mask ((__v16si) __S,
360327302Sdim              (__v16si) __A,
361327302Sdim              (__v16si) __B,
362327302Sdim              (__mmask16) -1);
363327302Sdim}
364327302Sdim
365327302Sdim
366327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS
367327302Sdim_mm512_mask_shrdv_epi16(__m512i __S, __mmask32 __U, __m512i __A, __m512i __B)
368327302Sdim{
369327302Sdim  return (__m512i) __builtin_ia32_vpshrdvw512_mask ((__v32hi) __S,
370327302Sdim              (__v32hi) __A,
371327302Sdim              (__v32hi) __B,
372327302Sdim              __U);
373327302Sdim}
374327302Sdim
375327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS
376327302Sdim_mm512_maskz_shrdv_epi16(__mmask32 __U, __m512i __S, __m512i __A, __m512i __B)
377327302Sdim{
378327302Sdim  return (__m512i) __builtin_ia32_vpshrdvw512_maskz ((__v32hi) __S,
379327302Sdim              (__v32hi) __A,
380327302Sdim              (__v32hi) __B,
381327302Sdim              __U);
382327302Sdim}
383327302Sdim
384327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS
385327302Sdim_mm512_shrdv_epi16(__m512i __S, __m512i __A, __m512i __B)
386327302Sdim{
387327302Sdim  return (__m512i) __builtin_ia32_vpshrdvw512_mask ((__v32hi) __S,
388327302Sdim              (__v32hi) __A,
389327302Sdim              (__v32hi) __B,
390327302Sdim              (__mmask32) -1);
391327302Sdim}
392327302Sdim
393327302Sdim
394327302Sdim#undef __DEFAULT_FN_ATTRS
395327302Sdim
396327302Sdim#endif
397327302Sdim
398