avx512vbmi2intrin.h revision 327302
/*===------------- avx512vbmi2intrin.h - VBMI2 intrinsics ------------------===
 *
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */
24327302Sdim#ifndef __IMMINTRIN_H
25327302Sdim#error "Never use <avx512vbmi2intrin.h> directly; include <immintrin.h> instead."
26327302Sdim#endif
27327302Sdim
28327302Sdim#ifndef __AVX512VBMI2INTRIN_H
29327302Sdim#define __AVX512VBMI2INTRIN_H
30327302Sdim
/* Attribute set attached to every intrinsic function defined in this file:
 * always inlined, no debug info, compiled for the "avx512vbmi2" target. */
#define __DEFAULT_FN_ATTRS \
  __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi2")))
33327302Sdim
34327302Sdim
35327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS
36327302Sdim_mm512_mask_compress_epi16(__m512i __S, __mmask32 __U, __m512i __D)
37327302Sdim{
38327302Sdim  return (__m512i) __builtin_ia32_compresshi512_mask ((__v32hi) __D,
39327302Sdim              (__v32hi) __S,
40327302Sdim              __U);
41327302Sdim}
42327302Sdim
43327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS
44327302Sdim_mm512_maskz_compress_epi16(__mmask32 __U, __m512i __D)
45327302Sdim{
46327302Sdim  return (__m512i) __builtin_ia32_compresshi512_mask ((__v32hi) __D,
47327302Sdim              (__v32hi) _mm512_setzero_hi(),
48327302Sdim              __U);
49327302Sdim}
50327302Sdim
51327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS
52327302Sdim_mm512_mask_compress_epi8(__m512i __S, __mmask64 __U, __m512i __D)
53327302Sdim{
54327302Sdim  return (__m512i) __builtin_ia32_compressqi512_mask ((__v64qi) __D,
55327302Sdim              (__v64qi) __S,
56327302Sdim              __U);
57327302Sdim}
58327302Sdim
59327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS
60327302Sdim_mm512_maskz_compress_epi8(__mmask64 __U, __m512i __D)
61327302Sdim{
62327302Sdim  return (__m512i) __builtin_ia32_compressqi512_mask ((__v64qi) __D,
63327302Sdim              (__v64qi) _mm512_setzero_qi(),
64327302Sdim              __U);
65327302Sdim}
66327302Sdim
67327302Sdimstatic __inline__ void __DEFAULT_FN_ATTRS
68327302Sdim_mm512_mask_compressstoreu_epi16(void *__P, __mmask32 __U, __m512i __D)
69327302Sdim{
70327302Sdim  __builtin_ia32_compressstorehi512_mask ((__v32hi *) __P, (__v32hi) __D,
71327302Sdim              __U);
72327302Sdim}
73327302Sdim
74327302Sdimstatic __inline__ void __DEFAULT_FN_ATTRS
75327302Sdim_mm512_mask_compressstoreu_epi8(void *__P, __mmask64 __U, __m512i __D)
76327302Sdim{
77327302Sdim  __builtin_ia32_compressstoreqi512_mask ((__v64qi *) __P, (__v64qi) __D,
78327302Sdim              __U);
79327302Sdim}
80327302Sdim
81327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS
82327302Sdim_mm512_mask_expand_epi16(__m512i __S, __mmask32 __U, __m512i __D)
83327302Sdim{
84327302Sdim  return (__m512i) __builtin_ia32_expandhi512_mask ((__v32hi) __D,
85327302Sdim              (__v32hi) __S,
86327302Sdim              __U);
87327302Sdim}
88327302Sdim
89327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS
90327302Sdim_mm512_maskz_expand_epi16(__mmask32 __U, __m512i __D)
91327302Sdim{
92327302Sdim  return (__m512i) __builtin_ia32_expandhi512_mask ((__v32hi) __D,
93327302Sdim              (__v32hi) _mm512_setzero_hi(),
94327302Sdim              __U);
95327302Sdim}
96327302Sdim
97327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS
98327302Sdim_mm512_mask_expand_epi8(__m512i __S, __mmask64 __U, __m512i __D)
99327302Sdim{
100327302Sdim  return (__m512i) __builtin_ia32_expandqi512_mask ((__v64qi) __D,
101327302Sdim              (__v64qi) __S,
102327302Sdim              __U);
103327302Sdim}
104327302Sdim
105327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS
106327302Sdim_mm512_maskz_expand_epi8(__mmask64 __U, __m512i __D)
107327302Sdim{
108327302Sdim  return (__m512i) __builtin_ia32_expandqi512_mask ((__v64qi) __D,
109327302Sdim              (__v64qi) _mm512_setzero_qi(),
110327302Sdim              __U);
111327302Sdim}
112327302Sdim
113327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS
114327302Sdim_mm512_mask_expandloadu_epi16(__m512i __S, __mmask32 __U, void const *__P)
115327302Sdim{
116327302Sdim  return (__m512i) __builtin_ia32_expandloadhi512_mask ((const __v32hi *)__P,
117327302Sdim              (__v32hi) __S,
118327302Sdim              __U);
119327302Sdim}
120327302Sdim
121327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS
122327302Sdim_mm512_maskz_expandloadu_epi16(__mmask32 __U, void const *__P)
123327302Sdim{
124327302Sdim  return (__m512i) __builtin_ia32_expandloadhi512_mask ((const __v32hi *)__P,
125327302Sdim              (__v32hi) _mm512_setzero_hi(),
126327302Sdim              __U);
127327302Sdim}
128327302Sdim
129327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS
130327302Sdim_mm512_mask_expandloadu_epi8(__m512i __S, __mmask64 __U, void const *__P)
131327302Sdim{
132327302Sdim  return (__m512i) __builtin_ia32_expandloadqi512_mask ((const __v64qi *)__P,
133327302Sdim              (__v64qi) __S,
134327302Sdim              __U);
135327302Sdim}
136327302Sdim
137327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS
138327302Sdim_mm512_maskz_expandloadu_epi8(__mmask64 __U, void const *__P)
139327302Sdim{
140327302Sdim  return (__m512i) __builtin_ia32_expandloadqi512_mask ((const __v64qi *)__P,
141327302Sdim              (__v64qi) _mm512_setzero_qi(),
142327302Sdim              __U);
143327302Sdim}
144327302Sdim
/* Immediate-count "shldi" family on __v8di operands, all expanding to
 * __builtin_ia32_vpshldq512_mask(A, B, I, S, U):
 *   _mm512_mask_shldi_epi64  - caller supplies the source S and 8-bit mask U.
 *   _mm512_maskz_shldi_epi64 - S is the all-zero integer vector.
 *   _mm512_shldi_epi64       - U is (__mmask8)(-1); S is an undefined integer
 *                              vector.
 * I is cast to int. NOTE(review): these are macros rather than functions,
 * presumably because the builtin needs I as a compile-time constant - confirm.
 *
 * The zero/undefined sources use the __m512i-typed helpers
 * (_mm512_setzero_si512 / _mm512_undefined_epi32) instead of the 16-bit-lane
 * zero helper and the float-typed _mm512_undefined(). The expansion is a
 * plain parenthesized expression rather than a GNU statement expression. */
#define _mm512_mask_shldi_epi64(S, U, A, B, I) \
  ((__m512i)__builtin_ia32_vpshldq512_mask((__v8di)(A), \
                                           (__v8di)(B), \
                                           (int)(I), \
                                           (__v8di)(S), \
                                           (__mmask8)(U)))

#define _mm512_maskz_shldi_epi64(U, A, B, I) \
  _mm512_mask_shldi_epi64(_mm512_setzero_si512(), (U), (A), (B), (I))

#define _mm512_shldi_epi64(A, B, I) \
  _mm512_mask_shldi_epi64(_mm512_undefined_epi32(), (__mmask8)(-1), (A), (B), (I))
157327302Sdim
/* Immediate-count "shldi" family on __v16si operands, all expanding to
 * __builtin_ia32_vpshldd512_mask(A, B, I, S, U):
 *   _mm512_mask_shldi_epi32  - caller supplies the source S and 16-bit mask U.
 *   _mm512_maskz_shldi_epi32 - S is the all-zero integer vector.
 *   _mm512_shldi_epi32       - U is (__mmask16)(-1); S is an undefined integer
 *                              vector.
 * I is cast to int. NOTE(review): these are macros rather than functions,
 * presumably because the builtin needs I as a compile-time constant - confirm.
 *
 * The zero/undefined sources use the __m512i-typed helpers
 * (_mm512_setzero_si512 / _mm512_undefined_epi32) instead of the 16-bit-lane
 * zero helper and the float-typed _mm512_undefined(). The expansion is a
 * plain parenthesized expression rather than a GNU statement expression. */
#define _mm512_mask_shldi_epi32(S, U, A, B, I) \
  ((__m512i)__builtin_ia32_vpshldd512_mask((__v16si)(A), \
                                           (__v16si)(B), \
                                           (int)(I), \
                                           (__v16si)(S), \
                                           (__mmask16)(U)))

#define _mm512_maskz_shldi_epi32(U, A, B, I) \
  _mm512_mask_shldi_epi32(_mm512_setzero_si512(), (U), (A), (B), (I))

#define _mm512_shldi_epi32(A, B, I) \
  _mm512_mask_shldi_epi32(_mm512_undefined_epi32(), (__mmask16)(-1), (A), (B), (I))
170327302Sdim
/* Immediate-count "shldi" family on __v32hi operands, all expanding to
 * __builtin_ia32_vpshldw512_mask(A, B, I, S, U):
 *   _mm512_mask_shldi_epi16  - caller supplies the source S and 32-bit mask U.
 *   _mm512_maskz_shldi_epi16 - S is the zero vector from _mm512_setzero_hi().
 *   _mm512_shldi_epi16       - U is (__mmask32)(-1); S is an undefined integer
 *                              vector.
 * I is cast to int. NOTE(review): these are macros rather than functions,
 * presumably because the builtin needs I as a compile-time constant - confirm.
 *
 * The undefined source uses the __m512i-typed _mm512_undefined_epi32()
 * instead of the float-typed _mm512_undefined(). The expansion is a plain
 * parenthesized expression rather than a GNU statement expression. */
#define _mm512_mask_shldi_epi16(S, U, A, B, I) \
  ((__m512i)__builtin_ia32_vpshldw512_mask((__v32hi)(A), \
                                           (__v32hi)(B), \
                                           (int)(I), \
                                           (__v32hi)(S), \
                                           (__mmask32)(U)))

#define _mm512_maskz_shldi_epi16(U, A, B, I) \
  _mm512_mask_shldi_epi16(_mm512_setzero_hi(), (U), (A), (B), (I))

#define _mm512_shldi_epi16(A, B, I) \
  _mm512_mask_shldi_epi16(_mm512_undefined_epi32(), (__mmask32)(-1), (A), (B), (I))
183327302Sdim
/* Immediate-count "shrdi" family on __v8di operands, all expanding to
 * __builtin_ia32_vpshrdq512_mask(A, B, I, S, U):
 *   _mm512_mask_shrdi_epi64  - caller supplies the source S and 8-bit mask U.
 *   _mm512_maskz_shrdi_epi64 - S is the all-zero integer vector.
 *   _mm512_shrdi_epi64       - U is (__mmask8)(-1); S is an undefined integer
 *                              vector.
 * I is cast to int. NOTE(review): these are macros rather than functions,
 * presumably because the builtin needs I as a compile-time constant - confirm.
 *
 * The zero/undefined sources use the __m512i-typed helpers
 * (_mm512_setzero_si512 / _mm512_undefined_epi32) instead of the 16-bit-lane
 * zero helper and the float-typed _mm512_undefined(). The expansion is a
 * plain parenthesized expression rather than a GNU statement expression. */
#define _mm512_mask_shrdi_epi64(S, U, A, B, I) \
  ((__m512i)__builtin_ia32_vpshrdq512_mask((__v8di)(A), \
                                           (__v8di)(B), \
                                           (int)(I), \
                                           (__v8di)(S), \
                                           (__mmask8)(U)))

#define _mm512_maskz_shrdi_epi64(U, A, B, I) \
  _mm512_mask_shrdi_epi64(_mm512_setzero_si512(), (U), (A), (B), (I))

#define _mm512_shrdi_epi64(A, B, I) \
  _mm512_mask_shrdi_epi64(_mm512_undefined_epi32(), (__mmask8)(-1), (A), (B), (I))
196327302Sdim
/* Immediate-count "shrdi" family on __v16si operands, all expanding to
 * __builtin_ia32_vpshrdd512_mask(A, B, I, S, U):
 *   _mm512_mask_shrdi_epi32  - caller supplies the source S and 16-bit mask U.
 *   _mm512_maskz_shrdi_epi32 - S is the all-zero integer vector.
 *   _mm512_shrdi_epi32       - U is (__mmask16)(-1); S is an undefined integer
 *                              vector.
 * I is cast to int. NOTE(review): these are macros rather than functions,
 * presumably because the builtin needs I as a compile-time constant - confirm.
 *
 * The zero/undefined sources use the __m512i-typed helpers
 * (_mm512_setzero_si512 / _mm512_undefined_epi32) instead of the 16-bit-lane
 * zero helper and the float-typed _mm512_undefined(). The expansion is a
 * plain parenthesized expression rather than a GNU statement expression. */
#define _mm512_mask_shrdi_epi32(S, U, A, B, I) \
  ((__m512i)__builtin_ia32_vpshrdd512_mask((__v16si)(A), \
                                           (__v16si)(B), \
                                           (int)(I), \
                                           (__v16si)(S), \
                                           (__mmask16)(U)))

#define _mm512_maskz_shrdi_epi32(U, A, B, I) \
  _mm512_mask_shrdi_epi32(_mm512_setzero_si512(), (U), (A), (B), (I))

#define _mm512_shrdi_epi32(A, B, I) \
  _mm512_mask_shrdi_epi32(_mm512_undefined_epi32(), (__mmask16)(-1), (A), (B), (I))
209327302Sdim
/* Immediate-count "shrdi" family on __v32hi operands, all expanding to
 * __builtin_ia32_vpshrdw512_mask(A, B, I, S, U):
 *   _mm512_mask_shrdi_epi16  - caller supplies the source S and 32-bit mask U.
 *   _mm512_maskz_shrdi_epi16 - S is the zero vector from _mm512_setzero_hi().
 *   _mm512_shrdi_epi16       - U is (__mmask32)(-1); S is an undefined integer
 *                              vector.
 * I is cast to int. NOTE(review): these are macros rather than functions,
 * presumably because the builtin needs I as a compile-time constant - confirm.
 *
 * The undefined source uses the __m512i-typed _mm512_undefined_epi32()
 * instead of the float-typed _mm512_undefined(). The expansion is a plain
 * parenthesized expression rather than a GNU statement expression. */
#define _mm512_mask_shrdi_epi16(S, U, A, B, I) \
  ((__m512i)__builtin_ia32_vpshrdw512_mask((__v32hi)(A), \
                                           (__v32hi)(B), \
                                           (int)(I), \
                                           (__v32hi)(S), \
                                           (__mmask32)(U)))

#define _mm512_maskz_shrdi_epi16(U, A, B, I) \
  _mm512_mask_shrdi_epi16(_mm512_setzero_hi(), (U), (A), (B), (I))

#define _mm512_shrdi_epi16(A, B, I) \
  _mm512_mask_shrdi_epi16(_mm512_undefined_epi32(), (__mmask32)(-1), (A), (B), (I))
222327302Sdim
223327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS
224327302Sdim_mm512_mask_shldv_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m512i __B)
225327302Sdim{
226327302Sdim  return (__m512i) __builtin_ia32_vpshldvq512_mask ((__v8di) __S,
227327302Sdim              (__v8di) __A,
228327302Sdim              (__v8di) __B,
229327302Sdim              __U);
230327302Sdim}
231327302Sdim
232327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS
233327302Sdim_mm512_maskz_shldv_epi64(__mmask8 __U, __m512i __S, __m512i __A, __m512i __B)
234327302Sdim{
235327302Sdim  return (__m512i) __builtin_ia32_vpshldvq512_maskz ((__v8di) __S,
236327302Sdim              (__v8di) __A,
237327302Sdim              (__v8di) __B,
238327302Sdim              __U);
239327302Sdim}
240327302Sdim
241327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS
242327302Sdim_mm512_shldv_epi64(__m512i __S, __m512i __A, __m512i __B)
243327302Sdim{
244327302Sdim  return (__m512i) __builtin_ia32_vpshldvq512_mask ((__v8di) __S,
245327302Sdim              (__v8di) __A,
246327302Sdim              (__v8di) __B,
247327302Sdim              (__mmask8) -1);
248327302Sdim}
249327302Sdim
250327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS
251327302Sdim_mm512_mask_shldv_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B)
252327302Sdim{
253327302Sdim  return (__m512i) __builtin_ia32_vpshldvd512_mask ((__v16si) __S,
254327302Sdim              (__v16si) __A,
255327302Sdim              (__v16si) __B,
256327302Sdim              __U);
257327302Sdim}
258327302Sdim
259327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS
260327302Sdim_mm512_maskz_shldv_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B)
261327302Sdim{
262327302Sdim  return (__m512i) __builtin_ia32_vpshldvd512_maskz ((__v16si) __S,
263327302Sdim              (__v16si) __A,
264327302Sdim              (__v16si) __B,
265327302Sdim              __U);
266327302Sdim}
267327302Sdim
268327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS
269327302Sdim_mm512_shldv_epi32(__m512i __S, __m512i __A, __m512i __B)
270327302Sdim{
271327302Sdim  return (__m512i) __builtin_ia32_vpshldvd512_mask ((__v16si) __S,
272327302Sdim              (__v16si) __A,
273327302Sdim              (__v16si) __B,
274327302Sdim              (__mmask16) -1);
275327302Sdim}
276327302Sdim
277327302Sdim
278327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS
279327302Sdim_mm512_mask_shldv_epi16(__m512i __S, __mmask32 __U, __m512i __A, __m512i __B)
280327302Sdim{
281327302Sdim  return (__m512i) __builtin_ia32_vpshldvw512_mask ((__v32hi) __S,
282327302Sdim              (__v32hi) __A,
283327302Sdim              (__v32hi) __B,
284327302Sdim              __U);
285327302Sdim}
286327302Sdim
287327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS
288327302Sdim_mm512_maskz_shldv_epi16(__mmask32 __U, __m512i __S, __m512i __A, __m512i __B)
289327302Sdim{
290327302Sdim  return (__m512i) __builtin_ia32_vpshldvw512_maskz ((__v32hi) __S,
291327302Sdim              (__v32hi) __A,
292327302Sdim              (__v32hi) __B,
293327302Sdim              __U);
294327302Sdim}
295327302Sdim
296327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS
297327302Sdim_mm512_shldv_epi16(__m512i __S, __m512i __A, __m512i __B)
298327302Sdim{
299327302Sdim  return (__m512i) __builtin_ia32_vpshldvw512_mask ((__v32hi) __S,
300327302Sdim              (__v32hi) __A,
301327302Sdim              (__v32hi) __B,
302327302Sdim              (__mmask32) -1);
303327302Sdim}
304327302Sdim
305327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS
306327302Sdim_mm512_mask_shrdv_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m512i __B)
307327302Sdim{
308327302Sdim  return (__m512i) __builtin_ia32_vpshrdvq512_mask ((__v8di) __S,
309327302Sdim              (__v8di) __A,
310327302Sdim              (__v8di) __B,
311327302Sdim              __U);
312327302Sdim}
313327302Sdim
314327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS
315327302Sdim_mm512_maskz_shrdv_epi64(__mmask8 __U, __m512i __S, __m512i __A, __m512i __B)
316327302Sdim{
317327302Sdim  return (__m512i) __builtin_ia32_vpshrdvq512_maskz ((__v8di) __S,
318327302Sdim              (__v8di) __A,
319327302Sdim              (__v8di) __B,
320327302Sdim              __U);
321327302Sdim}
322327302Sdim
323327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS
324327302Sdim_mm512_shrdv_epi64(__m512i __S, __m512i __A, __m512i __B)
325327302Sdim{
326327302Sdim  return (__m512i) __builtin_ia32_vpshrdvq512_mask ((__v8di) __S,
327327302Sdim              (__v8di) __A,
328327302Sdim              (__v8di) __B,
329327302Sdim              (__mmask8) -1);
330327302Sdim}
331327302Sdim
332327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS
333327302Sdim_mm512_mask_shrdv_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B)
334327302Sdim{
335327302Sdim  return (__m512i) __builtin_ia32_vpshrdvd512_mask ((__v16si) __S,
336327302Sdim              (__v16si) __A,
337327302Sdim              (__v16si) __B,
338327302Sdim              __U);
339327302Sdim}
340327302Sdim
341327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS
342327302Sdim_mm512_maskz_shrdv_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B)
343327302Sdim{
344327302Sdim  return (__m512i) __builtin_ia32_vpshrdvd512_maskz ((__v16si) __S,
345327302Sdim              (__v16si) __A,
346327302Sdim              (__v16si) __B,
347327302Sdim              __U);
348327302Sdim}
349327302Sdim
350327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS
351327302Sdim_mm512_shrdv_epi32(__m512i __S, __m512i __A, __m512i __B)
352327302Sdim{
353327302Sdim  return (__m512i) __builtin_ia32_vpshrdvd512_mask ((__v16si) __S,
354327302Sdim              (__v16si) __A,
355327302Sdim              (__v16si) __B,
356327302Sdim              (__mmask16) -1);
357327302Sdim}
358327302Sdim
359327302Sdim
360327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS
361327302Sdim_mm512_mask_shrdv_epi16(__m512i __S, __mmask32 __U, __m512i __A, __m512i __B)
362327302Sdim{
363327302Sdim  return (__m512i) __builtin_ia32_vpshrdvw512_mask ((__v32hi) __S,
364327302Sdim              (__v32hi) __A,
365327302Sdim              (__v32hi) __B,
366327302Sdim              __U);
367327302Sdim}
368327302Sdim
369327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS
370327302Sdim_mm512_maskz_shrdv_epi16(__mmask32 __U, __m512i __S, __m512i __A, __m512i __B)
371327302Sdim{
372327302Sdim  return (__m512i) __builtin_ia32_vpshrdvw512_maskz ((__v32hi) __S,
373327302Sdim              (__v32hi) __A,
374327302Sdim              (__v32hi) __B,
375327302Sdim              __U);
376327302Sdim}
377327302Sdim
378327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS
379327302Sdim_mm512_shrdv_epi16(__m512i __S, __m512i __A, __m512i __B)
380327302Sdim{
381327302Sdim  return (__m512i) __builtin_ia32_vpshrdvw512_mask ((__v32hi) __S,
382327302Sdim              (__v32hi) __A,
383327302Sdim              (__v32hi) __B,
384327302Sdim              (__mmask32) -1);
385327302Sdim}
386327302Sdim
387327302Sdim
388327302Sdim#undef __DEFAULT_FN_ATTRS
389327302Sdim
390327302Sdim#endif
391327302Sdim
392