avx512vbmi2intrin.h revision 327330
1/*===------------- avx512vbmi2intrin.h - VBMI2 intrinsics ------------------===
2 *
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a copy
5 * of this software and associated documentation files (the "Software"), to deal
6 * in the Software without restriction, including without limitation the rights
7 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 * copies of the Software, and to permit persons to whom the Software is
9 * furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 * THE SOFTWARE.
21 *
22 *===-----------------------------------------------------------------------===
23 */
24#ifndef __IMMINTRIN_H
25#error "Never use <avx512vbmi2intrin.h> directly; include <immintrin.h> instead."
26#endif
27
28#ifndef __AVX512VBMI2INTRIN_H
29#define __AVX512VBMI2INTRIN_H
30
/* Attribute set applied to every inline intrinsic defined in this header:
 * always inlined, no debug info, compiled for the "avx512vbmi2" target. */
#define __DEFAULT_FN_ATTRS \
  __attribute__((__always_inline__, __nodebug__, \
                 __target__("avx512vbmi2")))
33
34
35static __inline__ __m512i __DEFAULT_FN_ATTRS
36_mm512_mask_compress_epi16(__m512i __S, __mmask32 __U, __m512i __D)
37{
38  return (__m512i) __builtin_ia32_compresshi512_mask ((__v32hi) __D,
39              (__v32hi) __S,
40              __U);
41}
42
43static __inline__ __m512i __DEFAULT_FN_ATTRS
44_mm512_maskz_compress_epi16(__mmask32 __U, __m512i __D)
45{
46  return (__m512i) __builtin_ia32_compresshi512_mask ((__v32hi) __D,
47              (__v32hi) _mm512_setzero_hi(),
48              __U);
49}
50
51static __inline__ __m512i __DEFAULT_FN_ATTRS
52_mm512_mask_compress_epi8(__m512i __S, __mmask64 __U, __m512i __D)
53{
54  return (__m512i) __builtin_ia32_compressqi512_mask ((__v64qi) __D,
55              (__v64qi) __S,
56              __U);
57}
58
59static __inline__ __m512i __DEFAULT_FN_ATTRS
60_mm512_maskz_compress_epi8(__mmask64 __U, __m512i __D)
61{
62  return (__m512i) __builtin_ia32_compressqi512_mask ((__v64qi) __D,
63              (__v64qi) _mm512_setzero_qi(),
64              __U);
65}
66
67static __inline__ void __DEFAULT_FN_ATTRS
68_mm512_mask_compressstoreu_epi16(void *__P, __mmask32 __U, __m512i __D)
69{
70  __builtin_ia32_compressstorehi512_mask ((__v32hi *) __P, (__v32hi) __D,
71              __U);
72}
73
74static __inline__ void __DEFAULT_FN_ATTRS
75_mm512_mask_compressstoreu_epi8(void *__P, __mmask64 __U, __m512i __D)
76{
77  __builtin_ia32_compressstoreqi512_mask ((__v64qi *) __P, (__v64qi) __D,
78              __U);
79}
80
81static __inline__ __m512i __DEFAULT_FN_ATTRS
82_mm512_mask_expand_epi16(__m512i __S, __mmask32 __U, __m512i __D)
83{
84  return (__m512i) __builtin_ia32_expandhi512_mask ((__v32hi) __D,
85              (__v32hi) __S,
86              __U);
87}
88
89static __inline__ __m512i __DEFAULT_FN_ATTRS
90_mm512_maskz_expand_epi16(__mmask32 __U, __m512i __D)
91{
92  return (__m512i) __builtin_ia32_expandhi512_mask ((__v32hi) __D,
93              (__v32hi) _mm512_setzero_hi(),
94              __U);
95}
96
97static __inline__ __m512i __DEFAULT_FN_ATTRS
98_mm512_mask_expand_epi8(__m512i __S, __mmask64 __U, __m512i __D)
99{
100  return (__m512i) __builtin_ia32_expandqi512_mask ((__v64qi) __D,
101              (__v64qi) __S,
102              __U);
103}
104
105static __inline__ __m512i __DEFAULT_FN_ATTRS
106_mm512_maskz_expand_epi8(__mmask64 __U, __m512i __D)
107{
108  return (__m512i) __builtin_ia32_expandqi512_mask ((__v64qi) __D,
109              (__v64qi) _mm512_setzero_qi(),
110              __U);
111}
112
113static __inline__ __m512i __DEFAULT_FN_ATTRS
114_mm512_mask_expandloadu_epi16(__m512i __S, __mmask32 __U, void const *__P)
115{
116  return (__m512i) __builtin_ia32_expandloadhi512_mask ((const __v32hi *)__P,
117              (__v32hi) __S,
118              __U);
119}
120
121static __inline__ __m512i __DEFAULT_FN_ATTRS
122_mm512_maskz_expandloadu_epi16(__mmask32 __U, void const *__P)
123{
124  return (__m512i) __builtin_ia32_expandloadhi512_mask ((const __v32hi *)__P,
125              (__v32hi) _mm512_setzero_hi(),
126              __U);
127}
128
129static __inline__ __m512i __DEFAULT_FN_ATTRS
130_mm512_mask_expandloadu_epi8(__m512i __S, __mmask64 __U, void const *__P)
131{
132  return (__m512i) __builtin_ia32_expandloadqi512_mask ((const __v64qi *)__P,
133              (__v64qi) __S,
134              __U);
135}
136
137static __inline__ __m512i __DEFAULT_FN_ATTRS
138_mm512_maskz_expandloadu_epi8(__mmask64 __U, void const *__P)
139{
140  return (__m512i) __builtin_ia32_expandloadqi512_mask ((const __v64qi *)__P,
141              (__v64qi) _mm512_setzero_qi(),
142              __U);
143}
144
/* Masked VPSHLDQ-style builtin call with an integer count.
 * A and B are passed as __v8di, I as int, S as the fourth (__v8di) operand
 * and U as an __mmask8. Expands to one fully parenthesized expression of
 * type __m512i.
 * NOTE(review): I presumably has to be a compile-time constant, which would
 * explain why this is a macro rather than an inline function -- confirm. */
#define _mm512_mask_shldi_epi64(S, U, A, B, I) \
  ((__m512i)__builtin_ia32_vpshldq512_mask((__v8di)(A), (__v8di)(B), \
                                           (int)(I), (__v8di)(S), \
                                           (__mmask8)(U)))
151
/* Same as _mm512_mask_shldi_epi64, with the S operand supplied by
 * _mm512_setzero_hi() instead of the caller. */
#define _mm512_maskz_shldi_epi64(U, A, B, I) \
  _mm512_mask_shldi_epi64(_mm512_setzero_hi(), \
                          (U), \
                          (A), \
                          (B), \
                          (I))
154
/* Unmasked form: delegates to _mm512_mask_shldi_epi64 with
 * _mm512_undefined() as the S operand and a mask of (__mmask8)(-1),
 * i.e. every mask bit set. */
#define _mm512_shldi_epi64(A, B, I) \
  _mm512_mask_shldi_epi64(_mm512_undefined(), \
                          (__mmask8)(-1), \
                          (A), \
                          (B), \
                          (I))
157
/* Masked VPSHLDD-style builtin call with an integer count.
 * A and B are passed as __v16si, I as int, S as the fourth (__v16si) operand
 * and U as an __mmask16. Expands to one fully parenthesized expression of
 * type __m512i.
 * NOTE(review): I presumably has to be a compile-time constant, which would
 * explain why this is a macro rather than an inline function -- confirm. */
#define _mm512_mask_shldi_epi32(S, U, A, B, I) \
  ((__m512i)__builtin_ia32_vpshldd512_mask((__v16si)(A), (__v16si)(B), \
                                           (int)(I), (__v16si)(S), \
                                           (__mmask16)(U)))
164
/* Same as _mm512_mask_shldi_epi32, with the S operand supplied by
 * _mm512_setzero_hi() instead of the caller. */
#define _mm512_maskz_shldi_epi32(U, A, B, I) \
  _mm512_mask_shldi_epi32(_mm512_setzero_hi(), \
                          (U), \
                          (A), \
                          (B), \
                          (I))
167
/* Unmasked form: delegates to _mm512_mask_shldi_epi32 with
 * _mm512_undefined() as the S operand and a mask of (__mmask16)(-1),
 * i.e. every mask bit set. */
#define _mm512_shldi_epi32(A, B, I) \
  _mm512_mask_shldi_epi32(_mm512_undefined(), \
                          (__mmask16)(-1), \
                          (A), \
                          (B), \
                          (I))
170
/* Masked VPSHLDW-style builtin call with an integer count.
 * A and B are passed as __v32hi, I as int, S as the fourth (__v32hi) operand
 * and U as an __mmask32. Expands to one fully parenthesized expression of
 * type __m512i.
 * NOTE(review): I presumably has to be a compile-time constant, which would
 * explain why this is a macro rather than an inline function -- confirm. */
#define _mm512_mask_shldi_epi16(S, U, A, B, I) \
  ((__m512i)__builtin_ia32_vpshldw512_mask((__v32hi)(A), (__v32hi)(B), \
                                           (int)(I), (__v32hi)(S), \
                                           (__mmask32)(U)))
177
/* Same as _mm512_mask_shldi_epi16, with the S operand supplied by
 * _mm512_setzero_hi() instead of the caller. */
#define _mm512_maskz_shldi_epi16(U, A, B, I) \
  _mm512_mask_shldi_epi16(_mm512_setzero_hi(), \
                          (U), \
                          (A), \
                          (B), \
                          (I))
180
/* Unmasked form: delegates to _mm512_mask_shldi_epi16 with
 * _mm512_undefined() as the S operand and a mask of (__mmask32)(-1),
 * i.e. every mask bit set. */
#define _mm512_shldi_epi16(A, B, I) \
  _mm512_mask_shldi_epi16(_mm512_undefined(), \
                          (__mmask32)(-1), \
                          (A), \
                          (B), \
                          (I))
183
/* Masked VPSHRDQ-style builtin call with an integer count.
 * A and B are passed as __v8di, I as int, S as the fourth (__v8di) operand
 * and U as an __mmask8. Expands to one fully parenthesized expression of
 * type __m512i.
 * NOTE(review): I presumably has to be a compile-time constant, which would
 * explain why this is a macro rather than an inline function -- confirm. */
#define _mm512_mask_shrdi_epi64(S, U, A, B, I) \
  ((__m512i)__builtin_ia32_vpshrdq512_mask((__v8di)(A), (__v8di)(B), \
                                           (int)(I), (__v8di)(S), \
                                           (__mmask8)(U)))
190
/* Same as _mm512_mask_shrdi_epi64, with the S operand supplied by
 * _mm512_setzero_hi() instead of the caller. */
#define _mm512_maskz_shrdi_epi64(U, A, B, I) \
  _mm512_mask_shrdi_epi64(_mm512_setzero_hi(), \
                          (U), \
                          (A), \
                          (B), \
                          (I))
193
/* Unmasked form: delegates to _mm512_mask_shrdi_epi64 with
 * _mm512_undefined() as the S operand and a mask of (__mmask8)(-1),
 * i.e. every mask bit set. */
#define _mm512_shrdi_epi64(A, B, I) \
  _mm512_mask_shrdi_epi64(_mm512_undefined(), \
                          (__mmask8)(-1), \
                          (A), \
                          (B), \
                          (I))
196
/* Masked VPSHRDD-style builtin call with an integer count.
 * A and B are passed as __v16si, I as int, S as the fourth (__v16si) operand
 * and U as an __mmask16. Expands to one fully parenthesized expression of
 * type __m512i.
 * NOTE(review): I presumably has to be a compile-time constant, which would
 * explain why this is a macro rather than an inline function -- confirm. */
#define _mm512_mask_shrdi_epi32(S, U, A, B, I) \
  ((__m512i)__builtin_ia32_vpshrdd512_mask((__v16si)(A), (__v16si)(B), \
                                           (int)(I), (__v16si)(S), \
                                           (__mmask16)(U)))
203
/* Same as _mm512_mask_shrdi_epi32, with the S operand supplied by
 * _mm512_setzero_hi() instead of the caller. */
#define _mm512_maskz_shrdi_epi32(U, A, B, I) \
  _mm512_mask_shrdi_epi32(_mm512_setzero_hi(), \
                          (U), \
                          (A), \
                          (B), \
                          (I))
206
/* Unmasked form: delegates to _mm512_mask_shrdi_epi32 with
 * _mm512_undefined() as the S operand and a mask of (__mmask16)(-1),
 * i.e. every mask bit set. */
#define _mm512_shrdi_epi32(A, B, I) \
  _mm512_mask_shrdi_epi32(_mm512_undefined(), \
                          (__mmask16)(-1), \
                          (A), \
                          (B), \
                          (I))
209
/* Masked VPSHRDW-style builtin call with an integer count.
 * A and B are passed as __v32hi, I as int, S as the fourth (__v32hi) operand
 * and U as an __mmask32. Expands to one fully parenthesized expression of
 * type __m512i.
 * NOTE(review): I presumably has to be a compile-time constant, which would
 * explain why this is a macro rather than an inline function -- confirm. */
#define _mm512_mask_shrdi_epi16(S, U, A, B, I) \
  ((__m512i)__builtin_ia32_vpshrdw512_mask((__v32hi)(A), (__v32hi)(B), \
                                           (int)(I), (__v32hi)(S), \
                                           (__mmask32)(U)))
216
/* Same as _mm512_mask_shrdi_epi16, with the S operand supplied by
 * _mm512_setzero_hi() instead of the caller. */
#define _mm512_maskz_shrdi_epi16(U, A, B, I) \
  _mm512_mask_shrdi_epi16(_mm512_setzero_hi(), \
                          (U), \
                          (A), \
                          (B), \
                          (I))
219
/* Unmasked form: delegates to _mm512_mask_shrdi_epi16 with
 * _mm512_undefined() as the S operand and a mask of (__mmask32)(-1),
 * i.e. every mask bit set. */
#define _mm512_shrdi_epi16(A, B, I) \
  _mm512_mask_shrdi_epi16(_mm512_undefined(), \
                          (__mmask32)(-1), \
                          (A), \
                          (B), \
                          (I))
222
223static __inline__ __m512i __DEFAULT_FN_ATTRS
224_mm512_mask_shldv_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m512i __B)
225{
226  return (__m512i) __builtin_ia32_vpshldvq512_mask ((__v8di) __S,
227              (__v8di) __A,
228              (__v8di) __B,
229              __U);
230}
231
232static __inline__ __m512i __DEFAULT_FN_ATTRS
233_mm512_maskz_shldv_epi64(__mmask8 __U, __m512i __S, __m512i __A, __m512i __B)
234{
235  return (__m512i) __builtin_ia32_vpshldvq512_maskz ((__v8di) __S,
236              (__v8di) __A,
237              (__v8di) __B,
238              __U);
239}
240
241static __inline__ __m512i __DEFAULT_FN_ATTRS
242_mm512_shldv_epi64(__m512i __S, __m512i __A, __m512i __B)
243{
244  return (__m512i) __builtin_ia32_vpshldvq512_mask ((__v8di) __S,
245              (__v8di) __A,
246              (__v8di) __B,
247              (__mmask8) -1);
248}
249
250static __inline__ __m512i __DEFAULT_FN_ATTRS
251_mm512_mask_shldv_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B)
252{
253  return (__m512i) __builtin_ia32_vpshldvd512_mask ((__v16si) __S,
254              (__v16si) __A,
255              (__v16si) __B,
256              __U);
257}
258
259static __inline__ __m512i __DEFAULT_FN_ATTRS
260_mm512_maskz_shldv_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B)
261{
262  return (__m512i) __builtin_ia32_vpshldvd512_maskz ((__v16si) __S,
263              (__v16si) __A,
264              (__v16si) __B,
265              __U);
266}
267
268static __inline__ __m512i __DEFAULT_FN_ATTRS
269_mm512_shldv_epi32(__m512i __S, __m512i __A, __m512i __B)
270{
271  return (__m512i) __builtin_ia32_vpshldvd512_mask ((__v16si) __S,
272              (__v16si) __A,
273              (__v16si) __B,
274              (__mmask16) -1);
275}
276
277
278static __inline__ __m512i __DEFAULT_FN_ATTRS
279_mm512_mask_shldv_epi16(__m512i __S, __mmask32 __U, __m512i __A, __m512i __B)
280{
281  return (__m512i) __builtin_ia32_vpshldvw512_mask ((__v32hi) __S,
282              (__v32hi) __A,
283              (__v32hi) __B,
284              __U);
285}
286
287static __inline__ __m512i __DEFAULT_FN_ATTRS
288_mm512_maskz_shldv_epi16(__mmask32 __U, __m512i __S, __m512i __A, __m512i __B)
289{
290  return (__m512i) __builtin_ia32_vpshldvw512_maskz ((__v32hi) __S,
291              (__v32hi) __A,
292              (__v32hi) __B,
293              __U);
294}
295
296static __inline__ __m512i __DEFAULT_FN_ATTRS
297_mm512_shldv_epi16(__m512i __S, __m512i __A, __m512i __B)
298{
299  return (__m512i) __builtin_ia32_vpshldvw512_mask ((__v32hi) __S,
300              (__v32hi) __A,
301              (__v32hi) __B,
302              (__mmask32) -1);
303}
304
305static __inline__ __m512i __DEFAULT_FN_ATTRS
306_mm512_mask_shrdv_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m512i __B)
307{
308  return (__m512i) __builtin_ia32_vpshrdvq512_mask ((__v8di) __S,
309              (__v8di) __A,
310              (__v8di) __B,
311              __U);
312}
313
314static __inline__ __m512i __DEFAULT_FN_ATTRS
315_mm512_maskz_shrdv_epi64(__mmask8 __U, __m512i __S, __m512i __A, __m512i __B)
316{
317  return (__m512i) __builtin_ia32_vpshrdvq512_maskz ((__v8di) __S,
318              (__v8di) __A,
319              (__v8di) __B,
320              __U);
321}
322
323static __inline__ __m512i __DEFAULT_FN_ATTRS
324_mm512_shrdv_epi64(__m512i __S, __m512i __A, __m512i __B)
325{
326  return (__m512i) __builtin_ia32_vpshrdvq512_mask ((__v8di) __S,
327              (__v8di) __A,
328              (__v8di) __B,
329              (__mmask8) -1);
330}
331
332static __inline__ __m512i __DEFAULT_FN_ATTRS
333_mm512_mask_shrdv_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B)
334{
335  return (__m512i) __builtin_ia32_vpshrdvd512_mask ((__v16si) __S,
336              (__v16si) __A,
337              (__v16si) __B,
338              __U);
339}
340
341static __inline__ __m512i __DEFAULT_FN_ATTRS
342_mm512_maskz_shrdv_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B)
343{
344  return (__m512i) __builtin_ia32_vpshrdvd512_maskz ((__v16si) __S,
345              (__v16si) __A,
346              (__v16si) __B,
347              __U);
348}
349
350static __inline__ __m512i __DEFAULT_FN_ATTRS
351_mm512_shrdv_epi32(__m512i __S, __m512i __A, __m512i __B)
352{
353  return (__m512i) __builtin_ia32_vpshrdvd512_mask ((__v16si) __S,
354              (__v16si) __A,
355              (__v16si) __B,
356              (__mmask16) -1);
357}
358
359
360static __inline__ __m512i __DEFAULT_FN_ATTRS
361_mm512_mask_shrdv_epi16(__m512i __S, __mmask32 __U, __m512i __A, __m512i __B)
362{
363  return (__m512i) __builtin_ia32_vpshrdvw512_mask ((__v32hi) __S,
364              (__v32hi) __A,
365              (__v32hi) __B,
366              __U);
367}
368
369static __inline__ __m512i __DEFAULT_FN_ATTRS
370_mm512_maskz_shrdv_epi16(__mmask32 __U, __m512i __S, __m512i __A, __m512i __B)
371{
372  return (__m512i) __builtin_ia32_vpshrdvw512_maskz ((__v32hi) __S,
373              (__v32hi) __A,
374              (__v32hi) __B,
375              __U);
376}
377
378static __inline__ __m512i __DEFAULT_FN_ATTRS
379_mm512_shrdv_epi16(__m512i __S, __m512i __A, __m512i __B)
380{
381  return (__m512i) __builtin_ia32_vpshrdvw512_mask ((__v32hi) __S,
382              (__v32hi) __A,
383              (__v32hi) __B,
384              (__mmask32) -1);
385}
386
387
388#undef __DEFAULT_FN_ATTRS
389
390#endif
391
392