avx512vbmi2intrin.h revision 341825
1/*===------------- avx512vbmi2intrin.h - VBMI2 intrinsics ------------------===
2 *
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a copy
5 * of this software and associated documentation files (the "Software"), to deal
6 * in the Software without restriction, including without limitation the rights
7 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 * copies of the Software, and to permit persons to whom the Software is
9 * furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 * THE SOFTWARE.
21 *
22 *===-----------------------------------------------------------------------===
23 */
24#ifndef __IMMINTRIN_H
25#error "Never use <avx512vbmi2intrin.h> directly; include <immintrin.h> instead."
26#endif
27
28#ifndef __AVX512VBMI2INTRIN_H
29#define __AVX512VBMI2INTRIN_H
30
/* Attribute set applied to every inline function defined in this header:
 * forced inlining, no debug info, the "avx512vbmi2" target feature and a
 * minimum vector width of 512. */
#define __DEFAULT_FN_ATTRS \
  __attribute__((__always_inline__, __nodebug__, \
                 __target__("avx512vbmi2"), __min_vector_width__(512)))
33
34
35static __inline__ __m512i __DEFAULT_FN_ATTRS
36_mm512_mask_compress_epi16(__m512i __S, __mmask32 __U, __m512i __D)
37{
38  return (__m512i) __builtin_ia32_compresshi512_mask ((__v32hi) __D,
39              (__v32hi) __S,
40              __U);
41}
42
43static __inline__ __m512i __DEFAULT_FN_ATTRS
44_mm512_maskz_compress_epi16(__mmask32 __U, __m512i __D)
45{
46  return (__m512i) __builtin_ia32_compresshi512_mask ((__v32hi) __D,
47              (__v32hi) _mm512_setzero_si512(),
48              __U);
49}
50
51static __inline__ __m512i __DEFAULT_FN_ATTRS
52_mm512_mask_compress_epi8(__m512i __S, __mmask64 __U, __m512i __D)
53{
54  return (__m512i) __builtin_ia32_compressqi512_mask ((__v64qi) __D,
55              (__v64qi) __S,
56              __U);
57}
58
59static __inline__ __m512i __DEFAULT_FN_ATTRS
60_mm512_maskz_compress_epi8(__mmask64 __U, __m512i __D)
61{
62  return (__m512i) __builtin_ia32_compressqi512_mask ((__v64qi) __D,
63              (__v64qi) _mm512_setzero_si512(),
64              __U);
65}
66
67static __inline__ void __DEFAULT_FN_ATTRS
68_mm512_mask_compressstoreu_epi16(void *__P, __mmask32 __U, __m512i __D)
69{
70  __builtin_ia32_compressstorehi512_mask ((__v32hi *) __P, (__v32hi) __D,
71              __U);
72}
73
74static __inline__ void __DEFAULT_FN_ATTRS
75_mm512_mask_compressstoreu_epi8(void *__P, __mmask64 __U, __m512i __D)
76{
77  __builtin_ia32_compressstoreqi512_mask ((__v64qi *) __P, (__v64qi) __D,
78              __U);
79}
80
81static __inline__ __m512i __DEFAULT_FN_ATTRS
82_mm512_mask_expand_epi16(__m512i __S, __mmask32 __U, __m512i __D)
83{
84  return (__m512i) __builtin_ia32_expandhi512_mask ((__v32hi) __D,
85              (__v32hi) __S,
86              __U);
87}
88
89static __inline__ __m512i __DEFAULT_FN_ATTRS
90_mm512_maskz_expand_epi16(__mmask32 __U, __m512i __D)
91{
92  return (__m512i) __builtin_ia32_expandhi512_mask ((__v32hi) __D,
93              (__v32hi) _mm512_setzero_si512(),
94              __U);
95}
96
97static __inline__ __m512i __DEFAULT_FN_ATTRS
98_mm512_mask_expand_epi8(__m512i __S, __mmask64 __U, __m512i __D)
99{
100  return (__m512i) __builtin_ia32_expandqi512_mask ((__v64qi) __D,
101              (__v64qi) __S,
102              __U);
103}
104
105static __inline__ __m512i __DEFAULT_FN_ATTRS
106_mm512_maskz_expand_epi8(__mmask64 __U, __m512i __D)
107{
108  return (__m512i) __builtin_ia32_expandqi512_mask ((__v64qi) __D,
109              (__v64qi) _mm512_setzero_si512(),
110              __U);
111}
112
113static __inline__ __m512i __DEFAULT_FN_ATTRS
114_mm512_mask_expandloadu_epi16(__m512i __S, __mmask32 __U, void const *__P)
115{
116  return (__m512i) __builtin_ia32_expandloadhi512_mask ((const __v32hi *)__P,
117              (__v32hi) __S,
118              __U);
119}
120
121static __inline__ __m512i __DEFAULT_FN_ATTRS
122_mm512_maskz_expandloadu_epi16(__mmask32 __U, void const *__P)
123{
124  return (__m512i) __builtin_ia32_expandloadhi512_mask ((const __v32hi *)__P,
125              (__v32hi) _mm512_setzero_si512(),
126              __U);
127}
128
129static __inline__ __m512i __DEFAULT_FN_ATTRS
130_mm512_mask_expandloadu_epi8(__m512i __S, __mmask64 __U, void const *__P)
131{
132  return (__m512i) __builtin_ia32_expandloadqi512_mask ((const __v64qi *)__P,
133              (__v64qi) __S,
134              __U);
135}
136
137static __inline__ __m512i __DEFAULT_FN_ATTRS
138_mm512_maskz_expandloadu_epi8(__mmask64 __U, void const *__P)
139{
140  return (__m512i) __builtin_ia32_expandloadqi512_mask ((const __v64qi *)__P,
141              (__v64qi) _mm512_setzero_si512(),
142              __U);
143}
144
/* Immediate-count double shift left on epi64 lanes.
 *
 * Expands to __builtin_ia32_vpshldq512 applied to A and B (each converted to
 * __v8di) and I (converted to int), with the result cast to __m512i.
 * NOTE(review): presumably VPSHLDQ, with I expected to be a compile-time
 * constant -- confirm against the Intel intrinsics reference.
 * The whole expansion is parenthesized so that operators written after the
 * macro invocation apply to the cast result, not to the builtin call alone.
 */
#define _mm512_shldi_epi64(A, B, I) \
  ((__m512i)__builtin_ia32_vpshldq512((__v8di)(__m512i)(A), \
                                      (__v8di)(__m512i)(B), (int)(I)))
148
/* Merge-masked form of _mm512_shldi_epi64.
 *
 * Passes U (converted to __mmask8), the _mm512_shldi_epi64(A, B, I) result
 * and S (both as __v8di) to __builtin_ia32_selectq_512 and casts the
 * selection to __m512i.
 * NOTE(review): presumably lanes whose mask bit is clear take S -- confirm.
 * The whole expansion is parenthesized so it behaves as a single operand.
 */
#define _mm512_mask_shldi_epi64(S, U, A, B, I) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                     (__v8di)_mm512_shldi_epi64((A), (B), (I)), \
                                     (__v8di)(__m512i)(S)))
153
/* Zero-masked form of _mm512_shldi_epi64.
 *
 * Passes U (converted to __mmask8), the _mm512_shldi_epi64(A, B, I) result
 * and an all-zero vector from _mm512_setzero_si512() (both as __v8di) to
 * __builtin_ia32_selectq_512 and casts the selection to __m512i.
 * NOTE(review): presumably lanes whose mask bit is clear become zero --
 * confirm.
 * The whole expansion is parenthesized so it behaves as a single operand.
 */
#define _mm512_maskz_shldi_epi64(U, A, B, I) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                     (__v8di)_mm512_shldi_epi64((A), (B), (I)), \
                                     (__v8di)_mm512_setzero_si512()))
158
/* Immediate-count double shift left on epi32 lanes.
 *
 * Expands to __builtin_ia32_vpshldd512 applied to A and B (each converted to
 * __v16si) and I (converted to int), with the result cast to __m512i.
 * NOTE(review): presumably VPSHLDD, with I expected to be a compile-time
 * constant -- confirm against the Intel intrinsics reference.
 * The whole expansion is parenthesized so that operators written after the
 * macro invocation apply to the cast result, not to the builtin call alone.
 */
#define _mm512_shldi_epi32(A, B, I) \
  ((__m512i)__builtin_ia32_vpshldd512((__v16si)(__m512i)(A), \
                                      (__v16si)(__m512i)(B), (int)(I)))
162
/* Merge-masked form of _mm512_shldi_epi32.
 *
 * Passes U (converted to __mmask16), the _mm512_shldi_epi32(A, B, I) result
 * and S (both as __v16si) to __builtin_ia32_selectd_512 and casts the
 * selection to __m512i.
 * NOTE(review): presumably lanes whose mask bit is clear take S -- confirm.
 * The whole expansion is parenthesized so it behaves as a single operand.
 */
#define _mm512_mask_shldi_epi32(S, U, A, B, I) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                    (__v16si)_mm512_shldi_epi32((A), (B), (I)), \
                                    (__v16si)(__m512i)(S)))
167
/* Zero-masked form of _mm512_shldi_epi32.
 *
 * Passes U (converted to __mmask16), the _mm512_shldi_epi32(A, B, I) result
 * and an all-zero vector from _mm512_setzero_si512() (both as __v16si) to
 * __builtin_ia32_selectd_512 and casts the selection to __m512i.
 * NOTE(review): presumably lanes whose mask bit is clear become zero --
 * confirm.
 * The whole expansion is parenthesized so it behaves as a single operand.
 */
#define _mm512_maskz_shldi_epi32(U, A, B, I) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                    (__v16si)_mm512_shldi_epi32((A), (B), (I)), \
                                    (__v16si)_mm512_setzero_si512()))
172
/* Immediate-count double shift left on epi16 lanes.
 *
 * Expands to __builtin_ia32_vpshldw512 applied to A and B (each converted to
 * __v32hi) and I (converted to int), with the result cast to __m512i.
 * NOTE(review): presumably VPSHLDW, with I expected to be a compile-time
 * constant -- confirm against the Intel intrinsics reference.
 * The whole expansion is parenthesized so that operators written after the
 * macro invocation apply to the cast result, not to the builtin call alone.
 */
#define _mm512_shldi_epi16(A, B, I) \
  ((__m512i)__builtin_ia32_vpshldw512((__v32hi)(__m512i)(A), \
                                      (__v32hi)(__m512i)(B), (int)(I)))
176
/* Merge-masked form of _mm512_shldi_epi16.
 *
 * Passes U (converted to __mmask32), the _mm512_shldi_epi16(A, B, I) result
 * and S (both as __v32hi) to __builtin_ia32_selectw_512 and casts the
 * selection to __m512i.
 * NOTE(review): presumably lanes whose mask bit is clear take S -- confirm.
 * The whole expansion is parenthesized so it behaves as a single operand.
 */
#define _mm512_mask_shldi_epi16(S, U, A, B, I) \
  ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
                                    (__v32hi)_mm512_shldi_epi16((A), (B), (I)), \
                                    (__v32hi)(__m512i)(S)))
181
/* Zero-masked form of _mm512_shldi_epi16.
 *
 * Passes U (converted to __mmask32), the _mm512_shldi_epi16(A, B, I) result
 * and an all-zero vector from _mm512_setzero_si512() (both as __v32hi) to
 * __builtin_ia32_selectw_512 and casts the selection to __m512i.
 * NOTE(review): presumably lanes whose mask bit is clear become zero --
 * confirm.
 * The whole expansion is parenthesized so it behaves as a single operand.
 */
#define _mm512_maskz_shldi_epi16(U, A, B, I) \
  ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
                                    (__v32hi)_mm512_shldi_epi16((A), (B), (I)), \
                                    (__v32hi)_mm512_setzero_si512()))
186
/* Immediate-count double shift right on epi64 lanes.
 *
 * Expands to __builtin_ia32_vpshrdq512 applied to A and B (each converted to
 * __v8di) and I (converted to int), with the result cast to __m512i.
 * NOTE(review): presumably VPSHRDQ, with I expected to be a compile-time
 * constant -- confirm against the Intel intrinsics reference.
 * The whole expansion is parenthesized so that operators written after the
 * macro invocation apply to the cast result, not to the builtin call alone.
 */
#define _mm512_shrdi_epi64(A, B, I) \
  ((__m512i)__builtin_ia32_vpshrdq512((__v8di)(__m512i)(A), \
                                      (__v8di)(__m512i)(B), (int)(I)))
190
/* Merge-masked form of _mm512_shrdi_epi64.
 *
 * Passes U (converted to __mmask8), the _mm512_shrdi_epi64(A, B, I) result
 * and S (both as __v8di) to __builtin_ia32_selectq_512 and casts the
 * selection to __m512i.
 * NOTE(review): presumably lanes whose mask bit is clear take S -- confirm.
 * The whole expansion is parenthesized so it behaves as a single operand.
 */
#define _mm512_mask_shrdi_epi64(S, U, A, B, I) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                     (__v8di)_mm512_shrdi_epi64((A), (B), (I)), \
                                     (__v8di)(__m512i)(S)))
195
/* Zero-masked form of _mm512_shrdi_epi64.
 *
 * Passes U (converted to __mmask8), the _mm512_shrdi_epi64(A, B, I) result
 * and an all-zero vector from _mm512_setzero_si512() (both as __v8di) to
 * __builtin_ia32_selectq_512 and casts the selection to __m512i.
 * NOTE(review): presumably lanes whose mask bit is clear become zero --
 * confirm.
 * The whole expansion is parenthesized so it behaves as a single operand.
 */
#define _mm512_maskz_shrdi_epi64(U, A, B, I) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                     (__v8di)_mm512_shrdi_epi64((A), (B), (I)), \
                                     (__v8di)_mm512_setzero_si512()))
200
/* Immediate-count double shift right on epi32 lanes.
 *
 * Expands to __builtin_ia32_vpshrdd512 applied to A and B (each converted to
 * __v16si) and I (converted to int), with the result cast to __m512i.
 * NOTE(review): presumably VPSHRDD, with I expected to be a compile-time
 * constant -- confirm against the Intel intrinsics reference.
 * The whole expansion is parenthesized so that operators written after the
 * macro invocation apply to the cast result, not to the builtin call alone.
 */
#define _mm512_shrdi_epi32(A, B, I) \
  ((__m512i)__builtin_ia32_vpshrdd512((__v16si)(__m512i)(A), \
                                      (__v16si)(__m512i)(B), (int)(I)))
204
/* Merge-masked form of _mm512_shrdi_epi32.
 *
 * Passes U (converted to __mmask16), the _mm512_shrdi_epi32(A, B, I) result
 * and S (both as __v16si) to __builtin_ia32_selectd_512 and casts the
 * selection to __m512i.
 * NOTE(review): presumably lanes whose mask bit is clear take S -- confirm.
 * The whole expansion is parenthesized so it behaves as a single operand.
 */
#define _mm512_mask_shrdi_epi32(S, U, A, B, I) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                    (__v16si)_mm512_shrdi_epi32((A), (B), (I)), \
                                    (__v16si)(__m512i)(S)))
209
/* Zero-masked form of _mm512_shrdi_epi32.
 *
 * Passes U (converted to __mmask16), the _mm512_shrdi_epi32(A, B, I) result
 * and an all-zero vector from _mm512_setzero_si512() (both as __v16si) to
 * __builtin_ia32_selectd_512 and casts the selection to __m512i.
 * NOTE(review): presumably lanes whose mask bit is clear become zero --
 * confirm.
 * The whole expansion is parenthesized so it behaves as a single operand.
 */
#define _mm512_maskz_shrdi_epi32(U, A, B, I) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                    (__v16si)_mm512_shrdi_epi32((A), (B), (I)), \
                                    (__v16si)_mm512_setzero_si512()))
214
/* Immediate-count double shift right on epi16 lanes.
 *
 * Expands to __builtin_ia32_vpshrdw512 applied to A and B (each converted to
 * __v32hi) and I (converted to int), with the result cast to __m512i.
 * NOTE(review): presumably VPSHRDW, with I expected to be a compile-time
 * constant -- confirm against the Intel intrinsics reference.
 * The whole expansion is parenthesized so that operators written after the
 * macro invocation apply to the cast result, not to the builtin call alone.
 */
#define _mm512_shrdi_epi16(A, B, I) \
  ((__m512i)__builtin_ia32_vpshrdw512((__v32hi)(__m512i)(A), \
                                      (__v32hi)(__m512i)(B), (int)(I)))
218
/* Merge-masked form of _mm512_shrdi_epi16.
 *
 * Passes U (converted to __mmask32), the _mm512_shrdi_epi16(A, B, I) result
 * and S (both as __v32hi) to __builtin_ia32_selectw_512 and casts the
 * selection to __m512i.
 * NOTE(review): presumably lanes whose mask bit is clear take S -- confirm.
 * The whole expansion is parenthesized so it behaves as a single operand.
 */
#define _mm512_mask_shrdi_epi16(S, U, A, B, I) \
  ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
                                    (__v32hi)_mm512_shrdi_epi16((A), (B), (I)), \
                                    (__v32hi)(__m512i)(S)))
223
/* Zero-masked form of _mm512_shrdi_epi16.
 *
 * Passes U (converted to __mmask32), the _mm512_shrdi_epi16(A, B, I) result
 * and an all-zero vector from _mm512_setzero_si512() (both as __v32hi) to
 * __builtin_ia32_selectw_512 and casts the selection to __m512i.
 * NOTE(review): presumably lanes whose mask bit is clear become zero --
 * confirm.
 * The whole expansion is parenthesized so it behaves as a single operand.
 */
#define _mm512_maskz_shrdi_epi16(U, A, B, I) \
  ((__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \
                                    (__v32hi)_mm512_shrdi_epi16((A), (B), (I)), \
                                    (__v32hi)_mm512_setzero_si512()))
228
229static __inline__ __m512i __DEFAULT_FN_ATTRS
230_mm512_mask_shldv_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m512i __B)
231{
232  return (__m512i) __builtin_ia32_vpshldvq512_mask ((__v8di) __S,
233              (__v8di) __A,
234              (__v8di) __B,
235              __U);
236}
237
238static __inline__ __m512i __DEFAULT_FN_ATTRS
239_mm512_maskz_shldv_epi64(__mmask8 __U, __m512i __S, __m512i __A, __m512i __B)
240{
241  return (__m512i) __builtin_ia32_vpshldvq512_maskz ((__v8di) __S,
242              (__v8di) __A,
243              (__v8di) __B,
244              __U);
245}
246
247static __inline__ __m512i __DEFAULT_FN_ATTRS
248_mm512_shldv_epi64(__m512i __S, __m512i __A, __m512i __B)
249{
250  return (__m512i) __builtin_ia32_vpshldvq512_mask ((__v8di) __S,
251              (__v8di) __A,
252              (__v8di) __B,
253              (__mmask8) -1);
254}
255
256static __inline__ __m512i __DEFAULT_FN_ATTRS
257_mm512_mask_shldv_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B)
258{
259  return (__m512i) __builtin_ia32_vpshldvd512_mask ((__v16si) __S,
260              (__v16si) __A,
261              (__v16si) __B,
262              __U);
263}
264
265static __inline__ __m512i __DEFAULT_FN_ATTRS
266_mm512_maskz_shldv_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B)
267{
268  return (__m512i) __builtin_ia32_vpshldvd512_maskz ((__v16si) __S,
269              (__v16si) __A,
270              (__v16si) __B,
271              __U);
272}
273
274static __inline__ __m512i __DEFAULT_FN_ATTRS
275_mm512_shldv_epi32(__m512i __S, __m512i __A, __m512i __B)
276{
277  return (__m512i) __builtin_ia32_vpshldvd512_mask ((__v16si) __S,
278              (__v16si) __A,
279              (__v16si) __B,
280              (__mmask16) -1);
281}
282
283
284static __inline__ __m512i __DEFAULT_FN_ATTRS
285_mm512_mask_shldv_epi16(__m512i __S, __mmask32 __U, __m512i __A, __m512i __B)
286{
287  return (__m512i) __builtin_ia32_vpshldvw512_mask ((__v32hi) __S,
288              (__v32hi) __A,
289              (__v32hi) __B,
290              __U);
291}
292
293static __inline__ __m512i __DEFAULT_FN_ATTRS
294_mm512_maskz_shldv_epi16(__mmask32 __U, __m512i __S, __m512i __A, __m512i __B)
295{
296  return (__m512i) __builtin_ia32_vpshldvw512_maskz ((__v32hi) __S,
297              (__v32hi) __A,
298              (__v32hi) __B,
299              __U);
300}
301
302static __inline__ __m512i __DEFAULT_FN_ATTRS
303_mm512_shldv_epi16(__m512i __S, __m512i __A, __m512i __B)
304{
305  return (__m512i) __builtin_ia32_vpshldvw512_mask ((__v32hi) __S,
306              (__v32hi) __A,
307              (__v32hi) __B,
308              (__mmask32) -1);
309}
310
311static __inline__ __m512i __DEFAULT_FN_ATTRS
312_mm512_mask_shrdv_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m512i __B)
313{
314  return (__m512i) __builtin_ia32_vpshrdvq512_mask ((__v8di) __S,
315              (__v8di) __A,
316              (__v8di) __B,
317              __U);
318}
319
320static __inline__ __m512i __DEFAULT_FN_ATTRS
321_mm512_maskz_shrdv_epi64(__mmask8 __U, __m512i __S, __m512i __A, __m512i __B)
322{
323  return (__m512i) __builtin_ia32_vpshrdvq512_maskz ((__v8di) __S,
324              (__v8di) __A,
325              (__v8di) __B,
326              __U);
327}
328
329static __inline__ __m512i __DEFAULT_FN_ATTRS
330_mm512_shrdv_epi64(__m512i __S, __m512i __A, __m512i __B)
331{
332  return (__m512i) __builtin_ia32_vpshrdvq512_mask ((__v8di) __S,
333              (__v8di) __A,
334              (__v8di) __B,
335              (__mmask8) -1);
336}
337
338static __inline__ __m512i __DEFAULT_FN_ATTRS
339_mm512_mask_shrdv_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B)
340{
341  return (__m512i) __builtin_ia32_vpshrdvd512_mask ((__v16si) __S,
342              (__v16si) __A,
343              (__v16si) __B,
344              __U);
345}
346
347static __inline__ __m512i __DEFAULT_FN_ATTRS
348_mm512_maskz_shrdv_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B)
349{
350  return (__m512i) __builtin_ia32_vpshrdvd512_maskz ((__v16si) __S,
351              (__v16si) __A,
352              (__v16si) __B,
353              __U);
354}
355
356static __inline__ __m512i __DEFAULT_FN_ATTRS
357_mm512_shrdv_epi32(__m512i __S, __m512i __A, __m512i __B)
358{
359  return (__m512i) __builtin_ia32_vpshrdvd512_mask ((__v16si) __S,
360              (__v16si) __A,
361              (__v16si) __B,
362              (__mmask16) -1);
363}
364
365
366static __inline__ __m512i __DEFAULT_FN_ATTRS
367_mm512_mask_shrdv_epi16(__m512i __S, __mmask32 __U, __m512i __A, __m512i __B)
368{
369  return (__m512i) __builtin_ia32_vpshrdvw512_mask ((__v32hi) __S,
370              (__v32hi) __A,
371              (__v32hi) __B,
372              __U);
373}
374
375static __inline__ __m512i __DEFAULT_FN_ATTRS
376_mm512_maskz_shrdv_epi16(__mmask32 __U, __m512i __S, __m512i __A, __m512i __B)
377{
378  return (__m512i) __builtin_ia32_vpshrdvw512_maskz ((__v32hi) __S,
379              (__v32hi) __A,
380              (__v32hi) __B,
381              __U);
382}
383
384static __inline__ __m512i __DEFAULT_FN_ATTRS
385_mm512_shrdv_epi16(__m512i __S, __m512i __A, __m512i __B)
386{
387  return (__m512i) __builtin_ia32_vpshrdvw512_mask ((__v32hi) __S,
388              (__v32hi) __A,
389              (__v32hi) __B,
390              (__mmask32) -1);
391}
392
393
394#undef __DEFAULT_FN_ATTRS
395
396#endif
397
398