avx512vlvbmi2intrin.h revision 335799
1/*===------------- avx512vlvbmi2intrin.h - VBMI2 intrinsics -----------------===
2 *
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a copy
5 * of this software and associated documentation files (the "Software"), to deal
6 * in the Software without restriction, including without limitation the rights
7 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 * copies of the Software, and to permit persons to whom the Software is
9 * furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 * THE SOFTWARE.
21 *
22 *===-----------------------------------------------------------------------===
23 */
24#ifndef __IMMINTRIN_H
25#error "Never use <avx512vlvbmi2intrin.h> directly; include <immintrin.h> instead."
26#endif
27
28#ifndef __AVX512VLVBMI2INTRIN_H
29#define __AVX512VLVBMI2INTRIN_H
30
/* Attribute set applied to every intrinsic function defined in this header:
 * forced inlining, no debug info, and the avx512vl + avx512vbmi2 target
 * features.  The macro is #undef'd again at the end of the header. */
#define __DEFAULT_FN_ATTRS \
  __attribute__((__always_inline__, __nodebug__, \
                 __target__("avx512vl,avx512vbmi2")))
33
34static __inline__ __m128i __DEFAULT_FN_ATTRS
35_mm_mask_compress_epi16(__m128i __S, __mmask8 __U, __m128i __D)
36{
37  return (__m128i) __builtin_ia32_compresshi128_mask ((__v8hi) __D,
38              (__v8hi) __S,
39              __U);
40}
41
42static __inline__ __m128i __DEFAULT_FN_ATTRS
43_mm_maskz_compress_epi16(__mmask8 __U, __m128i __D)
44{
45  return (__m128i) __builtin_ia32_compresshi128_mask ((__v8hi) __D,
46              (__v8hi) _mm_setzero_si128(),
47              __U);
48}
49
50static __inline__ __m128i __DEFAULT_FN_ATTRS
51_mm_mask_compress_epi8(__m128i __S, __mmask16 __U, __m128i __D)
52{
53  return (__m128i) __builtin_ia32_compressqi128_mask ((__v16qi) __D,
54              (__v16qi) __S,
55              __U);
56}
57
58static __inline__ __m128i __DEFAULT_FN_ATTRS
59_mm_maskz_compress_epi8(__mmask16 __U, __m128i __D)
60{
61  return (__m128i) __builtin_ia32_compressqi128_mask ((__v16qi) __D,
62              (__v16qi) _mm_setzero_si128(),
63              __U);
64}
65
66static __inline__ void __DEFAULT_FN_ATTRS
67_mm_mask_compressstoreu_epi16(void *__P, __mmask8 __U, __m128i __D)
68{
69  __builtin_ia32_compressstorehi128_mask ((__v8hi *) __P, (__v8hi) __D,
70              __U);
71}
72
73static __inline__ void __DEFAULT_FN_ATTRS
74_mm_mask_compressstoreu_epi8(void *__P, __mmask16 __U, __m128i __D)
75{
76  __builtin_ia32_compressstoreqi128_mask ((__v16qi *) __P, (__v16qi) __D,
77              __U);
78}
79
80static __inline__ __m128i __DEFAULT_FN_ATTRS
81_mm_mask_expand_epi16(__m128i __S, __mmask8 __U, __m128i __D)
82{
83  return (__m128i) __builtin_ia32_expandhi128_mask ((__v8hi) __D,
84              (__v8hi) __S,
85              __U);
86}
87
88static __inline__ __m128i __DEFAULT_FN_ATTRS
89_mm_maskz_expand_epi16(__mmask8 __U, __m128i __D)
90{
91  return (__m128i) __builtin_ia32_expandhi128_mask ((__v8hi) __D,
92              (__v8hi) _mm_setzero_si128(),
93              __U);
94}
95
96static __inline__ __m128i __DEFAULT_FN_ATTRS
97_mm_mask_expand_epi8(__m128i __S, __mmask16 __U, __m128i __D)
98{
99  return (__m128i) __builtin_ia32_expandqi128_mask ((__v16qi) __D,
100              (__v16qi) __S,
101              __U);
102}
103
104static __inline__ __m128i __DEFAULT_FN_ATTRS
105_mm_maskz_expand_epi8(__mmask16 __U, __m128i __D)
106{
107  return (__m128i) __builtin_ia32_expandqi128_mask ((__v16qi) __D,
108              (__v16qi) _mm_setzero_si128(),
109              __U);
110}
111
112static __inline__ __m128i __DEFAULT_FN_ATTRS
113_mm_mask_expandloadu_epi16(__m128i __S, __mmask8 __U, void const *__P)
114{
115  return (__m128i) __builtin_ia32_expandloadhi128_mask ((const __v8hi *)__P,
116              (__v8hi) __S,
117              __U);
118}
119
120static __inline__ __m128i __DEFAULT_FN_ATTRS
121_mm_maskz_expandloadu_epi16(__mmask8 __U, void const *__P)
122{
123  return (__m128i) __builtin_ia32_expandloadhi128_mask ((const __v8hi *)__P,
124              (__v8hi) _mm_setzero_si128(),
125              __U);
126}
127
128static __inline__ __m128i __DEFAULT_FN_ATTRS
129_mm_mask_expandloadu_epi8(__m128i __S, __mmask16 __U, void const *__P)
130{
131  return (__m128i) __builtin_ia32_expandloadqi128_mask ((const __v16qi *)__P,
132              (__v16qi) __S,
133              __U);
134}
135
136static __inline__ __m128i __DEFAULT_FN_ATTRS
137_mm_maskz_expandloadu_epi8(__mmask16 __U, void const *__P)
138{
139  return (__m128i) __builtin_ia32_expandloadqi128_mask ((const __v16qi *)__P,
140              (__v16qi) _mm_setzero_si128(),
141              __U);
142}
143
144static __inline__ __m256i __DEFAULT_FN_ATTRS
145_mm256_mask_compress_epi16(__m256i __S, __mmask16 __U, __m256i __D)
146{
147  return (__m256i) __builtin_ia32_compresshi256_mask ((__v16hi) __D,
148              (__v16hi) __S,
149              __U);
150}
151
152static __inline__ __m256i __DEFAULT_FN_ATTRS
153_mm256_maskz_compress_epi16(__mmask16 __U, __m256i __D)
154{
155  return (__m256i) __builtin_ia32_compresshi256_mask ((__v16hi) __D,
156              (__v16hi) _mm256_setzero_si256(),
157              __U);
158}
159
160static __inline__ __m256i __DEFAULT_FN_ATTRS
161_mm256_mask_compress_epi8(__m256i __S, __mmask32 __U, __m256i __D)
162{
163  return (__m256i) __builtin_ia32_compressqi256_mask ((__v32qi) __D,
164              (__v32qi) __S,
165              __U);
166}
167
168static __inline__ __m256i __DEFAULT_FN_ATTRS
169_mm256_maskz_compress_epi8(__mmask32 __U, __m256i __D)
170{
171  return (__m256i) __builtin_ia32_compressqi256_mask ((__v32qi) __D,
172              (__v32qi) _mm256_setzero_si256(),
173              __U);
174}
175
176static __inline__ void __DEFAULT_FN_ATTRS
177_mm256_mask_compressstoreu_epi16(void *__P, __mmask16 __U, __m256i __D)
178{
179  __builtin_ia32_compressstorehi256_mask ((__v16hi *) __P, (__v16hi) __D,
180              __U);
181}
182
183static __inline__ void __DEFAULT_FN_ATTRS
184_mm256_mask_compressstoreu_epi8(void *__P, __mmask32 __U, __m256i __D)
185{
186  __builtin_ia32_compressstoreqi256_mask ((__v32qi *) __P, (__v32qi) __D,
187              __U);
188}
189
190static __inline__ __m256i __DEFAULT_FN_ATTRS
191_mm256_mask_expand_epi16(__m256i __S, __mmask16 __U, __m256i __D)
192{
193  return (__m256i) __builtin_ia32_expandhi256_mask ((__v16hi) __D,
194              (__v16hi) __S,
195              __U);
196}
197
198static __inline__ __m256i __DEFAULT_FN_ATTRS
199_mm256_maskz_expand_epi16(__mmask16 __U, __m256i __D)
200{
201  return (__m256i) __builtin_ia32_expandhi256_mask ((__v16hi) __D,
202              (__v16hi) _mm256_setzero_si256(),
203              __U);
204}
205
206static __inline__ __m256i __DEFAULT_FN_ATTRS
207_mm256_mask_expand_epi8(__m256i __S, __mmask32 __U, __m256i __D)
208{
209  return (__m256i) __builtin_ia32_expandqi256_mask ((__v32qi) __D,
210              (__v32qi) __S,
211              __U);
212}
213
214static __inline__ __m256i __DEFAULT_FN_ATTRS
215_mm256_maskz_expand_epi8(__mmask32 __U, __m256i __D)
216{
217  return (__m256i) __builtin_ia32_expandqi256_mask ((__v32qi) __D,
218              (__v32qi) _mm256_setzero_si256(),
219              __U);
220}
221
222static __inline__ __m256i __DEFAULT_FN_ATTRS
223_mm256_mask_expandloadu_epi16(__m256i __S, __mmask16 __U, void const *__P)
224{
225  return (__m256i) __builtin_ia32_expandloadhi256_mask ((const __v16hi *)__P,
226              (__v16hi) __S,
227              __U);
228}
229
230static __inline__ __m256i __DEFAULT_FN_ATTRS
231_mm256_maskz_expandloadu_epi16(__mmask16 __U, void const *__P)
232{
233  return (__m256i) __builtin_ia32_expandloadhi256_mask ((const __v16hi *)__P,
234              (__v16hi) _mm256_setzero_si256(),
235              __U);
236}
237
238static __inline__ __m256i __DEFAULT_FN_ATTRS
239_mm256_mask_expandloadu_epi8(__m256i __S, __mmask32 __U, void const *__P)
240{
241  return (__m256i) __builtin_ia32_expandloadqi256_mask ((const __v32qi *)__P,
242              (__v32qi) __S,
243              __U);
244}
245
246static __inline__ __m256i __DEFAULT_FN_ATTRS
247_mm256_maskz_expandloadu_epi8(__mmask32 __U, void const *__P)
248{
249  return (__m256i) __builtin_ia32_expandloadqi256_mask ((const __v32qi *)__P,
250              (__v32qi) _mm256_setzero_si256(),
251              __U);
252}
253
/* Immediate-count double shift left, four 64-bit lanes (256-bit).
 * Merge-masked form: passes A, B, the count I (as int), the merge vector S
 * and the 8-bit mask U to the vpshldq256 builtin. */
#define _mm256_mask_shldi_epi64(S, U, A, B, I) __extension__ ({ \
  (__m256i)__builtin_ia32_vpshldq256_mask((__v4di)(A), (__v4di)(B), (int)(I), \
                                          (__v4di)(S), (__mmask8)(U)); })

/* Zero-masked form: the masked macro with an all-zero merge vector. */
#define _mm256_maskz_shldi_epi64(U, A, B, I) \
  _mm256_mask_shldi_epi64(_mm256_setzero_si256(), (U), (A), (B), (I))

/* Unmasked form: the masked macro with an all-ones 8-bit mask and an
 * undefined merge vector. */
#define _mm256_shldi_epi64(A, B, I) \
  _mm256_mask_shldi_epi64(_mm256_undefined_si256(), (__mmask8)(-1), \
                          (A), (B), (I))
266
/* Immediate-count double shift left, two 64-bit lanes (128-bit).
 * Merge-masked form: passes A, B, the count I (as int), the merge vector S
 * and the 8-bit mask U to the vpshldq128 builtin. */
#define _mm_mask_shldi_epi64(S, U, A, B, I) __extension__ ({ \
  (__m128i)__builtin_ia32_vpshldq128_mask((__v2di)(A), (__v2di)(B), (int)(I), \
                                          (__v2di)(S), (__mmask8)(U)); })

/* Zero-masked form: the masked macro with an all-zero merge vector. */
#define _mm_maskz_shldi_epi64(U, A, B, I) \
  _mm_mask_shldi_epi64(_mm_setzero_si128(), (U), (A), (B), (I))

/* Unmasked form: the masked macro with an all-ones 8-bit mask and an
 * undefined merge vector. */
#define _mm_shldi_epi64(A, B, I) \
  _mm_mask_shldi_epi64(_mm_undefined_si128(), (__mmask8)(-1), \
                       (A), (B), (I))
279
/* Immediate-count double shift left, eight 32-bit lanes (256-bit).
 * Merge-masked form: passes A, B, the count I (as int), the merge vector S
 * and the 8-bit mask U to the vpshldd256 builtin. */
#define _mm256_mask_shldi_epi32(S, U, A, B, I) __extension__ ({ \
  (__m256i)__builtin_ia32_vpshldd256_mask((__v8si)(A), (__v8si)(B), (int)(I), \
                                          (__v8si)(S), (__mmask8)(U)); })

/* Zero-masked form: the masked macro with an all-zero merge vector. */
#define _mm256_maskz_shldi_epi32(U, A, B, I) \
  _mm256_mask_shldi_epi32(_mm256_setzero_si256(), (U), (A), (B), (I))

/* Unmasked form: the masked macro with an all-ones 8-bit mask and an
 * undefined merge vector. */
#define _mm256_shldi_epi32(A, B, I) \
  _mm256_mask_shldi_epi32(_mm256_undefined_si256(), (__mmask8)(-1), \
                          (A), (B), (I))
292
/* Immediate-count double shift left, four 32-bit lanes (128-bit).
 * Merge-masked form: passes A, B, the count I (as int), the merge vector S
 * and the 8-bit mask U to the vpshldd128 builtin. */
#define _mm_mask_shldi_epi32(S, U, A, B, I) __extension__ ({ \
  (__m128i)__builtin_ia32_vpshldd128_mask((__v4si)(A), (__v4si)(B), (int)(I), \
                                          (__v4si)(S), (__mmask8)(U)); })

/* Zero-masked form: the masked macro with an all-zero merge vector. */
#define _mm_maskz_shldi_epi32(U, A, B, I) \
  _mm_mask_shldi_epi32(_mm_setzero_si128(), (U), (A), (B), (I))

/* Unmasked form: the masked macro with an all-ones 8-bit mask and an
 * undefined merge vector. */
#define _mm_shldi_epi32(A, B, I) \
  _mm_mask_shldi_epi32(_mm_undefined_si128(), (__mmask8)(-1), \
                       (A), (B), (I))
305
/* Immediate-count double shift left, sixteen 16-bit lanes (256-bit).
 * Merge-masked form: passes A, B, the count I (as int), the merge vector S
 * and the 16-bit mask U to the vpshldw256 builtin. */
#define _mm256_mask_shldi_epi16(S, U, A, B, I) __extension__ ({ \
  (__m256i)__builtin_ia32_vpshldw256_mask((__v16hi)(A), \
                                          (__v16hi)(B), \
                                          (int)(I), \
                                          (__v16hi)(S), \
                                          (__mmask16)(U)); })

/* Zero-masked form: the masked macro with an all-zero merge vector. */
#define _mm256_maskz_shldi_epi16(U, A, B, I) \
  _mm256_mask_shldi_epi16(_mm256_setzero_si256(), (U), (A), (B), (I))

/* Unmasked form: the masked macro with all sixteen mask bits set and an
 * undefined merge vector.  The mask must be built as __mmask16: an 8-bit
 * all-ones value would widen to 0x00FF inside the masked macro and leave
 * lanes 8..15 selecting the undefined merge vector instead of the result. */
#define _mm256_shldi_epi16(A, B, I) \
  _mm256_mask_shldi_epi16(_mm256_undefined_si256(), (__mmask16)(-1), (A), (B), (I))
318
/* Immediate-count double shift left, eight 16-bit lanes (128-bit).
 * Merge-masked form: passes A, B, the count I (as int), the merge vector S
 * and the 8-bit mask U to the vpshldw128 builtin. */
#define _mm_mask_shldi_epi16(S, U, A, B, I) __extension__ ({ \
  (__m128i)__builtin_ia32_vpshldw128_mask((__v8hi)(A), (__v8hi)(B), (int)(I), \
                                          (__v8hi)(S), (__mmask8)(U)); })

/* Zero-masked form: the masked macro with an all-zero merge vector. */
#define _mm_maskz_shldi_epi16(U, A, B, I) \
  _mm_mask_shldi_epi16(_mm_setzero_si128(), (U), (A), (B), (I))

/* Unmasked form: the masked macro with an all-ones 8-bit mask and an
 * undefined merge vector. */
#define _mm_shldi_epi16(A, B, I) \
  _mm_mask_shldi_epi16(_mm_undefined_si128(), (__mmask8)(-1), \
                       (A), (B), (I))
331
/* Immediate-count double shift right, four 64-bit lanes (256-bit).
 * Merge-masked form: passes A, B, the count I (as int), the merge vector S
 * and the 8-bit mask U to the vpshrdq256 builtin. */
#define _mm256_mask_shrdi_epi64(S, U, A, B, I) __extension__ ({ \
  (__m256i)__builtin_ia32_vpshrdq256_mask((__v4di)(A), (__v4di)(B), (int)(I), \
                                          (__v4di)(S), (__mmask8)(U)); })

/* Zero-masked form: the masked macro with an all-zero merge vector. */
#define _mm256_maskz_shrdi_epi64(U, A, B, I) \
  _mm256_mask_shrdi_epi64(_mm256_setzero_si256(), (U), (A), (B), (I))

/* Unmasked form: the masked macro with an all-ones 8-bit mask and an
 * undefined merge vector. */
#define _mm256_shrdi_epi64(A, B, I) \
  _mm256_mask_shrdi_epi64(_mm256_undefined_si256(), (__mmask8)(-1), \
                          (A), (B), (I))
344
/* Immediate-count double shift right, two 64-bit lanes (128-bit).
 * Merge-masked form: passes A, B, the count I (as int), the merge vector S
 * and the 8-bit mask U to the vpshrdq128 builtin. */
#define _mm_mask_shrdi_epi64(S, U, A, B, I) __extension__ ({ \
  (__m128i)__builtin_ia32_vpshrdq128_mask((__v2di)(A), (__v2di)(B), (int)(I), \
                                          (__v2di)(S), (__mmask8)(U)); })

/* Zero-masked form: the masked macro with an all-zero merge vector. */
#define _mm_maskz_shrdi_epi64(U, A, B, I) \
  _mm_mask_shrdi_epi64(_mm_setzero_si128(), (U), (A), (B), (I))

/* Unmasked form: the masked macro with an all-ones 8-bit mask and an
 * undefined merge vector. */
#define _mm_shrdi_epi64(A, B, I) \
  _mm_mask_shrdi_epi64(_mm_undefined_si128(), (__mmask8)(-1), \
                       (A), (B), (I))
357
/* Immediate-count double shift right, eight 32-bit lanes (256-bit).
 * Merge-masked form: passes A, B, the count I (as int), the merge vector S
 * and the 8-bit mask U to the vpshrdd256 builtin. */
#define _mm256_mask_shrdi_epi32(S, U, A, B, I) __extension__ ({ \
  (__m256i)__builtin_ia32_vpshrdd256_mask((__v8si)(A), (__v8si)(B), (int)(I), \
                                          (__v8si)(S), (__mmask8)(U)); })

/* Zero-masked form: the masked macro with an all-zero merge vector. */
#define _mm256_maskz_shrdi_epi32(U, A, B, I) \
  _mm256_mask_shrdi_epi32(_mm256_setzero_si256(), (U), (A), (B), (I))

/* Unmasked form: the masked macro with an all-ones 8-bit mask and an
 * undefined merge vector. */
#define _mm256_shrdi_epi32(A, B, I) \
  _mm256_mask_shrdi_epi32(_mm256_undefined_si256(), (__mmask8)(-1), \
                          (A), (B), (I))
370
/* Immediate-count double shift right, four 32-bit lanes (128-bit).
 * Merge-masked form: passes A, B, the count I (as int), the merge vector S
 * and the 8-bit mask U to the vpshrdd128 builtin. */
#define _mm_mask_shrdi_epi32(S, U, A, B, I) __extension__ ({ \
  (__m128i)__builtin_ia32_vpshrdd128_mask((__v4si)(A), (__v4si)(B), (int)(I), \
                                          (__v4si)(S), (__mmask8)(U)); })

/* Zero-masked form: the masked macro with an all-zero merge vector. */
#define _mm_maskz_shrdi_epi32(U, A, B, I) \
  _mm_mask_shrdi_epi32(_mm_setzero_si128(), (U), (A), (B), (I))

/* Unmasked form: the masked macro with an all-ones 8-bit mask and an
 * undefined merge vector. */
#define _mm_shrdi_epi32(A, B, I) \
  _mm_mask_shrdi_epi32(_mm_undefined_si128(), (__mmask8)(-1), \
                       (A), (B), (I))
383
/* Immediate-count double shift right, sixteen 16-bit lanes (256-bit).
 * Merge-masked form: passes A, B, the count I (as int), the merge vector S
 * and the 16-bit mask U to the vpshrdw256 builtin. */
#define _mm256_mask_shrdi_epi16(S, U, A, B, I) __extension__ ({ \
  (__m256i)__builtin_ia32_vpshrdw256_mask((__v16hi)(A), \
                                          (__v16hi)(B), \
                                          (int)(I), \
                                          (__v16hi)(S), \
                                          (__mmask16)(U)); })

/* Zero-masked form: the masked macro with an all-zero merge vector. */
#define _mm256_maskz_shrdi_epi16(U, A, B, I) \
  _mm256_mask_shrdi_epi16(_mm256_setzero_si256(), (U), (A), (B), (I))

/* Unmasked form: the masked macro with all sixteen mask bits set and an
 * undefined merge vector.  The mask must be built as __mmask16: an 8-bit
 * all-ones value would widen to 0x00FF inside the masked macro and leave
 * lanes 8..15 selecting the undefined merge vector instead of the result. */
#define _mm256_shrdi_epi16(A, B, I) \
  _mm256_mask_shrdi_epi16(_mm256_undefined_si256(), (__mmask16)(-1), (A), (B), (I))
396
/* Immediate-count double shift right, eight 16-bit lanes (128-bit).
 * Merge-masked form: passes A, B, the count I (as int), the merge vector S
 * and the 8-bit mask U to the vpshrdw128 builtin. */
#define _mm_mask_shrdi_epi16(S, U, A, B, I) __extension__ ({ \
  (__m128i)__builtin_ia32_vpshrdw128_mask((__v8hi)(A), (__v8hi)(B), (int)(I), \
                                          (__v8hi)(S), (__mmask8)(U)); })

/* Zero-masked form: the masked macro with an all-zero merge vector. */
#define _mm_maskz_shrdi_epi16(U, A, B, I) \
  _mm_mask_shrdi_epi16(_mm_setzero_si128(), (U), (A), (B), (I))

/* Unmasked form: the masked macro with an all-ones 8-bit mask and an
 * undefined merge vector. */
#define _mm_shrdi_epi16(A, B, I) \
  _mm_mask_shrdi_epi16(_mm_undefined_si128(), (__mmask8)(-1), \
                       (A), (B), (I))
409
410static __inline__ __m256i __DEFAULT_FN_ATTRS
411_mm256_mask_shldv_epi64(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
412{
413  return (__m256i) __builtin_ia32_vpshldvq256_mask ((__v4di) __S,
414              (__v4di) __A,
415              (__v4di) __B,
416              __U);
417}
418
419static __inline__ __m256i __DEFAULT_FN_ATTRS
420_mm256_maskz_shldv_epi64(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
421{
422  return (__m256i) __builtin_ia32_vpshldvq256_maskz ((__v4di) __S,
423              (__v4di) __A,
424              (__v4di) __B,
425              __U);
426}
427
428static __inline__ __m256i __DEFAULT_FN_ATTRS
429_mm256_shldv_epi64(__m256i __S, __m256i __A, __m256i __B)
430{
431  return (__m256i) __builtin_ia32_vpshldvq256_mask ((__v4di) __S,
432              (__v4di) __A,
433              (__v4di) __B,
434              (__mmask8) -1);
435}
436
437static __inline__ __m128i __DEFAULT_FN_ATTRS
438_mm_mask_shldv_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
439{
440  return (__m128i) __builtin_ia32_vpshldvq128_mask ((__v2di) __S,
441              (__v2di) __A,
442              (__v2di) __B,
443              __U);
444}
445
446static __inline__ __m128i __DEFAULT_FN_ATTRS
447_mm_maskz_shldv_epi64(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
448{
449  return (__m128i) __builtin_ia32_vpshldvq128_maskz ((__v2di) __S,
450              (__v2di) __A,
451              (__v2di) __B,
452              __U);
453}
454
455static __inline__ __m128i __DEFAULT_FN_ATTRS
456_mm_shldv_epi64(__m128i __S, __m128i __A, __m128i __B)
457{
458  return (__m128i) __builtin_ia32_vpshldvq128_mask ((__v2di) __S,
459              (__v2di) __A,
460              (__v2di) __B,
461              (__mmask8) -1);
462}
463
464static __inline__ __m256i __DEFAULT_FN_ATTRS
465_mm256_mask_shldv_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
466{
467  return (__m256i) __builtin_ia32_vpshldvd256_mask ((__v8si) __S,
468              (__v8si) __A,
469              (__v8si) __B,
470              __U);
471}
472
473static __inline__ __m256i __DEFAULT_FN_ATTRS
474_mm256_maskz_shldv_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
475{
476  return (__m256i) __builtin_ia32_vpshldvd256_maskz ((__v8si) __S,
477              (__v8si) __A,
478              (__v8si) __B,
479              __U);
480}
481
482static __inline__ __m256i __DEFAULT_FN_ATTRS
483_mm256_shldv_epi32(__m256i __S, __m256i __A, __m256i __B)
484{
485  return (__m256i) __builtin_ia32_vpshldvd256_mask ((__v8si) __S,
486              (__v8si) __A,
487              (__v8si) __B,
488              (__mmask8) -1);
489}
490
491static __inline__ __m128i __DEFAULT_FN_ATTRS
492_mm_mask_shldv_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
493{
494  return (__m128i) __builtin_ia32_vpshldvd128_mask ((__v4si) __S,
495              (__v4si) __A,
496              (__v4si) __B,
497              __U);
498}
499
500static __inline__ __m128i __DEFAULT_FN_ATTRS
501_mm_maskz_shldv_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
502{
503  return (__m128i) __builtin_ia32_vpshldvd128_maskz ((__v4si) __S,
504              (__v4si) __A,
505              (__v4si) __B,
506              __U);
507}
508
509static __inline__ __m128i __DEFAULT_FN_ATTRS
510_mm_shldv_epi32(__m128i __S, __m128i __A, __m128i __B)
511{
512  return (__m128i) __builtin_ia32_vpshldvd128_mask ((__v4si) __S,
513              (__v4si) __A,
514              (__v4si) __B,
515              (__mmask8) -1);
516}
517
518static __inline__ __m256i __DEFAULT_FN_ATTRS
519_mm256_mask_shldv_epi16(__m256i __S, __mmask16 __U, __m256i __A, __m256i __B)
520{
521  return (__m256i) __builtin_ia32_vpshldvw256_mask ((__v16hi) __S,
522              (__v16hi) __A,
523              (__v16hi) __B,
524              __U);
525}
526
527static __inline__ __m256i __DEFAULT_FN_ATTRS
528_mm256_maskz_shldv_epi16(__mmask16 __U, __m256i __S, __m256i __A, __m256i __B)
529{
530  return (__m256i) __builtin_ia32_vpshldvw256_maskz ((__v16hi) __S,
531              (__v16hi) __A,
532              (__v16hi) __B,
533              __U);
534}
535
536static __inline__ __m256i __DEFAULT_FN_ATTRS
537_mm256_shldv_epi16(__m256i __S, __m256i __A, __m256i __B)
538{
539  return (__m256i) __builtin_ia32_vpshldvw256_mask ((__v16hi) __S,
540              (__v16hi) __A,
541              (__v16hi) __B,
542              (__mmask16) -1);
543}
544
545static __inline__ __m128i __DEFAULT_FN_ATTRS
546_mm_mask_shldv_epi16(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
547{
548  return (__m128i) __builtin_ia32_vpshldvw128_mask ((__v8hi) __S,
549              (__v8hi) __A,
550              (__v8hi) __B,
551              __U);
552}
553
554static __inline__ __m128i __DEFAULT_FN_ATTRS
555_mm_maskz_shldv_epi16(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
556{
557  return (__m128i) __builtin_ia32_vpshldvw128_maskz ((__v8hi) __S,
558              (__v8hi) __A,
559              (__v8hi) __B,
560              __U);
561}
562
563static __inline__ __m128i __DEFAULT_FN_ATTRS
564_mm_shldv_epi16(__m128i __S, __m128i __A, __m128i __B)
565{
566  return (__m128i) __builtin_ia32_vpshldvw128_mask ((__v8hi) __S,
567              (__v8hi) __A,
568              (__v8hi) __B,
569              (__mmask8) -1);
570}
571
572static __inline__ __m256i __DEFAULT_FN_ATTRS
573_mm256_mask_shrdv_epi64(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
574{
575  return (__m256i) __builtin_ia32_vpshrdvq256_mask ((__v4di) __S,
576              (__v4di) __A,
577              (__v4di) __B,
578              __U);
579}
580
581static __inline__ __m256i __DEFAULT_FN_ATTRS
582_mm256_maskz_shrdv_epi64(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
583{
584  return (__m256i) __builtin_ia32_vpshrdvq256_maskz ((__v4di) __S,
585              (__v4di) __A,
586              (__v4di) __B,
587              __U);
588}
589
590static __inline__ __m256i __DEFAULT_FN_ATTRS
591_mm256_shrdv_epi64(__m256i __S, __m256i __A, __m256i __B)
592{
593  return (__m256i) __builtin_ia32_vpshrdvq256_mask ((__v4di) __S,
594              (__v4di) __A,
595              (__v4di) __B,
596              (__mmask8) -1);
597}
598
599static __inline__ __m128i __DEFAULT_FN_ATTRS
600_mm_mask_shrdv_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
601{
602  return (__m128i) __builtin_ia32_vpshrdvq128_mask ((__v2di) __S,
603              (__v2di) __A,
604              (__v2di) __B,
605              __U);
606}
607
608static __inline__ __m128i __DEFAULT_FN_ATTRS
609_mm_maskz_shrdv_epi64(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
610{
611  return (__m128i) __builtin_ia32_vpshrdvq128_maskz ((__v2di) __S,
612              (__v2di) __A,
613              (__v2di) __B,
614              __U);
615}
616
617static __inline__ __m128i __DEFAULT_FN_ATTRS
618_mm_shrdv_epi64(__m128i __S, __m128i __A, __m128i __B)
619{
620  return (__m128i) __builtin_ia32_vpshrdvq128_mask ((__v2di) __S,
621              (__v2di) __A,
622              (__v2di) __B,
623              (__mmask8) -1);
624}
625
626static __inline__ __m256i __DEFAULT_FN_ATTRS
627_mm256_mask_shrdv_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
628{
629  return (__m256i) __builtin_ia32_vpshrdvd256_mask ((__v8si) __S,
630              (__v8si) __A,
631              (__v8si) __B,
632              __U);
633}
634
635static __inline__ __m256i __DEFAULT_FN_ATTRS
636_mm256_maskz_shrdv_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
637{
638  return (__m256i) __builtin_ia32_vpshrdvd256_maskz ((__v8si) __S,
639              (__v8si) __A,
640              (__v8si) __B,
641              __U);
642}
643
644static __inline__ __m256i __DEFAULT_FN_ATTRS
645_mm256_shrdv_epi32(__m256i __S, __m256i __A, __m256i __B)
646{
647  return (__m256i) __builtin_ia32_vpshrdvd256_mask ((__v8si) __S,
648              (__v8si) __A,
649              (__v8si) __B,
650              (__mmask8) -1);
651}
652
653static __inline__ __m128i __DEFAULT_FN_ATTRS
654_mm_mask_shrdv_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
655{
656  return (__m128i) __builtin_ia32_vpshrdvd128_mask ((__v4si) __S,
657              (__v4si) __A,
658              (__v4si) __B,
659              __U);
660}
661
662static __inline__ __m128i __DEFAULT_FN_ATTRS
663_mm_maskz_shrdv_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
664{
665  return (__m128i) __builtin_ia32_vpshrdvd128_maskz ((__v4si) __S,
666              (__v4si) __A,
667              (__v4si) __B,
668              __U);
669}
670
671static __inline__ __m128i __DEFAULT_FN_ATTRS
672_mm_shrdv_epi32(__m128i __S, __m128i __A, __m128i __B)
673{
674  return (__m128i) __builtin_ia32_vpshrdvd128_mask ((__v4si) __S,
675              (__v4si) __A,
676              (__v4si) __B,
677              (__mmask8) -1);
678}
679
680static __inline__ __m256i __DEFAULT_FN_ATTRS
681_mm256_mask_shrdv_epi16(__m256i __S, __mmask16 __U, __m256i __A, __m256i __B)
682{
683  return (__m256i) __builtin_ia32_vpshrdvw256_mask ((__v16hi) __S,
684              (__v16hi) __A,
685              (__v16hi) __B,
686              __U);
687}
688
689static __inline__ __m256i __DEFAULT_FN_ATTRS
690_mm256_maskz_shrdv_epi16(__mmask16 __U, __m256i __S, __m256i __A, __m256i __B)
691{
692  return (__m256i) __builtin_ia32_vpshrdvw256_maskz ((__v16hi) __S,
693              (__v16hi) __A,
694              (__v16hi) __B,
695              __U);
696}
697
698static __inline__ __m256i __DEFAULT_FN_ATTRS
699_mm256_shrdv_epi16(__m256i __S, __m256i __A, __m256i __B)
700{
701  return (__m256i) __builtin_ia32_vpshrdvw256_mask ((__v16hi) __S,
702              (__v16hi) __A,
703              (__v16hi) __B,
704              (__mmask16) -1);
705}
706
707static __inline__ __m128i __DEFAULT_FN_ATTRS
708_mm_mask_shrdv_epi16(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
709{
710  return (__m128i) __builtin_ia32_vpshrdvw128_mask ((__v8hi) __S,
711              (__v8hi) __A,
712              (__v8hi) __B,
713              __U);
714}
715
716static __inline__ __m128i __DEFAULT_FN_ATTRS
717_mm_maskz_shrdv_epi16(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
718{
719  return (__m128i) __builtin_ia32_vpshrdvw128_maskz ((__v8hi) __S,
720              (__v8hi) __A,
721              (__v8hi) __B,
722              __U);
723}
724
725static __inline__ __m128i __DEFAULT_FN_ATTRS
726_mm_shrdv_epi16(__m128i __S, __m128i __A, __m128i __B)
727{
728  return (__m128i) __builtin_ia32_vpshrdvw128_mask ((__v8hi) __S,
729              (__v8hi) __A,
730              (__v8hi) __B,
731              (__mmask8) -1);
732}
733
734
735#undef __DEFAULT_FN_ATTRS
736
737#endif
738