1/*===------------- avx512vbmivlintrin.h - VBMI intrinsics ------------------===
2 *
3 *
4 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5 * See https://llvm.org/LICENSE.txt for license information.
6 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 *
8 *===-----------------------------------------------------------------------===
9 */
10#ifndef __IMMINTRIN_H
11#error "Never use <avx512vbmivlintrin.h> directly; include <immintrin.h> instead."
12#endif
13
14#ifndef __VBMIVLINTRIN_H
15#define __VBMIVLINTRIN_H
16
/* Default attribute sets applied to every function defined in this file.
 * Both require the "avx512vbmi,avx512vl" target features; they differ only in
 * the __min_vector_width__ value (128 vs. 256). Both are #undef'd at the end
 * of the header. */
#define __DEFAULT_FN_ATTRS128                                                  \
  __attribute__((__always_inline__, __nodebug__,                               \
                 __target__("avx512vbmi,avx512vl"),                            \
                 __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256                                                  \
  __attribute__((__always_inline__, __nodebug__,                               \
                 __target__("avx512vbmi,avx512vl"),                            \
                 __min_vector_width__(256)))
20
21
22static __inline__ __m128i __DEFAULT_FN_ATTRS128
23_mm_permutex2var_epi8(__m128i __A, __m128i __I, __m128i __B)
24{
25  return (__m128i)__builtin_ia32_vpermi2varqi128((__v16qi)__A,
26                                                 (__v16qi)__I,
27                                                 (__v16qi)__B);
28}
29
30static __inline__ __m128i __DEFAULT_FN_ATTRS128
31_mm_mask_permutex2var_epi8(__m128i __A, __mmask16 __U, __m128i __I,
32                           __m128i __B)
33{
34  return (__m128i)__builtin_ia32_selectb_128(__U,
35                                  (__v16qi)_mm_permutex2var_epi8(__A, __I, __B),
36                                  (__v16qi)__A);
37}
38
39static __inline__ __m128i __DEFAULT_FN_ATTRS128
40_mm_mask2_permutex2var_epi8(__m128i __A, __m128i __I, __mmask16 __U,
41                            __m128i __B)
42{
43  return (__m128i)__builtin_ia32_selectb_128(__U,
44                                  (__v16qi)_mm_permutex2var_epi8(__A, __I, __B),
45                                  (__v16qi)__I);
46}
47
48static __inline__ __m128i __DEFAULT_FN_ATTRS128
49_mm_maskz_permutex2var_epi8(__mmask16 __U, __m128i __A, __m128i __I,
50                            __m128i __B)
51{
52  return (__m128i)__builtin_ia32_selectb_128(__U,
53                                  (__v16qi)_mm_permutex2var_epi8(__A, __I, __B),
54                                  (__v16qi)_mm_setzero_si128());
55}
56
57static __inline__ __m256i __DEFAULT_FN_ATTRS256
58_mm256_permutex2var_epi8(__m256i __A, __m256i __I, __m256i __B)
59{
60  return (__m256i)__builtin_ia32_vpermi2varqi256((__v32qi)__A, (__v32qi)__I,
61                                                 (__v32qi)__B);
62}
63
64static __inline__ __m256i __DEFAULT_FN_ATTRS256
65_mm256_mask_permutex2var_epi8(__m256i __A, __mmask32 __U, __m256i __I,
66                              __m256i __B)
67{
68  return (__m256i)__builtin_ia32_selectb_256(__U,
69                               (__v32qi)_mm256_permutex2var_epi8(__A, __I, __B),
70                               (__v32qi)__A);
71}
72
73static __inline__ __m256i __DEFAULT_FN_ATTRS256
74_mm256_mask2_permutex2var_epi8(__m256i __A, __m256i __I, __mmask32 __U,
75                               __m256i __B)
76{
77  return (__m256i)__builtin_ia32_selectb_256(__U,
78                               (__v32qi)_mm256_permutex2var_epi8(__A, __I, __B),
79                               (__v32qi)__I);
80}
81
82static __inline__ __m256i __DEFAULT_FN_ATTRS256
83_mm256_maskz_permutex2var_epi8(__mmask32 __U, __m256i __A, __m256i __I,
84                               __m256i __B)
85{
86  return (__m256i)__builtin_ia32_selectb_256(__U,
87                               (__v32qi)_mm256_permutex2var_epi8(__A, __I, __B),
88                               (__v32qi)_mm256_setzero_si256());
89}
90
91static __inline__ __m128i __DEFAULT_FN_ATTRS128
92_mm_permutexvar_epi8 (__m128i __A, __m128i __B)
93{
94  return (__m128i)__builtin_ia32_permvarqi128((__v16qi)__B, (__v16qi)__A);
95}
96
97static __inline__ __m128i __DEFAULT_FN_ATTRS128
98_mm_maskz_permutexvar_epi8 (__mmask16 __M, __m128i __A, __m128i __B)
99{
100  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
101                                        (__v16qi)_mm_permutexvar_epi8(__A, __B),
102                                        (__v16qi)_mm_setzero_si128());
103}
104
105static __inline__ __m128i __DEFAULT_FN_ATTRS128
106_mm_mask_permutexvar_epi8 (__m128i __W, __mmask16 __M, __m128i __A,
107          __m128i __B)
108{
109  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
110                                        (__v16qi)_mm_permutexvar_epi8(__A, __B),
111                                        (__v16qi)__W);
112}
113
114static __inline__ __m256i __DEFAULT_FN_ATTRS256
115_mm256_permutexvar_epi8 (__m256i __A, __m256i __B)
116{
117  return (__m256i)__builtin_ia32_permvarqi256((__v32qi) __B, (__v32qi) __A);
118}
119
120static __inline__ __m256i __DEFAULT_FN_ATTRS256
121_mm256_maskz_permutexvar_epi8 (__mmask32 __M, __m256i __A,
122        __m256i __B)
123{
124  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
125                                     (__v32qi)_mm256_permutexvar_epi8(__A, __B),
126                                     (__v32qi)_mm256_setzero_si256());
127}
128
129static __inline__ __m256i __DEFAULT_FN_ATTRS256
130_mm256_mask_permutexvar_epi8 (__m256i __W, __mmask32 __M, __m256i __A,
131             __m256i __B)
132{
133  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
134                                     (__v32qi)_mm256_permutexvar_epi8(__A, __B),
135                                     (__v32qi)__W);
136}
137
138static __inline__ __m128i __DEFAULT_FN_ATTRS128
139_mm_multishift_epi64_epi8(__m128i __X, __m128i __Y)
140{
141  return (__m128i)__builtin_ia32_vpmultishiftqb128((__v16qi)__X, (__v16qi)__Y);
142}
143
144static __inline__ __m128i __DEFAULT_FN_ATTRS128
145_mm_mask_multishift_epi64_epi8(__m128i __W, __mmask16 __M, __m128i __X,
146                               __m128i __Y)
147{
148  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
149                                   (__v16qi)_mm_multishift_epi64_epi8(__X, __Y),
150                                   (__v16qi)__W);
151}
152
153static __inline__ __m128i __DEFAULT_FN_ATTRS128
154_mm_maskz_multishift_epi64_epi8(__mmask16 __M, __m128i __X, __m128i __Y)
155{
156  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
157                                   (__v16qi)_mm_multishift_epi64_epi8(__X, __Y),
158                                   (__v16qi)_mm_setzero_si128());
159}
160
161static __inline__ __m256i __DEFAULT_FN_ATTRS256
162_mm256_multishift_epi64_epi8(__m256i __X, __m256i __Y)
163{
164  return (__m256i)__builtin_ia32_vpmultishiftqb256((__v32qi)__X, (__v32qi)__Y);
165}
166
167static __inline__ __m256i __DEFAULT_FN_ATTRS256
168_mm256_mask_multishift_epi64_epi8(__m256i __W, __mmask32 __M, __m256i __X,
169                                  __m256i __Y)
170{
171  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
172                                (__v32qi)_mm256_multishift_epi64_epi8(__X, __Y),
173                                (__v32qi)__W);
174}
175
176static __inline__ __m256i __DEFAULT_FN_ATTRS256
177_mm256_maskz_multishift_epi64_epi8(__mmask32 __M, __m256i __X, __m256i __Y)
178{
179  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
180                                (__v32qi)_mm256_multishift_epi64_epi8(__X, __Y),
181                                (__v32qi)_mm256_setzero_si256());
182}
183
184
185#undef __DEFAULT_FN_ATTRS128
186#undef __DEFAULT_FN_ATTRS256
187
188#endif
189