/* avx512vbmivlintrin.h revision 344779 */
/*===------------- avx512vbmivlintrin.h - VBMI intrinsics ------------------===
 *
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */
24303233Sdim#ifndef __IMMINTRIN_H
25303233Sdim#error "Never use <avx512vbmivlintrin.h> directly; include <immintrin.h> instead."
26303233Sdim#endif
27303233Sdim
28303233Sdim#ifndef __VBMIVLINTRIN_H
29303233Sdim#define __VBMIVLINTRIN_H
30303233Sdim
31303233Sdim/* Define the default attributes for the functions in this file. */
32341825Sdim#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi,avx512vl"), __min_vector_width__(128)))
33341825Sdim#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi,avx512vl"), __min_vector_width__(256)))
34303233Sdim
35303233Sdim
36341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS128
37341825Sdim_mm_permutex2var_epi8(__m128i __A, __m128i __I, __m128i __B)
38303233Sdim{
39341825Sdim  return (__m128i)__builtin_ia32_vpermi2varqi128((__v16qi)__A,
40341825Sdim                                                 (__v16qi)__I,
41341825Sdim                                                 (__v16qi)__B);
42303233Sdim}
43303233Sdim
44341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS128
45341825Sdim_mm_mask_permutex2var_epi8(__m128i __A, __mmask16 __U, __m128i __I,
46341825Sdim                           __m128i __B)
47303233Sdim{
48341825Sdim  return (__m128i)__builtin_ia32_selectb_128(__U,
49341825Sdim                                  (__v16qi)_mm_permutex2var_epi8(__A, __I, __B),
50341825Sdim                                  (__v16qi)__A);
51303233Sdim}
52303233Sdim
53341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS128
54341825Sdim_mm_mask2_permutex2var_epi8(__m128i __A, __m128i __I, __mmask16 __U,
55341825Sdim                            __m128i __B)
56303233Sdim{
57341825Sdim  return (__m128i)__builtin_ia32_selectb_128(__U,
58341825Sdim                                  (__v16qi)_mm_permutex2var_epi8(__A, __I, __B),
59341825Sdim                                  (__v16qi)__I);
60303233Sdim}
61303233Sdim
62341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS128
63341825Sdim_mm_maskz_permutex2var_epi8(__mmask16 __U, __m128i __A, __m128i __I,
64341825Sdim                            __m128i __B)
65303233Sdim{
66341825Sdim  return (__m128i)__builtin_ia32_selectb_128(__U,
67341825Sdim                                  (__v16qi)_mm_permutex2var_epi8(__A, __I, __B),
68341825Sdim                                  (__v16qi)_mm_setzero_si128());
69303233Sdim}
70303233Sdim
71341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS256
72341825Sdim_mm256_permutex2var_epi8(__m256i __A, __m256i __I, __m256i __B)
73303233Sdim{
74341825Sdim  return (__m256i)__builtin_ia32_vpermi2varqi256((__v32qi)__A, (__v32qi)__I,
75341825Sdim                                                 (__v32qi)__B);
76303233Sdim}
77303233Sdim
78341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS256
79341825Sdim_mm256_mask_permutex2var_epi8(__m256i __A, __mmask32 __U, __m256i __I,
80341825Sdim                              __m256i __B)
81303233Sdim{
82341825Sdim  return (__m256i)__builtin_ia32_selectb_256(__U,
83341825Sdim                               (__v32qi)_mm256_permutex2var_epi8(__A, __I, __B),
84341825Sdim                               (__v32qi)__A);
85303233Sdim}
86303233Sdim
87341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS256
88341825Sdim_mm256_mask2_permutex2var_epi8(__m256i __A, __m256i __I, __mmask32 __U,
89341825Sdim                               __m256i __B)
90303233Sdim{
91341825Sdim  return (__m256i)__builtin_ia32_selectb_256(__U,
92341825Sdim                               (__v32qi)_mm256_permutex2var_epi8(__A, __I, __B),
93341825Sdim                               (__v32qi)__I);
94303233Sdim}
95303233Sdim
96341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS256
97341825Sdim_mm256_maskz_permutex2var_epi8(__mmask32 __U, __m256i __A, __m256i __I,
98341825Sdim                               __m256i __B)
99303233Sdim{
100341825Sdim  return (__m256i)__builtin_ia32_selectb_256(__U,
101341825Sdim                               (__v32qi)_mm256_permutex2var_epi8(__A, __I, __B),
102341825Sdim                               (__v32qi)_mm256_setzero_si256());
103303233Sdim}
104303233Sdim
105341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS128
106303233Sdim_mm_permutexvar_epi8 (__m128i __A, __m128i __B)
107303233Sdim{
108341825Sdim  return (__m128i)__builtin_ia32_permvarqi128((__v16qi)__B, (__v16qi)__A);
109303233Sdim}
110303233Sdim
111341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS128
112303233Sdim_mm_maskz_permutexvar_epi8 (__mmask16 __M, __m128i __A, __m128i __B)
113303233Sdim{
114341825Sdim  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
115341825Sdim                                        (__v16qi)_mm_permutexvar_epi8(__A, __B),
116341825Sdim                                        (__v16qi)_mm_setzero_si128());
117303233Sdim}
118303233Sdim
119341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS128
120303233Sdim_mm_mask_permutexvar_epi8 (__m128i __W, __mmask16 __M, __m128i __A,
121303233Sdim          __m128i __B)
122303233Sdim{
123341825Sdim  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
124341825Sdim                                        (__v16qi)_mm_permutexvar_epi8(__A, __B),
125341825Sdim                                        (__v16qi)__W);
126303233Sdim}
127303233Sdim
128341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS256
129303233Sdim_mm256_permutexvar_epi8 (__m256i __A, __m256i __B)
130303233Sdim{
131341825Sdim  return (__m256i)__builtin_ia32_permvarqi256((__v32qi) __B, (__v32qi) __A);
132303233Sdim}
133303233Sdim
134341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS256
135303233Sdim_mm256_maskz_permutexvar_epi8 (__mmask32 __M, __m256i __A,
136303233Sdim        __m256i __B)
137303233Sdim{
138341825Sdim  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
139341825Sdim                                     (__v32qi)_mm256_permutexvar_epi8(__A, __B),
140341825Sdim                                     (__v32qi)_mm256_setzero_si256());
141303233Sdim}
142303233Sdim
143341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS256
144303233Sdim_mm256_mask_permutexvar_epi8 (__m256i __W, __mmask32 __M, __m256i __A,
145303233Sdim             __m256i __B)
146303233Sdim{
147341825Sdim  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
148341825Sdim                                     (__v32qi)_mm256_permutexvar_epi8(__A, __B),
149341825Sdim                                     (__v32qi)__W);
150303233Sdim}
151303233Sdim
152341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS128
153344779Sdim_mm_multishift_epi64_epi8(__m128i __X, __m128i __Y)
154303233Sdim{
155344779Sdim  return (__m128i)__builtin_ia32_vpmultishiftqb128((__v16qi)__X, (__v16qi)__Y);
156303233Sdim}
157303233Sdim
158341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS128
159344779Sdim_mm_mask_multishift_epi64_epi8(__m128i __W, __mmask16 __M, __m128i __X,
160344779Sdim                               __m128i __Y)
161303233Sdim{
162344779Sdim  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
163344779Sdim                                   (__v16qi)_mm_multishift_epi64_epi8(__X, __Y),
164344779Sdim                                   (__v16qi)__W);
165303233Sdim}
166303233Sdim
167341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS128
168344779Sdim_mm_maskz_multishift_epi64_epi8(__mmask16 __M, __m128i __X, __m128i __Y)
169303233Sdim{
170344779Sdim  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
171344779Sdim                                   (__v16qi)_mm_multishift_epi64_epi8(__X, __Y),
172344779Sdim                                   (__v16qi)_mm_setzero_si128());
173303233Sdim}
174303233Sdim
175341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS256
176344779Sdim_mm256_multishift_epi64_epi8(__m256i __X, __m256i __Y)
177303233Sdim{
178344779Sdim  return (__m256i)__builtin_ia32_vpmultishiftqb256((__v32qi)__X, (__v32qi)__Y);
179303233Sdim}
180303233Sdim
181341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS256
182344779Sdim_mm256_mask_multishift_epi64_epi8(__m256i __W, __mmask32 __M, __m256i __X,
183344779Sdim                                  __m256i __Y)
184303233Sdim{
185344779Sdim  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
186344779Sdim                                (__v32qi)_mm256_multishift_epi64_epi8(__X, __Y),
187344779Sdim                                (__v32qi)__W);
188303233Sdim}
189303233Sdim
190341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS256
191344779Sdim_mm256_maskz_multishift_epi64_epi8(__mmask32 __M, __m256i __X, __m256i __Y)
192303233Sdim{
193344779Sdim  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
194344779Sdim                                (__v32qi)_mm256_multishift_epi64_epi8(__X, __Y),
195344779Sdim                                (__v32qi)_mm256_setzero_si256());
196303233Sdim}
197303233Sdim
198303233Sdim
199341825Sdim#undef __DEFAULT_FN_ATTRS128
200341825Sdim#undef __DEFAULT_FN_ATTRS256
201303233Sdim
202303233Sdim#endif
203