/* avx512vbmivlintrin.h revision 344779 */
/*===------------- avx512vbmivlintrin.h - VBMI intrinsics ------------------===
 *
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */
24103285Sikob#ifndef __IMMINTRIN_H
25103285Sikob#error "Never use <avx512vbmivlintrin.h> directly; include <immintrin.h> instead."
26103285Sikob#endif
27103285Sikob
28103285Sikob#ifndef __VBMIVLINTRIN_H
29103285Sikob#define __VBMIVLINTRIN_H
30103285Sikob
31103285Sikob/* Define the default attributes for the functions in this file. */
32103285Sikob#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi,avx512vl"), __min_vector_width__(128)))
33103285Sikob#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi,avx512vl"), __min_vector_width__(256)))
34103285Sikob
35227843Smarius
36227843Smariusstatic __inline__ __m128i __DEFAULT_FN_ATTRS128
37227843Smarius_mm_permutex2var_epi8(__m128i __A, __m128i __I, __m128i __B)
38103285Sikob{
39103285Sikob  return (__m128i)__builtin_ia32_vpermi2varqi128((__v16qi)__A,
40103285Sikob                                                 (__v16qi)__I,
41103285Sikob                                                 (__v16qi)__B);
42193066Sjamie}
43103285Sikob
44129879Sphkstatic __inline__ __m128i __DEFAULT_FN_ATTRS128
45103285Sikob_mm_mask_permutex2var_epi8(__m128i __A, __mmask16 __U, __m128i __I,
46103285Sikob                           __m128i __B)
47103285Sikob{
48169806Ssimokawa  return (__m128i)__builtin_ia32_selectb_128(__U,
49103285Sikob                                  (__v16qi)_mm_permutex2var_epi8(__A, __I, __B),
50170374Ssimokawa                                  (__v16qi)__A);
51170374Ssimokawa}
52127468Ssimokawa
53117067Ssimokawastatic __inline__ __m128i __DEFAULT_FN_ATTRS128
54117067Ssimokawa_mm_mask2_permutex2var_epi8(__m128i __A, __m128i __I, __mmask16 __U,
55103285Sikob                            __m128i __B)
56103285Sikob{
57113584Ssimokawa  return (__m128i)__builtin_ia32_selectb_128(__U,
58103285Sikob                                  (__v16qi)_mm_permutex2var_epi8(__A, __I, __B),
59127468Ssimokawa                                  (__v16qi)__I);
60127468Ssimokawa}
61127468Ssimokawa
62127468Ssimokawastatic __inline__ __m128i __DEFAULT_FN_ATTRS128
63127468Ssimokawa_mm_maskz_permutex2var_epi8(__mmask16 __U, __m128i __A, __m128i __I,
64127468Ssimokawa                            __m128i __B)
65127468Ssimokawa{
66103285Sikob  return (__m128i)__builtin_ia32_selectb_128(__U,
67103285Sikob                                  (__v16qi)_mm_permutex2var_epi8(__A, __I, __B),
68110072Ssimokawa                                  (__v16qi)_mm_setzero_si128());
69103285Sikob}
70103285Sikob
71127468Ssimokawastatic __inline__ __m256i __DEFAULT_FN_ATTRS256
72103285Sikob_mm256_permutex2var_epi8(__m256i __A, __m256i __I, __m256i __B)
73116376Ssimokawa{
74116376Ssimokawa  return (__m256i)__builtin_ia32_vpermi2varqi256((__v32qi)__A, (__v32qi)__I,
75116376Ssimokawa                                                 (__v32qi)__B);
76116376Ssimokawa}
77116376Ssimokawa
78116376Ssimokawastatic __inline__ __m256i __DEFAULT_FN_ATTRS256
79116376Ssimokawa_mm256_mask_permutex2var_epi8(__m256i __A, __mmask32 __U, __m256i __I,
80188704Ssbruno                              __m256i __B)
81103285Sikob{
82108281Ssimokawa  return (__m256i)__builtin_ia32_selectb_256(__U,
83109736Ssimokawa                               (__v32qi)_mm256_permutex2var_epi8(__A, __I, __B),
84109736Ssimokawa                               (__v32qi)__A);
85109736Ssimokawa}
86120850Ssimokawa
87120850Ssimokawastatic __inline__ __m256i __DEFAULT_FN_ATTRS256
88103285Sikob_mm256_mask2_permutex2var_epi8(__m256i __A, __m256i __I, __mmask32 __U,
89110195Ssimokawa                               __m256i __B)
90110269Ssimokawa{
91110195Ssimokawa  return (__m256i)__builtin_ia32_selectb_256(__U,
92103285Sikob                               (__v32qi)_mm256_permutex2var_epi8(__A, __I, __B),
93103285Sikob                               (__v32qi)__I);
94103285Sikob}
95103285Sikob
96125238Ssimokawastatic __inline__ __m256i __DEFAULT_FN_ATTRS256
97125238Ssimokawa_mm256_maskz_permutex2var_epi8(__mmask32 __U, __m256i __A, __m256i __I,
98124169Ssimokawa                               __m256i __B)
99124169Ssimokawa{
100124169Ssimokawa  return (__m256i)__builtin_ia32_selectb_256(__U,
101170374Ssimokawa                               (__v32qi)_mm256_permutex2var_epi8(__A, __I, __B),
102103285Sikob                               (__v32qi)_mm256_setzero_si256());
103124169Ssimokawa}
104103285Sikob
105212413Savgstatic __inline__ __m128i __DEFAULT_FN_ATTRS128
106124169Ssimokawa_mm_permutexvar_epi8 (__m128i __A, __m128i __B)
107124169Ssimokawa{
108124169Ssimokawa  return (__m128i)__builtin_ia32_permvarqi128((__v16qi)__B, (__v16qi)__A);
109124169Ssimokawa}
110124169Ssimokawa
111124169Ssimokawastatic __inline__ __m128i __DEFAULT_FN_ATTRS128
112169806Ssimokawa_mm_maskz_permutexvar_epi8 (__mmask16 __M, __m128i __A, __m128i __B)
113106543Ssimokawa{
114124169Ssimokawa  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
115106543Ssimokawa                                        (__v16qi)_mm_permutexvar_epi8(__A, __B),
116124169Ssimokawa                                        (__v16qi)_mm_setzero_si128());
117170374Ssimokawa}
118103285Sikob
119103285Sikobstatic __inline__ __m128i __DEFAULT_FN_ATTRS128
120103285Sikob_mm_mask_permutexvar_epi8 (__m128i __W, __mmask16 __M, __m128i __A,
121125238Ssimokawa          __m128i __B)
122125238Ssimokawa{
123103285Sikob  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
124103285Sikob                                        (__v16qi)_mm_permutexvar_epi8(__A, __B),
125108642Ssimokawa                                        (__v16qi)__W);
126116978Ssimokawa}
127103285Sikob
128103285Sikobstatic __inline__ __m256i __DEFAULT_FN_ATTRS256
129103285Sikob_mm256_permutexvar_epi8 (__m256i __A, __m256i __B)
130103285Sikob{
131103285Sikob  return (__m256i)__builtin_ia32_permvarqi256((__v32qi) __B, (__v32qi) __A);
132227843Smarius}
133103285Sikob
134124251Ssimokawastatic __inline__ __m256i __DEFAULT_FN_ATTRS256
135124251Ssimokawa_mm256_maskz_permutexvar_epi8 (__mmask32 __M, __m256i __A,
136124251Ssimokawa        __m256i __B)
137124251Ssimokawa{
138103285Sikob  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
139124251Ssimokawa                                     (__v32qi)_mm256_permutexvar_epi8(__A, __B),
140124251Ssimokawa                                     (__v32qi)_mm256_setzero_si256());
141124251Ssimokawa}
142124251Ssimokawa
143124251Ssimokawastatic __inline__ __m256i __DEFAULT_FN_ATTRS256
144124251Ssimokawa_mm256_mask_permutexvar_epi8 (__m256i __W, __mmask32 __M, __m256i __A,
145124251Ssimokawa             __m256i __B)
146114909Ssimokawa{
147114909Ssimokawa  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
148114909Ssimokawa                                     (__v32qi)_mm256_permutexvar_epi8(__A, __B),
149114909Ssimokawa                                     (__v32qi)__W);
150106813Ssimokawa}
151103285Sikob
152103285Sikobstatic __inline__ __m128i __DEFAULT_FN_ATTRS128
153103285Sikob_mm_multishift_epi64_epi8(__m128i __X, __m128i __Y)
154103285Sikob{
155103285Sikob  return (__m128i)__builtin_ia32_vpmultishiftqb128((__v16qi)__X, (__v16qi)__Y);
156103285Sikob}
157103285Sikob
158110072Ssimokawastatic __inline__ __m128i __DEFAULT_FN_ATTRS128
159103285Sikob_mm_mask_multishift_epi64_epi8(__m128i __W, __mmask16 __M, __m128i __X,
160106810Ssimokawa                               __m128i __Y)
161110072Ssimokawa{
162103285Sikob  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
163103285Sikob                                   (__v16qi)_mm_multishift_epi64_epi8(__X, __Y),
164110072Ssimokawa                                   (__v16qi)__W);
165110072Ssimokawa}
166110072Ssimokawa
167110193Ssimokawastatic __inline__ __m128i __DEFAULT_FN_ATTRS128
168120660Ssimokawa_mm_maskz_multishift_epi64_epi8(__mmask16 __M, __m128i __X, __m128i __Y)
169103285Sikob{
170110072Ssimokawa  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
171110072Ssimokawa                                   (__v16qi)_mm_multishift_epi64_epi8(__X, __Y),
172106810Ssimokawa                                   (__v16qi)_mm_setzero_si128());
173103285Sikob}
174106813Ssimokawa
175103285Sikobstatic __inline__ __m256i __DEFAULT_FN_ATTRS256
176110072Ssimokawa_mm256_multishift_epi64_epi8(__m256i __X, __m256i __Y)
177110072Ssimokawa{
178110072Ssimokawa  return (__m256i)__builtin_ia32_vpmultishiftqb256((__v32qi)__X, (__v32qi)__Y);
179110582Ssimokawa}
180110072Ssimokawa
181110072Ssimokawastatic __inline__ __m256i __DEFAULT_FN_ATTRS256
182110072Ssimokawa_mm256_mask_multishift_epi64_epi8(__m256i __W, __mmask32 __M, __m256i __X,
183110072Ssimokawa                                  __m256i __Y)
184110072Ssimokawa{
185170374Ssimokawa  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
186110193Ssimokawa                                (__v32qi)_mm256_multishift_epi64_epi8(__X, __Y),
187110582Ssimokawa                                (__v32qi)__W);
188110072Ssimokawa}
189170374Ssimokawa
190110072Ssimokawastatic __inline__ __m256i __DEFAULT_FN_ATTRS256
191110072Ssimokawa_mm256_maskz_multishift_epi64_epi8(__mmask32 __M, __m256i __X, __m256i __Y)
192110072Ssimokawa{
193110072Ssimokawa  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
194110072Ssimokawa                                (__v32qi)_mm256_multishift_epi64_epi8(__X, __Y),
195110072Ssimokawa                                (__v32qi)_mm256_setzero_si256());
196110072Ssimokawa}
197110072Ssimokawa
198103285Sikob
199103285Sikob#undef __DEFAULT_FN_ATTRS128
200103285Sikob#undef __DEFAULT_FN_ATTRS256
201103285Sikob
202103285Sikob#endif
203103285Sikob