avx512vlcdintrin.h revision 341825
1/*===---- avx512vlcdintrin.h - AVX512VL and AVX512CD intrinsics ------------===
2 *
3 * Permission is hereby granted, free of charge, to any person obtaining a copy
4 * of this software and associated documentation files (the "Software"), to deal
5 * in the Software without restriction, including without limitation the rights
6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 * copies of the Software, and to permit persons to whom the Software is
8 * furnished to do so, subject to the following conditions:
9 *
10 * The above copyright notice and this permission notice shall be included in
11 * all copies or substantial portions of the Software.
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 * THE SOFTWARE.
20 *
21 *===-----------------------------------------------------------------------===
22 */
23#ifndef __IMMINTRIN_H
24#error "Never use <avx512vlcdintrin.h> directly; include <immintrin.h> instead."
25#endif
26
27#ifndef __AVX512VLCDINTRIN_H
28#define __AVX512VLCDINTRIN_H
29
/* Attribute sets applied to every intrinsic defined in this header.  Both
 * request always-inline, no debug info, and the "avx512vl,avx512cd" target
 * features; they differ only in the __min_vector_width__ argument (128 for
 * the __m128i intrinsics, 256 for the __m256i ones). */
#define __DEFAULT_FN_ATTRS128                                                  \
  __attribute__((__always_inline__, __nodebug__,                               \
                 __target__("avx512vl,avx512cd"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256                                                  \
  __attribute__((__always_inline__, __nodebug__,                               \
                 __target__("avx512vl,avx512cd"), __min_vector_width__(256)))
33
34
35static __inline__ __m128i __DEFAULT_FN_ATTRS128
36_mm_broadcastmb_epi64 (__mmask8 __A)
37{
38  return (__m128i) _mm_set1_epi64x((long long) __A);
39}
40
41static __inline__ __m256i __DEFAULT_FN_ATTRS256
42_mm256_broadcastmb_epi64 (__mmask8 __A)
43{
44  return (__m256i) _mm256_set1_epi64x((long long)__A);
45}
46
47static __inline__ __m128i __DEFAULT_FN_ATTRS128
48_mm_broadcastmw_epi32 (__mmask16 __A)
49{
50  return (__m128i) _mm_set1_epi32((int)__A);
51}
52
53static __inline__ __m256i __DEFAULT_FN_ATTRS256
54_mm256_broadcastmw_epi32 (__mmask16 __A)
55{
56  return (__m256i) _mm256_set1_epi32((int)__A);
57}
58
59
60static __inline__ __m128i __DEFAULT_FN_ATTRS128
61_mm_conflict_epi64 (__m128i __A)
62{
63  return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A,
64               (__v2di) _mm_undefined_si128 (),
65               (__mmask8) -1);
66}
67
68static __inline__ __m128i __DEFAULT_FN_ATTRS128
69_mm_mask_conflict_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
70{
71  return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A,
72               (__v2di) __W,
73               (__mmask8) __U);
74}
75
76static __inline__ __m128i __DEFAULT_FN_ATTRS128
77_mm_maskz_conflict_epi64 (__mmask8 __U, __m128i __A)
78{
79  return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A,
80               (__v2di)
81               _mm_setzero_si128 (),
82               (__mmask8) __U);
83}
84
85static __inline__ __m256i __DEFAULT_FN_ATTRS256
86_mm256_conflict_epi64 (__m256i __A)
87{
88  return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A,
89               (__v4di)  _mm256_undefined_si256 (),
90               (__mmask8) -1);
91}
92
93static __inline__ __m256i __DEFAULT_FN_ATTRS256
94_mm256_mask_conflict_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
95{
96  return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A,
97               (__v4di) __W,
98               (__mmask8) __U);
99}
100
101static __inline__ __m256i __DEFAULT_FN_ATTRS256
102_mm256_maskz_conflict_epi64 (__mmask8 __U, __m256i __A)
103{
104  return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A,
105               (__v4di) _mm256_setzero_si256 (),
106               (__mmask8) __U);
107}
108
109static __inline__ __m128i __DEFAULT_FN_ATTRS128
110_mm_conflict_epi32 (__m128i __A)
111{
112  return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A,
113               (__v4si) _mm_undefined_si128 (),
114               (__mmask8) -1);
115}
116
117static __inline__ __m128i __DEFAULT_FN_ATTRS128
118_mm_mask_conflict_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
119{
120  return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A,
121               (__v4si) __W,
122               (__mmask8) __U);
123}
124
125static __inline__ __m128i __DEFAULT_FN_ATTRS128
126_mm_maskz_conflict_epi32 (__mmask8 __U, __m128i __A)
127{
128  return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A,
129               (__v4si) _mm_setzero_si128 (),
130               (__mmask8) __U);
131}
132
133static __inline__ __m256i __DEFAULT_FN_ATTRS256
134_mm256_conflict_epi32 (__m256i __A)
135{
136  return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A,
137               (__v8si) _mm256_undefined_si256 (),
138               (__mmask8) -1);
139}
140
141static __inline__ __m256i __DEFAULT_FN_ATTRS256
142_mm256_mask_conflict_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
143{
144  return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A,
145               (__v8si) __W,
146               (__mmask8) __U);
147}
148
149static __inline__ __m256i __DEFAULT_FN_ATTRS256
150_mm256_maskz_conflict_epi32 (__mmask8 __U, __m256i __A)
151{
152  return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A,
153               (__v8si)
154               _mm256_setzero_si256 (),
155               (__mmask8) __U);
156}
157
158static __inline__ __m128i __DEFAULT_FN_ATTRS128
159_mm_lzcnt_epi32 (__m128i __A)
160{
161  return (__m128i) __builtin_ia32_vplzcntd_128 ((__v4si) __A);
162}
163
164static __inline__ __m128i __DEFAULT_FN_ATTRS128
165_mm_mask_lzcnt_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
166{
167  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
168                                             (__v4si)_mm_lzcnt_epi32(__A),
169                                             (__v4si)__W);
170}
171
172static __inline__ __m128i __DEFAULT_FN_ATTRS128
173_mm_maskz_lzcnt_epi32 (__mmask8 __U, __m128i __A)
174{
175  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
176                                             (__v4si)_mm_lzcnt_epi32(__A),
177                                             (__v4si)_mm_setzero_si128());
178}
179
180static __inline__ __m256i __DEFAULT_FN_ATTRS256
181_mm256_lzcnt_epi32 (__m256i __A)
182{
183  return (__m256i) __builtin_ia32_vplzcntd_256 ((__v8si) __A);
184}
185
186static __inline__ __m256i __DEFAULT_FN_ATTRS256
187_mm256_mask_lzcnt_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
188{
189  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
190                                             (__v8si)_mm256_lzcnt_epi32(__A),
191                                             (__v8si)__W);
192}
193
194static __inline__ __m256i __DEFAULT_FN_ATTRS256
195_mm256_maskz_lzcnt_epi32 (__mmask8 __U, __m256i __A)
196{
197  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
198                                             (__v8si)_mm256_lzcnt_epi32(__A),
199                                             (__v8si)_mm256_setzero_si256());
200}
201
202static __inline__ __m128i __DEFAULT_FN_ATTRS128
203_mm_lzcnt_epi64 (__m128i __A)
204{
205  return (__m128i) __builtin_ia32_vplzcntq_128 ((__v2di) __A);
206}
207
208static __inline__ __m128i __DEFAULT_FN_ATTRS128
209_mm_mask_lzcnt_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
210{
211  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
212                                             (__v2di)_mm_lzcnt_epi64(__A),
213                                             (__v2di)__W);
214}
215
216static __inline__ __m128i __DEFAULT_FN_ATTRS128
217_mm_maskz_lzcnt_epi64 (__mmask8 __U, __m128i __A)
218{
219  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
220                                             (__v2di)_mm_lzcnt_epi64(__A),
221                                             (__v2di)_mm_setzero_si128());
222}
223
224static __inline__ __m256i __DEFAULT_FN_ATTRS256
225_mm256_lzcnt_epi64 (__m256i __A)
226{
227  return (__m256i) __builtin_ia32_vplzcntq_256 ((__v4di) __A);
228}
229
230static __inline__ __m256i __DEFAULT_FN_ATTRS256
231_mm256_mask_lzcnt_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
232{
233  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
234                                             (__v4di)_mm256_lzcnt_epi64(__A),
235                                             (__v4di)__W);
236}
237
238static __inline__ __m256i __DEFAULT_FN_ATTRS256
239_mm256_maskz_lzcnt_epi64 (__mmask8 __U, __m256i __A)
240{
241  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
242                                             (__v4di)_mm256_lzcnt_epi64(__A),
243                                             (__v4di)_mm256_setzero_si256());
244}
245
246#undef __DEFAULT_FN_ATTRS128
247#undef __DEFAULT_FN_ATTRS256
248
249#endif /* __AVX512VLCDINTRIN_H */
250