/*===---- avx512vldqintrin.h - AVX512VL and AVX512DQ intrinsics -----------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */

#ifndef __IMMINTRIN_H
#error "Never use <avx512vldqintrin.h> directly; include <immintrin.h> instead."
#endif

#ifndef __AVX512VLDQINTRIN_H
#define __AVX512VLDQINTRIN_H

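/* Multiply packed 64-bit integers and keep the low 64 bits of each product
   (vpmullq), with merge-masked (_mask) and zero-masked (_maskz) variants. */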
static __inline__ __m256i __attribute__ ((__always_inline__, __nodebug__))
_mm256_mullo_epi64 (__m256i __A, __m256i __B) {
  return (__m256i) ((__v4di) __A * (__v4di) __B);
}

static __inline__ __m256i __attribute__ ((__always_inline__, __nodebug__))
_mm256_mask_mullo_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) {
  return (__m256i) __builtin_ia32_pmullq256_mask ((__v4di) __A,
              (__v4di) __B,
              (__v4di) __W,
              (__mmask8) __U);
}

static __inline__ __m256i __attribute__ ((__always_inline__, __nodebug__))
_mm256_maskz_mullo_epi64 (__mmask8 __U, __m256i __A, __m256i __B) {
  return (__m256i) __builtin_ia32_pmullq256_mask ((__v4di) __A,
              (__v4di) __B,
              (__v4di)
              _mm256_setzero_si256 (),
              (__mmask8) __U);
}

static __inline__ __m128i __attribute__ ((__always_inline__, __nodebug__))
_mm_mullo_epi64 (__m128i __A, __m128i __B) {
  return (__m128i) ((__v2di) __A * (__v2di) __B);
}

static __inline__ __m128i __attribute__ ((__always_inline__, __nodebug__))
_mm_mask_mullo_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
  return (__m128i) __builtin_ia32_pmullq128_mask ((__v2di) __A,
              (__v2di) __B,
              (__v2di) __W,
              (__mmask8) __U);
}

static __inline__ __m128i __attribute__ ((__always_inline__, __nodebug__))
_mm_maskz_mullo_epi64 (__mmask8 __U, __m128i __A, __m128i __B) {
  return (__m128i) __builtin_ia32_pmullq128_mask ((__v2di) __A,
              (__v2di) __B,
              (__v2di)
              _mm_setzero_si128 (),
              (__mmask8) __U);
}

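/* Bitwise AND-NOT of packed double- and single-precision values
   (vandnpd/vandnps): each result element is ~__A & __B where the
   corresponding bit of __U is set, otherwise the passthrough (__W) or zero. */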
static __inline__ __m256d __attribute__ ((__always_inline__, __nodebug__))
_mm256_mask_andnot_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
  return (__m256d) __builtin_ia32_andnpd256_mask ((__v4df) __A,
              (__v4df) __B,
              (__v4df) __W,
              (__mmask8) __U);
}

static __inline__ __m256d __attribute__ ((__always_inline__, __nodebug__))
_mm256_maskz_andnot_pd (__mmask8 __U, __m256d __A, __m256d __B) {
  return (__m256d) __builtin_ia32_andnpd256_mask ((__v4df) __A,
              (__v4df) __B,
              (__v4df)
              _mm256_setzero_pd (),
              (__mmask8) __U);
}

static __inline__ __m128d __attribute__ ((__always_inline__, __nodebug__))
_mm_mask_andnot_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
  return (__m128d) __builtin_ia32_andnpd128_mask ((__v2df) __A,
              (__v2df) __B,
              (__v2df) __W,
              (__mmask8) __U);
}

static __inline__ __m128d __attribute__ ((__always_inline__, __nodebug__))
_mm_maskz_andnot_pd (__mmask8 __U, __m128d __A, __m128d __B) {
  return (__m128d) __builtin_ia32_andnpd128_mask ((__v2df) __A,
              (__v2df) __B,
              (__v2df)
              _mm_setzero_pd (),
              (__mmask8) __U);
}

static __inline__ __m256 __attribute__ ((__always_inline__, __nodebug__))
_mm256_mask_andnot_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
  return (__m256) __builtin_ia32_andnps256_mask ((__v8sf) __A,
             (__v8sf) __B,
             (__v8sf) __W,
             (__mmask8) __U);
}

static __inline__ __m256 __attribute__ ((__always_inline__, __nodebug__))
_mm256_maskz_andnot_ps (__mmask8 __U, __m256 __A, __m256 __B) {
  return (__m256) __builtin_ia32_andnps256_mask ((__v8sf) __A,
             (__v8sf) __B,
             (__v8sf)
             _mm256_setzero_ps (),
             (__mmask8) __U);
}

static __inline__ __m128 __attribute__ ((__always_inline__, __nodebug__))
_mm_mask_andnot_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
  return (__m128) __builtin_ia32_andnps128_mask ((__v4sf) __A,
             (__v4sf) __B,
             (__v4sf) __W,
             (__mmask8) __U);
}

static __inline__ __m128 __attribute__ ((__always_inline__, __nodebug__))
_mm_maskz_andnot_ps (__mmask8 __U, __m128 __A, __m128 __B) {
  return (__m128) __builtin_ia32_andnps128_mask ((__v4sf) __A,
             (__v4sf) __B,
             (__v4sf)
             _mm_setzero_ps (),
             (__mmask8) __U);
}

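/* Bitwise AND of packed double- and single-precision values
   (vandpd/vandps), merge- or zero-masked by __U. */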
static __inline__ __m256d __attribute__ ((__always_inline__, __nodebug__))
_mm256_mask_and_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
  return (__m256d) __builtin_ia32_andpd256_mask ((__v4df) __A,
             (__v4df) __B,
             (__v4df) __W,
             (__mmask8) __U);
}

static __inline__ __m256d __attribute__ ((__always_inline__, __nodebug__))
_mm256_maskz_and_pd (__mmask8 __U, __m256d __A, __m256d __B) {
  return (__m256d) __builtin_ia32_andpd256_mask ((__v4df) __A,
             (__v4df) __B,
             (__v4df)
             _mm256_setzero_pd (),
             (__mmask8) __U);
}

static __inline__ __m128d __attribute__ ((__always_inline__, __nodebug__))
_mm_mask_and_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
  return (__m128d) __builtin_ia32_andpd128_mask ((__v2df) __A,
             (__v2df) __B,
             (__v2df) __W,
             (__mmask8) __U);
}

static __inline__ __m128d __attribute__ ((__always_inline__, __nodebug__))
_mm_maskz_and_pd (__mmask8 __U, __m128d __A, __m128d __B) {
  return (__m128d) __builtin_ia32_andpd128_mask ((__v2df) __A,
             (__v2df) __B,
             (__v2df)
             _mm_setzero_pd (),
             (__mmask8) __U);
}

static __inline__ __m256 __attribute__ ((__always_inline__, __nodebug__))
_mm256_mask_and_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
  return (__m256) __builtin_ia32_andps256_mask ((__v8sf) __A,
            (__v8sf) __B,
            (__v8sf) __W,
            (__mmask8) __U);
}

static __inline__ __m256 __attribute__ ((__always_inline__, __nodebug__))
_mm256_maskz_and_ps (__mmask8 __U, __m256 __A, __m256 __B) {
  return (__m256) __builtin_ia32_andps256_mask ((__v8sf) __A,
            (__v8sf) __B,
            (__v8sf)
            _mm256_setzero_ps (),
            (__mmask8) __U);
}

static __inline__ __m128 __attribute__ ((__always_inline__, __nodebug__))
_mm_mask_and_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
  return (__m128) __builtin_ia32_andps128_mask ((__v4sf) __A,
            (__v4sf) __B,
            (__v4sf) __W,
            (__mmask8) __U);
}

static __inline__ __m128 __attribute__ ((__always_inline__, __nodebug__))
_mm_maskz_and_ps (__mmask8 __U, __m128 __A, __m128 __B) {
  return (__m128) __builtin_ia32_andps128_mask ((__v4sf) __A,
            (__v4sf) __B,
            (__v4sf)
            _mm_setzero_ps (),
            (__mmask8) __U);
}

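/* Bitwise XOR of packed double- and single-precision values
   (vxorpd/vxorps), merge- or zero-masked by __U. */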
static __inline__ __m256d __attribute__ ((__always_inline__, __nodebug__))
_mm256_mask_xor_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
  return (__m256d) __builtin_ia32_xorpd256_mask ((__v4df) __A,
             (__v4df) __B,
             (__v4df) __W,
             (__mmask8) __U);
}

static __inline__ __m256d __attribute__ ((__always_inline__, __nodebug__))
_mm256_maskz_xor_pd (__mmask8 __U, __m256d __A, __m256d __B) {
  return (__m256d) __builtin_ia32_xorpd256_mask ((__v4df) __A,
             (__v4df) __B,
             (__v4df)
             _mm256_setzero_pd (),
             (__mmask8) __U);
}

static __inline__ __m128d __attribute__ ((__always_inline__, __nodebug__))
_mm_mask_xor_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
  return (__m128d) __builtin_ia32_xorpd128_mask ((__v2df) __A,
             (__v2df) __B,
             (__v2df) __W,
             (__mmask8) __U);
}

static __inline__ __m128d __attribute__ ((__always_inline__, __nodebug__))
_mm_maskz_xor_pd (__mmask8 __U, __m128d __A, __m128d __B) {
  return (__m128d) __builtin_ia32_xorpd128_mask ((__v2df) __A,
             (__v2df) __B,
             (__v2df)
             _mm_setzero_pd (),
             (__mmask8) __U);
}

static __inline__ __m256 __attribute__ ((__always_inline__, __nodebug__))
_mm256_mask_xor_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
  return (__m256) __builtin_ia32_xorps256_mask ((__v8sf) __A,
            (__v8sf) __B,
            (__v8sf) __W,
            (__mmask8) __U);
}

static __inline__ __m256 __attribute__ ((__always_inline__, __nodebug__))
_mm256_maskz_xor_ps (__mmask8 __U, __m256 __A, __m256 __B) {
  return (__m256) __builtin_ia32_xorps256_mask ((__v8sf) __A,
            (__v8sf) __B,
            (__v8sf)
            _mm256_setzero_ps (),
            (__mmask8) __U);
}

static __inline__ __m128 __attribute__ ((__always_inline__, __nodebug__))
_mm_mask_xor_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
  return (__m128) __builtin_ia32_xorps128_mask ((__v4sf) __A,
            (__v4sf) __B,
            (__v4sf) __W,
            (__mmask8) __U);
}

static __inline__ __m128 __attribute__ ((__always_inline__, __nodebug__))
_mm_maskz_xor_ps (__mmask8 __U, __m128 __A, __m128 __B) {
  return (__m128) __builtin_ia32_xorps128_mask ((__v4sf) __A,
            (__v4sf) __B,
            (__v4sf)
            _mm_setzero_ps (),
            (__mmask8) __U);
}

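/* Bitwise OR of packed double- and single-precision values
   (vorpd/vorps), merge- or zero-masked by __U. */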
static __inline__ __m256d __attribute__ ((__always_inline__, __nodebug__))
_mm256_mask_or_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
  return (__m256d) __builtin_ia32_orpd256_mask ((__v4df) __A,
            (__v4df) __B,
            (__v4df) __W,
            (__mmask8) __U);
}

static __inline__ __m256d __attribute__ ((__always_inline__, __nodebug__))
_mm256_maskz_or_pd (__mmask8 __U, __m256d __A, __m256d __B) {
  return (__m256d) __builtin_ia32_orpd256_mask ((__v4df) __A,
            (__v4df) __B,
            (__v4df)
            _mm256_setzero_pd (),
            (__mmask8) __U);
}

static __inline__ __m128d __attribute__ ((__always_inline__, __nodebug__))
_mm_mask_or_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
  return (__m128d) __builtin_ia32_orpd128_mask ((__v2df) __A,
            (__v2df) __B,
            (__v2df) __W,
            (__mmask8) __U);
}

static __inline__ __m128d __attribute__ ((__always_inline__, __nodebug__))
_mm_maskz_or_pd (__mmask8 __U, __m128d __A, __m128d __B) {
  return (__m128d) __builtin_ia32_orpd128_mask ((__v2df) __A,
            (__v2df) __B,
            (__v2df)
            _mm_setzero_pd (),
            (__mmask8) __U);
}

static __inline__ __m256 __attribute__ ((__always_inline__, __nodebug__))
_mm256_mask_or_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
  return (__m256) __builtin_ia32_orps256_mask ((__v8sf) __A,
                 (__v8sf) __B,
                 (__v8sf) __W,
                 (__mmask8) __U);
}

static __inline__ __m256 __attribute__ ((__always_inline__, __nodebug__))
_mm256_maskz_or_ps (__mmask8 __U, __m256 __A, __m256 __B) {
  return (__m256) __builtin_ia32_orps256_mask ((__v8sf) __A,
                 (__v8sf) __B,
                 (__v8sf)
                 _mm256_setzero_ps (),
                 (__mmask8) __U);
}

static __inline__ __m128 __attribute__ ((__always_inline__, __nodebug__))
_mm_mask_or_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
  return (__m128) __builtin_ia32_orps128_mask ((__v4sf) __A,
                 (__v4sf) __B,
                 (__v4sf) __W,
                 (__mmask8) __U);
}

static __inline__ __m128 __attribute__ ((__always_inline__, __nodebug__))
_mm_maskz_or_ps (__mmask8 __U, __m128 __A, __m128 __B) {
  return (__m128) __builtin_ia32_orps128_mask ((__v4sf) __A,
                 (__v4sf) __B,
                 (__v4sf)
                 _mm_setzero_ps (),
                 (__mmask8) __U);
}

#endif