avx512vldqintrin.h revision 287506
1/*===---- avx512vldqintrin.h - AVX512VL and AVX512DQ intrinsics ---------------------------===
2 *
3 * Permission is hereby granted, free of charge, to any person obtaining a copy
4 * of this software and associated documentation files (the "Software"), to deal
5 * in the Software without restriction, including without limitation the rights
6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 * copies of the Software, and to permit persons to whom the Software is
8 * furnished to do so, subject to the following conditions:
9 *
10 * The above copyright notice and this permission notice shall be included in
11 * all copies or substantial portions of the Software.
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 * THE SOFTWARE.
20 *
21 *===-----------------------------------------------------------------------===
22 */
23
24#ifndef __IMMINTRIN_H
25#error "Never use <avx512vldqintrin.h> directly; include <immintrin.h> instead."
26#endif
27
28#ifndef __AVX512VLDQINTRIN_H
29#define __AVX512VLDQINTRIN_H
30
/* Define the default attributes for the functions in this file.
   The __target__ attribute lets these intrinsics be used in functions
   compiled with per-function target("avx512vl,avx512dq") control, and
   produces a clear diagnostic (instead of an always_inline failure)
   when the required ISA is not enabled. */
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512dq")))
33
34static __inline__ __m256i __DEFAULT_FN_ATTRS
35_mm256_mullo_epi64 (__m256i __A, __m256i __B) {
36  return (__m256i) ((__v4di) __A * (__v4di) __B);
37}
38
39static __inline__ __m256i __DEFAULT_FN_ATTRS
40_mm256_mask_mullo_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) {
41  return (__m256i) __builtin_ia32_pmullq256_mask ((__v4di) __A,
42              (__v4di) __B,
43              (__v4di) __W,
44              (__mmask8) __U);
45}
46
47static __inline__ __m256i __DEFAULT_FN_ATTRS
48_mm256_maskz_mullo_epi64 (__mmask8 __U, __m256i __A, __m256i __B) {
49  return (__m256i) __builtin_ia32_pmullq256_mask ((__v4di) __A,
50              (__v4di) __B,
51              (__v4di)
52              _mm256_setzero_si256 (),
53              (__mmask8) __U);
54}
55
56static __inline__ __m128i __DEFAULT_FN_ATTRS
57_mm_mullo_epi64 (__m128i __A, __m128i __B) {
58  return (__m128i) ((__v2di) __A * (__v2di) __B);
59}
60
61static __inline__ __m128i __DEFAULT_FN_ATTRS
62_mm_mask_mullo_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
63  return (__m128i) __builtin_ia32_pmullq128_mask ((__v2di) __A,
64              (__v2di) __B,
65              (__v2di) __W,
66              (__mmask8) __U);
67}
68
69static __inline__ __m128i __DEFAULT_FN_ATTRS
70_mm_maskz_mullo_epi64 (__mmask8 __U, __m128i __A, __m128i __B) {
71  return (__m128i) __builtin_ia32_pmullq128_mask ((__v2di) __A,
72              (__v2di) __B,
73              (__v2di)
74              _mm_setzero_si128 (),
75              (__mmask8) __U);
76}
77
78static __inline__ __m256d __DEFAULT_FN_ATTRS
79_mm256_mask_andnot_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
80  return (__m256d) __builtin_ia32_andnpd256_mask ((__v4df) __A,
81              (__v4df) __B,
82              (__v4df) __W,
83              (__mmask8) __U);
84}
85
86static __inline__ __m256d __DEFAULT_FN_ATTRS
87_mm256_maskz_andnot_pd (__mmask8 __U, __m256d __A, __m256d __B) {
88  return (__m256d) __builtin_ia32_andnpd256_mask ((__v4df) __A,
89              (__v4df) __B,
90              (__v4df)
91              _mm256_setzero_pd (),
92              (__mmask8) __U);
93}
94
95static __inline__ __m128d __DEFAULT_FN_ATTRS
96_mm_mask_andnot_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
97  return (__m128d) __builtin_ia32_andnpd128_mask ((__v2df) __A,
98              (__v2df) __B,
99              (__v2df) __W,
100              (__mmask8) __U);
101}
102
103static __inline__ __m128d __DEFAULT_FN_ATTRS
104_mm_maskz_andnot_pd (__mmask8 __U, __m128d __A, __m128d __B) {
105  return (__m128d) __builtin_ia32_andnpd128_mask ((__v2df) __A,
106              (__v2df) __B,
107              (__v2df)
108              _mm_setzero_pd (),
109              (__mmask8) __U);
110}
111
112static __inline__ __m256 __DEFAULT_FN_ATTRS
113_mm256_mask_andnot_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
114  return (__m256) __builtin_ia32_andnps256_mask ((__v8sf) __A,
115             (__v8sf) __B,
116             (__v8sf) __W,
117             (__mmask8) __U);
118}
119
120static __inline__ __m256 __DEFAULT_FN_ATTRS
121_mm256_maskz_andnot_ps (__mmask8 __U, __m256 __A, __m256 __B) {
122  return (__m256) __builtin_ia32_andnps256_mask ((__v8sf) __A,
123             (__v8sf) __B,
124             (__v8sf)
125             _mm256_setzero_ps (),
126             (__mmask8) __U);
127}
128
129static __inline__ __m128 __DEFAULT_FN_ATTRS
130_mm_mask_andnot_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
131  return (__m128) __builtin_ia32_andnps128_mask ((__v4sf) __A,
132             (__v4sf) __B,
133             (__v4sf) __W,
134             (__mmask8) __U);
135}
136
137static __inline__ __m128 __DEFAULT_FN_ATTRS
138_mm_maskz_andnot_ps (__mmask8 __U, __m128 __A, __m128 __B) {
139  return (__m128) __builtin_ia32_andnps128_mask ((__v4sf) __A,
140             (__v4sf) __B,
141             (__v4sf)
142             _mm_setzero_ps (),
143             (__mmask8) __U);
144}
145
146static __inline__ __m256d __DEFAULT_FN_ATTRS
147_mm256_mask_and_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
148  return (__m256d) __builtin_ia32_andpd256_mask ((__v4df) __A,
149             (__v4df) __B,
150             (__v4df) __W,
151             (__mmask8) __U);
152}
153
154static __inline__ __m256d __DEFAULT_FN_ATTRS
155_mm256_maskz_and_pd (__mmask8 __U, __m256d __A, __m256d __B) {
156  return (__m256d) __builtin_ia32_andpd256_mask ((__v4df) __A,
157             (__v4df) __B,
158             (__v4df)
159             _mm256_setzero_pd (),
160             (__mmask8) __U);
161}
162
163static __inline__ __m128d __DEFAULT_FN_ATTRS
164_mm_mask_and_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
165  return (__m128d) __builtin_ia32_andpd128_mask ((__v2df) __A,
166             (__v2df) __B,
167             (__v2df) __W,
168             (__mmask8) __U);
169}
170
171static __inline__ __m128d __DEFAULT_FN_ATTRS
172_mm_maskz_and_pd (__mmask8 __U, __m128d __A, __m128d __B) {
173  return (__m128d) __builtin_ia32_andpd128_mask ((__v2df) __A,
174             (__v2df) __B,
175             (__v2df)
176             _mm_setzero_pd (),
177             (__mmask8) __U);
178}
179
180static __inline__ __m256 __DEFAULT_FN_ATTRS
181_mm256_mask_and_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
182  return (__m256) __builtin_ia32_andps256_mask ((__v8sf) __A,
183            (__v8sf) __B,
184            (__v8sf) __W,
185            (__mmask8) __U);
186}
187
188static __inline__ __m256 __DEFAULT_FN_ATTRS
189_mm256_maskz_and_ps (__mmask8 __U, __m256 __A, __m256 __B) {
190  return (__m256) __builtin_ia32_andps256_mask ((__v8sf) __A,
191            (__v8sf) __B,
192            (__v8sf)
193            _mm256_setzero_ps (),
194            (__mmask8) __U);
195}
196
197static __inline__ __m128 __DEFAULT_FN_ATTRS
198_mm_mask_and_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
199  return (__m128) __builtin_ia32_andps128_mask ((__v4sf) __A,
200            (__v4sf) __B,
201            (__v4sf) __W,
202            (__mmask8) __U);
203}
204
205static __inline__ __m128 __DEFAULT_FN_ATTRS
206_mm_maskz_and_ps (__mmask8 __U, __m128 __A, __m128 __B) {
207  return (__m128) __builtin_ia32_andps128_mask ((__v4sf) __A,
208            (__v4sf) __B,
209            (__v4sf)
210            _mm_setzero_ps (),
211            (__mmask8) __U);
212}
213
214static __inline__ __m256d __DEFAULT_FN_ATTRS
215_mm256_mask_xor_pd (__m256d __W, __mmask8 __U, __m256d __A,
216        __m256d __B) {
217  return (__m256d) __builtin_ia32_xorpd256_mask ((__v4df) __A,
218             (__v4df) __B,
219             (__v4df) __W,
220             (__mmask8) __U);
221}
222
223static __inline__ __m256d __DEFAULT_FN_ATTRS
224_mm256_maskz_xor_pd (__mmask8 __U, __m256d __A, __m256d __B) {
225  return (__m256d) __builtin_ia32_xorpd256_mask ((__v4df) __A,
226             (__v4df) __B,
227             (__v4df)
228             _mm256_setzero_pd (),
229             (__mmask8) __U);
230}
231
232static __inline__ __m128d __DEFAULT_FN_ATTRS
233_mm_mask_xor_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
234  return (__m128d) __builtin_ia32_xorpd128_mask ((__v2df) __A,
235             (__v2df) __B,
236             (__v2df) __W,
237             (__mmask8) __U);
238}
239
240static __inline__ __m128d __DEFAULT_FN_ATTRS
241_mm_maskz_xor_pd (__mmask8 __U, __m128d __A, __m128d __B) {
242  return (__m128d) __builtin_ia32_xorpd128_mask ((__v2df) __A,
243             (__v2df) __B,
244             (__v2df)
245             _mm_setzero_pd (),
246             (__mmask8) __U);
247}
248
249static __inline__ __m256 __DEFAULT_FN_ATTRS
250_mm256_mask_xor_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
251  return (__m256) __builtin_ia32_xorps256_mask ((__v8sf) __A,
252            (__v8sf) __B,
253            (__v8sf) __W,
254            (__mmask8) __U);
255}
256
257static __inline__ __m256 __DEFAULT_FN_ATTRS
258_mm256_maskz_xor_ps (__mmask8 __U, __m256 __A, __m256 __B) {
259  return (__m256) __builtin_ia32_xorps256_mask ((__v8sf) __A,
260            (__v8sf) __B,
261            (__v8sf)
262            _mm256_setzero_ps (),
263            (__mmask8) __U);
264}
265
266static __inline__ __m128 __DEFAULT_FN_ATTRS
267_mm_mask_xor_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
268  return (__m128) __builtin_ia32_xorps128_mask ((__v4sf) __A,
269            (__v4sf) __B,
270            (__v4sf) __W,
271            (__mmask8) __U);
272}
273
274static __inline__ __m128 __DEFAULT_FN_ATTRS
275_mm_maskz_xor_ps (__mmask8 __U, __m128 __A, __m128 __B) {
276  return (__m128) __builtin_ia32_xorps128_mask ((__v4sf) __A,
277            (__v4sf) __B,
278            (__v4sf)
279            _mm_setzero_ps (),
280            (__mmask8) __U);
281}
282
283static __inline__ __m256d __DEFAULT_FN_ATTRS
284_mm256_mask_or_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
285  return (__m256d) __builtin_ia32_orpd256_mask ((__v4df) __A,
286            (__v4df) __B,
287            (__v4df) __W,
288            (__mmask8) __U);
289}
290
291static __inline__ __m256d __DEFAULT_FN_ATTRS
292_mm256_maskz_or_pd (__mmask8 __U, __m256d __A, __m256d __B) {
293  return (__m256d) __builtin_ia32_orpd256_mask ((__v4df) __A,
294            (__v4df) __B,
295            (__v4df)
296            _mm256_setzero_pd (),
297            (__mmask8) __U);
298}
299
300static __inline__ __m128d __DEFAULT_FN_ATTRS
301_mm_mask_or_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
302  return (__m128d) __builtin_ia32_orpd128_mask ((__v2df) __A,
303            (__v2df) __B,
304            (__v2df) __W,
305            (__mmask8) __U);
306}
307
308static __inline__ __m128d __DEFAULT_FN_ATTRS
309_mm_maskz_or_pd (__mmask8 __U, __m128d __A, __m128d __B) {
310  return (__m128d) __builtin_ia32_orpd128_mask ((__v2df) __A,
311            (__v2df) __B,
312            (__v2df)
313            _mm_setzero_pd (),
314            (__mmask8) __U);
315}
316
317static __inline__ __m256 __DEFAULT_FN_ATTRS
318_mm256_mask_or_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
319  return (__m256) __builtin_ia32_orps256_mask ((__v8sf) __A,
320                 (__v8sf) __B,
321                 (__v8sf) __W,
322                 (__mmask8) __U);
323}
324
325static __inline__ __m256 __DEFAULT_FN_ATTRS
326_mm256_maskz_or_ps (__mmask8 __U, __m256 __A, __m256 __B) {
327  return (__m256) __builtin_ia32_orps256_mask ((__v8sf) __A,
328                 (__v8sf) __B,
329                 (__v8sf)
330                 _mm256_setzero_ps (),
331                 (__mmask8) __U);
332}
333
334static __inline__ __m128 __DEFAULT_FN_ATTRS
335_mm_mask_or_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
336  return (__m128) __builtin_ia32_orps128_mask ((__v4sf) __A,
337                 (__v4sf) __B,
338                 (__v4sf) __W,
339                 (__mmask8) __U);
340}
341
342static __inline__ __m128 __DEFAULT_FN_ATTRS
343_mm_maskz_or_ps (__mmask8 __U, __m128 __A, __m128 __B) {
344  return (__m128) __builtin_ia32_orps128_mask ((__v4sf) __A,
345                 (__v4sf) __B,
346                 (__v4sf)
347                 _mm_setzero_ps (),
348                 (__mmask8) __U);
349}
350
351#undef __DEFAULT_FN_ATTRS
352
353#endif
354