avx512dqintrin.h revision 341825
1/*===---- avx512dqintrin.h - AVX512DQ intrinsics ---------------------------===
2 *
3 * Permission is hereby granted, free of charge, to any person obtaining a copy
4 * of this software and associated documentation files (the "Software"), to deal
5 * in the Software without restriction, including without limitation the rights
6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 * copies of the Software, and to permit persons to whom the Software is
8 * furnished to do so, subject to the following conditions:
9 *
10 * The above copyright notice and this permission notice shall be included in
11 * all copies or substantial portions of the Software.
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 * THE SOFTWARE.
20 *
21 *===-----------------------------------------------------------------------===
22 */
23
24#ifndef __IMMINTRIN_H
25#error "Never use <avx512dqintrin.h> directly; include <immintrin.h> instead."
26#endif
27
28#ifndef __AVX512DQINTRIN_H
29#define __AVX512DQINTRIN_H
30
/* Attribute set applied to every inline function defined in this header. */
#define __DEFAULT_FN_ATTRS \
  __attribute__((__always_inline__, __nodebug__, __target__("avx512dq"), \
                 __min_vector_width__(512)))
33
34static __inline__ __m512i __DEFAULT_FN_ATTRS
35_mm512_mullo_epi64 (__m512i __A, __m512i __B) {
36  return (__m512i) ((__v8du) __A * (__v8du) __B);
37}
38
39static __inline__ __m512i __DEFAULT_FN_ATTRS
40_mm512_mask_mullo_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) {
41  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
42                                             (__v8di)_mm512_mullo_epi64(__A, __B),
43                                             (__v8di)__W);
44}
45
46static __inline__ __m512i __DEFAULT_FN_ATTRS
47_mm512_maskz_mullo_epi64(__mmask8 __U, __m512i __A, __m512i __B) {
48  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
49                                             (__v8di)_mm512_mullo_epi64(__A, __B),
50                                             (__v8di)_mm512_setzero_si512());
51}
52
53static __inline__ __m512d __DEFAULT_FN_ATTRS
54_mm512_xor_pd(__m512d __A, __m512d __B) {
55  return (__m512d)((__v8du)__A ^ (__v8du)__B);
56}
57
58static __inline__ __m512d __DEFAULT_FN_ATTRS
59_mm512_mask_xor_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
60  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
61                                              (__v8df)_mm512_xor_pd(__A, __B),
62                                              (__v8df)__W);
63}
64
65static __inline__ __m512d __DEFAULT_FN_ATTRS
66_mm512_maskz_xor_pd(__mmask8 __U, __m512d __A, __m512d __B) {
67  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
68                                              (__v8df)_mm512_xor_pd(__A, __B),
69                                              (__v8df)_mm512_setzero_pd());
70}
71
72static __inline__ __m512 __DEFAULT_FN_ATTRS
73_mm512_xor_ps (__m512 __A, __m512 __B) {
74  return (__m512)((__v16su)__A ^ (__v16su)__B);
75}
76
77static __inline__ __m512 __DEFAULT_FN_ATTRS
78_mm512_mask_xor_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
79  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
80                                             (__v16sf)_mm512_xor_ps(__A, __B),
81                                             (__v16sf)__W);
82}
83
84static __inline__ __m512 __DEFAULT_FN_ATTRS
85_mm512_maskz_xor_ps(__mmask16 __U, __m512 __A, __m512 __B) {
86  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
87                                             (__v16sf)_mm512_xor_ps(__A, __B),
88                                             (__v16sf)_mm512_setzero_ps());
89}
90
91static __inline__ __m512d __DEFAULT_FN_ATTRS
92_mm512_or_pd(__m512d __A, __m512d __B) {
93  return (__m512d)((__v8du)__A | (__v8du)__B);
94}
95
96static __inline__ __m512d __DEFAULT_FN_ATTRS
97_mm512_mask_or_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
98  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
99                                              (__v8df)_mm512_or_pd(__A, __B),
100                                              (__v8df)__W);
101}
102
103static __inline__ __m512d __DEFAULT_FN_ATTRS
104_mm512_maskz_or_pd(__mmask8 __U, __m512d __A, __m512d __B) {
105  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
106                                              (__v8df)_mm512_or_pd(__A, __B),
107                                              (__v8df)_mm512_setzero_pd());
108}
109
110static __inline__ __m512 __DEFAULT_FN_ATTRS
111_mm512_or_ps(__m512 __A, __m512 __B) {
112  return (__m512)((__v16su)__A | (__v16su)__B);
113}
114
115static __inline__ __m512 __DEFAULT_FN_ATTRS
116_mm512_mask_or_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
117  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
118                                             (__v16sf)_mm512_or_ps(__A, __B),
119                                             (__v16sf)__W);
120}
121
122static __inline__ __m512 __DEFAULT_FN_ATTRS
123_mm512_maskz_or_ps(__mmask16 __U, __m512 __A, __m512 __B) {
124  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
125                                             (__v16sf)_mm512_or_ps(__A, __B),
126                                             (__v16sf)_mm512_setzero_ps());
127}
128
129static __inline__ __m512d __DEFAULT_FN_ATTRS
130_mm512_and_pd(__m512d __A, __m512d __B) {
131  return (__m512d)((__v8du)__A & (__v8du)__B);
132}
133
134static __inline__ __m512d __DEFAULT_FN_ATTRS
135_mm512_mask_and_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
136  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
137                                              (__v8df)_mm512_and_pd(__A, __B),
138                                              (__v8df)__W);
139}
140
141static __inline__ __m512d __DEFAULT_FN_ATTRS
142_mm512_maskz_and_pd(__mmask8 __U, __m512d __A, __m512d __B) {
143  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
144                                              (__v8df)_mm512_and_pd(__A, __B),
145                                              (__v8df)_mm512_setzero_pd());
146}
147
148static __inline__ __m512 __DEFAULT_FN_ATTRS
149_mm512_and_ps(__m512 __A, __m512 __B) {
150  return (__m512)((__v16su)__A & (__v16su)__B);
151}
152
153static __inline__ __m512 __DEFAULT_FN_ATTRS
154_mm512_mask_and_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
155  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
156                                             (__v16sf)_mm512_and_ps(__A, __B),
157                                             (__v16sf)__W);
158}
159
160static __inline__ __m512 __DEFAULT_FN_ATTRS
161_mm512_maskz_and_ps(__mmask16 __U, __m512 __A, __m512 __B) {
162  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
163                                             (__v16sf)_mm512_and_ps(__A, __B),
164                                             (__v16sf)_mm512_setzero_ps());
165}
166
167static __inline__ __m512d __DEFAULT_FN_ATTRS
168_mm512_andnot_pd(__m512d __A, __m512d __B) {
169  return (__m512d)(~(__v8du)__A & (__v8du)__B);
170}
171
172static __inline__ __m512d __DEFAULT_FN_ATTRS
173_mm512_mask_andnot_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
174  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
175                                              (__v8df)_mm512_andnot_pd(__A, __B),
176                                              (__v8df)__W);
177}
178
179static __inline__ __m512d __DEFAULT_FN_ATTRS
180_mm512_maskz_andnot_pd(__mmask8 __U, __m512d __A, __m512d __B) {
181  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
182                                              (__v8df)_mm512_andnot_pd(__A, __B),
183                                              (__v8df)_mm512_setzero_pd());
184}
185
186static __inline__ __m512 __DEFAULT_FN_ATTRS
187_mm512_andnot_ps(__m512 __A, __m512 __B) {
188  return (__m512)(~(__v16su)__A & (__v16su)__B);
189}
190
191static __inline__ __m512 __DEFAULT_FN_ATTRS
192_mm512_mask_andnot_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
193  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
194                                             (__v16sf)_mm512_andnot_ps(__A, __B),
195                                             (__v16sf)__W);
196}
197
198static __inline__ __m512 __DEFAULT_FN_ATTRS
199_mm512_maskz_andnot_ps(__mmask16 __U, __m512 __A, __m512 __B) {
200  return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
201                                             (__v16sf)_mm512_andnot_ps(__A, __B),
202                                             (__v16sf)_mm512_setzero_ps());
203}
204
205static __inline__ __m512i __DEFAULT_FN_ATTRS
206_mm512_cvtpd_epi64 (__m512d __A) {
207  return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
208                (__v8di) _mm512_setzero_si512(),
209                (__mmask8) -1,
210                _MM_FROUND_CUR_DIRECTION);
211}
212
213static __inline__ __m512i __DEFAULT_FN_ATTRS
214_mm512_mask_cvtpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A) {
215  return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
216                (__v8di) __W,
217                (__mmask8) __U,
218                _MM_FROUND_CUR_DIRECTION);
219}
220
221static __inline__ __m512i __DEFAULT_FN_ATTRS
222_mm512_maskz_cvtpd_epi64 (__mmask8 __U, __m512d __A) {
223  return (__m512i) __builtin_ia32_cvtpd2qq512_mask ((__v8df) __A,
224                (__v8di) _mm512_setzero_si512(),
225                (__mmask8) __U,
226                _MM_FROUND_CUR_DIRECTION);
227}
228
/* Invokes __builtin_ia32_cvtpd2qq512_mask on A with _mm512_setzero_si512(),
 * mask (__mmask8)-1 and R as the final int argument.  The expansion is fully
 * parenthesized so operators applied to the macro act on the cast result. */
#define _mm512_cvt_roundpd_epi64(A, R) \
  ((__m512i)__builtin_ia32_cvtpd2qq512_mask((__v8df)(__m512d)(A), \
                                            (__v8di)_mm512_setzero_si512(), \
                                            (__mmask8)-1, (int)(R)))
233
/* Invokes __builtin_ia32_cvtpd2qq512_mask on A with W, mask U and R as the
 * final int argument.  The expansion is fully parenthesized so operators
 * applied to the macro act on the cast result. */
#define _mm512_mask_cvt_roundpd_epi64(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvtpd2qq512_mask((__v8df)(__m512d)(A), \
                                            (__v8di)(__m512i)(W), \
                                            (__mmask8)(U), (int)(R)))
238
/* Invokes __builtin_ia32_cvtpd2qq512_mask on A with _mm512_setzero_si512(),
 * mask U and R as the final int argument.  The expansion is fully
 * parenthesized so operators applied to the macro act on the cast result. */
#define _mm512_maskz_cvt_roundpd_epi64(U, A, R) \
  ((__m512i)__builtin_ia32_cvtpd2qq512_mask((__v8df)(__m512d)(A), \
                                            (__v8di)_mm512_setzero_si512(), \
                                            (__mmask8)(U), (int)(R)))
243
244static __inline__ __m512i __DEFAULT_FN_ATTRS
245_mm512_cvtpd_epu64 (__m512d __A) {
246  return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
247                 (__v8di) _mm512_setzero_si512(),
248                 (__mmask8) -1,
249                 _MM_FROUND_CUR_DIRECTION);
250}
251
252static __inline__ __m512i __DEFAULT_FN_ATTRS
253_mm512_mask_cvtpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A) {
254  return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
255                 (__v8di) __W,
256                 (__mmask8) __U,
257                 _MM_FROUND_CUR_DIRECTION);
258}
259
260static __inline__ __m512i __DEFAULT_FN_ATTRS
261_mm512_maskz_cvtpd_epu64 (__mmask8 __U, __m512d __A) {
262  return (__m512i) __builtin_ia32_cvtpd2uqq512_mask ((__v8df) __A,
263                 (__v8di) _mm512_setzero_si512(),
264                 (__mmask8) __U,
265                 _MM_FROUND_CUR_DIRECTION);
266}
267
/* Invokes __builtin_ia32_cvtpd2uqq512_mask on A with _mm512_setzero_si512(),
 * mask (__mmask8)-1 and R as the final int argument.  The expansion is fully
 * parenthesized so operators applied to the macro act on the cast result. */
#define _mm512_cvt_roundpd_epu64(A, R) \
  ((__m512i)__builtin_ia32_cvtpd2uqq512_mask((__v8df)(__m512d)(A), \
                                             (__v8di)_mm512_setzero_si512(), \
                                             (__mmask8)-1, (int)(R)))
272
/* Invokes __builtin_ia32_cvtpd2uqq512_mask on A with W, mask U and R as the
 * final int argument.  The expansion is fully parenthesized so operators
 * applied to the macro act on the cast result. */
#define _mm512_mask_cvt_roundpd_epu64(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvtpd2uqq512_mask((__v8df)(__m512d)(A), \
                                             (__v8di)(__m512i)(W), \
                                             (__mmask8)(U), (int)(R)))
277
/* Invokes __builtin_ia32_cvtpd2uqq512_mask on A with _mm512_setzero_si512(),
 * mask U and R as the final int argument.  The expansion is fully
 * parenthesized so operators applied to the macro act on the cast result. */
#define _mm512_maskz_cvt_roundpd_epu64(U, A, R) \
  ((__m512i)__builtin_ia32_cvtpd2uqq512_mask((__v8df)(__m512d)(A), \
                                             (__v8di)_mm512_setzero_si512(), \
                                             (__mmask8)(U), (int)(R)))
282
283static __inline__ __m512i __DEFAULT_FN_ATTRS
284_mm512_cvtps_epi64 (__m256 __A) {
285  return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
286                (__v8di) _mm512_setzero_si512(),
287                (__mmask8) -1,
288                _MM_FROUND_CUR_DIRECTION);
289}
290
291static __inline__ __m512i __DEFAULT_FN_ATTRS
292_mm512_mask_cvtps_epi64 (__m512i __W, __mmask8 __U, __m256 __A) {
293  return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
294                (__v8di) __W,
295                (__mmask8) __U,
296                _MM_FROUND_CUR_DIRECTION);
297}
298
299static __inline__ __m512i __DEFAULT_FN_ATTRS
300_mm512_maskz_cvtps_epi64 (__mmask8 __U, __m256 __A) {
301  return (__m512i) __builtin_ia32_cvtps2qq512_mask ((__v8sf) __A,
302                (__v8di) _mm512_setzero_si512(),
303                (__mmask8) __U,
304                _MM_FROUND_CUR_DIRECTION);
305}
306
/* Invokes __builtin_ia32_cvtps2qq512_mask on A with _mm512_setzero_si512(),
 * mask (__mmask8)-1 and R as the final int argument.  The expansion is fully
 * parenthesized so operators applied to the macro act on the cast result. */
#define _mm512_cvt_roundps_epi64(A, R) \
  ((__m512i)__builtin_ia32_cvtps2qq512_mask((__v8sf)(__m256)(A), \
                                            (__v8di)_mm512_setzero_si512(), \
                                            (__mmask8)-1, (int)(R)))
311
/* Invokes __builtin_ia32_cvtps2qq512_mask on A with W, mask U and R as the
 * final int argument.  The expansion is fully parenthesized so operators
 * applied to the macro act on the cast result. */
#define _mm512_mask_cvt_roundps_epi64(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvtps2qq512_mask((__v8sf)(__m256)(A), \
                                            (__v8di)(__m512i)(W), \
                                            (__mmask8)(U), (int)(R)))
316
/* Invokes __builtin_ia32_cvtps2qq512_mask on A with _mm512_setzero_si512(),
 * mask U and R as the final int argument.  The expansion is fully
 * parenthesized so operators applied to the macro act on the cast result. */
#define _mm512_maskz_cvt_roundps_epi64(U, A, R) \
  ((__m512i)__builtin_ia32_cvtps2qq512_mask((__v8sf)(__m256)(A), \
                                            (__v8di)_mm512_setzero_si512(), \
                                            (__mmask8)(U), (int)(R)))
321
322static __inline__ __m512i __DEFAULT_FN_ATTRS
323_mm512_cvtps_epu64 (__m256 __A) {
324  return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
325                 (__v8di) _mm512_setzero_si512(),
326                 (__mmask8) -1,
327                 _MM_FROUND_CUR_DIRECTION);
328}
329
330static __inline__ __m512i __DEFAULT_FN_ATTRS
331_mm512_mask_cvtps_epu64 (__m512i __W, __mmask8 __U, __m256 __A) {
332  return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
333                 (__v8di) __W,
334                 (__mmask8) __U,
335                 _MM_FROUND_CUR_DIRECTION);
336}
337
338static __inline__ __m512i __DEFAULT_FN_ATTRS
339_mm512_maskz_cvtps_epu64 (__mmask8 __U, __m256 __A) {
340  return (__m512i) __builtin_ia32_cvtps2uqq512_mask ((__v8sf) __A,
341                 (__v8di) _mm512_setzero_si512(),
342                 (__mmask8) __U,
343                 _MM_FROUND_CUR_DIRECTION);
344}
345
/* Invokes __builtin_ia32_cvtps2uqq512_mask on A with _mm512_setzero_si512(),
 * mask (__mmask8)-1 and R as the final int argument.  The expansion is fully
 * parenthesized so operators applied to the macro act on the cast result. */
#define _mm512_cvt_roundps_epu64(A, R) \
  ((__m512i)__builtin_ia32_cvtps2uqq512_mask((__v8sf)(__m256)(A), \
                                             (__v8di)_mm512_setzero_si512(), \
                                             (__mmask8)-1, (int)(R)))
350
/* Invokes __builtin_ia32_cvtps2uqq512_mask on A with W, mask U and R as the
 * final int argument.  The expansion is fully parenthesized so operators
 * applied to the macro act on the cast result. */
#define _mm512_mask_cvt_roundps_epu64(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvtps2uqq512_mask((__v8sf)(__m256)(A), \
                                             (__v8di)(__m512i)(W), \
                                             (__mmask8)(U), (int)(R)))
355
/* Invokes __builtin_ia32_cvtps2uqq512_mask on A with _mm512_setzero_si512(),
 * mask U and R as the final int argument.  The expansion is fully
 * parenthesized so operators applied to the macro act on the cast result. */
#define _mm512_maskz_cvt_roundps_epu64(U, A, R) \
  ((__m512i)__builtin_ia32_cvtps2uqq512_mask((__v8sf)(__m256)(A), \
                                             (__v8di)_mm512_setzero_si512(), \
                                             (__mmask8)(U), (int)(R)))
360
361
362static __inline__ __m512d __DEFAULT_FN_ATTRS
363_mm512_cvtepi64_pd (__m512i __A) {
364  return (__m512d)__builtin_convertvector((__v8di)__A, __v8df);
365}
366
367static __inline__ __m512d __DEFAULT_FN_ATTRS
368_mm512_mask_cvtepi64_pd (__m512d __W, __mmask8 __U, __m512i __A) {
369  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
370                                              (__v8df)_mm512_cvtepi64_pd(__A),
371                                              (__v8df)__W);
372}
373
374static __inline__ __m512d __DEFAULT_FN_ATTRS
375_mm512_maskz_cvtepi64_pd (__mmask8 __U, __m512i __A) {
376  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
377                                              (__v8df)_mm512_cvtepi64_pd(__A),
378                                              (__v8df)_mm512_setzero_pd());
379}
380
/* Invokes __builtin_ia32_cvtqq2pd512_mask on A with _mm512_setzero_pd(),
 * mask (__mmask8)-1 and R as the final int argument.  The expansion is fully
 * parenthesized so operators applied to the macro act on the cast result. */
#define _mm512_cvt_roundepi64_pd(A, R) \
  ((__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(__m512i)(A), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)))
385
/* Invokes __builtin_ia32_cvtqq2pd512_mask on A with W, mask U and R as the
 * final int argument.  The expansion is fully parenthesized so operators
 * applied to the macro act on the cast result. */
#define _mm512_mask_cvt_roundepi64_pd(W, U, A, R) \
  ((__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(__m512i)(A), \
                                            (__v8df)(__m512d)(W), \
                                            (__mmask8)(U), (int)(R)))
390
/* Invokes __builtin_ia32_cvtqq2pd512_mask on A with _mm512_setzero_pd(),
 * mask U and R as the final int argument.  The expansion is fully
 * parenthesized so operators applied to the macro act on the cast result. */
#define _mm512_maskz_cvt_roundepi64_pd(U, A, R) \
  ((__m512d)__builtin_ia32_cvtqq2pd512_mask((__v8di)(__m512i)(A), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))
395
396static __inline__ __m256 __DEFAULT_FN_ATTRS
397_mm512_cvtepi64_ps (__m512i __A) {
398  return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
399               (__v8sf) _mm256_setzero_ps(),
400               (__mmask8) -1,
401               _MM_FROUND_CUR_DIRECTION);
402}
403
404static __inline__ __m256 __DEFAULT_FN_ATTRS
405_mm512_mask_cvtepi64_ps (__m256 __W, __mmask8 __U, __m512i __A) {
406  return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
407               (__v8sf) __W,
408               (__mmask8) __U,
409               _MM_FROUND_CUR_DIRECTION);
410}
411
412static __inline__ __m256 __DEFAULT_FN_ATTRS
413_mm512_maskz_cvtepi64_ps (__mmask8 __U, __m512i __A) {
414  return (__m256) __builtin_ia32_cvtqq2ps512_mask ((__v8di) __A,
415               (__v8sf) _mm256_setzero_ps(),
416               (__mmask8) __U,
417               _MM_FROUND_CUR_DIRECTION);
418}
419
/* Invokes __builtin_ia32_cvtqq2ps512_mask on A with _mm256_setzero_ps(),
 * mask (__mmask8)-1 and R as the final int argument.  The expansion is fully
 * parenthesized so operators applied to the macro act on the cast result. */
#define _mm512_cvt_roundepi64_ps(A, R) \
  ((__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(__m512i)(A), \
                                           (__v8sf)_mm256_setzero_ps(), \
                                           (__mmask8)-1, (int)(R)))
424
/* Invokes __builtin_ia32_cvtqq2ps512_mask on A with W, mask U and R as the
 * final int argument.  The expansion is fully parenthesized so operators
 * applied to the macro act on the cast result. */
#define _mm512_mask_cvt_roundepi64_ps(W, U, A, R) \
  ((__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(__m512i)(A), \
                                           (__v8sf)(__m256)(W), (__mmask8)(U), \
                                           (int)(R)))
429
/* Invokes __builtin_ia32_cvtqq2ps512_mask on A with _mm256_setzero_ps(),
 * mask U and R as the final int argument.  The expansion is fully
 * parenthesized so operators applied to the macro act on the cast result. */
#define _mm512_maskz_cvt_roundepi64_ps(U, A, R) \
  ((__m256)__builtin_ia32_cvtqq2ps512_mask((__v8di)(__m512i)(A), \
                                           (__v8sf)_mm256_setzero_ps(), \
                                           (__mmask8)(U), (int)(R)))
434
435
436static __inline__ __m512i __DEFAULT_FN_ATTRS
437_mm512_cvttpd_epi64 (__m512d __A) {
438  return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
439                 (__v8di) _mm512_setzero_si512(),
440                 (__mmask8) -1,
441                 _MM_FROUND_CUR_DIRECTION);
442}
443
444static __inline__ __m512i __DEFAULT_FN_ATTRS
445_mm512_mask_cvttpd_epi64 (__m512i __W, __mmask8 __U, __m512d __A) {
446  return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
447                 (__v8di) __W,
448                 (__mmask8) __U,
449                 _MM_FROUND_CUR_DIRECTION);
450}
451
452static __inline__ __m512i __DEFAULT_FN_ATTRS
453_mm512_maskz_cvttpd_epi64 (__mmask8 __U, __m512d __A) {
454  return (__m512i) __builtin_ia32_cvttpd2qq512_mask ((__v8df) __A,
455                 (__v8di) _mm512_setzero_si512(),
456                 (__mmask8) __U,
457                 _MM_FROUND_CUR_DIRECTION);
458}
459
/* Invokes __builtin_ia32_cvttpd2qq512_mask on A with _mm512_setzero_si512(),
 * mask (__mmask8)-1 and R as the final int argument.  The expansion is fully
 * parenthesized so operators applied to the macro act on the cast result. */
#define _mm512_cvtt_roundpd_epi64(A, R) \
  ((__m512i)__builtin_ia32_cvttpd2qq512_mask((__v8df)(__m512d)(A), \
                                             (__v8di)_mm512_setzero_si512(), \
                                             (__mmask8)-1, (int)(R)))
464
/* Invokes __builtin_ia32_cvttpd2qq512_mask on A with W, mask U and R as the
 * final int argument.  The expansion is fully parenthesized so operators
 * applied to the macro act on the cast result. */
#define _mm512_mask_cvtt_roundpd_epi64(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvttpd2qq512_mask((__v8df)(__m512d)(A), \
                                             (__v8di)(__m512i)(W), \
                                             (__mmask8)(U), (int)(R)))
469
/* Invokes __builtin_ia32_cvttpd2qq512_mask on A with _mm512_setzero_si512(),
 * mask U and R as the final int argument.  The expansion is fully
 * parenthesized so operators applied to the macro act on the cast result. */
#define _mm512_maskz_cvtt_roundpd_epi64(U, A, R) \
  ((__m512i)__builtin_ia32_cvttpd2qq512_mask((__v8df)(__m512d)(A), \
                                             (__v8di)_mm512_setzero_si512(), \
                                             (__mmask8)(U), (int)(R)))
474
475static __inline__ __m512i __DEFAULT_FN_ATTRS
476_mm512_cvttpd_epu64 (__m512d __A) {
477  return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
478                  (__v8di) _mm512_setzero_si512(),
479                  (__mmask8) -1,
480                  _MM_FROUND_CUR_DIRECTION);
481}
482
483static __inline__ __m512i __DEFAULT_FN_ATTRS
484_mm512_mask_cvttpd_epu64 (__m512i __W, __mmask8 __U, __m512d __A) {
485  return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
486                  (__v8di) __W,
487                  (__mmask8) __U,
488                  _MM_FROUND_CUR_DIRECTION);
489}
490
491static __inline__ __m512i __DEFAULT_FN_ATTRS
492_mm512_maskz_cvttpd_epu64 (__mmask8 __U, __m512d __A) {
493  return (__m512i) __builtin_ia32_cvttpd2uqq512_mask ((__v8df) __A,
494                  (__v8di) _mm512_setzero_si512(),
495                  (__mmask8) __U,
496                  _MM_FROUND_CUR_DIRECTION);
497}
498
/* Invokes __builtin_ia32_cvttpd2uqq512_mask on A with _mm512_setzero_si512(),
 * mask (__mmask8)-1 and R as the final int argument.  The expansion is fully
 * parenthesized so operators applied to the macro act on the cast result. */
#define _mm512_cvtt_roundpd_epu64(A, R) \
  ((__m512i)__builtin_ia32_cvttpd2uqq512_mask((__v8df)(__m512d)(A), \
                                              (__v8di)_mm512_setzero_si512(), \
                                              (__mmask8)-1, (int)(R)))
503
/* Invokes __builtin_ia32_cvttpd2uqq512_mask on A with W, mask U and R as the
 * final int argument.  The expansion is fully parenthesized so operators
 * applied to the macro act on the cast result. */
#define _mm512_mask_cvtt_roundpd_epu64(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvttpd2uqq512_mask((__v8df)(__m512d)(A), \
                                              (__v8di)(__m512i)(W), \
                                              (__mmask8)(U), (int)(R)))
508
/* Invokes __builtin_ia32_cvttpd2uqq512_mask on A with _mm512_setzero_si512(),
 * mask U and R as the final int argument.  The expansion is fully
 * parenthesized so operators applied to the macro act on the cast result. */
#define _mm512_maskz_cvtt_roundpd_epu64(U, A, R) \
  ((__m512i)__builtin_ia32_cvttpd2uqq512_mask((__v8df)(__m512d)(A), \
                                              (__v8di)_mm512_setzero_si512(), \
                                              (__mmask8)(U), (int)(R)))
513
514static __inline__ __m512i __DEFAULT_FN_ATTRS
515_mm512_cvttps_epi64 (__m256 __A) {
516  return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
517                 (__v8di) _mm512_setzero_si512(),
518                 (__mmask8) -1,
519                 _MM_FROUND_CUR_DIRECTION);
520}
521
522static __inline__ __m512i __DEFAULT_FN_ATTRS
523_mm512_mask_cvttps_epi64 (__m512i __W, __mmask8 __U, __m256 __A) {
524  return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
525                 (__v8di) __W,
526                 (__mmask8) __U,
527                 _MM_FROUND_CUR_DIRECTION);
528}
529
530static __inline__ __m512i __DEFAULT_FN_ATTRS
531_mm512_maskz_cvttps_epi64 (__mmask8 __U, __m256 __A) {
532  return (__m512i) __builtin_ia32_cvttps2qq512_mask ((__v8sf) __A,
533                 (__v8di) _mm512_setzero_si512(),
534                 (__mmask8) __U,
535                 _MM_FROUND_CUR_DIRECTION);
536}
537
/* Invokes __builtin_ia32_cvttps2qq512_mask on A with _mm512_setzero_si512(),
 * mask (__mmask8)-1 and R as the final int argument.  The expansion is fully
 * parenthesized so operators applied to the macro act on the cast result. */
#define _mm512_cvtt_roundps_epi64(A, R) \
  ((__m512i)__builtin_ia32_cvttps2qq512_mask((__v8sf)(__m256)(A), \
                                             (__v8di)_mm512_setzero_si512(), \
                                             (__mmask8)-1, (int)(R)))
542
/* Invokes __builtin_ia32_cvttps2qq512_mask on A with W, mask U and R as the
 * final int argument.  The expansion is fully parenthesized so operators
 * applied to the macro act on the cast result. */
#define _mm512_mask_cvtt_roundps_epi64(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvttps2qq512_mask((__v8sf)(__m256)(A), \
                                             (__v8di)(__m512i)(W), \
                                             (__mmask8)(U), (int)(R)))
547
/* Invokes __builtin_ia32_cvttps2qq512_mask on A with _mm512_setzero_si512(),
 * mask U and R as the final int argument.  The expansion is fully
 * parenthesized so operators applied to the macro act on the cast result. */
#define _mm512_maskz_cvtt_roundps_epi64(U, A, R) \
  ((__m512i)__builtin_ia32_cvttps2qq512_mask((__v8sf)(__m256)(A), \
                                             (__v8di)_mm512_setzero_si512(), \
                                             (__mmask8)(U), (int)(R)))
552
553static __inline__ __m512i __DEFAULT_FN_ATTRS
554_mm512_cvttps_epu64 (__m256 __A) {
555  return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
556                  (__v8di) _mm512_setzero_si512(),
557                  (__mmask8) -1,
558                  _MM_FROUND_CUR_DIRECTION);
559}
560
561static __inline__ __m512i __DEFAULT_FN_ATTRS
562_mm512_mask_cvttps_epu64 (__m512i __W, __mmask8 __U, __m256 __A) {
563  return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
564                  (__v8di) __W,
565                  (__mmask8) __U,
566                  _MM_FROUND_CUR_DIRECTION);
567}
568
569static __inline__ __m512i __DEFAULT_FN_ATTRS
570_mm512_maskz_cvttps_epu64 (__mmask8 __U, __m256 __A) {
571  return (__m512i) __builtin_ia32_cvttps2uqq512_mask ((__v8sf) __A,
572                  (__v8di) _mm512_setzero_si512(),
573                  (__mmask8) __U,
574                  _MM_FROUND_CUR_DIRECTION);
575}
576
/* Invokes __builtin_ia32_cvttps2uqq512_mask on A with _mm512_setzero_si512(),
 * mask (__mmask8)-1 and R as the final int argument.  The expansion is fully
 * parenthesized so operators applied to the macro act on the cast result. */
#define _mm512_cvtt_roundps_epu64(A, R) \
  ((__m512i)__builtin_ia32_cvttps2uqq512_mask((__v8sf)(__m256)(A), \
                                              (__v8di)_mm512_setzero_si512(), \
                                              (__mmask8)-1, (int)(R)))
581
/* Invokes __builtin_ia32_cvttps2uqq512_mask on A with W, mask U and R as the
 * final int argument.  The expansion is fully parenthesized so operators
 * applied to the macro act on the cast result. */
#define _mm512_mask_cvtt_roundps_epu64(W, U, A, R) \
  ((__m512i)__builtin_ia32_cvttps2uqq512_mask((__v8sf)(__m256)(A), \
                                              (__v8di)(__m512i)(W), \
                                              (__mmask8)(U), (int)(R)))
586
/* Invokes __builtin_ia32_cvttps2uqq512_mask on A with _mm512_setzero_si512(),
 * mask U and R as the final int argument.  The expansion is fully
 * parenthesized so operators applied to the macro act on the cast result. */
#define _mm512_maskz_cvtt_roundps_epu64(U, A, R) \
  ((__m512i)__builtin_ia32_cvttps2uqq512_mask((__v8sf)(__m256)(A), \
                                              (__v8di)_mm512_setzero_si512(), \
                                              (__mmask8)(U), (int)(R)))
591
592static __inline__ __m512d __DEFAULT_FN_ATTRS
593_mm512_cvtepu64_pd (__m512i __A) {
594  return (__m512d)__builtin_convertvector((__v8du)__A, __v8df);
595}
596
597static __inline__ __m512d __DEFAULT_FN_ATTRS
598_mm512_mask_cvtepu64_pd (__m512d __W, __mmask8 __U, __m512i __A) {
599  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
600                                              (__v8df)_mm512_cvtepu64_pd(__A),
601                                              (__v8df)__W);
602}
603
604static __inline__ __m512d __DEFAULT_FN_ATTRS
605_mm512_maskz_cvtepu64_pd (__mmask8 __U, __m512i __A) {
606  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
607                                              (__v8df)_mm512_cvtepu64_pd(__A),
608                                              (__v8df)_mm512_setzero_pd());
609}
610
/* Invokes __builtin_ia32_cvtuqq2pd512_mask on A with _mm512_setzero_pd(),
 * mask (__mmask8)-1 and R as the final int argument.  The expansion is fully
 * parenthesized so operators applied to the macro act on the cast result. */
#define _mm512_cvt_roundepu64_pd(A, R) \
  ((__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(__m512i)(A), \
                                             (__v8df)_mm512_setzero_pd(), \
                                             (__mmask8)-1, (int)(R)))
615
/* Invokes __builtin_ia32_cvtuqq2pd512_mask on A with W, mask U and R as the
 * final int argument.  The expansion is fully parenthesized so operators
 * applied to the macro act on the cast result. */
#define _mm512_mask_cvt_roundepu64_pd(W, U, A, R) \
  ((__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(__m512i)(A), \
                                             (__v8df)(__m512d)(W), \
                                             (__mmask8)(U), (int)(R)))
620
621
/* Invokes __builtin_ia32_cvtuqq2pd512_mask on A with _mm512_setzero_pd(),
 * mask U and R as the final int argument.  The expansion is fully
 * parenthesized so operators applied to the macro act on the cast result. */
#define _mm512_maskz_cvt_roundepu64_pd(U, A, R) \
  ((__m512d)__builtin_ia32_cvtuqq2pd512_mask((__v8di)(__m512i)(A), \
                                             (__v8df)_mm512_setzero_pd(), \
                                             (__mmask8)(U), (int)(R)))
626
627
628static __inline__ __m256 __DEFAULT_FN_ATTRS
629_mm512_cvtepu64_ps (__m512i __A) {
630  return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
631                (__v8sf) _mm256_setzero_ps(),
632                (__mmask8) -1,
633                _MM_FROUND_CUR_DIRECTION);
634}
635
636static __inline__ __m256 __DEFAULT_FN_ATTRS
637_mm512_mask_cvtepu64_ps (__m256 __W, __mmask8 __U, __m512i __A) {
638  return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
639                (__v8sf) __W,
640                (__mmask8) __U,
641                _MM_FROUND_CUR_DIRECTION);
642}
643
644static __inline__ __m256 __DEFAULT_FN_ATTRS
645_mm512_maskz_cvtepu64_ps (__mmask8 __U, __m512i __A) {
646  return (__m256) __builtin_ia32_cvtuqq2ps512_mask ((__v8di) __A,
647                (__v8sf) _mm256_setzero_ps(),
648                (__mmask8) __U,
649                _MM_FROUND_CUR_DIRECTION);
650}
651
/* Invokes __builtin_ia32_cvtuqq2ps512_mask on A with _mm256_setzero_ps(),
 * mask (__mmask8)-1 and R as the final int argument.  The expansion is fully
 * parenthesized so operators applied to the macro act on the cast result. */
#define _mm512_cvt_roundepu64_ps(A, R) \
  ((__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(__m512i)(A), \
                                            (__v8sf)_mm256_setzero_ps(), \
                                            (__mmask8)-1, (int)(R)))
656
/* Invokes __builtin_ia32_cvtuqq2ps512_mask on A with W, mask U and R as the
 * final int argument.  The expansion is fully parenthesized so operators
 * applied to the macro act on the cast result. */
#define _mm512_mask_cvt_roundepu64_ps(W, U, A, R) \
  ((__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(__m512i)(A), \
                                            (__v8sf)(__m256)(W), (__mmask8)(U), \
                                            (int)(R)))
661
/* Invokes __builtin_ia32_cvtuqq2ps512_mask on A with _mm256_setzero_ps(),
 * mask U and R as the final int argument.  The expansion is fully
 * parenthesized so operators applied to the macro act on the cast result. */
#define _mm512_maskz_cvt_roundepu64_ps(U, A, R) \
  ((__m256)__builtin_ia32_cvtuqq2ps512_mask((__v8di)(__m512i)(A), \
                                            (__v8sf)_mm256_setzero_ps(), \
                                            (__mmask8)(U), (int)(R)))
666
/* Invokes __builtin_ia32_rangepd512_mask on A and B with int selector C,
 * _mm512_setzero_pd(), mask (__mmask8)-1 and _MM_FROUND_CUR_DIRECTION.  The
 * expansion is fully parenthesized so operators applied to the macro act on
 * the cast result. */
#define _mm512_range_pd(A, B, C) \
  ((__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), (int)(C), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)-1, \
                                           _MM_FROUND_CUR_DIRECTION))
673
/* Invokes __builtin_ia32_rangepd512_mask on A and B with int selector C, W,
 * mask U and _MM_FROUND_CUR_DIRECTION.  The expansion is fully parenthesized
 * so operators applied to the macro act on the cast result. */
#define _mm512_mask_range_pd(W, U, A, B, C) \
  ((__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), (int)(C), \
                                           (__v8df)(__m512d)(W), (__mmask8)(U), \
                                           _MM_FROUND_CUR_DIRECTION))
679
/* Invokes __builtin_ia32_rangepd512_mask on A and B with int selector C,
 * _mm512_setzero_pd(), mask U and _MM_FROUND_CUR_DIRECTION.  The expansion is
 * fully parenthesized so operators applied to the macro act on the cast
 * result. */
#define _mm512_maskz_range_pd(U, A, B, C) \
  ((__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), (int)(C), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)(U), \
                                           _MM_FROUND_CUR_DIRECTION))
686
/* Invokes __builtin_ia32_rangepd512_mask on A and B with int selector C,
 * _mm512_setzero_pd(), mask (__mmask8)-1 and R as the final int argument.
 * The expansion is fully parenthesized so operators applied to the macro act
 * on the cast result. */
#define _mm512_range_round_pd(A, B, C, R) \
  ((__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), (int)(C), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)-1, (int)(R)))
692
/* Invokes __builtin_ia32_rangepd512_mask on A and B with int selector C, W,
 * mask U and R as the final int argument.  The expansion is fully
 * parenthesized so operators applied to the macro act on the cast result. */
#define _mm512_mask_range_round_pd(W, U, A, B, C, R) \
  ((__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), (int)(C), \
                                           (__v8df)(__m512d)(W), (__mmask8)(U), \
                                           (int)(R)))
698
/* Invokes __builtin_ia32_rangepd512_mask on A and B with int selector C,
 * _mm512_setzero_pd(), mask U and R as the final int argument.  The expansion
 * is fully parenthesized so operators applied to the macro act on the cast
 * result. */
#define _mm512_maskz_range_round_pd(U, A, B, C, R) \
  ((__m512d)__builtin_ia32_rangepd512_mask((__v8df)(__m512d)(A), \
                                           (__v8df)(__m512d)(B), (int)(C), \
                                           (__v8df)_mm512_setzero_pd(), \
                                           (__mmask8)(U), (int)(R)))
704
/* 512-bit packed-float "range" intrinsics.
 *
 * All six macros expand to
 *   __builtin_ia32_rangeps512_mask(A, B, C, <vector>, <mask>, <rounding>)
 * and differ only in the last three arguments:
 *   plain   : zero vector, all-ones mask
 *   _mask_  : W, U
 *   _maskz_ : zero vector, U
 * Forms without "round" in the name pass _MM_FROUND_CUR_DIRECTION; the
 * "round" forms pass the caller-supplied R.
 *
 * Each expansion is fully parenthesized so that a postfix operator written
 * after the macro call applies to the cast result rather than binding to the
 * builtin call before the cast is applied.
 */
#define _mm512_range_ps(A, B, C) \
  ((__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), (int)(C), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)-1, \
                                          _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_range_ps(W, U, A, B, C) \
  ((__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), (int)(C), \
                                          (__v16sf)(__m512)(W), (__mmask16)(U), \
                                          _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_range_ps(U, A, B, C) \
  ((__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), (int)(C), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)(U), \
                                          _MM_FROUND_CUR_DIRECTION))

#define _mm512_range_round_ps(A, B, C, R) \
  ((__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), (int)(C), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)-1, (int)(R)))

#define _mm512_mask_range_round_ps(W, U, A, B, C, R) \
  ((__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), (int)(C), \
                                          (__v16sf)(__m512)(W), (__mmask16)(U), \
                                          (int)(R)))

#define _mm512_maskz_range_round_ps(U, A, B, C, R) \
  ((__m512)__builtin_ia32_rangeps512_mask((__v16sf)(__m512)(A), \
                                          (__v16sf)(__m512)(B), (int)(C), \
                                          (__v16sf)_mm512_setzero_ps(), \
                                          (__mmask16)(U), (int)(R)))
742
/* 128-bit scalar-float "range" intrinsics.
 *
 * The *_round_ss macros expand to
 *   __builtin_ia32_rangess128_round_mask(A, B, <vector>, <mask>, C, R)
 * (note that the immediate C follows the mask here):
 *   plain   : zero vector, all-ones mask
 *   _mask_  : W, U
 *   _maskz_ : zero vector, U
 * The non-"round" macros simply delegate to the matching *_round_ss macro
 * with _MM_FROUND_CUR_DIRECTION as R.
 *
 * Each builtin expansion is fully parenthesized so that a postfix operator
 * written after the macro call applies to the cast result rather than binding
 * to the builtin call before the cast is applied.
 */
#define _mm_range_round_ss(A, B, C, R) \
  ((__m128)__builtin_ia32_rangess128_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)_mm_setzero_ps(), \
                                                (__mmask8)-1, (int)(C), \
                                                (int)(R)))

#define _mm_range_ss(A, B, C) \
  _mm_range_round_ss((A), (B), (C), _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_range_round_ss(W, U, A, B, C, R) \
  ((__m128)__builtin_ia32_rangess128_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)(__m128)(W), \
                                                (__mmask8)(U), (int)(C), \
                                                (int)(R)))

#define _mm_mask_range_ss(W, U, A, B, C) \
  _mm_mask_range_round_ss((W), (U), (A), (B), (C), _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_range_round_ss(U, A, B, C, R) \
  ((__m128)__builtin_ia32_rangess128_round_mask((__v4sf)(__m128)(A), \
                                                (__v4sf)(__m128)(B), \
                                                (__v4sf)_mm_setzero_ps(), \
                                                (__mmask8)(U), (int)(C), \
                                                (int)(R)))

#define _mm_maskz_range_ss(U, A, B, C) \
  _mm_maskz_range_round_ss((U), (A), (B), (C), _MM_FROUND_CUR_DIRECTION)
769
/* 128-bit scalar-double "range" intrinsics.
 *
 * The *_round_sd macros expand to
 *   __builtin_ia32_rangesd128_round_mask(A, B, <vector>, <mask>, C, R)
 * (note that the immediate C follows the mask here):
 *   plain   : zero vector, all-ones mask
 *   _mask_  : W, U
 *   _maskz_ : zero vector, U
 * The non-"round" macros simply delegate to the matching *_round_sd macro
 * with _MM_FROUND_CUR_DIRECTION as R.
 *
 * Each builtin expansion is fully parenthesized so that a postfix operator
 * written after the macro call applies to the cast result rather than binding
 * to the builtin call before the cast is applied.
 */
#define _mm_range_round_sd(A, B, C, R) \
  ((__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)_mm_setzero_pd(), \
                                                 (__mmask8)-1, (int)(C), \
                                                 (int)(R)))

#define _mm_range_sd(A, B, C) \
  _mm_range_round_sd((A), (B), (C), _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_range_round_sd(W, U, A, B, C, R) \
  ((__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)(__m128d)(W), \
                                                 (__mmask8)(U), (int)(C), \
                                                 (int)(R)))

#define _mm_mask_range_sd(W, U, A, B, C) \
  _mm_mask_range_round_sd((W), (U), (A), (B), (C), _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_range_round_sd(U, A, B, C, R) \
  ((__m128d)__builtin_ia32_rangesd128_round_mask((__v2df)(__m128d)(A), \
                                                 (__v2df)(__m128d)(B), \
                                                 (__v2df)_mm_setzero_pd(), \
                                                 (__mmask8)(U), (int)(C), \
                                                 (int)(R)))

#define _mm_maskz_range_sd(U, A, B, C) \
  _mm_maskz_range_round_sd((U), (A), (B), (C), _MM_FROUND_CUR_DIRECTION)
796
/* 512-bit "reduce" intrinsics using the current rounding direction.
 *
 * The _pd macros expand to
 *   __builtin_ia32_reducepd512_mask(A, B, <vector>, <mask>,
 *                                   _MM_FROUND_CUR_DIRECTION)
 * and the _ps macros to the corresponding __builtin_ia32_reduceps512_mask
 * call, where B is forwarded as an int and:
 *   plain   : zero vector, all-ones mask
 *   _mask_  : W, U
 *   _maskz_ : zero vector, U
 *
 * Each expansion is fully parenthesized so that a postfix operator written
 * after the macro call applies to the cast result rather than binding to the
 * builtin call before the cast is applied.
 */
#define _mm512_reduce_pd(A, B) \
  ((__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)-1, \
                                            _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_reduce_pd(W, U, A, B) \
  ((__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
                                            (__v8df)(__m512d)(W), \
                                            (__mmask8)(U), \
                                            _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_reduce_pd(U, A, B) \
  ((__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)(U), \
                                            _MM_FROUND_CUR_DIRECTION))

#define _mm512_reduce_ps(A, B) \
  ((__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)-1, \
                                           _MM_FROUND_CUR_DIRECTION))

#define _mm512_mask_reduce_ps(W, U, A, B) \
  ((__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
                                           (__v16sf)(__m512)(W), \
                                           (__mmask16)(U), \
                                           _MM_FROUND_CUR_DIRECTION))

#define _mm512_maskz_reduce_ps(U, A, B) \
  ((__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)(U), \
                                           _MM_FROUND_CUR_DIRECTION))
832
/* 512-bit "reduce" intrinsics with an explicit rounding argument.
 *
 * The _pd macros expand to
 *   __builtin_ia32_reducepd512_mask(A, B, <vector>, <mask>, R)
 * and the _ps macros to the corresponding __builtin_ia32_reduceps512_mask
 * call, where B and R are forwarded as ints and:
 *   plain   : zero vector, all-ones mask
 *   _mask_  : W, U
 *   _maskz_ : zero vector, U
 *
 * Each expansion is fully parenthesized so that a postfix operator written
 * after the macro call applies to the cast result rather than binding to the
 * builtin call before the cast is applied.
 */
#define _mm512_reduce_round_pd(A, B, R) \
  ((__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)-1, (int)(R)))

#define _mm512_mask_reduce_round_pd(W, U, A, B, R) \
  ((__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
                                            (__v8df)(__m512d)(W), \
                                            (__mmask8)(U), (int)(R)))

#define _mm512_maskz_reduce_round_pd(U, A, B, R) \
  ((__m512d)__builtin_ia32_reducepd512_mask((__v8df)(__m512d)(A), (int)(B), \
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)(U), (int)(R)))

#define _mm512_reduce_round_ps(A, B, R) \
  ((__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)-1, (int)(R)))

#define _mm512_mask_reduce_round_ps(W, U, A, B, R) \
  ((__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
                                           (__v16sf)(__m512)(W), \
                                           (__mmask16)(U), (int)(R)))

#define _mm512_maskz_reduce_round_ps(U, A, B, R) \
  ((__m512)__builtin_ia32_reduceps512_mask((__v16sf)(__m512)(A), (int)(B), \
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)(U), (int)(R)))
862
/* 128-bit scalar-float "reduce" intrinsics.
 *
 * All six macros expand to
 *   __builtin_ia32_reducess_mask(A, B, <vector>, <mask>, C, <rounding>)
 * where C is forwarded as an int and:
 *   plain   : zero vector, all-ones mask
 *   _mask_  : W, U
 *   _maskz_ : zero vector, U
 * Forms without "round" in the name pass _MM_FROUND_CUR_DIRECTION; the
 * "round" forms pass the caller-supplied R.
 *
 * Each expansion is fully parenthesized so that a postfix operator written
 * after the macro call applies to the cast result rather than binding to the
 * builtin call before the cast is applied.
 */
#define _mm_reduce_ss(A, B, C) \
  ((__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
                                        (__v4sf)(__m128)(B), \
                                        (__v4sf)_mm_setzero_ps(), (__mmask8)-1, \
                                        (int)(C), _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_reduce_ss(W, U, A, B, C) \
  ((__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
                                        (__v4sf)(__m128)(B), \
                                        (__v4sf)(__m128)(W), (__mmask8)(U), \
                                        (int)(C), _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_reduce_ss(U, A, B, C) \
  ((__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
                                        (__v4sf)(__m128)(B), \
                                        (__v4sf)_mm_setzero_ps(), \
                                        (__mmask8)(U), (int)(C), \
                                        _MM_FROUND_CUR_DIRECTION))

#define _mm_reduce_round_ss(A, B, C, R) \
  ((__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
                                        (__v4sf)(__m128)(B), \
                                        (__v4sf)_mm_setzero_ps(), (__mmask8)-1, \
                                        (int)(C), (int)(R)))

#define _mm_mask_reduce_round_ss(W, U, A, B, C, R) \
  ((__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
                                        (__v4sf)(__m128)(B), \
                                        (__v4sf)(__m128)(W), (__mmask8)(U), \
                                        (int)(C), (int)(R)))

#define _mm_maskz_reduce_round_ss(U, A, B, C, R) \
  ((__m128)__builtin_ia32_reducess_mask((__v4sf)(__m128)(A), \
                                        (__v4sf)(__m128)(B), \
                                        (__v4sf)_mm_setzero_ps(), \
                                        (__mmask8)(U), (int)(C), (int)(R)))
899
/* 128-bit scalar-double "reduce" intrinsics.
 *
 * All six macros expand to
 *   __builtin_ia32_reducesd_mask(A, B, <vector>, <mask>, C, <rounding>)
 * where C is forwarded as an int and:
 *   plain   : zero vector, all-ones mask
 *   _mask_  : W, U
 *   _maskz_ : zero vector, U
 * Forms without "round" in the name pass _MM_FROUND_CUR_DIRECTION; the
 * "round" forms pass the caller-supplied R.
 *
 * Each expansion is fully parenthesized so that a postfix operator written
 * after the macro call applies to the cast result rather than binding to the
 * builtin call before the cast is applied.
 */
#define _mm_reduce_sd(A, B, C) \
  ((__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
                                         (__v2df)(__m128d)(B), \
                                         (__v2df)_mm_setzero_pd(), \
                                         (__mmask8)-1, (int)(C), \
                                         _MM_FROUND_CUR_DIRECTION))

#define _mm_mask_reduce_sd(W, U, A, B, C) \
  ((__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
                                         (__v2df)(__m128d)(B), \
                                         (__v2df)(__m128d)(W), (__mmask8)(U), \
                                         (int)(C), _MM_FROUND_CUR_DIRECTION))

#define _mm_maskz_reduce_sd(U, A, B, C) \
  ((__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
                                         (__v2df)(__m128d)(B), \
                                         (__v2df)_mm_setzero_pd(), \
                                         (__mmask8)(U), (int)(C), \
                                         _MM_FROUND_CUR_DIRECTION))

#define _mm_reduce_round_sd(A, B, C, R) \
  ((__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
                                         (__v2df)(__m128d)(B), \
                                         (__v2df)_mm_setzero_pd(), \
                                         (__mmask8)-1, (int)(C), (int)(R)))

#define _mm_mask_reduce_round_sd(W, U, A, B, C, R) \
  ((__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
                                         (__v2df)(__m128d)(B), \
                                         (__v2df)(__m128d)(W), (__mmask8)(U), \
                                         (int)(C), (int)(R)))

#define _mm_maskz_reduce_round_sd(U, A, B, C, R) \
  ((__m128d)__builtin_ia32_reducesd_mask((__v2df)(__m128d)(A), \
                                         (__v2df)(__m128d)(B), \
                                         (__v2df)_mm_setzero_pd(), \
                                         (__mmask8)(U), (int)(C), (int)(R)))
937
938static __inline__ __mmask16 __DEFAULT_FN_ATTRS
939_mm512_movepi32_mask (__m512i __A)
940{
941  return (__mmask16) __builtin_ia32_cvtd2mask512 ((__v16si) __A);
942}
943
944static __inline__ __m512i __DEFAULT_FN_ATTRS
945_mm512_movm_epi32 (__mmask16 __A)
946{
947  return (__m512i) __builtin_ia32_cvtmask2d512 (__A);
948}
949
950static __inline__ __m512i __DEFAULT_FN_ATTRS
951_mm512_movm_epi64 (__mmask8 __A)
952{
953  return (__m512i) __builtin_ia32_cvtmask2q512 (__A);
954}
955
956static __inline__ __mmask8 __DEFAULT_FN_ATTRS
957_mm512_movepi64_mask (__m512i __A)
958{
959  return (__mmask8) __builtin_ia32_cvtq2mask512 ((__v8di) __A);
960}
961
962
963static __inline__ __m512 __DEFAULT_FN_ATTRS
964_mm512_broadcast_f32x2 (__m128 __A)
965{
966  return (__m512)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A,
967                                         0, 1, 0, 1, 0, 1, 0, 1,
968                                         0, 1, 0, 1, 0, 1, 0, 1);
969}
970
971static __inline__ __m512 __DEFAULT_FN_ATTRS
972_mm512_mask_broadcast_f32x2 (__m512 __O, __mmask16 __M, __m128 __A)
973{
974  return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
975                                             (__v16sf)_mm512_broadcast_f32x2(__A),
976                                             (__v16sf)__O);
977}
978
979static __inline__ __m512 __DEFAULT_FN_ATTRS
980_mm512_maskz_broadcast_f32x2 (__mmask16 __M, __m128 __A)
981{
982  return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
983                                             (__v16sf)_mm512_broadcast_f32x2(__A),
984                                             (__v16sf)_mm512_setzero_ps());
985}
986
987static __inline__ __m512 __DEFAULT_FN_ATTRS
988_mm512_broadcast_f32x8(__m256 __A)
989{
990  return (__m512)__builtin_shufflevector((__v8sf)__A, (__v8sf)__A,
991                                         0, 1, 2, 3, 4, 5, 6, 7,
992                                         0, 1, 2, 3, 4, 5, 6, 7);
993}
994
995static __inline__ __m512 __DEFAULT_FN_ATTRS
996_mm512_mask_broadcast_f32x8(__m512 __O, __mmask16 __M, __m256 __A)
997{
998  return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
999                                           (__v16sf)_mm512_broadcast_f32x8(__A),
1000                                           (__v16sf)__O);
1001}
1002
1003static __inline__ __m512 __DEFAULT_FN_ATTRS
1004_mm512_maskz_broadcast_f32x8(__mmask16 __M, __m256 __A)
1005{
1006  return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
1007                                           (__v16sf)_mm512_broadcast_f32x8(__A),
1008                                           (__v16sf)_mm512_setzero_ps());
1009}
1010
1011static __inline__ __m512d __DEFAULT_FN_ATTRS
1012_mm512_broadcast_f64x2(__m128d __A)
1013{
1014  return (__m512d)__builtin_shufflevector((__v2df)__A, (__v2df)__A,
1015                                          0, 1, 0, 1, 0, 1, 0, 1);
1016}
1017
1018static __inline__ __m512d __DEFAULT_FN_ATTRS
1019_mm512_mask_broadcast_f64x2(__m512d __O, __mmask8 __M, __m128d __A)
1020{
1021  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
1022                                            (__v8df)_mm512_broadcast_f64x2(__A),
1023                                            (__v8df)__O);
1024}
1025
1026static __inline__ __m512d __DEFAULT_FN_ATTRS
1027_mm512_maskz_broadcast_f64x2(__mmask8 __M, __m128d __A)
1028{
1029  return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
1030                                            (__v8df)_mm512_broadcast_f64x2(__A),
1031                                            (__v8df)_mm512_setzero_pd());
1032}
1033
1034static __inline__ __m512i __DEFAULT_FN_ATTRS
1035_mm512_broadcast_i32x2 (__m128i __A)
1036{
1037  return (__m512i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
1038                                          0, 1, 0, 1, 0, 1, 0, 1,
1039                                          0, 1, 0, 1, 0, 1, 0, 1);
1040}
1041
1042static __inline__ __m512i __DEFAULT_FN_ATTRS
1043_mm512_mask_broadcast_i32x2 (__m512i __O, __mmask16 __M, __m128i __A)
1044{
1045  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1046                                             (__v16si)_mm512_broadcast_i32x2(__A),
1047                                             (__v16si)__O);
1048}
1049
1050static __inline__ __m512i __DEFAULT_FN_ATTRS
1051_mm512_maskz_broadcast_i32x2 (__mmask16 __M, __m128i __A)
1052{
1053  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1054                                             (__v16si)_mm512_broadcast_i32x2(__A),
1055                                             (__v16si)_mm512_setzero_si512());
1056}
1057
1058static __inline__ __m512i __DEFAULT_FN_ATTRS
1059_mm512_broadcast_i32x8(__m256i __A)
1060{
1061  return (__m512i)__builtin_shufflevector((__v8si)__A, (__v8si)__A,
1062                                          0, 1, 2, 3, 4, 5, 6, 7,
1063                                          0, 1, 2, 3, 4, 5, 6, 7);
1064}
1065
1066static __inline__ __m512i __DEFAULT_FN_ATTRS
1067_mm512_mask_broadcast_i32x8(__m512i __O, __mmask16 __M, __m256i __A)
1068{
1069  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1070                                           (__v16si)_mm512_broadcast_i32x8(__A),
1071                                           (__v16si)__O);
1072}
1073
1074static __inline__ __m512i __DEFAULT_FN_ATTRS
1075_mm512_maskz_broadcast_i32x8(__mmask16 __M, __m256i __A)
1076{
1077  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
1078                                           (__v16si)_mm512_broadcast_i32x8(__A),
1079                                           (__v16si)_mm512_setzero_si512());
1080}
1081
1082static __inline__ __m512i __DEFAULT_FN_ATTRS
1083_mm512_broadcast_i64x2(__m128i __A)
1084{
1085  return (__m512i)__builtin_shufflevector((__v2di)__A, (__v2di)__A,
1086                                          0, 1, 0, 1, 0, 1, 0, 1);
1087}
1088
1089static __inline__ __m512i __DEFAULT_FN_ATTRS
1090_mm512_mask_broadcast_i64x2(__m512i __O, __mmask8 __M, __m128i __A)
1091{
1092  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1093                                            (__v8di)_mm512_broadcast_i64x2(__A),
1094                                            (__v8di)__O);
1095}
1096
1097static __inline__ __m512i __DEFAULT_FN_ATTRS
1098_mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A)
1099{
1100  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
1101                                            (__v8di)_mm512_broadcast_i64x2(__A),
1102                                            (__v8di)_mm512_setzero_si512());
1103}
1104
/* Extract a 256-bit float vector from the 512-bit vector A.
 *
 * All three macros expand to
 *   __builtin_ia32_extractf32x8_mask(A, imm, <vector>, <mask>)
 * with:
 *   plain   : an undefined vector and an all-ones mask
 *   _mask_  : W, U
 *   _maskz_ : zero vector, U
 *
 * Each expansion is fully parenthesized so that a postfix operator written
 * after the macro call applies to the cast result rather than binding to the
 * builtin call before the cast is applied.
 */
#define _mm512_extractf32x8_ps(A, imm) \
  ((__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm), \
                                            (__v8sf)_mm256_undefined_ps(), \
                                            (__mmask8)-1))

#define _mm512_mask_extractf32x8_ps(W, U, A, imm) \
  ((__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm), \
                                            (__v8sf)(__m256)(W), \
                                            (__mmask8)(U)))

#define _mm512_maskz_extractf32x8_ps(U, A, imm) \
  ((__m256)__builtin_ia32_extractf32x8_mask((__v16sf)(__m512)(A), (int)(imm), \
                                            (__v8sf)_mm256_setzero_ps(), \
                                            (__mmask8)(U)))
1119
/* Extract a 128-bit double vector from the 512-bit vector A.
 *
 * All three macros expand to
 *   __builtin_ia32_extractf64x2_512_mask(A, imm, <vector>, <mask>)
 * with:
 *   plain   : an undefined vector and an all-ones mask
 *   _mask_  : W, U
 *   _maskz_ : zero vector, U
 *
 * Each expansion is fully parenthesized so that a postfix operator written
 * after the macro call applies to the cast result rather than binding to the
 * builtin call before the cast is applied.
 */
#define _mm512_extractf64x2_pd(A, imm) \
  ((__m128d)__builtin_ia32_extractf64x2_512_mask((__v8df)(__m512d)(A), \
                                                 (int)(imm), \
                                                 (__v2df)_mm_undefined_pd(), \
                                                 (__mmask8)-1))

#define _mm512_mask_extractf64x2_pd(W, U, A, imm) \
  ((__m128d)__builtin_ia32_extractf64x2_512_mask((__v8df)(__m512d)(A), \
                                                 (int)(imm), \
                                                 (__v2df)(__m128d)(W), \
                                                 (__mmask8)(U)))

#define _mm512_maskz_extractf64x2_pd(U, A, imm) \
  ((__m128d)__builtin_ia32_extractf64x2_512_mask((__v8df)(__m512d)(A), \
                                                 (int)(imm), \
                                                 (__v2df)_mm_setzero_pd(), \
                                                 (__mmask8)(U)))
1137
/* Extract a 256-bit integer vector (32-bit elements) from the 512-bit
 * vector A.
 *
 * All three macros expand to
 *   __builtin_ia32_extracti32x8_mask(A, imm, <vector>, <mask>)
 * with:
 *   plain   : an undefined vector and an all-ones mask
 *   _mask_  : W, U
 *   _maskz_ : zero vector, U
 *
 * Each expansion is fully parenthesized so that a postfix operator written
 * after the macro call applies to the cast result rather than binding to the
 * builtin call before the cast is applied.
 */
#define _mm512_extracti32x8_epi32(A, imm) \
  ((__m256i)__builtin_ia32_extracti32x8_mask((__v16si)(__m512i)(A), (int)(imm), \
                                             (__v8si)_mm256_undefined_si256(), \
                                             (__mmask8)-1))

#define _mm512_mask_extracti32x8_epi32(W, U, A, imm) \
  ((__m256i)__builtin_ia32_extracti32x8_mask((__v16si)(__m512i)(A), (int)(imm), \
                                             (__v8si)(__m256i)(W), \
                                             (__mmask8)(U)))

#define _mm512_maskz_extracti32x8_epi32(U, A, imm) \
  ((__m256i)__builtin_ia32_extracti32x8_mask((__v16si)(__m512i)(A), (int)(imm), \
                                             (__v8si)_mm256_setzero_si256(), \
                                             (__mmask8)(U)))
1152
/* Extract a 128-bit integer vector (64-bit elements) from the 512-bit
 * vector A.
 *
 * All three macros expand to
 *   __builtin_ia32_extracti64x2_512_mask(A, imm, <vector>, <mask>)
 * with:
 *   plain   : an undefined vector and an all-ones mask
 *   _mask_  : W, U
 *   _maskz_ : zero vector, U
 *
 * Each expansion is fully parenthesized so that a postfix operator written
 * after the macro call applies to the cast result rather than binding to the
 * builtin call before the cast is applied.
 */
#define _mm512_extracti64x2_epi64(A, imm) \
  ((__m128i)__builtin_ia32_extracti64x2_512_mask((__v8di)(__m512i)(A), \
                                                 (int)(imm), \
                                                 (__v2di)_mm_undefined_si128(), \
                                                 (__mmask8)-1))

#define _mm512_mask_extracti64x2_epi64(W, U, A, imm) \
  ((__m128i)__builtin_ia32_extracti64x2_512_mask((__v8di)(__m512i)(A), \
                                                 (int)(imm), \
                                                 (__v2di)(__m128i)(W), \
                                                 (__mmask8)(U)))

#define _mm512_maskz_extracti64x2_epi64(U, A, imm) \
  ((__m128i)__builtin_ia32_extracti64x2_512_mask((__v8di)(__m512i)(A), \
                                                 (int)(imm), \
                                                 (__v2di)_mm_setzero_si128(), \
                                                 (__mmask8)(U)))
1170
/* Insert the 256-bit float vector B into the 512-bit vector A.
 *
 * _mm512_insertf32x8 expands to __builtin_ia32_insertf32x8(A, B, imm).
 * The masked forms feed that result to __builtin_ia32_selectps_512 together
 * with mask U and either W (_mask_) or an all-zero vector (_maskz_).
 *
 * Each expansion is fully parenthesized so that a postfix operator written
 * after the macro call applies to the cast result rather than binding to the
 * builtin call before the cast is applied.
 */
#define _mm512_insertf32x8(A, B, imm) \
  ((__m512)__builtin_ia32_insertf32x8((__v16sf)(__m512)(A), \
                                      (__v8sf)(__m256)(B), (int)(imm)))

#define _mm512_mask_insertf32x8(W, U, A, B, imm) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                 (__v16sf)_mm512_insertf32x8((A), (B), (imm)), \
                                 (__v16sf)(__m512)(W)))

#define _mm512_maskz_insertf32x8(U, A, B, imm) \
  ((__m512)__builtin_ia32_selectps_512((__mmask16)(U), \
                                 (__v16sf)_mm512_insertf32x8((A), (B), (imm)), \
                                 (__v16sf)_mm512_setzero_ps()))
1184
/* Insert the 128-bit double vector B into the 512-bit vector A.
 *
 * _mm512_insertf64x2 expands to __builtin_ia32_insertf64x2_512(A, B, imm).
 * The masked forms feed that result to __builtin_ia32_selectpd_512 together
 * with mask U and either W (_mask_) or an all-zero vector (_maskz_).
 *
 * Each expansion is fully parenthesized so that a postfix operator written
 * after the macro call applies to the cast result rather than binding to the
 * builtin call before the cast is applied.
 */
#define _mm512_insertf64x2(A, B, imm) \
  ((__m512d)__builtin_ia32_insertf64x2_512((__v8df)(__m512d)(A), \
                                           (__v2df)(__m128d)(B), (int)(imm)))

#define _mm512_mask_insertf64x2(W, U, A, B, imm) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                  (__v8df)_mm512_insertf64x2((A), (B), (imm)), \
                                  (__v8df)(__m512d)(W)))

#define _mm512_maskz_insertf64x2(U, A, B, imm) \
  ((__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \
                                  (__v8df)_mm512_insertf64x2((A), (B), (imm)), \
                                  (__v8df)_mm512_setzero_pd()))
1198
/* Insert the 256-bit integer vector B (32-bit elements) into the 512-bit
 * vector A.
 *
 * _mm512_inserti32x8 expands to __builtin_ia32_inserti32x8(A, B, imm).
 * The masked forms feed that result to __builtin_ia32_selectd_512 together
 * with mask U and either W (_mask_) or an all-zero vector (_maskz_).
 *
 * Each expansion is fully parenthesized so that a postfix operator written
 * after the macro call applies to the cast result rather than binding to the
 * builtin call before the cast is applied.
 */
#define _mm512_inserti32x8(A, B, imm) \
  ((__m512i)__builtin_ia32_inserti32x8((__v16si)(__m512i)(A), \
                                       (__v8si)(__m256i)(B), (int)(imm)))

#define _mm512_mask_inserti32x8(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                 (__v16si)_mm512_inserti32x8((A), (B), (imm)), \
                                 (__v16si)(__m512i)(W)))

#define _mm512_maskz_inserti32x8(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
                                 (__v16si)_mm512_inserti32x8((A), (B), (imm)), \
                                 (__v16si)_mm512_setzero_si512()))
1212
/* Insert the 128-bit integer vector B (64-bit elements) into the 512-bit
 * vector A.
 *
 * _mm512_inserti64x2 expands to __builtin_ia32_inserti64x2_512(A, B, imm).
 * The masked forms feed that result to __builtin_ia32_selectq_512 together
 * with mask U and either W (_mask_) or an all-zero vector (_maskz_).
 *
 * Each expansion is fully parenthesized so that a postfix operator written
 * after the macro call applies to the cast result rather than binding to the
 * builtin call before the cast is applied.
 */
#define _mm512_inserti64x2(A, B, imm) \
  ((__m512i)__builtin_ia32_inserti64x2_512((__v8di)(__m512i)(A), \
                                           (__v2di)(__m128i)(B), (int)(imm)))

#define _mm512_mask_inserti64x2(W, U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                  (__v8di)_mm512_inserti64x2((A), (B), (imm)), \
                                  (__v8di)(__m512i)(W)))

#define _mm512_maskz_inserti64x2(U, A, B, imm) \
  ((__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
                                  (__v8di)_mm512_inserti64x2((A), (B), (imm)), \
                                  (__v8di)_mm512_setzero_si512()))
1226
/* 512-bit floating-point classification producing a mask.
 *
 * The _ps macros expand to __builtin_ia32_fpclassps512_mask(A, imm, <mask>)
 * and the _pd macros to __builtin_ia32_fpclasspd512_mask(A, imm, <mask>).
 * The _mask_ forms pass U as the mask argument; the plain forms pass an
 * all-ones mask.
 *
 * Each expansion is fully parenthesized so that the macro behaves as a
 * single primary expression at the use site regardless of the operators
 * surrounding it.
 */
#define _mm512_mask_fpclass_ps_mask(U, A, imm) \
  ((__mmask16)__builtin_ia32_fpclassps512_mask((__v16sf)(__m512)(A), \
                                               (int)(imm), (__mmask16)(U)))

#define _mm512_fpclass_ps_mask(A, imm) \
  ((__mmask16)__builtin_ia32_fpclassps512_mask((__v16sf)(__m512)(A), \
                                               (int)(imm), (__mmask16)-1))

#define _mm512_mask_fpclass_pd_mask(U, A, imm) \
  ((__mmask8)__builtin_ia32_fpclasspd512_mask((__v8df)(__m512d)(A), (int)(imm), \
                                              (__mmask8)(U)))

#define _mm512_fpclass_pd_mask(A, imm) \
  ((__mmask8)__builtin_ia32_fpclasspd512_mask((__v8df)(__m512d)(A), (int)(imm), \
                                              (__mmask8)-1))
1242
/* 128-bit scalar floating-point classification producing a mask.
 *
 * The _sd macros expand to __builtin_ia32_fpclasssd_mask(A, imm, <mask>)
 * and the _ss macros to __builtin_ia32_fpclassss_mask(A, imm, <mask>).
 * The _mask_ forms pass U as the mask argument; the plain forms pass an
 * all-ones mask.
 *
 * Each expansion is fully parenthesized so that the macro behaves as a
 * single primary expression at the use site regardless of the operators
 * surrounding it.
 */
#define _mm_fpclass_sd_mask(A, imm) \
  ((__mmask8)__builtin_ia32_fpclasssd_mask((__v2df)(__m128d)(A), (int)(imm), \
                                           (__mmask8)-1))

#define _mm_mask_fpclass_sd_mask(U, A, imm) \
  ((__mmask8)__builtin_ia32_fpclasssd_mask((__v2df)(__m128d)(A), (int)(imm), \
                                           (__mmask8)(U)))

#define _mm_fpclass_ss_mask(A, imm) \
  ((__mmask8)__builtin_ia32_fpclassss_mask((__v4sf)(__m128)(A), (int)(imm), \
                                           (__mmask8)-1))

#define _mm_mask_fpclass_ss_mask(U, A, imm) \
  ((__mmask8)__builtin_ia32_fpclassss_mask((__v4sf)(__m128)(A), (int)(imm), \
                                           (__mmask8)(U)))
1258
1259#undef __DEFAULT_FN_ATTRS
1260
1261#endif
1262