avx512vldqintrin.h revision 284734
/*===---- avx512vldqintrin.h - AVX512VL and AVX512DQ intrinsics ---------------------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */
23283627Sdim
24283627Sdim#ifndef __IMMINTRIN_H
25283627Sdim#error "Never use <avx512vldqintrin.h> directly; include <immintrin.h> instead."
26283627Sdim#endif
27283627Sdim
28283627Sdim#ifndef __AVX512VLDQINTRIN_H
29283627Sdim#define __AVX512VLDQINTRIN_H
30283627Sdim
/* Attribute set applied to every intrinsic defined in this header: always
   inline the wrapper, keep it out of debug info, and enable the AVX512VL
   and AVX512DQ target features for its body.  */
#define DEFAULT_FN_ATTRS \
  __attribute__((__always_inline__, __nodebug__, \
                 __target__("avx512vl,avx512dq")))
33283627Sdim
34284734Sdimstatic __inline__ __m256i DEFAULT_FN_ATTRS
35283627Sdim_mm256_mullo_epi64 (__m256i __A, __m256i __B) {
36283627Sdim  return (__m256i) ((__v4di) __A * (__v4di) __B);
37283627Sdim}
38283627Sdim
39284734Sdimstatic __inline__ __m256i DEFAULT_FN_ATTRS
40283627Sdim_mm256_mask_mullo_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) {
41283627Sdim  return (__m256i) __builtin_ia32_pmullq256_mask ((__v4di) __A,
42283627Sdim              (__v4di) __B,
43283627Sdim              (__v4di) __W,
44283627Sdim              (__mmask8) __U);
45283627Sdim}
46283627Sdim
47284734Sdimstatic __inline__ __m256i DEFAULT_FN_ATTRS
48283627Sdim_mm256_maskz_mullo_epi64 (__mmask8 __U, __m256i __A, __m256i __B) {
49283627Sdim  return (__m256i) __builtin_ia32_pmullq256_mask ((__v4di) __A,
50283627Sdim              (__v4di) __B,
51283627Sdim              (__v4di)
52283627Sdim              _mm256_setzero_si256 (),
53283627Sdim              (__mmask8) __U);
54283627Sdim}
55283627Sdim
56284734Sdimstatic __inline__ __m128i DEFAULT_FN_ATTRS
57283627Sdim_mm_mullo_epi64 (__m128i __A, __m128i __B) {
58283627Sdim  return (__m128i) ((__v2di) __A * (__v2di) __B);
59283627Sdim}
60283627Sdim
61284734Sdimstatic __inline__ __m128i DEFAULT_FN_ATTRS
62283627Sdim_mm_mask_mullo_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
63283627Sdim  return (__m128i) __builtin_ia32_pmullq128_mask ((__v2di) __A,
64283627Sdim              (__v2di) __B,
65283627Sdim              (__v2di) __W,
66283627Sdim              (__mmask8) __U);
67283627Sdim}
68283627Sdim
69284734Sdimstatic __inline__ __m128i DEFAULT_FN_ATTRS
70283627Sdim_mm_maskz_mullo_epi64 (__mmask8 __U, __m128i __A, __m128i __B) {
71283627Sdim  return (__m128i) __builtin_ia32_pmullq128_mask ((__v2di) __A,
72283627Sdim              (__v2di) __B,
73283627Sdim              (__v2di)
74283627Sdim              _mm_setzero_si128 (),
75283627Sdim              (__mmask8) __U);
76283627Sdim}
77283627Sdim
78284734Sdimstatic __inline__ __m256d DEFAULT_FN_ATTRS
79283627Sdim_mm256_mask_andnot_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
80283627Sdim  return (__m256d) __builtin_ia32_andnpd256_mask ((__v4df) __A,
81283627Sdim              (__v4df) __B,
82283627Sdim              (__v4df) __W,
83283627Sdim              (__mmask8) __U);
84283627Sdim}
85283627Sdim
86284734Sdimstatic __inline__ __m256d DEFAULT_FN_ATTRS
87283627Sdim_mm256_maskz_andnot_pd (__mmask8 __U, __m256d __A, __m256d __B) {
88283627Sdim  return (__m256d) __builtin_ia32_andnpd256_mask ((__v4df) __A,
89283627Sdim              (__v4df) __B,
90283627Sdim              (__v4df)
91283627Sdim              _mm256_setzero_pd (),
92283627Sdim              (__mmask8) __U);
93283627Sdim}
94283627Sdim
95284734Sdimstatic __inline__ __m128d DEFAULT_FN_ATTRS
96283627Sdim_mm_mask_andnot_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
97283627Sdim  return (__m128d) __builtin_ia32_andnpd128_mask ((__v2df) __A,
98283627Sdim              (__v2df) __B,
99283627Sdim              (__v2df) __W,
100283627Sdim              (__mmask8) __U);
101283627Sdim}
102283627Sdim
103284734Sdimstatic __inline__ __m128d DEFAULT_FN_ATTRS
104283627Sdim_mm_maskz_andnot_pd (__mmask8 __U, __m128d __A, __m128d __B) {
105283627Sdim  return (__m128d) __builtin_ia32_andnpd128_mask ((__v2df) __A,
106283627Sdim              (__v2df) __B,
107283627Sdim              (__v2df)
108283627Sdim              _mm_setzero_pd (),
109283627Sdim              (__mmask8) __U);
110283627Sdim}
111283627Sdim
112284734Sdimstatic __inline__ __m256 DEFAULT_FN_ATTRS
113283627Sdim_mm256_mask_andnot_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
114283627Sdim  return (__m256) __builtin_ia32_andnps256_mask ((__v8sf) __A,
115283627Sdim             (__v8sf) __B,
116283627Sdim             (__v8sf) __W,
117283627Sdim             (__mmask8) __U);
118283627Sdim}
119283627Sdim
120284734Sdimstatic __inline__ __m256 DEFAULT_FN_ATTRS
121283627Sdim_mm256_maskz_andnot_ps (__mmask8 __U, __m256 __A, __m256 __B) {
122283627Sdim  return (__m256) __builtin_ia32_andnps256_mask ((__v8sf) __A,
123283627Sdim             (__v8sf) __B,
124283627Sdim             (__v8sf)
125283627Sdim             _mm256_setzero_ps (),
126283627Sdim             (__mmask8) __U);
127283627Sdim}
128283627Sdim
129284734Sdimstatic __inline__ __m128 DEFAULT_FN_ATTRS
130283627Sdim_mm_mask_andnot_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
131283627Sdim  return (__m128) __builtin_ia32_andnps128_mask ((__v4sf) __A,
132283627Sdim             (__v4sf) __B,
133283627Sdim             (__v4sf) __W,
134283627Sdim             (__mmask8) __U);
135283627Sdim}
136283627Sdim
137284734Sdimstatic __inline__ __m128 DEFAULT_FN_ATTRS
138283627Sdim_mm_maskz_andnot_ps (__mmask8 __U, __m128 __A, __m128 __B) {
139283627Sdim  return (__m128) __builtin_ia32_andnps128_mask ((__v4sf) __A,
140283627Sdim             (__v4sf) __B,
141283627Sdim             (__v4sf)
142283627Sdim             _mm_setzero_ps (),
143283627Sdim             (__mmask8) __U);
144283627Sdim}
145283627Sdim
146284734Sdimstatic __inline__ __m256d DEFAULT_FN_ATTRS
147283627Sdim_mm256_mask_and_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
148283627Sdim  return (__m256d) __builtin_ia32_andpd256_mask ((__v4df) __A,
149283627Sdim             (__v4df) __B,
150283627Sdim             (__v4df) __W,
151283627Sdim             (__mmask8) __U);
152283627Sdim}
153283627Sdim
154284734Sdimstatic __inline__ __m256d DEFAULT_FN_ATTRS
155283627Sdim_mm256_maskz_and_pd (__mmask8 __U, __m256d __A, __m256d __B) {
156283627Sdim  return (__m256d) __builtin_ia32_andpd256_mask ((__v4df) __A,
157283627Sdim             (__v4df) __B,
158283627Sdim             (__v4df)
159283627Sdim             _mm256_setzero_pd (),
160283627Sdim             (__mmask8) __U);
161283627Sdim}
162283627Sdim
163284734Sdimstatic __inline__ __m128d DEFAULT_FN_ATTRS
164283627Sdim_mm_mask_and_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
165283627Sdim  return (__m128d) __builtin_ia32_andpd128_mask ((__v2df) __A,
166283627Sdim             (__v2df) __B,
167283627Sdim             (__v2df) __W,
168283627Sdim             (__mmask8) __U);
169283627Sdim}
170283627Sdim
171284734Sdimstatic __inline__ __m128d DEFAULT_FN_ATTRS
172283627Sdim_mm_maskz_and_pd (__mmask8 __U, __m128d __A, __m128d __B) {
173283627Sdim  return (__m128d) __builtin_ia32_andpd128_mask ((__v2df) __A,
174283627Sdim             (__v2df) __B,
175283627Sdim             (__v2df)
176283627Sdim             _mm_setzero_pd (),
177283627Sdim             (__mmask8) __U);
178283627Sdim}
179283627Sdim
180284734Sdimstatic __inline__ __m256 DEFAULT_FN_ATTRS
181283627Sdim_mm256_mask_and_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
182283627Sdim  return (__m256) __builtin_ia32_andps256_mask ((__v8sf) __A,
183283627Sdim            (__v8sf) __B,
184283627Sdim            (__v8sf) __W,
185283627Sdim            (__mmask8) __U);
186283627Sdim}
187283627Sdim
188284734Sdimstatic __inline__ __m256 DEFAULT_FN_ATTRS
189283627Sdim_mm256_maskz_and_ps (__mmask8 __U, __m256 __A, __m256 __B) {
190283627Sdim  return (__m256) __builtin_ia32_andps256_mask ((__v8sf) __A,
191283627Sdim            (__v8sf) __B,
192283627Sdim            (__v8sf)
193283627Sdim            _mm256_setzero_ps (),
194283627Sdim            (__mmask8) __U);
195283627Sdim}
196283627Sdim
197284734Sdimstatic __inline__ __m128 DEFAULT_FN_ATTRS
198283627Sdim_mm_mask_and_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
199283627Sdim  return (__m128) __builtin_ia32_andps128_mask ((__v4sf) __A,
200283627Sdim            (__v4sf) __B,
201283627Sdim            (__v4sf) __W,
202283627Sdim            (__mmask8) __U);
203283627Sdim}
204283627Sdim
205284734Sdimstatic __inline__ __m128 DEFAULT_FN_ATTRS
206283627Sdim_mm_maskz_and_ps (__mmask8 __U, __m128 __A, __m128 __B) {
207283627Sdim  return (__m128) __builtin_ia32_andps128_mask ((__v4sf) __A,
208283627Sdim            (__v4sf) __B,
209283627Sdim            (__v4sf)
210283627Sdim            _mm_setzero_ps (),
211283627Sdim            (__mmask8) __U);
212283627Sdim}
213283627Sdim
214284734Sdimstatic __inline__ __m256d DEFAULT_FN_ATTRS
215283627Sdim_mm256_mask_xor_pd (__m256d __W, __mmask8 __U, __m256d __A,
216283627Sdim        __m256d __B) {
217283627Sdim  return (__m256d) __builtin_ia32_xorpd256_mask ((__v4df) __A,
218283627Sdim             (__v4df) __B,
219283627Sdim             (__v4df) __W,
220283627Sdim             (__mmask8) __U);
221283627Sdim}
222283627Sdim
223284734Sdimstatic __inline__ __m256d DEFAULT_FN_ATTRS
224283627Sdim_mm256_maskz_xor_pd (__mmask8 __U, __m256d __A, __m256d __B) {
225283627Sdim  return (__m256d) __builtin_ia32_xorpd256_mask ((__v4df) __A,
226283627Sdim             (__v4df) __B,
227283627Sdim             (__v4df)
228283627Sdim             _mm256_setzero_pd (),
229283627Sdim             (__mmask8) __U);
230283627Sdim}
231283627Sdim
232284734Sdimstatic __inline__ __m128d DEFAULT_FN_ATTRS
233283627Sdim_mm_mask_xor_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
234283627Sdim  return (__m128d) __builtin_ia32_xorpd128_mask ((__v2df) __A,
235283627Sdim             (__v2df) __B,
236283627Sdim             (__v2df) __W,
237283627Sdim             (__mmask8) __U);
238283627Sdim}
239283627Sdim
240284734Sdimstatic __inline__ __m128d DEFAULT_FN_ATTRS
241283627Sdim_mm_maskz_xor_pd (__mmask8 __U, __m128d __A, __m128d __B) {
242283627Sdim  return (__m128d) __builtin_ia32_xorpd128_mask ((__v2df) __A,
243283627Sdim             (__v2df) __B,
244283627Sdim             (__v2df)
245283627Sdim             _mm_setzero_pd (),
246283627Sdim             (__mmask8) __U);
247283627Sdim}
248283627Sdim
249284734Sdimstatic __inline__ __m256 DEFAULT_FN_ATTRS
250283627Sdim_mm256_mask_xor_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
251283627Sdim  return (__m256) __builtin_ia32_xorps256_mask ((__v8sf) __A,
252283627Sdim            (__v8sf) __B,
253283627Sdim            (__v8sf) __W,
254283627Sdim            (__mmask8) __U);
255283627Sdim}
256283627Sdim
257284734Sdimstatic __inline__ __m256 DEFAULT_FN_ATTRS
258283627Sdim_mm256_maskz_xor_ps (__mmask8 __U, __m256 __A, __m256 __B) {
259283627Sdim  return (__m256) __builtin_ia32_xorps256_mask ((__v8sf) __A,
260283627Sdim            (__v8sf) __B,
261283627Sdim            (__v8sf)
262283627Sdim            _mm256_setzero_ps (),
263283627Sdim            (__mmask8) __U);
264283627Sdim}
265283627Sdim
266284734Sdimstatic __inline__ __m128 DEFAULT_FN_ATTRS
267283627Sdim_mm_mask_xor_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
268283627Sdim  return (__m128) __builtin_ia32_xorps128_mask ((__v4sf) __A,
269283627Sdim            (__v4sf) __B,
270283627Sdim            (__v4sf) __W,
271283627Sdim            (__mmask8) __U);
272283627Sdim}
273283627Sdim
274284734Sdimstatic __inline__ __m128 DEFAULT_FN_ATTRS
275283627Sdim_mm_maskz_xor_ps (__mmask8 __U, __m128 __A, __m128 __B) {
276283627Sdim  return (__m128) __builtin_ia32_xorps128_mask ((__v4sf) __A,
277283627Sdim            (__v4sf) __B,
278283627Sdim            (__v4sf)
279283627Sdim            _mm_setzero_ps (),
280283627Sdim            (__mmask8) __U);
281283627Sdim}
282283627Sdim
283284734Sdimstatic __inline__ __m256d DEFAULT_FN_ATTRS
284283627Sdim_mm256_mask_or_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
285283627Sdim  return (__m256d) __builtin_ia32_orpd256_mask ((__v4df) __A,
286283627Sdim            (__v4df) __B,
287283627Sdim            (__v4df) __W,
288283627Sdim            (__mmask8) __U);
289283627Sdim}
290283627Sdim
291284734Sdimstatic __inline__ __m256d DEFAULT_FN_ATTRS
292283627Sdim_mm256_maskz_or_pd (__mmask8 __U, __m256d __A, __m256d __B) {
293283627Sdim  return (__m256d) __builtin_ia32_orpd256_mask ((__v4df) __A,
294283627Sdim            (__v4df) __B,
295283627Sdim            (__v4df)
296283627Sdim            _mm256_setzero_pd (),
297283627Sdim            (__mmask8) __U);
298283627Sdim}
299283627Sdim
300284734Sdimstatic __inline__ __m128d DEFAULT_FN_ATTRS
301283627Sdim_mm_mask_or_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
302283627Sdim  return (__m128d) __builtin_ia32_orpd128_mask ((__v2df) __A,
303283627Sdim            (__v2df) __B,
304283627Sdim            (__v2df) __W,
305283627Sdim            (__mmask8) __U);
306283627Sdim}
307283627Sdim
308284734Sdimstatic __inline__ __m128d DEFAULT_FN_ATTRS
309283627Sdim_mm_maskz_or_pd (__mmask8 __U, __m128d __A, __m128d __B) {
310283627Sdim  return (__m128d) __builtin_ia32_orpd128_mask ((__v2df) __A,
311283627Sdim            (__v2df) __B,
312283627Sdim            (__v2df)
313283627Sdim            _mm_setzero_pd (),
314283627Sdim            (__mmask8) __U);
315283627Sdim}
316283627Sdim
317284734Sdimstatic __inline__ __m256 DEFAULT_FN_ATTRS
318283627Sdim_mm256_mask_or_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
319283627Sdim  return (__m256) __builtin_ia32_orps256_mask ((__v8sf) __A,
320283627Sdim                 (__v8sf) __B,
321283627Sdim                 (__v8sf) __W,
322283627Sdim                 (__mmask8) __U);
323283627Sdim}
324283627Sdim
325284734Sdimstatic __inline__ __m256 DEFAULT_FN_ATTRS
326283627Sdim_mm256_maskz_or_ps (__mmask8 __U, __m256 __A, __m256 __B) {
327283627Sdim  return (__m256) __builtin_ia32_orps256_mask ((__v8sf) __A,
328283627Sdim                 (__v8sf) __B,
329283627Sdim                 (__v8sf)
330283627Sdim                 _mm256_setzero_ps (),
331283627Sdim                 (__mmask8) __U);
332283627Sdim}
333283627Sdim
334284734Sdimstatic __inline__ __m128 DEFAULT_FN_ATTRS
335283627Sdim_mm_mask_or_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
336283627Sdim  return (__m128) __builtin_ia32_orps128_mask ((__v4sf) __A,
337283627Sdim                 (__v4sf) __B,
338283627Sdim                 (__v4sf) __W,
339283627Sdim                 (__mmask8) __U);
340283627Sdim}
341283627Sdim
342284734Sdimstatic __inline__ __m128 DEFAULT_FN_ATTRS
343283627Sdim_mm_maskz_or_ps (__mmask8 __U, __m128 __A, __m128 __B) {
344283627Sdim  return (__m128) __builtin_ia32_orps128_mask ((__v4sf) __A,
345283627Sdim                 (__v4sf) __B,
346283627Sdim                 (__v4sf)
347283627Sdim                 _mm_setzero_ps (),
348283627Sdim                 (__mmask8) __U);
349283627Sdim}
350283627Sdim
351284734Sdim#undef DEFAULT_FN_ATTRS
352284734Sdim
353283627Sdim#endif
354