avx512vldqintrin.h revision 283627
1283627Sdim/*===---- avx512vldqintrin.h - AVX512VL and AVX512DQ intrinsics ---------------------------===
2283627Sdim *
3283627Sdim * Permission is hereby granted, free of charge, to any person obtaining a copy
4283627Sdim * of this software and associated documentation files (the "Software"), to deal
5283627Sdim * in the Software without restriction, including without limitation the rights
6283627Sdim * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7283627Sdim * copies of the Software, and to permit persons to whom the Software is
8283627Sdim * furnished to do so, subject to the following conditions:
9283627Sdim *
10283627Sdim * The above copyright notice and this permission notice shall be included in
11283627Sdim * all copies or substantial portions of the Software.
12283627Sdim *
13283627Sdim * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14283627Sdim * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15283627Sdim * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16283627Sdim * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17283627Sdim * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18283627Sdim * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19283627Sdim * THE SOFTWARE.
20283627Sdim *
21283627Sdim *===-----------------------------------------------------------------------===
22283627Sdim */
23283627Sdim
24283627Sdim#ifndef __IMMINTRIN_H
25283627Sdim#error "Never use <avx512vldqintrin.h> directly; include <immintrin.h> instead."
26283627Sdim#endif
27283627Sdim
28283627Sdim#ifndef __AVX512VLDQINTRIN_H
29283627Sdim#define __AVX512VLDQINTRIN_H
30283627Sdim
31283627Sdim
32283627Sdimstatic __inline__ __m256i __attribute__ ((__always_inline__, __nodebug__))
33283627Sdim_mm256_mullo_epi64 (__m256i __A, __m256i __B) {
34283627Sdim  return (__m256i) ((__v4di) __A * (__v4di) __B);
35283627Sdim}
36283627Sdim
37283627Sdimstatic __inline__ __m256i __attribute__ ((__always_inline__, __nodebug__))
38283627Sdim_mm256_mask_mullo_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) {
39283627Sdim  return (__m256i) __builtin_ia32_pmullq256_mask ((__v4di) __A,
40283627Sdim              (__v4di) __B,
41283627Sdim              (__v4di) __W,
42283627Sdim              (__mmask8) __U);
43283627Sdim}
44283627Sdim
45283627Sdimstatic __inline__ __m256i __attribute__ ((__always_inline__, __nodebug__))
46283627Sdim_mm256_maskz_mullo_epi64 (__mmask8 __U, __m256i __A, __m256i __B) {
47283627Sdim  return (__m256i) __builtin_ia32_pmullq256_mask ((__v4di) __A,
48283627Sdim              (__v4di) __B,
49283627Sdim              (__v4di)
50283627Sdim              _mm256_setzero_si256 (),
51283627Sdim              (__mmask8) __U);
52283627Sdim}
53283627Sdim
54283627Sdimstatic __inline__ __m128i __attribute__ ((__always_inline__, __nodebug__))
55283627Sdim_mm_mullo_epi64 (__m128i __A, __m128i __B) {
56283627Sdim  return (__m128i) ((__v2di) __A * (__v2di) __B);
57283627Sdim}
58283627Sdim
59283627Sdimstatic __inline__ __m128i __attribute__ ((__always_inline__, __nodebug__))
60283627Sdim_mm_mask_mullo_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
61283627Sdim  return (__m128i) __builtin_ia32_pmullq128_mask ((__v2di) __A,
62283627Sdim              (__v2di) __B,
63283627Sdim              (__v2di) __W,
64283627Sdim              (__mmask8) __U);
65283627Sdim}
66283627Sdim
67283627Sdimstatic __inline__ __m128i __attribute__ ((__always_inline__, __nodebug__))
68283627Sdim_mm_maskz_mullo_epi64 (__mmask8 __U, __m128i __A, __m128i __B) {
69283627Sdim  return (__m128i) __builtin_ia32_pmullq128_mask ((__v2di) __A,
70283627Sdim              (__v2di) __B,
71283627Sdim              (__v2di)
72283627Sdim              _mm_setzero_si128 (),
73283627Sdim              (__mmask8) __U);
74283627Sdim}
75283627Sdim
76283627Sdimstatic __inline__ __m256d __attribute__ ((__always_inline__, __nodebug__))
77283627Sdim_mm256_mask_andnot_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
78283627Sdim  return (__m256d) __builtin_ia32_andnpd256_mask ((__v4df) __A,
79283627Sdim              (__v4df) __B,
80283627Sdim              (__v4df) __W,
81283627Sdim              (__mmask8) __U);
82283627Sdim}
83283627Sdim
84283627Sdimstatic __inline__ __m256d __attribute__ ((__always_inline__, __nodebug__))
85283627Sdim_mm256_maskz_andnot_pd (__mmask8 __U, __m256d __A, __m256d __B) {
86283627Sdim  return (__m256d) __builtin_ia32_andnpd256_mask ((__v4df) __A,
87283627Sdim              (__v4df) __B,
88283627Sdim              (__v4df)
89283627Sdim              _mm256_setzero_pd (),
90283627Sdim              (__mmask8) __U);
91283627Sdim}
92283627Sdim
93283627Sdimstatic __inline__ __m128d __attribute__ ((__always_inline__, __nodebug__))
94283627Sdim_mm_mask_andnot_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
95283627Sdim  return (__m128d) __builtin_ia32_andnpd128_mask ((__v2df) __A,
96283627Sdim              (__v2df) __B,
97283627Sdim              (__v2df) __W,
98283627Sdim              (__mmask8) __U);
99283627Sdim}
100283627Sdim
101283627Sdimstatic __inline__ __m128d __attribute__ ((__always_inline__, __nodebug__))
102283627Sdim_mm_maskz_andnot_pd (__mmask8 __U, __m128d __A, __m128d __B) {
103283627Sdim  return (__m128d) __builtin_ia32_andnpd128_mask ((__v2df) __A,
104283627Sdim              (__v2df) __B,
105283627Sdim              (__v2df)
106283627Sdim              _mm_setzero_pd (),
107283627Sdim              (__mmask8) __U);
108283627Sdim}
109283627Sdim
110283627Sdimstatic __inline__ __m256 __attribute__ ((__always_inline__, __nodebug__))
111283627Sdim_mm256_mask_andnot_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
112283627Sdim  return (__m256) __builtin_ia32_andnps256_mask ((__v8sf) __A,
113283627Sdim             (__v8sf) __B,
114283627Sdim             (__v8sf) __W,
115283627Sdim             (__mmask8) __U);
116283627Sdim}
117283627Sdim
118283627Sdimstatic __inline__ __m256 __attribute__ ((__always_inline__, __nodebug__))
119283627Sdim_mm256_maskz_andnot_ps (__mmask8 __U, __m256 __A, __m256 __B) {
120283627Sdim  return (__m256) __builtin_ia32_andnps256_mask ((__v8sf) __A,
121283627Sdim             (__v8sf) __B,
122283627Sdim             (__v8sf)
123283627Sdim             _mm256_setzero_ps (),
124283627Sdim             (__mmask8) __U);
125283627Sdim}
126283627Sdim
127283627Sdimstatic __inline__ __m128 __attribute__ ((__always_inline__, __nodebug__))
128283627Sdim_mm_mask_andnot_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
129283627Sdim  return (__m128) __builtin_ia32_andnps128_mask ((__v4sf) __A,
130283627Sdim             (__v4sf) __B,
131283627Sdim             (__v4sf) __W,
132283627Sdim             (__mmask8) __U);
133283627Sdim}
134283627Sdim
135283627Sdimstatic __inline__ __m128 __attribute__ ((__always_inline__, __nodebug__))
136283627Sdim_mm_maskz_andnot_ps (__mmask8 __U, __m128 __A, __m128 __B) {
137283627Sdim  return (__m128) __builtin_ia32_andnps128_mask ((__v4sf) __A,
138283627Sdim             (__v4sf) __B,
139283627Sdim             (__v4sf)
140283627Sdim             _mm_setzero_ps (),
141283627Sdim             (__mmask8) __U);
142283627Sdim}
143283627Sdim
144283627Sdimstatic __inline__ __m256d __attribute__ ((__always_inline__, __nodebug__))
145283627Sdim_mm256_mask_and_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
146283627Sdim  return (__m256d) __builtin_ia32_andpd256_mask ((__v4df) __A,
147283627Sdim             (__v4df) __B,
148283627Sdim             (__v4df) __W,
149283627Sdim             (__mmask8) __U);
150283627Sdim}
151283627Sdim
152283627Sdimstatic __inline__ __m256d __attribute__ ((__always_inline__, __nodebug__))
153283627Sdim_mm256_maskz_and_pd (__mmask8 __U, __m256d __A, __m256d __B) {
154283627Sdim  return (__m256d) __builtin_ia32_andpd256_mask ((__v4df) __A,
155283627Sdim             (__v4df) __B,
156283627Sdim             (__v4df)
157283627Sdim             _mm256_setzero_pd (),
158283627Sdim             (__mmask8) __U);
159283627Sdim}
160283627Sdim
161283627Sdimstatic __inline__ __m128d __attribute__ ((__always_inline__, __nodebug__))
162283627Sdim_mm_mask_and_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
163283627Sdim  return (__m128d) __builtin_ia32_andpd128_mask ((__v2df) __A,
164283627Sdim             (__v2df) __B,
165283627Sdim             (__v2df) __W,
166283627Sdim             (__mmask8) __U);
167283627Sdim}
168283627Sdim
169283627Sdimstatic __inline__ __m128d __attribute__ ((__always_inline__, __nodebug__))
170283627Sdim_mm_maskz_and_pd (__mmask8 __U, __m128d __A, __m128d __B) {
171283627Sdim  return (__m128d) __builtin_ia32_andpd128_mask ((__v2df) __A,
172283627Sdim             (__v2df) __B,
173283627Sdim             (__v2df)
174283627Sdim             _mm_setzero_pd (),
175283627Sdim             (__mmask8) __U);
176283627Sdim}
177283627Sdim
178283627Sdimstatic __inline__ __m256 __attribute__ ((__always_inline__, __nodebug__))
179283627Sdim_mm256_mask_and_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
180283627Sdim  return (__m256) __builtin_ia32_andps256_mask ((__v8sf) __A,
181283627Sdim            (__v8sf) __B,
182283627Sdim            (__v8sf) __W,
183283627Sdim            (__mmask8) __U);
184283627Sdim}
185283627Sdim
186283627Sdimstatic __inline__ __m256 __attribute__ ((__always_inline__, __nodebug__))
187283627Sdim_mm256_maskz_and_ps (__mmask8 __U, __m256 __A, __m256 __B) {
188283627Sdim  return (__m256) __builtin_ia32_andps256_mask ((__v8sf) __A,
189283627Sdim            (__v8sf) __B,
190283627Sdim            (__v8sf)
191283627Sdim            _mm256_setzero_ps (),
192283627Sdim            (__mmask8) __U);
193283627Sdim}
194283627Sdim
195283627Sdimstatic __inline__ __m128 __attribute__ ((__always_inline__, __nodebug__))
196283627Sdim_mm_mask_and_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
197283627Sdim  return (__m128) __builtin_ia32_andps128_mask ((__v4sf) __A,
198283627Sdim            (__v4sf) __B,
199283627Sdim            (__v4sf) __W,
200283627Sdim            (__mmask8) __U);
201283627Sdim}
202283627Sdim
203283627Sdimstatic __inline__ __m128 __attribute__ ((__always_inline__, __nodebug__))
204283627Sdim_mm_maskz_and_ps (__mmask8 __U, __m128 __A, __m128 __B) {
205283627Sdim  return (__m128) __builtin_ia32_andps128_mask ((__v4sf) __A,
206283627Sdim            (__v4sf) __B,
207283627Sdim            (__v4sf)
208283627Sdim            _mm_setzero_ps (),
209283627Sdim            (__mmask8) __U);
210283627Sdim}
211283627Sdim
212283627Sdimstatic __inline__ __m256d __attribute__ ((__always_inline__, __nodebug__))
213283627Sdim_mm256_mask_xor_pd (__m256d __W, __mmask8 __U, __m256d __A,
214283627Sdim        __m256d __B) {
215283627Sdim  return (__m256d) __builtin_ia32_xorpd256_mask ((__v4df) __A,
216283627Sdim             (__v4df) __B,
217283627Sdim             (__v4df) __W,
218283627Sdim             (__mmask8) __U);
219283627Sdim}
220283627Sdim
221283627Sdimstatic __inline__ __m256d __attribute__ ((__always_inline__, __nodebug__))
222283627Sdim_mm256_maskz_xor_pd (__mmask8 __U, __m256d __A, __m256d __B) {
223283627Sdim  return (__m256d) __builtin_ia32_xorpd256_mask ((__v4df) __A,
224283627Sdim             (__v4df) __B,
225283627Sdim             (__v4df)
226283627Sdim             _mm256_setzero_pd (),
227283627Sdim             (__mmask8) __U);
228283627Sdim}
229283627Sdim
230283627Sdimstatic __inline__ __m128d __attribute__ ((__always_inline__, __nodebug__))
231283627Sdim_mm_mask_xor_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
232283627Sdim  return (__m128d) __builtin_ia32_xorpd128_mask ((__v2df) __A,
233283627Sdim             (__v2df) __B,
234283627Sdim             (__v2df) __W,
235283627Sdim             (__mmask8) __U);
236283627Sdim}
237283627Sdim
238283627Sdimstatic __inline__ __m128d __attribute__ ((__always_inline__, __nodebug__))
239283627Sdim_mm_maskz_xor_pd (__mmask8 __U, __m128d __A, __m128d __B) {
240283627Sdim  return (__m128d) __builtin_ia32_xorpd128_mask ((__v2df) __A,
241283627Sdim             (__v2df) __B,
242283627Sdim             (__v2df)
243283627Sdim             _mm_setzero_pd (),
244283627Sdim             (__mmask8) __U);
245283627Sdim}
246283627Sdim
247283627Sdimstatic __inline__ __m256 __attribute__ ((__always_inline__, __nodebug__))
248283627Sdim_mm256_mask_xor_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
249283627Sdim  return (__m256) __builtin_ia32_xorps256_mask ((__v8sf) __A,
250283627Sdim            (__v8sf) __B,
251283627Sdim            (__v8sf) __W,
252283627Sdim            (__mmask8) __U);
253283627Sdim}
254283627Sdim
255283627Sdimstatic __inline__ __m256 __attribute__ ((__always_inline__, __nodebug__))
256283627Sdim_mm256_maskz_xor_ps (__mmask8 __U, __m256 __A, __m256 __B) {
257283627Sdim  return (__m256) __builtin_ia32_xorps256_mask ((__v8sf) __A,
258283627Sdim            (__v8sf) __B,
259283627Sdim            (__v8sf)
260283627Sdim            _mm256_setzero_ps (),
261283627Sdim            (__mmask8) __U);
262283627Sdim}
263283627Sdim
264283627Sdimstatic __inline__ __m128 __attribute__ ((__always_inline__, __nodebug__))
265283627Sdim_mm_mask_xor_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
266283627Sdim  return (__m128) __builtin_ia32_xorps128_mask ((__v4sf) __A,
267283627Sdim            (__v4sf) __B,
268283627Sdim            (__v4sf) __W,
269283627Sdim            (__mmask8) __U);
270283627Sdim}
271283627Sdim
272283627Sdimstatic __inline__ __m128 __attribute__ ((__always_inline__, __nodebug__))
273283627Sdim_mm_maskz_xor_ps (__mmask8 __U, __m128 __A, __m128 __B) {
274283627Sdim  return (__m128) __builtin_ia32_xorps128_mask ((__v4sf) __A,
275283627Sdim            (__v4sf) __B,
276283627Sdim            (__v4sf)
277283627Sdim            _mm_setzero_ps (),
278283627Sdim            (__mmask8) __U);
279283627Sdim}
280283627Sdim
281283627Sdimstatic __inline__ __m256d __attribute__ ((__always_inline__, __nodebug__))
282283627Sdim_mm256_mask_or_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
283283627Sdim  return (__m256d) __builtin_ia32_orpd256_mask ((__v4df) __A,
284283627Sdim            (__v4df) __B,
285283627Sdim            (__v4df) __W,
286283627Sdim            (__mmask8) __U);
287283627Sdim}
288283627Sdim
289283627Sdimstatic __inline__ __m256d __attribute__ ((__always_inline__, __nodebug__))
290283627Sdim_mm256_maskz_or_pd (__mmask8 __U, __m256d __A, __m256d __B) {
291283627Sdim  return (__m256d) __builtin_ia32_orpd256_mask ((__v4df) __A,
292283627Sdim            (__v4df) __B,
293283627Sdim            (__v4df)
294283627Sdim            _mm256_setzero_pd (),
295283627Sdim            (__mmask8) __U);
296283627Sdim}
297283627Sdim
298283627Sdimstatic __inline__ __m128d __attribute__ ((__always_inline__, __nodebug__))
299283627Sdim_mm_mask_or_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
300283627Sdim  return (__m128d) __builtin_ia32_orpd128_mask ((__v2df) __A,
301283627Sdim            (__v2df) __B,
302283627Sdim            (__v2df) __W,
303283627Sdim            (__mmask8) __U);
304283627Sdim}
305283627Sdim
306283627Sdimstatic __inline__ __m128d __attribute__ ((__always_inline__, __nodebug__))
307283627Sdim_mm_maskz_or_pd (__mmask8 __U, __m128d __A, __m128d __B) {
308283627Sdim  return (__m128d) __builtin_ia32_orpd128_mask ((__v2df) __A,
309283627Sdim            (__v2df) __B,
310283627Sdim            (__v2df)
311283627Sdim            _mm_setzero_pd (),
312283627Sdim            (__mmask8) __U);
313283627Sdim}
314283627Sdim
315283627Sdimstatic __inline__ __m256 __attribute__ ((__always_inline__, __nodebug__))
316283627Sdim_mm256_mask_or_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
317283627Sdim  return (__m256) __builtin_ia32_orps256_mask ((__v8sf) __A,
318283627Sdim                 (__v8sf) __B,
319283627Sdim                 (__v8sf) __W,
320283627Sdim                 (__mmask8) __U);
321283627Sdim}
322283627Sdim
323283627Sdimstatic __inline__ __m256 __attribute__ ((__always_inline__, __nodebug__))
324283627Sdim_mm256_maskz_or_ps (__mmask8 __U, __m256 __A, __m256 __B) {
325283627Sdim  return (__m256) __builtin_ia32_orps256_mask ((__v8sf) __A,
326283627Sdim                 (__v8sf) __B,
327283627Sdim                 (__v8sf)
328283627Sdim                 _mm256_setzero_ps (),
329283627Sdim                 (__mmask8) __U);
330283627Sdim}
331283627Sdim
332283627Sdimstatic __inline__ __m128 __attribute__ ((__always_inline__, __nodebug__))
333283627Sdim_mm_mask_or_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
334283627Sdim  return (__m128) __builtin_ia32_orps128_mask ((__v4sf) __A,
335283627Sdim                 (__v4sf) __B,
336283627Sdim                 (__v4sf) __W,
337283627Sdim                 (__mmask8) __U);
338283627Sdim}
339283627Sdim
340283627Sdimstatic __inline__ __m128 __attribute__ ((__always_inline__, __nodebug__))
341283627Sdim_mm_maskz_or_ps (__mmask8 __U, __m128 __A, __m128 __B) {
342283627Sdim  return (__m128) __builtin_ia32_orps128_mask ((__v4sf) __A,
343283627Sdim                 (__v4sf) __B,
344283627Sdim                 (__v4sf)
345283627Sdim                 _mm_setzero_ps (),
346283627Sdim                 (__mmask8) __U);
347283627Sdim}
348283627Sdim
349283627Sdim#endif
350