/* avx512vldqintrin.h revision 284734 */
/*===---- avx512vldqintrin.h - AVX512VL and AVX512DQ intrinsics ---------------------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */

2498121Sgshapiro#ifndef __IMMINTRIN_H
2598121Sgshapiro#error "Never use <avx512vldqintrin.h> directly; include <immintrin.h> instead."
2698121Sgshapiro#endif
2790792Sgshapiro
2890792Sgshapiro#ifndef __AVX512VLDQINTRIN_H
2990792Sgshapiro#define __AVX512VLDQINTRIN_H
3090792Sgshapiro
3190792Sgshapiro/* Define the default attributes for the functions in this file. */
3298121Sgshapiro#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512dq")))
3398121Sgshapiro
3498121Sgshapirostatic __inline__ __m256i DEFAULT_FN_ATTRS
3590792Sgshapiro_mm256_mullo_epi64 (__m256i __A, __m256i __B) {
3698121Sgshapiro  return (__m256i) ((__v4di) __A * (__v4di) __B);
3798121Sgshapiro}
3898121Sgshapiro
3998121Sgshapirostatic __inline__ __m256i DEFAULT_FN_ATTRS
4090792Sgshapiro_mm256_mask_mullo_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) {
4190792Sgshapiro  return (__m256i) __builtin_ia32_pmullq256_mask ((__v4di) __A,
4290792Sgshapiro              (__v4di) __B,
4390792Sgshapiro              (__v4di) __W,
4498121Sgshapiro              (__mmask8) __U);
4598121Sgshapiro}
4698121Sgshapiro
4798121Sgshapirostatic __inline__ __m256i DEFAULT_FN_ATTRS
4890792Sgshapiro_mm256_maskz_mullo_epi64 (__mmask8 __U, __m256i __A, __m256i __B) {
4990792Sgshapiro  return (__m256i) __builtin_ia32_pmullq256_mask ((__v4di) __A,
50168515Sgshapiro              (__v4di) __B,
5190792Sgshapiro              (__v4di)
5298121Sgshapiro              _mm256_setzero_si256 (),
5398121Sgshapiro              (__mmask8) __U);
5490792Sgshapiro}
5598121Sgshapiro
5698121Sgshapirostatic __inline__ __m128i DEFAULT_FN_ATTRS
5790792Sgshapiro_mm_mullo_epi64 (__m128i __A, __m128i __B) {
5890792Sgshapiro  return (__m128i) ((__v2di) __A * (__v2di) __B);
59168515Sgshapiro}
6098121Sgshapiro
6198121Sgshapirostatic __inline__ __m128i DEFAULT_FN_ATTRS
6298121Sgshapiro_mm_mask_mullo_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
6390792Sgshapiro  return (__m128i) __builtin_ia32_pmullq128_mask ((__v2di) __A,
6498121Sgshapiro              (__v2di) __B,
6598121Sgshapiro              (__v2di) __W,
6690792Sgshapiro              (__mmask8) __U);
6798121Sgshapiro}
6898121Sgshapiro
6998121Sgshapirostatic __inline__ __m128i DEFAULT_FN_ATTRS
7098121Sgshapiro_mm_maskz_mullo_epi64 (__mmask8 __U, __m128i __A, __m128i __B) {
7198121Sgshapiro  return (__m128i) __builtin_ia32_pmullq128_mask ((__v2di) __A,
7298121Sgshapiro              (__v2di) __B,
7398121Sgshapiro              (__v2di)
7498121Sgshapiro              _mm_setzero_si128 (),
7590792Sgshapiro              (__mmask8) __U);
7698121Sgshapiro}
77168515Sgshapiro
7898121Sgshapirostatic __inline__ __m256d DEFAULT_FN_ATTRS
79168515Sgshapiro_mm256_mask_andnot_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
8098121Sgshapiro  return (__m256d) __builtin_ia32_andnpd256_mask ((__v4df) __A,
8198121Sgshapiro              (__v4df) __B,
8298121Sgshapiro              (__v4df) __W,
8398121Sgshapiro              (__mmask8) __U);
8498121Sgshapiro}
8598121Sgshapiro
8698121Sgshapirostatic __inline__ __m256d DEFAULT_FN_ATTRS
8798121Sgshapiro_mm256_maskz_andnot_pd (__mmask8 __U, __m256d __A, __m256d __B) {
8898121Sgshapiro  return (__m256d) __builtin_ia32_andnpd256_mask ((__v4df) __A,
8998121Sgshapiro              (__v4df) __B,
9090792Sgshapiro              (__v4df)
9190792Sgshapiro              _mm256_setzero_pd (),
9290792Sgshapiro              (__mmask8) __U);
93168515Sgshapiro}
9498121Sgshapiro
9598121Sgshapirostatic __inline__ __m128d DEFAULT_FN_ATTRS
9690792Sgshapiro_mm_mask_andnot_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
9798121Sgshapiro  return (__m128d) __builtin_ia32_andnpd128_mask ((__v2df) __A,
9898121Sgshapiro              (__v2df) __B,
9998121Sgshapiro              (__v2df) __W,
10098121Sgshapiro              (__mmask8) __U);
10198121Sgshapiro}
102168515Sgshapiro
10398121Sgshapirostatic __inline__ __m128d DEFAULT_FN_ATTRS
10498121Sgshapiro_mm_maskz_andnot_pd (__mmask8 __U, __m128d __A, __m128d __B) {
10598121Sgshapiro  return (__m128d) __builtin_ia32_andnpd128_mask ((__v2df) __A,
10698121Sgshapiro              (__v2df) __B,
10798121Sgshapiro              (__v2df)
10898121Sgshapiro              _mm_setzero_pd (),
10998121Sgshapiro              (__mmask8) __U);
11098121Sgshapiro}
11198121Sgshapiro
11298121Sgshapirostatic __inline__ __m256 DEFAULT_FN_ATTRS
11398121Sgshapiro_mm256_mask_andnot_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
11498121Sgshapiro  return (__m256) __builtin_ia32_andnps256_mask ((__v8sf) __A,
11598121Sgshapiro             (__v8sf) __B,
11698121Sgshapiro             (__v8sf) __W,
11798121Sgshapiro             (__mmask8) __U);
11898121Sgshapiro}
11998121Sgshapiro
12090792Sgshapirostatic __inline__ __m256 DEFAULT_FN_ATTRS
12190792Sgshapiro_mm256_maskz_andnot_ps (__mmask8 __U, __m256 __A, __m256 __B) {
12290792Sgshapiro  return (__m256) __builtin_ia32_andnps256_mask ((__v8sf) __A,
123168515Sgshapiro             (__v8sf) __B,
12498121Sgshapiro             (__v8sf)
12598121Sgshapiro             _mm256_setzero_ps (),
12690792Sgshapiro             (__mmask8) __U);
127132943Sgshapiro}
128132943Sgshapiro
12998121Sgshapirostatic __inline__ __m128 DEFAULT_FN_ATTRS
130168515Sgshapiro_mm_mask_andnot_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
13190792Sgshapiro  return (__m128) __builtin_ia32_andnps128_mask ((__v4sf) __A,
13298121Sgshapiro             (__v4sf) __B,
13398121Sgshapiro             (__v4sf) __W,
13498121Sgshapiro             (__mmask8) __U);
13598121Sgshapiro}
13698121Sgshapiro
13798121Sgshapirostatic __inline__ __m128 DEFAULT_FN_ATTRS
13890792Sgshapiro_mm_maskz_andnot_ps (__mmask8 __U, __m128 __A, __m128 __B) {
139132943Sgshapiro  return (__m128) __builtin_ia32_andnps128_mask ((__v4sf) __A,
14098121Sgshapiro             (__v4sf) __B,
14198121Sgshapiro             (__v4sf)
14298121Sgshapiro             _mm_setzero_ps (),
14398121Sgshapiro             (__mmask8) __U);
14498121Sgshapiro}
145132943Sgshapiro
14698121Sgshapirostatic __inline__ __m256d DEFAULT_FN_ATTRS
147132943Sgshapiro_mm256_mask_and_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
14898121Sgshapiro  return (__m256d) __builtin_ia32_andpd256_mask ((__v4df) __A,
14998121Sgshapiro             (__v4df) __B,
15098121Sgshapiro             (__v4df) __W,
15198121Sgshapiro             (__mmask8) __U);
15298121Sgshapiro}
15398121Sgshapiro
15498121Sgshapirostatic __inline__ __m256d DEFAULT_FN_ATTRS
15598121Sgshapiro_mm256_maskz_and_pd (__mmask8 __U, __m256d __A, __m256d __B) {
15698121Sgshapiro  return (__m256d) __builtin_ia32_andpd256_mask ((__v4df) __A,
15798121Sgshapiro             (__v4df) __B,
15898121Sgshapiro             (__v4df)
15998121Sgshapiro             _mm256_setzero_pd (),
16098121Sgshapiro             (__mmask8) __U);
16198121Sgshapiro}
16298121Sgshapiro
16398121Sgshapirostatic __inline__ __m128d DEFAULT_FN_ATTRS
16498121Sgshapiro_mm_mask_and_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
16598121Sgshapiro  return (__m128d) __builtin_ia32_andpd128_mask ((__v2df) __A,
16698121Sgshapiro             (__v2df) __B,
16798121Sgshapiro             (__v2df) __W,
16898121Sgshapiro             (__mmask8) __U);
16998121Sgshapiro}
17098121Sgshapiro
17198121Sgshapirostatic __inline__ __m128d DEFAULT_FN_ATTRS
17298121Sgshapiro_mm_maskz_and_pd (__mmask8 __U, __m128d __A, __m128d __B) {
17398121Sgshapiro  return (__m128d) __builtin_ia32_andpd128_mask ((__v2df) __A,
17490792Sgshapiro             (__v2df) __B,
17590792Sgshapiro             (__v2df)
17690792Sgshapiro             _mm_setzero_pd (),
177168515Sgshapiro             (__mmask8) __U);
17898121Sgshapiro}
17998121Sgshapiro
18090792Sgshapirostatic __inline__ __m256 DEFAULT_FN_ATTRS
18198121Sgshapiro_mm256_mask_and_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
182168515Sgshapiro  return (__m256) __builtin_ia32_andps256_mask ((__v8sf) __A,
18398121Sgshapiro            (__v8sf) __B,
18498121Sgshapiro            (__v8sf) __W,
18598121Sgshapiro            (__mmask8) __U);
18698121Sgshapiro}
18798121Sgshapiro
18898121Sgshapirostatic __inline__ __m256 DEFAULT_FN_ATTRS
18998121Sgshapiro_mm256_maskz_and_ps (__mmask8 __U, __m256 __A, __m256 __B) {
19098121Sgshapiro  return (__m256) __builtin_ia32_andps256_mask ((__v8sf) __A,
19198121Sgshapiro            (__v8sf) __B,
19298121Sgshapiro            (__v8sf)
19398121Sgshapiro            _mm256_setzero_ps (),
19498121Sgshapiro            (__mmask8) __U);
19598121Sgshapiro}
19698121Sgshapiro
19798121Sgshapirostatic __inline__ __m128 DEFAULT_FN_ATTRS
19898121Sgshapiro_mm_mask_and_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
19998121Sgshapiro  return (__m128) __builtin_ia32_andps128_mask ((__v4sf) __A,
20090792Sgshapiro            (__v4sf) __B,
20198121Sgshapiro            (__v4sf) __W,
20298121Sgshapiro            (__mmask8) __U);
20398121Sgshapiro}
20498121Sgshapiro
20598121Sgshapirostatic __inline__ __m128 DEFAULT_FN_ATTRS
20698121Sgshapiro_mm_maskz_and_ps (__mmask8 __U, __m128 __A, __m128 __B) {
20798121Sgshapiro  return (__m128) __builtin_ia32_andps128_mask ((__v4sf) __A,
20898121Sgshapiro            (__v4sf) __B,
20998121Sgshapiro            (__v4sf)
21098121Sgshapiro            _mm_setzero_ps (),
21190792Sgshapiro            (__mmask8) __U);
21290792Sgshapiro}
21390792Sgshapiro
214168515Sgshapirostatic __inline__ __m256d DEFAULT_FN_ATTRS
21598121Sgshapiro_mm256_mask_xor_pd (__m256d __W, __mmask8 __U, __m256d __A,
21698121Sgshapiro        __m256d __B) {
21798121Sgshapiro  return (__m256d) __builtin_ia32_xorpd256_mask ((__v4df) __A,
21890792Sgshapiro             (__v4df) __B,
21998121Sgshapiro             (__v4df) __W,
22098121Sgshapiro             (__mmask8) __U);
22198121Sgshapiro}
22298121Sgshapiro
22398121Sgshapirostatic __inline__ __m256d DEFAULT_FN_ATTRS
22498121Sgshapiro_mm256_maskz_xor_pd (__mmask8 __U, __m256d __A, __m256d __B) {
22590792Sgshapiro  return (__m256d) __builtin_ia32_xorpd256_mask ((__v4df) __A,
22698121Sgshapiro             (__v4df) __B,
22798121Sgshapiro             (__v4df)
22890792Sgshapiro             _mm256_setzero_pd (),
22990792Sgshapiro             (__mmask8) __U);
23090792Sgshapiro}
231168515Sgshapiro
23298121Sgshapirostatic __inline__ __m128d DEFAULT_FN_ATTRS
23390792Sgshapiro_mm_mask_xor_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
23498121Sgshapiro  return (__m128d) __builtin_ia32_xorpd128_mask ((__v2df) __A,
23598121Sgshapiro             (__v2df) __B,
23698121Sgshapiro             (__v2df) __W,
23798121Sgshapiro             (__mmask8) __U);
23898121Sgshapiro}
23998121Sgshapiro
24090792Sgshapirostatic __inline__ __m128d DEFAULT_FN_ATTRS
24198121Sgshapiro_mm_maskz_xor_pd (__mmask8 __U, __m128d __A, __m128d __B) {
24298121Sgshapiro  return (__m128d) __builtin_ia32_xorpd128_mask ((__v2df) __A,
24390792Sgshapiro             (__v2df) __B,
24490792Sgshapiro             (__v2df)
24590792Sgshapiro             _mm_setzero_pd (),
246168515Sgshapiro             (__mmask8) __U);
24798121Sgshapiro}
24898121Sgshapiro
24998121Sgshapirostatic __inline__ __m256 DEFAULT_FN_ATTRS
25090792Sgshapiro_mm256_mask_xor_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
25198121Sgshapiro  return (__m256) __builtin_ia32_xorps256_mask ((__v8sf) __A,
25290792Sgshapiro            (__v8sf) __B,
25398121Sgshapiro            (__v8sf) __W,
25498121Sgshapiro            (__mmask8) __U);
25598121Sgshapiro}
25698121Sgshapiro
25798121Sgshapirostatic __inline__ __m256 DEFAULT_FN_ATTRS
25898121Sgshapiro_mm256_maskz_xor_ps (__mmask8 __U, __m256 __A, __m256 __B) {
25998121Sgshapiro  return (__m256) __builtin_ia32_xorps256_mask ((__v8sf) __A,
26098121Sgshapiro            (__v8sf) __B,
26198121Sgshapiro            (__v8sf)
26298121Sgshapiro            _mm256_setzero_ps (),
26398121Sgshapiro            (__mmask8) __U);
26498121Sgshapiro}
26590792Sgshapiro
26690792Sgshapirostatic __inline__ __m128 DEFAULT_FN_ATTRS
26790792Sgshapiro_mm_mask_xor_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
268168515Sgshapiro  return (__m128) __builtin_ia32_xorps128_mask ((__v4sf) __A,
26998121Sgshapiro            (__v4sf) __B,
27090792Sgshapiro            (__v4sf) __W,
27198121Sgshapiro            (__mmask8) __U);
27298121Sgshapiro}
27398121Sgshapiro
27498121Sgshapirostatic __inline__ __m128 DEFAULT_FN_ATTRS
275168515Sgshapiro_mm_maskz_xor_ps (__mmask8 __U, __m128 __A, __m128 __B) {
27698121Sgshapiro  return (__m128) __builtin_ia32_xorps128_mask ((__v4sf) __A,
27790792Sgshapiro            (__v4sf) __B,
27890792Sgshapiro            (__v4sf)
27990792Sgshapiro            _mm_setzero_ps (),
280168515Sgshapiro            (__mmask8) __U);
28198121Sgshapiro}
28290792Sgshapiro
28398121Sgshapirostatic __inline__ __m256d DEFAULT_FN_ATTRS
28490792Sgshapiro_mm256_mask_or_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
28590792Sgshapiro  return (__m256d) __builtin_ia32_orpd256_mask ((__v4df) __A,
28690792Sgshapiro            (__v4df) __B,
28790792Sgshapiro            (__v4df) __W,
28898121Sgshapiro            (__mmask8) __U);
28998121Sgshapiro}
29090792Sgshapiro
29198121Sgshapirostatic __inline__ __m256d DEFAULT_FN_ATTRS
29298121Sgshapiro_mm256_maskz_or_pd (__mmask8 __U, __m256d __A, __m256d __B) {
29398121Sgshapiro  return (__m256d) __builtin_ia32_orpd256_mask ((__v4df) __A,
29498121Sgshapiro            (__v4df) __B,
29598121Sgshapiro            (__v4df)
29690792Sgshapiro            _mm256_setzero_pd (),
29798121Sgshapiro            (__mmask8) __U);
29898121Sgshapiro}
29990792Sgshapiro
30098121Sgshapirostatic __inline__ __m128d DEFAULT_FN_ATTRS
30198121Sgshapiro_mm_mask_or_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
30298121Sgshapiro  return (__m128d) __builtin_ia32_orpd128_mask ((__v2df) __A,
30398121Sgshapiro            (__v2df) __B,
30498121Sgshapiro            (__v2df) __W,
30598121Sgshapiro            (__mmask8) __U);
30698121Sgshapiro}
30798121Sgshapiro
30898121Sgshapirostatic __inline__ __m128d DEFAULT_FN_ATTRS
30998121Sgshapiro_mm_maskz_or_pd (__mmask8 __U, __m128d __A, __m128d __B) {
31098121Sgshapiro  return (__m128d) __builtin_ia32_orpd128_mask ((__v2df) __A,
31198121Sgshapiro            (__v2df) __B,
31298121Sgshapiro            (__v2df)
31398121Sgshapiro            _mm_setzero_pd (),
31498121Sgshapiro            (__mmask8) __U);
31598121Sgshapiro}
31698121Sgshapiro
31798121Sgshapirostatic __inline__ __m256 DEFAULT_FN_ATTRS
31898121Sgshapiro_mm256_mask_or_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
31998121Sgshapiro  return (__m256) __builtin_ia32_orps256_mask ((__v8sf) __A,
320168515Sgshapiro                 (__v8sf) __B,
32198121Sgshapiro                 (__v8sf) __W,
32298121Sgshapiro                 (__mmask8) __U);
32398121Sgshapiro}
32498121Sgshapiro
32598121Sgshapirostatic __inline__ __m256 DEFAULT_FN_ATTRS
32698121Sgshapiro_mm256_maskz_or_ps (__mmask8 __U, __m256 __A, __m256 __B) {
32798121Sgshapiro  return (__m256) __builtin_ia32_orps256_mask ((__v8sf) __A,
32898121Sgshapiro                 (__v8sf) __B,
32998121Sgshapiro                 (__v8sf)
33098121Sgshapiro                 _mm256_setzero_ps (),
33190792Sgshapiro                 (__mmask8) __U);
33298121Sgshapiro}
33398121Sgshapiro
33498121Sgshapirostatic __inline__ __m128 DEFAULT_FN_ATTRS
33598121Sgshapiro_mm_mask_or_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
33698121Sgshapiro  return (__m128) __builtin_ia32_orps128_mask ((__v4sf) __A,
33798121Sgshapiro                 (__v4sf) __B,
33898121Sgshapiro                 (__v4sf) __W,
33990792Sgshapiro                 (__mmask8) __U);
34098121Sgshapiro}
34198121Sgshapiro
34298121Sgshapirostatic __inline__ __m128 DEFAULT_FN_ATTRS
34390792Sgshapiro_mm_maskz_or_ps (__mmask8 __U, __m128 __A, __m128 __B) {
34498121Sgshapiro  return (__m128) __builtin_ia32_orps128_mask ((__v4sf) __A,
34598121Sgshapiro                 (__v4sf) __B,
34690792Sgshapiro                 (__v4sf)
34790792Sgshapiro                 _mm_setzero_ps (),
34890792Sgshapiro                 (__mmask8) __U);
349168515Sgshapiro}
35098121Sgshapiro
35190792Sgshapiro#undef DEFAULT_FN_ATTRS
35298121Sgshapiro
35398121Sgshapiro#endif
35498121Sgshapiro