avx512vldqintrin.h revision 284734
1168515Sgshapiro/*===---- avx512vldqintrin.h - AVX512VL and AVX512DQ intrinsics ---------------------------=== 2168515Sgshapiro * 3168515Sgshapiro * Permission is hereby granted, free of charge, to any person obtaining a copy 4132943Sgshapiro * of this software and associated documentation files (the "Software"), to deal 5168515Sgshapiro * in the Software without restriction, including without limitation the rights 6132943Sgshapiro * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7168515Sgshapiro * copies of the Software, and to permit persons to whom the Software is 890792Sgshapiro * furnished to do so, subject to the following conditions: 990792Sgshapiro * 10168515Sgshapiro * The above copyright notice and this permission notice shall be included in 11168515Sgshapiro * all copies or substantial portions of the Software. 12168515Sgshapiro * 13168515Sgshapiro * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14168515Sgshapiro * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15168515Sgshapiro * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16168515Sgshapiro * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17168515Sgshapiro * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18168515Sgshapiro * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19168515Sgshapiro * THE SOFTWARE. 20168515Sgshapiro * 21168515Sgshapiro *===-----------------------------------------------------------------------=== 22168515Sgshapiro */ 23168515Sgshapiro 2498121Sgshapiro#ifndef __IMMINTRIN_H 2598121Sgshapiro#error "Never use <avx512vldqintrin.h> directly; include <immintrin.h> instead." 2698121Sgshapiro#endif 2790792Sgshapiro 2890792Sgshapiro#ifndef __AVX512VLDQINTRIN_H 2990792Sgshapiro#define __AVX512VLDQINTRIN_H 3090792Sgshapiro 3190792Sgshapiro/* Define the default attributes for the functions in this file. */ 3298121Sgshapiro#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512dq"))) 3398121Sgshapiro 3498121Sgshapirostatic __inline__ __m256i DEFAULT_FN_ATTRS 3590792Sgshapiro_mm256_mullo_epi64 (__m256i __A, __m256i __B) { 3698121Sgshapiro return (__m256i) ((__v4di) __A * (__v4di) __B); 3798121Sgshapiro} 3898121Sgshapiro 3998121Sgshapirostatic __inline__ __m256i DEFAULT_FN_ATTRS 4090792Sgshapiro_mm256_mask_mullo_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { 4190792Sgshapiro return (__m256i) __builtin_ia32_pmullq256_mask ((__v4di) __A, 4290792Sgshapiro (__v4di) __B, 4390792Sgshapiro (__v4di) __W, 4498121Sgshapiro (__mmask8) __U); 4598121Sgshapiro} 4698121Sgshapiro 4798121Sgshapirostatic __inline__ __m256i DEFAULT_FN_ATTRS 4890792Sgshapiro_mm256_maskz_mullo_epi64 (__mmask8 __U, __m256i __A, __m256i __B) { 4990792Sgshapiro return (__m256i) __builtin_ia32_pmullq256_mask ((__v4di) __A, 50168515Sgshapiro (__v4di) __B, 5190792Sgshapiro (__v4di) 5298121Sgshapiro _mm256_setzero_si256 (), 5398121Sgshapiro (__mmask8) __U); 5490792Sgshapiro} 5598121Sgshapiro 5698121Sgshapirostatic __inline__ __m128i DEFAULT_FN_ATTRS 5790792Sgshapiro_mm_mullo_epi64 (__m128i __A, __m128i __B) { 5890792Sgshapiro return (__m128i) ((__v2di) __A * (__v2di) __B); 59168515Sgshapiro} 6098121Sgshapiro 6198121Sgshapirostatic __inline__ __m128i DEFAULT_FN_ATTRS 6298121Sgshapiro_mm_mask_mullo_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { 6390792Sgshapiro return (__m128i) __builtin_ia32_pmullq128_mask ((__v2di) __A, 6498121Sgshapiro (__v2di) __B, 6598121Sgshapiro (__v2di) __W, 6690792Sgshapiro (__mmask8) __U); 6798121Sgshapiro} 6898121Sgshapiro 6998121Sgshapirostatic __inline__ __m128i DEFAULT_FN_ATTRS 7098121Sgshapiro_mm_maskz_mullo_epi64 (__mmask8 __U, __m128i __A, __m128i __B) { 7198121Sgshapiro return (__m128i) __builtin_ia32_pmullq128_mask ((__v2di) __A, 7298121Sgshapiro (__v2di) __B, 7398121Sgshapiro (__v2di) 7498121Sgshapiro _mm_setzero_si128 (), 7590792Sgshapiro (__mmask8) __U); 7698121Sgshapiro} 77168515Sgshapiro 7898121Sgshapirostatic __inline__ __m256d DEFAULT_FN_ATTRS 79168515Sgshapiro_mm256_mask_andnot_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { 8098121Sgshapiro return (__m256d) __builtin_ia32_andnpd256_mask ((__v4df) __A, 8198121Sgshapiro (__v4df) __B, 8298121Sgshapiro (__v4df) __W, 8398121Sgshapiro (__mmask8) __U); 8498121Sgshapiro} 8598121Sgshapiro 8698121Sgshapirostatic __inline__ __m256d DEFAULT_FN_ATTRS 8798121Sgshapiro_mm256_maskz_andnot_pd (__mmask8 __U, __m256d __A, __m256d __B) { 8898121Sgshapiro return (__m256d) __builtin_ia32_andnpd256_mask ((__v4df) __A, 8998121Sgshapiro (__v4df) __B, 9090792Sgshapiro (__v4df) 9190792Sgshapiro _mm256_setzero_pd (), 9290792Sgshapiro (__mmask8) __U); 93168515Sgshapiro} 9498121Sgshapiro 9598121Sgshapirostatic __inline__ __m128d DEFAULT_FN_ATTRS 9690792Sgshapiro_mm_mask_andnot_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 9798121Sgshapiro return (__m128d) __builtin_ia32_andnpd128_mask ((__v2df) __A, 9898121Sgshapiro (__v2df) __B, 9998121Sgshapiro (__v2df) __W, 10098121Sgshapiro (__mmask8) __U); 10198121Sgshapiro} 102168515Sgshapiro 10398121Sgshapirostatic __inline__ __m128d DEFAULT_FN_ATTRS 10498121Sgshapiro_mm_maskz_andnot_pd (__mmask8 __U, __m128d __A, __m128d __B) { 10598121Sgshapiro return (__m128d) __builtin_ia32_andnpd128_mask ((__v2df) __A, 10698121Sgshapiro (__v2df) __B, 10798121Sgshapiro (__v2df) 10898121Sgshapiro _mm_setzero_pd (), 10998121Sgshapiro (__mmask8) __U); 11098121Sgshapiro} 11198121Sgshapiro 11298121Sgshapirostatic __inline__ __m256 DEFAULT_FN_ATTRS 11398121Sgshapiro_mm256_mask_andnot_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 11498121Sgshapiro return (__m256) __builtin_ia32_andnps256_mask ((__v8sf) __A, 11598121Sgshapiro (__v8sf) __B, 11698121Sgshapiro (__v8sf) __W, 11798121Sgshapiro (__mmask8) __U); 11898121Sgshapiro} 11998121Sgshapiro 12090792Sgshapirostatic __inline__ __m256 DEFAULT_FN_ATTRS 12190792Sgshapiro_mm256_maskz_andnot_ps (__mmask8 __U, __m256 __A, __m256 __B) { 12290792Sgshapiro return (__m256) __builtin_ia32_andnps256_mask ((__v8sf) __A, 123168515Sgshapiro (__v8sf) __B, 12498121Sgshapiro (__v8sf) 12598121Sgshapiro _mm256_setzero_ps (), 12690792Sgshapiro (__mmask8) __U); 127132943Sgshapiro} 128132943Sgshapiro 12998121Sgshapirostatic __inline__ __m128 DEFAULT_FN_ATTRS 130168515Sgshapiro_mm_mask_andnot_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 13190792Sgshapiro return (__m128) __builtin_ia32_andnps128_mask ((__v4sf) __A, 13298121Sgshapiro (__v4sf) __B, 13398121Sgshapiro (__v4sf) __W, 13498121Sgshapiro (__mmask8) __U); 13598121Sgshapiro} 13698121Sgshapiro 13798121Sgshapirostatic __inline__ __m128 DEFAULT_FN_ATTRS 13890792Sgshapiro_mm_maskz_andnot_ps (__mmask8 __U, __m128 __A, __m128 __B) { 139132943Sgshapiro return (__m128) __builtin_ia32_andnps128_mask ((__v4sf) __A, 14098121Sgshapiro (__v4sf) __B, 14198121Sgshapiro (__v4sf) 14298121Sgshapiro _mm_setzero_ps (), 14398121Sgshapiro (__mmask8) __U); 14498121Sgshapiro} 145132943Sgshapiro 14698121Sgshapirostatic __inline__ __m256d DEFAULT_FN_ATTRS 147132943Sgshapiro_mm256_mask_and_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { 14898121Sgshapiro return (__m256d) __builtin_ia32_andpd256_mask ((__v4df) __A, 14998121Sgshapiro (__v4df) __B, 15098121Sgshapiro (__v4df) __W, 15198121Sgshapiro (__mmask8) __U); 15298121Sgshapiro} 15398121Sgshapiro 15498121Sgshapirostatic __inline__ __m256d DEFAULT_FN_ATTRS 15598121Sgshapiro_mm256_maskz_and_pd (__mmask8 __U, __m256d __A, __m256d __B) { 15698121Sgshapiro return (__m256d) __builtin_ia32_andpd256_mask ((__v4df) __A, 15798121Sgshapiro (__v4df) __B, 15898121Sgshapiro (__v4df) 15998121Sgshapiro _mm256_setzero_pd (), 16098121Sgshapiro (__mmask8) __U); 16198121Sgshapiro} 16298121Sgshapiro 16398121Sgshapirostatic __inline__ __m128d DEFAULT_FN_ATTRS 16498121Sgshapiro_mm_mask_and_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 16598121Sgshapiro return (__m128d) __builtin_ia32_andpd128_mask ((__v2df) __A, 16698121Sgshapiro (__v2df) __B, 16798121Sgshapiro (__v2df) __W, 16898121Sgshapiro (__mmask8) __U); 16998121Sgshapiro} 17098121Sgshapiro 17198121Sgshapirostatic __inline__ __m128d DEFAULT_FN_ATTRS 17298121Sgshapiro_mm_maskz_and_pd (__mmask8 __U, __m128d __A, __m128d __B) { 17398121Sgshapiro return (__m128d) __builtin_ia32_andpd128_mask ((__v2df) __A, 17490792Sgshapiro (__v2df) __B, 17590792Sgshapiro (__v2df) 17690792Sgshapiro _mm_setzero_pd (), 177168515Sgshapiro (__mmask8) __U); 17898121Sgshapiro} 17998121Sgshapiro 18090792Sgshapirostatic __inline__ __m256 DEFAULT_FN_ATTRS 18198121Sgshapiro_mm256_mask_and_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 182168515Sgshapiro return (__m256) __builtin_ia32_andps256_mask ((__v8sf) __A, 18398121Sgshapiro (__v8sf) __B, 18498121Sgshapiro (__v8sf) __W, 18598121Sgshapiro (__mmask8) __U); 18698121Sgshapiro} 18798121Sgshapiro 18898121Sgshapirostatic __inline__ __m256 DEFAULT_FN_ATTRS 18998121Sgshapiro_mm256_maskz_and_ps (__mmask8 __U, __m256 __A, __m256 __B) { 19098121Sgshapiro return (__m256) __builtin_ia32_andps256_mask ((__v8sf) __A, 19198121Sgshapiro (__v8sf) __B, 19298121Sgshapiro (__v8sf) 19398121Sgshapiro _mm256_setzero_ps (), 19498121Sgshapiro (__mmask8) __U); 19598121Sgshapiro} 19698121Sgshapiro 19798121Sgshapirostatic __inline__ __m128 DEFAULT_FN_ATTRS 19898121Sgshapiro_mm_mask_and_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 19998121Sgshapiro return (__m128) __builtin_ia32_andps128_mask ((__v4sf) __A, 20090792Sgshapiro (__v4sf) __B, 20198121Sgshapiro (__v4sf) __W, 20298121Sgshapiro (__mmask8) __U); 20398121Sgshapiro} 20498121Sgshapiro 20598121Sgshapirostatic __inline__ __m128 DEFAULT_FN_ATTRS 20698121Sgshapiro_mm_maskz_and_ps (__mmask8 __U, __m128 __A, __m128 __B) { 20798121Sgshapiro return (__m128) __builtin_ia32_andps128_mask ((__v4sf) __A, 20898121Sgshapiro (__v4sf) __B, 20998121Sgshapiro (__v4sf) 21098121Sgshapiro _mm_setzero_ps (), 21190792Sgshapiro (__mmask8) __U); 21290792Sgshapiro} 21390792Sgshapiro 214168515Sgshapirostatic __inline__ __m256d DEFAULT_FN_ATTRS 21598121Sgshapiro_mm256_mask_xor_pd (__m256d __W, __mmask8 __U, __m256d __A, 21698121Sgshapiro __m256d __B) { 21798121Sgshapiro return (__m256d) __builtin_ia32_xorpd256_mask ((__v4df) __A, 21890792Sgshapiro (__v4df) __B, 21998121Sgshapiro (__v4df) __W, 22098121Sgshapiro (__mmask8) __U); 22198121Sgshapiro} 22298121Sgshapiro 22398121Sgshapirostatic __inline__ __m256d DEFAULT_FN_ATTRS 22498121Sgshapiro_mm256_maskz_xor_pd (__mmask8 __U, __m256d __A, __m256d __B) { 22590792Sgshapiro return (__m256d) __builtin_ia32_xorpd256_mask ((__v4df) __A, 22698121Sgshapiro (__v4df) __B, 22798121Sgshapiro (__v4df) 22890792Sgshapiro _mm256_setzero_pd (), 22990792Sgshapiro (__mmask8) __U); 23090792Sgshapiro} 231168515Sgshapiro 23298121Sgshapirostatic __inline__ __m128d DEFAULT_FN_ATTRS 23390792Sgshapiro_mm_mask_xor_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 23498121Sgshapiro return (__m128d) __builtin_ia32_xorpd128_mask ((__v2df) __A, 23598121Sgshapiro (__v2df) __B, 23698121Sgshapiro (__v2df) __W, 23798121Sgshapiro (__mmask8) __U); 23898121Sgshapiro} 23998121Sgshapiro 24090792Sgshapirostatic __inline__ __m128d DEFAULT_FN_ATTRS 24198121Sgshapiro_mm_maskz_xor_pd (__mmask8 __U, __m128d __A, __m128d __B) { 24298121Sgshapiro return (__m128d) __builtin_ia32_xorpd128_mask ((__v2df) __A, 24390792Sgshapiro (__v2df) __B, 24490792Sgshapiro (__v2df) 24590792Sgshapiro _mm_setzero_pd (), 246168515Sgshapiro (__mmask8) __U); 24798121Sgshapiro} 24898121Sgshapiro 24998121Sgshapirostatic __inline__ __m256 DEFAULT_FN_ATTRS 25090792Sgshapiro_mm256_mask_xor_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 25198121Sgshapiro return (__m256) __builtin_ia32_xorps256_mask ((__v8sf) __A, 25290792Sgshapiro (__v8sf) __B, 25398121Sgshapiro (__v8sf) __W, 25498121Sgshapiro (__mmask8) __U); 25598121Sgshapiro} 25698121Sgshapiro 25798121Sgshapirostatic __inline__ __m256 DEFAULT_FN_ATTRS 25898121Sgshapiro_mm256_maskz_xor_ps (__mmask8 __U, __m256 __A, __m256 __B) { 25998121Sgshapiro return (__m256) __builtin_ia32_xorps256_mask ((__v8sf) __A, 26098121Sgshapiro (__v8sf) __B, 26198121Sgshapiro (__v8sf) 26298121Sgshapiro _mm256_setzero_ps (), 26398121Sgshapiro (__mmask8) __U); 26498121Sgshapiro} 26590792Sgshapiro 26690792Sgshapirostatic __inline__ __m128 DEFAULT_FN_ATTRS 26790792Sgshapiro_mm_mask_xor_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 268168515Sgshapiro return (__m128) __builtin_ia32_xorps128_mask ((__v4sf) __A, 26998121Sgshapiro (__v4sf) __B, 27090792Sgshapiro (__v4sf) __W, 27198121Sgshapiro (__mmask8) __U); 27298121Sgshapiro} 27398121Sgshapiro 27498121Sgshapirostatic __inline__ __m128 DEFAULT_FN_ATTRS 275168515Sgshapiro_mm_maskz_xor_ps (__mmask8 __U, __m128 __A, __m128 __B) { 27698121Sgshapiro return (__m128) __builtin_ia32_xorps128_mask ((__v4sf) __A, 27790792Sgshapiro (__v4sf) __B, 27890792Sgshapiro (__v4sf) 27990792Sgshapiro _mm_setzero_ps (), 280168515Sgshapiro (__mmask8) __U); 28198121Sgshapiro} 28290792Sgshapiro 28398121Sgshapirostatic __inline__ __m256d DEFAULT_FN_ATTRS 28490792Sgshapiro_mm256_mask_or_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { 28590792Sgshapiro return (__m256d) __builtin_ia32_orpd256_mask ((__v4df) __A, 28690792Sgshapiro (__v4df) __B, 28790792Sgshapiro (__v4df) __W, 28898121Sgshapiro (__mmask8) __U); 28998121Sgshapiro} 29090792Sgshapiro 29198121Sgshapirostatic __inline__ __m256d DEFAULT_FN_ATTRS 29298121Sgshapiro_mm256_maskz_or_pd (__mmask8 __U, __m256d __A, __m256d __B) { 29398121Sgshapiro return (__m256d) __builtin_ia32_orpd256_mask ((__v4df) __A, 29498121Sgshapiro (__v4df) __B, 29598121Sgshapiro (__v4df) 29690792Sgshapiro _mm256_setzero_pd (), 29798121Sgshapiro (__mmask8) __U); 29898121Sgshapiro} 29990792Sgshapiro 30098121Sgshapirostatic __inline__ __m128d DEFAULT_FN_ATTRS 30198121Sgshapiro_mm_mask_or_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 30298121Sgshapiro return (__m128d) __builtin_ia32_orpd128_mask ((__v2df) __A, 30398121Sgshapiro (__v2df) __B, 30498121Sgshapiro (__v2df) __W, 30598121Sgshapiro (__mmask8) __U); 30698121Sgshapiro} 30798121Sgshapiro 30898121Sgshapirostatic __inline__ __m128d DEFAULT_FN_ATTRS 30998121Sgshapiro_mm_maskz_or_pd (__mmask8 __U, __m128d __A, __m128d __B) { 31098121Sgshapiro return (__m128d) __builtin_ia32_orpd128_mask ((__v2df) __A, 31198121Sgshapiro (__v2df) __B, 31298121Sgshapiro (__v2df) 31398121Sgshapiro _mm_setzero_pd (), 31498121Sgshapiro (__mmask8) __U); 31598121Sgshapiro} 31698121Sgshapiro 31798121Sgshapirostatic __inline__ __m256 DEFAULT_FN_ATTRS 31898121Sgshapiro_mm256_mask_or_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 31998121Sgshapiro return (__m256) __builtin_ia32_orps256_mask ((__v8sf) __A, 320168515Sgshapiro (__v8sf) __B, 32198121Sgshapiro (__v8sf) __W, 32298121Sgshapiro (__mmask8) __U); 32398121Sgshapiro} 32498121Sgshapiro 32598121Sgshapirostatic __inline__ __m256 DEFAULT_FN_ATTRS 32698121Sgshapiro_mm256_maskz_or_ps (__mmask8 __U, __m256 __A, __m256 __B) { 32798121Sgshapiro return (__m256) __builtin_ia32_orps256_mask ((__v8sf) __A, 32898121Sgshapiro (__v8sf) __B, 32998121Sgshapiro (__v8sf) 33098121Sgshapiro _mm256_setzero_ps (), 33190792Sgshapiro (__mmask8) __U); 33298121Sgshapiro} 33398121Sgshapiro 33498121Sgshapirostatic __inline__ __m128 DEFAULT_FN_ATTRS 33598121Sgshapiro_mm_mask_or_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 33698121Sgshapiro return (__m128) __builtin_ia32_orps128_mask ((__v4sf) __A, 33798121Sgshapiro (__v4sf) __B, 33898121Sgshapiro (__v4sf) __W, 33990792Sgshapiro (__mmask8) __U); 34098121Sgshapiro} 34198121Sgshapiro 34298121Sgshapirostatic __inline__ __m128 DEFAULT_FN_ATTRS 34390792Sgshapiro_mm_maskz_or_ps (__mmask8 __U, __m128 __A, __m128 __B) { 34498121Sgshapiro return (__m128) __builtin_ia32_orps128_mask ((__v4sf) __A, 34598121Sgshapiro (__v4sf) __B, 34690792Sgshapiro (__v4sf) 34790792Sgshapiro _mm_setzero_ps (), 34890792Sgshapiro (__mmask8) __U); 349168515Sgshapiro} 35098121Sgshapiro 35190792Sgshapiro#undef DEFAULT_FN_ATTRS 35298121Sgshapiro 35398121Sgshapiro#endif 35498121Sgshapiro