/* avx512vldqintrin.h revision 284734 */
1283627Sdim/*===---- avx512vldqintrin.h - AVX512VL and AVX512DQ intrinsics ---------------------------=== 2283627Sdim * 3283627Sdim * Permission is hereby granted, free of charge, to any person obtaining a copy 4283627Sdim * of this software and associated documentation files (the "Software"), to deal 5283627Sdim * in the Software without restriction, including without limitation the rights 6283627Sdim * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7283627Sdim * copies of the Software, and to permit persons to whom the Software is 8283627Sdim * furnished to do so, subject to the following conditions: 9283627Sdim * 10283627Sdim * The above copyright notice and this permission notice shall be included in 11283627Sdim * all copies or substantial portions of the Software. 12283627Sdim * 13283627Sdim * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14283627Sdim * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15283627Sdim * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16283627Sdim * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17283627Sdim * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18283627Sdim * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19283627Sdim * THE SOFTWARE. 20283627Sdim * 21283627Sdim *===-----------------------------------------------------------------------=== 22283627Sdim */ 23283627Sdim 24283627Sdim#ifndef __IMMINTRIN_H 25283627Sdim#error "Never use <avx512vldqintrin.h> directly; include <immintrin.h> instead." 26283627Sdim#endif 27283627Sdim 28283627Sdim#ifndef __AVX512VLDQINTRIN_H 29283627Sdim#define __AVX512VLDQINTRIN_H 30283627Sdim 31284734Sdim/* Define the default attributes for the functions in this file. 
*/ 32284734Sdim#define DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512dq"))) 33283627Sdim 34284734Sdimstatic __inline__ __m256i DEFAULT_FN_ATTRS 35283627Sdim_mm256_mullo_epi64 (__m256i __A, __m256i __B) { 36283627Sdim return (__m256i) ((__v4di) __A * (__v4di) __B); 37283627Sdim} 38283627Sdim 39284734Sdimstatic __inline__ __m256i DEFAULT_FN_ATTRS 40283627Sdim_mm256_mask_mullo_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { 41283627Sdim return (__m256i) __builtin_ia32_pmullq256_mask ((__v4di) __A, 42283627Sdim (__v4di) __B, 43283627Sdim (__v4di) __W, 44283627Sdim (__mmask8) __U); 45283627Sdim} 46283627Sdim 47284734Sdimstatic __inline__ __m256i DEFAULT_FN_ATTRS 48283627Sdim_mm256_maskz_mullo_epi64 (__mmask8 __U, __m256i __A, __m256i __B) { 49283627Sdim return (__m256i) __builtin_ia32_pmullq256_mask ((__v4di) __A, 50283627Sdim (__v4di) __B, 51283627Sdim (__v4di) 52283627Sdim _mm256_setzero_si256 (), 53283627Sdim (__mmask8) __U); 54283627Sdim} 55283627Sdim 56284734Sdimstatic __inline__ __m128i DEFAULT_FN_ATTRS 57283627Sdim_mm_mullo_epi64 (__m128i __A, __m128i __B) { 58283627Sdim return (__m128i) ((__v2di) __A * (__v2di) __B); 59283627Sdim} 60283627Sdim 61284734Sdimstatic __inline__ __m128i DEFAULT_FN_ATTRS 62283627Sdim_mm_mask_mullo_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { 63283627Sdim return (__m128i) __builtin_ia32_pmullq128_mask ((__v2di) __A, 64283627Sdim (__v2di) __B, 65283627Sdim (__v2di) __W, 66283627Sdim (__mmask8) __U); 67283627Sdim} 68283627Sdim 69284734Sdimstatic __inline__ __m128i DEFAULT_FN_ATTRS 70283627Sdim_mm_maskz_mullo_epi64 (__mmask8 __U, __m128i __A, __m128i __B) { 71283627Sdim return (__m128i) __builtin_ia32_pmullq128_mask ((__v2di) __A, 72283627Sdim (__v2di) __B, 73283627Sdim (__v2di) 74283627Sdim _mm_setzero_si128 (), 75283627Sdim (__mmask8) __U); 76283627Sdim} 77283627Sdim 78284734Sdimstatic __inline__ __m256d DEFAULT_FN_ATTRS 79283627Sdim_mm256_mask_andnot_pd (__m256d 
__W, __mmask8 __U, __m256d __A, __m256d __B) { 80283627Sdim return (__m256d) __builtin_ia32_andnpd256_mask ((__v4df) __A, 81283627Sdim (__v4df) __B, 82283627Sdim (__v4df) __W, 83283627Sdim (__mmask8) __U); 84283627Sdim} 85283627Sdim 86284734Sdimstatic __inline__ __m256d DEFAULT_FN_ATTRS 87283627Sdim_mm256_maskz_andnot_pd (__mmask8 __U, __m256d __A, __m256d __B) { 88283627Sdim return (__m256d) __builtin_ia32_andnpd256_mask ((__v4df) __A, 89283627Sdim (__v4df) __B, 90283627Sdim (__v4df) 91283627Sdim _mm256_setzero_pd (), 92283627Sdim (__mmask8) __U); 93283627Sdim} 94283627Sdim 95284734Sdimstatic __inline__ __m128d DEFAULT_FN_ATTRS 96283627Sdim_mm_mask_andnot_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 97283627Sdim return (__m128d) __builtin_ia32_andnpd128_mask ((__v2df) __A, 98283627Sdim (__v2df) __B, 99283627Sdim (__v2df) __W, 100283627Sdim (__mmask8) __U); 101283627Sdim} 102283627Sdim 103284734Sdimstatic __inline__ __m128d DEFAULT_FN_ATTRS 104283627Sdim_mm_maskz_andnot_pd (__mmask8 __U, __m128d __A, __m128d __B) { 105283627Sdim return (__m128d) __builtin_ia32_andnpd128_mask ((__v2df) __A, 106283627Sdim (__v2df) __B, 107283627Sdim (__v2df) 108283627Sdim _mm_setzero_pd (), 109283627Sdim (__mmask8) __U); 110283627Sdim} 111283627Sdim 112284734Sdimstatic __inline__ __m256 DEFAULT_FN_ATTRS 113283627Sdim_mm256_mask_andnot_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 114283627Sdim return (__m256) __builtin_ia32_andnps256_mask ((__v8sf) __A, 115283627Sdim (__v8sf) __B, 116283627Sdim (__v8sf) __W, 117283627Sdim (__mmask8) __U); 118283627Sdim} 119283627Sdim 120284734Sdimstatic __inline__ __m256 DEFAULT_FN_ATTRS 121283627Sdim_mm256_maskz_andnot_ps (__mmask8 __U, __m256 __A, __m256 __B) { 122283627Sdim return (__m256) __builtin_ia32_andnps256_mask ((__v8sf) __A, 123283627Sdim (__v8sf) __B, 124283627Sdim (__v8sf) 125283627Sdim _mm256_setzero_ps (), 126283627Sdim (__mmask8) __U); 127283627Sdim} 128283627Sdim 129284734Sdimstatic __inline__ __m128 
DEFAULT_FN_ATTRS 130283627Sdim_mm_mask_andnot_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 131283627Sdim return (__m128) __builtin_ia32_andnps128_mask ((__v4sf) __A, 132283627Sdim (__v4sf) __B, 133283627Sdim (__v4sf) __W, 134283627Sdim (__mmask8) __U); 135283627Sdim} 136283627Sdim 137284734Sdimstatic __inline__ __m128 DEFAULT_FN_ATTRS 138283627Sdim_mm_maskz_andnot_ps (__mmask8 __U, __m128 __A, __m128 __B) { 139283627Sdim return (__m128) __builtin_ia32_andnps128_mask ((__v4sf) __A, 140283627Sdim (__v4sf) __B, 141283627Sdim (__v4sf) 142283627Sdim _mm_setzero_ps (), 143283627Sdim (__mmask8) __U); 144283627Sdim} 145283627Sdim 146284734Sdimstatic __inline__ __m256d DEFAULT_FN_ATTRS 147283627Sdim_mm256_mask_and_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { 148283627Sdim return (__m256d) __builtin_ia32_andpd256_mask ((__v4df) __A, 149283627Sdim (__v4df) __B, 150283627Sdim (__v4df) __W, 151283627Sdim (__mmask8) __U); 152283627Sdim} 153283627Sdim 154284734Sdimstatic __inline__ __m256d DEFAULT_FN_ATTRS 155283627Sdim_mm256_maskz_and_pd (__mmask8 __U, __m256d __A, __m256d __B) { 156283627Sdim return (__m256d) __builtin_ia32_andpd256_mask ((__v4df) __A, 157283627Sdim (__v4df) __B, 158283627Sdim (__v4df) 159283627Sdim _mm256_setzero_pd (), 160283627Sdim (__mmask8) __U); 161283627Sdim} 162283627Sdim 163284734Sdimstatic __inline__ __m128d DEFAULT_FN_ATTRS 164283627Sdim_mm_mask_and_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 165283627Sdim return (__m128d) __builtin_ia32_andpd128_mask ((__v2df) __A, 166283627Sdim (__v2df) __B, 167283627Sdim (__v2df) __W, 168283627Sdim (__mmask8) __U); 169283627Sdim} 170283627Sdim 171284734Sdimstatic __inline__ __m128d DEFAULT_FN_ATTRS 172283627Sdim_mm_maskz_and_pd (__mmask8 __U, __m128d __A, __m128d __B) { 173283627Sdim return (__m128d) __builtin_ia32_andpd128_mask ((__v2df) __A, 174283627Sdim (__v2df) __B, 175283627Sdim (__v2df) 176283627Sdim _mm_setzero_pd (), 177283627Sdim (__mmask8) __U); 178283627Sdim} 
179283627Sdim 180284734Sdimstatic __inline__ __m256 DEFAULT_FN_ATTRS 181283627Sdim_mm256_mask_and_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 182283627Sdim return (__m256) __builtin_ia32_andps256_mask ((__v8sf) __A, 183283627Sdim (__v8sf) __B, 184283627Sdim (__v8sf) __W, 185283627Sdim (__mmask8) __U); 186283627Sdim} 187283627Sdim 188284734Sdimstatic __inline__ __m256 DEFAULT_FN_ATTRS 189283627Sdim_mm256_maskz_and_ps (__mmask8 __U, __m256 __A, __m256 __B) { 190283627Sdim return (__m256) __builtin_ia32_andps256_mask ((__v8sf) __A, 191283627Sdim (__v8sf) __B, 192283627Sdim (__v8sf) 193283627Sdim _mm256_setzero_ps (), 194283627Sdim (__mmask8) __U); 195283627Sdim} 196283627Sdim 197284734Sdimstatic __inline__ __m128 DEFAULT_FN_ATTRS 198283627Sdim_mm_mask_and_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 199283627Sdim return (__m128) __builtin_ia32_andps128_mask ((__v4sf) __A, 200283627Sdim (__v4sf) __B, 201283627Sdim (__v4sf) __W, 202283627Sdim (__mmask8) __U); 203283627Sdim} 204283627Sdim 205284734Sdimstatic __inline__ __m128 DEFAULT_FN_ATTRS 206283627Sdim_mm_maskz_and_ps (__mmask8 __U, __m128 __A, __m128 __B) { 207283627Sdim return (__m128) __builtin_ia32_andps128_mask ((__v4sf) __A, 208283627Sdim (__v4sf) __B, 209283627Sdim (__v4sf) 210283627Sdim _mm_setzero_ps (), 211283627Sdim (__mmask8) __U); 212283627Sdim} 213283627Sdim 214284734Sdimstatic __inline__ __m256d DEFAULT_FN_ATTRS 215283627Sdim_mm256_mask_xor_pd (__m256d __W, __mmask8 __U, __m256d __A, 216283627Sdim __m256d __B) { 217283627Sdim return (__m256d) __builtin_ia32_xorpd256_mask ((__v4df) __A, 218283627Sdim (__v4df) __B, 219283627Sdim (__v4df) __W, 220283627Sdim (__mmask8) __U); 221283627Sdim} 222283627Sdim 223284734Sdimstatic __inline__ __m256d DEFAULT_FN_ATTRS 224283627Sdim_mm256_maskz_xor_pd (__mmask8 __U, __m256d __A, __m256d __B) { 225283627Sdim return (__m256d) __builtin_ia32_xorpd256_mask ((__v4df) __A, 226283627Sdim (__v4df) __B, 227283627Sdim (__v4df) 228283627Sdim 
_mm256_setzero_pd (), 229283627Sdim (__mmask8) __U); 230283627Sdim} 231283627Sdim 232284734Sdimstatic __inline__ __m128d DEFAULT_FN_ATTRS 233283627Sdim_mm_mask_xor_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 234283627Sdim return (__m128d) __builtin_ia32_xorpd128_mask ((__v2df) __A, 235283627Sdim (__v2df) __B, 236283627Sdim (__v2df) __W, 237283627Sdim (__mmask8) __U); 238283627Sdim} 239283627Sdim 240284734Sdimstatic __inline__ __m128d DEFAULT_FN_ATTRS 241283627Sdim_mm_maskz_xor_pd (__mmask8 __U, __m128d __A, __m128d __B) { 242283627Sdim return (__m128d) __builtin_ia32_xorpd128_mask ((__v2df) __A, 243283627Sdim (__v2df) __B, 244283627Sdim (__v2df) 245283627Sdim _mm_setzero_pd (), 246283627Sdim (__mmask8) __U); 247283627Sdim} 248283627Sdim 249284734Sdimstatic __inline__ __m256 DEFAULT_FN_ATTRS 250283627Sdim_mm256_mask_xor_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 251283627Sdim return (__m256) __builtin_ia32_xorps256_mask ((__v8sf) __A, 252283627Sdim (__v8sf) __B, 253283627Sdim (__v8sf) __W, 254283627Sdim (__mmask8) __U); 255283627Sdim} 256283627Sdim 257284734Sdimstatic __inline__ __m256 DEFAULT_FN_ATTRS 258283627Sdim_mm256_maskz_xor_ps (__mmask8 __U, __m256 __A, __m256 __B) { 259283627Sdim return (__m256) __builtin_ia32_xorps256_mask ((__v8sf) __A, 260283627Sdim (__v8sf) __B, 261283627Sdim (__v8sf) 262283627Sdim _mm256_setzero_ps (), 263283627Sdim (__mmask8) __U); 264283627Sdim} 265283627Sdim 266284734Sdimstatic __inline__ __m128 DEFAULT_FN_ATTRS 267283627Sdim_mm_mask_xor_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 268283627Sdim return (__m128) __builtin_ia32_xorps128_mask ((__v4sf) __A, 269283627Sdim (__v4sf) __B, 270283627Sdim (__v4sf) __W, 271283627Sdim (__mmask8) __U); 272283627Sdim} 273283627Sdim 274284734Sdimstatic __inline__ __m128 DEFAULT_FN_ATTRS 275283627Sdim_mm_maskz_xor_ps (__mmask8 __U, __m128 __A, __m128 __B) { 276283627Sdim return (__m128) __builtin_ia32_xorps128_mask ((__v4sf) __A, 277283627Sdim (__v4sf) __B, 
278283627Sdim (__v4sf) 279283627Sdim _mm_setzero_ps (), 280283627Sdim (__mmask8) __U); 281283627Sdim} 282283627Sdim 283284734Sdimstatic __inline__ __m256d DEFAULT_FN_ATTRS 284283627Sdim_mm256_mask_or_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { 285283627Sdim return (__m256d) __builtin_ia32_orpd256_mask ((__v4df) __A, 286283627Sdim (__v4df) __B, 287283627Sdim (__v4df) __W, 288283627Sdim (__mmask8) __U); 289283627Sdim} 290283627Sdim 291284734Sdimstatic __inline__ __m256d DEFAULT_FN_ATTRS 292283627Sdim_mm256_maskz_or_pd (__mmask8 __U, __m256d __A, __m256d __B) { 293283627Sdim return (__m256d) __builtin_ia32_orpd256_mask ((__v4df) __A, 294283627Sdim (__v4df) __B, 295283627Sdim (__v4df) 296283627Sdim _mm256_setzero_pd (), 297283627Sdim (__mmask8) __U); 298283627Sdim} 299283627Sdim 300284734Sdimstatic __inline__ __m128d DEFAULT_FN_ATTRS 301283627Sdim_mm_mask_or_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 302283627Sdim return (__m128d) __builtin_ia32_orpd128_mask ((__v2df) __A, 303283627Sdim (__v2df) __B, 304283627Sdim (__v2df) __W, 305283627Sdim (__mmask8) __U); 306283627Sdim} 307283627Sdim 308284734Sdimstatic __inline__ __m128d DEFAULT_FN_ATTRS 309283627Sdim_mm_maskz_or_pd (__mmask8 __U, __m128d __A, __m128d __B) { 310283627Sdim return (__m128d) __builtin_ia32_orpd128_mask ((__v2df) __A, 311283627Sdim (__v2df) __B, 312283627Sdim (__v2df) 313283627Sdim _mm_setzero_pd (), 314283627Sdim (__mmask8) __U); 315283627Sdim} 316283627Sdim 317284734Sdimstatic __inline__ __m256 DEFAULT_FN_ATTRS 318283627Sdim_mm256_mask_or_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 319283627Sdim return (__m256) __builtin_ia32_orps256_mask ((__v8sf) __A, 320283627Sdim (__v8sf) __B, 321283627Sdim (__v8sf) __W, 322283627Sdim (__mmask8) __U); 323283627Sdim} 324283627Sdim 325284734Sdimstatic __inline__ __m256 DEFAULT_FN_ATTRS 326283627Sdim_mm256_maskz_or_ps (__mmask8 __U, __m256 __A, __m256 __B) { 327283627Sdim return (__m256) __builtin_ia32_orps256_mask 
((__v8sf) __A, 328283627Sdim (__v8sf) __B, 329283627Sdim (__v8sf) 330283627Sdim _mm256_setzero_ps (), 331283627Sdim (__mmask8) __U); 332283627Sdim} 333283627Sdim 334284734Sdimstatic __inline__ __m128 DEFAULT_FN_ATTRS 335283627Sdim_mm_mask_or_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 336283627Sdim return (__m128) __builtin_ia32_orps128_mask ((__v4sf) __A, 337283627Sdim (__v4sf) __B, 338283627Sdim (__v4sf) __W, 339283627Sdim (__mmask8) __U); 340283627Sdim} 341283627Sdim 342284734Sdimstatic __inline__ __m128 DEFAULT_FN_ATTRS 343283627Sdim_mm_maskz_or_ps (__mmask8 __U, __m128 __A, __m128 __B) { 344283627Sdim return (__m128) __builtin_ia32_orps128_mask ((__v4sf) __A, 345283627Sdim (__v4sf) __B, 346283627Sdim (__v4sf) 347283627Sdim _mm_setzero_ps (), 348283627Sdim (__mmask8) __U); 349283627Sdim} 350283627Sdim 351284734Sdim#undef DEFAULT_FN_ATTRS 352284734Sdim 353283627Sdim#endif 354