/* avx512vldqintrin.h revision 283627 */
1283627Sdim/*===---- avx512vldqintrin.h - AVX512VL and AVX512DQ intrinsics ---------------------------=== 2283627Sdim * 3283627Sdim * Permission is hereby granted, free of charge, to any person obtaining a copy 4283627Sdim * of this software and associated documentation files (the "Software"), to deal 5283627Sdim * in the Software without restriction, including without limitation the rights 6283627Sdim * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7283627Sdim * copies of the Software, and to permit persons to whom the Software is 8283627Sdim * furnished to do so, subject to the following conditions: 9283627Sdim * 10283627Sdim * The above copyright notice and this permission notice shall be included in 11283627Sdim * all copies or substantial portions of the Software. 12283627Sdim * 13283627Sdim * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14283627Sdim * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15283627Sdim * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16283627Sdim * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17283627Sdim * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18283627Sdim * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19283627Sdim * THE SOFTWARE. 20283627Sdim * 21283627Sdim *===-----------------------------------------------------------------------=== 22283627Sdim */ 23283627Sdim 24283627Sdim#ifndef __IMMINTRIN_H 25283627Sdim#error "Never use <avx512vldqintrin.h> directly; include <immintrin.h> instead." 
26283627Sdim#endif 27283627Sdim 28283627Sdim#ifndef __AVX512VLDQINTRIN_H 29283627Sdim#define __AVX512VLDQINTRIN_H 30283627Sdim 31283627Sdim 32283627Sdimstatic __inline__ __m256i __attribute__ ((__always_inline__, __nodebug__)) 33283627Sdim_mm256_mullo_epi64 (__m256i __A, __m256i __B) { 34283627Sdim return (__m256i) ((__v4di) __A * (__v4di) __B); 35283627Sdim} 36283627Sdim 37283627Sdimstatic __inline__ __m256i __attribute__ ((__always_inline__, __nodebug__)) 38283627Sdim_mm256_mask_mullo_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { 39283627Sdim return (__m256i) __builtin_ia32_pmullq256_mask ((__v4di) __A, 40283627Sdim (__v4di) __B, 41283627Sdim (__v4di) __W, 42283627Sdim (__mmask8) __U); 43283627Sdim} 44283627Sdim 45283627Sdimstatic __inline__ __m256i __attribute__ ((__always_inline__, __nodebug__)) 46283627Sdim_mm256_maskz_mullo_epi64 (__mmask8 __U, __m256i __A, __m256i __B) { 47283627Sdim return (__m256i) __builtin_ia32_pmullq256_mask ((__v4di) __A, 48283627Sdim (__v4di) __B, 49283627Sdim (__v4di) 50283627Sdim _mm256_setzero_si256 (), 51283627Sdim (__mmask8) __U); 52283627Sdim} 53283627Sdim 54283627Sdimstatic __inline__ __m128i __attribute__ ((__always_inline__, __nodebug__)) 55283627Sdim_mm_mullo_epi64 (__m128i __A, __m128i __B) { 56283627Sdim return (__m128i) ((__v2di) __A * (__v2di) __B); 57283627Sdim} 58283627Sdim 59283627Sdimstatic __inline__ __m128i __attribute__ ((__always_inline__, __nodebug__)) 60283627Sdim_mm_mask_mullo_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { 61283627Sdim return (__m128i) __builtin_ia32_pmullq128_mask ((__v2di) __A, 62283627Sdim (__v2di) __B, 63283627Sdim (__v2di) __W, 64283627Sdim (__mmask8) __U); 65283627Sdim} 66283627Sdim 67283627Sdimstatic __inline__ __m128i __attribute__ ((__always_inline__, __nodebug__)) 68283627Sdim_mm_maskz_mullo_epi64 (__mmask8 __U, __m128i __A, __m128i __B) { 69283627Sdim return (__m128i) __builtin_ia32_pmullq128_mask ((__v2di) __A, 70283627Sdim (__v2di) __B, 71283627Sdim 
(__v2di) 72283627Sdim _mm_setzero_si128 (), 73283627Sdim (__mmask8) __U); 74283627Sdim} 75283627Sdim 76283627Sdimstatic __inline__ __m256d __attribute__ ((__always_inline__, __nodebug__)) 77283627Sdim_mm256_mask_andnot_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { 78283627Sdim return (__m256d) __builtin_ia32_andnpd256_mask ((__v4df) __A, 79283627Sdim (__v4df) __B, 80283627Sdim (__v4df) __W, 81283627Sdim (__mmask8) __U); 82283627Sdim} 83283627Sdim 84283627Sdimstatic __inline__ __m256d __attribute__ ((__always_inline__, __nodebug__)) 85283627Sdim_mm256_maskz_andnot_pd (__mmask8 __U, __m256d __A, __m256d __B) { 86283627Sdim return (__m256d) __builtin_ia32_andnpd256_mask ((__v4df) __A, 87283627Sdim (__v4df) __B, 88283627Sdim (__v4df) 89283627Sdim _mm256_setzero_pd (), 90283627Sdim (__mmask8) __U); 91283627Sdim} 92283627Sdim 93283627Sdimstatic __inline__ __m128d __attribute__ ((__always_inline__, __nodebug__)) 94283627Sdim_mm_mask_andnot_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 95283627Sdim return (__m128d) __builtin_ia32_andnpd128_mask ((__v2df) __A, 96283627Sdim (__v2df) __B, 97283627Sdim (__v2df) __W, 98283627Sdim (__mmask8) __U); 99283627Sdim} 100283627Sdim 101283627Sdimstatic __inline__ __m128d __attribute__ ((__always_inline__, __nodebug__)) 102283627Sdim_mm_maskz_andnot_pd (__mmask8 __U, __m128d __A, __m128d __B) { 103283627Sdim return (__m128d) __builtin_ia32_andnpd128_mask ((__v2df) __A, 104283627Sdim (__v2df) __B, 105283627Sdim (__v2df) 106283627Sdim _mm_setzero_pd (), 107283627Sdim (__mmask8) __U); 108283627Sdim} 109283627Sdim 110283627Sdimstatic __inline__ __m256 __attribute__ ((__always_inline__, __nodebug__)) 111283627Sdim_mm256_mask_andnot_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 112283627Sdim return (__m256) __builtin_ia32_andnps256_mask ((__v8sf) __A, 113283627Sdim (__v8sf) __B, 114283627Sdim (__v8sf) __W, 115283627Sdim (__mmask8) __U); 116283627Sdim} 117283627Sdim 118283627Sdimstatic __inline__ __m256 
__attribute__ ((__always_inline__, __nodebug__)) 119283627Sdim_mm256_maskz_andnot_ps (__mmask8 __U, __m256 __A, __m256 __B) { 120283627Sdim return (__m256) __builtin_ia32_andnps256_mask ((__v8sf) __A, 121283627Sdim (__v8sf) __B, 122283627Sdim (__v8sf) 123283627Sdim _mm256_setzero_ps (), 124283627Sdim (__mmask8) __U); 125283627Sdim} 126283627Sdim 127283627Sdimstatic __inline__ __m128 __attribute__ ((__always_inline__, __nodebug__)) 128283627Sdim_mm_mask_andnot_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 129283627Sdim return (__m128) __builtin_ia32_andnps128_mask ((__v4sf) __A, 130283627Sdim (__v4sf) __B, 131283627Sdim (__v4sf) __W, 132283627Sdim (__mmask8) __U); 133283627Sdim} 134283627Sdim 135283627Sdimstatic __inline__ __m128 __attribute__ ((__always_inline__, __nodebug__)) 136283627Sdim_mm_maskz_andnot_ps (__mmask8 __U, __m128 __A, __m128 __B) { 137283627Sdim return (__m128) __builtin_ia32_andnps128_mask ((__v4sf) __A, 138283627Sdim (__v4sf) __B, 139283627Sdim (__v4sf) 140283627Sdim _mm_setzero_ps (), 141283627Sdim (__mmask8) __U); 142283627Sdim} 143283627Sdim 144283627Sdimstatic __inline__ __m256d __attribute__ ((__always_inline__, __nodebug__)) 145283627Sdim_mm256_mask_and_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { 146283627Sdim return (__m256d) __builtin_ia32_andpd256_mask ((__v4df) __A, 147283627Sdim (__v4df) __B, 148283627Sdim (__v4df) __W, 149283627Sdim (__mmask8) __U); 150283627Sdim} 151283627Sdim 152283627Sdimstatic __inline__ __m256d __attribute__ ((__always_inline__, __nodebug__)) 153283627Sdim_mm256_maskz_and_pd (__mmask8 __U, __m256d __A, __m256d __B) { 154283627Sdim return (__m256d) __builtin_ia32_andpd256_mask ((__v4df) __A, 155283627Sdim (__v4df) __B, 156283627Sdim (__v4df) 157283627Sdim _mm256_setzero_pd (), 158283627Sdim (__mmask8) __U); 159283627Sdim} 160283627Sdim 161283627Sdimstatic __inline__ __m128d __attribute__ ((__always_inline__, __nodebug__)) 162283627Sdim_mm_mask_and_pd (__m128d __W, __mmask8 __U, __m128d 
__A, __m128d __B) { 163283627Sdim return (__m128d) __builtin_ia32_andpd128_mask ((__v2df) __A, 164283627Sdim (__v2df) __B, 165283627Sdim (__v2df) __W, 166283627Sdim (__mmask8) __U); 167283627Sdim} 168283627Sdim 169283627Sdimstatic __inline__ __m128d __attribute__ ((__always_inline__, __nodebug__)) 170283627Sdim_mm_maskz_and_pd (__mmask8 __U, __m128d __A, __m128d __B) { 171283627Sdim return (__m128d) __builtin_ia32_andpd128_mask ((__v2df) __A, 172283627Sdim (__v2df) __B, 173283627Sdim (__v2df) 174283627Sdim _mm_setzero_pd (), 175283627Sdim (__mmask8) __U); 176283627Sdim} 177283627Sdim 178283627Sdimstatic __inline__ __m256 __attribute__ ((__always_inline__, __nodebug__)) 179283627Sdim_mm256_mask_and_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 180283627Sdim return (__m256) __builtin_ia32_andps256_mask ((__v8sf) __A, 181283627Sdim (__v8sf) __B, 182283627Sdim (__v8sf) __W, 183283627Sdim (__mmask8) __U); 184283627Sdim} 185283627Sdim 186283627Sdimstatic __inline__ __m256 __attribute__ ((__always_inline__, __nodebug__)) 187283627Sdim_mm256_maskz_and_ps (__mmask8 __U, __m256 __A, __m256 __B) { 188283627Sdim return (__m256) __builtin_ia32_andps256_mask ((__v8sf) __A, 189283627Sdim (__v8sf) __B, 190283627Sdim (__v8sf) 191283627Sdim _mm256_setzero_ps (), 192283627Sdim (__mmask8) __U); 193283627Sdim} 194283627Sdim 195283627Sdimstatic __inline__ __m128 __attribute__ ((__always_inline__, __nodebug__)) 196283627Sdim_mm_mask_and_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 197283627Sdim return (__m128) __builtin_ia32_andps128_mask ((__v4sf) __A, 198283627Sdim (__v4sf) __B, 199283627Sdim (__v4sf) __W, 200283627Sdim (__mmask8) __U); 201283627Sdim} 202283627Sdim 203283627Sdimstatic __inline__ __m128 __attribute__ ((__always_inline__, __nodebug__)) 204283627Sdim_mm_maskz_and_ps (__mmask8 __U, __m128 __A, __m128 __B) { 205283627Sdim return (__m128) __builtin_ia32_andps128_mask ((__v4sf) __A, 206283627Sdim (__v4sf) __B, 207283627Sdim (__v4sf) 208283627Sdim 
_mm_setzero_ps (), 209283627Sdim (__mmask8) __U); 210283627Sdim} 211283627Sdim 212283627Sdimstatic __inline__ __m256d __attribute__ ((__always_inline__, __nodebug__)) 213283627Sdim_mm256_mask_xor_pd (__m256d __W, __mmask8 __U, __m256d __A, 214283627Sdim __m256d __B) { 215283627Sdim return (__m256d) __builtin_ia32_xorpd256_mask ((__v4df) __A, 216283627Sdim (__v4df) __B, 217283627Sdim (__v4df) __W, 218283627Sdim (__mmask8) __U); 219283627Sdim} 220283627Sdim 221283627Sdimstatic __inline__ __m256d __attribute__ ((__always_inline__, __nodebug__)) 222283627Sdim_mm256_maskz_xor_pd (__mmask8 __U, __m256d __A, __m256d __B) { 223283627Sdim return (__m256d) __builtin_ia32_xorpd256_mask ((__v4df) __A, 224283627Sdim (__v4df) __B, 225283627Sdim (__v4df) 226283627Sdim _mm256_setzero_pd (), 227283627Sdim (__mmask8) __U); 228283627Sdim} 229283627Sdim 230283627Sdimstatic __inline__ __m128d __attribute__ ((__always_inline__, __nodebug__)) 231283627Sdim_mm_mask_xor_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 232283627Sdim return (__m128d) __builtin_ia32_xorpd128_mask ((__v2df) __A, 233283627Sdim (__v2df) __B, 234283627Sdim (__v2df) __W, 235283627Sdim (__mmask8) __U); 236283627Sdim} 237283627Sdim 238283627Sdimstatic __inline__ __m128d __attribute__ ((__always_inline__, __nodebug__)) 239283627Sdim_mm_maskz_xor_pd (__mmask8 __U, __m128d __A, __m128d __B) { 240283627Sdim return (__m128d) __builtin_ia32_xorpd128_mask ((__v2df) __A, 241283627Sdim (__v2df) __B, 242283627Sdim (__v2df) 243283627Sdim _mm_setzero_pd (), 244283627Sdim (__mmask8) __U); 245283627Sdim} 246283627Sdim 247283627Sdimstatic __inline__ __m256 __attribute__ ((__always_inline__, __nodebug__)) 248283627Sdim_mm256_mask_xor_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 249283627Sdim return (__m256) __builtin_ia32_xorps256_mask ((__v8sf) __A, 250283627Sdim (__v8sf) __B, 251283627Sdim (__v8sf) __W, 252283627Sdim (__mmask8) __U); 253283627Sdim} 254283627Sdim 255283627Sdimstatic __inline__ __m256 
__attribute__ ((__always_inline__, __nodebug__)) 256283627Sdim_mm256_maskz_xor_ps (__mmask8 __U, __m256 __A, __m256 __B) { 257283627Sdim return (__m256) __builtin_ia32_xorps256_mask ((__v8sf) __A, 258283627Sdim (__v8sf) __B, 259283627Sdim (__v8sf) 260283627Sdim _mm256_setzero_ps (), 261283627Sdim (__mmask8) __U); 262283627Sdim} 263283627Sdim 264283627Sdimstatic __inline__ __m128 __attribute__ ((__always_inline__, __nodebug__)) 265283627Sdim_mm_mask_xor_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 266283627Sdim return (__m128) __builtin_ia32_xorps128_mask ((__v4sf) __A, 267283627Sdim (__v4sf) __B, 268283627Sdim (__v4sf) __W, 269283627Sdim (__mmask8) __U); 270283627Sdim} 271283627Sdim 272283627Sdimstatic __inline__ __m128 __attribute__ ((__always_inline__, __nodebug__)) 273283627Sdim_mm_maskz_xor_ps (__mmask8 __U, __m128 __A, __m128 __B) { 274283627Sdim return (__m128) __builtin_ia32_xorps128_mask ((__v4sf) __A, 275283627Sdim (__v4sf) __B, 276283627Sdim (__v4sf) 277283627Sdim _mm_setzero_ps (), 278283627Sdim (__mmask8) __U); 279283627Sdim} 280283627Sdim 281283627Sdimstatic __inline__ __m256d __attribute__ ((__always_inline__, __nodebug__)) 282283627Sdim_mm256_mask_or_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { 283283627Sdim return (__m256d) __builtin_ia32_orpd256_mask ((__v4df) __A, 284283627Sdim (__v4df) __B, 285283627Sdim (__v4df) __W, 286283627Sdim (__mmask8) __U); 287283627Sdim} 288283627Sdim 289283627Sdimstatic __inline__ __m256d __attribute__ ((__always_inline__, __nodebug__)) 290283627Sdim_mm256_maskz_or_pd (__mmask8 __U, __m256d __A, __m256d __B) { 291283627Sdim return (__m256d) __builtin_ia32_orpd256_mask ((__v4df) __A, 292283627Sdim (__v4df) __B, 293283627Sdim (__v4df) 294283627Sdim _mm256_setzero_pd (), 295283627Sdim (__mmask8) __U); 296283627Sdim} 297283627Sdim 298283627Sdimstatic __inline__ __m128d __attribute__ ((__always_inline__, __nodebug__)) 299283627Sdim_mm_mask_or_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 
300283627Sdim return (__m128d) __builtin_ia32_orpd128_mask ((__v2df) __A, 301283627Sdim (__v2df) __B, 302283627Sdim (__v2df) __W, 303283627Sdim (__mmask8) __U); 304283627Sdim} 305283627Sdim 306283627Sdimstatic __inline__ __m128d __attribute__ ((__always_inline__, __nodebug__)) 307283627Sdim_mm_maskz_or_pd (__mmask8 __U, __m128d __A, __m128d __B) { 308283627Sdim return (__m128d) __builtin_ia32_orpd128_mask ((__v2df) __A, 309283627Sdim (__v2df) __B, 310283627Sdim (__v2df) 311283627Sdim _mm_setzero_pd (), 312283627Sdim (__mmask8) __U); 313283627Sdim} 314283627Sdim 315283627Sdimstatic __inline__ __m256 __attribute__ ((__always_inline__, __nodebug__)) 316283627Sdim_mm256_mask_or_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 317283627Sdim return (__m256) __builtin_ia32_orps256_mask ((__v8sf) __A, 318283627Sdim (__v8sf) __B, 319283627Sdim (__v8sf) __W, 320283627Sdim (__mmask8) __U); 321283627Sdim} 322283627Sdim 323283627Sdimstatic __inline__ __m256 __attribute__ ((__always_inline__, __nodebug__)) 324283627Sdim_mm256_maskz_or_ps (__mmask8 __U, __m256 __A, __m256 __B) { 325283627Sdim return (__m256) __builtin_ia32_orps256_mask ((__v8sf) __A, 326283627Sdim (__v8sf) __B, 327283627Sdim (__v8sf) 328283627Sdim _mm256_setzero_ps (), 329283627Sdim (__mmask8) __U); 330283627Sdim} 331283627Sdim 332283627Sdimstatic __inline__ __m128 __attribute__ ((__always_inline__, __nodebug__)) 333283627Sdim_mm_mask_or_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 334283627Sdim return (__m128) __builtin_ia32_orps128_mask ((__v4sf) __A, 335283627Sdim (__v4sf) __B, 336283627Sdim (__v4sf) __W, 337283627Sdim (__mmask8) __U); 338283627Sdim} 339283627Sdim 340283627Sdimstatic __inline__ __m128 __attribute__ ((__always_inline__, __nodebug__)) 341283627Sdim_mm_maskz_or_ps (__mmask8 __U, __m128 __A, __m128 __B) { 342283627Sdim return (__m128) __builtin_ia32_orps128_mask ((__v4sf) __A, 343283627Sdim (__v4sf) __B, 344283627Sdim (__v4sf) 345283627Sdim _mm_setzero_ps (), 346283627Sdim 
(__mmask8) __U); 347283627Sdim} 348283627Sdim 349283627Sdim#endif 350