/* avx512vldqintrin.h revision 283633 */
/*===---- avx512vldqintrin.h - AVX512VL and AVX512DQ intrinsics -----------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */

#ifndef __IMMINTRIN_H
#error "Never use <avx512vldqintrin.h> directly; include <immintrin.h> instead."
26#endif 27 28#ifndef __AVX512VLDQINTRIN_H 29#define __AVX512VLDQINTRIN_H 30 31 32static __inline__ __m256i __attribute__ ((__always_inline__, __nodebug__)) 33_mm256_mullo_epi64 (__m256i __A, __m256i __B) { 34 return (__m256i) ((__v4di) __A * (__v4di) __B); 35} 36 37static __inline__ __m256i __attribute__ ((__always_inline__, __nodebug__)) 38_mm256_mask_mullo_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { 39 return (__m256i) __builtin_ia32_pmullq256_mask ((__v4di) __A, 40 (__v4di) __B, 41 (__v4di) __W, 42 (__mmask8) __U); 43} 44 45static __inline__ __m256i __attribute__ ((__always_inline__, __nodebug__)) 46_mm256_maskz_mullo_epi64 (__mmask8 __U, __m256i __A, __m256i __B) { 47 return (__m256i) __builtin_ia32_pmullq256_mask ((__v4di) __A, 48 (__v4di) __B, 49 (__v4di) 50 _mm256_setzero_si256 (), 51 (__mmask8) __U); 52} 53 54static __inline__ __m128i __attribute__ ((__always_inline__, __nodebug__)) 55_mm_mullo_epi64 (__m128i __A, __m128i __B) { 56 return (__m128i) ((__v2di) __A * (__v2di) __B); 57} 58 59static __inline__ __m128i __attribute__ ((__always_inline__, __nodebug__)) 60_mm_mask_mullo_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { 61 return (__m128i) __builtin_ia32_pmullq128_mask ((__v2di) __A, 62 (__v2di) __B, 63 (__v2di) __W, 64 (__mmask8) __U); 65} 66 67static __inline__ __m128i __attribute__ ((__always_inline__, __nodebug__)) 68_mm_maskz_mullo_epi64 (__mmask8 __U, __m128i __A, __m128i __B) { 69 return (__m128i) __builtin_ia32_pmullq128_mask ((__v2di) __A, 70 (__v2di) __B, 71 (__v2di) 72 _mm_setzero_si128 (), 73 (__mmask8) __U); 74} 75 76static __inline__ __m256d __attribute__ ((__always_inline__, __nodebug__)) 77_mm256_mask_andnot_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { 78 return (__m256d) __builtin_ia32_andnpd256_mask ((__v4df) __A, 79 (__v4df) __B, 80 (__v4df) __W, 81 (__mmask8) __U); 82} 83 84static __inline__ __m256d __attribute__ ((__always_inline__, __nodebug__)) 85_mm256_maskz_andnot_pd (__mmask8 __U, 
__m256d __A, __m256d __B) { 86 return (__m256d) __builtin_ia32_andnpd256_mask ((__v4df) __A, 87 (__v4df) __B, 88 (__v4df) 89 _mm256_setzero_pd (), 90 (__mmask8) __U); 91} 92 93static __inline__ __m128d __attribute__ ((__always_inline__, __nodebug__)) 94_mm_mask_andnot_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 95 return (__m128d) __builtin_ia32_andnpd128_mask ((__v2df) __A, 96 (__v2df) __B, 97 (__v2df) __W, 98 (__mmask8) __U); 99} 100 101static __inline__ __m128d __attribute__ ((__always_inline__, __nodebug__)) 102_mm_maskz_andnot_pd (__mmask8 __U, __m128d __A, __m128d __B) { 103 return (__m128d) __builtin_ia32_andnpd128_mask ((__v2df) __A, 104 (__v2df) __B, 105 (__v2df) 106 _mm_setzero_pd (), 107 (__mmask8) __U); 108} 109 110static __inline__ __m256 __attribute__ ((__always_inline__, __nodebug__)) 111_mm256_mask_andnot_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 112 return (__m256) __builtin_ia32_andnps256_mask ((__v8sf) __A, 113 (__v8sf) __B, 114 (__v8sf) __W, 115 (__mmask8) __U); 116} 117 118static __inline__ __m256 __attribute__ ((__always_inline__, __nodebug__)) 119_mm256_maskz_andnot_ps (__mmask8 __U, __m256 __A, __m256 __B) { 120 return (__m256) __builtin_ia32_andnps256_mask ((__v8sf) __A, 121 (__v8sf) __B, 122 (__v8sf) 123 _mm256_setzero_ps (), 124 (__mmask8) __U); 125} 126 127static __inline__ __m128 __attribute__ ((__always_inline__, __nodebug__)) 128_mm_mask_andnot_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 129 return (__m128) __builtin_ia32_andnps128_mask ((__v4sf) __A, 130 (__v4sf) __B, 131 (__v4sf) __W, 132 (__mmask8) __U); 133} 134 135static __inline__ __m128 __attribute__ ((__always_inline__, __nodebug__)) 136_mm_maskz_andnot_ps (__mmask8 __U, __m128 __A, __m128 __B) { 137 return (__m128) __builtin_ia32_andnps128_mask ((__v4sf) __A, 138 (__v4sf) __B, 139 (__v4sf) 140 _mm_setzero_ps (), 141 (__mmask8) __U); 142} 143 144static __inline__ __m256d __attribute__ ((__always_inline__, __nodebug__)) 
145_mm256_mask_and_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { 146 return (__m256d) __builtin_ia32_andpd256_mask ((__v4df) __A, 147 (__v4df) __B, 148 (__v4df) __W, 149 (__mmask8) __U); 150} 151 152static __inline__ __m256d __attribute__ ((__always_inline__, __nodebug__)) 153_mm256_maskz_and_pd (__mmask8 __U, __m256d __A, __m256d __B) { 154 return (__m256d) __builtin_ia32_andpd256_mask ((__v4df) __A, 155 (__v4df) __B, 156 (__v4df) 157 _mm256_setzero_pd (), 158 (__mmask8) __U); 159} 160 161static __inline__ __m128d __attribute__ ((__always_inline__, __nodebug__)) 162_mm_mask_and_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 163 return (__m128d) __builtin_ia32_andpd128_mask ((__v2df) __A, 164 (__v2df) __B, 165 (__v2df) __W, 166 (__mmask8) __U); 167} 168 169static __inline__ __m128d __attribute__ ((__always_inline__, __nodebug__)) 170_mm_maskz_and_pd (__mmask8 __U, __m128d __A, __m128d __B) { 171 return (__m128d) __builtin_ia32_andpd128_mask ((__v2df) __A, 172 (__v2df) __B, 173 (__v2df) 174 _mm_setzero_pd (), 175 (__mmask8) __U); 176} 177 178static __inline__ __m256 __attribute__ ((__always_inline__, __nodebug__)) 179_mm256_mask_and_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 180 return (__m256) __builtin_ia32_andps256_mask ((__v8sf) __A, 181 (__v8sf) __B, 182 (__v8sf) __W, 183 (__mmask8) __U); 184} 185 186static __inline__ __m256 __attribute__ ((__always_inline__, __nodebug__)) 187_mm256_maskz_and_ps (__mmask8 __U, __m256 __A, __m256 __B) { 188 return (__m256) __builtin_ia32_andps256_mask ((__v8sf) __A, 189 (__v8sf) __B, 190 (__v8sf) 191 _mm256_setzero_ps (), 192 (__mmask8) __U); 193} 194 195static __inline__ __m128 __attribute__ ((__always_inline__, __nodebug__)) 196_mm_mask_and_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 197 return (__m128) __builtin_ia32_andps128_mask ((__v4sf) __A, 198 (__v4sf) __B, 199 (__v4sf) __W, 200 (__mmask8) __U); 201} 202 203static __inline__ __m128 __attribute__ ((__always_inline__, 
__nodebug__)) 204_mm_maskz_and_ps (__mmask8 __U, __m128 __A, __m128 __B) { 205 return (__m128) __builtin_ia32_andps128_mask ((__v4sf) __A, 206 (__v4sf) __B, 207 (__v4sf) 208 _mm_setzero_ps (), 209 (__mmask8) __U); 210} 211 212static __inline__ __m256d __attribute__ ((__always_inline__, __nodebug__)) 213_mm256_mask_xor_pd (__m256d __W, __mmask8 __U, __m256d __A, 214 __m256d __B) { 215 return (__m256d) __builtin_ia32_xorpd256_mask ((__v4df) __A, 216 (__v4df) __B, 217 (__v4df) __W, 218 (__mmask8) __U); 219} 220 221static __inline__ __m256d __attribute__ ((__always_inline__, __nodebug__)) 222_mm256_maskz_xor_pd (__mmask8 __U, __m256d __A, __m256d __B) { 223 return (__m256d) __builtin_ia32_xorpd256_mask ((__v4df) __A, 224 (__v4df) __B, 225 (__v4df) 226 _mm256_setzero_pd (), 227 (__mmask8) __U); 228} 229 230static __inline__ __m128d __attribute__ ((__always_inline__, __nodebug__)) 231_mm_mask_xor_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 232 return (__m128d) __builtin_ia32_xorpd128_mask ((__v2df) __A, 233 (__v2df) __B, 234 (__v2df) __W, 235 (__mmask8) __U); 236} 237 238static __inline__ __m128d __attribute__ ((__always_inline__, __nodebug__)) 239_mm_maskz_xor_pd (__mmask8 __U, __m128d __A, __m128d __B) { 240 return (__m128d) __builtin_ia32_xorpd128_mask ((__v2df) __A, 241 (__v2df) __B, 242 (__v2df) 243 _mm_setzero_pd (), 244 (__mmask8) __U); 245} 246 247static __inline__ __m256 __attribute__ ((__always_inline__, __nodebug__)) 248_mm256_mask_xor_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 249 return (__m256) __builtin_ia32_xorps256_mask ((__v8sf) __A, 250 (__v8sf) __B, 251 (__v8sf) __W, 252 (__mmask8) __U); 253} 254 255static __inline__ __m256 __attribute__ ((__always_inline__, __nodebug__)) 256_mm256_maskz_xor_ps (__mmask8 __U, __m256 __A, __m256 __B) { 257 return (__m256) __builtin_ia32_xorps256_mask ((__v8sf) __A, 258 (__v8sf) __B, 259 (__v8sf) 260 _mm256_setzero_ps (), 261 (__mmask8) __U); 262} 263 264static __inline__ __m128 __attribute__ 
((__always_inline__, __nodebug__)) 265_mm_mask_xor_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 266 return (__m128) __builtin_ia32_xorps128_mask ((__v4sf) __A, 267 (__v4sf) __B, 268 (__v4sf) __W, 269 (__mmask8) __U); 270} 271 272static __inline__ __m128 __attribute__ ((__always_inline__, __nodebug__)) 273_mm_maskz_xor_ps (__mmask8 __U, __m128 __A, __m128 __B) { 274 return (__m128) __builtin_ia32_xorps128_mask ((__v4sf) __A, 275 (__v4sf) __B, 276 (__v4sf) 277 _mm_setzero_ps (), 278 (__mmask8) __U); 279} 280 281static __inline__ __m256d __attribute__ ((__always_inline__, __nodebug__)) 282_mm256_mask_or_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { 283 return (__m256d) __builtin_ia32_orpd256_mask ((__v4df) __A, 284 (__v4df) __B, 285 (__v4df) __W, 286 (__mmask8) __U); 287} 288 289static __inline__ __m256d __attribute__ ((__always_inline__, __nodebug__)) 290_mm256_maskz_or_pd (__mmask8 __U, __m256d __A, __m256d __B) { 291 return (__m256d) __builtin_ia32_orpd256_mask ((__v4df) __A, 292 (__v4df) __B, 293 (__v4df) 294 _mm256_setzero_pd (), 295 (__mmask8) __U); 296} 297 298static __inline__ __m128d __attribute__ ((__always_inline__, __nodebug__)) 299_mm_mask_or_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 300 return (__m128d) __builtin_ia32_orpd128_mask ((__v2df) __A, 301 (__v2df) __B, 302 (__v2df) __W, 303 (__mmask8) __U); 304} 305 306static __inline__ __m128d __attribute__ ((__always_inline__, __nodebug__)) 307_mm_maskz_or_pd (__mmask8 __U, __m128d __A, __m128d __B) { 308 return (__m128d) __builtin_ia32_orpd128_mask ((__v2df) __A, 309 (__v2df) __B, 310 (__v2df) 311 _mm_setzero_pd (), 312 (__mmask8) __U); 313} 314 315static __inline__ __m256 __attribute__ ((__always_inline__, __nodebug__)) 316_mm256_mask_or_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 317 return (__m256) __builtin_ia32_orps256_mask ((__v8sf) __A, 318 (__v8sf) __B, 319 (__v8sf) __W, 320 (__mmask8) __U); 321} 322 323static __inline__ __m256 __attribute__ 
((__always_inline__, __nodebug__)) 324_mm256_maskz_or_ps (__mmask8 __U, __m256 __A, __m256 __B) { 325 return (__m256) __builtin_ia32_orps256_mask ((__v8sf) __A, 326 (__v8sf) __B, 327 (__v8sf) 328 _mm256_setzero_ps (), 329 (__mmask8) __U); 330} 331 332static __inline__ __m128 __attribute__ ((__always_inline__, __nodebug__)) 333_mm_mask_or_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 334 return (__m128) __builtin_ia32_orps128_mask ((__v4sf) __A, 335 (__v4sf) __B, 336 (__v4sf) __W, 337 (__mmask8) __U); 338} 339 340static __inline__ __m128 __attribute__ ((__always_inline__, __nodebug__)) 341_mm_maskz_or_ps (__mmask8 __U, __m128 __A, __m128 __B) { 342 return (__m128) __builtin_ia32_orps128_mask ((__v4sf) __A, 343 (__v4sf) __B, 344 (__v4sf) 345 _mm_setzero_ps (), 346 (__mmask8) __U); 347} 348 349#endif 350