/* avx512vldqintrin.h -- LLVM revision 287506 */
1/*===---- avx512vldqintrin.h - AVX512VL and AVX512DQ intrinsics ---------------------------=== 2 * 3 * Permission is hereby granted, free of charge, to any person obtaining a copy 4 * of this software and associated documentation files (the "Software"), to deal 5 * in the Software without restriction, including without limitation the rights 6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 * copies of the Software, and to permit persons to whom the Software is 8 * furnished to do so, subject to the following conditions: 9 * 10 * The above copyright notice and this permission notice shall be included in 11 * all copies or substantial portions of the Software. 12 * 13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 * THE SOFTWARE. 20 * 21 *===-----------------------------------------------------------------------=== 22 */ 23 24#ifndef __IMMINTRIN_H 25#error "Never use <avx512vldqintrin.h> directly; include <immintrin.h> instead." 26#endif 27 28#ifndef __AVX512VLDQINTRIN_H 29#define __AVX512VLDQINTRIN_H 30 31/* Define the default attributes for the functions in this file. 
*/ 32#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__)) 33 34static __inline__ __m256i __DEFAULT_FN_ATTRS 35_mm256_mullo_epi64 (__m256i __A, __m256i __B) { 36 return (__m256i) ((__v4di) __A * (__v4di) __B); 37} 38 39static __inline__ __m256i __DEFAULT_FN_ATTRS 40_mm256_mask_mullo_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { 41 return (__m256i) __builtin_ia32_pmullq256_mask ((__v4di) __A, 42 (__v4di) __B, 43 (__v4di) __W, 44 (__mmask8) __U); 45} 46 47static __inline__ __m256i __DEFAULT_FN_ATTRS 48_mm256_maskz_mullo_epi64 (__mmask8 __U, __m256i __A, __m256i __B) { 49 return (__m256i) __builtin_ia32_pmullq256_mask ((__v4di) __A, 50 (__v4di) __B, 51 (__v4di) 52 _mm256_setzero_si256 (), 53 (__mmask8) __U); 54} 55 56static __inline__ __m128i __DEFAULT_FN_ATTRS 57_mm_mullo_epi64 (__m128i __A, __m128i __B) { 58 return (__m128i) ((__v2di) __A * (__v2di) __B); 59} 60 61static __inline__ __m128i __DEFAULT_FN_ATTRS 62_mm_mask_mullo_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { 63 return (__m128i) __builtin_ia32_pmullq128_mask ((__v2di) __A, 64 (__v2di) __B, 65 (__v2di) __W, 66 (__mmask8) __U); 67} 68 69static __inline__ __m128i __DEFAULT_FN_ATTRS 70_mm_maskz_mullo_epi64 (__mmask8 __U, __m128i __A, __m128i __B) { 71 return (__m128i) __builtin_ia32_pmullq128_mask ((__v2di) __A, 72 (__v2di) __B, 73 (__v2di) 74 _mm_setzero_si128 (), 75 (__mmask8) __U); 76} 77 78static __inline__ __m256d __DEFAULT_FN_ATTRS 79_mm256_mask_andnot_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { 80 return (__m256d) __builtin_ia32_andnpd256_mask ((__v4df) __A, 81 (__v4df) __B, 82 (__v4df) __W, 83 (__mmask8) __U); 84} 85 86static __inline__ __m256d __DEFAULT_FN_ATTRS 87_mm256_maskz_andnot_pd (__mmask8 __U, __m256d __A, __m256d __B) { 88 return (__m256d) __builtin_ia32_andnpd256_mask ((__v4df) __A, 89 (__v4df) __B, 90 (__v4df) 91 _mm256_setzero_pd (), 92 (__mmask8) __U); 93} 94 95static __inline__ __m128d __DEFAULT_FN_ATTRS 
96_mm_mask_andnot_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 97 return (__m128d) __builtin_ia32_andnpd128_mask ((__v2df) __A, 98 (__v2df) __B, 99 (__v2df) __W, 100 (__mmask8) __U); 101} 102 103static __inline__ __m128d __DEFAULT_FN_ATTRS 104_mm_maskz_andnot_pd (__mmask8 __U, __m128d __A, __m128d __B) { 105 return (__m128d) __builtin_ia32_andnpd128_mask ((__v2df) __A, 106 (__v2df) __B, 107 (__v2df) 108 _mm_setzero_pd (), 109 (__mmask8) __U); 110} 111 112static __inline__ __m256 __DEFAULT_FN_ATTRS 113_mm256_mask_andnot_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 114 return (__m256) __builtin_ia32_andnps256_mask ((__v8sf) __A, 115 (__v8sf) __B, 116 (__v8sf) __W, 117 (__mmask8) __U); 118} 119 120static __inline__ __m256 __DEFAULT_FN_ATTRS 121_mm256_maskz_andnot_ps (__mmask8 __U, __m256 __A, __m256 __B) { 122 return (__m256) __builtin_ia32_andnps256_mask ((__v8sf) __A, 123 (__v8sf) __B, 124 (__v8sf) 125 _mm256_setzero_ps (), 126 (__mmask8) __U); 127} 128 129static __inline__ __m128 __DEFAULT_FN_ATTRS 130_mm_mask_andnot_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 131 return (__m128) __builtin_ia32_andnps128_mask ((__v4sf) __A, 132 (__v4sf) __B, 133 (__v4sf) __W, 134 (__mmask8) __U); 135} 136 137static __inline__ __m128 __DEFAULT_FN_ATTRS 138_mm_maskz_andnot_ps (__mmask8 __U, __m128 __A, __m128 __B) { 139 return (__m128) __builtin_ia32_andnps128_mask ((__v4sf) __A, 140 (__v4sf) __B, 141 (__v4sf) 142 _mm_setzero_ps (), 143 (__mmask8) __U); 144} 145 146static __inline__ __m256d __DEFAULT_FN_ATTRS 147_mm256_mask_and_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { 148 return (__m256d) __builtin_ia32_andpd256_mask ((__v4df) __A, 149 (__v4df) __B, 150 (__v4df) __W, 151 (__mmask8) __U); 152} 153 154static __inline__ __m256d __DEFAULT_FN_ATTRS 155_mm256_maskz_and_pd (__mmask8 __U, __m256d __A, __m256d __B) { 156 return (__m256d) __builtin_ia32_andpd256_mask ((__v4df) __A, 157 (__v4df) __B, 158 (__v4df) 159 _mm256_setzero_pd (), 
160 (__mmask8) __U); 161} 162 163static __inline__ __m128d __DEFAULT_FN_ATTRS 164_mm_mask_and_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 165 return (__m128d) __builtin_ia32_andpd128_mask ((__v2df) __A, 166 (__v2df) __B, 167 (__v2df) __W, 168 (__mmask8) __U); 169} 170 171static __inline__ __m128d __DEFAULT_FN_ATTRS 172_mm_maskz_and_pd (__mmask8 __U, __m128d __A, __m128d __B) { 173 return (__m128d) __builtin_ia32_andpd128_mask ((__v2df) __A, 174 (__v2df) __B, 175 (__v2df) 176 _mm_setzero_pd (), 177 (__mmask8) __U); 178} 179 180static __inline__ __m256 __DEFAULT_FN_ATTRS 181_mm256_mask_and_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 182 return (__m256) __builtin_ia32_andps256_mask ((__v8sf) __A, 183 (__v8sf) __B, 184 (__v8sf) __W, 185 (__mmask8) __U); 186} 187 188static __inline__ __m256 __DEFAULT_FN_ATTRS 189_mm256_maskz_and_ps (__mmask8 __U, __m256 __A, __m256 __B) { 190 return (__m256) __builtin_ia32_andps256_mask ((__v8sf) __A, 191 (__v8sf) __B, 192 (__v8sf) 193 _mm256_setzero_ps (), 194 (__mmask8) __U); 195} 196 197static __inline__ __m128 __DEFAULT_FN_ATTRS 198_mm_mask_and_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 199 return (__m128) __builtin_ia32_andps128_mask ((__v4sf) __A, 200 (__v4sf) __B, 201 (__v4sf) __W, 202 (__mmask8) __U); 203} 204 205static __inline__ __m128 __DEFAULT_FN_ATTRS 206_mm_maskz_and_ps (__mmask8 __U, __m128 __A, __m128 __B) { 207 return (__m128) __builtin_ia32_andps128_mask ((__v4sf) __A, 208 (__v4sf) __B, 209 (__v4sf) 210 _mm_setzero_ps (), 211 (__mmask8) __U); 212} 213 214static __inline__ __m256d __DEFAULT_FN_ATTRS 215_mm256_mask_xor_pd (__m256d __W, __mmask8 __U, __m256d __A, 216 __m256d __B) { 217 return (__m256d) __builtin_ia32_xorpd256_mask ((__v4df) __A, 218 (__v4df) __B, 219 (__v4df) __W, 220 (__mmask8) __U); 221} 222 223static __inline__ __m256d __DEFAULT_FN_ATTRS 224_mm256_maskz_xor_pd (__mmask8 __U, __m256d __A, __m256d __B) { 225 return (__m256d) __builtin_ia32_xorpd256_mask ((__v4df) 
__A, 226 (__v4df) __B, 227 (__v4df) 228 _mm256_setzero_pd (), 229 (__mmask8) __U); 230} 231 232static __inline__ __m128d __DEFAULT_FN_ATTRS 233_mm_mask_xor_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 234 return (__m128d) __builtin_ia32_xorpd128_mask ((__v2df) __A, 235 (__v2df) __B, 236 (__v2df) __W, 237 (__mmask8) __U); 238} 239 240static __inline__ __m128d __DEFAULT_FN_ATTRS 241_mm_maskz_xor_pd (__mmask8 __U, __m128d __A, __m128d __B) { 242 return (__m128d) __builtin_ia32_xorpd128_mask ((__v2df) __A, 243 (__v2df) __B, 244 (__v2df) 245 _mm_setzero_pd (), 246 (__mmask8) __U); 247} 248 249static __inline__ __m256 __DEFAULT_FN_ATTRS 250_mm256_mask_xor_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 251 return (__m256) __builtin_ia32_xorps256_mask ((__v8sf) __A, 252 (__v8sf) __B, 253 (__v8sf) __W, 254 (__mmask8) __U); 255} 256 257static __inline__ __m256 __DEFAULT_FN_ATTRS 258_mm256_maskz_xor_ps (__mmask8 __U, __m256 __A, __m256 __B) { 259 return (__m256) __builtin_ia32_xorps256_mask ((__v8sf) __A, 260 (__v8sf) __B, 261 (__v8sf) 262 _mm256_setzero_ps (), 263 (__mmask8) __U); 264} 265 266static __inline__ __m128 __DEFAULT_FN_ATTRS 267_mm_mask_xor_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 268 return (__m128) __builtin_ia32_xorps128_mask ((__v4sf) __A, 269 (__v4sf) __B, 270 (__v4sf) __W, 271 (__mmask8) __U); 272} 273 274static __inline__ __m128 __DEFAULT_FN_ATTRS 275_mm_maskz_xor_ps (__mmask8 __U, __m128 __A, __m128 __B) { 276 return (__m128) __builtin_ia32_xorps128_mask ((__v4sf) __A, 277 (__v4sf) __B, 278 (__v4sf) 279 _mm_setzero_ps (), 280 (__mmask8) __U); 281} 282 283static __inline__ __m256d __DEFAULT_FN_ATTRS 284_mm256_mask_or_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { 285 return (__m256d) __builtin_ia32_orpd256_mask ((__v4df) __A, 286 (__v4df) __B, 287 (__v4df) __W, 288 (__mmask8) __U); 289} 290 291static __inline__ __m256d __DEFAULT_FN_ATTRS 292_mm256_maskz_or_pd (__mmask8 __U, __m256d __A, __m256d __B) { 293 
return (__m256d) __builtin_ia32_orpd256_mask ((__v4df) __A, 294 (__v4df) __B, 295 (__v4df) 296 _mm256_setzero_pd (), 297 (__mmask8) __U); 298} 299 300static __inline__ __m128d __DEFAULT_FN_ATTRS 301_mm_mask_or_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 302 return (__m128d) __builtin_ia32_orpd128_mask ((__v2df) __A, 303 (__v2df) __B, 304 (__v2df) __W, 305 (__mmask8) __U); 306} 307 308static __inline__ __m128d __DEFAULT_FN_ATTRS 309_mm_maskz_or_pd (__mmask8 __U, __m128d __A, __m128d __B) { 310 return (__m128d) __builtin_ia32_orpd128_mask ((__v2df) __A, 311 (__v2df) __B, 312 (__v2df) 313 _mm_setzero_pd (), 314 (__mmask8) __U); 315} 316 317static __inline__ __m256 __DEFAULT_FN_ATTRS 318_mm256_mask_or_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 319 return (__m256) __builtin_ia32_orps256_mask ((__v8sf) __A, 320 (__v8sf) __B, 321 (__v8sf) __W, 322 (__mmask8) __U); 323} 324 325static __inline__ __m256 __DEFAULT_FN_ATTRS 326_mm256_maskz_or_ps (__mmask8 __U, __m256 __A, __m256 __B) { 327 return (__m256) __builtin_ia32_orps256_mask ((__v8sf) __A, 328 (__v8sf) __B, 329 (__v8sf) 330 _mm256_setzero_ps (), 331 (__mmask8) __U); 332} 333 334static __inline__ __m128 __DEFAULT_FN_ATTRS 335_mm_mask_or_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 336 return (__m128) __builtin_ia32_orps128_mask ((__v4sf) __A, 337 (__v4sf) __B, 338 (__v4sf) __W, 339 (__mmask8) __U); 340} 341 342static __inline__ __m128 __DEFAULT_FN_ATTRS 343_mm_maskz_or_ps (__mmask8 __U, __m128 __A, __m128 __B) { 344 return (__m128) __builtin_ia32_orps128_mask ((__v4sf) __A, 345 (__v4sf) __B, 346 (__v4sf) 347 _mm_setzero_ps (), 348 (__mmask8) __U); 349} 350 351#undef __DEFAULT_FN_ATTRS 352 353#endif 354