1/*===---- avx512fintrin.h - AVX512F intrinsics -----------------------------=== 2 * 3 * Permission is hereby granted, free of charge, to any person obtaining a copy 4 * of this software and associated documentation files (the "Software"), to deal 5 * in the Software without restriction, including without limitation the rights 6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 * copies of the Software, and to permit persons to whom the Software is 8 * furnished to do so, subject to the following conditions: 9 * 10 * The above copyright notice and this permission notice shall be included in 11 * all copies or substantial portions of the Software. 12 * 13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 * THE SOFTWARE. 20 * 21 *===-----------------------------------------------------------------------=== 22 */ 23#ifndef __IMMINTRIN_H 24#error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead." 25#endif 26 27#ifndef __AVX512FINTRIN_H 28#define __AVX512FINTRIN_H 29 30typedef double __v8df __attribute__((__vector_size__(64))); 31typedef float __v16sf __attribute__((__vector_size__(64))); 32typedef long long __v8di __attribute__((__vector_size__(64))); 33typedef int __v16si __attribute__((__vector_size__(64))); 34 35typedef float __m512 __attribute__((__vector_size__(64))); 36typedef double __m512d __attribute__((__vector_size__(64))); 37typedef long long __m512i __attribute__((__vector_size__(64))); 38 39typedef unsigned char __mmask8; 40typedef unsigned short __mmask16; 41 42/* Rounding mode macros. */ 43#define _MM_FROUND_TO_NEAREST_INT 0x00 44#define _MM_FROUND_TO_NEG_INF 0x01 45#define _MM_FROUND_TO_POS_INF 0x02 46#define _MM_FROUND_TO_ZERO 0x03 47#define _MM_FROUND_CUR_DIRECTION 0x04 48 49/* Define the default attributes for the functions in this file. */ 50#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512f"))) 51 52/* Create vectors with repeated elements */ 53 54static __inline __m512i __DEFAULT_FN_ATTRS 55_mm512_setzero_si512(void) 56{ 57 return (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 }; 58} 59 60static __inline__ __m512d __DEFAULT_FN_ATTRS 61_mm512_undefined_pd() 62{ 63 return (__m512d)__builtin_ia32_undef512(); 64} 65 66static __inline__ __m512 __DEFAULT_FN_ATTRS 67_mm512_undefined() 68{ 69 return (__m512)__builtin_ia32_undef512(); 70} 71 72static __inline__ __m512 __DEFAULT_FN_ATTRS 73_mm512_undefined_ps() 74{ 75 return (__m512)__builtin_ia32_undef512(); 76} 77 78static __inline__ __m512i __DEFAULT_FN_ATTRS 79_mm512_undefined_epi32() 80{ 81 return (__m512i)__builtin_ia32_undef512(); 82} 83 84static __inline __m512i __DEFAULT_FN_ATTRS 85_mm512_maskz_set1_epi32(__mmask16 __M, int __A) 86{ 87 return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A, 88 (__v16si) 89 _mm512_setzero_si512 (), 90 __M); 91} 92 93static __inline __m512i __DEFAULT_FN_ATTRS 94_mm512_maskz_set1_epi64(__mmask8 __M, long long __A) 95{ 96#ifdef __x86_64__ 97 return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A, 98 (__v8di) 99 _mm512_setzero_si512 (), 100 __M); 101#else 102 return (__m512i) __builtin_ia32_pbroadcastq512_mem_mask (__A, 103 (__v8di) 104 _mm512_setzero_si512 (), 105 __M); 106#endif 107} 108 109static __inline __m512 __DEFAULT_FN_ATTRS 110_mm512_setzero_ps(void) 111{ 112 return (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 113 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }; 114} 115static __inline __m512d __DEFAULT_FN_ATTRS 116_mm512_setzero_pd(void) 117{ 118 return (__m512d){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }; 119} 120 121static __inline __m512 __DEFAULT_FN_ATTRS 122_mm512_set1_ps(float __w) 123{ 124 return (__m512){ __w, __w, __w, __w, __w, __w, __w, __w, 125 __w, __w, __w, __w, __w, __w, __w, __w }; 126} 127 128static __inline __m512d __DEFAULT_FN_ATTRS 129_mm512_set1_pd(double __w) 130{ 131 return (__m512d){ __w, __w, __w, __w, __w, __w, __w, __w }; 132} 133 134static __inline __m512i __DEFAULT_FN_ATTRS 135_mm512_set1_epi32(int __s) 136{ 137 return (__m512i)(__v16si){ __s, __s, __s, __s, __s, __s, __s, __s, 138 __s, __s, __s, __s, __s, __s, __s, __s }; 139} 140 141static __inline __m512i __DEFAULT_FN_ATTRS 142_mm512_set1_epi64(long long __d) 143{ 144 return (__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d }; 145} 146 147static __inline__ __m512 __DEFAULT_FN_ATTRS 148_mm512_broadcastss_ps(__m128 __X) 149{ 150 float __f = __X[0]; 151 return (__v16sf){ __f, __f, __f, __f, 152 __f, __f, __f, __f, 153 __f, __f, __f, __f, 154 __f, __f, __f, __f }; 155} 156 157static __inline__ __m512d __DEFAULT_FN_ATTRS 158_mm512_broadcastsd_pd(__m128d __X) 159{ 160 double __d = __X[0]; 161 return (__v8df){ __d, __d, __d, __d, 162 __d, __d, __d, __d }; 163} 164 165/* Cast between vector types */ 166 167static __inline __m512d __DEFAULT_FN_ATTRS 168_mm512_castpd256_pd512(__m256d __a) 169{ 170 return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, -1, -1, -1, -1); 171} 172 173static __inline __m512 __DEFAULT_FN_ATTRS 174_mm512_castps256_ps512(__m256 __a) 175{ 176 return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7, 177 -1, -1, -1, -1, -1, -1, -1, -1); 178} 179 180static __inline __m128d __DEFAULT_FN_ATTRS 181_mm512_castpd512_pd128(__m512d __a) 182{ 183 return __builtin_shufflevector(__a, __a, 0, 1); 184} 185 186static __inline __m128 __DEFAULT_FN_ATTRS 187_mm512_castps512_ps128(__m512 __a) 188{ 189 return __builtin_shufflevector(__a, __a, 0, 1, 2, 3); 190} 191 192/* Bitwise operators */ 193static __inline__ __m512i __DEFAULT_FN_ATTRS 194_mm512_and_epi32(__m512i __a, __m512i __b) 195{ 196 return __a & __b; 197} 198 199static __inline__ __m512i __DEFAULT_FN_ATTRS 200_mm512_mask_and_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b) 201{ 202 return (__m512i) __builtin_ia32_pandd512_mask((__v16si) __a, 203 (__v16si) __b, 204 (__v16si) __src, 205 (__mmask16) __k); 206} 207static __inline__ __m512i __DEFAULT_FN_ATTRS 208_mm512_maskz_and_epi32(__mmask16 __k, __m512i __a, __m512i __b) 209{ 210 return (__m512i) __builtin_ia32_pandd512_mask((__v16si) __a, 211 (__v16si) __b, 212 (__v16si) 213 _mm512_setzero_si512 (), 214 (__mmask16) __k); 215} 216 217static __inline__ __m512i __DEFAULT_FN_ATTRS 218_mm512_and_epi64(__m512i __a, __m512i __b) 219{ 220 return __a & __b; 221} 222 223static __inline__ __m512i __DEFAULT_FN_ATTRS 224_mm512_mask_and_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b) 225{ 226 return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __a, 227 (__v8di) __b, 228 (__v8di) __src, 229 (__mmask8) __k); 230} 231static __inline__ __m512i __DEFAULT_FN_ATTRS 232_mm512_maskz_and_epi64(__mmask8 __k, __m512i __a, __m512i __b) 233{ 234 return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __a, 235 (__v8di) __b, 236 (__v8di) 237 _mm512_setzero_si512 (), 238 (__mmask8) __k); 239} 240 241static __inline__ __m512i __DEFAULT_FN_ATTRS 242_mm512_andnot_epi32 (__m512i __A, __m512i __B) 243{ 244 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A, 245 (__v16si) __B, 246 (__v16si) 247 _mm512_setzero_si512 (), 248 (__mmask16) -1); 249} 250 251static __inline__ __m512i __DEFAULT_FN_ATTRS 252_mm512_mask_andnot_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) 253{ 254 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A, 255 (__v16si) __B, 256 (__v16si) __W, 257 (__mmask16) __U); 258} 259 260static __inline__ __m512i __DEFAULT_FN_ATTRS 261_mm512_maskz_andnot_epi32 (__mmask16 __U, __m512i __A, __m512i __B) 262{ 263 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A, 264 (__v16si) __B, 265 (__v16si) 266 _mm512_setzero_si512 (), 267 (__mmask16) __U); 268} 269 270static __inline__ __m512i __DEFAULT_FN_ATTRS 271_mm512_andnot_epi64 (__m512i __A, __m512i __B) 272{ 273 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A, 274 (__v8di) __B, 275 (__v8di) 276 _mm512_setzero_si512 (), 277 (__mmask8) -1); 278} 279 280static __inline__ __m512i __DEFAULT_FN_ATTRS 281_mm512_mask_andnot_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) 282{ 283 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A, 284 (__v8di) __B, 285 (__v8di) __W, __U); 286} 287 288static __inline__ __m512i __DEFAULT_FN_ATTRS 289_mm512_maskz_andnot_epi64 (__mmask8 __U, __m512i __A, __m512i __B) 290{ 291 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A, 292 (__v8di) __B, 293 (__v8di) 294 _mm512_setzero_pd (), 295 __U); 296} 297static __inline__ __m512i __DEFAULT_FN_ATTRS 298_mm512_or_epi32(__m512i __a, __m512i __b) 299{ 300 return __a | __b; 301} 302 303static __inline__ __m512i __DEFAULT_FN_ATTRS 304_mm512_mask_or_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b) 305{ 306 return (__m512i) __builtin_ia32_pord512_mask((__v16si) __a, 307 (__v16si) __b, 308 (__v16si) __src, 309 (__mmask16) __k); 310} 311static __inline__ __m512i __DEFAULT_FN_ATTRS 312_mm512_maskz_or_epi32(__mmask16 __k, __m512i __a, __m512i __b) 313{ 314 return (__m512i) __builtin_ia32_pord512_mask((__v16si) __a, 315 (__v16si) __b, 316 (__v16si) 317 _mm512_setzero_si512 (), 318 (__mmask16) __k); 319} 320 321static __inline__ __m512i __DEFAULT_FN_ATTRS 322_mm512_or_epi64(__m512i __a, __m512i __b) 323{ 324 return __a | __b; 325} 326 327static __inline__ __m512i __DEFAULT_FN_ATTRS 328_mm512_mask_or_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b) 329{ 330 return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __a, 331 (__v8di) __b, 332 (__v8di) __src, 333 (__mmask8) __k); 334} 335static __inline__ __m512i __DEFAULT_FN_ATTRS 336_mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b) 337{ 338 return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __a, 339 (__v8di) __b, 340 (__v8di) 341 _mm512_setzero_si512 (), 342 (__mmask8) __k); 343} 344 345static __inline__ __m512i __DEFAULT_FN_ATTRS 346_mm512_xor_epi32(__m512i __a, __m512i __b) 347{ 348 return __a ^ __b; 349} 350 351static __inline__ __m512i __DEFAULT_FN_ATTRS 352_mm512_mask_xor_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b) 353{ 354 return (__m512i) __builtin_ia32_pxord512_mask((__v16si) __a, 355 (__v16si) __b, 356 (__v16si) __src, 357 (__mmask16) __k); 358} 359static __inline__ __m512i __DEFAULT_FN_ATTRS 360_mm512_maskz_xor_epi32(__mmask16 __k, __m512i __a, __m512i __b) 361{ 362 return (__m512i) __builtin_ia32_pxord512_mask((__v16si) __a, 363 (__v16si) __b, 364 (__v16si) 365 _mm512_setzero_si512 (), 366 (__mmask16) __k); 367} 368 369static __inline__ __m512i __DEFAULT_FN_ATTRS 370_mm512_xor_epi64(__m512i __a, __m512i __b) 371{ 372 return __a ^ __b; 373} 374 375static __inline__ __m512i __DEFAULT_FN_ATTRS 376_mm512_mask_xor_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b) 377{ 378 return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __a, 379 (__v8di) __b, 380 (__v8di) __src, 381 (__mmask8) __k); 382} 383static __inline__ __m512i __DEFAULT_FN_ATTRS 384_mm512_maskz_xor_epi64(__mmask8 __k, __m512i __a, __m512i __b) 385{ 386 return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __a, 387 (__v8di) __b, 388 (__v8di) 389 _mm512_setzero_si512 (), 390 (__mmask8) __k); 391} 392 393static __inline__ __m512i __DEFAULT_FN_ATTRS 394_mm512_and_si512(__m512i __a, __m512i __b) 395{ 396 return __a & __b; 397} 398 399static __inline__ __m512i __DEFAULT_FN_ATTRS 400_mm512_or_si512(__m512i __a, __m512i __b) 401{ 402 return __a | __b; 403} 404 405static __inline__ __m512i __DEFAULT_FN_ATTRS 406_mm512_xor_si512(__m512i __a, __m512i __b) 407{ 408 return __a ^ __b; 409} 410/* Arithmetic */ 411 412static __inline __m512d __DEFAULT_FN_ATTRS 413_mm512_add_pd(__m512d __a, __m512d __b) 414{ 415 return __a + __b; 416} 417 418static __inline __m512 __DEFAULT_FN_ATTRS 419_mm512_add_ps(__m512 __a, __m512 __b) 420{ 421 return __a + __b; 422} 423 424static __inline __m512d __DEFAULT_FN_ATTRS 425_mm512_mul_pd(__m512d __a, __m512d __b) 426{ 427 return __a * __b; 428} 429 430static __inline __m512 __DEFAULT_FN_ATTRS 431_mm512_mul_ps(__m512 __a, __m512 __b) 432{ 433 return __a * __b; 434} 435 436static __inline __m512d __DEFAULT_FN_ATTRS 437_mm512_sub_pd(__m512d __a, __m512d __b) 438{ 439 return __a - __b; 440} 441 442static __inline __m512 __DEFAULT_FN_ATTRS 443_mm512_sub_ps(__m512 __a, __m512 __b) 444{ 445 return __a - __b; 446} 447 448static __inline__ __m512i __DEFAULT_FN_ATTRS 449_mm512_add_epi64 (__m512i __A, __m512i __B) 450{ 451 return (__m512i) ((__v8di) __A + (__v8di) __B); 452} 453 454static __inline__ __m512i __DEFAULT_FN_ATTRS 455_mm512_mask_add_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) 456{ 457 return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A, 458 (__v8di) __B, 459 (__v8di) __W, 460 (__mmask8) __U); 461} 462 463static __inline__ __m512i __DEFAULT_FN_ATTRS 464_mm512_maskz_add_epi64 (__mmask8 __U, __m512i __A, __m512i __B) 465{ 466 return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A, 467 (__v8di) __B, 468 (__v8di) 469 _mm512_setzero_si512 (), 470 (__mmask8) __U); 471} 472 473static __inline__ __m512i __DEFAULT_FN_ATTRS 474_mm512_sub_epi64 (__m512i __A, __m512i __B) 475{ 476 return (__m512i) ((__v8di) __A - (__v8di) __B); 477} 478 479static __inline__ __m512i __DEFAULT_FN_ATTRS 480_mm512_mask_sub_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) 481{ 482 return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A, 483 (__v8di) __B, 484 (__v8di) __W, 485 (__mmask8) __U); 486} 487 488static __inline__ __m512i __DEFAULT_FN_ATTRS 489_mm512_maskz_sub_epi64 (__mmask8 __U, __m512i __A, __m512i __B) 490{ 491 return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A, 492 (__v8di) __B, 493 (__v8di) 494 _mm512_setzero_si512 (), 495 (__mmask8) __U); 496} 497 498static __inline__ __m512i __DEFAULT_FN_ATTRS 499_mm512_add_epi32 (__m512i __A, __m512i __B) 500{ 501 return (__m512i) ((__v16si) __A + (__v16si) __B); 502} 503 504static __inline__ __m512i __DEFAULT_FN_ATTRS 505_mm512_mask_add_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) 506{ 507 return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A, 508 (__v16si) __B, 509 (__v16si) __W, 510 (__mmask16) __U); 511} 512 513static __inline__ __m512i __DEFAULT_FN_ATTRS 514_mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B) 515{ 516 return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A, 517 (__v16si) __B, 518 (__v16si) 519 _mm512_setzero_si512 (), 520 (__mmask16) __U); 521} 522 523static __inline__ __m512i __DEFAULT_FN_ATTRS 524_mm512_sub_epi32 (__m512i __A, __m512i __B) 525{ 526 return (__m512i) ((__v16si) __A - (__v16si) __B); 527} 528 529static __inline__ __m512i __DEFAULT_FN_ATTRS 530_mm512_mask_sub_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) 531{ 532 return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A, 533 (__v16si) __B, 534 (__v16si) __W, 535 (__mmask16) __U); 536} 537 538static __inline__ __m512i __DEFAULT_FN_ATTRS 539_mm512_maskz_sub_epi32 (__mmask16 __U, __m512i __A, __m512i __B) 540{ 541 return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A, 542 (__v16si) __B, 543 (__v16si) 544 _mm512_setzero_si512 (), 545 (__mmask16) __U); 546} 547 548static __inline__ __m512d __DEFAULT_FN_ATTRS 549_mm512_max_pd(__m512d __A, __m512d __B) 550{ 551 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A, 552 (__v8df) __B, 553 (__v8df) 554 _mm512_setzero_pd (), 555 (__mmask8) -1, 556 _MM_FROUND_CUR_DIRECTION); 557} 558 559static __inline__ __m512 __DEFAULT_FN_ATTRS 560_mm512_max_ps(__m512 __A, __m512 __B) 561{ 562 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A, 563 (__v16sf) __B, 564 (__v16sf) 565 _mm512_setzero_ps (), 566 (__mmask16) -1, 567 _MM_FROUND_CUR_DIRECTION); 568} 569 570static __inline__ __m128 __DEFAULT_FN_ATTRS 571_mm_mask_max_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { 572 return (__m128) __builtin_ia32_maxss_round ((__v4sf) __A, 573 (__v4sf) __B, 574 (__v4sf) __W, 575 (__mmask8) __U, 576 _MM_FROUND_CUR_DIRECTION); 577} 578 579static __inline__ __m128 __DEFAULT_FN_ATTRS 580_mm_maskz_max_ss(__mmask8 __U,__m128 __A, __m128 __B) { 581 return (__m128) __builtin_ia32_maxss_round ((__v4sf) __A, 582 (__v4sf) __B, 583 (__v4sf) _mm_setzero_ps (), 584 (__mmask8) __U, 585 _MM_FROUND_CUR_DIRECTION); 586} 587 588#define _mm_max_round_ss(__A, __B, __R) __extension__ ({ \ 589 (__m128) __builtin_ia32_maxss_round ((__v4sf) __A, (__v4sf) __B, \ 590 (__v4sf) _mm_setzero_ps(), (__mmask8) -1, __R); }) 591 592#define _mm_mask_max_round_ss(__W, __U, __A, __B, __R) __extension__ ({ \ 593 (__m128) __builtin_ia32_maxss_round ((__v4sf) __A, (__v4sf) __B, \ 594 (__v4sf) __W, (__mmask8) __U,__R); }) 595 596#define _mm_maskz_max_round_ss(__U, __A, __B, __R) __extension__ ({ \ 597 (__m128) __builtin_ia32_maxss_round ((__v4sf) __A, (__v4sf) __B, \ 598 (__v4sf) _mm_setzero_ps(), (__mmask8) __U,__R); }) 599 600static __inline__ __m128d __DEFAULT_FN_ATTRS 601_mm_mask_max_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) { 602 return (__m128d) __builtin_ia32_maxsd_round ((__v2df) __A, 603 (__v2df) __B, 604 (__v2df) __W, 605 (__mmask8) __U, 606 _MM_FROUND_CUR_DIRECTION); 607} 608 609static __inline__ __m128d __DEFAULT_FN_ATTRS 610_mm_maskz_max_sd(__mmask8 __U,__m128d __A, __m128d __B) { 611 return (__m128d) __builtin_ia32_maxsd_round ((__v2df) __A, 612 (__v2df) __B, 613 (__v2df) _mm_setzero_pd (), 614 (__mmask8) __U, 615 _MM_FROUND_CUR_DIRECTION); 616} 617 618#define _mm_max_round_sd(__A, __B, __R) __extension__ ({ \ 619 (__m128d) __builtin_ia32_maxsd_round ((__v2df) __A, (__v2df) __B, \ 620 (__v2df) _mm_setzero_pd(), (__mmask8) -1, __R); }) 621 622#define _mm_mask_max_round_sd(__W, __U, __A, __B, __R) __extension__ ({ \ 623 (__m128d) __builtin_ia32_maxsd_round ((__v2df) __A, (__v2df) __B, \ 624 (__v2df) __W, (__mmask8) __U,__R); }) 625 626#define _mm_maskz_max_round_sd(__U, __A, __B, __R) __extension__ ({ \ 627 (__m128d) __builtin_ia32_maxsd_round ((__v2df) __A, (__v2df) __B, \ 628 (__v2df) _mm_setzero_pd(), (__mmask8) __U,__R); }) 629 630static __inline __m512i 631__DEFAULT_FN_ATTRS 632_mm512_max_epi32(__m512i __A, __m512i __B) 633{ 634 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A, 635 (__v16si) __B, 636 (__v16si) 637 _mm512_setzero_si512 (), 638 (__mmask16) -1); 639} 640 641static __inline __m512i __DEFAULT_FN_ATTRS 642_mm512_max_epu32(__m512i __A, __m512i __B) 643{ 644 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A, 645 (__v16si) __B, 646 (__v16si) 647 _mm512_setzero_si512 (), 648 (__mmask16) -1); 649} 650 651static __inline __m512i __DEFAULT_FN_ATTRS 652_mm512_max_epi64(__m512i __A, __m512i __B) 653{ 654 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A, 655 (__v8di) __B, 656 (__v8di) 657 _mm512_setzero_si512 (), 658 (__mmask8) -1); 659} 660 661static __inline __m512i __DEFAULT_FN_ATTRS 662_mm512_max_epu64(__m512i __A, __m512i __B) 663{ 664 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A, 665 (__v8di) __B, 666 (__v8di) 667 _mm512_setzero_si512 (), 668 (__mmask8) -1); 669} 670 671static __inline__ __m512d __DEFAULT_FN_ATTRS 672_mm512_min_pd(__m512d __A, __m512d __B) 673{ 674 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A, 675 (__v8df) __B, 676 (__v8df) 677 _mm512_setzero_pd (), 678 (__mmask8) -1, 679 _MM_FROUND_CUR_DIRECTION); 680} 681 682static __inline__ __m512 __DEFAULT_FN_ATTRS 683_mm512_min_ps(__m512 __A, __m512 __B) 684{ 685 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A, 686 (__v16sf) __B, 687 (__v16sf) 688 _mm512_setzero_ps (), 689 (__mmask16) -1, 690 _MM_FROUND_CUR_DIRECTION); 691} 692 693static __inline__ __m128 __DEFAULT_FN_ATTRS 694_mm_mask_min_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { 695 return (__m128) __builtin_ia32_minss_round ((__v4sf) __A, 696 (__v4sf) __B, 697 (__v4sf) __W, 698 (__mmask8) __U, 699 _MM_FROUND_CUR_DIRECTION); 700} 701 702static __inline__ __m128 __DEFAULT_FN_ATTRS 703_mm_maskz_min_ss(__mmask8 __U,__m128 __A, __m128 __B) { 704 return (__m128) __builtin_ia32_minss_round ((__v4sf) __A, 705 (__v4sf) __B, 706 (__v4sf) _mm_setzero_ps (), 707 (__mmask8) __U, 708 _MM_FROUND_CUR_DIRECTION); 709} 710 711#define _mm_min_round_ss(__A, __B, __R) __extension__ ({ \ 712 (__m128) __builtin_ia32_minss_round ((__v4sf) __A, (__v4sf) __B, \ 713 (__v4sf) _mm_setzero_ps(), (__mmask8) -1, __R); }) 714 715#define _mm_mask_min_round_ss(__W, __U, __A, __B, __R) __extension__ ({ \ 716 (__m128) __builtin_ia32_minss_round ((__v4sf) __A, (__v4sf) __B, \ 717 (__v4sf) __W, (__mmask8) __U,__R); }) 718 719#define _mm_maskz_min_round_ss(__U, __A, __B, __R) __extension__ ({ \ 720 (__m128) __builtin_ia32_minss_round ((__v4sf) __A, (__v4sf) __B, \ 721 (__v4sf) _mm_setzero_ps(), (__mmask8) __U,__R); }) 722 723static __inline__ __m128d __DEFAULT_FN_ATTRS 724_mm_mask_min_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) { 725 return (__m128d) __builtin_ia32_minsd_round ((__v2df) __A, 726 (__v2df) __B, 727 (__v2df) __W, 728 (__mmask8) __U, 729 _MM_FROUND_CUR_DIRECTION); 730} 731 732static __inline__ __m128d __DEFAULT_FN_ATTRS 733_mm_maskz_min_sd(__mmask8 __U,__m128d __A, __m128d __B) { 734 return (__m128d) __builtin_ia32_minsd_round ((__v2df) __A, 735 (__v2df) __B, 736 (__v2df) _mm_setzero_pd (), 737 (__mmask8) __U, 738 _MM_FROUND_CUR_DIRECTION); 739} 740 741#define _mm_min_round_sd(__A, __B, __R) __extension__ ({ \ 742 (__m128d) __builtin_ia32_minsd_round ((__v2df) __A, (__v2df) __B, \ 743 (__v2df) _mm_setzero_pd(), (__mmask8) -1, __R); }) 744 745#define _mm_mask_min_round_sd(__W, __U, __A, __B, __R) __extension__ ({ \ 746 (__m128d) __builtin_ia32_minsd_round ((__v2df) __A, (__v2df) __B, \ 747 (__v2df) __W, (__mmask8) __U,__R); }) 748 749#define _mm_maskz_min_round_sd(__U, __A, __B, __R) __extension__ ({ \ 750 (__m128d) __builtin_ia32_minsd_round ((__v2df) __A, (__v2df) __B, \ 751 (__v2df) _mm_setzero_pd(), (__mmask8) __U,__R); }) 752 753static __inline __m512i 754__DEFAULT_FN_ATTRS 755_mm512_min_epi32(__m512i __A, __m512i __B) 756{ 757 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A, 758 (__v16si) __B, 759 (__v16si) 760 _mm512_setzero_si512 (), 761 (__mmask16) -1); 762} 763 764static __inline __m512i __DEFAULT_FN_ATTRS 765_mm512_min_epu32(__m512i __A, __m512i __B) 766{ 767 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A, 768 (__v16si) __B, 769 (__v16si) 770 _mm512_setzero_si512 (), 771 (__mmask16) -1); 772} 773 774static __inline __m512i __DEFAULT_FN_ATTRS 775_mm512_min_epi64(__m512i __A, __m512i __B) 776{ 777 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A, 778 (__v8di) __B, 779 (__v8di) 780 _mm512_setzero_si512 (), 781 (__mmask8) -1); 782} 783 784static __inline __m512i __DEFAULT_FN_ATTRS 785_mm512_min_epu64(__m512i __A, __m512i __B) 786{ 787 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A, 788 (__v8di) __B, 789 (__v8di) 790 _mm512_setzero_si512 (), 791 (__mmask8) -1); 792} 793 794static __inline __m512i __DEFAULT_FN_ATTRS 795_mm512_mul_epi32(__m512i __X, __m512i __Y) 796{ 797 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X, 798 (__v16si) __Y, 799 (__v8di) 800 _mm512_setzero_si512 (), 801 (__mmask8) -1); 802} 803 804static __inline __m512i __DEFAULT_FN_ATTRS 805_mm512_mask_mul_epi32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) 806{ 807 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X, 808 (__v16si) __Y, 809 (__v8di) __W, __M); 810} 811 812static __inline __m512i __DEFAULT_FN_ATTRS 813_mm512_maskz_mul_epi32 (__mmask8 __M, __m512i __X, __m512i __Y) 814{ 815 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X, 816 (__v16si) __Y, 817 (__v8di) 818 _mm512_setzero_si512 (), 819 __M); 820} 821 822static __inline __m512i __DEFAULT_FN_ATTRS 823_mm512_mul_epu32(__m512i __X, __m512i __Y) 824{ 825 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X, 826 (__v16si) __Y, 827 (__v8di) 828 _mm512_setzero_si512 (), 829 (__mmask8) -1); 830} 831 832static __inline __m512i __DEFAULT_FN_ATTRS 833_mm512_mask_mul_epu32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) 834{ 835 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X, 836 (__v16si) __Y, 837 (__v8di) __W, __M); 838} 839 840static __inline __m512i __DEFAULT_FN_ATTRS 841_mm512_maskz_mul_epu32 (__mmask8 __M, __m512i __X, __m512i __Y) 842{ 843 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X, 844 (__v16si) __Y, 845 (__v8di) 846 _mm512_setzero_si512 (), 847 __M); 848} 849 850static __inline __m512i __DEFAULT_FN_ATTRS 851_mm512_mullo_epi32 (__m512i __A, __m512i __B) 852{ 853 return (__m512i) ((__v16si) __A * (__v16si) __B); 854} 855 856static __inline __m512i __DEFAULT_FN_ATTRS 857_mm512_maskz_mullo_epi32 (__mmask16 __M, __m512i __A, __m512i __B) 858{ 859 return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A, 860 (__v16si) __B, 861 (__v16si) 862 _mm512_setzero_si512 (), 863 __M); 864} 865 866static __inline __m512i __DEFAULT_FN_ATTRS 867_mm512_mask_mullo_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) 868{ 869 return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A, 870 (__v16si) __B, 871 (__v16si) __W, __M); 872} 873 874static __inline__ __m512d __DEFAULT_FN_ATTRS 875_mm512_sqrt_pd(__m512d __a) 876{ 877 return (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)__a, 878 (__v8df) _mm512_setzero_pd (), 879 (__mmask8) -1, 880 _MM_FROUND_CUR_DIRECTION); 881} 882 883static __inline__ __m512 __DEFAULT_FN_ATTRS 884_mm512_sqrt_ps(__m512 __a) 885{ 886 return (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)__a, 887 (__v16sf) _mm512_setzero_ps (), 888 (__mmask16) -1, 889 _MM_FROUND_CUR_DIRECTION); 890} 891 892static __inline__ __m512d __DEFAULT_FN_ATTRS 893_mm512_rsqrt14_pd(__m512d __A) 894{ 895 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A, 896 (__v8df) 897 _mm512_setzero_pd (), 898 (__mmask8) -1);} 899 900static __inline__ __m512 __DEFAULT_FN_ATTRS 901_mm512_rsqrt14_ps(__m512 __A) 902{ 903 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A, 904 (__v16sf) 905 _mm512_setzero_ps (), 906 (__mmask16) -1); 907} 908 909static __inline__ __m128 __DEFAULT_FN_ATTRS 910_mm_rsqrt14_ss(__m128 __A, __m128 __B) 911{ 912 return (__m128) __builtin_ia32_rsqrt14ss ((__v4sf) __A, 913 (__v4sf) __B, 914 (__v4sf) 915 _mm_setzero_ps (), 916 (__mmask8) -1); 917} 918 919static __inline__ __m128d __DEFAULT_FN_ATTRS 920_mm_rsqrt14_sd(__m128d __A, __m128d __B) 921{ 922 return (__m128d) __builtin_ia32_rsqrt14sd ((__v2df) __A, 923 (__v2df) __B, 924 (__v2df) 925 _mm_setzero_pd (), 926 (__mmask8) -1); 927} 928 929static __inline__ __m512d __DEFAULT_FN_ATTRS 930_mm512_rcp14_pd(__m512d __A) 931{ 932 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A, 933 (__v8df) 934 _mm512_setzero_pd (), 935 (__mmask8) -1); 936} 937 938static __inline__ __m512 __DEFAULT_FN_ATTRS 939_mm512_rcp14_ps(__m512 __A) 940{ 941 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A, 942 (__v16sf) 943 _mm512_setzero_ps (), 944 (__mmask16) -1); 945} 946static __inline__ __m128 __DEFAULT_FN_ATTRS 947_mm_rcp14_ss(__m128 __A, __m128 __B) 948{ 949 return (__m128) __builtin_ia32_rcp14ss ((__v4sf) __A, 950 (__v4sf) __B, 951 (__v4sf) 952 _mm_setzero_ps (), 953 (__mmask8) -1); 954} 955 956static __inline__ __m128d __DEFAULT_FN_ATTRS 957_mm_rcp14_sd(__m128d __A, __m128d __B) 958{ 959 return (__m128d) __builtin_ia32_rcp14sd ((__v2df) __A, 960 (__v2df) __B, 961 (__v2df) 962 _mm_setzero_pd (), 963 (__mmask8) -1); 964} 965 966static __inline __m512 __DEFAULT_FN_ATTRS 967_mm512_floor_ps(__m512 __A) 968{ 969 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, 970 _MM_FROUND_FLOOR, 971 (__v16sf) __A, -1, 972 _MM_FROUND_CUR_DIRECTION); 973} 974 975static __inline __m512d __DEFAULT_FN_ATTRS 976_mm512_floor_pd(__m512d __A) 977{ 978 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, 979 _MM_FROUND_FLOOR, 980 (__v8df) __A, -1, 981 _MM_FROUND_CUR_DIRECTION); 982} 983 984static __inline __m512 __DEFAULT_FN_ATTRS 985_mm512_ceil_ps(__m512 __A) 986{ 987 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, 988 _MM_FROUND_CEIL, 989 (__v16sf) __A, -1, 990 _MM_FROUND_CUR_DIRECTION); 991} 992 993static __inline __m512d __DEFAULT_FN_ATTRS 994_mm512_ceil_pd(__m512d __A) 995{ 996 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, 997 _MM_FROUND_CEIL, 998 (__v8df) __A, -1, 999 _MM_FROUND_CUR_DIRECTION); 1000} 1001 1002static __inline __m512i __DEFAULT_FN_ATTRS 1003_mm512_abs_epi64(__m512i __A) 1004{ 1005 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A, 1006 (__v8di) 1007 _mm512_setzero_si512 (), 1008 (__mmask8) -1); 1009} 1010 1011static __inline __m512i __DEFAULT_FN_ATTRS 1012_mm512_abs_epi32(__m512i __A) 1013{ 1014 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A, 1015 (__v16si) 1016 _mm512_setzero_si512 (), 1017 (__mmask16) -1); 1018} 1019 1020static __inline__ __m128 __DEFAULT_FN_ATTRS 1021_mm_mask_add_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { 1022 return (__m128) __builtin_ia32_addss_round ((__v4sf) __A, 1023 (__v4sf) __B, 1024 (__v4sf) __W, 1025 (__mmask8) __U, 1026 _MM_FROUND_CUR_DIRECTION); 1027} 1028 1029static __inline__ __m128 __DEFAULT_FN_ATTRS 1030_mm_maskz_add_ss(__mmask8 __U,__m128 __A, __m128 __B) { 1031 return (__m128) __builtin_ia32_addss_round ((__v4sf) __A, 1032 (__v4sf) __B, 1033 (__v4sf) _mm_setzero_ps (), 1034 (__mmask8) __U, 1035 _MM_FROUND_CUR_DIRECTION); 1036} 1037 1038#define _mm_add_round_ss(__A, __B, __R) __extension__ ({ \ 1039 (__m128) __builtin_ia32_addss_round ((__v4sf) __A, (__v4sf) __B, \ 1040 (__v4sf) _mm_setzero_ps(), (__mmask8) -1, __R); }) 1041 1042#define _mm_mask_add_round_ss(__W, __U, __A, __B, __R) __extension__ ({ \ 1043 (__m128) __builtin_ia32_addss_round ((__v4sf) __A, (__v4sf) __B, \ 1044 (__v4sf) __W, (__mmask8) __U,__R); }) 1045 1046#define _mm_maskz_add_round_ss(__U, __A, __B, __R) __extension__ ({ \ 1047 (__m128) __builtin_ia32_addss_round ((__v4sf) __A, (__v4sf) __B, \ 1048 (__v4sf) _mm_setzero_ps(), (__mmask8) __U,__R); }) 1049 1050static __inline__ __m128d __DEFAULT_FN_ATTRS 1051_mm_mask_add_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) { 1052 return (__m128d) __builtin_ia32_addsd_round ((__v2df) __A, 1053 (__v2df) __B, 1054 (__v2df) __W, 1055 (__mmask8) __U, 1056 _MM_FROUND_CUR_DIRECTION); 1057} 1058 1059static __inline__ __m128d __DEFAULT_FN_ATTRS 1060_mm_maskz_add_sd(__mmask8 __U,__m128d __A, __m128d __B) { 1061 return (__m128d) __builtin_ia32_addsd_round ((__v2df) __A, 1062 (__v2df) __B, 1063 (__v2df) _mm_setzero_pd (), 1064 (__mmask8) __U, 1065 _MM_FROUND_CUR_DIRECTION); 1066} 1067#define _mm_add_round_sd(__A, __B, __R) __extension__ ({ \ 1068 (__m128d) __builtin_ia32_addsd_round ((__v2df) __A, (__v2df) __B, \ 1069 (__v2df) _mm_setzero_pd(), (__mmask8) -1, __R); }) 1070 1071#define _mm_mask_add_round_sd(__W, __U, __A, __B, __R) __extension__ ({ \ 1072 (__m128d) __builtin_ia32_addsd_round ((__v2df) __A, (__v2df) __B, \ 1073 (__v2df) __W, (__mmask8) __U,__R); }) 1074 1075#define _mm_maskz_add_round_sd(__U, __A, __B, __R) __extension__ ({ \ 1076 (__m128d) __builtin_ia32_addsd_round ((__v2df) __A, (__v2df) __B, \ 1077 (__v2df) _mm_setzero_pd(), (__mmask8) __U,__R); }) 1078 1079static __inline__ __m512d __DEFAULT_FN_ATTRS 1080_mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { 1081 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A, 1082 (__v8df) __B, 1083 (__v8df) __W, 1084 (__mmask8) __U, 1085 _MM_FROUND_CUR_DIRECTION); 1086} 1087 1088static __inline__ __m512d __DEFAULT_FN_ATTRS 1089_mm512_maskz_add_pd(__mmask8 __U, __m512d __A, __m512d __B) { 1090 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A, 1091 (__v8df) __B, 1092 (__v8df) _mm512_setzero_pd (), 1093 (__mmask8) __U, 1094 _MM_FROUND_CUR_DIRECTION); 1095} 1096 1097static __inline__ __m512 __DEFAULT_FN_ATTRS 1098_mm512_mask_add_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { 1099 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A, 1100 (__v16sf) __B, 1101 (__v16sf) __W, 1102 (__mmask16) __U, 1103 _MM_FROUND_CUR_DIRECTION); 1104} 1105 1106static __inline__ __m512 __DEFAULT_FN_ATTRS 1107_mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B) { 1108 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A, 1109 (__v16sf) __B, 1110 (__v16sf) _mm512_setzero_ps (), 1111 (__mmask16) __U, 1112 _MM_FROUND_CUR_DIRECTION); 1113} 1114 1115#define _mm512_add_round_pd(__A, __B, __R) __extension__ ({ \ 1116 (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A, (__v8df) __B, \ 1117 (__v8df) _mm512_setzero_pd(), (__mmask8) -1, __R); }) 1118 1119#define _mm512_mask_add_round_pd(__W, __U, __A, __B, __R) __extension__ ({ \ 1120 (__m512d) __builtin_ia32_addpd512_mask((__v8df) __A, (__v8df) __B, \ 1121 (__v8df) __W, (__mmask8) __U, __R); }) 1122 1123#define _mm512_maskz_add_round_pd(__U, __A, __B, __R) __extension__ ({ \ 1124 (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A, (__v8df) __B, \ 1125 (__v8df) _mm512_setzero_pd(), (__mmask8) __U, __R); }) 1126 1127#define _mm512_add_round_ps(__A, __B, __R) __extension__ ({ \ 1128 (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A, (__v16sf) __B, \ 1129 (__v16sf) _mm512_setzero_ps(), (__mmask16) -1, __R); }) 1130 1131#define _mm512_mask_add_round_ps(__W, __U, __A, __B, __R) __extension__ ({ \ 1132 (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A, (__v16sf) __B, \ 1133 (__v16sf) __W, (__mmask16)__U, __R); }) 1134 1135#define _mm512_maskz_add_round_ps(__U, __A, __B, __R) __extension__ ({ \ 1136 (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A, (__v16sf) __B, \ 1137 (__v16sf) _mm512_setzero_ps(), (__mmask16)__U, __R); }) 1138 1139static __inline__ __m128 __DEFAULT_FN_ATTRS 1140_mm_mask_sub_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { 1141 return (__m128) __builtin_ia32_subss_round ((__v4sf) __A, 1142 (__v4sf) __B, 1143 (__v4sf) __W, 1144 (__mmask8) __U, 1145 _MM_FROUND_CUR_DIRECTION); 1146} 1147 1148static __inline__ __m128 __DEFAULT_FN_ATTRS 1149_mm_maskz_sub_ss(__mmask8 __U,__m128 __A, __m128 __B) { 1150 return (__m128) __builtin_ia32_subss_round ((__v4sf) __A, 1151 (__v4sf) __B, 1152 (__v4sf) _mm_setzero_ps (), 1153 (__mmask8) __U, 1154 _MM_FROUND_CUR_DIRECTION); 1155} 1156#define _mm_sub_round_ss(__A, __B, __R) __extension__ ({ \ 1157 (__m128) __builtin_ia32_subss_round ((__v4sf) __A, (__v4sf) __B, \ 1158 (__v4sf) _mm_setzero_ps(), (__mmask8) -1, __R); }) 1159 1160#define _mm_mask_sub_round_ss(__W, __U, __A, __B, __R) __extension__ ({ \ 1161 (__m128) __builtin_ia32_subss_round ((__v4sf) __A, (__v4sf) __B, \ 1162 (__v4sf) __W, (__mmask8) __U,__R); }) 1163 1164#define _mm_maskz_sub_round_ss(__U, __A, __B, __R) __extension__ ({ \ 1165 (__m128) __builtin_ia32_subss_round ((__v4sf) __A, (__v4sf) __B, \ 1166 (__v4sf) _mm_setzero_ps(), (__mmask8) __U,__R); }) 1167 1168static __inline__ __m128d __DEFAULT_FN_ATTRS 1169_mm_mask_sub_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) { 1170 return (__m128d) __builtin_ia32_subsd_round ((__v2df) __A, 1171 (__v2df) __B, 1172 (__v2df) __W, 1173 (__mmask8) __U, 1174 _MM_FROUND_CUR_DIRECTION); 1175} 1176 1177static __inline__ __m128d __DEFAULT_FN_ATTRS 1178_mm_maskz_sub_sd(__mmask8 __U,__m128d __A, __m128d __B) { 1179 return (__m128d) __builtin_ia32_subsd_round ((__v2df) __A, 1180 (__v2df) __B, 1181 (__v2df) _mm_setzero_pd (), 1182 (__mmask8) __U, 1183 _MM_FROUND_CUR_DIRECTION); 1184} 1185 1186#define _mm_sub_round_sd(__A, __B, __R) __extension__ ({ \ 1187 (__m128d) __builtin_ia32_subsd_round ((__v2df) __A, (__v2df) __B, \ 1188 (__v2df) _mm_setzero_pd(), (__mmask8) -1, __R); }) 1189 1190#define _mm_mask_sub_round_sd(__W, __U, __A, __B, __R) __extension__ ({ \ 1191 (__m128d) __builtin_ia32_subsd_round ((__v2df) __A, (__v2df) __B, \ 1192 (__v2df) __W, (__mmask8) __U,__R); }) 1193 1194#define _mm_maskz_sub_round_sd(__U, __A, __B, __R) __extension__ ({ \ 1195 (__m128d) __builtin_ia32_subsd_round ((__v2df) __A, (__v2df) __B, \ 1196 (__v2df) _mm_setzero_pd(), (__mmask8) __U,__R); }) 1197 1198static __inline__ __m512d __DEFAULT_FN_ATTRS 1199_mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { 1200 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A, 1201 (__v8df) __B, 1202 (__v8df) __W, 1203 (__mmask8) __U, 1204 _MM_FROUND_CUR_DIRECTION); 1205} 1206 1207static __inline__ __m512d __DEFAULT_FN_ATTRS 1208_mm512_maskz_sub_pd(__mmask8 __U, __m512d __A, __m512d __B) { 1209 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A, 1210 (__v8df) __B, 1211 (__v8df) 1212 _mm512_setzero_pd (), 1213 (__mmask8) __U, 1214 _MM_FROUND_CUR_DIRECTION); 1215} 1216 1217static __inline__ __m512 __DEFAULT_FN_ATTRS 1218_mm512_mask_sub_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { 1219 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A, 1220 (__v16sf) __B, 1221 (__v16sf) __W, 1222 (__mmask16) __U, 1223 _MM_FROUND_CUR_DIRECTION); 1224} 1225 1226static __inline__ __m512 __DEFAULT_FN_ATTRS 1227_mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B) { 1228 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A, 1229 (__v16sf) __B, 1230 (__v16sf) 1231 _mm512_setzero_ps (), 1232 (__mmask16) __U, 1233 _MM_FROUND_CUR_DIRECTION); 1234} 1235 1236#define _mm512_sub_round_pd(__A, __B, __R) __extension__ ({ \ 1237 (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A, (__v8df) __B,\ 1238 (__v8df) _mm512_setzero_pd(), (__mmask8) -1, __R); }) 1239 1240#define _mm512_mask_sub_round_pd(__W, __U, __A, __B, __R) __extension__ ({ \ 1241 (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A, (__v8df) __B, \ 1242 (__v8df) __W, (__mmask8) __U, __R); }) 1243 1244#define _mm512_maskz_sub_round_pd(__U, __A, __B, __R) __extension__ ({ \ 1245 (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A, (__v8df) __B, \ 1246 (__v8df) _mm512_setzero_pd(), (__mmask8) __U, __R);}) 1247 1248#define _mm512_sub_round_ps(__A, __B, __R) __extension__ ({ \ 1249 (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A, (__v16sf) __B, \ 1250 (__v16sf) _mm512_setzero_ps (), (__mmask16) -1, __R);}) 1251 1252#define _mm512_mask_sub_round_ps(__W, __U, __A, __B, __R) __extension__ ({ \ 1253 (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A, (__v16sf) __B, \ 1254 (__v16sf) __W, (__mmask16) __U, __R); }); 1255 1256#define _mm512_maskz_sub_round_ps(__U, __A, __B, __R) __extension__ ({ \ 1257 (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A, (__v16sf) __B, \ 1258 (__v16sf) _mm512_setzero_ps (), (__mmask16) __U, __R);}); 1259 1260static __inline__ __m128 __DEFAULT_FN_ATTRS 1261_mm_mask_mul_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { 1262 return (__m128) __builtin_ia32_mulss_round ((__v4sf) __A, 1263 (__v4sf) __B, 1264 (__v4sf) __W, 1265 (__mmask8) __U, 1266 _MM_FROUND_CUR_DIRECTION); 1267} 1268 1269static __inline__ __m128 __DEFAULT_FN_ATTRS 1270_mm_maskz_mul_ss(__mmask8 __U,__m128 __A, __m128 __B) { 1271 return (__m128) __builtin_ia32_mulss_round ((__v4sf) __A, 1272 (__v4sf) __B, 1273 (__v4sf) _mm_setzero_ps (), 1274 (__mmask8) __U, 1275 _MM_FROUND_CUR_DIRECTION); 1276} 1277#define _mm_mul_round_ss(__A, __B, __R) __extension__ ({ \ 1278 (__m128) __builtin_ia32_mulss_round ((__v4sf) __A, (__v4sf) __B, \ 1279 (__v4sf) _mm_setzero_ps(), (__mmask8) -1, __R); }) 1280 1281#define _mm_mask_mul_round_ss(__W, __U, __A, __B, __R) __extension__ ({ \ 1282 (__m128) __builtin_ia32_mulss_round ((__v4sf) __A, (__v4sf) __B, \ 1283 (__v4sf) __W, (__mmask8) __U,__R); }) 1284 1285#define _mm_maskz_mul_round_ss(__U, __A, __B, __R) __extension__ ({ \ 1286 (__m128) __builtin_ia32_mulss_round ((__v4sf) __A, (__v4sf) __B, \ 1287 (__v4sf) _mm_setzero_ps(), (__mmask8) __U,__R); }) 1288 1289static __inline__ __m128d __DEFAULT_FN_ATTRS 1290_mm_mask_mul_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) { 1291 return (__m128d) __builtin_ia32_mulsd_round ((__v2df) __A, 1292 (__v2df) __B, 1293 (__v2df) __W, 1294 (__mmask8) __U, 1295 _MM_FROUND_CUR_DIRECTION); 1296} 1297 1298static __inline__ __m128d __DEFAULT_FN_ATTRS 1299_mm_maskz_mul_sd(__mmask8 __U,__m128d __A, __m128d __B) { 1300 return (__m128d) __builtin_ia32_mulsd_round ((__v2df) __A, 1301 (__v2df) __B, 1302 (__v2df) _mm_setzero_pd (), 1303 (__mmask8) __U, 1304 _MM_FROUND_CUR_DIRECTION); 1305} 1306 1307#define _mm_mul_round_sd(__A, __B, __R) __extension__ ({ \ 1308 (__m128d) __builtin_ia32_mulsd_round ((__v2df) __A, (__v2df) __B, \ 1309 (__v2df) _mm_setzero_pd(), (__mmask8) -1, __R); }) 1310 1311#define _mm_mask_mul_round_sd(__W, __U, __A, __B, __R) __extension__ ({ \ 1312 (__m128d) __builtin_ia32_mulsd_round ((__v2df) __A, (__v2df) __B, \ 1313 (__v2df) __W, (__mmask8) __U,__R); }) 1314 1315#define _mm_maskz_mul_round_sd(__U, __A, __B, __R) __extension__ ({ \ 1316 (__m128d) __builtin_ia32_mulsd_round ((__v2df) __A, (__v2df) __B, \ 1317 (__v2df) _mm_setzero_pd(), (__mmask8) __U,__R); }) 1318 1319static __inline__ __m512d __DEFAULT_FN_ATTRS 1320_mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { 1321 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A, 1322 (__v8df) __B, 1323 (__v8df) __W, 1324 (__mmask8) __U, 1325 _MM_FROUND_CUR_DIRECTION); 1326} 1327 1328static __inline__ __m512d __DEFAULT_FN_ATTRS 1329_mm512_maskz_mul_pd(__mmask8 __U, __m512d __A, __m512d __B) { 1330 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A, 1331 (__v8df) __B, 1332 (__v8df) 1333 _mm512_setzero_pd (), 1334 (__mmask8) __U, 1335 _MM_FROUND_CUR_DIRECTION); 1336} 1337 1338static __inline__ __m512 __DEFAULT_FN_ATTRS 1339_mm512_mask_mul_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { 1340 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A, 1341 (__v16sf) __B, 1342 (__v16sf) __W, 1343 (__mmask16) __U, 1344 _MM_FROUND_CUR_DIRECTION); 1345} 1346 1347static __inline__ __m512 __DEFAULT_FN_ATTRS 1348_mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B) { 1349 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A, 1350 (__v16sf) __B, 1351 (__v16sf) 1352 _mm512_setzero_ps (), 1353 (__mmask16) __U, 1354 _MM_FROUND_CUR_DIRECTION); 1355} 1356 1357#define _mm512_mul_round_pd(__A, __B, __R) __extension__ ({ \ 1358 (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A, (__v8df) __B,\ 1359 (__v8df) _mm512_setzero_pd(), (__mmask8) -1, __R); }) 1360 1361#define _mm512_mask_mul_round_pd(__W, __U, __A, __B, __R) __extension__ ({ \ 1362 (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A, (__v8df) __B, \ 1363 (__v8df) __W, (__mmask8) __U, __R); }) 1364 1365#define _mm512_maskz_mul_round_pd(__U, __A, __B, __R) __extension__ ({ \ 1366 (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A, (__v8df) __B, \ 1367 (__v8df) _mm512_setzero_pd(), (__mmask8) __U, __R);}) 1368 1369#define _mm512_mul_round_ps(__A, __B, __R) __extension__ ({ \ 1370 (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A, (__v16sf) __B, \ 1371 (__v16sf) _mm512_setzero_ps (), (__mmask16) -1, __R);}) 1372 1373#define _mm512_mask_mul_round_ps(__W, __U, __A, __B, __R) __extension__ ({ \ 1374 (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A, (__v16sf) __B, \ 1375 (__v16sf) __W, (__mmask16) __U, __R); }); 1376 1377#define _mm512_maskz_mul_round_ps(__U, __A, __B, __R) __extension__ ({ \ 1378 (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A, (__v16sf) __B, \ 1379 (__v16sf) _mm512_setzero_ps (), (__mmask16) __U, __R);}); 1380 1381static __inline__ __m128 __DEFAULT_FN_ATTRS 1382_mm_mask_div_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { 1383 return (__m128) __builtin_ia32_divss_round ((__v4sf) __A, 1384 (__v4sf) __B, 1385 (__v4sf) __W, 1386 (__mmask8) __U, 1387 _MM_FROUND_CUR_DIRECTION); 1388} 1389 1390static __inline__ __m128 __DEFAULT_FN_ATTRS 1391_mm_maskz_div_ss(__mmask8 __U,__m128 __A, __m128 __B) { 1392 return (__m128) __builtin_ia32_divss_round ((__v4sf) __A, 1393 (__v4sf) __B, 1394 (__v4sf) _mm_setzero_ps (), 1395 (__mmask8) __U, 1396 _MM_FROUND_CUR_DIRECTION); 1397} 1398 1399#define _mm_div_round_ss(__A, __B, __R) __extension__ ({ \ 1400 (__m128) __builtin_ia32_divss_round ((__v4sf) __A, (__v4sf) __B, \ 1401 (__v4sf) _mm_setzero_ps(), (__mmask8) -1, __R); }) 1402 1403#define _mm_mask_div_round_ss(__W, __U, __A, __B, __R) __extension__ ({ \ 1404 (__m128) __builtin_ia32_divss_round ((__v4sf) __A, (__v4sf) __B, \ 1405 (__v4sf) __W, (__mmask8) __U,__R); }) 1406 1407#define _mm_maskz_div_round_ss(__U, __A, __B, __R) __extension__ ({ \ 1408 (__m128) __builtin_ia32_divss_round ((__v4sf) __A, (__v4sf) __B, \ 1409 (__v4sf) _mm_setzero_ps(), (__mmask8) __U,__R); }) 1410 1411static __inline__ __m128d __DEFAULT_FN_ATTRS 1412_mm_mask_div_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) { 1413 return (__m128d) __builtin_ia32_divsd_round ((__v2df) __A, 1414 (__v2df) __B, 1415 (__v2df) __W, 1416 (__mmask8) __U, 1417 _MM_FROUND_CUR_DIRECTION); 1418} 1419 1420static __inline__ __m128d __DEFAULT_FN_ATTRS 1421_mm_maskz_div_sd(__mmask8 __U,__m128d __A, __m128d __B) { 1422 return (__m128d) __builtin_ia32_divsd_round ((__v2df) __A, 1423 (__v2df) __B, 1424 (__v2df) _mm_setzero_pd (), 1425 (__mmask8) __U, 1426 _MM_FROUND_CUR_DIRECTION); 1427} 1428 1429#define _mm_div_round_sd(__A, __B, __R) __extension__ ({ \ 1430 (__m128d) __builtin_ia32_divsd_round ((__v2df) __A, (__v2df) __B, \ 1431 (__v2df) _mm_setzero_pd(), (__mmask8) -1, __R); }) 1432 1433#define _mm_mask_div_round_sd(__W, __U, __A, __B, __R) __extension__ ({ \ 1434 (__m128d) __builtin_ia32_divsd_round ((__v2df) __A, (__v2df) __B, \ 1435 (__v2df) __W, (__mmask8) __U,__R); }) 1436 1437#define _mm_maskz_div_round_sd(__U, __A, __B, __R) __extension__ ({ \ 1438 (__m128d) __builtin_ia32_divsd_round ((__v2df) __A, (__v2df) __B, \ 1439 (__v2df) _mm_setzero_pd(), (__mmask8) __U,__R); }) 1440 1441static __inline__ __m512d __DEFAULT_FN_ATTRS 1442_mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { 1443 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __A, 1444 (__v8df) __B, 1445 (__v8df) __W, 1446 (__mmask8) __U, 1447 _MM_FROUND_CUR_DIRECTION); 1448} 1449 1450static __inline__ __m512d __DEFAULT_FN_ATTRS 1451_mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B) { 1452 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __A, 1453 (__v8df) __B, 1454 (__v8df) 1455 _mm512_setzero_pd (), 1456 (__mmask8) __U, 1457 _MM_FROUND_CUR_DIRECTION); 1458} 1459 1460static __inline__ __m512 __DEFAULT_FN_ATTRS 1461_mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { 1462 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A, 1463 (__v16sf) __B, 1464 (__v16sf) __W, 1465 (__mmask16) __U, 1466 _MM_FROUND_CUR_DIRECTION); 1467} 1468 1469static __inline__ __m512 __DEFAULT_FN_ATTRS 1470_mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B) { 1471 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A, 1472 (__v16sf) __B, 1473 (__v16sf) 1474 _mm512_setzero_ps (), 1475 (__mmask16) __U, 1476 _MM_FROUND_CUR_DIRECTION); 1477} 1478 1479#define _mm512_div_round_pd(__A, __B, __R) __extension__ ({ \ 1480 (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __A, (__v8df) __B,\ 1481 (__v8df) _mm512_setzero_pd(), (__mmask8) -1, __R); }) 1482 1483#define _mm512_mask_div_round_pd(__W, __U, __A, __B, __R) __extension__ ({ \ 1484 (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __A, (__v8df) __B, \ 1485 (__v8df) __W, (__mmask8) __U, __R); }) 1486 1487#define _mm512_maskz_div_round_pd(__U, __A, __B, __R) __extension__ ({ \ 1488 (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __A, (__v8df) __B, \ 1489 (__v8df) _mm512_setzero_pd(), (__mmask8) __U, __R);}) 1490 1491#define _mm512_div_round_ps(__A, __B, __R) __extension__ ({ \ 1492 (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A, (__v16sf) __B, \ 1493 (__v16sf) _mm512_setzero_ps (), (__mmask16) -1, __R);}) 1494 1495#define _mm512_mask_div_round_ps(__W, __U, __A, __B, __R) __extension__ ({ \ 1496 (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A, (__v16sf) __B, \ 1497 (__v16sf) __W, (__mmask16) __U, __R); }); 1498 1499#define _mm512_maskz_div_round_ps(__U, __A, __B, __R) __extension__ ({ \ 1500 (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A, (__v16sf) __B, \ 1501 (__v16sf) _mm512_setzero_ps (), (__mmask16) __U, __R);}); 1502 1503#define _mm512_roundscale_ps(A, B) __extension__ ({ \ 1504 (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(A), (B), (__v16sf)(A), \ 1505 -1, _MM_FROUND_CUR_DIRECTION); }) 1506 1507#define _mm512_roundscale_pd(A, B) __extension__ ({ \ 1508 (__m512d)__builtin_ia32_rndscalepd_mask((__v8df)(A), (B), (__v8df)(A), \ 1509 -1, _MM_FROUND_CUR_DIRECTION); }) 1510 1511#define _mm512_fmadd_round_pd(A, B, C, R) __extension__ ({ \ 1512 (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) (A), \ 1513 (__v8df) (B), (__v8df) (C), \ 1514 (__mmask8) -1, (R)); }) 1515 1516 1517#define _mm512_mask_fmadd_round_pd(A, U, B, C, R) __extension__ ({ \ 1518 (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) (A), \ 1519 (__v8df) (B), (__v8df) (C), \ 1520 (__mmask8) (U), (R)); }) 1521 1522 1523#define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) __extension__ ({ \ 1524 (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) (A), \ 1525 (__v8df) (B), (__v8df) (C), \ 1526 (__mmask8) (U), (R)); }) 1527 1528 1529#define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) __extension__ ({ \ 1530 (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) (A), \ 1531 (__v8df) (B), (__v8df) (C), \ 1532 (__mmask8) (U), (R)); }) 1533 1534 1535#define _mm512_fmsub_round_pd(A, B, C, R) __extension__ ({ \ 1536 (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) (A), \ 1537 (__v8df) (B), -(__v8df) (C), \ 1538 (__mmask8) -1, (R)); }) 1539 1540 1541#define _mm512_mask_fmsub_round_pd(A, U, B, C, R) __extension__ ({ \ 1542 (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) (A), \ 1543 (__v8df) (B), -(__v8df) (C), \ 1544 (__mmask8) (U), (R)); }) 1545 1546 1547#define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) __extension__ ({ \ 1548 (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) (A), \ 1549 (__v8df) (B), -(__v8df) (C), \ 1550 (__mmask8) (U), (R)); }) 1551 1552 1553#define _mm512_fnmadd_round_pd(A, B, C, R) __extension__ ({ \ 1554 (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) (A), \ 1555 (__v8df) (B), (__v8df) (C), \ 1556 (__mmask8) -1, (R)); }) 1557 1558 1559#define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) __extension__ ({ \ 1560 (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) (A), \ 1561 (__v8df) (B), (__v8df) (C), \ 1562 (__mmask8) (U), (R)); }) 1563 1564 1565#define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) __extension__ ({ \ 1566 (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) (A), \ 1567 (__v8df) (B), (__v8df) (C), \ 1568 (__mmask8) (U), (R)); }) 1569 1570 1571#define _mm512_fnmsub_round_pd(A, B, C, R) __extension__ ({ \ 1572 (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) (A), \ 1573 (__v8df) (B), -(__v8df) (C), \ 1574 (__mmask8) -1, (R)); }) 1575 1576 1577#define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) __extension__ ({ \ 1578 (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) (A), \ 1579 (__v8df) (B), -(__v8df) (C), \ 1580 (__mmask8) (U), (R)); }) 1581 1582 1583static __inline__ __m512d __DEFAULT_FN_ATTRS 1584_mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C) 1585{ 1586 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, 1587 (__v8df) __B, 1588 (__v8df) __C, 1589 (__mmask8) -1, 1590 _MM_FROUND_CUR_DIRECTION); 1591} 1592 1593static __inline__ __m512d __DEFAULT_FN_ATTRS 1594_mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) 1595{ 1596 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, 1597 (__v8df) __B, 1598 (__v8df) __C, 1599 (__mmask8) __U, 1600 _MM_FROUND_CUR_DIRECTION); 1601} 1602 1603static __inline__ __m512d __DEFAULT_FN_ATTRS 1604_mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) 1605{ 1606 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A, 1607 (__v8df) __B, 1608 (__v8df) __C, 1609 (__mmask8) __U, 1610 _MM_FROUND_CUR_DIRECTION); 1611} 1612 1613static __inline__ __m512d __DEFAULT_FN_ATTRS 1614_mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) 1615{ 1616 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A, 1617 (__v8df) __B, 1618 (__v8df) __C, 1619 (__mmask8) __U, 1620 _MM_FROUND_CUR_DIRECTION); 1621} 1622 1623static __inline__ __m512d __DEFAULT_FN_ATTRS 1624_mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C) 1625{ 1626 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, 1627 (__v8df) __B, 1628 -(__v8df) __C, 1629 (__mmask8) -1, 1630 _MM_FROUND_CUR_DIRECTION); 1631} 1632 1633static __inline__ __m512d __DEFAULT_FN_ATTRS 1634_mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) 1635{ 1636 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, 1637 (__v8df) __B, 1638 -(__v8df) __C, 1639 (__mmask8) __U, 1640 _MM_FROUND_CUR_DIRECTION); 1641} 1642 1643static __inline__ __m512d __DEFAULT_FN_ATTRS 1644_mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) 1645{ 1646 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A, 1647 (__v8df) __B, 1648 -(__v8df) __C, 1649 (__mmask8) __U, 1650 _MM_FROUND_CUR_DIRECTION); 1651} 1652 1653static __inline__ __m512d __DEFAULT_FN_ATTRS 1654_mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C) 1655{ 1656 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A, 1657 (__v8df) __B, 1658 (__v8df) __C, 1659 (__mmask8) -1, 1660 _MM_FROUND_CUR_DIRECTION); 1661} 1662 1663static __inline__ __m512d __DEFAULT_FN_ATTRS 1664_mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) 1665{ 1666 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A, 1667 (__v8df) __B, 1668 (__v8df) __C, 1669 (__mmask8) __U, 1670 _MM_FROUND_CUR_DIRECTION); 1671} 1672 1673static __inline__ __m512d __DEFAULT_FN_ATTRS 1674_mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) 1675{ 1676 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A, 1677 (__v8df) __B, 1678 (__v8df) __C, 1679 (__mmask8) __U, 1680 _MM_FROUND_CUR_DIRECTION); 1681} 1682 1683static __inline__ __m512d __DEFAULT_FN_ATTRS 1684_mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C) 1685{ 1686 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A, 1687 (__v8df) __B, 1688 -(__v8df) __C, 1689 (__mmask8) -1, 1690 _MM_FROUND_CUR_DIRECTION); 1691} 1692 1693static __inline__ __m512d __DEFAULT_FN_ATTRS 1694_mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) 1695{ 1696 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A, 1697 (__v8df) __B, 1698 -(__v8df) __C, 1699 (__mmask8) __U, 1700 _MM_FROUND_CUR_DIRECTION); 1701} 1702 1703#define _mm512_fmadd_round_ps(A, B, C, R) __extension__ ({ \ 1704 (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) (A), \ 1705 (__v16sf) (B), (__v16sf) (C), \ 1706 (__mmask16) -1, (R)); }) 1707 1708 1709#define _mm512_mask_fmadd_round_ps(A, U, B, C, R) __extension__ ({ \ 1710 (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) (A), \ 1711 (__v16sf) (B), (__v16sf) (C), \ 1712 (__mmask16) (U), (R)); }) 1713 1714 1715#define _mm512_mask3_fmadd_round_ps(A, B, C, U, R) __extension__ ({ \ 1716 (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) (A), \ 1717 (__v16sf) (B), (__v16sf) (C), \ 1718 (__mmask16) (U), (R)); }) 1719 1720 1721#define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) __extension__ ({ \ 1722 (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) (A), \ 1723 (__v16sf) (B), (__v16sf) (C), \ 1724 (__mmask16) (U), (R)); }) 1725 1726 1727#define _mm512_fmsub_round_ps(A, B, C, R) __extension__ ({ \ 1728 (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) (A), \ 1729 (__v16sf) (B), -(__v16sf) (C), \ 1730 (__mmask16) -1, (R)); }) 1731 1732 1733#define _mm512_mask_fmsub_round_ps(A, U, B, C, R) __extension__ ({ \ 1734 (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) (A), \ 1735 (__v16sf) (B), -(__v16sf) (C), \ 1736 (__mmask16) (U), (R)); }) 1737 1738 1739#define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) __extension__ ({ \ 1740 (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) (A), \ 1741 (__v16sf) (B), -(__v16sf) (C), \ 1742 (__mmask16) (U), (R)); }) 1743 1744 1745#define _mm512_fnmadd_round_ps(A, B, C, R) __extension__ ({ \ 1746 (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) (A), \ 1747 (__v16sf) (B), (__v16sf) (C), \ 1748 (__mmask16) -1, (R)); }) 1749 1750 1751#define _mm512_mask3_fnmadd_round_ps(A, B, C, U, R) __extension__ ({ \ 1752 (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) (A), \ 1753 (__v16sf) (B), (__v16sf) (C), \ 1754 (__mmask16) (U), (R)); }) 1755 1756 1757#define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) __extension__ ({ \ 1758 (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) (A), \ 1759 (__v16sf) (B), (__v16sf) (C), \ 1760 (__mmask16) (U), (R)); }) 1761 1762 1763#define _mm512_fnmsub_round_ps(A, B, C, R) __extension__ ({ \ 1764 (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) (A), \ 1765 (__v16sf) (B), -(__v16sf) (C), \ 1766 (__mmask16) -1, (R)); }) 1767 1768 1769#define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) __extension__ ({ \ 1770 (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) (A), \ 1771 (__v16sf) (B), -(__v16sf) (C), \ 1772 (__mmask16) (U), (R)); }) 1773 1774 1775static __inline__ __m512 __DEFAULT_FN_ATTRS 1776_mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C) 1777{ 1778 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, 1779 (__v16sf) __B, 1780 (__v16sf) __C, 1781 (__mmask16) -1, 1782 _MM_FROUND_CUR_DIRECTION); 1783} 1784 1785static __inline__ __m512 __DEFAULT_FN_ATTRS 1786_mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) 1787{ 1788 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, 1789 (__v16sf) __B, 1790 (__v16sf) __C, 1791 (__mmask16) __U, 1792 _MM_FROUND_CUR_DIRECTION); 1793} 1794 1795static __inline__ __m512 __DEFAULT_FN_ATTRS 1796_mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) 1797{ 1798 return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A, 1799 (__v16sf) __B, 1800 (__v16sf) __C, 1801 (__mmask16) __U, 1802 _MM_FROUND_CUR_DIRECTION); 1803} 1804 1805static __inline__ __m512 __DEFAULT_FN_ATTRS 1806_mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) 1807{ 1808 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A, 1809 (__v16sf) __B, 1810 (__v16sf) __C, 1811 (__mmask16) __U, 1812 _MM_FROUND_CUR_DIRECTION); 1813} 1814 1815static __inline__ __m512 __DEFAULT_FN_ATTRS 1816_mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C) 1817{ 1818 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, 1819 (__v16sf) __B, 1820 -(__v16sf) __C, 1821 (__mmask16) -1, 1822 _MM_FROUND_CUR_DIRECTION); 1823} 1824 1825static __inline__ __m512 __DEFAULT_FN_ATTRS 1826_mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) 1827{ 1828 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, 1829 (__v16sf) __B, 1830 -(__v16sf) __C, 1831 (__mmask16) __U, 1832 _MM_FROUND_CUR_DIRECTION); 1833} 1834 1835static __inline__ __m512 __DEFAULT_FN_ATTRS 1836_mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) 1837{ 1838 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A, 1839 (__v16sf) __B, 1840 -(__v16sf) __C, 1841 (__mmask16) __U, 1842 _MM_FROUND_CUR_DIRECTION); 1843} 1844 1845static __inline__ __m512 __DEFAULT_FN_ATTRS 1846_mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C) 1847{ 1848 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A, 1849 (__v16sf) __B, 1850 (__v16sf) __C, 1851 (__mmask16) -1, 1852 _MM_FROUND_CUR_DIRECTION); 1853} 1854 1855static __inline__ __m512 __DEFAULT_FN_ATTRS 1856_mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) 1857{ 1858 return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A, 1859 (__v16sf) __B, 1860 (__v16sf) __C, 1861 (__mmask16) __U, 1862 _MM_FROUND_CUR_DIRECTION); 1863} 1864 1865static __inline__ __m512 __DEFAULT_FN_ATTRS 1866_mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) 1867{ 1868 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A, 1869 (__v16sf) __B, 1870 (__v16sf) __C, 1871 (__mmask16) __U, 1872 _MM_FROUND_CUR_DIRECTION); 1873} 1874 1875static __inline__ __m512 __DEFAULT_FN_ATTRS 1876_mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C) 1877{ 1878 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A, 1879 (__v16sf) __B, 1880 -(__v16sf) __C, 1881 (__mmask16) -1, 1882 _MM_FROUND_CUR_DIRECTION); 1883} 1884 1885static __inline__ __m512 __DEFAULT_FN_ATTRS 1886_mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) 1887{ 1888 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A, 1889 (__v16sf) __B, 1890 -(__v16sf) __C, 1891 (__mmask16) __U, 1892 _MM_FROUND_CUR_DIRECTION); 1893} 1894 1895#define _mm512_fmaddsub_round_pd(A, B, C, R) __extension__ ({ \ 1896 (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) (A), \ 1897 (__v8df) (B), (__v8df) (C), \ 1898 (__mmask8) -1, (R)); }) 1899 1900 1901#define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) __extension__ ({ \ 1902 (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) (A), \ 1903 (__v8df) (B), (__v8df) (C), \ 1904 (__mmask8) (U), (R)); }) 1905 1906 1907#define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) __extension__ ({ \ 1908 (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) (A), \ 1909 (__v8df) (B), (__v8df) (C), \ 1910 (__mmask8) (U), (R)); }) 1911 1912 1913#define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) __extension__ ({ \ 1914 (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) (A), \ 1915 (__v8df) (B), (__v8df) (C), \ 1916 (__mmask8) (U), (R)); }) 1917 1918 1919#define _mm512_fmsubadd_round_pd(A, B, C, R) __extension__ ({ \ 1920 (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) (A), \ 1921 (__v8df) (B), -(__v8df) (C), \ 1922 (__mmask8) -1, (R)); }) 1923 1924 1925#define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) __extension__ ({ \ 1926 (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) (A), \ 1927 (__v8df) (B), -(__v8df) (C), \ 1928 (__mmask8) (U), (R)); }) 1929 1930 1931#define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) __extension__ ({ \ 1932 (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) (A), \ 1933 (__v8df) (B), -(__v8df) (C), \ 1934 (__mmask8) (U), (R)); }) 1935 1936 1937static __inline__ __m512d __DEFAULT_FN_ATTRS 1938_mm512_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C) 1939{ 1940 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, 1941 (__v8df) __B, 1942 (__v8df) __C, 1943 (__mmask8) -1, 1944 _MM_FROUND_CUR_DIRECTION); 1945} 1946 1947static __inline__ __m512d __DEFAULT_FN_ATTRS 1948_mm512_mask_fmaddsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) 1949{ 1950 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, 1951 (__v8df) __B, 1952 (__v8df) __C, 1953 (__mmask8) __U, 1954 _MM_FROUND_CUR_DIRECTION); 1955} 1956 1957static __inline__ __m512d __DEFAULT_FN_ATTRS 1958_mm512_mask3_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) 1959{ 1960 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A, 1961 (__v8df) __B, 1962 (__v8df) __C, 1963 (__mmask8) __U, 1964 _MM_FROUND_CUR_DIRECTION); 1965} 1966 1967static __inline__ __m512d __DEFAULT_FN_ATTRS 1968_mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) 1969{ 1970 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A, 1971 (__v8df) __B, 1972 (__v8df) __C, 1973 (__mmask8) __U, 1974 _MM_FROUND_CUR_DIRECTION); 1975} 1976 1977static __inline__ __m512d __DEFAULT_FN_ATTRS 1978_mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C) 1979{ 1980 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, 1981 (__v8df) __B, 1982 -(__v8df) __C, 1983 (__mmask8) -1, 1984 _MM_FROUND_CUR_DIRECTION); 1985} 1986 1987static __inline__ __m512d __DEFAULT_FN_ATTRS 1988_mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) 1989{ 1990 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, 1991 (__v8df) __B, 1992 -(__v8df) __C, 1993 (__mmask8) __U, 1994 _MM_FROUND_CUR_DIRECTION); 1995} 1996 1997static __inline__ __m512d __DEFAULT_FN_ATTRS 1998_mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) 1999{ 2000 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A, 2001 (__v8df) __B, 2002 -(__v8df) __C, 2003 (__mmask8) __U, 2004 _MM_FROUND_CUR_DIRECTION); 2005} 2006 2007#define _mm512_fmaddsub_round_ps(A, B, C, R) __extension__ ({ \ 2008 (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) (A), \ 2009 (__v16sf) (B), (__v16sf) (C), \ 2010 (__mmask16) -1, (R)); }) 2011 2012 2013#define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) __extension__ ({ \ 2014 (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) (A), \ 2015 (__v16sf) (B), (__v16sf) (C), \ 2016 (__mmask16) (U), (R)); }) 2017 2018 2019#define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) __extension__ ({ \ 2020 (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) (A), \ 2021 (__v16sf) (B), (__v16sf) (C), \ 2022 (__mmask16) (U), (R)); }) 2023 2024 2025#define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) __extension__ ({ \ 2026 (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) (A), \ 2027 (__v16sf) (B), (__v16sf) (C), \ 2028 (__mmask16) (U), (R)); }) 2029 2030 2031#define _mm512_fmsubadd_round_ps(A, B, C, R) __extension__ ({ \ 2032 (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) (A), \ 2033 (__v16sf) (B), -(__v16sf) (C), \ 2034 (__mmask16) -1, (R)); }) 2035 2036 2037#define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) __extension__ ({ \ 2038 (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) (A), \ 2039 (__v16sf) (B), -(__v16sf) (C), \ 2040 (__mmask16) (U), (R)); }) 2041 2042 2043#define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) __extension__ ({ \ 2044 (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) (A), \ 2045 (__v16sf) (B), -(__v16sf) (C), \ 2046 (__mmask16) (U), (R)); }) 2047 2048 2049static __inline__ __m512 __DEFAULT_FN_ATTRS 2050_mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C) 2051{ 2052 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, 2053 (__v16sf) __B, 2054 (__v16sf) __C, 2055 (__mmask16) -1, 2056 _MM_FROUND_CUR_DIRECTION); 2057} 2058 2059static __inline__ __m512 __DEFAULT_FN_ATTRS 2060_mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) 2061{ 2062 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, 2063 (__v16sf) __B, 2064 (__v16sf) __C, 2065 (__mmask16) __U, 2066 _MM_FROUND_CUR_DIRECTION); 2067} 2068 2069static __inline__ __m512 __DEFAULT_FN_ATTRS 2070_mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) 2071{ 2072 return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A, 2073 (__v16sf) __B, 2074 (__v16sf) __C, 2075 (__mmask16) __U, 2076 _MM_FROUND_CUR_DIRECTION); 2077} 2078 2079static __inline__ __m512 __DEFAULT_FN_ATTRS 2080_mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) 2081{ 2082 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A, 2083 (__v16sf) __B, 2084 (__v16sf) __C, 2085 (__mmask16) __U, 2086 _MM_FROUND_CUR_DIRECTION); 2087} 2088 2089static __inline__ __m512 __DEFAULT_FN_ATTRS 2090_mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C) 2091{ 2092 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, 2093 (__v16sf) __B, 2094 -(__v16sf) __C, 2095 (__mmask16) -1, 2096 _MM_FROUND_CUR_DIRECTION); 2097} 2098 2099static __inline__ __m512 __DEFAULT_FN_ATTRS 2100_mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) 2101{ 2102 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, 2103 (__v16sf) __B, 2104 -(__v16sf) __C, 2105 (__mmask16) __U, 2106 _MM_FROUND_CUR_DIRECTION); 2107} 2108 2109static __inline__ __m512 __DEFAULT_FN_ATTRS 2110_mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) 2111{ 2112 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A, 2113 (__v16sf) __B, 2114 -(__v16sf) __C, 2115 (__mmask16) __U, 2116 _MM_FROUND_CUR_DIRECTION); 2117} 2118 2119#define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) __extension__ ({ \ 2120 (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) (A), \ 2121 (__v8df) (B), (__v8df) (C), \ 2122 (__mmask8) (U), (R)); }) 2123 2124 2125static __inline__ __m512d __DEFAULT_FN_ATTRS 2126_mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) 2127{ 2128 return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A, 2129 (__v8df) __B, 2130 (__v8df) __C, 2131 (__mmask8) __U, 2132 _MM_FROUND_CUR_DIRECTION); 2133} 2134 2135#define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) __extension__ ({ \ 2136 (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) (A), \ 2137 (__v16sf) (B), (__v16sf) (C), \ 2138 (__mmask16) (U), (R)); }) 2139 2140 2141static __inline__ __m512 __DEFAULT_FN_ATTRS 2142_mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) 2143{ 2144 return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A, 2145 (__v16sf) __B, 2146 (__v16sf) __C, 2147 (__mmask16) __U, 2148 _MM_FROUND_CUR_DIRECTION); 2149} 2150 2151#define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) __extension__ ({ \ 2152 (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) (A), \ 2153 (__v8df) (B), (__v8df) (C), \ 2154 (__mmask8) (U), (R)); }) 2155 2156 2157static __inline__ __m512d __DEFAULT_FN_ATTRS 2158_mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) 2159{ 2160 return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A, 2161 (__v8df) __B, 2162 (__v8df) __C, 2163 (__mmask8) __U, 2164 _MM_FROUND_CUR_DIRECTION); 2165} 2166 2167#define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) __extension__ ({ \ 2168 (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) (A), \ 2169 (__v16sf) (B), (__v16sf) (C), \ 2170 (__mmask16) (U), (R)); }) 2171 2172 2173static __inline__ __m512 __DEFAULT_FN_ATTRS 2174_mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) 2175{ 2176 return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A, 2177 (__v16sf) __B, 2178 (__v16sf) __C, 2179 (__mmask16) __U, 2180 _MM_FROUND_CUR_DIRECTION); 2181} 2182 2183#define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) __extension__ ({ \ 2184 (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) (A), \ 2185 (__v8df) (B), (__v8df) (C), \ 2186 (__mmask8) (U), (R)); }) 2187 2188 2189static __inline__ __m512d __DEFAULT_FN_ATTRS 2190_mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) 2191{ 2192 return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A, 2193 (__v8df) __B, 2194 (__v8df) __C, 2195 (__mmask8) __U, 2196 _MM_FROUND_CUR_DIRECTION); 2197} 2198 2199#define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) __extension__ ({ \ 2200 (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) (A), \ 2201 (__v16sf) (B), (__v16sf) (C), \ 2202 (__mmask16) (U), (R)); }) 2203 2204 2205static __inline__ __m512 __DEFAULT_FN_ATTRS 2206_mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) 2207{ 2208 return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A, 2209 (__v16sf) __B, 2210 (__v16sf) __C, 2211 (__mmask16) __U, 2212 _MM_FROUND_CUR_DIRECTION); 2213} 2214 2215#define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) __extension__ ({ \ 2216 (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) (A), \ 2217 (__v8df) (B), (__v8df) (C), \ 2218 (__mmask8) (U), (R)); }) 2219 2220 2221#define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) __extension__ ({ \ 2222 (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) (A), \ 2223 (__v8df) (B), (__v8df) (C), \ 2224 (__mmask8) (U), (R)); }) 2225 2226 2227static __inline__ __m512d __DEFAULT_FN_ATTRS 2228_mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) 2229{ 2230 return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A, 2231 (__v8df) __B, 2232 (__v8df) __C, 2233 (__mmask8) __U, 2234 _MM_FROUND_CUR_DIRECTION); 2235} 2236 2237static __inline__ __m512d __DEFAULT_FN_ATTRS 2238_mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) 2239{ 2240 return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A, 2241 (__v8df) __B, 2242 (__v8df) __C, 2243 (__mmask8) __U, 2244 _MM_FROUND_CUR_DIRECTION); 2245} 2246 2247#define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) __extension__ ({ \ 2248 (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) (A), \ 2249 (__v16sf) (B), (__v16sf) (C), \ 2250 (__mmask16) (U), (R)); }) 2251 2252 2253#define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) __extension__ ({ \ 2254 (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) (A), \ 2255 (__v16sf) (B), (__v16sf) (C), \ 2256 (__mmask16) (U), (R)); }) 2257 2258 2259static __inline__ __m512 __DEFAULT_FN_ATTRS 2260_mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) 2261{ 2262 return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A, 2263 (__v16sf) __B, 2264 (__v16sf) __C, 2265 (__mmask16) __U, 2266 _MM_FROUND_CUR_DIRECTION); 2267} 2268 2269static __inline__ __m512 __DEFAULT_FN_ATTRS 2270_mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) 2271{ 2272 return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A, 2273 (__v16sf) __B, 2274 (__v16sf) __C, 2275 (__mmask16) __U, 2276 _MM_FROUND_CUR_DIRECTION); 2277} 2278 2279 2280 2281/* Vector permutations */ 2282 2283static __inline __m512i __DEFAULT_FN_ATTRS 2284_mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B) 2285{ 2286 return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I 2287 /* idx */ , 2288 (__v16si) __A, 2289 (__v16si) __B, 2290 (__mmask16) -1); 2291} 2292static __inline __m512i __DEFAULT_FN_ATTRS 2293_mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B) 2294{ 2295 return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I 2296 /* idx */ , 2297 (__v8di) __A, 2298 (__v8di) __B, 2299 (__mmask8) -1); 2300} 2301 2302static __inline __m512d __DEFAULT_FN_ATTRS 2303_mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B) 2304{ 2305 return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I 2306 /* idx */ , 2307 (__v8df) __A, 2308 (__v8df) __B, 2309 (__mmask8) -1); 2310} 2311static __inline __m512 __DEFAULT_FN_ATTRS 2312_mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B) 2313{ 2314 return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I 2315 /* idx */ , 2316 (__v16sf) __A, 2317 (__v16sf) __B, 2318 (__mmask16) -1); 2319} 2320 2321#define _mm512_alignr_epi64(A, B, I) __extension__ ({ \ 2322 (__m512i)__builtin_ia32_alignq512_mask((__v8di)(__m512i)(A), \ 2323 (__v8di)(__m512i)(B), \ 2324 (I), (__v8di)_mm512_setzero_si512(), \ 2325 (__mmask8)-1); }) 2326 2327#define _mm512_alignr_epi32(A, B, I) __extension__ ({ \ 2328 (__m512i)__builtin_ia32_alignd512_mask((__v16si)(__m512i)(A), \ 2329 (__v16si)(__m512i)(B), \ 2330 (I), (__v16si)_mm512_setzero_si512(), \ 2331 (__mmask16)-1); }) 2332 2333/* Vector Extract */ 2334 2335#define _mm512_extractf64x4_pd(A, I) __extension__ ({ \ 2336 (__m256d) \ 2337 __builtin_ia32_extractf64x4_mask((__v8df)(__m512d)(A), \ 2338 (I), \ 2339 (__v4df)_mm256_setzero_si256(), \ 2340 (__mmask8) -1); }) 2341 2342#define _mm512_extractf32x4_ps(A, I) __extension__ ({ \ 2343 (__m128) \ 2344 __builtin_ia32_extractf32x4_mask((__v16sf)(__m512)(A), \ 2345 (I), \ 2346 (__v4sf)_mm_setzero_ps(), \ 2347 (__mmask8) -1); }) 2348 2349/* Vector Blend */ 2350 2351static __inline __m512d __DEFAULT_FN_ATTRS 2352_mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W) 2353{ 2354 return (__m512d) __builtin_ia32_blendmpd_512_mask ((__v8df) __A, 2355 (__v8df) __W, 2356 (__mmask8) __U); 2357} 2358 2359static __inline __m512 __DEFAULT_FN_ATTRS 2360_mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W) 2361{ 2362 return (__m512) __builtin_ia32_blendmps_512_mask ((__v16sf) __A, 2363 (__v16sf) __W, 2364 (__mmask16) __U); 2365} 2366 2367static __inline __m512i __DEFAULT_FN_ATTRS 2368_mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W) 2369{ 2370 return (__m512i) __builtin_ia32_blendmq_512_mask ((__v8di) __A, 2371 (__v8di) __W, 2372 (__mmask8) __U); 2373} 2374 2375static __inline __m512i __DEFAULT_FN_ATTRS 2376_mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W) 2377{ 2378 return (__m512i) __builtin_ia32_blendmd_512_mask ((__v16si) __A, 2379 (__v16si) __W, 2380 (__mmask16) __U); 2381} 2382 2383/* Compare */ 2384 2385#define _mm512_cmp_round_ps_mask(A, B, P, R) __extension__ ({ \ 2386 (__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \ 2387 (__v16sf)(__m512)(B), \ 2388 (P), (__mmask16)-1, (R)); }) 2389 2390#define _mm512_mask_cmp_round_ps_mask(U, A, B, P, R) __extension__ ({ \ 2391 (__mmask16)__builtin_ia32_cmpps512_mask((__v16sf)(__m512)(A), \ 2392 (__v16sf)(__m512)(B), \ 2393 (P), (__mmask16)(U), (R)); }) 2394 2395#define _mm512_cmp_ps_mask(A, B, P) \ 2396 _mm512_cmp_round_ps_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION) 2397 2398#define _mm512_mask_cmp_ps_mask(U, A, B, P) \ 2399 _mm512_mask_cmp_round_ps_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION) 2400 2401#define _mm512_cmp_round_pd_mask(A, B, P, R) __extension__ ({ \ 2402 (__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \ 2403 (__v8df)(__m512d)(B), \ 2404 (P), (__mmask8)-1, (R)); }) 2405 2406#define _mm512_mask_cmp_round_pd_mask(U, A, B, P, R) __extension__ ({ \ 2407 (__mmask8)__builtin_ia32_cmppd512_mask((__v8df)(__m512d)(A), \ 2408 (__v8df)(__m512d)(B), \ 2409 (P), (__mmask8)(U), (R)); }) 2410 2411#define _mm512_cmp_pd_mask(A, B, P) \ 2412 _mm512_cmp_round_pd_mask((A), (B), (P), _MM_FROUND_CUR_DIRECTION) 2413 2414#define _mm512_mask_cmp_pd_mask(U, A, B, P) \ 2415 _mm512_mask_cmp_round_pd_mask((U), (A), (B), (P), _MM_FROUND_CUR_DIRECTION) 2416 2417/* Conversion */ 2418 2419static __inline __m512i __DEFAULT_FN_ATTRS 2420_mm512_cvttps_epu32(__m512 __A) 2421{ 2422 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A, 2423 (__v16si) 2424 _mm512_setzero_si512 (), 2425 (__mmask16) -1, 2426 _MM_FROUND_CUR_DIRECTION); 2427} 2428 2429#define _mm512_cvt_roundepi32_ps(A, R) __extension__ ({ \ 2430 (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(A), \ 2431 (__v16sf)_mm512_setzero_ps(), \ 2432 (__mmask16)-1, (R)); }) 2433 2434#define _mm512_cvt_roundepu32_ps(A, R) __extension__ ({ \ 2435 (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(A), \ 2436 (__v16sf)_mm512_setzero_ps(), \ 2437 (__mmask16)-1, (R)); }) 2438 2439static __inline __m512d __DEFAULT_FN_ATTRS 2440_mm512_cvtepi32_pd(__m256i __A) 2441{ 2442 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A, 2443 (__v8df) 2444 _mm512_setzero_pd (), 2445 (__mmask8) -1); 2446} 2447 2448static __inline __m512d __DEFAULT_FN_ATTRS 2449_mm512_cvtepu32_pd(__m256i __A) 2450{ 2451 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A, 2452 (__v8df) 2453 _mm512_setzero_pd (), 2454 (__mmask8) -1); 2455} 2456 2457#define _mm512_cvt_roundpd_ps(A, R) __extension__ ({ \ 2458 (__m256)__builtin_ia32_cvtpd2ps512_mask((__v8df)(A), \ 2459 (__v8sf)_mm256_setzero_ps(), \ 2460 (__mmask8)-1, (R)); }) 2461 2462#define _mm512_cvtps_ph(A, I) __extension__ ({ \ 2463 (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(A), (I), \ 2464 (__v16hi)_mm256_setzero_si256(), \ 2465 -1); }) 2466 2467static __inline __m512 __DEFAULT_FN_ATTRS 2468_mm512_cvtph_ps(__m256i __A) 2469{ 2470 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A, 2471 (__v16sf) 2472 _mm512_setzero_ps (), 2473 (__mmask16) -1, 2474 _MM_FROUND_CUR_DIRECTION); 2475} 2476 2477static __inline __m512i __DEFAULT_FN_ATTRS 2478_mm512_cvttps_epi32(__m512 __a) 2479{ 2480 return (__m512i) 2481 __builtin_ia32_cvttps2dq512_mask((__v16sf) __a, 2482 (__v16si) _mm512_setzero_si512 (), 2483 (__mmask16) -1, _MM_FROUND_CUR_DIRECTION); 2484} 2485 2486static __inline __m256i __DEFAULT_FN_ATTRS 2487_mm512_cvttpd_epi32(__m512d __a) 2488{ 2489 return (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df) __a, 2490 (__v8si)_mm256_setzero_si256(), 2491 (__mmask8) -1, 2492 _MM_FROUND_CUR_DIRECTION); 2493} 2494 2495#define _mm512_cvtt_roundpd_epi32(A, R) __extension__ ({ \ 2496 (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df)(A), \ 2497 (__v8si)_mm256_setzero_si256(), \ 2498 (__mmask8)-1, (R)); }) 2499 2500#define _mm512_cvtt_roundps_epi32(A, R) __extension__ ({ \ 2501 (__m512i)__builtin_ia32_cvttps2dq512_mask((__v16sf)(A), \ 2502 (__v16si)_mm512_setzero_si512(), \ 2503 (__mmask16)-1, (R)); }) 2504 2505#define _mm512_cvt_roundps_epi32(A, R) __extension__ ({ \ 2506 (__m512i)__builtin_ia32_cvtps2dq512_mask((__v16sf)(A), \ 2507 (__v16si)_mm512_setzero_si512(), \ 2508 (__mmask16)-1, (R)); }) 2509 2510#define _mm512_cvt_roundpd_epi32(A, R) __extension__ ({ \ 2511 (__m256i)__builtin_ia32_cvtpd2dq512_mask((__v8df)(A), \ 2512 (__v8si)_mm256_setzero_si256(), \ 2513 (__mmask8)-1, (R)); }) 2514 2515#define _mm512_cvt_roundps_epu32(A, R) __extension__ ({ \ 2516 (__m512i)__builtin_ia32_cvtps2udq512_mask((__v16sf)(A), \ 2517 (__v16si)_mm512_setzero_si512(), \ 2518 (__mmask16)-1, (R)); }) 2519 2520#define _mm512_cvt_roundpd_epu32(A, R) __extension__ ({ \ 2521 (__m256i)__builtin_ia32_cvtpd2udq512_mask((__v8df)(A), \ 2522 (__v8si)_mm256_setzero_si256(), \ 2523 (__mmask8) -1, (R)); }) 2524 2525/* Unpack and Interleave */ 2526static __inline __m512d __DEFAULT_FN_ATTRS 2527_mm512_unpackhi_pd(__m512d __a, __m512d __b) 2528{ 2529 return __builtin_shufflevector(__a, __b, 1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6); 2530} 2531 2532static __inline __m512d __DEFAULT_FN_ATTRS 2533_mm512_unpacklo_pd(__m512d __a, __m512d __b) 2534{ 2535 return __builtin_shufflevector(__a, __b, 0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6); 2536} 2537 2538static __inline __m512 __DEFAULT_FN_ATTRS 2539_mm512_unpackhi_ps(__m512 __a, __m512 __b) 2540{ 2541 return __builtin_shufflevector(__a, __b, 2542 2, 18, 3, 19, 2543 2+4, 18+4, 3+4, 19+4, 2544 2+8, 18+8, 3+8, 19+8, 2545 2+12, 18+12, 3+12, 19+12); 2546} 2547 2548static __inline __m512 __DEFAULT_FN_ATTRS 2549_mm512_unpacklo_ps(__m512 __a, __m512 __b) 2550{ 2551 return __builtin_shufflevector(__a, __b, 2552 0, 16, 1, 17, 2553 0+4, 16+4, 1+4, 17+4, 2554 0+8, 16+8, 1+8, 17+8, 2555 0+12, 16+12, 1+12, 17+12); 2556} 2557 2558/* Bit Test */ 2559 2560static __inline __mmask16 __DEFAULT_FN_ATTRS 2561_mm512_test_epi32_mask(__m512i __A, __m512i __B) 2562{ 2563 return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A, 2564 (__v16si) __B, 2565 (__mmask16) -1); 2566} 2567 2568static __inline __mmask8 __DEFAULT_FN_ATTRS 2569_mm512_test_epi64_mask(__m512i __A, __m512i __B) 2570{ 2571 return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A, 2572 (__v8di) __B, 2573 (__mmask8) -1); 2574} 2575 2576/* SIMD load ops */ 2577 2578static __inline __m512i __DEFAULT_FN_ATTRS 2579_mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P) 2580{ 2581 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *)__P, 2582 (__v16si) 2583 _mm512_setzero_si512 (), 2584 (__mmask16) __U); 2585} 2586 2587static __inline __m512i __DEFAULT_FN_ATTRS 2588_mm512_maskz_loadu_epi64(__mmask8 __U, void const *__P) 2589{ 2590 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const __v8di *)__P, 2591 (__v8di) 2592 _mm512_setzero_si512 (), 2593 (__mmask8) __U); 2594} 2595 2596static __inline __m512 __DEFAULT_FN_ATTRS 2597_mm512_maskz_loadu_ps(__mmask16 __U, void const *__P) 2598{ 2599 return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *)__P, 2600 (__v16sf) 2601 _mm512_setzero_ps (), 2602 (__mmask16) __U); 2603} 2604 2605static __inline __m512d __DEFAULT_FN_ATTRS 2606_mm512_maskz_loadu_pd(__mmask8 __U, void const *__P) 2607{ 2608 return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *)__P, 2609 (__v8df) 2610 _mm512_setzero_pd (), 2611 (__mmask8) __U); 2612} 2613 2614static __inline __m512 __DEFAULT_FN_ATTRS 2615_mm512_maskz_load_ps(__mmask16 __U, void const *__P) 2616{ 2617 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__P, 2618 (__v16sf) 2619 _mm512_setzero_ps (), 2620 (__mmask16) __U); 2621} 2622 2623static __inline __m512d __DEFAULT_FN_ATTRS 2624_mm512_maskz_load_pd(__mmask8 __U, void const *__P) 2625{ 2626 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__P, 2627 (__v8df) 2628 _mm512_setzero_pd (), 2629 (__mmask8) __U); 2630} 2631 2632static __inline __m512d __DEFAULT_FN_ATTRS 2633_mm512_loadu_pd(double const *__p) 2634{ 2635 struct __loadu_pd { 2636 __m512d __v; 2637 } __attribute__((__packed__, __may_alias__)); 2638 return ((struct __loadu_pd*)__p)->__v; 2639} 2640 2641static __inline __m512 __DEFAULT_FN_ATTRS 2642_mm512_loadu_ps(float const *__p) 2643{ 2644 struct __loadu_ps { 2645 __m512 __v; 2646 } __attribute__((__packed__, __may_alias__)); 2647 return ((struct __loadu_ps*)__p)->__v; 2648} 2649 2650static __inline __m512 __DEFAULT_FN_ATTRS 2651_mm512_load_ps(double const *__p) 2652{ 2653 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__p, 2654 (__v16sf) 2655 _mm512_setzero_ps (), 2656 (__mmask16) -1); 2657} 2658 2659static __inline __m512d __DEFAULT_FN_ATTRS 2660_mm512_load_pd(float const *__p) 2661{ 2662 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__p, 2663 (__v8df) 2664 _mm512_setzero_pd (), 2665 (__mmask8) -1); 2666} 2667 2668/* SIMD store ops */ 2669 2670static __inline void __DEFAULT_FN_ATTRS 2671_mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A) 2672{ 2673 __builtin_ia32_storedqudi512_mask ((__v8di *)__P, (__v8di) __A, 2674 (__mmask8) __U); 2675} 2676 2677static __inline void __DEFAULT_FN_ATTRS 2678_mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A) 2679{ 2680 __builtin_ia32_storedqusi512_mask ((__v16si *)__P, (__v16si) __A, 2681 (__mmask16) __U); 2682} 2683 2684static __inline void __DEFAULT_FN_ATTRS 2685_mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A) 2686{ 2687 __builtin_ia32_storeupd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U); 2688} 2689 2690static __inline void __DEFAULT_FN_ATTRS 2691_mm512_storeu_pd(void *__P, __m512d __A) 2692{ 2693 __builtin_ia32_storeupd512_mask((__v8df *)__P, (__v8df)__A, (__mmask8)-1); 2694} 2695 2696static __inline void __DEFAULT_FN_ATTRS 2697_mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A) 2698{ 2699 __builtin_ia32_storeups512_mask ((__v16sf *)__P, (__v16sf) __A, 2700 (__mmask16) __U); 2701} 2702 2703static __inline void __DEFAULT_FN_ATTRS 2704_mm512_storeu_ps(void *__P, __m512 __A) 2705{ 2706 __builtin_ia32_storeups512_mask((__v16sf *)__P, (__v16sf)__A, (__mmask16)-1); 2707} 2708 2709static __inline void __DEFAULT_FN_ATTRS 2710_mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A) 2711{ 2712 __builtin_ia32_storeapd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U); 2713} 2714 2715static __inline void __DEFAULT_FN_ATTRS 2716_mm512_store_pd(void *__P, __m512d __A) 2717{ 2718 *(__m512d*)__P = __A; 2719} 2720 2721static __inline void __DEFAULT_FN_ATTRS 2722_mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A) 2723{ 2724 __builtin_ia32_storeaps512_mask ((__v16sf *)__P, (__v16sf) __A, 2725 (__mmask16) __U); 2726} 2727 2728static __inline void __DEFAULT_FN_ATTRS 2729_mm512_store_ps(void *__P, __m512 __A) 2730{ 2731 *(__m512*)__P = __A; 2732} 2733 2734/* Mask ops */ 2735 2736static __inline __mmask16 __DEFAULT_FN_ATTRS 2737_mm512_knot(__mmask16 __M) 2738{ 2739 return __builtin_ia32_knothi(__M); 2740} 2741 2742/* Integer compare */ 2743 2744static __inline__ __mmask16 __DEFAULT_FN_ATTRS 2745_mm512_cmpeq_epi32_mask(__m512i __a, __m512i __b) { 2746 return (__mmask16)__builtin_ia32_pcmpeqd512_mask((__v16si)__a, (__v16si)__b, 2747 (__mmask16)-1); 2748} 2749 2750static __inline__ __mmask16 __DEFAULT_FN_ATTRS 2751_mm512_mask_cmpeq_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) { 2752 return (__mmask16)__builtin_ia32_pcmpeqd512_mask((__v16si)__a, (__v16si)__b, 2753 __u); 2754} 2755 2756static __inline__ __mmask16 __DEFAULT_FN_ATTRS 2757_mm512_cmpeq_epu32_mask(__m512i __a, __m512i __b) { 2758 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 0, 2759 (__mmask16)-1); 2760} 2761 2762static __inline__ __mmask16 __DEFAULT_FN_ATTRS 2763_mm512_mask_cmpeq_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) { 2764 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 0, 2765 __u); 2766} 2767 2768static __inline__ __mmask8 __DEFAULT_FN_ATTRS 2769_mm512_mask_cmpeq_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) { 2770 return (__mmask8)__builtin_ia32_pcmpeqq512_mask((__v8di)__a, (__v8di)__b, 2771 __u); 2772} 2773 2774static __inline__ __mmask8 __DEFAULT_FN_ATTRS 2775_mm512_cmpeq_epi64_mask(__m512i __a, __m512i __b) { 2776 return (__mmask8)__builtin_ia32_pcmpeqq512_mask((__v8di)__a, (__v8di)__b, 2777 (__mmask8)-1); 2778} 2779 2780static __inline__ __mmask8 __DEFAULT_FN_ATTRS 2781_mm512_cmpeq_epu64_mask(__m512i __a, __m512i __b) { 2782 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 0, 2783 (__mmask8)-1); 2784} 2785 2786static __inline__ __mmask8 __DEFAULT_FN_ATTRS 2787_mm512_mask_cmpeq_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) { 2788 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 0, 2789 __u); 2790} 2791 2792static __inline__ __mmask16 __DEFAULT_FN_ATTRS 2793_mm512_cmpge_epi32_mask(__m512i __a, __m512i __b) { 2794 return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 5, 2795 (__mmask16)-1); 2796} 2797 2798static __inline__ __mmask16 __DEFAULT_FN_ATTRS 2799_mm512_mask_cmpge_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) { 2800 return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 5, 2801 __u); 2802} 2803 2804static __inline__ __mmask16 __DEFAULT_FN_ATTRS 2805_mm512_cmpge_epu32_mask(__m512i __a, __m512i __b) { 2806 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 5, 2807 (__mmask16)-1); 2808} 2809 2810static __inline__ __mmask16 __DEFAULT_FN_ATTRS 2811_mm512_mask_cmpge_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) { 2812 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 5, 2813 __u); 2814} 2815 2816static __inline__ __mmask8 __DEFAULT_FN_ATTRS 2817_mm512_cmpge_epi64_mask(__m512i __a, __m512i __b) { 2818 return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 5, 2819 (__mmask8)-1); 2820} 2821 2822static __inline__ __mmask8 __DEFAULT_FN_ATTRS 2823_mm512_mask_cmpge_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) { 2824 return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 5, 2825 __u); 2826} 2827 2828static __inline__ __mmask8 __DEFAULT_FN_ATTRS 2829_mm512_cmpge_epu64_mask(__m512i __a, __m512i __b) { 2830 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 5, 2831 (__mmask8)-1); 2832} 2833 2834static __inline__ __mmask8 __DEFAULT_FN_ATTRS 2835_mm512_mask_cmpge_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) { 2836 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 5, 2837 __u); 2838} 2839 2840static __inline__ __mmask16 __DEFAULT_FN_ATTRS 2841_mm512_cmpgt_epi32_mask(__m512i __a, __m512i __b) { 2842 return (__mmask16)__builtin_ia32_pcmpgtd512_mask((__v16si)__a, (__v16si)__b, 2843 (__mmask16)-1); 2844} 2845 2846static __inline__ __mmask16 __DEFAULT_FN_ATTRS 2847_mm512_mask_cmpgt_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) { 2848 return (__mmask16)__builtin_ia32_pcmpgtd512_mask((__v16si)__a, (__v16si)__b, 2849 __u); 2850} 2851 2852static __inline__ __mmask16 __DEFAULT_FN_ATTRS 2853_mm512_cmpgt_epu32_mask(__m512i __a, __m512i __b) { 2854 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 6, 2855 (__mmask16)-1); 2856} 2857 2858static __inline__ __mmask16 __DEFAULT_FN_ATTRS 2859_mm512_mask_cmpgt_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) { 2860 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 6, 2861 __u); 2862} 2863 2864static __inline__ __mmask8 __DEFAULT_FN_ATTRS 2865_mm512_mask_cmpgt_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) { 2866 return (__mmask8)__builtin_ia32_pcmpgtq512_mask((__v8di)__a, (__v8di)__b, 2867 __u); 2868} 2869 2870static __inline__ __mmask8 __DEFAULT_FN_ATTRS 2871_mm512_cmpgt_epi64_mask(__m512i __a, __m512i __b) { 2872 return (__mmask8)__builtin_ia32_pcmpgtq512_mask((__v8di)__a, (__v8di)__b, 2873 (__mmask8)-1); 2874} 2875 2876static __inline__ __mmask8 __DEFAULT_FN_ATTRS 2877_mm512_cmpgt_epu64_mask(__m512i __a, __m512i __b) { 2878 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 6, 2879 (__mmask8)-1); 2880} 2881 2882static __inline__ __mmask8 __DEFAULT_FN_ATTRS 2883_mm512_mask_cmpgt_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) { 2884 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 6, 2885 __u); 2886} 2887 2888static __inline__ __mmask16 __DEFAULT_FN_ATTRS 2889_mm512_cmple_epi32_mask(__m512i __a, __m512i __b) { 2890 return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 2, 2891 (__mmask16)-1); 2892} 2893 2894static __inline__ __mmask16 __DEFAULT_FN_ATTRS 2895_mm512_mask_cmple_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) { 2896 return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 2, 2897 __u); 2898} 2899 2900static __inline__ __mmask16 __DEFAULT_FN_ATTRS 2901_mm512_cmple_epu32_mask(__m512i __a, __m512i __b) { 2902 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 2, 2903 (__mmask16)-1); 2904} 2905 2906static __inline__ __mmask16 __DEFAULT_FN_ATTRS 2907_mm512_mask_cmple_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) { 2908 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 2, 2909 __u); 2910} 2911 2912static __inline__ __mmask8 __DEFAULT_FN_ATTRS 2913_mm512_cmple_epi64_mask(__m512i __a, __m512i __b) { 2914 return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 2, 2915 (__mmask8)-1); 2916} 2917 2918static __inline__ __mmask8 __DEFAULT_FN_ATTRS 2919_mm512_mask_cmple_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) { 2920 return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 2, 2921 __u); 2922} 2923 2924static __inline__ __mmask8 __DEFAULT_FN_ATTRS 2925_mm512_cmple_epu64_mask(__m512i __a, __m512i __b) { 2926 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 2, 2927 (__mmask8)-1); 2928} 2929 2930static __inline__ __mmask8 __DEFAULT_FN_ATTRS 2931_mm512_mask_cmple_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) { 2932 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 2, 2933 __u); 2934} 2935 2936static __inline__ __mmask16 __DEFAULT_FN_ATTRS 2937_mm512_cmplt_epi32_mask(__m512i __a, __m512i __b) { 2938 return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 1, 2939 (__mmask16)-1); 2940} 2941 2942static __inline__ __mmask16 __DEFAULT_FN_ATTRS 2943_mm512_mask_cmplt_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) { 2944 return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 1, 2945 __u); 2946} 2947 2948static __inline__ __mmask16 __DEFAULT_FN_ATTRS 2949_mm512_cmplt_epu32_mask(__m512i __a, __m512i __b) { 2950 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 1, 2951 (__mmask16)-1); 2952} 2953 2954static __inline__ __mmask16 __DEFAULT_FN_ATTRS 2955_mm512_mask_cmplt_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) { 2956 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 1, 2957 __u); 2958} 2959 2960static __inline__ __mmask8 __DEFAULT_FN_ATTRS 2961_mm512_cmplt_epi64_mask(__m512i __a, __m512i __b) { 2962 return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 1, 2963 (__mmask8)-1); 2964} 2965 2966static __inline__ __mmask8 __DEFAULT_FN_ATTRS 2967_mm512_mask_cmplt_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) { 2968 return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 1, 2969 __u); 2970} 2971 2972static __inline__ __mmask8 __DEFAULT_FN_ATTRS 2973_mm512_cmplt_epu64_mask(__m512i __a, __m512i __b) { 2974 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 1, 2975 (__mmask8)-1); 2976} 2977 2978static __inline__ __mmask8 __DEFAULT_FN_ATTRS 2979_mm512_mask_cmplt_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) { 2980 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 1, 2981 __u); 2982} 2983 2984static __inline__ __mmask16 __DEFAULT_FN_ATTRS 2985_mm512_cmpneq_epi32_mask(__m512i __a, __m512i __b) { 2986 return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 4, 2987 (__mmask16)-1); 2988} 2989 2990static __inline__ __mmask16 __DEFAULT_FN_ATTRS 2991_mm512_mask_cmpneq_epi32_mask(__mmask16 __u, __m512i __a, __m512i __b) { 2992 return (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)__a, (__v16si)__b, 4, 2993 __u); 2994} 2995 2996static __inline__ __mmask16 __DEFAULT_FN_ATTRS 2997_mm512_cmpneq_epu32_mask(__m512i __a, __m512i __b) { 2998 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 4, 2999 (__mmask16)-1); 3000} 3001 3002static __inline__ __mmask16 __DEFAULT_FN_ATTRS 3003_mm512_mask_cmpneq_epu32_mask(__mmask16 __u, __m512i __a, __m512i __b) { 3004 return (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)__a, (__v16si)__b, 4, 3005 __u); 3006} 3007 3008static __inline__ __mmask8 __DEFAULT_FN_ATTRS 3009_mm512_cmpneq_epi64_mask(__m512i __a, __m512i __b) { 3010 return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 4, 3011 (__mmask8)-1); 3012} 3013 3014static __inline__ __mmask8 __DEFAULT_FN_ATTRS 3015_mm512_mask_cmpneq_epi64_mask(__mmask8 __u, __m512i __a, __m512i __b) { 3016 return (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)__a, (__v8di)__b, 4, 3017 __u); 3018} 3019 3020static __inline__ __mmask8 __DEFAULT_FN_ATTRS 3021_mm512_cmpneq_epu64_mask(__m512i __a, __m512i __b) { 3022 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 4, 3023 (__mmask8)-1); 3024} 3025 3026static __inline__ __mmask8 __DEFAULT_FN_ATTRS 3027_mm512_mask_cmpneq_epu64_mask(__mmask8 __u, __m512i __a, __m512i __b) { 3028 return (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)__a, (__v8di)__b, 4, 3029 __u); 3030} 3031 3032#define _mm512_cmp_epi32_mask(a, b, p) __extension__ ({ \ 3033 (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \ 3034 (__v16si)(__m512i)(b), (p), \ 3035 (__mmask16)-1); }) 3036 3037#define _mm512_cmp_epu32_mask(a, b, p) __extension__ ({ \ 3038 (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \ 3039 (__v16si)(__m512i)(b), (p), \ 3040 (__mmask16)-1); }) 3041 3042#define _mm512_cmp_epi64_mask(a, b, p) __extension__ ({ \ 3043 (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \ 3044 (__v8di)(__m512i)(b), (p), \ 3045 (__mmask8)-1); }) 3046 3047#define _mm512_cmp_epu64_mask(a, b, p) __extension__ ({ \ 3048 (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \ 3049 (__v8di)(__m512i)(b), (p), \ 3050 (__mmask8)-1); }) 3051 3052#define _mm512_mask_cmp_epi32_mask(m, a, b, p) __extension__ ({ \ 3053 (__mmask16)__builtin_ia32_cmpd512_mask((__v16si)(__m512i)(a), \ 3054 (__v16si)(__m512i)(b), (p), \ 3055 (__mmask16)(m)); }) 3056 3057#define _mm512_mask_cmp_epu32_mask(m, a, b, p) __extension__ ({ \ 3058 (__mmask16)__builtin_ia32_ucmpd512_mask((__v16si)(__m512i)(a), \ 3059 (__v16si)(__m512i)(b), (p), \ 3060 (__mmask16)(m)); }) 3061 3062#define _mm512_mask_cmp_epi64_mask(m, a, b, p) __extension__ ({ \ 3063 (__mmask8)__builtin_ia32_cmpq512_mask((__v8di)(__m512i)(a), \ 3064 (__v8di)(__m512i)(b), (p), \ 3065 (__mmask8)(m)); }) 3066 3067#define _mm512_mask_cmp_epu64_mask(m, a, b, p) __extension__ ({ \ 3068 (__mmask8)__builtin_ia32_ucmpq512_mask((__v8di)(__m512i)(a), \ 3069 (__v8di)(__m512i)(b), (p), \ 3070 (__mmask8)(m)); }) 3071 3072#undef __DEFAULT_FN_ATTRS 3073 3074#endif // __AVX512FINTRIN_H 3075