1/* Copyright (C) 2013-2015 Free Software Foundation, Inc. 2 3 This file is part of GCC. 4 5 GCC is free software; you can redistribute it and/or modify 6 it under the terms of the GNU General Public License as published by 7 the Free Software Foundation; either version 3, or (at your option) 8 any later version. 9 10 GCC is distributed in the hope that it will be useful, 11 but WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 GNU General Public License for more details. 14 15 Under Section 7 of GPL version 3, you are granted additional 16 permissions described in the GCC Runtime Library Exception, version 17 3.1, as published by the Free Software Foundation. 18 19 You should have received a copy of the GNU General Public License and 20 a copy of the GCC Runtime Library Exception along with this program; 21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 22 <http://www.gnu.org/licenses/>. */ 23 24#ifndef _IMMINTRIN_H_INCLUDED 25#error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead." 26#endif 27 28#ifndef _AVX512FINTRIN_H_INCLUDED 29#define _AVX512FINTRIN_H_INCLUDED 30 31#ifndef __AVX512F__ 32#pragma GCC push_options 33#pragma GCC target("avx512f") 34#define __DISABLE_AVX512F__ 35#endif /* __AVX512F__ */ 36 37/* Internal data types for implementing the intrinsics. 
*/ 38typedef double __v8df __attribute__ ((__vector_size__ (64))); 39typedef float __v16sf __attribute__ ((__vector_size__ (64))); 40typedef long long __v8di __attribute__ ((__vector_size__ (64))); 41typedef unsigned long long __v8du __attribute__ ((__vector_size__ (64))); 42typedef int __v16si __attribute__ ((__vector_size__ (64))); 43typedef unsigned int __v16su __attribute__ ((__vector_size__ (64))); 44typedef short __v32hi __attribute__ ((__vector_size__ (64))); 45typedef unsigned short __v32hu __attribute__ ((__vector_size__ (64))); 46typedef char __v64qi __attribute__ ((__vector_size__ (64))); 47typedef unsigned char __v64qu __attribute__ ((__vector_size__ (64))); 48 49/* The Intel API is flexible enough that we must allow aliasing with other 50 vector types, and their scalar components. */ 51typedef float __m512 __attribute__ ((__vector_size__ (64), __may_alias__)); 52typedef long long __m512i __attribute__ ((__vector_size__ (64), __may_alias__)); 53typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__)); 54 55typedef unsigned char __mmask8; 56typedef unsigned short __mmask16; 57 58extern __inline __m512i 59__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 60_mm512_set_epi64 (long long __A, long long __B, long long __C, 61 long long __D, long long __E, long long __F, 62 long long __G, long long __H) 63{ 64 return __extension__ (__m512i) (__v8di) 65 { __H, __G, __F, __E, __D, __C, __B, __A }; 66} 67 68/* Create the vector [A B C D E F G H I J K L M N O P]. 
*/ 69extern __inline __m512i 70__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 71_mm512_set_epi32 (int __A, int __B, int __C, int __D, 72 int __E, int __F, int __G, int __H, 73 int __I, int __J, int __K, int __L, 74 int __M, int __N, int __O, int __P) 75{ 76 return __extension__ (__m512i)(__v16si) 77 { __P, __O, __N, __M, __L, __K, __J, __I, 78 __H, __G, __F, __E, __D, __C, __B, __A }; 79} 80 81extern __inline __m512d 82__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 83_mm512_set_pd (double __A, double __B, double __C, double __D, 84 double __E, double __F, double __G, double __H) 85{ 86 return __extension__ (__m512d) 87 { __H, __G, __F, __E, __D, __C, __B, __A }; 88} 89 90extern __inline __m512 91__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 92_mm512_set_ps (float __A, float __B, float __C, float __D, 93 float __E, float __F, float __G, float __H, 94 float __I, float __J, float __K, float __L, 95 float __M, float __N, float __O, float __P) 96{ 97 return __extension__ (__m512) 98 { __P, __O, __N, __M, __L, __K, __J, __I, 99 __H, __G, __F, __E, __D, __C, __B, __A }; 100} 101 102#define _mm512_setr_epi64(e0,e1,e2,e3,e4,e5,e6,e7) \ 103 _mm512_set_epi64(e7,e6,e5,e4,e3,e2,e1,e0) 104 105#define _mm512_setr_epi32(e0,e1,e2,e3,e4,e5,e6,e7, \ 106 e8,e9,e10,e11,e12,e13,e14,e15) \ 107 _mm512_set_epi32(e15,e14,e13,e12,e11,e10,e9,e8,e7,e6,e5,e4,e3,e2,e1,e0) 108 109#define _mm512_setr_pd(e0,e1,e2,e3,e4,e5,e6,e7) \ 110 _mm512_set_pd(e7,e6,e5,e4,e3,e2,e1,e0) 111 112#define _mm512_setr_ps(e0,e1,e2,e3,e4,e5,e6,e7,e8,e9,e10,e11,e12,e13,e14,e15) \ 113 _mm512_set_ps(e15,e14,e13,e12,e11,e10,e9,e8,e7,e6,e5,e4,e3,e2,e1,e0) 114 115extern __inline __m512 116__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 117_mm512_undefined_ps (void) 118{ 119 __m512 __Y = __Y; 120 return __Y; 121} 122 123extern __inline __m512d 124__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 125_mm512_undefined_pd (void) 
126{ 127 __m512d __Y = __Y; 128 return __Y; 129} 130 131extern __inline __m512i 132__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 133_mm512_undefined_si512 (void) 134{ 135 __m512i __Y = __Y; 136 return __Y; 137} 138 139extern __inline __m512i 140__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 141_mm512_set1_epi8 (char __A) 142{ 143 return __extension__ (__m512i)(__v64qi) 144 { __A, __A, __A, __A, __A, __A, __A, __A, 145 __A, __A, __A, __A, __A, __A, __A, __A, 146 __A, __A, __A, __A, __A, __A, __A, __A, 147 __A, __A, __A, __A, __A, __A, __A, __A, 148 __A, __A, __A, __A, __A, __A, __A, __A, 149 __A, __A, __A, __A, __A, __A, __A, __A, 150 __A, __A, __A, __A, __A, __A, __A, __A, 151 __A, __A, __A, __A, __A, __A, __A, __A }; 152} 153 154extern __inline __m512i 155__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 156_mm512_set1_epi16 (short __A) 157{ 158 return __extension__ (__m512i)(__v32hi) 159 { __A, __A, __A, __A, __A, __A, __A, __A, 160 __A, __A, __A, __A, __A, __A, __A, __A, 161 __A, __A, __A, __A, __A, __A, __A, __A, 162 __A, __A, __A, __A, __A, __A, __A, __A }; 163} 164 165extern __inline __m512d 166__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 167_mm512_set1_pd (double __A) 168{ 169 return (__m512d) __builtin_ia32_broadcastsd512 (__extension__ 170 (__v2df) { __A, }, 171 (__v8df) 172 _mm512_undefined_pd (), 173 (__mmask8) -1); 174} 175 176extern __inline __m512 177__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 178_mm512_set1_ps (float __A) 179{ 180 return (__m512) __builtin_ia32_broadcastss512 (__extension__ 181 (__v4sf) { __A, }, 182 (__v16sf) 183 _mm512_undefined_ps (), 184 (__mmask16) -1); 185} 186 187/* Create the vector [A B C D A B C D A B C D A B C D]. 
*/ 188extern __inline __m512i 189__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 190_mm512_set4_epi32 (int __A, int __B, int __C, int __D) 191{ 192 return __extension__ (__m512i)(__v16si) 193 { __D, __C, __B, __A, __D, __C, __B, __A, 194 __D, __C, __B, __A, __D, __C, __B, __A }; 195} 196 197extern __inline __m512i 198__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 199_mm512_set4_epi64 (long long __A, long long __B, long long __C, 200 long long __D) 201{ 202 return __extension__ (__m512i) (__v8di) 203 { __D, __C, __B, __A, __D, __C, __B, __A }; 204} 205 206extern __inline __m512d 207__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 208_mm512_set4_pd (double __A, double __B, double __C, double __D) 209{ 210 return __extension__ (__m512d) 211 { __D, __C, __B, __A, __D, __C, __B, __A }; 212} 213 214extern __inline __m512 215__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 216_mm512_set4_ps (float __A, float __B, float __C, float __D) 217{ 218 return __extension__ (__m512) 219 { __D, __C, __B, __A, __D, __C, __B, __A, 220 __D, __C, __B, __A, __D, __C, __B, __A }; 221} 222 223#define _mm512_setr4_epi64(e0,e1,e2,e3) \ 224 _mm512_set4_epi64(e3,e2,e1,e0) 225 226#define _mm512_setr4_epi32(e0,e1,e2,e3) \ 227 _mm512_set4_epi32(e3,e2,e1,e0) 228 229#define _mm512_setr4_pd(e0,e1,e2,e3) \ 230 _mm512_set4_pd(e3,e2,e1,e0) 231 232#define _mm512_setr4_ps(e0,e1,e2,e3) \ 233 _mm512_set4_ps(e3,e2,e1,e0) 234 235extern __inline __m512 236__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 237_mm512_setzero_ps (void) 238{ 239 return __extension__ (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 240 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }; 241} 242 243extern __inline __m512d 244__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 245_mm512_setzero_pd (void) 246{ 247 return __extension__ (__m512d) { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }; 248} 249 250extern __inline __m512i 
251__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 252_mm512_setzero_epi32 (void) 253{ 254 return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 }; 255} 256 257extern __inline __m512i 258__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 259_mm512_setzero_si512 (void) 260{ 261 return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 }; 262} 263 264extern __inline __m512d 265__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 266_mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A) 267{ 268 return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A, 269 (__v8df) __W, 270 (__mmask8) __U); 271} 272 273extern __inline __m512d 274__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 275_mm512_maskz_mov_pd (__mmask8 __U, __m512d __A) 276{ 277 return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A, 278 (__v8df) 279 _mm512_setzero_pd (), 280 (__mmask8) __U); 281} 282 283extern __inline __m512 284__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 285_mm512_mask_mov_ps (__m512 __W, __mmask16 __U, __m512 __A) 286{ 287 return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A, 288 (__v16sf) __W, 289 (__mmask16) __U); 290} 291 292extern __inline __m512 293__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 294_mm512_maskz_mov_ps (__mmask16 __U, __m512 __A) 295{ 296 return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A, 297 (__v16sf) 298 _mm512_setzero_ps (), 299 (__mmask16) __U); 300} 301 302extern __inline __m512d 303__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 304_mm512_load_pd (void const *__P) 305{ 306 return *(__m512d *) __P; 307} 308 309extern __inline __m512d 310__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 311_mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P) 312{ 313 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P, 314 (__v8df) __W, 315 (__mmask8) __U); 
316} 317 318extern __inline __m512d 319__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 320_mm512_maskz_load_pd (__mmask8 __U, void const *__P) 321{ 322 return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P, 323 (__v8df) 324 _mm512_setzero_pd (), 325 (__mmask8) __U); 326} 327 328extern __inline void 329__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 330_mm512_store_pd (void *__P, __m512d __A) 331{ 332 *(__m512d *) __P = __A; 333} 334 335extern __inline void 336__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 337_mm512_mask_store_pd (void *__P, __mmask8 __U, __m512d __A) 338{ 339 __builtin_ia32_storeapd512_mask ((__v8df *) __P, (__v8df) __A, 340 (__mmask8) __U); 341} 342 343extern __inline __m512 344__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 345_mm512_load_ps (void const *__P) 346{ 347 return *(__m512 *) __P; 348} 349 350extern __inline __m512 351__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 352_mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P) 353{ 354 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P, 355 (__v16sf) __W, 356 (__mmask16) __U); 357} 358 359extern __inline __m512 360__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 361_mm512_maskz_load_ps (__mmask16 __U, void const *__P) 362{ 363 return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P, 364 (__v16sf) 365 _mm512_setzero_ps (), 366 (__mmask16) __U); 367} 368 369extern __inline void 370__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 371_mm512_store_ps (void *__P, __m512 __A) 372{ 373 *(__m512 *) __P = __A; 374} 375 376extern __inline void 377__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 378_mm512_mask_store_ps (void *__P, __mmask16 __U, __m512 __A) 379{ 380 __builtin_ia32_storeaps512_mask ((__v16sf *) __P, (__v16sf) __A, 381 (__mmask16) __U); 382} 383 384extern __inline __m512i 
385__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 386_mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A) 387{ 388 return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A, 389 (__v8di) __W, 390 (__mmask8) __U); 391} 392 393extern __inline __m512i 394__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 395_mm512_maskz_mov_epi64 (__mmask8 __U, __m512i __A) 396{ 397 return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A, 398 (__v8di) 399 _mm512_setzero_si512 (), 400 (__mmask8) __U); 401} 402 403extern __inline __m512i 404__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 405_mm512_load_epi64 (void const *__P) 406{ 407 return *(__m512i *) __P; 408} 409 410extern __inline __m512i 411__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 412_mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P) 413{ 414 return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P, 415 (__v8di) __W, 416 (__mmask8) __U); 417} 418 419extern __inline __m512i 420__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 421_mm512_maskz_load_epi64 (__mmask8 __U, void const *__P) 422{ 423 return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P, 424 (__v8di) 425 _mm512_setzero_si512 (), 426 (__mmask8) __U); 427} 428 429extern __inline void 430__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 431_mm512_store_epi64 (void *__P, __m512i __A) 432{ 433 *(__m512i *) __P = __A; 434} 435 436extern __inline void 437__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 438_mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A) 439{ 440 __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A, 441 (__mmask8) __U); 442} 443 444extern __inline __m512i 445__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 446_mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A) 447{ 448 return (__m512i) 
__builtin_ia32_movdqa32_512_mask ((__v16si) __A, 449 (__v16si) __W, 450 (__mmask16) __U); 451} 452 453extern __inline __m512i 454__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 455_mm512_maskz_mov_epi32 (__mmask16 __U, __m512i __A) 456{ 457 return (__m512i) __builtin_ia32_movdqa32_512_mask ((__v16si) __A, 458 (__v16si) 459 _mm512_setzero_si512 (), 460 (__mmask16) __U); 461} 462 463extern __inline __m512i 464__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 465_mm512_load_si512 (void const *__P) 466{ 467 return *(__m512i *) __P; 468} 469 470extern __inline __m512i 471__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 472_mm512_load_epi32 (void const *__P) 473{ 474 return *(__m512i *) __P; 475} 476 477extern __inline __m512i 478__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 479_mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P) 480{ 481 return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P, 482 (__v16si) __W, 483 (__mmask16) __U); 484} 485 486extern __inline __m512i 487__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 488_mm512_maskz_load_epi32 (__mmask16 __U, void const *__P) 489{ 490 return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P, 491 (__v16si) 492 _mm512_setzero_si512 (), 493 (__mmask16) __U); 494} 495 496extern __inline void 497__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 498_mm512_store_si512 (void *__P, __m512i __A) 499{ 500 *(__m512i *) __P = __A; 501} 502 503extern __inline void 504__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 505_mm512_store_epi32 (void *__P, __m512i __A) 506{ 507 *(__m512i *) __P = __A; 508} 509 510extern __inline void 511__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 512_mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A) 513{ 514 __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A, 515 
(__mmask16) __U); 516} 517 518extern __inline __m512i 519__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 520_mm512_mullo_epi32 (__m512i __A, __m512i __B) 521{ 522 return (__m512i) ((__v16su) __A * (__v16su) __B); 523} 524 525extern __inline __m512i 526__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 527_mm512_maskz_mullo_epi32 (__mmask16 __M, __m512i __A, __m512i __B) 528{ 529 return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A, 530 (__v16si) __B, 531 (__v16si) 532 _mm512_setzero_si512 (), 533 __M); 534} 535 536extern __inline __m512i 537__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 538_mm512_mask_mullo_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) 539{ 540 return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A, 541 (__v16si) __B, 542 (__v16si) __W, __M); 543} 544 545extern __inline __m512i 546__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 547_mm512_sllv_epi32 (__m512i __X, __m512i __Y) 548{ 549 return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X, 550 (__v16si) __Y, 551 (__v16si) 552 _mm512_undefined_si512 (), 553 (__mmask16) -1); 554} 555 556extern __inline __m512i 557__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 558_mm512_mask_sllv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) 559{ 560 return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X, 561 (__v16si) __Y, 562 (__v16si) __W, 563 (__mmask16) __U); 564} 565 566extern __inline __m512i 567__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 568_mm512_maskz_sllv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y) 569{ 570 return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X, 571 (__v16si) __Y, 572 (__v16si) 573 _mm512_setzero_si512 (), 574 (__mmask16) __U); 575} 576 577extern __inline __m512i 578__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 579_mm512_srav_epi32 (__m512i __X, __m512i __Y) 580{ 581 return 
(__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X, 582 (__v16si) __Y, 583 (__v16si) 584 _mm512_undefined_si512 (), 585 (__mmask16) -1); 586} 587 588extern __inline __m512i 589__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 590_mm512_mask_srav_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) 591{ 592 return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X, 593 (__v16si) __Y, 594 (__v16si) __W, 595 (__mmask16) __U); 596} 597 598extern __inline __m512i 599__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 600_mm512_maskz_srav_epi32 (__mmask16 __U, __m512i __X, __m512i __Y) 601{ 602 return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X, 603 (__v16si) __Y, 604 (__v16si) 605 _mm512_setzero_si512 (), 606 (__mmask16) __U); 607} 608 609extern __inline __m512i 610__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 611_mm512_srlv_epi32 (__m512i __X, __m512i __Y) 612{ 613 return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X, 614 (__v16si) __Y, 615 (__v16si) 616 _mm512_undefined_si512 (), 617 (__mmask16) -1); 618} 619 620extern __inline __m512i 621__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 622_mm512_mask_srlv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) 623{ 624 return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X, 625 (__v16si) __Y, 626 (__v16si) __W, 627 (__mmask16) __U); 628} 629 630extern __inline __m512i 631__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 632_mm512_maskz_srlv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y) 633{ 634 return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X, 635 (__v16si) __Y, 636 (__v16si) 637 _mm512_setzero_si512 (), 638 (__mmask16) __U); 639} 640 641extern __inline __m512i 642__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 643_mm512_add_epi64 (__m512i __A, __m512i __B) 644{ 645 return (__m512i) ((__v8du) __A + (__v8du) __B); 646} 647 648extern __inline 
__m512i 649__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 650_mm512_mask_add_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) 651{ 652 return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A, 653 (__v8di) __B, 654 (__v8di) __W, 655 (__mmask8) __U); 656} 657 658extern __inline __m512i 659__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 660_mm512_maskz_add_epi64 (__mmask8 __U, __m512i __A, __m512i __B) 661{ 662 return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A, 663 (__v8di) __B, 664 (__v8di) 665 _mm512_setzero_si512 (), 666 (__mmask8) __U); 667} 668 669extern __inline __m512i 670__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 671_mm512_sub_epi64 (__m512i __A, __m512i __B) 672{ 673 return (__m512i) ((__v8du) __A - (__v8du) __B); 674} 675 676extern __inline __m512i 677__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 678_mm512_mask_sub_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) 679{ 680 return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A, 681 (__v8di) __B, 682 (__v8di) __W, 683 (__mmask8) __U); 684} 685 686extern __inline __m512i 687__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 688_mm512_maskz_sub_epi64 (__mmask8 __U, __m512i __A, __m512i __B) 689{ 690 return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A, 691 (__v8di) __B, 692 (__v8di) 693 _mm512_setzero_si512 (), 694 (__mmask8) __U); 695} 696 697extern __inline __m512i 698__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 699_mm512_sllv_epi64 (__m512i __X, __m512i __Y) 700{ 701 return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X, 702 (__v8di) __Y, 703 (__v8di) 704 _mm512_undefined_pd (), 705 (__mmask8) -1); 706} 707 708extern __inline __m512i 709__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 710_mm512_mask_sllv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y) 711{ 712 return (__m512i) 
__builtin_ia32_psllv8di_mask ((__v8di) __X, 713 (__v8di) __Y, 714 (__v8di) __W, 715 (__mmask8) __U); 716} 717 718extern __inline __m512i 719__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 720_mm512_maskz_sllv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y) 721{ 722 return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X, 723 (__v8di) __Y, 724 (__v8di) 725 _mm512_setzero_si512 (), 726 (__mmask8) __U); 727} 728 729extern __inline __m512i 730__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 731_mm512_srav_epi64 (__m512i __X, __m512i __Y) 732{ 733 return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X, 734 (__v8di) __Y, 735 (__v8di) 736 _mm512_undefined_si512 (), 737 (__mmask8) -1); 738} 739 740extern __inline __m512i 741__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 742_mm512_mask_srav_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y) 743{ 744 return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X, 745 (__v8di) __Y, 746 (__v8di) __W, 747 (__mmask8) __U); 748} 749 750extern __inline __m512i 751__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 752_mm512_maskz_srav_epi64 (__mmask8 __U, __m512i __X, __m512i __Y) 753{ 754 return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X, 755 (__v8di) __Y, 756 (__v8di) 757 _mm512_setzero_si512 (), 758 (__mmask8) __U); 759} 760 761extern __inline __m512i 762__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 763_mm512_srlv_epi64 (__m512i __X, __m512i __Y) 764{ 765 return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X, 766 (__v8di) __Y, 767 (__v8di) 768 _mm512_undefined_si512 (), 769 (__mmask8) -1); 770} 771 772extern __inline __m512i 773__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 774_mm512_mask_srlv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y) 775{ 776 return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X, 777 (__v8di) __Y, 778 (__v8di) __W, 779 (__mmask8) __U); 780} 
781 782extern __inline __m512i 783__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 784_mm512_maskz_srlv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y) 785{ 786 return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X, 787 (__v8di) __Y, 788 (__v8di) 789 _mm512_setzero_si512 (), 790 (__mmask8) __U); 791} 792 793extern __inline __m512i 794__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 795_mm512_add_epi32 (__m512i __A, __m512i __B) 796{ 797 return (__m512i) ((__v16su) __A + (__v16su) __B); 798} 799 800extern __inline __m512i 801__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 802_mm512_mask_add_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) 803{ 804 return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A, 805 (__v16si) __B, 806 (__v16si) __W, 807 (__mmask16) __U); 808} 809 810extern __inline __m512i 811__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 812_mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B) 813{ 814 return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A, 815 (__v16si) __B, 816 (__v16si) 817 _mm512_setzero_si512 (), 818 (__mmask16) __U); 819} 820 821extern __inline __m512i 822__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 823_mm512_mul_epi32 (__m512i __X, __m512i __Y) 824{ 825 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X, 826 (__v16si) __Y, 827 (__v8di) 828 _mm512_undefined_si512 (), 829 (__mmask8) -1); 830} 831 832extern __inline __m512i 833__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 834_mm512_mask_mul_epi32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) 835{ 836 return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X, 837 (__v16si) __Y, 838 (__v8di) __W, __M); 839} 840 841extern __inline __m512i 842__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 843_mm512_maskz_mul_epi32 (__mmask8 __M, __m512i __X, __m512i __Y) 844{ 845 return (__m512i) 
__builtin_ia32_pmuldq512_mask ((__v16si) __X, 846 (__v16si) __Y, 847 (__v8di) 848 _mm512_setzero_si512 (), 849 __M); 850} 851 852extern __inline __m512i 853__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 854_mm512_sub_epi32 (__m512i __A, __m512i __B) 855{ 856 return (__m512i) ((__v16su) __A - (__v16su) __B); 857} 858 859extern __inline __m512i 860__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 861_mm512_mask_sub_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) 862{ 863 return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A, 864 (__v16si) __B, 865 (__v16si) __W, 866 (__mmask16) __U); 867} 868 869extern __inline __m512i 870__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 871_mm512_maskz_sub_epi32 (__mmask16 __U, __m512i __A, __m512i __B) 872{ 873 return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A, 874 (__v16si) __B, 875 (__v16si) 876 _mm512_setzero_si512 (), 877 (__mmask16) __U); 878} 879 880extern __inline __m512i 881__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 882_mm512_mul_epu32 (__m512i __X, __m512i __Y) 883{ 884 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X, 885 (__v16si) __Y, 886 (__v8di) 887 _mm512_undefined_si512 (), 888 (__mmask8) -1); 889} 890 891extern __inline __m512i 892__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 893_mm512_mask_mul_epu32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) 894{ 895 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X, 896 (__v16si) __Y, 897 (__v8di) __W, __M); 898} 899 900extern __inline __m512i 901__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 902_mm512_maskz_mul_epu32 (__mmask8 __M, __m512i __X, __m512i __Y) 903{ 904 return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X, 905 (__v16si) __Y, 906 (__v8di) 907 _mm512_setzero_si512 (), 908 __M); 909} 910 911#ifdef __OPTIMIZE__ 912extern __inline __m512i 913__attribute__ 
((__gnu_inline__, __always_inline__, __artificial__)) 914_mm512_slli_epi64 (__m512i __A, unsigned int __B) 915{ 916 return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B, 917 (__v8di) 918 _mm512_undefined_si512 (), 919 (__mmask8) -1); 920} 921 922extern __inline __m512i 923__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 924_mm512_mask_slli_epi64 (__m512i __W, __mmask8 __U, __m512i __A, 925 unsigned int __B) 926{ 927 return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B, 928 (__v8di) __W, 929 (__mmask8) __U); 930} 931 932extern __inline __m512i 933__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 934_mm512_maskz_slli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B) 935{ 936 return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B, 937 (__v8di) 938 _mm512_setzero_si512 (), 939 (__mmask8) __U); 940} 941#else 942#define _mm512_slli_epi64(X, C) \ 943 ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X), (int)(C),\ 944 (__v8di)(__m512i)_mm512_undefined_si512 (),\ 945 (__mmask8)-1)) 946 947#define _mm512_mask_slli_epi64(W, U, X, C) \ 948 ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X), (int)(C),\ 949 (__v8di)(__m512i)(W),\ 950 (__mmask8)(U))) 951 952#define _mm512_maskz_slli_epi64(U, X, C) \ 953 ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X), (int)(C),\ 954 (__v8di)(__m512i)_mm512_setzero_si512 (),\ 955 (__mmask8)(U))) 956#endif 957 958extern __inline __m512i 959__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 960_mm512_sll_epi64 (__m512i __A, __m128i __B) 961{ 962 return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A, 963 (__v2di) __B, 964 (__v8di) 965 _mm512_undefined_si512 (), 966 (__mmask8) -1); 967} 968 969extern __inline __m512i 970__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 971_mm512_mask_sll_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) 972{ 973 return (__m512i) 
__builtin_ia32_psllq512_mask ((__v8di) __A, 974 (__v2di) __B, 975 (__v8di) __W, 976 (__mmask8) __U); 977} 978 979extern __inline __m512i 980__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 981_mm512_maskz_sll_epi64 (__mmask8 __U, __m512i __A, __m128i __B) 982{ 983 return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A, 984 (__v2di) __B, 985 (__v8di) 986 _mm512_setzero_si512 (), 987 (__mmask8) __U); 988} 989 990#ifdef __OPTIMIZE__ 991extern __inline __m512i 992__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 993_mm512_srli_epi64 (__m512i __A, unsigned int __B) 994{ 995 return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B, 996 (__v8di) 997 _mm512_undefined_si512 (), 998 (__mmask8) -1); 999} 1000 1001extern __inline __m512i 1002__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1003_mm512_mask_srli_epi64 (__m512i __W, __mmask8 __U, 1004 __m512i __A, unsigned int __B) 1005{ 1006 return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B, 1007 (__v8di) __W, 1008 (__mmask8) __U); 1009} 1010 1011extern __inline __m512i 1012__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1013_mm512_maskz_srli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B) 1014{ 1015 return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B, 1016 (__v8di) 1017 _mm512_setzero_si512 (), 1018 (__mmask8) __U); 1019} 1020#else 1021#define _mm512_srli_epi64(X, C) \ 1022 ((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(X), (int)(C),\ 1023 (__v8di)(__m512i)_mm512_undefined_si512 (),\ 1024 (__mmask8)-1)) 1025 1026#define _mm512_mask_srli_epi64(W, U, X, C) \ 1027 ((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(X), (int)(C),\ 1028 (__v8di)(__m512i)(W),\ 1029 (__mmask8)(U))) 1030 1031#define _mm512_maskz_srli_epi64(U, X, C) \ 1032 ((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(X), (int)(C),\ 1033 (__v8di)(__m512i)_mm512_setzero_si512 (),\ 1034 (__mmask8)(U))) 1035#endif 
1036 1037extern __inline __m512i 1038__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1039_mm512_srl_epi64 (__m512i __A, __m128i __B) 1040{ 1041 return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A, 1042 (__v2di) __B, 1043 (__v8di) 1044 _mm512_undefined_si512 (), 1045 (__mmask8) -1); 1046} 1047 1048extern __inline __m512i 1049__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1050_mm512_mask_srl_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) 1051{ 1052 return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A, 1053 (__v2di) __B, 1054 (__v8di) __W, 1055 (__mmask8) __U); 1056} 1057 1058extern __inline __m512i 1059__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1060_mm512_maskz_srl_epi64 (__mmask8 __U, __m512i __A, __m128i __B) 1061{ 1062 return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A, 1063 (__v2di) __B, 1064 (__v8di) 1065 _mm512_setzero_si512 (), 1066 (__mmask8) __U); 1067} 1068 1069#ifdef __OPTIMIZE__ 1070extern __inline __m512i 1071__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1072_mm512_srai_epi64 (__m512i __A, unsigned int __B) 1073{ 1074 return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B, 1075 (__v8di) 1076 _mm512_undefined_si512 (), 1077 (__mmask8) -1); 1078} 1079 1080extern __inline __m512i 1081__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1082_mm512_mask_srai_epi64 (__m512i __W, __mmask8 __U, __m512i __A, 1083 unsigned int __B) 1084{ 1085 return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B, 1086 (__v8di) __W, 1087 (__mmask8) __U); 1088} 1089 1090extern __inline __m512i 1091__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1092_mm512_maskz_srai_epi64 (__mmask8 __U, __m512i __A, unsigned int __B) 1093{ 1094 return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B, 1095 (__v8di) 1096 _mm512_setzero_si512 (), 1097 (__mmask8) __U); 1098} 1099#else 1100#define 
_mm512_srai_epi64(X, C) \ 1101 ((__m512i) __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(X), (int)(C),\ 1102 (__v8di)(__m512i)_mm512_undefined_si512 (),\ 1103 (__mmask8)-1)) 1104 1105#define _mm512_mask_srai_epi64(W, U, X, C) \ 1106 ((__m512i) __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(X), (int)(C),\ 1107 (__v8di)(__m512i)(W),\ 1108 (__mmask8)(U))) 1109 1110#define _mm512_maskz_srai_epi64(U, X, C) \ 1111 ((__m512i) __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(X), (int)(C),\ 1112 (__v8di)(__m512i)_mm512_setzero_si512 (),\ 1113 (__mmask8)(U))) 1114#endif 1115 1116extern __inline __m512i 1117__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1118_mm512_sra_epi64 (__m512i __A, __m128i __B) 1119{ 1120 return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A, 1121 (__v2di) __B, 1122 (__v8di) 1123 _mm512_undefined_si512 (), 1124 (__mmask8) -1); 1125} 1126 1127extern __inline __m512i 1128__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1129_mm512_mask_sra_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) 1130{ 1131 return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A, 1132 (__v2di) __B, 1133 (__v8di) __W, 1134 (__mmask8) __U); 1135} 1136 1137extern __inline __m512i 1138__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1139_mm512_maskz_sra_epi64 (__mmask8 __U, __m512i __A, __m128i __B) 1140{ 1141 return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A, 1142 (__v2di) __B, 1143 (__v8di) 1144 _mm512_setzero_si512 (), 1145 (__mmask8) __U); 1146} 1147 1148#ifdef __OPTIMIZE__ 1149extern __inline __m512i 1150__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1151_mm512_slli_epi32 (__m512i __A, unsigned int __B) 1152{ 1153 return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B, 1154 (__v16si) 1155 _mm512_undefined_si512 (), 1156 (__mmask16) -1); 1157} 1158 1159extern __inline __m512i 1160__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 
1161_mm512_mask_slli_epi32 (__m512i __W, __mmask16 __U, __m512i __A, 1162 unsigned int __B) 1163{ 1164 return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B, 1165 (__v16si) __W, 1166 (__mmask16) __U); 1167} 1168 1169extern __inline __m512i 1170__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1171_mm512_maskz_slli_epi32 (__mmask16 __U, __m512i __A, unsigned int __B) 1172{ 1173 return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B, 1174 (__v16si) 1175 _mm512_setzero_si512 (), 1176 (__mmask16) __U); 1177} 1178#else 1179#define _mm512_slli_epi32(X, C) \ 1180 ((__m512i) __builtin_ia32_pslldi512_mask ((__v16si)(__m512i)(X), (int)(C),\ 1181 (__v16si)(__m512i)_mm512_undefined_si512 (),\ 1182 (__mmask16)-1)) 1183 1184#define _mm512_mask_slli_epi32(W, U, X, C) \ 1185 ((__m512i) __builtin_ia32_pslldi512_mask ((__v16si)(__m512i)(X), (int)(C),\ 1186 (__v16si)(__m512i)(W),\ 1187 (__mmask16)(U))) 1188 1189#define _mm512_maskz_slli_epi32(U, X, C) \ 1190 ((__m512i) __builtin_ia32_pslldi512_mask ((__v16si)(__m512i)(X), (int)(C),\ 1191 (__v16si)(__m512i)_mm512_setzero_si512 (),\ 1192 (__mmask16)(U))) 1193#endif 1194 1195extern __inline __m512i 1196__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1197_mm512_sll_epi32 (__m512i __A, __m128i __B) 1198{ 1199 return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A, 1200 (__v4si) __B, 1201 (__v16si) 1202 _mm512_undefined_si512 (), 1203 (__mmask16) -1); 1204} 1205 1206extern __inline __m512i 1207__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1208_mm512_mask_sll_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) 1209{ 1210 return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A, 1211 (__v4si) __B, 1212 (__v16si) __W, 1213 (__mmask16) __U); 1214} 1215 1216extern __inline __m512i 1217__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1218_mm512_maskz_sll_epi32 (__mmask16 __U, __m512i __A, __m128i __B) 1219{ 1220 return 
(__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A, 1221 (__v4si) __B, 1222 (__v16si) 1223 _mm512_setzero_si512 (), 1224 (__mmask16) __U); 1225} 1226 1227#ifdef __OPTIMIZE__ 1228extern __inline __m512i 1229__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1230_mm512_srli_epi32 (__m512i __A, unsigned int __B) 1231{ 1232 return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B, 1233 (__v16si) 1234 _mm512_undefined_si512 (), 1235 (__mmask16) -1); 1236} 1237 1238extern __inline __m512i 1239__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1240_mm512_mask_srli_epi32 (__m512i __W, __mmask16 __U, 1241 __m512i __A, unsigned int __B) 1242{ 1243 return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B, 1244 (__v16si) __W, 1245 (__mmask16) __U); 1246} 1247 1248extern __inline __m512i 1249__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1250_mm512_maskz_srli_epi32 (__mmask16 __U, __m512i __A, unsigned int __B) 1251{ 1252 return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B, 1253 (__v16si) 1254 _mm512_setzero_si512 (), 1255 (__mmask16) __U); 1256} 1257#else 1258#define _mm512_srli_epi32(X, C) \ 1259 ((__m512i) __builtin_ia32_psrldi512_mask ((__v16si)(__m512i)(X), (int)(C),\ 1260 (__v16si)(__m512i)_mm512_undefined_si512 (),\ 1261 (__mmask16)-1)) 1262 1263#define _mm512_mask_srli_epi32(W, U, X, C) \ 1264 ((__m512i) __builtin_ia32_psrldi512_mask ((__v16si)(__m512i)(X), (int)(C),\ 1265 (__v16si)(__m512i)(W),\ 1266 (__mmask16)(U))) 1267 1268#define _mm512_maskz_srli_epi32(U, X, C) \ 1269 ((__m512i) __builtin_ia32_psrldi512_mask ((__v16si)(__m512i)(X), (int)(C),\ 1270 (__v16si)(__m512i)_mm512_setzero_si512 (),\ 1271 (__mmask16)(U))) 1272#endif 1273 1274extern __inline __m512i 1275__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1276_mm512_srl_epi32 (__m512i __A, __m128i __B) 1277{ 1278 return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A, 1279 (__v4si) __B, 1280 
(__v16si) 1281 _mm512_undefined_si512 (), 1282 (__mmask16) -1); 1283} 1284 1285extern __inline __m512i 1286__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1287_mm512_mask_srl_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) 1288{ 1289 return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A, 1290 (__v4si) __B, 1291 (__v16si) __W, 1292 (__mmask16) __U); 1293} 1294 1295extern __inline __m512i 1296__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1297_mm512_maskz_srl_epi32 (__mmask16 __U, __m512i __A, __m128i __B) 1298{ 1299 return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A, 1300 (__v4si) __B, 1301 (__v16si) 1302 _mm512_setzero_si512 (), 1303 (__mmask16) __U); 1304} 1305 1306#ifdef __OPTIMIZE__ 1307extern __inline __m512i 1308__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1309_mm512_srai_epi32 (__m512i __A, unsigned int __B) 1310{ 1311 return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B, 1312 (__v16si) 1313 _mm512_undefined_si512 (), 1314 (__mmask16) -1); 1315} 1316 1317extern __inline __m512i 1318__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1319_mm512_mask_srai_epi32 (__m512i __W, __mmask16 __U, __m512i __A, 1320 unsigned int __B) 1321{ 1322 return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B, 1323 (__v16si) __W, 1324 (__mmask16) __U); 1325} 1326 1327extern __inline __m512i 1328__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1329_mm512_maskz_srai_epi32 (__mmask16 __U, __m512i __A, unsigned int __B) 1330{ 1331 return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B, 1332 (__v16si) 1333 _mm512_setzero_si512 (), 1334 (__mmask16) __U); 1335} 1336#else 1337#define _mm512_srai_epi32(X, C) \ 1338 ((__m512i) __builtin_ia32_psradi512_mask ((__v16si)(__m512i)(X), (int)(C),\ 1339 (__v16si)(__m512i)_mm512_undefined_si512 (),\ 1340 (__mmask16)-1)) 1341 1342#define _mm512_mask_srai_epi32(W, U, X, C) \ 1343 
((__m512i) __builtin_ia32_psradi512_mask ((__v16si)(__m512i)(X), (int)(C),\ 1344 (__v16si)(__m512i)(W),\ 1345 (__mmask16)(U))) 1346 1347#define _mm512_maskz_srai_epi32(U, X, C) \ 1348 ((__m512i) __builtin_ia32_psradi512_mask ((__v16si)(__m512i)(X), (int)(C),\ 1349 (__v16si)(__m512i)_mm512_setzero_si512 (),\ 1350 (__mmask16)(U))) 1351#endif 1352 1353extern __inline __m512i 1354__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1355_mm512_sra_epi32 (__m512i __A, __m128i __B) 1356{ 1357 return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A, 1358 (__v4si) __B, 1359 (__v16si) 1360 _mm512_undefined_si512 (), 1361 (__mmask16) -1); 1362} 1363 1364extern __inline __m512i 1365__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1366_mm512_mask_sra_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) 1367{ 1368 return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A, 1369 (__v4si) __B, 1370 (__v16si) __W, 1371 (__mmask16) __U); 1372} 1373 1374extern __inline __m512i 1375__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1376_mm512_maskz_sra_epi32 (__mmask16 __U, __m512i __A, __m128i __B) 1377{ 1378 return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A, 1379 (__v4si) __B, 1380 (__v16si) 1381 _mm512_setzero_si512 (), 1382 (__mmask16) __U); 1383} 1384 1385#ifdef __OPTIMIZE__ 1386extern __inline __m128d 1387__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1388_mm_add_round_sd (__m128d __A, __m128d __B, const int __R) 1389{ 1390 return (__m128d) __builtin_ia32_addsd_round ((__v2df) __A, 1391 (__v2df) __B, 1392 __R); 1393} 1394 1395extern __inline __m128 1396__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1397_mm_add_round_ss (__m128 __A, __m128 __B, const int __R) 1398{ 1399 return (__m128) __builtin_ia32_addss_round ((__v4sf) __A, 1400 (__v4sf) __B, 1401 __R); 1402} 1403 1404extern __inline __m128d 1405__attribute__ ((__gnu_inline__, __always_inline__, 
__artificial__)) 1406_mm_sub_round_sd (__m128d __A, __m128d __B, const int __R) 1407{ 1408 return (__m128d) __builtin_ia32_subsd_round ((__v2df) __A, 1409 (__v2df) __B, 1410 __R); 1411} 1412 1413extern __inline __m128 1414__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1415_mm_sub_round_ss (__m128 __A, __m128 __B, const int __R) 1416{ 1417 return (__m128) __builtin_ia32_subss_round ((__v4sf) __A, 1418 (__v4sf) __B, 1419 __R); 1420} 1421 1422#else 1423#define _mm_add_round_sd(A, B, C) \ 1424 (__m128d)__builtin_ia32_addsd_round(A, B, C) 1425 1426#define _mm_add_round_ss(A, B, C) \ 1427 (__m128)__builtin_ia32_addss_round(A, B, C) 1428 1429#define _mm_sub_round_sd(A, B, C) \ 1430 (__m128d)__builtin_ia32_subsd_round(A, B, C) 1431 1432#define _mm_sub_round_ss(A, B, C) \ 1433 (__m128)__builtin_ia32_subss_round(A, B, C) 1434#endif 1435 1436#ifdef __OPTIMIZE__ 1437extern __inline __m512i 1438__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1439_mm512_ternarylogic_epi64 (__m512i __A, __m512i __B, __m512i __C, const int imm) 1440{ 1441 return (__m512i) __builtin_ia32_pternlogq512_mask ((__v8di) __A, 1442 (__v8di) __B, 1443 (__v8di) __C, imm, 1444 (__mmask8) -1); 1445} 1446 1447extern __inline __m512i 1448__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1449_mm512_mask_ternarylogic_epi64 (__m512i __A, __mmask8 __U, __m512i __B, 1450 __m512i __C, const int imm) 1451{ 1452 return (__m512i) __builtin_ia32_pternlogq512_mask ((__v8di) __A, 1453 (__v8di) __B, 1454 (__v8di) __C, imm, 1455 (__mmask8) __U); 1456} 1457 1458extern __inline __m512i 1459__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1460_mm512_maskz_ternarylogic_epi64 (__mmask8 __U, __m512i __A, __m512i __B, 1461 __m512i __C, const int imm) 1462{ 1463 return (__m512i) __builtin_ia32_pternlogq512_maskz ((__v8di) __A, 1464 (__v8di) __B, 1465 (__v8di) __C, 1466 imm, (__mmask8) __U); 1467} 1468 1469extern __inline __m512i 1470__attribute__ 
((__gnu_inline__, __always_inline__, __artificial__)) 1471_mm512_ternarylogic_epi32 (__m512i __A, __m512i __B, __m512i __C, const int imm) 1472{ 1473 return (__m512i) __builtin_ia32_pternlogd512_mask ((__v16si) __A, 1474 (__v16si) __B, 1475 (__v16si) __C, 1476 imm, (__mmask16) -1); 1477} 1478 1479extern __inline __m512i 1480__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1481_mm512_mask_ternarylogic_epi32 (__m512i __A, __mmask16 __U, __m512i __B, 1482 __m512i __C, const int imm) 1483{ 1484 return (__m512i) __builtin_ia32_pternlogd512_mask ((__v16si) __A, 1485 (__v16si) __B, 1486 (__v16si) __C, 1487 imm, (__mmask16) __U); 1488} 1489 1490extern __inline __m512i 1491__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1492_mm512_maskz_ternarylogic_epi32 (__mmask16 __U, __m512i __A, __m512i __B, 1493 __m512i __C, const int imm) 1494{ 1495 return (__m512i) __builtin_ia32_pternlogd512_maskz ((__v16si) __A, 1496 (__v16si) __B, 1497 (__v16si) __C, 1498 imm, (__mmask16) __U); 1499} 1500#else 1501#define _mm512_ternarylogic_epi64(A, B, C, I) \ 1502 ((__m512i) __builtin_ia32_pternlogq512_mask ((__v8di)(__m512i)(A), \ 1503 (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), (int)(I), (__mmask8)-1)) 1504#define _mm512_mask_ternarylogic_epi64(A, U, B, C, I) \ 1505 ((__m512i) __builtin_ia32_pternlogq512_mask ((__v8di)(__m512i)(A), \ 1506 (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), (int)(I), (__mmask8)(U))) 1507#define _mm512_maskz_ternarylogic_epi64(U, A, B, C, I) \ 1508 ((__m512i) __builtin_ia32_pternlogq512_maskz ((__v8di)(__m512i)(A), \ 1509 (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), (int)(I), (__mmask8)(U))) 1510#define _mm512_ternarylogic_epi32(A, B, C, I) \ 1511 ((__m512i) __builtin_ia32_pternlogd512_mask ((__v16si)(__m512i)(A), \ 1512 (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), (int)(I), \ 1513 (__mmask16)-1)) 1514#define _mm512_mask_ternarylogic_epi32(A, U, B, C, I) \ 1515 ((__m512i) __builtin_ia32_pternlogd512_mask ((__v16si)(__m512i)(A), \ 
1516 (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), (int)(I), \ 1517 (__mmask16)(U))) 1518#define _mm512_maskz_ternarylogic_epi32(U, A, B, C, I) \ 1519 ((__m512i) __builtin_ia32_pternlogd512_maskz ((__v16si)(__m512i)(A), \ 1520 (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), (int)(I), \ 1521 (__mmask16)(U))) 1522#endif 1523 1524extern __inline __m512d 1525__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1526_mm512_rcp14_pd (__m512d __A) 1527{ 1528 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A, 1529 (__v8df) 1530 _mm512_undefined_pd (), 1531 (__mmask8) -1); 1532} 1533 1534extern __inline __m512d 1535__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1536_mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A) 1537{ 1538 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A, 1539 (__v8df) __W, 1540 (__mmask8) __U); 1541} 1542 1543extern __inline __m512d 1544__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1545_mm512_maskz_rcp14_pd (__mmask8 __U, __m512d __A) 1546{ 1547 return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A, 1548 (__v8df) 1549 _mm512_setzero_pd (), 1550 (__mmask8) __U); 1551} 1552 1553extern __inline __m512 1554__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1555_mm512_rcp14_ps (__m512 __A) 1556{ 1557 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A, 1558 (__v16sf) 1559 _mm512_undefined_ps (), 1560 (__mmask16) -1); 1561} 1562 1563extern __inline __m512 1564__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1565_mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A) 1566{ 1567 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A, 1568 (__v16sf) __W, 1569 (__mmask16) __U); 1570} 1571 1572extern __inline __m512 1573__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1574_mm512_maskz_rcp14_ps (__mmask16 __U, __m512 __A) 1575{ 1576 return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) 
__A, 1577 (__v16sf) 1578 _mm512_setzero_ps (), 1579 (__mmask16) __U); 1580} 1581 1582extern __inline __m128d 1583__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1584_mm_rcp14_sd (__m128d __A, __m128d __B) 1585{ 1586 return (__m128d) __builtin_ia32_rcp14sd ((__v2df) __B, 1587 (__v2df) __A); 1588} 1589 1590extern __inline __m128 1591__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1592_mm_rcp14_ss (__m128 __A, __m128 __B) 1593{ 1594 return (__m128) __builtin_ia32_rcp14ss ((__v4sf) __B, 1595 (__v4sf) __A); 1596} 1597 1598extern __inline __m512d 1599__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1600_mm512_rsqrt14_pd (__m512d __A) 1601{ 1602 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A, 1603 (__v8df) 1604 _mm512_undefined_pd (), 1605 (__mmask8) -1); 1606} 1607 1608extern __inline __m512d 1609__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1610_mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A) 1611{ 1612 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A, 1613 (__v8df) __W, 1614 (__mmask8) __U); 1615} 1616 1617extern __inline __m512d 1618__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1619_mm512_maskz_rsqrt14_pd (__mmask8 __U, __m512d __A) 1620{ 1621 return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A, 1622 (__v8df) 1623 _mm512_setzero_pd (), 1624 (__mmask8) __U); 1625} 1626 1627extern __inline __m512 1628__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1629_mm512_rsqrt14_ps (__m512 __A) 1630{ 1631 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A, 1632 (__v16sf) 1633 _mm512_undefined_ps (), 1634 (__mmask16) -1); 1635} 1636 1637extern __inline __m512 1638__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1639_mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A) 1640{ 1641 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A, 1642 (__v16sf) __W, 
1643 (__mmask16) __U); 1644} 1645 1646extern __inline __m512 1647__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1648_mm512_maskz_rsqrt14_ps (__mmask16 __U, __m512 __A) 1649{ 1650 return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A, 1651 (__v16sf) 1652 _mm512_setzero_ps (), 1653 (__mmask16) __U); 1654} 1655 1656extern __inline __m128d 1657__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1658_mm_rsqrt14_sd (__m128d __A, __m128d __B) 1659{ 1660 return (__m128d) __builtin_ia32_rsqrt14sd ((__v2df) __B, 1661 (__v2df) __A); 1662} 1663 1664extern __inline __m128 1665__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1666_mm_rsqrt14_ss (__m128 __A, __m128 __B) 1667{ 1668 return (__m128) __builtin_ia32_rsqrt14ss ((__v4sf) __B, 1669 (__v4sf) __A); 1670} 1671 1672#ifdef __OPTIMIZE__ 1673extern __inline __m512d 1674__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1675_mm512_sqrt_round_pd (__m512d __A, const int __R) 1676{ 1677 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A, 1678 (__v8df) 1679 _mm512_undefined_pd (), 1680 (__mmask8) -1, __R); 1681} 1682 1683extern __inline __m512d 1684__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1685_mm512_mask_sqrt_round_pd (__m512d __W, __mmask8 __U, __m512d __A, 1686 const int __R) 1687{ 1688 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A, 1689 (__v8df) __W, 1690 (__mmask8) __U, __R); 1691} 1692 1693extern __inline __m512d 1694__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1695_mm512_maskz_sqrt_round_pd (__mmask8 __U, __m512d __A, const int __R) 1696{ 1697 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A, 1698 (__v8df) 1699 _mm512_setzero_pd (), 1700 (__mmask8) __U, __R); 1701} 1702 1703extern __inline __m512 1704__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1705_mm512_sqrt_round_ps (__m512 __A, const int __R) 1706{ 1707 return (__m512) 
__builtin_ia32_sqrtps512_mask ((__v16sf) __A, 1708 (__v16sf) 1709 _mm512_undefined_ps (), 1710 (__mmask16) -1, __R); 1711} 1712 1713extern __inline __m512 1714__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1715_mm512_mask_sqrt_round_ps (__m512 __W, __mmask16 __U, __m512 __A, const int __R) 1716{ 1717 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A, 1718 (__v16sf) __W, 1719 (__mmask16) __U, __R); 1720} 1721 1722extern __inline __m512 1723__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1724_mm512_maskz_sqrt_round_ps (__mmask16 __U, __m512 __A, const int __R) 1725{ 1726 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A, 1727 (__v16sf) 1728 _mm512_setzero_ps (), 1729 (__mmask16) __U, __R); 1730} 1731 1732extern __inline __m128d 1733__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1734_mm_sqrt_round_sd (__m128d __A, __m128d __B, const int __R) 1735{ 1736 return (__m128d) __builtin_ia32_sqrtsd_round ((__v2df) __B, 1737 (__v2df) __A, 1738 __R); 1739} 1740 1741extern __inline __m128 1742__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1743_mm_sqrt_round_ss (__m128 __A, __m128 __B, const int __R) 1744{ 1745 return (__m128) __builtin_ia32_sqrtss_round ((__v4sf) __B, 1746 (__v4sf) __A, 1747 __R); 1748} 1749#else 1750#define _mm512_sqrt_round_pd(A, C) \ 1751 (__m512d)__builtin_ia32_sqrtpd512_mask(A, (__v8df)_mm512_undefined_pd(), -1, C) 1752 1753#define _mm512_mask_sqrt_round_pd(W, U, A, C) \ 1754 (__m512d)__builtin_ia32_sqrtpd512_mask(A, W, U, C) 1755 1756#define _mm512_maskz_sqrt_round_pd(U, A, C) \ 1757 (__m512d)__builtin_ia32_sqrtpd512_mask(A, (__v8df)_mm512_setzero_pd(), U, C) 1758 1759#define _mm512_sqrt_round_ps(A, C) \ 1760 (__m512)__builtin_ia32_sqrtps512_mask(A, (__v16sf)_mm512_undefined_ps(), -1, C) 1761 1762#define _mm512_mask_sqrt_round_ps(W, U, A, C) \ 1763 (__m512)__builtin_ia32_sqrtps512_mask(A, W, U, C) 1764 1765#define _mm512_maskz_sqrt_round_ps(U, A, C) \ 
1766 (__m512)__builtin_ia32_sqrtps512_mask(A, (__v16sf)_mm512_setzero_ps(), U, C) 1767 1768#define _mm_sqrt_round_sd(A, B, C) \ 1769 (__m128d)__builtin_ia32_sqrtsd_round(A, B, C) 1770 1771#define _mm_sqrt_round_ss(A, B, C) \ 1772 (__m128)__builtin_ia32_sqrtss_round(A, B, C) 1773#endif 1774 1775extern __inline __m512i 1776__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1777_mm512_cvtepi8_epi32 (__m128i __A) 1778{ 1779 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A, 1780 (__v16si) 1781 _mm512_undefined_si512 (), 1782 (__mmask16) -1); 1783} 1784 1785extern __inline __m512i 1786__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1787_mm512_mask_cvtepi8_epi32 (__m512i __W, __mmask16 __U, __m128i __A) 1788{ 1789 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A, 1790 (__v16si) __W, 1791 (__mmask16) __U); 1792} 1793 1794extern __inline __m512i 1795__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1796_mm512_maskz_cvtepi8_epi32 (__mmask16 __U, __m128i __A) 1797{ 1798 return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A, 1799 (__v16si) 1800 _mm512_setzero_si512 (), 1801 (__mmask16) __U); 1802} 1803 1804extern __inline __m512i 1805__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1806_mm512_cvtepi8_epi64 (__m128i __A) 1807{ 1808 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A, 1809 (__v8di) 1810 _mm512_undefined_si512 (), 1811 (__mmask8) -1); 1812} 1813 1814extern __inline __m512i 1815__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1816_mm512_mask_cvtepi8_epi64 (__m512i __W, __mmask8 __U, __m128i __A) 1817{ 1818 return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A, 1819 (__v8di) __W, 1820 (__mmask8) __U); 1821} 1822 1823extern __inline __m512i 1824__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1825_mm512_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A) 1826{ 1827 return (__m512i) 
__builtin_ia32_pmovsxbq512_mask ((__v16qi) __A, 1828 (__v8di) 1829 _mm512_setzero_si512 (), 1830 (__mmask8) __U); 1831} 1832 1833extern __inline __m512i 1834__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1835_mm512_cvtepi16_epi32 (__m256i __A) 1836{ 1837 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A, 1838 (__v16si) 1839 _mm512_undefined_si512 (), 1840 (__mmask16) -1); 1841} 1842 1843extern __inline __m512i 1844__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1845_mm512_mask_cvtepi16_epi32 (__m512i __W, __mmask16 __U, __m256i __A) 1846{ 1847 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A, 1848 (__v16si) __W, 1849 (__mmask16) __U); 1850} 1851 1852extern __inline __m512i 1853__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1854_mm512_maskz_cvtepi16_epi32 (__mmask16 __U, __m256i __A) 1855{ 1856 return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A, 1857 (__v16si) 1858 _mm512_setzero_si512 (), 1859 (__mmask16) __U); 1860} 1861 1862extern __inline __m512i 1863__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1864_mm512_cvtepi16_epi64 (__m128i __A) 1865{ 1866 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A, 1867 (__v8di) 1868 _mm512_undefined_si512 (), 1869 (__mmask8) -1); 1870} 1871 1872extern __inline __m512i 1873__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1874_mm512_mask_cvtepi16_epi64 (__m512i __W, __mmask8 __U, __m128i __A) 1875{ 1876 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A, 1877 (__v8di) __W, 1878 (__mmask8) __U); 1879} 1880 1881extern __inline __m512i 1882__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1883_mm512_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A) 1884{ 1885 return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A, 1886 (__v8di) 1887 _mm512_setzero_si512 (), 1888 (__mmask8) __U); 1889} 1890 1891extern __inline __m512i 1892__attribute__ 
((__gnu_inline__, __always_inline__, __artificial__)) 1893_mm512_cvtepi32_epi64 (__m256i __X) 1894{ 1895 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X, 1896 (__v8di) 1897 _mm512_undefined_si512 (), 1898 (__mmask8) -1); 1899} 1900 1901extern __inline __m512i 1902__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1903_mm512_mask_cvtepi32_epi64 (__m512i __W, __mmask8 __U, __m256i __X) 1904{ 1905 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X, 1906 (__v8di) __W, 1907 (__mmask8) __U); 1908} 1909 1910extern __inline __m512i 1911__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1912_mm512_maskz_cvtepi32_epi64 (__mmask8 __U, __m256i __X) 1913{ 1914 return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X, 1915 (__v8di) 1916 _mm512_setzero_si512 (), 1917 (__mmask8) __U); 1918} 1919 1920extern __inline __m512i 1921__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1922_mm512_cvtepu8_epi32 (__m128i __A) 1923{ 1924 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A, 1925 (__v16si) 1926 _mm512_undefined_si512 (), 1927 (__mmask16) -1); 1928} 1929 1930extern __inline __m512i 1931__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1932_mm512_mask_cvtepu8_epi32 (__m512i __W, __mmask16 __U, __m128i __A) 1933{ 1934 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A, 1935 (__v16si) __W, 1936 (__mmask16) __U); 1937} 1938 1939extern __inline __m512i 1940__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1941_mm512_maskz_cvtepu8_epi32 (__mmask16 __U, __m128i __A) 1942{ 1943 return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A, 1944 (__v16si) 1945 _mm512_setzero_si512 (), 1946 (__mmask16) __U); 1947} 1948 1949extern __inline __m512i 1950__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1951_mm512_cvtepu8_epi64 (__m128i __A) 1952{ 1953 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A, 1954 (__v8di) 
1955 _mm512_undefined_si512 (), 1956 (__mmask8) -1); 1957} 1958 1959extern __inline __m512i 1960__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1961_mm512_mask_cvtepu8_epi64 (__m512i __W, __mmask8 __U, __m128i __A) 1962{ 1963 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A, 1964 (__v8di) __W, 1965 (__mmask8) __U); 1966} 1967 1968extern __inline __m512i 1969__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1970_mm512_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A) 1971{ 1972 return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A, 1973 (__v8di) 1974 _mm512_setzero_si512 (), 1975 (__mmask8) __U); 1976} 1977 1978extern __inline __m512i 1979__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1980_mm512_cvtepu16_epi32 (__m256i __A) 1981{ 1982 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A, 1983 (__v16si) 1984 _mm512_undefined_si512 (), 1985 (__mmask16) -1); 1986} 1987 1988extern __inline __m512i 1989__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1990_mm512_mask_cvtepu16_epi32 (__m512i __W, __mmask16 __U, __m256i __A) 1991{ 1992 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A, 1993 (__v16si) __W, 1994 (__mmask16) __U); 1995} 1996 1997extern __inline __m512i 1998__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1999_mm512_maskz_cvtepu16_epi32 (__mmask16 __U, __m256i __A) 2000{ 2001 return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A, 2002 (__v16si) 2003 _mm512_setzero_si512 (), 2004 (__mmask16) __U); 2005} 2006 2007extern __inline __m512i 2008__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2009_mm512_cvtepu16_epi64 (__m128i __A) 2010{ 2011 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A, 2012 (__v8di) 2013 _mm512_undefined_si512 (), 2014 (__mmask8) -1); 2015} 2016 2017extern __inline __m512i 2018__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 
2019_mm512_mask_cvtepu16_epi64 (__m512i __W, __mmask8 __U, __m128i __A) 2020{ 2021 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A, 2022 (__v8di) __W, 2023 (__mmask8) __U); 2024} 2025 2026extern __inline __m512i 2027__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2028_mm512_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A) 2029{ 2030 return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A, 2031 (__v8di) 2032 _mm512_setzero_si512 (), 2033 (__mmask8) __U); 2034} 2035 2036extern __inline __m512i 2037__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2038_mm512_cvtepu32_epi64 (__m256i __X) 2039{ 2040 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X, 2041 (__v8di) 2042 _mm512_undefined_si512 (), 2043 (__mmask8) -1); 2044} 2045 2046extern __inline __m512i 2047__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2048_mm512_mask_cvtepu32_epi64 (__m512i __W, __mmask8 __U, __m256i __X) 2049{ 2050 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X, 2051 (__v8di) __W, 2052 (__mmask8) __U); 2053} 2054 2055extern __inline __m512i 2056__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2057_mm512_maskz_cvtepu32_epi64 (__mmask8 __U, __m256i __X) 2058{ 2059 return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X, 2060 (__v8di) 2061 _mm512_setzero_si512 (), 2062 (__mmask8) __U); 2063} 2064 2065#ifdef __OPTIMIZE__ 2066extern __inline __m512d 2067__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2068_mm512_add_round_pd (__m512d __A, __m512d __B, const int __R) 2069{ 2070 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A, 2071 (__v8df) __B, 2072 (__v8df) 2073 _mm512_undefined_pd (), 2074 (__mmask8) -1, __R); 2075} 2076 2077extern __inline __m512d 2078__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2079_mm512_mask_add_round_pd (__m512d __W, __mmask8 __U, __m512d __A, 2080 __m512d __B, const int __R) 2081{ 2082 return 
(__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A, 2083 (__v8df) __B, 2084 (__v8df) __W, 2085 (__mmask8) __U, __R); 2086} 2087 2088extern __inline __m512d 2089__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2090_mm512_maskz_add_round_pd (__mmask8 __U, __m512d __A, __m512d __B, 2091 const int __R) 2092{ 2093 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A, 2094 (__v8df) __B, 2095 (__v8df) 2096 _mm512_setzero_pd (), 2097 (__mmask8) __U, __R); 2098} 2099 2100extern __inline __m512 2101__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2102_mm512_add_round_ps (__m512 __A, __m512 __B, const int __R) 2103{ 2104 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A, 2105 (__v16sf) __B, 2106 (__v16sf) 2107 _mm512_undefined_ps (), 2108 (__mmask16) -1, __R); 2109} 2110 2111extern __inline __m512 2112__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2113_mm512_mask_add_round_ps (__m512 __W, __mmask16 __U, __m512 __A, 2114 __m512 __B, const int __R) 2115{ 2116 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A, 2117 (__v16sf) __B, 2118 (__v16sf) __W, 2119 (__mmask16) __U, __R); 2120} 2121 2122extern __inline __m512 2123__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2124_mm512_maskz_add_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R) 2125{ 2126 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A, 2127 (__v16sf) __B, 2128 (__v16sf) 2129 _mm512_setzero_ps (), 2130 (__mmask16) __U, __R); 2131} 2132 2133extern __inline __m512d 2134__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2135_mm512_sub_round_pd (__m512d __A, __m512d __B, const int __R) 2136{ 2137 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A, 2138 (__v8df) __B, 2139 (__v8df) 2140 _mm512_undefined_pd (), 2141 (__mmask8) -1, __R); 2142} 2143 2144extern __inline __m512d 2145__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 
2146_mm512_mask_sub_round_pd (__m512d __W, __mmask8 __U, __m512d __A, 2147 __m512d __B, const int __R) 2148{ 2149 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A, 2150 (__v8df) __B, 2151 (__v8df) __W, 2152 (__mmask8) __U, __R); 2153} 2154 2155extern __inline __m512d 2156__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2157_mm512_maskz_sub_round_pd (__mmask8 __U, __m512d __A, __m512d __B, 2158 const int __R) 2159{ 2160 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A, 2161 (__v8df) __B, 2162 (__v8df) 2163 _mm512_setzero_pd (), 2164 (__mmask8) __U, __R); 2165} 2166 2167extern __inline __m512 2168__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2169_mm512_sub_round_ps (__m512 __A, __m512 __B, const int __R) 2170{ 2171 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A, 2172 (__v16sf) __B, 2173 (__v16sf) 2174 _mm512_undefined_ps (), 2175 (__mmask16) -1, __R); 2176} 2177 2178extern __inline __m512 2179__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2180_mm512_mask_sub_round_ps (__m512 __W, __mmask16 __U, __m512 __A, 2181 __m512 __B, const int __R) 2182{ 2183 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A, 2184 (__v16sf) __B, 2185 (__v16sf) __W, 2186 (__mmask16) __U, __R); 2187} 2188 2189extern __inline __m512 2190__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2191_mm512_maskz_sub_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R) 2192{ 2193 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A, 2194 (__v16sf) __B, 2195 (__v16sf) 2196 _mm512_setzero_ps (), 2197 (__mmask16) __U, __R); 2198} 2199#else 2200#define _mm512_add_round_pd(A, B, C) \ 2201 (__m512d)__builtin_ia32_addpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C) 2202 2203#define _mm512_mask_add_round_pd(W, U, A, B, C) \ 2204 (__m512d)__builtin_ia32_addpd512_mask(A, B, W, U, C) 2205 2206#define _mm512_maskz_add_round_pd(U, A, B, C) \ 2207 
(__m512d)__builtin_ia32_addpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C) 2208 2209#define _mm512_add_round_ps(A, B, C) \ 2210 (__m512)__builtin_ia32_addps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C) 2211 2212#define _mm512_mask_add_round_ps(W, U, A, B, C) \ 2213 (__m512)__builtin_ia32_addps512_mask(A, B, W, U, C) 2214 2215#define _mm512_maskz_add_round_ps(U, A, B, C) \ 2216 (__m512)__builtin_ia32_addps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C) 2217 2218#define _mm512_sub_round_pd(A, B, C) \ 2219 (__m512d)__builtin_ia32_subpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C) 2220 2221#define _mm512_mask_sub_round_pd(W, U, A, B, C) \ 2222 (__m512d)__builtin_ia32_subpd512_mask(A, B, W, U, C) 2223 2224#define _mm512_maskz_sub_round_pd(U, A, B, C) \ 2225 (__m512d)__builtin_ia32_subpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C) 2226 2227#define _mm512_sub_round_ps(A, B, C) \ 2228 (__m512)__builtin_ia32_subps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C) 2229 2230#define _mm512_mask_sub_round_ps(W, U, A, B, C) \ 2231 (__m512)__builtin_ia32_subps512_mask(A, B, W, U, C) 2232 2233#define _mm512_maskz_sub_round_ps(U, A, B, C) \ 2234 (__m512)__builtin_ia32_subps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C) 2235#endif 2236 2237#ifdef __OPTIMIZE__ 2238extern __inline __m512d 2239__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2240_mm512_mul_round_pd (__m512d __A, __m512d __B, const int __R) 2241{ 2242 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A, 2243 (__v8df) __B, 2244 (__v8df) 2245 _mm512_undefined_pd (), 2246 (__mmask8) -1, __R); 2247} 2248 2249extern __inline __m512d 2250__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2251_mm512_mask_mul_round_pd (__m512d __W, __mmask8 __U, __m512d __A, 2252 __m512d __B, const int __R) 2253{ 2254 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A, 2255 (__v8df) __B, 2256 (__v8df) __W, 2257 (__mmask8) __U, __R); 2258} 2259 2260extern 
__inline __m512d 2261__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2262_mm512_maskz_mul_round_pd (__mmask8 __U, __m512d __A, __m512d __B, 2263 const int __R) 2264{ 2265 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A, 2266 (__v8df) __B, 2267 (__v8df) 2268 _mm512_setzero_pd (), 2269 (__mmask8) __U, __R); 2270} 2271 2272extern __inline __m512 2273__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2274_mm512_mul_round_ps (__m512 __A, __m512 __B, const int __R) 2275{ 2276 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A, 2277 (__v16sf) __B, 2278 (__v16sf) 2279 _mm512_undefined_ps (), 2280 (__mmask16) -1, __R); 2281} 2282 2283extern __inline __m512 2284__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2285_mm512_mask_mul_round_ps (__m512 __W, __mmask16 __U, __m512 __A, 2286 __m512 __B, const int __R) 2287{ 2288 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A, 2289 (__v16sf) __B, 2290 (__v16sf) __W, 2291 (__mmask16) __U, __R); 2292} 2293 2294extern __inline __m512 2295__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2296_mm512_maskz_mul_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R) 2297{ 2298 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A, 2299 (__v16sf) __B, 2300 (__v16sf) 2301 _mm512_setzero_ps (), 2302 (__mmask16) __U, __R); 2303} 2304 2305extern __inline __m512d 2306__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2307_mm512_div_round_pd (__m512d __M, __m512d __V, const int __R) 2308{ 2309 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M, 2310 (__v8df) __V, 2311 (__v8df) 2312 _mm512_undefined_pd (), 2313 (__mmask8) -1, __R); 2314} 2315 2316extern __inline __m512d 2317__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2318_mm512_mask_div_round_pd (__m512d __W, __mmask8 __U, __m512d __M, 2319 __m512d __V, const int __R) 2320{ 2321 return (__m512d) __builtin_ia32_divpd512_mask 
((__v8df) __M, 2322 (__v8df) __V, 2323 (__v8df) __W, 2324 (__mmask8) __U, __R); 2325} 2326 2327extern __inline __m512d 2328__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2329_mm512_maskz_div_round_pd (__mmask8 __U, __m512d __M, __m512d __V, 2330 const int __R) 2331{ 2332 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M, 2333 (__v8df) __V, 2334 (__v8df) 2335 _mm512_setzero_pd (), 2336 (__mmask8) __U, __R); 2337} 2338 2339extern __inline __m512 2340__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2341_mm512_div_round_ps (__m512 __A, __m512 __B, const int __R) 2342{ 2343 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A, 2344 (__v16sf) __B, 2345 (__v16sf) 2346 _mm512_undefined_ps (), 2347 (__mmask16) -1, __R); 2348} 2349 2350extern __inline __m512 2351__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2352_mm512_mask_div_round_ps (__m512 __W, __mmask16 __U, __m512 __A, 2353 __m512 __B, const int __R) 2354{ 2355 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A, 2356 (__v16sf) __B, 2357 (__v16sf) __W, 2358 (__mmask16) __U, __R); 2359} 2360 2361extern __inline __m512 2362__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2363_mm512_maskz_div_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R) 2364{ 2365 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A, 2366 (__v16sf) __B, 2367 (__v16sf) 2368 _mm512_setzero_ps (), 2369 (__mmask16) __U, __R); 2370} 2371 2372extern __inline __m128d 2373__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2374_mm_mul_round_sd (__m128d __A, __m128d __B, const int __R) 2375{ 2376 return (__m128d) __builtin_ia32_mulsd_round ((__v2df) __A, 2377 (__v2df) __B, 2378 __R); 2379} 2380 2381extern __inline __m128 2382__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2383_mm_mul_round_ss (__m128 __A, __m128 __B, const int __R) 2384{ 2385 return (__m128) __builtin_ia32_mulss_round ((__v4sf) __A, 
2386 (__v4sf) __B, 2387 __R); 2388} 2389 2390extern __inline __m128d 2391__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2392_mm_div_round_sd (__m128d __A, __m128d __B, const int __R) 2393{ 2394 return (__m128d) __builtin_ia32_divsd_round ((__v2df) __A, 2395 (__v2df) __B, 2396 __R); 2397} 2398 2399extern __inline __m128 2400__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2401_mm_div_round_ss (__m128 __A, __m128 __B, const int __R) 2402{ 2403 return (__m128) __builtin_ia32_divss_round ((__v4sf) __A, 2404 (__v4sf) __B, 2405 __R); 2406} 2407 2408#else 2409#define _mm512_mul_round_pd(A, B, C) \ 2410 (__m512d)__builtin_ia32_mulpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C) 2411 2412#define _mm512_mask_mul_round_pd(W, U, A, B, C) \ 2413 (__m512d)__builtin_ia32_mulpd512_mask(A, B, W, U, C) 2414 2415#define _mm512_maskz_mul_round_pd(U, A, B, C) \ 2416 (__m512d)__builtin_ia32_mulpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C) 2417 2418#define _mm512_mul_round_ps(A, B, C) \ 2419 (__m512)__builtin_ia32_mulps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C) 2420 2421#define _mm512_mask_mul_round_ps(W, U, A, B, C) \ 2422 (__m512)__builtin_ia32_mulps512_mask(A, B, W, U, C) 2423 2424#define _mm512_maskz_mul_round_ps(U, A, B, C) \ 2425 (__m512)__builtin_ia32_mulps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C) 2426 2427#define _mm512_div_round_pd(A, B, C) \ 2428 (__m512d)__builtin_ia32_divpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C) 2429 2430#define _mm512_mask_div_round_pd(W, U, A, B, C) \ 2431 (__m512d)__builtin_ia32_divpd512_mask(A, B, W, U, C) 2432 2433#define _mm512_maskz_div_round_pd(U, A, B, C) \ 2434 (__m512d)__builtin_ia32_divpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C) 2435 2436#define _mm512_div_round_ps(A, B, C) \ 2437 (__m512)__builtin_ia32_divps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C) 2438 2439#define _mm512_mask_div_round_ps(W, U, A, B, C) \ 2440 
(__m512)__builtin_ia32_divps512_mask(A, B, W, U, C) 2441 2442#define _mm512_maskz_div_round_ps(U, A, B, C) \ 2443 (__m512)__builtin_ia32_divps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C) 2444 2445#define _mm_mul_round_sd(A, B, C) \ 2446 (__m128d)__builtin_ia32_mulsd_round(A, B, C) 2447 2448#define _mm_mul_round_ss(A, B, C) \ 2449 (__m128)__builtin_ia32_mulss_round(A, B, C) 2450 2451#define _mm_div_round_sd(A, B, C) \ 2452 (__m128d)__builtin_ia32_divsd_round(A, B, C) 2453 2454#define _mm_div_round_ss(A, B, C) \ 2455 (__m128)__builtin_ia32_divss_round(A, B, C) 2456#endif 2457 2458#ifdef __OPTIMIZE__ 2459extern __inline __m512d 2460__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2461_mm512_max_round_pd (__m512d __A, __m512d __B, const int __R) 2462{ 2463 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A, 2464 (__v8df) __B, 2465 (__v8df) 2466 _mm512_undefined_pd (), 2467 (__mmask8) -1, __R); 2468} 2469 2470extern __inline __m512d 2471__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2472_mm512_mask_max_round_pd (__m512d __W, __mmask8 __U, __m512d __A, 2473 __m512d __B, const int __R) 2474{ 2475 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A, 2476 (__v8df) __B, 2477 (__v8df) __W, 2478 (__mmask8) __U, __R); 2479} 2480 2481extern __inline __m512d 2482__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2483_mm512_maskz_max_round_pd (__mmask8 __U, __m512d __A, __m512d __B, 2484 const int __R) 2485{ 2486 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A, 2487 (__v8df) __B, 2488 (__v8df) 2489 _mm512_setzero_pd (), 2490 (__mmask8) __U, __R); 2491} 2492 2493extern __inline __m512 2494__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2495_mm512_max_round_ps (__m512 __A, __m512 __B, const int __R) 2496{ 2497 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A, 2498 (__v16sf) __B, 2499 (__v16sf) 2500 _mm512_undefined_ps (), 2501 (__mmask16) -1, __R); 2502} 2503 
2504extern __inline __m512 2505__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2506_mm512_mask_max_round_ps (__m512 __W, __mmask16 __U, __m512 __A, 2507 __m512 __B, const int __R) 2508{ 2509 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A, 2510 (__v16sf) __B, 2511 (__v16sf) __W, 2512 (__mmask16) __U, __R); 2513} 2514 2515extern __inline __m512 2516__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2517_mm512_maskz_max_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R) 2518{ 2519 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A, 2520 (__v16sf) __B, 2521 (__v16sf) 2522 _mm512_setzero_ps (), 2523 (__mmask16) __U, __R); 2524} 2525 2526extern __inline __m512d 2527__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2528_mm512_min_round_pd (__m512d __A, __m512d __B, const int __R) 2529{ 2530 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A, 2531 (__v8df) __B, 2532 (__v8df) 2533 _mm512_undefined_pd (), 2534 (__mmask8) -1, __R); 2535} 2536 2537extern __inline __m512d 2538__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2539_mm512_mask_min_round_pd (__m512d __W, __mmask8 __U, __m512d __A, 2540 __m512d __B, const int __R) 2541{ 2542 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A, 2543 (__v8df) __B, 2544 (__v8df) __W, 2545 (__mmask8) __U, __R); 2546} 2547 2548extern __inline __m512d 2549__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2550_mm512_maskz_min_round_pd (__mmask8 __U, __m512d __A, __m512d __B, 2551 const int __R) 2552{ 2553 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A, 2554 (__v8df) __B, 2555 (__v8df) 2556 _mm512_setzero_pd (), 2557 (__mmask8) __U, __R); 2558} 2559 2560extern __inline __m512 2561__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2562_mm512_min_round_ps (__m512 __A, __m512 __B, const int __R) 2563{ 2564 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A, 2565 
(__v16sf) __B, 2566 (__v16sf) 2567 _mm512_undefined_ps (), 2568 (__mmask16) -1, __R); 2569} 2570 2571extern __inline __m512 2572__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2573_mm512_mask_min_round_ps (__m512 __W, __mmask16 __U, __m512 __A, 2574 __m512 __B, const int __R) 2575{ 2576 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A, 2577 (__v16sf) __B, 2578 (__v16sf) __W, 2579 (__mmask16) __U, __R); 2580} 2581 2582extern __inline __m512 2583__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2584_mm512_maskz_min_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R) 2585{ 2586 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A, 2587 (__v16sf) __B, 2588 (__v16sf) 2589 _mm512_setzero_ps (), 2590 (__mmask16) __U, __R); 2591} 2592#else 2593#define _mm512_max_round_pd(A, B, R) \ 2594 (__m512d)__builtin_ia32_maxpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, R) 2595 2596#define _mm512_mask_max_round_pd(W, U, A, B, R) \ 2597 (__m512d)__builtin_ia32_maxpd512_mask(A, B, W, U, R) 2598 2599#define _mm512_maskz_max_round_pd(U, A, B, R) \ 2600 (__m512d)__builtin_ia32_maxpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, R) 2601 2602#define _mm512_max_round_ps(A, B, R) \ 2603 (__m512)__builtin_ia32_maxps512_mask(A, B, (__v16sf)_mm512_undefined_pd(), -1, R) 2604 2605#define _mm512_mask_max_round_ps(W, U, A, B, R) \ 2606 (__m512)__builtin_ia32_maxps512_mask(A, B, W, U, R) 2607 2608#define _mm512_maskz_max_round_ps(U, A, B, R) \ 2609 (__m512)__builtin_ia32_maxps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, R) 2610 2611#define _mm512_min_round_pd(A, B, R) \ 2612 (__m512d)__builtin_ia32_minpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, R) 2613 2614#define _mm512_mask_min_round_pd(W, U, A, B, R) \ 2615 (__m512d)__builtin_ia32_minpd512_mask(A, B, W, U, R) 2616 2617#define _mm512_maskz_min_round_pd(U, A, B, R) \ 2618 (__m512d)__builtin_ia32_minpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, R) 2619 2620#define 
_mm512_min_round_ps(A, B, R) \ 2621 (__m512)__builtin_ia32_minps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, R) 2622 2623#define _mm512_mask_min_round_ps(W, U, A, B, R) \ 2624 (__m512)__builtin_ia32_minps512_mask(A, B, W, U, R) 2625 2626#define _mm512_maskz_min_round_ps(U, A, B, R) \ 2627 (__m512)__builtin_ia32_minps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, R) 2628#endif 2629 2630#ifdef __OPTIMIZE__ 2631extern __inline __m512d 2632__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2633_mm512_scalef_round_pd (__m512d __A, __m512d __B, const int __R) 2634{ 2635 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A, 2636 (__v8df) __B, 2637 (__v8df) 2638 _mm512_undefined_pd (), 2639 (__mmask8) -1, __R); 2640} 2641 2642extern __inline __m512d 2643__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2644_mm512_mask_scalef_round_pd (__m512d __W, __mmask8 __U, __m512d __A, 2645 __m512d __B, const int __R) 2646{ 2647 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A, 2648 (__v8df) __B, 2649 (__v8df) __W, 2650 (__mmask8) __U, __R); 2651} 2652 2653extern __inline __m512d 2654__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2655_mm512_maskz_scalef_round_pd (__mmask8 __U, __m512d __A, __m512d __B, 2656 const int __R) 2657{ 2658 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A, 2659 (__v8df) __B, 2660 (__v8df) 2661 _mm512_setzero_pd (), 2662 (__mmask8) __U, __R); 2663} 2664 2665extern __inline __m512 2666__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2667_mm512_scalef_round_ps (__m512 __A, __m512 __B, const int __R) 2668{ 2669 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A, 2670 (__v16sf) __B, 2671 (__v16sf) 2672 _mm512_undefined_ps (), 2673 (__mmask16) -1, __R); 2674} 2675 2676extern __inline __m512 2677__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2678_mm512_mask_scalef_round_ps (__m512 __W, __mmask16 __U, __m512 __A, 
2679 __m512 __B, const int __R) 2680{ 2681 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A, 2682 (__v16sf) __B, 2683 (__v16sf) __W, 2684 (__mmask16) __U, __R); 2685} 2686 2687extern __inline __m512 2688__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2689_mm512_maskz_scalef_round_ps (__mmask16 __U, __m512 __A, __m512 __B, 2690 const int __R) 2691{ 2692 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A, 2693 (__v16sf) __B, 2694 (__v16sf) 2695 _mm512_setzero_ps (), 2696 (__mmask16) __U, __R); 2697} 2698 2699extern __inline __m128d 2700__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2701_mm_scalef_round_sd (__m128d __A, __m128d __B, const int __R) 2702{ 2703 return (__m128d) __builtin_ia32_scalefsd_round ((__v2df) __A, 2704 (__v2df) __B, 2705 __R); 2706} 2707 2708extern __inline __m128 2709__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2710_mm_scalef_round_ss (__m128 __A, __m128 __B, const int __R) 2711{ 2712 return (__m128) __builtin_ia32_scalefss_round ((__v4sf) __A, 2713 (__v4sf) __B, 2714 __R); 2715} 2716#else 2717#define _mm512_scalef_round_pd(A, B, C) \ 2718 (__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C) 2719 2720#define _mm512_mask_scalef_round_pd(W, U, A, B, C) \ 2721 (__m512d)__builtin_ia32_scalefpd512_mask(A, B, W, U, C) 2722 2723#define _mm512_maskz_scalef_round_pd(U, A, B, C) \ 2724 (__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C) 2725 2726#define _mm512_scalef_round_ps(A, B, C) \ 2727 (__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C) 2728 2729#define _mm512_mask_scalef_round_ps(W, U, A, B, C) \ 2730 (__m512)__builtin_ia32_scalefps512_mask(A, B, W, U, C) 2731 2732#define _mm512_maskz_scalef_round_ps(U, A, B, C) \ 2733 (__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C) 2734 2735#define _mm_scalef_round_sd(A, B, C) \ 2736 
(__m128d)__builtin_ia32_scalefsd_round(A, B, C) 2737 2738#define _mm_scalef_round_ss(A, B, C) \ 2739 (__m128)__builtin_ia32_scalefss_round(A, B, C) 2740#endif 2741 2742#ifdef __OPTIMIZE__ 2743extern __inline __m512d 2744__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2745_mm512_fmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R) 2746{ 2747 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, 2748 (__v8df) __B, 2749 (__v8df) __C, 2750 (__mmask8) -1, __R); 2751} 2752 2753extern __inline __m512d 2754__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2755_mm512_mask_fmadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B, 2756 __m512d __C, const int __R) 2757{ 2758 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, 2759 (__v8df) __B, 2760 (__v8df) __C, 2761 (__mmask8) __U, __R); 2762} 2763 2764extern __inline __m512d 2765__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2766_mm512_mask3_fmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, 2767 __mmask8 __U, const int __R) 2768{ 2769 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A, 2770 (__v8df) __B, 2771 (__v8df) __C, 2772 (__mmask8) __U, __R); 2773} 2774 2775extern __inline __m512d 2776__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2777_mm512_maskz_fmadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B, 2778 __m512d __C, const int __R) 2779{ 2780 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A, 2781 (__v8df) __B, 2782 (__v8df) __C, 2783 (__mmask8) __U, __R); 2784} 2785 2786extern __inline __m512 2787__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2788_mm512_fmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R) 2789{ 2790 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, 2791 (__v16sf) __B, 2792 (__v16sf) __C, 2793 (__mmask16) -1, __R); 2794} 2795 2796extern __inline __m512 2797__attribute__ ((__gnu_inline__, 
__always_inline__, __artificial__)) 2798_mm512_mask_fmadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B, 2799 __m512 __C, const int __R) 2800{ 2801 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, 2802 (__v16sf) __B, 2803 (__v16sf) __C, 2804 (__mmask16) __U, __R); 2805} 2806 2807extern __inline __m512 2808__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2809_mm512_mask3_fmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, 2810 __mmask16 __U, const int __R) 2811{ 2812 return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A, 2813 (__v16sf) __B, 2814 (__v16sf) __C, 2815 (__mmask16) __U, __R); 2816} 2817 2818extern __inline __m512 2819__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2820_mm512_maskz_fmadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B, 2821 __m512 __C, const int __R) 2822{ 2823 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A, 2824 (__v16sf) __B, 2825 (__v16sf) __C, 2826 (__mmask16) __U, __R); 2827} 2828 2829extern __inline __m512d 2830__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2831_mm512_fmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R) 2832{ 2833 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, 2834 (__v8df) __B, 2835 -(__v8df) __C, 2836 (__mmask8) -1, __R); 2837} 2838 2839extern __inline __m512d 2840__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2841_mm512_mask_fmsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B, 2842 __m512d __C, const int __R) 2843{ 2844 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, 2845 (__v8df) __B, 2846 -(__v8df) __C, 2847 (__mmask8) __U, __R); 2848} 2849 2850extern __inline __m512d 2851__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2852_mm512_mask3_fmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, 2853 __mmask8 __U, const int __R) 2854{ 2855 return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A, 2856 (__v8df) __B, 
2857 (__v8df) __C, 2858 (__mmask8) __U, __R); 2859} 2860 2861extern __inline __m512d 2862__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2863_mm512_maskz_fmsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B, 2864 __m512d __C, const int __R) 2865{ 2866 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A, 2867 (__v8df) __B, 2868 -(__v8df) __C, 2869 (__mmask8) __U, __R); 2870} 2871 2872extern __inline __m512 2873__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2874_mm512_fmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R) 2875{ 2876 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, 2877 (__v16sf) __B, 2878 -(__v16sf) __C, 2879 (__mmask16) -1, __R); 2880} 2881 2882extern __inline __m512 2883__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2884_mm512_mask_fmsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B, 2885 __m512 __C, const int __R) 2886{ 2887 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, 2888 (__v16sf) __B, 2889 -(__v16sf) __C, 2890 (__mmask16) __U, __R); 2891} 2892 2893extern __inline __m512 2894__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2895_mm512_mask3_fmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, 2896 __mmask16 __U, const int __R) 2897{ 2898 return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A, 2899 (__v16sf) __B, 2900 (__v16sf) __C, 2901 (__mmask16) __U, __R); 2902} 2903 2904extern __inline __m512 2905__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2906_mm512_maskz_fmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B, 2907 __m512 __C, const int __R) 2908{ 2909 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A, 2910 (__v16sf) __B, 2911 -(__v16sf) __C, 2912 (__mmask16) __U, __R); 2913} 2914 2915extern __inline __m512d 2916__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2917_mm512_fmaddsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int 
__R) 2918{ 2919 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, 2920 (__v8df) __B, 2921 (__v8df) __C, 2922 (__mmask8) -1, __R); 2923} 2924 2925extern __inline __m512d 2926__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2927_mm512_mask_fmaddsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B, 2928 __m512d __C, const int __R) 2929{ 2930 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, 2931 (__v8df) __B, 2932 (__v8df) __C, 2933 (__mmask8) __U, __R); 2934} 2935 2936extern __inline __m512d 2937__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2938_mm512_mask3_fmaddsub_round_pd (__m512d __A, __m512d __B, __m512d __C, 2939 __mmask8 __U, const int __R) 2940{ 2941 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A, 2942 (__v8df) __B, 2943 (__v8df) __C, 2944 (__mmask8) __U, __R); 2945} 2946 2947extern __inline __m512d 2948__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2949_mm512_maskz_fmaddsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B, 2950 __m512d __C, const int __R) 2951{ 2952 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A, 2953 (__v8df) __B, 2954 (__v8df) __C, 2955 (__mmask8) __U, __R); 2956} 2957 2958extern __inline __m512 2959__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2960_mm512_fmaddsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R) 2961{ 2962 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, 2963 (__v16sf) __B, 2964 (__v16sf) __C, 2965 (__mmask16) -1, __R); 2966} 2967 2968extern __inline __m512 2969__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2970_mm512_mask_fmaddsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B, 2971 __m512 __C, const int __R) 2972{ 2973 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, 2974 (__v16sf) __B, 2975 (__v16sf) __C, 2976 (__mmask16) __U, __R); 2977} 2978 2979extern __inline __m512 2980__attribute__ 
((__gnu_inline__, __always_inline__, __artificial__)) 2981_mm512_mask3_fmaddsub_round_ps (__m512 __A, __m512 __B, __m512 __C, 2982 __mmask16 __U, const int __R) 2983{ 2984 return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A, 2985 (__v16sf) __B, 2986 (__v16sf) __C, 2987 (__mmask16) __U, __R); 2988} 2989 2990extern __inline __m512 2991__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2992_mm512_maskz_fmaddsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B, 2993 __m512 __C, const int __R) 2994{ 2995 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A, 2996 (__v16sf) __B, 2997 (__v16sf) __C, 2998 (__mmask16) __U, __R); 2999} 3000 3001extern __inline __m512d 3002__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3003_mm512_fmsubadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R) 3004{ 3005 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, 3006 (__v8df) __B, 3007 -(__v8df) __C, 3008 (__mmask8) -1, __R); 3009} 3010 3011extern __inline __m512d 3012__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3013_mm512_mask_fmsubadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B, 3014 __m512d __C, const int __R) 3015{ 3016 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, 3017 (__v8df) __B, 3018 -(__v8df) __C, 3019 (__mmask8) __U, __R); 3020} 3021 3022extern __inline __m512d 3023__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3024_mm512_mask3_fmsubadd_round_pd (__m512d __A, __m512d __B, __m512d __C, 3025 __mmask8 __U, const int __R) 3026{ 3027 return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A, 3028 (__v8df) __B, 3029 (__v8df) __C, 3030 (__mmask8) __U, __R); 3031} 3032 3033extern __inline __m512d 3034__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3035_mm512_maskz_fmsubadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B, 3036 __m512d __C, const int __R) 3037{ 3038 return (__m512d) 
__builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A, 3039 (__v8df) __B, 3040 -(__v8df) __C, 3041 (__mmask8) __U, __R); 3042} 3043 3044extern __inline __m512 3045__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3046_mm512_fmsubadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R) 3047{ 3048 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, 3049 (__v16sf) __B, 3050 -(__v16sf) __C, 3051 (__mmask16) -1, __R); 3052} 3053 3054extern __inline __m512 3055__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3056_mm512_mask_fmsubadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B, 3057 __m512 __C, const int __R) 3058{ 3059 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, 3060 (__v16sf) __B, 3061 -(__v16sf) __C, 3062 (__mmask16) __U, __R); 3063} 3064 3065extern __inline __m512 3066__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3067_mm512_mask3_fmsubadd_round_ps (__m512 __A, __m512 __B, __m512 __C, 3068 __mmask16 __U, const int __R) 3069{ 3070 return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A, 3071 (__v16sf) __B, 3072 (__v16sf) __C, 3073 (__mmask16) __U, __R); 3074} 3075 3076extern __inline __m512 3077__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3078_mm512_maskz_fmsubadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B, 3079 __m512 __C, const int __R) 3080{ 3081 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A, 3082 (__v16sf) __B, 3083 -(__v16sf) __C, 3084 (__mmask16) __U, __R); 3085} 3086 3087extern __inline __m512d 3088__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3089_mm512_fnmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R) 3090{ 3091 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A, 3092 (__v8df) __B, 3093 (__v8df) __C, 3094 (__mmask8) -1, __R); 3095} 3096 3097extern __inline __m512d 3098__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 
3099_mm512_mask_fnmadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B, 3100 __m512d __C, const int __R) 3101{ 3102 return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A, 3103 (__v8df) __B, 3104 (__v8df) __C, 3105 (__mmask8) __U, __R); 3106} 3107 3108extern __inline __m512d 3109__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3110_mm512_mask3_fnmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, 3111 __mmask8 __U, const int __R) 3112{ 3113 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A, 3114 (__v8df) __B, 3115 (__v8df) __C, 3116 (__mmask8) __U, __R); 3117} 3118 3119extern __inline __m512d 3120__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3121_mm512_maskz_fnmadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B, 3122 __m512d __C, const int __R) 3123{ 3124 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A, 3125 (__v8df) __B, 3126 (__v8df) __C, 3127 (__mmask8) __U, __R); 3128} 3129 3130extern __inline __m512 3131__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3132_mm512_fnmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R) 3133{ 3134 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A, 3135 (__v16sf) __B, 3136 (__v16sf) __C, 3137 (__mmask16) -1, __R); 3138} 3139 3140extern __inline __m512 3141__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3142_mm512_mask_fnmadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B, 3143 __m512 __C, const int __R) 3144{ 3145 return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A, 3146 (__v16sf) __B, 3147 (__v16sf) __C, 3148 (__mmask16) __U, __R); 3149} 3150 3151extern __inline __m512 3152__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3153_mm512_mask3_fnmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, 3154 __mmask16 __U, const int __R) 3155{ 3156 return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A, 3157 (__v16sf) __B, 3158 (__v16sf) __C, 3159 
(__mmask16) __U, __R); 3160} 3161 3162extern __inline __m512 3163__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3164_mm512_maskz_fnmadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B, 3165 __m512 __C, const int __R) 3166{ 3167 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A, 3168 (__v16sf) __B, 3169 (__v16sf) __C, 3170 (__mmask16) __U, __R); 3171} 3172 3173extern __inline __m512d 3174__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3175_mm512_fnmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R) 3176{ 3177 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A, 3178 (__v8df) __B, 3179 -(__v8df) __C, 3180 (__mmask8) -1, __R); 3181} 3182 3183extern __inline __m512d 3184__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3185_mm512_mask_fnmsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B, 3186 __m512d __C, const int __R) 3187{ 3188 return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A, 3189 (__v8df) __B, 3190 (__v8df) __C, 3191 (__mmask8) __U, __R); 3192} 3193 3194extern __inline __m512d 3195__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3196_mm512_mask3_fnmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, 3197 __mmask8 __U, const int __R) 3198{ 3199 return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A, 3200 (__v8df) __B, 3201 (__v8df) __C, 3202 (__mmask8) __U, __R); 3203} 3204 3205extern __inline __m512d 3206__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3207_mm512_maskz_fnmsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B, 3208 __m512d __C, const int __R) 3209{ 3210 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A, 3211 (__v8df) __B, 3212 -(__v8df) __C, 3213 (__mmask8) __U, __R); 3214} 3215 3216extern __inline __m512 3217__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3218_mm512_fnmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R) 3219{ 3220 
return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A, 3221 (__v16sf) __B, 3222 -(__v16sf) __C, 3223 (__mmask16) -1, __R); 3224} 3225 3226extern __inline __m512 3227__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3228_mm512_mask_fnmsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B, 3229 __m512 __C, const int __R) 3230{ 3231 return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A, 3232 (__v16sf) __B, 3233 (__v16sf) __C, 3234 (__mmask16) __U, __R); 3235} 3236 3237extern __inline __m512 3238__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3239_mm512_mask3_fnmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, 3240 __mmask16 __U, const int __R) 3241{ 3242 return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A, 3243 (__v16sf) __B, 3244 (__v16sf) __C, 3245 (__mmask16) __U, __R); 3246} 3247 3248extern __inline __m512 3249__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3250_mm512_maskz_fnmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B, 3251 __m512 __C, const int __R) 3252{ 3253 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A, 3254 (__v16sf) __B, 3255 -(__v16sf) __C, 3256 (__mmask16) __U, __R); 3257} 3258#else 3259#define _mm512_fmadd_round_pd(A, B, C, R) \ 3260 (__m512d)__builtin_ia32_vfmaddpd512_mask(A, B, C, -1, R) 3261 3262#define _mm512_mask_fmadd_round_pd(A, U, B, C, R) \ 3263 (__m512d)__builtin_ia32_vfmaddpd512_mask(A, B, C, U, R) 3264 3265#define _mm512_mask3_fmadd_round_pd(A, B, C, U, R) \ 3266 (__m512d)__builtin_ia32_vfmaddpd512_mask3(A, B, C, U, R) 3267 3268#define _mm512_maskz_fmadd_round_pd(U, A, B, C, R) \ 3269 (__m512d)__builtin_ia32_vfmaddpd512_maskz(A, B, C, U, R) 3270 3271#define _mm512_fmadd_round_ps(A, B, C, R) \ 3272 (__m512)__builtin_ia32_vfmaddps512_mask(A, B, C, -1, R) 3273 3274#define _mm512_mask_fmadd_round_ps(A, U, B, C, R) \ 3275 (__m512)__builtin_ia32_vfmaddps512_mask(A, B, C, U, R) 3276 3277#define _mm512_mask3_fmadd_round_ps(A, B, C, U, 
R) \
  ((__m512) __builtin_ia32_vfmaddps512_mask3 (A, B, C, U, R))

/* Every expansion below is wrapped in parentheses so that it behaves as
   a single primary expression at the use site.  */
#define _mm512_maskz_fmadd_round_ps(U, A, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddps512_maskz (A, B, C, U, R))

/* fmsub: mask/maskz forms negate C and reuse the vfmadd builtins; the
   mask3 form has its own vfmsub builtin and passes C unchanged.  */
#define _mm512_fmsub_round_pd(A, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddpd512_mask (A, B, -(C), -1, R))

#define _mm512_mask_fmsub_round_pd(A, U, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddpd512_mask (A, B, -(C), U, R))

#define _mm512_mask3_fmsub_round_pd(A, B, C, U, R) \
  ((__m512d) __builtin_ia32_vfmsubpd512_mask3 (A, B, C, U, R))

#define _mm512_maskz_fmsub_round_pd(U, A, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddpd512_maskz (A, B, -(C), U, R))

#define _mm512_fmsub_round_ps(A, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddps512_mask (A, B, -(C), -1, R))

#define _mm512_mask_fmsub_round_ps(A, U, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddps512_mask (A, B, -(C), U, R))

#define _mm512_mask3_fmsub_round_ps(A, B, C, U, R) \
  ((__m512) __builtin_ia32_vfmsubps512_mask3 (A, B, C, U, R))

#define _mm512_maskz_fmsub_round_ps(U, A, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddps512_maskz (A, B, -(C), U, R))

/* fmaddsub family: all forms use the vfmaddsub builtins.  */
#define _mm512_fmaddsub_round_pd(A, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddsubpd512_mask (A, B, C, -1, R))

/* Must use the vfmaddsub builtin, not plain vfmadd, so that the macro
   agrees with the other fmaddsub macros here and with the ps variant
   below.  */
#define _mm512_mask_fmaddsub_round_pd(A, U, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddsubpd512_mask (A, B, C, U, R))

#define _mm512_mask3_fmaddsub_round_pd(A, B, C, U, R) \
  ((__m512d) __builtin_ia32_vfmaddsubpd512_mask3 (A, B, C, U, R))

#define _mm512_maskz_fmaddsub_round_pd(U, A, B, C, R) \
  ((__m512d) __builtin_ia32_vfmaddsubpd512_maskz (A, B, C, U, R))

#define _mm512_fmaddsub_round_ps(A, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddsubps512_mask (A, B, C, -1, R))

#define _mm512_mask_fmaddsub_round_ps(A, U, B, C, R) \
  ((__m512) __builtin_ia32_vfmaddsubps512_mask (A, B, C, U, R))

#define _mm512_mask3_fmaddsub_round_ps(A, B, C, U, R) \
(__m512)__builtin_ia32_vfmaddsubps512_mask3(A, B, C, U, R) 3327 3328#define _mm512_maskz_fmaddsub_round_ps(U, A, B, C, R) \ 3329 (__m512)__builtin_ia32_vfmaddsubps512_maskz(A, B, C, U, R) 3330 3331#define _mm512_fmsubadd_round_pd(A, B, C, R) \ 3332 (__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, -(C), -1, R) 3333 3334#define _mm512_mask_fmsubadd_round_pd(A, U, B, C, R) \ 3335 (__m512d)__builtin_ia32_vfmaddsubpd512_mask(A, B, -(C), U, R) 3336 3337#define _mm512_mask3_fmsubadd_round_pd(A, B, C, U, R) \ 3338 (__m512d)__builtin_ia32_vfmsubaddpd512_mask3(A, B, C, U, R) 3339 3340#define _mm512_maskz_fmsubadd_round_pd(U, A, B, C, R) \ 3341 (__m512d)__builtin_ia32_vfmaddsubpd512_maskz(A, B, -(C), U, R) 3342 3343#define _mm512_fmsubadd_round_ps(A, B, C, R) \ 3344 (__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, -(C), -1, R) 3345 3346#define _mm512_mask_fmsubadd_round_ps(A, U, B, C, R) \ 3347 (__m512)__builtin_ia32_vfmaddsubps512_mask(A, B, -(C), U, R) 3348 3349#define _mm512_mask3_fmsubadd_round_ps(A, B, C, U, R) \ 3350 (__m512)__builtin_ia32_vfmsubaddps512_mask3(A, B, C, U, R) 3351 3352#define _mm512_maskz_fmsubadd_round_ps(U, A, B, C, R) \ 3353 (__m512)__builtin_ia32_vfmaddsubps512_maskz(A, B, -(C), U, R) 3354 3355#define _mm512_fnmadd_round_pd(A, B, C, R) \ 3356 (__m512d)__builtin_ia32_vfmaddpd512_mask(-(A), B, C, -1, R) 3357 3358#define _mm512_mask_fnmadd_round_pd(A, U, B, C, R) \ 3359 (__m512d)__builtin_ia32_vfnmaddpd512_mask(-(A), B, C, U, R) 3360 3361#define _mm512_mask3_fnmadd_round_pd(A, B, C, U, R) \ 3362 (__m512d)__builtin_ia32_vfmaddpd512_mask3(-(A), B, C, U, R) 3363 3364#define _mm512_maskz_fnmadd_round_pd(U, A, B, C, R) \ 3365 (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(A), B, C, U, R) 3366 3367#define _mm512_fnmadd_round_ps(A, B, C, R) \ 3368 (__m512)__builtin_ia32_vfmaddps512_mask(-(A), B, C, -1, R) 3369 3370#define _mm512_mask_fnmadd_round_ps(A, U, B, C, R) \ 3371 (__m512)__builtin_ia32_vfnmaddps512_mask(-(A), B, C, U, R) 3372 3373#define 
_mm512_mask3_fnmadd_round_ps(A, B, C, U, R) \ 3374 (__m512)__builtin_ia32_vfmaddps512_mask3(-(A), B, C, U, R) 3375 3376#define _mm512_maskz_fnmadd_round_ps(U, A, B, C, R) \ 3377 (__m512)__builtin_ia32_vfmaddps512_maskz(-(A), B, C, U, R) 3378 3379#define _mm512_fnmsub_round_pd(A, B, C, R) \ 3380 (__m512d)__builtin_ia32_vfmaddpd512_mask(-(A), B, -(C), -1, R) 3381 3382#define _mm512_mask_fnmsub_round_pd(A, U, B, C, R) \ 3383 (__m512d)__builtin_ia32_vfnmsubpd512_mask(A, B, C, U, R) 3384 3385#define _mm512_mask3_fnmsub_round_pd(A, B, C, U, R) \ 3386 (__m512d)__builtin_ia32_vfnmsubpd512_mask3(A, B, C, U, R) 3387 3388#define _mm512_maskz_fnmsub_round_pd(U, A, B, C, R) \ 3389 (__m512d)__builtin_ia32_vfmaddpd512_maskz(-(A), B, -(C), U, R) 3390 3391#define _mm512_fnmsub_round_ps(A, B, C, R) \ 3392 (__m512)__builtin_ia32_vfmaddps512_mask(-(A), B, -(C), -1, R) 3393 3394#define _mm512_mask_fnmsub_round_ps(A, U, B, C, R) \ 3395 (__m512)__builtin_ia32_vfnmsubps512_mask(A, B, C, U, R) 3396 3397#define _mm512_mask3_fnmsub_round_ps(A, B, C, U, R) \ 3398 (__m512)__builtin_ia32_vfnmsubps512_mask3(A, B, C, U, R) 3399 3400#define _mm512_maskz_fnmsub_round_ps(U, A, B, C, R) \ 3401 (__m512)__builtin_ia32_vfmaddps512_maskz(-(A), B, -(C), U, R) 3402#endif 3403 3404extern __inline __m512i 3405__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3406_mm512_abs_epi64 (__m512i __A) 3407{ 3408 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A, 3409 (__v8di) 3410 _mm512_undefined_si512 (), 3411 (__mmask8) -1); 3412} 3413 3414extern __inline __m512i 3415__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3416_mm512_mask_abs_epi64 (__m512i __W, __mmask8 __U, __m512i __A) 3417{ 3418 return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A, 3419 (__v8di) __W, 3420 (__mmask8) __U); 3421} 3422 3423extern __inline __m512i 3424__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3425_mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A) 3426{ 3427 
return (__m512i) __builtin_ia32_pabsq512_mask ((__v8di) __A, 3428 (__v8di) 3429 _mm512_setzero_si512 (), 3430 (__mmask8) __U); 3431} 3432 3433extern __inline __m512i 3434__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3435_mm512_abs_epi32 (__m512i __A) 3436{ 3437 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A, 3438 (__v16si) 3439 _mm512_undefined_si512 (), 3440 (__mmask16) -1); 3441} 3442 3443extern __inline __m512i 3444__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3445_mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A) 3446{ 3447 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A, 3448 (__v16si) __W, 3449 (__mmask16) __U); 3450} 3451 3452extern __inline __m512i 3453__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3454_mm512_maskz_abs_epi32 (__mmask16 __U, __m512i __A) 3455{ 3456 return (__m512i) __builtin_ia32_pabsd512_mask ((__v16si) __A, 3457 (__v16si) 3458 _mm512_setzero_si512 (), 3459 (__mmask16) __U); 3460} 3461 3462extern __inline __m512 3463__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3464_mm512_broadcastss_ps (__m128 __A) 3465{ 3466 return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A, 3467 (__v16sf) 3468 _mm512_undefined_ps (), 3469 (__mmask16) -1); 3470} 3471 3472extern __inline __m512 3473__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3474_mm512_mask_broadcastss_ps (__m512 __O, __mmask16 __M, __m128 __A) 3475{ 3476 return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A, 3477 (__v16sf) __O, __M); 3478} 3479 3480extern __inline __m512 3481__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3482_mm512_maskz_broadcastss_ps (__mmask16 __M, __m128 __A) 3483{ 3484 return (__m512) __builtin_ia32_broadcastss512 ((__v4sf) __A, 3485 (__v16sf) 3486 _mm512_setzero_ps (), 3487 __M); 3488} 3489 3490extern __inline __m512d 3491__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 
3492_mm512_broadcastsd_pd (__m128d __A) 3493{ 3494 return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A, 3495 (__v8df) 3496 _mm512_undefined_pd (), 3497 (__mmask8) -1); 3498} 3499 3500extern __inline __m512d 3501__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3502_mm512_mask_broadcastsd_pd (__m512d __O, __mmask8 __M, __m128d __A) 3503{ 3504 return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A, 3505 (__v8df) __O, __M); 3506} 3507 3508extern __inline __m512d 3509__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3510_mm512_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A) 3511{ 3512 return (__m512d) __builtin_ia32_broadcastsd512 ((__v2df) __A, 3513 (__v8df) 3514 _mm512_setzero_pd (), 3515 __M); 3516} 3517 3518extern __inline __m512i 3519__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3520_mm512_broadcastd_epi32 (__m128i __A) 3521{ 3522 return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A, 3523 (__v16si) 3524 _mm512_undefined_si512 (), 3525 (__mmask16) -1); 3526} 3527 3528extern __inline __m512i 3529__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3530_mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A) 3531{ 3532 return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A, 3533 (__v16si) __O, __M); 3534} 3535 3536extern __inline __m512i 3537__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3538_mm512_maskz_broadcastd_epi32 (__mmask16 __M, __m128i __A) 3539{ 3540 return (__m512i) __builtin_ia32_pbroadcastd512 ((__v4si) __A, 3541 (__v16si) 3542 _mm512_setzero_si512 (), 3543 __M); 3544} 3545 3546extern __inline __m512i 3547__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3548_mm512_set1_epi32 (int __A) 3549{ 3550 return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A, 3551 (__v16si) 3552 _mm512_undefined_si512 (), 3553 (__mmask16)(-1)); 3554} 3555 3556extern __inline __m512i 3557__attribute__ ((__gnu_inline__, 
__always_inline__, __artificial__)) 3558_mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A) 3559{ 3560 return (__m512i) __builtin_ia32_pbroadcastd512_gpr_mask (__A, (__v16si) __O, 3561 __M); 3562} 3563 3564extern __inline __m512i 3565__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3566_mm512_maskz_set1_epi32 (__mmask16 __M, int __A) 3567{ 3568 return (__m512i) 3569 __builtin_ia32_pbroadcastd512_gpr_mask (__A, 3570 (__v16si) _mm512_setzero_si512 (), 3571 __M); 3572} 3573 3574extern __inline __m512i 3575__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3576_mm512_broadcastq_epi64 (__m128i __A) 3577{ 3578 return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A, 3579 (__v8di) 3580 _mm512_undefined_si512 (), 3581 (__mmask8) -1); 3582} 3583 3584extern __inline __m512i 3585__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3586_mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A) 3587{ 3588 return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A, 3589 (__v8di) __O, __M); 3590} 3591 3592extern __inline __m512i 3593__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3594_mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A) 3595{ 3596 return (__m512i) __builtin_ia32_pbroadcastq512 ((__v2di) __A, 3597 (__v8di) 3598 _mm512_setzero_si512 (), 3599 __M); 3600} 3601 3602extern __inline __m512i 3603__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3604_mm512_set1_epi64 (long long __A) 3605{ 3606 return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A, 3607 (__v8di) 3608 _mm512_undefined_si512 (), 3609 (__mmask8)(-1)); 3610} 3611 3612extern __inline __m512i 3613__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3614_mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A) 3615{ 3616 return (__m512i) __builtin_ia32_pbroadcastq512_gpr_mask (__A, (__v8di) __O, 3617 __M); 3618} 3619 3620extern __inline __m512i 3621__attribute__ 
((__gnu_inline__, __always_inline__, __artificial__)) 3622_mm512_maskz_set1_epi64 (__mmask8 __M, long long __A) 3623{ 3624 return (__m512i) 3625 __builtin_ia32_pbroadcastq512_gpr_mask (__A, 3626 (__v8di) _mm512_setzero_si512 (), 3627 __M); 3628} 3629 3630extern __inline __m512 3631__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3632_mm512_broadcast_f32x4 (__m128 __A) 3633{ 3634 return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A, 3635 (__v16sf) 3636 _mm512_undefined_ps (), 3637 (__mmask16) -1); 3638} 3639 3640extern __inline __m512 3641__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3642_mm512_mask_broadcast_f32x4 (__m512 __O, __mmask16 __M, __m128 __A) 3643{ 3644 return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A, 3645 (__v16sf) __O, 3646 __M); 3647} 3648 3649extern __inline __m512 3650__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3651_mm512_maskz_broadcast_f32x4 (__mmask16 __M, __m128 __A) 3652{ 3653 return (__m512) __builtin_ia32_broadcastf32x4_512 ((__v4sf) __A, 3654 (__v16sf) 3655 _mm512_setzero_ps (), 3656 __M); 3657} 3658 3659extern __inline __m512i 3660__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3661_mm512_broadcast_i32x4 (__m128i __A) 3662{ 3663 return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A, 3664 (__v16si) 3665 _mm512_undefined_si512 (), 3666 (__mmask16) -1); 3667} 3668 3669extern __inline __m512i 3670__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3671_mm512_mask_broadcast_i32x4 (__m512i __O, __mmask16 __M, __m128i __A) 3672{ 3673 return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A, 3674 (__v16si) __O, 3675 __M); 3676} 3677 3678extern __inline __m512i 3679__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3680_mm512_maskz_broadcast_i32x4 (__mmask16 __M, __m128i __A) 3681{ 3682 return (__m512i) __builtin_ia32_broadcasti32x4_512 ((__v4si) __A, 3683 (__v16si) 3684 
_mm512_setzero_si512 (), 3685 __M); 3686} 3687 3688extern __inline __m512d 3689__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3690_mm512_broadcast_f64x4 (__m256d __A) 3691{ 3692 return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A, 3693 (__v8df) 3694 _mm512_undefined_pd (), 3695 (__mmask8) -1); 3696} 3697 3698extern __inline __m512d 3699__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3700_mm512_mask_broadcast_f64x4 (__m512d __O, __mmask8 __M, __m256d __A) 3701{ 3702 return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A, 3703 (__v8df) __O, 3704 __M); 3705} 3706 3707extern __inline __m512d 3708__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3709_mm512_maskz_broadcast_f64x4 (__mmask8 __M, __m256d __A) 3710{ 3711 return (__m512d) __builtin_ia32_broadcastf64x4_512 ((__v4df) __A, 3712 (__v8df) 3713 _mm512_setzero_pd (), 3714 __M); 3715} 3716 3717extern __inline __m512i 3718__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3719_mm512_broadcast_i64x4 (__m256i __A) 3720{ 3721 return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A, 3722 (__v8di) 3723 _mm512_undefined_si512 (), 3724 (__mmask8) -1); 3725} 3726 3727extern __inline __m512i 3728__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3729_mm512_mask_broadcast_i64x4 (__m512i __O, __mmask8 __M, __m256i __A) 3730{ 3731 return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A, 3732 (__v8di) __O, 3733 __M); 3734} 3735 3736extern __inline __m512i 3737__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3738_mm512_maskz_broadcast_i64x4 (__mmask8 __M, __m256i __A) 3739{ 3740 return (__m512i) __builtin_ia32_broadcasti64x4_512 ((__v4di) __A, 3741 (__v8di) 3742 _mm512_setzero_si512 (), 3743 __M); 3744} 3745 3746typedef enum 3747{ 3748 _MM_PERM_AAAA = 0x00, _MM_PERM_AAAB = 0x01, _MM_PERM_AAAC = 0x02, 3749 _MM_PERM_AAAD = 0x03, _MM_PERM_AABA = 0x04, _MM_PERM_AABB = 0x05, 3750 
_MM_PERM_AABC = 0x06, _MM_PERM_AABD = 0x07, _MM_PERM_AACA = 0x08, 3751 _MM_PERM_AACB = 0x09, _MM_PERM_AACC = 0x0A, _MM_PERM_AACD = 0x0B, 3752 _MM_PERM_AADA = 0x0C, _MM_PERM_AADB = 0x0D, _MM_PERM_AADC = 0x0E, 3753 _MM_PERM_AADD = 0x0F, _MM_PERM_ABAA = 0x10, _MM_PERM_ABAB = 0x11, 3754 _MM_PERM_ABAC = 0x12, _MM_PERM_ABAD = 0x13, _MM_PERM_ABBA = 0x14, 3755 _MM_PERM_ABBB = 0x15, _MM_PERM_ABBC = 0x16, _MM_PERM_ABBD = 0x17, 3756 _MM_PERM_ABCA = 0x18, _MM_PERM_ABCB = 0x19, _MM_PERM_ABCC = 0x1A, 3757 _MM_PERM_ABCD = 0x1B, _MM_PERM_ABDA = 0x1C, _MM_PERM_ABDB = 0x1D, 3758 _MM_PERM_ABDC = 0x1E, _MM_PERM_ABDD = 0x1F, _MM_PERM_ACAA = 0x20, 3759 _MM_PERM_ACAB = 0x21, _MM_PERM_ACAC = 0x22, _MM_PERM_ACAD = 0x23, 3760 _MM_PERM_ACBA = 0x24, _MM_PERM_ACBB = 0x25, _MM_PERM_ACBC = 0x26, 3761 _MM_PERM_ACBD = 0x27, _MM_PERM_ACCA = 0x28, _MM_PERM_ACCB = 0x29, 3762 _MM_PERM_ACCC = 0x2A, _MM_PERM_ACCD = 0x2B, _MM_PERM_ACDA = 0x2C, 3763 _MM_PERM_ACDB = 0x2D, _MM_PERM_ACDC = 0x2E, _MM_PERM_ACDD = 0x2F, 3764 _MM_PERM_ADAA = 0x30, _MM_PERM_ADAB = 0x31, _MM_PERM_ADAC = 0x32, 3765 _MM_PERM_ADAD = 0x33, _MM_PERM_ADBA = 0x34, _MM_PERM_ADBB = 0x35, 3766 _MM_PERM_ADBC = 0x36, _MM_PERM_ADBD = 0x37, _MM_PERM_ADCA = 0x38, 3767 _MM_PERM_ADCB = 0x39, _MM_PERM_ADCC = 0x3A, _MM_PERM_ADCD = 0x3B, 3768 _MM_PERM_ADDA = 0x3C, _MM_PERM_ADDB = 0x3D, _MM_PERM_ADDC = 0x3E, 3769 _MM_PERM_ADDD = 0x3F, _MM_PERM_BAAA = 0x40, _MM_PERM_BAAB = 0x41, 3770 _MM_PERM_BAAC = 0x42, _MM_PERM_BAAD = 0x43, _MM_PERM_BABA = 0x44, 3771 _MM_PERM_BABB = 0x45, _MM_PERM_BABC = 0x46, _MM_PERM_BABD = 0x47, 3772 _MM_PERM_BACA = 0x48, _MM_PERM_BACB = 0x49, _MM_PERM_BACC = 0x4A, 3773 _MM_PERM_BACD = 0x4B, _MM_PERM_BADA = 0x4C, _MM_PERM_BADB = 0x4D, 3774 _MM_PERM_BADC = 0x4E, _MM_PERM_BADD = 0x4F, _MM_PERM_BBAA = 0x50, 3775 _MM_PERM_BBAB = 0x51, _MM_PERM_BBAC = 0x52, _MM_PERM_BBAD = 0x53, 3776 _MM_PERM_BBBA = 0x54, _MM_PERM_BBBB = 0x55, _MM_PERM_BBBC = 0x56, 3777 _MM_PERM_BBBD = 0x57, _MM_PERM_BBCA = 0x58, _MM_PERM_BBCB = 0x59, 3778 
_MM_PERM_BBCC = 0x5A, _MM_PERM_BBCD = 0x5B, _MM_PERM_BBDA = 0x5C, 3779 _MM_PERM_BBDB = 0x5D, _MM_PERM_BBDC = 0x5E, _MM_PERM_BBDD = 0x5F, 3780 _MM_PERM_BCAA = 0x60, _MM_PERM_BCAB = 0x61, _MM_PERM_BCAC = 0x62, 3781 _MM_PERM_BCAD = 0x63, _MM_PERM_BCBA = 0x64, _MM_PERM_BCBB = 0x65, 3782 _MM_PERM_BCBC = 0x66, _MM_PERM_BCBD = 0x67, _MM_PERM_BCCA = 0x68, 3783 _MM_PERM_BCCB = 0x69, _MM_PERM_BCCC = 0x6A, _MM_PERM_BCCD = 0x6B, 3784 _MM_PERM_BCDA = 0x6C, _MM_PERM_BCDB = 0x6D, _MM_PERM_BCDC = 0x6E, 3785 _MM_PERM_BCDD = 0x6F, _MM_PERM_BDAA = 0x70, _MM_PERM_BDAB = 0x71, 3786 _MM_PERM_BDAC = 0x72, _MM_PERM_BDAD = 0x73, _MM_PERM_BDBA = 0x74, 3787 _MM_PERM_BDBB = 0x75, _MM_PERM_BDBC = 0x76, _MM_PERM_BDBD = 0x77, 3788 _MM_PERM_BDCA = 0x78, _MM_PERM_BDCB = 0x79, _MM_PERM_BDCC = 0x7A, 3789 _MM_PERM_BDCD = 0x7B, _MM_PERM_BDDA = 0x7C, _MM_PERM_BDDB = 0x7D, 3790 _MM_PERM_BDDC = 0x7E, _MM_PERM_BDDD = 0x7F, _MM_PERM_CAAA = 0x80, 3791 _MM_PERM_CAAB = 0x81, _MM_PERM_CAAC = 0x82, _MM_PERM_CAAD = 0x83, 3792 _MM_PERM_CABA = 0x84, _MM_PERM_CABB = 0x85, _MM_PERM_CABC = 0x86, 3793 _MM_PERM_CABD = 0x87, _MM_PERM_CACA = 0x88, _MM_PERM_CACB = 0x89, 3794 _MM_PERM_CACC = 0x8A, _MM_PERM_CACD = 0x8B, _MM_PERM_CADA = 0x8C, 3795 _MM_PERM_CADB = 0x8D, _MM_PERM_CADC = 0x8E, _MM_PERM_CADD = 0x8F, 3796 _MM_PERM_CBAA = 0x90, _MM_PERM_CBAB = 0x91, _MM_PERM_CBAC = 0x92, 3797 _MM_PERM_CBAD = 0x93, _MM_PERM_CBBA = 0x94, _MM_PERM_CBBB = 0x95, 3798 _MM_PERM_CBBC = 0x96, _MM_PERM_CBBD = 0x97, _MM_PERM_CBCA = 0x98, 3799 _MM_PERM_CBCB = 0x99, _MM_PERM_CBCC = 0x9A, _MM_PERM_CBCD = 0x9B, 3800 _MM_PERM_CBDA = 0x9C, _MM_PERM_CBDB = 0x9D, _MM_PERM_CBDC = 0x9E, 3801 _MM_PERM_CBDD = 0x9F, _MM_PERM_CCAA = 0xA0, _MM_PERM_CCAB = 0xA1, 3802 _MM_PERM_CCAC = 0xA2, _MM_PERM_CCAD = 0xA3, _MM_PERM_CCBA = 0xA4, 3803 _MM_PERM_CCBB = 0xA5, _MM_PERM_CCBC = 0xA6, _MM_PERM_CCBD = 0xA7, 3804 _MM_PERM_CCCA = 0xA8, _MM_PERM_CCCB = 0xA9, _MM_PERM_CCCC = 0xAA, 3805 _MM_PERM_CCCD = 0xAB, _MM_PERM_CCDA = 0xAC, _MM_PERM_CCDB = 0xAD, 3806 
_MM_PERM_CCDC = 0xAE, _MM_PERM_CCDD = 0xAF, _MM_PERM_CDAA = 0xB0, 3807 _MM_PERM_CDAB = 0xB1, _MM_PERM_CDAC = 0xB2, _MM_PERM_CDAD = 0xB3, 3808 _MM_PERM_CDBA = 0xB4, _MM_PERM_CDBB = 0xB5, _MM_PERM_CDBC = 0xB6, 3809 _MM_PERM_CDBD = 0xB7, _MM_PERM_CDCA = 0xB8, _MM_PERM_CDCB = 0xB9, 3810 _MM_PERM_CDCC = 0xBA, _MM_PERM_CDCD = 0xBB, _MM_PERM_CDDA = 0xBC, 3811 _MM_PERM_CDDB = 0xBD, _MM_PERM_CDDC = 0xBE, _MM_PERM_CDDD = 0xBF, 3812 _MM_PERM_DAAA = 0xC0, _MM_PERM_DAAB = 0xC1, _MM_PERM_DAAC = 0xC2, 3813 _MM_PERM_DAAD = 0xC3, _MM_PERM_DABA = 0xC4, _MM_PERM_DABB = 0xC5, 3814 _MM_PERM_DABC = 0xC6, _MM_PERM_DABD = 0xC7, _MM_PERM_DACA = 0xC8, 3815 _MM_PERM_DACB = 0xC9, _MM_PERM_DACC = 0xCA, _MM_PERM_DACD = 0xCB, 3816 _MM_PERM_DADA = 0xCC, _MM_PERM_DADB = 0xCD, _MM_PERM_DADC = 0xCE, 3817 _MM_PERM_DADD = 0xCF, _MM_PERM_DBAA = 0xD0, _MM_PERM_DBAB = 0xD1, 3818 _MM_PERM_DBAC = 0xD2, _MM_PERM_DBAD = 0xD3, _MM_PERM_DBBA = 0xD4, 3819 _MM_PERM_DBBB = 0xD5, _MM_PERM_DBBC = 0xD6, _MM_PERM_DBBD = 0xD7, 3820 _MM_PERM_DBCA = 0xD8, _MM_PERM_DBCB = 0xD9, _MM_PERM_DBCC = 0xDA, 3821 _MM_PERM_DBCD = 0xDB, _MM_PERM_DBDA = 0xDC, _MM_PERM_DBDB = 0xDD, 3822 _MM_PERM_DBDC = 0xDE, _MM_PERM_DBDD = 0xDF, _MM_PERM_DCAA = 0xE0, 3823 _MM_PERM_DCAB = 0xE1, _MM_PERM_DCAC = 0xE2, _MM_PERM_DCAD = 0xE3, 3824 _MM_PERM_DCBA = 0xE4, _MM_PERM_DCBB = 0xE5, _MM_PERM_DCBC = 0xE6, 3825 _MM_PERM_DCBD = 0xE7, _MM_PERM_DCCA = 0xE8, _MM_PERM_DCCB = 0xE9, 3826 _MM_PERM_DCCC = 0xEA, _MM_PERM_DCCD = 0xEB, _MM_PERM_DCDA = 0xEC, 3827 _MM_PERM_DCDB = 0xED, _MM_PERM_DCDC = 0xEE, _MM_PERM_DCDD = 0xEF, 3828 _MM_PERM_DDAA = 0xF0, _MM_PERM_DDAB = 0xF1, _MM_PERM_DDAC = 0xF2, 3829 _MM_PERM_DDAD = 0xF3, _MM_PERM_DDBA = 0xF4, _MM_PERM_DDBB = 0xF5, 3830 _MM_PERM_DDBC = 0xF6, _MM_PERM_DDBD = 0xF7, _MM_PERM_DDCA = 0xF8, 3831 _MM_PERM_DDCB = 0xF9, _MM_PERM_DDCC = 0xFA, _MM_PERM_DDCD = 0xFB, 3832 _MM_PERM_DDDA = 0xFC, _MM_PERM_DDDB = 0xFD, _MM_PERM_DDDC = 0xFE, 3833 _MM_PERM_DDDD = 0xFF 3834} _MM_PERM_ENUM; 3835 3836#ifdef __OPTIMIZE__ 3837extern 
__inline __m512i 3838__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3839_mm512_shuffle_epi32 (__m512i __A, _MM_PERM_ENUM __mask) 3840{ 3841 return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A, 3842 __mask, 3843 (__v16si) 3844 _mm512_undefined_si512 (), 3845 (__mmask16) -1); 3846} 3847 3848extern __inline __m512i 3849__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3850_mm512_mask_shuffle_epi32 (__m512i __W, __mmask16 __U, __m512i __A, 3851 _MM_PERM_ENUM __mask) 3852{ 3853 return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A, 3854 __mask, 3855 (__v16si) __W, 3856 (__mmask16) __U); 3857} 3858 3859extern __inline __m512i 3860__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3861_mm512_maskz_shuffle_epi32 (__mmask16 __U, __m512i __A, _MM_PERM_ENUM __mask) 3862{ 3863 return (__m512i) __builtin_ia32_pshufd512_mask ((__v16si) __A, 3864 __mask, 3865 (__v16si) 3866 _mm512_setzero_si512 (), 3867 (__mmask16) __U); 3868} 3869 3870extern __inline __m512i 3871__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3872_mm512_shuffle_i64x2 (__m512i __A, __m512i __B, const int __imm) 3873{ 3874 return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A, 3875 (__v8di) __B, __imm, 3876 (__v8di) 3877 _mm512_undefined_si512 (), 3878 (__mmask8) -1); 3879} 3880 3881extern __inline __m512i 3882__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3883_mm512_mask_shuffle_i64x2 (__m512i __W, __mmask8 __U, __m512i __A, 3884 __m512i __B, const int __imm) 3885{ 3886 return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A, 3887 (__v8di) __B, __imm, 3888 (__v8di) __W, 3889 (__mmask8) __U); 3890} 3891 3892extern __inline __m512i 3893__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3894_mm512_maskz_shuffle_i64x2 (__mmask8 __U, __m512i __A, __m512i __B, 3895 const int __imm) 3896{ 3897 return (__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di) __A, 3898 (__v8di) __B, 
__imm, 3899 (__v8di) 3900 _mm512_setzero_si512 (), 3901 (__mmask8) __U); 3902} 3903 3904extern __inline __m512i 3905__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3906_mm512_shuffle_i32x4 (__m512i __A, __m512i __B, const int __imm) 3907{ 3908 return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A, 3909 (__v16si) __B, 3910 __imm, 3911 (__v16si) 3912 _mm512_undefined_si512 (), 3913 (__mmask16) -1); 3914} 3915 3916extern __inline __m512i 3917__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3918_mm512_mask_shuffle_i32x4 (__m512i __W, __mmask16 __U, __m512i __A, 3919 __m512i __B, const int __imm) 3920{ 3921 return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A, 3922 (__v16si) __B, 3923 __imm, 3924 (__v16si) __W, 3925 (__mmask16) __U); 3926} 3927 3928extern __inline __m512i 3929__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3930_mm512_maskz_shuffle_i32x4 (__mmask16 __U, __m512i __A, __m512i __B, 3931 const int __imm) 3932{ 3933 return (__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si) __A, 3934 (__v16si) __B, 3935 __imm, 3936 (__v16si) 3937 _mm512_setzero_si512 (), 3938 (__mmask16) __U); 3939} 3940 3941extern __inline __m512d 3942__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3943_mm512_shuffle_f64x2 (__m512d __A, __m512d __B, const int __imm) 3944{ 3945 return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A, 3946 (__v8df) __B, __imm, 3947 (__v8df) 3948 _mm512_undefined_pd (), 3949 (__mmask8) -1); 3950} 3951 3952extern __inline __m512d 3953__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3954_mm512_mask_shuffle_f64x2 (__m512d __W, __mmask8 __U, __m512d __A, 3955 __m512d __B, const int __imm) 3956{ 3957 return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A, 3958 (__v8df) __B, __imm, 3959 (__v8df) __W, 3960 (__mmask8) __U); 3961} 3962 3963extern __inline __m512d 3964__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 
3965_mm512_maskz_shuffle_f64x2 (__mmask8 __U, __m512d __A, __m512d __B, 3966 const int __imm) 3967{ 3968 return (__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df) __A, 3969 (__v8df) __B, __imm, 3970 (__v8df) 3971 _mm512_setzero_pd (), 3972 (__mmask8) __U); 3973} 3974 3975extern __inline __m512 3976__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3977_mm512_shuffle_f32x4 (__m512 __A, __m512 __B, const int __imm) 3978{ 3979 return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A, 3980 (__v16sf) __B, __imm, 3981 (__v16sf) 3982 _mm512_undefined_ps (), 3983 (__mmask16) -1); 3984} 3985 3986extern __inline __m512 3987__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3988_mm512_mask_shuffle_f32x4 (__m512 __W, __mmask16 __U, __m512 __A, 3989 __m512 __B, const int __imm) 3990{ 3991 return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A, 3992 (__v16sf) __B, __imm, 3993 (__v16sf) __W, 3994 (__mmask16) __U); 3995} 3996 3997extern __inline __m512 3998__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3999_mm512_maskz_shuffle_f32x4 (__mmask16 __U, __m512 __A, __m512 __B, 4000 const int __imm) 4001{ 4002 return (__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf) __A, 4003 (__v16sf) __B, __imm, 4004 (__v16sf) 4005 _mm512_setzero_ps (), 4006 (__mmask16) __U); 4007} 4008 4009#else 4010#define _mm512_shuffle_epi32(X, C) \ 4011 ((__m512i) __builtin_ia32_pshufd512_mask ((__v16si)(__m512i)(X), (int)(C),\ 4012 (__v16si)(__m512i)_mm512_undefined_si512 (),\ 4013 (__mmask16)-1)) 4014 4015#define _mm512_mask_shuffle_epi32(W, U, X, C) \ 4016 ((__m512i) __builtin_ia32_pshufd512_mask ((__v16si)(__m512i)(X), (int)(C),\ 4017 (__v16si)(__m512i)(W),\ 4018 (__mmask16)(U))) 4019 4020#define _mm512_maskz_shuffle_epi32(U, X, C) \ 4021 ((__m512i) __builtin_ia32_pshufd512_mask ((__v16si)(__m512i)(X), (int)(C),\ 4022 (__v16si)(__m512i)_mm512_setzero_si512 (),\ 4023 (__mmask16)(U))) 4024 4025#define _mm512_shuffle_i64x2(X, Y, C) \ 4026 
((__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di)(__m512i)(X), \ 4027 (__v8di)(__m512i)(Y), (int)(C),\ 4028 (__v8di)(__m512i)_mm512_undefined_si512 (),\ 4029 (__mmask8)-1)) 4030 4031#define _mm512_mask_shuffle_i64x2(W, U, X, Y, C) \ 4032 ((__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di)(__m512i)(X), \ 4033 (__v8di)(__m512i)(Y), (int)(C),\ 4034 (__v8di)(__m512i)(W),\ 4035 (__mmask8)(U))) 4036 4037#define _mm512_maskz_shuffle_i64x2(U, X, Y, C) \ 4038 ((__m512i) __builtin_ia32_shuf_i64x2_mask ((__v8di)(__m512i)(X), \ 4039 (__v8di)(__m512i)(Y), (int)(C),\ 4040 (__v8di)(__m512i)_mm512_setzero_si512 (),\ 4041 (__mmask8)(U))) 4042 4043#define _mm512_shuffle_i32x4(X, Y, C) \ 4044 ((__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si)(__m512i)(X), \ 4045 (__v16si)(__m512i)(Y), (int)(C),\ 4046 (__v16si)(__m512i)_mm512_undefined_si512 (),\ 4047 (__mmask16)-1)) 4048 4049#define _mm512_mask_shuffle_i32x4(W, U, X, Y, C) \ 4050 ((__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si)(__m512i)(X), \ 4051 (__v16si)(__m512i)(Y), (int)(C),\ 4052 (__v16si)(__m512i)(W),\ 4053 (__mmask16)(U))) 4054 4055#define _mm512_maskz_shuffle_i32x4(U, X, Y, C) \ 4056 ((__m512i) __builtin_ia32_shuf_i32x4_mask ((__v16si)(__m512i)(X), \ 4057 (__v16si)(__m512i)(Y), (int)(C),\ 4058 (__v16si)(__m512i)_mm512_setzero_si512 (),\ 4059 (__mmask16)(U))) 4060 4061#define _mm512_shuffle_f64x2(X, Y, C) \ 4062 ((__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df)(__m512d)(X), \ 4063 (__v8df)(__m512d)(Y), (int)(C),\ 4064 (__v8df)(__m512d)_mm512_undefined_pd(),\ 4065 (__mmask8)-1)) 4066 4067#define _mm512_mask_shuffle_f64x2(W, U, X, Y, C) \ 4068 ((__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df)(__m512d)(X), \ 4069 (__v8df)(__m512d)(Y), (int)(C),\ 4070 (__v8df)(__m512d)(W),\ 4071 (__mmask8)(U))) 4072 4073#define _mm512_maskz_shuffle_f64x2(U, X, Y, C) \ 4074 ((__m512d) __builtin_ia32_shuf_f64x2_mask ((__v8df)(__m512d)(X), \ 4075 (__v8df)(__m512d)(Y), (int)(C),\ 4076 (__v8df)(__m512d)_mm512_setzero_pd(),\ 4077 
(__mmask8)(U))) 4078 4079#define _mm512_shuffle_f32x4(X, Y, C) \ 4080 ((__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf)(__m512)(X), \ 4081 (__v16sf)(__m512)(Y), (int)(C),\ 4082 (__v16sf)(__m512)_mm512_undefined_ps(),\ 4083 (__mmask16)-1)) 4084 4085#define _mm512_mask_shuffle_f32x4(W, U, X, Y, C) \ 4086 ((__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf)(__m512)(X), \ 4087 (__v16sf)(__m512)(Y), (int)(C),\ 4088 (__v16sf)(__m512)(W),\ 4089 (__mmask16)(U))) 4090 4091#define _mm512_maskz_shuffle_f32x4(U, X, Y, C) \ 4092 ((__m512) __builtin_ia32_shuf_f32x4_mask ((__v16sf)(__m512)(X), \ 4093 (__v16sf)(__m512)(Y), (int)(C),\ 4094 (__v16sf)(__m512)_mm512_setzero_ps(),\ 4095 (__mmask16)(U))) 4096#endif 4097 4098extern __inline __m512i 4099__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4100_mm512_rolv_epi32 (__m512i __A, __m512i __B) 4101{ 4102 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A, 4103 (__v16si) __B, 4104 (__v16si) 4105 _mm512_undefined_si512 (), 4106 (__mmask16) -1); 4107} 4108 4109extern __inline __m512i 4110__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4111_mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) 4112{ 4113 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A, 4114 (__v16si) __B, 4115 (__v16si) __W, 4116 (__mmask16) __U); 4117} 4118 4119extern __inline __m512i 4120__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4121_mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B) 4122{ 4123 return (__m512i) __builtin_ia32_prolvd512_mask ((__v16si) __A, 4124 (__v16si) __B, 4125 (__v16si) 4126 _mm512_setzero_si512 (), 4127 (__mmask16) __U); 4128} 4129 4130extern __inline __m512i 4131__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4132_mm512_rorv_epi32 (__m512i __A, __m512i __B) 4133{ 4134 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A, 4135 (__v16si) __B, 4136 (__v16si) 4137 _mm512_undefined_si512 (), 4138 
(__mmask16) -1); 4139} 4140 4141extern __inline __m512i 4142__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4143_mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) 4144{ 4145 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A, 4146 (__v16si) __B, 4147 (__v16si) __W, 4148 (__mmask16) __U); 4149} 4150 4151extern __inline __m512i 4152__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4153_mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B) 4154{ 4155 return (__m512i) __builtin_ia32_prorvd512_mask ((__v16si) __A, 4156 (__v16si) __B, 4157 (__v16si) 4158 _mm512_setzero_si512 (), 4159 (__mmask16) __U); 4160} 4161 4162extern __inline __m512i 4163__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4164_mm512_rolv_epi64 (__m512i __A, __m512i __B) 4165{ 4166 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A, 4167 (__v8di) __B, 4168 (__v8di) 4169 _mm512_undefined_si512 (), 4170 (__mmask8) -1); 4171} 4172 4173extern __inline __m512i 4174__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4175_mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) 4176{ 4177 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A, 4178 (__v8di) __B, 4179 (__v8di) __W, 4180 (__mmask8) __U); 4181} 4182 4183extern __inline __m512i 4184__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4185_mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B) 4186{ 4187 return (__m512i) __builtin_ia32_prolvq512_mask ((__v8di) __A, 4188 (__v8di) __B, 4189 (__v8di) 4190 _mm512_setzero_si512 (), 4191 (__mmask8) __U); 4192} 4193 4194extern __inline __m512i 4195__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4196_mm512_rorv_epi64 (__m512i __A, __m512i __B) 4197{ 4198 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A, 4199 (__v8di) __B, 4200 (__v8di) 4201 _mm512_undefined_si512 (), 4202 (__mmask8) -1); 4203} 
4204 4205extern __inline __m512i 4206__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4207_mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) 4208{ 4209 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A, 4210 (__v8di) __B, 4211 (__v8di) __W, 4212 (__mmask8) __U); 4213} 4214 4215extern __inline __m512i 4216__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4217_mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B) 4218{ 4219 return (__m512i) __builtin_ia32_prorvq512_mask ((__v8di) __A, 4220 (__v8di) __B, 4221 (__v8di) 4222 _mm512_setzero_si512 (), 4223 (__mmask8) __U); 4224} 4225 4226#ifdef __OPTIMIZE__ 4227extern __inline __m256i 4228__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4229_mm512_cvtt_roundpd_epi32 (__m512d __A, const int __R) 4230{ 4231 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A, 4232 (__v8si) 4233 _mm256_undefined_si256 (), 4234 (__mmask8) -1, __R); 4235} 4236 4237extern __inline __m256i 4238__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4239_mm512_mask_cvtt_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A, 4240 const int __R) 4241{ 4242 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A, 4243 (__v8si) __W, 4244 (__mmask8) __U, __R); 4245} 4246 4247extern __inline __m256i 4248__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4249_mm512_maskz_cvtt_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R) 4250{ 4251 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A, 4252 (__v8si) 4253 _mm256_setzero_si256 (), 4254 (__mmask8) __U, __R); 4255} 4256 4257extern __inline __m256i 4258__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4259_mm512_cvtt_roundpd_epu32 (__m512d __A, const int __R) 4260{ 4261 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A, 4262 (__v8si) 4263 _mm256_undefined_si256 (), 4264 (__mmask8) -1, __R); 4265} 4266 
4267extern __inline __m256i 4268__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4269_mm512_mask_cvtt_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A, 4270 const int __R) 4271{ 4272 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A, 4273 (__v8si) __W, 4274 (__mmask8) __U, __R); 4275} 4276 4277extern __inline __m256i 4278__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4279_mm512_maskz_cvtt_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R) 4280{ 4281 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A, 4282 (__v8si) 4283 _mm256_setzero_si256 (), 4284 (__mmask8) __U, __R); 4285} 4286#else 4287#define _mm512_cvtt_roundpd_epi32(A, B) \ 4288 ((__m256i)__builtin_ia32_cvttpd2dq512_mask(A, (__v8si)_mm256_undefined_si256(), -1, B)) 4289 4290#define _mm512_mask_cvtt_roundpd_epi32(W, U, A, B) \ 4291 ((__m256i)__builtin_ia32_cvttpd2dq512_mask(A, (__v8si)(W), U, B)) 4292 4293#define _mm512_maskz_cvtt_roundpd_epi32(U, A, B) \ 4294 ((__m256i)__builtin_ia32_cvttpd2dq512_mask(A, (__v8si)_mm256_setzero_si256(), U, B)) 4295 4296#define _mm512_cvtt_roundpd_epu32(A, B) \ 4297 ((__m256i)__builtin_ia32_cvttpd2udq512_mask(A, (__v8si)_mm256_undefined_si256(), -1, B)) 4298 4299#define _mm512_mask_cvtt_roundpd_epu32(W, U, A, B) \ 4300 ((__m256i)__builtin_ia32_cvttpd2udq512_mask(A, (__v8si)(W), U, B)) 4301 4302#define _mm512_maskz_cvtt_roundpd_epu32(U, A, B) \ 4303 ((__m256i)__builtin_ia32_cvttpd2udq512_mask(A, (__v8si)_mm256_setzero_si256(), U, B)) 4304#endif 4305 4306#ifdef __OPTIMIZE__ 4307extern __inline __m256i 4308__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4309_mm512_cvt_roundpd_epi32 (__m512d __A, const int __R) 4310{ 4311 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A, 4312 (__v8si) 4313 _mm256_undefined_si256 (), 4314 (__mmask8) -1, __R); 4315} 4316 4317extern __inline __m256i 4318__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 
4319_mm512_mask_cvt_roundpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A, 4320 const int __R) 4321{ 4322 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A, 4323 (__v8si) __W, 4324 (__mmask8) __U, __R); 4325} 4326 4327extern __inline __m256i 4328__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4329_mm512_maskz_cvt_roundpd_epi32 (__mmask8 __U, __m512d __A, const int __R) 4330{ 4331 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A, 4332 (__v8si) 4333 _mm256_setzero_si256 (), 4334 (__mmask8) __U, __R); 4335} 4336 4337extern __inline __m256i 4338__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4339_mm512_cvt_roundpd_epu32 (__m512d __A, const int __R) 4340{ 4341 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A, 4342 (__v8si) 4343 _mm256_undefined_si256 (), 4344 (__mmask8) -1, __R); 4345} 4346 4347extern __inline __m256i 4348__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4349_mm512_mask_cvt_roundpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A, 4350 const int __R) 4351{ 4352 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A, 4353 (__v8si) __W, 4354 (__mmask8) __U, __R); 4355} 4356 4357extern __inline __m256i 4358__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4359_mm512_maskz_cvt_roundpd_epu32 (__mmask8 __U, __m512d __A, const int __R) 4360{ 4361 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A, 4362 (__v8si) 4363 _mm256_setzero_si256 (), 4364 (__mmask8) __U, __R); 4365} 4366#else 4367#define _mm512_cvt_roundpd_epi32(A, B) \ 4368 ((__m256i)__builtin_ia32_cvtpd2dq512_mask(A, (__v8si)_mm256_undefined_si256(), -1, B)) 4369 4370#define _mm512_mask_cvt_roundpd_epi32(W, U, A, B) \ 4371 ((__m256i)__builtin_ia32_cvtpd2dq512_mask(A, (__v8si)(W), U, B)) 4372 4373#define _mm512_maskz_cvt_roundpd_epi32(U, A, B) \ 4374 ((__m256i)__builtin_ia32_cvtpd2dq512_mask(A, (__v8si)_mm256_setzero_si256(), U, B)) 4375 4376#define 
_mm512_cvt_roundpd_epu32(A, B) \ 4377 ((__m256i)__builtin_ia32_cvtpd2udq512_mask(A, (__v8si)_mm256_undefined_si256(), -1, B)) 4378 4379#define _mm512_mask_cvt_roundpd_epu32(W, U, A, B) \ 4380 ((__m256i)__builtin_ia32_cvtpd2udq512_mask(A, (__v8si)(W), U, B)) 4381 4382#define _mm512_maskz_cvt_roundpd_epu32(U, A, B) \ 4383 ((__m256i)__builtin_ia32_cvtpd2udq512_mask(A, (__v8si)_mm256_setzero_si256(), U, B)) 4384#endif 4385 4386#ifdef __OPTIMIZE__ 4387extern __inline __m512i 4388__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4389_mm512_cvtt_roundps_epi32 (__m512 __A, const int __R) 4390{ 4391 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A, 4392 (__v16si) 4393 _mm512_undefined_si512 (), 4394 (__mmask16) -1, __R); 4395} 4396 4397extern __inline __m512i 4398__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4399_mm512_mask_cvtt_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A, 4400 const int __R) 4401{ 4402 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A, 4403 (__v16si) __W, 4404 (__mmask16) __U, __R); 4405} 4406 4407extern __inline __m512i 4408__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4409_mm512_maskz_cvtt_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R) 4410{ 4411 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A, 4412 (__v16si) 4413 _mm512_setzero_si512 (), 4414 (__mmask16) __U, __R); 4415} 4416 4417extern __inline __m512i 4418__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4419_mm512_cvtt_roundps_epu32 (__m512 __A, const int __R) 4420{ 4421 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A, 4422 (__v16si) 4423 _mm512_undefined_si512 (), 4424 (__mmask16) -1, __R); 4425} 4426 4427extern __inline __m512i 4428__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4429_mm512_mask_cvtt_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A, 4430 const int __R) 4431{ 4432 return (__m512i) 
__builtin_ia32_cvttps2udq512_mask ((__v16sf) __A, 4433 (__v16si) __W, 4434 (__mmask16) __U, __R); 4435} 4436 4437extern __inline __m512i 4438__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4439_mm512_maskz_cvtt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R) 4440{ 4441 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A, 4442 (__v16si) 4443 _mm512_setzero_si512 (), 4444 (__mmask16) __U, __R); 4445} 4446#else 4447#define _mm512_cvtt_roundps_epi32(A, B) \ 4448 ((__m512i)__builtin_ia32_cvttps2dq512_mask(A, (__v16si)_mm512_undefined_si512 (), -1, B)) 4449 4450#define _mm512_mask_cvtt_roundps_epi32(W, U, A, B) \ 4451 ((__m512i)__builtin_ia32_cvttps2dq512_mask(A, (__v16si)(W), U, B)) 4452 4453#define _mm512_maskz_cvtt_roundps_epi32(U, A, B) \ 4454 ((__m512i)__builtin_ia32_cvttps2dq512_mask(A, (__v16si)_mm512_setzero_si512 (), U, B)) 4455 4456#define _mm512_cvtt_roundps_epu32(A, B) \ 4457 ((__m512i)__builtin_ia32_cvttps2udq512_mask(A, (__v16si)_mm512_undefined_si512 (), -1, B)) 4458 4459#define _mm512_mask_cvtt_roundps_epu32(W, U, A, B) \ 4460 ((__m512i)__builtin_ia32_cvttps2udq512_mask(A, (__v16si)(W), U, B)) 4461 4462#define _mm512_maskz_cvtt_roundps_epu32(U, A, B) \ 4463 ((__m512i)__builtin_ia32_cvttps2udq512_mask(A, (__v16si)_mm512_setzero_si512 (), U, B)) 4464#endif 4465 4466#ifdef __OPTIMIZE__ 4467extern __inline __m512i 4468__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4469_mm512_cvt_roundps_epi32 (__m512 __A, const int __R) 4470{ 4471 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A, 4472 (__v16si) 4473 _mm512_undefined_si512 (), 4474 (__mmask16) -1, __R); 4475} 4476 4477extern __inline __m512i 4478__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4479_mm512_mask_cvt_roundps_epi32 (__m512i __W, __mmask16 __U, __m512 __A, 4480 const int __R) 4481{ 4482 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A, 4483 (__v16si) __W, 4484 (__mmask16) __U, __R); 4485} 
4486 4487extern __inline __m512i 4488__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4489_mm512_maskz_cvt_roundps_epi32 (__mmask16 __U, __m512 __A, const int __R) 4490{ 4491 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A, 4492 (__v16si) 4493 _mm512_setzero_si512 (), 4494 (__mmask16) __U, __R); 4495} 4496 4497extern __inline __m512i 4498__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4499_mm512_cvt_roundps_epu32 (__m512 __A, const int __R) 4500{ 4501 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A, 4502 (__v16si) 4503 _mm512_undefined_si512 (), 4504 (__mmask16) -1, __R); 4505} 4506 4507extern __inline __m512i 4508__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4509_mm512_mask_cvt_roundps_epu32 (__m512i __W, __mmask16 __U, __m512 __A, 4510 const int __R) 4511{ 4512 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A, 4513 (__v16si) __W, 4514 (__mmask16) __U, __R); 4515} 4516 4517extern __inline __m512i 4518__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4519_mm512_maskz_cvt_roundps_epu32 (__mmask16 __U, __m512 __A, const int __R) 4520{ 4521 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A, 4522 (__v16si) 4523 _mm512_setzero_si512 (), 4524 (__mmask16) __U, __R); 4525} 4526#else 4527#define _mm512_cvt_roundps_epi32(A, B) \ 4528 ((__m512i)__builtin_ia32_cvtps2dq512_mask(A, (__v16si)_mm512_undefined_si512 (), -1, B)) 4529 4530#define _mm512_mask_cvt_roundps_epi32(W, U, A, B) \ 4531 ((__m512i)__builtin_ia32_cvtps2dq512_mask(A, (__v16si)(W), U, B)) 4532 4533#define _mm512_maskz_cvt_roundps_epi32(U, A, B) \ 4534 ((__m512i)__builtin_ia32_cvtps2dq512_mask(A, (__v16si)_mm512_setzero_si512 (), U, B)) 4535 4536#define _mm512_cvt_roundps_epu32(A, B) \ 4537 ((__m512i)__builtin_ia32_cvtps2udq512_mask(A, (__v16si)_mm512_undefined_si512 (), -1, B)) 4538 4539#define _mm512_mask_cvt_roundps_epu32(W, U, A, B) \ 4540 
((__m512i)__builtin_ia32_cvtps2udq512_mask(A, (__v16si)(W), U, B)) 4541 4542#define _mm512_maskz_cvt_roundps_epu32(U, A, B) \ 4543 ((__m512i)__builtin_ia32_cvtps2udq512_mask(A, (__v16si)_mm512_setzero_si512 (), U, B)) 4544#endif 4545 4546extern __inline __m128d 4547__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4548_mm_cvtu32_sd (__m128d __A, unsigned __B) 4549{ 4550 return (__m128d) __builtin_ia32_cvtusi2sd32 ((__v2df) __A, __B); 4551} 4552 4553#ifdef __x86_64__ 4554#ifdef __OPTIMIZE__ 4555extern __inline __m128d 4556__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4557_mm_cvt_roundu64_sd (__m128d __A, unsigned long long __B, const int __R) 4558{ 4559 return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B, __R); 4560} 4561 4562extern __inline __m128d 4563__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4564_mm_cvt_roundi64_sd (__m128d __A, long long __B, const int __R) 4565{ 4566 return (__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) __A, __B, __R); 4567} 4568 4569extern __inline __m128d 4570__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4571_mm_cvt_roundsi64_sd (__m128d __A, long long __B, const int __R) 4572{ 4573 return (__m128d) __builtin_ia32_cvtsi2sd64 ((__v2df) __A, __B, __R); 4574} 4575#else 4576#define _mm_cvt_roundu64_sd(A, B, C) \ 4577 (__m128d)__builtin_ia32_cvtusi2sd64(A, B, C) 4578 4579#define _mm_cvt_roundi64_sd(A, B, C) \ 4580 (__m128d)__builtin_ia32_cvtsi2sd64(A, B, C) 4581 4582#define _mm_cvt_roundsi64_sd(A, B, C) \ 4583 (__m128d)__builtin_ia32_cvtsi2sd64(A, B, C) 4584#endif 4585 4586#endif 4587 4588#ifdef __OPTIMIZE__ 4589extern __inline __m128 4590__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4591_mm_cvt_roundu32_ss (__m128 __A, unsigned __B, const int __R) 4592{ 4593 return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B, __R); 4594} 4595 4596extern __inline __m128 4597__attribute__ ((__gnu_inline__, __always_inline__, 
__artificial__)) 4598_mm_cvt_roundsi32_ss (__m128 __A, int __B, const int __R) 4599{ 4600 return (__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) __A, __B, __R); 4601} 4602 4603extern __inline __m128 4604__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4605_mm_cvt_roundi32_ss (__m128 __A, int __B, const int __R) 4606{ 4607 return (__m128) __builtin_ia32_cvtsi2ss32 ((__v4sf) __A, __B, __R); 4608} 4609#else 4610#define _mm_cvt_roundu32_ss(A, B, C) \ 4611 (__m128)__builtin_ia32_cvtusi2ss32(A, B, C) 4612 4613#define _mm_cvt_roundi32_ss(A, B, C) \ 4614 (__m128)__builtin_ia32_cvtsi2ss32(A, B, C) 4615 4616#define _mm_cvt_roundsi32_ss(A, B, C) \ 4617 (__m128)__builtin_ia32_cvtsi2ss32(A, B, C) 4618#endif 4619 4620#ifdef __x86_64__ 4621#ifdef __OPTIMIZE__ 4622extern __inline __m128 4623__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4624_mm_cvt_roundu64_ss (__m128 __A, unsigned long long __B, const int __R) 4625{ 4626 return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B, __R); 4627} 4628 4629extern __inline __m128 4630__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4631_mm_cvt_roundsi64_ss (__m128 __A, long long __B, const int __R) 4632{ 4633 return (__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) __A, __B, __R); 4634} 4635 4636extern __inline __m128 4637__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4638_mm_cvt_roundi64_ss (__m128 __A, long long __B, const int __R) 4639{ 4640 return (__m128) __builtin_ia32_cvtsi2ss64 ((__v4sf) __A, __B, __R); 4641} 4642#else 4643#define _mm_cvt_roundu64_ss(A, B, C) \ 4644 (__m128)__builtin_ia32_cvtusi2ss64(A, B, C) 4645 4646#define _mm_cvt_roundi64_ss(A, B, C) \ 4647 (__m128)__builtin_ia32_cvtsi2ss64(A, B, C) 4648 4649#define _mm_cvt_roundsi64_ss(A, B, C) \ 4650 (__m128)__builtin_ia32_cvtsi2ss64(A, B, C) 4651#endif 4652 4653#endif 4654 4655extern __inline __m128i 4656__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4657_mm512_cvtepi32_epi8 
(__m512i __A) 4658{ 4659 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A, 4660 (__v16qi) 4661 _mm_undefined_si128 (), 4662 (__mmask16) -1); 4663} 4664 4665extern __inline void 4666__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4667_mm512_mask_cvtepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A) 4668{ 4669 __builtin_ia32_pmovdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M); 4670} 4671 4672extern __inline __m128i 4673__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4674_mm512_mask_cvtepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A) 4675{ 4676 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A, 4677 (__v16qi) __O, __M); 4678} 4679 4680extern __inline __m128i 4681__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4682_mm512_maskz_cvtepi32_epi8 (__mmask16 __M, __m512i __A) 4683{ 4684 return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A, 4685 (__v16qi) 4686 _mm_setzero_si128 (), 4687 __M); 4688} 4689 4690extern __inline __m128i 4691__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4692_mm512_cvtsepi32_epi8 (__m512i __A) 4693{ 4694 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A, 4695 (__v16qi) 4696 _mm_undefined_si128 (), 4697 (__mmask16) -1); 4698} 4699 4700extern __inline void 4701__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4702_mm512_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A) 4703{ 4704 __builtin_ia32_pmovsdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M); 4705} 4706 4707extern __inline __m128i 4708__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4709_mm512_mask_cvtsepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A) 4710{ 4711 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A, 4712 (__v16qi) __O, __M); 4713} 4714 4715extern __inline __m128i 4716__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4717_mm512_maskz_cvtsepi32_epi8 
(__mmask16 __M, __m512i __A) 4718{ 4719 return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A, 4720 (__v16qi) 4721 _mm_setzero_si128 (), 4722 __M); 4723} 4724 4725extern __inline __m128i 4726__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4727_mm512_cvtusepi32_epi8 (__m512i __A) 4728{ 4729 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A, 4730 (__v16qi) 4731 _mm_undefined_si128 (), 4732 (__mmask16) -1); 4733} 4734 4735extern __inline void 4736__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4737_mm512_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A) 4738{ 4739 __builtin_ia32_pmovusdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M); 4740} 4741 4742extern __inline __m128i 4743__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4744_mm512_mask_cvtusepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A) 4745{ 4746 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A, 4747 (__v16qi) __O, 4748 __M); 4749} 4750 4751extern __inline __m128i 4752__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4753_mm512_maskz_cvtusepi32_epi8 (__mmask16 __M, __m512i __A) 4754{ 4755 return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A, 4756 (__v16qi) 4757 _mm_setzero_si128 (), 4758 __M); 4759} 4760 4761extern __inline __m256i 4762__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4763_mm512_cvtepi32_epi16 (__m512i __A) 4764{ 4765 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A, 4766 (__v16hi) 4767 _mm256_undefined_si256 (), 4768 (__mmask16) -1); 4769} 4770 4771extern __inline void 4772__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4773_mm512_mask_cvtepi32_storeu_epi16 (void * __P, __mmask16 __M, __m512i __A) 4774{ 4775 __builtin_ia32_pmovdw512mem_mask ((__v16hi *) __P, (__v16si) __A, __M); 4776} 4777 4778extern __inline __m256i 4779__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 
4780_mm512_mask_cvtepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A) 4781{ 4782 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A, 4783 (__v16hi) __O, __M); 4784} 4785 4786extern __inline __m256i 4787__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4788_mm512_maskz_cvtepi32_epi16 (__mmask16 __M, __m512i __A) 4789{ 4790 return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A, 4791 (__v16hi) 4792 _mm256_setzero_si256 (), 4793 __M); 4794} 4795 4796extern __inline __m256i 4797__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4798_mm512_cvtsepi32_epi16 (__m512i __A) 4799{ 4800 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A, 4801 (__v16hi) 4802 _mm256_undefined_si256 (), 4803 (__mmask16) -1); 4804} 4805 4806extern __inline void 4807__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4808_mm512_mask_cvtsepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A) 4809{ 4810 __builtin_ia32_pmovsdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M); 4811} 4812 4813extern __inline __m256i 4814__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4815_mm512_mask_cvtsepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A) 4816{ 4817 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A, 4818 (__v16hi) __O, __M); 4819} 4820 4821extern __inline __m256i 4822__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4823_mm512_maskz_cvtsepi32_epi16 (__mmask16 __M, __m512i __A) 4824{ 4825 return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A, 4826 (__v16hi) 4827 _mm256_setzero_si256 (), 4828 __M); 4829} 4830 4831extern __inline __m256i 4832__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4833_mm512_cvtusepi32_epi16 (__m512i __A) 4834{ 4835 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A, 4836 (__v16hi) 4837 _mm256_undefined_si256 (), 4838 (__mmask16) -1); 4839} 4840 4841extern __inline void 4842__attribute__ 
((__gnu_inline__, __always_inline__, __artificial__)) 4843_mm512_mask_cvtusepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A) 4844{ 4845 __builtin_ia32_pmovusdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M); 4846} 4847 4848extern __inline __m256i 4849__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4850_mm512_mask_cvtusepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A) 4851{ 4852 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A, 4853 (__v16hi) __O, 4854 __M); 4855} 4856 4857extern __inline __m256i 4858__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4859_mm512_maskz_cvtusepi32_epi16 (__mmask16 __M, __m512i __A) 4860{ 4861 return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A, 4862 (__v16hi) 4863 _mm256_setzero_si256 (), 4864 __M); 4865} 4866 4867extern __inline __m256i 4868__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4869_mm512_cvtepi64_epi32 (__m512i __A) 4870{ 4871 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A, 4872 (__v8si) 4873 _mm256_undefined_si256 (), 4874 (__mmask8) -1); 4875} 4876 4877extern __inline void 4878__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4879_mm512_mask_cvtepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A) 4880{ 4881 __builtin_ia32_pmovqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M); 4882} 4883 4884extern __inline __m256i 4885__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4886_mm512_mask_cvtepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A) 4887{ 4888 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A, 4889 (__v8si) __O, __M); 4890} 4891 4892extern __inline __m256i 4893__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4894_mm512_maskz_cvtepi64_epi32 (__mmask8 __M, __m512i __A) 4895{ 4896 return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A, 4897 (__v8si) 4898 _mm256_setzero_si256 (), 4899 __M); 4900} 4901 4902extern __inline __m256i 
4903__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4904_mm512_cvtsepi64_epi32 (__m512i __A) 4905{ 4906 __v8si __O; 4907 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A, 4908 (__v8si) 4909 _mm256_undefined_si256 (), 4910 (__mmask8) -1); 4911} 4912 4913extern __inline void 4914__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4915_mm512_mask_cvtsepi64_storeu_epi32 (void *__P, __mmask8 __M, __m512i __A) 4916{ 4917 __builtin_ia32_pmovsqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M); 4918} 4919 4920extern __inline __m256i 4921__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4922_mm512_mask_cvtsepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A) 4923{ 4924 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A, 4925 (__v8si) __O, __M); 4926} 4927 4928extern __inline __m256i 4929__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4930_mm512_maskz_cvtsepi64_epi32 (__mmask8 __M, __m512i __A) 4931{ 4932 return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A, 4933 (__v8si) 4934 _mm256_setzero_si256 (), 4935 __M); 4936} 4937 4938extern __inline __m256i 4939__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4940_mm512_cvtusepi64_epi32 (__m512i __A) 4941{ 4942 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A, 4943 (__v8si) 4944 _mm256_undefined_si256 (), 4945 (__mmask8) -1); 4946} 4947 4948extern __inline void 4949__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4950_mm512_mask_cvtusepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A) 4951{ 4952 __builtin_ia32_pmovusqd512mem_mask ((__v8si*) __P, (__v8di) __A, __M); 4953} 4954 4955extern __inline __m256i 4956__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4957_mm512_mask_cvtusepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A) 4958{ 4959 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A, 4960 (__v8si) __O, __M); 4961} 4962 4963extern 
__inline __m256i 4964__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4965_mm512_maskz_cvtusepi64_epi32 (__mmask8 __M, __m512i __A) 4966{ 4967 return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A, 4968 (__v8si) 4969 _mm256_setzero_si256 (), 4970 __M); 4971} 4972 4973extern __inline __m128i 4974__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4975_mm512_cvtepi64_epi16 (__m512i __A) 4976{ 4977 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A, 4978 (__v8hi) 4979 _mm_undefined_si128 (), 4980 (__mmask8) -1); 4981} 4982 4983extern __inline void 4984__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4985_mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A) 4986{ 4987 __builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M); 4988} 4989 4990extern __inline __m128i 4991__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 4992_mm512_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A) 4993{ 4994 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A, 4995 (__v8hi) __O, __M); 4996} 4997 4998extern __inline __m128i 4999__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5000_mm512_maskz_cvtepi64_epi16 (__mmask8 __M, __m512i __A) 5001{ 5002 return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A, 5003 (__v8hi) 5004 _mm_setzero_si128 (), 5005 __M); 5006} 5007 5008extern __inline __m128i 5009__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5010_mm512_cvtsepi64_epi16 (__m512i __A) 5011{ 5012 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A, 5013 (__v8hi) 5014 _mm_undefined_si128 (), 5015 (__mmask8) -1); 5016} 5017 5018extern __inline void 5019__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5020_mm512_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m512i __A) 5021{ 5022 __builtin_ia32_pmovsqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M); 5023} 5024 5025extern 
__inline __m128i 5026__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5027_mm512_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A) 5028{ 5029 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A, 5030 (__v8hi) __O, __M); 5031} 5032 5033extern __inline __m128i 5034__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5035_mm512_maskz_cvtsepi64_epi16 (__mmask8 __M, __m512i __A) 5036{ 5037 return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A, 5038 (__v8hi) 5039 _mm_setzero_si128 (), 5040 __M); 5041} 5042 5043extern __inline __m128i 5044__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5045_mm512_cvtusepi64_epi16 (__m512i __A) 5046{ 5047 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A, 5048 (__v8hi) 5049 _mm_undefined_si128 (), 5050 (__mmask8) -1); 5051} 5052 5053extern __inline void 5054__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5055_mm512_mask_cvtusepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A) 5056{ 5057 __builtin_ia32_pmovusqw512mem_mask ((__v8hi*) __P, (__v8di) __A, __M); 5058} 5059 5060extern __inline __m128i 5061__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5062_mm512_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A) 5063{ 5064 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A, 5065 (__v8hi) __O, __M); 5066} 5067 5068extern __inline __m128i 5069__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5070_mm512_maskz_cvtusepi64_epi16 (__mmask8 __M, __m512i __A) 5071{ 5072 return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A, 5073 (__v8hi) 5074 _mm_setzero_si128 (), 5075 __M); 5076} 5077 5078extern __inline __m128i 5079__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5080_mm512_cvtepi64_epi8 (__m512i __A) 5081{ 5082 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A, 5083 (__v16qi) 5084 _mm_undefined_si128 (), 5085 (__mmask8) -1); 5086} 
5087 5088extern __inline void 5089__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5090_mm512_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A) 5091{ 5092 __builtin_ia32_pmovqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M); 5093} 5094 5095extern __inline __m128i 5096__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5097_mm512_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A) 5098{ 5099 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A, 5100 (__v16qi) __O, __M); 5101} 5102 5103extern __inline __m128i 5104__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5105_mm512_maskz_cvtepi64_epi8 (__mmask8 __M, __m512i __A) 5106{ 5107 return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A, 5108 (__v16qi) 5109 _mm_setzero_si128 (), 5110 __M); 5111} 5112 5113extern __inline __m128i 5114__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5115_mm512_cvtsepi64_epi8 (__m512i __A) 5116{ 5117 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A, 5118 (__v16qi) 5119 _mm_undefined_si128 (), 5120 (__mmask8) -1); 5121} 5122 5123extern __inline void 5124__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5125_mm512_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A) 5126{ 5127 __builtin_ia32_pmovsqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M); 5128} 5129 5130extern __inline __m128i 5131__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5132_mm512_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A) 5133{ 5134 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A, 5135 (__v16qi) __O, __M); 5136} 5137 5138extern __inline __m128i 5139__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5140_mm512_maskz_cvtsepi64_epi8 (__mmask8 __M, __m512i __A) 5141{ 5142 return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A, 5143 (__v16qi) 5144 _mm_setzero_si128 (), 5145 __M); 5146} 5147 5148extern 
__inline __m128i 5149__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5150_mm512_cvtusepi64_epi8 (__m512i __A) 5151{ 5152 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A, 5153 (__v16qi) 5154 _mm_undefined_si128 (), 5155 (__mmask8) -1); 5156} 5157 5158extern __inline void 5159__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5160_mm512_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A) 5161{ 5162 __builtin_ia32_pmovusqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M); 5163} 5164 5165extern __inline __m128i 5166__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5167_mm512_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A) 5168{ 5169 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A, 5170 (__v16qi) __O, 5171 __M); 5172} 5173 5174extern __inline __m128i 5175__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5176_mm512_maskz_cvtusepi64_epi8 (__mmask8 __M, __m512i __A) 5177{ 5178 return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A, 5179 (__v16qi) 5180 _mm_setzero_si128 (), 5181 __M); 5182} 5183 5184extern __inline __m512d 5185__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5186_mm512_cvtepi32_pd (__m256i __A) 5187{ 5188 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A, 5189 (__v8df) 5190 _mm512_undefined_pd (), 5191 (__mmask8) -1); 5192} 5193 5194extern __inline __m512d 5195__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5196_mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A) 5197{ 5198 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A, 5199 (__v8df) __W, 5200 (__mmask8) __U); 5201} 5202 5203extern __inline __m512d 5204__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5205_mm512_maskz_cvtepi32_pd (__mmask8 __U, __m256i __A) 5206{ 5207 return (__m512d) __builtin_ia32_cvtdq2pd512_mask ((__v8si) __A, 5208 (__v8df) 5209 _mm512_setzero_pd (), 
5210 (__mmask8) __U); 5211} 5212 5213extern __inline __m512d 5214__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5215_mm512_cvtepu32_pd (__m256i __A) 5216{ 5217 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A, 5218 (__v8df) 5219 _mm512_undefined_pd (), 5220 (__mmask8) -1); 5221} 5222 5223extern __inline __m512d 5224__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5225_mm512_mask_cvtepu32_pd (__m512d __W, __mmask8 __U, __m256i __A) 5226{ 5227 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A, 5228 (__v8df) __W, 5229 (__mmask8) __U); 5230} 5231 5232extern __inline __m512d 5233__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5234_mm512_maskz_cvtepu32_pd (__mmask8 __U, __m256i __A) 5235{ 5236 return (__m512d) __builtin_ia32_cvtudq2pd512_mask ((__v8si) __A, 5237 (__v8df) 5238 _mm512_setzero_pd (), 5239 (__mmask8) __U); 5240} 5241 5242#ifdef __OPTIMIZE__ 5243extern __inline __m512 5244__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5245_mm512_cvt_roundepi32_ps (__m512i __A, const int __R) 5246{ 5247 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A, 5248 (__v16sf) 5249 _mm512_undefined_ps (), 5250 (__mmask16) -1, __R); 5251} 5252 5253extern __inline __m512 5254__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5255_mm512_mask_cvt_roundepi32_ps (__m512 __W, __mmask16 __U, __m512i __A, 5256 const int __R) 5257{ 5258 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A, 5259 (__v16sf) __W, 5260 (__mmask16) __U, __R); 5261} 5262 5263extern __inline __m512 5264__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5265_mm512_maskz_cvt_roundepi32_ps (__mmask16 __U, __m512i __A, const int __R) 5266{ 5267 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A, 5268 (__v16sf) 5269 _mm512_setzero_ps (), 5270 (__mmask16) __U, __R); 5271} 5272 5273extern __inline __m512 5274__attribute__ ((__gnu_inline__, 
__always_inline__, __artificial__)) 5275_mm512_cvt_roundepu32_ps (__m512i __A, const int __R) 5276{ 5277 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A, 5278 (__v16sf) 5279 _mm512_undefined_ps (), 5280 (__mmask16) -1, __R); 5281} 5282 5283extern __inline __m512 5284__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5285_mm512_mask_cvt_roundepu32_ps (__m512 __W, __mmask16 __U, __m512i __A, 5286 const int __R) 5287{ 5288 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A, 5289 (__v16sf) __W, 5290 (__mmask16) __U, __R); 5291} 5292 5293extern __inline __m512 5294__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5295_mm512_maskz_cvt_roundepu32_ps (__mmask16 __U, __m512i __A, const int __R) 5296{ 5297 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A, 5298 (__v16sf) 5299 _mm512_setzero_ps (), 5300 (__mmask16) __U, __R); 5301} 5302 5303#else 5304#define _mm512_cvt_roundepi32_ps(A, B) \ 5305 (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(A), (__v16sf)_mm512_undefined_ps(), -1, B) 5306 5307#define _mm512_mask_cvt_roundepi32_ps(W, U, A, B) \ 5308 (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(A), W, U, B) 5309 5310#define _mm512_maskz_cvt_roundepi32_ps(U, A, B) \ 5311 (__m512)__builtin_ia32_cvtdq2ps512_mask((__v16si)(A), (__v16sf)_mm512_setzero_ps(), U, B) 5312 5313#define _mm512_cvt_roundepu32_ps(A, B) \ 5314 (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(A), (__v16sf)_mm512_undefined_ps(), -1, B) 5315 5316#define _mm512_mask_cvt_roundepu32_ps(W, U, A, B) \ 5317 (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(A), W, U, B) 5318 5319#define _mm512_maskz_cvt_roundepu32_ps(U, A, B) \ 5320 (__m512)__builtin_ia32_cvtudq2ps512_mask((__v16si)(A), (__v16sf)_mm512_setzero_ps(), U, B) 5321#endif 5322 5323#ifdef __OPTIMIZE__ 5324extern __inline __m256d 5325__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5326_mm512_extractf64x4_pd (__m512d __A, const int __imm) 5327{ 5328 
return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A, 5329 __imm, 5330 (__v4df) 5331 _mm256_undefined_pd (), 5332 (__mmask8) -1); 5333} 5334 5335extern __inline __m256d 5336__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5337_mm512_mask_extractf64x4_pd (__m256d __W, __mmask8 __U, __m512d __A, 5338 const int __imm) 5339{ 5340 return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A, 5341 __imm, 5342 (__v4df) __W, 5343 (__mmask8) __U); 5344} 5345 5346extern __inline __m256d 5347__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5348_mm512_maskz_extractf64x4_pd (__mmask8 __U, __m512d __A, const int __imm) 5349{ 5350 return (__m256d) __builtin_ia32_extractf64x4_mask ((__v8df) __A, 5351 __imm, 5352 (__v4df) 5353 _mm256_setzero_pd (), 5354 (__mmask8) __U); 5355} 5356 5357extern __inline __m128 5358__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5359_mm512_extractf32x4_ps (__m512 __A, const int __imm) 5360{ 5361 return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A, 5362 __imm, 5363 (__v4sf) 5364 _mm_undefined_ps (), 5365 (__mmask8) -1); 5366} 5367 5368extern __inline __m128 5369__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5370_mm512_mask_extractf32x4_ps (__m128 __W, __mmask8 __U, __m512 __A, 5371 const int __imm) 5372{ 5373 return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A, 5374 __imm, 5375 (__v4sf) __W, 5376 (__mmask8) __U); 5377} 5378 5379extern __inline __m128 5380__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5381_mm512_maskz_extractf32x4_ps (__mmask8 __U, __m512 __A, const int __imm) 5382{ 5383 return (__m128) __builtin_ia32_extractf32x4_mask ((__v16sf) __A, 5384 __imm, 5385 (__v4sf) 5386 _mm_setzero_ps (), 5387 (__mmask8) __U); 5388} 5389 5390extern __inline __m256i 5391__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5392_mm512_extracti64x4_epi64 (__m512i __A, const int __imm) 5393{ 5394 return (__m256i) 
__builtin_ia32_extracti64x4_mask ((__v8di) __A, 5395 __imm, 5396 (__v4di) 5397 _mm256_undefined_si256 (), 5398 (__mmask8) -1); 5399} 5400 5401extern __inline __m256i 5402__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5403_mm512_mask_extracti64x4_epi64 (__m256i __W, __mmask8 __U, __m512i __A, 5404 const int __imm) 5405{ 5406 return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A, 5407 __imm, 5408 (__v4di) __W, 5409 (__mmask8) __U); 5410} 5411 5412extern __inline __m256i 5413__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5414_mm512_maskz_extracti64x4_epi64 (__mmask8 __U, __m512i __A, const int __imm) 5415{ 5416 return (__m256i) __builtin_ia32_extracti64x4_mask ((__v8di) __A, 5417 __imm, 5418 (__v4di) 5419 _mm256_setzero_si256 (), 5420 (__mmask8) __U); 5421} 5422 5423extern __inline __m128i 5424__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5425_mm512_extracti32x4_epi32 (__m512i __A, const int __imm) 5426{ 5427 return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A, 5428 __imm, 5429 (__v4si) 5430 _mm_undefined_si128 (), 5431 (__mmask8) -1); 5432} 5433 5434extern __inline __m128i 5435__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5436_mm512_mask_extracti32x4_epi32 (__m128i __W, __mmask8 __U, __m512i __A, 5437 const int __imm) 5438{ 5439 return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A, 5440 __imm, 5441 (__v4si) __W, 5442 (__mmask8) __U); 5443} 5444 5445extern __inline __m128i 5446__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5447_mm512_maskz_extracti32x4_epi32 (__mmask8 __U, __m512i __A, const int __imm) 5448{ 5449 return (__m128i) __builtin_ia32_extracti32x4_mask ((__v16si) __A, 5450 __imm, 5451 (__v4si) 5452 _mm_setzero_si128 (), 5453 (__mmask8) __U); 5454} 5455#else 5456 5457#define _mm512_extractf64x4_pd(X, C) \ 5458 ((__m256d) __builtin_ia32_extractf64x4_mask ((__v8df)(__m512d) (X), \ 5459 (int) (C),\ 5460 
(__v4df)(__m256d)_mm256_undefined_pd(),\ 5461 (__mmask8)-1)) 5462 5463#define _mm512_mask_extractf64x4_pd(W, U, X, C) \ 5464 ((__m256d) __builtin_ia32_extractf64x4_mask ((__v8df)(__m512d) (X), \ 5465 (int) (C),\ 5466 (__v4df)(__m256d)(W),\ 5467 (__mmask8)(U))) 5468 5469#define _mm512_maskz_extractf64x4_pd(U, X, C) \ 5470 ((__m256d) __builtin_ia32_extractf64x4_mask ((__v8df)(__m512d) (X), \ 5471 (int) (C),\ 5472 (__v4df)(__m256d)_mm256_setzero_pd(),\ 5473 (__mmask8)(U))) 5474 5475#define _mm512_extractf32x4_ps(X, C) \ 5476 ((__m128) __builtin_ia32_extractf32x4_mask ((__v16sf)(__m512) (X), \ 5477 (int) (C),\ 5478 (__v4sf)(__m128)_mm_undefined_ps(),\ 5479 (__mmask8)-1)) 5480 5481#define _mm512_mask_extractf32x4_ps(W, U, X, C) \ 5482 ((__m128) __builtin_ia32_extractf32x4_mask ((__v16sf)(__m512) (X), \ 5483 (int) (C),\ 5484 (__v4sf)(__m128)(W),\ 5485 (__mmask8)(U))) 5486 5487#define _mm512_maskz_extractf32x4_ps(U, X, C) \ 5488 ((__m128) __builtin_ia32_extractf32x4_mask ((__v16sf)(__m512) (X), \ 5489 (int) (C),\ 5490 (__v4sf)(__m128)_mm_setzero_ps(),\ 5491 (__mmask8)(U))) 5492 5493#define _mm512_extracti64x4_epi64(X, C) \ 5494 ((__m256i) __builtin_ia32_extracti64x4_mask ((__v8di)(__m512i) (X), \ 5495 (int) (C),\ 5496 (__v4di)(__m256i)_mm256_undefined_si256 (),\ 5497 (__mmask8)-1)) 5498 5499#define _mm512_mask_extracti64x4_epi64(W, U, X, C) \ 5500 ((__m256i) __builtin_ia32_extracti64x4_mask ((__v8di)(__m512i) (X), \ 5501 (int) (C),\ 5502 (__v4di)(__m256i)(W),\ 5503 (__mmask8)(U))) 5504 5505#define _mm512_maskz_extracti64x4_epi64(U, X, C) \ 5506 ((__m256i) __builtin_ia32_extracti64x4_mask ((__v8di)(__m512i) (X), \ 5507 (int) (C),\ 5508 (__v4di)(__m256i)_mm256_setzero_si256 (),\ 5509 (__mmask8)(U))) 5510 5511#define _mm512_extracti32x4_epi32(X, C) \ 5512 ((__m128i) __builtin_ia32_extracti32x4_mask ((__v16si)(__m512i) (X), \ 5513 (int) (C),\ 5514 (__v4si)(__m128i)_mm_undefined_si128 (),\ 5515 (__mmask8)-1)) 5516 5517#define _mm512_mask_extracti32x4_epi32(W, U, X, C) \ 5518 
((__m128i) __builtin_ia32_extracti32x4_mask ((__v16si)(__m512i) (X), \ 5519 (int) (C),\ 5520 (__v4si)(__m128i)(W),\ 5521 (__mmask8)(U))) 5522 5523#define _mm512_maskz_extracti32x4_epi32(U, X, C) \ 5524 ((__m128i) __builtin_ia32_extracti32x4_mask ((__v16si)(__m512i) (X), \ 5525 (int) (C),\ 5526 (__v4si)(__m128i)_mm_setzero_si128 (),\ 5527 (__mmask8)(U))) 5528#endif 5529 5530#ifdef __OPTIMIZE__ 5531extern __inline __m512i 5532__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5533_mm512_inserti32x4 (__m512i __A, __m128i __B, const int __imm) 5534{ 5535 return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __A, 5536 (__v4si) __B, 5537 __imm, 5538 (__v16si) __A, -1); 5539} 5540 5541extern __inline __m512 5542__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5543_mm512_insertf32x4 (__m512 __A, __m128 __B, const int __imm) 5544{ 5545 return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __A, 5546 (__v4sf) __B, 5547 __imm, 5548 (__v16sf) __A, -1); 5549} 5550 5551extern __inline __m512i 5552__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5553_mm512_inserti64x4 (__m512i __A, __m256i __B, const int __imm) 5554{ 5555 return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A, 5556 (__v4di) __B, 5557 __imm, 5558 (__v8di) 5559 _mm512_undefined_si512 (), 5560 (__mmask8) -1); 5561} 5562 5563extern __inline __m512i 5564__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5565_mm512_mask_inserti64x4 (__m512i __W, __mmask8 __U, __m512i __A, 5566 __m256i __B, const int __imm) 5567{ 5568 return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A, 5569 (__v4di) __B, 5570 __imm, 5571 (__v8di) __W, 5572 (__mmask8) __U); 5573} 5574 5575extern __inline __m512i 5576__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5577_mm512_maskz_inserti64x4 (__mmask8 __U, __m512i __A, __m256i __B, 5578 const int __imm) 5579{ 5580 return (__m512i) __builtin_ia32_inserti64x4_mask ((__v8di) __A, 5581 
(__v4di) __B, 5582 __imm, 5583 (__v8di) 5584 _mm512_setzero_si512 (), 5585 (__mmask8) __U); 5586} 5587 5588extern __inline __m512d 5589__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5590_mm512_insertf64x4 (__m512d __A, __m256d __B, const int __imm) 5591{ 5592 return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A, 5593 (__v4df) __B, 5594 __imm, 5595 (__v8df) 5596 _mm512_undefined_pd (), 5597 (__mmask8) -1); 5598} 5599 5600extern __inline __m512d 5601__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5602_mm512_mask_insertf64x4 (__m512d __W, __mmask8 __U, __m512d __A, 5603 __m256d __B, const int __imm) 5604{ 5605 return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A, 5606 (__v4df) __B, 5607 __imm, 5608 (__v8df) __W, 5609 (__mmask8) __U); 5610} 5611 5612extern __inline __m512d 5613__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5614_mm512_maskz_insertf64x4 (__mmask8 __U, __m512d __A, __m256d __B, 5615 const int __imm) 5616{ 5617 return (__m512d) __builtin_ia32_insertf64x4_mask ((__v8df) __A, 5618 (__v4df) __B, 5619 __imm, 5620 (__v8df) 5621 _mm512_setzero_pd (), 5622 (__mmask8) __U); 5623} 5624#else 5625#define _mm512_insertf32x4(X, Y, C) \ 5626 ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X), \ 5627 (__v4sf)(__m128) (Y), (int) (C), (__v16sf)(__m512) (X), (__mmask16)(-1))) 5628 5629#define _mm512_inserti32x4(X, Y, C) \ 5630 ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X), \ 5631 (__v4si)(__m128i) (Y), (int) (C), (__v16si)(__m512i) (X), (__mmask16)(-1))) 5632 5633#define _mm512_insertf64x4(X, Y, C) \ 5634 ((__m512d) __builtin_ia32_insertf64x4_mask ((__v8df)(__m512d) (X), \ 5635 (__v4df)(__m256d) (Y), (int) (C), \ 5636 (__v8df)(__m512d)_mm512_undefined_pd(), \ 5637 (__mmask8)-1)) 5638 5639#define _mm512_mask_insertf64x4(W, U, X, Y, C) \ 5640 ((__m512d) __builtin_ia32_insertf64x4_mask ((__v8df)(__m512d) (X), \ 5641 (__v4df)(__m256d) (Y), (int) (C), \ 5642 
(__v8df)(__m512d)(W), \ 5643 (__mmask8)(U))) 5644 5645#define _mm512_maskz_insertf64x4(U, X, Y, C) \ 5646 ((__m512d) __builtin_ia32_insertf64x4_mask ((__v8df)(__m512d) (X), \ 5647 (__v4df)(__m256d) (Y), (int) (C), \ 5648 (__v8df)(__m512d)_mm512_setzero_pd(), \ 5649 (__mmask8)(U))) 5650 5651#define _mm512_inserti64x4(X, Y, C) \ 5652 ((__m512i) __builtin_ia32_inserti64x4_mask ((__v8di)(__m512i) (X), \ 5653 (__v4di)(__m256i) (Y), (int) (C), \ 5654 (__v8di)(__m512i)_mm512_undefined_si512 (), \ 5655 (__mmask8)-1)) 5656 5657#define _mm512_mask_inserti64x4(W, U, X, Y, C) \ 5658 ((__m512i) __builtin_ia32_inserti64x4_mask ((__v8di)(__m512i) (X), \ 5659 (__v4di)(__m256i) (Y), (int) (C),\ 5660 (__v8di)(__m512i)(W),\ 5661 (__mmask8)(U))) 5662 5663#define _mm512_maskz_inserti64x4(U, X, Y, C) \ 5664 ((__m512i) __builtin_ia32_inserti64x4_mask ((__v8di)(__m512i) (X), \ 5665 (__v4di)(__m256i) (Y), (int) (C), \ 5666 (__v8di)(__m512i)_mm512_setzero_si512 (), \ 5667 (__mmask8)(U))) 5668#endif 5669 5670extern __inline __m512d 5671__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5672_mm512_loadu_pd (void const *__P) 5673{ 5674 return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *) __P, 5675 (__v8df) 5676 _mm512_undefined_pd (), 5677 (__mmask8) -1); 5678} 5679 5680extern __inline __m512d 5681__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5682_mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P) 5683{ 5684 return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *) __P, 5685 (__v8df) __W, 5686 (__mmask8) __U); 5687} 5688 5689extern __inline __m512d 5690__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5691_mm512_maskz_loadu_pd (__mmask8 __U, void const *__P) 5692{ 5693 return (__m512d) __builtin_ia32_loadupd512_mask ((const __v8df *) __P, 5694 (__v8df) 5695 _mm512_setzero_pd (), 5696 (__mmask8) __U); 5697} 5698 5699extern __inline void 5700__attribute__ ((__gnu_inline__, __always_inline__, 
__artificial__)) 5701_mm512_storeu_pd (void *__P, __m512d __A) 5702{ 5703 __builtin_ia32_storeupd512_mask ((__v8df *) __P, (__v8df) __A, 5704 (__mmask8) -1); 5705} 5706 5707extern __inline void 5708__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5709_mm512_mask_storeu_pd (void *__P, __mmask8 __U, __m512d __A) 5710{ 5711 __builtin_ia32_storeupd512_mask ((__v8df *) __P, (__v8df) __A, 5712 (__mmask8) __U); 5713} 5714 5715extern __inline __m512 5716__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5717_mm512_loadu_ps (void const *__P) 5718{ 5719 return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *) __P, 5720 (__v16sf) 5721 _mm512_undefined_ps (), 5722 (__mmask16) -1); 5723} 5724 5725extern __inline __m512 5726__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5727_mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P) 5728{ 5729 return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *) __P, 5730 (__v16sf) __W, 5731 (__mmask16) __U); 5732} 5733 5734extern __inline __m512 5735__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5736_mm512_maskz_loadu_ps (__mmask16 __U, void const *__P) 5737{ 5738 return (__m512) __builtin_ia32_loadups512_mask ((const __v16sf *) __P, 5739 (__v16sf) 5740 _mm512_setzero_ps (), 5741 (__mmask16) __U); 5742} 5743 5744extern __inline void 5745__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5746_mm512_storeu_ps (void *__P, __m512 __A) 5747{ 5748 __builtin_ia32_storeups512_mask ((__v16sf *) __P, (__v16sf) __A, 5749 (__mmask16) -1); 5750} 5751 5752extern __inline void 5753__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5754_mm512_mask_storeu_ps (void *__P, __mmask16 __U, __m512 __A) 5755{ 5756 __builtin_ia32_storeups512_mask ((__v16sf *) __P, (__v16sf) __A, 5757 (__mmask16) __U); 5758} 5759 5760extern __inline __m512i 5761__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 
5762_mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P) 5763{ 5764 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const __v8di *) __P, 5765 (__v8di) __W, 5766 (__mmask8) __U); 5767} 5768 5769extern __inline __m512i 5770__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5771_mm512_maskz_loadu_epi64 (__mmask8 __U, void const *__P) 5772{ 5773 return (__m512i) __builtin_ia32_loaddqudi512_mask ((const __v8di *) __P, 5774 (__v8di) 5775 _mm512_setzero_si512 (), 5776 (__mmask8) __U); 5777} 5778 5779extern __inline void 5780__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5781_mm512_mask_storeu_epi64 (void *__P, __mmask8 __U, __m512i __A) 5782{ 5783 __builtin_ia32_storedqudi512_mask ((__v8di *) __P, (__v8di) __A, 5784 (__mmask8) __U); 5785} 5786 5787extern __inline __m512i 5788__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5789_mm512_loadu_si512 (void const *__P) 5790{ 5791 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *) __P, 5792 (__v16si) 5793 _mm512_setzero_si512 (), 5794 (__mmask16) -1); 5795} 5796 5797extern __inline __m512i 5798__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5799_mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P) 5800{ 5801 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *) __P, 5802 (__v16si) __W, 5803 (__mmask16) __U); 5804} 5805 5806extern __inline __m512i 5807__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5808_mm512_maskz_loadu_epi32 (__mmask16 __U, void const *__P) 5809{ 5810 return (__m512i) __builtin_ia32_loaddqusi512_mask ((const __v16si *) __P, 5811 (__v16si) 5812 _mm512_setzero_si512 (), 5813 (__mmask16) __U); 5814} 5815 5816extern __inline void 5817__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5818_mm512_storeu_si512 (void *__P, __m512i __A) 5819{ 5820 __builtin_ia32_storedqusi512_mask ((__v16si *) __P, (__v16si) __A, 5821 (__mmask16) -1); 5822} 
5823 5824extern __inline void 5825__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5826_mm512_mask_storeu_epi32 (void *__P, __mmask16 __U, __m512i __A) 5827{ 5828 __builtin_ia32_storedqusi512_mask ((__v16si *) __P, (__v16si) __A, 5829 (__mmask16) __U); 5830} 5831 5832extern __inline __m512d 5833__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5834_mm512_permutevar_pd (__m512d __A, __m512i __C) 5835{ 5836 return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A, 5837 (__v8di) __C, 5838 (__v8df) 5839 _mm512_undefined_pd (), 5840 (__mmask8) -1); 5841} 5842 5843extern __inline __m512d 5844__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5845_mm512_mask_permutevar_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512i __C) 5846{ 5847 return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A, 5848 (__v8di) __C, 5849 (__v8df) __W, 5850 (__mmask8) __U); 5851} 5852 5853extern __inline __m512d 5854__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5855_mm512_maskz_permutevar_pd (__mmask8 __U, __m512d __A, __m512i __C) 5856{ 5857 return (__m512d) __builtin_ia32_vpermilvarpd512_mask ((__v8df) __A, 5858 (__v8di) __C, 5859 (__v8df) 5860 _mm512_setzero_pd (), 5861 (__mmask8) __U); 5862} 5863 5864extern __inline __m512 5865__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5866_mm512_permutevar_ps (__m512 __A, __m512i __C) 5867{ 5868 return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A, 5869 (__v16si) __C, 5870 (__v16sf) 5871 _mm512_undefined_ps (), 5872 (__mmask16) -1); 5873} 5874 5875extern __inline __m512 5876__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5877_mm512_mask_permutevar_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512i __C) 5878{ 5879 return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A, 5880 (__v16si) __C, 5881 (__v16sf) __W, 5882 (__mmask16) __U); 5883} 5884 5885extern __inline __m512 5886__attribute__ 
((__gnu_inline__, __always_inline__, __artificial__)) 5887_mm512_maskz_permutevar_ps (__mmask16 __U, __m512 __A, __m512i __C) 5888{ 5889 return (__m512) __builtin_ia32_vpermilvarps512_mask ((__v16sf) __A, 5890 (__v16si) __C, 5891 (__v16sf) 5892 _mm512_setzero_ps (), 5893 (__mmask16) __U); 5894} 5895 5896extern __inline __m512i 5897__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5898_mm512_permutex2var_epi64 (__m512i __A, __m512i __I, __m512i __B) 5899{ 5900 return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I 5901 /* idx */ , 5902 (__v8di) __A, 5903 (__v8di) __B, 5904 (__mmask8) -1); 5905} 5906 5907extern __inline __m512i 5908__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5909_mm512_mask_permutex2var_epi64 (__m512i __A, __mmask8 __U, __m512i __I, 5910 __m512i __B) 5911{ 5912 return (__m512i) __builtin_ia32_vpermt2varq512_mask ((__v8di) __I 5913 /* idx */ , 5914 (__v8di) __A, 5915 (__v8di) __B, 5916 (__mmask8) __U); 5917} 5918 5919extern __inline __m512i 5920__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5921_mm512_mask2_permutex2var_epi64 (__m512i __A, __m512i __I, 5922 __mmask8 __U, __m512i __B) 5923{ 5924 return (__m512i) __builtin_ia32_vpermi2varq512_mask ((__v8di) __A, 5925 (__v8di) __I 5926 /* idx */ , 5927 (__v8di) __B, 5928 (__mmask8) __U); 5929} 5930 5931extern __inline __m512i 5932__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5933_mm512_maskz_permutex2var_epi64 (__mmask8 __U, __m512i __A, 5934 __m512i __I, __m512i __B) 5935{ 5936 return (__m512i) __builtin_ia32_vpermt2varq512_maskz ((__v8di) __I 5937 /* idx */ , 5938 (__v8di) __A, 5939 (__v8di) __B, 5940 (__mmask8) __U); 5941} 5942 5943extern __inline __m512i 5944__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5945_mm512_permutex2var_epi32 (__m512i __A, __m512i __I, __m512i __B) 5946{ 5947 return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I 5948 /* idx */ , 5949 (__v16si) __A, 
5950 (__v16si) __B, 5951 (__mmask16) -1); 5952} 5953 5954extern __inline __m512i 5955__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5956_mm512_mask_permutex2var_epi32 (__m512i __A, __mmask16 __U, 5957 __m512i __I, __m512i __B) 5958{ 5959 return (__m512i) __builtin_ia32_vpermt2vard512_mask ((__v16si) __I 5960 /* idx */ , 5961 (__v16si) __A, 5962 (__v16si) __B, 5963 (__mmask16) __U); 5964} 5965 5966extern __inline __m512i 5967__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5968_mm512_mask2_permutex2var_epi32 (__m512i __A, __m512i __I, 5969 __mmask16 __U, __m512i __B) 5970{ 5971 return (__m512i) __builtin_ia32_vpermi2vard512_mask ((__v16si) __A, 5972 (__v16si) __I 5973 /* idx */ , 5974 (__v16si) __B, 5975 (__mmask16) __U); 5976} 5977 5978extern __inline __m512i 5979__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5980_mm512_maskz_permutex2var_epi32 (__mmask16 __U, __m512i __A, 5981 __m512i __I, __m512i __B) 5982{ 5983 return (__m512i) __builtin_ia32_vpermt2vard512_maskz ((__v16si) __I 5984 /* idx */ , 5985 (__v16si) __A, 5986 (__v16si) __B, 5987 (__mmask16) __U); 5988} 5989 5990extern __inline __m512d 5991__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 5992_mm512_permutex2var_pd (__m512d __A, __m512i __I, __m512d __B) 5993{ 5994 return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I 5995 /* idx */ , 5996 (__v8df) __A, 5997 (__v8df) __B, 5998 (__mmask8) -1); 5999} 6000 6001extern __inline __m512d 6002__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6003_mm512_mask_permutex2var_pd (__m512d __A, __mmask8 __U, __m512i __I, 6004 __m512d __B) 6005{ 6006 return (__m512d) __builtin_ia32_vpermt2varpd512_mask ((__v8di) __I 6007 /* idx */ , 6008 (__v8df) __A, 6009 (__v8df) __B, 6010 (__mmask8) __U); 6011} 6012 6013extern __inline __m512d 6014__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6015_mm512_mask2_permutex2var_pd (__m512d __A, __m512i __I, 
__mmask8 __U, 6016 __m512d __B) 6017{ 6018 return (__m512d) __builtin_ia32_vpermi2varpd512_mask ((__v8df) __A, 6019 (__v8di) __I 6020 /* idx */ , 6021 (__v8df) __B, 6022 (__mmask8) __U); 6023} 6024 6025extern __inline __m512d 6026__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6027_mm512_maskz_permutex2var_pd (__mmask8 __U, __m512d __A, __m512i __I, 6028 __m512d __B) 6029{ 6030 return (__m512d) __builtin_ia32_vpermt2varpd512_maskz ((__v8di) __I 6031 /* idx */ , 6032 (__v8df) __A, 6033 (__v8df) __B, 6034 (__mmask8) __U); 6035} 6036 6037extern __inline __m512 6038__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6039_mm512_permutex2var_ps (__m512 __A, __m512i __I, __m512 __B) 6040{ 6041 return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I 6042 /* idx */ , 6043 (__v16sf) __A, 6044 (__v16sf) __B, 6045 (__mmask16) -1); 6046} 6047 6048extern __inline __m512 6049__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6050_mm512_mask_permutex2var_ps (__m512 __A, __mmask16 __U, __m512i __I, __m512 __B) 6051{ 6052 return (__m512) __builtin_ia32_vpermt2varps512_mask ((__v16si) __I 6053 /* idx */ , 6054 (__v16sf) __A, 6055 (__v16sf) __B, 6056 (__mmask16) __U); 6057} 6058 6059extern __inline __m512 6060__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6061_mm512_mask2_permutex2var_ps (__m512 __A, __m512i __I, __mmask16 __U, 6062 __m512 __B) 6063{ 6064 return (__m512) __builtin_ia32_vpermi2varps512_mask ((__v16sf) __A, 6065 (__v16si) __I 6066 /* idx */ , 6067 (__v16sf) __B, 6068 (__mmask16) __U); 6069} 6070 6071extern __inline __m512 6072__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6073_mm512_maskz_permutex2var_ps (__mmask16 __U, __m512 __A, __m512i __I, 6074 __m512 __B) 6075{ 6076 return (__m512) __builtin_ia32_vpermt2varps512_maskz ((__v16si) __I 6077 /* idx */ , 6078 (__v16sf) __A, 6079 (__v16sf) __B, 6080 (__mmask16) __U); 6081} 6082 6083#ifdef __OPTIMIZE__ 6084extern 
__inline __m512d 6085__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6086_mm512_permute_pd (__m512d __X, const int __C) 6087{ 6088 return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C, 6089 (__v8df) 6090 _mm512_undefined_pd (), 6091 (__mmask8) -1); 6092} 6093 6094extern __inline __m512d 6095__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6096_mm512_mask_permute_pd (__m512d __W, __mmask8 __U, __m512d __X, const int __C) 6097{ 6098 return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C, 6099 (__v8df) __W, 6100 (__mmask8) __U); 6101} 6102 6103extern __inline __m512d 6104__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6105_mm512_maskz_permute_pd (__mmask8 __U, __m512d __X, const int __C) 6106{ 6107 return (__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df) __X, __C, 6108 (__v8df) 6109 _mm512_setzero_pd (), 6110 (__mmask8) __U); 6111} 6112 6113extern __inline __m512 6114__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6115_mm512_permute_ps (__m512 __X, const int __C) 6116{ 6117 return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C, 6118 (__v16sf) 6119 _mm512_undefined_ps (), 6120 (__mmask16) -1); 6121} 6122 6123extern __inline __m512 6124__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6125_mm512_mask_permute_ps (__m512 __W, __mmask16 __U, __m512 __X, const int __C) 6126{ 6127 return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C, 6128 (__v16sf) __W, 6129 (__mmask16) __U); 6130} 6131 6132extern __inline __m512 6133__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6134_mm512_maskz_permute_ps (__mmask16 __U, __m512 __X, const int __C) 6135{ 6136 return (__m512) __builtin_ia32_vpermilps512_mask ((__v16sf) __X, __C, 6137 (__v16sf) 6138 _mm512_setzero_ps (), 6139 (__mmask16) __U); 6140} 6141#else 6142#define _mm512_permute_pd(X, C) \ 6143 ((__m512d) __builtin_ia32_vpermilpd512_mask 
((__v8df)(__m512d)(X), (int)(C), \ 6144 (__v8df)(__m512d)_mm512_undefined_pd(),\ 6145 (__mmask8)(-1))) 6146 6147#define _mm512_mask_permute_pd(W, U, X, C) \ 6148 ((__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df)(__m512d)(X), (int)(C), \ 6149 (__v8df)(__m512d)(W), \ 6150 (__mmask8)(U))) 6151 6152#define _mm512_maskz_permute_pd(U, X, C) \ 6153 ((__m512d) __builtin_ia32_vpermilpd512_mask ((__v8df)(__m512d)(X), (int)(C), \ 6154 (__v8df)(__m512d)_mm512_setzero_pd(), \ 6155 (__mmask8)(U))) 6156 6157#define _mm512_permute_ps(X, C) \ 6158 ((__m512) __builtin_ia32_vpermilps512_mask ((__v16sf)(__m512)(X), (int)(C), \ 6159 (__v16sf)(__m512)_mm512_undefined_ps(),\ 6160 (__mmask16)(-1))) 6161 6162#define _mm512_mask_permute_ps(W, U, X, C) \ 6163 ((__m512) __builtin_ia32_vpermilps512_mask ((__v16sf)(__m512)(X), (int)(C), \ 6164 (__v16sf)(__m512)(W), \ 6165 (__mmask16)(U))) 6166 6167#define _mm512_maskz_permute_ps(U, X, C) \ 6168 ((__m512) __builtin_ia32_vpermilps512_mask ((__v16sf)(__m512)(X), (int)(C), \ 6169 (__v16sf)(__m512)_mm512_setzero_ps(), \ 6170 (__mmask16)(U))) 6171#endif 6172 6173#ifdef __OPTIMIZE__ 6174extern __inline __m512i 6175__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6176_mm512_permutex_epi64 (__m512i __X, const int __I) 6177{ 6178 return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I, 6179 (__v8di) 6180 _mm512_undefined_si512 (), 6181 (__mmask8) (-1)); 6182} 6183 6184extern __inline __m512i 6185__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6186_mm512_mask_permutex_epi64 (__m512i __W, __mmask8 __M, 6187 __m512i __X, const int __I) 6188{ 6189 return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) __X, __I, 6190 (__v8di) __W, 6191 (__mmask8) __M); 6192} 6193 6194extern __inline __m512i 6195__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6196_mm512_maskz_permutex_epi64 (__mmask8 __M, __m512i __X, const int __I) 6197{ 6198 return (__m512i) __builtin_ia32_permdi512_mask ((__v8di) 
__X, __I, 6199 (__v8di) 6200 _mm512_setzero_si512 (), 6201 (__mmask8) __M); 6202} 6203 6204extern __inline __m512d 6205__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6206_mm512_permutex_pd (__m512d __X, const int __M) 6207{ 6208 return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M, 6209 (__v8df) 6210 _mm512_undefined_pd (), 6211 (__mmask8) -1); 6212} 6213 6214extern __inline __m512d 6215__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6216_mm512_mask_permutex_pd (__m512d __W, __mmask8 __U, __m512d __X, const int __M) 6217{ 6218 return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M, 6219 (__v8df) __W, 6220 (__mmask8) __U); 6221} 6222 6223extern __inline __m512d 6224__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6225_mm512_maskz_permutex_pd (__mmask8 __U, __m512d __X, const int __M) 6226{ 6227 return (__m512d) __builtin_ia32_permdf512_mask ((__v8df) __X, __M, 6228 (__v8df) 6229 _mm512_setzero_pd (), 6230 (__mmask8) __U); 6231} 6232#else 6233#define _mm512_permutex_pd(X, M) \ 6234 ((__m512d) __builtin_ia32_permdf512_mask ((__v8df)(__m512d)(X), (int)(M), \ 6235 (__v8df)(__m512d)_mm512_undefined_pd(),\ 6236 (__mmask8)-1)) 6237 6238#define _mm512_mask_permutex_pd(W, U, X, M) \ 6239 ((__m512d) __builtin_ia32_permdf512_mask ((__v8df)(__m512d)(X), (int)(M), \ 6240 (__v8df)(__m512d)(W), (__mmask8)(U))) 6241 6242#define _mm512_maskz_permutex_pd(U, X, M) \ 6243 ((__m512d) __builtin_ia32_permdf512_mask ((__v8df)(__m512d)(X), (int)(M), \ 6244 (__v8df)(__m512d)_mm512_setzero_pd(),\ 6245 (__mmask8)(U))) 6246 6247#define _mm512_permutex_epi64(X, I) \ 6248 ((__m512i) __builtin_ia32_permdi512_mask ((__v8di)(__m512i)(X), \ 6249 (int)(I), \ 6250 (__v8di)(__m512i) \ 6251 (_mm512_undefined_si512 ()),\ 6252 (__mmask8)(-1))) 6253 6254#define _mm512_maskz_permutex_epi64(M, X, I) \ 6255 ((__m512i) __builtin_ia32_permdi512_mask ((__v8di)(__m512i)(X), \ 6256 (int)(I), \ 6257 (__v8di)(__m512i) \ 6258 
(_mm512_setzero_si512 ()),\ 6259 (__mmask8)(M))) 6260 6261#define _mm512_mask_permutex_epi64(W, M, X, I) \ 6262 ((__m512i) __builtin_ia32_permdi512_mask ((__v8di)(__m512i)(X), \ 6263 (int)(I), \ 6264 (__v8di)(__m512i)(W), \ 6265 (__mmask8)(M))) 6266#endif 6267 6268extern __inline __m512i 6269__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6270_mm512_maskz_permutexvar_epi64 (__mmask8 __M, __m512i __X, __m512i __Y) 6271{ 6272 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y, 6273 (__v8di) __X, 6274 (__v8di) 6275 _mm512_setzero_si512 (), 6276 __M); 6277} 6278 6279extern __inline __m512i 6280__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6281_mm512_permutexvar_epi64 (__m512i __X, __m512i __Y) 6282{ 6283 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y, 6284 (__v8di) __X, 6285 (__v8di) 6286 _mm512_undefined_si512 (), 6287 (__mmask8) -1); 6288} 6289 6290extern __inline __m512i 6291__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6292_mm512_mask_permutexvar_epi64 (__m512i __W, __mmask8 __M, __m512i __X, 6293 __m512i __Y) 6294{ 6295 return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y, 6296 (__v8di) __X, 6297 (__v8di) __W, 6298 __M); 6299} 6300 6301extern __inline __m512i 6302__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6303_mm512_maskz_permutexvar_epi32 (__mmask16 __M, __m512i __X, __m512i __Y) 6304{ 6305 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y, 6306 (__v16si) __X, 6307 (__v16si) 6308 _mm512_setzero_si512 (), 6309 __M); 6310} 6311 6312extern __inline __m512i 6313__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6314_mm512_permutexvar_epi32 (__m512i __X, __m512i __Y) 6315{ 6316 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y, 6317 (__v16si) __X, 6318 (__v16si) 6319 _mm512_undefined_si512 (), 6320 (__mmask16) -1); 6321} 6322 6323extern __inline __m512i 6324__attribute__ ((__gnu_inline__, 
__always_inline__, __artificial__)) 6325_mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X, 6326 __m512i __Y) 6327{ 6328 return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y, 6329 (__v16si) __X, 6330 (__v16si) __W, 6331 __M); 6332} 6333 6334extern __inline __m512d 6335__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6336_mm512_permutexvar_pd (__m512i __X, __m512d __Y) 6337{ 6338 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y, 6339 (__v8di) __X, 6340 (__v8df) 6341 _mm512_undefined_pd (), 6342 (__mmask8) -1); 6343} 6344 6345extern __inline __m512d 6346__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6347_mm512_mask_permutexvar_pd (__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y) 6348{ 6349 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y, 6350 (__v8di) __X, 6351 (__v8df) __W, 6352 (__mmask8) __U); 6353} 6354 6355extern __inline __m512d 6356__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6357_mm512_maskz_permutexvar_pd (__mmask8 __U, __m512i __X, __m512d __Y) 6358{ 6359 return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y, 6360 (__v8di) __X, 6361 (__v8df) 6362 _mm512_setzero_pd (), 6363 (__mmask8) __U); 6364} 6365 6366extern __inline __m512 6367__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6368_mm512_permutexvar_ps (__m512i __X, __m512 __Y) 6369{ 6370 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y, 6371 (__v16si) __X, 6372 (__v16sf) 6373 _mm512_undefined_ps (), 6374 (__mmask16) -1); 6375} 6376 6377extern __inline __m512 6378__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6379_mm512_mask_permutexvar_ps (__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y) 6380{ 6381 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y, 6382 (__v16si) __X, 6383 (__v16sf) __W, 6384 (__mmask16) __U); 6385} 6386 6387extern __inline __m512 6388__attribute__ ((__gnu_inline__, 
__always_inline__, __artificial__)) 6389_mm512_maskz_permutexvar_ps (__mmask16 __U, __m512i __X, __m512 __Y) 6390{ 6391 return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y, 6392 (__v16si) __X, 6393 (__v16sf) 6394 _mm512_setzero_ps (), 6395 (__mmask16) __U); 6396} 6397 6398#ifdef __OPTIMIZE__ 6399extern __inline __m512 6400__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6401_mm512_shuffle_ps (__m512 __M, __m512 __V, const int __imm) 6402{ 6403 return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M, 6404 (__v16sf) __V, __imm, 6405 (__v16sf) 6406 _mm512_undefined_ps (), 6407 (__mmask16) -1); 6408} 6409 6410extern __inline __m512 6411__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6412_mm512_mask_shuffle_ps (__m512 __W, __mmask16 __U, __m512 __M, 6413 __m512 __V, const int __imm) 6414{ 6415 return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M, 6416 (__v16sf) __V, __imm, 6417 (__v16sf) __W, 6418 (__mmask16) __U); 6419} 6420 6421extern __inline __m512 6422__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6423_mm512_maskz_shuffle_ps (__mmask16 __U, __m512 __M, __m512 __V, const int __imm) 6424{ 6425 return (__m512) __builtin_ia32_shufps512_mask ((__v16sf) __M, 6426 (__v16sf) __V, __imm, 6427 (__v16sf) 6428 _mm512_setzero_ps (), 6429 (__mmask16) __U); 6430} 6431 6432extern __inline __m512d 6433__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6434_mm512_shuffle_pd (__m512d __M, __m512d __V, const int __imm) 6435{ 6436 return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M, 6437 (__v8df) __V, __imm, 6438 (__v8df) 6439 _mm512_undefined_pd (), 6440 (__mmask8) -1); 6441} 6442 6443extern __inline __m512d 6444__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6445_mm512_mask_shuffle_pd (__m512d __W, __mmask8 __U, __m512d __M, 6446 __m512d __V, const int __imm) 6447{ 6448 return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M, 6449 (__v8df) __V, 
__imm, 6450 (__v8df) __W, 6451 (__mmask8) __U); 6452} 6453 6454extern __inline __m512d 6455__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6456_mm512_maskz_shuffle_pd (__mmask8 __U, __m512d __M, __m512d __V, 6457 const int __imm) 6458{ 6459 return (__m512d) __builtin_ia32_shufpd512_mask ((__v8df) __M, 6460 (__v8df) __V, __imm, 6461 (__v8df) 6462 _mm512_setzero_pd (), 6463 (__mmask8) __U); 6464} 6465 6466extern __inline __m512d 6467__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6468_mm512_fixupimm_round_pd (__m512d __A, __m512d __B, __m512i __C, 6469 const int __imm, const int __R) 6470{ 6471 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A, 6472 (__v8df) __B, 6473 (__v8di) __C, 6474 __imm, 6475 (__mmask8) -1, __R); 6476} 6477 6478extern __inline __m512d 6479__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6480_mm512_mask_fixupimm_round_pd (__m512d __A, __mmask8 __U, __m512d __B, 6481 __m512i __C, const int __imm, const int __R) 6482{ 6483 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A, 6484 (__v8df) __B, 6485 (__v8di) __C, 6486 __imm, 6487 (__mmask8) __U, __R); 6488} 6489 6490extern __inline __m512d 6491__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6492_mm512_maskz_fixupimm_round_pd (__mmask8 __U, __m512d __A, __m512d __B, 6493 __m512i __C, const int __imm, const int __R) 6494{ 6495 return (__m512d) __builtin_ia32_fixupimmpd512_maskz ((__v8df) __A, 6496 (__v8df) __B, 6497 (__v8di) __C, 6498 __imm, 6499 (__mmask8) __U, __R); 6500} 6501 6502extern __inline __m512 6503__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6504_mm512_fixupimm_round_ps (__m512 __A, __m512 __B, __m512i __C, 6505 const int __imm, const int __R) 6506{ 6507 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A, 6508 (__v16sf) __B, 6509 (__v16si) __C, 6510 __imm, 6511 (__mmask16) -1, __R); 6512} 6513 6514extern __inline __m512 6515__attribute__ 
((__gnu_inline__, __always_inline__, __artificial__)) 6516_mm512_mask_fixupimm_round_ps (__m512 __A, __mmask16 __U, __m512 __B, 6517 __m512i __C, const int __imm, const int __R) 6518{ 6519 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A, 6520 (__v16sf) __B, 6521 (__v16si) __C, 6522 __imm, 6523 (__mmask16) __U, __R); 6524} 6525 6526extern __inline __m512 6527__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6528_mm512_maskz_fixupimm_round_ps (__mmask16 __U, __m512 __A, __m512 __B, 6529 __m512i __C, const int __imm, const int __R) 6530{ 6531 return (__m512) __builtin_ia32_fixupimmps512_maskz ((__v16sf) __A, 6532 (__v16sf) __B, 6533 (__v16si) __C, 6534 __imm, 6535 (__mmask16) __U, __R); 6536} 6537 6538extern __inline __m128d 6539__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6540_mm_fixupimm_round_sd (__m128d __A, __m128d __B, __m128i __C, 6541 const int __imm, const int __R) 6542{ 6543 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A, 6544 (__v2df) __B, 6545 (__v2di) __C, __imm, 6546 (__mmask8) -1, __R); 6547} 6548 6549extern __inline __m128d 6550__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6551_mm_mask_fixupimm_round_sd (__m128d __A, __mmask8 __U, __m128d __B, 6552 __m128i __C, const int __imm, const int __R) 6553{ 6554 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A, 6555 (__v2df) __B, 6556 (__v2di) __C, __imm, 6557 (__mmask8) __U, __R); 6558} 6559 6560extern __inline __m128d 6561__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6562_mm_maskz_fixupimm_round_sd (__mmask8 __U, __m128d __A, __m128d __B, 6563 __m128i __C, const int __imm, const int __R) 6564{ 6565 return (__m128d) __builtin_ia32_fixupimmsd_maskz ((__v2df) __A, 6566 (__v2df) __B, 6567 (__v2di) __C, 6568 __imm, 6569 (__mmask8) __U, __R); 6570} 6571 6572extern __inline __m128 6573__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6574_mm_fixupimm_round_ss (__m128 
__A, __m128 __B, __m128i __C, 6575 const int __imm, const int __R) 6576{ 6577 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A, 6578 (__v4sf) __B, 6579 (__v4si) __C, __imm, 6580 (__mmask8) -1, __R); 6581} 6582 6583extern __inline __m128 6584__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6585_mm_mask_fixupimm_round_ss (__m128 __A, __mmask8 __U, __m128 __B, 6586 __m128i __C, const int __imm, const int __R) 6587{ 6588 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A, 6589 (__v4sf) __B, 6590 (__v4si) __C, __imm, 6591 (__mmask8) __U, __R); 6592} 6593 6594extern __inline __m128 6595__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6596_mm_maskz_fixupimm_round_ss (__mmask8 __U, __m128 __A, __m128 __B, 6597 __m128i __C, const int __imm, const int __R) 6598{ 6599 return (__m128) __builtin_ia32_fixupimmss_maskz ((__v4sf) __A, 6600 (__v4sf) __B, 6601 (__v4si) __C, __imm, 6602 (__mmask8) __U, __R); 6603} 6604 6605#else 6606#define _mm512_shuffle_pd(X, Y, C) \ 6607 ((__m512d)__builtin_ia32_shufpd512_mask ((__v8df)(__m512d)(X), \ 6608 (__v8df)(__m512d)(Y), (int)(C),\ 6609 (__v8df)(__m512d)_mm512_undefined_pd(),\ 6610 (__mmask8)-1)) 6611 6612#define _mm512_mask_shuffle_pd(W, U, X, Y, C) \ 6613 ((__m512d)__builtin_ia32_shufpd512_mask ((__v8df)(__m512d)(X), \ 6614 (__v8df)(__m512d)(Y), (int)(C),\ 6615 (__v8df)(__m512d)(W),\ 6616 (__mmask8)(U))) 6617 6618#define _mm512_maskz_shuffle_pd(U, X, Y, C) \ 6619 ((__m512d)__builtin_ia32_shufpd512_mask ((__v8df)(__m512d)(X), \ 6620 (__v8df)(__m512d)(Y), (int)(C),\ 6621 (__v8df)(__m512d)_mm512_setzero_pd(),\ 6622 (__mmask8)(U))) 6623 6624#define _mm512_shuffle_ps(X, Y, C) \ 6625 ((__m512)__builtin_ia32_shufps512_mask ((__v16sf)(__m512)(X), \ 6626 (__v16sf)(__m512)(Y), (int)(C),\ 6627 (__v16sf)(__m512)_mm512_undefined_ps(),\ 6628 (__mmask16)-1)) 6629 6630#define _mm512_mask_shuffle_ps(W, U, X, Y, C) \ 6631 ((__m512)__builtin_ia32_shufps512_mask ((__v16sf)(__m512)(X), \ 6632 
(__v16sf)(__m512)(Y), (int)(C),\ 6633 (__v16sf)(__m512)(W),\ 6634 (__mmask16)(U))) 6635 6636#define _mm512_maskz_shuffle_ps(U, X, Y, C) \ 6637 ((__m512)__builtin_ia32_shufps512_mask ((__v16sf)(__m512)(X), \ 6638 (__v16sf)(__m512)(Y), (int)(C),\ 6639 (__v16sf)(__m512)_mm512_setzero_ps(),\ 6640 (__mmask16)(U))) 6641 6642#define _mm512_fixupimm_round_pd(X, Y, Z, C, R) \ 6643 ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X), \ 6644 (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \ 6645 (__mmask8)(-1), (R))) 6646 6647#define _mm512_mask_fixupimm_round_pd(X, U, Y, Z, C, R) \ 6648 ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X), \ 6649 (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \ 6650 (__mmask8)(U), (R))) 6651 6652#define _mm512_maskz_fixupimm_round_pd(U, X, Y, Z, C, R) \ 6653 ((__m512d)__builtin_ia32_fixupimmpd512_maskz ((__v8df)(__m512d)(X), \ 6654 (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \ 6655 (__mmask8)(U), (R))) 6656 6657#define _mm512_fixupimm_round_ps(X, Y, Z, C, R) \ 6658 ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X), \ 6659 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \ 6660 (__mmask16)(-1), (R))) 6661 6662#define _mm512_mask_fixupimm_round_ps(X, U, Y, Z, C, R) \ 6663 ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X), \ 6664 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \ 6665 (__mmask16)(U), (R))) 6666 6667#define _mm512_maskz_fixupimm_round_ps(U, X, Y, Z, C, R) \ 6668 ((__m512)__builtin_ia32_fixupimmps512_maskz ((__v16sf)(__m512)(X), \ 6669 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \ 6670 (__mmask16)(U), (R))) 6671 6672#define _mm_fixupimm_round_sd(X, Y, Z, C, R) \ 6673 ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \ 6674 (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \ 6675 (__mmask8)(-1), (R))) 6676 6677#define _mm_mask_fixupimm_round_sd(X, U, Y, Z, C, R) \ 6678 ((__m128d)__builtin_ia32_fixupimmsd_mask 
((__v2df)(__m128d)(X), \ 6679 (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \ 6680 (__mmask8)(U), (R))) 6681 6682#define _mm_maskz_fixupimm_round_sd(U, X, Y, Z, C, R) \ 6683 ((__m128d)__builtin_ia32_fixupimmsd_maskz ((__v2df)(__m128d)(X), \ 6684 (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \ 6685 (__mmask8)(U), (R))) 6686 6687#define _mm_fixupimm_round_ss(X, Y, Z, C, R) \ 6688 ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \ 6689 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \ 6690 (__mmask8)(-1), (R))) 6691 6692#define _mm_mask_fixupimm_round_ss(X, U, Y, Z, C, R) \ 6693 ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \ 6694 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \ 6695 (__mmask8)(U), (R))) 6696 6697#define _mm_maskz_fixupimm_round_ss(U, X, Y, Z, C, R) \ 6698 ((__m128)__builtin_ia32_fixupimmss_maskz ((__v4sf)(__m128)(X), \ 6699 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \ 6700 (__mmask8)(U), (R))) 6701#endif 6702 6703extern __inline __m512 6704__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6705_mm512_movehdup_ps (__m512 __A) 6706{ 6707 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A, 6708 (__v16sf) 6709 _mm512_undefined_ps (), 6710 (__mmask16) -1); 6711} 6712 6713extern __inline __m512 6714__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6715_mm512_mask_movehdup_ps (__m512 __W, __mmask16 __U, __m512 __A) 6716{ 6717 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A, 6718 (__v16sf) __W, 6719 (__mmask16) __U); 6720} 6721 6722extern __inline __m512 6723__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6724_mm512_maskz_movehdup_ps (__mmask16 __U, __m512 __A) 6725{ 6726 return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A, 6727 (__v16sf) 6728 _mm512_setzero_ps (), 6729 (__mmask16) __U); 6730} 6731 6732extern __inline __m512 6733__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 
6734_mm512_moveldup_ps (__m512 __A) 6735{ 6736 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A, 6737 (__v16sf) 6738 _mm512_undefined_ps (), 6739 (__mmask16) -1); 6740} 6741 6742extern __inline __m512 6743__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6744_mm512_mask_moveldup_ps (__m512 __W, __mmask16 __U, __m512 __A) 6745{ 6746 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A, 6747 (__v16sf) __W, 6748 (__mmask16) __U); 6749} 6750 6751extern __inline __m512 6752__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6753_mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A) 6754{ 6755 return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A, 6756 (__v16sf) 6757 _mm512_setzero_ps (), 6758 (__mmask16) __U); 6759} 6760 6761extern __inline __m512i 6762__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6763_mm512_or_si512 (__m512i __A, __m512i __B) 6764{ 6765 return (__m512i) ((__v16su) __A | (__v16su) __B); 6766} 6767 6768extern __inline __m512i 6769__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6770_mm512_or_epi32 (__m512i __A, __m512i __B) 6771{ 6772 return (__m512i) ((__v16su) __A | (__v16su) __B); 6773} 6774 6775extern __inline __m512i 6776__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6777_mm512_mask_or_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) 6778{ 6779 return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A, 6780 (__v16si) __B, 6781 (__v16si) __W, 6782 (__mmask16) __U); 6783} 6784 6785extern __inline __m512i 6786__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6787_mm512_maskz_or_epi32 (__mmask16 __U, __m512i __A, __m512i __B) 6788{ 6789 return (__m512i) __builtin_ia32_pord512_mask ((__v16si) __A, 6790 (__v16si) __B, 6791 (__v16si) 6792 _mm512_setzero_si512 (), 6793 (__mmask16) __U); 6794} 6795 6796extern __inline __m512i 6797__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 
6798_mm512_or_epi64 (__m512i __A, __m512i __B) 6799{ 6800 return (__m512i) ((__v8du) __A | (__v8du) __B); 6801} 6802 6803extern __inline __m512i 6804__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6805_mm512_mask_or_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) 6806{ 6807 return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A, 6808 (__v8di) __B, 6809 (__v8di) __W, 6810 (__mmask8) __U); 6811} 6812 6813extern __inline __m512i 6814__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6815_mm512_maskz_or_epi64 (__mmask8 __U, __m512i __A, __m512i __B) 6816{ 6817 return (__m512i) __builtin_ia32_porq512_mask ((__v8di) __A, 6818 (__v8di) __B, 6819 (__v8di) 6820 _mm512_setzero_si512 (), 6821 (__mmask8) __U); 6822} 6823 6824extern __inline __m512i 6825__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6826_mm512_xor_si512 (__m512i __A, __m512i __B) 6827{ 6828 return (__m512i) ((__v16su) __A ^ (__v16su) __B); 6829} 6830 6831extern __inline __m512i 6832__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6833_mm512_xor_epi32 (__m512i __A, __m512i __B) 6834{ 6835 return (__m512i) ((__v16su) __A ^ (__v16su) __B); 6836} 6837 6838extern __inline __m512i 6839__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6840_mm512_mask_xor_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) 6841{ 6842 return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A, 6843 (__v16si) __B, 6844 (__v16si) __W, 6845 (__mmask16) __U); 6846} 6847 6848extern __inline __m512i 6849__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6850_mm512_maskz_xor_epi32 (__mmask16 __U, __m512i __A, __m512i __B) 6851{ 6852 return (__m512i) __builtin_ia32_pxord512_mask ((__v16si) __A, 6853 (__v16si) __B, 6854 (__v16si) 6855 _mm512_setzero_si512 (), 6856 (__mmask16) __U); 6857} 6858 6859extern __inline __m512i 6860__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 
6861_mm512_xor_epi64 (__m512i __A, __m512i __B) 6862{ 6863 return (__m512i) ((__v8du) __A ^ (__v8du) __B); 6864} 6865 6866extern __inline __m512i 6867__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6868_mm512_mask_xor_epi64 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) 6869{ 6870 return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A, 6871 (__v8di) __B, 6872 (__v8di) __W, 6873 (__mmask8) __U); 6874} 6875 6876extern __inline __m512i 6877__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6878_mm512_maskz_xor_epi64 (__mmask16 __U, __m512i __A, __m512i __B) 6879{ 6880 return (__m512i) __builtin_ia32_pxorq512_mask ((__v8di) __A, 6881 (__v8di) __B, 6882 (__v8di) 6883 _mm512_setzero_si512 (), 6884 (__mmask8) __U); 6885} 6886 6887#ifdef __OPTIMIZE__ 6888extern __inline __m512i 6889__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6890_mm512_rol_epi32 (__m512i __A, const int __B) 6891{ 6892 return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B, 6893 (__v16si) 6894 _mm512_undefined_si512 (), 6895 (__mmask16) -1); 6896} 6897 6898extern __inline __m512i 6899__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6900_mm512_mask_rol_epi32 (__m512i __W, __mmask16 __U, __m512i __A, const int __B) 6901{ 6902 return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B, 6903 (__v16si) __W, 6904 (__mmask16) __U); 6905} 6906 6907extern __inline __m512i 6908__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6909_mm512_maskz_rol_epi32 (__mmask16 __U, __m512i __A, const int __B) 6910{ 6911 return (__m512i) __builtin_ia32_prold512_mask ((__v16si) __A, __B, 6912 (__v16si) 6913 _mm512_setzero_si512 (), 6914 (__mmask16) __U); 6915} 6916 6917extern __inline __m512i 6918__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6919_mm512_ror_epi32 (__m512i __A, int __B) 6920{ 6921 return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B, 6922 (__v16si) 6923 
_mm512_undefined_si512 (), 6924 (__mmask16) -1); 6925} 6926 6927extern __inline __m512i 6928__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6929_mm512_mask_ror_epi32 (__m512i __W, __mmask16 __U, __m512i __A, int __B) 6930{ 6931 return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B, 6932 (__v16si) __W, 6933 (__mmask16) __U); 6934} 6935 6936extern __inline __m512i 6937__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6938_mm512_maskz_ror_epi32 (__mmask16 __U, __m512i __A, int __B) 6939{ 6940 return (__m512i) __builtin_ia32_prord512_mask ((__v16si) __A, __B, 6941 (__v16si) 6942 _mm512_setzero_si512 (), 6943 (__mmask16) __U); 6944} 6945 6946extern __inline __m512i 6947__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6948_mm512_rol_epi64 (__m512i __A, const int __B) 6949{ 6950 return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B, 6951 (__v8di) 6952 _mm512_undefined_si512 (), 6953 (__mmask8) -1); 6954} 6955 6956extern __inline __m512i 6957__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6958_mm512_mask_rol_epi64 (__m512i __W, __mmask8 __U, __m512i __A, const int __B) 6959{ 6960 return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B, 6961 (__v8di) __W, 6962 (__mmask8) __U); 6963} 6964 6965extern __inline __m512i 6966__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6967_mm512_maskz_rol_epi64 (__mmask8 __U, __m512i __A, const int __B) 6968{ 6969 return (__m512i) __builtin_ia32_prolq512_mask ((__v8di) __A, __B, 6970 (__v8di) 6971 _mm512_setzero_si512 (), 6972 (__mmask8) __U); 6973} 6974 6975extern __inline __m512i 6976__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6977_mm512_ror_epi64 (__m512i __A, int __B) 6978{ 6979 return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B, 6980 (__v8di) 6981 _mm512_undefined_si512 (), 6982 (__mmask8) -1); 6983} 6984 6985extern __inline __m512i 6986__attribute__ ((__gnu_inline__, 
__always_inline__, __artificial__)) 6987_mm512_mask_ror_epi64 (__m512i __W, __mmask8 __U, __m512i __A, int __B) 6988{ 6989 return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B, 6990 (__v8di) __W, 6991 (__mmask8) __U); 6992} 6993 6994extern __inline __m512i 6995__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 6996_mm512_maskz_ror_epi64 (__mmask8 __U, __m512i __A, int __B) 6997{ 6998 return (__m512i) __builtin_ia32_prorq512_mask ((__v8di) __A, __B, 6999 (__v8di) 7000 _mm512_setzero_si512 (), 7001 (__mmask8) __U); 7002} 7003 7004#else 7005#define _mm512_rol_epi32(A, B) \ 7006 ((__m512i)__builtin_ia32_prold512_mask ((__v16si)(__m512i)(A), \ 7007 (int)(B), \ 7008 (__v16si)_mm512_undefined_si512 (), \ 7009 (__mmask16)(-1))) 7010#define _mm512_mask_rol_epi32(W, U, A, B) \ 7011 ((__m512i)__builtin_ia32_prold512_mask ((__v16si)(__m512i)(A), \ 7012 (int)(B), \ 7013 (__v16si)(__m512i)(W), \ 7014 (__mmask16)(U))) 7015#define _mm512_maskz_rol_epi32(U, A, B) \ 7016 ((__m512i)__builtin_ia32_prold512_mask ((__v16si)(__m512i)(A), \ 7017 (int)(B), \ 7018 (__v16si)_mm512_setzero_si512 (), \ 7019 (__mmask16)(U))) 7020#define _mm512_ror_epi32(A, B) \ 7021 ((__m512i)__builtin_ia32_prord512_mask ((__v16si)(__m512i)(A), \ 7022 (int)(B), \ 7023 (__v16si)_mm512_undefined_si512 (), \ 7024 (__mmask16)(-1))) 7025#define _mm512_mask_ror_epi32(W, U, A, B) \ 7026 ((__m512i)__builtin_ia32_prord512_mask ((__v16si)(__m512i)(A), \ 7027 (int)(B), \ 7028 (__v16si)(__m512i)(W), \ 7029 (__mmask16)(U))) 7030#define _mm512_maskz_ror_epi32(U, A, B) \ 7031 ((__m512i)__builtin_ia32_prord512_mask ((__v16si)(__m512i)(A), \ 7032 (int)(B), \ 7033 (__v16si)_mm512_setzero_si512 (), \ 7034 (__mmask16)(U))) 7035#define _mm512_rol_epi64(A, B) \ 7036 ((__m512i)__builtin_ia32_prolq512_mask ((__v8di)(__m512i)(A), \ 7037 (int)(B), \ 7038 (__v8di)_mm512_undefined_si512 (), \ 7039 (__mmask8)(-1))) 7040#define _mm512_mask_rol_epi64(W, U, A, B) \ 7041 ((__m512i)__builtin_ia32_prolq512_mask 
((__v8di)(__m512i)(A), \ 7042 (int)(B), \ 7043 (__v8di)(__m512i)(W), \ 7044 (__mmask8)(U))) 7045#define _mm512_maskz_rol_epi64(U, A, B) \ 7046 ((__m512i)__builtin_ia32_prolq512_mask ((__v8di)(__m512i)(A), \ 7047 (int)(B), \ 7048 (__v8di)_mm512_setzero_si512 (), \ 7049 (__mmask8)(U))) 7050 7051#define _mm512_ror_epi64(A, B) \ 7052 ((__m512i)__builtin_ia32_prorq512_mask ((__v8di)(__m512i)(A), \ 7053 (int)(B), \ 7054 (__v8di)_mm512_undefined_si512 (), \ 7055 (__mmask8)(-1))) 7056#define _mm512_mask_ror_epi64(W, U, A, B) \ 7057 ((__m512i)__builtin_ia32_prorq512_mask ((__v8di)(__m512i)(A), \ 7058 (int)(B), \ 7059 (__v8di)(__m512i)(W), \ 7060 (__mmask8)(U))) 7061#define _mm512_maskz_ror_epi64(U, A, B) \ 7062 ((__m512i)__builtin_ia32_prorq512_mask ((__v8di)(__m512i)(A), \ 7063 (int)(B), \ 7064 (__v8di)_mm512_setzero_si512 (), \ 7065 (__mmask8)(U))) 7066#endif 7067 7068extern __inline __m512i 7069__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7070_mm512_and_si512 (__m512i __A, __m512i __B) 7071{ 7072 return (__m512i) ((__v16su) __A & (__v16su) __B); 7073} 7074 7075extern __inline __m512i 7076__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7077_mm512_and_epi32 (__m512i __A, __m512i __B) 7078{ 7079 return (__m512i) ((__v16su) __A & (__v16su) __B); 7080} 7081 7082extern __inline __m512i 7083__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7084_mm512_mask_and_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) 7085{ 7086 return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A, 7087 (__v16si) __B, 7088 (__v16si) __W, 7089 (__mmask16) __U); 7090} 7091 7092extern __inline __m512i 7093__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7094_mm512_maskz_and_epi32 (__mmask16 __U, __m512i __A, __m512i __B) 7095{ 7096 return (__m512i) __builtin_ia32_pandd512_mask ((__v16si) __A, 7097 (__v16si) __B, 7098 (__v16si) 7099 _mm512_setzero_si512 (), 7100 (__mmask16) __U); 7101} 7102 7103extern __inline 
__m512i 7104__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7105_mm512_and_epi64 (__m512i __A, __m512i __B) 7106{ 7107 return (__m512i) ((__v8du) __A & (__v8du) __B); 7108} 7109 7110extern __inline __m512i 7111__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7112_mm512_mask_and_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) 7113{ 7114 return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __A, 7115 (__v8di) __B, 7116 (__v8di) __W, __U); 7117} 7118 7119extern __inline __m512i 7120__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7121_mm512_maskz_and_epi64 (__mmask8 __U, __m512i __A, __m512i __B) 7122{ 7123 return (__m512i) __builtin_ia32_pandq512_mask ((__v8di) __A, 7124 (__v8di) __B, 7125 (__v8di) 7126 _mm512_setzero_pd (), 7127 __U); 7128} 7129 7130extern __inline __m512i 7131__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7132_mm512_andnot_si512 (__m512i __A, __m512i __B) 7133{ 7134 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A, 7135 (__v16si) __B, 7136 (__v16si) 7137 _mm512_undefined_si512 (), 7138 (__mmask16) -1); 7139} 7140 7141extern __inline __m512i 7142__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7143_mm512_andnot_epi32 (__m512i __A, __m512i __B) 7144{ 7145 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A, 7146 (__v16si) __B, 7147 (__v16si) 7148 _mm512_undefined_si512 (), 7149 (__mmask16) -1); 7150} 7151 7152extern __inline __m512i 7153__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7154_mm512_mask_andnot_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) 7155{ 7156 return (__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A, 7157 (__v16si) __B, 7158 (__v16si) __W, 7159 (__mmask16) __U); 7160} 7161 7162extern __inline __m512i 7163__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7164_mm512_maskz_andnot_epi32 (__mmask16 __U, __m512i __A, __m512i __B) 7165{ 7166 return 
(__m512i) __builtin_ia32_pandnd512_mask ((__v16si) __A, 7167 (__v16si) __B, 7168 (__v16si) 7169 _mm512_setzero_si512 (), 7170 (__mmask16) __U); 7171} 7172 7173extern __inline __m512i 7174__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7175_mm512_andnot_epi64 (__m512i __A, __m512i __B) 7176{ 7177 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A, 7178 (__v8di) __B, 7179 (__v8di) 7180 _mm512_undefined_si512 (), 7181 (__mmask8) -1); 7182} 7183 7184extern __inline __m512i 7185__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7186_mm512_mask_andnot_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) 7187{ 7188 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A, 7189 (__v8di) __B, 7190 (__v8di) __W, __U); 7191} 7192 7193extern __inline __m512i 7194__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7195_mm512_maskz_andnot_epi64 (__mmask8 __U, __m512i __A, __m512i __B) 7196{ 7197 return (__m512i) __builtin_ia32_pandnq512_mask ((__v8di) __A, 7198 (__v8di) __B, 7199 (__v8di) 7200 _mm512_setzero_pd (), 7201 __U); 7202} 7203 7204extern __inline __mmask16 7205__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7206_mm512_test_epi32_mask (__m512i __A, __m512i __B) 7207{ 7208 return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A, 7209 (__v16si) __B, 7210 (__mmask16) -1); 7211} 7212 7213extern __inline __mmask16 7214__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7215_mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B) 7216{ 7217 return (__mmask16) __builtin_ia32_ptestmd512 ((__v16si) __A, 7218 (__v16si) __B, __U); 7219} 7220 7221extern __inline __mmask8 7222__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7223_mm512_test_epi64_mask (__m512i __A, __m512i __B) 7224{ 7225 return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A, 7226 (__v8di) __B, 7227 (__mmask8) -1); 7228} 7229 7230extern __inline __mmask8 
7231__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7232_mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B) 7233{ 7234 return (__mmask8) __builtin_ia32_ptestmq512 ((__v8di) __A, (__v8di) __B, __U); 7235} 7236 7237extern __inline __mmask16 7238__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7239_mm512_testn_epi32_mask (__m512i __A, __m512i __B) 7240{ 7241 return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A, 7242 (__v16si) __B, 7243 (__mmask16) -1); 7244} 7245 7246extern __inline __mmask16 7247__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7248_mm512_mask_testn_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B) 7249{ 7250 return (__mmask16) __builtin_ia32_ptestnmd512 ((__v16si) __A, 7251 (__v16si) __B, __U); 7252} 7253 7254extern __inline __mmask8 7255__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7256_mm512_testn_epi64_mask (__m512i __A, __m512i __B) 7257{ 7258 return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A, 7259 (__v8di) __B, 7260 (__mmask8) -1); 7261} 7262 7263extern __inline __mmask8 7264__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7265_mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B) 7266{ 7267 return (__mmask8) __builtin_ia32_ptestnmq512 ((__v8di) __A, 7268 (__v8di) __B, __U); 7269} 7270 7271extern __inline __m512i 7272__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7273_mm512_unpackhi_epi32 (__m512i __A, __m512i __B) 7274{ 7275 return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A, 7276 (__v16si) __B, 7277 (__v16si) 7278 _mm512_undefined_si512 (), 7279 (__mmask16) -1); 7280} 7281 7282extern __inline __m512i 7283__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7284_mm512_mask_unpackhi_epi32 (__m512i __W, __mmask16 __U, __m512i __A, 7285 __m512i __B) 7286{ 7287 return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A, 7288 (__v16si) __B, 7289 
(__v16si) __W, 7290 (__mmask16) __U); 7291} 7292 7293extern __inline __m512i 7294__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7295_mm512_maskz_unpackhi_epi32 (__mmask16 __U, __m512i __A, __m512i __B) 7296{ 7297 return (__m512i) __builtin_ia32_punpckhdq512_mask ((__v16si) __A, 7298 (__v16si) __B, 7299 (__v16si) 7300 _mm512_setzero_si512 (), 7301 (__mmask16) __U); 7302} 7303 7304extern __inline __m512i 7305__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7306_mm512_unpackhi_epi64 (__m512i __A, __m512i __B) 7307{ 7308 return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A, 7309 (__v8di) __B, 7310 (__v8di) 7311 _mm512_undefined_si512 (), 7312 (__mmask8) -1); 7313} 7314 7315extern __inline __m512i 7316__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7317_mm512_mask_unpackhi_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) 7318{ 7319 return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A, 7320 (__v8di) __B, 7321 (__v8di) __W, 7322 (__mmask8) __U); 7323} 7324 7325extern __inline __m512i 7326__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7327_mm512_maskz_unpackhi_epi64 (__mmask8 __U, __m512i __A, __m512i __B) 7328{ 7329 return (__m512i) __builtin_ia32_punpckhqdq512_mask ((__v8di) __A, 7330 (__v8di) __B, 7331 (__v8di) 7332 _mm512_setzero_si512 (), 7333 (__mmask8) __U); 7334} 7335 7336extern __inline __m512i 7337__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7338_mm512_unpacklo_epi32 (__m512i __A, __m512i __B) 7339{ 7340 return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A, 7341 (__v16si) __B, 7342 (__v16si) 7343 _mm512_undefined_si512 (), 7344 (__mmask16) -1); 7345} 7346 7347extern __inline __m512i 7348__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7349_mm512_mask_unpacklo_epi32 (__m512i __W, __mmask16 __U, __m512i __A, 7350 __m512i __B) 7351{ 7352 return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) 
__A, 7353 (__v16si) __B, 7354 (__v16si) __W, 7355 (__mmask16) __U); 7356} 7357 7358extern __inline __m512i 7359__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7360_mm512_maskz_unpacklo_epi32 (__mmask16 __U, __m512i __A, __m512i __B) 7361{ 7362 return (__m512i) __builtin_ia32_punpckldq512_mask ((__v16si) __A, 7363 (__v16si) __B, 7364 (__v16si) 7365 _mm512_setzero_si512 (), 7366 (__mmask16) __U); 7367} 7368 7369extern __inline __m512i 7370__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7371_mm512_unpacklo_epi64 (__m512i __A, __m512i __B) 7372{ 7373 return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A, 7374 (__v8di) __B, 7375 (__v8di) 7376 _mm512_undefined_si512 (), 7377 (__mmask8) -1); 7378} 7379 7380extern __inline __m512i 7381__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7382_mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) 7383{ 7384 return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A, 7385 (__v8di) __B, 7386 (__v8di) __W, 7387 (__mmask8) __U); 7388} 7389 7390extern __inline __m512i 7391__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7392_mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B) 7393{ 7394 return (__m512i) __builtin_ia32_punpcklqdq512_mask ((__v8di) __A, 7395 (__v8di) __B, 7396 (__v8di) 7397 _mm512_setzero_si512 (), 7398 (__mmask8) __U); 7399} 7400 7401#ifdef __x86_64__ 7402#ifdef __OPTIMIZE__ 7403extern __inline unsigned long long 7404__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7405_mm_cvt_roundss_u64 (__m128 __A, const int __R) 7406{ 7407 return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf) __A, __R); 7408} 7409 7410extern __inline long long 7411__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7412_mm_cvt_roundss_si64 (__m128 __A, const int __R) 7413{ 7414 return (long long) __builtin_ia32_vcvtss2si64 ((__v4sf) __A, __R); 7415} 7416 7417extern 
__inline long long 7418__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7419_mm_cvt_roundss_i64 (__m128 __A, const int __R) 7420{ 7421 return (long long) __builtin_ia32_vcvtss2si64 ((__v4sf) __A, __R); 7422} 7423 7424extern __inline unsigned long long 7425__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7426_mm_cvtt_roundss_u64 (__m128 __A, const int __R) 7427{ 7428 return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf) __A, __R); 7429} 7430 7431extern __inline long long 7432__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7433_mm_cvtt_roundss_i64 (__m128 __A, const int __R) 7434{ 7435 return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A, __R); 7436} 7437 7438extern __inline long long 7439__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7440_mm_cvtt_roundss_si64 (__m128 __A, const int __R) 7441{ 7442 return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A, __R); 7443} 7444#else 7445#define _mm_cvt_roundss_u64(A, B) \ 7446 ((unsigned long long)__builtin_ia32_vcvtss2usi64(A, B)) 7447 7448#define _mm_cvt_roundss_si64(A, B) \ 7449 ((long long)__builtin_ia32_vcvtss2si64(A, B)) 7450 7451#define _mm_cvt_roundss_i64(A, B) \ 7452 ((long long)__builtin_ia32_vcvtss2si64(A, B)) 7453 7454#define _mm_cvtt_roundss_u64(A, B) \ 7455 ((unsigned long long)__builtin_ia32_vcvttss2usi64(A, B)) 7456 7457#define _mm_cvtt_roundss_i64(A, B) \ 7458 ((long long)__builtin_ia32_vcvttss2si64(A, B)) 7459 7460#define _mm_cvtt_roundss_si64(A, B) \ 7461 ((long long)__builtin_ia32_vcvttss2si64(A, B)) 7462#endif 7463#endif 7464 7465#ifdef __OPTIMIZE__ 7466extern __inline unsigned 7467__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7468_mm_cvt_roundss_u32 (__m128 __A, const int __R) 7469{ 7470 return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A, __R); 7471} 7472 7473extern __inline int 7474__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 
7475_mm_cvt_roundss_si32 (__m128 __A, const int __R) 7476{ 7477 return (int) __builtin_ia32_vcvtss2si32 ((__v4sf) __A, __R); 7478} 7479 7480extern __inline int 7481__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7482_mm_cvt_roundss_i32 (__m128 __A, const int __R) 7483{ 7484 return (int) __builtin_ia32_vcvtss2si32 ((__v4sf) __A, __R); 7485} 7486 7487extern __inline unsigned 7488__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7489_mm_cvtt_roundss_u32 (__m128 __A, const int __R) 7490{ 7491 return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A, __R); 7492} 7493 7494extern __inline int 7495__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7496_mm_cvtt_roundss_i32 (__m128 __A, const int __R) 7497{ 7498 return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A, __R); 7499} 7500 7501extern __inline int 7502__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7503_mm_cvtt_roundss_si32 (__m128 __A, const int __R) 7504{ 7505 return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A, __R); 7506} 7507#else 7508#define _mm_cvt_roundss_u32(A, B) \ 7509 ((unsigned)__builtin_ia32_vcvtss2usi32(A, B)) 7510 7511#define _mm_cvt_roundss_si32(A, B) \ 7512 ((int)__builtin_ia32_vcvtss2si32(A, B)) 7513 7514#define _mm_cvt_roundss_i32(A, B) \ 7515 ((int)__builtin_ia32_vcvtss2si32(A, B)) 7516 7517#define _mm_cvtt_roundss_u32(A, B) \ 7518 ((unsigned)__builtin_ia32_vcvttss2usi32(A, B)) 7519 7520#define _mm_cvtt_roundss_si32(A, B) \ 7521 ((int)__builtin_ia32_vcvttss2si32(A, B)) 7522 7523#define _mm_cvtt_roundss_i32(A, B) \ 7524 ((int)__builtin_ia32_vcvttss2si32(A, B)) 7525#endif 7526 7527#ifdef __x86_64__ 7528#ifdef __OPTIMIZE__ 7529extern __inline unsigned long long 7530__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7531_mm_cvt_roundsd_u64 (__m128d __A, const int __R) 7532{ 7533 return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df) __A, __R); 7534} 7535 7536extern __inline long long 
7537__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7538_mm_cvt_roundsd_si64 (__m128d __A, const int __R) 7539{ 7540 return (long long) __builtin_ia32_vcvtsd2si64 ((__v2df) __A, __R); 7541} 7542 7543extern __inline long long 7544__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7545_mm_cvt_roundsd_i64 (__m128d __A, const int __R) 7546{ 7547 return (long long) __builtin_ia32_vcvtsd2si64 ((__v2df) __A, __R); 7548} 7549 7550extern __inline unsigned long long 7551__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7552_mm_cvtt_roundsd_u64 (__m128d __A, const int __R) 7553{ 7554 return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df) __A, __R); 7555} 7556 7557extern __inline long long 7558__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7559_mm_cvtt_roundsd_si64 (__m128d __A, const int __R) 7560{ 7561 return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A, __R); 7562} 7563 7564extern __inline long long 7565__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7566_mm_cvtt_roundsd_i64 (__m128d __A, const int __R) 7567{ 7568 return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A, __R); 7569} 7570#else 7571#define _mm_cvt_roundsd_u64(A, B) \ 7572 ((unsigned long long)__builtin_ia32_vcvtsd2usi64(A, B)) 7573 7574#define _mm_cvt_roundsd_si64(A, B) \ 7575 ((long long)__builtin_ia32_vcvtsd2si64(A, B)) 7576 7577#define _mm_cvt_roundsd_i64(A, B) \ 7578 ((long long)__builtin_ia32_vcvtsd2si64(A, B)) 7579 7580#define _mm_cvtt_roundsd_u64(A, B) \ 7581 ((unsigned long long)__builtin_ia32_vcvttsd2usi64(A, B)) 7582 7583#define _mm_cvtt_roundsd_si64(A, B) \ 7584 ((long long)__builtin_ia32_vcvttsd2si64(A, B)) 7585 7586#define _mm_cvtt_roundsd_i64(A, B) \ 7587 ((long long)__builtin_ia32_vcvttsd2si64(A, B)) 7588#endif 7589#endif 7590 7591#ifdef __OPTIMIZE__ 7592extern __inline unsigned 7593__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7594_mm_cvt_roundsd_u32 
(__m128d __A, const int __R) 7595{ 7596 return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A, __R); 7597} 7598 7599extern __inline int 7600__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7601_mm_cvt_roundsd_si32 (__m128d __A, const int __R) 7602{ 7603 return (int) __builtin_ia32_vcvtsd2si32 ((__v2df) __A, __R); 7604} 7605 7606extern __inline int 7607__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7608_mm_cvt_roundsd_i32 (__m128d __A, const int __R) 7609{ 7610 return (int) __builtin_ia32_vcvtsd2si32 ((__v2df) __A, __R); 7611} 7612 7613extern __inline unsigned 7614__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7615_mm_cvtt_roundsd_u32 (__m128d __A, const int __R) 7616{ 7617 return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A, __R); 7618} 7619 7620extern __inline int 7621__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7622_mm_cvtt_roundsd_i32 (__m128d __A, const int __R) 7623{ 7624 return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A, __R); 7625} 7626 7627extern __inline int 7628__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7629_mm_cvtt_roundsd_si32 (__m128d __A, const int __R) 7630{ 7631 return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A, __R); 7632} 7633#else 7634#define _mm_cvt_roundsd_u32(A, B) \ 7635 ((unsigned)__builtin_ia32_vcvtsd2usi32(A, B)) 7636 7637#define _mm_cvt_roundsd_si32(A, B) \ 7638 ((int)__builtin_ia32_vcvtsd2si32(A, B)) 7639 7640#define _mm_cvt_roundsd_i32(A, B) \ 7641 ((int)__builtin_ia32_vcvtsd2si32(A, B)) 7642 7643#define _mm_cvtt_roundsd_u32(A, B) \ 7644 ((unsigned)__builtin_ia32_vcvttsd2usi32(A, B)) 7645 7646#define _mm_cvtt_roundsd_si32(A, B) \ 7647 ((int)__builtin_ia32_vcvttsd2si32(A, B)) 7648 7649#define _mm_cvtt_roundsd_i32(A, B) \ 7650 ((int)__builtin_ia32_vcvttsd2si32(A, B)) 7651#endif 7652 7653extern __inline __m512d 7654__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7655_mm512_movedup_pd 
(__m512d __A) 7656{ 7657 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A, 7658 (__v8df) 7659 _mm512_undefined_pd (), 7660 (__mmask8) -1); 7661} 7662 7663extern __inline __m512d 7664__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7665_mm512_mask_movedup_pd (__m512d __W, __mmask8 __U, __m512d __A) 7666{ 7667 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A, 7668 (__v8df) __W, 7669 (__mmask8) __U); 7670} 7671 7672extern __inline __m512d 7673__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7674_mm512_maskz_movedup_pd (__mmask8 __U, __m512d __A) 7675{ 7676 return (__m512d) __builtin_ia32_movddup512_mask ((__v8df) __A, 7677 (__v8df) 7678 _mm512_setzero_pd (), 7679 (__mmask8) __U); 7680} 7681 7682extern __inline __m512d 7683__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7684_mm512_unpacklo_pd (__m512d __A, __m512d __B) 7685{ 7686 return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A, 7687 (__v8df) __B, 7688 (__v8df) 7689 _mm512_undefined_pd (), 7690 (__mmask8) -1); 7691} 7692 7693extern __inline __m512d 7694__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7695_mm512_mask_unpacklo_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) 7696{ 7697 return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A, 7698 (__v8df) __B, 7699 (__v8df) __W, 7700 (__mmask8) __U); 7701} 7702 7703extern __inline __m512d 7704__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7705_mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B) 7706{ 7707 return (__m512d) __builtin_ia32_unpcklpd512_mask ((__v8df) __A, 7708 (__v8df) __B, 7709 (__v8df) 7710 _mm512_setzero_pd (), 7711 (__mmask8) __U); 7712} 7713 7714extern __inline __m512d 7715__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7716_mm512_unpackhi_pd (__m512d __A, __m512d __B) 7717{ 7718 return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A, 7719 (__v8df) __B, 7720 
(__v8df) 7721 _mm512_undefined_pd (), 7722 (__mmask8) -1); 7723} 7724 7725extern __inline __m512d 7726__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7727_mm512_mask_unpackhi_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) 7728{ 7729 return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A, 7730 (__v8df) __B, 7731 (__v8df) __W, 7732 (__mmask8) __U); 7733} 7734 7735extern __inline __m512d 7736__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7737_mm512_maskz_unpackhi_pd (__mmask8 __U, __m512d __A, __m512d __B) 7738{ 7739 return (__m512d) __builtin_ia32_unpckhpd512_mask ((__v8df) __A, 7740 (__v8df) __B, 7741 (__v8df) 7742 _mm512_setzero_pd (), 7743 (__mmask8) __U); 7744} 7745 7746extern __inline __m512 7747__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7748_mm512_unpackhi_ps (__m512 __A, __m512 __B) 7749{ 7750 return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A, 7751 (__v16sf) __B, 7752 (__v16sf) 7753 _mm512_undefined_ps (), 7754 (__mmask16) -1); 7755} 7756 7757extern __inline __m512 7758__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7759_mm512_mask_unpackhi_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) 7760{ 7761 return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A, 7762 (__v16sf) __B, 7763 (__v16sf) __W, 7764 (__mmask16) __U); 7765} 7766 7767extern __inline __m512 7768__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7769_mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B) 7770{ 7771 return (__m512) __builtin_ia32_unpckhps512_mask ((__v16sf) __A, 7772 (__v16sf) __B, 7773 (__v16sf) 7774 _mm512_setzero_ps (), 7775 (__mmask16) __U); 7776} 7777 7778#ifdef __OPTIMIZE__ 7779extern __inline __m512d 7780__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7781_mm512_cvt_roundps_pd (__m256 __A, const int __R) 7782{ 7783 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A, 7784 (__v8df) 7785 
_mm512_undefined_pd (), 7786 (__mmask8) -1, __R); 7787} 7788 7789extern __inline __m512d 7790__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7791_mm512_mask_cvt_roundps_pd (__m512d __W, __mmask8 __U, __m256 __A, 7792 const int __R) 7793{ 7794 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A, 7795 (__v8df) __W, 7796 (__mmask8) __U, __R); 7797} 7798 7799extern __inline __m512d 7800__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7801_mm512_maskz_cvt_roundps_pd (__mmask8 __U, __m256 __A, const int __R) 7802{ 7803 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A, 7804 (__v8df) 7805 _mm512_setzero_pd (), 7806 (__mmask8) __U, __R); 7807} 7808 7809extern __inline __m512 7810__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7811_mm512_cvt_roundph_ps (__m256i __A, const int __R) 7812{ 7813 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A, 7814 (__v16sf) 7815 _mm512_undefined_ps (), 7816 (__mmask16) -1, __R); 7817} 7818 7819extern __inline __m512 7820__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7821_mm512_mask_cvt_roundph_ps (__m512 __W, __mmask16 __U, __m256i __A, 7822 const int __R) 7823{ 7824 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A, 7825 (__v16sf) __W, 7826 (__mmask16) __U, __R); 7827} 7828 7829extern __inline __m512 7830__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7831_mm512_maskz_cvt_roundph_ps (__mmask16 __U, __m256i __A, const int __R) 7832{ 7833 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A, 7834 (__v16sf) 7835 _mm512_setzero_ps (), 7836 (__mmask16) __U, __R); 7837} 7838 7839extern __inline __m256i 7840__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7841_mm512_cvt_roundps_ph (__m512 __A, const int __I) 7842{ 7843 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A, 7844 __I, 7845 (__v16hi) 7846 _mm256_undefined_si256 (), 7847 -1); 7848} 7849 
7850extern __inline __m256i 7851__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7852_mm512_cvtps_ph (__m512 __A, const int __I) 7853{ 7854 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A, 7855 __I, 7856 (__v16hi) 7857 _mm256_undefined_si256 (), 7858 -1); 7859} 7860 7861extern __inline __m256i 7862__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7863_mm512_mask_cvt_roundps_ph (__m256i __U, __mmask16 __W, __m512 __A, 7864 const int __I) 7865{ 7866 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A, 7867 __I, 7868 (__v16hi) __U, 7869 (__mmask16) __W); 7870} 7871 7872extern __inline __m256i 7873__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7874_mm512_mask_cvtps_ph (__m256i __U, __mmask16 __W, __m512 __A, const int __I) 7875{ 7876 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A, 7877 __I, 7878 (__v16hi) __U, 7879 (__mmask16) __W); 7880} 7881 7882extern __inline __m256i 7883__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7884_mm512_maskz_cvt_roundps_ph (__mmask16 __W, __m512 __A, const int __I) 7885{ 7886 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A, 7887 __I, 7888 (__v16hi) 7889 _mm256_setzero_si256 (), 7890 (__mmask16) __W); 7891} 7892 7893extern __inline __m256i 7894__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7895_mm512_maskz_cvtps_ph (__mmask16 __W, __m512 __A, const int __I) 7896{ 7897 return (__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf) __A, 7898 __I, 7899 (__v16hi) 7900 _mm256_setzero_si256 (), 7901 (__mmask16) __W); 7902} 7903#else 7904#define _mm512_cvt_roundps_pd(A, B) \ 7905 (__m512d)__builtin_ia32_cvtps2pd512_mask(A, (__v8df)_mm512_undefined_pd(), -1, B) 7906 7907#define _mm512_mask_cvt_roundps_pd(W, U, A, B) \ 7908 (__m512d)__builtin_ia32_cvtps2pd512_mask(A, (__v8df)(W), U, B) 7909 7910#define _mm512_maskz_cvt_roundps_pd(U, A, B) \ 7911 (__m512d)__builtin_ia32_cvtps2pd512_mask(A, 
(__v8df)_mm512_setzero_pd(), U, B) 7912 7913#define _mm512_cvt_roundph_ps(A, B) \ 7914 (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(A), (__v16sf)_mm512_undefined_ps(), -1, B) 7915 7916#define _mm512_mask_cvt_roundph_ps(W, U, A, B) \ 7917 (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(A), (__v16sf)(W), U, B) 7918 7919#define _mm512_maskz_cvt_roundph_ps(U, A, B) \ 7920 (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(A), (__v16sf)_mm512_setzero_ps(), U, B) 7921 7922#define _mm512_cvt_roundps_ph(A, I) \ 7923 ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\ 7924 (__v16hi)_mm256_undefined_si256 (), -1)) 7925#define _mm512_cvtps_ph(A, I) \ 7926 ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\ 7927 (__v16hi)_mm256_undefined_si256 (), -1)) 7928#define _mm512_mask_cvt_roundps_ph(U, W, A, I) \ 7929 ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\ 7930 (__v16hi)(__m256i)(U), (__mmask16) (W))) 7931#define _mm512_mask_cvtps_ph(U, W, A, I) \ 7932 ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\ 7933 (__v16hi)(__m256i)(U), (__mmask16) (W))) 7934#define _mm512_maskz_cvt_roundps_ph(W, A, I) \ 7935 ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\ 7936 (__v16hi)_mm256_setzero_si256 (), (__mmask16) (W))) 7937#define _mm512_maskz_cvtps_ph(W, A, I) \ 7938 ((__m256i) __builtin_ia32_vcvtps2ph512_mask ((__v16sf)(__m512) A, (int) (I),\ 7939 (__v16hi)_mm256_setzero_si256 (), (__mmask16) (W))) 7940#endif 7941 7942#ifdef __OPTIMIZE__ 7943extern __inline __m256 7944__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7945_mm512_cvt_roundpd_ps (__m512d __A, const int __R) 7946{ 7947 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A, 7948 (__v8sf) 7949 _mm256_undefined_ps (), 7950 (__mmask8) -1, __R); 7951} 7952 7953extern __inline __m256 7954__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 
7955_mm512_mask_cvt_roundpd_ps (__m256 __W, __mmask8 __U, __m512d __A, 7956 const int __R) 7957{ 7958 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A, 7959 (__v8sf) __W, 7960 (__mmask8) __U, __R); 7961} 7962 7963extern __inline __m256 7964__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7965_mm512_maskz_cvt_roundpd_ps (__mmask8 __U, __m512d __A, const int __R) 7966{ 7967 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A, 7968 (__v8sf) 7969 _mm256_setzero_ps (), 7970 (__mmask8) __U, __R); 7971} 7972 7973extern __inline __m128 7974__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7975_mm_cvt_roundsd_ss (__m128 __A, __m128d __B, const int __R) 7976{ 7977 return (__m128) __builtin_ia32_cvtsd2ss_round ((__v4sf) __A, 7978 (__v2df) __B, 7979 __R); 7980} 7981 7982extern __inline __m128d 7983__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 7984_mm_cvt_roundss_sd (__m128d __A, __m128 __B, const int __R) 7985{ 7986 return (__m128d) __builtin_ia32_cvtss2sd_round ((__v2df) __A, 7987 (__v4sf) __B, 7988 __R); 7989} 7990#else 7991#define _mm512_cvt_roundpd_ps(A, B) \ 7992 (__m256)__builtin_ia32_cvtpd2ps512_mask(A, (__v8sf)_mm256_undefined_ps(), -1, B) 7993 7994#define _mm512_mask_cvt_roundpd_ps(W, U, A, B) \ 7995 (__m256)__builtin_ia32_cvtpd2ps512_mask(A, (__v8sf)(W), U, B) 7996 7997#define _mm512_maskz_cvt_roundpd_ps(U, A, B) \ 7998 (__m256)__builtin_ia32_cvtpd2ps512_mask(A, (__v8sf)_mm256_setzero_ps(), U, B) 7999 8000#define _mm_cvt_roundsd_ss(A, B, C) \ 8001 (__m128)__builtin_ia32_cvtsd2ss_round(A, B, C) 8002 8003#define _mm_cvt_roundss_sd(A, B, C) \ 8004 (__m128d)__builtin_ia32_cvtss2sd_round(A, B, C) 8005#endif 8006 8007extern __inline void 8008__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8009_mm512_stream_si512 (__m512i * __P, __m512i __A) 8010{ 8011 __builtin_ia32_movntdq512 ((__v8di *) __P, (__v8di) __A); 8012} 8013 8014extern __inline void 8015__attribute__ 
((__gnu_inline__, __always_inline__, __artificial__)) 8016_mm512_stream_ps (float *__P, __m512 __A) 8017{ 8018 __builtin_ia32_movntps512 (__P, (__v16sf) __A); 8019} 8020 8021extern __inline void 8022__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8023_mm512_stream_pd (double *__P, __m512d __A) 8024{ 8025 __builtin_ia32_movntpd512 (__P, (__v8df) __A); 8026} 8027 8028extern __inline __m512i 8029__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8030_mm512_stream_load_si512 (void *__P) 8031{ 8032 return __builtin_ia32_movntdqa512 ((__v8di *)__P); 8033} 8034 8035/* Constants for mantissa extraction */ 8036typedef enum 8037{ 8038 _MM_MANT_NORM_1_2, /* interval [1, 2) */ 8039 _MM_MANT_NORM_p5_2, /* interval [0.5, 2) */ 8040 _MM_MANT_NORM_p5_1, /* interval [0.5, 1) */ 8041 _MM_MANT_NORM_p75_1p5 /* interval [0.75, 1.5) */ 8042} _MM_MANTISSA_NORM_ENUM; 8043 8044typedef enum 8045{ 8046 _MM_MANT_SIGN_src, /* sign = sign(SRC) */ 8047 _MM_MANT_SIGN_zero, /* sign = 0 */ 8048 _MM_MANT_SIGN_nan /* DEST = NaN if sign(SRC) = 1 */ 8049} _MM_MANTISSA_SIGN_ENUM; 8050 8051#ifdef __OPTIMIZE__ 8052extern __inline __m128 8053__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8054_mm_getexp_round_ss (__m128 __A, __m128 __B, const int __R) 8055{ 8056 return (__m128) __builtin_ia32_getexpss128_round ((__v4sf) __A, 8057 (__v4sf) __B, 8058 __R); 8059} 8060 8061extern __inline __m128d 8062__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8063_mm_getexp_round_sd (__m128d __A, __m128d __B, const int __R) 8064{ 8065 return (__m128d) __builtin_ia32_getexpsd128_round ((__v2df) __A, 8066 (__v2df) __B, 8067 __R); 8068} 8069 8070extern __inline __m512 8071__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8072_mm512_getexp_round_ps (__m512 __A, const int __R) 8073{ 8074 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A, 8075 (__v16sf) 8076 _mm512_undefined_ps (), 8077 (__mmask16) -1, __R); 8078} 8079 
8080extern __inline __m512 8081__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8082_mm512_mask_getexp_round_ps (__m512 __W, __mmask16 __U, __m512 __A, 8083 const int __R) 8084{ 8085 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A, 8086 (__v16sf) __W, 8087 (__mmask16) __U, __R); 8088} 8089 8090extern __inline __m512 8091__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8092_mm512_maskz_getexp_round_ps (__mmask16 __U, __m512 __A, const int __R) 8093{ 8094 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A, 8095 (__v16sf) 8096 _mm512_setzero_ps (), 8097 (__mmask16) __U, __R); 8098} 8099 8100extern __inline __m512d 8101__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8102_mm512_getexp_round_pd (__m512d __A, const int __R) 8103{ 8104 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A, 8105 (__v8df) 8106 _mm512_undefined_pd (), 8107 (__mmask8) -1, __R); 8108} 8109 8110extern __inline __m512d 8111__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8112_mm512_mask_getexp_round_pd (__m512d __W, __mmask8 __U, __m512d __A, 8113 const int __R) 8114{ 8115 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A, 8116 (__v8df) __W, 8117 (__mmask8) __U, __R); 8118} 8119 8120extern __inline __m512d 8121__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8122_mm512_maskz_getexp_round_pd (__mmask8 __U, __m512d __A, const int __R) 8123{ 8124 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A, 8125 (__v8df) 8126 _mm512_setzero_pd (), 8127 (__mmask8) __U, __R); 8128} 8129 8130extern __inline __m512d 8131__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8132_mm512_getmant_round_pd (__m512d __A, _MM_MANTISSA_NORM_ENUM __B, 8133 _MM_MANTISSA_SIGN_ENUM __C, const int __R) 8134{ 8135 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A, 8136 (__C << 2) | __B, 8137 _mm512_undefined_pd (), 8138 (__mmask8) -1, __R); 8139} 
8140 8141extern __inline __m512d 8142__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8143_mm512_mask_getmant_round_pd (__m512d __W, __mmask8 __U, __m512d __A, 8144 _MM_MANTISSA_NORM_ENUM __B, 8145 _MM_MANTISSA_SIGN_ENUM __C, const int __R) 8146{ 8147 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A, 8148 (__C << 2) | __B, 8149 (__v8df) __W, __U, 8150 __R); 8151} 8152 8153extern __inline __m512d 8154__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8155_mm512_maskz_getmant_round_pd (__mmask8 __U, __m512d __A, 8156 _MM_MANTISSA_NORM_ENUM __B, 8157 _MM_MANTISSA_SIGN_ENUM __C, const int __R) 8158{ 8159 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A, 8160 (__C << 2) | __B, 8161 (__v8df) 8162 _mm512_setzero_pd (), 8163 __U, __R); 8164} 8165 8166extern __inline __m512 8167__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8168_mm512_getmant_round_ps (__m512 __A, _MM_MANTISSA_NORM_ENUM __B, 8169 _MM_MANTISSA_SIGN_ENUM __C, const int __R) 8170{ 8171 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A, 8172 (__C << 2) | __B, 8173 _mm512_undefined_ps (), 8174 (__mmask16) -1, __R); 8175} 8176 8177extern __inline __m512 8178__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8179_mm512_mask_getmant_round_ps (__m512 __W, __mmask16 __U, __m512 __A, 8180 _MM_MANTISSA_NORM_ENUM __B, 8181 _MM_MANTISSA_SIGN_ENUM __C, const int __R) 8182{ 8183 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A, 8184 (__C << 2) | __B, 8185 (__v16sf) __W, __U, 8186 __R); 8187} 8188 8189extern __inline __m512 8190__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8191_mm512_maskz_getmant_round_ps (__mmask16 __U, __m512 __A, 8192 _MM_MANTISSA_NORM_ENUM __B, 8193 _MM_MANTISSA_SIGN_ENUM __C, const int __R) 8194{ 8195 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A, 8196 (__C << 2) | __B, 8197 (__v16sf) 8198 _mm512_setzero_ps (), 8199 __U, __R); 
8200} 8201 8202extern __inline __m128d 8203__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8204_mm_getmant_round_sd (__m128d __A, __m128d __B, 8205 _MM_MANTISSA_NORM_ENUM __C, 8206 _MM_MANTISSA_SIGN_ENUM __D, const int __R) 8207{ 8208 return (__m128d) __builtin_ia32_getmantsd_round ((__v2df) __A, 8209 (__v2df) __B, 8210 (__D << 2) | __C, 8211 __R); 8212} 8213 8214extern __inline __m128 8215__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8216_mm_getmant_round_ss (__m128 __A, __m128 __B, 8217 _MM_MANTISSA_NORM_ENUM __C, 8218 _MM_MANTISSA_SIGN_ENUM __D, const int __R) 8219{ 8220 return (__m128) __builtin_ia32_getmantss_round ((__v4sf) __A, 8221 (__v4sf) __B, 8222 (__D << 2) | __C, 8223 __R); 8224} 8225 8226#else 8227#define _mm512_getmant_round_pd(X, B, C, R) \ 8228 ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \ 8229 (int)(((C)<<2) | (B)), \ 8230 (__v8df)(__m512d)_mm512_undefined_pd(), \ 8231 (__mmask8)-1,\ 8232 (R))) 8233 8234#define _mm512_mask_getmant_round_pd(W, U, X, B, C, R) \ 8235 ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \ 8236 (int)(((C)<<2) | (B)), \ 8237 (__v8df)(__m512d)(W), \ 8238 (__mmask8)(U),\ 8239 (R))) 8240 8241#define _mm512_maskz_getmant_round_pd(U, X, B, C, R) \ 8242 ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \ 8243 (int)(((C)<<2) | (B)), \ 8244 (__v8df)(__m512d)_mm512_setzero_pd(), \ 8245 (__mmask8)(U),\ 8246 (R))) 8247#define _mm512_getmant_round_ps(X, B, C, R) \ 8248 ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \ 8249 (int)(((C)<<2) | (B)), \ 8250 (__v16sf)(__m512)_mm512_undefined_ps(), \ 8251 (__mmask16)-1,\ 8252 (R))) 8253 8254#define _mm512_mask_getmant_round_ps(W, U, X, B, C, R) \ 8255 ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \ 8256 (int)(((C)<<2) | (B)), \ 8257 (__v16sf)(__m512)(W), \ 8258 (__mmask16)(U),\ 8259 (R))) 8260 8261#define _mm512_maskz_getmant_round_ps(U, X, B, C, R) \ 8262 
((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \ 8263 (int)(((C)<<2) | (B)), \ 8264 (__v16sf)(__m512)_mm512_setzero_ps(), \ 8265 (__mmask16)(U),\ 8266 (R))) 8267#define _mm_getmant_round_sd(X, Y, C, D, R) \ 8268 ((__m128d)__builtin_ia32_getmantsd_round ((__v2df)(__m128d)(X), \ 8269 (__v2df)(__m128d)(Y), \ 8270 (int)(((D)<<2) | (C)), \ 8271 (R))) 8272 8273#define _mm_getmant_round_ss(X, Y, C, D, R) \ 8274 ((__m128)__builtin_ia32_getmantss_round ((__v4sf)(__m128)(X), \ 8275 (__v4sf)(__m128)(Y), \ 8276 (int)(((D)<<2) | (C)), \ 8277 (R))) 8278 8279#define _mm_getexp_round_ss(A, B, R) \ 8280 ((__m128)__builtin_ia32_getexpss128_round((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), R)) 8281 8282#define _mm_getexp_round_sd(A, B, R) \ 8283 ((__m128d)__builtin_ia32_getexpsd128_round((__v2df)(__m128d)(A), (__v2df)(__m128d)(B), R)) 8284 8285#define _mm512_getexp_round_ps(A, R) \ 8286 ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \ 8287 (__v16sf)_mm512_undefined_ps(), (__mmask16)-1, R)) 8288 8289#define _mm512_mask_getexp_round_ps(W, U, A, R) \ 8290 ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \ 8291 (__v16sf)(__m512)(W), (__mmask16)(U), R)) 8292 8293#define _mm512_maskz_getexp_round_ps(U, A, R) \ 8294 ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \ 8295 (__v16sf)_mm512_setzero_ps(), (__mmask16)(U), R)) 8296 8297#define _mm512_getexp_round_pd(A, R) \ 8298 ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \ 8299 (__v8df)_mm512_undefined_pd(), (__mmask8)-1, R)) 8300 8301#define _mm512_mask_getexp_round_pd(W, U, A, R) \ 8302 ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \ 8303 (__v8df)(__m512d)(W), (__mmask8)(U), R)) 8304 8305#define _mm512_maskz_getexp_round_pd(U, A, R) \ 8306 ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \ 8307 (__v8df)_mm512_setzero_pd(), (__mmask8)(U), R)) 8308#endif 8309 8310#ifdef __OPTIMIZE__ 8311extern __inline __m512 8312__attribute__ 
((__gnu_inline__, __always_inline__, __artificial__)) 8313_mm512_roundscale_round_ps (__m512 __A, const int __imm, const int __R) 8314{ 8315 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm, 8316 (__v16sf) 8317 _mm512_undefined_ps (), 8318 -1, __R); 8319} 8320 8321extern __inline __m512 8322__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8323_mm512_mask_roundscale_round_ps (__m512 __A, __mmask16 __B, __m512 __C, 8324 const int __imm, const int __R) 8325{ 8326 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __C, __imm, 8327 (__v16sf) __A, 8328 (__mmask16) __B, __R); 8329} 8330 8331extern __inline __m512 8332__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8333_mm512_maskz_roundscale_round_ps (__mmask16 __A, __m512 __B, 8334 const int __imm, const int __R) 8335{ 8336 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __B, 8337 __imm, 8338 (__v16sf) 8339 _mm512_setzero_ps (), 8340 (__mmask16) __A, __R); 8341} 8342 8343extern __inline __m512d 8344__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8345_mm512_roundscale_round_pd (__m512d __A, const int __imm, const int __R) 8346{ 8347 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm, 8348 (__v8df) 8349 _mm512_undefined_pd (), 8350 -1, __R); 8351} 8352 8353extern __inline __m512d 8354__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8355_mm512_mask_roundscale_round_pd (__m512d __A, __mmask8 __B, 8356 __m512d __C, const int __imm, const int __R) 8357{ 8358 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __C, __imm, 8359 (__v8df) __A, 8360 (__mmask8) __B, __R); 8361} 8362 8363extern __inline __m512d 8364__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8365_mm512_maskz_roundscale_round_pd (__mmask8 __A, __m512d __B, 8366 const int __imm, const int __R) 8367{ 8368 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __B, 8369 __imm, 8370 (__v8df) 8371 
_mm512_setzero_pd (), 8372 (__mmask8) __A, __R); 8373} 8374 8375extern __inline __m128 8376__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8377_mm_roundscale_round_ss (__m128 __A, __m128 __B, const int __imm, const int __R) 8378{ 8379 return (__m128) __builtin_ia32_rndscaless_round ((__v4sf) __A, 8380 (__v4sf) __B, __imm, __R); 8381} 8382 8383extern __inline __m128d 8384__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8385_mm_roundscale_round_sd (__m128d __A, __m128d __B, const int __imm, 8386 const int __R) 8387{ 8388 return (__m128d) __builtin_ia32_rndscalesd_round ((__v2df) __A, 8389 (__v2df) __B, __imm, __R); 8390} 8391 8392#else 8393#define _mm512_roundscale_round_ps(A, B, R) \ 8394 ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(A), (int)(B),\ 8395 (__v16sf)_mm512_undefined_ps(), (__mmask16)(-1), R)) 8396#define _mm512_mask_roundscale_round_ps(A, B, C, D, R) \ 8397 ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(C), \ 8398 (int)(D), \ 8399 (__v16sf)(__m512)(A), \ 8400 (__mmask16)(B), R)) 8401#define _mm512_maskz_roundscale_round_ps(A, B, C, R) \ 8402 ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(B), \ 8403 (int)(C), \ 8404 (__v16sf)_mm512_setzero_ps(),\ 8405 (__mmask16)(A), R)) 8406#define _mm512_roundscale_round_pd(A, B, R) \ 8407 ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(A), (int)(B),\ 8408 (__v8df)_mm512_undefined_pd(), (__mmask8)(-1), R)) 8409#define _mm512_mask_roundscale_round_pd(A, B, C, D, R) \ 8410 ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(C), \ 8411 (int)(D), \ 8412 (__v8df)(__m512d)(A), \ 8413 (__mmask8)(B), R)) 8414#define _mm512_maskz_roundscale_round_pd(A, B, C, R) \ 8415 ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(B), \ 8416 (int)(C), \ 8417 (__v8df)_mm512_setzero_pd(),\ 8418 (__mmask8)(A), R)) 8419#define _mm_roundscale_round_ss(A, B, C, R) \ 8420 ((__m128) __builtin_ia32_rndscaless_round ((__v4sf)(__m128)(A), \ 
8421 (__v4sf)(__m128)(B), (int)(C), R)) 8422#define _mm_roundscale_round_sd(A, B, C, R) \ 8423 ((__m128d) __builtin_ia32_rndscalesd_round ((__v2df)(__m128d)(A), \ 8424 (__v2df)(__m128d)(B), (int)(C), R)) 8425#endif 8426 8427extern __inline __m512 8428__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8429_mm512_floor_ps (__m512 __A) 8430{ 8431 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, 8432 _MM_FROUND_FLOOR, 8433 (__v16sf) __A, -1, 8434 _MM_FROUND_CUR_DIRECTION); 8435} 8436 8437extern __inline __m512d 8438__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8439_mm512_floor_pd (__m512d __A) 8440{ 8441 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, 8442 _MM_FROUND_FLOOR, 8443 (__v8df) __A, -1, 8444 _MM_FROUND_CUR_DIRECTION); 8445} 8446 8447extern __inline __m512 8448__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8449_mm512_ceil_ps (__m512 __A) 8450{ 8451 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, 8452 _MM_FROUND_CEIL, 8453 (__v16sf) __A, -1, 8454 _MM_FROUND_CUR_DIRECTION); 8455} 8456 8457extern __inline __m512d 8458__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8459_mm512_ceil_pd (__m512d __A) 8460{ 8461 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, 8462 _MM_FROUND_CEIL, 8463 (__v8df) __A, -1, 8464 _MM_FROUND_CUR_DIRECTION); 8465} 8466 8467extern __inline __m512 8468__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8469_mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A) 8470{ 8471 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, 8472 _MM_FROUND_FLOOR, 8473 (__v16sf) __W, __U, 8474 _MM_FROUND_CUR_DIRECTION); 8475} 8476 8477extern __inline __m512d 8478__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8479_mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A) 8480{ 8481 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, 8482 _MM_FROUND_FLOOR, 8483 
(__v8df) __W, __U, 8484 _MM_FROUND_CUR_DIRECTION); 8485} 8486 8487extern __inline __m512 8488__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8489_mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, __m512 __A) 8490{ 8491 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, 8492 _MM_FROUND_CEIL, 8493 (__v16sf) __W, __U, 8494 _MM_FROUND_CUR_DIRECTION); 8495} 8496 8497extern __inline __m512d 8498__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8499_mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A) 8500{ 8501 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, 8502 _MM_FROUND_CEIL, 8503 (__v8df) __W, __U, 8504 _MM_FROUND_CUR_DIRECTION); 8505} 8506 8507#ifdef __OPTIMIZE__ 8508extern __inline __m512i 8509__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8510_mm512_alignr_epi32 (__m512i __A, __m512i __B, const int __imm) 8511{ 8512 return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A, 8513 (__v16si) __B, __imm, 8514 (__v16si) 8515 _mm512_undefined_si512 (), 8516 (__mmask16) -1); 8517} 8518 8519extern __inline __m512i 8520__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8521_mm512_mask_alignr_epi32 (__m512i __W, __mmask16 __U, __m512i __A, 8522 __m512i __B, const int __imm) 8523{ 8524 return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A, 8525 (__v16si) __B, __imm, 8526 (__v16si) __W, 8527 (__mmask16) __U); 8528} 8529 8530extern __inline __m512i 8531__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8532_mm512_maskz_alignr_epi32 (__mmask16 __U, __m512i __A, __m512i __B, 8533 const int __imm) 8534{ 8535 return (__m512i) __builtin_ia32_alignd512_mask ((__v16si) __A, 8536 (__v16si) __B, __imm, 8537 (__v16si) 8538 _mm512_setzero_si512 (), 8539 (__mmask16) __U); 8540} 8541 8542extern __inline __m512i 8543__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8544_mm512_alignr_epi64 (__m512i __A, __m512i __B, const int __imm) 8545{ 
8546 return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A, 8547 (__v8di) __B, __imm, 8548 (__v8di) 8549 _mm512_undefined_si512 (), 8550 (__mmask8) -1); 8551} 8552 8553extern __inline __m512i 8554__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8555_mm512_mask_alignr_epi64 (__m512i __W, __mmask8 __U, __m512i __A, 8556 __m512i __B, const int __imm) 8557{ 8558 return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A, 8559 (__v8di) __B, __imm, 8560 (__v8di) __W, 8561 (__mmask8) __U); 8562} 8563 8564extern __inline __m512i 8565__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8566_mm512_maskz_alignr_epi64 (__mmask8 __U, __m512i __A, __m512i __B, 8567 const int __imm) 8568{ 8569 return (__m512i) __builtin_ia32_alignq512_mask ((__v8di) __A, 8570 (__v8di) __B, __imm, 8571 (__v8di) 8572 _mm512_setzero_si512 (), 8573 (__mmask8) __U); 8574} 8575#else 8576#define _mm512_alignr_epi32(X, Y, C) \ 8577 ((__m512i)__builtin_ia32_alignd512_mask ((__v16si)(__m512i)(X), \ 8578 (__v16si)(__m512i)(Y), (int)(C), (__v16si)_mm512_undefined_si512 (),\ 8579 (__mmask16)-1)) 8580 8581#define _mm512_mask_alignr_epi32(W, U, X, Y, C) \ 8582 ((__m512i)__builtin_ia32_alignd512_mask ((__v16si)(__m512i)(X), \ 8583 (__v16si)(__m512i)(Y), (int)(C), (__v16si)(__m512i)(W), \ 8584 (__mmask16)(U))) 8585 8586#define _mm512_maskz_alignr_epi32(U, X, Y, C) \ 8587 ((__m512i)__builtin_ia32_alignd512_mask ((__v16si)(__m512i)(X), \ 8588 (__v16si)(__m512i)(Y), (int)(C), (__v16si)_mm512_setzero_si512 (),\ 8589 (__mmask16)(U))) 8590 8591#define _mm512_alignr_epi64(X, Y, C) \ 8592 ((__m512i)__builtin_ia32_alignq512_mask ((__v8di)(__m512i)(X), \ 8593 (__v8di)(__m512i)(Y), (int)(C), (__v8di)_mm512_undefined_si512 (), \ 8594 (__mmask8)-1)) 8595 8596#define _mm512_mask_alignr_epi64(W, U, X, Y, C) \ 8597 ((__m512i)__builtin_ia32_alignq512_mask ((__v8di)(__m512i)(X), \ 8598 (__v8di)(__m512i)(Y), (int)(C), (__v8di)(__m512i)(W), (__mmask8)(U))) 8599 8600#define 
_mm512_maskz_alignr_epi64(U, X, Y, C) \ 8601 ((__m512i)__builtin_ia32_alignq512_mask ((__v8di)(__m512i)(X), \ 8602 (__v8di)(__m512i)(Y), (int)(C), (__v8di)_mm512_setzero_si512 (),\ 8603 (__mmask8)(U))) 8604#endif 8605 8606extern __inline __mmask16 8607__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8608_mm512_cmpeq_epi32_mask (__m512i __A, __m512i __B) 8609{ 8610 return (__mmask16) __builtin_ia32_pcmpeqd512_mask ((__v16si) __A, 8611 (__v16si) __B, 8612 (__mmask16) -1); 8613} 8614 8615extern __inline __mmask16 8616__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8617_mm512_mask_cmpeq_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B) 8618{ 8619 return (__mmask16) __builtin_ia32_pcmpeqd512_mask ((__v16si) __A, 8620 (__v16si) __B, __U); 8621} 8622 8623extern __inline __mmask8 8624__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8625_mm512_mask_cmpeq_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B) 8626{ 8627 return (__mmask8) __builtin_ia32_pcmpeqq512_mask ((__v8di) __A, 8628 (__v8di) __B, __U); 8629} 8630 8631extern __inline __mmask8 8632__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8633_mm512_cmpeq_epi64_mask (__m512i __A, __m512i __B) 8634{ 8635 return (__mmask8) __builtin_ia32_pcmpeqq512_mask ((__v8di) __A, 8636 (__v8di) __B, 8637 (__mmask8) -1); 8638} 8639 8640extern __inline __mmask16 8641__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8642_mm512_cmpgt_epi32_mask (__m512i __A, __m512i __B) 8643{ 8644 return (__mmask16) __builtin_ia32_pcmpgtd512_mask ((__v16si) __A, 8645 (__v16si) __B, 8646 (__mmask16) -1); 8647} 8648 8649extern __inline __mmask16 8650__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8651_mm512_mask_cmpgt_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B) 8652{ 8653 return (__mmask16) __builtin_ia32_pcmpgtd512_mask ((__v16si) __A, 8654 (__v16si) __B, __U); 8655} 8656 8657extern __inline __mmask8 8658__attribute__ 
((__gnu_inline__, __always_inline__, __artificial__)) 8659_mm512_mask_cmpgt_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B) 8660{ 8661 return (__mmask8) __builtin_ia32_pcmpgtq512_mask ((__v8di) __A, 8662 (__v8di) __B, __U); 8663} 8664 8665extern __inline __mmask8 8666__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8667_mm512_cmpgt_epi64_mask (__m512i __A, __m512i __B) 8668{ 8669 return (__mmask8) __builtin_ia32_pcmpgtq512_mask ((__v8di) __A, 8670 (__v8di) __B, 8671 (__mmask8) -1); 8672} 8673 8674extern __inline __mmask16 8675__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8676_mm512_cmpge_epi32_mask (__m512i __X, __m512i __Y) 8677{ 8678 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X, 8679 (__v16si) __Y, 5, 8680 (__mmask16) -1); 8681} 8682 8683extern __inline __mmask16 8684__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8685_mm512_mask_cmpge_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y) 8686{ 8687 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X, 8688 (__v16si) __Y, 5, 8689 (__mmask16) __M); 8690} 8691 8692extern __inline __mmask16 8693__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8694_mm512_mask_cmpge_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y) 8695{ 8696 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X, 8697 (__v16si) __Y, 5, 8698 (__mmask16) __M); 8699} 8700 8701extern __inline __mmask16 8702__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8703_mm512_cmpge_epu32_mask (__m512i __X, __m512i __Y) 8704{ 8705 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X, 8706 (__v16si) __Y, 5, 8707 (__mmask16) -1); 8708} 8709 8710extern __inline __mmask8 8711__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8712_mm512_mask_cmpge_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y) 8713{ 8714 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X, 8715 (__v8di) __Y, 5, 8716 (__mmask8) 
__M); 8717} 8718 8719extern __inline __mmask8 8720__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8721_mm512_cmpge_epi64_mask (__m512i __X, __m512i __Y) 8722{ 8723 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X, 8724 (__v8di) __Y, 5, 8725 (__mmask8) -1); 8726} 8727 8728extern __inline __mmask8 8729__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8730_mm512_mask_cmpge_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y) 8731{ 8732 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X, 8733 (__v8di) __Y, 5, 8734 (__mmask8) __M); 8735} 8736 8737extern __inline __mmask8 8738__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8739_mm512_cmpge_epu64_mask (__m512i __X, __m512i __Y) 8740{ 8741 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X, 8742 (__v8di) __Y, 5, 8743 (__mmask8) -1); 8744} 8745 8746extern __inline __mmask16 8747__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8748_mm512_mask_cmple_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y) 8749{ 8750 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X, 8751 (__v16si) __Y, 2, 8752 (__mmask16) __M); 8753} 8754 8755extern __inline __mmask16 8756__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8757_mm512_cmple_epi32_mask (__m512i __X, __m512i __Y) 8758{ 8759 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X, 8760 (__v16si) __Y, 2, 8761 (__mmask16) -1); 8762} 8763 8764extern __inline __mmask16 8765__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8766_mm512_mask_cmple_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y) 8767{ 8768 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X, 8769 (__v16si) __Y, 2, 8770 (__mmask16) __M); 8771} 8772 8773extern __inline __mmask16 8774__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8775_mm512_cmple_epu32_mask (__m512i __X, __m512i __Y) 8776{ 8777 return (__mmask16) 
__builtin_ia32_ucmpd512_mask ((__v16si) __X, 8778 (__v16si) __Y, 2, 8779 (__mmask16) -1); 8780} 8781 8782extern __inline __mmask8 8783__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8784_mm512_mask_cmple_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y) 8785{ 8786 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X, 8787 (__v8di) __Y, 2, 8788 (__mmask8) __M); 8789} 8790 8791extern __inline __mmask8 8792__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8793_mm512_cmple_epi64_mask (__m512i __X, __m512i __Y) 8794{ 8795 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X, 8796 (__v8di) __Y, 2, 8797 (__mmask8) -1); 8798} 8799 8800extern __inline __mmask8 8801__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8802_mm512_mask_cmple_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y) 8803{ 8804 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X, 8805 (__v8di) __Y, 2, 8806 (__mmask8) __M); 8807} 8808 8809extern __inline __mmask8 8810__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8811_mm512_cmple_epu64_mask (__m512i __X, __m512i __Y) 8812{ 8813 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X, 8814 (__v8di) __Y, 2, 8815 (__mmask8) -1); 8816} 8817 8818extern __inline __mmask16 8819__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8820_mm512_mask_cmplt_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y) 8821{ 8822 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X, 8823 (__v16si) __Y, 1, 8824 (__mmask16) __M); 8825} 8826 8827extern __inline __mmask16 8828__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8829_mm512_cmplt_epi32_mask (__m512i __X, __m512i __Y) 8830{ 8831 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X, 8832 (__v16si) __Y, 1, 8833 (__mmask16) -1); 8834} 8835 8836extern __inline __mmask16 8837__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 
8838_mm512_mask_cmplt_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y) 8839{ 8840 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X, 8841 (__v16si) __Y, 1, 8842 (__mmask16) __M); 8843} 8844 8845extern __inline __mmask16 8846__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8847_mm512_cmplt_epu32_mask (__m512i __X, __m512i __Y) 8848{ 8849 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X, 8850 (__v16si) __Y, 1, 8851 (__mmask16) -1); 8852} 8853 8854extern __inline __mmask8 8855__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8856_mm512_mask_cmplt_epi64_mask (__mmask8 __M, __m512i __X, __m512i __Y) 8857{ 8858 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X, 8859 (__v8di) __Y, 1, 8860 (__mmask8) __M); 8861} 8862 8863extern __inline __mmask8 8864__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8865_mm512_cmplt_epi64_mask (__m512i __X, __m512i __Y) 8866{ 8867 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X, 8868 (__v8di) __Y, 1, 8869 (__mmask8) -1); 8870} 8871 8872extern __inline __mmask8 8873__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8874_mm512_mask_cmplt_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y) 8875{ 8876 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X, 8877 (__v8di) __Y, 1, 8878 (__mmask8) __M); 8879} 8880 8881extern __inline __mmask8 8882__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8883_mm512_cmplt_epu64_mask (__m512i __X, __m512i __Y) 8884{ 8885 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X, 8886 (__v8di) __Y, 1, 8887 (__mmask8) -1); 8888} 8889 8890extern __inline __mmask16 8891__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8892_mm512_cmpneq_epi32_mask (__m512i __X, __m512i __Y) 8893{ 8894 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X, 8895 (__v16si) __Y, 4, 8896 (__mmask16) -1); 8897} 8898 8899extern __inline __mmask16 
8900__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8901_mm512_mask_cmpneq_epi32_mask (__mmask16 __M, __m512i __X, __m512i __Y) 8902{ 8903 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X, 8904 (__v16si) __Y, 4, 8905 (__mmask16) __M); 8906} 8907 8908extern __inline __mmask16 8909__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8910_mm512_mask_cmpneq_epu32_mask (__mmask16 __M, __m512i __X, __m512i __Y) 8911{ 8912 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X, 8913 (__v16si) __Y, 4, 8914 (__mmask16) __M); 8915} 8916 8917extern __inline __mmask16 8918__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8919_mm512_cmpneq_epu32_mask (__m512i __X, __m512i __Y) 8920{ 8921 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X, 8922 (__v16si) __Y, 4, 8923 (__mmask16) -1); 8924} 8925 8926extern __inline __mmask8 8927__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8928_mm512_mask_cmpneq_epi64_mask (__mmask16 __M, __m512i __X, __m512i __Y) 8929{ 8930 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X, 8931 (__v8di) __Y, 4, 8932 (__mmask8) __M); 8933} 8934 8935extern __inline __mmask8 8936__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8937_mm512_cmpneq_epi64_mask (__m512i __X, __m512i __Y) 8938{ 8939 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X, 8940 (__v8di) __Y, 4, 8941 (__mmask8) -1); 8942} 8943 8944extern __inline __mmask8 8945__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8946_mm512_mask_cmpneq_epu64_mask (__mmask8 __M, __m512i __X, __m512i __Y) 8947{ 8948 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X, 8949 (__v8di) __Y, 4, 8950 (__mmask8) __M); 8951} 8952 8953extern __inline __mmask8 8954__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8955_mm512_cmpneq_epu64_mask (__m512i __X, __m512i __Y) 8956{ 8957 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X, 
8958 (__v8di) __Y, 4, 8959 (__mmask8) -1); 8960} 8961 8962#define _MM_CMPINT_EQ 0x0 8963#define _MM_CMPINT_LT 0x1 8964#define _MM_CMPINT_LE 0x2 8965#define _MM_CMPINT_UNUSED 0x3 8966#define _MM_CMPINT_NE 0x4 8967#define _MM_CMPINT_NLT 0x5 8968#define _MM_CMPINT_GE 0x5 8969#define _MM_CMPINT_NLE 0x6 8970#define _MM_CMPINT_GT 0x6 8971 8972#ifdef __OPTIMIZE__ 8973extern __inline __mmask8 8974__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8975_mm512_cmp_epi64_mask (__m512i __X, __m512i __Y, const int __P) 8976{ 8977 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X, 8978 (__v8di) __Y, __P, 8979 (__mmask8) -1); 8980} 8981 8982extern __inline __mmask16 8983__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8984_mm512_cmp_epi32_mask (__m512i __X, __m512i __Y, const int __P) 8985{ 8986 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X, 8987 (__v16si) __Y, __P, 8988 (__mmask16) -1); 8989} 8990 8991extern __inline __mmask8 8992__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 8993_mm512_cmp_epu64_mask (__m512i __X, __m512i __Y, const int __P) 8994{ 8995 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X, 8996 (__v8di) __Y, __P, 8997 (__mmask8) -1); 8998} 8999 9000extern __inline __mmask16 9001__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9002_mm512_cmp_epu32_mask (__m512i __X, __m512i __Y, const int __P) 9003{ 9004 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X, 9005 (__v16si) __Y, __P, 9006 (__mmask16) -1); 9007} 9008 9009extern __inline __mmask8 9010__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9011_mm512_cmp_round_pd_mask (__m512d __X, __m512d __Y, const int __P, 9012 const int __R) 9013{ 9014 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, 9015 (__v8df) __Y, __P, 9016 (__mmask8) -1, __R); 9017} 9018 9019extern __inline __mmask16 9020__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 
9021_mm512_cmp_round_ps_mask (__m512 __X, __m512 __Y, const int __P, const int __R) 9022{ 9023 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, 9024 (__v16sf) __Y, __P, 9025 (__mmask16) -1, __R); 9026} 9027 9028extern __inline __mmask8 9029__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9030_mm512_mask_cmp_epi64_mask (__mmask8 __U, __m512i __X, __m512i __Y, 9031 const int __P) 9032{ 9033 return (__mmask8) __builtin_ia32_cmpq512_mask ((__v8di) __X, 9034 (__v8di) __Y, __P, 9035 (__mmask8) __U); 9036} 9037 9038extern __inline __mmask16 9039__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9040_mm512_mask_cmp_epi32_mask (__mmask16 __U, __m512i __X, __m512i __Y, 9041 const int __P) 9042{ 9043 return (__mmask16) __builtin_ia32_cmpd512_mask ((__v16si) __X, 9044 (__v16si) __Y, __P, 9045 (__mmask16) __U); 9046} 9047 9048extern __inline __mmask8 9049__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9050_mm512_mask_cmp_epu64_mask (__mmask8 __U, __m512i __X, __m512i __Y, 9051 const int __P) 9052{ 9053 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __X, 9054 (__v8di) __Y, __P, 9055 (__mmask8) __U); 9056} 9057 9058extern __inline __mmask16 9059__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9060_mm512_mask_cmp_epu32_mask (__mmask16 __U, __m512i __X, __m512i __Y, 9061 const int __P) 9062{ 9063 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __X, 9064 (__v16si) __Y, __P, 9065 (__mmask16) __U); 9066} 9067 9068extern __inline __mmask8 9069__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9070_mm512_mask_cmp_round_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y, 9071 const int __P, const int __R) 9072{ 9073 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, 9074 (__v8df) __Y, __P, 9075 (__mmask8) __U, __R); 9076} 9077 9078extern __inline __mmask16 9079__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 
9080_mm512_mask_cmp_round_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y, 9081 const int __P, const int __R) 9082{ 9083 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, 9084 (__v16sf) __Y, __P, 9085 (__mmask16) __U, __R); 9086} 9087 9088extern __inline __mmask8 9089__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9090_mm_cmp_round_sd_mask (__m128d __X, __m128d __Y, const int __P, const int __R) 9091{ 9092 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X, 9093 (__v2df) __Y, __P, 9094 (__mmask8) -1, __R); 9095} 9096 9097extern __inline __mmask8 9098__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9099_mm_mask_cmp_round_sd_mask (__mmask8 __M, __m128d __X, __m128d __Y, 9100 const int __P, const int __R) 9101{ 9102 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X, 9103 (__v2df) __Y, __P, 9104 (__mmask8) __M, __R); 9105} 9106 9107extern __inline __mmask8 9108__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9109_mm_cmp_round_ss_mask (__m128 __X, __m128 __Y, const int __P, const int __R) 9110{ 9111 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X, 9112 (__v4sf) __Y, __P, 9113 (__mmask8) -1, __R); 9114} 9115 9116extern __inline __mmask8 9117__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9118_mm_mask_cmp_round_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y, 9119 const int __P, const int __R) 9120{ 9121 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X, 9122 (__v4sf) __Y, __P, 9123 (__mmask8) __M, __R); 9124} 9125 9126#else 9127#define _mm512_cmp_epi64_mask(X, Y, P) \ 9128 ((__mmask8) __builtin_ia32_cmpq512_mask ((__v8di)(__m512i)(X), \ 9129 (__v8di)(__m512i)(Y), (int)(P),\ 9130 (__mmask8)-1)) 9131 9132#define _mm512_cmp_epi32_mask(X, Y, P) \ 9133 ((__mmask8) __builtin_ia32_cmpd512_mask ((__v16si)(__m512i)(X), \ 9134 (__v16si)(__m512i)(Y), (int)(P),\ 9135 (__mmask16)-1)) 9136 9137#define _mm512_cmp_epu64_mask(X, Y, P) \ 9138 ((__mmask8) 
__builtin_ia32_ucmpq512_mask ((__v8di)(__m512i)(X), \ 9139 (__v8di)(__m512i)(Y), (int)(P),\ 9140 (__mmask8)-1)) 9141 9142#define _mm512_cmp_epu32_mask(X, Y, P) \ 9143 ((__mmask8) __builtin_ia32_ucmpd512_mask ((__v16si)(__m512i)(X), \ 9144 (__v16si)(__m512i)(Y), (int)(P),\ 9145 (__mmask16)-1)) 9146 9147#define _mm512_cmp_round_pd_mask(X, Y, P, R) \ 9148 ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X), \ 9149 (__v8df)(__m512d)(Y), (int)(P),\ 9150 (__mmask8)-1, R)) 9151 9152#define _mm512_cmp_round_ps_mask(X, Y, P, R) \ 9153 ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \ 9154 (__v16sf)(__m512)(Y), (int)(P),\ 9155 (__mmask16)-1, R)) 9156 9157#define _mm512_mask_cmp_epi64_mask(M, X, Y, P) \ 9158 ((__mmask8) __builtin_ia32_cmpq512_mask ((__v8di)(__m512i)(X), \ 9159 (__v8di)(__m512i)(Y), (int)(P),\ 9160 (__mmask8)M)) 9161 9162#define _mm512_mask_cmp_epi32_mask(M, X, Y, P) \ 9163 ((__mmask8) __builtin_ia32_cmpd512_mask ((__v16si)(__m512i)(X), \ 9164 (__v16si)(__m512i)(Y), (int)(P),\ 9165 (__mmask16)M)) 9166 9167#define _mm512_mask_cmp_epu64_mask(M, X, Y, P) \ 9168 ((__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di)(__m512i)(X), \ 9169 (__v8di)(__m512i)(Y), (int)(P),\ 9170 (__mmask8)M)) 9171 9172#define _mm512_mask_cmp_epu32_mask(M, X, Y, P) \ 9173 ((__mmask8) __builtin_ia32_ucmpd512_mask ((__v16si)(__m512i)(X), \ 9174 (__v16si)(__m512i)(Y), (int)(P),\ 9175 (__mmask16)M)) 9176 9177#define _mm512_mask_cmp_round_pd_mask(M, X, Y, P, R) \ 9178 ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X), \ 9179 (__v8df)(__m512d)(Y), (int)(P),\ 9180 (__mmask8)M, R)) 9181 9182#define _mm512_mask_cmp_round_ps_mask(M, X, Y, P, R) \ 9183 ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \ 9184 (__v16sf)(__m512)(Y), (int)(P),\ 9185 (__mmask16)M, R)) 9186 9187#define _mm_cmp_round_sd_mask(X, Y, P, R) \ 9188 ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \ 9189 (__v2df)(__m128d)(Y), (int)(P),\ 9190 (__mmask8)-1, R)) 9191 
9192#define _mm_mask_cmp_round_sd_mask(M, X, Y, P, R) \ 9193 ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \ 9194 (__v2df)(__m128d)(Y), (int)(P),\ 9195 (M), R)) 9196 9197#define _mm_cmp_round_ss_mask(X, Y, P, R) \ 9198 ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \ 9199 (__v4sf)(__m128)(Y), (int)(P), \ 9200 (__mmask8)-1, R)) 9201 9202#define _mm_mask_cmp_round_ss_mask(M, X, Y, P, R) \ 9203 ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \ 9204 (__v4sf)(__m128)(Y), (int)(P), \ 9205 (M), R)) 9206#endif 9207 9208#ifdef __OPTIMIZE__ 9209extern __inline __m512 9210__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9211_mm512_i32gather_ps (__m512i __index, float const *__addr, int __scale) 9212{ 9213 __m512 v1_old = _mm512_undefined_ps (); 9214 __mmask16 mask = 0xFFFF; 9215 9216 return (__m512) __builtin_ia32_gathersiv16sf ((__v16sf) v1_old, 9217 __addr, 9218 (__v16si) __index, 9219 mask, __scale); 9220} 9221 9222extern __inline __m512 9223__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9224_mm512_mask_i32gather_ps (__m512 v1_old, __mmask16 __mask, 9225 __m512i __index, float const *__addr, int __scale) 9226{ 9227 return (__m512) __builtin_ia32_gathersiv16sf ((__v16sf) v1_old, 9228 __addr, 9229 (__v16si) __index, 9230 __mask, __scale); 9231} 9232 9233extern __inline __m512d 9234__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9235_mm512_i32gather_pd (__m256i __index, double const *__addr, int __scale) 9236{ 9237 __m512d v1_old = _mm512_undefined_pd (); 9238 __mmask8 mask = 0xFF; 9239 9240 return (__m512d) __builtin_ia32_gathersiv8df ((__v8df) v1_old, 9241 __addr, 9242 (__v8si) __index, mask, 9243 __scale); 9244} 9245 9246extern __inline __m512d 9247__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9248_mm512_mask_i32gather_pd (__m512d __v1_old, __mmask8 __mask, 9249 __m256i __index, double const *__addr, int __scale) 9250{ 9251 return (__m512d) 
__builtin_ia32_gathersiv8df ((__v8df) __v1_old, 9252 __addr, 9253 (__v8si) __index, 9254 __mask, __scale); 9255} 9256 9257extern __inline __m256 9258__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9259_mm512_i64gather_ps (__m512i __index, float const *__addr, int __scale) 9260{ 9261 __m256 v1_old = _mm256_undefined_ps (); 9262 __mmask8 mask = 0xFF; 9263 9264 return (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) v1_old, 9265 __addr, 9266 (__v8di) __index, mask, 9267 __scale); 9268} 9269 9270extern __inline __m256 9271__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9272_mm512_mask_i64gather_ps (__m256 __v1_old, __mmask8 __mask, 9273 __m512i __index, float const *__addr, int __scale) 9274{ 9275 return (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf) __v1_old, 9276 __addr, 9277 (__v8di) __index, 9278 __mask, __scale); 9279} 9280 9281extern __inline __m512d 9282__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9283_mm512_i64gather_pd (__m512i __index, double const *__addr, int __scale) 9284{ 9285 __m512d v1_old = _mm512_undefined_pd (); 9286 __mmask8 mask = 0xFF; 9287 9288 return (__m512d) __builtin_ia32_gatherdiv8df ((__v8df) v1_old, 9289 __addr, 9290 (__v8di) __index, mask, 9291 __scale); 9292} 9293 9294extern __inline __m512d 9295__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9296_mm512_mask_i64gather_pd (__m512d __v1_old, __mmask8 __mask, 9297 __m512i __index, double const *__addr, int __scale) 9298{ 9299 return (__m512d) __builtin_ia32_gatherdiv8df ((__v8df) __v1_old, 9300 __addr, 9301 (__v8di) __index, 9302 __mask, __scale); 9303} 9304 9305extern __inline __m512i 9306__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9307_mm512_i32gather_epi32 (__m512i __index, int const *__addr, int __scale) 9308{ 9309 __m512i v1_old = _mm512_undefined_si512 (); 9310 __mmask16 mask = 0xFFFF; 9311 9312 return (__m512i) __builtin_ia32_gathersiv16si ((__v16si) v1_old, 9313 __addr, 9314 
(__v16si) __index, 9315 mask, __scale); 9316} 9317 9318extern __inline __m512i 9319__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9320_mm512_mask_i32gather_epi32 (__m512i __v1_old, __mmask16 __mask, 9321 __m512i __index, int const *__addr, int __scale) 9322{ 9323 return (__m512i) __builtin_ia32_gathersiv16si ((__v16si) __v1_old, 9324 __addr, 9325 (__v16si) __index, 9326 __mask, __scale); 9327} 9328 9329extern __inline __m512i 9330__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9331_mm512_i32gather_epi64 (__m256i __index, long long const *__addr, int __scale) 9332{ 9333 __m512i v1_old = _mm512_undefined_si512 (); 9334 __mmask8 mask = 0xFF; 9335 9336 return (__m512i) __builtin_ia32_gathersiv8di ((__v8di) v1_old, 9337 __addr, 9338 (__v8si) __index, mask, 9339 __scale); 9340} 9341 9342extern __inline __m512i 9343__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9344_mm512_mask_i32gather_epi64 (__m512i __v1_old, __mmask8 __mask, 9345 __m256i __index, long long const *__addr, 9346 int __scale) 9347{ 9348 return (__m512i) __builtin_ia32_gathersiv8di ((__v8di) __v1_old, 9349 __addr, 9350 (__v8si) __index, 9351 __mask, __scale); 9352} 9353 9354extern __inline __m256i 9355__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9356_mm512_i64gather_epi32 (__m512i __index, int const *__addr, int __scale) 9357{ 9358 __m256i v1_old = _mm256_undefined_si256 (); 9359 __mmask8 mask = 0xFF; 9360 9361 return (__m256i) __builtin_ia32_gatherdiv16si ((__v8si) v1_old, 9362 __addr, 9363 (__v8di) __index, 9364 mask, __scale); 9365} 9366 9367extern __inline __m256i 9368__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9369_mm512_mask_i64gather_epi32 (__m256i __v1_old, __mmask8 __mask, 9370 __m512i __index, int const *__addr, int __scale) 9371{ 9372 return (__m256i) __builtin_ia32_gatherdiv16si ((__v8si) __v1_old, 9373 __addr, 9374 (__v8di) __index, 9375 __mask, __scale); 9376} 9377 9378extern __inline 
__m512i 9379__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9380_mm512_i64gather_epi64 (__m512i __index, long long const *__addr, int __scale) 9381{ 9382 __m512i v1_old = _mm512_undefined_si512 (); 9383 __mmask8 mask = 0xFF; 9384 9385 return (__m512i) __builtin_ia32_gatherdiv8di ((__v8di) v1_old, 9386 __addr, 9387 (__v8di) __index, mask, 9388 __scale); 9389} 9390 9391extern __inline __m512i 9392__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9393_mm512_mask_i64gather_epi64 (__m512i __v1_old, __mmask8 __mask, 9394 __m512i __index, long long const *__addr, 9395 int __scale) 9396{ 9397 return (__m512i) __builtin_ia32_gatherdiv8di ((__v8di) __v1_old, 9398 __addr, 9399 (__v8di) __index, 9400 __mask, __scale); 9401} 9402 9403extern __inline void 9404__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9405_mm512_i32scatter_ps (float *__addr, __m512i __index, __m512 __v1, int __scale) 9406{ 9407 __builtin_ia32_scattersiv16sf (__addr, (__mmask16) 0xFFFF, 9408 (__v16si) __index, (__v16sf) __v1, __scale); 9409} 9410 9411extern __inline void 9412__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9413_mm512_mask_i32scatter_ps (float *__addr, __mmask16 __mask, 9414 __m512i __index, __m512 __v1, int __scale) 9415{ 9416 __builtin_ia32_scattersiv16sf (__addr, __mask, (__v16si) __index, 9417 (__v16sf) __v1, __scale); 9418} 9419 9420extern __inline void 9421__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9422_mm512_i32scatter_pd (double *__addr, __m256i __index, __m512d __v1, 9423 int __scale) 9424{ 9425 __builtin_ia32_scattersiv8df (__addr, (__mmask8) 0xFF, 9426 (__v8si) __index, (__v8df) __v1, __scale); 9427} 9428 9429extern __inline void 9430__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9431_mm512_mask_i32scatter_pd (double *__addr, __mmask8 __mask, 9432 __m256i __index, __m512d __v1, int __scale) 9433{ 9434 __builtin_ia32_scattersiv8df (__addr, __mask, (__v8si) 
__index, 9435 (__v8df) __v1, __scale); 9436} 9437 9438extern __inline void 9439__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9440_mm512_i64scatter_ps (float *__addr, __m512i __index, __m256 __v1, int __scale) 9441{ 9442 __builtin_ia32_scatterdiv16sf (__addr, (__mmask8) 0xFF, 9443 (__v8di) __index, (__v8sf) __v1, __scale); 9444} 9445 9446extern __inline void 9447__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9448_mm512_mask_i64scatter_ps (float *__addr, __mmask8 __mask, 9449 __m512i __index, __m256 __v1, int __scale) 9450{ 9451 __builtin_ia32_scatterdiv16sf (__addr, __mask, (__v8di) __index, 9452 (__v8sf) __v1, __scale); 9453} 9454 9455extern __inline void 9456__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9457_mm512_i64scatter_pd (double *__addr, __m512i __index, __m512d __v1, 9458 int __scale) 9459{ 9460 __builtin_ia32_scatterdiv8df (__addr, (__mmask8) 0xFF, 9461 (__v8di) __index, (__v8df) __v1, __scale); 9462} 9463 9464extern __inline void 9465__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9466_mm512_mask_i64scatter_pd (double *__addr, __mmask8 __mask, 9467 __m512i __index, __m512d __v1, int __scale) 9468{ 9469 __builtin_ia32_scatterdiv8df (__addr, __mask, (__v8di) __index, 9470 (__v8df) __v1, __scale); 9471} 9472 9473extern __inline void 9474__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9475_mm512_i32scatter_epi32 (int *__addr, __m512i __index, 9476 __m512i __v1, int __scale) 9477{ 9478 __builtin_ia32_scattersiv16si (__addr, (__mmask16) 0xFFFF, 9479 (__v16si) __index, (__v16si) __v1, __scale); 9480} 9481 9482extern __inline void 9483__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9484_mm512_mask_i32scatter_epi32 (int *__addr, __mmask16 __mask, 9485 __m512i __index, __m512i __v1, int __scale) 9486{ 9487 __builtin_ia32_scattersiv16si (__addr, __mask, (__v16si) __index, 9488 (__v16si) __v1, __scale); 9489} 9490 9491extern __inline void 
9492__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9493_mm512_i32scatter_epi64 (long long *__addr, __m256i __index, 9494 __m512i __v1, int __scale) 9495{ 9496 __builtin_ia32_scattersiv8di (__addr, (__mmask8) 0xFF, 9497 (__v8si) __index, (__v8di) __v1, __scale); 9498} 9499 9500extern __inline void 9501__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9502_mm512_mask_i32scatter_epi64 (long long *__addr, __mmask8 __mask, 9503 __m256i __index, __m512i __v1, int __scale) 9504{ 9505 __builtin_ia32_scattersiv8di (__addr, __mask, (__v8si) __index, 9506 (__v8di) __v1, __scale); 9507} 9508 9509extern __inline void 9510__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9511_mm512_i64scatter_epi32 (int *__addr, __m512i __index, 9512 __m256i __v1, int __scale) 9513{ 9514 __builtin_ia32_scatterdiv16si (__addr, (__mmask8) 0xFF, 9515 (__v8di) __index, (__v8si) __v1, __scale); 9516} 9517 9518extern __inline void 9519__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9520_mm512_mask_i64scatter_epi32 (int *__addr, __mmask8 __mask, 9521 __m512i __index, __m256i __v1, int __scale) 9522{ 9523 __builtin_ia32_scatterdiv16si (__addr, __mask, (__v8di) __index, 9524 (__v8si) __v1, __scale); 9525} 9526 9527extern __inline void 9528__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9529_mm512_i64scatter_epi64 (long long *__addr, __m512i __index, 9530 __m512i __v1, int __scale) 9531{ 9532 __builtin_ia32_scatterdiv8di (__addr, (__mmask8) 0xFF, 9533 (__v8di) __index, (__v8di) __v1, __scale); 9534} 9535 9536extern __inline void 9537__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9538_mm512_mask_i64scatter_epi64 (long long *__addr, __mmask8 __mask, 9539 __m512i __index, __m512i __v1, int __scale) 9540{ 9541 __builtin_ia32_scatterdiv8di (__addr, __mask, (__v8di) __index, 9542 (__v8di) __v1, __scale); 9543} 9544#else 9545#define _mm512_i32gather_ps(INDEX, ADDR, SCALE) \ 9546 (__m512) 
__builtin_ia32_gathersiv16sf ((__v16sf)_mm512_undefined_ps(),\ 9547 (float const *)ADDR, \ 9548 (__v16si)(__m512i)INDEX, \ 9549 (__mmask16)0xFFFF, (int)SCALE) 9550 9551#define _mm512_mask_i32gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \ 9552 (__m512) __builtin_ia32_gathersiv16sf ((__v16sf)(__m512)V1OLD, \ 9553 (float const *)ADDR, \ 9554 (__v16si)(__m512i)INDEX, \ 9555 (__mmask16)MASK, (int)SCALE) 9556 9557#define _mm512_i32gather_pd(INDEX, ADDR, SCALE) \ 9558 (__m512d) __builtin_ia32_gathersiv8df ((__v8df)_mm512_undefined_pd(), \ 9559 (double const *)ADDR, \ 9560 (__v8si)(__m256i)INDEX, \ 9561 (__mmask8)0xFF, (int)SCALE) 9562 9563#define _mm512_mask_i32gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \ 9564 (__m512d) __builtin_ia32_gathersiv8df ((__v8df)(__m512d)V1OLD, \ 9565 (double const *)ADDR, \ 9566 (__v8si)(__m256i)INDEX, \ 9567 (__mmask8)MASK, (int)SCALE) 9568 9569#define _mm512_i64gather_ps(INDEX, ADDR, SCALE) \ 9570 (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf)_mm256_undefined_ps(), \ 9571 (float const *)ADDR, \ 9572 (__v8di)(__m512i)INDEX, \ 9573 (__mmask8)0xFF, (int)SCALE) 9574 9575#define _mm512_mask_i64gather_ps(V1OLD, MASK, INDEX, ADDR, SCALE) \ 9576 (__m256) __builtin_ia32_gatherdiv16sf ((__v8sf)(__m256)V1OLD, \ 9577 (float const *)ADDR, \ 9578 (__v8di)(__m512i)INDEX, \ 9579 (__mmask8)MASK, (int)SCALE) 9580 9581#define _mm512_i64gather_pd(INDEX, ADDR, SCALE) \ 9582 (__m512d) __builtin_ia32_gatherdiv8df ((__v8df)_mm512_undefined_pd(), \ 9583 (double const *)ADDR, \ 9584 (__v8di)(__m512i)INDEX, \ 9585 (__mmask8)0xFF, (int)SCALE) 9586 9587#define _mm512_mask_i64gather_pd(V1OLD, MASK, INDEX, ADDR, SCALE) \ 9588 (__m512d) __builtin_ia32_gatherdiv8df ((__v8df)(__m512d)V1OLD, \ 9589 (double const *)ADDR, \ 9590 (__v8di)(__m512i)INDEX, \ 9591 (__mmask8)MASK, (int)SCALE) 9592 9593#define _mm512_i32gather_epi32(INDEX, ADDR, SCALE) \ 9594 (__m512i) __builtin_ia32_gathersiv16si ((__v16si)_mm512_undefined_si512 (), \ 9595 (int const *)ADDR, \ 9596 
(__v16si)(__m512i)INDEX, \ 9597 (__mmask16)0xFFFF, (int)SCALE) 9598 9599#define _mm512_mask_i32gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \ 9600 (__m512i) __builtin_ia32_gathersiv16si ((__v16si)(__m512i)V1OLD, \ 9601 (int const *)ADDR, \ 9602 (__v16si)(__m512i)INDEX, \ 9603 (__mmask16)MASK, (int)SCALE) 9604 9605#define _mm512_i32gather_epi64(INDEX, ADDR, SCALE) \ 9606 (__m512i) __builtin_ia32_gathersiv8di ((__v8di)_mm512_undefined_si512 (), \ 9607 (long long const *)ADDR, \ 9608 (__v8si)(__m256i)INDEX, \ 9609 (__mmask8)0xFF, (int)SCALE) 9610 9611#define _mm512_mask_i32gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \ 9612 (__m512i) __builtin_ia32_gathersiv8di ((__v8di)(__m512i)V1OLD, \ 9613 (long long const *)ADDR, \ 9614 (__v8si)(__m256i)INDEX, \ 9615 (__mmask8)MASK, (int)SCALE) 9616 9617#define _mm512_i64gather_epi32(INDEX, ADDR, SCALE) \ 9618 (__m256i) __builtin_ia32_gatherdiv16si ((__v8si)_mm256_undefined_si256(), \ 9619 (int const *)ADDR, \ 9620 (__v8di)(__m512i)INDEX, \ 9621 (__mmask8)0xFF, (int)SCALE) 9622 9623#define _mm512_mask_i64gather_epi32(V1OLD, MASK, INDEX, ADDR, SCALE) \ 9624 (__m256i) __builtin_ia32_gatherdiv16si ((__v8si)(__m256i)V1OLD, \ 9625 (int const *)ADDR, \ 9626 (__v8di)(__m512i)INDEX, \ 9627 (__mmask8)MASK, (int)SCALE) 9628 9629#define _mm512_i64gather_epi64(INDEX, ADDR, SCALE) \ 9630 (__m512i) __builtin_ia32_gatherdiv8di ((__v8di)_mm512_undefined_si512 (), \ 9631 (long long const *)ADDR, \ 9632 (__v8di)(__m512i)INDEX, \ 9633 (__mmask8)0xFF, (int)SCALE) 9634 9635#define _mm512_mask_i64gather_epi64(V1OLD, MASK, INDEX, ADDR, SCALE) \ 9636 (__m512i) __builtin_ia32_gatherdiv8di ((__v8di)(__m512i)V1OLD, \ 9637 (long long const *)ADDR, \ 9638 (__v8di)(__m512i)INDEX, \ 9639 (__mmask8)MASK, (int)SCALE) 9640 9641#define _mm512_i32scatter_ps(ADDR, INDEX, V1, SCALE) \ 9642 __builtin_ia32_scattersiv16sf ((float *)ADDR, (__mmask16)0xFFFF, \ 9643 (__v16si)(__m512i)INDEX, \ 9644 (__v16sf)(__m512)V1, (int)SCALE) 9645 9646#define 
_mm512_mask_i32scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \ 9647 __builtin_ia32_scattersiv16sf ((float *)ADDR, (__mmask16)MASK, \ 9648 (__v16si)(__m512i)INDEX, \ 9649 (__v16sf)(__m512)V1, (int)SCALE) 9650 9651#define _mm512_i32scatter_pd(ADDR, INDEX, V1, SCALE) \ 9652 __builtin_ia32_scattersiv8df ((double *)ADDR, (__mmask8)0xFF, \ 9653 (__v8si)(__m256i)INDEX, \ 9654 (__v8df)(__m512d)V1, (int)SCALE) 9655 9656#define _mm512_mask_i32scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \ 9657 __builtin_ia32_scattersiv8df ((double *)ADDR, (__mmask8)MASK, \ 9658 (__v8si)(__m256i)INDEX, \ 9659 (__v8df)(__m512d)V1, (int)SCALE) 9660 9661#define _mm512_i64scatter_ps(ADDR, INDEX, V1, SCALE) \ 9662 __builtin_ia32_scatterdiv16sf ((float *)ADDR, (__mmask8)0xFF, \ 9663 (__v8di)(__m512i)INDEX, \ 9664 (__v8sf)(__m256)V1, (int)SCALE) 9665 9666#define _mm512_mask_i64scatter_ps(ADDR, MASK, INDEX, V1, SCALE) \ 9667 __builtin_ia32_scatterdiv16sf ((float *)ADDR, (__mmask16)MASK, \ 9668 (__v8di)(__m512i)INDEX, \ 9669 (__v8sf)(__m256)V1, (int)SCALE) 9670 9671#define _mm512_i64scatter_pd(ADDR, INDEX, V1, SCALE) \ 9672 __builtin_ia32_scatterdiv8df ((double *)ADDR, (__mmask8)0xFF, \ 9673 (__v8di)(__m512i)INDEX, \ 9674 (__v8df)(__m512d)V1, (int)SCALE) 9675 9676#define _mm512_mask_i64scatter_pd(ADDR, MASK, INDEX, V1, SCALE) \ 9677 __builtin_ia32_scatterdiv8df ((double *)ADDR, (__mmask8)MASK, \ 9678 (__v8di)(__m512i)INDEX, \ 9679 (__v8df)(__m512d)V1, (int)SCALE) 9680 9681#define _mm512_i32scatter_epi32(ADDR, INDEX, V1, SCALE) \ 9682 __builtin_ia32_scattersiv16si ((int *)ADDR, (__mmask16)0xFFFF, \ 9683 (__v16si)(__m512i)INDEX, \ 9684 (__v16si)(__m512i)V1, (int)SCALE) 9685 9686#define _mm512_mask_i32scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \ 9687 __builtin_ia32_scattersiv16si ((int *)ADDR, (__mmask16)MASK, \ 9688 (__v16si)(__m512i)INDEX, \ 9689 (__v16si)(__m512i)V1, (int)SCALE) 9690 9691#define _mm512_i32scatter_epi64(ADDR, INDEX, V1, SCALE) \ 9692 __builtin_ia32_scattersiv8di ((long long *)ADDR, 
(__mmask8)0xFF, \ 9693 (__v8si)(__m256i)INDEX, \ 9694 (__v8di)(__m512i)V1, (int)SCALE) 9695 9696#define _mm512_mask_i32scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \ 9697 __builtin_ia32_scattersiv8di ((long long *)ADDR, (__mmask8)MASK, \ 9698 (__v8si)(__m256i)INDEX, \ 9699 (__v8di)(__m512i)V1, (int)SCALE) 9700 9701#define _mm512_i64scatter_epi32(ADDR, INDEX, V1, SCALE) \ 9702 __builtin_ia32_scatterdiv16si ((int *)ADDR, (__mmask8)0xFF, \ 9703 (__v8di)(__m512i)INDEX, \ 9704 (__v8si)(__m256i)V1, (int)SCALE) 9705 9706#define _mm512_mask_i64scatter_epi32(ADDR, MASK, INDEX, V1, SCALE) \ 9707 __builtin_ia32_scatterdiv16si ((int *)ADDR, (__mmask8)MASK, \ 9708 (__v8di)(__m512i)INDEX, \ 9709 (__v8si)(__m256i)V1, (int)SCALE) 9710 9711#define _mm512_i64scatter_epi64(ADDR, INDEX, V1, SCALE) \ 9712 __builtin_ia32_scatterdiv8di ((long long *)ADDR, (__mmask8)0xFF, \ 9713 (__v8di)(__m512i)INDEX, \ 9714 (__v8di)(__m512i)V1, (int)SCALE) 9715 9716#define _mm512_mask_i64scatter_epi64(ADDR, MASK, INDEX, V1, SCALE) \ 9717 __builtin_ia32_scatterdiv8di ((long long *)ADDR, (__mmask8)MASK, \ 9718 (__v8di)(__m512i)INDEX, \ 9719 (__v8di)(__m512i)V1, (int)SCALE) 9720#endif 9721 9722extern __inline __m512d 9723__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9724_mm512_mask_compress_pd (__m512d __W, __mmask8 __U, __m512d __A) 9725{ 9726 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A, 9727 (__v8df) __W, 9728 (__mmask8) __U); 9729} 9730 9731extern __inline __m512d 9732__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9733_mm512_maskz_compress_pd (__mmask8 __U, __m512d __A) 9734{ 9735 return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A, 9736 (__v8df) 9737 _mm512_setzero_pd (), 9738 (__mmask8) __U); 9739} 9740 9741extern __inline void 9742__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9743_mm512_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m512d __A) 9744{ 9745 __builtin_ia32_compressstoredf512_mask 
((__v8df *) __P, (__v8df) __A, 9746 (__mmask8) __U); 9747} 9748 9749extern __inline __m512 9750__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9751_mm512_mask_compress_ps (__m512 __W, __mmask16 __U, __m512 __A) 9752{ 9753 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A, 9754 (__v16sf) __W, 9755 (__mmask16) __U); 9756} 9757 9758extern __inline __m512 9759__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9760_mm512_maskz_compress_ps (__mmask16 __U, __m512 __A) 9761{ 9762 return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A, 9763 (__v16sf) 9764 _mm512_setzero_ps (), 9765 (__mmask16) __U); 9766} 9767 9768extern __inline void 9769__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9770_mm512_mask_compressstoreu_ps (void *__P, __mmask16 __U, __m512 __A) 9771{ 9772 __builtin_ia32_compressstoresf512_mask ((__v16sf *) __P, (__v16sf) __A, 9773 (__mmask16) __U); 9774} 9775 9776extern __inline __m512i 9777__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9778_mm512_mask_compress_epi64 (__m512i __W, __mmask8 __U, __m512i __A) 9779{ 9780 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A, 9781 (__v8di) __W, 9782 (__mmask8) __U); 9783} 9784 9785extern __inline __m512i 9786__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9787_mm512_maskz_compress_epi64 (__mmask8 __U, __m512i __A) 9788{ 9789 return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A, 9790 (__v8di) 9791 _mm512_setzero_si512 (), 9792 (__mmask8) __U); 9793} 9794 9795extern __inline void 9796__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9797_mm512_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m512i __A) 9798{ 9799 __builtin_ia32_compressstoredi512_mask ((__v8di *) __P, (__v8di) __A, 9800 (__mmask8) __U); 9801} 9802 9803extern __inline __m512i 9804__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9805_mm512_mask_compress_epi32 (__m512i 
__W, __mmask16 __U, __m512i __A) 9806{ 9807 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A, 9808 (__v16si) __W, 9809 (__mmask16) __U); 9810} 9811 9812extern __inline __m512i 9813__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9814_mm512_maskz_compress_epi32 (__mmask16 __U, __m512i __A) 9815{ 9816 return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A, 9817 (__v16si) 9818 _mm512_setzero_si512 (), 9819 (__mmask16) __U); 9820} 9821 9822extern __inline void 9823__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9824_mm512_mask_compressstoreu_epi32 (void *__P, __mmask16 __U, __m512i __A) 9825{ 9826 __builtin_ia32_compressstoresi512_mask ((__v16si *) __P, (__v16si) __A, 9827 (__mmask16) __U); 9828} 9829 9830extern __inline __m512d 9831__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9832_mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A) 9833{ 9834 return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A, 9835 (__v8df) __W, 9836 (__mmask8) __U); 9837} 9838 9839extern __inline __m512d 9840__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9841_mm512_maskz_expand_pd (__mmask8 __U, __m512d __A) 9842{ 9843 return (__m512d) __builtin_ia32_expanddf512_maskz ((__v8df) __A, 9844 (__v8df) 9845 _mm512_setzero_pd (), 9846 (__mmask8) __U); 9847} 9848 9849extern __inline __m512d 9850__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9851_mm512_mask_expandloadu_pd (__m512d __W, __mmask8 __U, void const *__P) 9852{ 9853 return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *) __P, 9854 (__v8df) __W, 9855 (__mmask8) __U); 9856} 9857 9858extern __inline __m512d 9859__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9860_mm512_maskz_expandloadu_pd (__mmask8 __U, void const *__P) 9861{ 9862 return (__m512d) __builtin_ia32_expandloaddf512_maskz ((const __v8df *) __P, 9863 (__v8df) 9864 _mm512_setzero_pd (), 9865 (__mmask8) 
__U); 9866} 9867 9868extern __inline __m512 9869__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9870_mm512_mask_expand_ps (__m512 __W, __mmask16 __U, __m512 __A) 9871{ 9872 return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A, 9873 (__v16sf) __W, 9874 (__mmask16) __U); 9875} 9876 9877extern __inline __m512 9878__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9879_mm512_maskz_expand_ps (__mmask16 __U, __m512 __A) 9880{ 9881 return (__m512) __builtin_ia32_expandsf512_maskz ((__v16sf) __A, 9882 (__v16sf) 9883 _mm512_setzero_ps (), 9884 (__mmask16) __U); 9885} 9886 9887extern __inline __m512 9888__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9889_mm512_mask_expandloadu_ps (__m512 __W, __mmask16 __U, void const *__P) 9890{ 9891 return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *) __P, 9892 (__v16sf) __W, 9893 (__mmask16) __U); 9894} 9895 9896extern __inline __m512 9897__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9898_mm512_maskz_expandloadu_ps (__mmask16 __U, void const *__P) 9899{ 9900 return (__m512) __builtin_ia32_expandloadsf512_maskz ((const __v16sf *) __P, 9901 (__v16sf) 9902 _mm512_setzero_ps (), 9903 (__mmask16) __U); 9904} 9905 9906extern __inline __m512i 9907__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9908_mm512_mask_expand_epi64 (__m512i __W, __mmask8 __U, __m512i __A) 9909{ 9910 return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A, 9911 (__v8di) __W, 9912 (__mmask8) __U); 9913} 9914 9915extern __inline __m512i 9916__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9917_mm512_maskz_expand_epi64 (__mmask8 __U, __m512i __A) 9918{ 9919 return (__m512i) __builtin_ia32_expanddi512_maskz ((__v8di) __A, 9920 (__v8di) 9921 _mm512_setzero_si512 (), 9922 (__mmask8) __U); 9923} 9924 9925extern __inline __m512i 9926__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 
9927_mm512_mask_expandloadu_epi64 (__m512i __W, __mmask8 __U, void const *__P) 9928{ 9929 return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *) __P, 9930 (__v8di) __W, 9931 (__mmask8) __U); 9932} 9933 9934extern __inline __m512i 9935__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9936_mm512_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P) 9937{ 9938 return (__m512i) 9939 __builtin_ia32_expandloaddi512_maskz ((const __v8di *) __P, 9940 (__v8di) 9941 _mm512_setzero_si512 (), 9942 (__mmask8) __U); 9943} 9944 9945extern __inline __m512i 9946__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9947_mm512_mask_expand_epi32 (__m512i __W, __mmask16 __U, __m512i __A) 9948{ 9949 return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A, 9950 (__v16si) __W, 9951 (__mmask16) __U); 9952} 9953 9954extern __inline __m512i 9955__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9956_mm512_maskz_expand_epi32 (__mmask16 __U, __m512i __A) 9957{ 9958 return (__m512i) __builtin_ia32_expandsi512_maskz ((__v16si) __A, 9959 (__v16si) 9960 _mm512_setzero_si512 (), 9961 (__mmask16) __U); 9962} 9963 9964extern __inline __m512i 9965__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9966_mm512_mask_expandloadu_epi32 (__m512i __W, __mmask16 __U, void const *__P) 9967{ 9968 return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *) __P, 9969 (__v16si) __W, 9970 (__mmask16) __U); 9971} 9972 9973extern __inline __m512i 9974__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9975_mm512_maskz_expandloadu_epi32 (__mmask16 __U, void const *__P) 9976{ 9977 return (__m512i) __builtin_ia32_expandloadsi512_maskz ((const __v16si *) __P, 9978 (__v16si) 9979 _mm512_setzero_si512 9980 (), (__mmask16) __U); 9981} 9982 9983/* Mask arithmetic operations */ 9984extern __inline __mmask16 9985__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9986_mm512_kand (__mmask16 __A, 
__mmask16 __B) 9987{ 9988 return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B); 9989} 9990 9991extern __inline __mmask16 9992__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 9993_mm512_kandn (__mmask16 __A, __mmask16 __B) 9994{ 9995 return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A, (__mmask16) __B); 9996} 9997 9998extern __inline __mmask16 9999__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10000_mm512_kor (__mmask16 __A, __mmask16 __B) 10001{ 10002 return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B); 10003} 10004 10005extern __inline int 10006__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10007_mm512_kortestz (__mmask16 __A, __mmask16 __B) 10008{ 10009 return (__mmask16) __builtin_ia32_kortestzhi ((__mmask16) __A, 10010 (__mmask16) __B); 10011} 10012 10013extern __inline int 10014__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10015_mm512_kortestc (__mmask16 __A, __mmask16 __B) 10016{ 10017 return (__mmask16) __builtin_ia32_kortestchi ((__mmask16) __A, 10018 (__mmask16) __B); 10019} 10020 10021extern __inline __mmask16 10022__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10023_mm512_kxnor (__mmask16 __A, __mmask16 __B) 10024{ 10025 return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B); 10026} 10027 10028extern __inline __mmask16 10029__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10030_mm512_kxor (__mmask16 __A, __mmask16 __B) 10031{ 10032 return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B); 10033} 10034 10035extern __inline __mmask16 10036__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10037_mm512_knot (__mmask16 __A) 10038{ 10039 return (__mmask16) __builtin_ia32_knothi ((__mmask16) __A); 10040} 10041 10042extern __inline __mmask16 10043__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 
10044_mm512_kunpackb (__mmask16 __A, __mmask16 __B) 10045{ 10046 return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B); 10047} 10048 10049#ifdef __OPTIMIZE__ 10050extern __inline __m512i 10051__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10052_mm512_maskz_inserti32x4 (__mmask16 __B, __m512i __C, __m128i __D, 10053 const int __imm) 10054{ 10055 return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __C, 10056 (__v4si) __D, 10057 __imm, 10058 (__v16si) 10059 _mm512_setzero_si512 (), 10060 __B); 10061} 10062 10063extern __inline __m512 10064__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10065_mm512_maskz_insertf32x4 (__mmask16 __B, __m512 __C, __m128 __D, 10066 const int __imm) 10067{ 10068 return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __C, 10069 (__v4sf) __D, 10070 __imm, 10071 (__v16sf) 10072 _mm512_setzero_ps (), __B); 10073} 10074 10075extern __inline __m512i 10076__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10077_mm512_mask_inserti32x4 (__m512i __A, __mmask16 __B, __m512i __C, 10078 __m128i __D, const int __imm) 10079{ 10080 return (__m512i) __builtin_ia32_inserti32x4_mask ((__v16si) __C, 10081 (__v4si) __D, 10082 __imm, 10083 (__v16si) __A, 10084 __B); 10085} 10086 10087extern __inline __m512 10088__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10089_mm512_mask_insertf32x4 (__m512 __A, __mmask16 __B, __m512 __C, 10090 __m128 __D, const int __imm) 10091{ 10092 return (__m512) __builtin_ia32_insertf32x4_mask ((__v16sf) __C, 10093 (__v4sf) __D, 10094 __imm, 10095 (__v16sf) __A, __B); 10096} 10097#else 10098#define _mm512_maskz_insertf32x4(A, X, Y, C) \ 10099 ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X), \ 10100 (__v4sf)(__m128) (Y), (int) (C), (__v16sf)_mm512_setzero_ps(), \ 10101 (__mmask8)(A))) 10102 10103#define _mm512_maskz_inserti32x4(A, X, Y, C) \ 10104 ((__m512i) __builtin_ia32_inserti32x4_mask 
((__v16si)(__m512i) (X), \ 10105 (__v4si)(__m128i) (Y), (int) (C), (__v16si)_mm512_setzero_si512 (), \ 10106 (__mmask8)(A))) 10107 10108#define _mm512_mask_insertf32x4(A, B, X, Y, C) \ 10109 ((__m512) __builtin_ia32_insertf32x4_mask ((__v16sf)(__m512) (X), \ 10110 (__v4sf)(__m128) (Y), (int) (C), (__v16sf)(__m512) (A), \ 10111 (__mmask8)(B))) 10112 10113#define _mm512_mask_inserti32x4(A, B, X, Y, C) \ 10114 ((__m512i) __builtin_ia32_inserti32x4_mask ((__v16si)(__m512i) (X), \ 10115 (__v4si)(__m128i) (Y), (int) (C), (__v16si)(__m512i) (A), \ 10116 (__mmask8)(B))) 10117#endif 10118 10119extern __inline __m512i 10120__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10121_mm512_max_epi64 (__m512i __A, __m512i __B) 10122{ 10123 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A, 10124 (__v8di) __B, 10125 (__v8di) 10126 _mm512_undefined_si512 (), 10127 (__mmask8) -1); 10128} 10129 10130extern __inline __m512i 10131__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10132_mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B) 10133{ 10134 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A, 10135 (__v8di) __B, 10136 (__v8di) 10137 _mm512_setzero_si512 (), 10138 __M); 10139} 10140 10141extern __inline __m512i 10142__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10143_mm512_mask_max_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) 10144{ 10145 return (__m512i) __builtin_ia32_pmaxsq512_mask ((__v8di) __A, 10146 (__v8di) __B, 10147 (__v8di) __W, __M); 10148} 10149 10150extern __inline __m512i 10151__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10152_mm512_min_epi64 (__m512i __A, __m512i __B) 10153{ 10154 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A, 10155 (__v8di) __B, 10156 (__v8di) 10157 _mm512_undefined_si512 (), 10158 (__mmask8) -1); 10159} 10160 10161extern __inline __m512i 10162__attribute__ ((__gnu_inline__, __always_inline__, 
__artificial__)) 10163_mm512_mask_min_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) 10164{ 10165 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A, 10166 (__v8di) __B, 10167 (__v8di) __W, __M); 10168} 10169 10170extern __inline __m512i 10171__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10172_mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B) 10173{ 10174 return (__m512i) __builtin_ia32_pminsq512_mask ((__v8di) __A, 10175 (__v8di) __B, 10176 (__v8di) 10177 _mm512_setzero_si512 (), 10178 __M); 10179} 10180 10181extern __inline __m512i 10182__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10183_mm512_max_epu64 (__m512i __A, __m512i __B) 10184{ 10185 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A, 10186 (__v8di) __B, 10187 (__v8di) 10188 _mm512_undefined_si512 (), 10189 (__mmask8) -1); 10190} 10191 10192extern __inline __m512i 10193__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10194_mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B) 10195{ 10196 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A, 10197 (__v8di) __B, 10198 (__v8di) 10199 _mm512_setzero_si512 (), 10200 __M); 10201} 10202 10203extern __inline __m512i 10204__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10205_mm512_mask_max_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) 10206{ 10207 return (__m512i) __builtin_ia32_pmaxuq512_mask ((__v8di) __A, 10208 (__v8di) __B, 10209 (__v8di) __W, __M); 10210} 10211 10212extern __inline __m512i 10213__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10214_mm512_min_epu64 (__m512i __A, __m512i __B) 10215{ 10216 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A, 10217 (__v8di) __B, 10218 (__v8di) 10219 _mm512_undefined_si512 (), 10220 (__mmask8) -1); 10221} 10222 10223extern __inline __m512i 10224__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 
10225_mm512_mask_min_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) 10226{ 10227 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A, 10228 (__v8di) __B, 10229 (__v8di) __W, __M); 10230} 10231 10232extern __inline __m512i 10233__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10234_mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B) 10235{ 10236 return (__m512i) __builtin_ia32_pminuq512_mask ((__v8di) __A, 10237 (__v8di) __B, 10238 (__v8di) 10239 _mm512_setzero_si512 (), 10240 __M); 10241} 10242 10243extern __inline __m512i 10244__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10245_mm512_max_epi32 (__m512i __A, __m512i __B) 10246{ 10247 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A, 10248 (__v16si) __B, 10249 (__v16si) 10250 _mm512_undefined_si512 (), 10251 (__mmask16) -1); 10252} 10253 10254extern __inline __m512i 10255__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10256_mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B) 10257{ 10258 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A, 10259 (__v16si) __B, 10260 (__v16si) 10261 _mm512_setzero_si512 (), 10262 __M); 10263} 10264 10265extern __inline __m512i 10266__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10267_mm512_mask_max_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) 10268{ 10269 return (__m512i) __builtin_ia32_pmaxsd512_mask ((__v16si) __A, 10270 (__v16si) __B, 10271 (__v16si) __W, __M); 10272} 10273 10274extern __inline __m512i 10275__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10276_mm512_min_epi32 (__m512i __A, __m512i __B) 10277{ 10278 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A, 10279 (__v16si) __B, 10280 (__v16si) 10281 _mm512_undefined_si512 (), 10282 (__mmask16) -1); 10283} 10284 10285extern __inline __m512i 10286__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 
10287_mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B) 10288{ 10289 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A, 10290 (__v16si) __B, 10291 (__v16si) 10292 _mm512_setzero_si512 (), 10293 __M); 10294} 10295 10296extern __inline __m512i 10297__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10298_mm512_mask_min_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) 10299{ 10300 return (__m512i) __builtin_ia32_pminsd512_mask ((__v16si) __A, 10301 (__v16si) __B, 10302 (__v16si) __W, __M); 10303} 10304 10305extern __inline __m512i 10306__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10307_mm512_max_epu32 (__m512i __A, __m512i __B) 10308{ 10309 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A, 10310 (__v16si) __B, 10311 (__v16si) 10312 _mm512_undefined_si512 (), 10313 (__mmask16) -1); 10314} 10315 10316extern __inline __m512i 10317__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10318_mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B) 10319{ 10320 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A, 10321 (__v16si) __B, 10322 (__v16si) 10323 _mm512_setzero_si512 (), 10324 __M); 10325} 10326 10327extern __inline __m512i 10328__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10329_mm512_mask_max_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) 10330{ 10331 return (__m512i) __builtin_ia32_pmaxud512_mask ((__v16si) __A, 10332 (__v16si) __B, 10333 (__v16si) __W, __M); 10334} 10335 10336extern __inline __m512i 10337__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10338_mm512_min_epu32 (__m512i __A, __m512i __B) 10339{ 10340 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A, 10341 (__v16si) __B, 10342 (__v16si) 10343 _mm512_undefined_si512 (), 10344 (__mmask16) -1); 10345} 10346 10347extern __inline __m512i 10348__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 
10349_mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B) 10350{ 10351 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A, 10352 (__v16si) __B, 10353 (__v16si) 10354 _mm512_setzero_si512 (), 10355 __M); 10356} 10357 10358extern __inline __m512i 10359__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10360_mm512_mask_min_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) 10361{ 10362 return (__m512i) __builtin_ia32_pminud512_mask ((__v16si) __A, 10363 (__v16si) __B, 10364 (__v16si) __W, __M); 10365} 10366 10367extern __inline __m512 10368__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10369_mm512_unpacklo_ps (__m512 __A, __m512 __B) 10370{ 10371 return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A, 10372 (__v16sf) __B, 10373 (__v16sf) 10374 _mm512_undefined_ps (), 10375 (__mmask16) -1); 10376} 10377 10378extern __inline __m512 10379__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10380_mm512_mask_unpacklo_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) 10381{ 10382 return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A, 10383 (__v16sf) __B, 10384 (__v16sf) __W, 10385 (__mmask16) __U); 10386} 10387 10388extern __inline __m512 10389__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10390_mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B) 10391{ 10392 return (__m512) __builtin_ia32_unpcklps512_mask ((__v16sf) __A, 10393 (__v16sf) __B, 10394 (__v16sf) 10395 _mm512_setzero_ps (), 10396 (__mmask16) __U); 10397} 10398 10399#ifdef __OPTIMIZE__ 10400extern __inline __m128d 10401__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10402_mm_max_round_sd (__m128d __A, __m128d __B, const int __R) 10403{ 10404 return (__m128d) __builtin_ia32_maxsd_round ((__v2df) __A, 10405 (__v2df) __B, 10406 __R); 10407} 10408 10409extern __inline __m128 10410__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 
10411_mm_max_round_ss (__m128 __A, __m128 __B, const int __R) 10412{ 10413 return (__m128) __builtin_ia32_maxss_round ((__v4sf) __A, 10414 (__v4sf) __B, 10415 __R); 10416} 10417 10418extern __inline __m128d 10419__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10420_mm_min_round_sd (__m128d __A, __m128d __B, const int __R) 10421{ 10422 return (__m128d) __builtin_ia32_minsd_round ((__v2df) __A, 10423 (__v2df) __B, 10424 __R); 10425} 10426 10427extern __inline __m128 10428__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10429_mm_min_round_ss (__m128 __A, __m128 __B, const int __R) 10430{ 10431 return (__m128) __builtin_ia32_minss_round ((__v4sf) __A, 10432 (__v4sf) __B, 10433 __R); 10434} 10435 10436#else 10437#define _mm_max_round_sd(A, B, C) \ 10438 (__m128d)__builtin_ia32_addsd_round(A, B, C) 10439 10440#define _mm_max_round_ss(A, B, C) \ 10441 (__m128)__builtin_ia32_addss_round(A, B, C) 10442 10443#define _mm_min_round_sd(A, B, C) \ 10444 (__m128d)__builtin_ia32_subsd_round(A, B, C) 10445 10446#define _mm_min_round_ss(A, B, C) \ 10447 (__m128)__builtin_ia32_subss_round(A, B, C) 10448#endif 10449 10450extern __inline __m512d 10451__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10452_mm512_mask_blend_pd (__mmask8 __U, __m512d __A, __m512d __W) 10453{ 10454 return (__m512d) __builtin_ia32_blendmpd_512_mask ((__v8df) __A, 10455 (__v8df) __W, 10456 (__mmask8) __U); 10457} 10458 10459extern __inline __m512 10460__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10461_mm512_mask_blend_ps (__mmask16 __U, __m512 __A, __m512 __W) 10462{ 10463 return (__m512) __builtin_ia32_blendmps_512_mask ((__v16sf) __A, 10464 (__v16sf) __W, 10465 (__mmask16) __U); 10466} 10467 10468extern __inline __m512i 10469__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10470_mm512_mask_blend_epi64 (__mmask8 __U, __m512i __A, __m512i __W) 10471{ 10472 return (__m512i) __builtin_ia32_blendmq_512_mask 
((__v8di) __A, 10473 (__v8di) __W, 10474 (__mmask8) __U); 10475} 10476 10477extern __inline __m512i 10478__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10479_mm512_mask_blend_epi32 (__mmask16 __U, __m512i __A, __m512i __W) 10480{ 10481 return (__m512i) __builtin_ia32_blendmd_512_mask ((__v16si) __A, 10482 (__v16si) __W, 10483 (__mmask16) __U); 10484} 10485 10486#ifdef __OPTIMIZE__ 10487extern __inline __m128d 10488__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10489_mm_fmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R) 10490{ 10491 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W, 10492 (__v2df) __A, 10493 (__v2df) __B, 10494 __R); 10495} 10496 10497extern __inline __m128 10498__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10499_mm_fmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R) 10500{ 10501 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W, 10502 (__v4sf) __A, 10503 (__v4sf) __B, 10504 __R); 10505} 10506 10507extern __inline __m128d 10508__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10509_mm_fmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R) 10510{ 10511 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W, 10512 (__v2df) __A, 10513 -(__v2df) __B, 10514 __R); 10515} 10516 10517extern __inline __m128 10518__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10519_mm_fmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R) 10520{ 10521 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W, 10522 (__v4sf) __A, 10523 -(__v4sf) __B, 10524 __R); 10525} 10526 10527extern __inline __m128d 10528__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10529_mm_fnmadd_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R) 10530{ 10531 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W, 10532 -(__v2df) __A, 10533 (__v2df) __B, 10534 __R); 
10535} 10536 10537extern __inline __m128 10538__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10539_mm_fnmadd_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R) 10540{ 10541 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W, 10542 -(__v4sf) __A, 10543 (__v4sf) __B, 10544 __R); 10545} 10546 10547extern __inline __m128d 10548__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10549_mm_fnmsub_round_sd (__m128d __W, __m128d __A, __m128d __B, const int __R) 10550{ 10551 return (__m128d) __builtin_ia32_vfmaddsd3_round ((__v2df) __W, 10552 -(__v2df) __A, 10553 -(__v2df) __B, 10554 __R); 10555} 10556 10557extern __inline __m128 10558__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10559_mm_fnmsub_round_ss (__m128 __W, __m128 __A, __m128 __B, const int __R) 10560{ 10561 return (__m128) __builtin_ia32_vfmaddss3_round ((__v4sf) __W, 10562 -(__v4sf) __A, 10563 -(__v4sf) __B, 10564 __R); 10565} 10566#else 10567#define _mm_fmadd_round_sd(A, B, C, R) \ 10568 (__m128d)__builtin_ia32_vfmaddsd3_round(A, B, C, R) 10569 10570#define _mm_fmadd_round_ss(A, B, C, R) \ 10571 (__m128)__builtin_ia32_vfmaddss3_round(A, B, C, R) 10572 10573#define _mm_fmsub_round_sd(A, B, C, R) \ 10574 (__m128d)__builtin_ia32_vfmaddsd3_round(A, B, -(C), R) 10575 10576#define _mm_fmsub_round_ss(A, B, C, R) \ 10577 (__m128)__builtin_ia32_vfmaddss3_round(A, B, -(C), R) 10578 10579#define _mm_fnmadd_round_sd(A, B, C, R) \ 10580 (__m128d)__builtin_ia32_vfmaddsd3_round(A, -(B), C, R) 10581 10582#define _mm_fnmadd_round_ss(A, B, C, R) \ 10583 (__m128)__builtin_ia32_vfmaddss3_round(A, -(B), C, R) 10584 10585#define _mm_fnmsub_round_sd(A, B, C, R) \ 10586 (__m128d)__builtin_ia32_vfmaddsd3_round(A, -(B), -(C), R) 10587 10588#define _mm_fnmsub_round_ss(A, B, C, R) \ 10589 (__m128)__builtin_ia32_vfmaddss3_round(A, -(B), -(C), R) 10590#endif 10591 10592#ifdef __OPTIMIZE__ 10593extern __inline int 10594__attribute__ ((__gnu_inline__, 
__always_inline__, __artificial__)) 10595_mm_comi_round_ss (__m128 __A, __m128 __B, const int __P, const int __R) 10596{ 10597 return __builtin_ia32_vcomiss ((__v4sf) __A, (__v4sf) __B, __P, __R); 10598} 10599 10600extern __inline int 10601__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10602_mm_comi_round_sd (__m128d __A, __m128d __B, const int __P, const int __R) 10603{ 10604 return __builtin_ia32_vcomisd ((__v2df) __A, (__v2df) __B, __P, __R); 10605} 10606#else 10607#define _mm_comi_round_ss(A, B, C, D)\ 10608__builtin_ia32_vcomiss(A, B, C, D) 10609#define _mm_comi_round_sd(A, B, C, D)\ 10610__builtin_ia32_vcomisd(A, B, C, D) 10611#endif 10612 10613extern __inline __m512d 10614__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10615_mm512_sqrt_pd (__m512d __A) 10616{ 10617 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A, 10618 (__v8df) 10619 _mm512_undefined_pd (), 10620 (__mmask8) -1, 10621 _MM_FROUND_CUR_DIRECTION); 10622} 10623 10624extern __inline __m512d 10625__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10626_mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A) 10627{ 10628 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A, 10629 (__v8df) __W, 10630 (__mmask8) __U, 10631 _MM_FROUND_CUR_DIRECTION); 10632} 10633 10634extern __inline __m512d 10635__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10636_mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A) 10637{ 10638 return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A, 10639 (__v8df) 10640 _mm512_setzero_pd (), 10641 (__mmask8) __U, 10642 _MM_FROUND_CUR_DIRECTION); 10643} 10644 10645extern __inline __m512 10646__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10647_mm512_sqrt_ps (__m512 __A) 10648{ 10649 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A, 10650 (__v16sf) 10651 _mm512_undefined_ps (), 10652 (__mmask16) -1, 10653 _MM_FROUND_CUR_DIRECTION); 10654} 10655 
10656extern __inline __m512 10657__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10658_mm512_mask_sqrt_ps (__m512 __W, __mmask16 __U, __m512 __A) 10659{ 10660 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A, 10661 (__v16sf) __W, 10662 (__mmask16) __U, 10663 _MM_FROUND_CUR_DIRECTION); 10664} 10665 10666extern __inline __m512 10667__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10668_mm512_maskz_sqrt_ps (__mmask16 __U, __m512 __A) 10669{ 10670 return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A, 10671 (__v16sf) 10672 _mm512_setzero_ps (), 10673 (__mmask16) __U, 10674 _MM_FROUND_CUR_DIRECTION); 10675} 10676 10677extern __inline __m512d 10678__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10679_mm512_add_pd (__m512d __A, __m512d __B) 10680{ 10681 return (__m512d) ((__v8df)__A + (__v8df)__B); 10682} 10683 10684extern __inline __m512d 10685__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10686_mm512_mask_add_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) 10687{ 10688 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A, 10689 (__v8df) __B, 10690 (__v8df) __W, 10691 (__mmask8) __U, 10692 _MM_FROUND_CUR_DIRECTION); 10693} 10694 10695extern __inline __m512d 10696__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10697_mm512_maskz_add_pd (__mmask8 __U, __m512d __A, __m512d __B) 10698{ 10699 return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A, 10700 (__v8df) __B, 10701 (__v8df) 10702 _mm512_setzero_pd (), 10703 (__mmask8) __U, 10704 _MM_FROUND_CUR_DIRECTION); 10705} 10706 10707extern __inline __m512 10708__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10709_mm512_add_ps (__m512 __A, __m512 __B) 10710{ 10711 return (__m512) ((__v16sf)__A + (__v16sf)__B); 10712} 10713 10714extern __inline __m512 10715__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10716_mm512_mask_add_ps (__m512 __W, __mmask16 __U, 
__m512 __A, __m512 __B) 10717{ 10718 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A, 10719 (__v16sf) __B, 10720 (__v16sf) __W, 10721 (__mmask16) __U, 10722 _MM_FROUND_CUR_DIRECTION); 10723} 10724 10725extern __inline __m512 10726__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10727_mm512_maskz_add_ps (__mmask16 __U, __m512 __A, __m512 __B) 10728{ 10729 return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A, 10730 (__v16sf) __B, 10731 (__v16sf) 10732 _mm512_setzero_ps (), 10733 (__mmask16) __U, 10734 _MM_FROUND_CUR_DIRECTION); 10735} 10736 10737extern __inline __m512d 10738__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10739_mm512_sub_pd (__m512d __A, __m512d __B) 10740{ 10741 return (__m512d) ((__v8df)__A - (__v8df)__B); 10742} 10743 10744extern __inline __m512d 10745__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10746_mm512_mask_sub_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) 10747{ 10748 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A, 10749 (__v8df) __B, 10750 (__v8df) __W, 10751 (__mmask8) __U, 10752 _MM_FROUND_CUR_DIRECTION); 10753} 10754 10755extern __inline __m512d 10756__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10757_mm512_maskz_sub_pd (__mmask8 __U, __m512d __A, __m512d __B) 10758{ 10759 return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A, 10760 (__v8df) __B, 10761 (__v8df) 10762 _mm512_setzero_pd (), 10763 (__mmask8) __U, 10764 _MM_FROUND_CUR_DIRECTION); 10765} 10766 10767extern __inline __m512 10768__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10769_mm512_sub_ps (__m512 __A, __m512 __B) 10770{ 10771 return (__m512) ((__v16sf)__A - (__v16sf)__B); 10772} 10773 10774extern __inline __m512 10775__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10776_mm512_mask_sub_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) 10777{ 10778 return (__m512) __builtin_ia32_subps512_mask 
((__v16sf) __A, 10779 (__v16sf) __B, 10780 (__v16sf) __W, 10781 (__mmask16) __U, 10782 _MM_FROUND_CUR_DIRECTION); 10783} 10784 10785extern __inline __m512 10786__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10787_mm512_maskz_sub_ps (__mmask16 __U, __m512 __A, __m512 __B) 10788{ 10789 return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A, 10790 (__v16sf) __B, 10791 (__v16sf) 10792 _mm512_setzero_ps (), 10793 (__mmask16) __U, 10794 _MM_FROUND_CUR_DIRECTION); 10795} 10796 10797extern __inline __m512d 10798__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10799_mm512_mul_pd (__m512d __A, __m512d __B) 10800{ 10801 return (__m512d) ((__v8df)__A * (__v8df)__B); 10802} 10803 10804extern __inline __m512d 10805__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10806_mm512_mask_mul_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) 10807{ 10808 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A, 10809 (__v8df) __B, 10810 (__v8df) __W, 10811 (__mmask8) __U, 10812 _MM_FROUND_CUR_DIRECTION); 10813} 10814 10815extern __inline __m512d 10816__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10817_mm512_maskz_mul_pd (__mmask8 __U, __m512d __A, __m512d __B) 10818{ 10819 return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A, 10820 (__v8df) __B, 10821 (__v8df) 10822 _mm512_setzero_pd (), 10823 (__mmask8) __U, 10824 _MM_FROUND_CUR_DIRECTION); 10825} 10826 10827extern __inline __m512 10828__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10829_mm512_mul_ps (__m512 __A, __m512 __B) 10830{ 10831 return (__m512) ((__v16sf)__A * (__v16sf)__B); 10832} 10833 10834extern __inline __m512 10835__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10836_mm512_mask_mul_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) 10837{ 10838 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A, 10839 (__v16sf) __B, 10840 (__v16sf) __W, 10841 (__mmask16) __U, 10842 
_MM_FROUND_CUR_DIRECTION); 10843} 10844 10845extern __inline __m512 10846__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10847_mm512_maskz_mul_ps (__mmask16 __U, __m512 __A, __m512 __B) 10848{ 10849 return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A, 10850 (__v16sf) __B, 10851 (__v16sf) 10852 _mm512_setzero_ps (), 10853 (__mmask16) __U, 10854 _MM_FROUND_CUR_DIRECTION); 10855} 10856 10857extern __inline __m512d 10858__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10859_mm512_div_pd (__m512d __M, __m512d __V) 10860{ 10861 return (__m512d) ((__v8df)__M / (__v8df)__V); 10862} 10863 10864extern __inline __m512d 10865__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10866_mm512_mask_div_pd (__m512d __W, __mmask8 __U, __m512d __M, __m512d __V) 10867{ 10868 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M, 10869 (__v8df) __V, 10870 (__v8df) __W, 10871 (__mmask8) __U, 10872 _MM_FROUND_CUR_DIRECTION); 10873} 10874 10875extern __inline __m512d 10876__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10877_mm512_maskz_div_pd (__mmask8 __U, __m512d __M, __m512d __V) 10878{ 10879 return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M, 10880 (__v8df) __V, 10881 (__v8df) 10882 _mm512_setzero_pd (), 10883 (__mmask8) __U, 10884 _MM_FROUND_CUR_DIRECTION); 10885} 10886 10887extern __inline __m512 10888__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10889_mm512_div_ps (__m512 __A, __m512 __B) 10890{ 10891 return (__m512) ((__v16sf)__A / (__v16sf)__B); 10892} 10893 10894extern __inline __m512 10895__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10896_mm512_mask_div_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) 10897{ 10898 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A, 10899 (__v16sf) __B, 10900 (__v16sf) __W, 10901 (__mmask16) __U, 10902 _MM_FROUND_CUR_DIRECTION); 10903} 10904 10905extern __inline __m512 10906__attribute__ 
((__gnu_inline__, __always_inline__, __artificial__)) 10907_mm512_maskz_div_ps (__mmask16 __U, __m512 __A, __m512 __B) 10908{ 10909 return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A, 10910 (__v16sf) __B, 10911 (__v16sf) 10912 _mm512_setzero_ps (), 10913 (__mmask16) __U, 10914 _MM_FROUND_CUR_DIRECTION); 10915} 10916 10917extern __inline __m512d 10918__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10919_mm512_max_pd (__m512d __A, __m512d __B) 10920{ 10921 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A, 10922 (__v8df) __B, 10923 (__v8df) 10924 _mm512_undefined_pd (), 10925 (__mmask8) -1, 10926 _MM_FROUND_CUR_DIRECTION); 10927} 10928 10929extern __inline __m512d 10930__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10931_mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) 10932{ 10933 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A, 10934 (__v8df) __B, 10935 (__v8df) __W, 10936 (__mmask8) __U, 10937 _MM_FROUND_CUR_DIRECTION); 10938} 10939 10940extern __inline __m512d 10941__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10942_mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B) 10943{ 10944 return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A, 10945 (__v8df) __B, 10946 (__v8df) 10947 _mm512_setzero_pd (), 10948 (__mmask8) __U, 10949 _MM_FROUND_CUR_DIRECTION); 10950} 10951 10952extern __inline __m512 10953__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10954_mm512_max_ps (__m512 __A, __m512 __B) 10955{ 10956 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A, 10957 (__v16sf) __B, 10958 (__v16sf) 10959 _mm512_undefined_ps (), 10960 (__mmask16) -1, 10961 _MM_FROUND_CUR_DIRECTION); 10962} 10963 10964extern __inline __m512 10965__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10966_mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) 10967{ 10968 return (__m512) 
__builtin_ia32_maxps512_mask ((__v16sf) __A, 10969 (__v16sf) __B, 10970 (__v16sf) __W, 10971 (__mmask16) __U, 10972 _MM_FROUND_CUR_DIRECTION); 10973} 10974 10975extern __inline __m512 10976__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10977_mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B) 10978{ 10979 return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A, 10980 (__v16sf) __B, 10981 (__v16sf) 10982 _mm512_setzero_ps (), 10983 (__mmask16) __U, 10984 _MM_FROUND_CUR_DIRECTION); 10985} 10986 10987extern __inline __m512d 10988__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 10989_mm512_min_pd (__m512d __A, __m512d __B) 10990{ 10991 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A, 10992 (__v8df) __B, 10993 (__v8df) 10994 _mm512_undefined_pd (), 10995 (__mmask8) -1, 10996 _MM_FROUND_CUR_DIRECTION); 10997} 10998 10999extern __inline __m512d 11000__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11001_mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) 11002{ 11003 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A, 11004 (__v8df) __B, 11005 (__v8df) __W, 11006 (__mmask8) __U, 11007 _MM_FROUND_CUR_DIRECTION); 11008} 11009 11010extern __inline __m512d 11011__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11012_mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B) 11013{ 11014 return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A, 11015 (__v8df) __B, 11016 (__v8df) 11017 _mm512_setzero_pd (), 11018 (__mmask8) __U, 11019 _MM_FROUND_CUR_DIRECTION); 11020} 11021 11022extern __inline __m512 11023__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11024_mm512_min_ps (__m512 __A, __m512 __B) 11025{ 11026 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A, 11027 (__v16sf) __B, 11028 (__v16sf) 11029 _mm512_undefined_ps (), 11030 (__mmask16) -1, 11031 _MM_FROUND_CUR_DIRECTION); 11032} 11033 11034extern __inline 
__m512 11035__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11036_mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) 11037{ 11038 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A, 11039 (__v16sf) __B, 11040 (__v16sf) __W, 11041 (__mmask16) __U, 11042 _MM_FROUND_CUR_DIRECTION); 11043} 11044 11045extern __inline __m512 11046__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11047_mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B) 11048{ 11049 return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A, 11050 (__v16sf) __B, 11051 (__v16sf) 11052 _mm512_setzero_ps (), 11053 (__mmask16) __U, 11054 _MM_FROUND_CUR_DIRECTION); 11055} 11056 11057extern __inline __m512d 11058__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11059_mm512_scalef_pd (__m512d __A, __m512d __B) 11060{ 11061 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A, 11062 (__v8df) __B, 11063 (__v8df) 11064 _mm512_undefined_pd (), 11065 (__mmask8) -1, 11066 _MM_FROUND_CUR_DIRECTION); 11067} 11068 11069extern __inline __m512d 11070__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11071_mm512_mask_scalef_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) 11072{ 11073 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A, 11074 (__v8df) __B, 11075 (__v8df) __W, 11076 (__mmask8) __U, 11077 _MM_FROUND_CUR_DIRECTION); 11078} 11079 11080extern __inline __m512d 11081__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11082_mm512_maskz_scalef_pd (__mmask8 __U, __m512d __A, __m512d __B) 11083{ 11084 return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A, 11085 (__v8df) __B, 11086 (__v8df) 11087 _mm512_setzero_pd (), 11088 (__mmask8) __U, 11089 _MM_FROUND_CUR_DIRECTION); 11090} 11091 11092extern __inline __m512 11093__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11094_mm512_scalef_ps (__m512 __A, __m512 __B) 11095{ 11096 return (__m512) 
__builtin_ia32_scalefps512_mask ((__v16sf) __A, 11097 (__v16sf) __B, 11098 (__v16sf) 11099 _mm512_undefined_ps (), 11100 (__mmask16) -1, 11101 _MM_FROUND_CUR_DIRECTION); 11102} 11103 11104extern __inline __m512 11105__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11106_mm512_mask_scalef_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) 11107{ 11108 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A, 11109 (__v16sf) __B, 11110 (__v16sf) __W, 11111 (__mmask16) __U, 11112 _MM_FROUND_CUR_DIRECTION); 11113} 11114 11115extern __inline __m512 11116__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11117_mm512_maskz_scalef_ps (__mmask16 __U, __m512 __A, __m512 __B) 11118{ 11119 return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A, 11120 (__v16sf) __B, 11121 (__v16sf) 11122 _mm512_setzero_ps (), 11123 (__mmask16) __U, 11124 _MM_FROUND_CUR_DIRECTION); 11125} 11126 11127extern __inline __m128d 11128__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11129_mm_scalef_sd (__m128d __A, __m128d __B) 11130{ 11131 return (__m128d) __builtin_ia32_scalefsd_round ((__v2df) __A, 11132 (__v2df) __B, 11133 _MM_FROUND_CUR_DIRECTION); 11134} 11135 11136extern __inline __m128 11137__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11138_mm_scalef_ss (__m128 __A, __m128 __B) 11139{ 11140 return (__m128) __builtin_ia32_scalefss_round ((__v4sf) __A, 11141 (__v4sf) __B, 11142 _MM_FROUND_CUR_DIRECTION); 11143} 11144 11145extern __inline __m512d 11146__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11147_mm512_fmadd_pd (__m512d __A, __m512d __B, __m512d __C) 11148{ 11149 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, 11150 (__v8df) __B, 11151 (__v8df) __C, 11152 (__mmask8) -1, 11153 _MM_FROUND_CUR_DIRECTION); 11154} 11155 11156extern __inline __m512d 11157__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11158_mm512_mask_fmadd_pd (__m512d __A, 
__mmask8 __U, __m512d __B, __m512d __C) 11159{ 11160 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, 11161 (__v8df) __B, 11162 (__v8df) __C, 11163 (__mmask8) __U, 11164 _MM_FROUND_CUR_DIRECTION); 11165} 11166 /* _mm512_mask3_fmadd_pd: calls the _mask3 builtin (__builtin_ia32_vfmaddpd512_mask3); the mask __U is the last parameter. */11167extern __inline __m512d 11168__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11169_mm512_mask3_fmadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) 11170{ 11171 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A, 11172 (__v8df) __B, 11173 (__v8df) __C, 11174 (__mmask8) __U, 11175 _MM_FROUND_CUR_DIRECTION); 11176} 11177 /* _mm512_maskz_fmadd_pd: calls the _maskz builtin (__builtin_ia32_vfmaddpd512_maskz); the mask __U is the first parameter. */11178extern __inline __m512d 11179__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11180_mm512_maskz_fmadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) 11181{ 11182 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A, 11183 (__v8df) __B, 11184 (__v8df) __C, 11185 (__mmask8) __U, 11186 _MM_FROUND_CUR_DIRECTION); 11187} 11188 /* _mm512_fmadd_ps: unmasked __m512 form; passes __A, __B, __C to __builtin_ia32_vfmaddps512_mask with an all-ones __mmask16. */11189extern __inline __m512 11190__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11191_mm512_fmadd_ps (__m512 __A, __m512 __B, __m512 __C) 11192{ 11193 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, 11194 (__v16sf) __B, 11195 (__v16sf) __C, 11196 (__mmask16) -1, 11197 _MM_FROUND_CUR_DIRECTION); 11198} 11199 /* _mm512_mask_fmadd_ps: same builtin with __U as the mask. */11200extern __inline __m512 11201__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11202_mm512_mask_fmadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) 11203{ 11204 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, 11205 (__v16sf) __B, 11206 (__v16sf) __C, 11207 (__mmask16) __U, 11208 _MM_FROUND_CUR_DIRECTION); 11209} 11210 /* _mm512_mask3_fmadd_ps: calls the _mask3 builtin (__builtin_ia32_vfmaddps512_mask3); the mask __U is the last parameter. */11211extern __inline __m512 11212__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11213_mm512_mask3_fmadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) 11214{ 11215 return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A, 11216 (__v16sf) __B, 11217 (__v16sf) __C, 11218 (__mmask16) __U,
11219 _MM_FROUND_CUR_DIRECTION); 11220} 11221 /* _mm512_maskz_fmadd_ps: calls the _maskz builtin (__builtin_ia32_vfmaddps512_maskz) with __U as the mask. */11222extern __inline __m512 11223__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11224_mm512_maskz_fmadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) 11225{ 11226 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A, 11227 (__v16sf) __B, 11228 (__v16sf) __C, 11229 (__mmask16) __U, 11230 _MM_FROUND_CUR_DIRECTION); 11231} 11232 /* _mm512_fmsub_pd: expressed as the fmadd builtin (__builtin_ia32_vfmaddpd512_mask) with __C negated and an all-ones mask. */11233extern __inline __m512d 11234__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11235_mm512_fmsub_pd (__m512d __A, __m512d __B, __m512d __C) 11236{ 11237 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, 11238 (__v8df) __B, 11239 -(__v8df) __C, 11240 (__mmask8) -1, 11241 _MM_FROUND_CUR_DIRECTION); 11242} 11243 /* _mm512_mask_fmsub_pd: fmadd _mask builtin with __C negated and __U as the mask. */11244extern __inline __m512d 11245__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11246_mm512_mask_fmsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) 11247{ 11248 return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, 11249 (__v8df) __B, 11250 -(__v8df) __C, 11251 (__mmask8) __U, 11252 _MM_FROUND_CUR_DIRECTION); 11253} 11254 /* _mm512_mask3_fmsub_pd: calls the dedicated __builtin_ia32_vfmsubpd512_mask3 with __C passed un-negated. NOTE(review): presumably because __C also supplies the masked-off lanes in mask3 forms, so it cannot be pre-negated - confirm. */11255extern __inline __m512d 11256__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11257_mm512_mask3_fmsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) 11258{ 11259 return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A, 11260 (__v8df) __B, 11261 (__v8df) __C, 11262 (__mmask8) __U, 11263 _MM_FROUND_CUR_DIRECTION); 11264} 11265 /* _mm512_maskz_fmsub_pd: fmadd _maskz builtin with __C negated and __U as the mask. */11266extern __inline __m512d 11267__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11268_mm512_maskz_fmsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) 11269{ 11270 return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A, 11271 (__v8df) __B, 11272 -(__v8df) __C, 11273 (__mmask8) __U, 11274 _MM_FROUND_CUR_DIRECTION); 11275} 11276 /* _mm512_fmsub_ps: expressed as the fmadd builtin (__builtin_ia32_vfmaddps512_mask) with __C negated and an all-ones __mmask16. */11277extern __inline __m512 11278__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11279_mm512_fmsub_ps (__m512
__A, __m512 __B, __m512 __C) 11280{ 11281 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, 11282 (__v16sf) __B, 11283 -(__v16sf) __C, 11284 (__mmask16) -1, 11285 _MM_FROUND_CUR_DIRECTION); 11286} 11287 /* _mm512_mask_fmsub_ps: fmadd _mask builtin with __C negated and __U as the mask. */11288extern __inline __m512 11289__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11290_mm512_mask_fmsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) 11291{ 11292 return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, 11293 (__v16sf) __B, 11294 -(__v16sf) __C, 11295 (__mmask16) __U, 11296 _MM_FROUND_CUR_DIRECTION); 11297} 11298 /* _mm512_mask3_fmsub_ps: calls the dedicated __builtin_ia32_vfmsubps512_mask3 with __C passed un-negated. NOTE(review): presumably because __C also supplies the masked-off lanes in mask3 forms - confirm. */11299extern __inline __m512 11300__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11301_mm512_mask3_fmsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) 11302{ 11303 return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A, 11304 (__v16sf) __B, 11305 (__v16sf) __C, 11306 (__mmask16) __U, 11307 _MM_FROUND_CUR_DIRECTION); 11308} 11309 /* _mm512_maskz_fmsub_ps: fmadd _maskz builtin with __C negated and __U as the mask. */11310extern __inline __m512 11311__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11312_mm512_maskz_fmsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) 11313{ 11314 return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A, 11315 (__v16sf) __B, 11316 -(__v16sf) __C, 11317 (__mmask16) __U, 11318 _MM_FROUND_CUR_DIRECTION); 11319} 11320 /* _mm512_fmaddsub_pd: unmasked form; passes __A, __B, __C to __builtin_ia32_vfmaddsubpd512_mask with an all-ones __mmask8. */11321extern __inline __m512d 11322__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11323_mm512_fmaddsub_pd (__m512d __A, __m512d __B, __m512d __C) 11324{ 11325 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, 11326 (__v8df) __B, 11327 (__v8df) __C, 11328 (__mmask8) -1, 11329 _MM_FROUND_CUR_DIRECTION); 11330} 11331 /* _mm512_mask_fmaddsub_pd: same builtin with __U as the mask. */11332extern __inline __m512d 11333__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11334_mm512_mask_fmaddsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) 11335{ 11336 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, 11337 (__v8df) __B, 11338 (__v8df) __C, 11339 (__mmask8)
__U, 11340 _MM_FROUND_CUR_DIRECTION); 11341} 11342 /* _mm512_mask3_fmaddsub_pd: calls the _mask3 builtin (__builtin_ia32_vfmaddsubpd512_mask3); the mask __U is the last parameter. */11343extern __inline __m512d 11344__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11345_mm512_mask3_fmaddsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) 11346{ 11347 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A, 11348 (__v8df) __B, 11349 (__v8df) __C, 11350 (__mmask8) __U, 11351 _MM_FROUND_CUR_DIRECTION); 11352} 11353 /* _mm512_maskz_fmaddsub_pd: calls the _maskz builtin (__builtin_ia32_vfmaddsubpd512_maskz) with __U as the mask. */11354extern __inline __m512d 11355__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11356_mm512_maskz_fmaddsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) 11357{ 11358 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A, 11359 (__v8df) __B, 11360 (__v8df) __C, 11361 (__mmask8) __U, 11362 _MM_FROUND_CUR_DIRECTION); 11363} 11364 /* _mm512_fmaddsub_ps: unmasked __m512 form; passes __A, __B, __C to __builtin_ia32_vfmaddsubps512_mask with an all-ones __mmask16. */11365extern __inline __m512 11366__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11367_mm512_fmaddsub_ps (__m512 __A, __m512 __B, __m512 __C) 11368{ 11369 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, 11370 (__v16sf) __B, 11371 (__v16sf) __C, 11372 (__mmask16) -1, 11373 _MM_FROUND_CUR_DIRECTION); 11374} 11375 /* _mm512_mask_fmaddsub_ps: same builtin with __U as the mask. */11376extern __inline __m512 11377__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11378_mm512_mask_fmaddsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) 11379{ 11380 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, 11381 (__v16sf) __B, 11382 (__v16sf) __C, 11383 (__mmask16) __U, 11384 _MM_FROUND_CUR_DIRECTION); 11385} 11386 /* _mm512_mask3_fmaddsub_ps: calls the _mask3 builtin (__builtin_ia32_vfmaddsubps512_mask3); the mask __U is the last parameter. */11387extern __inline __m512 11388__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11389_mm512_mask3_fmaddsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) 11390{ 11391 return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A, 11392 (__v16sf) __B, 11393 (__v16sf) __C, 11394 (__mmask16) __U, 11395 _MM_FROUND_CUR_DIRECTION); 11396} 11397 /* _mm512_maskz_fmaddsub_ps: calls the _maskz builtin (__builtin_ia32_vfmaddsubps512_maskz) with __U as the mask. */11398extern __inline __m512 11399__attribute__ ((__gnu_inline__, __always_inline__,
__artificial__)) 11400_mm512_maskz_fmaddsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) 11401{ 11402 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A, 11403 (__v16sf) __B, 11404 (__v16sf) __C, 11405 (__mmask16) __U, 11406 _MM_FROUND_CUR_DIRECTION); 11407} 11408 /* _mm512_fmsubadd_pd: expressed as the fmaddsub builtin (__builtin_ia32_vfmaddsubpd512_mask) with __C negated and an all-ones mask. */11409extern __inline __m512d 11410__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11411_mm512_fmsubadd_pd (__m512d __A, __m512d __B, __m512d __C) 11412{ 11413 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, 11414 (__v8df) __B, 11415 -(__v8df) __C, 11416 (__mmask8) -1, 11417 _MM_FROUND_CUR_DIRECTION); 11418} 11419 /* _mm512_mask_fmsubadd_pd: fmaddsub _mask builtin with __C negated and __U as the mask. */11420extern __inline __m512d 11421__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11422_mm512_mask_fmsubadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) 11423{ 11424 return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, 11425 (__v8df) __B, 11426 -(__v8df) __C, 11427 (__mmask8) __U, 11428 _MM_FROUND_CUR_DIRECTION); 11429} 11430 /* _mm512_mask3_fmsubadd_pd: calls the dedicated __builtin_ia32_vfmsubaddpd512_mask3 with __C passed un-negated. NOTE(review): presumably because __C also supplies the masked-off lanes in mask3 forms - confirm. */11431extern __inline __m512d 11432__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11433_mm512_mask3_fmsubadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) 11434{ 11435 return (__m512d) __builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A, 11436 (__v8df) __B, 11437 (__v8df) __C, 11438 (__mmask8) __U, 11439 _MM_FROUND_CUR_DIRECTION); 11440} 11441 /* _mm512_maskz_fmsubadd_pd: fmaddsub _maskz builtin with __C negated and __U as the mask. */11442extern __inline __m512d 11443__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11444_mm512_maskz_fmsubadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) 11445{ 11446 return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A, 11447 (__v8df) __B, 11448 -(__v8df) __C, 11449 (__mmask8) __U, 11450 _MM_FROUND_CUR_DIRECTION); 11451} 11452 /* _mm512_fmsubadd_ps: expressed as the fmaddsub builtin (__builtin_ia32_vfmaddsubps512_mask) with __C negated and an all-ones __mmask16. */11453extern __inline __m512 11454__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11455_mm512_fmsubadd_ps (__m512 __A, __m512 __B, __m512 __C) 11456{ 11457 return (__m512) __builtin_ia32_vfmaddsubps512_mask
((__v16sf) __A, 11458 (__v16sf) __B, 11459 -(__v16sf) __C, 11460 (__mmask16) -1, 11461 _MM_FROUND_CUR_DIRECTION); 11462} 11463 /* _mm512_mask_fmsubadd_ps: fmaddsub _mask builtin with __C negated and __U as the mask. */11464extern __inline __m512 11465__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11466_mm512_mask_fmsubadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) 11467{ 11468 return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, 11469 (__v16sf) __B, 11470 -(__v16sf) __C, 11471 (__mmask16) __U, 11472 _MM_FROUND_CUR_DIRECTION); 11473} 11474 /* _mm512_mask3_fmsubadd_ps: calls the dedicated __builtin_ia32_vfmsubaddps512_mask3 with __C passed un-negated. NOTE(review): presumably because __C also supplies the masked-off lanes in mask3 forms - confirm. */11475extern __inline __m512 11476__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11477_mm512_mask3_fmsubadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) 11478{ 11479 return (__m512) __builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A, 11480 (__v16sf) __B, 11481 (__v16sf) __C, 11482 (__mmask16) __U, 11483 _MM_FROUND_CUR_DIRECTION); 11484} 11485 /* _mm512_maskz_fmsubadd_ps: fmaddsub _maskz builtin with __C negated and __U as the mask. */11486extern __inline __m512 11487__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11488_mm512_maskz_fmsubadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) 11489{ 11490 return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A, 11491 (__v16sf) __B, 11492 -(__v16sf) __C, 11493 (__mmask16) __U, 11494 _MM_FROUND_CUR_DIRECTION); 11495} 11496 /* _mm512_fnmadd_pd: expressed as the fmadd builtin (__builtin_ia32_vfmaddpd512_mask) with __A negated and an all-ones mask. */11497extern __inline __m512d 11498__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11499_mm512_fnmadd_pd (__m512d __A, __m512d __B, __m512d __C) 11500{ 11501 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A, 11502 (__v8df) __B, 11503 (__v8df) __C, 11504 (__mmask8) -1, 11505 _MM_FROUND_CUR_DIRECTION); 11506} 11507 /* _mm512_mask_fnmadd_pd: calls the dedicated __builtin_ia32_vfnmaddpd512_mask with __A passed un-negated. NOTE(review): presumably because __A also supplies the masked-off lanes in mask forms, so it cannot be pre-negated - confirm. */11508extern __inline __m512d 11509__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11510_mm512_mask_fnmadd_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) 11511{ 11512 return (__m512d) __builtin_ia32_vfnmaddpd512_mask ((__v8df) __A, 11513 (__v8df) __B, 11514 (__v8df) __C, 11515 (__mmask8) __U, 11516 _MM_FROUND_CUR_DIRECTION); 11517} 11518 /* _mm512_mask3_fnmadd_pd: fmadd _mask3 builtin with __A negated; the mask __U is the last parameter. */11519extern __inline __m512d
11520__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11521_mm512_mask3_fnmadd_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) 11522{ 11523 return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A, 11524 (__v8df) __B, 11525 (__v8df) __C, 11526 (__mmask8) __U, 11527 _MM_FROUND_CUR_DIRECTION); 11528} 11529 /* _mm512_maskz_fnmadd_pd: fmadd _maskz builtin with __A negated and __U as the mask. */11530extern __inline __m512d 11531__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11532_mm512_maskz_fnmadd_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) 11533{ 11534 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A, 11535 (__v8df) __B, 11536 (__v8df) __C, 11537 (__mmask8) __U, 11538 _MM_FROUND_CUR_DIRECTION); 11539} 11540 /* _mm512_fnmadd_ps: expressed as the fmadd builtin (__builtin_ia32_vfmaddps512_mask) with __A negated and an all-ones __mmask16. */11541extern __inline __m512 11542__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11543_mm512_fnmadd_ps (__m512 __A, __m512 __B, __m512 __C) 11544{ 11545 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A, 11546 (__v16sf) __B, 11547 (__v16sf) __C, 11548 (__mmask16) -1, 11549 _MM_FROUND_CUR_DIRECTION); 11550} 11551 /* _mm512_mask_fnmadd_ps: calls the dedicated __builtin_ia32_vfnmaddps512_mask with __A passed un-negated. NOTE(review): presumably because __A also supplies the masked-off lanes in mask forms - confirm. */11552extern __inline __m512 11553__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11554_mm512_mask_fnmadd_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) 11555{ 11556 return (__m512) __builtin_ia32_vfnmaddps512_mask ((__v16sf) __A, 11557 (__v16sf) __B, 11558 (__v16sf) __C, 11559 (__mmask16) __U, 11560 _MM_FROUND_CUR_DIRECTION); 11561} 11562 /* _mm512_mask3_fnmadd_ps: fmadd _mask3 builtin with __A negated; the mask __U is the last parameter. */11563extern __inline __m512 11564__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11565_mm512_mask3_fnmadd_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) 11566{ 11567 return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A, 11568 (__v16sf) __B, 11569 (__v16sf) __C, 11570 (__mmask16) __U, 11571 _MM_FROUND_CUR_DIRECTION); 11572} 11573 /* _mm512_maskz_fnmadd_ps: fmadd _maskz builtin with __A negated and __U as the mask. */11574extern __inline __m512 11575__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11576_mm512_maskz_fnmadd_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) 11577{ 11578
return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A, 11579 (__v16sf) __B, 11580 (__v16sf) __C, 11581 (__mmask16) __U, 11582 _MM_FROUND_CUR_DIRECTION); 11583} 11584 /* _mm512_fnmsub_pd: expressed as the fmadd builtin (__builtin_ia32_vfmaddpd512_mask) with both __A and __C negated and an all-ones mask. */11585extern __inline __m512d 11586__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11587_mm512_fnmsub_pd (__m512d __A, __m512d __B, __m512d __C) 11588{ 11589 return (__m512d) __builtin_ia32_vfmaddpd512_mask (-(__v8df) __A, 11590 (__v8df) __B, 11591 -(__v8df) __C, 11592 (__mmask8) -1, 11593 _MM_FROUND_CUR_DIRECTION); 11594} 11595 /* _mm512_mask_fnmsub_pd: calls the dedicated __builtin_ia32_vfnmsubpd512_mask with all operands un-negated. NOTE(review): presumably because __A also supplies the masked-off lanes in mask forms - confirm. */11596extern __inline __m512d 11597__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11598_mm512_mask_fnmsub_pd (__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) 11599{ 11600 return (__m512d) __builtin_ia32_vfnmsubpd512_mask ((__v8df) __A, 11601 (__v8df) __B, 11602 (__v8df) __C, 11603 (__mmask8) __U, 11604 _MM_FROUND_CUR_DIRECTION); 11605} 11606 /* _mm512_mask3_fnmsub_pd: calls the dedicated __builtin_ia32_vfnmsubpd512_mask3 with all operands un-negated; the mask __U is the last parameter. */11607extern __inline __m512d 11608__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11609_mm512_mask3_fnmsub_pd (__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) 11610{ 11611 return (__m512d) __builtin_ia32_vfnmsubpd512_mask3 ((__v8df) __A, 11612 (__v8df) __B, 11613 (__v8df) __C, 11614 (__mmask8) __U, 11615 _MM_FROUND_CUR_DIRECTION); 11616} 11617 /* _mm512_maskz_fnmsub_pd: fmadd _maskz builtin with both __A and __C negated and __U as the mask. */11618extern __inline __m512d 11619__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11620_mm512_maskz_fnmsub_pd (__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) 11621{ 11622 return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A, 11623 (__v8df) __B, 11624 -(__v8df) __C, 11625 (__mmask8) __U, 11626 _MM_FROUND_CUR_DIRECTION); 11627} 11628 /* _mm512_fnmsub_ps: expressed as the fmadd builtin (__builtin_ia32_vfmaddps512_mask) with both __A and __C negated and an all-ones __mmask16. */11629extern __inline __m512 11630__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11631_mm512_fnmsub_ps (__m512 __A, __m512 __B, __m512 __C) 11632{ 11633 return (__m512) __builtin_ia32_vfmaddps512_mask (-(__v16sf) __A, 11634 (__v16sf) __B, 11635 -(__v16sf) __C, 11636 (__mmask16) -1, 11637 _MM_FROUND_CUR_DIRECTION); 11638} 11639 /* _mm512_mask_fnmsub_ps: calls the dedicated __builtin_ia32_vfnmsubps512_mask with all operands un-negated and __U as the mask. */11640extern
__inline __m512 11641__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11642_mm512_mask_fnmsub_ps (__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) 11643{ 11644 return (__m512) __builtin_ia32_vfnmsubps512_mask ((__v16sf) __A, 11645 (__v16sf) __B, 11646 (__v16sf) __C, 11647 (__mmask16) __U, 11648 _MM_FROUND_CUR_DIRECTION); 11649} 11650 /* _mm512_mask3_fnmsub_ps: calls the dedicated __builtin_ia32_vfnmsubps512_mask3 with all operands un-negated; the mask __U is the last parameter. */11651extern __inline __m512 11652__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11653_mm512_mask3_fnmsub_ps (__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) 11654{ 11655 return (__m512) __builtin_ia32_vfnmsubps512_mask3 ((__v16sf) __A, 11656 (__v16sf) __B, 11657 (__v16sf) __C, 11658 (__mmask16) __U, 11659 _MM_FROUND_CUR_DIRECTION); 11660} 11661 /* _mm512_maskz_fnmsub_ps: fmadd _maskz builtin with both __A and __C negated and __U as the mask. */11662extern __inline __m512 11663__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11664_mm512_maskz_fnmsub_ps (__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) 11665{ 11666 return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A, 11667 (__v16sf) __B, 11668 -(__v16sf) __C, 11669 (__mmask16) __U, 11670 _MM_FROUND_CUR_DIRECTION); 11671} 11672 /* _mm512_cvttpd_epi32: unmasked form; takes a __m512d and returns a __m256i via __builtin_ia32_cvttpd2dq512_mask, with _mm256_undefined_si256 () as the second operand and an all-ones __mmask8. */11673extern __inline __m256i 11674__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11675_mm512_cvttpd_epi32 (__m512d __A) 11676{ 11677 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A, 11678 (__v8si) 11679 _mm256_undefined_si256 (), 11680 (__mmask8) -1, 11681 _MM_FROUND_CUR_DIRECTION); 11682} 11683 /* _mm512_mask_cvttpd_epi32: same builtin with __W as the second operand and __U as the mask. */11684extern __inline __m256i 11685__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11686_mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A) 11687{ 11688 return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A, 11689 (__v8si) __W, 11690 (__mmask8) __U, 11691 _MM_FROUND_CUR_DIRECTION); 11692} 11693 /* _mm512_maskz_cvttpd_epi32: same builtin with a zero vector (_mm256_setzero_si256) as the second operand and __U as the mask. */11694extern __inline __m256i 11695__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11696_mm512_maskz_cvttpd_epi32 (__mmask8 __U, __m512d __A) 11697{ 11698 return (__m256i) __builtin_ia32_cvttpd2dq512_mask
((__v8df) __A, 11699 (__v8si) 11700 _mm256_setzero_si256 (), 11701 (__mmask8) __U, 11702 _MM_FROUND_CUR_DIRECTION); 11703} 11704 /* _mm512_cvttpd_epu32: unmasked form; calls __builtin_ia32_cvttpd2udq512_mask with _mm256_undefined_si256 () as the second operand and an all-ones __mmask8. */11705extern __inline __m256i 11706__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11707_mm512_cvttpd_epu32 (__m512d __A) 11708{ 11709 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A, 11710 (__v8si) 11711 _mm256_undefined_si256 (), 11712 (__mmask8) -1, 11713 _MM_FROUND_CUR_DIRECTION); 11714} 11715 /* _mm512_mask_cvttpd_epu32: same builtin with __W as the second operand and __U as the mask. */11716extern __inline __m256i 11717__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11718_mm512_mask_cvttpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A) 11719{ 11720 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A, 11721 (__v8si) __W, 11722 (__mmask8) __U, 11723 _MM_FROUND_CUR_DIRECTION); 11724} 11725 /* _mm512_maskz_cvttpd_epu32: same builtin with a zero vector (_mm256_setzero_si256) as the second operand and __U as the mask. */11726extern __inline __m256i 11727__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11728_mm512_maskz_cvttpd_epu32 (__mmask8 __U, __m512d __A) 11729{ 11730 return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A, 11731 (__v8si) 11732 _mm256_setzero_si256 (), 11733 (__mmask8) __U, 11734 _MM_FROUND_CUR_DIRECTION); 11735} 11736 /* _mm512_cvtpd_epi32: unmasked form; calls __builtin_ia32_cvtpd2dq512_mask with _mm256_undefined_si256 () as the second operand and an all-ones __mmask8. */11737extern __inline __m256i 11738__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11739_mm512_cvtpd_epi32 (__m512d __A) 11740{ 11741 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A, 11742 (__v8si) 11743 _mm256_undefined_si256 (), 11744 (__mmask8) -1, 11745 _MM_FROUND_CUR_DIRECTION); 11746} 11747 /* _mm512_mask_cvtpd_epi32: same builtin with __W as the second operand and __U as the mask. */11748extern __inline __m256i 11749__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11750_mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A) 11751{ 11752 return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A, 11753 (__v8si) __W, 11754 (__mmask8) __U, 11755 _MM_FROUND_CUR_DIRECTION); 11756} 11757 /* _mm512_maskz_cvtpd_epi32: same builtin with a zero vector (_mm256_setzero_si256) as the second operand and __U as the mask. */11758extern __inline __m256i 11759__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11760_mm512_maskz_cvtpd_epi32 (__mmask8 __U, __m512d __A) 11761{ 11762
return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A, 11763 (__v8si) 11764 _mm256_setzero_si256 (), 11765 (__mmask8) __U, 11766 _MM_FROUND_CUR_DIRECTION); 11767} 11768 /* _mm512_cvtpd_epu32: unmasked form; calls __builtin_ia32_cvtpd2udq512_mask with _mm256_undefined_si256 () as the second operand and an all-ones __mmask8. */11769extern __inline __m256i 11770__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11771_mm512_cvtpd_epu32 (__m512d __A) 11772{ 11773 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A, 11774 (__v8si) 11775 _mm256_undefined_si256 (), 11776 (__mmask8) -1, 11777 _MM_FROUND_CUR_DIRECTION); 11778} 11779 /* _mm512_mask_cvtpd_epu32: same builtin with __W as the second operand and __U as the mask. */11780extern __inline __m256i 11781__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11782_mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A) 11783{ 11784 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A, 11785 (__v8si) __W, 11786 (__mmask8) __U, 11787 _MM_FROUND_CUR_DIRECTION); 11788} 11789 /* _mm512_maskz_cvtpd_epu32: same builtin with a zero vector (_mm256_setzero_si256) as the second operand and __U as the mask. */11790extern __inline __m256i 11791__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11792_mm512_maskz_cvtpd_epu32 (__mmask8 __U, __m512d __A) 11793{ 11794 return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A, 11795 (__v8si) 11796 _mm256_setzero_si256 (), 11797 (__mmask8) __U, 11798 _MM_FROUND_CUR_DIRECTION); 11799} 11800 /* _mm512_cvttps_epi32: unmasked form; takes a __m512 and returns a __m512i via __builtin_ia32_cvttps2dq512_mask, with _mm512_undefined_si512 () as the second operand and an all-ones __mmask16. */11801extern __inline __m512i 11802__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11803_mm512_cvttps_epi32 (__m512 __A) 11804{ 11805 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A, 11806 (__v16si) 11807 _mm512_undefined_si512 (), 11808 (__mmask16) -1, 11809 _MM_FROUND_CUR_DIRECTION); 11810} 11811 /* _mm512_mask_cvttps_epi32: same builtin with __W as the second operand and __U as the mask. */11812extern __inline __m512i 11813__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11814_mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A) 11815{ 11816 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A, 11817 (__v16si) __W, 11818 (__mmask16) __U, 11819 _MM_FROUND_CUR_DIRECTION); 11820} 11821 /* _mm512_maskz_cvttps_epi32: same builtin with a zero vector (_mm512_setzero_si512) as the second operand and __U as the mask. */11822extern __inline __m512i 11823__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
11824_mm512_maskz_cvttps_epi32 (__mmask16 __U, __m512 __A) 11825{ 11826 return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A, 11827 (__v16si) 11828 _mm512_setzero_si512 (), 11829 (__mmask16) __U, 11830 _MM_FROUND_CUR_DIRECTION); 11831} 11832 /* _mm512_cvttps_epu32: unmasked form; calls __builtin_ia32_cvttps2udq512_mask with _mm512_undefined_si512 () as the second operand and an all-ones __mmask16. */11833extern __inline __m512i 11834__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11835_mm512_cvttps_epu32 (__m512 __A) 11836{ 11837 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A, 11838 (__v16si) 11839 _mm512_undefined_si512 (), 11840 (__mmask16) -1, 11841 _MM_FROUND_CUR_DIRECTION); 11842} 11843 /* _mm512_mask_cvttps_epu32: same builtin with __W as the second operand and __U as the mask. */11844extern __inline __m512i 11845__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11846_mm512_mask_cvttps_epu32 (__m512i __W, __mmask16 __U, __m512 __A) 11847{ 11848 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A, 11849 (__v16si) __W, 11850 (__mmask16) __U, 11851 _MM_FROUND_CUR_DIRECTION); 11852} 11853 /* _mm512_maskz_cvttps_epu32: same builtin with a zero vector (_mm512_setzero_si512) as the second operand and __U as the mask. */11854extern __inline __m512i 11855__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11856_mm512_maskz_cvttps_epu32 (__mmask16 __U, __m512 __A) 11857{ 11858 return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A, 11859 (__v16si) 11860 _mm512_setzero_si512 (), 11861 (__mmask16) __U, 11862 _MM_FROUND_CUR_DIRECTION); 11863} 11864 /* _mm512_cvtps_epi32: unmasked form; calls __builtin_ia32_cvtps2dq512_mask with _mm512_undefined_si512 () as the second operand and an all-ones __mmask16. */11865extern __inline __m512i 11866__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11867_mm512_cvtps_epi32 (__m512 __A) 11868{ 11869 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A, 11870 (__v16si) 11871 _mm512_undefined_si512 (), 11872 (__mmask16) -1, 11873 _MM_FROUND_CUR_DIRECTION); 11874} 11875 /* _mm512_mask_cvtps_epi32: same builtin with __W as the second operand and __U as the mask. */11876extern __inline __m512i 11877__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11878_mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A) 11879{ 11880 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A, 11881 (__v16si) __W, 11882 (__mmask16) __U, 11883 _MM_FROUND_CUR_DIRECTION); 11884} 11885 /* _mm512_maskz_cvtps_epi32: same builtin with a zero vector (_mm512_setzero_si512) as the second operand and __U as the mask. */11886extern __inline __m512i
11887__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11888_mm512_maskz_cvtps_epi32 (__mmask16 __U, __m512 __A) 11889{ 11890 return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A, 11891 (__v16si) 11892 _mm512_setzero_si512 (), 11893 (__mmask16) __U, 11894 _MM_FROUND_CUR_DIRECTION); 11895} 11896 /* _mm512_cvtps_epu32: unmasked form; calls __builtin_ia32_cvtps2udq512_mask with _mm512_undefined_si512 () as the second operand and an all-ones __mmask16. */11897extern __inline __m512i 11898__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11899_mm512_cvtps_epu32 (__m512 __A) 11900{ 11901 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A, 11902 (__v16si) 11903 _mm512_undefined_si512 (), 11904 (__mmask16) -1, 11905 _MM_FROUND_CUR_DIRECTION); 11906} 11907 /* _mm512_mask_cvtps_epu32: same builtin with __W as the second operand and __U as the mask. */11908extern __inline __m512i 11909__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11910_mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A) 11911{ 11912 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A, 11913 (__v16si) __W, 11914 (__mmask16) __U, 11915 _MM_FROUND_CUR_DIRECTION); 11916} 11917 /* _mm512_maskz_cvtps_epu32: same builtin with a zero vector (_mm512_setzero_si512) as the second operand and __U as the mask. */11918extern __inline __m512i 11919__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11920_mm512_maskz_cvtps_epu32 (__mmask16 __U, __m512 __A) 11921{ 11922 return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A, 11923 (__v16si) 11924 _mm512_setzero_si512 (), 11925 (__mmask16) __U, 11926 _MM_FROUND_CUR_DIRECTION); 11927} 11928 /* The two unsigned long long conversions below are only declared when __x86_64__ is defined. */11929#ifdef __x86_64__ /* _mm_cvtu64_ss: passes __A and the unsigned long long __B to __builtin_ia32_cvtusi2ss64 with _MM_FROUND_CUR_DIRECTION. */11930extern __inline __m128 11931__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11932_mm_cvtu64_ss (__m128 __A, unsigned long long __B) 11933{ 11934 return (__m128) __builtin_ia32_cvtusi2ss64 ((__v4sf) __A, __B, 11935 _MM_FROUND_CUR_DIRECTION); 11936} 11937 /* _mm_cvtu64_sd: __m128d counterpart using __builtin_ia32_cvtusi2sd64. */11938extern __inline __m128d 11939__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11940_mm_cvtu64_sd (__m128d __A, unsigned long long __B) 11941{ 11942 return (__m128d) __builtin_ia32_cvtusi2sd64 ((__v2df) __A, __B, 11943 _MM_FROUND_CUR_DIRECTION); 11944} 11945#endif 11946 /* _mm_cvtu32_ss: passes __A and the unsigned __B to __builtin_ia32_cvtusi2ss32; not guarded by __x86_64__. */11947extern __inline __m128 11948__attribute__
((__gnu_inline__, __always_inline__, __artificial__)) 11949_mm_cvtu32_ss (__m128 __A, unsigned __B) 11950{ 11951 return (__m128) __builtin_ia32_cvtusi2ss32 ((__v4sf) __A, __B, 11952 _MM_FROUND_CUR_DIRECTION); 11953} 11954 /* _mm512_cvtepi32_ps: unmasked form; takes a __m512i and returns a __m512 via __builtin_ia32_cvtdq2ps512_mask, with _mm512_undefined_ps () as the second operand and an all-ones __mmask16. */11955extern __inline __m512 11956__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11957_mm512_cvtepi32_ps (__m512i __A) 11958{ 11959 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A, 11960 (__v16sf) 11961 _mm512_undefined_ps (), 11962 (__mmask16) -1, 11963 _MM_FROUND_CUR_DIRECTION); 11964} 11965 /* _mm512_mask_cvtepi32_ps: same builtin with __W as the second operand and __U as the mask. */11966extern __inline __m512 11967__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11968_mm512_mask_cvtepi32_ps (__m512 __W, __mmask16 __U, __m512i __A) 11969{ 11970 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A, 11971 (__v16sf) __W, 11972 (__mmask16) __U, 11973 _MM_FROUND_CUR_DIRECTION); 11974} 11975 /* _mm512_maskz_cvtepi32_ps: same builtin with a zero vector (_mm512_setzero_ps) as the second operand and __U as the mask. */11976extern __inline __m512 11977__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11978_mm512_maskz_cvtepi32_ps (__mmask16 __U, __m512i __A) 11979{ 11980 return (__m512) __builtin_ia32_cvtdq2ps512_mask ((__v16si) __A, 11981 (__v16sf) 11982 _mm512_setzero_ps (), 11983 (__mmask16) __U, 11984 _MM_FROUND_CUR_DIRECTION); 11985} 11986 /* _mm512_cvtepu32_ps: unmasked form; calls __builtin_ia32_cvtudq2ps512_mask with _mm512_undefined_ps () as the second operand and an all-ones __mmask16. */11987extern __inline __m512 11988__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 11989_mm512_cvtepu32_ps (__m512i __A) 11990{ 11991 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A, 11992 (__v16sf) 11993 _mm512_undefined_ps (), 11994 (__mmask16) -1, 11995 _MM_FROUND_CUR_DIRECTION); 11996} 11997 /* _mm512_mask_cvtepu32_ps: same builtin with __W as the second operand and __U as the mask. */11998extern __inline __m512 11999__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12000_mm512_mask_cvtepu32_ps (__m512 __W, __mmask16 __U, __m512i __A) 12001{ 12002 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A, 12003 (__v16sf) __W, 12004 (__mmask16) __U, 12005 _MM_FROUND_CUR_DIRECTION); 12006} 12007 /* _mm512_maskz_cvtepu32_ps: same builtin with a zero vector (_mm512_setzero_ps) as the second operand and __U as the mask. */12008extern __inline __m512 12009__attribute__ ((__gnu_inline__, __always_inline__,
__artificial__)) 12010_mm512_maskz_cvtepu32_ps (__mmask16 __U, __m512i __A) 12011{ 12012 return (__m512) __builtin_ia32_cvtudq2ps512_mask ((__v16si) __A, 12013 (__v16sf) 12014 _mm512_setzero_ps (), 12015 (__mmask16) __U, 12016 _MM_FROUND_CUR_DIRECTION); 12017} 12018 /* When __OPTIMIZE__ is defined the fixupimm intrinsics, which take an immediate (__imm), are inline functions; macro versions are provided under the matching #else. */12019#ifdef __OPTIMIZE__ /* _mm512_fixupimm_pd: unmasked form; passes __A, __B, __C and __imm to __builtin_ia32_fixupimmpd512_mask with an all-ones __mmask8. */12020extern __inline __m512d 12021__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12022_mm512_fixupimm_pd (__m512d __A, __m512d __B, __m512i __C, const int __imm) 12023{ 12024 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A, 12025 (__v8df) __B, 12026 (__v8di) __C, 12027 __imm, 12028 (__mmask8) -1, 12029 _MM_FROUND_CUR_DIRECTION); 12030} 12031 /* _mm512_mask_fixupimm_pd: same builtin with __U as the mask. */12032extern __inline __m512d 12033__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12034_mm512_mask_fixupimm_pd (__m512d __A, __mmask8 __U, __m512d __B, 12035 __m512i __C, const int __imm) 12036{ 12037 return (__m512d) __builtin_ia32_fixupimmpd512_mask ((__v8df) __A, 12038 (__v8df) __B, 12039 (__v8di) __C, 12040 __imm, 12041 (__mmask8) __U, 12042 _MM_FROUND_CUR_DIRECTION); 12043} 12044 /* _mm512_maskz_fixupimm_pd: calls the _maskz builtin (__builtin_ia32_fixupimmpd512_maskz) with __U as the mask. */12045extern __inline __m512d 12046__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12047_mm512_maskz_fixupimm_pd (__mmask8 __U, __m512d __A, __m512d __B, 12048 __m512i __C, const int __imm) 12049{ 12050 return (__m512d) __builtin_ia32_fixupimmpd512_maskz ((__v8df) __A, 12051 (__v8df) __B, 12052 (__v8di) __C, 12053 __imm, 12054 (__mmask8) __U, 12055 _MM_FROUND_CUR_DIRECTION); 12056} 12057 /* _mm512_fixupimm_ps: unmasked __m512 form; __C is cast to __v16si and passed with __imm to __builtin_ia32_fixupimmps512_mask under an all-ones __mmask16. */12058extern __inline __m512 12059__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12060_mm512_fixupimm_ps (__m512 __A, __m512 __B, __m512i __C, const int __imm) 12061{ 12062 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A, 12063 (__v16sf) __B, 12064 (__v16si) __C, 12065 __imm, 12066 (__mmask16) -1, 12067 _MM_FROUND_CUR_DIRECTION); 12068} 12069 /* _mm512_mask_fixupimm_ps: same builtin with __U as the mask. */12070extern __inline __m512 12071__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12072_mm512_mask_fixupimm_ps (__m512 __A, __mmask16 __U, __m512 __B, 12073 __m512i __C, const int __imm) 12074{ 12075 return (__m512) __builtin_ia32_fixupimmps512_mask ((__v16sf) __A, 12076 (__v16sf) __B, 12077 (__v16si) __C, 12078 __imm, 12079 (__mmask16) __U, 12080 _MM_FROUND_CUR_DIRECTION); 12081} 12082 /* _mm512_maskz_fixupimm_ps: calls the _maskz builtin (__builtin_ia32_fixupimmps512_maskz) with __U as the mask. */12083extern __inline __m512 12084__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12085_mm512_maskz_fixupimm_ps (__mmask16 __U, __m512 __A, __m512 __B, 12086 __m512i __C, const int __imm) 12087{ 12088 return (__m512) __builtin_ia32_fixupimmps512_maskz ((__v16sf) __A, 12089 (__v16sf) __B, 12090 (__v16si) __C, 12091 __imm, 12092 (__mmask16) __U, 12093 _MM_FROUND_CUR_DIRECTION); 12094} 12095 /* _mm_fixupimm_sd: __m128d variant; passes __A, __B, __C (as __v2di) and __imm to __builtin_ia32_fixupimmsd_mask with an all-ones __mmask8. */12096extern __inline __m128d 12097__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12098_mm_fixupimm_sd (__m128d __A, __m128d __B, __m128i __C, const int __imm) 12099{ 12100 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A, 12101 (__v2df) __B, 12102 (__v2di) __C, __imm, 12103 (__mmask8) -1, 12104 _MM_FROUND_CUR_DIRECTION); 12105} 12106 /* _mm_mask_fixupimm_sd: same builtin with __U as the mask. */12107extern __inline __m128d 12108__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12109_mm_mask_fixupimm_sd (__m128d __A, __mmask8 __U, __m128d __B, 12110 __m128i __C, const int __imm) 12111{ 12112 return (__m128d) __builtin_ia32_fixupimmsd_mask ((__v2df) __A, 12113 (__v2df) __B, 12114 (__v2di) __C, __imm, 12115 (__mmask8) __U, 12116 _MM_FROUND_CUR_DIRECTION); 12117} 12118 /* _mm_maskz_fixupimm_sd: calls the _maskz builtin (__builtin_ia32_fixupimmsd_maskz) with __U as the mask. */12119extern __inline __m128d 12120__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12121_mm_maskz_fixupimm_sd (__mmask8 __U, __m128d __A, __m128d __B, 12122 __m128i __C, const int __imm) 12123{ 12124 return (__m128d) __builtin_ia32_fixupimmsd_maskz ((__v2df) __A, 12125 (__v2df) __B, 12126 (__v2di) __C, 12127 __imm, 12128 (__mmask8) __U, 12129 _MM_FROUND_CUR_DIRECTION); 12130} 12131 /* _mm_fixupimm_ss: __m128 variant; passes __A, __B, __C (as __v4si) and __imm to __builtin_ia32_fixupimmss_mask with an all-ones __mmask8. */12132extern __inline __m128 12133__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
12134_mm_fixupimm_ss (__m128 __A, __m128 __B, __m128i __C, const int __imm) 12135{ 12136 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A, 12137 (__v4sf) __B, 12138 (__v4si) __C, __imm, 12139 (__mmask8) -1, 12140 _MM_FROUND_CUR_DIRECTION); 12141} 12142 /* _mm_mask_fixupimm_ss: calls __builtin_ia32_fixupimmss_mask with __U as the mask. */12143extern __inline __m128 12144__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12145_mm_mask_fixupimm_ss (__m128 __A, __mmask8 __U, __m128 __B, 12146 __m128i __C, const int __imm) 12147{ 12148 return (__m128) __builtin_ia32_fixupimmss_mask ((__v4sf) __A, 12149 (__v4sf) __B, 12150 (__v4si) __C, __imm, 12151 (__mmask8) __U, 12152 _MM_FROUND_CUR_DIRECTION); 12153} 12154 /* _mm_maskz_fixupimm_ss: calls the _maskz builtin (__builtin_ia32_fixupimmss_maskz) with __U as the mask. */12155extern __inline __m128 12156__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12157_mm_maskz_fixupimm_ss (__mmask8 __U, __m128 __A, __m128 __B, 12158 __m128i __C, const int __imm) 12159{ 12160 return (__m128) __builtin_ia32_fixupimmss_maskz ((__v4sf) __A, 12161 (__v4sf) __B, 12162 (__v4si) __C, __imm, 12163 (__mmask8) __U, 12164 _MM_FROUND_CUR_DIRECTION); 12165} /* Fallback when __OPTIMIZE__ is not defined: the fixupimm intrinsics become macros that cast each argument and pass (int)(C) directly to the same builtins. NOTE(review): presumably so the immediate remains a compile-time constant without relying on inlining - confirm. */12166#else 12167#define _mm512_fixupimm_pd(X, Y, Z, C) \ 12168 ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X), \ 12169 (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \ 12170 (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION)) 12171 /* Macro form of _mm512_mask_fixupimm_pd: U is cast to __mmask8 and used as the mask. */12172#define _mm512_mask_fixupimm_pd(X, U, Y, Z, C) \ 12173 ((__m512d)__builtin_ia32_fixupimmpd512_mask ((__v8df)(__m512d)(X), \ 12174 (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \ 12175 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION)) 12176 /* Macro form of _mm512_maskz_fixupimm_pd: uses the _maskz builtin. */12177#define _mm512_maskz_fixupimm_pd(U, X, Y, Z, C) \ 12178 ((__m512d)__builtin_ia32_fixupimmpd512_maskz ((__v8df)(__m512d)(X), \ 12179 (__v8df)(__m512d)(Y), (__v8di)(__m512i)(Z), (int)(C), \ 12180 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION)) 12181 /* Macro form of _mm512_fixupimm_ps: all-ones __mmask16. */12182#define _mm512_fixupimm_ps(X, Y, Z, C) \ 12183 ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X), \ 12184 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \ 12185 (__mmask16)(-1),
_MM_FROUND_CUR_DIRECTION)) 12186 /* Macro form of _mm512_mask_fixupimm_ps: U is cast to __mmask16 and used as the mask. */12187#define _mm512_mask_fixupimm_ps(X, U, Y, Z, C) \ 12188 ((__m512)__builtin_ia32_fixupimmps512_mask ((__v16sf)(__m512)(X), \ 12189 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \ 12190 (__mmask16)(U), _MM_FROUND_CUR_DIRECTION)) 12191 /* Macro form of _mm512_maskz_fixupimm_ps: uses the _maskz builtin. */12192#define _mm512_maskz_fixupimm_ps(U, X, Y, Z, C) \ 12193 ((__m512)__builtin_ia32_fixupimmps512_maskz ((__v16sf)(__m512)(X), \ 12194 (__v16sf)(__m512)(Y), (__v16si)(__m512i)(Z), (int)(C), \ 12195 (__mmask16)(U), _MM_FROUND_CUR_DIRECTION)) 12196 /* Macro form of _mm_fixupimm_sd: __m128d operands, all-ones __mmask8. */12197#define _mm_fixupimm_sd(X, Y, Z, C) \ 12198 ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \ 12199 (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \ 12200 (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION)) 12201 /* Macro form of _mm_mask_fixupimm_sd: U is cast to __mmask8 and used as the mask. */12202#define _mm_mask_fixupimm_sd(X, U, Y, Z, C) \ 12203 ((__m128d)__builtin_ia32_fixupimmsd_mask ((__v2df)(__m128d)(X), \ 12204 (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \ 12205 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION)) 12206 /* Macro form of _mm_maskz_fixupimm_sd: uses the _maskz builtin. */12207#define _mm_maskz_fixupimm_sd(U, X, Y, Z, C) \ 12208 ((__m128d)__builtin_ia32_fixupimmsd_maskz ((__v2df)(__m128d)(X), \ 12209 (__v2df)(__m128d)(Y), (__v2di)(__m128i)(Z), (int)(C), \ 12210 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION)) 12211 /* Macro form of _mm_fixupimm_ss: __m128 operands, all-ones __mmask8. */12212#define _mm_fixupimm_ss(X, Y, Z, C) \ 12213 ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \ 12214 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \ 12215 (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION)) 12216 /* Macro form of _mm_mask_fixupimm_ss: U is cast to __mmask8 and used as the mask. */12217#define _mm_mask_fixupimm_ss(X, U, Y, Z, C) \ 12218 ((__m128)__builtin_ia32_fixupimmss_mask ((__v4sf)(__m128)(X), \ 12219 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \ 12220 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION)) 12221 /* Macro form of _mm_maskz_fixupimm_ss: uses the _maskz builtin. */12222#define _mm_maskz_fixupimm_ss(U, X, Y, Z, C) \ 12223 ((__m128)__builtin_ia32_fixupimmss_maskz ((__v4sf)(__m128)(X), \ 12224 (__v4sf)(__m128)(Y), (__v4si)(__m128i)(Z), (int)(C), \ 12225 (__mmask8)(U), _MM_FROUND_CUR_DIRECTION)) 12226#endif 12227 /* The 64-bit __m128 conversions that follow are only declared when __x86_64__ is defined. */12228#ifdef __x86_64__ /* _mm_cvtss_u64: returns __builtin_ia32_vcvtss2usi64 (__A, _MM_FROUND_CUR_DIRECTION) as unsigned long long. */12229extern __inline
unsigned long long 12230__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12231_mm_cvtss_u64 (__m128 __A) 12232{ 12233 return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf) 12234 __A, 12235 _MM_FROUND_CUR_DIRECTION); 12236} 12237 12238extern __inline unsigned long long 12239__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12240_mm_cvttss_u64 (__m128 __A) 12241{ 12242 return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf) 12243 __A, 12244 _MM_FROUND_CUR_DIRECTION); 12245} 12246 12247extern __inline long long 12248__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12249_mm_cvttss_i64 (__m128 __A) 12250{ 12251 return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A, 12252 _MM_FROUND_CUR_DIRECTION); 12253} 12254#endif /* __x86_64__ */ 12255 12256extern __inline unsigned 12257__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12258_mm_cvtss_u32 (__m128 __A) 12259{ 12260 return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A, 12261 _MM_FROUND_CUR_DIRECTION); 12262} 12263 12264extern __inline unsigned 12265__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12266_mm_cvttss_u32 (__m128 __A) 12267{ 12268 return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A, 12269 _MM_FROUND_CUR_DIRECTION); 12270} 12271 12272extern __inline int 12273__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12274_mm_cvttss_i32 (__m128 __A) 12275{ 12276 return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A, 12277 _MM_FROUND_CUR_DIRECTION); 12278} 12279 12280#ifdef __x86_64__ 12281extern __inline unsigned long long 12282__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12283_mm_cvtsd_u64 (__m128d __A) 12284{ 12285 return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df) 12286 __A, 12287 _MM_FROUND_CUR_DIRECTION); 12288} 12289 12290extern __inline unsigned long long 12291__attribute__ ((__gnu_inline__, __always_inline__, 
__artificial__)) 12292_mm_cvttsd_u64 (__m128d __A) 12293{ 12294 return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df) 12295 __A, 12296 _MM_FROUND_CUR_DIRECTION); 12297} 12298 12299extern __inline long long 12300__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12301_mm_cvttsd_i64 (__m128d __A) 12302{ 12303 return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A, 12304 _MM_FROUND_CUR_DIRECTION); 12305} 12306#endif /* __x86_64__ */ 12307 12308extern __inline unsigned 12309__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12310_mm_cvtsd_u32 (__m128d __A) 12311{ 12312 return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A, 12313 _MM_FROUND_CUR_DIRECTION); 12314} 12315 12316extern __inline unsigned 12317__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12318_mm_cvttsd_u32 (__m128d __A) 12319{ 12320 return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A, 12321 _MM_FROUND_CUR_DIRECTION); 12322} 12323 12324extern __inline int 12325__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12326_mm_cvttsd_i32 (__m128d __A) 12327{ 12328 return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A, 12329 _MM_FROUND_CUR_DIRECTION); 12330} 12331 12332extern __inline __m512d 12333__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12334_mm512_cvtps_pd (__m256 __A) 12335{ 12336 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A, 12337 (__v8df) 12338 _mm512_undefined_pd (), 12339 (__mmask8) -1, 12340 _MM_FROUND_CUR_DIRECTION); 12341} 12342 12343extern __inline __m512d 12344__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12345_mm512_mask_cvtps_pd (__m512d __W, __mmask8 __U, __m256 __A) 12346{ 12347 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A, 12348 (__v8df) __W, 12349 (__mmask8) __U, 12350 _MM_FROUND_CUR_DIRECTION); 12351} 12352 12353extern __inline __m512d 12354__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 
12355_mm512_maskz_cvtps_pd (__mmask8 __U, __m256 __A) 12356{ 12357 return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A, 12358 (__v8df) 12359 _mm512_setzero_pd (), 12360 (__mmask8) __U, 12361 _MM_FROUND_CUR_DIRECTION); 12362} 12363 12364extern __inline __m512 12365__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12366_mm512_cvtph_ps (__m256i __A) 12367{ 12368 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A, 12369 (__v16sf) 12370 _mm512_undefined_ps (), 12371 (__mmask16) -1, 12372 _MM_FROUND_CUR_DIRECTION); 12373} 12374 12375extern __inline __m512 12376__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12377_mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A) 12378{ 12379 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A, 12380 (__v16sf) __W, 12381 (__mmask16) __U, 12382 _MM_FROUND_CUR_DIRECTION); 12383} 12384 12385extern __inline __m512 12386__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12387_mm512_maskz_cvtph_ps (__mmask16 __U, __m256i __A) 12388{ 12389 return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A, 12390 (__v16sf) 12391 _mm512_setzero_ps (), 12392 (__mmask16) __U, 12393 _MM_FROUND_CUR_DIRECTION); 12394} 12395 12396extern __inline __m256 12397__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12398_mm512_cvtpd_ps (__m512d __A) 12399{ 12400 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A, 12401 (__v8sf) 12402 _mm256_undefined_ps (), 12403 (__mmask8) -1, 12404 _MM_FROUND_CUR_DIRECTION); 12405} 12406 12407extern __inline __m256 12408__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12409_mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A) 12410{ 12411 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A, 12412 (__v8sf) __W, 12413 (__mmask8) __U, 12414 _MM_FROUND_CUR_DIRECTION); 12415} 12416 12417extern __inline __m256 12418__attribute__ ((__gnu_inline__, __always_inline__, 
__artificial__)) 12419_mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A) 12420{ 12421 return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A, 12422 (__v8sf) 12423 _mm256_setzero_ps (), 12424 (__mmask8) __U, 12425 _MM_FROUND_CUR_DIRECTION); 12426} 12427 12428#ifdef __OPTIMIZE__ 12429extern __inline __m512 12430__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12431_mm512_getexp_ps (__m512 __A) 12432{ 12433 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A, 12434 (__v16sf) 12435 _mm512_undefined_ps (), 12436 (__mmask16) -1, 12437 _MM_FROUND_CUR_DIRECTION); 12438} 12439 12440extern __inline __m512 12441__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12442_mm512_mask_getexp_ps (__m512 __W, __mmask16 __U, __m512 __A) 12443{ 12444 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A, 12445 (__v16sf) __W, 12446 (__mmask16) __U, 12447 _MM_FROUND_CUR_DIRECTION); 12448} 12449 12450extern __inline __m512 12451__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12452_mm512_maskz_getexp_ps (__mmask16 __U, __m512 __A) 12453{ 12454 return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A, 12455 (__v16sf) 12456 _mm512_setzero_ps (), 12457 (__mmask16) __U, 12458 _MM_FROUND_CUR_DIRECTION); 12459} 12460 12461extern __inline __m512d 12462__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12463_mm512_getexp_pd (__m512d __A) 12464{ 12465 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A, 12466 (__v8df) 12467 _mm512_undefined_pd (), 12468 (__mmask8) -1, 12469 _MM_FROUND_CUR_DIRECTION); 12470} 12471 12472extern __inline __m512d 12473__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12474_mm512_mask_getexp_pd (__m512d __W, __mmask8 __U, __m512d __A) 12475{ 12476 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A, 12477 (__v8df) __W, 12478 (__mmask8) __U, 12479 _MM_FROUND_CUR_DIRECTION); 12480} 12481 12482extern __inline __m512d 
12483__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12484_mm512_maskz_getexp_pd (__mmask8 __U, __m512d __A) 12485{ 12486 return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A, 12487 (__v8df) 12488 _mm512_setzero_pd (), 12489 (__mmask8) __U, 12490 _MM_FROUND_CUR_DIRECTION); 12491} 12492 12493extern __inline __m128 12494__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12495_mm_getexp_ss (__m128 __A, __m128 __B) 12496{ 12497 return (__m128) __builtin_ia32_getexpss128_round ((__v4sf) __A, 12498 (__v4sf) __B, 12499 _MM_FROUND_CUR_DIRECTION); 12500} 12501 12502extern __inline __m128d 12503__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12504_mm_getexp_sd (__m128d __A, __m128d __B) 12505{ 12506 return (__m128d) __builtin_ia32_getexpsd128_round ((__v2df) __A, 12507 (__v2df) __B, 12508 _MM_FROUND_CUR_DIRECTION); 12509} 12510 12511extern __inline __m512d 12512__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12513_mm512_getmant_pd (__m512d __A, _MM_MANTISSA_NORM_ENUM __B, 12514 _MM_MANTISSA_SIGN_ENUM __C) 12515{ 12516 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A, 12517 (__C << 2) | __B, 12518 _mm512_undefined_pd (), 12519 (__mmask8) -1, 12520 _MM_FROUND_CUR_DIRECTION); 12521} 12522 12523extern __inline __m512d 12524__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12525_mm512_mask_getmant_pd (__m512d __W, __mmask8 __U, __m512d __A, 12526 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C) 12527{ 12528 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) __A, 12529 (__C << 2) | __B, 12530 (__v8df) __W, __U, 12531 _MM_FROUND_CUR_DIRECTION); 12532} 12533 12534extern __inline __m512d 12535__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12536_mm512_maskz_getmant_pd (__mmask8 __U, __m512d __A, 12537 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C) 12538{ 12539 return (__m512d) __builtin_ia32_getmantpd512_mask ((__v8df) 
__A, 12540 (__C << 2) | __B, 12541 (__v8df) 12542 _mm512_setzero_pd (), 12543 __U, 12544 _MM_FROUND_CUR_DIRECTION); 12545} 12546 12547extern __inline __m512 12548__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12549_mm512_getmant_ps (__m512 __A, _MM_MANTISSA_NORM_ENUM __B, 12550 _MM_MANTISSA_SIGN_ENUM __C) 12551{ 12552 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A, 12553 (__C << 2) | __B, 12554 _mm512_undefined_ps (), 12555 (__mmask16) -1, 12556 _MM_FROUND_CUR_DIRECTION); 12557} 12558 12559extern __inline __m512 12560__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12561_mm512_mask_getmant_ps (__m512 __W, __mmask16 __U, __m512 __A, 12562 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C) 12563{ 12564 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A, 12565 (__C << 2) | __B, 12566 (__v16sf) __W, __U, 12567 _MM_FROUND_CUR_DIRECTION); 12568} 12569 12570extern __inline __m512 12571__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12572_mm512_maskz_getmant_ps (__mmask16 __U, __m512 __A, 12573 _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C) 12574{ 12575 return (__m512) __builtin_ia32_getmantps512_mask ((__v16sf) __A, 12576 (__C << 2) | __B, 12577 (__v16sf) 12578 _mm512_setzero_ps (), 12579 __U, 12580 _MM_FROUND_CUR_DIRECTION); 12581} 12582 12583extern __inline __m128d 12584__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12585_mm_getmant_sd (__m128d __A, __m128d __B, _MM_MANTISSA_NORM_ENUM __C, 12586 _MM_MANTISSA_SIGN_ENUM __D) 12587{ 12588 return (__m128d) __builtin_ia32_getmantsd_round ((__v2df) __A, 12589 (__v2df) __B, 12590 (__D << 2) | __C, 12591 _MM_FROUND_CUR_DIRECTION); 12592} 12593 12594extern __inline __m128 12595__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12596_mm_getmant_ss (__m128 __A, __m128 __B, _MM_MANTISSA_NORM_ENUM __C, 12597 _MM_MANTISSA_SIGN_ENUM __D) 12598{ 12599 return (__m128) 
__builtin_ia32_getmantss_round ((__v4sf) __A, 12600 (__v4sf) __B, 12601 (__D << 2) | __C, 12602 _MM_FROUND_CUR_DIRECTION); 12603} 12604 12605#else 12606#define _mm512_getmant_pd(X, B, C) \ 12607 ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \ 12608 (int)(((C)<<2) | (B)), \ 12609 (__v8df)_mm512_undefined_pd(), \ 12610 (__mmask8)-1,\ 12611 _MM_FROUND_CUR_DIRECTION)) 12612 12613#define _mm512_mask_getmant_pd(W, U, X, B, C) \ 12614 ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \ 12615 (int)(((C)<<2) | (B)), \ 12616 (__v8df)(__m512d)(W), \ 12617 (__mmask8)(U),\ 12618 _MM_FROUND_CUR_DIRECTION)) 12619 12620#define _mm512_maskz_getmant_pd(U, X, B, C) \ 12621 ((__m512d)__builtin_ia32_getmantpd512_mask ((__v8df)(__m512d)(X), \ 12622 (int)(((C)<<2) | (B)), \ 12623 (__v8df)_mm512_setzero_pd(), \ 12624 (__mmask8)(U),\ 12625 _MM_FROUND_CUR_DIRECTION)) 12626#define _mm512_getmant_ps(X, B, C) \ 12627 ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \ 12628 (int)(((C)<<2) | (B)), \ 12629 (__v16sf)_mm512_undefined_ps(), \ 12630 (__mmask16)-1,\ 12631 _MM_FROUND_CUR_DIRECTION)) 12632 12633#define _mm512_mask_getmant_ps(W, U, X, B, C) \ 12634 ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \ 12635 (int)(((C)<<2) | (B)), \ 12636 (__v16sf)(__m512)(W), \ 12637 (__mmask16)(U),\ 12638 _MM_FROUND_CUR_DIRECTION)) 12639 12640#define _mm512_maskz_getmant_ps(U, X, B, C) \ 12641 ((__m512)__builtin_ia32_getmantps512_mask ((__v16sf)(__m512)(X), \ 12642 (int)(((C)<<2) | (B)), \ 12643 (__v16sf)_mm512_setzero_ps(), \ 12644 (__mmask16)(U),\ 12645 _MM_FROUND_CUR_DIRECTION)) 12646#define _mm_getmant_sd(X, Y, C, D) \ 12647 ((__m128d)__builtin_ia32_getmantsd_round ((__v2df)(__m128d)(X), \ 12648 (__v2df)(__m128d)(Y), \ 12649 (int)(((D)<<2) | (C)), \ 12650 _MM_FROUND_CUR_DIRECTION)) 12651 12652#define _mm_getmant_ss(X, Y, C, D) \ 12653 ((__m128)__builtin_ia32_getmantss_round ((__v4sf)(__m128)(X), \ 12654 (__v4sf)(__m128)(Y), \ 12655 
(int)(((D)<<2) | (C)), \ 12656 _MM_FROUND_CUR_DIRECTION)) 12657 12658#define _mm_getexp_ss(A, B) \ 12659 ((__m128)__builtin_ia32_getexpss128_mask((__v4sf)(__m128)(A), (__v4sf)(__m128)(B), \ 12660 _MM_FROUND_CUR_DIRECTION)) 12661 12662#define _mm_getexp_sd(A, B) \ 12663 ((__m128d)__builtin_ia32_getexpsd128_mask((__v2df)(__m128d)(A), (__v2df)(__m128d)(B),\ 12664 _MM_FROUND_CUR_DIRECTION)) 12665 12666#define _mm512_getexp_ps(A) \ 12667 ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \ 12668 (__v16sf)_mm512_undefined_ps(), (__mmask16)-1, _MM_FROUND_CUR_DIRECTION)) 12669 12670#define _mm512_mask_getexp_ps(W, U, A) \ 12671 ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \ 12672 (__v16sf)(__m512)(W), (__mmask16)(U), _MM_FROUND_CUR_DIRECTION)) 12673 12674#define _mm512_maskz_getexp_ps(U, A) \ 12675 ((__m512)__builtin_ia32_getexpps512_mask((__v16sf)(__m512)(A), \ 12676 (__v16sf)_mm512_setzero_ps(), (__mmask16)(U), _MM_FROUND_CUR_DIRECTION)) 12677 12678#define _mm512_getexp_pd(A) \ 12679 ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \ 12680 (__v8df)_mm512_undefined_pd(), (__mmask8)-1, _MM_FROUND_CUR_DIRECTION)) 12681 12682#define _mm512_mask_getexp_pd(W, U, A) \ 12683 ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \ 12684 (__v8df)(__m512d)(W), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION)) 12685 12686#define _mm512_maskz_getexp_pd(U, A) \ 12687 ((__m512d)__builtin_ia32_getexppd512_mask((__v8df)(__m512d)(A), \ 12688 (__v8df)_mm512_setzero_pd(), (__mmask8)(U), _MM_FROUND_CUR_DIRECTION)) 12689#endif 12690 12691#ifdef __OPTIMIZE__ 12692extern __inline __m512 12693__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12694_mm512_roundscale_ps (__m512 __A, const int __imm) 12695{ 12696 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, __imm, 12697 (__v16sf) 12698 _mm512_undefined_ps (), 12699 -1, 12700 _MM_FROUND_CUR_DIRECTION); 12701} 12702 12703extern __inline __m512 12704__attribute__ 
((__gnu_inline__, __always_inline__, __artificial__)) 12705_mm512_mask_roundscale_ps (__m512 __A, __mmask16 __B, __m512 __C, 12706 const int __imm) 12707{ 12708 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __C, __imm, 12709 (__v16sf) __A, 12710 (__mmask16) __B, 12711 _MM_FROUND_CUR_DIRECTION); 12712} 12713 12714extern __inline __m512 12715__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12716_mm512_maskz_roundscale_ps (__mmask16 __A, __m512 __B, const int __imm) 12717{ 12718 return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __B, 12719 __imm, 12720 (__v16sf) 12721 _mm512_setzero_ps (), 12722 (__mmask16) __A, 12723 _MM_FROUND_CUR_DIRECTION); 12724} 12725 12726extern __inline __m512d 12727__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12728_mm512_roundscale_pd (__m512d __A, const int __imm) 12729{ 12730 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, __imm, 12731 (__v8df) 12732 _mm512_undefined_pd (), 12733 -1, 12734 _MM_FROUND_CUR_DIRECTION); 12735} 12736 12737extern __inline __m512d 12738__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12739_mm512_mask_roundscale_pd (__m512d __A, __mmask8 __B, __m512d __C, 12740 const int __imm) 12741{ 12742 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __C, __imm, 12743 (__v8df) __A, 12744 (__mmask8) __B, 12745 _MM_FROUND_CUR_DIRECTION); 12746} 12747 12748extern __inline __m512d 12749__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12750_mm512_maskz_roundscale_pd (__mmask8 __A, __m512d __B, const int __imm) 12751{ 12752 return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __B, 12753 __imm, 12754 (__v8df) 12755 _mm512_setzero_pd (), 12756 (__mmask8) __A, 12757 _MM_FROUND_CUR_DIRECTION); 12758} 12759 12760extern __inline __m128 12761__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12762_mm_roundscale_ss (__m128 __A, __m128 __B, const int __imm) 12763{ 12764 return (__m128) 
__builtin_ia32_rndscaless_round ((__v4sf) __A, 12765 (__v4sf) __B, __imm, 12766 _MM_FROUND_CUR_DIRECTION); 12767} 12768 12769extern __inline __m128d 12770__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12771_mm_roundscale_sd (__m128d __A, __m128d __B, const int __imm) 12772{ 12773 return (__m128d) __builtin_ia32_rndscalesd_round ((__v2df) __A, 12774 (__v2df) __B, __imm, 12775 _MM_FROUND_CUR_DIRECTION); 12776} 12777 12778#else 12779#define _mm512_roundscale_ps(A, B) \ 12780 ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(A), (int)(B),\ 12781 (__v16sf)_mm512_undefined_ps(), (__mmask16)(-1), _MM_FROUND_CUR_DIRECTION)) 12782#define _mm512_mask_roundscale_ps(A, B, C, D) \ 12783 ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(C), \ 12784 (int)(D), \ 12785 (__v16sf)(__m512)(A), \ 12786 (__mmask16)(B), _MM_FROUND_CUR_DIRECTION)) 12787#define _mm512_maskz_roundscale_ps(A, B, C) \ 12788 ((__m512) __builtin_ia32_rndscaleps_mask ((__v16sf)(__m512)(B), \ 12789 (int)(C), \ 12790 (__v16sf)_mm512_setzero_ps(),\ 12791 (__mmask16)(A), _MM_FROUND_CUR_DIRECTION)) 12792#define _mm512_roundscale_pd(A, B) \ 12793 ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(A), (int)(B),\ 12794 (__v8df)_mm512_undefined_pd(), (__mmask8)(-1), _MM_FROUND_CUR_DIRECTION)) 12795#define _mm512_mask_roundscale_pd(A, B, C, D) \ 12796 ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(C), \ 12797 (int)(D), \ 12798 (__v8df)(__m512d)(A), \ 12799 (__mmask8)(B), _MM_FROUND_CUR_DIRECTION)) 12800#define _mm512_maskz_roundscale_pd(A, B, C) \ 12801 ((__m512d) __builtin_ia32_rndscalepd_mask ((__v8df)(__m512d)(B), \ 12802 (int)(C), \ 12803 (__v8df)_mm512_setzero_pd(),\ 12804 (__mmask8)(A), _MM_FROUND_CUR_DIRECTION)) 12805#define _mm_roundscale_ss(A, B, C) \ 12806 ((__m128) __builtin_ia32_rndscaless_round ((__v4sf)(__m128)(A), \ 12807 (__v4sf)(__m128)(B), (int)(C), _MM_FROUND_CUR_DIRECTION)) 12808#define _mm_roundscale_sd(A, B, C) \ 12809 ((__m128d) 
__builtin_ia32_rndscalesd_round ((__v2df)(__m128d)(A), \ 12810 (__v2df)(__m128d)(B), (int)(C), _MM_FROUND_CUR_DIRECTION)) 12811#endif 12812 12813#ifdef __OPTIMIZE__ 12814extern __inline __mmask8 12815__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12816_mm512_cmp_pd_mask (__m512d __X, __m512d __Y, const int __P) 12817{ 12818 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, 12819 (__v8df) __Y, __P, 12820 (__mmask8) -1, 12821 _MM_FROUND_CUR_DIRECTION); 12822} 12823 12824extern __inline __mmask16 12825__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12826_mm512_cmp_ps_mask (__m512 __X, __m512 __Y, const int __P) 12827{ 12828 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, 12829 (__v16sf) __Y, __P, 12830 (__mmask16) -1, 12831 _MM_FROUND_CUR_DIRECTION); 12832} 12833 12834extern __inline __mmask16 12835__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12836_mm512_mask_cmp_ps_mask (__mmask16 __U, __m512 __X, __m512 __Y, const int __P) 12837{ 12838 return (__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf) __X, 12839 (__v16sf) __Y, __P, 12840 (__mmask16) __U, 12841 _MM_FROUND_CUR_DIRECTION); 12842} 12843 12844extern __inline __mmask8 12845__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12846_mm512_mask_cmp_pd_mask (__mmask8 __U, __m512d __X, __m512d __Y, const int __P) 12847{ 12848 return (__mmask8) __builtin_ia32_cmppd512_mask ((__v8df) __X, 12849 (__v8df) __Y, __P, 12850 (__mmask8) __U, 12851 _MM_FROUND_CUR_DIRECTION); 12852} 12853 12854extern __inline __mmask8 12855__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12856_mm_cmp_sd_mask (__m128d __X, __m128d __Y, const int __P) 12857{ 12858 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X, 12859 (__v2df) __Y, __P, 12860 (__mmask8) -1, 12861 _MM_FROUND_CUR_DIRECTION); 12862} 12863 12864extern __inline __mmask8 12865__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 
12866_mm_mask_cmp_sd_mask (__mmask8 __M, __m128d __X, __m128d __Y, const int __P) 12867{ 12868 return (__mmask8) __builtin_ia32_cmpsd_mask ((__v2df) __X, 12869 (__v2df) __Y, __P, 12870 (__mmask8) __M, 12871 _MM_FROUND_CUR_DIRECTION); 12872} 12873 12874extern __inline __mmask8 12875__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12876_mm_cmp_ss_mask (__m128 __X, __m128 __Y, const int __P) 12877{ 12878 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X, 12879 (__v4sf) __Y, __P, 12880 (__mmask8) -1, 12881 _MM_FROUND_CUR_DIRECTION); 12882} 12883 12884extern __inline __mmask8 12885__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12886_mm_mask_cmp_ss_mask (__mmask8 __M, __m128 __X, __m128 __Y, const int __P) 12887{ 12888 return (__mmask8) __builtin_ia32_cmpss_mask ((__v4sf) __X, 12889 (__v4sf) __Y, __P, 12890 (__mmask8) __M, 12891 _MM_FROUND_CUR_DIRECTION); 12892} 12893 12894#else 12895#define _mm512_cmp_pd_mask(X, Y, P) \ 12896 ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X), \ 12897 (__v8df)(__m512d)(Y), (int)(P),\ 12898 (__mmask8)-1,_MM_FROUND_CUR_DIRECTION)) 12899 12900#define _mm512_cmp_ps_mask(X, Y, P) \ 12901 ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \ 12902 (__v16sf)(__m512)(Y), (int)(P),\ 12903 (__mmask16)-1,_MM_FROUND_CUR_DIRECTION)) 12904 12905#define _mm512_mask_cmp_pd_mask(M, X, Y, P) \ 12906 ((__mmask8) __builtin_ia32_cmppd512_mask ((__v8df)(__m512d)(X), \ 12907 (__v8df)(__m512d)(Y), (int)(P),\ 12908 (__mmask8)M, _MM_FROUND_CUR_DIRECTION)) 12909 12910#define _mm512_mask_cmp_ps_mask(M, X, Y, P) \ 12911 ((__mmask16) __builtin_ia32_cmpps512_mask ((__v16sf)(__m512)(X), \ 12912 (__v16sf)(__m512)(Y), (int)(P),\ 12913 (__mmask16)M,_MM_FROUND_CUR_DIRECTION)) 12914 12915#define _mm_cmp_sd_mask(X, Y, P) \ 12916 ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \ 12917 (__v2df)(__m128d)(Y), (int)(P),\ 12918 (__mmask8)-1,_MM_FROUND_CUR_DIRECTION)) 12919 12920#define 
_mm_mask_cmp_sd_mask(M, X, Y, P) \ 12921 ((__mmask8) __builtin_ia32_cmpsd_mask ((__v2df)(__m128d)(X), \ 12922 (__v2df)(__m128d)(Y), (int)(P),\ 12923 M,_MM_FROUND_CUR_DIRECTION)) 12924 12925#define _mm_cmp_ss_mask(X, Y, P) \ 12926 ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \ 12927 (__v4sf)(__m128)(Y), (int)(P), \ 12928 (__mmask8)-1,_MM_FROUND_CUR_DIRECTION)) 12929 12930#define _mm_mask_cmp_ss_mask(M, X, Y, P) \ 12931 ((__mmask8) __builtin_ia32_cmpss_mask ((__v4sf)(__m128)(X), \ 12932 (__v4sf)(__m128)(Y), (int)(P), \ 12933 M,_MM_FROUND_CUR_DIRECTION)) 12934#endif 12935 12936extern __inline __mmask16 12937__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12938_mm512_kmov (__mmask16 __A) 12939{ 12940 return __builtin_ia32_kmov16 (__A); 12941} 12942 12943extern __inline __m512 12944__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12945_mm512_castpd_ps (__m512d __A) 12946{ 12947 return (__m512) (__A); 12948} 12949 12950extern __inline __m512i 12951__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12952_mm512_castpd_si512 (__m512d __A) 12953{ 12954 return (__m512i) (__A); 12955} 12956 12957extern __inline __m512d 12958__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12959_mm512_castps_pd (__m512 __A) 12960{ 12961 return (__m512d) (__A); 12962} 12963 12964extern __inline __m512i 12965__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12966_mm512_castps_si512 (__m512 __A) 12967{ 12968 return (__m512i) (__A); 12969} 12970 12971extern __inline __m512 12972__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12973_mm512_castsi512_ps (__m512i __A) 12974{ 12975 return (__m512) (__A); 12976} 12977 12978extern __inline __m512d 12979__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12980_mm512_castsi512_pd (__m512i __A) 12981{ 12982 return (__m512d) (__A); 12983} 12984 12985extern __inline __m128d 12986__attribute__ ((__gnu_inline__, 
__always_inline__, __artificial__)) 12987_mm512_castpd512_pd128 (__m512d __A) 12988{ 12989 return (__m128d)_mm512_extractf32x4_ps((__m512)__A, 0); 12990} 12991 12992extern __inline __m128 12993__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 12994_mm512_castps512_ps128 (__m512 __A) 12995{ 12996 return _mm512_extractf32x4_ps(__A, 0); 12997} 12998 12999extern __inline __m128i 13000__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13001_mm512_castsi512_si128 (__m512i __A) 13002{ 13003 return (__m128i)_mm512_extracti32x4_epi32((__m512i)__A, 0); 13004} 13005 13006extern __inline __m256d 13007__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13008_mm512_castpd512_pd256 (__m512d __A) 13009{ 13010 return _mm512_extractf64x4_pd(__A, 0); 13011} 13012 13013extern __inline __m256 13014__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13015_mm512_castps512_ps256 (__m512 __A) 13016{ 13017 return (__m256)_mm512_extractf64x4_pd((__m512d)__A, 0); 13018} 13019 13020extern __inline __m256i 13021__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13022_mm512_castsi512_si256 (__m512i __A) 13023{ 13024 return (__m256i)_mm512_extractf64x4_pd((__m512d)__A, 0); 13025} 13026 13027extern __inline __m512d 13028__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13029_mm512_castpd128_pd512 (__m128d __A) 13030{ 13031 return (__m512d) __builtin_ia32_pd512_pd((__m128d)__A); 13032} 13033 13034extern __inline __m512 13035__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13036_mm512_castps128_ps512 (__m128 __A) 13037{ 13038 return (__m512) __builtin_ia32_ps512_ps((__m128)__A); 13039} 13040 13041extern __inline __m512i 13042__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13043_mm512_castsi128_si512 (__m128i __A) 13044{ 13045 return (__m512i) __builtin_ia32_si512_si((__v4si)__A); 13046} 13047 13048extern __inline __m512d 13049__attribute__ ((__gnu_inline__, 
__always_inline__, __artificial__)) 13050_mm512_castpd256_pd512 (__m256d __A) 13051{ 13052 return __builtin_ia32_pd512_256pd (__A); 13053} 13054 13055extern __inline __m512 13056__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13057_mm512_castps256_ps512 (__m256 __A) 13058{ 13059 return __builtin_ia32_ps512_256ps (__A); 13060} 13061 13062extern __inline __m512i 13063__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13064_mm512_castsi256_si512 (__m256i __A) 13065{ 13066 return (__m512i)__builtin_ia32_si512_256si ((__v8si)__A); 13067} 13068 13069extern __inline __mmask16 13070__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13071_mm512_cmpeq_epu32_mask (__m512i __A, __m512i __B) 13072{ 13073 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A, 13074 (__v16si) __B, 0, 13075 (__mmask16) -1); 13076} 13077 13078extern __inline __mmask16 13079__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13080_mm512_mask_cmpeq_epu32_mask (__mmask16 __U, __m512i __A, __m512i __B) 13081{ 13082 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A, 13083 (__v16si) __B, 0, __U); 13084} 13085 13086extern __inline __mmask8 13087__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13088_mm512_mask_cmpeq_epu64_mask (__mmask8 __U, __m512i __A, __m512i __B) 13089{ 13090 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A, 13091 (__v8di) __B, 0, __U); 13092} 13093 13094extern __inline __mmask8 13095__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13096_mm512_cmpeq_epu64_mask (__m512i __A, __m512i __B) 13097{ 13098 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A, 13099 (__v8di) __B, 0, 13100 (__mmask8) -1); 13101} 13102 13103extern __inline __mmask16 13104__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13105_mm512_cmpgt_epu32_mask (__m512i __A, __m512i __B) 13106{ 13107 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) 
__A, 13108 (__v16si) __B, 6, 13109 (__mmask16) -1); 13110} 13111 13112extern __inline __mmask16 13113__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13114_mm512_mask_cmpgt_epu32_mask (__mmask16 __U, __m512i __A, __m512i __B) 13115{ 13116 return (__mmask16) __builtin_ia32_ucmpd512_mask ((__v16si) __A, 13117 (__v16si) __B, 6, __U); 13118} 13119 13120extern __inline __mmask8 13121__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13122_mm512_mask_cmpgt_epu64_mask (__mmask8 __U, __m512i __A, __m512i __B) 13123{ 13124 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A, 13125 (__v8di) __B, 6, __U); 13126} 13127 13128extern __inline __mmask8 13129__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 13130_mm512_cmpgt_epu64_mask (__m512i __A, __m512i __B) 13131{ 13132 return (__mmask8) __builtin_ia32_ucmpq512_mask ((__v8di) __A, 13133 (__v8di) __B, 6, 13134 (__mmask8) -1); 13135} 13136 13137#ifdef __DISABLE_AVX512F__ 13138#undef __DISABLE_AVX512F__ 13139#pragma GCC pop_options 13140#endif /* __DISABLE_AVX512F__ */ 13141 13142#endif /* _AVX512FINTRIN_H_INCLUDED */ 13143