1/* Copyright (C) 2014-2015 Free Software Foundation, Inc. 2 3 This file is part of GCC. 4 5 GCC is free software; you can redistribute it and/or modify 6 it under the terms of the GNU General Public License as published by 7 the Free Software Foundation; either version 3, or (at your option) 8 any later version. 9 10 GCC is distributed in the hope that it will be useful, 11 but WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 GNU General Public License for more details. 14 15 Under Section 7 of GPL version 3, you are granted additional 16 permissions described in the GCC Runtime Library Exception, version 17 3.1, as published by the Free Software Foundation. 18 19 You should have received a copy of the GNU General Public License and 20 a copy of the GCC Runtime Library Exception along with this program; 21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 22 <http://www.gnu.org/licenses/>. */ 23 24#ifndef _IMMINTRIN_H_INCLUDED 25#error "Never use <avx512vldqintrin.h> directly; include <immintrin.h> instead." 26#endif 27 28#ifndef _AVX512VLDQINTRIN_H_INCLUDED 29#define _AVX512VLDQINTRIN_H_INCLUDED 30 31#if !defined(__AVX512VL__) || !defined(__AVX512DQ__) 32#pragma GCC push_options 33#pragma GCC target("avx512vl,avx512dq") 34#define __DISABLE_AVX512VLDQ__ 35#endif /* __AVX512VLDQ__ */ 36 37extern __inline __m256i 38__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 39_mm256_cvttpd_epi64 (__m256d __A) 40{ 41 return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A, 42 (__v4di) 43 _mm256_setzero_si256 (), 44 (__mmask8) -1); 45} 46 47extern __inline __m256i 48__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 49_mm256_mask_cvttpd_epi64 (__m256i __W, __mmask8 __U, __m256d __A) 50{ 51 return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A, 52 (__v4di) __W, 53 (__mmask8) __U); 54} 55 56extern __inline __m256i 57__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 58_mm256_maskz_cvttpd_epi64 (__mmask8 __U, __m256d __A) 59{ 60 return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A, 61 (__v4di) 62 _mm256_setzero_si256 (), 63 (__mmask8) __U); 64} 65 66extern __inline __m128i 67__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 68_mm_cvttpd_epi64 (__m128d __A) 69{ 70 return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A, 71 (__v2di) 72 _mm_setzero_di (), 73 (__mmask8) -1); 74} 75 76extern __inline __m128i 77__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 78_mm_mask_cvttpd_epi64 (__m128i __W, __mmask8 __U, __m128d __A) 79{ 80 return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A, 81 (__v2di) __W, 82 (__mmask8) __U); 83} 84 85extern __inline __m128i 86__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 87_mm_maskz_cvttpd_epi64 (__mmask8 __U, __m128d __A) 88{ 89 return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A, 90 (__v2di) 91 _mm_setzero_si128 (), 92 (__mmask8) __U); 93} 94 95extern __inline __m256i 96__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 97_mm256_cvttpd_epu64 (__m256d __A) 98{ 99 return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A, 100 (__v4di) 101 _mm256_setzero_si256 (), 102 (__mmask8) -1); 103} 104 105extern __inline __m256i 106__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 107_mm256_mask_cvttpd_epu64 (__m256i __W, __mmask8 __U, __m256d __A) 108{ 109 return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A, 110 (__v4di) __W, 111 (__mmask8) __U); 112} 113 114extern __inline __m256i 115__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 116_mm256_maskz_cvttpd_epu64 (__mmask8 __U, __m256d __A) 117{ 118 return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A, 119 (__v4di) 120 _mm256_setzero_si256 (), 121 (__mmask8) __U); 122} 123 124extern __inline __m128i 125__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 126_mm_cvttpd_epu64 (__m128d __A) 127{ 128 return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A, 129 (__v2di) 130 _mm_setzero_di (), 131 (__mmask8) -1); 132} 133 134extern __inline __m128i 135__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 136_mm_mask_cvttpd_epu64 (__m128i __W, __mmask8 __U, __m128d __A) 137{ 138 return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A, 139 (__v2di) __W, 140 (__mmask8) __U); 141} 142 143extern __inline __m128i 144__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 145_mm_maskz_cvttpd_epu64 (__mmask8 __U, __m128d __A) 146{ 147 return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A, 148 (__v2di) 149 _mm_setzero_si128 (), 150 (__mmask8) __U); 151} 152 153extern __inline __m256i 154__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 155_mm256_cvtpd_epi64 (__m256d __A) 156{ 157 return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A, 158 (__v4di) 159 _mm256_setzero_si256 (), 160 (__mmask8) -1); 161} 162 163extern __inline __m256i 164__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 165_mm256_mask_cvtpd_epi64 (__m256i __W, __mmask8 __U, __m256d __A) 166{ 167 return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A, 168 (__v4di) __W, 169 (__mmask8) __U); 170} 171 172extern __inline __m256i 173__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 174_mm256_maskz_cvtpd_epi64 (__mmask8 __U, __m256d __A) 175{ 176 return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A, 177 (__v4di) 178 _mm256_setzero_si256 (), 179 (__mmask8) __U); 180} 181 182extern __inline __m128i 183__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 184_mm_cvtpd_epi64 (__m128d __A) 185{ 186 return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A, 187 (__v2di) 188 _mm_setzero_di (), 189 (__mmask8) -1); 190} 191 192extern __inline __m128i 193__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 194_mm_mask_cvtpd_epi64 (__m128i __W, __mmask8 __U, __m128d __A) 195{ 196 return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A, 197 (__v2di) __W, 198 (__mmask8) __U); 199} 200 201extern __inline __m128i 202__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 203_mm_maskz_cvtpd_epi64 (__mmask8 __U, __m128d __A) 204{ 205 return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A, 206 (__v2di) 207 _mm_setzero_si128 (), 208 (__mmask8) __U); 209} 210 211extern __inline __m256i 212__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 213_mm256_cvtpd_epu64 (__m256d __A) 214{ 215 return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A, 216 (__v4di) 217 _mm256_setzero_si256 (), 218 (__mmask8) -1); 219} 220 221extern __inline __m256i 222__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 223_mm256_mask_cvtpd_epu64 (__m256i __W, __mmask8 __U, __m256d __A) 224{ 225 return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A, 226 (__v4di) __W, 227 (__mmask8) __U); 228} 229 230extern __inline __m256i 231__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 232_mm256_maskz_cvtpd_epu64 (__mmask8 __U, __m256d __A) 233{ 234 return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A, 235 (__v4di) 236 _mm256_setzero_si256 (), 237 (__mmask8) __U); 238} 239 240extern __inline __m128i 241__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 242_mm_cvtpd_epu64 (__m128d __A) 243{ 244 return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A, 245 (__v2di) 246 _mm_setzero_di (), 247 (__mmask8) -1); 248} 249 250extern __inline __m128i 251__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 252_mm_mask_cvtpd_epu64 (__m128i __W, __mmask8 __U, __m128d __A) 253{ 254 return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A, 255 (__v2di) __W, 256 (__mmask8) __U); 257} 258 259extern __inline __m128i 260__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 261_mm_maskz_cvtpd_epu64 (__mmask8 __U, __m128d __A) 262{ 263 return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A, 264 (__v2di) 265 _mm_setzero_si128 (), 266 (__mmask8) __U); 267} 268 269extern __inline __m256i 270__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 271_mm256_cvttps_epi64 (__m128 __A) 272{ 273 return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A, 274 (__v4di) 275 _mm256_setzero_si256 (), 276 (__mmask8) -1); 277} 278 279extern __inline __m256i 280__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 281_mm256_mask_cvttps_epi64 (__m256i __W, __mmask8 __U, __m128 __A) 282{ 283 return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A, 284 (__v4di) __W, 285 (__mmask8) __U); 286} 287 288extern __inline __m256i 289__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 290_mm256_maskz_cvttps_epi64 (__mmask8 __U, __m128 __A) 291{ 292 return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A, 293 (__v4di) 294 _mm256_setzero_si256 (), 295 (__mmask8) __U); 296} 297 298extern __inline __m128i 299__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 300_mm_cvttps_epi64 (__m128 __A) 301{ 302 return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A, 303 (__v2di) 304 _mm_setzero_di (), 305 (__mmask8) -1); 306} 307 308extern __inline __m128i 309__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 310_mm_mask_cvttps_epi64 (__m128i __W, __mmask8 __U, __m128 __A) 311{ 312 return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A, 313 (__v2di) __W, 314 (__mmask8) __U); 315} 316 317extern __inline __m128i 318__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 319_mm_maskz_cvttps_epi64 (__mmask8 __U, __m128 __A) 320{ 321 return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A, 322 (__v2di) 323 _mm_setzero_di (), 324 (__mmask8) __U); 325} 326 327extern __inline __m256i 328__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 329_mm256_cvttps_epu64 (__m128 __A) 330{ 331 return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A, 332 (__v4di) 333 _mm256_setzero_si256 (), 334 (__mmask8) -1); 335} 336 337extern __inline __m256i 338__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 339_mm256_mask_cvttps_epu64 (__m256i __W, __mmask8 __U, __m128 __A) 340{ 341 return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A, 342 (__v4di) __W, 343 (__mmask8) __U); 344} 345 346extern __inline __m256i 347__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 348_mm256_maskz_cvttps_epu64 (__mmask8 __U, __m128 __A) 349{ 350 return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A, 351 (__v4di) 352 _mm256_setzero_si256 (), 353 (__mmask8) __U); 354} 355 356extern __inline __m128i 357__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 358_mm_cvttps_epu64 (__m128 __A) 359{ 360 return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A, 361 (__v2di) 362 _mm_setzero_di (), 363 (__mmask8) -1); 364} 365 366extern __inline __m128i 367__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 368_mm_mask_cvttps_epu64 (__m128i __W, __mmask8 __U, __m128 __A) 369{ 370 return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A, 371 (__v2di) __W, 372 (__mmask8) __U); 373} 374 375extern __inline __m128i 376__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 377_mm_maskz_cvttps_epu64 (__mmask8 __U, __m128 __A) 378{ 379 return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A, 380 (__v2di) 381 _mm_setzero_di (), 382 (__mmask8) __U); 383} 384 385extern __inline __m256d 386__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 387_mm256_broadcast_f64x2 (__m128d __A) 388{ 389 return (__m256d) __builtin_ia32_broadcastf64x2_256_mask ((__v2df) 390 __A, 391 (__v4df)_mm256_undefined_pd(), 392 (__mmask8) - 393 1); 394} 395 396extern __inline __m256d 397__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 398_mm256_mask_broadcast_f64x2 (__m256d __O, __mmask8 __M, __m128d __A) 399{ 400 return (__m256d) __builtin_ia32_broadcastf64x2_256_mask ((__v2df) 401 __A, 402 (__v4df) 403 __O, __M); 404} 405 406extern __inline __m256d 407__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 408_mm256_maskz_broadcast_f64x2 (__mmask8 __M, __m128d __A) 409{ 410 return (__m256d) __builtin_ia32_broadcastf64x2_256_mask ((__v2df) 411 __A, 412 (__v4df) 413 _mm256_setzero_ps (), 414 __M); 415} 416 417extern __inline __m256i 418__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 419_mm256_broadcast_i64x2 (__m128i __A) 420{ 421 return (__m256i) __builtin_ia32_broadcasti64x2_256_mask ((__v2di) 422 __A, 423 (__v4di)_mm256_undefined_si256(), 424 (__mmask8) - 425 1); 426} 427 428extern __inline __m256i 429__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 430_mm256_mask_broadcast_i64x2 (__m256i __O, __mmask8 __M, __m128i __A) 431{ 432 return (__m256i) __builtin_ia32_broadcasti64x2_256_mask ((__v2di) 433 __A, 434 (__v4di) 435 __O, __M); 436} 437 438extern __inline __m256i 439__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 440_mm256_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A) 441{ 442 return (__m256i) __builtin_ia32_broadcasti64x2_256_mask ((__v2di) 443 __A, 444 (__v4di) 445 _mm256_setzero_si256 (), 446 __M); 447} 448 449extern __inline __m256 450__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 451_mm256_broadcast_f32x2 (__m128 __A) 452{ 453 return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A, 454 (__v8sf)_mm256_undefined_ps(), 455 (__mmask8) - 456 1); 457} 458 459extern __inline __m256 460__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 461_mm256_mask_broadcast_f32x2 (__m256 __O, __mmask8 __M, __m128 __A) 462{ 463 return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A, 464 (__v8sf) __O, 465 __M); 466} 467 468extern __inline __m256 469__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 470_mm256_maskz_broadcast_f32x2 (__mmask8 __M, __m128 __A) 471{ 472 return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A, 473 (__v8sf) 474 _mm256_setzero_ps (), 475 __M); 476} 477 478extern __inline __m256i 479__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 480_mm256_broadcast_i32x2 (__m128i __A) 481{ 482 return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si) 483 __A, 484 (__v8si)_mm256_undefined_si256(), 485 (__mmask8) - 486 1); 487} 488 489extern __inline __m256i 490__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 491_mm256_mask_broadcast_i32x2 (__m256i __O, __mmask8 __M, __m128i __A) 492{ 493 return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si) 494 __A, 495 (__v8si) 496 __O, __M); 497} 498 499extern __inline __m256i 500__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 501_mm256_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A) 502{ 503 return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si) 504 __A, 505 (__v8si) 506 _mm256_setzero_si256 (), 507 __M); 508} 509 510extern __inline __m128i 511__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 512_mm_broadcast_i32x2 (__m128i __A) 513{ 514 return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si) 515 __A, 516 (__v4si)_mm_undefined_si128(), 517 (__mmask8) - 518 1); 519} 520 521extern __inline __m128i 522__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 523_mm_mask_broadcast_i32x2 (__m128i __O, __mmask8 __M, __m128i __A) 524{ 525 return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si) 526 __A, 527 (__v4si) 528 __O, __M); 529} 530 531extern __inline __m128i 532__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 533_mm_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A) 534{ 535 return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si) 536 __A, 537 (__v4si) 538 _mm_setzero_si128 (), 539 __M); 540} 541 542extern __inline __m256i 543__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 544_mm256_mullo_epi64 (__m256i __A, __m256i __B) 545{ 546 return (__m256i) ((__v4du) __A * (__v4du) __B); 547} 548 549extern __inline __m256i 550__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 551_mm256_mask_mullo_epi64 (__m256i __W, __mmask8 __U, __m256i __A, 552 __m256i __B) 553{ 554 return (__m256i) __builtin_ia32_pmullq256_mask ((__v4di) __A, 555 (__v4di) __B, 556 (__v4di) __W, 557 (__mmask8) __U); 558} 559 560extern __inline __m256i 561__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 562_mm256_maskz_mullo_epi64 (__mmask8 __U, __m256i __A, __m256i __B) 563{ 564 return (__m256i) __builtin_ia32_pmullq256_mask ((__v4di) __A, 565 (__v4di) __B, 566 (__v4di) 567 _mm256_setzero_si256 (), 568 (__mmask8) __U); 569} 570 571extern __inline __m128i 572__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 573_mm_mullo_epi64 (__m128i __A, __m128i __B) 574{ 575 return (__m128i) ((__v2du) __A * (__v2du) __B); 576} 577 578extern __inline __m128i 579__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 580_mm_mask_mullo_epi64 (__m128i __W, __mmask8 __U, __m128i __A, 581 __m128i __B) 582{ 583 return (__m128i) __builtin_ia32_pmullq128_mask ((__v2di) __A, 584 (__v2di) __B, 585 (__v2di) __W, 586 (__mmask8) __U); 587} 588 589extern __inline __m128i 590__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 591_mm_maskz_mullo_epi64 (__mmask8 __U, __m128i __A, __m128i __B) 592{ 593 return (__m128i) __builtin_ia32_pmullq128_mask ((__v2di) __A, 594 (__v2di) __B, 595 (__v2di) 596 _mm_setzero_di (), 597 (__mmask8) __U); 598} 599 600extern __inline __m256d 601__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 602_mm256_mask_andnot_pd (__m256d __W, __mmask8 __U, __m256d __A, 603 __m256d __B) 604{ 605 return (__m256d) __builtin_ia32_andnpd256_mask ((__v4df) __A, 606 (__v4df) __B, 607 (__v4df) __W, 608 (__mmask8) __U); 609} 610 611extern __inline __m256d 612__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 613_mm256_maskz_andnot_pd (__mmask8 __U, __m256d __A, __m256d __B) 614{ 615 return (__m256d) __builtin_ia32_andnpd256_mask ((__v4df) __A, 616 (__v4df) __B, 617 (__v4df) 618 _mm256_setzero_pd (), 619 (__mmask8) __U); 620} 621 622extern __inline __m128d 623__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 624_mm_mask_andnot_pd (__m128d __W, __mmask8 __U, __m128d __A, 625 __m128d __B) 626{ 627 return (__m128d) __builtin_ia32_andnpd128_mask ((__v2df) __A, 628 (__v2df) __B, 629 (__v2df) __W, 630 (__mmask8) __U); 631} 632 633extern __inline __m128d 634__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 635_mm_maskz_andnot_pd (__mmask8 __U, __m128d __A, __m128d __B) 636{ 637 return (__m128d) __builtin_ia32_andnpd128_mask ((__v2df) __A, 638 (__v2df) __B, 639 (__v2df) 640 _mm_setzero_pd (), 641 (__mmask8) __U); 642} 643 644extern __inline __m256 645__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 646_mm256_mask_andnot_ps (__m256 __W, __mmask8 __U, __m256 __A, 647 __m256 __B) 648{ 649 return (__m256) __builtin_ia32_andnps256_mask ((__v8sf) __A, 650 (__v8sf) __B, 651 (__v8sf) __W, 652 (__mmask8) __U); 653} 654 655extern __inline __m256 656__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 657_mm256_maskz_andnot_ps (__mmask8 __U, __m256 __A, __m256 __B) 658{ 659 return (__m256) __builtin_ia32_andnps256_mask ((__v8sf) __A, 660 (__v8sf) __B, 661 (__v8sf) 662 _mm256_setzero_ps (), 663 (__mmask8) __U); 664} 665 666extern __inline __m128 667__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 668_mm_mask_andnot_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 669{ 670 return (__m128) __builtin_ia32_andnps128_mask ((__v4sf) __A, 671 (__v4sf) __B, 672 (__v4sf) __W, 673 (__mmask8) __U); 674} 675 676extern __inline __m128 677__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 678_mm_maskz_andnot_ps (__mmask8 __U, __m128 __A, __m128 __B) 679{ 680 return (__m128) __builtin_ia32_andnps128_mask ((__v4sf) __A, 681 (__v4sf) __B, 682 (__v4sf) 683 _mm_setzero_ps (), 684 (__mmask8) __U); 685} 686 687extern __inline __m256i 688__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 689_mm256_cvtps_epi64 (__m128 __A) 690{ 691 return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A, 692 (__v4di) 693 _mm256_setzero_si256 (), 694 (__mmask8) -1); 695} 696 697extern __inline __m256i 698__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 699_mm256_mask_cvtps_epi64 (__m256i __W, __mmask8 __U, __m128 __A) 700{ 701 return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A, 702 (__v4di) __W, 703 (__mmask8) __U); 704} 705 706extern __inline __m256i 707__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 708_mm256_maskz_cvtps_epi64 (__mmask8 __U, __m128 __A) 709{ 710 return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A, 711 (__v4di) 712 _mm256_setzero_si256 (), 713 (__mmask8) __U); 714} 715 716extern __inline __m128i 717__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 718_mm_cvtps_epi64 (__m128 __A) 719{ 720 return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A, 721 (__v2di) 722 _mm_setzero_di (), 723 (__mmask8) -1); 724} 725 726extern __inline __m128i 727__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 728_mm_mask_cvtps_epi64 (__m128i __W, __mmask8 __U, __m128 __A) 729{ 730 return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A, 731 (__v2di) __W, 732 (__mmask8) __U); 733} 734 735extern __inline __m128i 736__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 737_mm_maskz_cvtps_epi64 (__mmask8 __U, __m128 __A) 738{ 739 return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A, 740 (__v2di) 741 _mm_setzero_di (), 742 (__mmask8) __U); 743} 744 745extern __inline __m256i 746__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 747_mm256_cvtps_epu64 (__m128 __A) 748{ 749 return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A, 750 (__v4di) 751 _mm256_setzero_si256 (), 752 (__mmask8) -1); 753} 754 755extern __inline __m256i 756__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 757_mm256_mask_cvtps_epu64 (__m256i __W, __mmask8 __U, __m128 __A) 758{ 759 return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A, 760 (__v4di) __W, 761 (__mmask8) __U); 762} 763 764extern __inline __m256i 765__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 766_mm256_maskz_cvtps_epu64 (__mmask8 __U, __m128 __A) 767{ 768 return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A, 769 (__v4di) 770 _mm256_setzero_si256 (), 771 (__mmask8) __U); 772} 773 774extern __inline __m128i 775__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 776_mm_cvtps_epu64 (__m128 __A) 777{ 778 return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A, 779 (__v2di) 780 _mm_setzero_di (), 781 (__mmask8) -1); 782} 783 784extern __inline __m128i 785__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 786_mm_mask_cvtps_epu64 (__m128i __W, __mmask8 __U, __m128 __A) 787{ 788 return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A, 789 (__v2di) __W, 790 (__mmask8) __U); 791} 792 793extern __inline __m128i 794__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 795_mm_maskz_cvtps_epu64 (__mmask8 __U, __m128 __A) 796{ 797 return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A, 798 (__v2di) 799 _mm_setzero_di (), 800 (__mmask8) __U); 801} 802 803extern __inline __m128 804__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 805_mm256_cvtepi64_ps (__m256i __A) 806{ 807 return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A, 808 (__v4sf) 809 _mm_setzero_ps (), 810 (__mmask8) -1); 811} 812 813extern __inline __m128 814__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 815_mm256_mask_cvtepi64_ps (__m128 __W, __mmask8 __U, __m256i __A) 816{ 817 return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A, 818 (__v4sf) __W, 819 (__mmask8) __U); 820} 821 822extern __inline __m128 823__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 824_mm256_maskz_cvtepi64_ps (__mmask8 __U, __m256i __A) 825{ 826 return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A, 827 (__v4sf) 828 _mm_setzero_ps (), 829 (__mmask8) __U); 830} 831 832extern __inline __m128 833__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 834_mm_cvtepi64_ps (__m128i __A) 835{ 836 return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A, 837 (__v4sf) 838 _mm_setzero_ps (), 839 (__mmask8) -1); 840} 841 842extern __inline __m128 843__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 844_mm_mask_cvtepi64_ps (__m128 __W, __mmask8 __U, __m128i __A) 845{ 846 return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A, 847 (__v4sf) __W, 848 (__mmask8) __U); 849} 850 851extern __inline __m128 852__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 853_mm_maskz_cvtepi64_ps (__mmask8 __U, __m128i __A) 854{ 855 return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A, 856 (__v4sf) 857 _mm_setzero_ps (), 858 (__mmask8) __U); 859} 860 861extern __inline __m128 862__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 863_mm256_cvtepu64_ps (__m256i __A) 864{ 865 return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A, 866 (__v4sf) 867 _mm_setzero_ps (), 868 (__mmask8) -1); 869} 870 871extern __inline __m128 872__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 873_mm256_mask_cvtepu64_ps (__m128 __W, __mmask8 __U, __m256i __A) 874{ 875 return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A, 876 (__v4sf) __W, 877 (__mmask8) __U); 878} 879 880extern __inline __m128 881__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 882_mm256_maskz_cvtepu64_ps (__mmask8 __U, __m256i __A) 883{ 884 return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A, 885 (__v4sf) 886 _mm_setzero_ps (), 887 (__mmask8) __U); 888} 889 890extern __inline __m128 891__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 892_mm_cvtepu64_ps (__m128i __A) 893{ 894 return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A, 895 (__v4sf) 896 _mm_setzero_ps (), 897 (__mmask8) -1); 898} 899 900extern __inline __m128 901__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 902_mm_mask_cvtepu64_ps (__m128 __W, __mmask8 __U, __m128i __A) 903{ 904 return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A, 905 (__v4sf) __W, 906 (__mmask8) __U); 907} 908 909extern __inline __m128 910__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 911_mm_maskz_cvtepu64_ps (__mmask8 __U, __m128i __A) 912{ 913 return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A, 914 (__v4sf) 915 _mm_setzero_ps (), 916 (__mmask8) __U); 917} 918 919extern __inline __m256d 920__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 921_mm256_cvtepi64_pd (__m256i __A) 922{ 923 return (__m256d) __builtin_ia32_cvtqq2pd256_mask ((__v4di) __A, 924 (__v4df) 925 _mm256_setzero_pd (), 926 (__mmask8) -1); 927} 928 929extern __inline __m256d 930__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 931_mm256_mask_cvtepi64_pd (__m256d __W, __mmask8 __U, __m256i __A) 932{ 933 return (__m256d) __builtin_ia32_cvtqq2pd256_mask ((__v4di) __A, 934 (__v4df) __W, 935 (__mmask8) __U); 936} 937 938extern __inline __m256d 939__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 940_mm256_maskz_cvtepi64_pd (__mmask8 __U, __m256i __A) 941{ 942 return (__m256d) __builtin_ia32_cvtqq2pd256_mask ((__v4di) __A, 943 (__v4df) 944 _mm256_setzero_pd (), 945 (__mmask8) __U); 946} 947 948extern __inline __m128d 949__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 950_mm_cvtepi64_pd (__m128i __A) 951{ 952 return (__m128d) __builtin_ia32_cvtqq2pd128_mask ((__v2di) __A, 953 (__v2df) 954 _mm_setzero_pd (), 955 (__mmask8) -1); 956} 957 958extern __inline __m128d 959__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 960_mm_mask_cvtepi64_pd (__m128d __W, __mmask8 __U, __m128i __A) 961{ 962 return (__m128d) __builtin_ia32_cvtqq2pd128_mask ((__v2di) __A, 963 (__v2df) __W, 964 (__mmask8) __U); 965} 966 967extern __inline __m128d 968__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 969_mm_maskz_cvtepi64_pd (__mmask8 __U, __m128i __A) 970{ 971 return (__m128d) __builtin_ia32_cvtqq2pd128_mask ((__v2di) __A, 972 (__v2df) 973 _mm_setzero_pd (), 974 (__mmask8) __U); 975} 976 977extern __inline __m256d 978__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 979_mm256_cvtepu64_pd (__m256i __A) 980{ 981 return (__m256d) __builtin_ia32_cvtuqq2pd256_mask ((__v4di) __A, 982 (__v4df) 983 _mm256_setzero_pd (), 984 (__mmask8) -1); 985} 986 987extern __inline __m256d 988__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 989_mm256_mask_cvtepu64_pd (__m256d __W, __mmask8 __U, __m256i __A) 990{ 991 return (__m256d) __builtin_ia32_cvtuqq2pd256_mask ((__v4di) __A, 992 (__v4df) __W, 993 (__mmask8) __U); 994} 995 996extern __inline __m256d 997__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 998_mm256_maskz_cvtepu64_pd (__mmask8 __U, __m256i __A) 999{ 1000 return (__m256d) __builtin_ia32_cvtuqq2pd256_mask ((__v4di) __A, 1001 (__v4df) 1002 _mm256_setzero_pd (), 1003 (__mmask8) __U); 1004} 1005 1006extern __inline __m256d 1007__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1008_mm256_mask_and_pd (__m256d __W, __mmask8 __U, __m256d __A, 1009 __m256d __B) 1010{ 1011 return (__m256d) __builtin_ia32_andpd256_mask ((__v4df) __A, 1012 (__v4df) __B, 1013 (__v4df) __W, 1014 (__mmask8) __U); 1015} 1016 1017extern __inline __m256d 1018__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1019_mm256_maskz_and_pd (__mmask8 __U, __m256d __A, __m256d __B) 1020{ 1021 return (__m256d) __builtin_ia32_andpd256_mask ((__v4df) __A, 1022 (__v4df) __B, 1023 (__v4df) 1024 _mm256_setzero_pd (), 1025 (__mmask8) __U); 1026} 1027 1028extern __inline __m128d 1029__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1030_mm_mask_and_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 1031{ 1032 return (__m128d) __builtin_ia32_andpd128_mask ((__v2df) __A, 1033 (__v2df) __B, 1034 (__v2df) __W, 1035 (__mmask8) __U); 1036} 1037 1038extern __inline __m128d 1039__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1040_mm_maskz_and_pd (__mmask8 __U, __m128d __A, __m128d __B) 1041{ 1042 return (__m128d) __builtin_ia32_andpd128_mask ((__v2df) __A, 1043 (__v2df) __B, 1044 (__v2df) 1045 _mm_setzero_pd (), 1046 (__mmask8) __U); 1047} 1048 1049extern __inline __m256 1050__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1051_mm256_mask_and_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) 1052{ 1053 return (__m256) __builtin_ia32_andps256_mask ((__v8sf) __A, 1054 (__v8sf) __B, 1055 (__v8sf) __W, 1056 (__mmask8) __U); 1057} 1058 1059extern __inline __m256 1060__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1061_mm256_maskz_and_ps (__mmask8 __U, __m256 __A, __m256 __B) 1062{ 1063 return (__m256) __builtin_ia32_andps256_mask ((__v8sf) __A, 1064 (__v8sf) __B, 1065 (__v8sf) 1066 _mm256_setzero_ps (), 1067 (__mmask8) __U); 1068} 1069 1070extern __inline __m128 1071__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1072_mm_mask_and_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 1073{ 1074 return (__m128) __builtin_ia32_andps128_mask ((__v4sf) __A, 1075 (__v4sf) __B, 1076 (__v4sf) __W, 1077 (__mmask8) __U); 1078} 1079 1080extern __inline __m128 1081__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1082_mm_maskz_and_ps (__mmask8 __U, __m128 __A, __m128 __B) 1083{ 1084 return (__m128) __builtin_ia32_andps128_mask ((__v4sf) __A, 1085 (__v4sf) __B, 1086 (__v4sf) 1087 _mm_setzero_ps (), 1088 (__mmask8) __U); 1089} 1090 1091extern __inline __m128d 1092__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1093_mm_cvtepu64_pd (__m128i __A) 1094{ 1095 return (__m128d) __builtin_ia32_cvtuqq2pd128_mask ((__v2di) __A, 1096 (__v2df) 1097 _mm_setzero_pd (), 1098 (__mmask8) -1); 1099} 1100 1101extern __inline __m128d 1102__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1103_mm_mask_cvtepu64_pd (__m128d __W, __mmask8 __U, __m128i __A) 1104{ 1105 return (__m128d) __builtin_ia32_cvtuqq2pd128_mask ((__v2di) __A, 1106 (__v2df) __W, 1107 (__mmask8) __U); 1108} 1109 1110extern __inline __m128d 1111__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1112_mm_maskz_cvtepu64_pd (__mmask8 __U, __m128i __A) 1113{ 1114 return (__m128d) __builtin_ia32_cvtuqq2pd128_mask ((__v2di) __A, 1115 (__v2df) 1116 _mm_setzero_pd (), 1117 (__mmask8) __U); 1118} 1119 1120extern __inline __m256d 1121__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1122_mm256_mask_xor_pd (__m256d __W, __mmask8 __U, __m256d __A, 1123 __m256d __B) 1124{ 1125 return (__m256d) __builtin_ia32_xorpd256_mask ((__v4df) __A, 1126 (__v4df) __B, 1127 (__v4df) __W, 1128 (__mmask8) __U); 1129} 1130 1131extern __inline __m256d 1132__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1133_mm256_maskz_xor_pd (__mmask8 __U, __m256d __A, __m256d __B) 1134{ 1135 return (__m256d) __builtin_ia32_xorpd256_mask ((__v4df) __A, 1136 (__v4df) __B, 1137 (__v4df) 1138 _mm256_setzero_pd (), 1139 (__mmask8) __U); 1140} 1141 1142extern __inline __m128d 1143__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1144_mm_mask_xor_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 1145{ 1146 return (__m128d) __builtin_ia32_xorpd128_mask ((__v2df) __A, 1147 (__v2df) __B, 1148 (__v2df) __W, 1149 (__mmask8) __U); 1150} 1151 1152extern __inline __m128d 1153__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1154_mm_maskz_xor_pd (__mmask8 __U, __m128d __A, __m128d __B) 1155{ 1156 return (__m128d) __builtin_ia32_xorpd128_mask ((__v2df) __A, 1157 (__v2df) __B, 1158 (__v2df) 1159 _mm_setzero_pd (), 1160 (__mmask8) __U); 1161} 1162 1163extern __inline __m256 1164__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1165_mm256_mask_xor_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) 1166{ 1167 return (__m256) __builtin_ia32_xorps256_mask ((__v8sf) __A, 1168 (__v8sf) __B, 1169 (__v8sf) __W, 1170 (__mmask8) __U); 1171} 1172 1173extern __inline __m256 1174__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1175_mm256_maskz_xor_ps (__mmask8 __U, __m256 __A, __m256 __B) 1176{ 1177 return (__m256) __builtin_ia32_xorps256_mask ((__v8sf) __A, 1178 (__v8sf) __B, 1179 (__v8sf) 1180 _mm256_setzero_ps (), 1181 (__mmask8) __U); 1182} 1183 1184extern __inline __m128 1185__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1186_mm_mask_xor_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 1187{ 1188 return (__m128) __builtin_ia32_xorps128_mask ((__v4sf) __A, 1189 (__v4sf) __B, 1190 (__v4sf) __W, 1191 (__mmask8) __U); 1192} 1193 1194extern __inline __m128 1195__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1196_mm_maskz_xor_ps (__mmask8 __U, __m128 __A, __m128 __B) 1197{ 1198 return (__m128) __builtin_ia32_xorps128_mask ((__v4sf) __A, 1199 (__v4sf) __B, 1200 (__v4sf) 1201 _mm_setzero_ps (), 1202 (__mmask8) __U); 1203} 1204 1205extern __inline __m256d 1206__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1207_mm256_mask_or_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) 1208{ 1209 return (__m256d) __builtin_ia32_orpd256_mask ((__v4df) __A, 1210 (__v4df) __B, 1211 (__v4df) __W, 1212 (__mmask8) __U); 1213} 1214 1215extern __inline __m256d 1216__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1217_mm256_maskz_or_pd (__mmask8 __U, __m256d __A, __m256d __B) 1218{ 1219 return (__m256d) __builtin_ia32_orpd256_mask ((__v4df) __A, 1220 (__v4df) __B, 1221 (__v4df) 1222 _mm256_setzero_pd (), 1223 (__mmask8) __U); 1224} 1225 1226extern __inline __m128d 1227__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1228_mm_mask_or_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 1229{ 1230 return (__m128d) __builtin_ia32_orpd128_mask ((__v2df) __A, 1231 (__v2df) __B, 1232 (__v2df) __W, 1233 (__mmask8) __U); 1234} 1235 1236extern __inline __m128d 1237__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1238_mm_maskz_or_pd (__mmask8 __U, __m128d __A, __m128d __B) 1239{ 1240 return (__m128d) __builtin_ia32_orpd128_mask ((__v2df) __A, 1241 (__v2df) __B, 1242 (__v2df) 1243 _mm_setzero_pd (), 1244 (__mmask8) __U); 1245} 1246 1247extern __inline __m256 1248__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1249_mm256_mask_or_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) 1250{ 1251 return (__m256) __builtin_ia32_orps256_mask ((__v8sf) __A, 1252 (__v8sf) __B, 1253 (__v8sf) __W, 1254 (__mmask8) __U); 1255} 1256 1257extern __inline __m256 1258__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1259_mm256_maskz_or_ps (__mmask8 __U, __m256 __A, __m256 __B) 1260{ 1261 return (__m256) __builtin_ia32_orps256_mask ((__v8sf) __A, 1262 (__v8sf) __B, 1263 (__v8sf) 1264 _mm256_setzero_ps (), 1265 (__mmask8) __U); 1266} 1267 1268extern __inline __m128 1269__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1270_mm_mask_or_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 1271{ 1272 return (__m128) __builtin_ia32_orps128_mask ((__v4sf) __A, 1273 (__v4sf) __B, 1274 (__v4sf) __W, 1275 (__mmask8) __U); 1276} 1277 1278extern __inline __m128 1279__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1280_mm_maskz_or_ps (__mmask8 __U, __m128 __A, __m128 __B) 1281{ 1282 return (__m128) __builtin_ia32_orps128_mask ((__v4sf) __A, 1283 (__v4sf) __B, 1284 (__v4sf) 1285 _mm_setzero_ps (), 1286 (__mmask8) __U); 1287} 1288 1289extern __inline __m128i 1290__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1291_mm_movm_epi32 (__mmask8 __A) 1292{ 1293 return (__m128i) __builtin_ia32_cvtmask2d128 (__A); 1294} 1295 1296extern __inline __m256i 1297__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1298_mm256_movm_epi32 (__mmask8 __A) 1299{ 1300 return (__m256i) __builtin_ia32_cvtmask2d256 (__A); 1301} 1302 1303extern __inline __m128i 1304__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1305_mm_movm_epi64 (__mmask8 __A) 1306{ 1307 return (__m128i) __builtin_ia32_cvtmask2q128 (__A); 1308} 1309 1310extern __inline __m256i 1311__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1312_mm256_movm_epi64 (__mmask8 __A) 1313{ 1314 return (__m256i) __builtin_ia32_cvtmask2q256 (__A); 1315} 1316 1317extern __inline __mmask8 1318__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1319_mm_movepi32_mask (__m128i __A) 1320{ 1321 return (__mmask8) __builtin_ia32_cvtd2mask128 ((__v4si) __A); 1322} 1323 1324extern __inline __mmask8 1325__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1326_mm256_movepi32_mask (__m256i __A) 1327{ 1328 return (__mmask8) __builtin_ia32_cvtd2mask256 ((__v8si) __A); 1329} 1330 1331extern __inline __mmask8 1332__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1333_mm_movepi64_mask (__m128i __A) 1334{ 1335 return (__mmask8) __builtin_ia32_cvtq2mask128 ((__v2di) __A); 1336} 1337 1338extern __inline __mmask8 1339__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1340_mm256_movepi64_mask (__m256i __A) 1341{ 1342 return (__mmask8) __builtin_ia32_cvtq2mask256 ((__v4di) __A); 1343} 1344 1345#ifdef __OPTIMIZE__ 1346extern __inline __m128d 1347__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1348_mm256_extractf64x2_pd (__m256d __A, const int __imm) 1349{ 1350 return (__m128d) __builtin_ia32_extractf64x2_256_mask ((__v4df) __A, 1351 __imm, 1352 (__v2df) 1353 _mm_setzero_pd (), 1354 (__mmask8) - 1355 1); 1356} 1357 1358extern __inline __m128d 1359__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1360_mm256_mask_extractf64x2_pd (__m128d __W, __mmask8 __U, __m256d __A, 1361 const int __imm) 1362{ 1363 return (__m128d) __builtin_ia32_extractf64x2_256_mask ((__v4df) __A, 1364 __imm, 1365 (__v2df) __W, 1366 (__mmask8) 1367 __U); 1368} 1369 1370extern __inline __m128d 1371__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1372_mm256_maskz_extractf64x2_pd (__mmask8 __U, __m256d __A, 1373 const int __imm) 1374{ 1375 return (__m128d) __builtin_ia32_extractf64x2_256_mask ((__v4df) __A, 1376 __imm, 1377 (__v2df) 1378 _mm_setzero_pd (), 1379 (__mmask8) 1380 __U); 1381} 1382 1383extern __inline __m128i 1384__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1385_mm256_extracti64x2_epi64 (__m256i __A, const int __imm) 1386{ 1387 return (__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di) __A, 1388 __imm, 1389 (__v2di) 1390 _mm_setzero_di (), 1391 (__mmask8) - 1392 1); 1393} 1394 1395extern __inline __m128i 1396__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1397_mm256_mask_extracti64x2_epi64 (__m128i __W, __mmask8 __U, __m256i __A, 1398 const int __imm) 1399{ 1400 return (__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di) __A, 1401 __imm, 1402 (__v2di) __W, 1403 (__mmask8) 1404 __U); 1405} 1406 1407extern __inline __m128i 1408__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1409_mm256_maskz_extracti64x2_epi64 (__mmask8 __U, __m256i __A, 1410 const int __imm) 1411{ 1412 return (__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di) __A, 1413 __imm, 1414 (__v2di) 1415 _mm_setzero_di (), 1416 (__mmask8) 1417 __U); 1418} 1419 1420extern __inline __m256d 1421__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1422_mm256_reduce_pd (__m256d __A, int __B) 1423{ 1424 return (__m256d) __builtin_ia32_reducepd256_mask ((__v4df) __A, __B, 1425 (__v4df) 1426 _mm256_setzero_pd (), 1427 (__mmask8) -1); 1428} 1429 1430extern __inline __m256d 1431__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1432_mm256_mask_reduce_pd (__m256d __W, __mmask8 __U, __m256d __A, int __B) 1433{ 1434 return (__m256d) __builtin_ia32_reducepd256_mask ((__v4df) __A, __B, 1435 (__v4df) __W, 1436 (__mmask8) __U); 1437} 1438 1439extern __inline __m256d 1440__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1441_mm256_maskz_reduce_pd (__mmask8 __U, __m256d __A, int __B) 1442{ 1443 return (__m256d) __builtin_ia32_reducepd256_mask ((__v4df) __A, __B, 1444 (__v4df) 1445 _mm256_setzero_pd (), 1446 (__mmask8) __U); 1447} 1448 1449extern __inline __m128d 1450__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1451_mm_reduce_pd (__m128d __A, int __B) 1452{ 1453 return (__m128d) __builtin_ia32_reducepd128_mask ((__v2df) __A, __B, 1454 (__v2df) 1455 _mm_setzero_pd (), 1456 (__mmask8) -1); 1457} 1458 1459extern __inline __m128d 1460__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1461_mm_mask_reduce_pd (__m128d __W, __mmask8 __U, __m128d __A, int __B) 1462{ 1463 return (__m128d) __builtin_ia32_reducepd128_mask ((__v2df) __A, __B, 1464 (__v2df) __W, 1465 (__mmask8) __U); 1466} 1467 1468extern __inline __m128d 1469__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1470_mm_maskz_reduce_pd (__mmask8 __U, __m128d __A, int __B) 1471{ 1472 return (__m128d) __builtin_ia32_reducepd128_mask ((__v2df) __A, __B, 1473 (__v2df) 1474 _mm_setzero_pd (), 1475 (__mmask8) __U); 1476} 1477 1478extern __inline __m256 1479__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1480_mm256_reduce_ps (__m256 __A, int __B) 1481{ 1482 return (__m256) __builtin_ia32_reduceps256_mask ((__v8sf) __A, __B, 1483 (__v8sf) 1484 _mm256_setzero_ps (), 1485 (__mmask8) -1); 1486} 1487 1488extern __inline __m256 1489__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1490_mm256_mask_reduce_ps (__m256 __W, __mmask8 __U, __m256 __A, int __B) 1491{ 1492 return (__m256) __builtin_ia32_reduceps256_mask ((__v8sf) __A, __B, 1493 (__v8sf) __W, 1494 (__mmask8) __U); 1495} 1496 1497extern __inline __m256 1498__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1499_mm256_maskz_reduce_ps (__mmask8 __U, __m256 __A, int __B) 1500{ 1501 return (__m256) __builtin_ia32_reduceps256_mask ((__v8sf) __A, __B, 1502 (__v8sf) 1503 _mm256_setzero_ps (), 1504 (__mmask8) __U); 1505} 1506 1507extern __inline __m128 1508__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1509_mm_reduce_ps (__m128 __A, int __B) 1510{ 1511 return (__m128) __builtin_ia32_reduceps128_mask ((__v4sf) __A, __B, 1512 (__v4sf) 1513 _mm_setzero_ps (), 1514 (__mmask8) -1); 1515} 1516 1517extern __inline __m128 1518__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1519_mm_mask_reduce_ps (__m128 __W, __mmask8 __U, __m128 __A, int __B) 1520{ 1521 return (__m128) __builtin_ia32_reduceps128_mask ((__v4sf) __A, __B, 1522 (__v4sf) __W, 1523 (__mmask8) __U); 1524} 1525 1526extern __inline __m128 1527__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1528_mm_maskz_reduce_ps (__mmask8 __U, __m128 __A, int __B) 1529{ 1530 return (__m128) __builtin_ia32_reduceps128_mask ((__v4sf) __A, __B, 1531 (__v4sf) 1532 _mm_setzero_ps (), 1533 (__mmask8) __U); 1534} 1535 1536extern __inline __m256d 1537__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1538_mm256_range_pd (__m256d __A, __m256d __B, int __C) 1539{ 1540 return (__m256d) __builtin_ia32_rangepd256_mask ((__v4df) __A, 1541 (__v4df) __B, __C, 1542 (__v4df) 1543 _mm256_setzero_pd (), 1544 (__mmask8) -1); 1545} 1546 1547extern __inline __m256d 1548__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1549_mm256_mask_range_pd (__m256d __W, __mmask8 __U, 1550 __m256d __A, __m256d __B, int __C) 1551{ 1552 return (__m256d) __builtin_ia32_rangepd256_mask ((__v4df) __A, 1553 (__v4df) __B, __C, 1554 (__v4df) __W, 1555 (__mmask8) __U); 1556} 1557 1558extern __inline __m256d 1559__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1560_mm256_maskz_range_pd (__mmask8 __U, __m256d __A, __m256d __B, int __C) 1561{ 1562 return (__m256d) __builtin_ia32_rangepd256_mask ((__v4df) __A, 1563 (__v4df) __B, __C, 1564 (__v4df) 1565 _mm256_setzero_pd (), 1566 (__mmask8) __U); 1567} 1568 1569extern __inline __m128d 1570__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1571_mm_range_pd (__m128d __A, __m128d __B, int __C) 1572{ 1573 return (__m128d) __builtin_ia32_rangepd128_mask ((__v2df) __A, 1574 (__v2df) __B, __C, 1575 (__v2df) 1576 _mm_setzero_pd (), 1577 (__mmask8) -1); 1578} 1579 1580extern __inline __m128d 1581__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1582_mm_mask_range_pd (__m128d __W, __mmask8 __U, 1583 __m128d __A, __m128d __B, int __C) 1584{ 1585 return (__m128d) __builtin_ia32_rangepd128_mask ((__v2df) __A, 1586 (__v2df) __B, __C, 1587 (__v2df) __W, 1588 (__mmask8) __U); 1589} 1590 1591extern __inline __m128d 1592__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1593_mm_maskz_range_pd (__mmask8 __U, __m128d __A, __m128d __B, int __C) 1594{ 1595 return (__m128d) __builtin_ia32_rangepd128_mask ((__v2df) __A, 1596 (__v2df) __B, __C, 1597 (__v2df) 1598 _mm_setzero_pd (), 1599 (__mmask8) __U); 1600} 1601 1602extern __inline __m256 1603__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1604_mm256_range_ps (__m256 __A, __m256 __B, int __C) 1605{ 1606 return (__m256) __builtin_ia32_rangeps256_mask ((__v8sf) __A, 1607 (__v8sf) __B, __C, 1608 (__v8sf) 1609 _mm256_setzero_ps (), 1610 (__mmask8) -1); 1611} 1612 1613extern __inline __m256 1614__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1615_mm256_mask_range_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B, 1616 int __C) 1617{ 1618 return (__m256) __builtin_ia32_rangeps256_mask ((__v8sf) __A, 1619 (__v8sf) __B, __C, 1620 (__v8sf) __W, 1621 (__mmask8) __U); 1622} 1623 1624extern __inline __m256 1625__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1626_mm256_maskz_range_ps (__mmask8 __U, __m256 __A, __m256 __B, int __C) 1627{ 1628 return (__m256) __builtin_ia32_rangeps256_mask ((__v8sf) __A, 1629 (__v8sf) __B, __C, 1630 (__v8sf) 1631 _mm256_setzero_ps (), 1632 (__mmask8) __U); 1633} 1634 1635extern __inline __m128 1636__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1637_mm_range_ps (__m128 __A, __m128 __B, int __C) 1638{ 1639 return (__m128) __builtin_ia32_rangeps128_mask ((__v4sf) __A, 1640 (__v4sf) __B, __C, 1641 (__v4sf) 1642 _mm_setzero_ps (), 1643 (__mmask8) -1); 1644} 1645 1646extern __inline __m128 1647__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1648_mm_mask_range_ps (__m128 __W, __mmask8 __U, 1649 __m128 __A, __m128 __B, int __C) 1650{ 1651 return (__m128) __builtin_ia32_rangeps128_mask ((__v4sf) __A, 1652 (__v4sf) __B, __C, 1653 (__v4sf) __W, 1654 (__mmask8) __U); 1655} 1656 1657extern __inline __m128 1658__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1659_mm_maskz_range_ps (__mmask8 __U, __m128 __A, __m128 __B, int __C) 1660{ 1661 return (__m128) __builtin_ia32_rangeps128_mask ((__v4sf) __A, 1662 (__v4sf) __B, __C, 1663 (__v4sf) 1664 _mm_setzero_ps (), 1665 (__mmask8) __U); 1666} 1667 1668extern __inline __mmask8 1669__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1670_mm256_mask_fpclass_pd_mask (__mmask8 __U, __m256d __A, 1671 const int __imm) 1672{ 1673 return (__mmask8) __builtin_ia32_fpclasspd256_mask ((__v4df) __A, 1674 __imm, __U); 1675} 1676 1677extern __inline __mmask8 1678__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1679_mm256_fpclass_pd_mask (__m256d __A, const int __imm) 1680{ 1681 return (__mmask8) __builtin_ia32_fpclasspd256_mask ((__v4df) __A, 1682 __imm, 1683 (__mmask8) -1); 1684} 1685 1686extern __inline __mmask8 1687__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1688_mm256_mask_fpclass_ps_mask (__mmask8 __U, __m256 __A, const int __imm) 1689{ 1690 return (__mmask8) __builtin_ia32_fpclassps256_mask ((__v8sf) __A, 1691 __imm, __U); 1692} 1693 1694extern __inline __mmask8 1695__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1696_mm256_fpclass_ps_mask (__m256 __A, const int __imm) 1697{ 1698 return (__mmask8) __builtin_ia32_fpclassps256_mask ((__v8sf) __A, 1699 __imm, 1700 (__mmask8) -1); 1701} 1702 1703extern __inline __mmask8 1704__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1705_mm_mask_fpclass_pd_mask (__mmask8 __U, __m128d __A, const int __imm) 1706{ 1707 return (__mmask8) __builtin_ia32_fpclasspd128_mask ((__v2df) __A, 1708 __imm, __U); 1709} 1710 1711extern __inline __mmask8 1712__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1713_mm_fpclass_pd_mask (__m128d __A, const int __imm) 1714{ 1715 return (__mmask8) __builtin_ia32_fpclasspd128_mask ((__v2df) __A, 1716 __imm, 1717 (__mmask8) -1); 1718} 1719 1720extern __inline __mmask8 1721__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1722_mm_mask_fpclass_ps_mask (__mmask8 __U, __m128 __A, const int __imm) 1723{ 1724 return (__mmask8) __builtin_ia32_fpclassps128_mask ((__v4sf) __A, 1725 __imm, __U); 1726} 1727 1728extern __inline __mmask8 1729__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1730_mm_fpclass_ps_mask (__m128 __A, const int __imm) 1731{ 1732 return (__mmask8) __builtin_ia32_fpclassps128_mask ((__v4sf) __A, 1733 __imm, 1734 (__mmask8) -1); 1735} 1736 1737extern __inline __m256i 1738__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1739_mm256_inserti64x2 (__m256i __A, __m128i __B, const int __imm) 1740{ 1741 return (__m256i) __builtin_ia32_inserti64x2_256_mask ((__v4di) __A, 1742 (__v2di) __B, 1743 __imm, 1744 (__v4di) 1745 _mm256_setzero_si256 (), 1746 (__mmask8) - 1747 1); 1748} 1749 1750extern __inline __m256i 1751__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1752_mm256_mask_inserti64x2 (__m256i __W, __mmask8 __U, __m256i __A, 1753 __m128i __B, const int __imm) 1754{ 1755 return (__m256i) __builtin_ia32_inserti64x2_256_mask ((__v4di) __A, 1756 (__v2di) __B, 1757 __imm, 1758 (__v4di) __W, 1759 (__mmask8) 1760 __U); 1761} 1762 1763extern __inline __m256i 1764__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1765_mm256_maskz_inserti64x2 (__mmask8 __U, __m256i __A, __m128i __B, 1766 const int __imm) 1767{ 1768 return (__m256i) __builtin_ia32_inserti64x2_256_mask ((__v4di) __A, 1769 (__v2di) __B, 1770 __imm, 1771 (__v4di) 1772 _mm256_setzero_si256 (), 1773 (__mmask8) 1774 __U); 1775} 1776 1777extern __inline __m256d 1778__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1779_mm256_insertf64x2 (__m256d __A, __m128d __B, const int __imm) 1780{ 1781 return (__m256d) __builtin_ia32_insertf64x2_256_mask ((__v4df) __A, 1782 (__v2df) __B, 1783 __imm, 1784 (__v4df) 1785 _mm256_setzero_pd (), 1786 (__mmask8) - 1787 1); 1788} 1789 1790extern __inline __m256d 1791__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1792_mm256_mask_insertf64x2 (__m256d __W, __mmask8 __U, __m256d __A, 1793 __m128d __B, const int __imm) 1794{ 1795 return (__m256d) __builtin_ia32_insertf64x2_256_mask ((__v4df) __A, 1796 (__v2df) __B, 1797 __imm, 1798 (__v4df) __W, 1799 (__mmask8) 1800 __U); 1801} 1802 1803extern __inline __m256d 1804__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1805_mm256_maskz_insertf64x2 (__mmask8 __U, __m256d __A, __m128d __B, 1806 const int __imm) 1807{ 1808 return (__m256d) __builtin_ia32_insertf64x2_256_mask ((__v4df) __A, 1809 (__v2df) __B, 1810 __imm, 1811 (__v4df) 1812 _mm256_setzero_pd (), 1813 (__mmask8) 1814 __U); 1815} 1816 1817#else 1818#define _mm256_insertf64x2(X, Y, C) \ 1819 ((__m256d) __builtin_ia32_insertf64x2_256_mask ((__v4df)(__m256d) (X),\ 1820 (__v2df)(__m128d) (Y), (int) (C), \ 1821 (__v4df)(__m256d)_mm256_setzero_pd(), \ 1822 (__mmask8)-1)) 1823 1824#define _mm256_mask_insertf64x2(W, U, X, Y, C) \ 1825 ((__m256d) __builtin_ia32_insertf64x2_256_mask ((__v4df)(__m256d) (X),\ 1826 (__v2df)(__m128d) (Y), (int) (C), \ 1827 (__v4df)(__m256d)(W), \ 1828 (__mmask8)(U))) 1829 1830#define _mm256_maskz_insertf64x2(U, X, Y, C) \ 1831 ((__m256d) __builtin_ia32_insertf64x2_256_mask ((__v4df)(__m256d) (X),\ 1832 (__v2df)(__m128d) (Y), (int) (C), \ 1833 (__v4df)(__m256d)_mm256_setzero_pd(), \ 1834 (__mmask8)(U))) 1835 1836#define _mm256_inserti64x2(X, Y, C) \ 1837 ((__m256i) __builtin_ia32_inserti64x2_256_mask ((__v4di)(__m256i) (X),\ 1838 (__v2di)(__m128i) (Y), (int) (C), \ 1839 (__v4di)(__m256i)_mm256_setzero_si256 (), \ 1840 (__mmask8)-1)) 1841 1842#define _mm256_mask_inserti64x2(W, U, X, Y, C) \ 1843 ((__m256i) __builtin_ia32_inserti64x2_256_mask ((__v4di)(__m256i) (X),\ 1844 (__v2di)(__m128i) (Y), (int) (C), \ 1845 (__v4di)(__m256i)(W), \ 1846 (__mmask8)(U))) 1847 1848#define _mm256_maskz_inserti64x2(U, X, Y, C) \ 1849 ((__m256i) __builtin_ia32_inserti64x2_256_mask ((__v4di)(__m256i) (X),\ 1850 (__v2di)(__m128i) (Y), (int) (C), \ 1851 (__v4di)(__m256i)_mm256_setzero_si256 (), \ 1852 (__mmask8)(U))) 1853 1854#define _mm256_extractf64x2_pd(X, C) \ 1855 ((__m128d) __builtin_ia32_extractf64x2_256_mask ((__v4df)(__m256d) (X),\ 1856 (int) (C), (__v2df)(__m128d) _mm_setzero_pd(), (__mmask8)-1)) 1857 1858#define _mm256_mask_extractf64x2_pd(W, U, X, C) \ 1859 ((__m128d) __builtin_ia32_extractf64x2_256_mask ((__v4df)(__m256d) (X),\ 1860 (int) (C), (__v2df)(__m128d) (W), (__mmask8) (U))) 1861 1862#define _mm256_maskz_extractf64x2_pd(U, X, C) \ 1863 ((__m128d) __builtin_ia32_extractf64x2_256_mask ((__v4df)(__m256d) (X),\ 1864 (int) (C), (__v2df)(__m128d) _mm_setzero_pd(), (__mmask8) (U))) 1865 1866#define _mm256_extracti64x2_epi64(X, C) \ 1867 ((__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di)(__m256i) (X),\ 1868 (int) (C), (__v2di)(__m128i) _mm_setzero_di(), (__mmask8)-1)) 1869 1870#define _mm256_mask_extracti64x2_epi64(W, U, X, C) \ 1871 ((__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di)(__m256i) (X),\ 1872 (int) (C), (__v2di)(__m128i) (W), (__mmask8) (U))) 1873 1874#define _mm256_maskz_extracti64x2_epi64(U, X, C) \ 1875 ((__m128i) __builtin_ia32_extracti64x2_256_mask ((__v4di)(__m256i) (X),\ 1876 (int) (C), (__v2di)(__m128i) _mm_setzero_di(), (__mmask8) (U))) 1877 1878#define _mm256_reduce_pd(A, B) \ 1879 ((__m256d) __builtin_ia32_reducepd256_mask ((__v4df)(__m256d)(A), \ 1880 (int)(B), (__v4df)_mm256_setzero_pd(), (__mmask8)-1)) 1881 1882#define _mm256_mask_reduce_pd(W, U, A, B) \ 1883 ((__m256d) __builtin_ia32_reducepd256_mask ((__v4df)(__m256d)(A), \ 1884 (int)(B), (__v4df)(__m256d)(W), (__mmask8)(U))) 1885 1886#define _mm256_maskz_reduce_pd(U, A, B) \ 1887 ((__m256d) __builtin_ia32_reducepd256_mask ((__v4df)(__m256d)(A), \ 1888 (int)(B), (__v4df)_mm256_setzero_pd(), (__mmask8)(U))) 1889 1890#define _mm_reduce_pd(A, B) \ 1891 ((__m128d) __builtin_ia32_reducepd128_mask ((__v2df)(__m128d)(A), \ 1892 (int)(B), (__v2df)_mm_setzero_pd(), (__mmask8)-1)) 1893 1894#define _mm_mask_reduce_pd(W, U, A, B) \ 1895 ((__m128d) __builtin_ia32_reducepd128_mask ((__v2df)(__m128d)(A), \ 1896 (int)(B), (__v2df)(__m128d)(W), (__mmask8)(U))) 1897 1898#define _mm_maskz_reduce_pd(U, A, B) \ 1899 ((__m128d) __builtin_ia32_reducepd128_mask ((__v2df)(__m128d)(A), \ 1900 (int)(B), (__v2df)_mm_setzero_pd(), (__mmask8)(U))) 1901 1902#define _mm256_reduce_ps(A, B) \ 1903 ((__m256) __builtin_ia32_reduceps256_mask ((__v8sf)(__m256)(A), \ 1904 (int)(B), (__v8sf)_mm256_setzero_ps(), (__mmask8)-1)) 1905 1906#define _mm256_mask_reduce_ps(W, U, A, B) \ 1907 ((__m256) __builtin_ia32_reduceps256_mask ((__v8sf)(__m256)(A), \ 1908 (int)(B), (__v8sf)(__m256)(W), (__mmask8)(U))) 1909 1910#define _mm256_maskz_reduce_ps(U, A, B) \ 1911 ((__m256) __builtin_ia32_reduceps256_mask ((__v8sf)(__m256)(A), \ 1912 (int)(B), (__v8sf)_mm256_setzero_ps(), (__mmask8)(U))) 1913 1914#define _mm_reduce_ps(A, B) \ 1915 ((__m128) __builtin_ia32_reduceps128_mask ((__v4sf)(__m128)(A), \ 1916 (int)(B), (__v4sf)_mm_setzero_ps(), (__mmask8)-1)) 1917 1918#define _mm_mask_reduce_ps(W, U, A, B) \ 1919 ((__m128) __builtin_ia32_reduceps128_mask ((__v4sf)(__m128)(A), \ 1920 (int)(B), (__v4sf)(__m128)(W), (__mmask8)(U))) 1921 1922#define _mm_maskz_reduce_ps(U, A, B) \ 1923 ((__m128) __builtin_ia32_reduceps128_mask ((__v4sf)(__m128)(A), \ 1924 (int)(B), (__v4sf)_mm_setzero_ps(), (__mmask8)(U))) 1925 1926#define _mm256_range_pd(A, B, C) \ 1927 ((__m256d) __builtin_ia32_rangepd256_mask ((__v4df)(__m256d)(A), \ 1928 (__v4df)(__m256d)(B), (int)(C), \ 1929 (__v4df)_mm256_setzero_pd(), (__mmask8)-1)) 1930 1931#define _mm256_maskz_range_pd(U, A, B, C) \ 1932 ((__m256d) __builtin_ia32_rangepd256_mask ((__v4df)(__m256d)(A), \ 1933 (__v4df)(__m256d)(B), (int)(C), \ 1934 (__v4df)_mm256_setzero_pd(), (__mmask8)(U))) 1935 1936#define _mm_range_pd(A, B, C) \ 1937 ((__m128d) __builtin_ia32_rangepd128_mask ((__v2df)(__m128d)(A), \ 1938 (__v2df)(__m128d)(B), (int)(C), \ 1939 (__v2df)_mm_setzero_pd(), (__mmask8)-1)) 1940 1941#define _mm256_range_ps(A, B, C) \ 1942 ((__m256) __builtin_ia32_rangeps256_mask ((__v8sf)(__m256)(A), \ 1943 (__v8sf)(__m256)(B), (int)(C), \ 1944 (__v8sf)_mm256_setzero_ps(), (__mmask8)-1)) 1945 1946#define _mm256_mask_range_ps(W, U, A, B, C) \ 1947 ((__m256) __builtin_ia32_rangeps256_mask ((__v8sf)(__m256)(A), \ 1948 (__v8sf)(__m256)(B), (int)(C), \ 1949 (__v8sf)(__m256)(W), (__mmask8)(U))) 1950 1951#define _mm256_maskz_range_ps(U, A, B, C) \ 1952 ((__m256) __builtin_ia32_rangeps256_mask ((__v8sf)(__m256)(A), \ 1953 (__v8sf)(__m256)(B), (int)(C), \ 1954 (__v8sf)_mm256_setzero_ps(), (__mmask8)(U))) 1955 1956#define _mm_range_ps(A, B, C) \ 1957 ((__m128) __builtin_ia32_rangeps128_mask ((__v4sf)(__m128)(A), \ 1958 (__v4sf)(__m128)(B), (int)(C), \ 1959 (__v4sf)_mm_setzero_ps(), (__mmask8)-1)) 1960 1961#define _mm_mask_range_ps(W, U, A, B, C) \ 1962 ((__m128) __builtin_ia32_rangeps128_mask ((__v4sf)(__m128)(A), \ 1963 (__v4sf)(__m128)(B), (int)(C), \ 1964 (__v4sf)(__m128)(W), (__mmask8)(U))) 1965 1966#define _mm_maskz_range_ps(U, A, B, C) \ 1967 ((__m128) __builtin_ia32_rangeps128_mask ((__v4sf)(__m128)(A), \ 1968 (__v4sf)(__m128)(B), (int)(C), \ 1969 (__v4sf)_mm_setzero_ps(), (__mmask8)(U))) 1970 1971#define _mm256_mask_range_pd(W, U, A, B, C) \ 1972 ((__m256d) __builtin_ia32_rangepd256_mask ((__v4df)(__m256d)(A), \ 1973 (__v4df)(__m256d)(B), (int)(C), \ 1974 (__v4df)(__m256d)(W), (__mmask8)(U))) 1975 1976#define _mm_mask_range_pd(W, U, A, B, C) \ 1977 ((__m128d) __builtin_ia32_rangepd128_mask ((__v2df)(__m128d)(A), \ 1978 (__v2df)(__m128d)(B), (int)(C), \ 1979 (__v2df)(__m128d)(W), (__mmask8)(U))) 1980 1981#define _mm_maskz_range_pd(U, A, B, C) \ 1982 ((__m128d) __builtin_ia32_rangepd128_mask ((__v2df)(__m128d)(A), \ 1983 (__v2df)(__m128d)(B), (int)(C), \ 1984 (__v2df)_mm_setzero_pd(), (__mmask8)(U))) 1985 1986#define _mm256_mask_fpclass_pd_mask(u, X, C) \ 1987 ((__mmask8) __builtin_ia32_fpclasspd256_mask ((__v4df) (__m256d) (X), \ 1988 (int) (C),(__mmask8)(u))) 1989 1990#define _mm256_mask_fpclass_ps_mask(u, X, C) \ 1991 ((__mmask8) __builtin_ia32_fpclassps256_mask ((__v8sf) (__m256) (X), \ 1992 (int) (C),(__mmask8)(u))) 1993 1994#define _mm_mask_fpclass_pd_mask(u, X, C) \ 1995 ((__mmask8) __builtin_ia32_fpclasspd128_mask ((__v2df) (__m128d) (X), \ 1996 (int) (C),(__mmask8)(u))) 1997 1998#define _mm_mask_fpclass_ps_mask(u, X, C) \ 1999 ((__mmask8) __builtin_ia32_fpclassps128_mask ((__v4sf) (__m128) (X), \ 2000 (int) (C),(__mmask8)(u))) 2001 2002#define _mm256_fpclass_pd_mask(X, C) \ 2003 ((__mmask8) __builtin_ia32_fpclasspd256_mask ((__v4df) (__m256d) (X), \ 2004 (int) (C),(__mmask8)-1)) 2005 2006#define _mm256_fpclass_ps_mask(X, C) \ 2007 ((__mmask8) __builtin_ia32_fpclassps256_mask ((__v8sf) (__m256) (X), \ 2008 (int) (C),(__mmask8)-1)) 2009 2010#define _mm_fpclass_pd_mask(X, C) \ 2011 ((__mmask8) __builtin_ia32_fpclasspd128_mask ((__v2df) (__m128d) (X), \ 2012 (int) (C),(__mmask8)-1)) 2013 2014#define _mm_fpclass_ps_mask(X, C) \ 2015 ((__mmask8) __builtin_ia32_fpclassps128_mask ((__v4sf) (__m128) (X), \ 2016 (int) (C),(__mmask8)-1)) 2017 2018#endif 2019 2020#ifdef __DISABLE_AVX512VLDQ__ 2021#undef __DISABLE_AVX512VLDQ__ 2022#pragma GCC pop_options 2023#endif /* __DISABLE_AVX512VLDQ__ */ 2024 2025#endif /* _AVX512VLDQINTRIN_H_INCLUDED */ 2026