1/* Copyright (C) 2019-2022 Free Software Foundation, Inc. 2 3 This file is part of GCC. 4 5 GCC is free software; you can redistribute it and/or modify 6 it under the terms of the GNU General Public License as published by 7 the Free Software Foundation; either version 3, or (at your option) 8 any later version. 9 10 GCC is distributed in the hope that it will be useful, 11 but WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 GNU General Public License for more details. 14 15 Under Section 7 of GPL version 3, you are granted additional 16 permissions described in the GCC Runtime Library Exception, version 17 3.1, as published by the Free Software Foundation. 18 19 You should have received a copy of the GNU General Public License and 20 a copy of the GCC Runtime Library Exception along with this program; 21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 22 <http://www.gnu.org/licenses/>. */ 23 24#ifndef _IMMINTRIN_H_INCLUDED 25#error "Never use <avx512fp16vlintrin.h> directly; include <immintrin.h> instead." 26#endif 27 28#ifndef __AVX512FP16VLINTRIN_H_INCLUDED 29#define __AVX512FP16VLINTRIN_H_INCLUDED 30 31#if !defined(__AVX512VL__) || !defined(__AVX512FP16__) 32#pragma GCC push_options 33#pragma GCC target("avx512fp16,avx512vl") 34#define __DISABLE_AVX512FP16VL__ 35#endif /* __AVX512FP16VL__ */ 36 37extern __inline __m128 38__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 39_mm_castph_ps (__m128h __a) 40{ 41 return (__m128) __a; 42} 43 44extern __inline __m256 45__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 46_mm256_castph_ps (__m256h __a) 47{ 48 return (__m256) __a; 49} 50 51extern __inline __m128d 52__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 53_mm_castph_pd (__m128h __a) 54{ 55 return (__m128d) __a; 56} 57 58extern __inline __m256d 59__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 60_mm256_castph_pd (__m256h __a) 61{ 62 return (__m256d) __a; 63} 64 65extern __inline __m128i 66__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 67_mm_castph_si128 (__m128h __a) 68{ 69 return (__m128i) __a; 70} 71 72extern __inline __m256i 73__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 74_mm256_castph_si256 (__m256h __a) 75{ 76 return (__m256i) __a; 77} 78 79extern __inline __m128h 80__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 81_mm_castps_ph (__m128 __a) 82{ 83 return (__m128h) __a; 84} 85 86extern __inline __m256h 87__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 88_mm256_castps_ph (__m256 __a) 89{ 90 return (__m256h) __a; 91} 92 93extern __inline __m128h 94__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 95_mm_castpd_ph (__m128d __a) 96{ 97 return (__m128h) __a; 98} 99 100extern __inline __m256h 101__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 102_mm256_castpd_ph (__m256d __a) 103{ 104 return (__m256h) __a; 105} 106 107extern __inline __m128h 108__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 109_mm_castsi128_ph (__m128i __a) 110{ 111 return (__m128h) __a; 112} 113 114extern __inline __m256h 115__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 116_mm256_castsi256_ph (__m256i __a) 117{ 118 return (__m256h) __a; 119} 120 121extern __inline __m128h 122__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 123_mm256_castph256_ph128 (__m256h __A) 124{ 125 union 126 { 127 __m128h __a[2]; 128 __m256h __v; 129 } __u = { .__v = __A }; 130 return __u.__a[0]; 131} 132 133extern __inline __m256h 134__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 135_mm256_castph128_ph256 (__m128h __A) 136{ 137 union 138 { 139 __m128h __a[2]; 140 __m256h __v; 141 } __u; 142 __u.__a[0] = __A; 143 return __u.__v; 144} 145 146extern __inline __m256h 147__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 148_mm256_zextph128_ph256 (__m128h __A) 149{ 150 return (__m256h) _mm256_insertf128_ps (_mm256_setzero_ps (), 151 (__m128) __A, 0); 152} 153 154extern __inline __m256h 155__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 156_mm256_conj_pch (__m256h __A) 157{ 158 return (__m256h) _mm256_xor_epi32 ((__m256i) __A, _mm256_set1_epi32 (1<<31)); 159} 160 161extern __inline __m256h 162__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 163_mm256_mask_conj_pch (__m256h __W, __mmask8 __U, __m256h __A) 164{ 165 return (__m256h) __builtin_ia32_movaps256_mask ((__v8sf) 166 _mm256_conj_pch (__A), 167 (__v8sf) __W, 168 (__mmask8) __U); 169} 170 171extern __inline __m256h 172__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 173_mm256_maskz_conj_pch (__mmask8 __U, __m256h __A) 174{ 175 return (__m256h) __builtin_ia32_movaps256_mask ((__v8sf) 176 _mm256_conj_pch (__A), 177 (__v8sf) 178 _mm256_setzero_ps (), 179 (__mmask8) __U); 180} 181 182extern __inline __m128h 183__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 184_mm_conj_pch (__m128h __A) 185{ 186 return (__m128h) _mm_xor_epi32 ((__m128i) __A, _mm_set1_epi32 (1<<31)); 187} 188 189extern __inline __m128h 190__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 191_mm_mask_conj_pch (__m128h __W, __mmask8 __U, __m128h __A) 192{ 193 return (__m128h) __builtin_ia32_movaps128_mask ((__v4sf) _mm_conj_pch (__A), 194 (__v4sf) __W, 195 (__mmask8) __U); 196} 197 198extern __inline __m128h 199__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 200_mm_maskz_conj_pch (__mmask8 __U, __m128h __A) 201{ 202 return (__m128h) __builtin_ia32_movaps128_mask ((__v4sf) _mm_conj_pch (__A), 203 (__v4sf) _mm_setzero_ps (), 204 (__mmask8) __U); 205} 206 207/* Intrinsics v[add,sub,mul,div]ph. */ 208extern __inline __m128h 209__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 210_mm_add_ph (__m128h __A, __m128h __B) 211{ 212 return (__m128h) ((__v8hf) __A + (__v8hf) __B); 213} 214 215extern __inline __m256h 216__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 217_mm256_add_ph (__m256h __A, __m256h __B) 218{ 219 return (__m256h) ((__v16hf) __A + (__v16hf) __B); 220} 221 222extern __inline __m128h 223__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 224_mm_mask_add_ph (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D) 225{ 226 return __builtin_ia32_addph128_mask (__C, __D, __A, __B); 227} 228 229extern __inline __m256h 230__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 231_mm256_mask_add_ph (__m256h __A, __mmask16 __B, __m256h __C, __m256h __D) 232{ 233 return __builtin_ia32_addph256_mask (__C, __D, __A, __B); 234} 235 236extern __inline __m128h 237__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 238_mm_maskz_add_ph (__mmask8 __A, __m128h __B, __m128h __C) 239{ 240 return __builtin_ia32_addph128_mask (__B, __C, _mm_setzero_ph (), 241 __A); 242} 243 244extern __inline __m256h 245__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 246_mm256_maskz_add_ph (__mmask16 __A, __m256h __B, __m256h __C) 247{ 248 return __builtin_ia32_addph256_mask (__B, __C, 249 _mm256_setzero_ph (), __A); 250} 251 252extern __inline __m128h 253__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 254_mm_sub_ph (__m128h __A, __m128h __B) 255{ 256 return (__m128h) ((__v8hf) __A - (__v8hf) __B); 257} 258 259extern __inline __m256h 260__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 261_mm256_sub_ph (__m256h __A, __m256h __B) 262{ 263 return (__m256h) ((__v16hf) __A - (__v16hf) __B); 264} 265 266extern __inline __m128h 267__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 268_mm_mask_sub_ph (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D) 269{ 270 return __builtin_ia32_subph128_mask (__C, __D, __A, __B); 271} 272 273extern __inline __m256h 274__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 275_mm256_mask_sub_ph (__m256h __A, __mmask16 __B, __m256h __C, __m256h __D) 276{ 277 return __builtin_ia32_subph256_mask (__C, __D, __A, __B); 278} 279 280extern __inline __m128h 281__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 282_mm_maskz_sub_ph (__mmask8 __A, __m128h __B, __m128h __C) 283{ 284 return __builtin_ia32_subph128_mask (__B, __C, _mm_setzero_ph (), 285 __A); 286} 287 288extern __inline __m256h 289__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 290_mm256_maskz_sub_ph (__mmask16 __A, __m256h __B, __m256h __C) 291{ 292 return __builtin_ia32_subph256_mask (__B, __C, 293 _mm256_setzero_ph (), __A); 294} 295 296extern __inline __m128h 297__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 298_mm_mul_ph (__m128h __A, __m128h __B) 299{ 300 return (__m128h) ((__v8hf) __A * (__v8hf) __B); 301} 302 303extern __inline __m256h 304__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 305_mm256_mul_ph (__m256h __A, __m256h __B) 306{ 307 return (__m256h) ((__v16hf) __A * (__v16hf) __B); 308} 309 310extern __inline __m128h 311__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 312_mm_mask_mul_ph (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D) 313{ 314 return __builtin_ia32_mulph128_mask (__C, __D, __A, __B); 315} 316 317extern __inline __m256h 318__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 319_mm256_mask_mul_ph (__m256h __A, __mmask16 __B, __m256h __C, __m256h __D) 320{ 321 return __builtin_ia32_mulph256_mask (__C, __D, __A, __B); 322} 323 324extern __inline __m128h 325__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 326_mm_maskz_mul_ph (__mmask8 __A, __m128h __B, __m128h __C) 327{ 328 return __builtin_ia32_mulph128_mask (__B, __C, _mm_setzero_ph (), 329 __A); 330} 331 332extern __inline __m256h 333__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 334_mm256_maskz_mul_ph (__mmask16 __A, __m256h __B, __m256h __C) 335{ 336 return __builtin_ia32_mulph256_mask (__B, __C, 337 _mm256_setzero_ph (), __A); 338} 339 340extern __inline __m128h 341__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 342_mm_div_ph (__m128h __A, __m128h __B) 343{ 344 return (__m128h) ((__v8hf) __A / (__v8hf) __B); 345} 346 347extern __inline __m256h 348__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 349_mm256_div_ph (__m256h __A, __m256h __B) 350{ 351 return (__m256h) ((__v16hf) __A / (__v16hf) __B); 352} 353 354extern __inline __m128h 355__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 356_mm_mask_div_ph (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D) 357{ 358 return __builtin_ia32_divph128_mask (__C, __D, __A, __B); 359} 360 361extern __inline __m256h 362__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 363_mm256_mask_div_ph (__m256h __A, __mmask16 __B, __m256h __C, __m256h __D) 364{ 365 return __builtin_ia32_divph256_mask (__C, __D, __A, __B); 366} 367 368extern __inline __m128h 369__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 370_mm_maskz_div_ph (__mmask8 __A, __m128h __B, __m128h __C) 371{ 372 return __builtin_ia32_divph128_mask (__B, __C, _mm_setzero_ph (), 373 __A); 374} 375 376extern __inline __m256h 377__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 378_mm256_maskz_div_ph (__mmask16 __A, __m256h __B, __m256h __C) 379{ 380 return __builtin_ia32_divph256_mask (__B, __C, 381 _mm256_setzero_ph (), __A); 382} 383 384/* Intrinsics v[max,min]ph. */ 385extern __inline __m128h 386__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 387_mm_max_ph (__m128h __A, __m128h __B) 388{ 389 return __builtin_ia32_maxph128_mask (__A, __B, 390 _mm_setzero_ph (), 391 (__mmask8) -1); 392} 393 394extern __inline __m256h 395__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 396_mm256_max_ph (__m256h __A, __m256h __B) 397{ 398 return __builtin_ia32_maxph256_mask (__A, __B, 399 _mm256_setzero_ph (), 400 (__mmask16) -1); 401} 402 403extern __inline __m128h 404__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 405_mm_mask_max_ph (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D) 406{ 407 return __builtin_ia32_maxph128_mask (__C, __D, __A, __B); 408} 409 410extern __inline __m256h 411__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 412_mm256_mask_max_ph (__m256h __A, __mmask16 __B, __m256h __C, __m256h __D) 413{ 414 return __builtin_ia32_maxph256_mask (__C, __D, __A, __B); 415} 416 417extern __inline __m128h 418__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 419_mm_maskz_max_ph (__mmask8 __A, __m128h __B, __m128h __C) 420{ 421 return __builtin_ia32_maxph128_mask (__B, __C, _mm_setzero_ph (), 422 __A); 423} 424 425extern __inline __m256h 426__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 427_mm256_maskz_max_ph (__mmask16 __A, __m256h __B, __m256h __C) 428{ 429 return __builtin_ia32_maxph256_mask (__B, __C, 430 _mm256_setzero_ph (), __A); 431} 432 433extern __inline __m128h 434__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 435_mm_min_ph (__m128h __A, __m128h __B) 436{ 437 return __builtin_ia32_minph128_mask (__A, __B, 438 _mm_setzero_ph (), 439 (__mmask8) -1); 440} 441 442extern __inline __m256h 443__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 444_mm256_min_ph (__m256h __A, __m256h __B) 445{ 446 return __builtin_ia32_minph256_mask (__A, __B, 447 _mm256_setzero_ph (), 448 (__mmask16) -1); 449} 450 451extern __inline __m128h 452__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 453_mm_mask_min_ph (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D) 454{ 455 return __builtin_ia32_minph128_mask (__C, __D, __A, __B); 456} 457 458extern __inline __m256h 459__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 460_mm256_mask_min_ph (__m256h __A, __mmask16 __B, __m256h __C, __m256h __D) 461{ 462 return __builtin_ia32_minph256_mask (__C, __D, __A, __B); 463} 464 465extern __inline __m128h 466__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 467_mm_maskz_min_ph (__mmask8 __A, __m128h __B, __m128h __C) 468{ 469 return __builtin_ia32_minph128_mask (__B, __C, _mm_setzero_ph (), 470 __A); 471} 472 473extern __inline __m256h 474__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 475_mm256_maskz_min_ph (__mmask16 __A, __m256h __B, __m256h __C) 476{ 477 return __builtin_ia32_minph256_mask (__B, __C, 478 _mm256_setzero_ph (), __A); 479} 480 481extern __inline __m128h 482__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 483_mm_abs_ph (__m128h __A) 484{ 485 return (__m128h) _mm_and_si128 ( _mm_set1_epi32 (0x7FFF7FFF), 486 (__m128i) __A); 487} 488 489extern __inline __m256h 490__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 491_mm256_abs_ph (__m256h __A) 492{ 493 return (__m256h) _mm256_and_si256 ( _mm256_set1_epi32 (0x7FFF7FFF), 494 (__m256i) __A); 495} 496 497/* vcmpph */ 498#ifdef __OPTIMIZE 499extern __inline __mmask8 500__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 501_mm_cmp_ph_mask (__m128h __A, __m128h __B, const int __C) 502{ 503 return (__mmask8) __builtin_ia32_cmpph128_mask (__A, __B, __C, 504 (__mmask8) -1); 505} 506 507extern __inline __mmask8 508__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 509_mm_mask_cmp_ph_mask (__mmask8 __A, __m128h __B, __m128h __C, 510 const int __D) 511{ 512 return (__mmask8) __builtin_ia32_cmpph128_mask (__B, __C, __D, __A); 513} 514 515extern __inline __mmask16 516__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 517_mm_cmp_ph_mask (__m256h __A, __m256h __B, const int __C) 518{ 519 return (__mmask16) __builtin_ia32_cmpph256_mask (__A, __B, __C, 520 (__mmask16) -1); 521} 522 523extern __inline __mmask16 524__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 525_mm_mask_cmp_ph_mask (__mmask16 __A, __m256h __B, __m256h __C, 526 const int __D) 527{ 528 return (__mmask16) __builtin_ia32_cmpph256_mask (__B, __C, __D, 529 __A); 530} 531 532#else 533#define _mm_cmp_ph_mask(A, B, C) \ 534 (__builtin_ia32_cmpph128_mask ((A), (B), (C), (-1))) 535 536#define _mm_mask_cmp_ph_mask(A, B, C, D) \ 537 (__builtin_ia32_cmpph128_mask ((B), (C), (D), (A))) 538 539#define _mm256_cmp_ph_mask(A, B, C) \ 540 (__builtin_ia32_cmpph256_mask ((A), (B), (C), (-1))) 541 542#define _mm256_mask_cmp_ph_mask(A, B, C, D) \ 543 (__builtin_ia32_cmpph256_mask ((B), (C), (D), (A))) 544 545#endif /* __OPTIMIZE__ */ 546 547/* Intrinsics vsqrtph. */ 548extern __inline __m128h 549__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 550_mm_sqrt_ph (__m128h __A) 551{ 552 return __builtin_ia32_sqrtph128_mask (__A, _mm_setzero_ph (), 553 (__mmask8) -1); 554} 555 556extern __inline __m256h 557__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 558_mm256_sqrt_ph (__m256h __A) 559{ 560 return __builtin_ia32_sqrtph256_mask (__A, _mm256_setzero_ph (), 561 (__mmask16) -1); 562} 563 564extern __inline __m128h 565__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 566_mm_mask_sqrt_ph (__m128h __A, __mmask8 __B, __m128h __C) 567{ 568 return __builtin_ia32_sqrtph128_mask (__C, __A, __B); 569} 570 571extern __inline __m256h 572__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 573_mm256_mask_sqrt_ph (__m256h __A, __mmask16 __B, __m256h __C) 574{ 575 return __builtin_ia32_sqrtph256_mask (__C, __A, __B); 576} 577 578extern __inline __m128h 579__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 580_mm_maskz_sqrt_ph (__mmask8 __A, __m128h __B) 581{ 582 return __builtin_ia32_sqrtph128_mask (__B, _mm_setzero_ph (), 583 __A); 584} 585 586extern __inline __m256h 587__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 588_mm256_maskz_sqrt_ph (__mmask16 __A, __m256h __B) 589{ 590 return __builtin_ia32_sqrtph256_mask (__B, _mm256_setzero_ph (), 591 __A); 592} 593 594/* Intrinsics vrsqrtph. */ 595extern __inline __m128h 596__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 597_mm_rsqrt_ph (__m128h __A) 598{ 599 return __builtin_ia32_rsqrtph128_mask (__A, _mm_setzero_ph (), 600 (__mmask8) -1); 601} 602 603extern __inline __m256h 604__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 605_mm256_rsqrt_ph (__m256h __A) 606{ 607 return __builtin_ia32_rsqrtph256_mask (__A, _mm256_setzero_ph (), 608 (__mmask16) -1); 609} 610 611extern __inline __m128h 612__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 613_mm_mask_rsqrt_ph (__m128h __A, __mmask8 __B, __m128h __C) 614{ 615 return __builtin_ia32_rsqrtph128_mask (__C, __A, __B); 616} 617 618extern __inline __m256h 619__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 620_mm256_mask_rsqrt_ph (__m256h __A, __mmask16 __B, __m256h __C) 621{ 622 return __builtin_ia32_rsqrtph256_mask (__C, __A, __B); 623} 624 625extern __inline __m128h 626__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 627_mm_maskz_rsqrt_ph (__mmask8 __A, __m128h __B) 628{ 629 return __builtin_ia32_rsqrtph128_mask (__B, _mm_setzero_ph (), __A); 630} 631 632extern __inline __m256h 633__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 634_mm256_maskz_rsqrt_ph (__mmask16 __A, __m256h __B) 635{ 636 return __builtin_ia32_rsqrtph256_mask (__B, _mm256_setzero_ph (), 637 __A); 638} 639 640/* Intrinsics vrcpph. */ 641extern __inline __m128h 642__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 643_mm_rcp_ph (__m128h __A) 644{ 645 return __builtin_ia32_rcpph128_mask (__A, _mm_setzero_ph (), 646 (__mmask8) -1); 647} 648 649extern __inline __m256h 650__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 651_mm256_rcp_ph (__m256h __A) 652{ 653 return __builtin_ia32_rcpph256_mask (__A, _mm256_setzero_ph (), 654 (__mmask16) -1); 655} 656 657extern __inline __m128h 658__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 659_mm_mask_rcp_ph (__m128h __A, __mmask8 __B, __m128h __C) 660{ 661 return __builtin_ia32_rcpph128_mask (__C, __A, __B); 662} 663 664extern __inline __m256h 665__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 666_mm256_mask_rcp_ph (__m256h __A, __mmask16 __B, __m256h __C) 667{ 668 return __builtin_ia32_rcpph256_mask (__C, __A, __B); 669} 670 671extern __inline __m128h 672__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 673_mm_maskz_rcp_ph (__mmask8 __A, __m128h __B) 674{ 675 return __builtin_ia32_rcpph128_mask (__B, _mm_setzero_ph (), __A); 676} 677 678extern __inline __m256h 679__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 680_mm256_maskz_rcp_ph (__mmask16 __A, __m256h __B) 681{ 682 return __builtin_ia32_rcpph256_mask (__B, _mm256_setzero_ph (), 683 __A); 684} 685 686/* Intrinsics vscalefph. */ 687extern __inline __m128h 688__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 689_mm_scalef_ph (__m128h __A, __m128h __B) 690{ 691 return __builtin_ia32_scalefph128_mask (__A, __B, 692 _mm_setzero_ph (), 693 (__mmask8) -1); 694} 695 696extern __inline __m256h 697__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 698_mm256_scalef_ph (__m256h __A, __m256h __B) 699{ 700 return __builtin_ia32_scalefph256_mask (__A, __B, 701 _mm256_setzero_ph (), 702 (__mmask16) -1); 703} 704 705extern __inline __m128h 706__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 707_mm_mask_scalef_ph (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D) 708{ 709 return __builtin_ia32_scalefph128_mask (__C, __D, __A, __B); 710} 711 712extern __inline __m256h 713__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 714_mm256_mask_scalef_ph (__m256h __A, __mmask16 __B, __m256h __C, 715 __m256h __D) 716{ 717 return __builtin_ia32_scalefph256_mask (__C, __D, __A, __B); 718} 719 720extern __inline __m128h 721__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 722_mm_maskz_scalef_ph (__mmask8 __A, __m128h __B, __m128h __C) 723{ 724 return __builtin_ia32_scalefph128_mask (__B, __C, 725 _mm_setzero_ph (), __A); 726} 727 728extern __inline __m256h 729__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 730_mm256_maskz_scalef_ph (__mmask16 __A, __m256h __B, __m256h __C) 731{ 732 return __builtin_ia32_scalefph256_mask (__B, __C, 733 _mm256_setzero_ph (), 734 __A); 735} 736 737/* Intrinsics vreduceph. */ 738#ifdef __OPTIMIZE__ 739extern __inline __m128h 740__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 741_mm_reduce_ph (__m128h __A, int __B) 742{ 743 return __builtin_ia32_reduceph128_mask (__A, __B, 744 _mm_setzero_ph (), 745 (__mmask8) -1); 746} 747 748extern __inline __m128h 749__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 750_mm_mask_reduce_ph (__m128h __A, __mmask8 __B, __m128h __C, int __D) 751{ 752 return __builtin_ia32_reduceph128_mask (__C, __D, __A, __B); 753} 754 755extern __inline __m128h 756__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 757_mm_maskz_reduce_ph (__mmask8 __A, __m128h __B, int __C) 758{ 759 return __builtin_ia32_reduceph128_mask (__B, __C, 760 _mm_setzero_ph (), __A); 761} 762 763extern __inline __m256h 764__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 765_mm256_reduce_ph (__m256h __A, int __B) 766{ 767 return __builtin_ia32_reduceph256_mask (__A, __B, 768 _mm256_setzero_ph (), 769 (__mmask16) -1); 770} 771 772extern __inline __m256h 773__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 774_mm256_mask_reduce_ph (__m256h __A, __mmask16 __B, __m256h __C, int __D) 775{ 776 return __builtin_ia32_reduceph256_mask (__C, __D, __A, __B); 777} 778 779extern __inline __m256h 780__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 781_mm256_maskz_reduce_ph (__mmask16 __A, __m256h __B, int __C) 782{ 783 return __builtin_ia32_reduceph256_mask (__B, __C, 784 _mm256_setzero_ph (), 785 __A); 786} 787 788#else 789#define _mm_reduce_ph(A, B) \ 790 (__builtin_ia32_reduceph128_mask ((A), (B), \ 791 _mm_setzero_ph (), \ 792 ((__mmask8)-1))) 793 794#define _mm_mask_reduce_ph(A, B, C, D) \ 795 (__builtin_ia32_reduceph128_mask ((C), (D), (A), (B))) 796 797#define _mm_maskz_reduce_ph(A, B, C) \ 798 (__builtin_ia32_reduceph128_mask ((B), (C), _mm_setzero_ph (), (A))) 799 800#define _mm256_reduce_ph(A, B) \ 801 (__builtin_ia32_reduceph256_mask ((A), (B), \ 802 _mm256_setzero_ph (), \ 803 ((__mmask16)-1))) 804 805#define _mm256_mask_reduce_ph(A, B, C, D) \ 806 (__builtin_ia32_reduceph256_mask ((C), (D), (A), (B))) 807 808#define _mm256_maskz_reduce_ph(A, B, C) \ 809 (__builtin_ia32_reduceph256_mask ((B), (C), _mm256_setzero_ph (), (A))) 810 811#endif /* __OPTIMIZE__ */ 812 813/* Intrinsics vrndscaleph. */ 814#ifdef __OPTIMIZE__ 815 extern __inline __m128h 816 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 817 _mm_roundscale_ph (__m128h __A, int __B) 818 { 819 return __builtin_ia32_rndscaleph128_mask (__A, __B, 820 _mm_setzero_ph (), 821 (__mmask8) -1); 822 } 823 824extern __inline __m128h 825__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 826_mm_mask_roundscale_ph (__m128h __A, __mmask8 __B, __m128h __C, int __D) 827{ 828 return __builtin_ia32_rndscaleph128_mask (__C, __D, __A, __B); 829} 830 831extern __inline __m128h 832__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 833_mm_maskz_roundscale_ph (__mmask8 __A, __m128h __B, int __C) 834{ 835 return __builtin_ia32_rndscaleph128_mask (__B, __C, 836 _mm_setzero_ph (), __A); 837} 838 839extern __inline __m256h 840__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 841_mm256_roundscale_ph (__m256h __A, int __B) 842{ 843 return __builtin_ia32_rndscaleph256_mask (__A, __B, 844 _mm256_setzero_ph (), 845 (__mmask16) -1); 846} 847 848extern __inline __m256h 849__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 850_mm256_mask_roundscale_ph (__m256h __A, __mmask16 __B, __m256h __C, 851 int __D) 852{ 853 return __builtin_ia32_rndscaleph256_mask (__C, __D, __A, __B); 854} 855 856extern __inline __m256h 857__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 858_mm256_maskz_roundscale_ph (__mmask16 __A, __m256h __B, int __C) 859{ 860 return __builtin_ia32_rndscaleph256_mask (__B, __C, 861 _mm256_setzero_ph (), 862 __A); 863} 864 865#else 866#define _mm_roundscale_ph(A, B) \ 867 (__builtin_ia32_rndscaleph128_mask ((A), (B), _mm_setzero_ph (), \ 868 ((__mmask8)-1))) 869 870#define _mm_mask_roundscale_ph(A, B, C, D) \ 871 (__builtin_ia32_rndscaleph128_mask ((C), (D), (A), (B))) 872 873#define _mm_maskz_roundscale_ph(A, B, C) \ 874 (__builtin_ia32_rndscaleph128_mask ((B), (C), _mm_setzero_ph (), (A))) 875 876#define _mm256_roundscale_ph(A, B) \ 877 (__builtin_ia32_rndscaleph256_mask ((A), (B), \ 878 _mm256_setzero_ph(), \ 879 ((__mmask16)-1))) 880 881#define _mm256_mask_roundscale_ph(A, B, C, D) \ 882 (__builtin_ia32_rndscaleph256_mask ((C), (D), (A), (B))) 883 884#define _mm256_maskz_roundscale_ph(A, B, C) \ 885 (__builtin_ia32_rndscaleph256_mask ((B), (C), \ 886 _mm256_setzero_ph (), (A))) 887 888#endif /* __OPTIMIZE__ */ 889 890/* Intrinsics vfpclassph. */ 891#ifdef __OPTIMIZE__ 892extern __inline __mmask8 893__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 894 _mm_mask_fpclass_ph_mask (__mmask8 __U, __m128h __A, const int __imm) 895{ 896 return (__mmask8) __builtin_ia32_fpclassph128_mask ((__v8hf) __A, 897 __imm, __U); 898} 899 900extern __inline __mmask8 901__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 902_mm_fpclass_ph_mask (__m128h __A, const int __imm) 903{ 904 return (__mmask8) __builtin_ia32_fpclassph128_mask ((__v8hf) __A, 905 __imm, 906 (__mmask8) -1); 907} 908 909extern __inline __mmask16 910__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 911_mm256_mask_fpclass_ph_mask (__mmask16 __U, __m256h __A, const int __imm) 912{ 913 return (__mmask16) __builtin_ia32_fpclassph256_mask ((__v16hf) __A, 914 __imm, __U); 915} 916 917extern __inline __mmask16 918__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 919_mm256_fpclass_ph_mask (__m256h __A, const int __imm) 920{ 921 return (__mmask16) __builtin_ia32_fpclassph256_mask ((__v16hf) __A, 922 __imm, 923 (__mmask16) -1); 924} 925 926#else 927#define _mm_fpclass_ph_mask(X, C) \ 928 ((__mmask8) __builtin_ia32_fpclassph128_mask ((__v8hf) (__m128h) (X), \ 929 (int) (C),(__mmask8)-1)) 930 931#define _mm_mask_fpclass_ph_mask(u, X, C) \ 932 ((__mmask8) __builtin_ia32_fpclassph128_mask ((__v8hf) (__m128h) (X), \ 933 (int) (C),(__mmask8)(u))) 934 935#define _mm256_fpclass_ph_mask(X, C) \ 936 ((__mmask16) __builtin_ia32_fpclassph256_mask ((__v16hf) (__m256h) (X), \ 937 (int) (C),(__mmask16)-1)) 938 939#define _mm256_mask_fpclass_ph_mask(u, X, C) \ 940 ((__mmask16) __builtin_ia32_fpclassph256_mask ((__v16hf) (__m256h) (X), \ 941 (int) (C),(__mmask16)(u))) 942#endif /* __OPTIMIZE__ */ 943 944/* Intrinsics vgetexpph, vgetexpsh. */ 945extern __inline __m256h 946__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 947_mm256_getexp_ph (__m256h __A) 948{ 949 return (__m256h) __builtin_ia32_getexpph256_mask ((__v16hf) __A, 950 (__v16hf) 951 _mm256_setzero_ph (), 952 (__mmask16) -1); 953} 954 955extern __inline __m256h 956__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 957_mm256_mask_getexp_ph (__m256h __W, __mmask16 __U, __m256h __A) 958{ 959 return (__m256h) __builtin_ia32_getexpph256_mask ((__v16hf) __A, 960 (__v16hf) __W, 961 (__mmask16) __U); 962} 963 964extern __inline __m256h 965__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 966_mm256_maskz_getexp_ph (__mmask16 __U, __m256h __A) 967{ 968 return (__m256h) __builtin_ia32_getexpph256_mask ((__v16hf) __A, 969 (__v16hf) 970 _mm256_setzero_ph (), 971 (__mmask16) __U); 972} 973 974extern __inline __m128h 975__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 976_mm_getexp_ph (__m128h __A) 977{ 978 return (__m128h) __builtin_ia32_getexpph128_mask ((__v8hf) __A, 979 (__v8hf) 980 _mm_setzero_ph (), 981 (__mmask8) -1); 982} 983 984extern __inline __m128h 985__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 986_mm_mask_getexp_ph (__m128h __W, __mmask8 __U, __m128h __A) 987{ 988 return (__m128h) __builtin_ia32_getexpph128_mask ((__v8hf) __A, 989 (__v8hf) __W, 990 (__mmask8) __U); 991} 992 993extern __inline __m128h 994__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 995_mm_maskz_getexp_ph (__mmask8 __U, __m128h __A) 996{ 997 return (__m128h) __builtin_ia32_getexpph128_mask ((__v8hf) __A, 998 (__v8hf) 999 _mm_setzero_ph (), 1000 (__mmask8) __U); 1001} 1002 1003 1004/* Intrinsics vgetmantph, vgetmantsh. */ 1005#ifdef __OPTIMIZE__ 1006extern __inline __m256h 1007__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1008_mm256_getmant_ph (__m256h __A, _MM_MANTISSA_NORM_ENUM __B, 1009 _MM_MANTISSA_SIGN_ENUM __C) 1010{ 1011 return (__m256h) __builtin_ia32_getmantph256_mask ((__v16hf) __A, 1012 (__C << 2) | __B, 1013 (__v16hf) 1014 _mm256_setzero_ph (), 1015 (__mmask16) -1); 1016} 1017 1018extern __inline __m256h 1019__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1020_mm256_mask_getmant_ph (__m256h __W, __mmask16 __U, __m256h __A, 1021 _MM_MANTISSA_NORM_ENUM __B, 1022 _MM_MANTISSA_SIGN_ENUM __C) 1023{ 1024 return (__m256h) __builtin_ia32_getmantph256_mask ((__v16hf) __A, 1025 (__C << 2) | __B, 1026 (__v16hf) __W, 1027 (__mmask16) __U); 1028} 1029 1030extern __inline __m256h 1031__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1032_mm256_maskz_getmant_ph (__mmask16 __U, __m256h __A, 1033 _MM_MANTISSA_NORM_ENUM __B, 1034 _MM_MANTISSA_SIGN_ENUM __C) 1035{ 1036 return (__m256h) __builtin_ia32_getmantph256_mask ((__v16hf) __A, 1037 (__C << 2) | __B, 1038 (__v16hf) 1039 _mm256_setzero_ph (), 1040 (__mmask16) __U); 1041} 1042 1043extern __inline __m128h 1044__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1045_mm_getmant_ph (__m128h __A, _MM_MANTISSA_NORM_ENUM __B, 1046 _MM_MANTISSA_SIGN_ENUM __C) 1047{ 1048 return (__m128h) __builtin_ia32_getmantph128_mask ((__v8hf) __A, 1049 (__C << 2) | __B, 1050 (__v8hf) 1051 _mm_setzero_ph (), 1052 (__mmask8) -1); 1053} 1054 1055extern __inline __m128h 1056__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1057_mm_mask_getmant_ph (__m128h __W, __mmask8 __U, __m128h __A, 1058 _MM_MANTISSA_NORM_ENUM __B, 1059 _MM_MANTISSA_SIGN_ENUM __C) 1060{ 1061 return (__m128h) __builtin_ia32_getmantph128_mask ((__v8hf) __A, 1062 (__C << 2) | __B, 1063 (__v8hf) __W, 1064 (__mmask8) __U); 1065} 1066 1067extern __inline __m128h 1068__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1069_mm_maskz_getmant_ph (__mmask8 __U, __m128h __A, 1070 _MM_MANTISSA_NORM_ENUM __B, 1071 _MM_MANTISSA_SIGN_ENUM __C) 1072{ 1073 return (__m128h) __builtin_ia32_getmantph128_mask ((__v8hf) __A, 1074 (__C << 2) | __B, 1075 (__v8hf) 1076 _mm_setzero_ph (), 1077 (__mmask8) __U); 1078} 1079 1080#else 1081#define _mm256_getmant_ph(X, B, C) \ 1082 ((__m256h) __builtin_ia32_getmantph256_mask ((__v16hf)(__m256h) (X), \ 1083 (int)(((C)<<2) | (B)), \ 1084 (__v16hf)(__m256h)_mm256_setzero_ph (), \ 1085 (__mmask16)-1)) 1086 1087#define _mm256_mask_getmant_ph(W, U, X, B, C) \ 1088 ((__m256h) __builtin_ia32_getmantph256_mask ((__v16hf)(__m256h) (X), \ 1089 (int)(((C)<<2) | (B)), \ 1090 (__v16hf)(__m256h)(W), \ 1091 (__mmask16)(U))) 1092 1093#define _mm256_maskz_getmant_ph(U, X, B, C) \ 1094 ((__m256h) __builtin_ia32_getmantph256_mask ((__v16hf)(__m256h) (X), \ 1095 (int)(((C)<<2) | (B)), \ 1096 (__v16hf)(__m256h)_mm256_setzero_ph (), \ 1097 (__mmask16)(U))) 1098 1099#define _mm_getmant_ph(X, B, C) \ 1100 ((__m128h) __builtin_ia32_getmantph128_mask ((__v8hf)(__m128h) (X), \ 1101 (int)(((C)<<2) | (B)), \ 1102 (__v8hf)(__m128h)_mm_setzero_ph (), \ 1103 (__mmask8)-1)) 1104 1105#define _mm_mask_getmant_ph(W, U, X, B, C) \ 1106 ((__m128h) __builtin_ia32_getmantph128_mask ((__v8hf)(__m128h) (X), \ 1107 (int)(((C)<<2) | (B)), \ 1108 (__v8hf)(__m128h)(W), \ 1109 (__mmask8)(U))) 1110 1111#define _mm_maskz_getmant_ph(U, X, B, C) \ 1112 ((__m128h) __builtin_ia32_getmantph128_mask ((__v8hf)(__m128h) (X), \ 1113 (int)(((C)<<2) | (B)), \ 1114 (__v8hf)(__m128h)_mm_setzero_ph (), \ 1115 (__mmask8)(U))) 1116 1117#endif /* __OPTIMIZE__ */ 1118 1119/* Intrinsics vcvtph2dq. */ 1120extern __inline __m128i 1121__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1122_mm_cvtph_epi32 (__m128h __A) 1123{ 1124 return (__m128i) 1125 __builtin_ia32_vcvtph2dq128_mask (__A, 1126 (__v4si) 1127 _mm_setzero_si128 (), 1128 (__mmask8) -1); 1129} 1130 1131extern __inline __m128i 1132__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1133_mm_mask_cvtph_epi32 (__m128i __A, __mmask8 __B, __m128h __C) 1134{ 1135 return (__m128i) 1136 __builtin_ia32_vcvtph2dq128_mask (__C, ( __v4si) __A, __B); 1137} 1138 1139extern __inline __m128i 1140__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1141_mm_maskz_cvtph_epi32 (__mmask8 __A, __m128h __B) 1142{ 1143 return (__m128i) 1144 __builtin_ia32_vcvtph2dq128_mask (__B, 1145 (__v4si) _mm_setzero_si128 (), 1146 __A); 1147} 1148 1149extern __inline __m256i 1150__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1151_mm256_cvtph_epi32 (__m128h __A) 1152{ 1153 return (__m256i) 1154 __builtin_ia32_vcvtph2dq256_mask (__A, 1155 (__v8si) 1156 _mm256_setzero_si256 (), 1157 (__mmask8) -1); 1158} 1159 1160extern __inline __m256i 1161__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1162_mm256_mask_cvtph_epi32 (__m256i __A, __mmask8 __B, __m128h __C) 1163{ 1164 return (__m256i) 1165 __builtin_ia32_vcvtph2dq256_mask (__C, ( __v8si) __A, __B); 1166} 1167 1168extern __inline __m256i 1169__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1170_mm256_maskz_cvtph_epi32 (__mmask8 __A, __m128h __B) 1171{ 1172 return (__m256i) 1173 __builtin_ia32_vcvtph2dq256_mask (__B, 1174 (__v8si) 1175 _mm256_setzero_si256 (), 1176 __A); 1177} 1178 1179/* Intrinsics vcvtph2udq. */ 1180extern __inline __m128i 1181__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1182_mm_cvtph_epu32 (__m128h __A) 1183{ 1184 return (__m128i) 1185 __builtin_ia32_vcvtph2udq128_mask (__A, 1186 (__v4si) 1187 _mm_setzero_si128 (), 1188 (__mmask8) -1); 1189} 1190 1191extern __inline __m128i 1192__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1193_mm_mask_cvtph_epu32 (__m128i __A, __mmask8 __B, __m128h __C) 1194{ 1195 return (__m128i) 1196 __builtin_ia32_vcvtph2udq128_mask (__C, ( __v4si) __A, __B); 1197} 1198 1199extern __inline __m128i 1200__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1201_mm_maskz_cvtph_epu32 (__mmask8 __A, __m128h __B) 1202{ 1203 return (__m128i) 1204 __builtin_ia32_vcvtph2udq128_mask (__B, 1205 (__v4si) 1206 _mm_setzero_si128 (), 1207 __A); 1208} 1209 1210extern __inline __m256i 1211__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1212_mm256_cvtph_epu32 (__m128h __A) 1213{ 1214 return (__m256i) 1215 __builtin_ia32_vcvtph2udq256_mask (__A, 1216 (__v8si) 1217 _mm256_setzero_si256 (), 1218 (__mmask8) -1); 1219} 1220 1221extern __inline __m256i 1222__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1223_mm256_mask_cvtph_epu32 (__m256i __A, __mmask8 __B, __m128h __C) 1224{ 1225 return (__m256i) 1226 __builtin_ia32_vcvtph2udq256_mask (__C, ( __v8si) __A, __B); 1227} 1228 1229extern __inline __m256i 1230__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1231_mm256_maskz_cvtph_epu32 (__mmask8 __A, __m128h __B) 1232{ 1233 return (__m256i) 1234 __builtin_ia32_vcvtph2udq256_mask (__B, 1235 (__v8si) _mm256_setzero_si256 (), 1236 __A); 1237} 1238 1239/* Intrinsics vcvttph2dq. */ 1240extern __inline __m128i 1241__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1242_mm_cvttph_epi32 (__m128h __A) 1243{ 1244 return (__m128i) 1245 __builtin_ia32_vcvttph2dq128_mask (__A, 1246 (__v4si) _mm_setzero_si128 (), 1247 (__mmask8) -1); 1248} 1249 1250extern __inline __m128i 1251__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1252_mm_mask_cvttph_epi32 (__m128i __A, __mmask8 __B, __m128h __C) 1253{ 1254 return (__m128i)__builtin_ia32_vcvttph2dq128_mask (__C, 1255 ( __v4si) __A, 1256 __B); 1257} 1258 1259extern __inline __m128i 1260__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1261_mm_maskz_cvttph_epi32 (__mmask8 __A, __m128h __B) 1262{ 1263 return (__m128i) 1264 __builtin_ia32_vcvttph2dq128_mask (__B, 1265 (__v4si) _mm_setzero_si128 (), 1266 __A); 1267} 1268 1269extern __inline __m256i 1270__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1271_mm256_cvttph_epi32 (__m128h __A) 1272{ 1273 return (__m256i) 1274 __builtin_ia32_vcvttph2dq256_mask (__A, 1275 (__v8si) 1276 _mm256_setzero_si256 (), 1277 (__mmask8) -1); 1278} 1279 1280extern __inline __m256i 1281__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1282_mm256_mask_cvttph_epi32 (__m256i __A, __mmask8 __B, __m128h __C) 1283{ 1284 return (__m256i) 1285 __builtin_ia32_vcvttph2dq256_mask (__C, 1286 ( __v8si) __A, 1287 __B); 1288} 1289 1290extern __inline __m256i 1291__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1292_mm256_maskz_cvttph_epi32 (__mmask8 __A, __m128h __B) 1293{ 1294 return (__m256i) 1295 __builtin_ia32_vcvttph2dq256_mask (__B, 1296 (__v8si) 1297 _mm256_setzero_si256 (), 1298 __A); 1299} 1300 1301/* Intrinsics vcvttph2udq. */ 1302extern __inline __m128i 1303__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1304_mm_cvttph_epu32 (__m128h __A) 1305{ 1306 return (__m128i) 1307 __builtin_ia32_vcvttph2udq128_mask (__A, 1308 (__v4si) 1309 _mm_setzero_si128 (), 1310 (__mmask8) -1); 1311} 1312 1313extern __inline __m128i 1314__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1315_mm_mask_cvttph_epu32 (__m128i __A, __mmask8 __B, __m128h __C) 1316{ 1317 return (__m128i) 1318 __builtin_ia32_vcvttph2udq128_mask (__C, 1319 ( __v4si) __A, 1320 __B); 1321} 1322 1323extern __inline __m128i 1324__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1325_mm_maskz_cvttph_epu32 (__mmask8 __A, __m128h __B) 1326{ 1327 return (__m128i) 1328 __builtin_ia32_vcvttph2udq128_mask (__B, 1329 (__v4si) 1330 _mm_setzero_si128 (), 1331 __A); 1332} 1333 1334extern __inline __m256i 1335__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1336_mm256_cvttph_epu32 (__m128h __A) 1337{ 1338 return (__m256i) 1339 __builtin_ia32_vcvttph2udq256_mask (__A, 1340 (__v8si) 1341 _mm256_setzero_si256 (), (__mmask8) -1); 1342} 1343 1344extern __inline __m256i 1345__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1346_mm256_mask_cvttph_epu32 (__m256i __A, __mmask8 __B, __m128h __C) 1347{ 1348 return (__m256i) 1349 __builtin_ia32_vcvttph2udq256_mask (__C, 1350 ( __v8si) __A, 1351 __B); 1352} 1353 1354extern __inline __m256i 1355__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1356_mm256_maskz_cvttph_epu32 (__mmask8 __A, __m128h __B) 1357{ 1358 return (__m256i) 1359 __builtin_ia32_vcvttph2udq256_mask (__B, 1360 (__v8si) 1361 _mm256_setzero_si256 (), 1362 __A); 1363} 1364 1365/* Intrinsics vcvtdq2ph. */ 1366extern __inline __m128h 1367__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1368_mm_cvtepi32_ph (__m128i __A) 1369{ 1370 return __builtin_ia32_vcvtdq2ph128_mask ((__v4si) __A, 1371 _mm_setzero_ph (), 1372 (__mmask8) -1); 1373} 1374 1375extern __inline __m128h 1376__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1377_mm_mask_cvtepi32_ph (__m128h __A, __mmask8 __B, __m128i __C) 1378{ 1379 return __builtin_ia32_vcvtdq2ph128_mask ((__v4si) __C, __A, __B); 1380} 1381 1382extern __inline __m128h 1383__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1384_mm_maskz_cvtepi32_ph (__mmask8 __A, __m128i __B) 1385{ 1386 return __builtin_ia32_vcvtdq2ph128_mask ((__v4si) __B, 1387 _mm_setzero_ph (), 1388 __A); 1389} 1390 1391extern __inline __m128h 1392__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1393_mm256_cvtepi32_ph (__m256i __A) 1394{ 1395 return __builtin_ia32_vcvtdq2ph256_mask ((__v8si) __A, 1396 _mm_setzero_ph (), 1397 (__mmask8) -1); 1398} 1399 1400extern __inline __m128h 1401__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1402_mm256_mask_cvtepi32_ph (__m128h __A, __mmask8 __B, __m256i __C) 1403{ 1404 return __builtin_ia32_vcvtdq2ph256_mask ((__v8si) __C, __A, __B); 1405} 1406 1407extern __inline __m128h 1408__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1409_mm256_maskz_cvtepi32_ph (__mmask8 __A, __m256i __B) 1410{ 1411 return __builtin_ia32_vcvtdq2ph256_mask ((__v8si) __B, 1412 _mm_setzero_ph (), 1413 __A); 1414} 1415 1416/* Intrinsics vcvtudq2ph. */ 1417extern __inline __m128h 1418__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1419_mm_cvtepu32_ph (__m128i __A) 1420{ 1421 return __builtin_ia32_vcvtudq2ph128_mask ((__v4si) __A, 1422 _mm_setzero_ph (), 1423 (__mmask8) -1); 1424} 1425 1426extern __inline __m128h 1427__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1428_mm_mask_cvtepu32_ph (__m128h __A, __mmask8 __B, __m128i __C) 1429{ 1430 return __builtin_ia32_vcvtudq2ph128_mask ((__v4si) __C, 1431 __A, 1432 __B); 1433} 1434 1435extern __inline __m128h 1436__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1437_mm_maskz_cvtepu32_ph (__mmask8 __A, __m128i __B) 1438{ 1439 return __builtin_ia32_vcvtudq2ph128_mask ((__v4si) __B, 1440 _mm_setzero_ph (), 1441 __A); 1442} 1443 1444extern __inline __m128h 1445__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1446_mm256_cvtepu32_ph (__m256i __A) 1447{ 1448 return __builtin_ia32_vcvtudq2ph256_mask ((__v8si) __A, 1449 _mm_setzero_ph (), 1450 (__mmask8) -1); 1451} 1452 1453extern __inline __m128h 1454__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1455_mm256_mask_cvtepu32_ph (__m128h __A, __mmask8 __B, __m256i __C) 1456{ 1457 return __builtin_ia32_vcvtudq2ph256_mask ((__v8si) __C, __A, __B); 1458} 1459 1460extern __inline __m128h 1461__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1462_mm256_maskz_cvtepu32_ph (__mmask8 __A, __m256i __B) 1463{ 1464 return __builtin_ia32_vcvtudq2ph256_mask ((__v8si) __B, 1465 _mm_setzero_ph (), 1466 __A); 1467} 1468 1469/* Intrinsics vcvtph2qq. */ 1470extern __inline __m128i 1471__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1472_mm_cvtph_epi64 (__m128h __A) 1473{ 1474 return 1475 __builtin_ia32_vcvtph2qq128_mask (__A, 1476 _mm_setzero_si128 (), 1477 (__mmask8) -1); 1478} 1479 1480extern __inline __m128i 1481__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1482_mm_mask_cvtph_epi64 (__m128i __A, __mmask8 __B, __m128h __C) 1483{ 1484 return __builtin_ia32_vcvtph2qq128_mask (__C, __A, __B); 1485} 1486 1487extern __inline __m128i 1488__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1489_mm_maskz_cvtph_epi64 (__mmask8 __A, __m128h __B) 1490{ 1491 return __builtin_ia32_vcvtph2qq128_mask (__B, 1492 _mm_setzero_si128 (), 1493 __A); 1494} 1495 1496extern __inline __m256i 1497__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1498_mm256_cvtph_epi64 (__m128h __A) 1499{ 1500 return __builtin_ia32_vcvtph2qq256_mask (__A, 1501 _mm256_setzero_si256 (), 1502 (__mmask8) -1); 1503} 1504 1505extern __inline __m256i 1506__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1507_mm256_mask_cvtph_epi64 (__m256i __A, __mmask8 __B, __m128h __C) 1508{ 1509 return __builtin_ia32_vcvtph2qq256_mask (__C, __A, __B); 1510} 1511 1512extern __inline __m256i 1513__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1514_mm256_maskz_cvtph_epi64 (__mmask8 __A, __m128h __B) 1515{ 1516 return __builtin_ia32_vcvtph2qq256_mask (__B, 1517 _mm256_setzero_si256 (), 1518 __A); 1519} 1520 1521/* Intrinsics vcvtph2uqq. */ 1522extern __inline __m128i 1523__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1524_mm_cvtph_epu64 (__m128h __A) 1525{ 1526 return __builtin_ia32_vcvtph2uqq128_mask (__A, 1527 _mm_setzero_si128 (), 1528 (__mmask8) -1); 1529} 1530 1531extern __inline __m128i 1532__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1533_mm_mask_cvtph_epu64 (__m128i __A, __mmask8 __B, __m128h __C) 1534{ 1535 return __builtin_ia32_vcvtph2uqq128_mask (__C, __A, __B); 1536} 1537 1538extern __inline __m128i 1539__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1540_mm_maskz_cvtph_epu64 (__mmask8 __A, __m128h __B) 1541{ 1542 return __builtin_ia32_vcvtph2uqq128_mask (__B, 1543 _mm_setzero_si128 (), 1544 __A); 1545} 1546 1547extern __inline __m256i 1548__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1549_mm256_cvtph_epu64 (__m128h __A) 1550{ 1551 return __builtin_ia32_vcvtph2uqq256_mask (__A, 1552 _mm256_setzero_si256 (), 1553 (__mmask8) -1); 1554} 1555 1556extern __inline __m256i 1557__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1558_mm256_mask_cvtph_epu64 (__m256i __A, __mmask8 __B, __m128h __C) 1559{ 1560 return __builtin_ia32_vcvtph2uqq256_mask (__C, __A, __B); 1561} 1562 1563extern __inline __m256i 1564__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1565_mm256_maskz_cvtph_epu64 (__mmask8 __A, __m128h __B) 1566{ 1567 return __builtin_ia32_vcvtph2uqq256_mask (__B, 1568 _mm256_setzero_si256 (), 1569 __A); 1570} 1571 1572/* Intrinsics vcvttph2qq. */ 1573extern __inline __m128i 1574__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1575_mm_cvttph_epi64 (__m128h __A) 1576{ 1577 return __builtin_ia32_vcvttph2qq128_mask (__A, 1578 _mm_setzero_si128 (), 1579 (__mmask8) -1); 1580} 1581 1582extern __inline __m128i 1583__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1584_mm_mask_cvttph_epi64 (__m128i __A, __mmask8 __B, __m128h __C) 1585{ 1586 return __builtin_ia32_vcvttph2qq128_mask (__C, 1587 __A, 1588 __B); 1589} 1590 1591extern __inline __m128i 1592__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1593_mm_maskz_cvttph_epi64 (__mmask8 __A, __m128h __B) 1594{ 1595 return __builtin_ia32_vcvttph2qq128_mask (__B, 1596 _mm_setzero_si128 (), 1597 __A); 1598} 1599 1600extern __inline __m256i 1601__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1602_mm256_cvttph_epi64 (__m128h __A) 1603{ 1604 return __builtin_ia32_vcvttph2qq256_mask (__A, 1605 _mm256_setzero_si256 (), 1606 (__mmask8) -1); 1607} 1608 1609extern __inline __m256i 1610__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1611_mm256_mask_cvttph_epi64 (__m256i __A, __mmask8 __B, __m128h __C) 1612{ 1613 return __builtin_ia32_vcvttph2qq256_mask (__C, 1614 __A, 1615 __B); 1616} 1617 1618extern __inline __m256i 1619__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1620_mm256_maskz_cvttph_epi64 (__mmask8 __A, __m128h __B) 1621{ 1622 return __builtin_ia32_vcvttph2qq256_mask (__B, 1623 _mm256_setzero_si256 (), 1624 __A); 1625} 1626 1627/* Intrinsics vcvttph2uqq. */ 1628extern __inline __m128i 1629__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1630_mm_cvttph_epu64 (__m128h __A) 1631{ 1632 return __builtin_ia32_vcvttph2uqq128_mask (__A, 1633 _mm_setzero_si128 (), 1634 (__mmask8) -1); 1635} 1636 1637extern __inline __m128i 1638__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1639_mm_mask_cvttph_epu64 (__m128i __A, __mmask8 __B, __m128h __C) 1640{ 1641 return __builtin_ia32_vcvttph2uqq128_mask (__C, 1642 __A, 1643 __B); 1644} 1645 1646extern __inline __m128i 1647__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1648_mm_maskz_cvttph_epu64 (__mmask8 __A, __m128h __B) 1649{ 1650 return __builtin_ia32_vcvttph2uqq128_mask (__B, 1651 _mm_setzero_si128 (), 1652 __A); 1653} 1654 1655extern __inline __m256i 1656__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1657_mm256_cvttph_epu64 (__m128h __A) 1658{ 1659 return __builtin_ia32_vcvttph2uqq256_mask (__A, 1660 _mm256_setzero_si256 (), 1661 (__mmask8) -1); 1662} 1663 1664extern __inline __m256i 1665__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1666_mm256_mask_cvttph_epu64 (__m256i __A, __mmask8 __B, __m128h __C) 1667{ 1668 return __builtin_ia32_vcvttph2uqq256_mask (__C, 1669 __A, 1670 __B); 1671} 1672 1673extern __inline __m256i 1674__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1675_mm256_maskz_cvttph_epu64 (__mmask8 __A, __m128h __B) 1676{ 1677 return __builtin_ia32_vcvttph2uqq256_mask (__B, 1678 _mm256_setzero_si256 (), 1679 __A); 1680} 1681 1682/* Intrinsics vcvtqq2ph. */ 1683extern __inline __m128h 1684__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1685_mm_cvtepi64_ph (__m128i __A) 1686{ 1687 return __builtin_ia32_vcvtqq2ph128_mask ((__v2di) __A, 1688 _mm_setzero_ph (), 1689 (__mmask8) -1); 1690} 1691 1692extern __inline __m128h 1693__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1694_mm_mask_cvtepi64_ph (__m128h __A, __mmask8 __B, __m128i __C) 1695{ 1696 return __builtin_ia32_vcvtqq2ph128_mask ((__v2di) __C, __A, __B); 1697} 1698 1699extern __inline __m128h 1700__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1701_mm_maskz_cvtepi64_ph (__mmask8 __A, __m128i __B) 1702{ 1703 return __builtin_ia32_vcvtqq2ph128_mask ((__v2di) __B, 1704 _mm_setzero_ph (), 1705 __A); 1706} 1707 1708extern __inline __m128h 1709__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1710_mm256_cvtepi64_ph (__m256i __A) 1711{ 1712 return __builtin_ia32_vcvtqq2ph256_mask ((__v4di) __A, 1713 _mm_setzero_ph (), 1714 (__mmask8) -1); 1715} 1716 1717extern __inline __m128h 1718__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1719_mm256_mask_cvtepi64_ph (__m128h __A, __mmask8 __B, __m256i __C) 1720{ 1721 return __builtin_ia32_vcvtqq2ph256_mask ((__v4di) __C, __A, __B); 1722} 1723 1724extern __inline __m128h 1725__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1726_mm256_maskz_cvtepi64_ph (__mmask8 __A, __m256i __B) 1727{ 1728 return __builtin_ia32_vcvtqq2ph256_mask ((__v4di) __B, 1729 _mm_setzero_ph (), 1730 __A); 1731} 1732 1733/* Intrinsics vcvtuqq2ph. */ 1734extern __inline __m128h 1735__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1736_mm_cvtepu64_ph (__m128i __A) 1737{ 1738 return __builtin_ia32_vcvtuqq2ph128_mask ((__v2di) __A, 1739 _mm_setzero_ph (), 1740 (__mmask8) -1); 1741} 1742 1743extern __inline __m128h 1744__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1745_mm_mask_cvtepu64_ph (__m128h __A, __mmask8 __B, __m128i __C) 1746{ 1747 return __builtin_ia32_vcvtuqq2ph128_mask ((__v2di) __C, __A, __B); 1748} 1749 1750extern __inline __m128h 1751__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1752_mm_maskz_cvtepu64_ph (__mmask8 __A, __m128i __B) 1753{ 1754 return __builtin_ia32_vcvtuqq2ph128_mask ((__v2di) __B, 1755 _mm_setzero_ph (), 1756 __A); 1757} 1758 1759extern __inline __m128h 1760__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1761_mm256_cvtepu64_ph (__m256i __A) 1762{ 1763 return __builtin_ia32_vcvtuqq2ph256_mask ((__v4di) __A, 1764 _mm_setzero_ph (), 1765 (__mmask8) -1); 1766} 1767 1768extern __inline __m128h 1769__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1770_mm256_mask_cvtepu64_ph (__m128h __A, __mmask8 __B, __m256i __C) 1771{ 1772 return __builtin_ia32_vcvtuqq2ph256_mask ((__v4di) __C, __A, __B); 1773} 1774 1775extern __inline __m128h 1776__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1777_mm256_maskz_cvtepu64_ph (__mmask8 __A, __m256i __B) 1778{ 1779 return __builtin_ia32_vcvtuqq2ph256_mask ((__v4di) __B, 1780 _mm_setzero_ph (), 1781 __A); 1782} 1783 1784/* Intrinsics vcvtph2w. */ 1785extern __inline __m128i 1786__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1787_mm_cvtph_epi16 (__m128h __A) 1788{ 1789 return (__m128i) 1790 __builtin_ia32_vcvtph2w128_mask (__A, 1791 (__v8hi) 1792 _mm_setzero_si128 (), 1793 (__mmask8) -1); 1794} 1795 1796extern __inline __m128i 1797__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1798_mm_mask_cvtph_epi16 (__m128i __A, __mmask8 __B, __m128h __C) 1799{ 1800 return (__m128i) 1801 __builtin_ia32_vcvtph2w128_mask (__C, ( __v8hi) __A, __B); 1802} 1803 1804extern __inline __m128i 1805__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1806_mm_maskz_cvtph_epi16 (__mmask8 __A, __m128h __B) 1807{ 1808 return (__m128i) 1809 __builtin_ia32_vcvtph2w128_mask (__B, 1810 (__v8hi) 1811 _mm_setzero_si128 (), 1812 __A); 1813} 1814 1815extern __inline __m256i 1816__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1817_mm256_cvtph_epi16 (__m256h __A) 1818{ 1819 return (__m256i) 1820 __builtin_ia32_vcvtph2w256_mask (__A, 1821 (__v16hi) 1822 _mm256_setzero_si256 (), 1823 (__mmask16) -1); 1824} 1825 1826extern __inline __m256i 1827__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1828_mm256_mask_cvtph_epi16 (__m256i __A, __mmask16 __B, __m256h __C) 1829{ 1830 return (__m256i) 1831 __builtin_ia32_vcvtph2w256_mask (__C, ( __v16hi) __A, __B); 1832} 1833 1834extern __inline __m256i 1835__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1836_mm256_maskz_cvtph_epi16 (__mmask16 __A, __m256h __B) 1837{ 1838 return (__m256i) 1839 __builtin_ia32_vcvtph2w256_mask (__B, 1840 (__v16hi) 1841 _mm256_setzero_si256 (), 1842 __A); 1843} 1844 1845/* Intrinsics vcvtph2uw. */ 1846extern __inline __m128i 1847__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1848_mm_cvtph_epu16 (__m128h __A) 1849{ 1850 return (__m128i) 1851 __builtin_ia32_vcvtph2uw128_mask (__A, 1852 (__v8hi) 1853 _mm_setzero_si128 (), 1854 (__mmask8) -1); 1855} 1856 1857extern __inline __m128i 1858__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1859_mm_mask_cvtph_epu16 (__m128i __A, __mmask8 __B, __m128h __C) 1860{ 1861 return (__m128i) 1862 __builtin_ia32_vcvtph2uw128_mask (__C, ( __v8hi) __A, __B); 1863} 1864 1865extern __inline __m128i 1866__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1867_mm_maskz_cvtph_epu16 (__mmask8 __A, __m128h __B) 1868{ 1869 return (__m128i) 1870 __builtin_ia32_vcvtph2uw128_mask (__B, 1871 (__v8hi) 1872 _mm_setzero_si128 (), 1873 __A); 1874} 1875 1876extern __inline __m256i 1877__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1878_mm256_cvtph_epu16 (__m256h __A) 1879{ 1880 return (__m256i) 1881 __builtin_ia32_vcvtph2uw256_mask (__A, 1882 (__v16hi) 1883 _mm256_setzero_si256 (), 1884 (__mmask16) -1); 1885} 1886 1887extern __inline __m256i 1888__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1889_mm256_mask_cvtph_epu16 (__m256i __A, __mmask16 __B, __m256h __C) 1890{ 1891 return (__m256i) 1892 __builtin_ia32_vcvtph2uw256_mask (__C, ( __v16hi) __A, __B); 1893} 1894 1895extern __inline __m256i 1896__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1897_mm256_maskz_cvtph_epu16 (__mmask16 __A, __m256h __B) 1898{ 1899 return (__m256i) 1900 __builtin_ia32_vcvtph2uw256_mask (__B, 1901 (__v16hi) 1902 _mm256_setzero_si256 (), 1903 __A); 1904} 1905 1906/* Intrinsics vcvttph2w. */ 1907extern __inline __m128i 1908__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1909_mm_cvttph_epi16 (__m128h __A) 1910{ 1911 return (__m128i) 1912 __builtin_ia32_vcvttph2w128_mask (__A, 1913 (__v8hi) 1914 _mm_setzero_si128 (), 1915 (__mmask8) -1); 1916} 1917 1918extern __inline __m128i 1919__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1920_mm_mask_cvttph_epi16 (__m128i __A, __mmask8 __B, __m128h __C) 1921{ 1922 return (__m128i) 1923 __builtin_ia32_vcvttph2w128_mask (__C, 1924 ( __v8hi) __A, 1925 __B); 1926} 1927 1928extern __inline __m128i 1929__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1930_mm_maskz_cvttph_epi16 (__mmask8 __A, __m128h __B) 1931{ 1932 return (__m128i) 1933 __builtin_ia32_vcvttph2w128_mask (__B, 1934 (__v8hi) 1935 _mm_setzero_si128 (), 1936 __A); 1937} 1938 1939extern __inline __m256i 1940__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1941_mm256_cvttph_epi16 (__m256h __A) 1942{ 1943 return (__m256i) 1944 __builtin_ia32_vcvttph2w256_mask (__A, 1945 (__v16hi) 1946 _mm256_setzero_si256 (), 1947 (__mmask16) -1); 1948} 1949 1950extern __inline __m256i 1951__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1952_mm256_mask_cvttph_epi16 (__m256i __A, __mmask16 __B, __m256h __C) 1953{ 1954 return (__m256i) 1955 __builtin_ia32_vcvttph2w256_mask (__C, 1956 ( __v16hi) __A, 1957 __B); 1958} 1959 1960extern __inline __m256i 1961__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1962_mm256_maskz_cvttph_epi16 (__mmask16 __A, __m256h __B) 1963{ 1964 return (__m256i) 1965 __builtin_ia32_vcvttph2w256_mask (__B, 1966 (__v16hi) 1967 _mm256_setzero_si256 (), 1968 __A); 1969} 1970 1971/* Intrinsics vcvttph2uw. */ 1972extern __inline __m128i 1973__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1974_mm_cvttph_epu16 (__m128h __A) 1975{ 1976 return (__m128i) 1977 __builtin_ia32_vcvttph2uw128_mask (__A, 1978 (__v8hi) 1979 _mm_setzero_si128 (), 1980 (__mmask8) -1); 1981} 1982 1983extern __inline __m128i 1984__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1985_mm_mask_cvttph_epu16 (__m128i __A, __mmask8 __B, __m128h __C) 1986{ 1987 return (__m128i) 1988 __builtin_ia32_vcvttph2uw128_mask (__C, 1989 ( __v8hi) __A, 1990 __B); 1991} 1992 1993extern __inline __m128i 1994__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 1995_mm_maskz_cvttph_epu16 (__mmask8 __A, __m128h __B) 1996{ 1997 return (__m128i) 1998 __builtin_ia32_vcvttph2uw128_mask (__B, 1999 (__v8hi) 2000 _mm_setzero_si128 (), 2001 __A); 2002} 2003 2004extern __inline __m256i 2005__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2006_mm256_cvttph_epu16 (__m256h __A) 2007{ 2008 return (__m256i) 2009 __builtin_ia32_vcvttph2uw256_mask (__A, 2010 (__v16hi) 2011 _mm256_setzero_si256 (), 2012 (__mmask16) -1); 2013} 2014 2015extern __inline __m256i 2016__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2017_mm256_mask_cvttph_epu16 (__m256i __A, __mmask16 __B, __m256h __C) 2018{ 2019 return (__m256i) 2020 __builtin_ia32_vcvttph2uw256_mask (__C, 2021 ( __v16hi) __A, 2022 __B); 2023} 2024 2025extern __inline __m256i 2026__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2027_mm256_maskz_cvttph_epu16 (__mmask16 __A, __m256h __B) 2028{ 2029 return (__m256i) 2030 __builtin_ia32_vcvttph2uw256_mask (__B, 2031 (__v16hi) _mm256_setzero_si256 (), 2032 __A); 2033} 2034 2035/* Intrinsics vcvtw2ph. */ 2036extern __inline __m128h 2037__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2038_mm_cvtepi16_ph (__m128i __A) 2039{ 2040 return __builtin_ia32_vcvtw2ph128_mask ((__v8hi) __A, 2041 _mm_setzero_ph (), 2042 (__mmask8) -1); 2043} 2044 2045extern __inline __m128h 2046__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2047_mm_mask_cvtepi16_ph (__m128h __A, __mmask8 __B, __m128i __C) 2048{ 2049 return __builtin_ia32_vcvtw2ph128_mask ((__v8hi) __C, 2050 __A, 2051 __B); 2052} 2053 2054extern __inline __m128h 2055__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2056_mm_maskz_cvtepi16_ph (__mmask8 __A, __m128i __B) 2057{ 2058 return __builtin_ia32_vcvtw2ph128_mask ((__v8hi) __B, 2059 _mm_setzero_ph (), 2060 __A); 2061} 2062 2063extern __inline __m256h 2064__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2065_mm256_cvtepi16_ph (__m256i __A) 2066{ 2067 return __builtin_ia32_vcvtw2ph256_mask ((__v16hi) __A, 2068 _mm256_setzero_ph (), 2069 (__mmask16) -1); 2070} 2071 2072extern __inline __m256h 2073__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2074_mm256_mask_cvtepi16_ph (__m256h __A, __mmask16 __B, __m256i __C) 2075{ 2076 return __builtin_ia32_vcvtw2ph256_mask ((__v16hi) __C, 2077 __A, 2078 __B); 2079} 2080 2081extern __inline __m256h 2082__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2083_mm256_maskz_cvtepi16_ph (__mmask16 __A, __m256i __B) 2084{ 2085 return __builtin_ia32_vcvtw2ph256_mask ((__v16hi) __B, 2086 _mm256_setzero_ph (), 2087 __A); 2088} 2089 2090/* Intrinsics vcvtuw2ph. */ 2091extern __inline __m128h 2092__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2093_mm_cvtepu16_ph (__m128i __A) 2094{ 2095 return __builtin_ia32_vcvtuw2ph128_mask ((__v8hi) __A, 2096 _mm_setzero_ph (), 2097 (__mmask8) -1); 2098} 2099 2100extern __inline __m128h 2101__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2102_mm_mask_cvtepu16_ph (__m128h __A, __mmask8 __B, __m128i __C) 2103{ 2104 return __builtin_ia32_vcvtuw2ph128_mask ((__v8hi) __C, __A, __B); 2105} 2106 2107extern __inline __m128h 2108__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2109_mm_maskz_cvtepu16_ph (__mmask8 __A, __m128i __B) 2110{ 2111 return __builtin_ia32_vcvtuw2ph128_mask ((__v8hi) __B, 2112 _mm_setzero_ph (), 2113 __A); 2114} 2115 2116extern __inline __m256h 2117__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2118_mm256_cvtepu16_ph (__m256i __A) 2119{ 2120 return __builtin_ia32_vcvtuw2ph256_mask ((__v16hi) __A, 2121 _mm256_setzero_ph (), 2122 (__mmask16) -1); 2123} 2124 2125extern __inline __m256h 2126__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2127_mm256_mask_cvtepu16_ph (__m256h __A, __mmask16 __B, __m256i __C) 2128{ 2129 return __builtin_ia32_vcvtuw2ph256_mask ((__v16hi) __C, __A, __B); 2130} 2131 2132extern __inline __m256h 2133__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2134_mm256_maskz_cvtepu16_ph (__mmask16 __A, __m256i __B) 2135{ 2136 return __builtin_ia32_vcvtuw2ph256_mask ((__v16hi) __B, 2137 _mm256_setzero_ph (), 2138 __A); 2139} 2140 2141/* Intrinsics vcvtph2pd. */ 2142extern __inline __m128d 2143__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2144_mm_cvtph_pd (__m128h __A) 2145{ 2146 return __builtin_ia32_vcvtph2pd128_mask (__A, 2147 _mm_setzero_pd (), 2148 (__mmask8) -1); 2149} 2150 2151extern __inline __m128d 2152__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2153_mm_mask_cvtph_pd (__m128d __A, __mmask8 __B, __m128h __C) 2154{ 2155 return __builtin_ia32_vcvtph2pd128_mask (__C, __A, __B); 2156} 2157 2158extern __inline __m128d 2159__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2160_mm_maskz_cvtph_pd (__mmask8 __A, __m128h __B) 2161{ 2162 return __builtin_ia32_vcvtph2pd128_mask (__B, _mm_setzero_pd (), __A); 2163} 2164 2165extern __inline __m256d 2166__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2167_mm256_cvtph_pd (__m128h __A) 2168{ 2169 return __builtin_ia32_vcvtph2pd256_mask (__A, 2170 _mm256_setzero_pd (), 2171 (__mmask8) -1); 2172} 2173 2174extern __inline __m256d 2175__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2176_mm256_mask_cvtph_pd (__m256d __A, __mmask8 __B, __m128h __C) 2177{ 2178 return __builtin_ia32_vcvtph2pd256_mask (__C, __A, __B); 2179} 2180 2181extern __inline __m256d 2182__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2183_mm256_maskz_cvtph_pd (__mmask8 __A, __m128h __B) 2184{ 2185 return __builtin_ia32_vcvtph2pd256_mask (__B, 2186 _mm256_setzero_pd (), 2187 __A); 2188} 2189 2190/* Intrinsics vcvtph2ps. */ 2191extern __inline __m128 2192__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2193_mm_cvtxph_ps (__m128h __A) 2194{ 2195 return __builtin_ia32_vcvtph2psx128_mask (__A, 2196 _mm_setzero_ps (), 2197 (__mmask8) -1); 2198} 2199 2200extern __inline __m128 2201__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2202_mm_mask_cvtxph_ps (__m128 __A, __mmask8 __B, __m128h __C) 2203{ 2204 return __builtin_ia32_vcvtph2psx128_mask (__C, __A, __B); 2205} 2206 2207extern __inline __m128 2208__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2209_mm_maskz_cvtxph_ps (__mmask8 __A, __m128h __B) 2210{ 2211 return __builtin_ia32_vcvtph2psx128_mask (__B, _mm_setzero_ps (), __A); 2212} 2213 2214extern __inline __m256 2215__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2216_mm256_cvtxph_ps (__m128h __A) 2217{ 2218 return __builtin_ia32_vcvtph2psx256_mask (__A, 2219 _mm256_setzero_ps (), 2220 (__mmask8) -1); 2221} 2222 2223extern __inline __m256 2224__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2225_mm256_mask_cvtxph_ps (__m256 __A, __mmask8 __B, __m128h __C) 2226{ 2227 return __builtin_ia32_vcvtph2psx256_mask (__C, __A, __B); 2228} 2229 2230extern __inline __m256 2231__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2232_mm256_maskz_cvtxph_ps (__mmask8 __A, __m128h __B) 2233{ 2234 return __builtin_ia32_vcvtph2psx256_mask (__B, 2235 _mm256_setzero_ps (), 2236 __A); 2237} 2238 2239/* Intrinsics vcvtxps2ph. */ 2240extern __inline __m128h 2241__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2242_mm_cvtxps_ph (__m128 __A) 2243{ 2244 return __builtin_ia32_vcvtps2phx128_mask ((__v4sf) __A, 2245 _mm_setzero_ph (), 2246 (__mmask8) -1); 2247} 2248 2249extern __inline __m128h 2250__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2251_mm_mask_cvtxps_ph (__m128h __A, __mmask8 __B, __m128 __C) 2252{ 2253 return __builtin_ia32_vcvtps2phx128_mask ((__v4sf) __C, __A, __B); 2254} 2255 2256extern __inline __m128h 2257__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2258_mm_maskz_cvtxps_ph (__mmask8 __A, __m128 __B) 2259{ 2260 return __builtin_ia32_vcvtps2phx128_mask ((__v4sf) __B, 2261 _mm_setzero_ph (), 2262 __A); 2263} 2264 2265extern __inline __m128h 2266__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2267_mm256_cvtxps_ph (__m256 __A) 2268{ 2269 return __builtin_ia32_vcvtps2phx256_mask ((__v8sf) __A, 2270 _mm_setzero_ph (), 2271 (__mmask8) -1); 2272} 2273 2274extern __inline __m128h 2275__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2276_mm256_mask_cvtxps_ph (__m128h __A, __mmask8 __B, __m256 __C) 2277{ 2278 return __builtin_ia32_vcvtps2phx256_mask ((__v8sf) __C, __A, __B); 2279} 2280 2281extern __inline __m128h 2282__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2283_mm256_maskz_cvtxps_ph (__mmask8 __A, __m256 __B) 2284{ 2285 return __builtin_ia32_vcvtps2phx256_mask ((__v8sf) __B, 2286 _mm_setzero_ph (), 2287 __A); 2288} 2289 2290/* Intrinsics vcvtpd2ph. */ 2291extern __inline __m128h 2292__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2293_mm_cvtpd_ph (__m128d __A) 2294{ 2295 return __builtin_ia32_vcvtpd2ph128_mask ((__v2df) __A, 2296 _mm_setzero_ph (), 2297 (__mmask8) -1); 2298} 2299 2300extern __inline __m128h 2301__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2302_mm_mask_cvtpd_ph (__m128h __A, __mmask8 __B, __m128d __C) 2303{ 2304 return __builtin_ia32_vcvtpd2ph128_mask ((__v2df) __C, __A, __B); 2305} 2306 2307extern __inline __m128h 2308__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2309_mm_maskz_cvtpd_ph (__mmask8 __A, __m128d __B) 2310{ 2311 return __builtin_ia32_vcvtpd2ph128_mask ((__v2df) __B, 2312 _mm_setzero_ph (), 2313 __A); 2314} 2315 2316extern __inline __m128h 2317__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2318_mm256_cvtpd_ph (__m256d __A) 2319{ 2320 return __builtin_ia32_vcvtpd2ph256_mask ((__v4df) __A, 2321 _mm_setzero_ph (), 2322 (__mmask8) -1); 2323} 2324 2325extern __inline __m128h 2326__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2327_mm256_mask_cvtpd_ph (__m128h __A, __mmask8 __B, __m256d __C) 2328{ 2329 return __builtin_ia32_vcvtpd2ph256_mask ((__v4df) __C, __A, __B); 2330} 2331 2332extern __inline __m128h 2333__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2334_mm256_maskz_cvtpd_ph (__mmask8 __A, __m256d __B) 2335{ 2336 return __builtin_ia32_vcvtpd2ph256_mask ((__v4df) __B, 2337 _mm_setzero_ph (), 2338 __A); 2339} 2340 2341/* Intrinsics vfmaddsub[132,213,231]ph. */ 2342extern __inline __m256h 2343__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2344_mm256_fmaddsub_ph (__m256h __A, __m256h __B, __m256h __C) 2345{ 2346 return (__m256h)__builtin_ia32_vfmaddsubph256_mask ((__v16hf)__A, 2347 (__v16hf)__B, 2348 (__v16hf)__C, 2349 (__mmask16)-1); 2350} 2351 2352extern __inline __m256h 2353__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2354_mm256_mask_fmaddsub_ph (__m256h __A, __mmask16 __U, __m256h __B, 2355 __m256h __C) 2356{ 2357 return (__m256h) __builtin_ia32_vfmaddsubph256_mask ((__v16hf) __A, 2358 (__v16hf) __B, 2359 (__v16hf) __C, 2360 (__mmask16) __U); 2361} 2362 2363extern __inline __m256h 2364__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2365_mm256_mask3_fmaddsub_ph (__m256h __A, __m256h __B, __m256h __C, 2366 __mmask16 __U) 2367{ 2368 return (__m256h) __builtin_ia32_vfmaddsubph256_mask3 ((__v16hf) __A, 2369 (__v16hf) __B, 2370 (__v16hf) __C, 2371 (__mmask16) 2372 __U); 2373} 2374 2375extern __inline __m256h 2376__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2377_mm256_maskz_fmaddsub_ph (__mmask16 __U, __m256h __A, __m256h __B, 2378 __m256h __C) 2379{ 2380 return (__m256h) __builtin_ia32_vfmaddsubph256_maskz ((__v16hf) __A, 2381 (__v16hf) __B, 2382 (__v16hf) __C, 2383 (__mmask16) 2384 __U); 2385} 2386 2387extern __inline __m128h 2388__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2389_mm_fmaddsub_ph (__m128h __A, __m128h __B, __m128h __C) 2390{ 2391 return (__m128h)__builtin_ia32_vfmaddsubph128_mask ((__v8hf)__A, 2392 (__v8hf)__B, 2393 (__v8hf)__C, 2394 (__mmask8)-1); 2395} 2396 2397extern __inline __m128h 2398__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2399_mm_mask_fmaddsub_ph (__m128h __A, __mmask8 __U, __m128h __B, 2400 __m128h __C) 2401{ 2402 return (__m128h) __builtin_ia32_vfmaddsubph128_mask ((__v8hf) __A, 2403 (__v8hf) __B, 2404 (__v8hf) __C, 2405 (__mmask8) __U); 2406} 2407 2408extern __inline __m128h 2409__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2410_mm_mask3_fmaddsub_ph (__m128h __A, __m128h __B, __m128h __C, 2411 __mmask8 __U) 2412{ 2413 return (__m128h) __builtin_ia32_vfmaddsubph128_mask3 ((__v8hf) __A, 2414 (__v8hf) __B, 2415 (__v8hf) __C, 2416 (__mmask8) 2417 __U); 2418} 2419 2420extern __inline __m128h 2421__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2422_mm_maskz_fmaddsub_ph (__mmask8 __U, __m128h __A, __m128h __B, 2423 __m128h __C) 2424{ 2425 return (__m128h) __builtin_ia32_vfmaddsubph128_maskz ((__v8hf) __A, 2426 (__v8hf) __B, 2427 (__v8hf) __C, 2428 (__mmask8) 2429 __U); 2430} 2431 2432/* Intrinsics vfmsubadd[132,213,231]ph. */ 2433extern __inline __m256h 2434__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2435_mm256_fmsubadd_ph (__m256h __A, __m256h __B, __m256h __C) 2436{ 2437 return (__m256h) __builtin_ia32_vfmsubaddph256_mask ((__v16hf) __A, 2438 (__v16hf) __B, 2439 (__v16hf) __C, 2440 (__mmask16) -1); 2441} 2442 2443extern __inline __m256h 2444__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2445_mm256_mask_fmsubadd_ph (__m256h __A, __mmask16 __U, __m256h __B, 2446 __m256h __C) 2447{ 2448 return (__m256h) __builtin_ia32_vfmsubaddph256_mask ((__v16hf) __A, 2449 (__v16hf) __B, 2450 (__v16hf) __C, 2451 (__mmask16) __U); 2452} 2453 2454extern __inline __m256h 2455__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2456_mm256_mask3_fmsubadd_ph (__m256h __A, __m256h __B, __m256h __C, 2457 __mmask16 __U) 2458{ 2459 return (__m256h) __builtin_ia32_vfmsubaddph256_mask3 ((__v16hf) __A, 2460 (__v16hf) __B, 2461 (__v16hf) __C, 2462 (__mmask16) 2463 __U); 2464} 2465 2466extern __inline __m256h 2467__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2468_mm256_maskz_fmsubadd_ph (__mmask16 __U, __m256h __A, __m256h __B, 2469 __m256h __C) 2470{ 2471 return (__m256h) __builtin_ia32_vfmsubaddph256_maskz ((__v16hf) __A, 2472 (__v16hf) __B, 2473 (__v16hf) __C, 2474 (__mmask16) 2475 __U); 2476} 2477 2478extern __inline __m128h 2479__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2480_mm_fmsubadd_ph (__m128h __A, __m128h __B, __m128h __C) 2481{ 2482 return (__m128h) __builtin_ia32_vfmsubaddph128_mask ((__v8hf) __A, 2483 (__v8hf) __B, 2484 (__v8hf) __C, 2485 (__mmask8) -1); 2486} 2487 2488extern __inline __m128h 2489__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2490_mm_mask_fmsubadd_ph (__m128h __A, __mmask8 __U, __m128h __B, 2491 __m128h __C) 2492{ 2493 return (__m128h) __builtin_ia32_vfmsubaddph128_mask ((__v8hf) __A, 2494 (__v8hf) __B, 2495 (__v8hf) __C, 2496 (__mmask8) __U); 2497} 2498 2499extern __inline __m128h 2500__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2501_mm_mask3_fmsubadd_ph (__m128h __A, __m128h __B, __m128h __C, 2502 __mmask8 __U) 2503{ 2504 return (__m128h) __builtin_ia32_vfmsubaddph128_mask3 ((__v8hf) __A, 2505 (__v8hf) __B, 2506 (__v8hf) __C, 2507 (__mmask8) 2508 __U); 2509} 2510 2511extern __inline __m128h 2512__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2513_mm_maskz_fmsubadd_ph (__mmask8 __U, __m128h __A, __m128h __B, 2514 __m128h __C) 2515{ 2516 return (__m128h) __builtin_ia32_vfmsubaddph128_maskz ((__v8hf) __A, 2517 (__v8hf) __B, 2518 (__v8hf) __C, 2519 (__mmask8) 2520 __U); 2521} 2522 2523/* Intrinsics vfmadd[132,213,231]ph. */ 2524extern __inline __m256h 2525__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2526_mm256_fmadd_ph (__m256h __A, __m256h __B, __m256h __C) 2527{ 2528 return (__m256h) __builtin_ia32_vfmaddph256_mask ((__v16hf) __A, 2529 (__v16hf) __B, 2530 (__v16hf) __C, 2531 (__mmask16) -1); 2532} 2533 2534extern __inline __m256h 2535__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2536_mm256_mask_fmadd_ph (__m256h __A, __mmask16 __U, __m256h __B, 2537 __m256h __C) 2538{ 2539 return (__m256h) __builtin_ia32_vfmaddph256_mask ((__v16hf) __A, 2540 (__v16hf) __B, 2541 (__v16hf) __C, 2542 (__mmask16) __U); 2543} 2544 2545extern __inline __m256h 2546__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2547_mm256_mask3_fmadd_ph (__m256h __A, __m256h __B, __m256h __C, 2548 __mmask16 __U) 2549{ 2550 return (__m256h) __builtin_ia32_vfmaddph256_mask3 ((__v16hf) __A, 2551 (__v16hf) __B, 2552 (__v16hf) __C, 2553 (__mmask16) 2554 __U); 2555} 2556 2557extern __inline __m256h 2558__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2559_mm256_maskz_fmadd_ph (__mmask16 __U, __m256h __A, __m256h __B, 2560 __m256h __C) 2561{ 2562 return (__m256h) __builtin_ia32_vfmaddph256_maskz ((__v16hf) __A, 2563 (__v16hf) __B, 2564 (__v16hf) __C, 2565 (__mmask16) 2566 __U); 2567} 2568 2569extern __inline __m128h 2570__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2571_mm_fmadd_ph (__m128h __A, __m128h __B, __m128h __C) 2572{ 2573 return (__m128h) __builtin_ia32_vfmaddph128_mask ((__v8hf) __A, 2574 (__v8hf) __B, 2575 (__v8hf) __C, 2576 (__mmask8) -1); 2577} 2578 2579extern __inline __m128h 2580__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2581_mm_mask_fmadd_ph (__m128h __A, __mmask8 __U, __m128h __B, 2582 __m128h __C) 2583{ 2584 return (__m128h) __builtin_ia32_vfmaddph128_mask ((__v8hf) __A, 2585 (__v8hf) __B, 2586 (__v8hf) __C, 2587 (__mmask8) __U); 2588} 2589 2590extern __inline __m128h 2591__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2592_mm_mask3_fmadd_ph (__m128h __A, __m128h __B, __m128h __C, 2593 __mmask8 __U) 2594{ 2595 return (__m128h) __builtin_ia32_vfmaddph128_mask3 ((__v8hf) __A, 2596 (__v8hf) __B, 2597 (__v8hf) __C, 2598 (__mmask8) 2599 __U); 2600} 2601 2602extern __inline __m128h 2603__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2604_mm_maskz_fmadd_ph (__mmask8 __U, __m128h __A, __m128h __B, 2605 __m128h __C) 2606{ 2607 return (__m128h) __builtin_ia32_vfmaddph128_maskz ((__v8hf) __A, 2608 (__v8hf) __B, 2609 (__v8hf) __C, 2610 (__mmask8) 2611 __U); 2612} 2613 2614/* Intrinsics vfnmadd[132,213,231]ph. */ 2615extern __inline __m256h 2616__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2617_mm256_fnmadd_ph (__m256h __A, __m256h __B, __m256h __C) 2618{ 2619 return (__m256h) __builtin_ia32_vfnmaddph256_mask ((__v16hf) __A, 2620 (__v16hf) __B, 2621 (__v16hf) __C, 2622 (__mmask16) -1); 2623} 2624 2625extern __inline __m256h 2626__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2627_mm256_mask_fnmadd_ph (__m256h __A, __mmask16 __U, __m256h __B, 2628 __m256h __C) 2629{ 2630 return (__m256h) __builtin_ia32_vfnmaddph256_mask ((__v16hf) __A, 2631 (__v16hf) __B, 2632 (__v16hf) __C, 2633 (__mmask16) __U); 2634} 2635 2636extern __inline __m256h 2637__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2638_mm256_mask3_fnmadd_ph (__m256h __A, __m256h __B, __m256h __C, 2639 __mmask16 __U) 2640{ 2641 return (__m256h) __builtin_ia32_vfnmaddph256_mask3 ((__v16hf) __A, 2642 (__v16hf) __B, 2643 (__v16hf) __C, 2644 (__mmask16) 2645 __U); 2646} 2647 2648extern __inline __m256h 2649__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2650_mm256_maskz_fnmadd_ph (__mmask16 __U, __m256h __A, __m256h __B, 2651 __m256h __C) 2652{ 2653 return (__m256h) __builtin_ia32_vfnmaddph256_maskz ((__v16hf) __A, 2654 (__v16hf) __B, 2655 (__v16hf) __C, 2656 (__mmask16) 2657 __U); 2658} 2659 2660extern __inline __m128h 2661__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2662_mm_fnmadd_ph (__m128h __A, __m128h __B, __m128h __C) 2663{ 2664 return (__m128h) __builtin_ia32_vfnmaddph128_mask ((__v8hf) __A, 2665 (__v8hf) __B, 2666 (__v8hf) __C, 2667 (__mmask8) -1); 2668} 2669 2670extern __inline __m128h 2671__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2672_mm_mask_fnmadd_ph (__m128h __A, __mmask8 __U, __m128h __B, 2673 __m128h __C) 2674{ 2675 return (__m128h) __builtin_ia32_vfnmaddph128_mask ((__v8hf) __A, 2676 (__v8hf) __B, 2677 (__v8hf) __C, 2678 (__mmask8) __U); 2679} 2680 2681extern __inline __m128h 2682__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2683_mm_mask3_fnmadd_ph (__m128h __A, __m128h __B, __m128h __C, 2684 __mmask8 __U) 2685{ 2686 return (__m128h) __builtin_ia32_vfnmaddph128_mask3 ((__v8hf) __A, 2687 (__v8hf) __B, 2688 (__v8hf) __C, 2689 (__mmask8) 2690 __U); 2691} 2692 2693extern __inline __m128h 2694__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2695_mm_maskz_fnmadd_ph (__mmask8 __U, __m128h __A, __m128h __B, 2696 __m128h __C) 2697{ 2698 return (__m128h) __builtin_ia32_vfnmaddph128_maskz ((__v8hf) __A, 2699 (__v8hf) __B, 2700 (__v8hf) __C, 2701 (__mmask8) 2702 __U); 2703} 2704 2705/* Intrinsics vfmsub[132,213,231]ph. */ 2706extern __inline __m256h 2707__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2708_mm256_fmsub_ph (__m256h __A, __m256h __B, __m256h __C) 2709{ 2710 return (__m256h) __builtin_ia32_vfmsubph256_mask ((__v16hf) __A, 2711 (__v16hf) __B, 2712 (__v16hf) __C, 2713 (__mmask16) -1); 2714} 2715 2716extern __inline __m256h 2717__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2718_mm256_mask_fmsub_ph (__m256h __A, __mmask16 __U, __m256h __B, 2719 __m256h __C) 2720{ 2721 return (__m256h) __builtin_ia32_vfmsubph256_mask ((__v16hf) __A, 2722 (__v16hf) __B, 2723 (__v16hf) __C, 2724 (__mmask16) __U); 2725} 2726 2727extern __inline __m256h 2728__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2729_mm256_mask3_fmsub_ph (__m256h __A, __m256h __B, __m256h __C, 2730 __mmask16 __U) 2731{ 2732 return (__m256h) __builtin_ia32_vfmsubph256_mask3 ((__v16hf) __A, 2733 (__v16hf) __B, 2734 (__v16hf) __C, 2735 (__mmask16) 2736 __U); 2737} 2738 2739extern __inline __m256h 2740__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2741_mm256_maskz_fmsub_ph (__mmask16 __U, __m256h __A, __m256h __B, 2742 __m256h __C) 2743{ 2744 return (__m256h) __builtin_ia32_vfmsubph256_maskz ((__v16hf) __A, 2745 (__v16hf) __B, 2746 (__v16hf) __C, 2747 (__mmask16) 2748 __U); 2749} 2750 2751extern __inline __m128h 2752__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2753_mm_fmsub_ph (__m128h __A, __m128h __B, __m128h __C) 2754{ 2755 return (__m128h) __builtin_ia32_vfmsubph128_mask ((__v8hf) __A, 2756 (__v8hf) __B, 2757 (__v8hf) __C, 2758 (__mmask8) -1); 2759} 2760 2761extern __inline __m128h 2762__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2763_mm_mask_fmsub_ph (__m128h __A, __mmask8 __U, __m128h __B, 2764 __m128h __C) 2765{ 2766 return (__m128h) __builtin_ia32_vfmsubph128_mask ((__v8hf) __A, 2767 (__v8hf) __B, 2768 (__v8hf) __C, 2769 (__mmask8) __U); 2770} 2771 2772extern __inline __m128h 2773__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2774_mm_mask3_fmsub_ph (__m128h __A, __m128h __B, __m128h __C, 2775 __mmask8 __U) 2776{ 2777 return (__m128h) __builtin_ia32_vfmsubph128_mask3 ((__v8hf) __A, 2778 (__v8hf) __B, 2779 (__v8hf) __C, 2780 (__mmask8) 2781 __U); 2782} 2783 2784extern __inline __m128h 2785__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2786_mm_maskz_fmsub_ph (__mmask8 __U, __m128h __A, __m128h __B, 2787 __m128h __C) 2788{ 2789 return (__m128h) __builtin_ia32_vfmsubph128_maskz ((__v8hf) __A, 2790 (__v8hf) __B, 2791 (__v8hf) __C, 2792 (__mmask8) 2793 __U); 2794} 2795 2796/* Intrinsics vfnmsub[132,213,231]ph. */ 2797extern __inline __m256h 2798__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2799_mm256_fnmsub_ph (__m256h __A, __m256h __B, __m256h __C) 2800{ 2801 return (__m256h) __builtin_ia32_vfnmsubph256_mask ((__v16hf) __A, 2802 (__v16hf) __B, 2803 (__v16hf) __C, 2804 (__mmask16) -1); 2805} 2806 2807extern __inline __m256h 2808__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2809_mm256_mask_fnmsub_ph (__m256h __A, __mmask16 __U, __m256h __B, 2810 __m256h __C) 2811{ 2812 return (__m256h) __builtin_ia32_vfnmsubph256_mask ((__v16hf) __A, 2813 (__v16hf) __B, 2814 (__v16hf) __C, 2815 (__mmask16) __U); 2816} 2817 2818extern __inline __m256h 2819__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2820_mm256_mask3_fnmsub_ph (__m256h __A, __m256h __B, __m256h __C, 2821 __mmask16 __U) 2822{ 2823 return (__m256h) __builtin_ia32_vfnmsubph256_mask3 ((__v16hf) __A, 2824 (__v16hf) __B, 2825 (__v16hf) __C, 2826 (__mmask16) 2827 __U); 2828} 2829 2830extern __inline __m256h 2831__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2832_mm256_maskz_fnmsub_ph (__mmask16 __U, __m256h __A, __m256h __B, 2833 __m256h __C) 2834{ 2835 return (__m256h) __builtin_ia32_vfnmsubph256_maskz ((__v16hf) __A, 2836 (__v16hf) __B, 2837 (__v16hf) __C, 2838 (__mmask16) 2839 __U); 2840} 2841 2842extern __inline __m128h 2843__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2844_mm_fnmsub_ph (__m128h __A, __m128h __B, __m128h __C) 2845{ 2846 return (__m128h) __builtin_ia32_vfnmsubph128_mask ((__v8hf) __A, 2847 (__v8hf) __B, 2848 (__v8hf) __C, 2849 (__mmask8) -1); 2850} 2851 2852extern __inline __m128h 2853__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2854_mm_mask_fnmsub_ph (__m128h __A, __mmask8 __U, __m128h __B, 2855 __m128h __C) 2856{ 2857 return (__m128h) __builtin_ia32_vfnmsubph128_mask ((__v8hf) __A, 2858 (__v8hf) __B, 2859 (__v8hf) __C, 2860 (__mmask8) __U); 2861} 2862 2863extern __inline __m128h 2864__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2865_mm_mask3_fnmsub_ph (__m128h __A, __m128h __B, __m128h __C, 2866 __mmask8 __U) 2867{ 2868 return (__m128h) __builtin_ia32_vfnmsubph128_mask3 ((__v8hf) __A, 2869 (__v8hf) __B, 2870 (__v8hf) __C, 2871 (__mmask8) 2872 __U); 2873} 2874 2875extern __inline __m128h 2876__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2877_mm_maskz_fnmsub_ph (__mmask8 __U, __m128h __A, __m128h __B, 2878 __m128h __C) 2879{ 2880 return (__m128h) __builtin_ia32_vfnmsubph128_maskz ((__v8hf) __A, 2881 (__v8hf) __B, 2882 (__v8hf) __C, 2883 (__mmask8) 2884 __U); 2885} 2886 2887/* Intrinsics vf[,c]maddcph. */ 2888extern __inline __m128h 2889__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2890_mm_fmadd_pch (__m128h __A, __m128h __B, __m128h __C) 2891{ 2892 return (__m128h) __builtin_ia32_vfmaddcph128 ((__v8hf) __A, 2893 (__v8hf) __B, 2894 (__v8hf) __C); 2895} 2896 2897extern __inline __m128h 2898__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2899_mm_mask_fmadd_pch (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D) 2900{ 2901 return (__m128h) 2902 __builtin_ia32_vfmaddcph128_mask ((__v8hf) __A, 2903 (__v8hf) __C, 2904 (__v8hf) __D, __B); 2905} 2906 2907extern __inline __m128h 2908__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2909_mm_mask3_fmadd_pch (__m128h __A, __m128h __B, __m128h __C, __mmask8 __D) 2910{ 2911 return (__m128h) 2912 __builtin_ia32_vfmaddcph128_mask3 ((__v8hf) __A, 2913 (__v8hf) __B, 2914 (__v8hf) __C, __D); 2915} 2916 2917extern __inline __m128h 2918__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2919_mm_maskz_fmadd_pch (__mmask8 __A, __m128h __B, __m128h __C, __m128h __D) 2920{ 2921 return (__m128h) __builtin_ia32_vfmaddcph128_maskz ((__v8hf) __B, 2922 (__v8hf) __C, 2923 (__v8hf) __D, __A); 2924} 2925 2926extern __inline __m256h 2927__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2928_mm256_fmadd_pch (__m256h __A, __m256h __B, __m256h __C) 2929{ 2930 return (__m256h) __builtin_ia32_vfmaddcph256 ((__v16hf) __A, 2931 (__v16hf) __B, 2932 (__v16hf) __C); 2933} 2934 2935extern __inline __m256h 2936__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2937_mm256_mask_fmadd_pch (__m256h __A, __mmask8 __B, __m256h __C, __m256h __D) 2938{ 2939 return (__m256h) 2940 __builtin_ia32_vfmaddcph256_mask ((__v16hf) __A, 2941 (__v16hf) __C, 2942 (__v16hf) __D, __B); 2943} 2944 2945extern __inline __m256h 2946__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2947_mm256_mask3_fmadd_pch (__m256h __A, __m256h __B, __m256h __C, __mmask8 __D) 2948{ 2949 return (__m256h) 2950 __builtin_ia32_vfmaddcph256_mask3 ((__v16hf) __A, 2951 (__v16hf) __B, 2952 (__v16hf) __C, __D); 2953} 2954 2955extern __inline __m256h 2956__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2957_mm256_maskz_fmadd_pch (__mmask8 __A, __m256h __B, __m256h __C, __m256h __D) 2958{ 2959 return (__m256h)__builtin_ia32_vfmaddcph256_maskz ((__v16hf) __B, 2960 (__v16hf) __C, 2961 (__v16hf) __D, __A); 2962} 2963 2964extern __inline __m128h 2965__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2966_mm_fcmadd_pch (__m128h __A, __m128h __B, __m128h __C) 2967{ 2968 return (__m128h) __builtin_ia32_vfcmaddcph128 ((__v8hf) __A, 2969 (__v8hf) __B, 2970 (__v8hf) __C); 2971} 2972 2973extern __inline __m128h 2974__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2975_mm_mask_fcmadd_pch (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D) 2976{ 2977 return (__m128h) 2978 __builtin_ia32_vfcmaddcph128_mask ((__v8hf) __A, 2979 (__v8hf) __C, 2980 (__v8hf) __D, __B); 2981} 2982 2983extern __inline __m128h 2984__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2985_mm_mask3_fcmadd_pch (__m128h __A, __m128h __B, __m128h __C, __mmask8 __D) 2986{ 2987 return (__m128h) 2988 __builtin_ia32_vfcmaddcph128_mask3 ((__v8hf) __A, 2989 (__v8hf) __B, 2990 (__v8hf) __C, __D); 2991} 2992 2993extern __inline __m128h 2994__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 2995_mm_maskz_fcmadd_pch (__mmask8 __A, __m128h __B, __m128h __C, __m128h __D) 2996{ 2997 return (__m128h)__builtin_ia32_vfcmaddcph128_maskz ((__v8hf) __B, 2998 (__v8hf) __C, 2999 (__v8hf) __D, __A); 3000} 3001 3002extern __inline __m256h 3003__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3004_mm256_fcmadd_pch (__m256h __A, __m256h __B, __m256h __C) 3005{ 3006 return (__m256h) __builtin_ia32_vfcmaddcph256 ((__v16hf) __A, 3007 (__v16hf) __B, 3008 (__v16hf) __C); 3009} 3010 3011extern __inline __m256h 3012__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3013_mm256_mask_fcmadd_pch (__m256h __A, __mmask8 __B, __m256h __C, __m256h __D) 3014{ 3015 return (__m256h) 3016 __builtin_ia32_vfcmaddcph256_mask ((__v16hf) __A, 3017 (__v16hf) __C, 3018 (__v16hf) __D, __B); 3019} 3020 3021extern __inline __m256h 3022__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3023_mm256_mask3_fcmadd_pch (__m256h __A, __m256h __B, __m256h __C, __mmask8 __D) 3024{ 3025 return (__m256h) 3026 __builtin_ia32_vfcmaddcph256_mask3 ((__v16hf) __A, 3027 (__v16hf) __B, 3028 (__v16hf) __C, __D); 3029} 3030 3031extern __inline __m256h 3032__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3033_mm256_maskz_fcmadd_pch (__mmask8 __A, __m256h __B, __m256h __C, __m256h __D) 3034{ 3035 return (__m256h) __builtin_ia32_vfcmaddcph256_maskz ((__v16hf) __B, 3036 (__v16hf) __C, 3037 (__v16hf) __D, __A); 3038} 3039 3040/* Intrinsics vf[,c]mulcph. */ 3041extern __inline __m128h 3042__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3043_mm_fmul_pch (__m128h __A, __m128h __B) 3044{ 3045 return (__m128h) __builtin_ia32_vfmulcph128 ((__v8hf) __A, (__v8hf) __B); 3046} 3047 3048extern __inline __m128h 3049__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3050_mm_mask_fmul_pch (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D) 3051{ 3052 return (__m128h) __builtin_ia32_vfmulcph128_mask ((__v8hf) __C, 3053 (__v8hf) __D, 3054 (__v8hf) __A, __B); 3055} 3056 3057extern __inline __m128h 3058__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3059_mm_maskz_fmul_pch (__mmask8 __A, __m128h __B, __m128h __C) 3060{ 3061 return (__m128h) __builtin_ia32_vfmulcph128_mask ((__v8hf) __B, 3062 (__v8hf) __C, 3063 _mm_setzero_ph (), 3064 __A); 3065} 3066 3067extern __inline __m256h 3068__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3069_mm256_fmul_pch (__m256h __A, __m256h __B) 3070{ 3071 return (__m256h) __builtin_ia32_vfmulcph256 ((__v16hf) __A, 3072 (__v16hf) __B); 3073} 3074 3075extern __inline __m256h 3076__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3077_mm256_mask_fmul_pch (__m256h __A, __mmask8 __B, __m256h __C, __m256h __D) 3078{ 3079 return (__m256h) __builtin_ia32_vfmulcph256_mask ((__v16hf) __C, 3080 (__v16hf) __D, 3081 (__v16hf) __A, __B); 3082} 3083 3084extern __inline __m256h 3085__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3086_mm256_maskz_fmul_pch (__mmask8 __A, __m256h __B, __m256h __C) 3087{ 3088 return (__m256h) __builtin_ia32_vfmulcph256_mask ((__v16hf) __B, 3089 (__v16hf) __C, 3090 _mm256_setzero_ph (), 3091 __A); 3092} 3093 3094extern __inline __m128h 3095__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3096_mm_fcmul_pch (__m128h __A, __m128h __B) 3097{ 3098 return (__m128h) __builtin_ia32_vfcmulcph128 ((__v8hf) __A, 3099 (__v8hf) __B); 3100} 3101 3102extern __inline __m128h 3103__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3104_mm_mask_fcmul_pch (__m128h __A, __mmask8 __B, __m128h __C, __m128h __D) 3105{ 3106 return (__m128h) __builtin_ia32_vfcmulcph128_mask ((__v8hf) __C, 3107 (__v8hf) __D, 3108 (__v8hf) __A, __B); 3109} 3110 3111extern __inline __m128h 3112__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3113_mm_maskz_fcmul_pch (__mmask8 __A, __m128h __B, __m128h __C) 3114{ 3115 return (__m128h) __builtin_ia32_vfcmulcph128_mask ((__v8hf) __B, 3116 (__v8hf) __C, 3117 _mm_setzero_ph (), 3118 __A); 3119} 3120 3121extern __inline __m256h 3122__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3123_mm256_fcmul_pch (__m256h __A, __m256h __B) 3124{ 3125 return (__m256h) __builtin_ia32_vfcmulcph256 ((__v16hf) __A, (__v16hf) __B); 3126} 3127 3128extern __inline __m256h 3129__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3130_mm256_mask_fcmul_pch (__m256h __A, __mmask8 __B, __m256h __C, __m256h __D) 3131{ 3132 return (__m256h) __builtin_ia32_vfcmulcph256_mask ((__v16hf) __C, 3133 (__v16hf) __D, 3134 (__v16hf) __A, __B); 3135} 3136 3137extern __inline __m256h 3138__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3139_mm256_maskz_fcmul_pch (__mmask8 __A, __m256h __B, __m256h __C) 3140{ 3141 return (__m256h) __builtin_ia32_vfcmulcph256_mask ((__v16hf) __B, 3142 (__v16hf) __C, 3143 _mm256_setzero_ph (), 3144 __A); 3145} 3146 3147#define _MM256_REDUCE_OP(op) \ 3148 __m128h __T1 = (__m128h) _mm256_extractf128_pd ((__m256d) __A, 0); \ 3149 __m128h __T2 = (__m128h) _mm256_extractf128_pd ((__m256d) __A, 1); \ 3150 __m128h __T3 = (__T1 op __T2); \ 3151 __m128h __T4 = (__m128h) __builtin_shuffle (__T3, \ 3152 (__v8hi) { 4, 5, 6, 7, 0, 1, 2, 3 }); \ 3153 __m128h __T5 = (__T3) op (__T4); \ 3154 __m128h __T6 = (__m128h) __builtin_shuffle (__T5, \ 3155 (__v8hi) { 2, 3, 0, 1, 4, 5, 6, 7 }); \ 3156 __m128h __T7 = __T5 op __T6; \ 3157 return __T7[0] op __T7[1] 3158 3159extern __inline _Float16 3160__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3161_mm256_reduce_add_ph (__m256h __A) 3162{ 3163 _MM256_REDUCE_OP (+); 3164} 3165 3166extern __inline _Float16 3167__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3168_mm256_reduce_mul_ph (__m256h __A) 3169{ 3170 _MM256_REDUCE_OP (*); 3171} 3172 3173#undef _MM256_REDUCE_OP 3174#define _MM256_REDUCE_OP(op) \ 3175 __m128h __T1 = (__m128h) _mm256_extractf128_pd ((__m256d) __A, 0); \ 3176 __m128h __T2 = (__m128h) _mm256_extractf128_pd ((__m256d) __A, 1); \ 3177 __m128h __T3 = _mm_##op (__T1, __T2); \ 3178 __m128h __T4 = (__m128h) __builtin_shuffle (__T3, \ 3179 (__v8hi) { 2, 3, 0, 1, 6, 7, 4, 5 }); \ 3180 __m128h __T5 = _mm_##op (__T3, __T4); \ 3181 __m128h __T6 = (__m128h) __builtin_shuffle (__T5, (__v8hi) { 4, 5 }); \ 3182 __m128h __T7 = _mm_##op (__T5, __T6); \ 3183 __m128h __T8 = (__m128h) __builtin_shuffle (__T7, (__v8hi) { 1, 0 }); \ 3184 __m128h __T9 = _mm_##op (__T7, __T8); \ 3185 return __T9[0] 3186 3187extern __inline _Float16 3188__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3189_mm256_reduce_min_ph (__m256h __A) 3190{ 3191 _MM256_REDUCE_OP (min_ph); 3192} 3193 3194extern __inline _Float16 3195__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3196_mm256_reduce_max_ph (__m256h __A) 3197{ 3198 _MM256_REDUCE_OP (max_ph); 3199} 3200 3201#define _MM_REDUCE_OP(op) \ 3202 __m128h __T1 = (__m128h) __builtin_shuffle (__A, \ 3203 (__v8hi) { 4, 5, 6, 7, 0, 1, 2, 3 }); \ 3204 __m128h __T2 = (__A) op (__T1); \ 3205 __m128h __T3 = (__m128h) __builtin_shuffle (__T2, \ 3206 (__v8hi){ 2, 3, 0, 1, 4, 5, 6, 7 }); \ 3207 __m128h __T4 = __T2 op __T3; \ 3208 return __T4[0] op __T4[1] 3209 3210extern __inline _Float16 3211__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3212_mm_reduce_add_ph (__m128h __A) 3213{ 3214 _MM_REDUCE_OP (+); 3215} 3216 3217extern __inline _Float16 3218__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3219_mm_reduce_mul_ph (__m128h __A) 3220{ 3221 _MM_REDUCE_OP (*); 3222} 3223 3224#undef _MM_REDUCE_OP 3225#define _MM_REDUCE_OP(op) \ 3226 __m128h __T1 = (__m128h) __builtin_shuffle (__A, \ 3227 (__v8hi) { 2, 3, 0, 1, 6, 7, 4, 5 }); \ 3228 __m128h __T2 = _mm_##op (__A, __T1); \ 3229 __m128h __T3 = (__m128h) __builtin_shuffle (__T2, (__v8hi){ 4, 5 }); \ 3230 __m128h __T4 = _mm_##op (__T2, __T3); \ 3231 __m128h __T5 = (__m128h) __builtin_shuffle (__T4, (__v8hi){ 1, 0 }); \ 3232 __m128h __T6 = _mm_##op (__T4, __T5); \ 3233 return __T6[0] 3234 3235extern __inline _Float16 3236__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3237_mm_reduce_min_ph (__m128h __A) 3238{ 3239 _MM_REDUCE_OP (min_ph); 3240} 3241 3242extern __inline _Float16 3243__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3244_mm_reduce_max_ph (__m128h __A) 3245{ 3246 _MM_REDUCE_OP (max_ph); 3247} 3248 3249#undef _MM256_REDUCE_OP 3250#undef _MM_REDUCE_OP 3251 3252extern __inline __m256h 3253__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3254_mm256_mask_blend_ph (__mmask16 __U, __m256h __A, __m256h __W) 3255{ 3256 return (__m256h) __builtin_ia32_movdquhi256_mask ((__v16hi) __W, 3257 (__v16hi) __A, 3258 (__mmask16) __U); 3259 3260} 3261 3262extern __inline __m256h 3263__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3264_mm256_permutex2var_ph (__m256h __A, __m256i __I, __m256h __B) 3265{ 3266 return (__m256h) __builtin_ia32_vpermi2varhi256_mask ((__v16hi) __A, 3267 (__v16hi) __I, 3268 (__v16hi) __B, 3269 (__mmask16)-1); 3270} 3271 3272extern __inline __m256h 3273__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3274_mm256_permutexvar_ph (__m256i __A, __m256h __B) 3275{ 3276 return (__m256h) __builtin_ia32_permvarhi256_mask ((__v16hi) __B, 3277 (__v16hi) __A, 3278 (__v16hi) 3279 (_mm256_setzero_ph ()), 3280 (__mmask16)-1); 3281} 3282 3283extern __inline __m128h 3284__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3285_mm_mask_blend_ph (__mmask8 __U, __m128h __A, __m128h __W) 3286{ 3287 return (__m128h) __builtin_ia32_movdquhi128_mask ((__v8hi) __W, 3288 (__v8hi) __A, 3289 (__mmask8) __U); 3290 3291} 3292 3293extern __inline __m128h 3294__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3295_mm_permutex2var_ph (__m128h __A, __m128i __I, __m128h __B) 3296{ 3297 return (__m128h) __builtin_ia32_vpermi2varhi128_mask ((__v8hi) __A, 3298 (__v8hi) __I, 3299 (__v8hi) __B, 3300 (__mmask8)-1); 3301} 3302 3303extern __inline __m128h 3304__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3305_mm_permutexvar_ph (__m128i __A, __m128h __B) 3306{ 3307 return (__m128h) __builtin_ia32_permvarhi128_mask ((__v8hi) __B, 3308 (__v8hi) __A, 3309 (__v8hi) 3310 (_mm_setzero_ph ()), 3311 (__mmask8)-1); 3312} 3313 3314extern __inline __m256h 3315__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3316_mm256_set1_pch (_Float16 _Complex __A) 3317{ 3318 union 3319 { 3320 _Float16 _Complex __a; 3321 float __b; 3322 } __u = { .__a = __A }; 3323 3324 return (__m256h) _mm256_set1_ps (__u.__b); 3325} 3326 3327extern __inline __m128h 3328__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) 3329_mm_set1_pch (_Float16 _Complex __A) 3330{ 3331 union 3332 { 3333 _Float16 _Complex __a; 3334 float __b; 3335 } __u = { .__a = __A }; 3336 3337 return (__m128h) _mm_set1_ps (__u.__b); 3338} 3339 3340// intrinsics below are alias for f*mul_*ch 3341#define _mm_mul_pch(A, B) _mm_fmul_pch ((A), (B)) 3342#define _mm_mask_mul_pch(W, U, A, B) _mm_mask_fmul_pch ((W), (U), (A), (B)) 3343#define _mm_maskz_mul_pch(U, A, B) _mm_maskz_fmul_pch ((U), (A), (B)) 3344#define _mm256_mul_pch(A, B) _mm256_fmul_pch ((A), (B)) 3345#define _mm256_mask_mul_pch(W, U, A, B) \ 3346 _mm256_mask_fmul_pch ((W), (U), (A), (B)) 3347#define _mm256_maskz_mul_pch(U, A, B) _mm256_maskz_fmul_pch ((U), (A), (B)) 3348 3349#define _mm_cmul_pch(A, B) _mm_fcmul_pch ((A), (B)) 3350#define _mm_mask_cmul_pch(W, U, A, B) _mm_mask_fcmul_pch ((W), (U), (A), (B)) 3351#define _mm_maskz_cmul_pch(U, A, B) _mm_maskz_fcmul_pch ((U), (A), (B)) 3352#define _mm256_cmul_pch(A, B) _mm256_fcmul_pch ((A), (B)) 3353#define _mm256_mask_cmul_pch(W, U, A, B) \ 3354 _mm256_mask_fcmul_pch ((W), (U), (A), (B)) 3355#define _mm256_maskz_cmul_pch(U, A, B) _mm256_maskz_fcmul_pch((U), (A), (B)) 3356 3357#ifdef __DISABLE_AVX512FP16VL__ 3358#undef __DISABLE_AVX512FP16VL__ 3359#pragma GCC pop_options 3360#endif /* __DISABLE_AVX512FP16VL__ */ 3361 3362#endif /* __AVX512FP16VLINTRIN_H_INCLUDED */ 3363