1/*===---- avx512vldqintrin.h - AVX512VL and AVX512DQ intrinsics ------------=== 2 * 3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 * See https://llvm.org/LICENSE.txt for license information. 5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 * 7 *===-----------------------------------------------------------------------=== 8 */ 9 10#ifndef __IMMINTRIN_H 11#error "Never use <avx512vldqintrin.h> directly; include <immintrin.h> instead." 12#endif 13 14#ifndef __AVX512VLDQINTRIN_H 15#define __AVX512VLDQINTRIN_H 16 17/* Define the default attributes for the functions in this file. */ 18#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512dq"), __min_vector_width__(128))) 19#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512dq"), __min_vector_width__(256))) 20 21static __inline__ __m256i __DEFAULT_FN_ATTRS256 22_mm256_mullo_epi64 (__m256i __A, __m256i __B) { 23 return (__m256i) ((__v4du) __A * (__v4du) __B); 24} 25 26static __inline__ __m256i __DEFAULT_FN_ATTRS256 27_mm256_mask_mullo_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { 28 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 29 (__v4di)_mm256_mullo_epi64(__A, __B), 30 (__v4di)__W); 31} 32 33static __inline__ __m256i __DEFAULT_FN_ATTRS256 34_mm256_maskz_mullo_epi64(__mmask8 __U, __m256i __A, __m256i __B) { 35 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 36 (__v4di)_mm256_mullo_epi64(__A, __B), 37 (__v4di)_mm256_setzero_si256()); 38} 39 40static __inline__ __m128i __DEFAULT_FN_ATTRS128 41_mm_mullo_epi64 (__m128i __A, __m128i __B) { 42 return (__m128i) ((__v2du) __A * (__v2du) __B); 43} 44 45static __inline__ __m128i __DEFAULT_FN_ATTRS128 46_mm_mask_mullo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { 47 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 48 (__v2di)_mm_mullo_epi64(__A, __B), 49 (__v2di)__W); 50} 51 52static __inline__ __m128i __DEFAULT_FN_ATTRS128 53_mm_maskz_mullo_epi64(__mmask8 __U, __m128i __A, __m128i __B) { 54 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 55 (__v2di)_mm_mullo_epi64(__A, __B), 56 (__v2di)_mm_setzero_si128()); 57} 58 59static __inline__ __m256d __DEFAULT_FN_ATTRS256 60_mm256_mask_andnot_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { 61 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 62 (__v4df)_mm256_andnot_pd(__A, __B), 63 (__v4df)__W); 64} 65 66static __inline__ __m256d __DEFAULT_FN_ATTRS256 67_mm256_maskz_andnot_pd(__mmask8 __U, __m256d __A, __m256d __B) { 68 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 69 (__v4df)_mm256_andnot_pd(__A, __B), 70 (__v4df)_mm256_setzero_pd()); 71} 72 73static __inline__ __m128d __DEFAULT_FN_ATTRS128 74_mm_mask_andnot_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 75 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 76 (__v2df)_mm_andnot_pd(__A, __B), 77 (__v2df)__W); 78} 79 80static __inline__ __m128d __DEFAULT_FN_ATTRS128 81_mm_maskz_andnot_pd(__mmask8 __U, __m128d __A, __m128d __B) { 82 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 83 (__v2df)_mm_andnot_pd(__A, __B), 84 (__v2df)_mm_setzero_pd()); 85} 86 87static __inline__ __m256 __DEFAULT_FN_ATTRS256 88_mm256_mask_andnot_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 89 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 90 (__v8sf)_mm256_andnot_ps(__A, __B), 91 (__v8sf)__W); 92} 93 94static __inline__ __m256 __DEFAULT_FN_ATTRS256 95_mm256_maskz_andnot_ps(__mmask8 __U, __m256 __A, __m256 __B) { 96 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 97 (__v8sf)_mm256_andnot_ps(__A, __B), 98 (__v8sf)_mm256_setzero_ps()); 99} 100 101static __inline__ __m128 __DEFAULT_FN_ATTRS128 102_mm_mask_andnot_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 103 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 104 (__v4sf)_mm_andnot_ps(__A, __B), 105 (__v4sf)__W); 106} 107 108static __inline__ __m128 __DEFAULT_FN_ATTRS128 109_mm_maskz_andnot_ps(__mmask8 __U, __m128 __A, __m128 __B) { 110 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 111 (__v4sf)_mm_andnot_ps(__A, __B), 112 (__v4sf)_mm_setzero_ps()); 113} 114 115static __inline__ __m256d __DEFAULT_FN_ATTRS256 116_mm256_mask_and_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { 117 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 118 (__v4df)_mm256_and_pd(__A, __B), 119 (__v4df)__W); 120} 121 122static __inline__ __m256d __DEFAULT_FN_ATTRS256 123_mm256_maskz_and_pd(__mmask8 __U, __m256d __A, __m256d __B) { 124 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 125 (__v4df)_mm256_and_pd(__A, __B), 126 (__v4df)_mm256_setzero_pd()); 127} 128 129static __inline__ __m128d __DEFAULT_FN_ATTRS128 130_mm_mask_and_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 131 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 132 (__v2df)_mm_and_pd(__A, __B), 133 (__v2df)__W); 134} 135 136static __inline__ __m128d __DEFAULT_FN_ATTRS128 137_mm_maskz_and_pd(__mmask8 __U, __m128d __A, __m128d __B) { 138 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 139 (__v2df)_mm_and_pd(__A, __B), 140 (__v2df)_mm_setzero_pd()); 141} 142 143static __inline__ __m256 __DEFAULT_FN_ATTRS256 144_mm256_mask_and_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 145 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 146 (__v8sf)_mm256_and_ps(__A, __B), 147 (__v8sf)__W); 148} 149 150static __inline__ __m256 __DEFAULT_FN_ATTRS256 151_mm256_maskz_and_ps(__mmask8 __U, __m256 __A, __m256 __B) { 152 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 153 (__v8sf)_mm256_and_ps(__A, __B), 154 (__v8sf)_mm256_setzero_ps()); 155} 156 157static __inline__ __m128 __DEFAULT_FN_ATTRS128 158_mm_mask_and_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 159 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 160 (__v4sf)_mm_and_ps(__A, __B), 161 (__v4sf)__W); 162} 163 164static __inline__ __m128 __DEFAULT_FN_ATTRS128 165_mm_maskz_and_ps(__mmask8 __U, __m128 __A, __m128 __B) { 166 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 167 (__v4sf)_mm_and_ps(__A, __B), 168 (__v4sf)_mm_setzero_ps()); 169} 170 171static __inline__ __m256d __DEFAULT_FN_ATTRS256 172_mm256_mask_xor_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { 173 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 174 (__v4df)_mm256_xor_pd(__A, __B), 175 (__v4df)__W); 176} 177 178static __inline__ __m256d __DEFAULT_FN_ATTRS256 179_mm256_maskz_xor_pd(__mmask8 __U, __m256d __A, __m256d __B) { 180 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 181 (__v4df)_mm256_xor_pd(__A, __B), 182 (__v4df)_mm256_setzero_pd()); 183} 184 185static __inline__ __m128d __DEFAULT_FN_ATTRS128 186_mm_mask_xor_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 187 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 188 (__v2df)_mm_xor_pd(__A, __B), 189 (__v2df)__W); 190} 191 192static __inline__ __m128d __DEFAULT_FN_ATTRS128 193_mm_maskz_xor_pd (__mmask8 __U, __m128d __A, __m128d __B) { 194 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 195 (__v2df)_mm_xor_pd(__A, __B), 196 (__v2df)_mm_setzero_pd()); 197} 198 199static __inline__ __m256 __DEFAULT_FN_ATTRS256 200_mm256_mask_xor_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 201 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 202 (__v8sf)_mm256_xor_ps(__A, __B), 203 (__v8sf)__W); 204} 205 206static __inline__ __m256 __DEFAULT_FN_ATTRS256 207_mm256_maskz_xor_ps(__mmask8 __U, __m256 __A, __m256 __B) { 208 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 209 (__v8sf)_mm256_xor_ps(__A, __B), 210 (__v8sf)_mm256_setzero_ps()); 211} 212 213static __inline__ __m128 __DEFAULT_FN_ATTRS128 214_mm_mask_xor_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 215 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 216 (__v4sf)_mm_xor_ps(__A, __B), 217 (__v4sf)__W); 218} 219 220static __inline__ __m128 __DEFAULT_FN_ATTRS128 221_mm_maskz_xor_ps(__mmask8 __U, __m128 __A, __m128 __B) { 222 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 223 (__v4sf)_mm_xor_ps(__A, __B), 224 (__v4sf)_mm_setzero_ps()); 225} 226 227static __inline__ __m256d __DEFAULT_FN_ATTRS256 228_mm256_mask_or_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { 229 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 230 (__v4df)_mm256_or_pd(__A, __B), 231 (__v4df)__W); 232} 233 234static __inline__ __m256d __DEFAULT_FN_ATTRS256 235_mm256_maskz_or_pd(__mmask8 __U, __m256d __A, __m256d __B) { 236 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 237 (__v4df)_mm256_or_pd(__A, __B), 238 (__v4df)_mm256_setzero_pd()); 239} 240 241static __inline__ __m128d __DEFAULT_FN_ATTRS128 242_mm_mask_or_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 243 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 244 (__v2df)_mm_or_pd(__A, __B), 245 (__v2df)__W); 246} 247 248static __inline__ __m128d __DEFAULT_FN_ATTRS128 249_mm_maskz_or_pd(__mmask8 __U, __m128d __A, __m128d __B) { 250 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 251 (__v2df)_mm_or_pd(__A, __B), 252 (__v2df)_mm_setzero_pd()); 253} 254 255static __inline__ __m256 __DEFAULT_FN_ATTRS256 256_mm256_mask_or_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 257 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 258 (__v8sf)_mm256_or_ps(__A, __B), 259 (__v8sf)__W); 260} 261 262static __inline__ __m256 __DEFAULT_FN_ATTRS256 263_mm256_maskz_or_ps(__mmask8 __U, __m256 __A, __m256 __B) { 264 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 265 (__v8sf)_mm256_or_ps(__A, __B), 266 (__v8sf)_mm256_setzero_ps()); 267} 268 269static __inline__ __m128 __DEFAULT_FN_ATTRS128 270_mm_mask_or_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 271 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 272 (__v4sf)_mm_or_ps(__A, __B), 273 (__v4sf)__W); 274} 275 276static __inline__ __m128 __DEFAULT_FN_ATTRS128 277_mm_maskz_or_ps(__mmask8 __U, __m128 __A, __m128 __B) { 278 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 279 (__v4sf)_mm_or_ps(__A, __B), 280 (__v4sf)_mm_setzero_ps()); 281} 282 283static __inline__ __m128i __DEFAULT_FN_ATTRS128 284_mm_cvtpd_epi64 (__m128d __A) { 285 return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A, 286 (__v2di) _mm_setzero_si128(), 287 (__mmask8) -1); 288} 289 290static __inline__ __m128i __DEFAULT_FN_ATTRS128 291_mm_mask_cvtpd_epi64 (__m128i __W, __mmask8 __U, __m128d __A) { 292 return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A, 293 (__v2di) __W, 294 (__mmask8) __U); 295} 296 297static __inline__ __m128i __DEFAULT_FN_ATTRS128 298_mm_maskz_cvtpd_epi64 (__mmask8 __U, __m128d __A) { 299 return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A, 300 (__v2di) _mm_setzero_si128(), 301 (__mmask8) __U); 302} 303 304static __inline__ __m256i __DEFAULT_FN_ATTRS256 305_mm256_cvtpd_epi64 (__m256d __A) { 306 return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A, 307 (__v4di) _mm256_setzero_si256(), 308 (__mmask8) -1); 309} 310 311static __inline__ __m256i __DEFAULT_FN_ATTRS256 312_mm256_mask_cvtpd_epi64 (__m256i __W, __mmask8 __U, __m256d __A) { 313 return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A, 314 (__v4di) __W, 315 (__mmask8) __U); 316} 317 318static __inline__ __m256i __DEFAULT_FN_ATTRS256 319_mm256_maskz_cvtpd_epi64 (__mmask8 __U, __m256d __A) { 320 return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A, 321 (__v4di) _mm256_setzero_si256(), 322 (__mmask8) __U); 323} 324 325static __inline__ __m128i __DEFAULT_FN_ATTRS128 326_mm_cvtpd_epu64 (__m128d __A) { 327 return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A, 328 (__v2di) _mm_setzero_si128(), 329 (__mmask8) -1); 330} 331 332static __inline__ __m128i __DEFAULT_FN_ATTRS128 333_mm_mask_cvtpd_epu64 (__m128i __W, __mmask8 __U, __m128d __A) { 334 return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A, 335 (__v2di) __W, 336 (__mmask8) __U); 337} 338 339static __inline__ __m128i __DEFAULT_FN_ATTRS128 340_mm_maskz_cvtpd_epu64 (__mmask8 __U, __m128d __A) { 341 return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A, 342 (__v2di) _mm_setzero_si128(), 343 (__mmask8) __U); 344} 345 346static __inline__ __m256i __DEFAULT_FN_ATTRS256 347_mm256_cvtpd_epu64 (__m256d __A) { 348 return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A, 349 (__v4di) _mm256_setzero_si256(), 350 (__mmask8) -1); 351} 352 353static __inline__ __m256i __DEFAULT_FN_ATTRS256 354_mm256_mask_cvtpd_epu64 (__m256i __W, __mmask8 __U, __m256d __A) { 355 return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A, 356 (__v4di) __W, 357 (__mmask8) __U); 358} 359 360static __inline__ __m256i __DEFAULT_FN_ATTRS256 361_mm256_maskz_cvtpd_epu64 (__mmask8 __U, __m256d __A) { 362 return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A, 363 (__v4di) _mm256_setzero_si256(), 364 (__mmask8) __U); 365} 366 367static __inline__ __m128i __DEFAULT_FN_ATTRS128 368_mm_cvtps_epi64 (__m128 __A) { 369 return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A, 370 (__v2di) _mm_setzero_si128(), 371 (__mmask8) -1); 372} 373 374static __inline__ __m128i __DEFAULT_FN_ATTRS128 375_mm_mask_cvtps_epi64 (__m128i __W, __mmask8 __U, __m128 __A) { 376 return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A, 377 (__v2di) __W, 378 (__mmask8) __U); 379} 380 381static __inline__ __m128i __DEFAULT_FN_ATTRS128 382_mm_maskz_cvtps_epi64 (__mmask8 __U, __m128 __A) { 383 return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A, 384 (__v2di) _mm_setzero_si128(), 385 (__mmask8) __U); 386} 387 388static __inline__ __m256i __DEFAULT_FN_ATTRS256 389_mm256_cvtps_epi64 (__m128 __A) { 390 return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A, 391 (__v4di) _mm256_setzero_si256(), 392 (__mmask8) -1); 393} 394 395static __inline__ __m256i __DEFAULT_FN_ATTRS256 396_mm256_mask_cvtps_epi64 (__m256i __W, __mmask8 __U, __m128 __A) { 397 return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A, 398 (__v4di) __W, 399 (__mmask8) __U); 400} 401 402static __inline__ __m256i __DEFAULT_FN_ATTRS256 403_mm256_maskz_cvtps_epi64 (__mmask8 __U, __m128 __A) { 404 return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A, 405 (__v4di) _mm256_setzero_si256(), 406 (__mmask8) __U); 407} 408 409static __inline__ __m128i __DEFAULT_FN_ATTRS128 410_mm_cvtps_epu64 (__m128 __A) { 411 return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A, 412 (__v2di) _mm_setzero_si128(), 413 (__mmask8) -1); 414} 415 416static __inline__ __m128i __DEFAULT_FN_ATTRS128 417_mm_mask_cvtps_epu64 (__m128i __W, __mmask8 __U, __m128 __A) { 418 return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A, 419 (__v2di) __W, 420 (__mmask8) __U); 421} 422 423static __inline__ __m128i __DEFAULT_FN_ATTRS128 424_mm_maskz_cvtps_epu64 (__mmask8 __U, __m128 __A) { 425 return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A, 426 (__v2di) _mm_setzero_si128(), 427 (__mmask8) __U); 428} 429 430static __inline__ __m256i __DEFAULT_FN_ATTRS256 431_mm256_cvtps_epu64 (__m128 __A) { 432 return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A, 433 (__v4di) _mm256_setzero_si256(), 434 (__mmask8) -1); 435} 436 437static __inline__ __m256i __DEFAULT_FN_ATTRS256 438_mm256_mask_cvtps_epu64 (__m256i __W, __mmask8 __U, __m128 __A) { 439 return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A, 440 (__v4di) __W, 441 (__mmask8) __U); 442} 443 444static __inline__ __m256i __DEFAULT_FN_ATTRS256 445_mm256_maskz_cvtps_epu64 (__mmask8 __U, __m128 __A) { 446 return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A, 447 (__v4di) _mm256_setzero_si256(), 448 (__mmask8) __U); 449} 450 451static __inline__ __m128d __DEFAULT_FN_ATTRS128 452_mm_cvtepi64_pd (__m128i __A) { 453 return (__m128d)__builtin_convertvector((__v2di)__A, __v2df); 454} 455 456static __inline__ __m128d __DEFAULT_FN_ATTRS128 457_mm_mask_cvtepi64_pd (__m128d __W, __mmask8 __U, __m128i __A) { 458 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 459 (__v2df)_mm_cvtepi64_pd(__A), 460 (__v2df)__W); 461} 462 463static __inline__ __m128d __DEFAULT_FN_ATTRS128 464_mm_maskz_cvtepi64_pd (__mmask8 __U, __m128i __A) { 465 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 466 (__v2df)_mm_cvtepi64_pd(__A), 467 (__v2df)_mm_setzero_pd()); 468} 469 470static __inline__ __m256d __DEFAULT_FN_ATTRS256 471_mm256_cvtepi64_pd (__m256i __A) { 472 return (__m256d)__builtin_convertvector((__v4di)__A, __v4df); 473} 474 475static __inline__ __m256d __DEFAULT_FN_ATTRS256 476_mm256_mask_cvtepi64_pd (__m256d __W, __mmask8 __U, __m256i __A) { 477 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 478 (__v4df)_mm256_cvtepi64_pd(__A), 479 (__v4df)__W); 480} 481 482static __inline__ __m256d __DEFAULT_FN_ATTRS256 483_mm256_maskz_cvtepi64_pd (__mmask8 __U, __m256i __A) { 484 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 485 (__v4df)_mm256_cvtepi64_pd(__A), 486 (__v4df)_mm256_setzero_pd()); 487} 488 489static __inline__ __m128 __DEFAULT_FN_ATTRS128 490_mm_cvtepi64_ps (__m128i __A) { 491 return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A, 492 (__v4sf) _mm_setzero_ps(), 493 (__mmask8) -1); 494} 495 496static __inline__ __m128 __DEFAULT_FN_ATTRS128 497_mm_mask_cvtepi64_ps (__m128 __W, __mmask8 __U, __m128i __A) { 498 return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A, 499 (__v4sf) __W, 500 (__mmask8) __U); 501} 502 503static __inline__ __m128 __DEFAULT_FN_ATTRS128 504_mm_maskz_cvtepi64_ps (__mmask8 __U, __m128i __A) { 505 return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A, 506 (__v4sf) _mm_setzero_ps(), 507 (__mmask8) __U); 508} 509 510static __inline__ __m128 __DEFAULT_FN_ATTRS256 511_mm256_cvtepi64_ps (__m256i __A) { 512 return (__m128)__builtin_convertvector((__v4di)__A, __v4sf); 513} 514 515static __inline__ __m128 __DEFAULT_FN_ATTRS256 516_mm256_mask_cvtepi64_ps (__m128 __W, __mmask8 __U, __m256i __A) { 517 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 518 (__v4sf)_mm256_cvtepi64_ps(__A), 519 (__v4sf)__W); 520} 521 522static __inline__ __m128 __DEFAULT_FN_ATTRS256 523_mm256_maskz_cvtepi64_ps (__mmask8 __U, __m256i __A) { 524 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 525 (__v4sf)_mm256_cvtepi64_ps(__A), 526 (__v4sf)_mm_setzero_ps()); 527} 528 529static __inline__ __m128i __DEFAULT_FN_ATTRS128 530_mm_cvttpd_epi64 (__m128d __A) { 531 return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A, 532 (__v2di) _mm_setzero_si128(), 533 (__mmask8) -1); 534} 535 536static __inline__ __m128i __DEFAULT_FN_ATTRS128 537_mm_mask_cvttpd_epi64 (__m128i __W, __mmask8 __U, __m128d __A) { 538 return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A, 539 (__v2di) __W, 540 (__mmask8) __U); 541} 542 543static __inline__ __m128i __DEFAULT_FN_ATTRS128 544_mm_maskz_cvttpd_epi64 (__mmask8 __U, __m128d __A) { 545 return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A, 546 (__v2di) _mm_setzero_si128(), 547 (__mmask8) __U); 548} 549 550static __inline__ __m256i __DEFAULT_FN_ATTRS256 551_mm256_cvttpd_epi64 (__m256d __A) { 552 return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A, 553 (__v4di) _mm256_setzero_si256(), 554 (__mmask8) -1); 555} 556 557static __inline__ __m256i __DEFAULT_FN_ATTRS256 558_mm256_mask_cvttpd_epi64 (__m256i __W, __mmask8 __U, __m256d __A) { 559 return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A, 560 (__v4di) __W, 561 (__mmask8) __U); 562} 563 564static __inline__ __m256i __DEFAULT_FN_ATTRS256 565_mm256_maskz_cvttpd_epi64 (__mmask8 __U, __m256d __A) { 566 return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A, 567 (__v4di) _mm256_setzero_si256(), 568 (__mmask8) __U); 569} 570 571static __inline__ __m128i __DEFAULT_FN_ATTRS128 572_mm_cvttpd_epu64 (__m128d __A) { 573 return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A, 574 (__v2di) _mm_setzero_si128(), 575 (__mmask8) -1); 576} 577 578static __inline__ __m128i __DEFAULT_FN_ATTRS128 579_mm_mask_cvttpd_epu64 (__m128i __W, __mmask8 __U, __m128d __A) { 580 return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A, 581 (__v2di) __W, 582 (__mmask8) __U); 583} 584 585static __inline__ __m128i __DEFAULT_FN_ATTRS128 586_mm_maskz_cvttpd_epu64 (__mmask8 __U, __m128d __A) { 587 return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A, 588 (__v2di) _mm_setzero_si128(), 589 (__mmask8) __U); 590} 591 592static __inline__ __m256i __DEFAULT_FN_ATTRS256 593_mm256_cvttpd_epu64 (__m256d __A) { 594 return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A, 595 (__v4di) _mm256_setzero_si256(), 596 (__mmask8) -1); 597} 598 599static __inline__ __m256i __DEFAULT_FN_ATTRS256 600_mm256_mask_cvttpd_epu64 (__m256i __W, __mmask8 __U, __m256d __A) { 601 return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A, 602 (__v4di) __W, 603 (__mmask8) __U); 604} 605 606static __inline__ __m256i __DEFAULT_FN_ATTRS256 607_mm256_maskz_cvttpd_epu64 (__mmask8 __U, __m256d __A) { 608 return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A, 609 (__v4di) _mm256_setzero_si256(), 610 (__mmask8) __U); 611} 612 613static __inline__ __m128i __DEFAULT_FN_ATTRS128 614_mm_cvttps_epi64 (__m128 __A) { 615 return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A, 616 (__v2di) _mm_setzero_si128(), 617 (__mmask8) -1); 618} 619 620static __inline__ __m128i __DEFAULT_FN_ATTRS128 621_mm_mask_cvttps_epi64 (__m128i __W, __mmask8 __U, __m128 __A) { 622 return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A, 623 (__v2di) __W, 624 (__mmask8) __U); 625} 626 627static __inline__ __m128i __DEFAULT_FN_ATTRS128 628_mm_maskz_cvttps_epi64 (__mmask8 __U, __m128 __A) { 629 return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A, 630 (__v2di) _mm_setzero_si128(), 631 (__mmask8) __U); 632} 633 634static __inline__ __m256i __DEFAULT_FN_ATTRS256 635_mm256_cvttps_epi64 (__m128 __A) { 636 return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A, 637 (__v4di) _mm256_setzero_si256(), 638 (__mmask8) -1); 639} 640 641static __inline__ __m256i __DEFAULT_FN_ATTRS256 642_mm256_mask_cvttps_epi64 (__m256i __W, __mmask8 __U, __m128 __A) { 643 return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A, 644 (__v4di) __W, 645 (__mmask8) __U); 646} 647 648static __inline__ __m256i __DEFAULT_FN_ATTRS256 649_mm256_maskz_cvttps_epi64 (__mmask8 __U, __m128 __A) { 650 return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A, 651 (__v4di) _mm256_setzero_si256(), 652 (__mmask8) __U); 653} 654 655static __inline__ __m128i __DEFAULT_FN_ATTRS128 656_mm_cvttps_epu64 (__m128 __A) { 657 return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A, 658 (__v2di) _mm_setzero_si128(), 659 (__mmask8) -1); 660} 661 662static __inline__ __m128i __DEFAULT_FN_ATTRS128 663_mm_mask_cvttps_epu64 (__m128i __W, __mmask8 __U, __m128 __A) { 664 return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A, 665 (__v2di) __W, 666 (__mmask8) __U); 667} 668 669static __inline__ __m128i __DEFAULT_FN_ATTRS128 670_mm_maskz_cvttps_epu64 (__mmask8 __U, __m128 __A) { 671 return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A, 672 (__v2di) _mm_setzero_si128(), 673 (__mmask8) __U); 674} 675 676static __inline__ __m256i __DEFAULT_FN_ATTRS256 677_mm256_cvttps_epu64 (__m128 __A) { 678 return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A, 679 (__v4di) _mm256_setzero_si256(), 680 (__mmask8) -1); 681} 682 683static __inline__ __m256i __DEFAULT_FN_ATTRS256 684_mm256_mask_cvttps_epu64 (__m256i __W, __mmask8 __U, __m128 __A) { 685 return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A, 686 (__v4di) __W, 687 (__mmask8) __U); 688} 689 690static __inline__ __m256i __DEFAULT_FN_ATTRS256 691_mm256_maskz_cvttps_epu64 (__mmask8 __U, __m128 __A) { 692 return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A, 693 (__v4di) _mm256_setzero_si256(), 694 (__mmask8) __U); 695} 696 697static __inline__ __m128d __DEFAULT_FN_ATTRS128 698_mm_cvtepu64_pd (__m128i __A) { 699 return (__m128d)__builtin_convertvector((__v2du)__A, __v2df); 700} 701 702static __inline__ __m128d __DEFAULT_FN_ATTRS128 703_mm_mask_cvtepu64_pd (__m128d __W, __mmask8 __U, __m128i __A) { 704 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 705 (__v2df)_mm_cvtepu64_pd(__A), 706 (__v2df)__W); 707} 708 709static __inline__ __m128d __DEFAULT_FN_ATTRS128 710_mm_maskz_cvtepu64_pd (__mmask8 __U, __m128i __A) { 711 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 712 (__v2df)_mm_cvtepu64_pd(__A), 713 (__v2df)_mm_setzero_pd()); 714} 715 716static __inline__ __m256d __DEFAULT_FN_ATTRS256 717_mm256_cvtepu64_pd (__m256i __A) { 718 return (__m256d)__builtin_convertvector((__v4du)__A, __v4df); 719} 720 721static __inline__ __m256d __DEFAULT_FN_ATTRS256 722_mm256_mask_cvtepu64_pd (__m256d __W, __mmask8 __U, __m256i __A) { 723 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 724 (__v4df)_mm256_cvtepu64_pd(__A), 725 (__v4df)__W); 726} 727 728static __inline__ __m256d __DEFAULT_FN_ATTRS256 729_mm256_maskz_cvtepu64_pd (__mmask8 __U, __m256i __A) { 730 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 731 (__v4df)_mm256_cvtepu64_pd(__A), 732 (__v4df)_mm256_setzero_pd()); 733} 734 735static __inline__ __m128 __DEFAULT_FN_ATTRS128 736_mm_cvtepu64_ps (__m128i __A) { 737 return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A, 738 (__v4sf) _mm_setzero_ps(), 739 (__mmask8) -1); 740} 741 742static __inline__ __m128 __DEFAULT_FN_ATTRS128 743_mm_mask_cvtepu64_ps (__m128 __W, __mmask8 __U, __m128i __A) { 744 return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A, 745 (__v4sf) __W, 746 (__mmask8) __U); 747} 748 749static __inline__ __m128 __DEFAULT_FN_ATTRS128 750_mm_maskz_cvtepu64_ps (__mmask8 __U, __m128i __A) { 751 return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A, 752 (__v4sf) _mm_setzero_ps(), 753 (__mmask8) __U); 754} 755 756static __inline__ __m128 __DEFAULT_FN_ATTRS256 757_mm256_cvtepu64_ps (__m256i __A) { 758 return (__m128)__builtin_convertvector((__v4du)__A, __v4sf); 759} 760 761static __inline__ __m128 __DEFAULT_FN_ATTRS256 762_mm256_mask_cvtepu64_ps (__m128 __W, __mmask8 __U, __m256i __A) { 763 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 764 (__v4sf)_mm256_cvtepu64_ps(__A), 765 (__v4sf)__W); 766} 767 768static __inline__ __m128 __DEFAULT_FN_ATTRS256 769_mm256_maskz_cvtepu64_ps (__mmask8 __U, __m256i __A) { 770 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 771 (__v4sf)_mm256_cvtepu64_ps(__A), 772 (__v4sf)_mm_setzero_ps()); 773} 774 775#define _mm_range_pd(A, B, C) \ 776 (__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \ 777 (__v2df)(__m128d)(B), (int)(C), \ 778 (__v2df)_mm_setzero_pd(), \ 779 (__mmask8)-1) 780 781#define _mm_mask_range_pd(W, U, A, B, C) \ 782 (__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \ 783 (__v2df)(__m128d)(B), (int)(C), \ 784 (__v2df)(__m128d)(W), \ 785 (__mmask8)(U)) 786 787#define _mm_maskz_range_pd(U, A, B, C) \ 788 (__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \ 789 (__v2df)(__m128d)(B), (int)(C), \ 790 (__v2df)_mm_setzero_pd(), \ 791 (__mmask8)(U)) 792 793#define _mm256_range_pd(A, B, C) \ 794 (__m256d)__builtin_ia32_rangepd256_mask((__v4df)(__m256d)(A), \ 795 (__v4df)(__m256d)(B), (int)(C), \ 796 (__v4df)_mm256_setzero_pd(), \ 797 (__mmask8)-1) 798 799#define _mm256_mask_range_pd(W, U, A, B, C) \ 800 (__m256d)__builtin_ia32_rangepd256_mask((__v4df)(__m256d)(A), \ 801 (__v4df)(__m256d)(B), (int)(C), \ 802 (__v4df)(__m256d)(W), \ 803 (__mmask8)(U)) 804 805#define _mm256_maskz_range_pd(U, A, B, C) \ 806 (__m256d)__builtin_ia32_rangepd256_mask((__v4df)(__m256d)(A), \ 807 (__v4df)(__m256d)(B), (int)(C), \ 808 (__v4df)_mm256_setzero_pd(), \ 809 (__mmask8)(U)) 810 811#define _mm_range_ps(A, B, C) \ 812 (__m128)__builtin_ia32_rangeps128_mask((__v4sf)(__m128)(A), \ 813 (__v4sf)(__m128)(B), (int)(C), \ 814 (__v4sf)_mm_setzero_ps(), \ 815 (__mmask8)-1) 816 817#define _mm_mask_range_ps(W, U, A, B, C) \ 818 (__m128)__builtin_ia32_rangeps128_mask((__v4sf)(__m128)(A), \ 819 (__v4sf)(__m128)(B), (int)(C), \ 820 (__v4sf)(__m128)(W), (__mmask8)(U)) 821 822#define _mm_maskz_range_ps(U, A, B, C) \ 823 (__m128)__builtin_ia32_rangeps128_mask((__v4sf)(__m128)(A), \ 824 (__v4sf)(__m128)(B), (int)(C), \ 825 (__v4sf)_mm_setzero_ps(), \ 826 (__mmask8)(U)) 827 828#define _mm256_range_ps(A, B, C) \ 829 (__m256)__builtin_ia32_rangeps256_mask((__v8sf)(__m256)(A), \ 830 (__v8sf)(__m256)(B), (int)(C), \ 831 (__v8sf)_mm256_setzero_ps(), \ 832 (__mmask8)-1) 833 834#define _mm256_mask_range_ps(W, U, A, B, C) \ 835 (__m256)__builtin_ia32_rangeps256_mask((__v8sf)(__m256)(A), \ 836 (__v8sf)(__m256)(B), (int)(C), \ 837 (__v8sf)(__m256)(W), (__mmask8)(U)) 838 839#define _mm256_maskz_range_ps(U, A, B, C) \ 840 (__m256)__builtin_ia32_rangeps256_mask((__v8sf)(__m256)(A), \ 841 (__v8sf)(__m256)(B), (int)(C), \ 842 (__v8sf)_mm256_setzero_ps(), \ 843 (__mmask8)(U)) 844 845#define _mm_reduce_pd(A, B) \ 846 (__m128d)__builtin_ia32_reducepd128_mask((__v2df)(__m128d)(A), (int)(B), \ 847 (__v2df)_mm_setzero_pd(), \ 848 (__mmask8)-1) 849 850#define _mm_mask_reduce_pd(W, U, A, B) \ 851 (__m128d)__builtin_ia32_reducepd128_mask((__v2df)(__m128d)(A), (int)(B), \ 852 (__v2df)(__m128d)(W), \ 853 (__mmask8)(U)) 854 855#define _mm_maskz_reduce_pd(U, A, B) \ 856 (__m128d)__builtin_ia32_reducepd128_mask((__v2df)(__m128d)(A), (int)(B), \ 857 (__v2df)_mm_setzero_pd(), \ 858 (__mmask8)(U)) 859 860#define _mm256_reduce_pd(A, B) \ 861 (__m256d)__builtin_ia32_reducepd256_mask((__v4df)(__m256d)(A), (int)(B), \ 862 (__v4df)_mm256_setzero_pd(), \ 863 (__mmask8)-1) 864 865#define _mm256_mask_reduce_pd(W, U, A, B) \ 866 (__m256d)__builtin_ia32_reducepd256_mask((__v4df)(__m256d)(A), (int)(B), \ 867 (__v4df)(__m256d)(W), \ 868 (__mmask8)(U)) 869 870#define _mm256_maskz_reduce_pd(U, A, B) \ 871 (__m256d)__builtin_ia32_reducepd256_mask((__v4df)(__m256d)(A), (int)(B), \ 872 (__v4df)_mm256_setzero_pd(), \ 873 (__mmask8)(U)) 874 875#define _mm_reduce_ps(A, B) \ 876 (__m128)__builtin_ia32_reduceps128_mask((__v4sf)(__m128)(A), (int)(B), \ 877 (__v4sf)_mm_setzero_ps(), \ 878 (__mmask8)-1) 879 880#define _mm_mask_reduce_ps(W, U, A, B) \ 881 (__m128)__builtin_ia32_reduceps128_mask((__v4sf)(__m128)(A), (int)(B), \ 882 (__v4sf)(__m128)(W), \ 883 (__mmask8)(U)) 884 885#define _mm_maskz_reduce_ps(U, A, B) \ 886 (__m128)__builtin_ia32_reduceps128_mask((__v4sf)(__m128)(A), (int)(B), \ 887 (__v4sf)_mm_setzero_ps(), \ 888 (__mmask8)(U)) 889 890#define _mm256_reduce_ps(A, B) \ 891 (__m256)__builtin_ia32_reduceps256_mask((__v8sf)(__m256)(A), (int)(B), \ 892 (__v8sf)_mm256_setzero_ps(), \ 893 (__mmask8)-1) 894 895#define _mm256_mask_reduce_ps(W, U, A, B) \ 896 (__m256)__builtin_ia32_reduceps256_mask((__v8sf)(__m256)(A), (int)(B), \ 897 (__v8sf)(__m256)(W), \ 898 (__mmask8)(U)) 899 900#define _mm256_maskz_reduce_ps(U, A, B) \ 901 (__m256)__builtin_ia32_reduceps256_mask((__v8sf)(__m256)(A), (int)(B), \ 902 (__v8sf)_mm256_setzero_ps(), \ 903 (__mmask8)(U)) 904 905static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 906_mm_movepi32_mask (__m128i __A) 907{ 908 return (__mmask8) __builtin_ia32_cvtd2mask128 ((__v4si) __A); 909} 910 911static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 912_mm256_movepi32_mask (__m256i __A) 913{ 914 return (__mmask8) __builtin_ia32_cvtd2mask256 ((__v8si) __A); 915} 916 917static __inline__ __m128i __DEFAULT_FN_ATTRS128 918_mm_movm_epi32 (__mmask8 __A) 919{ 920 return (__m128i) __builtin_ia32_cvtmask2d128 (__A); 921} 922 923static __inline__ __m256i __DEFAULT_FN_ATTRS256 924_mm256_movm_epi32 (__mmask8 __A) 925{ 926 return (__m256i) __builtin_ia32_cvtmask2d256 (__A); 927} 928 929static __inline__ __m128i __DEFAULT_FN_ATTRS128 930_mm_movm_epi64 (__mmask8 __A) 931{ 932 return (__m128i) __builtin_ia32_cvtmask2q128 (__A); 933} 934 935static __inline__ __m256i __DEFAULT_FN_ATTRS256 936_mm256_movm_epi64 (__mmask8 __A) 937{ 938 return (__m256i) __builtin_ia32_cvtmask2q256 (__A); 939} 940 941static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 942_mm_movepi64_mask (__m128i __A) 943{ 944 return (__mmask8) __builtin_ia32_cvtq2mask128 ((__v2di) __A); 945} 946 947static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 948_mm256_movepi64_mask (__m256i __A) 949{ 950 return (__mmask8) __builtin_ia32_cvtq2mask256 ((__v4di) __A); 951} 952 953static __inline__ __m256 __DEFAULT_FN_ATTRS256 954_mm256_broadcast_f32x2 (__m128 __A) 955{ 956 return (__m256)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A, 957 0, 1, 0, 1, 0, 1, 0, 1); 958} 959 960static __inline__ __m256 __DEFAULT_FN_ATTRS256 961_mm256_mask_broadcast_f32x2 (__m256 __O, __mmask8 __M, __m128 __A) 962{ 963 return (__m256)__builtin_ia32_selectps_256((__mmask8)__M, 964 (__v8sf)_mm256_broadcast_f32x2(__A), 965 (__v8sf)__O); 966} 967 968static __inline__ __m256 __DEFAULT_FN_ATTRS256 969_mm256_maskz_broadcast_f32x2 (__mmask8 __M, __m128 __A) 970{ 971 return (__m256)__builtin_ia32_selectps_256((__mmask8)__M, 972 (__v8sf)_mm256_broadcast_f32x2(__A), 973 (__v8sf)_mm256_setzero_ps()); 974} 975 976static __inline__ __m256d __DEFAULT_FN_ATTRS256 977_mm256_broadcast_f64x2(__m128d __A) 978{ 979 return (__m256d)__builtin_shufflevector((__v2df)__A, (__v2df)__A, 980 0, 1, 0, 1); 981} 982 983static __inline__ __m256d __DEFAULT_FN_ATTRS256 984_mm256_mask_broadcast_f64x2(__m256d __O, __mmask8 __M, __m128d __A) 985{ 986 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__M, 987 (__v4df)_mm256_broadcast_f64x2(__A), 988 (__v4df)__O); 989} 990 991static __inline__ __m256d __DEFAULT_FN_ATTRS256 992_mm256_maskz_broadcast_f64x2 (__mmask8 __M, __m128d __A) 993{ 994 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__M, 995 (__v4df)_mm256_broadcast_f64x2(__A), 996 (__v4df)_mm256_setzero_pd()); 997} 998 999static __inline__ __m128i __DEFAULT_FN_ATTRS128 1000_mm_broadcast_i32x2 (__m128i __A) 1001{ 1002 return (__m128i)__builtin_shufflevector((__v4si)__A, (__v4si)__A, 1003 0, 1, 0, 1); 1004} 1005 1006static __inline__ __m128i __DEFAULT_FN_ATTRS128 1007_mm_mask_broadcast_i32x2 (__m128i __O, __mmask8 __M, __m128i __A) 1008{ 1009 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 1010 (__v4si)_mm_broadcast_i32x2(__A), 1011 (__v4si)__O); 1012} 1013 1014static __inline__ __m128i __DEFAULT_FN_ATTRS128 1015_mm_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A) 1016{ 1017 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 1018 (__v4si)_mm_broadcast_i32x2(__A), 1019 (__v4si)_mm_setzero_si128()); 1020} 1021 1022static __inline__ __m256i __DEFAULT_FN_ATTRS256 1023_mm256_broadcast_i32x2 (__m128i __A) 1024{ 1025 return (__m256i)__builtin_shufflevector((__v4si)__A, (__v4si)__A, 1026 0, 1, 0, 1, 0, 1, 0, 1); 1027} 1028 1029static __inline__ __m256i __DEFAULT_FN_ATTRS256 1030_mm256_mask_broadcast_i32x2 (__m256i __O, __mmask8 __M, __m128i __A) 1031{ 1032 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 1033 (__v8si)_mm256_broadcast_i32x2(__A), 1034 (__v8si)__O); 1035} 1036 1037static __inline__ __m256i __DEFAULT_FN_ATTRS256 1038_mm256_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A) 1039{ 1040 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 1041 (__v8si)_mm256_broadcast_i32x2(__A), 1042 (__v8si)_mm256_setzero_si256()); 1043} 1044 1045static __inline__ __m256i __DEFAULT_FN_ATTRS256 1046_mm256_broadcast_i64x2(__m128i __A) 1047{ 1048 return (__m256i)__builtin_shufflevector((__v2di)__A, (__v2di)__A, 1049 0, 1, 0, 1); 1050} 1051 1052static __inline__ __m256i __DEFAULT_FN_ATTRS256 1053_mm256_mask_broadcast_i64x2(__m256i __O, __mmask8 __M, __m128i __A) 1054{ 1055 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 1056 (__v4di)_mm256_broadcast_i64x2(__A), 1057 (__v4di)__O); 1058} 1059 1060static __inline__ __m256i __DEFAULT_FN_ATTRS256 1061_mm256_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A) 1062{ 1063 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 1064 (__v4di)_mm256_broadcast_i64x2(__A), 1065 (__v4di)_mm256_setzero_si256()); 1066} 1067 1068#define _mm256_extractf64x2_pd(A, imm) \ 1069 (__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \ 1070 (int)(imm), \ 1071 (__v2df)_mm_undefined_pd(), \ 1072 (__mmask8)-1) 1073 1074#define _mm256_mask_extractf64x2_pd(W, U, A, imm) \ 1075 (__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \ 1076 (int)(imm), \ 1077 (__v2df)(__m128d)(W), \ 1078 (__mmask8)(U)) 1079 1080#define _mm256_maskz_extractf64x2_pd(U, A, imm) \ 1081 (__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \ 1082 (int)(imm), \ 1083 (__v2df)_mm_setzero_pd(), \ 1084 (__mmask8)(U)) 1085 1086#define _mm256_extracti64x2_epi64(A, imm) \ 1087 (__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \ 1088 (int)(imm), \ 1089 (__v2di)_mm_undefined_si128(), \ 1090 (__mmask8)-1) 1091 1092#define _mm256_mask_extracti64x2_epi64(W, U, A, imm) \ 1093 (__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \ 1094 (int)(imm), \ 1095 (__v2di)(__m128i)(W), \ 1096 (__mmask8)(U)) 1097 1098#define _mm256_maskz_extracti64x2_epi64(U, A, imm) \ 1099 (__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \ 1100 (int)(imm), \ 1101 (__v2di)_mm_setzero_si128(), \ 1102 (__mmask8)(U)) 1103 1104#define _mm256_insertf64x2(A, B, imm) \ 1105 (__m256d)__builtin_ia32_insertf64x2_256((__v4df)(__m256d)(A), \ 1106 (__v2df)(__m128d)(B), (int)(imm)) 1107 1108#define _mm256_mask_insertf64x2(W, U, A, B, imm) \ 1109 (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ 1110 (__v4df)_mm256_insertf64x2((A), (B), (imm)), \ 1111 (__v4df)(__m256d)(W)) 1112 1113#define _mm256_maskz_insertf64x2(U, A, B, imm) \ 1114 (__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ 1115 (__v4df)_mm256_insertf64x2((A), (B), (imm)), \ 1116 (__v4df)_mm256_setzero_pd()) 1117 1118#define _mm256_inserti64x2(A, B, imm) \ 1119 (__m256i)__builtin_ia32_inserti64x2_256((__v4di)(__m256i)(A), \ 1120 (__v2di)(__m128i)(B), (int)(imm)) 1121 1122#define _mm256_mask_inserti64x2(W, U, A, B, imm) \ 1123 (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ 1124 (__v4di)_mm256_inserti64x2((A), (B), (imm)), \ 1125 (__v4di)(__m256i)(W)) 1126 1127#define _mm256_maskz_inserti64x2(U, A, B, imm) \ 1128 (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ 1129 (__v4di)_mm256_inserti64x2((A), (B), (imm)), \ 1130 (__v4di)_mm256_setzero_si256()) 1131 1132#define _mm_mask_fpclass_pd_mask(U, A, imm) \ 1133 (__mmask8)__builtin_ia32_fpclasspd128_mask((__v2df)(__m128d)(A), (int)(imm), \ 1134 (__mmask8)(U)) 1135 1136#define _mm_fpclass_pd_mask(A, imm) \ 1137 (__mmask8)__builtin_ia32_fpclasspd128_mask((__v2df)(__m128d)(A), (int)(imm), \ 1138 (__mmask8)-1) 1139 1140#define _mm256_mask_fpclass_pd_mask(U, A, imm) \ 1141 (__mmask8)__builtin_ia32_fpclasspd256_mask((__v4df)(__m256d)(A), (int)(imm), \ 1142 (__mmask8)(U)) 1143 1144#define _mm256_fpclass_pd_mask(A, imm) \ 1145 (__mmask8)__builtin_ia32_fpclasspd256_mask((__v4df)(__m256d)(A), (int)(imm), \ 1146 (__mmask8)-1) 1147 1148#define _mm_mask_fpclass_ps_mask(U, A, imm) \ 1149 (__mmask8)__builtin_ia32_fpclassps128_mask((__v4sf)(__m128)(A), (int)(imm), \ 1150 (__mmask8)(U)) 1151 1152#define _mm_fpclass_ps_mask(A, imm) \ 1153 (__mmask8)__builtin_ia32_fpclassps128_mask((__v4sf)(__m128)(A), (int)(imm), \ 1154 (__mmask8)-1) 1155 1156#define _mm256_mask_fpclass_ps_mask(U, A, imm) \ 1157 (__mmask8)__builtin_ia32_fpclassps256_mask((__v8sf)(__m256)(A), (int)(imm), \ 1158 (__mmask8)(U)) 1159 1160#define _mm256_fpclass_ps_mask(A, imm) \ 1161 (__mmask8)__builtin_ia32_fpclassps256_mask((__v8sf)(__m256)(A), (int)(imm), \ 1162 (__mmask8)-1) 1163 1164#undef __DEFAULT_FN_ATTRS128 1165#undef __DEFAULT_FN_ATTRS256 1166 1167#endif 1168