1/*===------------- avx512vlvbmi2intrin.h - VBMI2 intrinsics -----------------=== 2 * 3 * 4 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 5 * See https://llvm.org/LICENSE.txt for license information. 6 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 7 * 8 *===-----------------------------------------------------------------------=== 9 */ 10#ifndef __IMMINTRIN_H 11#error "Never use <avx512vlvbmi2intrin.h> directly; include <immintrin.h> instead." 12#endif 13 14#ifndef __AVX512VLVBMI2INTRIN_H 15#define __AVX512VLVBMI2INTRIN_H 16 17/* Define the default attributes for the functions in this file. */ 18#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vbmi2"), __min_vector_width__(128))) 19#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vbmi2"), __min_vector_width__(256))) 20 21static __inline__ __m128i __DEFAULT_FN_ATTRS128 22_mm_mask_compress_epi16(__m128i __S, __mmask8 __U, __m128i __D) 23{ 24 return (__m128i) __builtin_ia32_compresshi128_mask ((__v8hi) __D, 25 (__v8hi) __S, 26 __U); 27} 28 29static __inline__ __m128i __DEFAULT_FN_ATTRS128 30_mm_maskz_compress_epi16(__mmask8 __U, __m128i __D) 31{ 32 return (__m128i) __builtin_ia32_compresshi128_mask ((__v8hi) __D, 33 (__v8hi) _mm_setzero_si128(), 34 __U); 35} 36 37static __inline__ __m128i __DEFAULT_FN_ATTRS128 38_mm_mask_compress_epi8(__m128i __S, __mmask16 __U, __m128i __D) 39{ 40 return (__m128i) __builtin_ia32_compressqi128_mask ((__v16qi) __D, 41 (__v16qi) __S, 42 __U); 43} 44 45static __inline__ __m128i __DEFAULT_FN_ATTRS128 46_mm_maskz_compress_epi8(__mmask16 __U, __m128i __D) 47{ 48 return (__m128i) __builtin_ia32_compressqi128_mask ((__v16qi) __D, 49 (__v16qi) _mm_setzero_si128(), 50 __U); 51} 52 53static __inline__ void __DEFAULT_FN_ATTRS128 54_mm_mask_compressstoreu_epi16(void *__P, __mmask8 __U, __m128i __D) 55{ 56 __builtin_ia32_compressstorehi128_mask ((__v8hi *) __P, (__v8hi) __D, 57 __U); 58} 59 60static __inline__ void __DEFAULT_FN_ATTRS128 61_mm_mask_compressstoreu_epi8(void *__P, __mmask16 __U, __m128i __D) 62{ 63 __builtin_ia32_compressstoreqi128_mask ((__v16qi *) __P, (__v16qi) __D, 64 __U); 65} 66 67static __inline__ __m128i __DEFAULT_FN_ATTRS128 68_mm_mask_expand_epi16(__m128i __S, __mmask8 __U, __m128i __D) 69{ 70 return (__m128i) __builtin_ia32_expandhi128_mask ((__v8hi) __D, 71 (__v8hi) __S, 72 __U); 73} 74 75static __inline__ __m128i __DEFAULT_FN_ATTRS128 76_mm_maskz_expand_epi16(__mmask8 __U, __m128i __D) 77{ 78 return (__m128i) __builtin_ia32_expandhi128_mask ((__v8hi) __D, 79 (__v8hi) _mm_setzero_si128(), 80 __U); 81} 82 83static __inline__ __m128i __DEFAULT_FN_ATTRS128 84_mm_mask_expand_epi8(__m128i __S, __mmask16 __U, __m128i __D) 85{ 86 return (__m128i) __builtin_ia32_expandqi128_mask ((__v16qi) __D, 87 (__v16qi) __S, 88 __U); 89} 90 91static __inline__ __m128i __DEFAULT_FN_ATTRS128 92_mm_maskz_expand_epi8(__mmask16 __U, __m128i __D) 93{ 94 return (__m128i) __builtin_ia32_expandqi128_mask ((__v16qi) __D, 95 (__v16qi) _mm_setzero_si128(), 96 __U); 97} 98 99static __inline__ __m128i __DEFAULT_FN_ATTRS128 100_mm_mask_expandloadu_epi16(__m128i __S, __mmask8 __U, void const *__P) 101{ 102 return (__m128i) __builtin_ia32_expandloadhi128_mask ((const __v8hi *)__P, 103 (__v8hi) __S, 104 __U); 105} 106 107static __inline__ __m128i __DEFAULT_FN_ATTRS128 108_mm_maskz_expandloadu_epi16(__mmask8 __U, void const *__P) 109{ 110 return (__m128i) __builtin_ia32_expandloadhi128_mask ((const __v8hi *)__P, 111 (__v8hi) _mm_setzero_si128(), 112 __U); 113} 114 115static __inline__ __m128i __DEFAULT_FN_ATTRS128 116_mm_mask_expandloadu_epi8(__m128i __S, __mmask16 __U, void const *__P) 117{ 118 return (__m128i) __builtin_ia32_expandloadqi128_mask ((const __v16qi *)__P, 119 (__v16qi) __S, 120 __U); 121} 122 123static __inline__ __m128i __DEFAULT_FN_ATTRS128 124_mm_maskz_expandloadu_epi8(__mmask16 __U, void const *__P) 125{ 126 return (__m128i) __builtin_ia32_expandloadqi128_mask ((const __v16qi *)__P, 127 (__v16qi) _mm_setzero_si128(), 128 __U); 129} 130 131static __inline__ __m256i __DEFAULT_FN_ATTRS256 132_mm256_mask_compress_epi16(__m256i __S, __mmask16 __U, __m256i __D) 133{ 134 return (__m256i) __builtin_ia32_compresshi256_mask ((__v16hi) __D, 135 (__v16hi) __S, 136 __U); 137} 138 139static __inline__ __m256i __DEFAULT_FN_ATTRS256 140_mm256_maskz_compress_epi16(__mmask16 __U, __m256i __D) 141{ 142 return (__m256i) __builtin_ia32_compresshi256_mask ((__v16hi) __D, 143 (__v16hi) _mm256_setzero_si256(), 144 __U); 145} 146 147static __inline__ __m256i __DEFAULT_FN_ATTRS256 148_mm256_mask_compress_epi8(__m256i __S, __mmask32 __U, __m256i __D) 149{ 150 return (__m256i) __builtin_ia32_compressqi256_mask ((__v32qi) __D, 151 (__v32qi) __S, 152 __U); 153} 154 155static __inline__ __m256i __DEFAULT_FN_ATTRS256 156_mm256_maskz_compress_epi8(__mmask32 __U, __m256i __D) 157{ 158 return (__m256i) __builtin_ia32_compressqi256_mask ((__v32qi) __D, 159 (__v32qi) _mm256_setzero_si256(), 160 __U); 161} 162 163static __inline__ void __DEFAULT_FN_ATTRS256 164_mm256_mask_compressstoreu_epi16(void *__P, __mmask16 __U, __m256i __D) 165{ 166 __builtin_ia32_compressstorehi256_mask ((__v16hi *) __P, (__v16hi) __D, 167 __U); 168} 169 170static __inline__ void __DEFAULT_FN_ATTRS256 171_mm256_mask_compressstoreu_epi8(void *__P, __mmask32 __U, __m256i __D) 172{ 173 __builtin_ia32_compressstoreqi256_mask ((__v32qi *) __P, (__v32qi) __D, 174 __U); 175} 176 177static __inline__ __m256i __DEFAULT_FN_ATTRS256 178_mm256_mask_expand_epi16(__m256i __S, __mmask16 __U, __m256i __D) 179{ 180 return (__m256i) __builtin_ia32_expandhi256_mask ((__v16hi) __D, 181 (__v16hi) __S, 182 __U); 183} 184 185static __inline__ __m256i __DEFAULT_FN_ATTRS256 186_mm256_maskz_expand_epi16(__mmask16 __U, __m256i __D) 187{ 188 return (__m256i) __builtin_ia32_expandhi256_mask ((__v16hi) __D, 189 (__v16hi) _mm256_setzero_si256(), 190 __U); 191} 192 193static __inline__ __m256i __DEFAULT_FN_ATTRS256 194_mm256_mask_expand_epi8(__m256i __S, __mmask32 __U, __m256i __D) 195{ 196 return (__m256i) __builtin_ia32_expandqi256_mask ((__v32qi) __D, 197 (__v32qi) __S, 198 __U); 199} 200 201static __inline__ __m256i __DEFAULT_FN_ATTRS256 202_mm256_maskz_expand_epi8(__mmask32 __U, __m256i __D) 203{ 204 return (__m256i) __builtin_ia32_expandqi256_mask ((__v32qi) __D, 205 (__v32qi) _mm256_setzero_si256(), 206 __U); 207} 208 209static __inline__ __m256i __DEFAULT_FN_ATTRS256 210_mm256_mask_expandloadu_epi16(__m256i __S, __mmask16 __U, void const *__P) 211{ 212 return (__m256i) __builtin_ia32_expandloadhi256_mask ((const __v16hi *)__P, 213 (__v16hi) __S, 214 __U); 215} 216 217static __inline__ __m256i __DEFAULT_FN_ATTRS256 218_mm256_maskz_expandloadu_epi16(__mmask16 __U, void const *__P) 219{ 220 return (__m256i) __builtin_ia32_expandloadhi256_mask ((const __v16hi *)__P, 221 (__v16hi) _mm256_setzero_si256(), 222 __U); 223} 224 225static __inline__ __m256i __DEFAULT_FN_ATTRS256 226_mm256_mask_expandloadu_epi8(__m256i __S, __mmask32 __U, void const *__P) 227{ 228 return (__m256i) __builtin_ia32_expandloadqi256_mask ((const __v32qi *)__P, 229 (__v32qi) __S, 230 __U); 231} 232 233static __inline__ __m256i __DEFAULT_FN_ATTRS256 234_mm256_maskz_expandloadu_epi8(__mmask32 __U, void const *__P) 235{ 236 return (__m256i) __builtin_ia32_expandloadqi256_mask ((const __v32qi *)__P, 237 (__v32qi) _mm256_setzero_si256(), 238 __U); 239} 240 241#define _mm256_shldi_epi64(A, B, I) \ 242 (__m256i)__builtin_ia32_vpshldq256((__v4di)(__m256i)(A), \ 243 (__v4di)(__m256i)(B), (int)(I)) 244 245#define _mm256_mask_shldi_epi64(S, U, A, B, I) \ 246 (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ 247 (__v4di)_mm256_shldi_epi64((A), (B), (I)), \ 248 (__v4di)(__m256i)(S)) 249 250#define _mm256_maskz_shldi_epi64(U, A, B, I) \ 251 (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ 252 (__v4di)_mm256_shldi_epi64((A), (B), (I)), \ 253 (__v4di)_mm256_setzero_si256()) 254 255#define _mm_shldi_epi64(A, B, I) \ 256 (__m128i)__builtin_ia32_vpshldq128((__v2di)(__m128i)(A), \ 257 (__v2di)(__m128i)(B), (int)(I)) 258 259#define _mm_mask_shldi_epi64(S, U, A, B, I) \ 260 (__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \ 261 (__v2di)_mm_shldi_epi64((A), (B), (I)), \ 262 (__v2di)(__m128i)(S)) 263 264#define _mm_maskz_shldi_epi64(U, A, B, I) \ 265 (__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \ 266 (__v2di)_mm_shldi_epi64((A), (B), (I)), \ 267 (__v2di)_mm_setzero_si128()) 268 269#define _mm256_shldi_epi32(A, B, I) \ 270 (__m256i)__builtin_ia32_vpshldd256((__v8si)(__m256i)(A), \ 271 (__v8si)(__m256i)(B), (int)(I)) 272 273#define _mm256_mask_shldi_epi32(S, U, A, B, I) \ 274 (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ 275 (__v8si)_mm256_shldi_epi32((A), (B), (I)), \ 276 (__v8si)(__m256i)(S)) 277 278#define _mm256_maskz_shldi_epi32(U, A, B, I) \ 279 (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ 280 (__v8si)_mm256_shldi_epi32((A), (B), (I)), \ 281 (__v8si)_mm256_setzero_si256()) 282 283#define _mm_shldi_epi32(A, B, I) \ 284 (__m128i)__builtin_ia32_vpshldd128((__v4si)(__m128i)(A), \ 285 (__v4si)(__m128i)(B), (int)(I)) 286 287#define _mm_mask_shldi_epi32(S, U, A, B, I) \ 288 (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ 289 (__v4si)_mm_shldi_epi32((A), (B), (I)), \ 290 (__v4si)(__m128i)(S)) 291 292#define _mm_maskz_shldi_epi32(U, A, B, I) \ 293 (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ 294 (__v4si)_mm_shldi_epi32((A), (B), (I)), \ 295 (__v4si)_mm_setzero_si128()) 296 297#define _mm256_shldi_epi16(A, B, I) \ 298 (__m256i)__builtin_ia32_vpshldw256((__v16hi)(__m256i)(A), \ 299 (__v16hi)(__m256i)(B), (int)(I)) 300 301#define _mm256_mask_shldi_epi16(S, U, A, B, I) \ 302 (__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ 303 (__v16hi)_mm256_shldi_epi16((A), (B), (I)), \ 304 (__v16hi)(__m256i)(S)) 305 306#define _mm256_maskz_shldi_epi16(U, A, B, I) \ 307 (__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ 308 (__v16hi)_mm256_shldi_epi16((A), (B), (I)), \ 309 (__v16hi)_mm256_setzero_si256()) 310 311#define _mm_shldi_epi16(A, B, I) \ 312 (__m128i)__builtin_ia32_vpshldw128((__v8hi)(__m128i)(A), \ 313 (__v8hi)(__m128i)(B), (int)(I)) 314 315#define _mm_mask_shldi_epi16(S, U, A, B, I) \ 316 (__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ 317 (__v8hi)_mm_shldi_epi16((A), (B), (I)), \ 318 (__v8hi)(__m128i)(S)) 319 320#define _mm_maskz_shldi_epi16(U, A, B, I) \ 321 (__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ 322 (__v8hi)_mm_shldi_epi16((A), (B), (I)), \ 323 (__v8hi)_mm_setzero_si128()) 324 325#define _mm256_shrdi_epi64(A, B, I) \ 326 (__m256i)__builtin_ia32_vpshrdq256((__v4di)(__m256i)(A), \ 327 (__v4di)(__m256i)(B), (int)(I)) 328 329#define _mm256_mask_shrdi_epi64(S, U, A, B, I) \ 330 (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ 331 (__v4di)_mm256_shrdi_epi64((A), (B), (I)), \ 332 (__v4di)(__m256i)(S)) 333 334#define _mm256_maskz_shrdi_epi64(U, A, B, I) \ 335 (__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ 336 (__v4di)_mm256_shrdi_epi64((A), (B), (I)), \ 337 (__v4di)_mm256_setzero_si256()) 338 339#define _mm_shrdi_epi64(A, B, I) \ 340 (__m128i)__builtin_ia32_vpshrdq128((__v2di)(__m128i)(A), \ 341 (__v2di)(__m128i)(B), (int)(I)) 342 343#define _mm_mask_shrdi_epi64(S, U, A, B, I) \ 344 (__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \ 345 (__v2di)_mm_shrdi_epi64((A), (B), (I)), \ 346 (__v2di)(__m128i)(S)) 347 348#define _mm_maskz_shrdi_epi64(U, A, B, I) \ 349 (__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \ 350 (__v2di)_mm_shrdi_epi64((A), (B), (I)), \ 351 (__v2di)_mm_setzero_si128()) 352 353#define _mm256_shrdi_epi32(A, B, I) \ 354 (__m256i)__builtin_ia32_vpshrdd256((__v8si)(__m256i)(A), \ 355 (__v8si)(__m256i)(B), (int)(I)) 356 357#define _mm256_mask_shrdi_epi32(S, U, A, B, I) \ 358 (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ 359 (__v8si)_mm256_shrdi_epi32((A), (B), (I)), \ 360 (__v8si)(__m256i)(S)) 361 362#define _mm256_maskz_shrdi_epi32(U, A, B, I) \ 363 (__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ 364 (__v8si)_mm256_shrdi_epi32((A), (B), (I)), \ 365 (__v8si)_mm256_setzero_si256()) 366 367#define _mm_shrdi_epi32(A, B, I) \ 368 (__m128i)__builtin_ia32_vpshrdd128((__v4si)(__m128i)(A), \ 369 (__v4si)(__m128i)(B), (int)(I)) 370 371#define _mm_mask_shrdi_epi32(S, U, A, B, I) \ 372 (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ 373 (__v4si)_mm_shrdi_epi32((A), (B), (I)), \ 374 (__v4si)(__m128i)(S)) 375 376#define _mm_maskz_shrdi_epi32(U, A, B, I) \ 377 (__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ 378 (__v4si)_mm_shrdi_epi32((A), (B), (I)), \ 379 (__v4si)_mm_setzero_si128()) 380 381#define _mm256_shrdi_epi16(A, B, I) \ 382 (__m256i)__builtin_ia32_vpshrdw256((__v16hi)(__m256i)(A), \ 383 (__v16hi)(__m256i)(B), (int)(I)) 384 385#define _mm256_mask_shrdi_epi16(S, U, A, B, I) \ 386 (__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ 387 (__v16hi)_mm256_shrdi_epi16((A), (B), (I)), \ 388 (__v16hi)(__m256i)(S)) 389 390#define _mm256_maskz_shrdi_epi16(U, A, B, I) \ 391 (__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ 392 (__v16hi)_mm256_shrdi_epi16((A), (B), (I)), \ 393 (__v16hi)_mm256_setzero_si256()) 394 395#define _mm_shrdi_epi16(A, B, I) \ 396 (__m128i)__builtin_ia32_vpshrdw128((__v8hi)(__m128i)(A), \ 397 (__v8hi)(__m128i)(B), (int)(I)) 398 399#define _mm_mask_shrdi_epi16(S, U, A, B, I) \ 400 (__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ 401 (__v8hi)_mm_shrdi_epi16((A), (B), (I)), \ 402 (__v8hi)(__m128i)(S)) 403 404#define _mm_maskz_shrdi_epi16(U, A, B, I) \ 405 (__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ 406 (__v8hi)_mm_shrdi_epi16((A), (B), (I)), \ 407 (__v8hi)_mm_setzero_si128()) 408 409static __inline__ __m256i __DEFAULT_FN_ATTRS256 410_mm256_shldv_epi64(__m256i __A, __m256i __B, __m256i __C) 411{ 412 return (__m256i)__builtin_ia32_vpshldvq256((__v4di)__A, (__v4di)__B, 413 (__v4di)__C); 414} 415 416static __inline__ __m256i __DEFAULT_FN_ATTRS256 417_mm256_mask_shldv_epi64(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) 418{ 419 return (__m256i)__builtin_ia32_selectq_256(__U, 420 (__v4di)_mm256_shldv_epi64(__A, __B, __C), 421 (__v4di)__A); 422} 423 424static __inline__ __m256i __DEFAULT_FN_ATTRS256 425_mm256_maskz_shldv_epi64(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C) 426{ 427 return (__m256i)__builtin_ia32_selectq_256(__U, 428 (__v4di)_mm256_shldv_epi64(__A, __B, __C), 429 (__v4di)_mm256_setzero_si256()); 430} 431 432static __inline__ __m128i __DEFAULT_FN_ATTRS128 433_mm_shldv_epi64(__m128i __A, __m128i __B, __m128i __C) 434{ 435 return (__m128i)__builtin_ia32_vpshldvq128((__v2di)__A, (__v2di)__B, 436 (__v2di)__C); 437} 438 439static __inline__ __m128i __DEFAULT_FN_ATTRS128 440_mm_mask_shldv_epi64(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) 441{ 442 return (__m128i)__builtin_ia32_selectq_128(__U, 443 (__v2di)_mm_shldv_epi64(__A, __B, __C), 444 (__v2di)__A); 445} 446 447static __inline__ __m128i __DEFAULT_FN_ATTRS128 448_mm_maskz_shldv_epi64(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) 449{ 450 return (__m128i)__builtin_ia32_selectq_128(__U, 451 (__v2di)_mm_shldv_epi64(__A, __B, __C), 452 (__v2di)_mm_setzero_si128()); 453} 454 455static __inline__ __m256i __DEFAULT_FN_ATTRS256 456_mm256_shldv_epi32(__m256i __A, __m256i __B, __m256i __C) 457{ 458 return (__m256i)__builtin_ia32_vpshldvd256((__v8si)__A, (__v8si)__B, 459 (__v8si)__C); 460} 461 462static __inline__ __m256i __DEFAULT_FN_ATTRS256 463_mm256_mask_shldv_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) 464{ 465 return (__m256i)__builtin_ia32_selectd_256(__U, 466 (__v8si)_mm256_shldv_epi32(__A, __B, __C), 467 (__v8si)__A); 468} 469 470static __inline__ __m256i __DEFAULT_FN_ATTRS256 471_mm256_maskz_shldv_epi32(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C) 472{ 473 return (__m256i)__builtin_ia32_selectd_256(__U, 474 (__v8si)_mm256_shldv_epi32(__A, __B, __C), 475 (__v8si)_mm256_setzero_si256()); 476} 477 478static __inline__ __m128i __DEFAULT_FN_ATTRS128 479_mm_shldv_epi32(__m128i __A, __m128i __B, __m128i __C) 480{ 481 return (__m128i)__builtin_ia32_vpshldvd128((__v4si)__A, (__v4si)__B, 482 (__v4si)__C); 483} 484 485static __inline__ __m128i __DEFAULT_FN_ATTRS128 486_mm_mask_shldv_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) 487{ 488 return (__m128i)__builtin_ia32_selectd_128(__U, 489 (__v4si)_mm_shldv_epi32(__A, __B, __C), 490 (__v4si)__A); 491} 492 493static __inline__ __m128i __DEFAULT_FN_ATTRS128 494_mm_maskz_shldv_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) 495{ 496 return (__m128i)__builtin_ia32_selectd_128(__U, 497 (__v4si)_mm_shldv_epi32(__A, __B, __C), 498 (__v4si)_mm_setzero_si128()); 499} 500 501static __inline__ __m256i __DEFAULT_FN_ATTRS256 502_mm256_shldv_epi16(__m256i __A, __m256i __B, __m256i __C) 503{ 504 return (__m256i)__builtin_ia32_vpshldvw256((__v16hi)__A, (__v16hi)__B, 505 (__v16hi)__C); 506} 507 508static __inline__ __m256i __DEFAULT_FN_ATTRS256 509_mm256_mask_shldv_epi16(__m256i __A, __mmask16 __U, __m256i __B, __m256i __C) 510{ 511 return (__m256i)__builtin_ia32_selectw_256(__U, 512 (__v16hi)_mm256_shldv_epi16(__A, __B, __C), 513 (__v16hi)__A); 514} 515 516static __inline__ __m256i __DEFAULT_FN_ATTRS256 517_mm256_maskz_shldv_epi16(__mmask16 __U, __m256i __A, __m256i __B, __m256i __C) 518{ 519 return (__m256i)__builtin_ia32_selectw_256(__U, 520 (__v16hi)_mm256_shldv_epi16(__A, __B, __C), 521 (__v16hi)_mm256_setzero_si256()); 522} 523 524static __inline__ __m128i __DEFAULT_FN_ATTRS128 525_mm_shldv_epi16(__m128i __A, __m128i __B, __m128i __C) 526{ 527 return (__m128i)__builtin_ia32_vpshldvw128((__v8hi)__A, (__v8hi)__B, 528 (__v8hi)__C); 529} 530 531static __inline__ __m128i __DEFAULT_FN_ATTRS128 532_mm_mask_shldv_epi16(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) 533{ 534 return (__m128i)__builtin_ia32_selectw_128(__U, 535 (__v8hi)_mm_shldv_epi16(__A, __B, __C), 536 (__v8hi)__A); 537} 538 539static __inline__ __m128i __DEFAULT_FN_ATTRS128 540_mm_maskz_shldv_epi16(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) 541{ 542 return (__m128i)__builtin_ia32_selectw_128(__U, 543 (__v8hi)_mm_shldv_epi16(__A, __B, __C), 544 (__v8hi)_mm_setzero_si128()); 545} 546 547static __inline__ __m256i __DEFAULT_FN_ATTRS256 548_mm256_shrdv_epi64(__m256i __A, __m256i __B, __m256i __C) 549{ 550 return (__m256i)__builtin_ia32_vpshrdvq256((__v4di)__A, (__v4di)__B, 551 (__v4di)__C); 552} 553 554static __inline__ __m256i __DEFAULT_FN_ATTRS256 555_mm256_mask_shrdv_epi64(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) 556{ 557 return (__m256i)__builtin_ia32_selectq_256(__U, 558 (__v4di)_mm256_shrdv_epi64(__A, __B, __C), 559 (__v4di)__A); 560} 561 562static __inline__ __m256i __DEFAULT_FN_ATTRS256 563_mm256_maskz_shrdv_epi64(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C) 564{ 565 return (__m256i)__builtin_ia32_selectq_256(__U, 566 (__v4di)_mm256_shrdv_epi64(__A, __B, __C), 567 (__v4di)_mm256_setzero_si256()); 568} 569 570static __inline__ __m128i __DEFAULT_FN_ATTRS128 571_mm_shrdv_epi64(__m128i __A, __m128i __B, __m128i __C) 572{ 573 return (__m128i)__builtin_ia32_vpshrdvq128((__v2di)__A, (__v2di)__B, 574 (__v2di)__C); 575} 576 577static __inline__ __m128i __DEFAULT_FN_ATTRS128 578_mm_mask_shrdv_epi64(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) 579{ 580 return (__m128i)__builtin_ia32_selectq_128(__U, 581 (__v2di)_mm_shrdv_epi64(__A, __B, __C), 582 (__v2di)__A); 583} 584 585static __inline__ __m128i __DEFAULT_FN_ATTRS128 586_mm_maskz_shrdv_epi64(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) 587{ 588 return (__m128i)__builtin_ia32_selectq_128(__U, 589 (__v2di)_mm_shrdv_epi64(__A, __B, __C), 590 (__v2di)_mm_setzero_si128()); 591} 592 593static __inline__ __m256i __DEFAULT_FN_ATTRS256 594_mm256_shrdv_epi32(__m256i __A, __m256i __B, __m256i __C) 595{ 596 return (__m256i)__builtin_ia32_vpshrdvd256((__v8si)__A, (__v8si)__B, 597 (__v8si)__C); 598} 599 600static __inline__ __m256i __DEFAULT_FN_ATTRS256 601_mm256_mask_shrdv_epi32(__m256i __A, __mmask8 __U, __m256i __B, __m256i __C) 602{ 603 return (__m256i)__builtin_ia32_selectd_256(__U, 604 (__v8si)_mm256_shrdv_epi32(__A, __B, __C), 605 (__v8si)__A); 606} 607 608static __inline__ __m256i __DEFAULT_FN_ATTRS256 609_mm256_maskz_shrdv_epi32(__mmask8 __U, __m256i __A, __m256i __B, __m256i __C) 610{ 611 return (__m256i)__builtin_ia32_selectd_256(__U, 612 (__v8si)_mm256_shrdv_epi32(__A, __B, __C), 613 (__v8si)_mm256_setzero_si256()); 614} 615 616static __inline__ __m128i __DEFAULT_FN_ATTRS128 617_mm_shrdv_epi32(__m128i __A, __m128i __B, __m128i __C) 618{ 619 return (__m128i)__builtin_ia32_vpshrdvd128((__v4si)__A, (__v4si)__B, 620 (__v4si)__C); 621} 622 623static __inline__ __m128i __DEFAULT_FN_ATTRS128 624_mm_mask_shrdv_epi32(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) 625{ 626 return (__m128i)__builtin_ia32_selectd_128(__U, 627 (__v4si)_mm_shrdv_epi32(__A, __B, __C), 628 (__v4si)__A); 629} 630 631static __inline__ __m128i __DEFAULT_FN_ATTRS128 632_mm_maskz_shrdv_epi32(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) 633{ 634 return (__m128i)__builtin_ia32_selectd_128(__U, 635 (__v4si)_mm_shrdv_epi32(__A, __B, __C), 636 (__v4si)_mm_setzero_si128()); 637} 638 639static __inline__ __m256i __DEFAULT_FN_ATTRS256 640_mm256_shrdv_epi16(__m256i __A, __m256i __B, __m256i __C) 641{ 642 return (__m256i)__builtin_ia32_vpshrdvw256((__v16hi)__A, (__v16hi)__B, 643 (__v16hi)__C); 644} 645 646static __inline__ __m256i __DEFAULT_FN_ATTRS256 647_mm256_mask_shrdv_epi16(__m256i __A, __mmask16 __U, __m256i __B, __m256i __C) 648{ 649 return (__m256i)__builtin_ia32_selectw_256(__U, 650 (__v16hi)_mm256_shrdv_epi16(__A, __B, __C), 651 (__v16hi)__A); 652} 653 654static __inline__ __m256i __DEFAULT_FN_ATTRS256 655_mm256_maskz_shrdv_epi16(__mmask16 __U, __m256i __A, __m256i __B, __m256i __C) 656{ 657 return (__m256i)__builtin_ia32_selectw_256(__U, 658 (__v16hi)_mm256_shrdv_epi16(__A, __B, __C), 659 (__v16hi)_mm256_setzero_si256()); 660} 661 662static __inline__ __m128i __DEFAULT_FN_ATTRS128 663_mm_shrdv_epi16(__m128i __A, __m128i __B, __m128i __C) 664{ 665 return (__m128i)__builtin_ia32_vpshrdvw128((__v8hi)__A, (__v8hi)__B, 666 (__v8hi)__C); 667} 668 669static __inline__ __m128i __DEFAULT_FN_ATTRS128 670_mm_mask_shrdv_epi16(__m128i __A, __mmask8 __U, __m128i __B, __m128i __C) 671{ 672 return (__m128i)__builtin_ia32_selectw_128(__U, 673 (__v8hi)_mm_shrdv_epi16(__A, __B, __C), 674 (__v8hi)__A); 675} 676 677static __inline__ __m128i __DEFAULT_FN_ATTRS128 678_mm_maskz_shrdv_epi16(__mmask8 __U, __m128i __A, __m128i __B, __m128i __C) 679{ 680 return (__m128i)__builtin_ia32_selectw_128(__U, 681 (__v8hi)_mm_shrdv_epi16(__A, __B, __C), 682 (__v8hi)_mm_setzero_si128()); 683} 684 685 686#undef __DEFAULT_FN_ATTRS128 687#undef __DEFAULT_FN_ATTRS256 688 689#endif 690