/* avx512vlvbmi2intrin.h revision 335799 */
1/*===------------- avx512vlvbmi2intrin.h - VBMI2 intrinsics -----------------=== 2 * 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a copy 5 * of this software and associated documentation files (the "Software"), to deal 6 * in the Software without restriction, including without limitation the rights 7 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 * copies of the Software, and to permit persons to whom the Software is 9 * furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 * THE SOFTWARE. 21 * 22 *===-----------------------------------------------------------------------=== 23 */ 24#ifndef __IMMINTRIN_H 25#error "Never use <avx512vlvbmi2intrin.h> directly; include <immintrin.h> instead." 26#endif 27 28#ifndef __AVX512VLVBMI2INTRIN_H 29#define __AVX512VLVBMI2INTRIN_H 30 31/* Define the default attributes for the functions in this file. 
*/ 32#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vbmi2"))) 33 34static __inline__ __m128i __DEFAULT_FN_ATTRS 35_mm_mask_compress_epi16(__m128i __S, __mmask8 __U, __m128i __D) 36{ 37 return (__m128i) __builtin_ia32_compresshi128_mask ((__v8hi) __D, 38 (__v8hi) __S, 39 __U); 40} 41 42static __inline__ __m128i __DEFAULT_FN_ATTRS 43_mm_maskz_compress_epi16(__mmask8 __U, __m128i __D) 44{ 45 return (__m128i) __builtin_ia32_compresshi128_mask ((__v8hi) __D, 46 (__v8hi) _mm_setzero_si128(), 47 __U); 48} 49 50static __inline__ __m128i __DEFAULT_FN_ATTRS 51_mm_mask_compress_epi8(__m128i __S, __mmask16 __U, __m128i __D) 52{ 53 return (__m128i) __builtin_ia32_compressqi128_mask ((__v16qi) __D, 54 (__v16qi) __S, 55 __U); 56} 57 58static __inline__ __m128i __DEFAULT_FN_ATTRS 59_mm_maskz_compress_epi8(__mmask16 __U, __m128i __D) 60{ 61 return (__m128i) __builtin_ia32_compressqi128_mask ((__v16qi) __D, 62 (__v16qi) _mm_setzero_si128(), 63 __U); 64} 65 66static __inline__ void __DEFAULT_FN_ATTRS 67_mm_mask_compressstoreu_epi16(void *__P, __mmask8 __U, __m128i __D) 68{ 69 __builtin_ia32_compressstorehi128_mask ((__v8hi *) __P, (__v8hi) __D, 70 __U); 71} 72 73static __inline__ void __DEFAULT_FN_ATTRS 74_mm_mask_compressstoreu_epi8(void *__P, __mmask16 __U, __m128i __D) 75{ 76 __builtin_ia32_compressstoreqi128_mask ((__v16qi *) __P, (__v16qi) __D, 77 __U); 78} 79 80static __inline__ __m128i __DEFAULT_FN_ATTRS 81_mm_mask_expand_epi16(__m128i __S, __mmask8 __U, __m128i __D) 82{ 83 return (__m128i) __builtin_ia32_expandhi128_mask ((__v8hi) __D, 84 (__v8hi) __S, 85 __U); 86} 87 88static __inline__ __m128i __DEFAULT_FN_ATTRS 89_mm_maskz_expand_epi16(__mmask8 __U, __m128i __D) 90{ 91 return (__m128i) __builtin_ia32_expandhi128_mask ((__v8hi) __D, 92 (__v8hi) _mm_setzero_si128(), 93 __U); 94} 95 96static __inline__ __m128i __DEFAULT_FN_ATTRS 97_mm_mask_expand_epi8(__m128i __S, __mmask16 __U, __m128i __D) 98{ 99 return (__m128i) 
__builtin_ia32_expandqi128_mask ((__v16qi) __D, 100 (__v16qi) __S, 101 __U); 102} 103 104static __inline__ __m128i __DEFAULT_FN_ATTRS 105_mm_maskz_expand_epi8(__mmask16 __U, __m128i __D) 106{ 107 return (__m128i) __builtin_ia32_expandqi128_mask ((__v16qi) __D, 108 (__v16qi) _mm_setzero_si128(), 109 __U); 110} 111 112static __inline__ __m128i __DEFAULT_FN_ATTRS 113_mm_mask_expandloadu_epi16(__m128i __S, __mmask8 __U, void const *__P) 114{ 115 return (__m128i) __builtin_ia32_expandloadhi128_mask ((const __v8hi *)__P, 116 (__v8hi) __S, 117 __U); 118} 119 120static __inline__ __m128i __DEFAULT_FN_ATTRS 121_mm_maskz_expandloadu_epi16(__mmask8 __U, void const *__P) 122{ 123 return (__m128i) __builtin_ia32_expandloadhi128_mask ((const __v8hi *)__P, 124 (__v8hi) _mm_setzero_si128(), 125 __U); 126} 127 128static __inline__ __m128i __DEFAULT_FN_ATTRS 129_mm_mask_expandloadu_epi8(__m128i __S, __mmask16 __U, void const *__P) 130{ 131 return (__m128i) __builtin_ia32_expandloadqi128_mask ((const __v16qi *)__P, 132 (__v16qi) __S, 133 __U); 134} 135 136static __inline__ __m128i __DEFAULT_FN_ATTRS 137_mm_maskz_expandloadu_epi8(__mmask16 __U, void const *__P) 138{ 139 return (__m128i) __builtin_ia32_expandloadqi128_mask ((const __v16qi *)__P, 140 (__v16qi) _mm_setzero_si128(), 141 __U); 142} 143 144static __inline__ __m256i __DEFAULT_FN_ATTRS 145_mm256_mask_compress_epi16(__m256i __S, __mmask16 __U, __m256i __D) 146{ 147 return (__m256i) __builtin_ia32_compresshi256_mask ((__v16hi) __D, 148 (__v16hi) __S, 149 __U); 150} 151 152static __inline__ __m256i __DEFAULT_FN_ATTRS 153_mm256_maskz_compress_epi16(__mmask16 __U, __m256i __D) 154{ 155 return (__m256i) __builtin_ia32_compresshi256_mask ((__v16hi) __D, 156 (__v16hi) _mm256_setzero_si256(), 157 __U); 158} 159 160static __inline__ __m256i __DEFAULT_FN_ATTRS 161_mm256_mask_compress_epi8(__m256i __S, __mmask32 __U, __m256i __D) 162{ 163 return (__m256i) __builtin_ia32_compressqi256_mask ((__v32qi) __D, 164 (__v32qi) __S, 165 __U); 166} 
167 168static __inline__ __m256i __DEFAULT_FN_ATTRS 169_mm256_maskz_compress_epi8(__mmask32 __U, __m256i __D) 170{ 171 return (__m256i) __builtin_ia32_compressqi256_mask ((__v32qi) __D, 172 (__v32qi) _mm256_setzero_si256(), 173 __U); 174} 175 176static __inline__ void __DEFAULT_FN_ATTRS 177_mm256_mask_compressstoreu_epi16(void *__P, __mmask16 __U, __m256i __D) 178{ 179 __builtin_ia32_compressstorehi256_mask ((__v16hi *) __P, (__v16hi) __D, 180 __U); 181} 182 183static __inline__ void __DEFAULT_FN_ATTRS 184_mm256_mask_compressstoreu_epi8(void *__P, __mmask32 __U, __m256i __D) 185{ 186 __builtin_ia32_compressstoreqi256_mask ((__v32qi *) __P, (__v32qi) __D, 187 __U); 188} 189 190static __inline__ __m256i __DEFAULT_FN_ATTRS 191_mm256_mask_expand_epi16(__m256i __S, __mmask16 __U, __m256i __D) 192{ 193 return (__m256i) __builtin_ia32_expandhi256_mask ((__v16hi) __D, 194 (__v16hi) __S, 195 __U); 196} 197 198static __inline__ __m256i __DEFAULT_FN_ATTRS 199_mm256_maskz_expand_epi16(__mmask16 __U, __m256i __D) 200{ 201 return (__m256i) __builtin_ia32_expandhi256_mask ((__v16hi) __D, 202 (__v16hi) _mm256_setzero_si256(), 203 __U); 204} 205 206static __inline__ __m256i __DEFAULT_FN_ATTRS 207_mm256_mask_expand_epi8(__m256i __S, __mmask32 __U, __m256i __D) 208{ 209 return (__m256i) __builtin_ia32_expandqi256_mask ((__v32qi) __D, 210 (__v32qi) __S, 211 __U); 212} 213 214static __inline__ __m256i __DEFAULT_FN_ATTRS 215_mm256_maskz_expand_epi8(__mmask32 __U, __m256i __D) 216{ 217 return (__m256i) __builtin_ia32_expandqi256_mask ((__v32qi) __D, 218 (__v32qi) _mm256_setzero_si256(), 219 __U); 220} 221 222static __inline__ __m256i __DEFAULT_FN_ATTRS 223_mm256_mask_expandloadu_epi16(__m256i __S, __mmask16 __U, void const *__P) 224{ 225 return (__m256i) __builtin_ia32_expandloadhi256_mask ((const __v16hi *)__P, 226 (__v16hi) __S, 227 __U); 228} 229 230static __inline__ __m256i __DEFAULT_FN_ATTRS 231_mm256_maskz_expandloadu_epi16(__mmask16 __U, void const *__P) 232{ 233 return (__m256i) 
__builtin_ia32_expandloadhi256_mask ((const __v16hi *)__P, 234 (__v16hi) _mm256_setzero_si256(), 235 __U); 236} 237 238static __inline__ __m256i __DEFAULT_FN_ATTRS 239_mm256_mask_expandloadu_epi8(__m256i __S, __mmask32 __U, void const *__P) 240{ 241 return (__m256i) __builtin_ia32_expandloadqi256_mask ((const __v32qi *)__P, 242 (__v32qi) __S, 243 __U); 244} 245 246static __inline__ __m256i __DEFAULT_FN_ATTRS 247_mm256_maskz_expandloadu_epi8(__mmask32 __U, void const *__P) 248{ 249 return (__m256i) __builtin_ia32_expandloadqi256_mask ((const __v32qi *)__P, 250 (__v32qi) _mm256_setzero_si256(), 251 __U); 252} 253 254#define _mm256_mask_shldi_epi64(S, U, A, B, I) __extension__ ({ \ 255 (__m256i)__builtin_ia32_vpshldq256_mask((__v4di)(A), \ 256 (__v4di)(B), \ 257 (int)(I), \ 258 (__v4di)(S), \ 259 (__mmask8)(U)); }) 260 261#define _mm256_maskz_shldi_epi64(U, A, B, I) \ 262 _mm256_mask_shldi_epi64(_mm256_setzero_si256(), (U), (A), (B), (I)) 263 264#define _mm256_shldi_epi64(A, B, I) \ 265 _mm256_mask_shldi_epi64(_mm256_undefined_si256(), (__mmask8)(-1), (A), (B), (I)) 266 267#define _mm_mask_shldi_epi64(S, U, A, B, I) __extension__ ({ \ 268 (__m128i)__builtin_ia32_vpshldq128_mask((__v2di)(A), \ 269 (__v2di)(B), \ 270 (int)(I), \ 271 (__v2di)(S), \ 272 (__mmask8)(U)); }) 273 274#define _mm_maskz_shldi_epi64(U, A, B, I) \ 275 _mm_mask_shldi_epi64(_mm_setzero_si128(), (U), (A), (B), (I)) 276 277#define _mm_shldi_epi64(A, B, I) \ 278 _mm_mask_shldi_epi64(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I)) 279 280#define _mm256_mask_shldi_epi32(S, U, A, B, I) __extension__ ({ \ 281 (__m256i)__builtin_ia32_vpshldd256_mask((__v8si)(A), \ 282 (__v8si)(B), \ 283 (int)(I), \ 284 (__v8si)(S), \ 285 (__mmask8)(U)); }) 286 287#define _mm256_maskz_shldi_epi32(U, A, B, I) \ 288 _mm256_mask_shldi_epi32(_mm256_setzero_si256(), (U), (A), (B), (I)) 289 290#define _mm256_shldi_epi32(A, B, I) \ 291 _mm256_mask_shldi_epi32(_mm256_undefined_si256(), (__mmask8)(-1), (A), (B), (I)) 292 
293#define _mm_mask_shldi_epi32(S, U, A, B, I) __extension__ ({ \ 294 (__m128i)__builtin_ia32_vpshldd128_mask((__v4si)(A), \ 295 (__v4si)(B), \ 296 (int)(I), \ 297 (__v4si)(S), \ 298 (__mmask8)(U)); }) 299 300#define _mm_maskz_shldi_epi32(U, A, B, I) \ 301 _mm_mask_shldi_epi32(_mm_setzero_si128(), (U), (A), (B), (I)) 302 303#define _mm_shldi_epi32(A, B, I) \ 304 _mm_mask_shldi_epi32(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I)) 305 306#define _mm256_mask_shldi_epi16(S, U, A, B, I) __extension__ ({ \ 307 (__m256i)__builtin_ia32_vpshldw256_mask((__v16hi)(A), \ 308 (__v16hi)(B), \ 309 (int)(I), \ 310 (__v16hi)(S), \ 311 (__mmask16)(U)); }) 312 313#define _mm256_maskz_shldi_epi16(U, A, B, I) \ 314 _mm256_mask_shldi_epi16(_mm256_setzero_si256(), (U), (A), (B), (I)) 315 316#define _mm256_shldi_epi16(A, B, I) \ 317 _mm256_mask_shldi_epi16(_mm256_undefined_si256(), (__mmask8)(-1), (A), (B), (I)) 318 319#define _mm_mask_shldi_epi16(S, U, A, B, I) __extension__ ({ \ 320 (__m128i)__builtin_ia32_vpshldw128_mask((__v8hi)(A), \ 321 (__v8hi)(B), \ 322 (int)(I), \ 323 (__v8hi)(S), \ 324 (__mmask8)(U)); }) 325 326#define _mm_maskz_shldi_epi16(U, A, B, I) \ 327 _mm_mask_shldi_epi16(_mm_setzero_si128(), (U), (A), (B), (I)) 328 329#define _mm_shldi_epi16(A, B, I) \ 330 _mm_mask_shldi_epi16(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I)) 331 332#define _mm256_mask_shrdi_epi64(S, U, A, B, I) __extension__ ({ \ 333 (__m256i)__builtin_ia32_vpshrdq256_mask((__v4di)(A), \ 334 (__v4di)(B), \ 335 (int)(I), \ 336 (__v4di)(S), \ 337 (__mmask8)(U)); }) 338 339#define _mm256_maskz_shrdi_epi64(U, A, B, I) \ 340 _mm256_mask_shrdi_epi64(_mm256_setzero_si256(), (U), (A), (B), (I)) 341 342#define _mm256_shrdi_epi64(A, B, I) \ 343 _mm256_mask_shrdi_epi64(_mm256_undefined_si256(), (__mmask8)(-1), (A), (B), (I)) 344 345#define _mm_mask_shrdi_epi64(S, U, A, B, I) __extension__ ({ \ 346 (__m128i)__builtin_ia32_vpshrdq128_mask((__v2di)(A), \ 347 (__v2di)(B), \ 348 (int)(I), \ 349 (__v2di)(S), 
\ 350 (__mmask8)(U)); }) 351 352#define _mm_maskz_shrdi_epi64(U, A, B, I) \ 353 _mm_mask_shrdi_epi64(_mm_setzero_si128(), (U), (A), (B), (I)) 354 355#define _mm_shrdi_epi64(A, B, I) \ 356 _mm_mask_shrdi_epi64(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I)) 357 358#define _mm256_mask_shrdi_epi32(S, U, A, B, I) __extension__ ({ \ 359 (__m256i)__builtin_ia32_vpshrdd256_mask((__v8si)(A), \ 360 (__v8si)(B), \ 361 (int)(I), \ 362 (__v8si)(S), \ 363 (__mmask8)(U)); }) 364 365#define _mm256_maskz_shrdi_epi32(U, A, B, I) \ 366 _mm256_mask_shrdi_epi32(_mm256_setzero_si256(), (U), (A), (B), (I)) 367 368#define _mm256_shrdi_epi32(A, B, I) \ 369 _mm256_mask_shrdi_epi32(_mm256_undefined_si256(), (__mmask8)(-1), (A), (B), (I)) 370 371#define _mm_mask_shrdi_epi32(S, U, A, B, I) __extension__ ({ \ 372 (__m128i)__builtin_ia32_vpshrdd128_mask((__v4si)(A), \ 373 (__v4si)(B), \ 374 (int)(I), \ 375 (__v4si)(S), \ 376 (__mmask8)(U)); }) 377 378#define _mm_maskz_shrdi_epi32(U, A, B, I) \ 379 _mm_mask_shrdi_epi32(_mm_setzero_si128(), (U), (A), (B), (I)) 380 381#define _mm_shrdi_epi32(A, B, I) \ 382 _mm_mask_shrdi_epi32(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I)) 383 384#define _mm256_mask_shrdi_epi16(S, U, A, B, I) __extension__ ({ \ 385 (__m256i)__builtin_ia32_vpshrdw256_mask((__v16hi)(A), \ 386 (__v16hi)(B), \ 387 (int)(I), \ 388 (__v16hi)(S), \ 389 (__mmask16)(U)); }) 390 391#define _mm256_maskz_shrdi_epi16(U, A, B, I) \ 392 _mm256_mask_shrdi_epi16(_mm256_setzero_si256(), (U), (A), (B), (I)) 393 394#define _mm256_shrdi_epi16(A, B, I) \ 395 _mm256_mask_shrdi_epi16(_mm256_undefined_si256(), (__mmask8)(-1), (A), (B), (I)) 396 397#define _mm_mask_shrdi_epi16(S, U, A, B, I) __extension__ ({ \ 398 (__m128i)__builtin_ia32_vpshrdw128_mask((__v8hi)(A), \ 399 (__v8hi)(B), \ 400 (int)(I), \ 401 (__v8hi)(S), \ 402 (__mmask8)(U)); }) 403 404#define _mm_maskz_shrdi_epi16(U, A, B, I) \ 405 _mm_mask_shrdi_epi16(_mm_setzero_si128(), (U), (A), (B), (I)) 406 407#define _mm_shrdi_epi16(A, 
B, I) \ 408 _mm_mask_shrdi_epi16(_mm_undefined_si128(), (__mmask8)(-1), (A), (B), (I)) 409 410static __inline__ __m256i __DEFAULT_FN_ATTRS 411_mm256_mask_shldv_epi64(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) 412{ 413 return (__m256i) __builtin_ia32_vpshldvq256_mask ((__v4di) __S, 414 (__v4di) __A, 415 (__v4di) __B, 416 __U); 417} 418 419static __inline__ __m256i __DEFAULT_FN_ATTRS 420_mm256_maskz_shldv_epi64(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) 421{ 422 return (__m256i) __builtin_ia32_vpshldvq256_maskz ((__v4di) __S, 423 (__v4di) __A, 424 (__v4di) __B, 425 __U); 426} 427 428static __inline__ __m256i __DEFAULT_FN_ATTRS 429_mm256_shldv_epi64(__m256i __S, __m256i __A, __m256i __B) 430{ 431 return (__m256i) __builtin_ia32_vpshldvq256_mask ((__v4di) __S, 432 (__v4di) __A, 433 (__v4di) __B, 434 (__mmask8) -1); 435} 436 437static __inline__ __m128i __DEFAULT_FN_ATTRS 438_mm_mask_shldv_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) 439{ 440 return (__m128i) __builtin_ia32_vpshldvq128_mask ((__v2di) __S, 441 (__v2di) __A, 442 (__v2di) __B, 443 __U); 444} 445 446static __inline__ __m128i __DEFAULT_FN_ATTRS 447_mm_maskz_shldv_epi64(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) 448{ 449 return (__m128i) __builtin_ia32_vpshldvq128_maskz ((__v2di) __S, 450 (__v2di) __A, 451 (__v2di) __B, 452 __U); 453} 454 455static __inline__ __m128i __DEFAULT_FN_ATTRS 456_mm_shldv_epi64(__m128i __S, __m128i __A, __m128i __B) 457{ 458 return (__m128i) __builtin_ia32_vpshldvq128_mask ((__v2di) __S, 459 (__v2di) __A, 460 (__v2di) __B, 461 (__mmask8) -1); 462} 463 464static __inline__ __m256i __DEFAULT_FN_ATTRS 465_mm256_mask_shldv_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) 466{ 467 return (__m256i) __builtin_ia32_vpshldvd256_mask ((__v8si) __S, 468 (__v8si) __A, 469 (__v8si) __B, 470 __U); 471} 472 473static __inline__ __m256i __DEFAULT_FN_ATTRS 474_mm256_maskz_shldv_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) 475{ 476 
return (__m256i) __builtin_ia32_vpshldvd256_maskz ((__v8si) __S, 477 (__v8si) __A, 478 (__v8si) __B, 479 __U); 480} 481 482static __inline__ __m256i __DEFAULT_FN_ATTRS 483_mm256_shldv_epi32(__m256i __S, __m256i __A, __m256i __B) 484{ 485 return (__m256i) __builtin_ia32_vpshldvd256_mask ((__v8si) __S, 486 (__v8si) __A, 487 (__v8si) __B, 488 (__mmask8) -1); 489} 490 491static __inline__ __m128i __DEFAULT_FN_ATTRS 492_mm_mask_shldv_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) 493{ 494 return (__m128i) __builtin_ia32_vpshldvd128_mask ((__v4si) __S, 495 (__v4si) __A, 496 (__v4si) __B, 497 __U); 498} 499 500static __inline__ __m128i __DEFAULT_FN_ATTRS 501_mm_maskz_shldv_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) 502{ 503 return (__m128i) __builtin_ia32_vpshldvd128_maskz ((__v4si) __S, 504 (__v4si) __A, 505 (__v4si) __B, 506 __U); 507} 508 509static __inline__ __m128i __DEFAULT_FN_ATTRS 510_mm_shldv_epi32(__m128i __S, __m128i __A, __m128i __B) 511{ 512 return (__m128i) __builtin_ia32_vpshldvd128_mask ((__v4si) __S, 513 (__v4si) __A, 514 (__v4si) __B, 515 (__mmask8) -1); 516} 517 518static __inline__ __m256i __DEFAULT_FN_ATTRS 519_mm256_mask_shldv_epi16(__m256i __S, __mmask16 __U, __m256i __A, __m256i __B) 520{ 521 return (__m256i) __builtin_ia32_vpshldvw256_mask ((__v16hi) __S, 522 (__v16hi) __A, 523 (__v16hi) __B, 524 __U); 525} 526 527static __inline__ __m256i __DEFAULT_FN_ATTRS 528_mm256_maskz_shldv_epi16(__mmask16 __U, __m256i __S, __m256i __A, __m256i __B) 529{ 530 return (__m256i) __builtin_ia32_vpshldvw256_maskz ((__v16hi) __S, 531 (__v16hi) __A, 532 (__v16hi) __B, 533 __U); 534} 535 536static __inline__ __m256i __DEFAULT_FN_ATTRS 537_mm256_shldv_epi16(__m256i __S, __m256i __A, __m256i __B) 538{ 539 return (__m256i) __builtin_ia32_vpshldvw256_mask ((__v16hi) __S, 540 (__v16hi) __A, 541 (__v16hi) __B, 542 (__mmask16) -1); 543} 544 545static __inline__ __m128i __DEFAULT_FN_ATTRS 546_mm_mask_shldv_epi16(__m128i __S, __mmask8 __U, __m128i 
__A, __m128i __B) 547{ 548 return (__m128i) __builtin_ia32_vpshldvw128_mask ((__v8hi) __S, 549 (__v8hi) __A, 550 (__v8hi) __B, 551 __U); 552} 553 554static __inline__ __m128i __DEFAULT_FN_ATTRS 555_mm_maskz_shldv_epi16(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) 556{ 557 return (__m128i) __builtin_ia32_vpshldvw128_maskz ((__v8hi) __S, 558 (__v8hi) __A, 559 (__v8hi) __B, 560 __U); 561} 562 563static __inline__ __m128i __DEFAULT_FN_ATTRS 564_mm_shldv_epi16(__m128i __S, __m128i __A, __m128i __B) 565{ 566 return (__m128i) __builtin_ia32_vpshldvw128_mask ((__v8hi) __S, 567 (__v8hi) __A, 568 (__v8hi) __B, 569 (__mmask8) -1); 570} 571 572static __inline__ __m256i __DEFAULT_FN_ATTRS 573_mm256_mask_shrdv_epi64(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) 574{ 575 return (__m256i) __builtin_ia32_vpshrdvq256_mask ((__v4di) __S, 576 (__v4di) __A, 577 (__v4di) __B, 578 __U); 579} 580 581static __inline__ __m256i __DEFAULT_FN_ATTRS 582_mm256_maskz_shrdv_epi64(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) 583{ 584 return (__m256i) __builtin_ia32_vpshrdvq256_maskz ((__v4di) __S, 585 (__v4di) __A, 586 (__v4di) __B, 587 __U); 588} 589 590static __inline__ __m256i __DEFAULT_FN_ATTRS 591_mm256_shrdv_epi64(__m256i __S, __m256i __A, __m256i __B) 592{ 593 return (__m256i) __builtin_ia32_vpshrdvq256_mask ((__v4di) __S, 594 (__v4di) __A, 595 (__v4di) __B, 596 (__mmask8) -1); 597} 598 599static __inline__ __m128i __DEFAULT_FN_ATTRS 600_mm_mask_shrdv_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) 601{ 602 return (__m128i) __builtin_ia32_vpshrdvq128_mask ((__v2di) __S, 603 (__v2di) __A, 604 (__v2di) __B, 605 __U); 606} 607 608static __inline__ __m128i __DEFAULT_FN_ATTRS 609_mm_maskz_shrdv_epi64(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) 610{ 611 return (__m128i) __builtin_ia32_vpshrdvq128_maskz ((__v2di) __S, 612 (__v2di) __A, 613 (__v2di) __B, 614 __U); 615} 616 617static __inline__ __m128i __DEFAULT_FN_ATTRS 618_mm_shrdv_epi64(__m128i __S, 
__m128i __A, __m128i __B) 619{ 620 return (__m128i) __builtin_ia32_vpshrdvq128_mask ((__v2di) __S, 621 (__v2di) __A, 622 (__v2di) __B, 623 (__mmask8) -1); 624} 625 626static __inline__ __m256i __DEFAULT_FN_ATTRS 627_mm256_mask_shrdv_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) 628{ 629 return (__m256i) __builtin_ia32_vpshrdvd256_mask ((__v8si) __S, 630 (__v8si) __A, 631 (__v8si) __B, 632 __U); 633} 634 635static __inline__ __m256i __DEFAULT_FN_ATTRS 636_mm256_maskz_shrdv_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) 637{ 638 return (__m256i) __builtin_ia32_vpshrdvd256_maskz ((__v8si) __S, 639 (__v8si) __A, 640 (__v8si) __B, 641 __U); 642} 643 644static __inline__ __m256i __DEFAULT_FN_ATTRS 645_mm256_shrdv_epi32(__m256i __S, __m256i __A, __m256i __B) 646{ 647 return (__m256i) __builtin_ia32_vpshrdvd256_mask ((__v8si) __S, 648 (__v8si) __A, 649 (__v8si) __B, 650 (__mmask8) -1); 651} 652 653static __inline__ __m128i __DEFAULT_FN_ATTRS 654_mm_mask_shrdv_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) 655{ 656 return (__m128i) __builtin_ia32_vpshrdvd128_mask ((__v4si) __S, 657 (__v4si) __A, 658 (__v4si) __B, 659 __U); 660} 661 662static __inline__ __m128i __DEFAULT_FN_ATTRS 663_mm_maskz_shrdv_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) 664{ 665 return (__m128i) __builtin_ia32_vpshrdvd128_maskz ((__v4si) __S, 666 (__v4si) __A, 667 (__v4si) __B, 668 __U); 669} 670 671static __inline__ __m128i __DEFAULT_FN_ATTRS 672_mm_shrdv_epi32(__m128i __S, __m128i __A, __m128i __B) 673{ 674 return (__m128i) __builtin_ia32_vpshrdvd128_mask ((__v4si) __S, 675 (__v4si) __A, 676 (__v4si) __B, 677 (__mmask8) -1); 678} 679 680static __inline__ __m256i __DEFAULT_FN_ATTRS 681_mm256_mask_shrdv_epi16(__m256i __S, __mmask16 __U, __m256i __A, __m256i __B) 682{ 683 return (__m256i) __builtin_ia32_vpshrdvw256_mask ((__v16hi) __S, 684 (__v16hi) __A, 685 (__v16hi) __B, 686 __U); 687} 688 689static __inline__ __m256i __DEFAULT_FN_ATTRS 
690_mm256_maskz_shrdv_epi16(__mmask16 __U, __m256i __S, __m256i __A, __m256i __B) 691{ 692 return (__m256i) __builtin_ia32_vpshrdvw256_maskz ((__v16hi) __S, 693 (__v16hi) __A, 694 (__v16hi) __B, 695 __U); 696} 697 698static __inline__ __m256i __DEFAULT_FN_ATTRS 699_mm256_shrdv_epi16(__m256i __S, __m256i __A, __m256i __B) 700{ 701 return (__m256i) __builtin_ia32_vpshrdvw256_mask ((__v16hi) __S, 702 (__v16hi) __A, 703 (__v16hi) __B, 704 (__mmask16) -1); 705} 706 707static __inline__ __m128i __DEFAULT_FN_ATTRS 708_mm_mask_shrdv_epi16(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) 709{ 710 return (__m128i) __builtin_ia32_vpshrdvw128_mask ((__v8hi) __S, 711 (__v8hi) __A, 712 (__v8hi) __B, 713 __U); 714} 715 716static __inline__ __m128i __DEFAULT_FN_ATTRS 717_mm_maskz_shrdv_epi16(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) 718{ 719 return (__m128i) __builtin_ia32_vpshrdvw128_maskz ((__v8hi) __S, 720 (__v8hi) __A, 721 (__v8hi) __B, 722 __U); 723} 724 725static __inline__ __m128i __DEFAULT_FN_ATTRS 726_mm_shrdv_epi16(__m128i __S, __m128i __A, __m128i __B) 727{ 728 return (__m128i) __builtin_ia32_vpshrdvw128_mask ((__v8hi) __S, 729 (__v8hi) __A, 730 (__v8hi) __B, 731 (__mmask8) -1); 732} 733 734 735#undef __DEFAULT_FN_ATTRS 736 737#endif 738