/* avx512vbmi2intrin.h, LLVM revision 360660 */
1/*===------------- avx512vbmi2intrin.h - VBMI2 intrinsics ------------------=== 2 * 3 * 4 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 5 * See https://llvm.org/LICENSE.txt for license information. 6 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 7 * 8 *===-----------------------------------------------------------------------=== 9 */ 10#ifndef __IMMINTRIN_H 11#error "Never use <avx512vbmi2intrin.h> directly; include <immintrin.h> instead." 12#endif 13 14#ifndef __AVX512VBMI2INTRIN_H 15#define __AVX512VBMI2INTRIN_H 16 17/* Define the default attributes for the functions in this file. */ 18#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi2"), __min_vector_width__(512))) 19 20 21static __inline__ __m512i __DEFAULT_FN_ATTRS 22_mm512_mask_compress_epi16(__m512i __S, __mmask32 __U, __m512i __D) 23{ 24 return (__m512i) __builtin_ia32_compresshi512_mask ((__v32hi) __D, 25 (__v32hi) __S, 26 __U); 27} 28 29static __inline__ __m512i __DEFAULT_FN_ATTRS 30_mm512_maskz_compress_epi16(__mmask32 __U, __m512i __D) 31{ 32 return (__m512i) __builtin_ia32_compresshi512_mask ((__v32hi) __D, 33 (__v32hi) _mm512_setzero_si512(), 34 __U); 35} 36 37static __inline__ __m512i __DEFAULT_FN_ATTRS 38_mm512_mask_compress_epi8(__m512i __S, __mmask64 __U, __m512i __D) 39{ 40 return (__m512i) __builtin_ia32_compressqi512_mask ((__v64qi) __D, 41 (__v64qi) __S, 42 __U); 43} 44 45static __inline__ __m512i __DEFAULT_FN_ATTRS 46_mm512_maskz_compress_epi8(__mmask64 __U, __m512i __D) 47{ 48 return (__m512i) __builtin_ia32_compressqi512_mask ((__v64qi) __D, 49 (__v64qi) _mm512_setzero_si512(), 50 __U); 51} 52 53static __inline__ void __DEFAULT_FN_ATTRS 54_mm512_mask_compressstoreu_epi16(void *__P, __mmask32 __U, __m512i __D) 55{ 56 __builtin_ia32_compressstorehi512_mask ((__v32hi *) __P, (__v32hi) __D, 57 __U); 58} 59 60static __inline__ void __DEFAULT_FN_ATTRS 61_mm512_mask_compressstoreu_epi8(void *__P, 
__mmask64 __U, __m512i __D) 62{ 63 __builtin_ia32_compressstoreqi512_mask ((__v64qi *) __P, (__v64qi) __D, 64 __U); 65} 66 67static __inline__ __m512i __DEFAULT_FN_ATTRS 68_mm512_mask_expand_epi16(__m512i __S, __mmask32 __U, __m512i __D) 69{ 70 return (__m512i) __builtin_ia32_expandhi512_mask ((__v32hi) __D, 71 (__v32hi) __S, 72 __U); 73} 74 75static __inline__ __m512i __DEFAULT_FN_ATTRS 76_mm512_maskz_expand_epi16(__mmask32 __U, __m512i __D) 77{ 78 return (__m512i) __builtin_ia32_expandhi512_mask ((__v32hi) __D, 79 (__v32hi) _mm512_setzero_si512(), 80 __U); 81} 82 83static __inline__ __m512i __DEFAULT_FN_ATTRS 84_mm512_mask_expand_epi8(__m512i __S, __mmask64 __U, __m512i __D) 85{ 86 return (__m512i) __builtin_ia32_expandqi512_mask ((__v64qi) __D, 87 (__v64qi) __S, 88 __U); 89} 90 91static __inline__ __m512i __DEFAULT_FN_ATTRS 92_mm512_maskz_expand_epi8(__mmask64 __U, __m512i __D) 93{ 94 return (__m512i) __builtin_ia32_expandqi512_mask ((__v64qi) __D, 95 (__v64qi) _mm512_setzero_si512(), 96 __U); 97} 98 99static __inline__ __m512i __DEFAULT_FN_ATTRS 100_mm512_mask_expandloadu_epi16(__m512i __S, __mmask32 __U, void const *__P) 101{ 102 return (__m512i) __builtin_ia32_expandloadhi512_mask ((const __v32hi *)__P, 103 (__v32hi) __S, 104 __U); 105} 106 107static __inline__ __m512i __DEFAULT_FN_ATTRS 108_mm512_maskz_expandloadu_epi16(__mmask32 __U, void const *__P) 109{ 110 return (__m512i) __builtin_ia32_expandloadhi512_mask ((const __v32hi *)__P, 111 (__v32hi) _mm512_setzero_si512(), 112 __U); 113} 114 115static __inline__ __m512i __DEFAULT_FN_ATTRS 116_mm512_mask_expandloadu_epi8(__m512i __S, __mmask64 __U, void const *__P) 117{ 118 return (__m512i) __builtin_ia32_expandloadqi512_mask ((const __v64qi *)__P, 119 (__v64qi) __S, 120 __U); 121} 122 123static __inline__ __m512i __DEFAULT_FN_ATTRS 124_mm512_maskz_expandloadu_epi8(__mmask64 __U, void const *__P) 125{ 126 return (__m512i) __builtin_ia32_expandloadqi512_mask ((const __v64qi *)__P, 127 (__v64qi) 
_mm512_setzero_si512(), 128 __U); 129} 130 131#define _mm512_shldi_epi64(A, B, I) \ 132 (__m512i)__builtin_ia32_vpshldq512((__v8di)(__m512i)(A), \ 133 (__v8di)(__m512i)(B), (int)(I)) 134 135#define _mm512_mask_shldi_epi64(S, U, A, B, I) \ 136 (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 137 (__v8di)_mm512_shldi_epi64((A), (B), (I)), \ 138 (__v8di)(__m512i)(S)) 139 140#define _mm512_maskz_shldi_epi64(U, A, B, I) \ 141 (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 142 (__v8di)_mm512_shldi_epi64((A), (B), (I)), \ 143 (__v8di)_mm512_setzero_si512()) 144 145#define _mm512_shldi_epi32(A, B, I) \ 146 (__m512i)__builtin_ia32_vpshldd512((__v16si)(__m512i)(A), \ 147 (__v16si)(__m512i)(B), (int)(I)) 148 149#define _mm512_mask_shldi_epi32(S, U, A, B, I) \ 150 (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 151 (__v16si)_mm512_shldi_epi32((A), (B), (I)), \ 152 (__v16si)(__m512i)(S)) 153 154#define _mm512_maskz_shldi_epi32(U, A, B, I) \ 155 (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 156 (__v16si)_mm512_shldi_epi32((A), (B), (I)), \ 157 (__v16si)_mm512_setzero_si512()) 158 159#define _mm512_shldi_epi16(A, B, I) \ 160 (__m512i)__builtin_ia32_vpshldw512((__v32hi)(__m512i)(A), \ 161 (__v32hi)(__m512i)(B), (int)(I)) 162 163#define _mm512_mask_shldi_epi16(S, U, A, B, I) \ 164 (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ 165 (__v32hi)_mm512_shldi_epi16((A), (B), (I)), \ 166 (__v32hi)(__m512i)(S)) 167 168#define _mm512_maskz_shldi_epi16(U, A, B, I) \ 169 (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ 170 (__v32hi)_mm512_shldi_epi16((A), (B), (I)), \ 171 (__v32hi)_mm512_setzero_si512()) 172 173#define _mm512_shrdi_epi64(A, B, I) \ 174 (__m512i)__builtin_ia32_vpshrdq512((__v8di)(__m512i)(A), \ 175 (__v8di)(__m512i)(B), (int)(I)) 176 177#define _mm512_mask_shrdi_epi64(S, U, A, B, I) \ 178 (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 179 (__v8di)_mm512_shrdi_epi64((A), (B), (I)), \ 180 (__v8di)(__m512i)(S)) 181 182#define 
_mm512_maskz_shrdi_epi64(U, A, B, I) \ 183 (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 184 (__v8di)_mm512_shrdi_epi64((A), (B), (I)), \ 185 (__v8di)_mm512_setzero_si512()) 186 187#define _mm512_shrdi_epi32(A, B, I) \ 188 (__m512i)__builtin_ia32_vpshrdd512((__v16si)(__m512i)(A), \ 189 (__v16si)(__m512i)(B), (int)(I)) 190 191#define _mm512_mask_shrdi_epi32(S, U, A, B, I) \ 192 (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 193 (__v16si)_mm512_shrdi_epi32((A), (B), (I)), \ 194 (__v16si)(__m512i)(S)) 195 196#define _mm512_maskz_shrdi_epi32(U, A, B, I) \ 197 (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 198 (__v16si)_mm512_shrdi_epi32((A), (B), (I)), \ 199 (__v16si)_mm512_setzero_si512()) 200 201#define _mm512_shrdi_epi16(A, B, I) \ 202 (__m512i)__builtin_ia32_vpshrdw512((__v32hi)(__m512i)(A), \ 203 (__v32hi)(__m512i)(B), (int)(I)) 204 205#define _mm512_mask_shrdi_epi16(S, U, A, B, I) \ 206 (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ 207 (__v32hi)_mm512_shrdi_epi16((A), (B), (I)), \ 208 (__v32hi)(__m512i)(S)) 209 210#define _mm512_maskz_shrdi_epi16(U, A, B, I) \ 211 (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ 212 (__v32hi)_mm512_shrdi_epi16((A), (B), (I)), \ 213 (__v32hi)_mm512_setzero_si512()) 214 215static __inline__ __m512i __DEFAULT_FN_ATTRS 216_mm512_shldv_epi64(__m512i __A, __m512i __B, __m512i __C) 217{ 218 return (__m512i)__builtin_ia32_vpshldvq512((__v8di)__A, (__v8di)__B, 219 (__v8di)__C); 220} 221 222static __inline__ __m512i __DEFAULT_FN_ATTRS 223_mm512_mask_shldv_epi64(__m512i __A, __mmask8 __U, __m512i __B, __m512i __C) 224{ 225 return (__m512i)__builtin_ia32_selectq_512(__U, 226 (__v8di)_mm512_shldv_epi64(__A, __B, __C), 227 (__v8di)__A); 228} 229 230static __inline__ __m512i __DEFAULT_FN_ATTRS 231_mm512_maskz_shldv_epi64(__mmask8 __U, __m512i __A, __m512i __B, __m512i __C) 232{ 233 return (__m512i)__builtin_ia32_selectq_512(__U, 234 (__v8di)_mm512_shldv_epi64(__A, __B, __C), 235 (__v8di)_mm512_setzero_si512()); 
236} 237 238static __inline__ __m512i __DEFAULT_FN_ATTRS 239_mm512_shldv_epi32(__m512i __A, __m512i __B, __m512i __C) 240{ 241 return (__m512i)__builtin_ia32_vpshldvd512((__v16si)__A, (__v16si)__B, 242 (__v16si)__C); 243} 244 245static __inline__ __m512i __DEFAULT_FN_ATTRS 246_mm512_mask_shldv_epi32(__m512i __A, __mmask16 __U, __m512i __B, __m512i __C) 247{ 248 return (__m512i)__builtin_ia32_selectd_512(__U, 249 (__v16si)_mm512_shldv_epi32(__A, __B, __C), 250 (__v16si)__A); 251} 252 253static __inline__ __m512i __DEFAULT_FN_ATTRS 254_mm512_maskz_shldv_epi32(__mmask16 __U, __m512i __A, __m512i __B, __m512i __C) 255{ 256 return (__m512i)__builtin_ia32_selectd_512(__U, 257 (__v16si)_mm512_shldv_epi32(__A, __B, __C), 258 (__v16si)_mm512_setzero_si512()); 259} 260 261static __inline__ __m512i __DEFAULT_FN_ATTRS 262_mm512_shldv_epi16(__m512i __A, __m512i __B, __m512i __C) 263{ 264 return (__m512i)__builtin_ia32_vpshldvw512((__v32hi)__A, (__v32hi)__B, 265 (__v32hi)__C); 266} 267 268static __inline__ __m512i __DEFAULT_FN_ATTRS 269_mm512_mask_shldv_epi16(__m512i __A, __mmask32 __U, __m512i __B, __m512i __C) 270{ 271 return (__m512i)__builtin_ia32_selectw_512(__U, 272 (__v32hi)_mm512_shldv_epi16(__A, __B, __C), 273 (__v32hi)__A); 274} 275 276static __inline__ __m512i __DEFAULT_FN_ATTRS 277_mm512_maskz_shldv_epi16(__mmask32 __U, __m512i __A, __m512i __B, __m512i __C) 278{ 279 return (__m512i)__builtin_ia32_selectw_512(__U, 280 (__v32hi)_mm512_shldv_epi16(__A, __B, __C), 281 (__v32hi)_mm512_setzero_si512()); 282} 283 284static __inline__ __m512i __DEFAULT_FN_ATTRS 285_mm512_shrdv_epi64(__m512i __A, __m512i __B, __m512i __C) 286{ 287 return (__m512i)__builtin_ia32_vpshrdvq512((__v8di)__A, (__v8di)__B, 288 (__v8di)__C); 289} 290 291static __inline__ __m512i __DEFAULT_FN_ATTRS 292_mm512_mask_shrdv_epi64(__m512i __A, __mmask8 __U, __m512i __B, __m512i __C) 293{ 294 return (__m512i)__builtin_ia32_selectq_512(__U, 295 (__v8di)_mm512_shrdv_epi64(__A, __B, __C), 296 (__v8di)__A); 297} 
298 299static __inline__ __m512i __DEFAULT_FN_ATTRS 300_mm512_maskz_shrdv_epi64(__mmask8 __U, __m512i __A, __m512i __B, __m512i __C) 301{ 302 return (__m512i)__builtin_ia32_selectq_512(__U, 303 (__v8di)_mm512_shrdv_epi64(__A, __B, __C), 304 (__v8di)_mm512_setzero_si512()); 305} 306 307static __inline__ __m512i __DEFAULT_FN_ATTRS 308_mm512_shrdv_epi32(__m512i __A, __m512i __B, __m512i __C) 309{ 310 return (__m512i)__builtin_ia32_vpshrdvd512((__v16si)__A, (__v16si)__B, 311 (__v16si)__C); 312} 313 314static __inline__ __m512i __DEFAULT_FN_ATTRS 315_mm512_mask_shrdv_epi32(__m512i __A, __mmask16 __U, __m512i __B, __m512i __C) 316{ 317 return (__m512i) __builtin_ia32_selectd_512(__U, 318 (__v16si)_mm512_shrdv_epi32(__A, __B, __C), 319 (__v16si)__A); 320} 321 322static __inline__ __m512i __DEFAULT_FN_ATTRS 323_mm512_maskz_shrdv_epi32(__mmask16 __U, __m512i __A, __m512i __B, __m512i __C) 324{ 325 return (__m512i) __builtin_ia32_selectd_512(__U, 326 (__v16si)_mm512_shrdv_epi32(__A, __B, __C), 327 (__v16si)_mm512_setzero_si512()); 328} 329 330static __inline__ __m512i __DEFAULT_FN_ATTRS 331_mm512_shrdv_epi16(__m512i __A, __m512i __B, __m512i __C) 332{ 333 return (__m512i)__builtin_ia32_vpshrdvw512((__v32hi)__A, (__v32hi)__B, 334 (__v32hi)__C); 335} 336 337static __inline__ __m512i __DEFAULT_FN_ATTRS 338_mm512_mask_shrdv_epi16(__m512i __A, __mmask32 __U, __m512i __B, __m512i __C) 339{ 340 return (__m512i)__builtin_ia32_selectw_512(__U, 341 (__v32hi)_mm512_shrdv_epi16(__A, __B, __C), 342 (__v32hi)__A); 343} 344 345static __inline__ __m512i __DEFAULT_FN_ATTRS 346_mm512_maskz_shrdv_epi16(__mmask32 __U, __m512i __A, __m512i __B, __m512i __C) 347{ 348 return (__m512i)__builtin_ia32_selectw_512(__U, 349 (__v32hi)_mm512_shrdv_epi16(__A, __B, __C), 350 (__v32hi)_mm512_setzero_si512()); 351} 352 353 354#undef __DEFAULT_FN_ATTRS 355 356#endif 357 358