/* avx512vbmi2intrin.h revision 341825 */
1/*===------------- avx512vbmi2intrin.h - VBMI2 intrinsics ------------------=== 2 * 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a copy 5 * of this software and associated documentation files (the "Software"), to deal 6 * in the Software without restriction, including without limitation the rights 7 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 * copies of the Software, and to permit persons to whom the Software is 9 * furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 * THE SOFTWARE. 21 * 22 *===-----------------------------------------------------------------------=== 23 */ 24#ifndef __IMMINTRIN_H 25#error "Never use <avx512vbmi2intrin.h> directly; include <immintrin.h> instead." 26#endif 27 28#ifndef __AVX512VBMI2INTRIN_H 29#define __AVX512VBMI2INTRIN_H 30 31/* Define the default attributes for the functions in this file. 
*/ 32#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi2"), __min_vector_width__(512))) 33 34 35static __inline__ __m512i __DEFAULT_FN_ATTRS 36_mm512_mask_compress_epi16(__m512i __S, __mmask32 __U, __m512i __D) 37{ 38 return (__m512i) __builtin_ia32_compresshi512_mask ((__v32hi) __D, 39 (__v32hi) __S, 40 __U); 41} 42 43static __inline__ __m512i __DEFAULT_FN_ATTRS 44_mm512_maskz_compress_epi16(__mmask32 __U, __m512i __D) 45{ 46 return (__m512i) __builtin_ia32_compresshi512_mask ((__v32hi) __D, 47 (__v32hi) _mm512_setzero_si512(), 48 __U); 49} 50 51static __inline__ __m512i __DEFAULT_FN_ATTRS 52_mm512_mask_compress_epi8(__m512i __S, __mmask64 __U, __m512i __D) 53{ 54 return (__m512i) __builtin_ia32_compressqi512_mask ((__v64qi) __D, 55 (__v64qi) __S, 56 __U); 57} 58 59static __inline__ __m512i __DEFAULT_FN_ATTRS 60_mm512_maskz_compress_epi8(__mmask64 __U, __m512i __D) 61{ 62 return (__m512i) __builtin_ia32_compressqi512_mask ((__v64qi) __D, 63 (__v64qi) _mm512_setzero_si512(), 64 __U); 65} 66 67static __inline__ void __DEFAULT_FN_ATTRS 68_mm512_mask_compressstoreu_epi16(void *__P, __mmask32 __U, __m512i __D) 69{ 70 __builtin_ia32_compressstorehi512_mask ((__v32hi *) __P, (__v32hi) __D, 71 __U); 72} 73 74static __inline__ void __DEFAULT_FN_ATTRS 75_mm512_mask_compressstoreu_epi8(void *__P, __mmask64 __U, __m512i __D) 76{ 77 __builtin_ia32_compressstoreqi512_mask ((__v64qi *) __P, (__v64qi) __D, 78 __U); 79} 80 81static __inline__ __m512i __DEFAULT_FN_ATTRS 82_mm512_mask_expand_epi16(__m512i __S, __mmask32 __U, __m512i __D) 83{ 84 return (__m512i) __builtin_ia32_expandhi512_mask ((__v32hi) __D, 85 (__v32hi) __S, 86 __U); 87} 88 89static __inline__ __m512i __DEFAULT_FN_ATTRS 90_mm512_maskz_expand_epi16(__mmask32 __U, __m512i __D) 91{ 92 return (__m512i) __builtin_ia32_expandhi512_mask ((__v32hi) __D, 93 (__v32hi) _mm512_setzero_si512(), 94 __U); 95} 96 97static __inline__ __m512i __DEFAULT_FN_ATTRS 
98_mm512_mask_expand_epi8(__m512i __S, __mmask64 __U, __m512i __D) 99{ 100 return (__m512i) __builtin_ia32_expandqi512_mask ((__v64qi) __D, 101 (__v64qi) __S, 102 __U); 103} 104 105static __inline__ __m512i __DEFAULT_FN_ATTRS 106_mm512_maskz_expand_epi8(__mmask64 __U, __m512i __D) 107{ 108 return (__m512i) __builtin_ia32_expandqi512_mask ((__v64qi) __D, 109 (__v64qi) _mm512_setzero_si512(), 110 __U); 111} 112 113static __inline__ __m512i __DEFAULT_FN_ATTRS 114_mm512_mask_expandloadu_epi16(__m512i __S, __mmask32 __U, void const *__P) 115{ 116 return (__m512i) __builtin_ia32_expandloadhi512_mask ((const __v32hi *)__P, 117 (__v32hi) __S, 118 __U); 119} 120 121static __inline__ __m512i __DEFAULT_FN_ATTRS 122_mm512_maskz_expandloadu_epi16(__mmask32 __U, void const *__P) 123{ 124 return (__m512i) __builtin_ia32_expandloadhi512_mask ((const __v32hi *)__P, 125 (__v32hi) _mm512_setzero_si512(), 126 __U); 127} 128 129static __inline__ __m512i __DEFAULT_FN_ATTRS 130_mm512_mask_expandloadu_epi8(__m512i __S, __mmask64 __U, void const *__P) 131{ 132 return (__m512i) __builtin_ia32_expandloadqi512_mask ((const __v64qi *)__P, 133 (__v64qi) __S, 134 __U); 135} 136 137static __inline__ __m512i __DEFAULT_FN_ATTRS 138_mm512_maskz_expandloadu_epi8(__mmask64 __U, void const *__P) 139{ 140 return (__m512i) __builtin_ia32_expandloadqi512_mask ((const __v64qi *)__P, 141 (__v64qi) _mm512_setzero_si512(), 142 __U); 143} 144 145#define _mm512_shldi_epi64(A, B, I) \ 146 (__m512i)__builtin_ia32_vpshldq512((__v8di)(__m512i)(A), \ 147 (__v8di)(__m512i)(B), (int)(I)) 148 149#define _mm512_mask_shldi_epi64(S, U, A, B, I) \ 150 (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 151 (__v8di)_mm512_shldi_epi64((A), (B), (I)), \ 152 (__v8di)(__m512i)(S)) 153 154#define _mm512_maskz_shldi_epi64(U, A, B, I) \ 155 (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 156 (__v8di)_mm512_shldi_epi64((A), (B), (I)), \ 157 (__v8di)_mm512_setzero_si512()) 158 159#define _mm512_shldi_epi32(A, B, I) \ 160 
(__m512i)__builtin_ia32_vpshldd512((__v16si)(__m512i)(A), \ 161 (__v16si)(__m512i)(B), (int)(I)) 162 163#define _mm512_mask_shldi_epi32(S, U, A, B, I) \ 164 (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 165 (__v16si)_mm512_shldi_epi32((A), (B), (I)), \ 166 (__v16si)(__m512i)(S)) 167 168#define _mm512_maskz_shldi_epi32(U, A, B, I) \ 169 (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 170 (__v16si)_mm512_shldi_epi32((A), (B), (I)), \ 171 (__v16si)_mm512_setzero_si512()) 172 173#define _mm512_shldi_epi16(A, B, I) \ 174 (__m512i)__builtin_ia32_vpshldw512((__v32hi)(__m512i)(A), \ 175 (__v32hi)(__m512i)(B), (int)(I)) 176 177#define _mm512_mask_shldi_epi16(S, U, A, B, I) \ 178 (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ 179 (__v32hi)_mm512_shldi_epi16((A), (B), (I)), \ 180 (__v32hi)(__m512i)(S)) 181 182#define _mm512_maskz_shldi_epi16(U, A, B, I) \ 183 (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ 184 (__v32hi)_mm512_shldi_epi16((A), (B), (I)), \ 185 (__v32hi)_mm512_setzero_si512()) 186 187#define _mm512_shrdi_epi64(A, B, I) \ 188 (__m512i)__builtin_ia32_vpshrdq512((__v8di)(__m512i)(A), \ 189 (__v8di)(__m512i)(B), (int)(I)) 190 191#define _mm512_mask_shrdi_epi64(S, U, A, B, I) \ 192 (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 193 (__v8di)_mm512_shrdi_epi64((A), (B), (I)), \ 194 (__v8di)(__m512i)(S)) 195 196#define _mm512_maskz_shrdi_epi64(U, A, B, I) \ 197 (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 198 (__v8di)_mm512_shrdi_epi64((A), (B), (I)), \ 199 (__v8di)_mm512_setzero_si512()) 200 201#define _mm512_shrdi_epi32(A, B, I) \ 202 (__m512i)__builtin_ia32_vpshrdd512((__v16si)(__m512i)(A), \ 203 (__v16si)(__m512i)(B), (int)(I)) 204 205#define _mm512_mask_shrdi_epi32(S, U, A, B, I) \ 206 (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 207 (__v16si)_mm512_shrdi_epi32((A), (B), (I)), \ 208 (__v16si)(__m512i)(S)) 209 210#define _mm512_maskz_shrdi_epi32(U, A, B, I) \ 211 (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 
212 (__v16si)_mm512_shrdi_epi32((A), (B), (I)), \ 213 (__v16si)_mm512_setzero_si512()) 214 215#define _mm512_shrdi_epi16(A, B, I) \ 216 (__m512i)__builtin_ia32_vpshrdw512((__v32hi)(__m512i)(A), \ 217 (__v32hi)(__m512i)(B), (int)(I)) 218 219#define _mm512_mask_shrdi_epi16(S, U, A, B, I) \ 220 (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ 221 (__v32hi)_mm512_shrdi_epi16((A), (B), (I)), \ 222 (__v32hi)(__m512i)(S)) 223 224#define _mm512_maskz_shrdi_epi16(U, A, B, I) \ 225 (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ 226 (__v32hi)_mm512_shrdi_epi16((A), (B), (I)), \ 227 (__v32hi)_mm512_setzero_si512()) 228 229static __inline__ __m512i __DEFAULT_FN_ATTRS 230_mm512_mask_shldv_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m512i __B) 231{ 232 return (__m512i) __builtin_ia32_vpshldvq512_mask ((__v8di) __S, 233 (__v8di) __A, 234 (__v8di) __B, 235 __U); 236} 237 238static __inline__ __m512i __DEFAULT_FN_ATTRS 239_mm512_maskz_shldv_epi64(__mmask8 __U, __m512i __S, __m512i __A, __m512i __B) 240{ 241 return (__m512i) __builtin_ia32_vpshldvq512_maskz ((__v8di) __S, 242 (__v8di) __A, 243 (__v8di) __B, 244 __U); 245} 246 247static __inline__ __m512i __DEFAULT_FN_ATTRS 248_mm512_shldv_epi64(__m512i __S, __m512i __A, __m512i __B) 249{ 250 return (__m512i) __builtin_ia32_vpshldvq512_mask ((__v8di) __S, 251 (__v8di) __A, 252 (__v8di) __B, 253 (__mmask8) -1); 254} 255 256static __inline__ __m512i __DEFAULT_FN_ATTRS 257_mm512_mask_shldv_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) 258{ 259 return (__m512i) __builtin_ia32_vpshldvd512_mask ((__v16si) __S, 260 (__v16si) __A, 261 (__v16si) __B, 262 __U); 263} 264 265static __inline__ __m512i __DEFAULT_FN_ATTRS 266_mm512_maskz_shldv_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) 267{ 268 return (__m512i) __builtin_ia32_vpshldvd512_maskz ((__v16si) __S, 269 (__v16si) __A, 270 (__v16si) __B, 271 __U); 272} 273 274static __inline__ __m512i __DEFAULT_FN_ATTRS 275_mm512_shldv_epi32(__m512i __S, 
__m512i __A, __m512i __B) 276{ 277 return (__m512i) __builtin_ia32_vpshldvd512_mask ((__v16si) __S, 278 (__v16si) __A, 279 (__v16si) __B, 280 (__mmask16) -1); 281} 282 283 284static __inline__ __m512i __DEFAULT_FN_ATTRS 285_mm512_mask_shldv_epi16(__m512i __S, __mmask32 __U, __m512i __A, __m512i __B) 286{ 287 return (__m512i) __builtin_ia32_vpshldvw512_mask ((__v32hi) __S, 288 (__v32hi) __A, 289 (__v32hi) __B, 290 __U); 291} 292 293static __inline__ __m512i __DEFAULT_FN_ATTRS 294_mm512_maskz_shldv_epi16(__mmask32 __U, __m512i __S, __m512i __A, __m512i __B) 295{ 296 return (__m512i) __builtin_ia32_vpshldvw512_maskz ((__v32hi) __S, 297 (__v32hi) __A, 298 (__v32hi) __B, 299 __U); 300} 301 302static __inline__ __m512i __DEFAULT_FN_ATTRS 303_mm512_shldv_epi16(__m512i __S, __m512i __A, __m512i __B) 304{ 305 return (__m512i) __builtin_ia32_vpshldvw512_mask ((__v32hi) __S, 306 (__v32hi) __A, 307 (__v32hi) __B, 308 (__mmask32) -1); 309} 310 311static __inline__ __m512i __DEFAULT_FN_ATTRS 312_mm512_mask_shrdv_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m512i __B) 313{ 314 return (__m512i) __builtin_ia32_vpshrdvq512_mask ((__v8di) __S, 315 (__v8di) __A, 316 (__v8di) __B, 317 __U); 318} 319 320static __inline__ __m512i __DEFAULT_FN_ATTRS 321_mm512_maskz_shrdv_epi64(__mmask8 __U, __m512i __S, __m512i __A, __m512i __B) 322{ 323 return (__m512i) __builtin_ia32_vpshrdvq512_maskz ((__v8di) __S, 324 (__v8di) __A, 325 (__v8di) __B, 326 __U); 327} 328 329static __inline__ __m512i __DEFAULT_FN_ATTRS 330_mm512_shrdv_epi64(__m512i __S, __m512i __A, __m512i __B) 331{ 332 return (__m512i) __builtin_ia32_vpshrdvq512_mask ((__v8di) __S, 333 (__v8di) __A, 334 (__v8di) __B, 335 (__mmask8) -1); 336} 337 338static __inline__ __m512i __DEFAULT_FN_ATTRS 339_mm512_mask_shrdv_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) 340{ 341 return (__m512i) __builtin_ia32_vpshrdvd512_mask ((__v16si) __S, 342 (__v16si) __A, 343 (__v16si) __B, 344 __U); 345} 346 347static __inline__ __m512i 
__DEFAULT_FN_ATTRS 348_mm512_maskz_shrdv_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) 349{ 350 return (__m512i) __builtin_ia32_vpshrdvd512_maskz ((__v16si) __S, 351 (__v16si) __A, 352 (__v16si) __B, 353 __U); 354} 355 356static __inline__ __m512i __DEFAULT_FN_ATTRS 357_mm512_shrdv_epi32(__m512i __S, __m512i __A, __m512i __B) 358{ 359 return (__m512i) __builtin_ia32_vpshrdvd512_mask ((__v16si) __S, 360 (__v16si) __A, 361 (__v16si) __B, 362 (__mmask16) -1); 363} 364 365 366static __inline__ __m512i __DEFAULT_FN_ATTRS 367_mm512_mask_shrdv_epi16(__m512i __S, __mmask32 __U, __m512i __A, __m512i __B) 368{ 369 return (__m512i) __builtin_ia32_vpshrdvw512_mask ((__v32hi) __S, 370 (__v32hi) __A, 371 (__v32hi) __B, 372 __U); 373} 374 375static __inline__ __m512i __DEFAULT_FN_ATTRS 376_mm512_maskz_shrdv_epi16(__mmask32 __U, __m512i __S, __m512i __A, __m512i __B) 377{ 378 return (__m512i) __builtin_ia32_vpshrdvw512_maskz ((__v32hi) __S, 379 (__v32hi) __A, 380 (__v32hi) __B, 381 __U); 382} 383 384static __inline__ __m512i __DEFAULT_FN_ATTRS 385_mm512_shrdv_epi16(__m512i __S, __m512i __A, __m512i __B) 386{ 387 return (__m512i) __builtin_ia32_vpshrdvw512_mask ((__v32hi) __S, 388 (__v32hi) __A, 389 (__v32hi) __B, 390 (__mmask32) -1); 391} 392 393 394#undef __DEFAULT_FN_ATTRS 395 396#endif 397 398