/* avx512vbmi2intrin.h — VBMI2 intrinsics (revision 344779) */
1142425Snectar/*===------------- avx512vbmi2intrin.h - VBMI2 intrinsics ------------------=== 2160814Ssimon * 3142425Snectar * 4142425Snectar * Permission is hereby granted, free of charge, to any person obtaining a copy 5142425Snectar * of this software and associated documentation files (the "Software"), to deal 6142425Snectar * in the Software without restriction, including without limitation the rights 7142425Snectar * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8142425Snectar * copies of the Software, and to permit persons to whom the Software is 9142425Snectar * furnished to do so, subject to the following conditions: 10142425Snectar * 11142425Snectar * The above copyright notice and this permission notice shall be included in 12142425Snectar * all copies or substantial portions of the Software. 13142425Snectar * 14142425Snectar * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15142425Snectar * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16142425Snectar * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17142425Snectar * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18142425Snectar * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19142425Snectar * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20160814Ssimon * THE SOFTWARE. 21142425Snectar * 22142425Snectar *===-----------------------------------------------------------------------=== 23142425Snectar */ 24142425Snectar#ifndef __IMMINTRIN_H 25142425Snectar#error "Never use <avx512vbmi2intrin.h> directly; include <immintrin.h> instead." 26142425Snectar#endif 27142425Snectar 28142425Snectar#ifndef __AVX512VBMI2INTRIN_H 29142425Snectar#define __AVX512VBMI2INTRIN_H 30142425Snectar 31142425Snectar/* Define the default attributes for the functions in this file. 
*/ 32142425Snectar#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi2"), __min_vector_width__(512))) 33142425Snectar 34142425Snectar 35142425Snectarstatic __inline__ __m512i __DEFAULT_FN_ATTRS 36142425Snectar_mm512_mask_compress_epi16(__m512i __S, __mmask32 __U, __m512i __D) 37142425Snectar{ 38142425Snectar return (__m512i) __builtin_ia32_compresshi512_mask ((__v32hi) __D, 39142425Snectar (__v32hi) __S, 40142425Snectar __U); 41142425Snectar} 42142425Snectar 43142425Snectarstatic __inline__ __m512i __DEFAULT_FN_ATTRS 44142425Snectar_mm512_maskz_compress_epi16(__mmask32 __U, __m512i __D) 45142425Snectar{ 46142425Snectar return (__m512i) __builtin_ia32_compresshi512_mask ((__v32hi) __D, 47160814Ssimon (__v32hi) _mm512_setzero_si512(), 48160814Ssimon __U); 49160814Ssimon} 50160814Ssimon 51160814Ssimonstatic __inline__ __m512i __DEFAULT_FN_ATTRS 52160814Ssimon_mm512_mask_compress_epi8(__m512i __S, __mmask64 __U, __m512i __D) 53142425Snectar{ 54160814Ssimon return (__m512i) __builtin_ia32_compressqi512_mask ((__v64qi) __D, 55160814Ssimon (__v64qi) __S, 56142425Snectar __U); 57142425Snectar} 58142425Snectar 59142425Snectarstatic __inline__ __m512i __DEFAULT_FN_ATTRS 60142425Snectar_mm512_maskz_compress_epi8(__mmask64 __U, __m512i __D) 61142425Snectar{ 62142425Snectar return (__m512i) __builtin_ia32_compressqi512_mask ((__v64qi) __D, 63142425Snectar (__v64qi) _mm512_setzero_si512(), 64142425Snectar __U); 65160814Ssimon} 66160814Ssimon 67160814Ssimonstatic __inline__ void __DEFAULT_FN_ATTRS 68160814Ssimon_mm512_mask_compressstoreu_epi16(void *__P, __mmask32 __U, __m512i __D) 69160814Ssimon{ 70160814Ssimon __builtin_ia32_compressstorehi512_mask ((__v32hi *) __P, (__v32hi) __D, 71142425Snectar __U); 72142425Snectar} 73142425Snectar 74142425Snectarstatic __inline__ void __DEFAULT_FN_ATTRS 75142425Snectar_mm512_mask_compressstoreu_epi8(void *__P, __mmask64 __U, __m512i __D) 76142425Snectar{ 77142425Snectar 
__builtin_ia32_compressstoreqi512_mask ((__v64qi *) __P, (__v64qi) __D, 78142425Snectar __U); 79142425Snectar} 80142425Snectar 81142425Snectarstatic __inline__ __m512i __DEFAULT_FN_ATTRS 82142425Snectar_mm512_mask_expand_epi16(__m512i __S, __mmask32 __U, __m512i __D) 83142425Snectar{ 84142425Snectar return (__m512i) __builtin_ia32_expandhi512_mask ((__v32hi) __D, 85160814Ssimon (__v32hi) __S, 86142425Snectar __U); 87142425Snectar} 88142425Snectar 89142425Snectarstatic __inline__ __m512i __DEFAULT_FN_ATTRS 90142425Snectar_mm512_maskz_expand_epi16(__mmask32 __U, __m512i __D) 91142425Snectar{ 92142425Snectar return (__m512i) __builtin_ia32_expandhi512_mask ((__v32hi) __D, 93160814Ssimon (__v32hi) _mm512_setzero_si512(), 94142425Snectar __U); 95142425Snectar} 96142425Snectar 97142425Snectarstatic __inline__ __m512i __DEFAULT_FN_ATTRS 98142425Snectar_mm512_mask_expand_epi8(__m512i __S, __mmask64 __U, __m512i __D) 99142425Snectar{ 100142425Snectar return (__m512i) __builtin_ia32_expandqi512_mask ((__v64qi) __D, 101142425Snectar (__v64qi) __S, 102142425Snectar __U); 103142425Snectar} 104142425Snectar 105142425Snectarstatic __inline__ __m512i __DEFAULT_FN_ATTRS 106142425Snectar_mm512_maskz_expand_epi8(__mmask64 __U, __m512i __D) 107142425Snectar{ 108 return (__m512i) __builtin_ia32_expandqi512_mask ((__v64qi) __D, 109 (__v64qi) _mm512_setzero_si512(), 110 __U); 111} 112 113static __inline__ __m512i __DEFAULT_FN_ATTRS 114_mm512_mask_expandloadu_epi16(__m512i __S, __mmask32 __U, void const *__P) 115{ 116 return (__m512i) __builtin_ia32_expandloadhi512_mask ((const __v32hi *)__P, 117 (__v32hi) __S, 118 __U); 119} 120 121static __inline__ __m512i __DEFAULT_FN_ATTRS 122_mm512_maskz_expandloadu_epi16(__mmask32 __U, void const *__P) 123{ 124 return (__m512i) __builtin_ia32_expandloadhi512_mask ((const __v32hi *)__P, 125 (__v32hi) _mm512_setzero_si512(), 126 __U); 127} 128 129static __inline__ __m512i __DEFAULT_FN_ATTRS 130_mm512_mask_expandloadu_epi8(__m512i __S, __mmask64 __U, 
void const *__P) 131{ 132 return (__m512i) __builtin_ia32_expandloadqi512_mask ((const __v64qi *)__P, 133 (__v64qi) __S, 134 __U); 135} 136 137static __inline__ __m512i __DEFAULT_FN_ATTRS 138_mm512_maskz_expandloadu_epi8(__mmask64 __U, void const *__P) 139{ 140 return (__m512i) __builtin_ia32_expandloadqi512_mask ((const __v64qi *)__P, 141 (__v64qi) _mm512_setzero_si512(), 142 __U); 143} 144 145#define _mm512_shldi_epi64(A, B, I) \ 146 (__m512i)__builtin_ia32_vpshldq512((__v8di)(__m512i)(A), \ 147 (__v8di)(__m512i)(B), (int)(I)) 148 149#define _mm512_mask_shldi_epi64(S, U, A, B, I) \ 150 (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 151 (__v8di)_mm512_shldi_epi64((A), (B), (I)), \ 152 (__v8di)(__m512i)(S)) 153 154#define _mm512_maskz_shldi_epi64(U, A, B, I) \ 155 (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 156 (__v8di)_mm512_shldi_epi64((A), (B), (I)), \ 157 (__v8di)_mm512_setzero_si512()) 158 159#define _mm512_shldi_epi32(A, B, I) \ 160 (__m512i)__builtin_ia32_vpshldd512((__v16si)(__m512i)(A), \ 161 (__v16si)(__m512i)(B), (int)(I)) 162 163#define _mm512_mask_shldi_epi32(S, U, A, B, I) \ 164 (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 165 (__v16si)_mm512_shldi_epi32((A), (B), (I)), \ 166 (__v16si)(__m512i)(S)) 167 168#define _mm512_maskz_shldi_epi32(U, A, B, I) \ 169 (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 170 (__v16si)_mm512_shldi_epi32((A), (B), (I)), \ 171 (__v16si)_mm512_setzero_si512()) 172 173#define _mm512_shldi_epi16(A, B, I) \ 174 (__m512i)__builtin_ia32_vpshldw512((__v32hi)(__m512i)(A), \ 175 (__v32hi)(__m512i)(B), (int)(I)) 176 177#define _mm512_mask_shldi_epi16(S, U, A, B, I) \ 178 (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ 179 (__v32hi)_mm512_shldi_epi16((A), (B), (I)), \ 180 (__v32hi)(__m512i)(S)) 181 182#define _mm512_maskz_shldi_epi16(U, A, B, I) \ 183 (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ 184 (__v32hi)_mm512_shldi_epi16((A), (B), (I)), \ 185 (__v32hi)_mm512_setzero_si512()) 186 
187#define _mm512_shrdi_epi64(A, B, I) \ 188 (__m512i)__builtin_ia32_vpshrdq512((__v8di)(__m512i)(A), \ 189 (__v8di)(__m512i)(B), (int)(I)) 190 191#define _mm512_mask_shrdi_epi64(S, U, A, B, I) \ 192 (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 193 (__v8di)_mm512_shrdi_epi64((A), (B), (I)), \ 194 (__v8di)(__m512i)(S)) 195 196#define _mm512_maskz_shrdi_epi64(U, A, B, I) \ 197 (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 198 (__v8di)_mm512_shrdi_epi64((A), (B), (I)), \ 199 (__v8di)_mm512_setzero_si512()) 200 201#define _mm512_shrdi_epi32(A, B, I) \ 202 (__m512i)__builtin_ia32_vpshrdd512((__v16si)(__m512i)(A), \ 203 (__v16si)(__m512i)(B), (int)(I)) 204 205#define _mm512_mask_shrdi_epi32(S, U, A, B, I) \ 206 (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 207 (__v16si)_mm512_shrdi_epi32((A), (B), (I)), \ 208 (__v16si)(__m512i)(S)) 209 210#define _mm512_maskz_shrdi_epi32(U, A, B, I) \ 211 (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 212 (__v16si)_mm512_shrdi_epi32((A), (B), (I)), \ 213 (__v16si)_mm512_setzero_si512()) 214 215#define _mm512_shrdi_epi16(A, B, I) \ 216 (__m512i)__builtin_ia32_vpshrdw512((__v32hi)(__m512i)(A), \ 217 (__v32hi)(__m512i)(B), (int)(I)) 218 219#define _mm512_mask_shrdi_epi16(S, U, A, B, I) \ 220 (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ 221 (__v32hi)_mm512_shrdi_epi16((A), (B), (I)), \ 222 (__v32hi)(__m512i)(S)) 223 224#define _mm512_maskz_shrdi_epi16(U, A, B, I) \ 225 (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ 226 (__v32hi)_mm512_shrdi_epi16((A), (B), (I)), \ 227 (__v32hi)_mm512_setzero_si512()) 228 229static __inline__ __m512i __DEFAULT_FN_ATTRS 230_mm512_shldv_epi64(__m512i __A, __m512i __B, __m512i __C) 231{ 232 return (__m512i)__builtin_ia32_vpshldvq512((__v8di)__A, (__v8di)__B, 233 (__v8di)__C); 234} 235 236static __inline__ __m512i __DEFAULT_FN_ATTRS 237_mm512_mask_shldv_epi64(__m512i __A, __mmask8 __U, __m512i __B, __m512i __C) 238{ 239 return (__m512i)__builtin_ia32_selectq_512(__U, 
240 (__v8di)_mm512_shldv_epi64(__A, __B, __C), 241 (__v8di)__A); 242} 243 244static __inline__ __m512i __DEFAULT_FN_ATTRS 245_mm512_maskz_shldv_epi64(__mmask8 __U, __m512i __A, __m512i __B, __m512i __C) 246{ 247 return (__m512i)__builtin_ia32_selectq_512(__U, 248 (__v8di)_mm512_shldv_epi64(__A, __B, __C), 249 (__v8di)_mm512_setzero_si512()); 250} 251 252static __inline__ __m512i __DEFAULT_FN_ATTRS 253_mm512_shldv_epi32(__m512i __A, __m512i __B, __m512i __C) 254{ 255 return (__m512i)__builtin_ia32_vpshldvd512((__v16si)__A, (__v16si)__B, 256 (__v16si)__C); 257} 258 259static __inline__ __m512i __DEFAULT_FN_ATTRS 260_mm512_mask_shldv_epi32(__m512i __A, __mmask16 __U, __m512i __B, __m512i __C) 261{ 262 return (__m512i)__builtin_ia32_selectd_512(__U, 263 (__v16si)_mm512_shldv_epi32(__A, __B, __C), 264 (__v16si)__A); 265} 266 267static __inline__ __m512i __DEFAULT_FN_ATTRS 268_mm512_maskz_shldv_epi32(__mmask16 __U, __m512i __A, __m512i __B, __m512i __C) 269{ 270 return (__m512i)__builtin_ia32_selectd_512(__U, 271 (__v16si)_mm512_shldv_epi32(__A, __B, __C), 272 (__v16si)_mm512_setzero_si512()); 273} 274 275static __inline__ __m512i __DEFAULT_FN_ATTRS 276_mm512_shldv_epi16(__m512i __A, __m512i __B, __m512i __C) 277{ 278 return (__m512i)__builtin_ia32_vpshldvw512((__v32hi)__A, (__v32hi)__B, 279 (__v32hi)__C); 280} 281 282static __inline__ __m512i __DEFAULT_FN_ATTRS 283_mm512_mask_shldv_epi16(__m512i __A, __mmask32 __U, __m512i __B, __m512i __C) 284{ 285 return (__m512i)__builtin_ia32_selectw_512(__U, 286 (__v32hi)_mm512_shldv_epi16(__A, __B, __C), 287 (__v32hi)__A); 288} 289 290static __inline__ __m512i __DEFAULT_FN_ATTRS 291_mm512_maskz_shldv_epi16(__mmask32 __U, __m512i __A, __m512i __B, __m512i __C) 292{ 293 return (__m512i)__builtin_ia32_selectw_512(__U, 294 (__v32hi)_mm512_shldv_epi16(__A, __B, __C), 295 (__v32hi)_mm512_setzero_si512()); 296} 297 298static __inline__ __m512i __DEFAULT_FN_ATTRS 299_mm512_shrdv_epi64(__m512i __A, __m512i __B, __m512i __C) 300{ 301 return 
(__m512i)__builtin_ia32_vpshrdvq512((__v8di)__A, (__v8di)__B, 302 (__v8di)__C); 303} 304 305static __inline__ __m512i __DEFAULT_FN_ATTRS 306_mm512_mask_shrdv_epi64(__m512i __A, __mmask8 __U, __m512i __B, __m512i __C) 307{ 308 return (__m512i)__builtin_ia32_selectq_512(__U, 309 (__v8di)_mm512_shrdv_epi64(__A, __B, __C), 310 (__v8di)__A); 311} 312 313static __inline__ __m512i __DEFAULT_FN_ATTRS 314_mm512_maskz_shrdv_epi64(__mmask8 __U, __m512i __A, __m512i __B, __m512i __C) 315{ 316 return (__m512i)__builtin_ia32_selectq_512(__U, 317 (__v8di)_mm512_shrdv_epi64(__A, __B, __C), 318 (__v8di)_mm512_setzero_si512()); 319} 320 321static __inline__ __m512i __DEFAULT_FN_ATTRS 322_mm512_shrdv_epi32(__m512i __A, __m512i __B, __m512i __C) 323{ 324 return (__m512i)__builtin_ia32_vpshrdvd512((__v16si)__A, (__v16si)__B, 325 (__v16si)__C); 326} 327 328static __inline__ __m512i __DEFAULT_FN_ATTRS 329_mm512_mask_shrdv_epi32(__m512i __A, __mmask16 __U, __m512i __B, __m512i __C) 330{ 331 return (__m512i) __builtin_ia32_selectd_512(__U, 332 (__v16si)_mm512_shrdv_epi32(__A, __B, __C), 333 (__v16si)__A); 334} 335 336static __inline__ __m512i __DEFAULT_FN_ATTRS 337_mm512_maskz_shrdv_epi32(__mmask16 __U, __m512i __A, __m512i __B, __m512i __C) 338{ 339 return (__m512i) __builtin_ia32_selectd_512(__U, 340 (__v16si)_mm512_shrdv_epi32(__A, __B, __C), 341 (__v16si)_mm512_setzero_si512()); 342} 343 344static __inline__ __m512i __DEFAULT_FN_ATTRS 345_mm512_shrdv_epi16(__m512i __A, __m512i __B, __m512i __C) 346{ 347 return (__m512i)__builtin_ia32_vpshrdvw512((__v32hi)__A, (__v32hi)__B, 348 (__v32hi)__C); 349} 350 351static __inline__ __m512i __DEFAULT_FN_ATTRS 352_mm512_mask_shrdv_epi16(__m512i __A, __mmask32 __U, __m512i __B, __m512i __C) 353{ 354 return (__m512i)__builtin_ia32_selectw_512(__U, 355 (__v32hi)_mm512_shrdv_epi16(__A, __B, __C), 356 (__v32hi)__A); 357} 358 359static __inline__ __m512i __DEFAULT_FN_ATTRS 360_mm512_maskz_shrdv_epi16(__mmask32 __U, __m512i __A, __m512i __B, __m512i __C) 
361{ 362 return (__m512i)__builtin_ia32_selectw_512(__U, 363 (__v32hi)_mm512_shrdv_epi16(__A, __B, __C), 364 (__v32hi)_mm512_setzero_si512()); 365} 366 367 368#undef __DEFAULT_FN_ATTRS 369 370#endif 371 372