1327302Sdim/*===------------- avx512vbmi2intrin.h - VBMI2 intrinsics ------------------=== 2327302Sdim * 3327302Sdim * 4353358Sdim * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 5353358Sdim * See https://llvm.org/LICENSE.txt for license information. 6353358Sdim * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 7327302Sdim * 8327302Sdim *===-----------------------------------------------------------------------=== 9327302Sdim */ 10327302Sdim#ifndef __IMMINTRIN_H 11327302Sdim#error "Never use <avx512vbmi2intrin.h> directly; include <immintrin.h> instead." 12327302Sdim#endif 13327302Sdim 14327302Sdim#ifndef __AVX512VBMI2INTRIN_H 15327302Sdim#define __AVX512VBMI2INTRIN_H 16327302Sdim 17327302Sdim/* Define the default attributes for the functions in this file. */ 18341825Sdim#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi2"), __min_vector_width__(512))) 19327302Sdim 20327302Sdim 21327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 22327302Sdim_mm512_mask_compress_epi16(__m512i __S, __mmask32 __U, __m512i __D) 23327302Sdim{ 24327302Sdim return (__m512i) __builtin_ia32_compresshi512_mask ((__v32hi) __D, 25327302Sdim (__v32hi) __S, 26327302Sdim __U); 27327302Sdim} 28327302Sdim 29327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 30327302Sdim_mm512_maskz_compress_epi16(__mmask32 __U, __m512i __D) 31327302Sdim{ 32327302Sdim return (__m512i) __builtin_ia32_compresshi512_mask ((__v32hi) __D, 33341825Sdim (__v32hi) _mm512_setzero_si512(), 34327302Sdim __U); 35327302Sdim} 36327302Sdim 37327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 38327302Sdim_mm512_mask_compress_epi8(__m512i __S, __mmask64 __U, __m512i __D) 39327302Sdim{ 40327302Sdim return (__m512i) __builtin_ia32_compressqi512_mask ((__v64qi) __D, 41327302Sdim (__v64qi) __S, 42327302Sdim __U); 43327302Sdim} 44327302Sdim 45327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 46327302Sdim_mm512_maskz_compress_epi8(__mmask64 __U, __m512i __D) 47327302Sdim{ 48327302Sdim return (__m512i) __builtin_ia32_compressqi512_mask ((__v64qi) __D, 49341825Sdim (__v64qi) _mm512_setzero_si512(), 50327302Sdim __U); 51327302Sdim} 52327302Sdim 53327302Sdimstatic __inline__ void __DEFAULT_FN_ATTRS 54327302Sdim_mm512_mask_compressstoreu_epi16(void *__P, __mmask32 __U, __m512i __D) 55327302Sdim{ 56327302Sdim __builtin_ia32_compressstorehi512_mask ((__v32hi *) __P, (__v32hi) __D, 57327302Sdim __U); 58327302Sdim} 59327302Sdim 60327302Sdimstatic __inline__ void __DEFAULT_FN_ATTRS 61327302Sdim_mm512_mask_compressstoreu_epi8(void *__P, __mmask64 __U, __m512i __D) 62327302Sdim{ 63327302Sdim __builtin_ia32_compressstoreqi512_mask ((__v64qi *) __P, (__v64qi) __D, 64327302Sdim __U); 65327302Sdim} 66327302Sdim 67327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 68327302Sdim_mm512_mask_expand_epi16(__m512i __S, __mmask32 __U, __m512i __D) 69327302Sdim{ 70327302Sdim return (__m512i) __builtin_ia32_expandhi512_mask ((__v32hi) __D, 71327302Sdim (__v32hi) __S, 72327302Sdim __U); 73327302Sdim} 74327302Sdim 75327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 76327302Sdim_mm512_maskz_expand_epi16(__mmask32 __U, __m512i __D) 77327302Sdim{ 78327302Sdim return (__m512i) __builtin_ia32_expandhi512_mask ((__v32hi) __D, 79341825Sdim (__v32hi) _mm512_setzero_si512(), 80327302Sdim __U); 81327302Sdim} 82327302Sdim 83327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 84327302Sdim_mm512_mask_expand_epi8(__m512i __S, __mmask64 __U, __m512i __D) 85327302Sdim{ 86327302Sdim return (__m512i) __builtin_ia32_expandqi512_mask ((__v64qi) __D, 87327302Sdim (__v64qi) __S, 88327302Sdim __U); 89327302Sdim} 90327302Sdim 91327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 92327302Sdim_mm512_maskz_expand_epi8(__mmask64 __U, __m512i __D) 93327302Sdim{ 94327302Sdim return (__m512i) __builtin_ia32_expandqi512_mask ((__v64qi) __D, 95341825Sdim (__v64qi) _mm512_setzero_si512(), 96327302Sdim __U); 97327302Sdim} 98327302Sdim 99327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 100327302Sdim_mm512_mask_expandloadu_epi16(__m512i __S, __mmask32 __U, void const *__P) 101327302Sdim{ 102327302Sdim return (__m512i) __builtin_ia32_expandloadhi512_mask ((const __v32hi *)__P, 103327302Sdim (__v32hi) __S, 104327302Sdim __U); 105327302Sdim} 106327302Sdim 107327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 108327302Sdim_mm512_maskz_expandloadu_epi16(__mmask32 __U, void const *__P) 109327302Sdim{ 110327302Sdim return (__m512i) __builtin_ia32_expandloadhi512_mask ((const __v32hi *)__P, 111341825Sdim (__v32hi) _mm512_setzero_si512(), 112327302Sdim __U); 113327302Sdim} 114327302Sdim 115327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 116327302Sdim_mm512_mask_expandloadu_epi8(__m512i __S, __mmask64 __U, void const *__P) 117327302Sdim{ 118327302Sdim return (__m512i) __builtin_ia32_expandloadqi512_mask ((const __v64qi *)__P, 119327302Sdim (__v64qi) __S, 120327302Sdim __U); 121327302Sdim} 122327302Sdim 123327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 124327302Sdim_mm512_maskz_expandloadu_epi8(__mmask64 __U, void const *__P) 125327302Sdim{ 126327302Sdim return (__m512i) __builtin_ia32_expandloadqi512_mask ((const __v64qi *)__P, 127341825Sdim (__v64qi) _mm512_setzero_si512(), 128327302Sdim __U); 129327302Sdim} 130327302Sdim 131341825Sdim#define _mm512_shldi_epi64(A, B, I) \ 132341825Sdim (__m512i)__builtin_ia32_vpshldq512((__v8di)(__m512i)(A), \ 133341825Sdim (__v8di)(__m512i)(B), (int)(I)) 134327302Sdim 135341825Sdim#define _mm512_mask_shldi_epi64(S, U, A, B, I) \ 136341825Sdim (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 137341825Sdim (__v8di)_mm512_shldi_epi64((A), (B), (I)), \ 138341825Sdim (__v8di)(__m512i)(S)) 139341825Sdim 140327302Sdim#define _mm512_maskz_shldi_epi64(U, A, B, I) \ 141341825Sdim (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 142341825Sdim (__v8di)_mm512_shldi_epi64((A), (B), (I)), \ 143341825Sdim (__v8di)_mm512_setzero_si512()) 144327302Sdim 145341825Sdim#define _mm512_shldi_epi32(A, B, I) \ 146341825Sdim (__m512i)__builtin_ia32_vpshldd512((__v16si)(__m512i)(A), \ 147341825Sdim (__v16si)(__m512i)(B), (int)(I)) 148327302Sdim 149341825Sdim#define _mm512_mask_shldi_epi32(S, U, A, B, I) \ 150341825Sdim (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 151341825Sdim (__v16si)_mm512_shldi_epi32((A), (B), (I)), \ 152341825Sdim (__v16si)(__m512i)(S)) 153327302Sdim 154327302Sdim#define _mm512_maskz_shldi_epi32(U, A, B, I) \ 155341825Sdim (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 156341825Sdim (__v16si)_mm512_shldi_epi32((A), (B), (I)), \ 157341825Sdim (__v16si)_mm512_setzero_si512()) 158327302Sdim 159341825Sdim#define _mm512_shldi_epi16(A, B, I) \ 160341825Sdim (__m512i)__builtin_ia32_vpshldw512((__v32hi)(__m512i)(A), \ 161341825Sdim (__v32hi)(__m512i)(B), (int)(I)) 162327302Sdim 163341825Sdim#define _mm512_mask_shldi_epi16(S, U, A, B, I) \ 164341825Sdim (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ 165341825Sdim (__v32hi)_mm512_shldi_epi16((A), (B), (I)), \ 166341825Sdim (__v32hi)(__m512i)(S)) 167327302Sdim 168327302Sdim#define _mm512_maskz_shldi_epi16(U, A, B, I) \ 169341825Sdim (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ 170341825Sdim (__v32hi)_mm512_shldi_epi16((A), (B), (I)), \ 171341825Sdim (__v32hi)_mm512_setzero_si512()) 172327302Sdim 173341825Sdim#define _mm512_shrdi_epi64(A, B, I) \ 174341825Sdim (__m512i)__builtin_ia32_vpshrdq512((__v8di)(__m512i)(A), \ 175341825Sdim (__v8di)(__m512i)(B), (int)(I)) 176327302Sdim 177341825Sdim#define _mm512_mask_shrdi_epi64(S, U, A, B, I) \ 178341825Sdim (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 179341825Sdim (__v8di)_mm512_shrdi_epi64((A), (B), (I)), \ 180341825Sdim (__v8di)(__m512i)(S)) 181327302Sdim 182327302Sdim#define _mm512_maskz_shrdi_epi64(U, A, B, I) \ 183341825Sdim (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 184341825Sdim (__v8di)_mm512_shrdi_epi64((A), (B), (I)), \ 185341825Sdim (__v8di)_mm512_setzero_si512()) 186327302Sdim 187341825Sdim#define _mm512_shrdi_epi32(A, B, I) \ 188341825Sdim (__m512i)__builtin_ia32_vpshrdd512((__v16si)(__m512i)(A), \ 189341825Sdim (__v16si)(__m512i)(B), (int)(I)) 190327302Sdim 191341825Sdim#define _mm512_mask_shrdi_epi32(S, U, A, B, I) \ 192341825Sdim (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 193341825Sdim (__v16si)_mm512_shrdi_epi32((A), (B), (I)), \ 194341825Sdim (__v16si)(__m512i)(S)) 195327302Sdim 196327302Sdim#define _mm512_maskz_shrdi_epi32(U, A, B, I) \ 197341825Sdim (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 198341825Sdim (__v16si)_mm512_shrdi_epi32((A), (B), (I)), \ 199341825Sdim (__v16si)_mm512_setzero_si512()) 200327302Sdim 201341825Sdim#define _mm512_shrdi_epi16(A, B, I) \ 202341825Sdim (__m512i)__builtin_ia32_vpshrdw512((__v32hi)(__m512i)(A), \ 203341825Sdim (__v32hi)(__m512i)(B), (int)(I)) 204327302Sdim 205341825Sdim#define _mm512_mask_shrdi_epi16(S, U, A, B, I) \ 206341825Sdim (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ 207341825Sdim (__v32hi)_mm512_shrdi_epi16((A), (B), (I)), \ 208341825Sdim (__v32hi)(__m512i)(S)) 209327302Sdim 210327302Sdim#define _mm512_maskz_shrdi_epi16(U, A, B, I) \ 211341825Sdim (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ 212341825Sdim (__v32hi)_mm512_shrdi_epi16((A), (B), (I)), \ 213341825Sdim (__v32hi)_mm512_setzero_si512()) 214327302Sdim 215327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 216344779Sdim_mm512_shldv_epi64(__m512i __A, __m512i __B, __m512i __C) 217327302Sdim{ 218344779Sdim return (__m512i)__builtin_ia32_vpshldvq512((__v8di)__A, (__v8di)__B, 219344779Sdim (__v8di)__C); 220327302Sdim} 221327302Sdim 222327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 223344779Sdim_mm512_mask_shldv_epi64(__m512i __A, __mmask8 __U, __m512i __B, __m512i __C) 224327302Sdim{ 225344779Sdim return (__m512i)__builtin_ia32_selectq_512(__U, 226344779Sdim (__v8di)_mm512_shldv_epi64(__A, __B, __C), 227344779Sdim (__v8di)__A); 228327302Sdim} 229327302Sdim 230327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 231344779Sdim_mm512_maskz_shldv_epi64(__mmask8 __U, __m512i __A, __m512i __B, __m512i __C) 232327302Sdim{ 233344779Sdim return (__m512i)__builtin_ia32_selectq_512(__U, 234344779Sdim (__v8di)_mm512_shldv_epi64(__A, __B, __C), 235344779Sdim (__v8di)_mm512_setzero_si512()); 236327302Sdim} 237327302Sdim 238327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 239344779Sdim_mm512_shldv_epi32(__m512i __A, __m512i __B, __m512i __C) 240327302Sdim{ 241344779Sdim return (__m512i)__builtin_ia32_vpshldvd512((__v16si)__A, (__v16si)__B, 242344779Sdim (__v16si)__C); 243327302Sdim} 244327302Sdim 245327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 246344779Sdim_mm512_mask_shldv_epi32(__m512i __A, __mmask16 __U, __m512i __B, __m512i __C) 247327302Sdim{ 248344779Sdim return (__m512i)__builtin_ia32_selectd_512(__U, 249344779Sdim (__v16si)_mm512_shldv_epi32(__A, __B, __C), 250344779Sdim (__v16si)__A); 251327302Sdim} 252327302Sdim 253327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 254344779Sdim_mm512_maskz_shldv_epi32(__mmask16 __U, __m512i __A, __m512i __B, __m512i __C) 255327302Sdim{ 256344779Sdim return (__m512i)__builtin_ia32_selectd_512(__U, 257344779Sdim (__v16si)_mm512_shldv_epi32(__A, __B, __C), 258344779Sdim (__v16si)_mm512_setzero_si512()); 259327302Sdim} 260327302Sdim 261327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 262344779Sdim_mm512_shldv_epi16(__m512i __A, __m512i __B, __m512i __C) 263327302Sdim{ 264344779Sdim return (__m512i)__builtin_ia32_vpshldvw512((__v32hi)__A, (__v32hi)__B, 265344779Sdim (__v32hi)__C); 266327302Sdim} 267327302Sdim 268327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 269344779Sdim_mm512_mask_shldv_epi16(__m512i __A, __mmask32 __U, __m512i __B, __m512i __C) 270327302Sdim{ 271344779Sdim return (__m512i)__builtin_ia32_selectw_512(__U, 272344779Sdim (__v32hi)_mm512_shldv_epi16(__A, __B, __C), 273344779Sdim (__v32hi)__A); 274327302Sdim} 275327302Sdim 276327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 277344779Sdim_mm512_maskz_shldv_epi16(__mmask32 __U, __m512i __A, __m512i __B, __m512i __C) 278327302Sdim{ 279344779Sdim return (__m512i)__builtin_ia32_selectw_512(__U, 280344779Sdim (__v32hi)_mm512_shldv_epi16(__A, __B, __C), 281344779Sdim (__v32hi)_mm512_setzero_si512()); 282327302Sdim} 283327302Sdim 284327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 285344779Sdim_mm512_shrdv_epi64(__m512i __A, __m512i __B, __m512i __C) 286327302Sdim{ 287344779Sdim return (__m512i)__builtin_ia32_vpshrdvq512((__v8di)__A, (__v8di)__B, 288344779Sdim (__v8di)__C); 289327302Sdim} 290327302Sdim 291327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 292344779Sdim_mm512_mask_shrdv_epi64(__m512i __A, __mmask8 __U, __m512i __B, __m512i __C) 293327302Sdim{ 294344779Sdim return (__m512i)__builtin_ia32_selectq_512(__U, 295344779Sdim (__v8di)_mm512_shrdv_epi64(__A, __B, __C), 296344779Sdim (__v8di)__A); 297327302Sdim} 298327302Sdim 299327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 300344779Sdim_mm512_maskz_shrdv_epi64(__mmask8 __U, __m512i __A, __m512i __B, __m512i __C) 301327302Sdim{ 302344779Sdim return (__m512i)__builtin_ia32_selectq_512(__U, 303344779Sdim (__v8di)_mm512_shrdv_epi64(__A, __B, __C), 304344779Sdim (__v8di)_mm512_setzero_si512()); 305327302Sdim} 306327302Sdim 307327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 308344779Sdim_mm512_shrdv_epi32(__m512i __A, __m512i __B, __m512i __C) 309327302Sdim{ 310344779Sdim return (__m512i)__builtin_ia32_vpshrdvd512((__v16si)__A, (__v16si)__B, 311344779Sdim (__v16si)__C); 312327302Sdim} 313327302Sdim 314327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 315344779Sdim_mm512_mask_shrdv_epi32(__m512i __A, __mmask16 __U, __m512i __B, __m512i __C) 316327302Sdim{ 317344779Sdim return (__m512i) __builtin_ia32_selectd_512(__U, 318344779Sdim (__v16si)_mm512_shrdv_epi32(__A, __B, __C), 319344779Sdim (__v16si)__A); 320327302Sdim} 321327302Sdim 322327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 323344779Sdim_mm512_maskz_shrdv_epi32(__mmask16 __U, __m512i __A, __m512i __B, __m512i __C) 324327302Sdim{ 325344779Sdim return (__m512i) __builtin_ia32_selectd_512(__U, 326344779Sdim (__v16si)_mm512_shrdv_epi32(__A, __B, __C), 327344779Sdim (__v16si)_mm512_setzero_si512()); 328327302Sdim} 329327302Sdim 330327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 331344779Sdim_mm512_shrdv_epi16(__m512i __A, __m512i __B, __m512i __C) 332327302Sdim{ 333344779Sdim return (__m512i)__builtin_ia32_vpshrdvw512((__v32hi)__A, (__v32hi)__B, 334344779Sdim (__v32hi)__C); 335327302Sdim} 336327302Sdim 337327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 338344779Sdim_mm512_mask_shrdv_epi16(__m512i __A, __mmask32 __U, __m512i __B, __m512i __C) 339327302Sdim{ 340344779Sdim return (__m512i)__builtin_ia32_selectw_512(__U, 341344779Sdim (__v32hi)_mm512_shrdv_epi16(__A, __B, __C), 342344779Sdim (__v32hi)__A); 343327302Sdim} 344327302Sdim 345327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 346344779Sdim_mm512_maskz_shrdv_epi16(__mmask32 __U, __m512i __A, __m512i __B, __m512i __C) 347327302Sdim{ 348344779Sdim return (__m512i)__builtin_ia32_selectw_512(__U, 349344779Sdim (__v32hi)_mm512_shrdv_epi16(__A, __B, __C), 350344779Sdim (__v32hi)_mm512_setzero_si512()); 351327302Sdim} 352327302Sdim 353327302Sdim 354327302Sdim#undef __DEFAULT_FN_ATTRS 355327302Sdim 356327302Sdim#endif 357327302Sdim 358