/* avx512vbmi2intrin.h revision 341825 */
1327302Sdim/*===------------- avx512vbmi2intrin.h - VBMI2 intrinsics ------------------=== 2327302Sdim * 3327302Sdim * 4327302Sdim * Permission is hereby granted, free of charge, to any person obtaining a copy 5327302Sdim * of this software and associated documentation files (the "Software"), to deal 6327302Sdim * in the Software without restriction, including without limitation the rights 7327302Sdim * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8327302Sdim * copies of the Software, and to permit persons to whom the Software is 9327302Sdim * furnished to do so, subject to the following conditions: 10327302Sdim * 11327302Sdim * The above copyright notice and this permission notice shall be included in 12327302Sdim * all copies or substantial portions of the Software. 13327302Sdim * 14327302Sdim * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15327302Sdim * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16327302Sdim * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17327302Sdim * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18327302Sdim * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19327302Sdim * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20327302Sdim * THE SOFTWARE. 21327302Sdim * 22327302Sdim *===-----------------------------------------------------------------------=== 23327302Sdim */ 24327302Sdim#ifndef __IMMINTRIN_H 25327302Sdim#error "Never use <avx512vbmi2intrin.h> directly; include <immintrin.h> instead." 26327302Sdim#endif 27327302Sdim 28327302Sdim#ifndef __AVX512VBMI2INTRIN_H 29327302Sdim#define __AVX512VBMI2INTRIN_H 30327302Sdim 31327302Sdim/* Define the default attributes for the functions in this file. 
*/ 32341825Sdim#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi2"), __min_vector_width__(512))) 33327302Sdim 34327302Sdim 35327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 36327302Sdim_mm512_mask_compress_epi16(__m512i __S, __mmask32 __U, __m512i __D) 37327302Sdim{ 38327302Sdim return (__m512i) __builtin_ia32_compresshi512_mask ((__v32hi) __D, 39327302Sdim (__v32hi) __S, 40327302Sdim __U); 41327302Sdim} 42327302Sdim 43327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 44327302Sdim_mm512_maskz_compress_epi16(__mmask32 __U, __m512i __D) 45327302Sdim{ 46327302Sdim return (__m512i) __builtin_ia32_compresshi512_mask ((__v32hi) __D, 47341825Sdim (__v32hi) _mm512_setzero_si512(), 48327302Sdim __U); 49327302Sdim} 50327302Sdim 51327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 52327302Sdim_mm512_mask_compress_epi8(__m512i __S, __mmask64 __U, __m512i __D) 53327302Sdim{ 54327302Sdim return (__m512i) __builtin_ia32_compressqi512_mask ((__v64qi) __D, 55327302Sdim (__v64qi) __S, 56327302Sdim __U); 57327302Sdim} 58327302Sdim 59327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 60327302Sdim_mm512_maskz_compress_epi8(__mmask64 __U, __m512i __D) 61327302Sdim{ 62327302Sdim return (__m512i) __builtin_ia32_compressqi512_mask ((__v64qi) __D, 63341825Sdim (__v64qi) _mm512_setzero_si512(), 64327302Sdim __U); 65327302Sdim} 66327302Sdim 67327302Sdimstatic __inline__ void __DEFAULT_FN_ATTRS 68327302Sdim_mm512_mask_compressstoreu_epi16(void *__P, __mmask32 __U, __m512i __D) 69327302Sdim{ 70327302Sdim __builtin_ia32_compressstorehi512_mask ((__v32hi *) __P, (__v32hi) __D, 71327302Sdim __U); 72327302Sdim} 73327302Sdim 74327302Sdimstatic __inline__ void __DEFAULT_FN_ATTRS 75327302Sdim_mm512_mask_compressstoreu_epi8(void *__P, __mmask64 __U, __m512i __D) 76327302Sdim{ 77327302Sdim __builtin_ia32_compressstoreqi512_mask ((__v64qi *) __P, (__v64qi) __D, 78327302Sdim __U); 79327302Sdim} 80327302Sdim 81327302Sdimstatic __inline__ 
__m512i __DEFAULT_FN_ATTRS 82327302Sdim_mm512_mask_expand_epi16(__m512i __S, __mmask32 __U, __m512i __D) 83327302Sdim{ 84327302Sdim return (__m512i) __builtin_ia32_expandhi512_mask ((__v32hi) __D, 85327302Sdim (__v32hi) __S, 86327302Sdim __U); 87327302Sdim} 88327302Sdim 89327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 90327302Sdim_mm512_maskz_expand_epi16(__mmask32 __U, __m512i __D) 91327302Sdim{ 92327302Sdim return (__m512i) __builtin_ia32_expandhi512_mask ((__v32hi) __D, 93341825Sdim (__v32hi) _mm512_setzero_si512(), 94327302Sdim __U); 95327302Sdim} 96327302Sdim 97327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 98327302Sdim_mm512_mask_expand_epi8(__m512i __S, __mmask64 __U, __m512i __D) 99327302Sdim{ 100327302Sdim return (__m512i) __builtin_ia32_expandqi512_mask ((__v64qi) __D, 101327302Sdim (__v64qi) __S, 102327302Sdim __U); 103327302Sdim} 104327302Sdim 105327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 106327302Sdim_mm512_maskz_expand_epi8(__mmask64 __U, __m512i __D) 107327302Sdim{ 108327302Sdim return (__m512i) __builtin_ia32_expandqi512_mask ((__v64qi) __D, 109341825Sdim (__v64qi) _mm512_setzero_si512(), 110327302Sdim __U); 111327302Sdim} 112327302Sdim 113327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 114327302Sdim_mm512_mask_expandloadu_epi16(__m512i __S, __mmask32 __U, void const *__P) 115327302Sdim{ 116327302Sdim return (__m512i) __builtin_ia32_expandloadhi512_mask ((const __v32hi *)__P, 117327302Sdim (__v32hi) __S, 118327302Sdim __U); 119327302Sdim} 120327302Sdim 121327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 122327302Sdim_mm512_maskz_expandloadu_epi16(__mmask32 __U, void const *__P) 123327302Sdim{ 124327302Sdim return (__m512i) __builtin_ia32_expandloadhi512_mask ((const __v32hi *)__P, 125341825Sdim (__v32hi) _mm512_setzero_si512(), 126327302Sdim __U); 127327302Sdim} 128327302Sdim 129327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 130327302Sdim_mm512_mask_expandloadu_epi8(__m512i __S, __mmask64 __U, void 
const *__P) 131327302Sdim{ 132327302Sdim return (__m512i) __builtin_ia32_expandloadqi512_mask ((const __v64qi *)__P, 133327302Sdim (__v64qi) __S, 134327302Sdim __U); 135327302Sdim} 136327302Sdim 137327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 138327302Sdim_mm512_maskz_expandloadu_epi8(__mmask64 __U, void const *__P) 139327302Sdim{ 140327302Sdim return (__m512i) __builtin_ia32_expandloadqi512_mask ((const __v64qi *)__P, 141341825Sdim (__v64qi) _mm512_setzero_si512(), 142327302Sdim __U); 143327302Sdim} 144327302Sdim 145341825Sdim#define _mm512_shldi_epi64(A, B, I) \ 146341825Sdim (__m512i)__builtin_ia32_vpshldq512((__v8di)(__m512i)(A), \ 147341825Sdim (__v8di)(__m512i)(B), (int)(I)) 148327302Sdim 149341825Sdim#define _mm512_mask_shldi_epi64(S, U, A, B, I) \ 150341825Sdim (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 151341825Sdim (__v8di)_mm512_shldi_epi64((A), (B), (I)), \ 152341825Sdim (__v8di)(__m512i)(S)) 153341825Sdim 154327302Sdim#define _mm512_maskz_shldi_epi64(U, A, B, I) \ 155341825Sdim (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 156341825Sdim (__v8di)_mm512_shldi_epi64((A), (B), (I)), \ 157341825Sdim (__v8di)_mm512_setzero_si512()) 158327302Sdim 159341825Sdim#define _mm512_shldi_epi32(A, B, I) \ 160341825Sdim (__m512i)__builtin_ia32_vpshldd512((__v16si)(__m512i)(A), \ 161341825Sdim (__v16si)(__m512i)(B), (int)(I)) 162327302Sdim 163341825Sdim#define _mm512_mask_shldi_epi32(S, U, A, B, I) \ 164341825Sdim (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 165341825Sdim (__v16si)_mm512_shldi_epi32((A), (B), (I)), \ 166341825Sdim (__v16si)(__m512i)(S)) 167327302Sdim 168327302Sdim#define _mm512_maskz_shldi_epi32(U, A, B, I) \ 169341825Sdim (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 170341825Sdim (__v16si)_mm512_shldi_epi32((A), (B), (I)), \ 171341825Sdim (__v16si)_mm512_setzero_si512()) 172327302Sdim 173341825Sdim#define _mm512_shldi_epi16(A, B, I) \ 174341825Sdim (__m512i)__builtin_ia32_vpshldw512((__v32hi)(__m512i)(A), \ 
175341825Sdim (__v32hi)(__m512i)(B), (int)(I)) 176327302Sdim 177341825Sdim#define _mm512_mask_shldi_epi16(S, U, A, B, I) \ 178341825Sdim (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ 179341825Sdim (__v32hi)_mm512_shldi_epi16((A), (B), (I)), \ 180341825Sdim (__v32hi)(__m512i)(S)) 181327302Sdim 182327302Sdim#define _mm512_maskz_shldi_epi16(U, A, B, I) \ 183341825Sdim (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ 184341825Sdim (__v32hi)_mm512_shldi_epi16((A), (B), (I)), \ 185341825Sdim (__v32hi)_mm512_setzero_si512()) 186327302Sdim 187341825Sdim#define _mm512_shrdi_epi64(A, B, I) \ 188341825Sdim (__m512i)__builtin_ia32_vpshrdq512((__v8di)(__m512i)(A), \ 189341825Sdim (__v8di)(__m512i)(B), (int)(I)) 190327302Sdim 191341825Sdim#define _mm512_mask_shrdi_epi64(S, U, A, B, I) \ 192341825Sdim (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 193341825Sdim (__v8di)_mm512_shrdi_epi64((A), (B), (I)), \ 194341825Sdim (__v8di)(__m512i)(S)) 195327302Sdim 196327302Sdim#define _mm512_maskz_shrdi_epi64(U, A, B, I) \ 197341825Sdim (__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \ 198341825Sdim (__v8di)_mm512_shrdi_epi64((A), (B), (I)), \ 199341825Sdim (__v8di)_mm512_setzero_si512()) 200327302Sdim 201341825Sdim#define _mm512_shrdi_epi32(A, B, I) \ 202341825Sdim (__m512i)__builtin_ia32_vpshrdd512((__v16si)(__m512i)(A), \ 203341825Sdim (__v16si)(__m512i)(B), (int)(I)) 204327302Sdim 205341825Sdim#define _mm512_mask_shrdi_epi32(S, U, A, B, I) \ 206341825Sdim (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 207341825Sdim (__v16si)_mm512_shrdi_epi32((A), (B), (I)), \ 208341825Sdim (__v16si)(__m512i)(S)) 209327302Sdim 210327302Sdim#define _mm512_maskz_shrdi_epi32(U, A, B, I) \ 211341825Sdim (__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \ 212341825Sdim (__v16si)_mm512_shrdi_epi32((A), (B), (I)), \ 213341825Sdim (__v16si)_mm512_setzero_si512()) 214327302Sdim 215341825Sdim#define _mm512_shrdi_epi16(A, B, I) \ 216341825Sdim 
(__m512i)__builtin_ia32_vpshrdw512((__v32hi)(__m512i)(A), \ 217341825Sdim (__v32hi)(__m512i)(B), (int)(I)) 218327302Sdim 219341825Sdim#define _mm512_mask_shrdi_epi16(S, U, A, B, I) \ 220341825Sdim (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ 221341825Sdim (__v32hi)_mm512_shrdi_epi16((A), (B), (I)), \ 222341825Sdim (__v32hi)(__m512i)(S)) 223327302Sdim 224327302Sdim#define _mm512_maskz_shrdi_epi16(U, A, B, I) \ 225341825Sdim (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ 226341825Sdim (__v32hi)_mm512_shrdi_epi16((A), (B), (I)), \ 227341825Sdim (__v32hi)_mm512_setzero_si512()) 228327302Sdim 229327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 230327302Sdim_mm512_mask_shldv_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m512i __B) 231327302Sdim{ 232327302Sdim return (__m512i) __builtin_ia32_vpshldvq512_mask ((__v8di) __S, 233327302Sdim (__v8di) __A, 234327302Sdim (__v8di) __B, 235327302Sdim __U); 236327302Sdim} 237327302Sdim 238327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 239327302Sdim_mm512_maskz_shldv_epi64(__mmask8 __U, __m512i __S, __m512i __A, __m512i __B) 240327302Sdim{ 241327302Sdim return (__m512i) __builtin_ia32_vpshldvq512_maskz ((__v8di) __S, 242327302Sdim (__v8di) __A, 243327302Sdim (__v8di) __B, 244327302Sdim __U); 245327302Sdim} 246327302Sdim 247327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 248327302Sdim_mm512_shldv_epi64(__m512i __S, __m512i __A, __m512i __B) 249327302Sdim{ 250327302Sdim return (__m512i) __builtin_ia32_vpshldvq512_mask ((__v8di) __S, 251327302Sdim (__v8di) __A, 252327302Sdim (__v8di) __B, 253327302Sdim (__mmask8) -1); 254327302Sdim} 255327302Sdim 256327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 257327302Sdim_mm512_mask_shldv_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) 258327302Sdim{ 259327302Sdim return (__m512i) __builtin_ia32_vpshldvd512_mask ((__v16si) __S, 260327302Sdim (__v16si) __A, 261327302Sdim (__v16si) __B, 262327302Sdim __U); 263327302Sdim} 264327302Sdim 
265327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 266327302Sdim_mm512_maskz_shldv_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) 267327302Sdim{ 268327302Sdim return (__m512i) __builtin_ia32_vpshldvd512_maskz ((__v16si) __S, 269327302Sdim (__v16si) __A, 270327302Sdim (__v16si) __B, 271327302Sdim __U); 272327302Sdim} 273327302Sdim 274327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 275327302Sdim_mm512_shldv_epi32(__m512i __S, __m512i __A, __m512i __B) 276327302Sdim{ 277327302Sdim return (__m512i) __builtin_ia32_vpshldvd512_mask ((__v16si) __S, 278327302Sdim (__v16si) __A, 279327302Sdim (__v16si) __B, 280327302Sdim (__mmask16) -1); 281327302Sdim} 282327302Sdim 283327302Sdim 284327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 285327302Sdim_mm512_mask_shldv_epi16(__m512i __S, __mmask32 __U, __m512i __A, __m512i __B) 286327302Sdim{ 287327302Sdim return (__m512i) __builtin_ia32_vpshldvw512_mask ((__v32hi) __S, 288327302Sdim (__v32hi) __A, 289327302Sdim (__v32hi) __B, 290327302Sdim __U); 291327302Sdim} 292327302Sdim 293327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 294327302Sdim_mm512_maskz_shldv_epi16(__mmask32 __U, __m512i __S, __m512i __A, __m512i __B) 295327302Sdim{ 296327302Sdim return (__m512i) __builtin_ia32_vpshldvw512_maskz ((__v32hi) __S, 297327302Sdim (__v32hi) __A, 298327302Sdim (__v32hi) __B, 299327302Sdim __U); 300327302Sdim} 301327302Sdim 302327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 303327302Sdim_mm512_shldv_epi16(__m512i __S, __m512i __A, __m512i __B) 304327302Sdim{ 305327302Sdim return (__m512i) __builtin_ia32_vpshldvw512_mask ((__v32hi) __S, 306327302Sdim (__v32hi) __A, 307327302Sdim (__v32hi) __B, 308327302Sdim (__mmask32) -1); 309327302Sdim} 310327302Sdim 311327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 312327302Sdim_mm512_mask_shrdv_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m512i __B) 313327302Sdim{ 314327302Sdim return (__m512i) __builtin_ia32_vpshrdvq512_mask ((__v8di) __S, 
315327302Sdim (__v8di) __A, 316327302Sdim (__v8di) __B, 317327302Sdim __U); 318327302Sdim} 319327302Sdim 320327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 321327302Sdim_mm512_maskz_shrdv_epi64(__mmask8 __U, __m512i __S, __m512i __A, __m512i __B) 322327302Sdim{ 323327302Sdim return (__m512i) __builtin_ia32_vpshrdvq512_maskz ((__v8di) __S, 324327302Sdim (__v8di) __A, 325327302Sdim (__v8di) __B, 326327302Sdim __U); 327327302Sdim} 328327302Sdim 329327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 330327302Sdim_mm512_shrdv_epi64(__m512i __S, __m512i __A, __m512i __B) 331327302Sdim{ 332327302Sdim return (__m512i) __builtin_ia32_vpshrdvq512_mask ((__v8di) __S, 333327302Sdim (__v8di) __A, 334327302Sdim (__v8di) __B, 335327302Sdim (__mmask8) -1); 336327302Sdim} 337327302Sdim 338327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 339327302Sdim_mm512_mask_shrdv_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) 340327302Sdim{ 341327302Sdim return (__m512i) __builtin_ia32_vpshrdvd512_mask ((__v16si) __S, 342327302Sdim (__v16si) __A, 343327302Sdim (__v16si) __B, 344327302Sdim __U); 345327302Sdim} 346327302Sdim 347327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 348327302Sdim_mm512_maskz_shrdv_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) 349327302Sdim{ 350327302Sdim return (__m512i) __builtin_ia32_vpshrdvd512_maskz ((__v16si) __S, 351327302Sdim (__v16si) __A, 352327302Sdim (__v16si) __B, 353327302Sdim __U); 354327302Sdim} 355327302Sdim 356327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 357327302Sdim_mm512_shrdv_epi32(__m512i __S, __m512i __A, __m512i __B) 358327302Sdim{ 359327302Sdim return (__m512i) __builtin_ia32_vpshrdvd512_mask ((__v16si) __S, 360327302Sdim (__v16si) __A, 361327302Sdim (__v16si) __B, 362327302Sdim (__mmask16) -1); 363327302Sdim} 364327302Sdim 365327302Sdim 366327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 367327302Sdim_mm512_mask_shrdv_epi16(__m512i __S, __mmask32 __U, __m512i __A, __m512i __B) 
368327302Sdim{ 369327302Sdim return (__m512i) __builtin_ia32_vpshrdvw512_mask ((__v32hi) __S, 370327302Sdim (__v32hi) __A, 371327302Sdim (__v32hi) __B, 372327302Sdim __U); 373327302Sdim} 374327302Sdim 375327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 376327302Sdim_mm512_maskz_shrdv_epi16(__mmask32 __U, __m512i __S, __m512i __A, __m512i __B) 377327302Sdim{ 378327302Sdim return (__m512i) __builtin_ia32_vpshrdvw512_maskz ((__v32hi) __S, 379327302Sdim (__v32hi) __A, 380327302Sdim (__v32hi) __B, 381327302Sdim __U); 382327302Sdim} 383327302Sdim 384327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 385327302Sdim_mm512_shrdv_epi16(__m512i __S, __m512i __A, __m512i __B) 386327302Sdim{ 387327302Sdim return (__m512i) __builtin_ia32_vpshrdvw512_mask ((__v32hi) __S, 388327302Sdim (__v32hi) __A, 389327302Sdim (__v32hi) __B, 390327302Sdim (__mmask32) -1); 391327302Sdim} 392327302Sdim 393327302Sdim 394327302Sdim#undef __DEFAULT_FN_ATTRS 395327302Sdim 396327302Sdim#endif 397327302Sdim 398