avx512vbmi2intrin.h revision 327302
1327302Sdim/*===------------- avx512vbmi2intrin.h - VBMI2 intrinsics ------------------=== 2327302Sdim * 3327302Sdim * 4327302Sdim * Permission is hereby granted, free of charge, to any person obtaining a copy 5327302Sdim * of this software and associated documentation files (the "Software"), to deal 6327302Sdim * in the Software without restriction, including without limitation the rights 7327302Sdim * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8327302Sdim * copies of the Software, and to permit persons to whom the Software is 9327302Sdim * furnished to do so, subject to the following conditions: 10327302Sdim * 11327302Sdim * The above copyright notice and this permission notice shall be included in 12327302Sdim * all copies or substantial portions of the Software. 13327302Sdim * 14327302Sdim * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15327302Sdim * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16327302Sdim * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17327302Sdim * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18327302Sdim * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19327302Sdim * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20327302Sdim * THE SOFTWARE. 21327302Sdim * 22327302Sdim *===-----------------------------------------------------------------------=== 23327302Sdim */ 24327302Sdim#ifndef __IMMINTRIN_H 25327302Sdim#error "Never use <avx512vbmi2intrin.h> directly; include <immintrin.h> instead." 26327302Sdim#endif 27327302Sdim 28327302Sdim#ifndef __AVX512VBMI2INTRIN_H 29327302Sdim#define __AVX512VBMI2INTRIN_H 30327302Sdim 31327302Sdim/* Define the default attributes for the functions in this file. */ 32327302Sdim#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi2"))) 33327302Sdim 34327302Sdim 35327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 36327302Sdim_mm512_mask_compress_epi16(__m512i __S, __mmask32 __U, __m512i __D) 37327302Sdim{ 38327302Sdim return (__m512i) __builtin_ia32_compresshi512_mask ((__v32hi) __D, 39327302Sdim (__v32hi) __S, 40327302Sdim __U); 41327302Sdim} 42327302Sdim 43327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 44327302Sdim_mm512_maskz_compress_epi16(__mmask32 __U, __m512i __D) 45327302Sdim{ 46327302Sdim return (__m512i) __builtin_ia32_compresshi512_mask ((__v32hi) __D, 47327302Sdim (__v32hi) _mm512_setzero_hi(), 48327302Sdim __U); 49327302Sdim} 50327302Sdim 51327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 52327302Sdim_mm512_mask_compress_epi8(__m512i __S, __mmask64 __U, __m512i __D) 53327302Sdim{ 54327302Sdim return (__m512i) __builtin_ia32_compressqi512_mask ((__v64qi) __D, 55327302Sdim (__v64qi) __S, 56327302Sdim __U); 57327302Sdim} 58327302Sdim 59327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 60327302Sdim_mm512_maskz_compress_epi8(__mmask64 __U, __m512i __D) 61327302Sdim{ 62327302Sdim return (__m512i) __builtin_ia32_compressqi512_mask ((__v64qi) __D, 63327302Sdim (__v64qi) _mm512_setzero_qi(), 64327302Sdim __U); 65327302Sdim} 66327302Sdim 67327302Sdimstatic __inline__ void __DEFAULT_FN_ATTRS 68327302Sdim_mm512_mask_compressstoreu_epi16(void *__P, __mmask32 __U, __m512i __D) 69327302Sdim{ 70327302Sdim __builtin_ia32_compressstorehi512_mask ((__v32hi *) __P, (__v32hi) __D, 71327302Sdim __U); 72327302Sdim} 73327302Sdim 74327302Sdimstatic __inline__ void __DEFAULT_FN_ATTRS 75327302Sdim_mm512_mask_compressstoreu_epi8(void *__P, __mmask64 __U, __m512i __D) 76327302Sdim{ 77327302Sdim __builtin_ia32_compressstoreqi512_mask ((__v64qi *) __P, (__v64qi) __D, 78327302Sdim __U); 79327302Sdim} 80327302Sdim 81327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 82327302Sdim_mm512_mask_expand_epi16(__m512i __S, __mmask32 __U, __m512i __D) 83327302Sdim{ 84327302Sdim return (__m512i) __builtin_ia32_expandhi512_mask ((__v32hi) __D, 85327302Sdim (__v32hi) __S, 86327302Sdim __U); 87327302Sdim} 88327302Sdim 89327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 90327302Sdim_mm512_maskz_expand_epi16(__mmask32 __U, __m512i __D) 91327302Sdim{ 92327302Sdim return (__m512i) __builtin_ia32_expandhi512_mask ((__v32hi) __D, 93327302Sdim (__v32hi) _mm512_setzero_hi(), 94327302Sdim __U); 95327302Sdim} 96327302Sdim 97327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 98327302Sdim_mm512_mask_expand_epi8(__m512i __S, __mmask64 __U, __m512i __D) 99327302Sdim{ 100327302Sdim return (__m512i) __builtin_ia32_expandqi512_mask ((__v64qi) __D, 101327302Sdim (__v64qi) __S, 102327302Sdim __U); 103327302Sdim} 104327302Sdim 105327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 106327302Sdim_mm512_maskz_expand_epi8(__mmask64 __U, __m512i __D) 107327302Sdim{ 108327302Sdim return (__m512i) __builtin_ia32_expandqi512_mask ((__v64qi) __D, 109327302Sdim (__v64qi) _mm512_setzero_qi(), 110327302Sdim __U); 111327302Sdim} 112327302Sdim 113327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 114327302Sdim_mm512_mask_expandloadu_epi16(__m512i __S, __mmask32 __U, void const *__P) 115327302Sdim{ 116327302Sdim return (__m512i) __builtin_ia32_expandloadhi512_mask ((const __v32hi *)__P, 117327302Sdim (__v32hi) __S, 118327302Sdim __U); 119327302Sdim} 120327302Sdim 121327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 122327302Sdim_mm512_maskz_expandloadu_epi16(__mmask32 __U, void const *__P) 123327302Sdim{ 124327302Sdim return (__m512i) __builtin_ia32_expandloadhi512_mask ((const __v32hi *)__P, 125327302Sdim (__v32hi) _mm512_setzero_hi(), 126327302Sdim __U); 127327302Sdim} 128327302Sdim 129327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 130327302Sdim_mm512_mask_expandloadu_epi8(__m512i __S, __mmask64 __U, void const *__P) 131327302Sdim{ 132327302Sdim return (__m512i) __builtin_ia32_expandloadqi512_mask ((const __v64qi *)__P, 133327302Sdim (__v64qi) __S, 134327302Sdim __U); 135327302Sdim} 136327302Sdim 137327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 138327302Sdim_mm512_maskz_expandloadu_epi8(__mmask64 __U, void const *__P) 139327302Sdim{ 140327302Sdim return (__m512i) __builtin_ia32_expandloadqi512_mask ((const __v64qi *)__P, 141327302Sdim (__v64qi) _mm512_setzero_qi(), 142327302Sdim __U); 143327302Sdim} 144327302Sdim 145327302Sdim#define _mm512_mask_shldi_epi64(S, U, A, B, I) __extension__ ({ \ 146327302Sdim (__m512i)__builtin_ia32_vpshldq512_mask((__v8di)(A), \ 147327302Sdim (__v8di)(B), \ 148327302Sdim (int)(I), \ 149327302Sdim (__v8di)(S), \ 150327302Sdim (__mmask8)(U)); }) 151327302Sdim 152327302Sdim#define _mm512_maskz_shldi_epi64(U, A, B, I) \ 153327302Sdim _mm512_mask_shldi_epi64(_mm512_setzero_hi(), (U), (A), (B), (I)) 154327302Sdim 155327302Sdim#define _mm512_shldi_epi64(A, B, I) \ 156327302Sdim _mm512_mask_shldi_epi64(_mm512_undefined(), (__mmask8)(-1), (A), (B), (I)) 157327302Sdim 158327302Sdim#define _mm512_mask_shldi_epi32(S, U, A, B, I) __extension__ ({ \ 159327302Sdim (__m512i)__builtin_ia32_vpshldd512_mask((__v16si)(A), \ 160327302Sdim (__v16si)(B), \ 161327302Sdim (int)(I), \ 162327302Sdim (__v16si)(S), \ 163327302Sdim (__mmask16)(U)); }) 164327302Sdim 165327302Sdim#define _mm512_maskz_shldi_epi32(U, A, B, I) \ 166327302Sdim _mm512_mask_shldi_epi32(_mm512_setzero_hi(), (U), (A), (B), (I)) 167327302Sdim 168327302Sdim#define _mm512_shldi_epi32(A, B, I) \ 169327302Sdim _mm512_mask_shldi_epi32(_mm512_undefined(), (__mmask16)(-1), (A), (B), (I)) 170327302Sdim 171327302Sdim#define _mm512_mask_shldi_epi16(S, U, A, B, I) __extension__ ({ \ 172327302Sdim (__m512i)__builtin_ia32_vpshldw512_mask((__v32hi)(A), \ 173327302Sdim (__v32hi)(B), \ 174327302Sdim (int)(I), \ 175327302Sdim (__v32hi)(S), \ 176327302Sdim (__mmask32)(U)); }) 177327302Sdim 178327302Sdim#define _mm512_maskz_shldi_epi16(U, A, B, I) \ 179327302Sdim _mm512_mask_shldi_epi16(_mm512_setzero_hi(), (U), (A), (B), (I)) 180327302Sdim 181327302Sdim#define _mm512_shldi_epi16(A, B, I) \ 182327302Sdim _mm512_mask_shldi_epi16(_mm512_undefined(), (__mmask32)(-1), (A), (B), (I)) 183327302Sdim 184327302Sdim#define _mm512_mask_shrdi_epi64(S, U, A, B, I) __extension__ ({ \ 185327302Sdim (__m512i)__builtin_ia32_vpshrdq512_mask((__v8di)(A), \ 186327302Sdim (__v8di)(B), \ 187327302Sdim (int)(I), \ 188327302Sdim (__v8di)(S), \ 189327302Sdim (__mmask8)(U)); }) 190327302Sdim 191327302Sdim#define _mm512_maskz_shrdi_epi64(U, A, B, I) \ 192327302Sdim _mm512_mask_shrdi_epi64(_mm512_setzero_hi(), (U), (A), (B), (I)) 193327302Sdim 194327302Sdim#define _mm512_shrdi_epi64(A, B, I) \ 195327302Sdim _mm512_mask_shrdi_epi64(_mm512_undefined(), (__mmask8)(-1), (A), (B), (I)) 196327302Sdim 197327302Sdim#define _mm512_mask_shrdi_epi32(S, U, A, B, I) __extension__ ({ \ 198327302Sdim (__m512i)__builtin_ia32_vpshrdd512_mask((__v16si)(A), \ 199327302Sdim (__v16si)(B), \ 200327302Sdim (int)(I), \ 201327302Sdim (__v16si)(S), \ 202327302Sdim (__mmask16)(U)); }) 203327302Sdim 204327302Sdim#define _mm512_maskz_shrdi_epi32(U, A, B, I) \ 205327302Sdim _mm512_mask_shrdi_epi32(_mm512_setzero_hi(), (U), (A), (B), (I)) 206327302Sdim 207327302Sdim#define _mm512_shrdi_epi32(A, B, I) \ 208327302Sdim _mm512_mask_shrdi_epi32(_mm512_undefined(), (__mmask16)(-1), (A), (B), (I)) 209327302Sdim 210327302Sdim#define _mm512_mask_shrdi_epi16(S, U, A, B, I) __extension__ ({ \ 211327302Sdim (__m512i)__builtin_ia32_vpshrdw512_mask((__v32hi)(A), \ 212327302Sdim (__v32hi)(B), \ 213327302Sdim (int)(I), \ 214327302Sdim (__v32hi)(S), \ 215327302Sdim (__mmask32)(U)); }) 216327302Sdim 217327302Sdim#define _mm512_maskz_shrdi_epi16(U, A, B, I) \ 218327302Sdim _mm512_mask_shrdi_epi16(_mm512_setzero_hi(), (U), (A), (B), (I)) 219327302Sdim 220327302Sdim#define _mm512_shrdi_epi16(A, B, I) \ 221327302Sdim _mm512_mask_shrdi_epi16(_mm512_undefined(), (__mmask32)(-1), (A), (B), (I)) 222327302Sdim 223327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 224327302Sdim_mm512_mask_shldv_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m512i __B) 225327302Sdim{ 226327302Sdim return (__m512i) __builtin_ia32_vpshldvq512_mask ((__v8di) __S, 227327302Sdim (__v8di) __A, 228327302Sdim (__v8di) __B, 229327302Sdim __U); 230327302Sdim} 231327302Sdim 232327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 233327302Sdim_mm512_maskz_shldv_epi64(__mmask8 __U, __m512i __S, __m512i __A, __m512i __B) 234327302Sdim{ 235327302Sdim return (__m512i) __builtin_ia32_vpshldvq512_maskz ((__v8di) __S, 236327302Sdim (__v8di) __A, 237327302Sdim (__v8di) __B, 238327302Sdim __U); 239327302Sdim} 240327302Sdim 241327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 242327302Sdim_mm512_shldv_epi64(__m512i __S, __m512i __A, __m512i __B) 243327302Sdim{ 244327302Sdim return (__m512i) __builtin_ia32_vpshldvq512_mask ((__v8di) __S, 245327302Sdim (__v8di) __A, 246327302Sdim (__v8di) __B, 247327302Sdim (__mmask8) -1); 248327302Sdim} 249327302Sdim 250327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 251327302Sdim_mm512_mask_shldv_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) 252327302Sdim{ 253327302Sdim return (__m512i) __builtin_ia32_vpshldvd512_mask ((__v16si) __S, 254327302Sdim (__v16si) __A, 255327302Sdim (__v16si) __B, 256327302Sdim __U); 257327302Sdim} 258327302Sdim 259327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 260327302Sdim_mm512_maskz_shldv_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) 261327302Sdim{ 262327302Sdim return (__m512i) __builtin_ia32_vpshldvd512_maskz ((__v16si) __S, 263327302Sdim (__v16si) __A, 264327302Sdim (__v16si) __B, 265327302Sdim __U); 266327302Sdim} 267327302Sdim 268327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 269327302Sdim_mm512_shldv_epi32(__m512i __S, __m512i __A, __m512i __B) 270327302Sdim{ 271327302Sdim return (__m512i) __builtin_ia32_vpshldvd512_mask ((__v16si) __S, 272327302Sdim (__v16si) __A, 273327302Sdim (__v16si) __B, 274327302Sdim (__mmask16) -1); 275327302Sdim} 276327302Sdim 277327302Sdim 278327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 279327302Sdim_mm512_mask_shldv_epi16(__m512i __S, __mmask32 __U, __m512i __A, __m512i __B) 280327302Sdim{ 281327302Sdim return (__m512i) __builtin_ia32_vpshldvw512_mask ((__v32hi) __S, 282327302Sdim (__v32hi) __A, 283327302Sdim (__v32hi) __B, 284327302Sdim __U); 285327302Sdim} 286327302Sdim 287327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 288327302Sdim_mm512_maskz_shldv_epi16(__mmask32 __U, __m512i __S, __m512i __A, __m512i __B) 289327302Sdim{ 290327302Sdim return (__m512i) __builtin_ia32_vpshldvw512_maskz ((__v32hi) __S, 291327302Sdim (__v32hi) __A, 292327302Sdim (__v32hi) __B, 293327302Sdim __U); 294327302Sdim} 295327302Sdim 296327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 297327302Sdim_mm512_shldv_epi16(__m512i __S, __m512i __A, __m512i __B) 298327302Sdim{ 299327302Sdim return (__m512i) __builtin_ia32_vpshldvw512_mask ((__v32hi) __S, 300327302Sdim (__v32hi) __A, 301327302Sdim (__v32hi) __B, 302327302Sdim (__mmask32) -1); 303327302Sdim} 304327302Sdim 305327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 306327302Sdim_mm512_mask_shrdv_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m512i __B) 307327302Sdim{ 308327302Sdim return (__m512i) __builtin_ia32_vpshrdvq512_mask ((__v8di) __S, 309327302Sdim (__v8di) __A, 310327302Sdim (__v8di) __B, 311327302Sdim __U); 312327302Sdim} 313327302Sdim 314327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 315327302Sdim_mm512_maskz_shrdv_epi64(__mmask8 __U, __m512i __S, __m512i __A, __m512i __B) 316327302Sdim{ 317327302Sdim return (__m512i) __builtin_ia32_vpshrdvq512_maskz ((__v8di) __S, 318327302Sdim (__v8di) __A, 319327302Sdim (__v8di) __B, 320327302Sdim __U); 321327302Sdim} 322327302Sdim 323327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 324327302Sdim_mm512_shrdv_epi64(__m512i __S, __m512i __A, __m512i __B) 325327302Sdim{ 326327302Sdim return (__m512i) __builtin_ia32_vpshrdvq512_mask ((__v8di) __S, 327327302Sdim (__v8di) __A, 328327302Sdim (__v8di) __B, 329327302Sdim (__mmask8) -1); 330327302Sdim} 331327302Sdim 332327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 333327302Sdim_mm512_mask_shrdv_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) 334327302Sdim{ 335327302Sdim return (__m512i) __builtin_ia32_vpshrdvd512_mask ((__v16si) __S, 336327302Sdim (__v16si) __A, 337327302Sdim (__v16si) __B, 338327302Sdim __U); 339327302Sdim} 340327302Sdim 341327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 342327302Sdim_mm512_maskz_shrdv_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) 343327302Sdim{ 344327302Sdim return (__m512i) __builtin_ia32_vpshrdvd512_maskz ((__v16si) __S, 345327302Sdim (__v16si) __A, 346327302Sdim (__v16si) __B, 347327302Sdim __U); 348327302Sdim} 349327302Sdim 350327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 351327302Sdim_mm512_shrdv_epi32(__m512i __S, __m512i __A, __m512i __B) 352327302Sdim{ 353327302Sdim return (__m512i) __builtin_ia32_vpshrdvd512_mask ((__v16si) __S, 354327302Sdim (__v16si) __A, 355327302Sdim (__v16si) __B, 356327302Sdim (__mmask16) -1); 357327302Sdim} 358327302Sdim 359327302Sdim 360327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 361327302Sdim_mm512_mask_shrdv_epi16(__m512i __S, __mmask32 __U, __m512i __A, __m512i __B) 362327302Sdim{ 363327302Sdim return (__m512i) __builtin_ia32_vpshrdvw512_mask ((__v32hi) __S, 364327302Sdim (__v32hi) __A, 365327302Sdim (__v32hi) __B, 366327302Sdim __U); 367327302Sdim} 368327302Sdim 369327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 370327302Sdim_mm512_maskz_shrdv_epi16(__mmask32 __U, __m512i __S, __m512i __A, __m512i __B) 371327302Sdim{ 372327302Sdim return (__m512i) __builtin_ia32_vpshrdvw512_maskz ((__v32hi) __S, 373327302Sdim (__v32hi) __A, 374327302Sdim (__v32hi) __B, 375327302Sdim __U); 376327302Sdim} 377327302Sdim 378327302Sdimstatic __inline__ __m512i __DEFAULT_FN_ATTRS 379327302Sdim_mm512_shrdv_epi16(__m512i __S, __m512i __A, __m512i __B) 380327302Sdim{ 381327302Sdim return (__m512i) __builtin_ia32_vpshrdvw512_mask ((__v32hi) __S, 382327302Sdim (__v32hi) __A, 383327302Sdim (__v32hi) __B, 384327302Sdim (__mmask32) -1); 385327302Sdim} 386327302Sdim 387327302Sdim 388327302Sdim#undef __DEFAULT_FN_ATTRS 389327302Sdim 390327302Sdim#endif 391327302Sdim 392