/* avx512vbmi2intrin.h revision 327330 */
1/*===------------- avx512vbmi2intrin.h - VBMI2 intrinsics ------------------=== 2 * 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a copy 5 * of this software and associated documentation files (the "Software"), to deal 6 * in the Software without restriction, including without limitation the rights 7 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 * copies of the Software, and to permit persons to whom the Software is 9 * furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 * THE SOFTWARE. 21 * 22 *===-----------------------------------------------------------------------=== 23 */ 24#ifndef __IMMINTRIN_H 25#error "Never use <avx512vbmi2intrin.h> directly; include <immintrin.h> instead." 26#endif 27 28#ifndef __AVX512VBMI2INTRIN_H 29#define __AVX512VBMI2INTRIN_H 30 31/* Define the default attributes for the functions in this file. 
*/ 32#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi2"))) 33 34 35static __inline__ __m512i __DEFAULT_FN_ATTRS 36_mm512_mask_compress_epi16(__m512i __S, __mmask32 __U, __m512i __D) 37{ 38 return (__m512i) __builtin_ia32_compresshi512_mask ((__v32hi) __D, 39 (__v32hi) __S, 40 __U); 41} 42 43static __inline__ __m512i __DEFAULT_FN_ATTRS 44_mm512_maskz_compress_epi16(__mmask32 __U, __m512i __D) 45{ 46 return (__m512i) __builtin_ia32_compresshi512_mask ((__v32hi) __D, 47 (__v32hi) _mm512_setzero_hi(), 48 __U); 49} 50 51static __inline__ __m512i __DEFAULT_FN_ATTRS 52_mm512_mask_compress_epi8(__m512i __S, __mmask64 __U, __m512i __D) 53{ 54 return (__m512i) __builtin_ia32_compressqi512_mask ((__v64qi) __D, 55 (__v64qi) __S, 56 __U); 57} 58 59static __inline__ __m512i __DEFAULT_FN_ATTRS 60_mm512_maskz_compress_epi8(__mmask64 __U, __m512i __D) 61{ 62 return (__m512i) __builtin_ia32_compressqi512_mask ((__v64qi) __D, 63 (__v64qi) _mm512_setzero_qi(), 64 __U); 65} 66 67static __inline__ void __DEFAULT_FN_ATTRS 68_mm512_mask_compressstoreu_epi16(void *__P, __mmask32 __U, __m512i __D) 69{ 70 __builtin_ia32_compressstorehi512_mask ((__v32hi *) __P, (__v32hi) __D, 71 __U); 72} 73 74static __inline__ void __DEFAULT_FN_ATTRS 75_mm512_mask_compressstoreu_epi8(void *__P, __mmask64 __U, __m512i __D) 76{ 77 __builtin_ia32_compressstoreqi512_mask ((__v64qi *) __P, (__v64qi) __D, 78 __U); 79} 80 81static __inline__ __m512i __DEFAULT_FN_ATTRS 82_mm512_mask_expand_epi16(__m512i __S, __mmask32 __U, __m512i __D) 83{ 84 return (__m512i) __builtin_ia32_expandhi512_mask ((__v32hi) __D, 85 (__v32hi) __S, 86 __U); 87} 88 89static __inline__ __m512i __DEFAULT_FN_ATTRS 90_mm512_maskz_expand_epi16(__mmask32 __U, __m512i __D) 91{ 92 return (__m512i) __builtin_ia32_expandhi512_mask ((__v32hi) __D, 93 (__v32hi) _mm512_setzero_hi(), 94 __U); 95} 96 97static __inline__ __m512i __DEFAULT_FN_ATTRS 98_mm512_mask_expand_epi8(__m512i __S, __mmask64 __U, __m512i 
__D) 99{ 100 return (__m512i) __builtin_ia32_expandqi512_mask ((__v64qi) __D, 101 (__v64qi) __S, 102 __U); 103} 104 105static __inline__ __m512i __DEFAULT_FN_ATTRS 106_mm512_maskz_expand_epi8(__mmask64 __U, __m512i __D) 107{ 108 return (__m512i) __builtin_ia32_expandqi512_mask ((__v64qi) __D, 109 (__v64qi) _mm512_setzero_qi(), 110 __U); 111} 112 113static __inline__ __m512i __DEFAULT_FN_ATTRS 114_mm512_mask_expandloadu_epi16(__m512i __S, __mmask32 __U, void const *__P) 115{ 116 return (__m512i) __builtin_ia32_expandloadhi512_mask ((const __v32hi *)__P, 117 (__v32hi) __S, 118 __U); 119} 120 121static __inline__ __m512i __DEFAULT_FN_ATTRS 122_mm512_maskz_expandloadu_epi16(__mmask32 __U, void const *__P) 123{ 124 return (__m512i) __builtin_ia32_expandloadhi512_mask ((const __v32hi *)__P, 125 (__v32hi) _mm512_setzero_hi(), 126 __U); 127} 128 129static __inline__ __m512i __DEFAULT_FN_ATTRS 130_mm512_mask_expandloadu_epi8(__m512i __S, __mmask64 __U, void const *__P) 131{ 132 return (__m512i) __builtin_ia32_expandloadqi512_mask ((const __v64qi *)__P, 133 (__v64qi) __S, 134 __U); 135} 136 137static __inline__ __m512i __DEFAULT_FN_ATTRS 138_mm512_maskz_expandloadu_epi8(__mmask64 __U, void const *__P) 139{ 140 return (__m512i) __builtin_ia32_expandloadqi512_mask ((const __v64qi *)__P, 141 (__v64qi) _mm512_setzero_qi(), 142 __U); 143} 144 145#define _mm512_mask_shldi_epi64(S, U, A, B, I) __extension__ ({ \ 146 (__m512i)__builtin_ia32_vpshldq512_mask((__v8di)(A), \ 147 (__v8di)(B), \ 148 (int)(I), \ 149 (__v8di)(S), \ 150 (__mmask8)(U)); }) 151 152#define _mm512_maskz_shldi_epi64(U, A, B, I) \ 153 _mm512_mask_shldi_epi64(_mm512_setzero_hi(), (U), (A), (B), (I)) 154 155#define _mm512_shldi_epi64(A, B, I) \ 156 _mm512_mask_shldi_epi64(_mm512_undefined(), (__mmask8)(-1), (A), (B), (I)) 157 158#define _mm512_mask_shldi_epi32(S, U, A, B, I) __extension__ ({ \ 159 (__m512i)__builtin_ia32_vpshldd512_mask((__v16si)(A), \ 160 (__v16si)(B), \ 161 (int)(I), \ 162 (__v16si)(S), \ 163 
(__mmask16)(U)); }) 164 165#define _mm512_maskz_shldi_epi32(U, A, B, I) \ 166 _mm512_mask_shldi_epi32(_mm512_setzero_hi(), (U), (A), (B), (I)) 167 168#define _mm512_shldi_epi32(A, B, I) \ 169 _mm512_mask_shldi_epi32(_mm512_undefined(), (__mmask16)(-1), (A), (B), (I)) 170 171#define _mm512_mask_shldi_epi16(S, U, A, B, I) __extension__ ({ \ 172 (__m512i)__builtin_ia32_vpshldw512_mask((__v32hi)(A), \ 173 (__v32hi)(B), \ 174 (int)(I), \ 175 (__v32hi)(S), \ 176 (__mmask32)(U)); }) 177 178#define _mm512_maskz_shldi_epi16(U, A, B, I) \ 179 _mm512_mask_shldi_epi16(_mm512_setzero_hi(), (U), (A), (B), (I)) 180 181#define _mm512_shldi_epi16(A, B, I) \ 182 _mm512_mask_shldi_epi16(_mm512_undefined(), (__mmask32)(-1), (A), (B), (I)) 183 184#define _mm512_mask_shrdi_epi64(S, U, A, B, I) __extension__ ({ \ 185 (__m512i)__builtin_ia32_vpshrdq512_mask((__v8di)(A), \ 186 (__v8di)(B), \ 187 (int)(I), \ 188 (__v8di)(S), \ 189 (__mmask8)(U)); }) 190 191#define _mm512_maskz_shrdi_epi64(U, A, B, I) \ 192 _mm512_mask_shrdi_epi64(_mm512_setzero_hi(), (U), (A), (B), (I)) 193 194#define _mm512_shrdi_epi64(A, B, I) \ 195 _mm512_mask_shrdi_epi64(_mm512_undefined(), (__mmask8)(-1), (A), (B), (I)) 196 197#define _mm512_mask_shrdi_epi32(S, U, A, B, I) __extension__ ({ \ 198 (__m512i)__builtin_ia32_vpshrdd512_mask((__v16si)(A), \ 199 (__v16si)(B), \ 200 (int)(I), \ 201 (__v16si)(S), \ 202 (__mmask16)(U)); }) 203 204#define _mm512_maskz_shrdi_epi32(U, A, B, I) \ 205 _mm512_mask_shrdi_epi32(_mm512_setzero_hi(), (U), (A), (B), (I)) 206 207#define _mm512_shrdi_epi32(A, B, I) \ 208 _mm512_mask_shrdi_epi32(_mm512_undefined(), (__mmask16)(-1), (A), (B), (I)) 209 210#define _mm512_mask_shrdi_epi16(S, U, A, B, I) __extension__ ({ \ 211 (__m512i)__builtin_ia32_vpshrdw512_mask((__v32hi)(A), \ 212 (__v32hi)(B), \ 213 (int)(I), \ 214 (__v32hi)(S), \ 215 (__mmask32)(U)); }) 216 217#define _mm512_maskz_shrdi_epi16(U, A, B, I) \ 218 _mm512_mask_shrdi_epi16(_mm512_setzero_hi(), (U), (A), (B), (I)) 219 220#define 
_mm512_shrdi_epi16(A, B, I) \ 221 _mm512_mask_shrdi_epi16(_mm512_undefined(), (__mmask32)(-1), (A), (B), (I)) 222 223static __inline__ __m512i __DEFAULT_FN_ATTRS 224_mm512_mask_shldv_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m512i __B) 225{ 226 return (__m512i) __builtin_ia32_vpshldvq512_mask ((__v8di) __S, 227 (__v8di) __A, 228 (__v8di) __B, 229 __U); 230} 231 232static __inline__ __m512i __DEFAULT_FN_ATTRS 233_mm512_maskz_shldv_epi64(__mmask8 __U, __m512i __S, __m512i __A, __m512i __B) 234{ 235 return (__m512i) __builtin_ia32_vpshldvq512_maskz ((__v8di) __S, 236 (__v8di) __A, 237 (__v8di) __B, 238 __U); 239} 240 241static __inline__ __m512i __DEFAULT_FN_ATTRS 242_mm512_shldv_epi64(__m512i __S, __m512i __A, __m512i __B) 243{ 244 return (__m512i) __builtin_ia32_vpshldvq512_mask ((__v8di) __S, 245 (__v8di) __A, 246 (__v8di) __B, 247 (__mmask8) -1); 248} 249 250static __inline__ __m512i __DEFAULT_FN_ATTRS 251_mm512_mask_shldv_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) 252{ 253 return (__m512i) __builtin_ia32_vpshldvd512_mask ((__v16si) __S, 254 (__v16si) __A, 255 (__v16si) __B, 256 __U); 257} 258 259static __inline__ __m512i __DEFAULT_FN_ATTRS 260_mm512_maskz_shldv_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) 261{ 262 return (__m512i) __builtin_ia32_vpshldvd512_maskz ((__v16si) __S, 263 (__v16si) __A, 264 (__v16si) __B, 265 __U); 266} 267 268static __inline__ __m512i __DEFAULT_FN_ATTRS 269_mm512_shldv_epi32(__m512i __S, __m512i __A, __m512i __B) 270{ 271 return (__m512i) __builtin_ia32_vpshldvd512_mask ((__v16si) __S, 272 (__v16si) __A, 273 (__v16si) __B, 274 (__mmask16) -1); 275} 276 277 278static __inline__ __m512i __DEFAULT_FN_ATTRS 279_mm512_mask_shldv_epi16(__m512i __S, __mmask32 __U, __m512i __A, __m512i __B) 280{ 281 return (__m512i) __builtin_ia32_vpshldvw512_mask ((__v32hi) __S, 282 (__v32hi) __A, 283 (__v32hi) __B, 284 __U); 285} 286 287static __inline__ __m512i __DEFAULT_FN_ATTRS 288_mm512_maskz_shldv_epi16(__mmask32 
__U, __m512i __S, __m512i __A, __m512i __B) 289{ 290 return (__m512i) __builtin_ia32_vpshldvw512_maskz ((__v32hi) __S, 291 (__v32hi) __A, 292 (__v32hi) __B, 293 __U); 294} 295 296static __inline__ __m512i __DEFAULT_FN_ATTRS 297_mm512_shldv_epi16(__m512i __S, __m512i __A, __m512i __B) 298{ 299 return (__m512i) __builtin_ia32_vpshldvw512_mask ((__v32hi) __S, 300 (__v32hi) __A, 301 (__v32hi) __B, 302 (__mmask32) -1); 303} 304 305static __inline__ __m512i __DEFAULT_FN_ATTRS 306_mm512_mask_shrdv_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m512i __B) 307{ 308 return (__m512i) __builtin_ia32_vpshrdvq512_mask ((__v8di) __S, 309 (__v8di) __A, 310 (__v8di) __B, 311 __U); 312} 313 314static __inline__ __m512i __DEFAULT_FN_ATTRS 315_mm512_maskz_shrdv_epi64(__mmask8 __U, __m512i __S, __m512i __A, __m512i __B) 316{ 317 return (__m512i) __builtin_ia32_vpshrdvq512_maskz ((__v8di) __S, 318 (__v8di) __A, 319 (__v8di) __B, 320 __U); 321} 322 323static __inline__ __m512i __DEFAULT_FN_ATTRS 324_mm512_shrdv_epi64(__m512i __S, __m512i __A, __m512i __B) 325{ 326 return (__m512i) __builtin_ia32_vpshrdvq512_mask ((__v8di) __S, 327 (__v8di) __A, 328 (__v8di) __B, 329 (__mmask8) -1); 330} 331 332static __inline__ __m512i __DEFAULT_FN_ATTRS 333_mm512_mask_shrdv_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) 334{ 335 return (__m512i) __builtin_ia32_vpshrdvd512_mask ((__v16si) __S, 336 (__v16si) __A, 337 (__v16si) __B, 338 __U); 339} 340 341static __inline__ __m512i __DEFAULT_FN_ATTRS 342_mm512_maskz_shrdv_epi32(__mmask16 __U, __m512i __S, __m512i __A, __m512i __B) 343{ 344 return (__m512i) __builtin_ia32_vpshrdvd512_maskz ((__v16si) __S, 345 (__v16si) __A, 346 (__v16si) __B, 347 __U); 348} 349 350static __inline__ __m512i __DEFAULT_FN_ATTRS 351_mm512_shrdv_epi32(__m512i __S, __m512i __A, __m512i __B) 352{ 353 return (__m512i) __builtin_ia32_vpshrdvd512_mask ((__v16si) __S, 354 (__v16si) __A, 355 (__v16si) __B, 356 (__mmask16) -1); 357} 358 359 360static __inline__ __m512i 
__DEFAULT_FN_ATTRS 361_mm512_mask_shrdv_epi16(__m512i __S, __mmask32 __U, __m512i __A, __m512i __B) 362{ 363 return (__m512i) __builtin_ia32_vpshrdvw512_mask ((__v32hi) __S, 364 (__v32hi) __A, 365 (__v32hi) __B, 366 __U); 367} 368 369static __inline__ __m512i __DEFAULT_FN_ATTRS 370_mm512_maskz_shrdv_epi16(__mmask32 __U, __m512i __S, __m512i __A, __m512i __B) 371{ 372 return (__m512i) __builtin_ia32_vpshrdvw512_maskz ((__v32hi) __S, 373 (__v32hi) __A, 374 (__v32hi) __B, 375 __U); 376} 377 378static __inline__ __m512i __DEFAULT_FN_ATTRS 379_mm512_shrdv_epi16(__m512i __S, __m512i __A, __m512i __B) 380{ 381 return (__m512i) __builtin_ia32_vpshrdvw512_mask ((__v32hi) __S, 382 (__v32hi) __A, 383 (__v32hi) __B, 384 (__mmask32) -1); 385} 386 387 388#undef __DEFAULT_FN_ATTRS 389 390#endif 391 392