avx512vbmivlintrin.h revision 344779
1303233Sdim/*===------------- avx512vbmivlintrin.h - VBMI intrinsics ------------------=== 2303233Sdim * 3303233Sdim * 4303233Sdim * Permission is hereby granted, free of charge, to any person obtaining a copy 5303233Sdim * of this software and associated documentation files (the "Software"), to deal 6303233Sdim * in the Software without restriction, including without limitation the rights 7303233Sdim * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8303233Sdim * copies of the Software, and to permit persons to whom the Software is 9303233Sdim * furnished to do so, subject to the following conditions: 10303233Sdim * 11303233Sdim * The above copyright notice and this permission notice shall be included in 12303233Sdim * all copies or substantial portions of the Software. 13303233Sdim * 14303233Sdim * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15303233Sdim * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16303233Sdim * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17303233Sdim * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18303233Sdim * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19303233Sdim * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20303233Sdim * THE SOFTWARE. 21303233Sdim * 22303233Sdim *===-----------------------------------------------------------------------=== 23303233Sdim */ 24303233Sdim#ifndef __IMMINTRIN_H 25303233Sdim#error "Never use <avx512vbmivlintrin.h> directly; include <immintrin.h> instead." 26303233Sdim#endif 27303233Sdim 28303233Sdim#ifndef __VBMIVLINTRIN_H 29303233Sdim#define __VBMIVLINTRIN_H 30303233Sdim 31303233Sdim/* Define the default attributes for the functions in this file. */ 32341825Sdim#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi,avx512vl"), __min_vector_width__(128))) 33341825Sdim#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi,avx512vl"), __min_vector_width__(256))) 34303233Sdim 35303233Sdim 36341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS128 37341825Sdim_mm_permutex2var_epi8(__m128i __A, __m128i __I, __m128i __B) 38303233Sdim{ 39341825Sdim return (__m128i)__builtin_ia32_vpermi2varqi128((__v16qi)__A, 40341825Sdim (__v16qi)__I, 41341825Sdim (__v16qi)__B); 42303233Sdim} 43303233Sdim 44341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS128 45341825Sdim_mm_mask_permutex2var_epi8(__m128i __A, __mmask16 __U, __m128i __I, 46341825Sdim __m128i __B) 47303233Sdim{ 48341825Sdim return (__m128i)__builtin_ia32_selectb_128(__U, 49341825Sdim (__v16qi)_mm_permutex2var_epi8(__A, __I, __B), 50341825Sdim (__v16qi)__A); 51303233Sdim} 52303233Sdim 53341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS128 54341825Sdim_mm_mask2_permutex2var_epi8(__m128i __A, __m128i __I, __mmask16 __U, 55341825Sdim __m128i __B) 56303233Sdim{ 57341825Sdim return (__m128i)__builtin_ia32_selectb_128(__U, 58341825Sdim (__v16qi)_mm_permutex2var_epi8(__A, __I, __B), 59341825Sdim (__v16qi)__I); 60303233Sdim} 61303233Sdim 62341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS128 63341825Sdim_mm_maskz_permutex2var_epi8(__mmask16 __U, __m128i __A, __m128i __I, 64341825Sdim __m128i __B) 65303233Sdim{ 66341825Sdim return (__m128i)__builtin_ia32_selectb_128(__U, 67341825Sdim (__v16qi)_mm_permutex2var_epi8(__A, __I, __B), 68341825Sdim (__v16qi)_mm_setzero_si128()); 69303233Sdim} 70303233Sdim 71341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS256 72341825Sdim_mm256_permutex2var_epi8(__m256i __A, __m256i __I, __m256i __B) 73303233Sdim{ 74341825Sdim return (__m256i)__builtin_ia32_vpermi2varqi256((__v32qi)__A, (__v32qi)__I, 75341825Sdim (__v32qi)__B); 76303233Sdim} 77303233Sdim 78341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS256 79341825Sdim_mm256_mask_permutex2var_epi8(__m256i __A, __mmask32 __U, __m256i __I, 80341825Sdim __m256i __B) 81303233Sdim{ 82341825Sdim return (__m256i)__builtin_ia32_selectb_256(__U, 83341825Sdim (__v32qi)_mm256_permutex2var_epi8(__A, __I, __B), 84341825Sdim (__v32qi)__A); 85303233Sdim} 86303233Sdim 87341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS256 88341825Sdim_mm256_mask2_permutex2var_epi8(__m256i __A, __m256i __I, __mmask32 __U, 89341825Sdim __m256i __B) 90303233Sdim{ 91341825Sdim return (__m256i)__builtin_ia32_selectb_256(__U, 92341825Sdim (__v32qi)_mm256_permutex2var_epi8(__A, __I, __B), 93341825Sdim (__v32qi)__I); 94303233Sdim} 95303233Sdim 96341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS256 97341825Sdim_mm256_maskz_permutex2var_epi8(__mmask32 __U, __m256i __A, __m256i __I, 98341825Sdim __m256i __B) 99303233Sdim{ 100341825Sdim return (__m256i)__builtin_ia32_selectb_256(__U, 101341825Sdim (__v32qi)_mm256_permutex2var_epi8(__A, __I, __B), 102341825Sdim (__v32qi)_mm256_setzero_si256()); 103303233Sdim} 104303233Sdim 105341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS128 106303233Sdim_mm_permutexvar_epi8 (__m128i __A, __m128i __B) 107303233Sdim{ 108341825Sdim return (__m128i)__builtin_ia32_permvarqi128((__v16qi)__B, (__v16qi)__A); 109303233Sdim} 110303233Sdim 111341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS128 112303233Sdim_mm_maskz_permutexvar_epi8 (__mmask16 __M, __m128i __A, __m128i __B) 113303233Sdim{ 114341825Sdim return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, 115341825Sdim (__v16qi)_mm_permutexvar_epi8(__A, __B), 116341825Sdim (__v16qi)_mm_setzero_si128()); 117303233Sdim} 118303233Sdim 119341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS128 120303233Sdim_mm_mask_permutexvar_epi8 (__m128i __W, __mmask16 __M, __m128i __A, 121303233Sdim __m128i __B) 122303233Sdim{ 123341825Sdim return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, 124341825Sdim (__v16qi)_mm_permutexvar_epi8(__A, __B), 125341825Sdim (__v16qi)__W); 126303233Sdim} 127303233Sdim 128341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS256 129303233Sdim_mm256_permutexvar_epi8 (__m256i __A, __m256i __B) 130303233Sdim{ 131341825Sdim return (__m256i)__builtin_ia32_permvarqi256((__v32qi) __B, (__v32qi) __A); 132303233Sdim} 133303233Sdim 134341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS256 135303233Sdim_mm256_maskz_permutexvar_epi8 (__mmask32 __M, __m256i __A, 136303233Sdim __m256i __B) 137303233Sdim{ 138341825Sdim return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, 139341825Sdim (__v32qi)_mm256_permutexvar_epi8(__A, __B), 140341825Sdim (__v32qi)_mm256_setzero_si256()); 141303233Sdim} 142303233Sdim 143341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS256 144303233Sdim_mm256_mask_permutexvar_epi8 (__m256i __W, __mmask32 __M, __m256i __A, 145303233Sdim __m256i __B) 146303233Sdim{ 147341825Sdim return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, 148341825Sdim (__v32qi)_mm256_permutexvar_epi8(__A, __B), 149341825Sdim (__v32qi)__W); 150303233Sdim} 151303233Sdim 152341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS128 153344779Sdim_mm_multishift_epi64_epi8(__m128i __X, __m128i __Y) 154303233Sdim{ 155344779Sdim return (__m128i)__builtin_ia32_vpmultishiftqb128((__v16qi)__X, (__v16qi)__Y); 156303233Sdim} 157303233Sdim 158341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS128 159344779Sdim_mm_mask_multishift_epi64_epi8(__m128i __W, __mmask16 __M, __m128i __X, 160344779Sdim __m128i __Y) 161303233Sdim{ 162344779Sdim return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, 163344779Sdim (__v16qi)_mm_multishift_epi64_epi8(__X, __Y), 164344779Sdim (__v16qi)__W); 165303233Sdim} 166303233Sdim 167341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS128 168344779Sdim_mm_maskz_multishift_epi64_epi8(__mmask16 __M, __m128i __X, __m128i __Y) 169303233Sdim{ 170344779Sdim return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, 171344779Sdim (__v16qi)_mm_multishift_epi64_epi8(__X, __Y), 172344779Sdim (__v16qi)_mm_setzero_si128()); 173303233Sdim} 174303233Sdim 175341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS256 176344779Sdim_mm256_multishift_epi64_epi8(__m256i __X, __m256i __Y) 177303233Sdim{ 178344779Sdim return (__m256i)__builtin_ia32_vpmultishiftqb256((__v32qi)__X, (__v32qi)__Y); 179303233Sdim} 180303233Sdim 181341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS256 182344779Sdim_mm256_mask_multishift_epi64_epi8(__m256i __W, __mmask32 __M, __m256i __X, 183344779Sdim __m256i __Y) 184303233Sdim{ 185344779Sdim return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, 186344779Sdim (__v32qi)_mm256_multishift_epi64_epi8(__X, __Y), 187344779Sdim (__v32qi)__W); 188303233Sdim} 189303233Sdim 190341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS256 191344779Sdim_mm256_maskz_multishift_epi64_epi8(__mmask32 __M, __m256i __X, __m256i __Y) 192303233Sdim{ 193344779Sdim return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, 194344779Sdim (__v32qi)_mm256_multishift_epi64_epi8(__X, __Y), 195344779Sdim (__v32qi)_mm256_setzero_si256()); 196303233Sdim} 197303233Sdim 198303233Sdim 199341825Sdim#undef __DEFAULT_FN_ATTRS128 200341825Sdim#undef __DEFAULT_FN_ATTRS256 201303233Sdim 202303233Sdim#endif 203