avx512vbmivlintrin.h revision 303233
1303233Sdim/*===------------- avx512vbmivlintrin.h - VBMI intrinsics ------------------=== 2303233Sdim * 3303233Sdim * 4303233Sdim * Permission is hereby granted, free of charge, to any person obtaining a copy 5303233Sdim * of this software and associated documentation files (the "Software"), to deal 6303233Sdim * in the Software without restriction, including without limitation the rights 7303233Sdim * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8303233Sdim * copies of the Software, and to permit persons to whom the Software is 9303233Sdim * furnished to do so, subject to the following conditions: 10303233Sdim * 11303233Sdim * The above copyright notice and this permission notice shall be included in 12303233Sdim * all copies or substantial portions of the Software. 13303233Sdim * 14303233Sdim * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15303233Sdim * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16303233Sdim * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17303233Sdim * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18303233Sdim * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19303233Sdim * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20303233Sdim * THE SOFTWARE. 21303233Sdim * 22303233Sdim *===-----------------------------------------------------------------------=== 23303233Sdim */ 24303233Sdim#ifndef __IMMINTRIN_H 25303233Sdim#error "Never use <avx512vbmivlintrin.h> directly; include <immintrin.h> instead." 26303233Sdim#endif 27303233Sdim 28303233Sdim#ifndef __VBMIVLINTRIN_H 29303233Sdim#define __VBMIVLINTRIN_H 30303233Sdim 31303233Sdim/* Define the default attributes for the functions in this file. */ 32303233Sdim#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi,avx512vl"))) 33303233Sdim 34303233Sdim 35303233Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 36303233Sdim_mm_mask2_permutex2var_epi8 (__m128i __A, __m128i __I, __mmask16 __U, 37303233Sdim __m128i __B) 38303233Sdim{ 39303233Sdim return (__m128i) __builtin_ia32_vpermi2varqi128_mask ((__v16qi) __A, 40303233Sdim (__v16qi) __I 41303233Sdim /* idx */ , 42303233Sdim (__v16qi) __B, 43303233Sdim (__mmask16) 44303233Sdim __U); 45303233Sdim} 46303233Sdim 47303233Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS 48303233Sdim_mm256_mask2_permutex2var_epi8 (__m256i __A, __m256i __I, 49303233Sdim __mmask32 __U, __m256i __B) 50303233Sdim{ 51303233Sdim return (__m256i) __builtin_ia32_vpermi2varqi256_mask ((__v32qi) __A, 52303233Sdim (__v32qi) __I 53303233Sdim /* idx */ , 54303233Sdim (__v32qi) __B, 55303233Sdim (__mmask32) 56303233Sdim __U); 57303233Sdim} 58303233Sdim 59303233Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 60303233Sdim_mm_permutex2var_epi8 (__m128i __A, __m128i __I, __m128i __B) 61303233Sdim{ 62303233Sdim return (__m128i) __builtin_ia32_vpermt2varqi128_mask ((__v16qi) __I 63303233Sdim /* idx */ , 64303233Sdim (__v16qi) __A, 65303233Sdim (__v16qi) __B, 66303233Sdim (__mmask16) - 67303233Sdim 1); 68303233Sdim} 69303233Sdim 70303233Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 71303233Sdim_mm_mask_permutex2var_epi8 (__m128i __A, __mmask16 __U, __m128i __I, 72303233Sdim __m128i __B) 73303233Sdim{ 74303233Sdim return (__m128i) __builtin_ia32_vpermt2varqi128_mask ((__v16qi) __I 75303233Sdim /* idx */ , 76303233Sdim (__v16qi) __A, 77303233Sdim (__v16qi) __B, 78303233Sdim (__mmask16) 79303233Sdim __U); 80303233Sdim} 81303233Sdim 82303233Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 83303233Sdim_mm_maskz_permutex2var_epi8 (__mmask16 __U, __m128i __A, __m128i __I, 84303233Sdim __m128i __B) 85303233Sdim{ 86303233Sdim return (__m128i) __builtin_ia32_vpermt2varqi128_maskz ((__v16qi) __I 87303233Sdim /* idx */ , 88303233Sdim (__v16qi) __A, 89303233Sdim (__v16qi) __B, 90303233Sdim (__mmask16) 91303233Sdim __U); 92303233Sdim} 93303233Sdim 94303233Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS 95303233Sdim_mm256_permutex2var_epi8 (__m256i __A, __m256i __I, __m256i __B) 96303233Sdim{ 97303233Sdim return (__m256i) __builtin_ia32_vpermt2varqi256_mask ((__v32qi) __I 98303233Sdim /* idx */ , 99303233Sdim (__v32qi) __A, 100303233Sdim (__v32qi) __B, 101303233Sdim (__mmask32) - 102303233Sdim 1); 103303233Sdim} 104303233Sdim 105303233Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS 106303233Sdim_mm256_mask_permutex2var_epi8 (__m256i __A, __mmask32 __U, 107303233Sdim __m256i __I, __m256i __B) 108303233Sdim{ 109303233Sdim return (__m256i) __builtin_ia32_vpermt2varqi256_mask ((__v32qi) __I 110303233Sdim /* idx */ , 111303233Sdim (__v32qi) __A, 112303233Sdim (__v32qi) __B, 113303233Sdim (__mmask32) 114303233Sdim __U); 115303233Sdim} 116303233Sdim 117303233Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS 118303233Sdim_mm256_maskz_permutex2var_epi8 (__mmask32 __U, __m256i __A, 119303233Sdim __m256i __I, __m256i __B) 120303233Sdim{ 121303233Sdim return (__m256i) __builtin_ia32_vpermt2varqi256_maskz ((__v32qi) __I 122303233Sdim /* idx */ , 123303233Sdim (__v32qi) __A, 124303233Sdim (__v32qi) __B, 125303233Sdim (__mmask32) 126303233Sdim __U); 127303233Sdim} 128303233Sdim 129303233Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 130303233Sdim_mm_permutexvar_epi8 (__m128i __A, __m128i __B) 131303233Sdim{ 132303233Sdim return (__m128i) __builtin_ia32_permvarqi128_mask ((__v16qi) __B, 133303233Sdim (__v16qi) __A, 134303233Sdim (__v16qi) _mm_undefined_si128 (), 135303233Sdim (__mmask16) -1); 136303233Sdim} 137303233Sdim 138303233Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 139303233Sdim_mm_maskz_permutexvar_epi8 (__mmask16 __M, __m128i __A, __m128i __B) 140303233Sdim{ 141303233Sdim return (__m128i) __builtin_ia32_permvarqi128_mask ((__v16qi) __B, 142303233Sdim (__v16qi) __A, 143303233Sdim (__v16qi) _mm_setzero_si128 (), 144303233Sdim (__mmask16) __M); 145303233Sdim} 146303233Sdim 147303233Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 148303233Sdim_mm_mask_permutexvar_epi8 (__m128i __W, __mmask16 __M, __m128i __A, 149303233Sdim __m128i __B) 150303233Sdim{ 151303233Sdim return (__m128i) __builtin_ia32_permvarqi128_mask ((__v16qi) __B, 152303233Sdim (__v16qi) __A, 153303233Sdim (__v16qi) __W, 154303233Sdim (__mmask16) __M); 155303233Sdim} 156303233Sdim 157303233Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS 158303233Sdim_mm256_permutexvar_epi8 (__m256i __A, __m256i __B) 159303233Sdim{ 160303233Sdim return (__m256i) __builtin_ia32_permvarqi256_mask ((__v32qi) __B, 161303233Sdim (__v32qi) __A, 162303233Sdim (__v32qi) _mm256_undefined_si256 (), 163303233Sdim (__mmask32) -1); 164303233Sdim} 165303233Sdim 166303233Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS 167303233Sdim_mm256_maskz_permutexvar_epi8 (__mmask32 __M, __m256i __A, 168303233Sdim __m256i __B) 169303233Sdim{ 170303233Sdim return (__m256i) __builtin_ia32_permvarqi256_mask ((__v32qi) __B, 171303233Sdim (__v32qi) __A, 172303233Sdim (__v32qi) _mm256_setzero_si256 (), 173303233Sdim (__mmask32) __M); 174303233Sdim} 175303233Sdim 176303233Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS 177303233Sdim_mm256_mask_permutexvar_epi8 (__m256i __W, __mmask32 __M, __m256i __A, 178303233Sdim __m256i __B) 179303233Sdim{ 180303233Sdim return (__m256i) __builtin_ia32_permvarqi256_mask ((__v32qi) __B, 181303233Sdim (__v32qi) __A, 182303233Sdim (__v32qi) __W, 183303233Sdim (__mmask32) __M); 184303233Sdim} 185303233Sdim 186303233Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 187303233Sdim_mm_mask_multishift_epi64_epi8 (__m128i __W, __mmask16 __M, __m128i __X, __m128i __Y) 188303233Sdim{ 189303233Sdim return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X, 190303233Sdim (__v16qi) __Y, 191303233Sdim (__v16qi) __W, 192303233Sdim (__mmask16) __M); 193303233Sdim} 194303233Sdim 195303233Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 196303233Sdim_mm_maskz_multishift_epi64_epi8 (__mmask16 __M, __m128i __X, __m128i __Y) 197303233Sdim{ 198303233Sdim return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X, 199303233Sdim (__v16qi) __Y, 200303233Sdim (__v16qi) 201303233Sdim _mm_setzero_si128 (), 202303233Sdim (__mmask16) __M); 203303233Sdim} 204303233Sdim 205303233Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 206303233Sdim_mm_multishift_epi64_epi8 (__m128i __X, __m128i __Y) 207303233Sdim{ 208303233Sdim return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X, 209303233Sdim (__v16qi) __Y, 210303233Sdim (__v16qi) 211303233Sdim _mm_undefined_si128 (), 212303233Sdim (__mmask16) -1); 213303233Sdim} 214303233Sdim 215303233Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS 216303233Sdim_mm256_mask_multishift_epi64_epi8 (__m256i __W, __mmask32 __M, __m256i __X, __m256i __Y) 217303233Sdim{ 218303233Sdim return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v32qi) __X, 219303233Sdim (__v32qi) __Y, 220303233Sdim (__v32qi) __W, 221303233Sdim (__mmask32) __M); 222303233Sdim} 223303233Sdim 224303233Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS 225303233Sdim_mm256_maskz_multishift_epi64_epi8 (__mmask32 __M, __m256i __X, __m256i __Y) 226303233Sdim{ 227303233Sdim return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v32qi) __X, 228303233Sdim (__v32qi) __Y, 229303233Sdim (__v32qi) 230303233Sdim _mm256_setzero_si256 (), 231303233Sdim (__mmask32) __M); 232303233Sdim} 233303233Sdim 234303233Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS 235303233Sdim_mm256_multishift_epi64_epi8 (__m256i __X, __m256i __Y) 236303233Sdim{ 237303233Sdim return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v32qi) __X, 238303233Sdim (__v32qi) __Y, 239303233Sdim (__v32qi) 240303233Sdim _mm256_undefined_si256 (), 241303233Sdim (__mmask32) -1); 242303233Sdim} 243303233Sdim 244303233Sdim 245303233Sdim#undef __DEFAULT_FN_ATTRS 246303233Sdim 247303233Sdim#endif 248