avx512vbmivlintrin.h revision 344779
1139749Simp/*===------------- avx512vbmivlintrin.h - VBMI intrinsics ------------------=== 2113584Ssimokawa * 3103285Sikob * 4103285Sikob * Permission is hereby granted, free of charge, to any person obtaining a copy 5103285Sikob * of this software and associated documentation files (the "Software"), to deal 6103285Sikob * in the Software without restriction, including without limitation the rights 7103285Sikob * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8103285Sikob * copies of the Software, and to permit persons to whom the Software is 9103285Sikob * furnished to do so, subject to the following conditions: 10103285Sikob * 11103285Sikob * The above copyright notice and this permission notice shall be included in 12103285Sikob * all copies or substantial portions of the Software. 13103285Sikob * 14103285Sikob * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15103285Sikob * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16103285Sikob * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17103285Sikob * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18103285Sikob * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19103285Sikob * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20103285Sikob * THE SOFTWARE. 21103285Sikob * 22103285Sikob *===-----------------------------------------------------------------------=== 23103285Sikob */ 24103285Sikob#ifndef __IMMINTRIN_H 25103285Sikob#error "Never use <avx512vbmivlintrin.h> directly; include <immintrin.h> instead." 26103285Sikob#endif 27103285Sikob 28103285Sikob#ifndef __VBMIVLINTRIN_H 29103285Sikob#define __VBMIVLINTRIN_H 30103285Sikob 31103285Sikob/* Define the default attributes for the functions in this file. */ 32103285Sikob#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi,avx512vl"), __min_vector_width__(128))) 33103285Sikob#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi,avx512vl"), __min_vector_width__(256))) 34103285Sikob 35227843Smarius 36227843Smariusstatic __inline__ __m128i __DEFAULT_FN_ATTRS128 37227843Smarius_mm_permutex2var_epi8(__m128i __A, __m128i __I, __m128i __B) 38103285Sikob{ 39103285Sikob return (__m128i)__builtin_ia32_vpermi2varqi128((__v16qi)__A, 40103285Sikob (__v16qi)__I, 41103285Sikob (__v16qi)__B); 42193066Sjamie} 43103285Sikob 44129879Sphkstatic __inline__ __m128i __DEFAULT_FN_ATTRS128 45103285Sikob_mm_mask_permutex2var_epi8(__m128i __A, __mmask16 __U, __m128i __I, 46103285Sikob __m128i __B) 47103285Sikob{ 48169806Ssimokawa return (__m128i)__builtin_ia32_selectb_128(__U, 49103285Sikob (__v16qi)_mm_permutex2var_epi8(__A, __I, __B), 50170374Ssimokawa (__v16qi)__A); 51170374Ssimokawa} 52127468Ssimokawa 53117067Ssimokawastatic __inline__ __m128i __DEFAULT_FN_ATTRS128 54117067Ssimokawa_mm_mask2_permutex2var_epi8(__m128i __A, __m128i __I, __mmask16 __U, 55103285Sikob __m128i __B) 56103285Sikob{ 57113584Ssimokawa return (__m128i)__builtin_ia32_selectb_128(__U, 58103285Sikob (__v16qi)_mm_permutex2var_epi8(__A, __I, __B), 59127468Ssimokawa (__v16qi)__I); 60127468Ssimokawa} 61127468Ssimokawa 62127468Ssimokawastatic __inline__ __m128i __DEFAULT_FN_ATTRS128 63127468Ssimokawa_mm_maskz_permutex2var_epi8(__mmask16 __U, __m128i __A, __m128i __I, 64127468Ssimokawa __m128i __B) 65127468Ssimokawa{ 66103285Sikob return (__m128i)__builtin_ia32_selectb_128(__U, 67103285Sikob (__v16qi)_mm_permutex2var_epi8(__A, __I, __B), 68110072Ssimokawa (__v16qi)_mm_setzero_si128()); 69103285Sikob} 70103285Sikob 71127468Ssimokawastatic __inline__ __m256i __DEFAULT_FN_ATTRS256 72103285Sikob_mm256_permutex2var_epi8(__m256i __A, __m256i __I, __m256i __B) 73116376Ssimokawa{ 74116376Ssimokawa return (__m256i)__builtin_ia32_vpermi2varqi256((__v32qi)__A, (__v32qi)__I, 75116376Ssimokawa (__v32qi)__B); 76116376Ssimokawa} 77116376Ssimokawa 78116376Ssimokawastatic __inline__ __m256i __DEFAULT_FN_ATTRS256 79116376Ssimokawa_mm256_mask_permutex2var_epi8(__m256i __A, __mmask32 __U, __m256i __I, 80188704Ssbruno __m256i __B) 81103285Sikob{ 82108281Ssimokawa return (__m256i)__builtin_ia32_selectb_256(__U, 83109736Ssimokawa (__v32qi)_mm256_permutex2var_epi8(__A, __I, __B), 84109736Ssimokawa (__v32qi)__A); 85109736Ssimokawa} 86120850Ssimokawa 87120850Ssimokawastatic __inline__ __m256i __DEFAULT_FN_ATTRS256 88103285Sikob_mm256_mask2_permutex2var_epi8(__m256i __A, __m256i __I, __mmask32 __U, 89110195Ssimokawa __m256i __B) 90110269Ssimokawa{ 91110195Ssimokawa return (__m256i)__builtin_ia32_selectb_256(__U, 92103285Sikob (__v32qi)_mm256_permutex2var_epi8(__A, __I, __B), 93103285Sikob (__v32qi)__I); 94103285Sikob} 95103285Sikob 96125238Ssimokawastatic __inline__ __m256i __DEFAULT_FN_ATTRS256 97125238Ssimokawa_mm256_maskz_permutex2var_epi8(__mmask32 __U, __m256i __A, __m256i __I, 98124169Ssimokawa __m256i __B) 99124169Ssimokawa{ 100124169Ssimokawa return (__m256i)__builtin_ia32_selectb_256(__U, 101170374Ssimokawa (__v32qi)_mm256_permutex2var_epi8(__A, __I, __B), 102103285Sikob (__v32qi)_mm256_setzero_si256()); 103124169Ssimokawa} 104103285Sikob 105212413Savgstatic __inline__ __m128i __DEFAULT_FN_ATTRS128 106124169Ssimokawa_mm_permutexvar_epi8 (__m128i __A, __m128i __B) 107124169Ssimokawa{ 108124169Ssimokawa return (__m128i)__builtin_ia32_permvarqi128((__v16qi)__B, (__v16qi)__A); 109124169Ssimokawa} 110124169Ssimokawa 111124169Ssimokawastatic __inline__ __m128i __DEFAULT_FN_ATTRS128 112169806Ssimokawa_mm_maskz_permutexvar_epi8 (__mmask16 __M, __m128i __A, __m128i __B) 113106543Ssimokawa{ 114124169Ssimokawa return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, 115106543Ssimokawa (__v16qi)_mm_permutexvar_epi8(__A, __B), 116124169Ssimokawa (__v16qi)_mm_setzero_si128()); 117170374Ssimokawa} 118103285Sikob 119103285Sikobstatic __inline__ __m128i __DEFAULT_FN_ATTRS128 120103285Sikob_mm_mask_permutexvar_epi8 (__m128i __W, __mmask16 __M, __m128i __A, 121125238Ssimokawa __m128i __B) 122125238Ssimokawa{ 123103285Sikob return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, 124103285Sikob (__v16qi)_mm_permutexvar_epi8(__A, __B), 125108642Ssimokawa (__v16qi)__W); 126116978Ssimokawa} 127103285Sikob 128103285Sikobstatic __inline__ __m256i __DEFAULT_FN_ATTRS256 129103285Sikob_mm256_permutexvar_epi8 (__m256i __A, __m256i __B) 130103285Sikob{ 131103285Sikob return (__m256i)__builtin_ia32_permvarqi256((__v32qi) __B, (__v32qi) __A); 132227843Smarius} 133103285Sikob 134124251Ssimokawastatic __inline__ __m256i __DEFAULT_FN_ATTRS256 135124251Ssimokawa_mm256_maskz_permutexvar_epi8 (__mmask32 __M, __m256i __A, 136124251Ssimokawa __m256i __B) 137124251Ssimokawa{ 138103285Sikob return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, 139124251Ssimokawa (__v32qi)_mm256_permutexvar_epi8(__A, __B), 140124251Ssimokawa (__v32qi)_mm256_setzero_si256()); 141124251Ssimokawa} 142124251Ssimokawa 143124251Ssimokawastatic __inline__ __m256i __DEFAULT_FN_ATTRS256 144124251Ssimokawa_mm256_mask_permutexvar_epi8 (__m256i __W, __mmask32 __M, __m256i __A, 145124251Ssimokawa __m256i __B) 146114909Ssimokawa{ 147114909Ssimokawa return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, 148114909Ssimokawa (__v32qi)_mm256_permutexvar_epi8(__A, __B), 149114909Ssimokawa (__v32qi)__W); 150106813Ssimokawa} 151103285Sikob 152103285Sikobstatic __inline__ __m128i __DEFAULT_FN_ATTRS128 153103285Sikob_mm_multishift_epi64_epi8(__m128i __X, __m128i __Y) 154103285Sikob{ 155103285Sikob return (__m128i)__builtin_ia32_vpmultishiftqb128((__v16qi)__X, (__v16qi)__Y); 156103285Sikob} 157103285Sikob 158110072Ssimokawastatic __inline__ __m128i __DEFAULT_FN_ATTRS128 159103285Sikob_mm_mask_multishift_epi64_epi8(__m128i __W, __mmask16 __M, __m128i __X, 160106810Ssimokawa __m128i __Y) 161110072Ssimokawa{ 162103285Sikob return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, 163103285Sikob (__v16qi)_mm_multishift_epi64_epi8(__X, __Y), 164110072Ssimokawa (__v16qi)__W); 165110072Ssimokawa} 166110072Ssimokawa 167110193Ssimokawastatic __inline__ __m128i __DEFAULT_FN_ATTRS128 168120660Ssimokawa_mm_maskz_multishift_epi64_epi8(__mmask16 __M, __m128i __X, __m128i __Y) 169103285Sikob{ 170110072Ssimokawa return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, 171110072Ssimokawa (__v16qi)_mm_multishift_epi64_epi8(__X, __Y), 172106810Ssimokawa (__v16qi)_mm_setzero_si128()); 173103285Sikob} 174106813Ssimokawa 175103285Sikobstatic __inline__ __m256i __DEFAULT_FN_ATTRS256 176110072Ssimokawa_mm256_multishift_epi64_epi8(__m256i __X, __m256i __Y) 177110072Ssimokawa{ 178110072Ssimokawa return (__m256i)__builtin_ia32_vpmultishiftqb256((__v32qi)__X, (__v32qi)__Y); 179110582Ssimokawa} 180110072Ssimokawa 181110072Ssimokawastatic __inline__ __m256i __DEFAULT_FN_ATTRS256 182110072Ssimokawa_mm256_mask_multishift_epi64_epi8(__m256i __W, __mmask32 __M, __m256i __X, 183110072Ssimokawa __m256i __Y) 184110072Ssimokawa{ 185170374Ssimokawa return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, 186110193Ssimokawa (__v32qi)_mm256_multishift_epi64_epi8(__X, __Y), 187110582Ssimokawa (__v32qi)__W); 188110072Ssimokawa} 189170374Ssimokawa 190110072Ssimokawastatic __inline__ __m256i __DEFAULT_FN_ATTRS256 191110072Ssimokawa_mm256_maskz_multishift_epi64_epi8(__mmask32 __M, __m256i __X, __m256i __Y) 192110072Ssimokawa{ 193110072Ssimokawa return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, 194110072Ssimokawa (__v32qi)_mm256_multishift_epi64_epi8(__X, __Y), 195110072Ssimokawa (__v32qi)_mm256_setzero_si256()); 196110072Ssimokawa} 197110072Ssimokawa 198103285Sikob 199103285Sikob#undef __DEFAULT_FN_ATTRS128 200103285Sikob#undef __DEFAULT_FN_ATTRS256 201103285Sikob 202103285Sikob#endif 203103285Sikob