/* avx512vbmivlintrin.h revision 309124 */
1/*===------------- avx512vbmivlintrin.h - VBMI intrinsics ------------------=== 2 * 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a copy 5 * of this software and associated documentation files (the "Software"), to deal 6 * in the Software without restriction, including without limitation the rights 7 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 * copies of the Software, and to permit persons to whom the Software is 9 * furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 * THE SOFTWARE. 21 * 22 *===-----------------------------------------------------------------------=== 23 */ 24#ifndef __IMMINTRIN_H 25#error "Never use <avx512vbmivlintrin.h> directly; include <immintrin.h> instead." 26#endif 27 28#ifndef __VBMIVLINTRIN_H 29#define __VBMIVLINTRIN_H 30 31/* Define the default attributes for the functions in this file. 
*/ 32#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi,avx512vl"))) 33 34 35static __inline__ __m128i __DEFAULT_FN_ATTRS 36_mm_mask2_permutex2var_epi8 (__m128i __A, __m128i __I, __mmask16 __U, 37 __m128i __B) 38{ 39 return (__m128i) __builtin_ia32_vpermi2varqi128_mask ((__v16qi) __A, 40 (__v16qi) __I 41 /* idx */ , 42 (__v16qi) __B, 43 (__mmask16) 44 __U); 45} 46 47static __inline__ __m256i __DEFAULT_FN_ATTRS 48_mm256_mask2_permutex2var_epi8 (__m256i __A, __m256i __I, 49 __mmask32 __U, __m256i __B) 50{ 51 return (__m256i) __builtin_ia32_vpermi2varqi256_mask ((__v32qi) __A, 52 (__v32qi) __I 53 /* idx */ , 54 (__v32qi) __B, 55 (__mmask32) 56 __U); 57} 58 59static __inline__ __m128i __DEFAULT_FN_ATTRS 60_mm_permutex2var_epi8 (__m128i __A, __m128i __I, __m128i __B) 61{ 62 return (__m128i) __builtin_ia32_vpermt2varqi128_mask ((__v16qi) __I 63 /* idx */ , 64 (__v16qi) __A, 65 (__v16qi) __B, 66 (__mmask16) - 67 1); 68} 69 70static __inline__ __m128i __DEFAULT_FN_ATTRS 71_mm_mask_permutex2var_epi8 (__m128i __A, __mmask16 __U, __m128i __I, 72 __m128i __B) 73{ 74 return (__m128i) __builtin_ia32_vpermt2varqi128_mask ((__v16qi) __I 75 /* idx */ , 76 (__v16qi) __A, 77 (__v16qi) __B, 78 (__mmask16) 79 __U); 80} 81 82static __inline__ __m128i __DEFAULT_FN_ATTRS 83_mm_maskz_permutex2var_epi8 (__mmask16 __U, __m128i __A, __m128i __I, 84 __m128i __B) 85{ 86 return (__m128i) __builtin_ia32_vpermt2varqi128_maskz ((__v16qi) __I 87 /* idx */ , 88 (__v16qi) __A, 89 (__v16qi) __B, 90 (__mmask16) 91 __U); 92} 93 94static __inline__ __m256i __DEFAULT_FN_ATTRS 95_mm256_permutex2var_epi8 (__m256i __A, __m256i __I, __m256i __B) 96{ 97 return (__m256i) __builtin_ia32_vpermt2varqi256_mask ((__v32qi) __I 98 /* idx */ , 99 (__v32qi) __A, 100 (__v32qi) __B, 101 (__mmask32) - 102 1); 103} 104 105static __inline__ __m256i __DEFAULT_FN_ATTRS 106_mm256_mask_permutex2var_epi8 (__m256i __A, __mmask32 __U, 107 __m256i __I, __m256i __B) 108{ 109 return 
(__m256i) __builtin_ia32_vpermt2varqi256_mask ((__v32qi) __I 110 /* idx */ , 111 (__v32qi) __A, 112 (__v32qi) __B, 113 (__mmask32) 114 __U); 115} 116 117static __inline__ __m256i __DEFAULT_FN_ATTRS 118_mm256_maskz_permutex2var_epi8 (__mmask32 __U, __m256i __A, 119 __m256i __I, __m256i __B) 120{ 121 return (__m256i) __builtin_ia32_vpermt2varqi256_maskz ((__v32qi) __I 122 /* idx */ , 123 (__v32qi) __A, 124 (__v32qi) __B, 125 (__mmask32) 126 __U); 127} 128 129static __inline__ __m128i __DEFAULT_FN_ATTRS 130_mm_permutexvar_epi8 (__m128i __A, __m128i __B) 131{ 132 return (__m128i) __builtin_ia32_permvarqi128_mask ((__v16qi) __B, 133 (__v16qi) __A, 134 (__v16qi) _mm_undefined_si128 (), 135 (__mmask16) -1); 136} 137 138static __inline__ __m128i __DEFAULT_FN_ATTRS 139_mm_maskz_permutexvar_epi8 (__mmask16 __M, __m128i __A, __m128i __B) 140{ 141 return (__m128i) __builtin_ia32_permvarqi128_mask ((__v16qi) __B, 142 (__v16qi) __A, 143 (__v16qi) _mm_setzero_si128 (), 144 (__mmask16) __M); 145} 146 147static __inline__ __m128i __DEFAULT_FN_ATTRS 148_mm_mask_permutexvar_epi8 (__m128i __W, __mmask16 __M, __m128i __A, 149 __m128i __B) 150{ 151 return (__m128i) __builtin_ia32_permvarqi128_mask ((__v16qi) __B, 152 (__v16qi) __A, 153 (__v16qi) __W, 154 (__mmask16) __M); 155} 156 157static __inline__ __m256i __DEFAULT_FN_ATTRS 158_mm256_permutexvar_epi8 (__m256i __A, __m256i __B) 159{ 160 return (__m256i) __builtin_ia32_permvarqi256_mask ((__v32qi) __B, 161 (__v32qi) __A, 162 (__v32qi) _mm256_undefined_si256 (), 163 (__mmask32) -1); 164} 165 166static __inline__ __m256i __DEFAULT_FN_ATTRS 167_mm256_maskz_permutexvar_epi8 (__mmask32 __M, __m256i __A, 168 __m256i __B) 169{ 170 return (__m256i) __builtin_ia32_permvarqi256_mask ((__v32qi) __B, 171 (__v32qi) __A, 172 (__v32qi) _mm256_setzero_si256 (), 173 (__mmask32) __M); 174} 175 176static __inline__ __m256i __DEFAULT_FN_ATTRS 177_mm256_mask_permutexvar_epi8 (__m256i __W, __mmask32 __M, __m256i __A, 178 __m256i __B) 179{ 180 return 
(__m256i) __builtin_ia32_permvarqi256_mask ((__v32qi) __B, 181 (__v32qi) __A, 182 (__v32qi) __W, 183 (__mmask32) __M); 184} 185 186static __inline__ __m128i __DEFAULT_FN_ATTRS 187_mm_mask_multishift_epi64_epi8 (__m128i __W, __mmask16 __M, __m128i __X, __m128i __Y) 188{ 189 return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X, 190 (__v16qi) __Y, 191 (__v16qi) __W, 192 (__mmask16) __M); 193} 194 195static __inline__ __m128i __DEFAULT_FN_ATTRS 196_mm_maskz_multishift_epi64_epi8 (__mmask16 __M, __m128i __X, __m128i __Y) 197{ 198 return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X, 199 (__v16qi) __Y, 200 (__v16qi) 201 _mm_setzero_si128 (), 202 (__mmask16) __M); 203} 204 205static __inline__ __m128i __DEFAULT_FN_ATTRS 206_mm_multishift_epi64_epi8 (__m128i __X, __m128i __Y) 207{ 208 return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X, 209 (__v16qi) __Y, 210 (__v16qi) 211 _mm_undefined_si128 (), 212 (__mmask16) -1); 213} 214 215static __inline__ __m256i __DEFAULT_FN_ATTRS 216_mm256_mask_multishift_epi64_epi8 (__m256i __W, __mmask32 __M, __m256i __X, __m256i __Y) 217{ 218 return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v32qi) __X, 219 (__v32qi) __Y, 220 (__v32qi) __W, 221 (__mmask32) __M); 222} 223 224static __inline__ __m256i __DEFAULT_FN_ATTRS 225_mm256_maskz_multishift_epi64_epi8 (__mmask32 __M, __m256i __X, __m256i __Y) 226{ 227 return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v32qi) __X, 228 (__v32qi) __Y, 229 (__v32qi) 230 _mm256_setzero_si256 (), 231 (__mmask32) __M); 232} 233 234static __inline__ __m256i __DEFAULT_FN_ATTRS 235_mm256_multishift_epi64_epi8 (__m256i __X, __m256i __Y) 236{ 237 return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v32qi) __X, 238 (__v32qi) __Y, 239 (__v32qi) 240 _mm256_undefined_si256 (), 241 (__mmask32) -1); 242} 243 244 245#undef __DEFAULT_FN_ATTRS 246 247#endif 248