1/*===------------- avx512vbmivlintrin.h - VBMI intrinsics ------------------=== 2 * 3 * 4 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 5 * See https://llvm.org/LICENSE.txt for license information. 6 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 7 * 8 *===-----------------------------------------------------------------------=== 9 */ 10#ifndef __IMMINTRIN_H 11#error "Never use <avx512vbmivlintrin.h> directly; include <immintrin.h> instead." 12#endif 13 14#ifndef __VBMIVLINTRIN_H 15#define __VBMIVLINTRIN_H 16 17/* Define the default attributes for the functions in this file. */ 18#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi,avx512vl"), __min_vector_width__(128))) 19#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi,avx512vl"), __min_vector_width__(256))) 20 21 22static __inline__ __m128i __DEFAULT_FN_ATTRS128 23_mm_permutex2var_epi8(__m128i __A, __m128i __I, __m128i __B) 24{ 25 return (__m128i)__builtin_ia32_vpermi2varqi128((__v16qi)__A, 26 (__v16qi)__I, 27 (__v16qi)__B); 28} 29 30static __inline__ __m128i __DEFAULT_FN_ATTRS128 31_mm_mask_permutex2var_epi8(__m128i __A, __mmask16 __U, __m128i __I, 32 __m128i __B) 33{ 34 return (__m128i)__builtin_ia32_selectb_128(__U, 35 (__v16qi)_mm_permutex2var_epi8(__A, __I, __B), 36 (__v16qi)__A); 37} 38 39static __inline__ __m128i __DEFAULT_FN_ATTRS128 40_mm_mask2_permutex2var_epi8(__m128i __A, __m128i __I, __mmask16 __U, 41 __m128i __B) 42{ 43 return (__m128i)__builtin_ia32_selectb_128(__U, 44 (__v16qi)_mm_permutex2var_epi8(__A, __I, __B), 45 (__v16qi)__I); 46} 47 48static __inline__ __m128i __DEFAULT_FN_ATTRS128 49_mm_maskz_permutex2var_epi8(__mmask16 __U, __m128i __A, __m128i __I, 50 __m128i __B) 51{ 52 return (__m128i)__builtin_ia32_selectb_128(__U, 53 (__v16qi)_mm_permutex2var_epi8(__A, __I, __B), 54 (__v16qi)_mm_setzero_si128()); 55} 56 57static __inline__ __m256i __DEFAULT_FN_ATTRS256 58_mm256_permutex2var_epi8(__m256i __A, __m256i __I, __m256i __B) 59{ 60 return (__m256i)__builtin_ia32_vpermi2varqi256((__v32qi)__A, (__v32qi)__I, 61 (__v32qi)__B); 62} 63 64static __inline__ __m256i __DEFAULT_FN_ATTRS256 65_mm256_mask_permutex2var_epi8(__m256i __A, __mmask32 __U, __m256i __I, 66 __m256i __B) 67{ 68 return (__m256i)__builtin_ia32_selectb_256(__U, 69 (__v32qi)_mm256_permutex2var_epi8(__A, __I, __B), 70 (__v32qi)__A); 71} 72 73static __inline__ __m256i __DEFAULT_FN_ATTRS256 74_mm256_mask2_permutex2var_epi8(__m256i __A, __m256i __I, __mmask32 __U, 75 __m256i __B) 76{ 77 return (__m256i)__builtin_ia32_selectb_256(__U, 78 (__v32qi)_mm256_permutex2var_epi8(__A, __I, __B), 79 (__v32qi)__I); 80} 81 82static __inline__ __m256i __DEFAULT_FN_ATTRS256 83_mm256_maskz_permutex2var_epi8(__mmask32 __U, __m256i __A, __m256i __I, 84 __m256i __B) 85{ 86 return (__m256i)__builtin_ia32_selectb_256(__U, 87 (__v32qi)_mm256_permutex2var_epi8(__A, __I, __B), 88 (__v32qi)_mm256_setzero_si256()); 89} 90 91static __inline__ __m128i __DEFAULT_FN_ATTRS128 92_mm_permutexvar_epi8 (__m128i __A, __m128i __B) 93{ 94 return (__m128i)__builtin_ia32_permvarqi128((__v16qi)__B, (__v16qi)__A); 95} 96 97static __inline__ __m128i __DEFAULT_FN_ATTRS128 98_mm_maskz_permutexvar_epi8 (__mmask16 __M, __m128i __A, __m128i __B) 99{ 100 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, 101 (__v16qi)_mm_permutexvar_epi8(__A, __B), 102 (__v16qi)_mm_setzero_si128()); 103} 104 105static __inline__ __m128i __DEFAULT_FN_ATTRS128 106_mm_mask_permutexvar_epi8 (__m128i __W, __mmask16 __M, __m128i __A, 107 __m128i __B) 108{ 109 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, 110 (__v16qi)_mm_permutexvar_epi8(__A, __B), 111 (__v16qi)__W); 112} 113 114static __inline__ __m256i __DEFAULT_FN_ATTRS256 115_mm256_permutexvar_epi8 (__m256i __A, __m256i __B) 116{ 117 return (__m256i)__builtin_ia32_permvarqi256((__v32qi) __B, (__v32qi) __A); 118} 119 120static __inline__ __m256i __DEFAULT_FN_ATTRS256 121_mm256_maskz_permutexvar_epi8 (__mmask32 __M, __m256i __A, 122 __m256i __B) 123{ 124 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, 125 (__v32qi)_mm256_permutexvar_epi8(__A, __B), 126 (__v32qi)_mm256_setzero_si256()); 127} 128 129static __inline__ __m256i __DEFAULT_FN_ATTRS256 130_mm256_mask_permutexvar_epi8 (__m256i __W, __mmask32 __M, __m256i __A, 131 __m256i __B) 132{ 133 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, 134 (__v32qi)_mm256_permutexvar_epi8(__A, __B), 135 (__v32qi)__W); 136} 137 138static __inline__ __m128i __DEFAULT_FN_ATTRS128 139_mm_multishift_epi64_epi8(__m128i __X, __m128i __Y) 140{ 141 return (__m128i)__builtin_ia32_vpmultishiftqb128((__v16qi)__X, (__v16qi)__Y); 142} 143 144static __inline__ __m128i __DEFAULT_FN_ATTRS128 145_mm_mask_multishift_epi64_epi8(__m128i __W, __mmask16 __M, __m128i __X, 146 __m128i __Y) 147{ 148 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, 149 (__v16qi)_mm_multishift_epi64_epi8(__X, __Y), 150 (__v16qi)__W); 151} 152 153static __inline__ __m128i __DEFAULT_FN_ATTRS128 154_mm_maskz_multishift_epi64_epi8(__mmask16 __M, __m128i __X, __m128i __Y) 155{ 156 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, 157 (__v16qi)_mm_multishift_epi64_epi8(__X, __Y), 158 (__v16qi)_mm_setzero_si128()); 159} 160 161static __inline__ __m256i __DEFAULT_FN_ATTRS256 162_mm256_multishift_epi64_epi8(__m256i __X, __m256i __Y) 163{ 164 return (__m256i)__builtin_ia32_vpmultishiftqb256((__v32qi)__X, (__v32qi)__Y); 165} 166 167static __inline__ __m256i __DEFAULT_FN_ATTRS256 168_mm256_mask_multishift_epi64_epi8(__m256i __W, __mmask32 __M, __m256i __X, 169 __m256i __Y) 170{ 171 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, 172 (__v32qi)_mm256_multishift_epi64_epi8(__X, __Y), 173 (__v32qi)__W); 174} 175 176static __inline__ __m256i __DEFAULT_FN_ATTRS256 177_mm256_maskz_multishift_epi64_epi8(__mmask32 __M, __m256i __X, __m256i __Y) 178{ 179 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, 180 (__v32qi)_mm256_multishift_epi64_epi8(__X, __Y), 181 (__v32qi)_mm256_setzero_si256()); 182} 183 184 185#undef __DEFAULT_FN_ATTRS128 186#undef __DEFAULT_FN_ATTRS256 187 188#endif 189