1303233Sdim/*===------------- avx512vbmivlintrin.h - VBMI intrinsics ------------------=== 2303233Sdim * 3303233Sdim * 4353358Sdim * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 5353358Sdim * See https://llvm.org/LICENSE.txt for license information. 6353358Sdim * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 7303233Sdim * 8303233Sdim *===-----------------------------------------------------------------------=== 9303233Sdim */ 10303233Sdim#ifndef __IMMINTRIN_H 11303233Sdim#error "Never use <avx512vbmivlintrin.h> directly; include <immintrin.h> instead." 12303233Sdim#endif 13303233Sdim 14303233Sdim#ifndef __VBMIVLINTRIN_H 15303233Sdim#define __VBMIVLINTRIN_H 16303233Sdim 17303233Sdim/* Define the default attributes for the functions in this file. */ 18341825Sdim#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi,avx512vl"), __min_vector_width__(128))) 19341825Sdim#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi,avx512vl"), __min_vector_width__(256))) 20303233Sdim 21303233Sdim 22341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS128 23341825Sdim_mm_permutex2var_epi8(__m128i __A, __m128i __I, __m128i __B) 24303233Sdim{ 25341825Sdim return (__m128i)__builtin_ia32_vpermi2varqi128((__v16qi)__A, 26341825Sdim (__v16qi)__I, 27341825Sdim (__v16qi)__B); 28303233Sdim} 29303233Sdim 30341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS128 31341825Sdim_mm_mask_permutex2var_epi8(__m128i __A, __mmask16 __U, __m128i __I, 32341825Sdim __m128i __B) 33303233Sdim{ 34341825Sdim return (__m128i)__builtin_ia32_selectb_128(__U, 35341825Sdim (__v16qi)_mm_permutex2var_epi8(__A, __I, __B), 36341825Sdim (__v16qi)__A); 37303233Sdim} 38303233Sdim 39341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS128 40341825Sdim_mm_mask2_permutex2var_epi8(__m128i __A, __m128i __I, __mmask16 __U, 41341825Sdim __m128i __B) 42303233Sdim{ 43341825Sdim return (__m128i)__builtin_ia32_selectb_128(__U, 44341825Sdim (__v16qi)_mm_permutex2var_epi8(__A, __I, __B), 45341825Sdim (__v16qi)__I); 46303233Sdim} 47303233Sdim 48341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS128 49341825Sdim_mm_maskz_permutex2var_epi8(__mmask16 __U, __m128i __A, __m128i __I, 50341825Sdim __m128i __B) 51303233Sdim{ 52341825Sdim return (__m128i)__builtin_ia32_selectb_128(__U, 53341825Sdim (__v16qi)_mm_permutex2var_epi8(__A, __I, __B), 54341825Sdim (__v16qi)_mm_setzero_si128()); 55303233Sdim} 56303233Sdim 57341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS256 58341825Sdim_mm256_permutex2var_epi8(__m256i __A, __m256i __I, __m256i __B) 59303233Sdim{ 60341825Sdim return (__m256i)__builtin_ia32_vpermi2varqi256((__v32qi)__A, (__v32qi)__I, 61341825Sdim (__v32qi)__B); 62303233Sdim} 63303233Sdim 64341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS256 65341825Sdim_mm256_mask_permutex2var_epi8(__m256i __A, __mmask32 __U, __m256i __I, 66341825Sdim __m256i __B) 67303233Sdim{ 68341825Sdim return (__m256i)__builtin_ia32_selectb_256(__U, 69341825Sdim (__v32qi)_mm256_permutex2var_epi8(__A, __I, __B), 70341825Sdim (__v32qi)__A); 71303233Sdim} 72303233Sdim 73341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS256 74341825Sdim_mm256_mask2_permutex2var_epi8(__m256i __A, __m256i __I, __mmask32 __U, 75341825Sdim __m256i __B) 76303233Sdim{ 77341825Sdim return (__m256i)__builtin_ia32_selectb_256(__U, 78341825Sdim (__v32qi)_mm256_permutex2var_epi8(__A, __I, __B), 79341825Sdim (__v32qi)__I); 80303233Sdim} 81303233Sdim 82341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS256 83341825Sdim_mm256_maskz_permutex2var_epi8(__mmask32 __U, __m256i __A, __m256i __I, 84341825Sdim __m256i __B) 85303233Sdim{ 86341825Sdim return (__m256i)__builtin_ia32_selectb_256(__U, 87341825Sdim (__v32qi)_mm256_permutex2var_epi8(__A, __I, __B), 88341825Sdim (__v32qi)_mm256_setzero_si256()); 89303233Sdim} 90303233Sdim 91341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS128 92303233Sdim_mm_permutexvar_epi8 (__m128i __A, __m128i __B) 93303233Sdim{ 94341825Sdim return (__m128i)__builtin_ia32_permvarqi128((__v16qi)__B, (__v16qi)__A); 95303233Sdim} 96303233Sdim 97341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS128 98303233Sdim_mm_maskz_permutexvar_epi8 (__mmask16 __M, __m128i __A, __m128i __B) 99303233Sdim{ 100341825Sdim return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, 101341825Sdim (__v16qi)_mm_permutexvar_epi8(__A, __B), 102341825Sdim (__v16qi)_mm_setzero_si128()); 103303233Sdim} 104303233Sdim 105341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS128 106303233Sdim_mm_mask_permutexvar_epi8 (__m128i __W, __mmask16 __M, __m128i __A, 107303233Sdim __m128i __B) 108303233Sdim{ 109341825Sdim return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, 110341825Sdim (__v16qi)_mm_permutexvar_epi8(__A, __B), 111341825Sdim (__v16qi)__W); 112303233Sdim} 113303233Sdim 114341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS256 115303233Sdim_mm256_permutexvar_epi8 (__m256i __A, __m256i __B) 116303233Sdim{ 117341825Sdim return (__m256i)__builtin_ia32_permvarqi256((__v32qi) __B, (__v32qi) __A); 118303233Sdim} 119303233Sdim 120341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS256 121303233Sdim_mm256_maskz_permutexvar_epi8 (__mmask32 __M, __m256i __A, 122303233Sdim __m256i __B) 123303233Sdim{ 124341825Sdim return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, 125341825Sdim (__v32qi)_mm256_permutexvar_epi8(__A, __B), 126341825Sdim (__v32qi)_mm256_setzero_si256()); 127303233Sdim} 128303233Sdim 129341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS256 130303233Sdim_mm256_mask_permutexvar_epi8 (__m256i __W, __mmask32 __M, __m256i __A, 131303233Sdim __m256i __B) 132303233Sdim{ 133341825Sdim return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, 134341825Sdim (__v32qi)_mm256_permutexvar_epi8(__A, __B), 135341825Sdim (__v32qi)__W); 136303233Sdim} 137303233Sdim 138341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS128 139344779Sdim_mm_multishift_epi64_epi8(__m128i __X, __m128i __Y) 140303233Sdim{ 141344779Sdim return (__m128i)__builtin_ia32_vpmultishiftqb128((__v16qi)__X, (__v16qi)__Y); 142303233Sdim} 143303233Sdim 144341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS128 145344779Sdim_mm_mask_multishift_epi64_epi8(__m128i __W, __mmask16 __M, __m128i __X, 146344779Sdim __m128i __Y) 147303233Sdim{ 148344779Sdim return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, 149344779Sdim (__v16qi)_mm_multishift_epi64_epi8(__X, __Y), 150344779Sdim (__v16qi)__W); 151303233Sdim} 152303233Sdim 153341825Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS128 154344779Sdim_mm_maskz_multishift_epi64_epi8(__mmask16 __M, __m128i __X, __m128i __Y) 155303233Sdim{ 156344779Sdim return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, 157344779Sdim (__v16qi)_mm_multishift_epi64_epi8(__X, __Y), 158344779Sdim (__v16qi)_mm_setzero_si128()); 159303233Sdim} 160303233Sdim 161341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS256 162344779Sdim_mm256_multishift_epi64_epi8(__m256i __X, __m256i __Y) 163303233Sdim{ 164344779Sdim return (__m256i)__builtin_ia32_vpmultishiftqb256((__v32qi)__X, (__v32qi)__Y); 165303233Sdim} 166303233Sdim 167341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS256 168344779Sdim_mm256_mask_multishift_epi64_epi8(__m256i __W, __mmask32 __M, __m256i __X, 169344779Sdim __m256i __Y) 170303233Sdim{ 171344779Sdim return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, 172344779Sdim (__v32qi)_mm256_multishift_epi64_epi8(__X, __Y), 173344779Sdim (__v32qi)__W); 174303233Sdim} 175303233Sdim 176341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS256 177344779Sdim_mm256_maskz_multishift_epi64_epi8(__mmask32 __M, __m256i __X, __m256i __Y) 178303233Sdim{ 179344779Sdim return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, 180344779Sdim (__v32qi)_mm256_multishift_epi64_epi8(__X, __Y), 181344779Sdim (__v32qi)_mm256_setzero_si256()); 182303233Sdim} 183303233Sdim 184303233Sdim 185341825Sdim#undef __DEFAULT_FN_ATTRS128 186341825Sdim#undef __DEFAULT_FN_ATTRS256 187303233Sdim 188303233Sdim#endif 189