1227753Stheraven/*===---- avx512vlcdintrin.h - AVX512VL and AVX512CD intrinsics ------------=== 2232498Stheraven * 3227753Stheraven * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4227753Stheraven * See https://llvm.org/LICENSE.txt for license information. 5227753Stheraven * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6227753Stheraven * 7227753Stheraven *===-----------------------------------------------------------------------=== 8227753Stheraven */ 9227753Stheraven#ifndef __IMMINTRIN_H 10227753Stheraven#error "Never use <avx512vlcdintrin.h> directly; include <immintrin.h> instead." 11232498Stheraven#endif 12232498Stheraven 13232498Stheraven#ifndef __AVX512VLCDINTRIN_H 14232498Stheraven#define __AVX512VLCDINTRIN_H 15232498Stheraven 16227753Stheraven/* Define the default attributes for the functions in this file. */ 17227753Stheraven#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512cd"), __min_vector_width__(128))) 18227753Stheraven#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512cd"), __min_vector_width__(256))) 19227753Stheraven 20232498Stheraven 21227753Stheravenstatic __inline__ __m128i __DEFAULT_FN_ATTRS128 22227753Stheraven_mm_broadcastmb_epi64 (__mmask8 __A) 23227753Stheraven{ 24227753Stheraven return (__m128i) _mm_set1_epi64x((long long) __A); 25227753Stheraven} 26227753Stheraven 27227753Stheravenstatic __inline__ __m256i __DEFAULT_FN_ATTRS256 28227753Stheraven_mm256_broadcastmb_epi64 (__mmask8 __A) 29227753Stheraven{ 30227753Stheraven return (__m256i) _mm256_set1_epi64x((long long)__A); 31227753Stheraven} 32227753Stheraven 33227753Stheravenstatic __inline__ __m128i __DEFAULT_FN_ATTRS128 34227753Stheraven_mm_broadcastmw_epi32 (__mmask16 __A) 35227753Stheraven{ 36227753Stheraven return (__m128i) _mm_set1_epi32((int)__A); 37232498Stheraven} 38227753Stheraven 39227753Stheravenstatic __inline__ __m256i __DEFAULT_FN_ATTRS256 40232498Stheraven_mm256_broadcastmw_epi32 (__mmask16 __A) 41227753Stheraven{ 42232498Stheraven return (__m256i) _mm256_set1_epi32((int)__A); 43227753Stheraven} 44232498Stheraven 45227753Stheraven 46232498Stheravenstatic __inline__ __m128i __DEFAULT_FN_ATTRS128 47227753Stheraven_mm_conflict_epi64 (__m128i __A) 48232498Stheraven{ 49227753Stheraven return (__m128i) __builtin_ia32_vpconflictdi_128 ((__v2di) __A); 50227753Stheraven} 51227753Stheraven 52232498Stheravenstatic __inline__ __m128i __DEFAULT_FN_ATTRS128 53232498Stheraven_mm_mask_conflict_epi64 (__m128i __W, __mmask8 __U, __m128i __A) 54231714Sdim{ 55227753Stheraven return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 56232498Stheraven (__v2di)_mm_conflict_epi64(__A), 57227753Stheraven (__v2di)__W); 58232498Stheraven} 59227753Stheraven 60232498Stheravenstatic __inline__ __m128i __DEFAULT_FN_ATTRS128 61227753Stheraven_mm_maskz_conflict_epi64 (__mmask8 __U, __m128i __A) 62232498Stheraven{ 63227753Stheraven return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 64232498Stheraven (__v2di)_mm_conflict_epi64(__A), 65227753Stheraven (__v2di)_mm_setzero_si128()); 66232498Stheraven} 67227753Stheraven 68232498Stheravenstatic __inline__ __m256i __DEFAULT_FN_ATTRS256 69232498Stheraven_mm256_conflict_epi64 (__m256i __A) 70227753Stheraven{ 71227753Stheraven return (__m256i) __builtin_ia32_vpconflictdi_256 ((__v4di) __A); 72227753Stheraven} 73232498Stheraven 74232498Stheravenstatic __inline__ __m256i __DEFAULT_FN_ATTRS256 75231714Sdim_mm256_mask_conflict_epi64 (__m256i __W, __mmask8 __U, __m256i __A) 76232498Stheraven{ 77232498Stheraven return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 78231673Stheraven (__v4di)_mm256_conflict_epi64(__A), 79227753Stheraven (__v4di)__W); 80227753Stheraven} 81227753Stheraven 82227753Stheravenstatic __inline__ __m256i __DEFAULT_FN_ATTRS256 83227753Stheraven_mm256_maskz_conflict_epi64 (__mmask8 __U, __m256i __A) 84227753Stheraven{ 85227753Stheraven return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 86 (__v4di)_mm256_conflict_epi64(__A), 87 (__v4di)_mm256_setzero_si256()); 88} 89 90static __inline__ __m128i __DEFAULT_FN_ATTRS128 91_mm_conflict_epi32 (__m128i __A) 92{ 93 return (__m128i) __builtin_ia32_vpconflictsi_128 ((__v4si) __A); 94} 95 96static __inline__ __m128i __DEFAULT_FN_ATTRS128 97_mm_mask_conflict_epi32 (__m128i __W, __mmask8 __U, __m128i __A) 98{ 99 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 100 (__v4si)_mm_conflict_epi32(__A), 101 (__v4si)__W); 102} 103 104static __inline__ __m128i __DEFAULT_FN_ATTRS128 105_mm_maskz_conflict_epi32 (__mmask8 __U, __m128i __A) 106{ 107 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 108 (__v4si)_mm_conflict_epi32(__A), 109 (__v4si)_mm_setzero_si128()); 110} 111 112static __inline__ __m256i __DEFAULT_FN_ATTRS256 113_mm256_conflict_epi32 (__m256i __A) 114{ 115 return (__m256i) __builtin_ia32_vpconflictsi_256 ((__v8si) __A); 116} 117 118static __inline__ __m256i __DEFAULT_FN_ATTRS256 119_mm256_mask_conflict_epi32 (__m256i __W, __mmask8 __U, __m256i __A) 120{ 121 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 122 (__v8si)_mm256_conflict_epi32(__A), 123 (__v8si)__W); 124} 125 126static __inline__ __m256i __DEFAULT_FN_ATTRS256 127_mm256_maskz_conflict_epi32 (__mmask8 __U, __m256i __A) 128{ 129 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 130 (__v8si)_mm256_conflict_epi32(__A), 131 (__v8si)_mm256_setzero_si256()); 132} 133 134static __inline__ __m128i __DEFAULT_FN_ATTRS128 135_mm_lzcnt_epi32 (__m128i __A) 136{ 137 return (__m128i) __builtin_ia32_vplzcntd_128 ((__v4si) __A); 138} 139 140static __inline__ __m128i __DEFAULT_FN_ATTRS128 141_mm_mask_lzcnt_epi32 (__m128i __W, __mmask8 __U, __m128i __A) 142{ 143 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 144 (__v4si)_mm_lzcnt_epi32(__A), 145 (__v4si)__W); 146} 147 148static __inline__ __m128i __DEFAULT_FN_ATTRS128 149_mm_maskz_lzcnt_epi32 (__mmask8 __U, __m128i __A) 150{ 151 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 152 (__v4si)_mm_lzcnt_epi32(__A), 153 (__v4si)_mm_setzero_si128()); 154} 155 156static __inline__ __m256i __DEFAULT_FN_ATTRS256 157_mm256_lzcnt_epi32 (__m256i __A) 158{ 159 return (__m256i) __builtin_ia32_vplzcntd_256 ((__v8si) __A); 160} 161 162static __inline__ __m256i __DEFAULT_FN_ATTRS256 163_mm256_mask_lzcnt_epi32 (__m256i __W, __mmask8 __U, __m256i __A) 164{ 165 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 166 (__v8si)_mm256_lzcnt_epi32(__A), 167 (__v8si)__W); 168} 169 170static __inline__ __m256i __DEFAULT_FN_ATTRS256 171_mm256_maskz_lzcnt_epi32 (__mmask8 __U, __m256i __A) 172{ 173 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 174 (__v8si)_mm256_lzcnt_epi32(__A), 175 (__v8si)_mm256_setzero_si256()); 176} 177 178static __inline__ __m128i __DEFAULT_FN_ATTRS128 179_mm_lzcnt_epi64 (__m128i __A) 180{ 181 return (__m128i) __builtin_ia32_vplzcntq_128 ((__v2di) __A); 182} 183 184static __inline__ __m128i __DEFAULT_FN_ATTRS128 185_mm_mask_lzcnt_epi64 (__m128i __W, __mmask8 __U, __m128i __A) 186{ 187 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 188 (__v2di)_mm_lzcnt_epi64(__A), 189 (__v2di)__W); 190} 191 192static __inline__ __m128i __DEFAULT_FN_ATTRS128 193_mm_maskz_lzcnt_epi64 (__mmask8 __U, __m128i __A) 194{ 195 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 196 (__v2di)_mm_lzcnt_epi64(__A), 197 (__v2di)_mm_setzero_si128()); 198} 199 200static __inline__ __m256i __DEFAULT_FN_ATTRS256 201_mm256_lzcnt_epi64 (__m256i __A) 202{ 203 return (__m256i) __builtin_ia32_vplzcntq_256 ((__v4di) __A); 204} 205 206static __inline__ __m256i __DEFAULT_FN_ATTRS256 207_mm256_mask_lzcnt_epi64 (__m256i __W, __mmask8 __U, __m256i __A) 208{ 209 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 210 (__v4di)_mm256_lzcnt_epi64(__A), 211 (__v4di)__W); 212} 213 214static __inline__ __m256i __DEFAULT_FN_ATTRS256 215_mm256_maskz_lzcnt_epi64 (__mmask8 __U, __m256i __A) 216{ 217 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 218 (__v4di)_mm256_lzcnt_epi64(__A), 219 (__v4di)_mm256_setzero_si256()); 220} 221 222#undef __DEFAULT_FN_ATTRS128 223#undef __DEFAULT_FN_ATTRS256 224 225#endif /* __AVX512VLCDINTRIN_H */ 226