/* avx512vlcdintrin.h revision 341825 */
1/*===---- avx512vlcdintrin.h - AVX512VL and AVX512CD intrinsics ------------=== 2 * 3 * Permission is hereby granted, free of charge, to any person obtaining a copy 4 * of this software and associated documentation files (the "Software"), to deal 5 * in the Software without restriction, including without limitation the rights 6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 * copies of the Software, and to permit persons to whom the Software is 8 * furnished to do so, subject to the following conditions: 9 * 10 * The above copyright notice and this permission notice shall be included in 11 * all copies or substantial portions of the Software. 12 * 13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 * THE SOFTWARE. 20 * 21 *===-----------------------------------------------------------------------=== 22 */ 23#ifndef __IMMINTRIN_H 24#error "Never use <avx512vlcdintrin.h> directly; include <immintrin.h> instead." 25#endif 26 27#ifndef __AVX512VLCDINTRIN_H 28#define __AVX512VLCDINTRIN_H 29 30/* Define the default attributes for the functions in this file. 
*/ 31#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512cd"), __min_vector_width__(128))) 32#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512cd"), __min_vector_width__(256))) 33 34 35static __inline__ __m128i __DEFAULT_FN_ATTRS128 36_mm_broadcastmb_epi64 (__mmask8 __A) 37{ 38 return (__m128i) _mm_set1_epi64x((long long) __A); 39} 40 41static __inline__ __m256i __DEFAULT_FN_ATTRS256 42_mm256_broadcastmb_epi64 (__mmask8 __A) 43{ 44 return (__m256i) _mm256_set1_epi64x((long long)__A); 45} 46 47static __inline__ __m128i __DEFAULT_FN_ATTRS128 48_mm_broadcastmw_epi32 (__mmask16 __A) 49{ 50 return (__m128i) _mm_set1_epi32((int)__A); 51} 52 53static __inline__ __m256i __DEFAULT_FN_ATTRS256 54_mm256_broadcastmw_epi32 (__mmask16 __A) 55{ 56 return (__m256i) _mm256_set1_epi32((int)__A); 57} 58 59 60static __inline__ __m128i __DEFAULT_FN_ATTRS128 61_mm_conflict_epi64 (__m128i __A) 62{ 63 return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A, 64 (__v2di) _mm_undefined_si128 (), 65 (__mmask8) -1); 66} 67 68static __inline__ __m128i __DEFAULT_FN_ATTRS128 69_mm_mask_conflict_epi64 (__m128i __W, __mmask8 __U, __m128i __A) 70{ 71 return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A, 72 (__v2di) __W, 73 (__mmask8) __U); 74} 75 76static __inline__ __m128i __DEFAULT_FN_ATTRS128 77_mm_maskz_conflict_epi64 (__mmask8 __U, __m128i __A) 78{ 79 return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A, 80 (__v2di) 81 _mm_setzero_si128 (), 82 (__mmask8) __U); 83} 84 85static __inline__ __m256i __DEFAULT_FN_ATTRS256 86_mm256_conflict_epi64 (__m256i __A) 87{ 88 return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A, 89 (__v4di) _mm256_undefined_si256 (), 90 (__mmask8) -1); 91} 92 93static __inline__ __m256i __DEFAULT_FN_ATTRS256 94_mm256_mask_conflict_epi64 (__m256i __W, __mmask8 __U, __m256i __A) 95{ 96 return (__m256i) 
__builtin_ia32_vpconflictdi_256_mask ((__v4di) __A, 97 (__v4di) __W, 98 (__mmask8) __U); 99} 100 101static __inline__ __m256i __DEFAULT_FN_ATTRS256 102_mm256_maskz_conflict_epi64 (__mmask8 __U, __m256i __A) 103{ 104 return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A, 105 (__v4di) _mm256_setzero_si256 (), 106 (__mmask8) __U); 107} 108 109static __inline__ __m128i __DEFAULT_FN_ATTRS128 110_mm_conflict_epi32 (__m128i __A) 111{ 112 return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A, 113 (__v4si) _mm_undefined_si128 (), 114 (__mmask8) -1); 115} 116 117static __inline__ __m128i __DEFAULT_FN_ATTRS128 118_mm_mask_conflict_epi32 (__m128i __W, __mmask8 __U, __m128i __A) 119{ 120 return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A, 121 (__v4si) __W, 122 (__mmask8) __U); 123} 124 125static __inline__ __m128i __DEFAULT_FN_ATTRS128 126_mm_maskz_conflict_epi32 (__mmask8 __U, __m128i __A) 127{ 128 return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A, 129 (__v4si) _mm_setzero_si128 (), 130 (__mmask8) __U); 131} 132 133static __inline__ __m256i __DEFAULT_FN_ATTRS256 134_mm256_conflict_epi32 (__m256i __A) 135{ 136 return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A, 137 (__v8si) _mm256_undefined_si256 (), 138 (__mmask8) -1); 139} 140 141static __inline__ __m256i __DEFAULT_FN_ATTRS256 142_mm256_mask_conflict_epi32 (__m256i __W, __mmask8 __U, __m256i __A) 143{ 144 return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A, 145 (__v8si) __W, 146 (__mmask8) __U); 147} 148 149static __inline__ __m256i __DEFAULT_FN_ATTRS256 150_mm256_maskz_conflict_epi32 (__mmask8 __U, __m256i __A) 151{ 152 return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A, 153 (__v8si) 154 _mm256_setzero_si256 (), 155 (__mmask8) __U); 156} 157 158static __inline__ __m128i __DEFAULT_FN_ATTRS128 159_mm_lzcnt_epi32 (__m128i __A) 160{ 161 return (__m128i) __builtin_ia32_vplzcntd_128 ((__v4si) __A); 162} 163 164static 
__inline__ __m128i __DEFAULT_FN_ATTRS128 165_mm_mask_lzcnt_epi32 (__m128i __W, __mmask8 __U, __m128i __A) 166{ 167 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 168 (__v4si)_mm_lzcnt_epi32(__A), 169 (__v4si)__W); 170} 171 172static __inline__ __m128i __DEFAULT_FN_ATTRS128 173_mm_maskz_lzcnt_epi32 (__mmask8 __U, __m128i __A) 174{ 175 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 176 (__v4si)_mm_lzcnt_epi32(__A), 177 (__v4si)_mm_setzero_si128()); 178} 179 180static __inline__ __m256i __DEFAULT_FN_ATTRS256 181_mm256_lzcnt_epi32 (__m256i __A) 182{ 183 return (__m256i) __builtin_ia32_vplzcntd_256 ((__v8si) __A); 184} 185 186static __inline__ __m256i __DEFAULT_FN_ATTRS256 187_mm256_mask_lzcnt_epi32 (__m256i __W, __mmask8 __U, __m256i __A) 188{ 189 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 190 (__v8si)_mm256_lzcnt_epi32(__A), 191 (__v8si)__W); 192} 193 194static __inline__ __m256i __DEFAULT_FN_ATTRS256 195_mm256_maskz_lzcnt_epi32 (__mmask8 __U, __m256i __A) 196{ 197 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 198 (__v8si)_mm256_lzcnt_epi32(__A), 199 (__v8si)_mm256_setzero_si256()); 200} 201 202static __inline__ __m128i __DEFAULT_FN_ATTRS128 203_mm_lzcnt_epi64 (__m128i __A) 204{ 205 return (__m128i) __builtin_ia32_vplzcntq_128 ((__v2di) __A); 206} 207 208static __inline__ __m128i __DEFAULT_FN_ATTRS128 209_mm_mask_lzcnt_epi64 (__m128i __W, __mmask8 __U, __m128i __A) 210{ 211 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 212 (__v2di)_mm_lzcnt_epi64(__A), 213 (__v2di)__W); 214} 215 216static __inline__ __m128i __DEFAULT_FN_ATTRS128 217_mm_maskz_lzcnt_epi64 (__mmask8 __U, __m128i __A) 218{ 219 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 220 (__v2di)_mm_lzcnt_epi64(__A), 221 (__v2di)_mm_setzero_si128()); 222} 223 224static __inline__ __m256i __DEFAULT_FN_ATTRS256 225_mm256_lzcnt_epi64 (__m256i __A) 226{ 227 return (__m256i) __builtin_ia32_vplzcntq_256 ((__v4di) __A); 228} 229 230static __inline__ 
__m256i __DEFAULT_FN_ATTRS256 231_mm256_mask_lzcnt_epi64 (__m256i __W, __mmask8 __U, __m256i __A) 232{ 233 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 234 (__v4di)_mm256_lzcnt_epi64(__A), 235 (__v4di)__W); 236} 237 238static __inline__ __m256i __DEFAULT_FN_ATTRS256 239_mm256_maskz_lzcnt_epi64 (__mmask8 __U, __m256i __A) 240{ 241 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 242 (__v4di)_mm256_lzcnt_epi64(__A), 243 (__v4di)_mm256_setzero_si256()); 244} 245 246#undef __DEFAULT_FN_ATTRS128 247#undef __DEFAULT_FN_ATTRS256 248 249#endif /* __AVX512VLCDINTRIN_H */ 250