avx512bwintrin.h revision 344779
1/*===------------- avx512bwintrin.h - AVX512BW intrinsics ------------------=== 2 * 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a copy 5 * of this software and associated documentation files (the "Software"), to deal 6 * in the Software without restriction, including without limitation the rights 7 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 * copies of the Software, and to permit persons to whom the Software is 9 * furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 * THE SOFTWARE. 21 * 22 *===-----------------------------------------------------------------------=== 23 */ 24#ifndef __IMMINTRIN_H 25#error "Never use <avx512bwintrin.h> directly; include <immintrin.h> instead." 26#endif 27 28#ifndef __AVX512BWINTRIN_H 29#define __AVX512BWINTRIN_H 30 31typedef unsigned int __mmask32; 32typedef unsigned long long __mmask64; 33 34/* Define the default attributes for the functions in this file. */ 35#define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512bw"), __min_vector_width__(512))) 36#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512bw"))) 37 38static __inline __mmask32 __DEFAULT_FN_ATTRS 39_knot_mask32(__mmask32 __M) 40{ 41 return __builtin_ia32_knotsi(__M); 42} 43 44static __inline __mmask64 __DEFAULT_FN_ATTRS 45_knot_mask64(__mmask64 __M) 46{ 47 return __builtin_ia32_knotdi(__M); 48} 49 50static __inline__ __mmask32 __DEFAULT_FN_ATTRS 51_kand_mask32(__mmask32 __A, __mmask32 __B) 52{ 53 return (__mmask32)__builtin_ia32_kandsi((__mmask32)__A, (__mmask32)__B); 54} 55 56static __inline__ __mmask64 __DEFAULT_FN_ATTRS 57_kand_mask64(__mmask64 __A, __mmask64 __B) 58{ 59 return (__mmask64)__builtin_ia32_kanddi((__mmask64)__A, (__mmask64)__B); 60} 61 62static __inline__ __mmask32 __DEFAULT_FN_ATTRS 63_kandn_mask32(__mmask32 __A, __mmask32 __B) 64{ 65 return (__mmask32)__builtin_ia32_kandnsi((__mmask32)__A, (__mmask32)__B); 66} 67 68static __inline__ __mmask64 __DEFAULT_FN_ATTRS 69_kandn_mask64(__mmask64 __A, __mmask64 __B) 70{ 71 return (__mmask64)__builtin_ia32_kandndi((__mmask64)__A, (__mmask64)__B); 72} 73 74static __inline__ __mmask32 __DEFAULT_FN_ATTRS 75_kor_mask32(__mmask32 __A, __mmask32 __B) 76{ 77 return (__mmask32)__builtin_ia32_korsi((__mmask32)__A, (__mmask32)__B); 78} 79 80static __inline__ __mmask64 __DEFAULT_FN_ATTRS 81_kor_mask64(__mmask64 __A, __mmask64 __B) 82{ 83 return (__mmask64)__builtin_ia32_kordi((__mmask64)__A, (__mmask64)__B); 84} 85 86static __inline__ __mmask32 __DEFAULT_FN_ATTRS 87_kxnor_mask32(__mmask32 __A, __mmask32 __B) 88{ 89 return (__mmask32)__builtin_ia32_kxnorsi((__mmask32)__A, (__mmask32)__B); 90} 91 92static __inline__ __mmask64 __DEFAULT_FN_ATTRS 93_kxnor_mask64(__mmask64 __A, __mmask64 __B) 94{ 95 return (__mmask64)__builtin_ia32_kxnordi((__mmask64)__A, (__mmask64)__B); 96} 97 98static __inline__ __mmask32 __DEFAULT_FN_ATTRS 99_kxor_mask32(__mmask32 __A, __mmask32 __B) 100{ 101 return (__mmask32)__builtin_ia32_kxorsi((__mmask32)__A, (__mmask32)__B); 102} 103 104static __inline__ __mmask64 __DEFAULT_FN_ATTRS 105_kxor_mask64(__mmask64 __A, __mmask64 __B) 106{ 107 return (__mmask64)__builtin_ia32_kxordi((__mmask64)__A, (__mmask64)__B); 108} 109 110static __inline__ unsigned char __DEFAULT_FN_ATTRS 111_kortestc_mask32_u8(__mmask32 __A, __mmask32 __B) 112{ 113 return (unsigned char)__builtin_ia32_kortestcsi(__A, __B); 114} 115 116static __inline__ unsigned char __DEFAULT_FN_ATTRS 117_kortestz_mask32_u8(__mmask32 __A, __mmask32 __B) 118{ 119 return (unsigned char)__builtin_ia32_kortestzsi(__A, __B); 120} 121 122static __inline__ unsigned char __DEFAULT_FN_ATTRS 123_kortest_mask32_u8(__mmask32 __A, __mmask32 __B, unsigned char *__C) { 124 *__C = (unsigned char)__builtin_ia32_kortestcsi(__A, __B); 125 return (unsigned char)__builtin_ia32_kortestzsi(__A, __B); 126} 127 128static __inline__ unsigned char __DEFAULT_FN_ATTRS 129_kortestc_mask64_u8(__mmask64 __A, __mmask64 __B) 130{ 131 return (unsigned char)__builtin_ia32_kortestcdi(__A, __B); 132} 133 134static __inline__ unsigned char __DEFAULT_FN_ATTRS 135_kortestz_mask64_u8(__mmask64 __A, __mmask64 __B) 136{ 137 return (unsigned char)__builtin_ia32_kortestzdi(__A, __B); 138} 139 140static __inline__ unsigned char __DEFAULT_FN_ATTRS 141_kortest_mask64_u8(__mmask64 __A, __mmask64 __B, unsigned char *__C) { 142 *__C = (unsigned char)__builtin_ia32_kortestcdi(__A, __B); 143 return (unsigned char)__builtin_ia32_kortestzdi(__A, __B); 144} 145 146static __inline__ unsigned char __DEFAULT_FN_ATTRS 147_ktestc_mask32_u8(__mmask32 __A, __mmask32 __B) 148{ 149 return (unsigned char)__builtin_ia32_ktestcsi(__A, __B); 150} 151 152static __inline__ unsigned char __DEFAULT_FN_ATTRS 153_ktestz_mask32_u8(__mmask32 __A, __mmask32 __B) 154{ 155 return (unsigned char)__builtin_ia32_ktestzsi(__A, __B); 156} 157 158static __inline__ unsigned char __DEFAULT_FN_ATTRS 159_ktest_mask32_u8(__mmask32 __A, __mmask32 __B, unsigned char *__C) { 160 *__C = (unsigned char)__builtin_ia32_ktestcsi(__A, __B); 161 return (unsigned char)__builtin_ia32_ktestzsi(__A, __B); 162} 163 164static __inline__ unsigned char __DEFAULT_FN_ATTRS 165_ktestc_mask64_u8(__mmask64 __A, __mmask64 __B) 166{ 167 return (unsigned char)__builtin_ia32_ktestcdi(__A, __B); 168} 169 170static __inline__ unsigned char __DEFAULT_FN_ATTRS 171_ktestz_mask64_u8(__mmask64 __A, __mmask64 __B) 172{ 173 return (unsigned char)__builtin_ia32_ktestzdi(__A, __B); 174} 175 176static __inline__ unsigned char __DEFAULT_FN_ATTRS 177_ktest_mask64_u8(__mmask64 __A, __mmask64 __B, unsigned char *__C) { 178 *__C = (unsigned char)__builtin_ia32_ktestcdi(__A, __B); 179 return (unsigned char)__builtin_ia32_ktestzdi(__A, __B); 180} 181 182static __inline__ __mmask32 __DEFAULT_FN_ATTRS 183_kadd_mask32(__mmask32 __A, __mmask32 __B) 184{ 185 return (__mmask32)__builtin_ia32_kaddsi((__mmask32)__A, (__mmask32)__B); 186} 187 188static __inline__ __mmask64 __DEFAULT_FN_ATTRS 189_kadd_mask64(__mmask64 __A, __mmask64 __B) 190{ 191 return (__mmask64)__builtin_ia32_kadddi((__mmask64)__A, (__mmask64)__B); 192} 193 194#define _kshiftli_mask32(A, I) \ 195 (__mmask32)__builtin_ia32_kshiftlisi((__mmask32)(A), (unsigned int)(I)) 196 197#define _kshiftri_mask32(A, I) \ 198 (__mmask32)__builtin_ia32_kshiftrisi((__mmask32)(A), (unsigned int)(I)) 199 200#define _kshiftli_mask64(A, I) \ 201 (__mmask64)__builtin_ia32_kshiftlidi((__mmask64)(A), (unsigned int)(I)) 202 203#define _kshiftri_mask64(A, I) \ 204 (__mmask64)__builtin_ia32_kshiftridi((__mmask64)(A), (unsigned int)(I)) 205 206static __inline__ unsigned int __DEFAULT_FN_ATTRS 207_cvtmask32_u32(__mmask32 __A) { 208 return (unsigned int)__builtin_ia32_kmovd((__mmask32)__A); 209} 210 211static __inline__ unsigned long long __DEFAULT_FN_ATTRS 212_cvtmask64_u64(__mmask64 __A) { 213 return (unsigned long long)__builtin_ia32_kmovq((__mmask64)__A); 214} 215 216static __inline__ __mmask32 __DEFAULT_FN_ATTRS 217_cvtu32_mask32(unsigned int __A) { 218 return (__mmask32)__builtin_ia32_kmovd((__mmask32)__A); 219} 220 221static __inline__ __mmask64 __DEFAULT_FN_ATTRS 222_cvtu64_mask64(unsigned long long __A) { 223 return (__mmask64)__builtin_ia32_kmovq((__mmask64)__A); 224} 225 226static __inline__ __mmask32 __DEFAULT_FN_ATTRS 227_load_mask32(__mmask32 *__A) { 228 return (__mmask32)__builtin_ia32_kmovd(*(__mmask32 *)__A); 229} 230 231static __inline__ __mmask64 __DEFAULT_FN_ATTRS 232_load_mask64(__mmask64 *__A) { 233 return (__mmask64)__builtin_ia32_kmovq(*(__mmask64 *)__A); 234} 235 236static __inline__ void __DEFAULT_FN_ATTRS 237_store_mask32(__mmask32 *__A, __mmask32 __B) { 238 *(__mmask32 *)__A = __builtin_ia32_kmovd((__mmask32)__B); 239} 240 241static __inline__ void __DEFAULT_FN_ATTRS 242_store_mask64(__mmask64 *__A, __mmask64 __B) { 243 *(__mmask64 *)__A = __builtin_ia32_kmovq((__mmask64)__B); 244} 245 246/* Integer compare */ 247 248#define _mm512_cmp_epi8_mask(a, b, p) \ 249 (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \ 250 (__v64qi)(__m512i)(b), (int)(p), \ 251 (__mmask64)-1) 252 253#define _mm512_mask_cmp_epi8_mask(m, a, b, p) \ 254 (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \ 255 (__v64qi)(__m512i)(b), (int)(p), \ 256 (__mmask64)(m)) 257 258#define _mm512_cmp_epu8_mask(a, b, p) \ 259 (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)(__m512i)(a), \ 260 (__v64qi)(__m512i)(b), (int)(p), \ 261 (__mmask64)-1) 262 263#define _mm512_mask_cmp_epu8_mask(m, a, b, p) \ 264 (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)(__m512i)(a), \ 265 (__v64qi)(__m512i)(b), (int)(p), \ 266 (__mmask64)(m)) 267 268#define _mm512_cmp_epi16_mask(a, b, p) \ 269 (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)(__m512i)(a), \ 270 (__v32hi)(__m512i)(b), (int)(p), \ 271 (__mmask32)-1) 272 273#define _mm512_mask_cmp_epi16_mask(m, a, b, p) \ 274 (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)(__m512i)(a), \ 275 (__v32hi)(__m512i)(b), (int)(p), \ 276 (__mmask32)(m)) 277 278#define _mm512_cmp_epu16_mask(a, b, p) \ 279 (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)(__m512i)(a), \ 280 (__v32hi)(__m512i)(b), (int)(p), \ 281 (__mmask32)-1) 282 283#define _mm512_mask_cmp_epu16_mask(m, a, b, p) \ 284 (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)(__m512i)(a), \ 285 (__v32hi)(__m512i)(b), (int)(p), \ 286 (__mmask32)(m)) 287 288#define _mm512_cmpeq_epi8_mask(A, B) \ 289 _mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_EQ) 290#define _mm512_mask_cmpeq_epi8_mask(k, A, B) \ 291 _mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_EQ) 292#define _mm512_cmpge_epi8_mask(A, B) \ 293 _mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_GE) 294#define _mm512_mask_cmpge_epi8_mask(k, A, B) \ 295 _mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_GE) 296#define _mm512_cmpgt_epi8_mask(A, B) \ 297 _mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_GT) 298#define _mm512_mask_cmpgt_epi8_mask(k, A, B) \ 299 _mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_GT) 300#define _mm512_cmple_epi8_mask(A, B) \ 301 _mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_LE) 302#define _mm512_mask_cmple_epi8_mask(k, A, B) \ 303 _mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_LE) 304#define _mm512_cmplt_epi8_mask(A, B) \ 305 _mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_LT) 306#define _mm512_mask_cmplt_epi8_mask(k, A, B) \ 307 _mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_LT) 308#define _mm512_cmpneq_epi8_mask(A, B) \ 309 _mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_NE) 310#define _mm512_mask_cmpneq_epi8_mask(k, A, B) \ 311 _mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_NE) 312 313#define _mm512_cmpeq_epu8_mask(A, B) \ 314 _mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_EQ) 315#define _mm512_mask_cmpeq_epu8_mask(k, A, B) \ 316 _mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_EQ) 317#define _mm512_cmpge_epu8_mask(A, B) \ 318 _mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_GE) 319#define _mm512_mask_cmpge_epu8_mask(k, A, B) \ 320 _mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_GE) 321#define _mm512_cmpgt_epu8_mask(A, B) \ 322 _mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_GT) 323#define _mm512_mask_cmpgt_epu8_mask(k, A, B) \ 324 _mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_GT) 325#define _mm512_cmple_epu8_mask(A, B) \ 326 _mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_LE) 327#define _mm512_mask_cmple_epu8_mask(k, A, B) \ 328 _mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_LE) 329#define _mm512_cmplt_epu8_mask(A, B) \ 330 _mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_LT) 331#define _mm512_mask_cmplt_epu8_mask(k, A, B) \ 332 _mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_LT) 333#define _mm512_cmpneq_epu8_mask(A, B) \ 334 _mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_NE) 335#define _mm512_mask_cmpneq_epu8_mask(k, A, B) \ 336 _mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_NE) 337 338#define _mm512_cmpeq_epi16_mask(A, B) \ 339 _mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_EQ) 340#define _mm512_mask_cmpeq_epi16_mask(k, A, B) \ 341 _mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_EQ) 342#define _mm512_cmpge_epi16_mask(A, B) \ 343 _mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_GE) 344#define _mm512_mask_cmpge_epi16_mask(k, A, B) \ 345 _mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_GE) 346#define _mm512_cmpgt_epi16_mask(A, B) \ 347 _mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_GT) 348#define _mm512_mask_cmpgt_epi16_mask(k, A, B) \ 349 _mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_GT) 350#define _mm512_cmple_epi16_mask(A, B) \ 351 _mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_LE) 352#define _mm512_mask_cmple_epi16_mask(k, A, B) \ 353 _mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_LE) 354#define _mm512_cmplt_epi16_mask(A, B) \ 355 _mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_LT) 356#define _mm512_mask_cmplt_epi16_mask(k, A, B) \ 357 _mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_LT) 358#define _mm512_cmpneq_epi16_mask(A, B) \ 359 _mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_NE) 360#define _mm512_mask_cmpneq_epi16_mask(k, A, B) \ 361 _mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_NE) 362 363#define _mm512_cmpeq_epu16_mask(A, B) \ 364 _mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_EQ) 365#define _mm512_mask_cmpeq_epu16_mask(k, A, B) \ 366 _mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_EQ) 367#define _mm512_cmpge_epu16_mask(A, B) \ 368 _mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_GE) 369#define _mm512_mask_cmpge_epu16_mask(k, A, B) \ 370 _mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_GE) 371#define _mm512_cmpgt_epu16_mask(A, B) \ 372 _mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_GT) 373#define _mm512_mask_cmpgt_epu16_mask(k, A, B) \ 374 _mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_GT) 375#define _mm512_cmple_epu16_mask(A, B) \ 376 _mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_LE) 377#define _mm512_mask_cmple_epu16_mask(k, A, B) \ 378 _mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_LE) 379#define _mm512_cmplt_epu16_mask(A, B) \ 380 _mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_LT) 381#define _mm512_mask_cmplt_epu16_mask(k, A, B) \ 382 _mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_LT) 383#define _mm512_cmpneq_epu16_mask(A, B) \ 384 _mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_NE) 385#define _mm512_mask_cmpneq_epu16_mask(k, A, B) \ 386 _mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_NE) 387 388static __inline__ __m512i __DEFAULT_FN_ATTRS512 389_mm512_add_epi8 (__m512i __A, __m512i __B) { 390 return (__m512i) ((__v64qu) __A + (__v64qu) __B); 391} 392 393static __inline__ __m512i __DEFAULT_FN_ATTRS512 394_mm512_mask_add_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { 395 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 396 (__v64qi)_mm512_add_epi8(__A, __B), 397 (__v64qi)__W); 398} 399 400static __inline__ __m512i __DEFAULT_FN_ATTRS512 401_mm512_maskz_add_epi8(__mmask64 __U, __m512i __A, __m512i __B) { 402 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 403 (__v64qi)_mm512_add_epi8(__A, __B), 404 (__v64qi)_mm512_setzero_si512()); 405} 406 407static __inline__ __m512i __DEFAULT_FN_ATTRS512 408_mm512_sub_epi8 (__m512i __A, __m512i __B) { 409 return (__m512i) ((__v64qu) __A - (__v64qu) __B); 410} 411 412static __inline__ __m512i __DEFAULT_FN_ATTRS512 413_mm512_mask_sub_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { 414 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 415 (__v64qi)_mm512_sub_epi8(__A, __B), 416 (__v64qi)__W); 417} 418 419static __inline__ __m512i __DEFAULT_FN_ATTRS512 420_mm512_maskz_sub_epi8(__mmask64 __U, __m512i __A, __m512i __B) { 421 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 422 (__v64qi)_mm512_sub_epi8(__A, __B), 423 (__v64qi)_mm512_setzero_si512()); 424} 425 426static __inline__ __m512i __DEFAULT_FN_ATTRS512 427_mm512_add_epi16 (__m512i __A, __m512i __B) { 428 return (__m512i) ((__v32hu) __A + (__v32hu) __B); 429} 430 431static __inline__ __m512i __DEFAULT_FN_ATTRS512 432_mm512_mask_add_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { 433 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 434 (__v32hi)_mm512_add_epi16(__A, __B), 435 (__v32hi)__W); 436} 437 438static __inline__ __m512i __DEFAULT_FN_ATTRS512 439_mm512_maskz_add_epi16(__mmask32 __U, __m512i __A, __m512i __B) { 440 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 441 (__v32hi)_mm512_add_epi16(__A, __B), 442 (__v32hi)_mm512_setzero_si512()); 443} 444 445static __inline__ __m512i __DEFAULT_FN_ATTRS512 446_mm512_sub_epi16 (__m512i __A, __m512i __B) { 447 return (__m512i) ((__v32hu) __A - (__v32hu) __B); 448} 449 450static __inline__ __m512i __DEFAULT_FN_ATTRS512 451_mm512_mask_sub_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { 452 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 453 (__v32hi)_mm512_sub_epi16(__A, __B), 454 (__v32hi)__W); 455} 456 457static __inline__ __m512i __DEFAULT_FN_ATTRS512 458_mm512_maskz_sub_epi16(__mmask32 __U, __m512i __A, __m512i __B) { 459 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 460 (__v32hi)_mm512_sub_epi16(__A, __B), 461 (__v32hi)_mm512_setzero_si512()); 462} 463 464static __inline__ __m512i __DEFAULT_FN_ATTRS512 465_mm512_mullo_epi16 (__m512i __A, __m512i __B) { 466 return (__m512i) ((__v32hu) __A * (__v32hu) __B); 467} 468 469static __inline__ __m512i __DEFAULT_FN_ATTRS512 470_mm512_mask_mullo_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { 471 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 472 (__v32hi)_mm512_mullo_epi16(__A, __B), 473 (__v32hi)__W); 474} 475 476static __inline__ __m512i __DEFAULT_FN_ATTRS512 477_mm512_maskz_mullo_epi16(__mmask32 __U, __m512i __A, __m512i __B) { 478 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 479 (__v32hi)_mm512_mullo_epi16(__A, __B), 480 (__v32hi)_mm512_setzero_si512()); 481} 482 483static __inline__ __m512i __DEFAULT_FN_ATTRS512 484_mm512_mask_blend_epi8 (__mmask64 __U, __m512i __A, __m512i __W) 485{ 486 return (__m512i) __builtin_ia32_selectb_512 ((__mmask64) __U, 487 (__v64qi) __W, 488 (__v64qi) __A); 489} 490 491static __inline__ __m512i __DEFAULT_FN_ATTRS512 492_mm512_mask_blend_epi16 (__mmask32 __U, __m512i __A, __m512i __W) 493{ 494 return (__m512i) __builtin_ia32_selectw_512 ((__mmask32) __U, 495 (__v32hi) __W, 496 (__v32hi) __A); 497} 498 499static __inline__ __m512i __DEFAULT_FN_ATTRS512 500_mm512_abs_epi8 (__m512i __A) 501{ 502 return (__m512i)__builtin_ia32_pabsb512((__v64qi)__A); 503} 504 505static __inline__ __m512i __DEFAULT_FN_ATTRS512 506_mm512_mask_abs_epi8 (__m512i __W, __mmask64 __U, __m512i __A) 507{ 508 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 509 (__v64qi)_mm512_abs_epi8(__A), 510 (__v64qi)__W); 511} 512 513static __inline__ __m512i __DEFAULT_FN_ATTRS512 514_mm512_maskz_abs_epi8 (__mmask64 __U, __m512i __A) 515{ 516 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 517 (__v64qi)_mm512_abs_epi8(__A), 518 (__v64qi)_mm512_setzero_si512()); 519} 520 521static __inline__ __m512i __DEFAULT_FN_ATTRS512 522_mm512_abs_epi16 (__m512i __A) 523{ 524 return (__m512i)__builtin_ia32_pabsw512((__v32hi)__A); 525} 526 527static __inline__ __m512i __DEFAULT_FN_ATTRS512 528_mm512_mask_abs_epi16 (__m512i __W, __mmask32 __U, __m512i __A) 529{ 530 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 531 (__v32hi)_mm512_abs_epi16(__A), 532 (__v32hi)__W); 533} 534 535static __inline__ __m512i __DEFAULT_FN_ATTRS512 536_mm512_maskz_abs_epi16 (__mmask32 __U, __m512i __A) 537{ 538 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 539 (__v32hi)_mm512_abs_epi16(__A), 540 (__v32hi)_mm512_setzero_si512()); 541} 542 543static __inline__ __m512i __DEFAULT_FN_ATTRS512 544_mm512_packs_epi32(__m512i __A, __m512i __B) 545{ 546 return (__m512i)__builtin_ia32_packssdw512((__v16si)__A, (__v16si)__B); 547} 548 549static __inline__ __m512i __DEFAULT_FN_ATTRS512 550_mm512_maskz_packs_epi32(__mmask32 __M, __m512i __A, __m512i __B) 551{ 552 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 553 (__v32hi)_mm512_packs_epi32(__A, __B), 554 (__v32hi)_mm512_setzero_si512()); 555} 556 557static __inline__ __m512i __DEFAULT_FN_ATTRS512 558_mm512_mask_packs_epi32(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) 559{ 560 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 561 (__v32hi)_mm512_packs_epi32(__A, __B), 562 (__v32hi)__W); 563} 564 565static __inline__ __m512i __DEFAULT_FN_ATTRS512 566_mm512_packs_epi16(__m512i __A, __m512i __B) 567{ 568 return (__m512i)__builtin_ia32_packsswb512((__v32hi)__A, (__v32hi) __B); 569} 570 571static __inline__ __m512i __DEFAULT_FN_ATTRS512 572_mm512_mask_packs_epi16(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) 573{ 574 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 575 (__v64qi)_mm512_packs_epi16(__A, __B), 576 (__v64qi)__W); 577} 578 579static __inline__ __m512i __DEFAULT_FN_ATTRS512 580_mm512_maskz_packs_epi16(__mmask64 __M, __m512i __A, __m512i __B) 581{ 582 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 583 (__v64qi)_mm512_packs_epi16(__A, __B), 584 (__v64qi)_mm512_setzero_si512()); 585} 586 587static __inline__ __m512i __DEFAULT_FN_ATTRS512 588_mm512_packus_epi32(__m512i __A, __m512i __B) 589{ 590 return (__m512i)__builtin_ia32_packusdw512((__v16si) __A, (__v16si) __B); 591} 592 593static __inline__ __m512i __DEFAULT_FN_ATTRS512 594_mm512_maskz_packus_epi32(__mmask32 __M, __m512i __A, __m512i __B) 595{ 596 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 597 (__v32hi)_mm512_packus_epi32(__A, __B), 598 (__v32hi)_mm512_setzero_si512()); 599} 600 601static __inline__ __m512i __DEFAULT_FN_ATTRS512 602_mm512_mask_packus_epi32(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) 603{ 604 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 605 (__v32hi)_mm512_packus_epi32(__A, __B), 606 (__v32hi)__W); 607} 608 609static __inline__ __m512i __DEFAULT_FN_ATTRS512 610_mm512_packus_epi16(__m512i __A, __m512i __B) 611{ 612 return (__m512i)__builtin_ia32_packuswb512((__v32hi) __A, (__v32hi) __B); 613} 614 615static __inline__ __m512i __DEFAULT_FN_ATTRS512 616_mm512_mask_packus_epi16(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) 617{ 618 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 619 (__v64qi)_mm512_packus_epi16(__A, __B), 620 (__v64qi)__W); 621} 622 623static __inline__ __m512i __DEFAULT_FN_ATTRS512 624_mm512_maskz_packus_epi16(__mmask64 __M, __m512i __A, __m512i __B) 625{ 626 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 627 (__v64qi)_mm512_packus_epi16(__A, __B), 628 (__v64qi)_mm512_setzero_si512()); 629} 630 631static __inline__ __m512i __DEFAULT_FN_ATTRS512 632_mm512_adds_epi8 (__m512i __A, __m512i __B) 633{ 634 return (__m512i)__builtin_ia32_paddsb512((__v64qi)__A, (__v64qi)__B); 635} 636 637static __inline__ __m512i __DEFAULT_FN_ATTRS512 638_mm512_mask_adds_epi8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) 639{ 640 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 641 (__v64qi)_mm512_adds_epi8(__A, __B), 642 (__v64qi)__W); 643} 644 645static __inline__ __m512i __DEFAULT_FN_ATTRS512 646_mm512_maskz_adds_epi8 (__mmask64 __U, __m512i __A, __m512i __B) 647{ 648 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 649 (__v64qi)_mm512_adds_epi8(__A, __B), 650 (__v64qi)_mm512_setzero_si512()); 651} 652 653static __inline__ __m512i __DEFAULT_FN_ATTRS512 654_mm512_adds_epi16 (__m512i __A, __m512i __B) 655{ 656 return (__m512i)__builtin_ia32_paddsw512((__v32hi)__A, (__v32hi)__B); 657} 658 659static __inline__ __m512i __DEFAULT_FN_ATTRS512 660_mm512_mask_adds_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) 661{ 662 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 663 (__v32hi)_mm512_adds_epi16(__A, __B), 664 (__v32hi)__W); 665} 666 667static __inline__ __m512i __DEFAULT_FN_ATTRS512 668_mm512_maskz_adds_epi16 (__mmask32 __U, __m512i __A, __m512i __B) 669{ 670 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 671 (__v32hi)_mm512_adds_epi16(__A, __B), 672 (__v32hi)_mm512_setzero_si512()); 673} 674 675static __inline__ __m512i __DEFAULT_FN_ATTRS512 676_mm512_adds_epu8 (__m512i __A, __m512i __B) 677{ 678 return (__m512i)__builtin_ia32_paddusb512((__v64qi) __A, (__v64qi) __B); 679} 680 681static __inline__ __m512i __DEFAULT_FN_ATTRS512 682_mm512_mask_adds_epu8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) 683{ 684 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 685 (__v64qi)_mm512_adds_epu8(__A, __B), 686 (__v64qi)__W); 687} 688 689static __inline__ __m512i __DEFAULT_FN_ATTRS512 690_mm512_maskz_adds_epu8 (__mmask64 __U, __m512i __A, __m512i __B) 691{ 692 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 693 (__v64qi)_mm512_adds_epu8(__A, __B), 694 (__v64qi)_mm512_setzero_si512()); 695} 696 697static __inline__ __m512i __DEFAULT_FN_ATTRS512 698_mm512_adds_epu16 (__m512i __A, __m512i __B) 699{ 700 return (__m512i)__builtin_ia32_paddusw512((__v32hi) __A, (__v32hi) __B); 701} 702 703static __inline__ __m512i __DEFAULT_FN_ATTRS512 704_mm512_mask_adds_epu16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) 705{ 706 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 707 (__v32hi)_mm512_adds_epu16(__A, __B), 708 (__v32hi)__W); 709} 710 711static __inline__ __m512i __DEFAULT_FN_ATTRS512 712_mm512_maskz_adds_epu16 (__mmask32 __U, __m512i __A, __m512i __B) 713{ 714 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 715 (__v32hi)_mm512_adds_epu16(__A, __B), 716 (__v32hi)_mm512_setzero_si512()); 717} 718 719static __inline__ __m512i __DEFAULT_FN_ATTRS512 720_mm512_avg_epu8 (__m512i __A, __m512i __B) 721{ 722 typedef unsigned short __v64hu __attribute__((__vector_size__(128))); 723 return (__m512i)__builtin_convertvector( 724 ((__builtin_convertvector((__v64qu) __A, __v64hu) + 725 __builtin_convertvector((__v64qu) __B, __v64hu)) + 1) 726 >> 1, __v64qu); 727} 728 729static __inline__ __m512i __DEFAULT_FN_ATTRS512 730_mm512_mask_avg_epu8 (__m512i __W, __mmask64 __U, __m512i __A, 731 __m512i __B) 732{ 733 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 734 (__v64qi)_mm512_avg_epu8(__A, __B), 735 (__v64qi)__W); 736} 737 738static __inline__ __m512i __DEFAULT_FN_ATTRS512 739_mm512_maskz_avg_epu8 (__mmask64 __U, __m512i __A, __m512i __B) 740{ 741 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 742 (__v64qi)_mm512_avg_epu8(__A, __B), 743 (__v64qi)_mm512_setzero_si512()); 744} 745 746static __inline__ __m512i __DEFAULT_FN_ATTRS512 747_mm512_avg_epu16 (__m512i __A, __m512i __B) 748{ 749 typedef unsigned int __v32su __attribute__((__vector_size__(128))); 750 return (__m512i)__builtin_convertvector( 751 ((__builtin_convertvector((__v32hu) __A, __v32su) + 752 __builtin_convertvector((__v32hu) __B, __v32su)) + 1) 753 >> 1, __v32hu); 754} 755 756static __inline__ __m512i __DEFAULT_FN_ATTRS512 757_mm512_mask_avg_epu16 (__m512i __W, __mmask32 __U, __m512i __A, 758 __m512i __B) 759{ 760 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 761 (__v32hi)_mm512_avg_epu16(__A, __B), 762 (__v32hi)__W); 763} 764 765static __inline__ __m512i __DEFAULT_FN_ATTRS512 766_mm512_maskz_avg_epu16 (__mmask32 __U, __m512i __A, __m512i __B) 767{ 768 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 769 (__v32hi)_mm512_avg_epu16(__A, __B), 770 (__v32hi) _mm512_setzero_si512()); 771} 772 773static __inline__ __m512i __DEFAULT_FN_ATTRS512 774_mm512_max_epi8 (__m512i __A, __m512i __B) 775{ 776 return (__m512i)__builtin_ia32_pmaxsb512((__v64qi) __A, (__v64qi) __B); 777} 778 779static __inline__ __m512i __DEFAULT_FN_ATTRS512 780_mm512_maskz_max_epi8 (__mmask64 __M, __m512i __A, __m512i __B) 781{ 782 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 783 (__v64qi)_mm512_max_epi8(__A, __B), 784 (__v64qi)_mm512_setzero_si512()); 785} 786 787static __inline__ __m512i __DEFAULT_FN_ATTRS512 788_mm512_mask_max_epi8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) 789{ 790 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 791 (__v64qi)_mm512_max_epi8(__A, __B), 792 (__v64qi)__W); 793} 794 795static __inline__ __m512i __DEFAULT_FN_ATTRS512 796_mm512_max_epi16 (__m512i __A, __m512i __B) 797{ 798 return (__m512i)__builtin_ia32_pmaxsw512((__v32hi) __A, (__v32hi) __B); 799} 800 801static __inline__ __m512i __DEFAULT_FN_ATTRS512 802_mm512_maskz_max_epi16 (__mmask32 __M, __m512i __A, __m512i __B) 803{ 804 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 805 (__v32hi)_mm512_max_epi16(__A, __B), 806 (__v32hi)_mm512_setzero_si512()); 807} 808 809static __inline__ __m512i __DEFAULT_FN_ATTRS512 810_mm512_mask_max_epi16 (__m512i __W, __mmask32 __M, __m512i __A, 811 __m512i __B) 812{ 813 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 814 (__v32hi)_mm512_max_epi16(__A, __B), 815 (__v32hi)__W); 816} 817 818static __inline__ __m512i __DEFAULT_FN_ATTRS512 819_mm512_max_epu8 (__m512i __A, __m512i __B) 820{ 821 return (__m512i)__builtin_ia32_pmaxub512((__v64qi)__A, (__v64qi)__B); 822} 823 824static __inline__ __m512i __DEFAULT_FN_ATTRS512 825_mm512_maskz_max_epu8 (__mmask64 __M, __m512i __A, __m512i __B) 826{ 827 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 828 (__v64qi)_mm512_max_epu8(__A, __B), 829 (__v64qi)_mm512_setzero_si512()); 830} 831 832static __inline__ __m512i __DEFAULT_FN_ATTRS512 833_mm512_mask_max_epu8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) 834{ 835 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 836 (__v64qi)_mm512_max_epu8(__A, __B), 837 (__v64qi)__W); 838} 839 840static __inline__ __m512i __DEFAULT_FN_ATTRS512 841_mm512_max_epu16 (__m512i __A, __m512i __B) 842{ 843 return (__m512i)__builtin_ia32_pmaxuw512((__v32hi)__A, (__v32hi)__B); 844} 845 846static __inline__ __m512i __DEFAULT_FN_ATTRS512 847_mm512_maskz_max_epu16 (__mmask32 __M, __m512i __A, __m512i __B) 848{ 849 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 850 (__v32hi)_mm512_max_epu16(__A, __B), 851 (__v32hi)_mm512_setzero_si512()); 852} 853 854static __inline__ __m512i __DEFAULT_FN_ATTRS512 855_mm512_mask_max_epu16 (__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) 856{ 857 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 858 (__v32hi)_mm512_max_epu16(__A, __B), 859 (__v32hi)__W); 860} 861 862static __inline__ __m512i __DEFAULT_FN_ATTRS512 863_mm512_min_epi8 (__m512i __A, __m512i __B) 864{ 865 return (__m512i)__builtin_ia32_pminsb512((__v64qi) __A, (__v64qi) __B); 866} 867 868static __inline__ __m512i __DEFAULT_FN_ATTRS512 869_mm512_maskz_min_epi8 (__mmask64 __M, __m512i __A, __m512i __B) 870{ 871 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 872 (__v64qi)_mm512_min_epi8(__A, __B), 873 (__v64qi)_mm512_setzero_si512()); 874} 875 876static __inline__ __m512i __DEFAULT_FN_ATTRS512 877_mm512_mask_min_epi8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) 878{ 879 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 880 (__v64qi)_mm512_min_epi8(__A, __B), 881 (__v64qi)__W); 882} 883 884static __inline__ __m512i __DEFAULT_FN_ATTRS512 885_mm512_min_epi16 (__m512i __A, __m512i __B) 886{ 887 return (__m512i)__builtin_ia32_pminsw512((__v32hi) __A, (__v32hi) __B); 888} 889 890static __inline__ __m512i __DEFAULT_FN_ATTRS512 891_mm512_maskz_min_epi16 (__mmask32 __M, __m512i __A, __m512i __B) 892{ 893 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 894 (__v32hi)_mm512_min_epi16(__A, __B), 895 (__v32hi)_mm512_setzero_si512()); 896} 897 898static __inline__ __m512i __DEFAULT_FN_ATTRS512 899_mm512_mask_min_epi16 (__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) 900{ 901 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 902 (__v32hi)_mm512_min_epi16(__A, __B), 903 (__v32hi)__W); 904} 905 906static __inline__ __m512i __DEFAULT_FN_ATTRS512 907_mm512_min_epu8 (__m512i __A, __m512i __B) 908{ 909 return (__m512i)__builtin_ia32_pminub512((__v64qi)__A, (__v64qi)__B); 910} 911 912static __inline__ __m512i __DEFAULT_FN_ATTRS512 913_mm512_maskz_min_epu8 (__mmask64 __M, __m512i __A, __m512i __B) 914{ 915 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 916 (__v64qi)_mm512_min_epu8(__A, __B), 917 (__v64qi)_mm512_setzero_si512()); 918} 919 920static __inline__ __m512i __DEFAULT_FN_ATTRS512 921_mm512_mask_min_epu8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) 922{ 923 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 924 (__v64qi)_mm512_min_epu8(__A, __B), 925 (__v64qi)__W); 926} 927 928static __inline__ __m512i __DEFAULT_FN_ATTRS512 929_mm512_min_epu16 (__m512i __A, __m512i __B) 930{ 931 return (__m512i)__builtin_ia32_pminuw512((__v32hi)__A, (__v32hi)__B); 932} 933 934static __inline__ __m512i __DEFAULT_FN_ATTRS512 935_mm512_maskz_min_epu16 (__mmask32 __M, __m512i __A, __m512i __B) 936{ 937 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 938 (__v32hi)_mm512_min_epu16(__A, __B), 939 (__v32hi)_mm512_setzero_si512()); 940} 941 942static __inline__ __m512i __DEFAULT_FN_ATTRS512 943_mm512_mask_min_epu16 (__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) 944{ 945 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 946 (__v32hi)_mm512_min_epu16(__A, __B), 947 (__v32hi)__W); 948} 949 950static __inline__ __m512i __DEFAULT_FN_ATTRS512 951_mm512_shuffle_epi8(__m512i __A, __m512i __B) 952{ 953 return (__m512i)__builtin_ia32_pshufb512((__v64qi)__A,(__v64qi)__B); 954} 955 956static __inline__ __m512i __DEFAULT_FN_ATTRS512 957_mm512_mask_shuffle_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) 958{ 959 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 960 (__v64qi)_mm512_shuffle_epi8(__A, __B), 961 (__v64qi)__W); 962} 963 964static __inline__ __m512i __DEFAULT_FN_ATTRS512 965_mm512_maskz_shuffle_epi8(__mmask64 __U, __m512i __A, __m512i __B) 966{ 967 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 968 (__v64qi)_mm512_shuffle_epi8(__A, __B), 969 (__v64qi)_mm512_setzero_si512()); 970} 971 972static __inline__ __m512i __DEFAULT_FN_ATTRS512 973_mm512_subs_epi8 (__m512i __A, __m512i __B) 974{ 975 return (__m512i)__builtin_ia32_psubsb512((__v64qi)__A, (__v64qi)__B); 976} 977 978static __inline__ __m512i __DEFAULT_FN_ATTRS512 979_mm512_mask_subs_epi8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) 980{ 981 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 982 (__v64qi)_mm512_subs_epi8(__A, __B), 983 (__v64qi)__W); 984} 985 986static __inline__ __m512i __DEFAULT_FN_ATTRS512 987_mm512_maskz_subs_epi8 (__mmask64 __U, __m512i __A, __m512i __B) 988{ 989 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 990 (__v64qi)_mm512_subs_epi8(__A, __B), 991 (__v64qi)_mm512_setzero_si512()); 992} 993 994static __inline__ __m512i __DEFAULT_FN_ATTRS512 995_mm512_subs_epi16 (__m512i __A, __m512i __B) 996{ 997 return (__m512i)__builtin_ia32_psubsw512((__v32hi)__A, (__v32hi)__B); 998} 999 1000static __inline__ __m512i __DEFAULT_FN_ATTRS512 1001_mm512_mask_subs_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) 1002{ 1003 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1004 (__v32hi)_mm512_subs_epi16(__A, __B), 1005 (__v32hi)__W); 1006} 1007 1008static __inline__ __m512i __DEFAULT_FN_ATTRS512 1009_mm512_maskz_subs_epi16 (__mmask32 __U, __m512i __A, __m512i __B) 1010{ 1011 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1012 (__v32hi)_mm512_subs_epi16(__A, __B), 1013 (__v32hi)_mm512_setzero_si512()); 1014} 1015 1016static __inline__ __m512i __DEFAULT_FN_ATTRS512 1017_mm512_subs_epu8 (__m512i __A, __m512i __B) 1018{ 1019 return (__m512i)__builtin_ia32_psubusb512((__v64qi) __A, (__v64qi) __B); 1020} 1021 1022static __inline__ __m512i __DEFAULT_FN_ATTRS512 1023_mm512_mask_subs_epu8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) 1024{ 1025 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 1026 (__v64qi)_mm512_subs_epu8(__A, __B), 1027 (__v64qi)__W); 1028} 1029 1030static __inline__ __m512i __DEFAULT_FN_ATTRS512 1031_mm512_maskz_subs_epu8 (__mmask64 __U, __m512i __A, __m512i __B) 1032{ 1033 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 1034 (__v64qi)_mm512_subs_epu8(__A, __B), 1035 (__v64qi)_mm512_setzero_si512()); 1036} 1037 1038static __inline__ __m512i __DEFAULT_FN_ATTRS512 1039_mm512_subs_epu16 (__m512i __A, __m512i __B) 1040{ 1041 return (__m512i)__builtin_ia32_psubusw512((__v32hi) __A, (__v32hi) __B); 1042} 1043 1044static __inline__ __m512i __DEFAULT_FN_ATTRS512 1045_mm512_mask_subs_epu16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) 1046{ 1047 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1048 (__v32hi)_mm512_subs_epu16(__A, __B), 1049 (__v32hi)__W); 1050} 1051 1052static __inline__ __m512i __DEFAULT_FN_ATTRS512 1053_mm512_maskz_subs_epu16 (__mmask32 __U, __m512i __A, __m512i __B) 1054{ 1055 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1056 (__v32hi)_mm512_subs_epu16(__A, __B), 1057 (__v32hi)_mm512_setzero_si512()); 1058} 1059 1060static __inline__ __m512i __DEFAULT_FN_ATTRS512 1061_mm512_permutex2var_epi16(__m512i __A, __m512i __I, __m512i __B) 1062{ 1063 return (__m512i)__builtin_ia32_vpermi2varhi512((__v32hi)__A, (__v32hi)__I, 1064 (__v32hi)__B); 1065} 1066 1067static __inline__ __m512i __DEFAULT_FN_ATTRS512 1068_mm512_mask_permutex2var_epi16(__m512i __A, __mmask32 __U, __m512i __I, 1069 __m512i __B) 1070{ 1071 return (__m512i)__builtin_ia32_selectw_512(__U, 1072 (__v32hi)_mm512_permutex2var_epi16(__A, __I, __B), 1073 (__v32hi)__A); 1074} 1075 1076static __inline__ __m512i __DEFAULT_FN_ATTRS512 1077_mm512_mask2_permutex2var_epi16(__m512i __A, __m512i __I, __mmask32 __U, 1078 __m512i __B) 1079{ 1080 return (__m512i)__builtin_ia32_selectw_512(__U, 1081 (__v32hi)_mm512_permutex2var_epi16(__A, __I, __B), 1082 (__v32hi)__I); 1083} 1084 1085static __inline__ __m512i __DEFAULT_FN_ATTRS512 1086_mm512_maskz_permutex2var_epi16(__mmask32 __U, __m512i __A, __m512i __I, 1087 __m512i __B) 1088{ 1089 return (__m512i)__builtin_ia32_selectw_512(__U, 1090 (__v32hi)_mm512_permutex2var_epi16(__A, __I, __B), 1091 (__v32hi)_mm512_setzero_si512()); 1092} 1093 1094static __inline__ __m512i __DEFAULT_FN_ATTRS512 1095_mm512_mulhrs_epi16(__m512i __A, __m512i __B) 1096{ 1097 return (__m512i)__builtin_ia32_pmulhrsw512((__v32hi)__A, (__v32hi)__B); 1098} 1099 1100static __inline__ __m512i __DEFAULT_FN_ATTRS512 1101_mm512_mask_mulhrs_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) 1102{ 1103 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1104 (__v32hi)_mm512_mulhrs_epi16(__A, __B), 1105 (__v32hi)__W); 1106} 1107 1108static __inline__ __m512i __DEFAULT_FN_ATTRS512 1109_mm512_maskz_mulhrs_epi16(__mmask32 __U, __m512i __A, __m512i __B) 1110{ 1111 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1112 (__v32hi)_mm512_mulhrs_epi16(__A, __B), 1113 (__v32hi)_mm512_setzero_si512()); 1114} 1115 1116static __inline__ __m512i __DEFAULT_FN_ATTRS512 1117_mm512_mulhi_epi16(__m512i __A, __m512i __B) 1118{ 1119 return (__m512i)__builtin_ia32_pmulhw512((__v32hi) __A, (__v32hi) __B); 1120} 1121 1122static __inline__ __m512i __DEFAULT_FN_ATTRS512 1123_mm512_mask_mulhi_epi16(__m512i __W, __mmask32 __U, __m512i __A, 1124 __m512i __B) 1125{ 1126 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1127 (__v32hi)_mm512_mulhi_epi16(__A, __B), 1128 (__v32hi)__W); 1129} 1130 1131static __inline__ __m512i __DEFAULT_FN_ATTRS512 1132_mm512_maskz_mulhi_epi16(__mmask32 __U, __m512i __A, __m512i __B) 1133{ 1134 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1135 (__v32hi)_mm512_mulhi_epi16(__A, __B), 1136 (__v32hi)_mm512_setzero_si512()); 1137} 1138 1139static __inline__ __m512i __DEFAULT_FN_ATTRS512 1140_mm512_mulhi_epu16(__m512i __A, __m512i __B) 1141{ 1142 return (__m512i)__builtin_ia32_pmulhuw512((__v32hi) __A, (__v32hi) __B); 1143} 1144 1145static __inline__ __m512i __DEFAULT_FN_ATTRS512 1146_mm512_mask_mulhi_epu16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) 1147{ 1148 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1149 (__v32hi)_mm512_mulhi_epu16(__A, __B), 1150 (__v32hi)__W); 1151} 1152 1153static __inline__ __m512i __DEFAULT_FN_ATTRS512 1154_mm512_maskz_mulhi_epu16 (__mmask32 __U, __m512i __A, __m512i __B) 1155{ 1156 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1157 (__v32hi)_mm512_mulhi_epu16(__A, __B), 1158 (__v32hi)_mm512_setzero_si512()); 1159} 1160 1161static __inline__ __m512i __DEFAULT_FN_ATTRS512 1162_mm512_maddubs_epi16(__m512i __X, __m512i __Y) { 1163 return (__m512i)__builtin_ia32_pmaddubsw512((__v64qi)__X, (__v64qi)__Y); 1164} 1165 1166static __inline__ __m512i __DEFAULT_FN_ATTRS512 1167_mm512_mask_maddubs_epi16(__m512i __W, __mmask32 __U, __m512i __X, 1168 __m512i __Y) { 1169 return (__m512i)__builtin_ia32_selectw_512((__mmask32) __U, 1170 (__v32hi)_mm512_maddubs_epi16(__X, __Y), 1171 (__v32hi)__W); 1172} 1173 1174static __inline__ __m512i __DEFAULT_FN_ATTRS512 1175_mm512_maskz_maddubs_epi16(__mmask32 __U, __m512i __X, __m512i __Y) { 1176 return (__m512i)__builtin_ia32_selectw_512((__mmask32) __U, 1177 (__v32hi)_mm512_maddubs_epi16(__X, __Y), 1178 (__v32hi)_mm512_setzero_si512()); 1179} 1180 1181static __inline__ __m512i __DEFAULT_FN_ATTRS512 1182_mm512_madd_epi16(__m512i __A, __m512i __B) { 1183 return (__m512i)__builtin_ia32_pmaddwd512((__v32hi)__A, (__v32hi)__B); 1184} 1185 1186static __inline__ __m512i __DEFAULT_FN_ATTRS512 1187_mm512_mask_madd_epi16(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) { 1188 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 1189 (__v16si)_mm512_madd_epi16(__A, __B), 1190 (__v16si)__W); 1191} 1192 1193static __inline__ __m512i __DEFAULT_FN_ATTRS512 1194_mm512_maskz_madd_epi16(__mmask16 __U, __m512i __A, __m512i __B) { 1195 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 1196 (__v16si)_mm512_madd_epi16(__A, __B), 1197 (__v16si)_mm512_setzero_si512()); 1198} 1199 1200static __inline__ __m256i __DEFAULT_FN_ATTRS512 1201_mm512_cvtsepi16_epi8 (__m512i __A) { 1202 return (__m256i) __builtin_ia32_pmovswb512_mask ((__v32hi) __A, 1203 (__v32qi)_mm256_setzero_si256(), 1204 (__mmask32) -1); 1205} 1206 1207static __inline__ __m256i __DEFAULT_FN_ATTRS512 1208_mm512_mask_cvtsepi16_epi8 (__m256i __O, __mmask32 __M, __m512i __A) { 1209 return (__m256i) __builtin_ia32_pmovswb512_mask ((__v32hi) __A, 1210 (__v32qi)__O, 1211 __M); 1212} 1213 1214static __inline__ __m256i __DEFAULT_FN_ATTRS512 1215_mm512_maskz_cvtsepi16_epi8 (__mmask32 __M, __m512i __A) { 1216 return (__m256i) __builtin_ia32_pmovswb512_mask ((__v32hi) __A, 1217 (__v32qi) _mm256_setzero_si256(), 1218 __M); 1219} 1220 1221static __inline__ __m256i __DEFAULT_FN_ATTRS512 1222_mm512_cvtusepi16_epi8 (__m512i __A) { 1223 return (__m256i) __builtin_ia32_pmovuswb512_mask ((__v32hi) __A, 1224 (__v32qi) _mm256_setzero_si256(), 1225 (__mmask32) -1); 1226} 1227 1228static __inline__ __m256i __DEFAULT_FN_ATTRS512 1229_mm512_mask_cvtusepi16_epi8 (__m256i __O, __mmask32 __M, __m512i __A) { 1230 return (__m256i) __builtin_ia32_pmovuswb512_mask ((__v32hi) __A, 1231 (__v32qi) __O, 1232 __M); 1233} 1234 1235static __inline__ __m256i __DEFAULT_FN_ATTRS512 1236_mm512_maskz_cvtusepi16_epi8 (__mmask32 __M, __m512i __A) { 1237 return (__m256i) __builtin_ia32_pmovuswb512_mask ((__v32hi) __A, 1238 (__v32qi) _mm256_setzero_si256(), 1239 __M); 1240} 1241 1242static __inline__ __m256i __DEFAULT_FN_ATTRS512 1243_mm512_cvtepi16_epi8 (__m512i __A) { 1244 return (__m256i) __builtin_ia32_pmovwb512_mask ((__v32hi) __A, 1245 (__v32qi) _mm256_undefined_si256(), 1246 (__mmask32) -1); 1247} 1248 1249static __inline__ __m256i __DEFAULT_FN_ATTRS512 1250_mm512_mask_cvtepi16_epi8 (__m256i __O, __mmask32 __M, __m512i __A) { 1251 return (__m256i) __builtin_ia32_pmovwb512_mask ((__v32hi) __A, 1252 (__v32qi) __O, 1253 __M); 1254} 1255 1256static __inline__ __m256i __DEFAULT_FN_ATTRS512 1257_mm512_maskz_cvtepi16_epi8 (__mmask32 __M, __m512i __A) { 1258 return (__m256i) __builtin_ia32_pmovwb512_mask ((__v32hi) __A, 1259 (__v32qi) _mm256_setzero_si256(), 1260 __M); 1261} 1262 1263static __inline__ void __DEFAULT_FN_ATTRS512 1264_mm512_mask_cvtepi16_storeu_epi8 (void * __P, __mmask32 __M, __m512i __A) 1265{ 1266 __builtin_ia32_pmovwb512mem_mask ((__v32qi *) __P, (__v32hi) __A, __M); 1267} 1268 1269static __inline__ void __DEFAULT_FN_ATTRS512 1270_mm512_mask_cvtsepi16_storeu_epi8 (void * __P, __mmask32 __M, __m512i __A) 1271{ 1272 __builtin_ia32_pmovswb512mem_mask ((__v32qi *) __P, (__v32hi) __A, __M); 1273} 1274 1275static __inline__ void __DEFAULT_FN_ATTRS512 1276_mm512_mask_cvtusepi16_storeu_epi8 (void * __P, __mmask32 __M, __m512i __A) 1277{ 1278 __builtin_ia32_pmovuswb512mem_mask ((__v32qi *) __P, (__v32hi) __A, __M); 1279} 1280 1281static __inline__ __m512i __DEFAULT_FN_ATTRS512 1282_mm512_unpackhi_epi8(__m512i __A, __m512i __B) { 1283 return (__m512i)__builtin_shufflevector((__v64qi)__A, (__v64qi)__B, 1284 8, 64+8, 9, 64+9, 1285 10, 64+10, 11, 64+11, 1286 12, 64+12, 13, 64+13, 1287 14, 64+14, 15, 64+15, 1288 24, 64+24, 25, 64+25, 1289 26, 64+26, 27, 64+27, 1290 28, 64+28, 29, 64+29, 1291 30, 64+30, 31, 64+31, 1292 40, 64+40, 41, 64+41, 1293 42, 64+42, 43, 64+43, 1294 44, 64+44, 45, 64+45, 1295 46, 64+46, 47, 64+47, 1296 56, 64+56, 57, 64+57, 1297 58, 64+58, 59, 64+59, 1298 60, 64+60, 61, 64+61, 1299 62, 64+62, 63, 64+63); 1300} 1301 1302static __inline__ __m512i __DEFAULT_FN_ATTRS512 1303_mm512_mask_unpackhi_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { 1304 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 1305 (__v64qi)_mm512_unpackhi_epi8(__A, __B), 1306 (__v64qi)__W); 1307} 1308 1309static __inline__ __m512i __DEFAULT_FN_ATTRS512 1310_mm512_maskz_unpackhi_epi8(__mmask64 __U, __m512i __A, __m512i __B) { 1311 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 1312 (__v64qi)_mm512_unpackhi_epi8(__A, __B), 1313 (__v64qi)_mm512_setzero_si512()); 1314} 1315 1316static __inline__ __m512i __DEFAULT_FN_ATTRS512 1317_mm512_unpackhi_epi16(__m512i __A, __m512i __B) { 1318 return (__m512i)__builtin_shufflevector((__v32hi)__A, (__v32hi)__B, 1319 4, 32+4, 5, 32+5, 1320 6, 32+6, 7, 32+7, 1321 12, 32+12, 13, 32+13, 1322 14, 32+14, 15, 32+15, 1323 20, 32+20, 21, 32+21, 1324 22, 32+22, 23, 32+23, 1325 28, 32+28, 29, 32+29, 1326 30, 32+30, 31, 32+31); 1327} 1328 1329static __inline__ __m512i __DEFAULT_FN_ATTRS512 1330_mm512_mask_unpackhi_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { 1331 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1332 (__v32hi)_mm512_unpackhi_epi16(__A, __B), 1333 (__v32hi)__W); 1334} 1335 1336static __inline__ __m512i __DEFAULT_FN_ATTRS512 1337_mm512_maskz_unpackhi_epi16(__mmask32 __U, __m512i __A, __m512i __B) { 1338 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1339 (__v32hi)_mm512_unpackhi_epi16(__A, __B), 1340 (__v32hi)_mm512_setzero_si512()); 1341} 1342 1343static __inline__ __m512i __DEFAULT_FN_ATTRS512 1344_mm512_unpacklo_epi8(__m512i __A, __m512i __B) { 1345 return (__m512i)__builtin_shufflevector((__v64qi)__A, (__v64qi)__B, 1346 0, 64+0, 1, 64+1, 1347 2, 64+2, 3, 64+3, 1348 4, 64+4, 5, 64+5, 1349 6, 64+6, 7, 64+7, 1350 16, 64+16, 17, 64+17, 1351 18, 64+18, 19, 64+19, 1352 20, 64+20, 21, 64+21, 1353 22, 64+22, 23, 64+23, 1354 32, 64+32, 33, 64+33, 1355 34, 64+34, 35, 64+35, 1356 36, 64+36, 37, 64+37, 1357 38, 64+38, 39, 64+39, 1358 48, 64+48, 49, 64+49, 1359 50, 64+50, 51, 64+51, 1360 52, 64+52, 53, 64+53, 1361 54, 64+54, 55, 64+55); 1362} 1363 1364static __inline__ __m512i __DEFAULT_FN_ATTRS512 1365_mm512_mask_unpacklo_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { 1366 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 1367 (__v64qi)_mm512_unpacklo_epi8(__A, __B), 1368 (__v64qi)__W); 1369} 1370 1371static __inline__ __m512i __DEFAULT_FN_ATTRS512 1372_mm512_maskz_unpacklo_epi8(__mmask64 __U, __m512i __A, __m512i __B) { 1373 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 1374 (__v64qi)_mm512_unpacklo_epi8(__A, __B), 1375 (__v64qi)_mm512_setzero_si512()); 1376} 1377 1378static __inline__ __m512i __DEFAULT_FN_ATTRS512 1379_mm512_unpacklo_epi16(__m512i __A, __m512i __B) { 1380 return (__m512i)__builtin_shufflevector((__v32hi)__A, (__v32hi)__B, 1381 0, 32+0, 1, 32+1, 1382 2, 32+2, 3, 32+3, 1383 8, 32+8, 9, 32+9, 1384 10, 32+10, 11, 32+11, 1385 16, 32+16, 17, 32+17, 1386 18, 32+18, 19, 32+19, 1387 24, 32+24, 25, 32+25, 1388 26, 32+26, 27, 32+27); 1389} 1390 1391static __inline__ __m512i __DEFAULT_FN_ATTRS512 1392_mm512_mask_unpacklo_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { 1393 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1394 (__v32hi)_mm512_unpacklo_epi16(__A, __B), 1395 (__v32hi)__W); 1396} 1397 1398static __inline__ __m512i __DEFAULT_FN_ATTRS512 1399_mm512_maskz_unpacklo_epi16(__mmask32 __U, __m512i __A, __m512i __B) { 1400 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1401 (__v32hi)_mm512_unpacklo_epi16(__A, __B), 1402 (__v32hi)_mm512_setzero_si512()); 1403} 1404 1405static __inline__ __m512i __DEFAULT_FN_ATTRS512 1406_mm512_cvtepi8_epi16(__m256i __A) 1407{ 1408 /* This function always performs a signed extension, but __v32qi is a char 1409 which may be signed or unsigned, so use __v32qs. */ 1410 return (__m512i)__builtin_convertvector((__v32qs)__A, __v32hi); 1411} 1412 1413static __inline__ __m512i __DEFAULT_FN_ATTRS512 1414_mm512_mask_cvtepi8_epi16(__m512i __W, __mmask32 __U, __m256i __A) 1415{ 1416 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1417 (__v32hi)_mm512_cvtepi8_epi16(__A), 1418 (__v32hi)__W); 1419} 1420 1421static __inline__ __m512i __DEFAULT_FN_ATTRS512 1422_mm512_maskz_cvtepi8_epi16(__mmask32 __U, __m256i __A) 1423{ 1424 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1425 (__v32hi)_mm512_cvtepi8_epi16(__A), 1426 (__v32hi)_mm512_setzero_si512()); 1427} 1428 1429static __inline__ __m512i __DEFAULT_FN_ATTRS512 1430_mm512_cvtepu8_epi16(__m256i __A) 1431{ 1432 return (__m512i)__builtin_convertvector((__v32qu)__A, __v32hi); 1433} 1434 1435static __inline__ __m512i __DEFAULT_FN_ATTRS512 1436_mm512_mask_cvtepu8_epi16(__m512i __W, __mmask32 __U, __m256i __A) 1437{ 1438 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1439 (__v32hi)_mm512_cvtepu8_epi16(__A), 1440 (__v32hi)__W); 1441} 1442 1443static __inline__ __m512i __DEFAULT_FN_ATTRS512 1444_mm512_maskz_cvtepu8_epi16(__mmask32 __U, __m256i __A) 1445{ 1446 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1447 (__v32hi)_mm512_cvtepu8_epi16(__A), 1448 (__v32hi)_mm512_setzero_si512()); 1449} 1450 1451 1452#define _mm512_shufflehi_epi16(A, imm) \ 1453 (__m512i)__builtin_ia32_pshufhw512((__v32hi)(__m512i)(A), (int)(imm)) 1454 1455#define _mm512_mask_shufflehi_epi16(W, U, A, imm) \ 1456 (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ 1457 (__v32hi)_mm512_shufflehi_epi16((A), \ 1458 (imm)), \ 1459 (__v32hi)(__m512i)(W)) 1460 1461#define _mm512_maskz_shufflehi_epi16(U, A, imm) \ 1462 (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ 1463 (__v32hi)_mm512_shufflehi_epi16((A), \ 1464 (imm)), \ 1465 (__v32hi)_mm512_setzero_si512()) 1466 1467#define _mm512_shufflelo_epi16(A, imm) \ 1468 (__m512i)__builtin_ia32_pshuflw512((__v32hi)(__m512i)(A), (int)(imm)) 1469 1470 1471#define _mm512_mask_shufflelo_epi16(W, U, A, imm) \ 1472 (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ 1473 (__v32hi)_mm512_shufflelo_epi16((A), \ 1474 (imm)), \ 1475 (__v32hi)(__m512i)(W)) 1476 1477 1478#define _mm512_maskz_shufflelo_epi16(U, A, imm) \ 1479 (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ 1480 (__v32hi)_mm512_shufflelo_epi16((A), \ 1481 (imm)), \ 1482 (__v32hi)_mm512_setzero_si512()) 1483 1484static __inline__ __m512i __DEFAULT_FN_ATTRS512 1485_mm512_sllv_epi16(__m512i __A, __m512i __B) 1486{ 1487 return (__m512i)__builtin_ia32_psllv32hi((__v32hi) __A, (__v32hi) __B); 1488} 1489 1490static __inline__ __m512i __DEFAULT_FN_ATTRS512 1491_mm512_mask_sllv_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) 1492{ 1493 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1494 (__v32hi)_mm512_sllv_epi16(__A, __B), 1495 (__v32hi)__W); 1496} 1497 1498static __inline__ __m512i __DEFAULT_FN_ATTRS512 1499_mm512_maskz_sllv_epi16(__mmask32 __U, __m512i __A, __m512i __B) 1500{ 1501 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1502 (__v32hi)_mm512_sllv_epi16(__A, __B), 1503 (__v32hi)_mm512_setzero_si512()); 1504} 1505 1506static __inline__ __m512i __DEFAULT_FN_ATTRS512 1507_mm512_sll_epi16(__m512i __A, __m128i __B) 1508{ 1509 return (__m512i)__builtin_ia32_psllw512((__v32hi) __A, (__v8hi) __B); 1510} 1511 1512static __inline__ __m512i __DEFAULT_FN_ATTRS512 1513_mm512_mask_sll_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B) 1514{ 1515 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1516 (__v32hi)_mm512_sll_epi16(__A, __B), 1517 (__v32hi)__W); 1518} 1519 1520static __inline__ __m512i __DEFAULT_FN_ATTRS512 1521_mm512_maskz_sll_epi16(__mmask32 __U, __m512i __A, __m128i __B) 1522{ 1523 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1524 (__v32hi)_mm512_sll_epi16(__A, __B), 1525 (__v32hi)_mm512_setzero_si512()); 1526} 1527 1528static __inline__ __m512i __DEFAULT_FN_ATTRS512 1529_mm512_slli_epi16(__m512i __A, int __B) 1530{ 1531 return (__m512i)__builtin_ia32_psllwi512((__v32hi)__A, __B); 1532} 1533 1534static __inline__ __m512i __DEFAULT_FN_ATTRS512 1535_mm512_mask_slli_epi16(__m512i __W, __mmask32 __U, __m512i __A, int __B) 1536{ 1537 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1538 (__v32hi)_mm512_slli_epi16(__A, __B), 1539 (__v32hi)__W); 1540} 1541 1542static __inline__ __m512i __DEFAULT_FN_ATTRS512 1543_mm512_maskz_slli_epi16(__mmask32 __U, __m512i __A, int __B) 1544{ 1545 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1546 (__v32hi)_mm512_slli_epi16(__A, __B), 1547 (__v32hi)_mm512_setzero_si512()); 1548} 1549 1550#define _mm512_bslli_epi128(a, imm) \ 1551 (__m512i)__builtin_ia32_pslldqi512_byteshift((__v8di)(__m512i)(a), (int)(imm)) 1552 1553static __inline__ __m512i __DEFAULT_FN_ATTRS512 1554_mm512_srlv_epi16(__m512i __A, __m512i __B) 1555{ 1556 return (__m512i)__builtin_ia32_psrlv32hi((__v32hi)__A, (__v32hi)__B); 1557} 1558 1559static __inline__ __m512i __DEFAULT_FN_ATTRS512 1560_mm512_mask_srlv_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) 1561{ 1562 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1563 (__v32hi)_mm512_srlv_epi16(__A, __B), 1564 (__v32hi)__W); 1565} 1566 1567static __inline__ __m512i __DEFAULT_FN_ATTRS512 1568_mm512_maskz_srlv_epi16(__mmask32 __U, __m512i __A, __m512i __B) 1569{ 1570 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1571 (__v32hi)_mm512_srlv_epi16(__A, __B), 1572 (__v32hi)_mm512_setzero_si512()); 1573} 1574 1575static __inline__ __m512i __DEFAULT_FN_ATTRS512 1576_mm512_srav_epi16(__m512i __A, __m512i __B) 1577{ 1578 return (__m512i)__builtin_ia32_psrav32hi((__v32hi)__A, (__v32hi)__B); 1579} 1580 1581static __inline__ __m512i __DEFAULT_FN_ATTRS512 1582_mm512_mask_srav_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) 1583{ 1584 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1585 (__v32hi)_mm512_srav_epi16(__A, __B), 1586 (__v32hi)__W); 1587} 1588 1589static __inline__ __m512i __DEFAULT_FN_ATTRS512 1590_mm512_maskz_srav_epi16(__mmask32 __U, __m512i __A, __m512i __B) 1591{ 1592 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1593 (__v32hi)_mm512_srav_epi16(__A, __B), 1594 (__v32hi)_mm512_setzero_si512()); 1595} 1596 1597static __inline__ __m512i __DEFAULT_FN_ATTRS512 1598_mm512_sra_epi16(__m512i __A, __m128i __B) 1599{ 1600 return (__m512i)__builtin_ia32_psraw512((__v32hi) __A, (__v8hi) __B); 1601} 1602 1603static __inline__ __m512i __DEFAULT_FN_ATTRS512 1604_mm512_mask_sra_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B) 1605{ 1606 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1607 (__v32hi)_mm512_sra_epi16(__A, __B), 1608 (__v32hi)__W); 1609} 1610 1611static __inline__ __m512i __DEFAULT_FN_ATTRS512 1612_mm512_maskz_sra_epi16(__mmask32 __U, __m512i __A, __m128i __B) 1613{ 1614 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1615 (__v32hi)_mm512_sra_epi16(__A, __B), 1616 (__v32hi)_mm512_setzero_si512()); 1617} 1618 1619static __inline__ __m512i __DEFAULT_FN_ATTRS512 1620_mm512_srai_epi16(__m512i __A, int __B) 1621{ 1622 return (__m512i)__builtin_ia32_psrawi512((__v32hi)__A, __B); 1623} 1624 1625static __inline__ __m512i __DEFAULT_FN_ATTRS512 1626_mm512_mask_srai_epi16(__m512i __W, __mmask32 __U, __m512i __A, int __B) 1627{ 1628 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1629 (__v32hi)_mm512_srai_epi16(__A, __B), 1630 (__v32hi)__W); 1631} 1632 1633static __inline__ __m512i __DEFAULT_FN_ATTRS512 1634_mm512_maskz_srai_epi16(__mmask32 __U, __m512i __A, int __B) 1635{ 1636 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1637 (__v32hi)_mm512_srai_epi16(__A, __B), 1638 (__v32hi)_mm512_setzero_si512()); 1639} 1640 1641static __inline__ __m512i __DEFAULT_FN_ATTRS512 1642_mm512_srl_epi16(__m512i __A, __m128i __B) 1643{ 1644 return (__m512i)__builtin_ia32_psrlw512((__v32hi) __A, (__v8hi) __B); 1645} 1646 1647static __inline__ __m512i __DEFAULT_FN_ATTRS512 1648_mm512_mask_srl_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B) 1649{ 1650 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1651 (__v32hi)_mm512_srl_epi16(__A, __B), 1652 (__v32hi)__W); 1653} 1654 1655static __inline__ __m512i __DEFAULT_FN_ATTRS512 1656_mm512_maskz_srl_epi16(__mmask32 __U, __m512i __A, __m128i __B) 1657{ 1658 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1659 (__v32hi)_mm512_srl_epi16(__A, __B), 1660 (__v32hi)_mm512_setzero_si512()); 1661} 1662 1663static __inline__ __m512i __DEFAULT_FN_ATTRS512 1664_mm512_srli_epi16(__m512i __A, int __B) 1665{ 1666 return (__m512i)__builtin_ia32_psrlwi512((__v32hi)__A, __B); 1667} 1668 1669static __inline__ __m512i __DEFAULT_FN_ATTRS512 1670_mm512_mask_srli_epi16(__m512i __W, __mmask32 __U, __m512i __A, int __B) 1671{ 1672 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1673 (__v32hi)_mm512_srli_epi16(__A, __B), 1674 (__v32hi)__W); 1675} 1676 1677static __inline__ __m512i __DEFAULT_FN_ATTRS512 1678_mm512_maskz_srli_epi16(__mmask32 __U, __m512i __A, int __B) 1679{ 1680 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1681 (__v32hi)_mm512_srli_epi16(__A, __B), 1682 (__v32hi)_mm512_setzero_si512()); 1683} 1684 1685#define _mm512_bsrli_epi128(a, imm) \ 1686 (__m512i)__builtin_ia32_psrldqi512_byteshift((__v8di)(__m512i)(a), (int)(imm)) 1687 1688static __inline__ __m512i __DEFAULT_FN_ATTRS512 1689_mm512_mask_mov_epi16 (__m512i __W, __mmask32 __U, __m512i __A) 1690{ 1691 return (__m512i) __builtin_ia32_selectw_512 ((__mmask32) __U, 1692 (__v32hi) __A, 1693 (__v32hi) __W); 1694} 1695 1696static __inline__ __m512i __DEFAULT_FN_ATTRS512 1697_mm512_maskz_mov_epi16 (__mmask32 __U, __m512i __A) 1698{ 1699 return (__m512i) __builtin_ia32_selectw_512 ((__mmask32) __U, 1700 (__v32hi) __A, 1701 (__v32hi) _mm512_setzero_si512 ()); 1702} 1703 1704static __inline__ __m512i __DEFAULT_FN_ATTRS512 1705_mm512_mask_mov_epi8 (__m512i __W, __mmask64 __U, __m512i __A) 1706{ 1707 return (__m512i) __builtin_ia32_selectb_512 ((__mmask64) __U, 1708 (__v64qi) __A, 1709 (__v64qi) __W); 1710} 1711 1712static __inline__ __m512i __DEFAULT_FN_ATTRS512 1713_mm512_maskz_mov_epi8 (__mmask64 __U, __m512i __A) 1714{ 1715 return (__m512i) __builtin_ia32_selectb_512 ((__mmask64) __U, 1716 (__v64qi) __A, 1717 (__v64qi) _mm512_setzero_si512 ()); 1718} 1719 1720static __inline__ __m512i __DEFAULT_FN_ATTRS512 1721_mm512_mask_set1_epi8 (__m512i __O, __mmask64 __M, char __A) 1722{ 1723 return (__m512i) __builtin_ia32_selectb_512(__M, 1724 (__v64qi)_mm512_set1_epi8(__A), 1725 (__v64qi) __O); 1726} 1727 1728static __inline__ __m512i __DEFAULT_FN_ATTRS512 1729_mm512_maskz_set1_epi8 (__mmask64 __M, char __A) 1730{ 1731 return (__m512i) __builtin_ia32_selectb_512(__M, 1732 (__v64qi) _mm512_set1_epi8(__A), 1733 (__v64qi) _mm512_setzero_si512()); 1734} 1735 1736static __inline__ __mmask64 __DEFAULT_FN_ATTRS512 1737_mm512_kunpackd (__mmask64 __A, __mmask64 __B) 1738{ 1739 return (__mmask64) __builtin_ia32_kunpckdi ((__mmask64) __A, 1740 (__mmask64) __B); 1741} 1742 1743static __inline__ __mmask32 __DEFAULT_FN_ATTRS512 1744_mm512_kunpackw (__mmask32 __A, __mmask32 __B) 1745{ 1746 return (__mmask32) __builtin_ia32_kunpcksi ((__mmask32) __A, 1747 (__mmask32) __B); 1748} 1749 1750static __inline __m512i __DEFAULT_FN_ATTRS512 1751_mm512_loadu_epi16 (void const *__P) 1752{ 1753 struct __loadu_epi16 { 1754 __m512i __v; 1755 } __attribute__((__packed__, __may_alias__)); 1756 return ((struct __loadu_epi16*)__P)->__v; 1757} 1758 1759static __inline__ __m512i __DEFAULT_FN_ATTRS512 1760_mm512_mask_loadu_epi16 (__m512i __W, __mmask32 __U, void const *__P) 1761{ 1762 return (__m512i) __builtin_ia32_loaddquhi512_mask ((__v32hi *) __P, 1763 (__v32hi) __W, 1764 (__mmask32) __U); 1765} 1766 1767static __inline__ __m512i __DEFAULT_FN_ATTRS512 1768_mm512_maskz_loadu_epi16 (__mmask32 __U, void const *__P) 1769{ 1770 return (__m512i) __builtin_ia32_loaddquhi512_mask ((__v32hi *) __P, 1771 (__v32hi) 1772 _mm512_setzero_si512 (), 1773 (__mmask32) __U); 1774} 1775 1776static __inline __m512i __DEFAULT_FN_ATTRS512 1777_mm512_loadu_epi8 (void const *__P) 1778{ 1779 struct __loadu_epi8 { 1780 __m512i __v; 1781 } __attribute__((__packed__, __may_alias__)); 1782 return ((struct __loadu_epi8*)__P)->__v; 1783} 1784 1785static __inline__ __m512i __DEFAULT_FN_ATTRS512 1786_mm512_mask_loadu_epi8 (__m512i __W, __mmask64 __U, void const *__P) 1787{ 1788 return (__m512i) __builtin_ia32_loaddquqi512_mask ((__v64qi *) __P, 1789 (__v64qi) __W, 1790 (__mmask64) __U); 1791} 1792 1793static __inline__ __m512i __DEFAULT_FN_ATTRS512 1794_mm512_maskz_loadu_epi8 (__mmask64 __U, void const *__P) 1795{ 1796 return (__m512i) __builtin_ia32_loaddquqi512_mask ((__v64qi *) __P, 1797 (__v64qi) 1798 _mm512_setzero_si512 (), 1799 (__mmask64) __U); 1800} 1801 1802static __inline void __DEFAULT_FN_ATTRS512 1803_mm512_storeu_epi16 (void *__P, __m512i __A) 1804{ 1805 struct __storeu_epi16 { 1806 __m512i __v; 1807 } __attribute__((__packed__, __may_alias__)); 1808 ((struct __storeu_epi16*)__P)->__v = __A; 1809} 1810 1811static __inline__ void __DEFAULT_FN_ATTRS512 1812_mm512_mask_storeu_epi16 (void *__P, __mmask32 __U, __m512i __A) 1813{ 1814 __builtin_ia32_storedquhi512_mask ((__v32hi *) __P, 1815 (__v32hi) __A, 1816 (__mmask32) __U); 1817} 1818 1819static __inline void __DEFAULT_FN_ATTRS512 1820_mm512_storeu_epi8 (void *__P, __m512i __A) 1821{ 1822 struct __storeu_epi8 { 1823 __m512i __v; 1824 } __attribute__((__packed__, __may_alias__)); 1825 ((struct __storeu_epi8*)__P)->__v = __A; 1826} 1827 1828static __inline__ void __DEFAULT_FN_ATTRS512 1829_mm512_mask_storeu_epi8 (void *__P, __mmask64 __U, __m512i __A) 1830{ 1831 __builtin_ia32_storedquqi512_mask ((__v64qi *) __P, 1832 (__v64qi) __A, 1833 (__mmask64) __U); 1834} 1835 1836static __inline__ __mmask64 __DEFAULT_FN_ATTRS512 1837_mm512_test_epi8_mask (__m512i __A, __m512i __B) 1838{ 1839 return _mm512_cmpneq_epi8_mask (_mm512_and_epi32 (__A, __B), 1840 _mm512_setzero_si512()); 1841} 1842 1843static __inline__ __mmask64 __DEFAULT_FN_ATTRS512 1844_mm512_mask_test_epi8_mask (__mmask64 __U, __m512i __A, __m512i __B) 1845{ 1846 return _mm512_mask_cmpneq_epi8_mask (__U, _mm512_and_epi32 (__A, __B), 1847 _mm512_setzero_si512()); 1848} 1849 1850static __inline__ __mmask32 __DEFAULT_FN_ATTRS512 1851_mm512_test_epi16_mask (__m512i __A, __m512i __B) 1852{ 1853 return _mm512_cmpneq_epi16_mask (_mm512_and_epi32 (__A, __B), 1854 _mm512_setzero_si512()); 1855} 1856 1857static __inline__ __mmask32 __DEFAULT_FN_ATTRS512 1858_mm512_mask_test_epi16_mask (__mmask32 __U, __m512i __A, __m512i __B) 1859{ 1860 return _mm512_mask_cmpneq_epi16_mask (__U, _mm512_and_epi32 (__A, __B), 1861 _mm512_setzero_si512()); 1862} 1863 1864static __inline__ __mmask64 __DEFAULT_FN_ATTRS512 1865_mm512_testn_epi8_mask (__m512i __A, __m512i __B) 1866{ 1867 return _mm512_cmpeq_epi8_mask (_mm512_and_epi32 (__A, __B), _mm512_setzero_si512()); 1868} 1869 1870static __inline__ __mmask64 __DEFAULT_FN_ATTRS512 1871_mm512_mask_testn_epi8_mask (__mmask64 __U, __m512i __A, __m512i __B) 1872{ 1873 return _mm512_mask_cmpeq_epi8_mask (__U, _mm512_and_epi32 (__A, __B), 1874 _mm512_setzero_si512()); 1875} 1876 1877static __inline__ __mmask32 __DEFAULT_FN_ATTRS512 1878_mm512_testn_epi16_mask (__m512i __A, __m512i __B) 1879{ 1880 return _mm512_cmpeq_epi16_mask (_mm512_and_epi32 (__A, __B), 1881 _mm512_setzero_si512()); 1882} 1883 1884static __inline__ __mmask32 __DEFAULT_FN_ATTRS512 1885_mm512_mask_testn_epi16_mask (__mmask32 __U, __m512i __A, __m512i __B) 1886{ 1887 return _mm512_mask_cmpeq_epi16_mask (__U, _mm512_and_epi32 (__A, __B), 1888 _mm512_setzero_si512()); 1889} 1890 1891static __inline__ __mmask64 __DEFAULT_FN_ATTRS512 1892_mm512_movepi8_mask (__m512i __A) 1893{ 1894 return (__mmask64) __builtin_ia32_cvtb2mask512 ((__v64qi) __A); 1895} 1896 1897static __inline__ __mmask32 __DEFAULT_FN_ATTRS512 1898_mm512_movepi16_mask (__m512i __A) 1899{ 1900 return (__mmask32) __builtin_ia32_cvtw2mask512 ((__v32hi) __A); 1901} 1902 1903static __inline__ __m512i __DEFAULT_FN_ATTRS512 1904_mm512_movm_epi8 (__mmask64 __A) 1905{ 1906 return (__m512i) __builtin_ia32_cvtmask2b512 (__A); 1907} 1908 1909static __inline__ __m512i __DEFAULT_FN_ATTRS512 1910_mm512_movm_epi16 (__mmask32 __A) 1911{ 1912 return (__m512i) __builtin_ia32_cvtmask2w512 (__A); 1913} 1914 1915static __inline__ __m512i __DEFAULT_FN_ATTRS512 1916_mm512_broadcastb_epi8 (__m128i __A) 1917{ 1918 return (__m512i)__builtin_shufflevector((__v16qi) __A, (__v16qi) __A, 1919 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1920 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1921 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1922 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); 1923} 1924 1925static __inline__ __m512i __DEFAULT_FN_ATTRS512 1926_mm512_mask_broadcastb_epi8 (__m512i __O, __mmask64 __M, __m128i __A) 1927{ 1928 return (__m512i)__builtin_ia32_selectb_512(__M, 1929 (__v64qi) _mm512_broadcastb_epi8(__A), 1930 (__v64qi) __O); 1931} 1932 1933static __inline__ __m512i __DEFAULT_FN_ATTRS512 1934_mm512_maskz_broadcastb_epi8 (__mmask64 __M, __m128i __A) 1935{ 1936 return (__m512i)__builtin_ia32_selectb_512(__M, 1937 (__v64qi) _mm512_broadcastb_epi8(__A), 1938 (__v64qi) _mm512_setzero_si512()); 1939} 1940 1941static __inline__ __m512i __DEFAULT_FN_ATTRS512 1942_mm512_mask_set1_epi16 (__m512i __O, __mmask32 __M, short __A) 1943{ 1944 return (__m512i) __builtin_ia32_selectw_512(__M, 1945 (__v32hi) _mm512_set1_epi16(__A), 1946 (__v32hi) __O); 1947} 1948 1949static __inline__ __m512i __DEFAULT_FN_ATTRS512 1950_mm512_maskz_set1_epi16 (__mmask32 __M, short __A) 1951{ 1952 return (__m512i) __builtin_ia32_selectw_512(__M, 1953 (__v32hi) _mm512_set1_epi16(__A), 1954 (__v32hi) _mm512_setzero_si512()); 1955} 1956 1957static __inline__ __m512i __DEFAULT_FN_ATTRS512 1958_mm512_broadcastw_epi16 (__m128i __A) 1959{ 1960 return (__m512i)__builtin_shufflevector((__v8hi) __A, (__v8hi) __A, 1961 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1962 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); 1963} 1964 1965static __inline__ __m512i __DEFAULT_FN_ATTRS512 1966_mm512_mask_broadcastw_epi16 (__m512i __O, __mmask32 __M, __m128i __A) 1967{ 1968 return (__m512i)__builtin_ia32_selectw_512(__M, 1969 (__v32hi) _mm512_broadcastw_epi16(__A), 1970 (__v32hi) __O); 1971} 1972 1973static __inline__ __m512i __DEFAULT_FN_ATTRS512 1974_mm512_maskz_broadcastw_epi16 (__mmask32 __M, __m128i __A) 1975{ 1976 return (__m512i)__builtin_ia32_selectw_512(__M, 1977 (__v32hi) _mm512_broadcastw_epi16(__A), 1978 (__v32hi) _mm512_setzero_si512()); 1979} 1980 1981static __inline__ __m512i __DEFAULT_FN_ATTRS512 1982_mm512_permutexvar_epi16 (__m512i __A, __m512i __B) 1983{ 1984 return (__m512i)__builtin_ia32_permvarhi512((__v32hi)__B, (__v32hi)__A); 1985} 1986 1987static __inline__ __m512i __DEFAULT_FN_ATTRS512 1988_mm512_maskz_permutexvar_epi16 (__mmask32 __M, __m512i __A, 1989 __m512i __B) 1990{ 1991 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 1992 (__v32hi)_mm512_permutexvar_epi16(__A, __B), 1993 (__v32hi)_mm512_setzero_si512()); 1994} 1995 1996static __inline__ __m512i __DEFAULT_FN_ATTRS512 1997_mm512_mask_permutexvar_epi16 (__m512i __W, __mmask32 __M, __m512i __A, 1998 __m512i __B) 1999{ 2000 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 2001 (__v32hi)_mm512_permutexvar_epi16(__A, __B), 2002 (__v32hi)__W); 2003} 2004 2005#define _mm512_alignr_epi8(A, B, N) \ 2006 (__m512i)__builtin_ia32_palignr512((__v64qi)(__m512i)(A), \ 2007 (__v64qi)(__m512i)(B), (int)(N)) 2008 2009#define _mm512_mask_alignr_epi8(W, U, A, B, N) \ 2010 (__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \ 2011 (__v64qi)_mm512_alignr_epi8((A), (B), (int)(N)), \ 2012 (__v64qi)(__m512i)(W)) 2013 2014#define _mm512_maskz_alignr_epi8(U, A, B, N) \ 2015 (__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \ 2016 (__v64qi)_mm512_alignr_epi8((A), (B), (int)(N)), \ 2017 (__v64qi)(__m512i)_mm512_setzero_si512()) 2018 2019#define _mm512_dbsad_epu8(A, B, imm) \ 2020 (__m512i)__builtin_ia32_dbpsadbw512((__v64qi)(__m512i)(A), \ 2021 (__v64qi)(__m512i)(B), (int)(imm)) 2022 2023#define _mm512_mask_dbsad_epu8(W, U, A, B, imm) \ 2024 (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ 2025 (__v32hi)_mm512_dbsad_epu8((A), (B), (imm)), \ 2026 (__v32hi)(__m512i)(W)) 2027 2028#define _mm512_maskz_dbsad_epu8(U, A, B, imm) \ 2029 (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ 2030 (__v32hi)_mm512_dbsad_epu8((A), (B), (imm)), \ 2031 (__v32hi)_mm512_setzero_si512()) 2032 2033static __inline__ __m512i __DEFAULT_FN_ATTRS512 2034_mm512_sad_epu8 (__m512i __A, __m512i __B) 2035{ 2036 return (__m512i) __builtin_ia32_psadbw512 ((__v64qi) __A, 2037 (__v64qi) __B); 2038} 2039 2040#undef __DEFAULT_FN_ATTRS512 2041#undef __DEFAULT_FN_ATTRS 2042 2043#endif 2044