/* avx512bwintrin.h revision 355940 */
1/*===------------- avx512bwintrin.h - AVX512BW intrinsics ------------------=== 2 * 3 * 4 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 5 * See https://llvm.org/LICENSE.txt for license information. 6 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 7 * 8 *===-----------------------------------------------------------------------=== 9 */ 10#ifndef __IMMINTRIN_H 11#error "Never use <avx512bwintrin.h> directly; include <immintrin.h> instead." 12#endif 13 14#ifndef __AVX512BWINTRIN_H 15#define __AVX512BWINTRIN_H 16 17typedef unsigned int __mmask32; 18typedef unsigned long long __mmask64; 19 20/* Define the default attributes for the functions in this file. */ 21#define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512bw"), __min_vector_width__(512))) 22#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512bw"))) 23 24static __inline __mmask32 __DEFAULT_FN_ATTRS 25_knot_mask32(__mmask32 __M) 26{ 27 return __builtin_ia32_knotsi(__M); 28} 29 30static __inline __mmask64 __DEFAULT_FN_ATTRS 31_knot_mask64(__mmask64 __M) 32{ 33 return __builtin_ia32_knotdi(__M); 34} 35 36static __inline__ __mmask32 __DEFAULT_FN_ATTRS 37_kand_mask32(__mmask32 __A, __mmask32 __B) 38{ 39 return (__mmask32)__builtin_ia32_kandsi((__mmask32)__A, (__mmask32)__B); 40} 41 42static __inline__ __mmask64 __DEFAULT_FN_ATTRS 43_kand_mask64(__mmask64 __A, __mmask64 __B) 44{ 45 return (__mmask64)__builtin_ia32_kanddi((__mmask64)__A, (__mmask64)__B); 46} 47 48static __inline__ __mmask32 __DEFAULT_FN_ATTRS 49_kandn_mask32(__mmask32 __A, __mmask32 __B) 50{ 51 return (__mmask32)__builtin_ia32_kandnsi((__mmask32)__A, (__mmask32)__B); 52} 53 54static __inline__ __mmask64 __DEFAULT_FN_ATTRS 55_kandn_mask64(__mmask64 __A, __mmask64 __B) 56{ 57 return (__mmask64)__builtin_ia32_kandndi((__mmask64)__A, (__mmask64)__B); 58} 59 60static __inline__ __mmask32 __DEFAULT_FN_ATTRS 61_kor_mask32(__mmask32 
__A, __mmask32 __B) 62{ 63 return (__mmask32)__builtin_ia32_korsi((__mmask32)__A, (__mmask32)__B); 64} 65 66static __inline__ __mmask64 __DEFAULT_FN_ATTRS 67_kor_mask64(__mmask64 __A, __mmask64 __B) 68{ 69 return (__mmask64)__builtin_ia32_kordi((__mmask64)__A, (__mmask64)__B); 70} 71 72static __inline__ __mmask32 __DEFAULT_FN_ATTRS 73_kxnor_mask32(__mmask32 __A, __mmask32 __B) 74{ 75 return (__mmask32)__builtin_ia32_kxnorsi((__mmask32)__A, (__mmask32)__B); 76} 77 78static __inline__ __mmask64 __DEFAULT_FN_ATTRS 79_kxnor_mask64(__mmask64 __A, __mmask64 __B) 80{ 81 return (__mmask64)__builtin_ia32_kxnordi((__mmask64)__A, (__mmask64)__B); 82} 83 84static __inline__ __mmask32 __DEFAULT_FN_ATTRS 85_kxor_mask32(__mmask32 __A, __mmask32 __B) 86{ 87 return (__mmask32)__builtin_ia32_kxorsi((__mmask32)__A, (__mmask32)__B); 88} 89 90static __inline__ __mmask64 __DEFAULT_FN_ATTRS 91_kxor_mask64(__mmask64 __A, __mmask64 __B) 92{ 93 return (__mmask64)__builtin_ia32_kxordi((__mmask64)__A, (__mmask64)__B); 94} 95 96static __inline__ unsigned char __DEFAULT_FN_ATTRS 97_kortestc_mask32_u8(__mmask32 __A, __mmask32 __B) 98{ 99 return (unsigned char)__builtin_ia32_kortestcsi(__A, __B); 100} 101 102static __inline__ unsigned char __DEFAULT_FN_ATTRS 103_kortestz_mask32_u8(__mmask32 __A, __mmask32 __B) 104{ 105 return (unsigned char)__builtin_ia32_kortestzsi(__A, __B); 106} 107 108static __inline__ unsigned char __DEFAULT_FN_ATTRS 109_kortest_mask32_u8(__mmask32 __A, __mmask32 __B, unsigned char *__C) { 110 *__C = (unsigned char)__builtin_ia32_kortestcsi(__A, __B); 111 return (unsigned char)__builtin_ia32_kortestzsi(__A, __B); 112} 113 114static __inline__ unsigned char __DEFAULT_FN_ATTRS 115_kortestc_mask64_u8(__mmask64 __A, __mmask64 __B) 116{ 117 return (unsigned char)__builtin_ia32_kortestcdi(__A, __B); 118} 119 120static __inline__ unsigned char __DEFAULT_FN_ATTRS 121_kortestz_mask64_u8(__mmask64 __A, __mmask64 __B) 122{ 123 return (unsigned char)__builtin_ia32_kortestzdi(__A, __B); 
124} 125 126static __inline__ unsigned char __DEFAULT_FN_ATTRS 127_kortest_mask64_u8(__mmask64 __A, __mmask64 __B, unsigned char *__C) { 128 *__C = (unsigned char)__builtin_ia32_kortestcdi(__A, __B); 129 return (unsigned char)__builtin_ia32_kortestzdi(__A, __B); 130} 131 132static __inline__ unsigned char __DEFAULT_FN_ATTRS 133_ktestc_mask32_u8(__mmask32 __A, __mmask32 __B) 134{ 135 return (unsigned char)__builtin_ia32_ktestcsi(__A, __B); 136} 137 138static __inline__ unsigned char __DEFAULT_FN_ATTRS 139_ktestz_mask32_u8(__mmask32 __A, __mmask32 __B) 140{ 141 return (unsigned char)__builtin_ia32_ktestzsi(__A, __B); 142} 143 144static __inline__ unsigned char __DEFAULT_FN_ATTRS 145_ktest_mask32_u8(__mmask32 __A, __mmask32 __B, unsigned char *__C) { 146 *__C = (unsigned char)__builtin_ia32_ktestcsi(__A, __B); 147 return (unsigned char)__builtin_ia32_ktestzsi(__A, __B); 148} 149 150static __inline__ unsigned char __DEFAULT_FN_ATTRS 151_ktestc_mask64_u8(__mmask64 __A, __mmask64 __B) 152{ 153 return (unsigned char)__builtin_ia32_ktestcdi(__A, __B); 154} 155 156static __inline__ unsigned char __DEFAULT_FN_ATTRS 157_ktestz_mask64_u8(__mmask64 __A, __mmask64 __B) 158{ 159 return (unsigned char)__builtin_ia32_ktestzdi(__A, __B); 160} 161 162static __inline__ unsigned char __DEFAULT_FN_ATTRS 163_ktest_mask64_u8(__mmask64 __A, __mmask64 __B, unsigned char *__C) { 164 *__C = (unsigned char)__builtin_ia32_ktestcdi(__A, __B); 165 return (unsigned char)__builtin_ia32_ktestzdi(__A, __B); 166} 167 168static __inline__ __mmask32 __DEFAULT_FN_ATTRS 169_kadd_mask32(__mmask32 __A, __mmask32 __B) 170{ 171 return (__mmask32)__builtin_ia32_kaddsi((__mmask32)__A, (__mmask32)__B); 172} 173 174static __inline__ __mmask64 __DEFAULT_FN_ATTRS 175_kadd_mask64(__mmask64 __A, __mmask64 __B) 176{ 177 return (__mmask64)__builtin_ia32_kadddi((__mmask64)__A, (__mmask64)__B); 178} 179 180#define _kshiftli_mask32(A, I) \ 181 (__mmask32)__builtin_ia32_kshiftlisi((__mmask32)(A), (unsigned int)(I)) 182 
183#define _kshiftri_mask32(A, I) \ 184 (__mmask32)__builtin_ia32_kshiftrisi((__mmask32)(A), (unsigned int)(I)) 185 186#define _kshiftli_mask64(A, I) \ 187 (__mmask64)__builtin_ia32_kshiftlidi((__mmask64)(A), (unsigned int)(I)) 188 189#define _kshiftri_mask64(A, I) \ 190 (__mmask64)__builtin_ia32_kshiftridi((__mmask64)(A), (unsigned int)(I)) 191 192static __inline__ unsigned int __DEFAULT_FN_ATTRS 193_cvtmask32_u32(__mmask32 __A) { 194 return (unsigned int)__builtin_ia32_kmovd((__mmask32)__A); 195} 196 197static __inline__ unsigned long long __DEFAULT_FN_ATTRS 198_cvtmask64_u64(__mmask64 __A) { 199 return (unsigned long long)__builtin_ia32_kmovq((__mmask64)__A); 200} 201 202static __inline__ __mmask32 __DEFAULT_FN_ATTRS 203_cvtu32_mask32(unsigned int __A) { 204 return (__mmask32)__builtin_ia32_kmovd((__mmask32)__A); 205} 206 207static __inline__ __mmask64 __DEFAULT_FN_ATTRS 208_cvtu64_mask64(unsigned long long __A) { 209 return (__mmask64)__builtin_ia32_kmovq((__mmask64)__A); 210} 211 212static __inline__ __mmask32 __DEFAULT_FN_ATTRS 213_load_mask32(__mmask32 *__A) { 214 return (__mmask32)__builtin_ia32_kmovd(*(__mmask32 *)__A); 215} 216 217static __inline__ __mmask64 __DEFAULT_FN_ATTRS 218_load_mask64(__mmask64 *__A) { 219 return (__mmask64)__builtin_ia32_kmovq(*(__mmask64 *)__A); 220} 221 222static __inline__ void __DEFAULT_FN_ATTRS 223_store_mask32(__mmask32 *__A, __mmask32 __B) { 224 *(__mmask32 *)__A = __builtin_ia32_kmovd((__mmask32)__B); 225} 226 227static __inline__ void __DEFAULT_FN_ATTRS 228_store_mask64(__mmask64 *__A, __mmask64 __B) { 229 *(__mmask64 *)__A = __builtin_ia32_kmovq((__mmask64)__B); 230} 231 232/* Integer compare */ 233 234#define _mm512_cmp_epi8_mask(a, b, p) \ 235 (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \ 236 (__v64qi)(__m512i)(b), (int)(p), \ 237 (__mmask64)-1) 238 239#define _mm512_mask_cmp_epi8_mask(m, a, b, p) \ 240 (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \ 241 (__v64qi)(__m512i)(b), 
(int)(p), \ 242 (__mmask64)(m)) 243 244#define _mm512_cmp_epu8_mask(a, b, p) \ 245 (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)(__m512i)(a), \ 246 (__v64qi)(__m512i)(b), (int)(p), \ 247 (__mmask64)-1) 248 249#define _mm512_mask_cmp_epu8_mask(m, a, b, p) \ 250 (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)(__m512i)(a), \ 251 (__v64qi)(__m512i)(b), (int)(p), \ 252 (__mmask64)(m)) 253 254#define _mm512_cmp_epi16_mask(a, b, p) \ 255 (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)(__m512i)(a), \ 256 (__v32hi)(__m512i)(b), (int)(p), \ 257 (__mmask32)-1) 258 259#define _mm512_mask_cmp_epi16_mask(m, a, b, p) \ 260 (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)(__m512i)(a), \ 261 (__v32hi)(__m512i)(b), (int)(p), \ 262 (__mmask32)(m)) 263 264#define _mm512_cmp_epu16_mask(a, b, p) \ 265 (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)(__m512i)(a), \ 266 (__v32hi)(__m512i)(b), (int)(p), \ 267 (__mmask32)-1) 268 269#define _mm512_mask_cmp_epu16_mask(m, a, b, p) \ 270 (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)(__m512i)(a), \ 271 (__v32hi)(__m512i)(b), (int)(p), \ 272 (__mmask32)(m)) 273 274#define _mm512_cmpeq_epi8_mask(A, B) \ 275 _mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_EQ) 276#define _mm512_mask_cmpeq_epi8_mask(k, A, B) \ 277 _mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_EQ) 278#define _mm512_cmpge_epi8_mask(A, B) \ 279 _mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_GE) 280#define _mm512_mask_cmpge_epi8_mask(k, A, B) \ 281 _mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_GE) 282#define _mm512_cmpgt_epi8_mask(A, B) \ 283 _mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_GT) 284#define _mm512_mask_cmpgt_epi8_mask(k, A, B) \ 285 _mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_GT) 286#define _mm512_cmple_epi8_mask(A, B) \ 287 _mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_LE) 288#define _mm512_mask_cmple_epi8_mask(k, A, B) \ 289 _mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_LE) 290#define _mm512_cmplt_epi8_mask(A, B) \ 291 _mm512_cmp_epi8_mask((A), (B), 
_MM_CMPINT_LT) 292#define _mm512_mask_cmplt_epi8_mask(k, A, B) \ 293 _mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_LT) 294#define _mm512_cmpneq_epi8_mask(A, B) \ 295 _mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_NE) 296#define _mm512_mask_cmpneq_epi8_mask(k, A, B) \ 297 _mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_NE) 298 299#define _mm512_cmpeq_epu8_mask(A, B) \ 300 _mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_EQ) 301#define _mm512_mask_cmpeq_epu8_mask(k, A, B) \ 302 _mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_EQ) 303#define _mm512_cmpge_epu8_mask(A, B) \ 304 _mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_GE) 305#define _mm512_mask_cmpge_epu8_mask(k, A, B) \ 306 _mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_GE) 307#define _mm512_cmpgt_epu8_mask(A, B) \ 308 _mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_GT) 309#define _mm512_mask_cmpgt_epu8_mask(k, A, B) \ 310 _mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_GT) 311#define _mm512_cmple_epu8_mask(A, B) \ 312 _mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_LE) 313#define _mm512_mask_cmple_epu8_mask(k, A, B) \ 314 _mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_LE) 315#define _mm512_cmplt_epu8_mask(A, B) \ 316 _mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_LT) 317#define _mm512_mask_cmplt_epu8_mask(k, A, B) \ 318 _mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_LT) 319#define _mm512_cmpneq_epu8_mask(A, B) \ 320 _mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_NE) 321#define _mm512_mask_cmpneq_epu8_mask(k, A, B) \ 322 _mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_NE) 323 324#define _mm512_cmpeq_epi16_mask(A, B) \ 325 _mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_EQ) 326#define _mm512_mask_cmpeq_epi16_mask(k, A, B) \ 327 _mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_EQ) 328#define _mm512_cmpge_epi16_mask(A, B) \ 329 _mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_GE) 330#define _mm512_mask_cmpge_epi16_mask(k, A, B) \ 331 _mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_GE) 332#define 
_mm512_cmpgt_epi16_mask(A, B) \ 333 _mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_GT) 334#define _mm512_mask_cmpgt_epi16_mask(k, A, B) \ 335 _mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_GT) 336#define _mm512_cmple_epi16_mask(A, B) \ 337 _mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_LE) 338#define _mm512_mask_cmple_epi16_mask(k, A, B) \ 339 _mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_LE) 340#define _mm512_cmplt_epi16_mask(A, B) \ 341 _mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_LT) 342#define _mm512_mask_cmplt_epi16_mask(k, A, B) \ 343 _mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_LT) 344#define _mm512_cmpneq_epi16_mask(A, B) \ 345 _mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_NE) 346#define _mm512_mask_cmpneq_epi16_mask(k, A, B) \ 347 _mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_NE) 348 349#define _mm512_cmpeq_epu16_mask(A, B) \ 350 _mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_EQ) 351#define _mm512_mask_cmpeq_epu16_mask(k, A, B) \ 352 _mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_EQ) 353#define _mm512_cmpge_epu16_mask(A, B) \ 354 _mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_GE) 355#define _mm512_mask_cmpge_epu16_mask(k, A, B) \ 356 _mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_GE) 357#define _mm512_cmpgt_epu16_mask(A, B) \ 358 _mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_GT) 359#define _mm512_mask_cmpgt_epu16_mask(k, A, B) \ 360 _mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_GT) 361#define _mm512_cmple_epu16_mask(A, B) \ 362 _mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_LE) 363#define _mm512_mask_cmple_epu16_mask(k, A, B) \ 364 _mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_LE) 365#define _mm512_cmplt_epu16_mask(A, B) \ 366 _mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_LT) 367#define _mm512_mask_cmplt_epu16_mask(k, A, B) \ 368 _mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_LT) 369#define _mm512_cmpneq_epu16_mask(A, B) \ 370 _mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_NE) 371#define _mm512_mask_cmpneq_epu16_mask(k, A, B) \ 372 
_mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_NE) 373 374static __inline__ __m512i __DEFAULT_FN_ATTRS512 375_mm512_add_epi8 (__m512i __A, __m512i __B) { 376 return (__m512i) ((__v64qu) __A + (__v64qu) __B); 377} 378 379static __inline__ __m512i __DEFAULT_FN_ATTRS512 380_mm512_mask_add_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { 381 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 382 (__v64qi)_mm512_add_epi8(__A, __B), 383 (__v64qi)__W); 384} 385 386static __inline__ __m512i __DEFAULT_FN_ATTRS512 387_mm512_maskz_add_epi8(__mmask64 __U, __m512i __A, __m512i __B) { 388 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 389 (__v64qi)_mm512_add_epi8(__A, __B), 390 (__v64qi)_mm512_setzero_si512()); 391} 392 393static __inline__ __m512i __DEFAULT_FN_ATTRS512 394_mm512_sub_epi8 (__m512i __A, __m512i __B) { 395 return (__m512i) ((__v64qu) __A - (__v64qu) __B); 396} 397 398static __inline__ __m512i __DEFAULT_FN_ATTRS512 399_mm512_mask_sub_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { 400 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 401 (__v64qi)_mm512_sub_epi8(__A, __B), 402 (__v64qi)__W); 403} 404 405static __inline__ __m512i __DEFAULT_FN_ATTRS512 406_mm512_maskz_sub_epi8(__mmask64 __U, __m512i __A, __m512i __B) { 407 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 408 (__v64qi)_mm512_sub_epi8(__A, __B), 409 (__v64qi)_mm512_setzero_si512()); 410} 411 412static __inline__ __m512i __DEFAULT_FN_ATTRS512 413_mm512_add_epi16 (__m512i __A, __m512i __B) { 414 return (__m512i) ((__v32hu) __A + (__v32hu) __B); 415} 416 417static __inline__ __m512i __DEFAULT_FN_ATTRS512 418_mm512_mask_add_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { 419 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 420 (__v32hi)_mm512_add_epi16(__A, __B), 421 (__v32hi)__W); 422} 423 424static __inline__ __m512i __DEFAULT_FN_ATTRS512 425_mm512_maskz_add_epi16(__mmask32 __U, __m512i __A, __m512i __B) { 426 
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 427 (__v32hi)_mm512_add_epi16(__A, __B), 428 (__v32hi)_mm512_setzero_si512()); 429} 430 431static __inline__ __m512i __DEFAULT_FN_ATTRS512 432_mm512_sub_epi16 (__m512i __A, __m512i __B) { 433 return (__m512i) ((__v32hu) __A - (__v32hu) __B); 434} 435 436static __inline__ __m512i __DEFAULT_FN_ATTRS512 437_mm512_mask_sub_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { 438 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 439 (__v32hi)_mm512_sub_epi16(__A, __B), 440 (__v32hi)__W); 441} 442 443static __inline__ __m512i __DEFAULT_FN_ATTRS512 444_mm512_maskz_sub_epi16(__mmask32 __U, __m512i __A, __m512i __B) { 445 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 446 (__v32hi)_mm512_sub_epi16(__A, __B), 447 (__v32hi)_mm512_setzero_si512()); 448} 449 450static __inline__ __m512i __DEFAULT_FN_ATTRS512 451_mm512_mullo_epi16 (__m512i __A, __m512i __B) { 452 return (__m512i) ((__v32hu) __A * (__v32hu) __B); 453} 454 455static __inline__ __m512i __DEFAULT_FN_ATTRS512 456_mm512_mask_mullo_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { 457 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 458 (__v32hi)_mm512_mullo_epi16(__A, __B), 459 (__v32hi)__W); 460} 461 462static __inline__ __m512i __DEFAULT_FN_ATTRS512 463_mm512_maskz_mullo_epi16(__mmask32 __U, __m512i __A, __m512i __B) { 464 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 465 (__v32hi)_mm512_mullo_epi16(__A, __B), 466 (__v32hi)_mm512_setzero_si512()); 467} 468 469static __inline__ __m512i __DEFAULT_FN_ATTRS512 470_mm512_mask_blend_epi8 (__mmask64 __U, __m512i __A, __m512i __W) 471{ 472 return (__m512i) __builtin_ia32_selectb_512 ((__mmask64) __U, 473 (__v64qi) __W, 474 (__v64qi) __A); 475} 476 477static __inline__ __m512i __DEFAULT_FN_ATTRS512 478_mm512_mask_blend_epi16 (__mmask32 __U, __m512i __A, __m512i __W) 479{ 480 return (__m512i) __builtin_ia32_selectw_512 ((__mmask32) __U, 481 (__v32hi) 
__W, 482 (__v32hi) __A); 483} 484 485static __inline__ __m512i __DEFAULT_FN_ATTRS512 486_mm512_abs_epi8 (__m512i __A) 487{ 488 return (__m512i)__builtin_ia32_pabsb512((__v64qi)__A); 489} 490 491static __inline__ __m512i __DEFAULT_FN_ATTRS512 492_mm512_mask_abs_epi8 (__m512i __W, __mmask64 __U, __m512i __A) 493{ 494 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 495 (__v64qi)_mm512_abs_epi8(__A), 496 (__v64qi)__W); 497} 498 499static __inline__ __m512i __DEFAULT_FN_ATTRS512 500_mm512_maskz_abs_epi8 (__mmask64 __U, __m512i __A) 501{ 502 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 503 (__v64qi)_mm512_abs_epi8(__A), 504 (__v64qi)_mm512_setzero_si512()); 505} 506 507static __inline__ __m512i __DEFAULT_FN_ATTRS512 508_mm512_abs_epi16 (__m512i __A) 509{ 510 return (__m512i)__builtin_ia32_pabsw512((__v32hi)__A); 511} 512 513static __inline__ __m512i __DEFAULT_FN_ATTRS512 514_mm512_mask_abs_epi16 (__m512i __W, __mmask32 __U, __m512i __A) 515{ 516 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 517 (__v32hi)_mm512_abs_epi16(__A), 518 (__v32hi)__W); 519} 520 521static __inline__ __m512i __DEFAULT_FN_ATTRS512 522_mm512_maskz_abs_epi16 (__mmask32 __U, __m512i __A) 523{ 524 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 525 (__v32hi)_mm512_abs_epi16(__A), 526 (__v32hi)_mm512_setzero_si512()); 527} 528 529static __inline__ __m512i __DEFAULT_FN_ATTRS512 530_mm512_packs_epi32(__m512i __A, __m512i __B) 531{ 532 return (__m512i)__builtin_ia32_packssdw512((__v16si)__A, (__v16si)__B); 533} 534 535static __inline__ __m512i __DEFAULT_FN_ATTRS512 536_mm512_maskz_packs_epi32(__mmask32 __M, __m512i __A, __m512i __B) 537{ 538 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 539 (__v32hi)_mm512_packs_epi32(__A, __B), 540 (__v32hi)_mm512_setzero_si512()); 541} 542 543static __inline__ __m512i __DEFAULT_FN_ATTRS512 544_mm512_mask_packs_epi32(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) 545{ 546 return 
(__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 547 (__v32hi)_mm512_packs_epi32(__A, __B), 548 (__v32hi)__W); 549} 550 551static __inline__ __m512i __DEFAULT_FN_ATTRS512 552_mm512_packs_epi16(__m512i __A, __m512i __B) 553{ 554 return (__m512i)__builtin_ia32_packsswb512((__v32hi)__A, (__v32hi) __B); 555} 556 557static __inline__ __m512i __DEFAULT_FN_ATTRS512 558_mm512_mask_packs_epi16(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) 559{ 560 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 561 (__v64qi)_mm512_packs_epi16(__A, __B), 562 (__v64qi)__W); 563} 564 565static __inline__ __m512i __DEFAULT_FN_ATTRS512 566_mm512_maskz_packs_epi16(__mmask64 __M, __m512i __A, __m512i __B) 567{ 568 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 569 (__v64qi)_mm512_packs_epi16(__A, __B), 570 (__v64qi)_mm512_setzero_si512()); 571} 572 573static __inline__ __m512i __DEFAULT_FN_ATTRS512 574_mm512_packus_epi32(__m512i __A, __m512i __B) 575{ 576 return (__m512i)__builtin_ia32_packusdw512((__v16si) __A, (__v16si) __B); 577} 578 579static __inline__ __m512i __DEFAULT_FN_ATTRS512 580_mm512_maskz_packus_epi32(__mmask32 __M, __m512i __A, __m512i __B) 581{ 582 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 583 (__v32hi)_mm512_packus_epi32(__A, __B), 584 (__v32hi)_mm512_setzero_si512()); 585} 586 587static __inline__ __m512i __DEFAULT_FN_ATTRS512 588_mm512_mask_packus_epi32(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) 589{ 590 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 591 (__v32hi)_mm512_packus_epi32(__A, __B), 592 (__v32hi)__W); 593} 594 595static __inline__ __m512i __DEFAULT_FN_ATTRS512 596_mm512_packus_epi16(__m512i __A, __m512i __B) 597{ 598 return (__m512i)__builtin_ia32_packuswb512((__v32hi) __A, (__v32hi) __B); 599} 600 601static __inline__ __m512i __DEFAULT_FN_ATTRS512 602_mm512_mask_packus_epi16(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) 603{ 604 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 
605 (__v64qi)_mm512_packus_epi16(__A, __B), 606 (__v64qi)__W); 607} 608 609static __inline__ __m512i __DEFAULT_FN_ATTRS512 610_mm512_maskz_packus_epi16(__mmask64 __M, __m512i __A, __m512i __B) 611{ 612 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 613 (__v64qi)_mm512_packus_epi16(__A, __B), 614 (__v64qi)_mm512_setzero_si512()); 615} 616 617static __inline__ __m512i __DEFAULT_FN_ATTRS512 618_mm512_adds_epi8 (__m512i __A, __m512i __B) 619{ 620 return (__m512i)__builtin_ia32_paddsb512((__v64qi)__A, (__v64qi)__B); 621} 622 623static __inline__ __m512i __DEFAULT_FN_ATTRS512 624_mm512_mask_adds_epi8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) 625{ 626 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 627 (__v64qi)_mm512_adds_epi8(__A, __B), 628 (__v64qi)__W); 629} 630 631static __inline__ __m512i __DEFAULT_FN_ATTRS512 632_mm512_maskz_adds_epi8 (__mmask64 __U, __m512i __A, __m512i __B) 633{ 634 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 635 (__v64qi)_mm512_adds_epi8(__A, __B), 636 (__v64qi)_mm512_setzero_si512()); 637} 638 639static __inline__ __m512i __DEFAULT_FN_ATTRS512 640_mm512_adds_epi16 (__m512i __A, __m512i __B) 641{ 642 return (__m512i)__builtin_ia32_paddsw512((__v32hi)__A, (__v32hi)__B); 643} 644 645static __inline__ __m512i __DEFAULT_FN_ATTRS512 646_mm512_mask_adds_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) 647{ 648 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 649 (__v32hi)_mm512_adds_epi16(__A, __B), 650 (__v32hi)__W); 651} 652 653static __inline__ __m512i __DEFAULT_FN_ATTRS512 654_mm512_maskz_adds_epi16 (__mmask32 __U, __m512i __A, __m512i __B) 655{ 656 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 657 (__v32hi)_mm512_adds_epi16(__A, __B), 658 (__v32hi)_mm512_setzero_si512()); 659} 660 661static __inline__ __m512i __DEFAULT_FN_ATTRS512 662_mm512_adds_epu8 (__m512i __A, __m512i __B) 663{ 664 return (__m512i)__builtin_ia32_paddusb512((__v64qi) __A, (__v64qi) __B); 665} 
666 667static __inline__ __m512i __DEFAULT_FN_ATTRS512 668_mm512_mask_adds_epu8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) 669{ 670 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 671 (__v64qi)_mm512_adds_epu8(__A, __B), 672 (__v64qi)__W); 673} 674 675static __inline__ __m512i __DEFAULT_FN_ATTRS512 676_mm512_maskz_adds_epu8 (__mmask64 __U, __m512i __A, __m512i __B) 677{ 678 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 679 (__v64qi)_mm512_adds_epu8(__A, __B), 680 (__v64qi)_mm512_setzero_si512()); 681} 682 683static __inline__ __m512i __DEFAULT_FN_ATTRS512 684_mm512_adds_epu16 (__m512i __A, __m512i __B) 685{ 686 return (__m512i)__builtin_ia32_paddusw512((__v32hi) __A, (__v32hi) __B); 687} 688 689static __inline__ __m512i __DEFAULT_FN_ATTRS512 690_mm512_mask_adds_epu16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) 691{ 692 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 693 (__v32hi)_mm512_adds_epu16(__A, __B), 694 (__v32hi)__W); 695} 696 697static __inline__ __m512i __DEFAULT_FN_ATTRS512 698_mm512_maskz_adds_epu16 (__mmask32 __U, __m512i __A, __m512i __B) 699{ 700 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 701 (__v32hi)_mm512_adds_epu16(__A, __B), 702 (__v32hi)_mm512_setzero_si512()); 703} 704 705static __inline__ __m512i __DEFAULT_FN_ATTRS512 706_mm512_avg_epu8 (__m512i __A, __m512i __B) 707{ 708 return (__m512i)__builtin_ia32_pavgb512((__v64qi)__A, (__v64qi)__B); 709} 710 711static __inline__ __m512i __DEFAULT_FN_ATTRS512 712_mm512_mask_avg_epu8 (__m512i __W, __mmask64 __U, __m512i __A, 713 __m512i __B) 714{ 715 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 716 (__v64qi)_mm512_avg_epu8(__A, __B), 717 (__v64qi)__W); 718} 719 720static __inline__ __m512i __DEFAULT_FN_ATTRS512 721_mm512_maskz_avg_epu8 (__mmask64 __U, __m512i __A, __m512i __B) 722{ 723 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 724 (__v64qi)_mm512_avg_epu8(__A, __B), 725 
(__v64qi)_mm512_setzero_si512()); 726} 727 728static __inline__ __m512i __DEFAULT_FN_ATTRS512 729_mm512_avg_epu16 (__m512i __A, __m512i __B) 730{ 731 return (__m512i)__builtin_ia32_pavgw512((__v32hi)__A, (__v32hi)__B); 732} 733 734static __inline__ __m512i __DEFAULT_FN_ATTRS512 735_mm512_mask_avg_epu16 (__m512i __W, __mmask32 __U, __m512i __A, 736 __m512i __B) 737{ 738 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 739 (__v32hi)_mm512_avg_epu16(__A, __B), 740 (__v32hi)__W); 741} 742 743static __inline__ __m512i __DEFAULT_FN_ATTRS512 744_mm512_maskz_avg_epu16 (__mmask32 __U, __m512i __A, __m512i __B) 745{ 746 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 747 (__v32hi)_mm512_avg_epu16(__A, __B), 748 (__v32hi) _mm512_setzero_si512()); 749} 750 751static __inline__ __m512i __DEFAULT_FN_ATTRS512 752_mm512_max_epi8 (__m512i __A, __m512i __B) 753{ 754 return (__m512i)__builtin_ia32_pmaxsb512((__v64qi) __A, (__v64qi) __B); 755} 756 757static __inline__ __m512i __DEFAULT_FN_ATTRS512 758_mm512_maskz_max_epi8 (__mmask64 __M, __m512i __A, __m512i __B) 759{ 760 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 761 (__v64qi)_mm512_max_epi8(__A, __B), 762 (__v64qi)_mm512_setzero_si512()); 763} 764 765static __inline__ __m512i __DEFAULT_FN_ATTRS512 766_mm512_mask_max_epi8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) 767{ 768 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 769 (__v64qi)_mm512_max_epi8(__A, __B), 770 (__v64qi)__W); 771} 772 773static __inline__ __m512i __DEFAULT_FN_ATTRS512 774_mm512_max_epi16 (__m512i __A, __m512i __B) 775{ 776 return (__m512i)__builtin_ia32_pmaxsw512((__v32hi) __A, (__v32hi) __B); 777} 778 779static __inline__ __m512i __DEFAULT_FN_ATTRS512 780_mm512_maskz_max_epi16 (__mmask32 __M, __m512i __A, __m512i __B) 781{ 782 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 783 (__v32hi)_mm512_max_epi16(__A, __B), 784 (__v32hi)_mm512_setzero_si512()); 785} 786 787static __inline__ __m512i 
__DEFAULT_FN_ATTRS512 788_mm512_mask_max_epi16 (__m512i __W, __mmask32 __M, __m512i __A, 789 __m512i __B) 790{ 791 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 792 (__v32hi)_mm512_max_epi16(__A, __B), 793 (__v32hi)__W); 794} 795 796static __inline__ __m512i __DEFAULT_FN_ATTRS512 797_mm512_max_epu8 (__m512i __A, __m512i __B) 798{ 799 return (__m512i)__builtin_ia32_pmaxub512((__v64qi)__A, (__v64qi)__B); 800} 801 802static __inline__ __m512i __DEFAULT_FN_ATTRS512 803_mm512_maskz_max_epu8 (__mmask64 __M, __m512i __A, __m512i __B) 804{ 805 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 806 (__v64qi)_mm512_max_epu8(__A, __B), 807 (__v64qi)_mm512_setzero_si512()); 808} 809 810static __inline__ __m512i __DEFAULT_FN_ATTRS512 811_mm512_mask_max_epu8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) 812{ 813 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 814 (__v64qi)_mm512_max_epu8(__A, __B), 815 (__v64qi)__W); 816} 817 818static __inline__ __m512i __DEFAULT_FN_ATTRS512 819_mm512_max_epu16 (__m512i __A, __m512i __B) 820{ 821 return (__m512i)__builtin_ia32_pmaxuw512((__v32hi)__A, (__v32hi)__B); 822} 823 824static __inline__ __m512i __DEFAULT_FN_ATTRS512 825_mm512_maskz_max_epu16 (__mmask32 __M, __m512i __A, __m512i __B) 826{ 827 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 828 (__v32hi)_mm512_max_epu16(__A, __B), 829 (__v32hi)_mm512_setzero_si512()); 830} 831 832static __inline__ __m512i __DEFAULT_FN_ATTRS512 833_mm512_mask_max_epu16 (__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) 834{ 835 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 836 (__v32hi)_mm512_max_epu16(__A, __B), 837 (__v32hi)__W); 838} 839 840static __inline__ __m512i __DEFAULT_FN_ATTRS512 841_mm512_min_epi8 (__m512i __A, __m512i __B) 842{ 843 return (__m512i)__builtin_ia32_pminsb512((__v64qi) __A, (__v64qi) __B); 844} 845 846static __inline__ __m512i __DEFAULT_FN_ATTRS512 847_mm512_maskz_min_epi8 (__mmask64 __M, __m512i __A, __m512i 
__B) 848{ 849 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 850 (__v64qi)_mm512_min_epi8(__A, __B), 851 (__v64qi)_mm512_setzero_si512()); 852} 853 854static __inline__ __m512i __DEFAULT_FN_ATTRS512 855_mm512_mask_min_epi8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) 856{ 857 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 858 (__v64qi)_mm512_min_epi8(__A, __B), 859 (__v64qi)__W); 860} 861 862static __inline__ __m512i __DEFAULT_FN_ATTRS512 863_mm512_min_epi16 (__m512i __A, __m512i __B) 864{ 865 return (__m512i)__builtin_ia32_pminsw512((__v32hi) __A, (__v32hi) __B); 866} 867 868static __inline__ __m512i __DEFAULT_FN_ATTRS512 869_mm512_maskz_min_epi16 (__mmask32 __M, __m512i __A, __m512i __B) 870{ 871 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 872 (__v32hi)_mm512_min_epi16(__A, __B), 873 (__v32hi)_mm512_setzero_si512()); 874} 875 876static __inline__ __m512i __DEFAULT_FN_ATTRS512 877_mm512_mask_min_epi16 (__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) 878{ 879 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 880 (__v32hi)_mm512_min_epi16(__A, __B), 881 (__v32hi)__W); 882} 883 884static __inline__ __m512i __DEFAULT_FN_ATTRS512 885_mm512_min_epu8 (__m512i __A, __m512i __B) 886{ 887 return (__m512i)__builtin_ia32_pminub512((__v64qi)__A, (__v64qi)__B); 888} 889 890static __inline__ __m512i __DEFAULT_FN_ATTRS512 891_mm512_maskz_min_epu8 (__mmask64 __M, __m512i __A, __m512i __B) 892{ 893 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 894 (__v64qi)_mm512_min_epu8(__A, __B), 895 (__v64qi)_mm512_setzero_si512()); 896} 897 898static __inline__ __m512i __DEFAULT_FN_ATTRS512 899_mm512_mask_min_epu8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) 900{ 901 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 902 (__v64qi)_mm512_min_epu8(__A, __B), 903 (__v64qi)__W); 904} 905 906static __inline__ __m512i __DEFAULT_FN_ATTRS512 907_mm512_min_epu16 (__m512i __A, __m512i __B) 908{ 909 return 
(__m512i)__builtin_ia32_pminuw512((__v32hi)__A, (__v32hi)__B); 910} 911 912static __inline__ __m512i __DEFAULT_FN_ATTRS512 913_mm512_maskz_min_epu16 (__mmask32 __M, __m512i __A, __m512i __B) 914{ 915 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 916 (__v32hi)_mm512_min_epu16(__A, __B), 917 (__v32hi)_mm512_setzero_si512()); 918} 919 920static __inline__ __m512i __DEFAULT_FN_ATTRS512 921_mm512_mask_min_epu16 (__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) 922{ 923 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 924 (__v32hi)_mm512_min_epu16(__A, __B), 925 (__v32hi)__W); 926} 927 928static __inline__ __m512i __DEFAULT_FN_ATTRS512 929_mm512_shuffle_epi8(__m512i __A, __m512i __B) 930{ 931 return (__m512i)__builtin_ia32_pshufb512((__v64qi)__A,(__v64qi)__B); 932} 933 934static __inline__ __m512i __DEFAULT_FN_ATTRS512 935_mm512_mask_shuffle_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) 936{ 937 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 938 (__v64qi)_mm512_shuffle_epi8(__A, __B), 939 (__v64qi)__W); 940} 941 942static __inline__ __m512i __DEFAULT_FN_ATTRS512 943_mm512_maskz_shuffle_epi8(__mmask64 __U, __m512i __A, __m512i __B) 944{ 945 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 946 (__v64qi)_mm512_shuffle_epi8(__A, __B), 947 (__v64qi)_mm512_setzero_si512()); 948} 949 950static __inline__ __m512i __DEFAULT_FN_ATTRS512 951_mm512_subs_epi8 (__m512i __A, __m512i __B) 952{ 953 return (__m512i)__builtin_ia32_psubsb512((__v64qi)__A, (__v64qi)__B); 954} 955 956static __inline__ __m512i __DEFAULT_FN_ATTRS512 957_mm512_mask_subs_epi8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) 958{ 959 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 960 (__v64qi)_mm512_subs_epi8(__A, __B), 961 (__v64qi)__W); 962} 963 964static __inline__ __m512i __DEFAULT_FN_ATTRS512 965_mm512_maskz_subs_epi8 (__mmask64 __U, __m512i __A, __m512i __B) 966{ 967 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 968 
(__v64qi)_mm512_subs_epi8(__A, __B), 969 (__v64qi)_mm512_setzero_si512()); 970} 971 972static __inline__ __m512i __DEFAULT_FN_ATTRS512 973_mm512_subs_epi16 (__m512i __A, __m512i __B) 974{ 975 return (__m512i)__builtin_ia32_psubsw512((__v32hi)__A, (__v32hi)__B); 976} 977 978static __inline__ __m512i __DEFAULT_FN_ATTRS512 979_mm512_mask_subs_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) 980{ 981 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 982 (__v32hi)_mm512_subs_epi16(__A, __B), 983 (__v32hi)__W); 984} 985 986static __inline__ __m512i __DEFAULT_FN_ATTRS512 987_mm512_maskz_subs_epi16 (__mmask32 __U, __m512i __A, __m512i __B) 988{ 989 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 990 (__v32hi)_mm512_subs_epi16(__A, __B), 991 (__v32hi)_mm512_setzero_si512()); 992} 993 994static __inline__ __m512i __DEFAULT_FN_ATTRS512 995_mm512_subs_epu8 (__m512i __A, __m512i __B) 996{ 997 return (__m512i)__builtin_ia32_psubusb512((__v64qi) __A, (__v64qi) __B); 998} 999 1000static __inline__ __m512i __DEFAULT_FN_ATTRS512 1001_mm512_mask_subs_epu8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) 1002{ 1003 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 1004 (__v64qi)_mm512_subs_epu8(__A, __B), 1005 (__v64qi)__W); 1006} 1007 1008static __inline__ __m512i __DEFAULT_FN_ATTRS512 1009_mm512_maskz_subs_epu8 (__mmask64 __U, __m512i __A, __m512i __B) 1010{ 1011 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 1012 (__v64qi)_mm512_subs_epu8(__A, __B), 1013 (__v64qi)_mm512_setzero_si512()); 1014} 1015 1016static __inline__ __m512i __DEFAULT_FN_ATTRS512 1017_mm512_subs_epu16 (__m512i __A, __m512i __B) 1018{ 1019 return (__m512i)__builtin_ia32_psubusw512((__v32hi) __A, (__v32hi) __B); 1020} 1021 1022static __inline__ __m512i __DEFAULT_FN_ATTRS512 1023_mm512_mask_subs_epu16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) 1024{ 1025 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1026 
(__v32hi)_mm512_subs_epu16(__A, __B), 1027 (__v32hi)__W); 1028} 1029 1030static __inline__ __m512i __DEFAULT_FN_ATTRS512 1031_mm512_maskz_subs_epu16 (__mmask32 __U, __m512i __A, __m512i __B) 1032{ 1033 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1034 (__v32hi)_mm512_subs_epu16(__A, __B), 1035 (__v32hi)_mm512_setzero_si512()); 1036} 1037 1038static __inline__ __m512i __DEFAULT_FN_ATTRS512 1039_mm512_permutex2var_epi16(__m512i __A, __m512i __I, __m512i __B) 1040{ 1041 return (__m512i)__builtin_ia32_vpermi2varhi512((__v32hi)__A, (__v32hi)__I, 1042 (__v32hi)__B); 1043} 1044 1045static __inline__ __m512i __DEFAULT_FN_ATTRS512 1046_mm512_mask_permutex2var_epi16(__m512i __A, __mmask32 __U, __m512i __I, 1047 __m512i __B) 1048{ 1049 return (__m512i)__builtin_ia32_selectw_512(__U, 1050 (__v32hi)_mm512_permutex2var_epi16(__A, __I, __B), 1051 (__v32hi)__A); 1052} 1053 1054static __inline__ __m512i __DEFAULT_FN_ATTRS512 1055_mm512_mask2_permutex2var_epi16(__m512i __A, __m512i __I, __mmask32 __U, 1056 __m512i __B) 1057{ 1058 return (__m512i)__builtin_ia32_selectw_512(__U, 1059 (__v32hi)_mm512_permutex2var_epi16(__A, __I, __B), 1060 (__v32hi)__I); 1061} 1062 1063static __inline__ __m512i __DEFAULT_FN_ATTRS512 1064_mm512_maskz_permutex2var_epi16(__mmask32 __U, __m512i __A, __m512i __I, 1065 __m512i __B) 1066{ 1067 return (__m512i)__builtin_ia32_selectw_512(__U, 1068 (__v32hi)_mm512_permutex2var_epi16(__A, __I, __B), 1069 (__v32hi)_mm512_setzero_si512()); 1070} 1071 1072static __inline__ __m512i __DEFAULT_FN_ATTRS512 1073_mm512_mulhrs_epi16(__m512i __A, __m512i __B) 1074{ 1075 return (__m512i)__builtin_ia32_pmulhrsw512((__v32hi)__A, (__v32hi)__B); 1076} 1077 1078static __inline__ __m512i __DEFAULT_FN_ATTRS512 1079_mm512_mask_mulhrs_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) 1080{ 1081 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1082 (__v32hi)_mm512_mulhrs_epi16(__A, __B), 1083 (__v32hi)__W); 1084} 1085 1086static __inline__ __m512i 
__DEFAULT_FN_ATTRS512 1087_mm512_maskz_mulhrs_epi16(__mmask32 __U, __m512i __A, __m512i __B) 1088{ 1089 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1090 (__v32hi)_mm512_mulhrs_epi16(__A, __B), 1091 (__v32hi)_mm512_setzero_si512()); 1092} 1093 1094static __inline__ __m512i __DEFAULT_FN_ATTRS512 1095_mm512_mulhi_epi16(__m512i __A, __m512i __B) 1096{ 1097 return (__m512i)__builtin_ia32_pmulhw512((__v32hi) __A, (__v32hi) __B); 1098} 1099 1100static __inline__ __m512i __DEFAULT_FN_ATTRS512 1101_mm512_mask_mulhi_epi16(__m512i __W, __mmask32 __U, __m512i __A, 1102 __m512i __B) 1103{ 1104 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1105 (__v32hi)_mm512_mulhi_epi16(__A, __B), 1106 (__v32hi)__W); 1107} 1108 1109static __inline__ __m512i __DEFAULT_FN_ATTRS512 1110_mm512_maskz_mulhi_epi16(__mmask32 __U, __m512i __A, __m512i __B) 1111{ 1112 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1113 (__v32hi)_mm512_mulhi_epi16(__A, __B), 1114 (__v32hi)_mm512_setzero_si512()); 1115} 1116 1117static __inline__ __m512i __DEFAULT_FN_ATTRS512 1118_mm512_mulhi_epu16(__m512i __A, __m512i __B) 1119{ 1120 return (__m512i)__builtin_ia32_pmulhuw512((__v32hi) __A, (__v32hi) __B); 1121} 1122 1123static __inline__ __m512i __DEFAULT_FN_ATTRS512 1124_mm512_mask_mulhi_epu16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) 1125{ 1126 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1127 (__v32hi)_mm512_mulhi_epu16(__A, __B), 1128 (__v32hi)__W); 1129} 1130 1131static __inline__ __m512i __DEFAULT_FN_ATTRS512 1132_mm512_maskz_mulhi_epu16 (__mmask32 __U, __m512i __A, __m512i __B) 1133{ 1134 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1135 (__v32hi)_mm512_mulhi_epu16(__A, __B), 1136 (__v32hi)_mm512_setzero_si512()); 1137} 1138 1139static __inline__ __m512i __DEFAULT_FN_ATTRS512 1140_mm512_maddubs_epi16(__m512i __X, __m512i __Y) { 1141 return (__m512i)__builtin_ia32_pmaddubsw512((__v64qi)__X, (__v64qi)__Y); 1142} 1143 1144static 
__inline__ __m512i __DEFAULT_FN_ATTRS512 1145_mm512_mask_maddubs_epi16(__m512i __W, __mmask32 __U, __m512i __X, 1146 __m512i __Y) { 1147 return (__m512i)__builtin_ia32_selectw_512((__mmask32) __U, 1148 (__v32hi)_mm512_maddubs_epi16(__X, __Y), 1149 (__v32hi)__W); 1150} 1151 1152static __inline__ __m512i __DEFAULT_FN_ATTRS512 1153_mm512_maskz_maddubs_epi16(__mmask32 __U, __m512i __X, __m512i __Y) { 1154 return (__m512i)__builtin_ia32_selectw_512((__mmask32) __U, 1155 (__v32hi)_mm512_maddubs_epi16(__X, __Y), 1156 (__v32hi)_mm512_setzero_si512()); 1157} 1158 1159static __inline__ __m512i __DEFAULT_FN_ATTRS512 1160_mm512_madd_epi16(__m512i __A, __m512i __B) { 1161 return (__m512i)__builtin_ia32_pmaddwd512((__v32hi)__A, (__v32hi)__B); 1162} 1163 1164static __inline__ __m512i __DEFAULT_FN_ATTRS512 1165_mm512_mask_madd_epi16(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) { 1166 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 1167 (__v16si)_mm512_madd_epi16(__A, __B), 1168 (__v16si)__W); 1169} 1170 1171static __inline__ __m512i __DEFAULT_FN_ATTRS512 1172_mm512_maskz_madd_epi16(__mmask16 __U, __m512i __A, __m512i __B) { 1173 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 1174 (__v16si)_mm512_madd_epi16(__A, __B), 1175 (__v16si)_mm512_setzero_si512()); 1176} 1177 1178static __inline__ __m256i __DEFAULT_FN_ATTRS512 1179_mm512_cvtsepi16_epi8 (__m512i __A) { 1180 return (__m256i) __builtin_ia32_pmovswb512_mask ((__v32hi) __A, 1181 (__v32qi)_mm256_setzero_si256(), 1182 (__mmask32) -1); 1183} 1184 1185static __inline__ __m256i __DEFAULT_FN_ATTRS512 1186_mm512_mask_cvtsepi16_epi8 (__m256i __O, __mmask32 __M, __m512i __A) { 1187 return (__m256i) __builtin_ia32_pmovswb512_mask ((__v32hi) __A, 1188 (__v32qi)__O, 1189 __M); 1190} 1191 1192static __inline__ __m256i __DEFAULT_FN_ATTRS512 1193_mm512_maskz_cvtsepi16_epi8 (__mmask32 __M, __m512i __A) { 1194 return (__m256i) __builtin_ia32_pmovswb512_mask ((__v32hi) __A, 1195 (__v32qi) _mm256_setzero_si256(), 
1196 __M); 1197} 1198 1199static __inline__ __m256i __DEFAULT_FN_ATTRS512 1200_mm512_cvtusepi16_epi8 (__m512i __A) { 1201 return (__m256i) __builtin_ia32_pmovuswb512_mask ((__v32hi) __A, 1202 (__v32qi) _mm256_setzero_si256(), 1203 (__mmask32) -1); 1204} 1205 1206static __inline__ __m256i __DEFAULT_FN_ATTRS512 1207_mm512_mask_cvtusepi16_epi8 (__m256i __O, __mmask32 __M, __m512i __A) { 1208 return (__m256i) __builtin_ia32_pmovuswb512_mask ((__v32hi) __A, 1209 (__v32qi) __O, 1210 __M); 1211} 1212 1213static __inline__ __m256i __DEFAULT_FN_ATTRS512 1214_mm512_maskz_cvtusepi16_epi8 (__mmask32 __M, __m512i __A) { 1215 return (__m256i) __builtin_ia32_pmovuswb512_mask ((__v32hi) __A, 1216 (__v32qi) _mm256_setzero_si256(), 1217 __M); 1218} 1219 1220static __inline__ __m256i __DEFAULT_FN_ATTRS512 1221_mm512_cvtepi16_epi8 (__m512i __A) { 1222 return (__m256i) __builtin_ia32_pmovwb512_mask ((__v32hi) __A, 1223 (__v32qi) _mm256_undefined_si256(), 1224 (__mmask32) -1); 1225} 1226 1227static __inline__ __m256i __DEFAULT_FN_ATTRS512 1228_mm512_mask_cvtepi16_epi8 (__m256i __O, __mmask32 __M, __m512i __A) { 1229 return (__m256i) __builtin_ia32_pmovwb512_mask ((__v32hi) __A, 1230 (__v32qi) __O, 1231 __M); 1232} 1233 1234static __inline__ __m256i __DEFAULT_FN_ATTRS512 1235_mm512_maskz_cvtepi16_epi8 (__mmask32 __M, __m512i __A) { 1236 return (__m256i) __builtin_ia32_pmovwb512_mask ((__v32hi) __A, 1237 (__v32qi) _mm256_setzero_si256(), 1238 __M); 1239} 1240 1241static __inline__ void __DEFAULT_FN_ATTRS512 1242_mm512_mask_cvtepi16_storeu_epi8 (void * __P, __mmask32 __M, __m512i __A) 1243{ 1244 __builtin_ia32_pmovwb512mem_mask ((__v32qi *) __P, (__v32hi) __A, __M); 1245} 1246 1247static __inline__ void __DEFAULT_FN_ATTRS512 1248_mm512_mask_cvtsepi16_storeu_epi8 (void * __P, __mmask32 __M, __m512i __A) 1249{ 1250 __builtin_ia32_pmovswb512mem_mask ((__v32qi *) __P, (__v32hi) __A, __M); 1251} 1252 1253static __inline__ void __DEFAULT_FN_ATTRS512 1254_mm512_mask_cvtusepi16_storeu_epi8 (void * 
__P, __mmask32 __M, __m512i __A) 1255{ 1256 __builtin_ia32_pmovuswb512mem_mask ((__v32qi *) __P, (__v32hi) __A, __M); 1257} 1258 1259static __inline__ __m512i __DEFAULT_FN_ATTRS512 1260_mm512_unpackhi_epi8(__m512i __A, __m512i __B) { 1261 return (__m512i)__builtin_shufflevector((__v64qi)__A, (__v64qi)__B, 1262 8, 64+8, 9, 64+9, 1263 10, 64+10, 11, 64+11, 1264 12, 64+12, 13, 64+13, 1265 14, 64+14, 15, 64+15, 1266 24, 64+24, 25, 64+25, 1267 26, 64+26, 27, 64+27, 1268 28, 64+28, 29, 64+29, 1269 30, 64+30, 31, 64+31, 1270 40, 64+40, 41, 64+41, 1271 42, 64+42, 43, 64+43, 1272 44, 64+44, 45, 64+45, 1273 46, 64+46, 47, 64+47, 1274 56, 64+56, 57, 64+57, 1275 58, 64+58, 59, 64+59, 1276 60, 64+60, 61, 64+61, 1277 62, 64+62, 63, 64+63); 1278} 1279 1280static __inline__ __m512i __DEFAULT_FN_ATTRS512 1281_mm512_mask_unpackhi_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { 1282 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 1283 (__v64qi)_mm512_unpackhi_epi8(__A, __B), 1284 (__v64qi)__W); 1285} 1286 1287static __inline__ __m512i __DEFAULT_FN_ATTRS512 1288_mm512_maskz_unpackhi_epi8(__mmask64 __U, __m512i __A, __m512i __B) { 1289 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 1290 (__v64qi)_mm512_unpackhi_epi8(__A, __B), 1291 (__v64qi)_mm512_setzero_si512()); 1292} 1293 1294static __inline__ __m512i __DEFAULT_FN_ATTRS512 1295_mm512_unpackhi_epi16(__m512i __A, __m512i __B) { 1296 return (__m512i)__builtin_shufflevector((__v32hi)__A, (__v32hi)__B, 1297 4, 32+4, 5, 32+5, 1298 6, 32+6, 7, 32+7, 1299 12, 32+12, 13, 32+13, 1300 14, 32+14, 15, 32+15, 1301 20, 32+20, 21, 32+21, 1302 22, 32+22, 23, 32+23, 1303 28, 32+28, 29, 32+29, 1304 30, 32+30, 31, 32+31); 1305} 1306 1307static __inline__ __m512i __DEFAULT_FN_ATTRS512 1308_mm512_mask_unpackhi_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { 1309 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1310 (__v32hi)_mm512_unpackhi_epi16(__A, __B), 1311 (__v32hi)__W); 1312} 1313 
1314static __inline__ __m512i __DEFAULT_FN_ATTRS512 1315_mm512_maskz_unpackhi_epi16(__mmask32 __U, __m512i __A, __m512i __B) { 1316 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1317 (__v32hi)_mm512_unpackhi_epi16(__A, __B), 1318 (__v32hi)_mm512_setzero_si512()); 1319} 1320 1321static __inline__ __m512i __DEFAULT_FN_ATTRS512 1322_mm512_unpacklo_epi8(__m512i __A, __m512i __B) { 1323 return (__m512i)__builtin_shufflevector((__v64qi)__A, (__v64qi)__B, 1324 0, 64+0, 1, 64+1, 1325 2, 64+2, 3, 64+3, 1326 4, 64+4, 5, 64+5, 1327 6, 64+6, 7, 64+7, 1328 16, 64+16, 17, 64+17, 1329 18, 64+18, 19, 64+19, 1330 20, 64+20, 21, 64+21, 1331 22, 64+22, 23, 64+23, 1332 32, 64+32, 33, 64+33, 1333 34, 64+34, 35, 64+35, 1334 36, 64+36, 37, 64+37, 1335 38, 64+38, 39, 64+39, 1336 48, 64+48, 49, 64+49, 1337 50, 64+50, 51, 64+51, 1338 52, 64+52, 53, 64+53, 1339 54, 64+54, 55, 64+55); 1340} 1341 1342static __inline__ __m512i __DEFAULT_FN_ATTRS512 1343_mm512_mask_unpacklo_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { 1344 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 1345 (__v64qi)_mm512_unpacklo_epi8(__A, __B), 1346 (__v64qi)__W); 1347} 1348 1349static __inline__ __m512i __DEFAULT_FN_ATTRS512 1350_mm512_maskz_unpacklo_epi8(__mmask64 __U, __m512i __A, __m512i __B) { 1351 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 1352 (__v64qi)_mm512_unpacklo_epi8(__A, __B), 1353 (__v64qi)_mm512_setzero_si512()); 1354} 1355 1356static __inline__ __m512i __DEFAULT_FN_ATTRS512 1357_mm512_unpacklo_epi16(__m512i __A, __m512i __B) { 1358 return (__m512i)__builtin_shufflevector((__v32hi)__A, (__v32hi)__B, 1359 0, 32+0, 1, 32+1, 1360 2, 32+2, 3, 32+3, 1361 8, 32+8, 9, 32+9, 1362 10, 32+10, 11, 32+11, 1363 16, 32+16, 17, 32+17, 1364 18, 32+18, 19, 32+19, 1365 24, 32+24, 25, 32+25, 1366 26, 32+26, 27, 32+27); 1367} 1368 1369static __inline__ __m512i __DEFAULT_FN_ATTRS512 1370_mm512_mask_unpacklo_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { 1371 
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1372 (__v32hi)_mm512_unpacklo_epi16(__A, __B), 1373 (__v32hi)__W); 1374} 1375 1376static __inline__ __m512i __DEFAULT_FN_ATTRS512 1377_mm512_maskz_unpacklo_epi16(__mmask32 __U, __m512i __A, __m512i __B) { 1378 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1379 (__v32hi)_mm512_unpacklo_epi16(__A, __B), 1380 (__v32hi)_mm512_setzero_si512()); 1381} 1382 1383static __inline__ __m512i __DEFAULT_FN_ATTRS512 1384_mm512_cvtepi8_epi16(__m256i __A) 1385{ 1386 /* This function always performs a signed extension, but __v32qi is a char 1387 which may be signed or unsigned, so use __v32qs. */ 1388 return (__m512i)__builtin_convertvector((__v32qs)__A, __v32hi); 1389} 1390 1391static __inline__ __m512i __DEFAULT_FN_ATTRS512 1392_mm512_mask_cvtepi8_epi16(__m512i __W, __mmask32 __U, __m256i __A) 1393{ 1394 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1395 (__v32hi)_mm512_cvtepi8_epi16(__A), 1396 (__v32hi)__W); 1397} 1398 1399static __inline__ __m512i __DEFAULT_FN_ATTRS512 1400_mm512_maskz_cvtepi8_epi16(__mmask32 __U, __m256i __A) 1401{ 1402 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1403 (__v32hi)_mm512_cvtepi8_epi16(__A), 1404 (__v32hi)_mm512_setzero_si512()); 1405} 1406 1407static __inline__ __m512i __DEFAULT_FN_ATTRS512 1408_mm512_cvtepu8_epi16(__m256i __A) 1409{ 1410 return (__m512i)__builtin_convertvector((__v32qu)__A, __v32hi); 1411} 1412 1413static __inline__ __m512i __DEFAULT_FN_ATTRS512 1414_mm512_mask_cvtepu8_epi16(__m512i __W, __mmask32 __U, __m256i __A) 1415{ 1416 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1417 (__v32hi)_mm512_cvtepu8_epi16(__A), 1418 (__v32hi)__W); 1419} 1420 1421static __inline__ __m512i __DEFAULT_FN_ATTRS512 1422_mm512_maskz_cvtepu8_epi16(__mmask32 __U, __m256i __A) 1423{ 1424 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1425 (__v32hi)_mm512_cvtepu8_epi16(__A), 1426 (__v32hi)_mm512_setzero_si512()); 1427} 1428 1429 
1430#define _mm512_shufflehi_epi16(A, imm) \ 1431 (__m512i)__builtin_ia32_pshufhw512((__v32hi)(__m512i)(A), (int)(imm)) 1432 1433#define _mm512_mask_shufflehi_epi16(W, U, A, imm) \ 1434 (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ 1435 (__v32hi)_mm512_shufflehi_epi16((A), \ 1436 (imm)), \ 1437 (__v32hi)(__m512i)(W)) 1438 1439#define _mm512_maskz_shufflehi_epi16(U, A, imm) \ 1440 (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ 1441 (__v32hi)_mm512_shufflehi_epi16((A), \ 1442 (imm)), \ 1443 (__v32hi)_mm512_setzero_si512()) 1444 1445#define _mm512_shufflelo_epi16(A, imm) \ 1446 (__m512i)__builtin_ia32_pshuflw512((__v32hi)(__m512i)(A), (int)(imm)) 1447 1448 1449#define _mm512_mask_shufflelo_epi16(W, U, A, imm) \ 1450 (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ 1451 (__v32hi)_mm512_shufflelo_epi16((A), \ 1452 (imm)), \ 1453 (__v32hi)(__m512i)(W)) 1454 1455 1456#define _mm512_maskz_shufflelo_epi16(U, A, imm) \ 1457 (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ 1458 (__v32hi)_mm512_shufflelo_epi16((A), \ 1459 (imm)), \ 1460 (__v32hi)_mm512_setzero_si512()) 1461 1462static __inline__ __m512i __DEFAULT_FN_ATTRS512 1463_mm512_sllv_epi16(__m512i __A, __m512i __B) 1464{ 1465 return (__m512i)__builtin_ia32_psllv32hi((__v32hi) __A, (__v32hi) __B); 1466} 1467 1468static __inline__ __m512i __DEFAULT_FN_ATTRS512 1469_mm512_mask_sllv_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) 1470{ 1471 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1472 (__v32hi)_mm512_sllv_epi16(__A, __B), 1473 (__v32hi)__W); 1474} 1475 1476static __inline__ __m512i __DEFAULT_FN_ATTRS512 1477_mm512_maskz_sllv_epi16(__mmask32 __U, __m512i __A, __m512i __B) 1478{ 1479 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1480 (__v32hi)_mm512_sllv_epi16(__A, __B), 1481 (__v32hi)_mm512_setzero_si512()); 1482} 1483 1484static __inline__ __m512i __DEFAULT_FN_ATTRS512 1485_mm512_sll_epi16(__m512i __A, __m128i __B) 1486{ 1487 return 
(__m512i)__builtin_ia32_psllw512((__v32hi) __A, (__v8hi) __B); 1488} 1489 1490static __inline__ __m512i __DEFAULT_FN_ATTRS512 1491_mm512_mask_sll_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B) 1492{ 1493 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1494 (__v32hi)_mm512_sll_epi16(__A, __B), 1495 (__v32hi)__W); 1496} 1497 1498static __inline__ __m512i __DEFAULT_FN_ATTRS512 1499_mm512_maskz_sll_epi16(__mmask32 __U, __m512i __A, __m128i __B) 1500{ 1501 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1502 (__v32hi)_mm512_sll_epi16(__A, __B), 1503 (__v32hi)_mm512_setzero_si512()); 1504} 1505 1506static __inline__ __m512i __DEFAULT_FN_ATTRS512 1507_mm512_slli_epi16(__m512i __A, int __B) 1508{ 1509 return (__m512i)__builtin_ia32_psllwi512((__v32hi)__A, __B); 1510} 1511 1512static __inline__ __m512i __DEFAULT_FN_ATTRS512 1513_mm512_mask_slli_epi16(__m512i __W, __mmask32 __U, __m512i __A, int __B) 1514{ 1515 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1516 (__v32hi)_mm512_slli_epi16(__A, __B), 1517 (__v32hi)__W); 1518} 1519 1520static __inline__ __m512i __DEFAULT_FN_ATTRS512 1521_mm512_maskz_slli_epi16(__mmask32 __U, __m512i __A, int __B) 1522{ 1523 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1524 (__v32hi)_mm512_slli_epi16(__A, __B), 1525 (__v32hi)_mm512_setzero_si512()); 1526} 1527 1528#define _mm512_bslli_epi128(a, imm) \ 1529 (__m512i)__builtin_ia32_pslldqi512_byteshift((__v8di)(__m512i)(a), (int)(imm)) 1530 1531static __inline__ __m512i __DEFAULT_FN_ATTRS512 1532_mm512_srlv_epi16(__m512i __A, __m512i __B) 1533{ 1534 return (__m512i)__builtin_ia32_psrlv32hi((__v32hi)__A, (__v32hi)__B); 1535} 1536 1537static __inline__ __m512i __DEFAULT_FN_ATTRS512 1538_mm512_mask_srlv_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) 1539{ 1540 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1541 (__v32hi)_mm512_srlv_epi16(__A, __B), 1542 (__v32hi)__W); 1543} 1544 1545static __inline__ __m512i 
__DEFAULT_FN_ATTRS512 1546_mm512_maskz_srlv_epi16(__mmask32 __U, __m512i __A, __m512i __B) 1547{ 1548 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1549 (__v32hi)_mm512_srlv_epi16(__A, __B), 1550 (__v32hi)_mm512_setzero_si512()); 1551} 1552 1553static __inline__ __m512i __DEFAULT_FN_ATTRS512 1554_mm512_srav_epi16(__m512i __A, __m512i __B) 1555{ 1556 return (__m512i)__builtin_ia32_psrav32hi((__v32hi)__A, (__v32hi)__B); 1557} 1558 1559static __inline__ __m512i __DEFAULT_FN_ATTRS512 1560_mm512_mask_srav_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) 1561{ 1562 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1563 (__v32hi)_mm512_srav_epi16(__A, __B), 1564 (__v32hi)__W); 1565} 1566 1567static __inline__ __m512i __DEFAULT_FN_ATTRS512 1568_mm512_maskz_srav_epi16(__mmask32 __U, __m512i __A, __m512i __B) 1569{ 1570 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1571 (__v32hi)_mm512_srav_epi16(__A, __B), 1572 (__v32hi)_mm512_setzero_si512()); 1573} 1574 1575static __inline__ __m512i __DEFAULT_FN_ATTRS512 1576_mm512_sra_epi16(__m512i __A, __m128i __B) 1577{ 1578 return (__m512i)__builtin_ia32_psraw512((__v32hi) __A, (__v8hi) __B); 1579} 1580 1581static __inline__ __m512i __DEFAULT_FN_ATTRS512 1582_mm512_mask_sra_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B) 1583{ 1584 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1585 (__v32hi)_mm512_sra_epi16(__A, __B), 1586 (__v32hi)__W); 1587} 1588 1589static __inline__ __m512i __DEFAULT_FN_ATTRS512 1590_mm512_maskz_sra_epi16(__mmask32 __U, __m512i __A, __m128i __B) 1591{ 1592 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1593 (__v32hi)_mm512_sra_epi16(__A, __B), 1594 (__v32hi)_mm512_setzero_si512()); 1595} 1596 1597static __inline__ __m512i __DEFAULT_FN_ATTRS512 1598_mm512_srai_epi16(__m512i __A, int __B) 1599{ 1600 return (__m512i)__builtin_ia32_psrawi512((__v32hi)__A, __B); 1601} 1602 1603static __inline__ __m512i __DEFAULT_FN_ATTRS512 
1604_mm512_mask_srai_epi16(__m512i __W, __mmask32 __U, __m512i __A, int __B) 1605{ 1606 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1607 (__v32hi)_mm512_srai_epi16(__A, __B), 1608 (__v32hi)__W); 1609} 1610 1611static __inline__ __m512i __DEFAULT_FN_ATTRS512 1612_mm512_maskz_srai_epi16(__mmask32 __U, __m512i __A, int __B) 1613{ 1614 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1615 (__v32hi)_mm512_srai_epi16(__A, __B), 1616 (__v32hi)_mm512_setzero_si512()); 1617} 1618 1619static __inline__ __m512i __DEFAULT_FN_ATTRS512 1620_mm512_srl_epi16(__m512i __A, __m128i __B) 1621{ 1622 return (__m512i)__builtin_ia32_psrlw512((__v32hi) __A, (__v8hi) __B); 1623} 1624 1625static __inline__ __m512i __DEFAULT_FN_ATTRS512 1626_mm512_mask_srl_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B) 1627{ 1628 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1629 (__v32hi)_mm512_srl_epi16(__A, __B), 1630 (__v32hi)__W); 1631} 1632 1633static __inline__ __m512i __DEFAULT_FN_ATTRS512 1634_mm512_maskz_srl_epi16(__mmask32 __U, __m512i __A, __m128i __B) 1635{ 1636 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1637 (__v32hi)_mm512_srl_epi16(__A, __B), 1638 (__v32hi)_mm512_setzero_si512()); 1639} 1640 1641static __inline__ __m512i __DEFAULT_FN_ATTRS512 1642_mm512_srli_epi16(__m512i __A, int __B) 1643{ 1644 return (__m512i)__builtin_ia32_psrlwi512((__v32hi)__A, __B); 1645} 1646 1647static __inline__ __m512i __DEFAULT_FN_ATTRS512 1648_mm512_mask_srli_epi16(__m512i __W, __mmask32 __U, __m512i __A, int __B) 1649{ 1650 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1651 (__v32hi)_mm512_srli_epi16(__A, __B), 1652 (__v32hi)__W); 1653} 1654 1655static __inline__ __m512i __DEFAULT_FN_ATTRS512 1656_mm512_maskz_srli_epi16(__mmask32 __U, __m512i __A, int __B) 1657{ 1658 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1659 (__v32hi)_mm512_srli_epi16(__A, __B), 1660 (__v32hi)_mm512_setzero_si512()); 1661} 1662 1663#define 
_mm512_bsrli_epi128(a, imm) \ 1664 (__m512i)__builtin_ia32_psrldqi512_byteshift((__v8di)(__m512i)(a), (int)(imm)) 1665 1666static __inline__ __m512i __DEFAULT_FN_ATTRS512 1667_mm512_mask_mov_epi16 (__m512i __W, __mmask32 __U, __m512i __A) 1668{ 1669 return (__m512i) __builtin_ia32_selectw_512 ((__mmask32) __U, 1670 (__v32hi) __A, 1671 (__v32hi) __W); 1672} 1673 1674static __inline__ __m512i __DEFAULT_FN_ATTRS512 1675_mm512_maskz_mov_epi16 (__mmask32 __U, __m512i __A) 1676{ 1677 return (__m512i) __builtin_ia32_selectw_512 ((__mmask32) __U, 1678 (__v32hi) __A, 1679 (__v32hi) _mm512_setzero_si512 ()); 1680} 1681 1682static __inline__ __m512i __DEFAULT_FN_ATTRS512 1683_mm512_mask_mov_epi8 (__m512i __W, __mmask64 __U, __m512i __A) 1684{ 1685 return (__m512i) __builtin_ia32_selectb_512 ((__mmask64) __U, 1686 (__v64qi) __A, 1687 (__v64qi) __W); 1688} 1689 1690static __inline__ __m512i __DEFAULT_FN_ATTRS512 1691_mm512_maskz_mov_epi8 (__mmask64 __U, __m512i __A) 1692{ 1693 return (__m512i) __builtin_ia32_selectb_512 ((__mmask64) __U, 1694 (__v64qi) __A, 1695 (__v64qi) _mm512_setzero_si512 ()); 1696} 1697 1698static __inline__ __m512i __DEFAULT_FN_ATTRS512 1699_mm512_mask_set1_epi8 (__m512i __O, __mmask64 __M, char __A) 1700{ 1701 return (__m512i) __builtin_ia32_selectb_512(__M, 1702 (__v64qi)_mm512_set1_epi8(__A), 1703 (__v64qi) __O); 1704} 1705 1706static __inline__ __m512i __DEFAULT_FN_ATTRS512 1707_mm512_maskz_set1_epi8 (__mmask64 __M, char __A) 1708{ 1709 return (__m512i) __builtin_ia32_selectb_512(__M, 1710 (__v64qi) _mm512_set1_epi8(__A), 1711 (__v64qi) _mm512_setzero_si512()); 1712} 1713 1714static __inline__ __mmask64 __DEFAULT_FN_ATTRS 1715_mm512_kunpackd (__mmask64 __A, __mmask64 __B) 1716{ 1717 return (__mmask64) __builtin_ia32_kunpckdi ((__mmask64) __A, 1718 (__mmask64) __B); 1719} 1720 1721static __inline__ __mmask32 __DEFAULT_FN_ATTRS 1722_mm512_kunpackw (__mmask32 __A, __mmask32 __B) 1723{ 1724 return (__mmask32) __builtin_ia32_kunpcksi ((__mmask32) __A, 1725 
(__mmask32) __B); 1726} 1727 1728static __inline __m512i __DEFAULT_FN_ATTRS512 1729_mm512_loadu_epi16 (void const *__P) 1730{ 1731 struct __loadu_epi16 { 1732 __m512i_u __v; 1733 } __attribute__((__packed__, __may_alias__)); 1734 return ((struct __loadu_epi16*)__P)->__v; 1735} 1736 1737static __inline__ __m512i __DEFAULT_FN_ATTRS512 1738_mm512_mask_loadu_epi16 (__m512i __W, __mmask32 __U, void const *__P) 1739{ 1740 return (__m512i) __builtin_ia32_loaddquhi512_mask ((__v32hi *) __P, 1741 (__v32hi) __W, 1742 (__mmask32) __U); 1743} 1744 1745static __inline__ __m512i __DEFAULT_FN_ATTRS512 1746_mm512_maskz_loadu_epi16 (__mmask32 __U, void const *__P) 1747{ 1748 return (__m512i) __builtin_ia32_loaddquhi512_mask ((__v32hi *) __P, 1749 (__v32hi) 1750 _mm512_setzero_si512 (), 1751 (__mmask32) __U); 1752} 1753 1754static __inline __m512i __DEFAULT_FN_ATTRS512 1755_mm512_loadu_epi8 (void const *__P) 1756{ 1757 struct __loadu_epi8 { 1758 __m512i_u __v; 1759 } __attribute__((__packed__, __may_alias__)); 1760 return ((struct __loadu_epi8*)__P)->__v; 1761} 1762 1763static __inline__ __m512i __DEFAULT_FN_ATTRS512 1764_mm512_mask_loadu_epi8 (__m512i __W, __mmask64 __U, void const *__P) 1765{ 1766 return (__m512i) __builtin_ia32_loaddquqi512_mask ((__v64qi *) __P, 1767 (__v64qi) __W, 1768 (__mmask64) __U); 1769} 1770 1771static __inline__ __m512i __DEFAULT_FN_ATTRS512 1772_mm512_maskz_loadu_epi8 (__mmask64 __U, void const *__P) 1773{ 1774 return (__m512i) __builtin_ia32_loaddquqi512_mask ((__v64qi *) __P, 1775 (__v64qi) 1776 _mm512_setzero_si512 (), 1777 (__mmask64) __U); 1778} 1779 1780static __inline void __DEFAULT_FN_ATTRS512 1781_mm512_storeu_epi16 (void *__P, __m512i __A) 1782{ 1783 struct __storeu_epi16 { 1784 __m512i_u __v; 1785 } __attribute__((__packed__, __may_alias__)); 1786 ((struct __storeu_epi16*)__P)->__v = __A; 1787} 1788 1789static __inline__ void __DEFAULT_FN_ATTRS512 1790_mm512_mask_storeu_epi16 (void *__P, __mmask32 __U, __m512i __A) 1791{ 1792 
__builtin_ia32_storedquhi512_mask ((__v32hi *) __P, 1793 (__v32hi) __A, 1794 (__mmask32) __U); 1795} 1796 1797static __inline void __DEFAULT_FN_ATTRS512 1798_mm512_storeu_epi8 (void *__P, __m512i __A) 1799{ 1800 struct __storeu_epi8 { 1801 __m512i_u __v; 1802 } __attribute__((__packed__, __may_alias__)); 1803 ((struct __storeu_epi8*)__P)->__v = __A; 1804} 1805 1806static __inline__ void __DEFAULT_FN_ATTRS512 1807_mm512_mask_storeu_epi8 (void *__P, __mmask64 __U, __m512i __A) 1808{ 1809 __builtin_ia32_storedquqi512_mask ((__v64qi *) __P, 1810 (__v64qi) __A, 1811 (__mmask64) __U); 1812} 1813 1814static __inline__ __mmask64 __DEFAULT_FN_ATTRS512 1815_mm512_test_epi8_mask (__m512i __A, __m512i __B) 1816{ 1817 return _mm512_cmpneq_epi8_mask (_mm512_and_epi32 (__A, __B), 1818 _mm512_setzero_si512()); 1819} 1820 1821static __inline__ __mmask64 __DEFAULT_FN_ATTRS512 1822_mm512_mask_test_epi8_mask (__mmask64 __U, __m512i __A, __m512i __B) 1823{ 1824 return _mm512_mask_cmpneq_epi8_mask (__U, _mm512_and_epi32 (__A, __B), 1825 _mm512_setzero_si512()); 1826} 1827 1828static __inline__ __mmask32 __DEFAULT_FN_ATTRS512 1829_mm512_test_epi16_mask (__m512i __A, __m512i __B) 1830{ 1831 return _mm512_cmpneq_epi16_mask (_mm512_and_epi32 (__A, __B), 1832 _mm512_setzero_si512()); 1833} 1834 1835static __inline__ __mmask32 __DEFAULT_FN_ATTRS512 1836_mm512_mask_test_epi16_mask (__mmask32 __U, __m512i __A, __m512i __B) 1837{ 1838 return _mm512_mask_cmpneq_epi16_mask (__U, _mm512_and_epi32 (__A, __B), 1839 _mm512_setzero_si512()); 1840} 1841 1842static __inline__ __mmask64 __DEFAULT_FN_ATTRS512 1843_mm512_testn_epi8_mask (__m512i __A, __m512i __B) 1844{ 1845 return _mm512_cmpeq_epi8_mask (_mm512_and_epi32 (__A, __B), _mm512_setzero_si512()); 1846} 1847 1848static __inline__ __mmask64 __DEFAULT_FN_ATTRS512 1849_mm512_mask_testn_epi8_mask (__mmask64 __U, __m512i __A, __m512i __B) 1850{ 1851 return _mm512_mask_cmpeq_epi8_mask (__U, _mm512_and_epi32 (__A, __B), 1852 _mm512_setzero_si512()); 
1853} 1854 1855static __inline__ __mmask32 __DEFAULT_FN_ATTRS512 1856_mm512_testn_epi16_mask (__m512i __A, __m512i __B) 1857{ 1858 return _mm512_cmpeq_epi16_mask (_mm512_and_epi32 (__A, __B), 1859 _mm512_setzero_si512()); 1860} 1861 1862static __inline__ __mmask32 __DEFAULT_FN_ATTRS512 1863_mm512_mask_testn_epi16_mask (__mmask32 __U, __m512i __A, __m512i __B) 1864{ 1865 return _mm512_mask_cmpeq_epi16_mask (__U, _mm512_and_epi32 (__A, __B), 1866 _mm512_setzero_si512()); 1867} 1868 1869static __inline__ __mmask64 __DEFAULT_FN_ATTRS512 1870_mm512_movepi8_mask (__m512i __A) 1871{ 1872 return (__mmask64) __builtin_ia32_cvtb2mask512 ((__v64qi) __A); 1873} 1874 1875static __inline__ __mmask32 __DEFAULT_FN_ATTRS512 1876_mm512_movepi16_mask (__m512i __A) 1877{ 1878 return (__mmask32) __builtin_ia32_cvtw2mask512 ((__v32hi) __A); 1879} 1880 1881static __inline__ __m512i __DEFAULT_FN_ATTRS512 1882_mm512_movm_epi8 (__mmask64 __A) 1883{ 1884 return (__m512i) __builtin_ia32_cvtmask2b512 (__A); 1885} 1886 1887static __inline__ __m512i __DEFAULT_FN_ATTRS512 1888_mm512_movm_epi16 (__mmask32 __A) 1889{ 1890 return (__m512i) __builtin_ia32_cvtmask2w512 (__A); 1891} 1892 1893static __inline__ __m512i __DEFAULT_FN_ATTRS512 1894_mm512_broadcastb_epi8 (__m128i __A) 1895{ 1896 return (__m512i)__builtin_shufflevector((__v16qi) __A, (__v16qi) __A, 1897 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1898 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1899 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1900 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); 1901} 1902 1903static __inline__ __m512i __DEFAULT_FN_ATTRS512 1904_mm512_mask_broadcastb_epi8 (__m512i __O, __mmask64 __M, __m128i __A) 1905{ 1906 return (__m512i)__builtin_ia32_selectb_512(__M, 1907 (__v64qi) _mm512_broadcastb_epi8(__A), 1908 (__v64qi) __O); 1909} 1910 1911static __inline__ __m512i __DEFAULT_FN_ATTRS512 1912_mm512_maskz_broadcastb_epi8 (__mmask64 __M, __m128i __A) 1913{ 1914 return 
(__m512i)__builtin_ia32_selectb_512(__M, 1915 (__v64qi) _mm512_broadcastb_epi8(__A), 1916 (__v64qi) _mm512_setzero_si512()); 1917} 1918 1919static __inline__ __m512i __DEFAULT_FN_ATTRS512 1920_mm512_mask_set1_epi16 (__m512i __O, __mmask32 __M, short __A) 1921{ 1922 return (__m512i) __builtin_ia32_selectw_512(__M, 1923 (__v32hi) _mm512_set1_epi16(__A), 1924 (__v32hi) __O); 1925} 1926 1927static __inline__ __m512i __DEFAULT_FN_ATTRS512 1928_mm512_maskz_set1_epi16 (__mmask32 __M, short __A) 1929{ 1930 return (__m512i) __builtin_ia32_selectw_512(__M, 1931 (__v32hi) _mm512_set1_epi16(__A), 1932 (__v32hi) _mm512_setzero_si512()); 1933} 1934 1935static __inline__ __m512i __DEFAULT_FN_ATTRS512 1936_mm512_broadcastw_epi16 (__m128i __A) 1937{ 1938 return (__m512i)__builtin_shufflevector((__v8hi) __A, (__v8hi) __A, 1939 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1940 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); 1941} 1942 1943static __inline__ __m512i __DEFAULT_FN_ATTRS512 1944_mm512_mask_broadcastw_epi16 (__m512i __O, __mmask32 __M, __m128i __A) 1945{ 1946 return (__m512i)__builtin_ia32_selectw_512(__M, 1947 (__v32hi) _mm512_broadcastw_epi16(__A), 1948 (__v32hi) __O); 1949} 1950 1951static __inline__ __m512i __DEFAULT_FN_ATTRS512 1952_mm512_maskz_broadcastw_epi16 (__mmask32 __M, __m128i __A) 1953{ 1954 return (__m512i)__builtin_ia32_selectw_512(__M, 1955 (__v32hi) _mm512_broadcastw_epi16(__A), 1956 (__v32hi) _mm512_setzero_si512()); 1957} 1958 1959static __inline__ __m512i __DEFAULT_FN_ATTRS512 1960_mm512_permutexvar_epi16 (__m512i __A, __m512i __B) 1961{ 1962 return (__m512i)__builtin_ia32_permvarhi512((__v32hi)__B, (__v32hi)__A); 1963} 1964 1965static __inline__ __m512i __DEFAULT_FN_ATTRS512 1966_mm512_maskz_permutexvar_epi16 (__mmask32 __M, __m512i __A, 1967 __m512i __B) 1968{ 1969 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 1970 (__v32hi)_mm512_permutexvar_epi16(__A, __B), 1971 (__v32hi)_mm512_setzero_si512()); 1972} 1973 1974static __inline__ 
__m512i __DEFAULT_FN_ATTRS512 1975_mm512_mask_permutexvar_epi16 (__m512i __W, __mmask32 __M, __m512i __A, 1976 __m512i __B) 1977{ 1978 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 1979 (__v32hi)_mm512_permutexvar_epi16(__A, __B), 1980 (__v32hi)__W); 1981} 1982 1983#define _mm512_alignr_epi8(A, B, N) \ 1984 (__m512i)__builtin_ia32_palignr512((__v64qi)(__m512i)(A), \ 1985 (__v64qi)(__m512i)(B), (int)(N)) 1986 1987#define _mm512_mask_alignr_epi8(W, U, A, B, N) \ 1988 (__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \ 1989 (__v64qi)_mm512_alignr_epi8((A), (B), (int)(N)), \ 1990 (__v64qi)(__m512i)(W)) 1991 1992#define _mm512_maskz_alignr_epi8(U, A, B, N) \ 1993 (__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \ 1994 (__v64qi)_mm512_alignr_epi8((A), (B), (int)(N)), \ 1995 (__v64qi)(__m512i)_mm512_setzero_si512()) 1996 1997#define _mm512_dbsad_epu8(A, B, imm) \ 1998 (__m512i)__builtin_ia32_dbpsadbw512((__v64qi)(__m512i)(A), \ 1999 (__v64qi)(__m512i)(B), (int)(imm)) 2000 2001#define _mm512_mask_dbsad_epu8(W, U, A, B, imm) \ 2002 (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ 2003 (__v32hi)_mm512_dbsad_epu8((A), (B), (imm)), \ 2004 (__v32hi)(__m512i)(W)) 2005 2006#define _mm512_maskz_dbsad_epu8(U, A, B, imm) \ 2007 (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ 2008 (__v32hi)_mm512_dbsad_epu8((A), (B), (imm)), \ 2009 (__v32hi)_mm512_setzero_si512()) 2010 2011static __inline__ __m512i __DEFAULT_FN_ATTRS512 2012_mm512_sad_epu8 (__m512i __A, __m512i __B) 2013{ 2014 return (__m512i) __builtin_ia32_psadbw512 ((__v64qi) __A, 2015 (__v64qi) __B); 2016} 2017 2018#undef __DEFAULT_FN_ATTRS512 2019#undef __DEFAULT_FN_ATTRS 2020 2021#endif 2022