avx512bwintrin.h revision 353358
1163953Srrs/*===------------- avx512bwintrin.h - AVX512BW intrinsics ------------------=== 2185694Srrs * 3235828Stuexen * 4235828Stuexen * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 5163953Srrs * See https://llvm.org/LICENSE.txt for license information. 6163953Srrs * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 7163953Srrs * 8163953Srrs *===-----------------------------------------------------------------------=== 9163953Srrs */ 10228653Stuexen#ifndef __IMMINTRIN_H 11163953Srrs#error "Never use <avx512bwintrin.h> directly; include <immintrin.h> instead." 12163953Srrs#endif 13163953Srrs 14228653Stuexen#ifndef __AVX512BWINTRIN_H 15163953Srrs#define __AVX512BWINTRIN_H 16163953Srrs 17163953Srrstypedef unsigned int __mmask32; 18163953Srrstypedef unsigned long long __mmask64; 19163953Srrs 20163953Srrs/* Define the default attributes for the functions in this file. */ 21163953Srrs#define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512bw"), __min_vector_width__(512))) 22163953Srrs#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512bw"))) 23163953Srrs 24163953Srrsstatic __inline __mmask32 __DEFAULT_FN_ATTRS 25163953Srrs_knot_mask32(__mmask32 __M) 26163953Srrs{ 27163953Srrs return __builtin_ia32_knotsi(__M); 28163953Srrs} 29163953Srrs 30163953Srrsstatic __inline __mmask64 __DEFAULT_FN_ATTRS 31163953Srrs_knot_mask64(__mmask64 __M) 32163953Srrs{ 33163953Srrs return __builtin_ia32_knotdi(__M); 34163953Srrs} 35163953Srrs 36163953Srrsstatic __inline__ __mmask32 __DEFAULT_FN_ATTRS 37163953Srrs_kand_mask32(__mmask32 __A, __mmask32 __B) 38167598Srrs{ 39163953Srrs return (__mmask32)__builtin_ia32_kandsi((__mmask32)__A, (__mmask32)__B); 40163953Srrs} 41163953Srrs 42163953Srrsstatic __inline__ __mmask64 __DEFAULT_FN_ATTRS 43163953Srrs_kand_mask64(__mmask64 __A, __mmask64 __B) 44163953Srrs{ 45163953Srrs return (__mmask64)__builtin_ia32_kanddi((__mmask64)__A, (__mmask64)__B); 46163953Srrs} 47170091Srrs 48172091Srrsstatic __inline__ __mmask32 __DEFAULT_FN_ATTRS 49188067Srrs_kandn_mask32(__mmask32 __A, __mmask32 __B) 50179157Srrs{ 51218211Srrs return (__mmask32)__builtin_ia32_kandnsi((__mmask32)__A, (__mmask32)__B); 52163953Srrs} 53163953Srrs 54163953Srrsstatic __inline__ __mmask64 __DEFAULT_FN_ATTRS 55163953Srrs_kandn_mask64(__mmask64 __A, __mmask64 __B) 56163953Srrs{ 57163953Srrs return (__mmask64)__builtin_ia32_kandndi((__mmask64)__A, (__mmask64)__B); 58163953Srrs} 59163953Srrs 60165220Srrsstatic __inline__ __mmask32 __DEFAULT_FN_ATTRS 61165220Srrs_kor_mask32(__mmask32 __A, __mmask32 __B) 62165220Srrs{ 63165220Srrs return (__mmask32)__builtin_ia32_korsi((__mmask32)__A, (__mmask32)__B); 64165220Srrs} 65163953Srrs 66163953Srrsstatic __inline__ __mmask64 __DEFAULT_FN_ATTRS 67165220Srrs_kor_mask64(__mmask64 __A, __mmask64 __B) 68163953Srrs{ 69163953Srrs return (__mmask64)__builtin_ia32_kordi((__mmask64)__A, (__mmask64)__B); 70163953Srrs} 71165220Srrs 72165220Srrsstatic __inline__ __mmask32 __DEFAULT_FN_ATTRS 73165220Srrs_kxnor_mask32(__mmask32 __A, __mmask32 __B) 74165220Srrs{ 75165220Srrs return (__mmask32)__builtin_ia32_kxnorsi((__mmask32)__A, (__mmask32)__B); 76165220Srrs} 77163953Srrs 78163953Srrsstatic __inline__ __mmask64 __DEFAULT_FN_ATTRS 79163953Srrs_kxnor_mask64(__mmask64 __A, __mmask64 __B) 80163953Srrs{ 81163953Srrs return (__mmask64)__builtin_ia32_kxnordi((__mmask64)__A, (__mmask64)__B); 82163953Srrs} 83237715Stuexen 84237715Stuexenstatic __inline__ __mmask32 __DEFAULT_FN_ATTRS 85237049Stuexen_kxor_mask32(__mmask32 __A, __mmask32 __B) 86237049Stuexen{ 87237049Stuexen return (__mmask32)__builtin_ia32_kxorsi((__mmask32)__A, (__mmask32)__B); 88237049Stuexen} 89163953Srrs 90163953Srrsstatic __inline__ __mmask64 __DEFAULT_FN_ATTRS 91163953Srrs_kxor_mask64(__mmask64 __A, __mmask64 __B) 92163953Srrs{ 93169420Srrs return (__mmask64)__builtin_ia32_kxordi((__mmask64)__A, (__mmask64)__B); 94169420Srrs} 95172396Srrs 96172396Srrsstatic __inline__ unsigned char __DEFAULT_FN_ATTRS 97172396Srrs_kortestc_mask32_u8(__mmask32 __A, __mmask32 __B) 98229774Stuexen{ 99163953Srrs return (unsigned char)__builtin_ia32_kortestcsi(__A, __B); 100163953Srrs} 101237715Stuexen 102237049Stuexenstatic __inline__ unsigned char __DEFAULT_FN_ATTRS 103179157Srrs_kortestz_mask32_u8(__mmask32 __A, __mmask32 __B) 104168299Srrs{ 105168299Srrs return (unsigned char)__builtin_ia32_kortestzsi(__A, __B); 106172396Srrs} 107163953Srrs 108163953Srrsstatic __inline__ unsigned char __DEFAULT_FN_ATTRS 109229774Stuexen_kortest_mask32_u8(__mmask32 __A, __mmask32 __B, unsigned char *__C) { 110163953Srrs *__C = (unsigned char)__builtin_ia32_kortestcsi(__A, __B); 111163953Srrs return (unsigned char)__builtin_ia32_kortestzsi(__A, __B); 112163953Srrs} 113237715Stuexen 114237049Stuexenstatic __inline__ unsigned char __DEFAULT_FN_ATTRS 115179157Srrs_kortestc_mask64_u8(__mmask64 __A, __mmask64 __B) 116168299Srrs{ 117168299Srrs return (unsigned char)__builtin_ia32_kortestcdi(__A, __B); 118172396Srrs} 119163953Srrs 120163953Srrsstatic __inline__ unsigned char __DEFAULT_FN_ATTRS 121163953Srrs_kortestz_mask64_u8(__mmask64 __A, __mmask64 __B) 122163953Srrs{ 123237715Stuexen return (unsigned char)__builtin_ia32_kortestzdi(__A, __B); 124237049Stuexen} 125179157Srrs 126171440Srrsstatic __inline__ unsigned char __DEFAULT_FN_ATTRS 127171440Srrs_kortest_mask64_u8(__mmask64 __A, __mmask64 __B, unsigned char *__C) { 128172396Srrs *__C = (unsigned char)__builtin_ia32_kortestcdi(__A, __B); 129163953Srrs return (unsigned char)__builtin_ia32_kortestzdi(__A, __B); 130163953Srrs} 131163953Srrs 132163953Srrsstatic __inline__ unsigned char __DEFAULT_FN_ATTRS 133237715Stuexen_ktestc_mask32_u8(__mmask32 __A, __mmask32 __B) 134237049Stuexen{ 135179157Srrs return (unsigned char)__builtin_ia32_ktestcsi(__A, __B); 136168299Srrs} 137168299Srrs 138172396Srrsstatic __inline__ unsigned char __DEFAULT_FN_ATTRS 139163953Srrs_ktestz_mask32_u8(__mmask32 __A, __mmask32 __B) 140163953Srrs{ 141163953Srrs return (unsigned char)__builtin_ia32_ktestzsi(__A, __B); 142163953Srrs} 143237715Stuexen 144237049Stuexenstatic __inline__ unsigned char __DEFAULT_FN_ATTRS 145179157Srrs_ktest_mask32_u8(__mmask32 __A, __mmask32 __B, unsigned char *__C) { 146168299Srrs *__C = (unsigned char)__builtin_ia32_ktestcsi(__A, __B); 147168299Srrs return (unsigned char)__builtin_ia32_ktestzsi(__A, __B); 148172396Srrs} 149163953Srrs 150163953Srrsstatic __inline__ unsigned char __DEFAULT_FN_ATTRS 151229774Stuexen_ktestc_mask64_u8(__mmask64 __A, __mmask64 __B) 152163953Srrs{ 153237715Stuexen return (unsigned char)__builtin_ia32_ktestcdi(__A, __B); 154237049Stuexen} 155237049Stuexen 156168299Srrsstatic __inline__ unsigned char __DEFAULT_FN_ATTRS 157168299Srrs_ktestz_mask64_u8(__mmask64 __A, __mmask64 __B) 158172396Srrs{ 159163953Srrs return (unsigned char)__builtin_ia32_ktestzdi(__A, __B); 160229774Stuexen} 161229774Stuexen 162229774Stuexenstatic __inline__ unsigned char __DEFAULT_FN_ATTRS 163229774Stuexen_ktest_mask64_u8(__mmask64 __A, __mmask64 __B, unsigned char *__C) { 164229774Stuexen *__C = (unsigned char)__builtin_ia32_ktestcdi(__A, __B); 165229774Stuexen return (unsigned char)__builtin_ia32_ktestzdi(__A, __B); 166229774Stuexen} 167229774Stuexen 168229774Stuexenstatic __inline__ __mmask32 __DEFAULT_FN_ATTRS 169229774Stuexen_kadd_mask32(__mmask32 __A, __mmask32 __B) 170229774Stuexen{ 171229774Stuexen return (__mmask32)__builtin_ia32_kaddsi((__mmask32)__A, (__mmask32)__B); 172229774Stuexen} 173229774Stuexen 174229774Stuexenstatic __inline__ __mmask64 __DEFAULT_FN_ATTRS 175229774Stuexen_kadd_mask64(__mmask64 __A, __mmask64 __B) 176229774Stuexen{ 177229774Stuexen return (__mmask64)__builtin_ia32_kadddi((__mmask64)__A, (__mmask64)__B); 178229774Stuexen} 179229774Stuexen 180229774Stuexen#define _kshiftli_mask32(A, I) \ 181229805Stuexen (__mmask32)__builtin_ia32_kshiftlisi((__mmask32)(A), (unsigned int)(I)) 182237715Stuexen 183237049Stuexen#define _kshiftri_mask32(A, I) \ 184237049Stuexen (__mmask32)__builtin_ia32_kshiftrisi((__mmask32)(A), (unsigned int)(I)) 185229805Stuexen 186229774Stuexen#define _kshiftli_mask64(A, I) \ 187229774Stuexen (__mmask64)__builtin_ia32_kshiftlidi((__mmask64)(A), (unsigned int)(I)) 188229774Stuexen 189229774Stuexen#define _kshiftri_mask64(A, I) \ 190229774Stuexen (__mmask64)__builtin_ia32_kshiftridi((__mmask64)(A), (unsigned int)(I)) 191229774Stuexen 192229774Stuexenstatic __inline__ unsigned int __DEFAULT_FN_ATTRS 193229774Stuexen_cvtmask32_u32(__mmask32 __A) { 194229774Stuexen return (unsigned int)__builtin_ia32_kmovd((__mmask32)__A); 195237715Stuexen} 196237715Stuexen 197237049Stuexenstatic __inline__ unsigned long long __DEFAULT_FN_ATTRS 198237049Stuexen_cvtmask64_u64(__mmask64 __A) { 199229774Stuexen return (unsigned long long)__builtin_ia32_kmovq((__mmask64)__A); 200229774Stuexen} 201172396Srrs 202172396Srrsstatic __inline__ __mmask32 __DEFAULT_FN_ATTRS 203172396Srrs_cvtu32_mask32(unsigned int __A) { 204172396Srrs return (__mmask32)__builtin_ia32_kmovd((__mmask32)__A); 205163953Srrs} 206163953Srrs 207163953Srrsstatic __inline__ __mmask64 __DEFAULT_FN_ATTRS 208163953Srrs_cvtu64_mask64(unsigned long long __A) { 209163953Srrs return (__mmask64)__builtin_ia32_kmovq((__mmask64)__A); 210171158Srrs} 211171158Srrs 212221627Stuexenstatic __inline__ __mmask32 __DEFAULT_FN_ATTRS 213221627Stuexen_load_mask32(__mmask32 *__A) { 214221627Stuexen return (__mmask32)__builtin_ia32_kmovd(*(__mmask32 *)__A); 215221627Stuexen} 216221627Stuexen 217171158Srrsstatic __inline__ __mmask64 __DEFAULT_FN_ATTRS 218171158Srrs_load_mask64(__mmask64 *__A) { 219217760Stuexen return (__mmask64)__builtin_ia32_kmovq(*(__mmask64 *)__A); 220217760Stuexen} 221171158Srrs 222171158Srrsstatic __inline__ void __DEFAULT_FN_ATTRS 223171158Srrs_store_mask32(__mmask32 *__A, __mmask32 __B) { 224171158Srrs *(__mmask32 *)__A = __builtin_ia32_kmovd((__mmask32)__B); 225171158Srrs} 226171158Srrs 227171158Srrsstatic __inline__ void __DEFAULT_FN_ATTRS 228171158Srrs_store_mask64(__mmask64 *__A, __mmask64 __B) { 229171158Srrs *(__mmask64 *)__A = __builtin_ia32_kmovq((__mmask64)__B); 230171158Srrs} 231217760Stuexen 232217760Stuexen/* Integer compare */ 233217760Stuexen 234217760Stuexen#define _mm512_cmp_epi8_mask(a, b, p) \ 235217760Stuexen (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \ 236217760Stuexen (__v64qi)(__m512i)(b), (int)(p), \ 237217760Stuexen (__mmask64)-1) 238217760Stuexen 239171158Srrs#define _mm512_mask_cmp_epi8_mask(m, a, b, p) \ 240171158Srrs (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \ 241171158Srrs (__v64qi)(__m512i)(b), (int)(p), \ 242171158Srrs (__mmask64)(m)) 243171158Srrs 244171158Srrs#define _mm512_cmp_epu8_mask(a, b, p) \ 245171158Srrs (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)(__m512i)(a), \ 246171158Srrs (__v64qi)(__m512i)(b), (int)(p), \ 247171158Srrs (__mmask64)-1) 248171158Srrs 249171158Srrs#define _mm512_mask_cmp_epu8_mask(m, a, b, p) \ 250171158Srrs (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)(__m512i)(a), \ 251171158Srrs (__v64qi)(__m512i)(b), (int)(p), \ 252171158Srrs (__mmask64)(m)) 253171158Srrs 254171158Srrs#define _mm512_cmp_epi16_mask(a, b, p) \ 255217760Stuexen (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)(__m512i)(a), \ 256217760Stuexen (__v32hi)(__m512i)(b), (int)(p), \ 257212712Stuexen (__mmask32)-1) 258212712Stuexen 259212712Stuexen#define _mm512_mask_cmp_epi16_mask(m, a, b, p) \ 260212712Stuexen (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)(__m512i)(a), \ 261171158Srrs (__v32hi)(__m512i)(b), (int)(p), \ 262171158Srrs (__mmask32)(m)) 263171158Srrs 264171158Srrs#define _mm512_cmp_epu16_mask(a, b, p) \ 265221627Stuexen (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)(__m512i)(a), \ 266171158Srrs (__v32hi)(__m512i)(b), (int)(p), \ 267171158Srrs (__mmask32)-1) 268216822Stuexen 269171158Srrs#define _mm512_mask_cmp_epu16_mask(m, a, b, p) \ 270171158Srrs (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)(__m512i)(a), \ 271171158Srrs (__v32hi)(__m512i)(b), (int)(p), \ 272171158Srrs (__mmask32)(m)) 273171158Srrs 274171158Srrs#define _mm512_cmpeq_epi8_mask(A, B) \ 275171158Srrs _mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_EQ) 276163953Srrs#define _mm512_mask_cmpeq_epi8_mask(k, A, B) \ 277228653Stuexen _mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_EQ) 278163953Srrs#define _mm512_cmpge_epi8_mask(A, B) \ 279163953Srrs _mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_GE) 280163953Srrs#define _mm512_mask_cmpge_epi8_mask(k, A, B) \ 281163953Srrs _mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_GE) 282163953Srrs#define _mm512_cmpgt_epi8_mask(A, B) \ 283163953Srrs _mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_GT) 284163953Srrs#define _mm512_mask_cmpgt_epi8_mask(k, A, B) \ 285163953Srrs _mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_GT) 286163953Srrs#define _mm512_cmple_epi8_mask(A, B) \ 287163953Srrs _mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_LE) 288163953Srrs#define _mm512_mask_cmple_epi8_mask(k, A, B) \ 289218129Srrs _mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_LE) 290218129Srrs#define _mm512_cmplt_epi8_mask(A, B) \ 291218129Srrs _mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_LT) 292212712Stuexen#define _mm512_mask_cmplt_epi8_mask(k, A, B) \ 293163953Srrs _mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_LT) 294163953Srrs#define _mm512_cmpneq_epi8_mask(A, B) \ 295163953Srrs _mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_NE) 296179783Srrs#define _mm512_mask_cmpneq_epi8_mask(k, A, B) \ 297170744Srrs _mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_NE) 298170744Srrs 299163953Srrs#define _mm512_cmpeq_epu8_mask(A, B) \ 300163953Srrs _mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_EQ) 301164181Srrs#define _mm512_mask_cmpeq_epu8_mask(k, A, B) \ 302163953Srrs _mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_EQ) 303163953Srrs#define _mm512_cmpge_epu8_mask(A, B) \ 304163953Srrs _mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_GE) 305216822Stuexen#define _mm512_mask_cmpge_epu8_mask(k, A, B) \ 306216822Stuexen _mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_GE) 307163953Srrs#define _mm512_cmpgt_epu8_mask(A, B) \ 308196260Stuexen _mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_GT) 309163953Srrs#define _mm512_mask_cmpgt_epu8_mask(k, A, B) \ 310216822Stuexen _mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_GT) 311216822Stuexen#define _mm512_cmple_epu8_mask(A, B) \ 312216822Stuexen _mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_LE) 313216822Stuexen#define _mm512_mask_cmple_epu8_mask(k, A, B) \ 314216822Stuexen _mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_LE) 315216822Stuexen#define _mm512_cmplt_epu8_mask(A, B) \ 316235416Stuexen _mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_LT) 317235416Stuexen#define _mm512_mask_cmplt_epu8_mask(k, A, B) \ 318216822Stuexen _mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_LT) 319216822Stuexen#define _mm512_cmpneq_epu8_mask(A, B) \ 320216822Stuexen _mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_NE) 321196260Stuexen#define _mm512_mask_cmpneq_epu8_mask(k, A, B) \ 322196260Stuexen _mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_NE) 323221627Stuexen 324216822Stuexen#define _mm512_cmpeq_epi16_mask(A, B) \ 325196260Stuexen _mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_EQ) 326196260Stuexen#define _mm512_mask_cmpeq_epi16_mask(k, A, B) \ 327163953Srrs _mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_EQ) 328163953Srrs#define _mm512_cmpge_epi16_mask(A, B) \ 329163953Srrs _mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_GE) 330216822Stuexen#define _mm512_mask_cmpge_epi16_mask(k, A, B) \ 331196260Stuexen _mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_GE) 332163953Srrs#define _mm512_cmpgt_epi16_mask(A, B) \ 333163953Srrs _mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_GT) 334235416Stuexen#define _mm512_mask_cmpgt_epi16_mask(k, A, B) \ 335163953Srrs _mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_GT) 336163953Srrs#define _mm512_cmple_epi16_mask(A, B) \ 337163953Srrs _mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_LE) 338163953Srrs#define _mm512_mask_cmple_epi16_mask(k, A, B) \ 339212712Stuexen _mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_LE) 340212712Stuexen#define _mm512_cmplt_epi16_mask(A, B) \ 341212712Stuexen _mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_LT) 342212712Stuexen#define _mm512_mask_cmplt_epi16_mask(k, A, B) \ 343163953Srrs _mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_LT) 344221627Stuexen#define _mm512_cmpneq_epi16_mask(A, B) \ 345169655Srrs _mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_NE) 346163953Srrs#define _mm512_mask_cmpneq_epi16_mask(k, A, B) \ 347163953Srrs _mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_NE) 348163953Srrs 349196260Stuexen#define _mm512_cmpeq_epu16_mask(A, B) \ 350163953Srrs _mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_EQ) 351163953Srrs#define _mm512_mask_cmpeq_epu16_mask(k, A, B) \ 352164181Srrs _mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_EQ) 353188854Srrs#define _mm512_cmpge_epu16_mask(A, B) \ 354218129Srrs _mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_GE) 355185694Srrs#define _mm512_mask_cmpge_epu16_mask(k, A, B) \ 356185694Srrs _mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_GE) 357179783Srrs#define _mm512_cmpgt_epu16_mask(A, B) \ 358170744Srrs _mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_GT) 359170744Srrs#define _mm512_mask_cmpgt_epu16_mask(k, A, B) \ 360163953Srrs _mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_GT) 361163953Srrs#define _mm512_cmple_epu16_mask(A, B) \ 362163953Srrs _mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_LE) 363163953Srrs#define _mm512_mask_cmple_epu16_mask(k, A, B) \ 364180955Srrs _mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_LE) 365218129Srrs#define _mm512_cmplt_epu16_mask(A, B) \ 366163953Srrs _mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_LT) 367163953Srrs#define _mm512_mask_cmplt_epu16_mask(k, A, B) \ 368170091Srrs _mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_LT) 369163953Srrs#define _mm512_cmpneq_epu16_mask(A, B) \ 370163953Srrs _mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_NE) 371216822Stuexen#define _mm512_mask_cmpneq_epu16_mask(k, A, B) \ 372164181Srrs _mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_NE) 373164181Srrs 374216822Stuexenstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 375164181Srrs_mm512_add_epi8 (__m512i __A, __m512i __B) { 376164181Srrs return (__m512i) ((__v64qu) __A + (__v64qu) __B); 377171158Srrs} 378164181Srrs 379164181Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 380164181Srrs_mm512_mask_add_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { 381164181Srrs return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 382164181Srrs (__v64qi)_mm512_add_epi8(__A, __B), 383170091Srrs (__v64qi)__W); 384163953Srrs} 385164181Srrs 386164181Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 387164181Srrs_mm512_maskz_add_epi8(__mmask64 __U, __m512i __A, __m512i __B) { 388164181Srrs return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 389163953Srrs (__v64qi)_mm512_add_epi8(__A, __B), 390170091Srrs (__v64qi)_mm512_setzero_si512()); 391163953Srrs} 392163953Srrs 393169420Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 394163953Srrs_mm512_sub_epi8 (__m512i __A, __m512i __B) { 395163953Srrs return (__m512i) ((__v64qu) __A - (__v64qu) __B); 396163953Srrs} 397163953Srrs 398163953Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 399163953Srrs_mm512_mask_sub_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { 400163953Srrs return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 401163953Srrs (__v64qi)_mm512_sub_epi8(__A, __B), 402163953Srrs (__v64qi)__W); 403163953Srrs} 404163953Srrs 405168943Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 406163953Srrs_mm512_maskz_sub_epi8(__mmask64 __U, __m512i __A, __m512i __B) { 407163953Srrs return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 408163953Srrs (__v64qi)_mm512_sub_epi8(__A, __B), 409163953Srrs (__v64qi)_mm512_setzero_si512()); 410163953Srrs} 411163953Srrs 412163953Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 413163953Srrs_mm512_add_epi16 (__m512i __A, __m512i __B) { 414163953Srrs return (__m512i) ((__v32hu) __A + (__v32hu) __B); 415163953Srrs} 416169655Srrs 417163953Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 418163953Srrs_mm512_mask_add_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { 419163953Srrs return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 420163953Srrs (__v32hi)_mm512_add_epi16(__A, __B), 421163953Srrs (__v32hi)__W); 422163953Srrs} 423163953Srrs 424237715Stuexenstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 425237715Stuexen_mm512_maskz_add_epi16(__mmask32 __U, __m512i __A, __m512i __B) { 426237049Stuexen return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 427237049Stuexen (__v32hi)_mm512_add_epi16(__A, __B), 428237049Stuexen (__v32hi)_mm512_setzero_si512()); 429237049Stuexen} 430163953Srrs 431163953Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 432163953Srrs_mm512_sub_epi16 (__m512i __A, __m512i __B) { 433163953Srrs return (__m512i) ((__v32hu) __A - (__v32hu) __B); 434163953Srrs} 435185694Srrs 436163953Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 437163953Srrs_mm512_mask_sub_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { 438163953Srrs return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 439163953Srrs (__v32hi)_mm512_sub_epi16(__A, __B), 440163953Srrs (__v32hi)__W); 441163953Srrs} 442163953Srrs 443185694Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 444163953Srrs_mm512_maskz_sub_epi16(__mmask32 __U, __m512i __A, __m512i __B) { 445163953Srrs return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 446235360Stuexen (__v32hi)_mm512_sub_epi16(__A, __B), 447170056Srrs (__v32hi)_mm512_setzero_si512()); 448163953Srrs} 449163953Srrs 450163953Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 451185694Srrs_mm512_mullo_epi16 (__m512i __A, __m512i __B) { 452163953Srrs return (__m512i) ((__v32hu) __A * (__v32hu) __B); 453228653Stuexen} 454163953Srrs 455163953Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 456163953Srrs_mm512_mask_mullo_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { 457163953Srrs return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 458163953Srrs (__v32hi)_mm512_mullo_epi16(__A, __B), 459228653Stuexen (__v32hi)__W); 460237715Stuexen} 461237715Stuexen 462163953Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 463169420Srrs_mm512_maskz_mullo_epi16(__mmask32 __U, __m512i __A, __m512i __B) { 464169420Srrs return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 465169420Srrs (__v32hi)_mm512_mullo_epi16(__A, __B), 466237715Stuexen (__v32hi)_mm512_setzero_si512()); 467237715Stuexen} 468237049Stuexen 469237049Stuexenstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 470168299Srrs_mm512_mask_blend_epi8 (__mmask64 __U, __m512i __A, __m512i __W) 471163953Srrs{ 472163953Srrs return (__m512i) __builtin_ia32_selectb_512 ((__mmask64) __U, 473171477Srrs (__v64qi) __W, 474171477Srrs (__v64qi) __A); 475216822Stuexen} 476171477Srrs 477216822Stuexenstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 478216822Stuexen_mm512_mask_blend_epi16 (__mmask32 __U, __m512i __A, __m512i __W) 479216822Stuexen{ 480171477Srrs return (__m512i) __builtin_ia32_selectw_512 ((__mmask32) __U, 481171477Srrs (__v32hi) __W, 482163953Srrs (__v32hi) __A); 483163953Srrs} 484163953Srrs 485163953Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 486163953Srrs_mm512_abs_epi8 (__m512i __A) 487163953Srrs{ 488163953Srrs return (__m512i)__builtin_ia32_pabsb512((__v64qi)__A); 489163953Srrs} 490179783Srrs 491171943Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 492171943Srrs_mm512_mask_abs_epi8 (__m512i __W, __mmask64 __U, __m512i __A) 493171943Srrs{ 494171943Srrs return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 495171943Srrs (__v64qi)_mm512_abs_epi8(__A), 496171943Srrs (__v64qi)__W); 497163953Srrs} 498163953Srrs 499163953Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 500163953Srrs_mm512_maskz_abs_epi8 (__mmask64 __U, __m512i __A) 501163953Srrs{ 502163953Srrs return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 503163953Srrs (__v64qi)_mm512_abs_epi8(__A), 504163953Srrs (__v64qi)_mm512_setzero_si512()); 505163953Srrs} 506163953Srrs 507165220Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 508163953Srrs_mm512_abs_epi16 (__m512i __A) 509165220Srrs{ 510218186Srrs return (__m512i)__builtin_ia32_pabsw512((__v32hi)__A); 511219397Srrs} 512165220Srrs 513163953Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 514163953Srrs_mm512_mask_abs_epi16 (__m512i __W, __mmask32 __U, __m512i __A) 515163953Srrs{ 516163953Srrs return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 517163953Srrs (__v32hi)_mm512_abs_epi16(__A), 518163953Srrs (__v32hi)__W); 519163953Srrs} 520163953Srrs 521163953Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 522163953Srrs_mm512_maskz_abs_epi16 (__mmask32 __U, __m512i __A) 523163953Srrs{ 524163953Srrs return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 525163953Srrs (__v32hi)_mm512_abs_epi16(__A), 526163953Srrs (__v32hi)_mm512_setzero_si512()); 527163953Srrs} 528163953Srrs 529163953Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 530163953Srrs_mm512_packs_epi32(__m512i __A, __m512i __B) 531165647Srrs{ 532163953Srrs return (__m512i)__builtin_ia32_packssdw512((__v16si)__A, (__v16si)__B); 533163953Srrs} 534163953Srrs 535163953Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 536163953Srrs_mm512_maskz_packs_epi32(__mmask32 __M, __m512i __A, __m512i __B) 537163953Srrs{ 538163953Srrs return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 539163953Srrs (__v32hi)_mm512_packs_epi32(__A, __B), 540163953Srrs (__v32hi)_mm512_setzero_si512()); 541163953Srrs} 542163953Srrs 543237715Stuexenstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 544237049Stuexen_mm512_mask_packs_epi32(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) 545237049Stuexen{ 546168299Srrs return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 547163953Srrs (__v32hi)_mm512_packs_epi32(__A, __B), 548163953Srrs (__v32hi)__W); 549163953Srrs} 550163953Srrs 551163953Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 552163953Srrs_mm512_packs_epi16(__m512i __A, __m512i __B) 553163953Srrs{ 554163953Srrs return (__m512i)__builtin_ia32_packsswb512((__v32hi)__A, (__v32hi) __B); 555163953Srrs} 556163953Srrs 557163953Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 558212225Srrs_mm512_mask_packs_epi16(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) 559163953Srrs{ 560172396Srrs return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 561224641Stuexen (__v64qi)_mm512_packs_epi16(__A, __B), 562163953Srrs (__v64qi)__W); 563221249Stuexen} 564221249Stuexen 565221249Stuexenstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 566221249Stuexen_mm512_maskz_packs_epi16(__mmask64 __M, __m512i __A, __m512i __B) 567221249Stuexen{ 568221249Stuexen return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 569221249Stuexen (__v64qi)_mm512_packs_epi16(__A, __B), 570221249Stuexen (__v64qi)_mm512_setzero_si512()); 571221249Stuexen} 572163953Srrs 573163953Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 574163953Srrs_mm512_packus_epi32(__m512i __A, __m512i __B) 575163953Srrs{ 576163953Srrs return (__m512i)__builtin_ia32_packusdw512((__v16si) __A, (__v16si) __B); 577221249Stuexen} 578221249Stuexen 579221249Stuexenstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 580221249Stuexen_mm512_maskz_packus_epi32(__mmask32 __M, __m512i __A, __m512i __B) 581221249Stuexen{ 582221249Stuexen return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 583221249Stuexen (__v32hi)_mm512_packus_epi32(__A, __B), 584221249Stuexen (__v32hi)_mm512_setzero_si512()); 585221249Stuexen} 586221249Stuexen 587221249Stuexenstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 588221249Stuexen_mm512_mask_packus_epi32(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) 589221249Stuexen{ 590221249Stuexen return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 591221249Stuexen (__v32hi)_mm512_packus_epi32(__A, __B), 592221249Stuexen (__v32hi)__W); 593221249Stuexen} 594221249Stuexen 595221249Stuexenstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 596221249Stuexen_mm512_packus_epi16(__m512i __A, __m512i __B) 597221249Stuexen{ 598221249Stuexen return (__m512i)__builtin_ia32_packuswb512((__v32hi) __A, (__v32hi) __B); 599221249Stuexen} 600221249Stuexen 601221249Stuexenstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 602221249Stuexen_mm512_mask_packus_epi16(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) 603221249Stuexen{ 604221249Stuexen return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 605221249Stuexen (__v64qi)_mm512_packus_epi16(__A, __B), 606221249Stuexen (__v64qi)__W); 607163953Srrs} 608163953Srrs 609221249Stuexenstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 610163953Srrs_mm512_maskz_packus_epi16(__mmask64 __M, __m512i __A, __m512i __B) 611169420Srrs{ 612163953Srrs return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 613163953Srrs (__v64qi)_mm512_packus_epi16(__A, __B), 614163953Srrs (__v64qi)_mm512_setzero_si512()); 615163953Srrs} 616163953Srrs 617163953Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 618163953Srrs_mm512_adds_epi8 (__m512i __A, __m512i __B) 619163953Srrs{ 620163953Srrs return (__m512i)__builtin_ia32_paddsb512((__v64qi)__A, (__v64qi)__B); 621163953Srrs} 622167598Srrs 623167598Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 624167598Srrs_mm512_mask_adds_epi8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) 625212225Srrs{ 626212225Srrs return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 627167598Srrs (__v64qi)_mm512_adds_epi8(__A, __B), 628167598Srrs (__v64qi)__W); 629167598Srrs} 630167598Srrs 631167598Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 632167598Srrs_mm512_maskz_adds_epi8 (__mmask64 __U, __m512i __A, __m512i __B) 633212225Srrs{ 634212225Srrs return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 635167598Srrs (__v64qi)_mm512_adds_epi8(__A, __B), 636172396Srrs (__v64qi)_mm512_setzero_si512()); 637167598Srrs} 638163953Srrs 639172090Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 640224641Stuexen_mm512_adds_epi16 (__m512i __A, __m512i __B) 641224641Stuexen{ 642163953Srrs return (__m512i)__builtin_ia32_paddsw512((__v32hi)__A, (__v32hi)__B); 643224641Stuexen} 644163953Srrs 645163953Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 646163953Srrs_mm512_mask_adds_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) 647163953Srrs{ 648224641Stuexen return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 649224641Stuexen (__v32hi)_mm512_adds_epi16(__A, __B), 650224641Stuexen (__v32hi)__W); 651224641Stuexen} 652163953Srrs 653163953Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 654235414Stuexen_mm512_maskz_adds_epi16 (__mmask32 __U, __m512i __A, __m512i __B) 655163953Srrs{ 656224641Stuexen return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 657224641Stuexen (__v32hi)_mm512_adds_epi16(__A, __B), 658224641Stuexen (__v32hi)_mm512_setzero_si512()); 659224641Stuexen} 660224641Stuexen 661224641Stuexenstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 662224641Stuexen_mm512_adds_epu8 (__m512i __A, __m512i __B) 663224641Stuexen{ 664224641Stuexen return (__m512i)__builtin_ia32_paddusb512((__v64qi) __A, (__v64qi) __B); 665224641Stuexen} 666224641Stuexen 667224641Stuexenstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 668224641Stuexen_mm512_mask_adds_epu8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) 669171440Srrs{ 670171440Srrs return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 671172396Srrs (__v64qi)_mm512_adds_epu8(__A, __B), 672172396Srrs (__v64qi)__W); 673172396Srrs} 674172396Srrs 675172396Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 676172396Srrs_mm512_maskz_adds_epu8 (__mmask64 __U, __m512i __A, __m512i __B) 677172396Srrs{ 678172396Srrs return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 679172396Srrs (__v64qi)_mm512_adds_epu8(__A, __B), 680172396Srrs (__v64qi)_mm512_setzero_si512()); 681172396Srrs} 682172396Srrs 683172396Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 684172396Srrs_mm512_adds_epu16 (__m512i __A, __m512i __B) 685172396Srrs{ 686172396Srrs return (__m512i)__builtin_ia32_paddusw512((__v32hi) __A, (__v32hi) __B); 687172396Srrs} 688212712Stuexen 689212712Stuexenstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 690172396Srrs_mm512_mask_adds_epu16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) 691172396Srrs{ 692172396Srrs return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 693172396Srrs (__v32hi)_mm512_adds_epu16(__A, __B), 694172396Srrs (__v32hi)__W); 695163953Srrs} 696163953Srrs 697185694Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 698185694Srrs_mm512_maskz_adds_epu16 (__mmask32 __U, __m512i __A, __m512i __B) 699185694Srrs{ 700185694Srrs return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 701185694Srrs (__v32hi)_mm512_adds_epu16(__A, __B), 702185694Srrs (__v32hi)_mm512_setzero_si512()); 703185694Srrs} 704185694Srrs 705185694Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 706185694Srrs_mm512_avg_epu8 (__m512i __A, __m512i __B) 707185694Srrs{ 708185694Srrs return (__m512i)__builtin_ia32_pavgb512((__v64qi)__A, (__v64qi)__B); 709185694Srrs} 710185694Srrs 711185694Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 712185694Srrs_mm512_mask_avg_epu8 (__m512i __W, __mmask64 __U, __m512i __A, 713185694Srrs __m512i __B) 714185694Srrs{ 715185694Srrs return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 716185694Srrs (__v64qi)_mm512_avg_epu8(__A, __B), 717185694Srrs (__v64qi)__W); 718185694Srrs} 719185694Srrs 720185694Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 721185694Srrs_mm512_maskz_avg_epu8 (__mmask64 __U, __m512i __A, __m512i __B) 722185694Srrs{ 723185694Srrs return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 724185694Srrs (__v64qi)_mm512_avg_epu8(__A, __B), 725185694Srrs (__v64qi)_mm512_setzero_si512()); 726185694Srrs} 727185694Srrs 728185694Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 729185694Srrs_mm512_avg_epu16 (__m512i __A, __m512i __B) 730185694Srrs{ 731185694Srrs return (__m512i)__builtin_ia32_pavgw512((__v32hi)__A, (__v32hi)__B); 732185694Srrs} 733185694Srrs 734185694Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 735185694Srrs_mm512_mask_avg_epu16 (__m512i __W, __mmask32 __U, __m512i __A, 736185694Srrs __m512i __B) 737185694Srrs{ 738185694Srrs return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 739185694Srrs (__v32hi)_mm512_avg_epu16(__A, __B), 740185694Srrs (__v32hi)__W); 741185694Srrs} 742185694Srrs 743185694Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 744185694Srrs_mm512_maskz_avg_epu16 (__mmask32 __U, __m512i __A, __m512i __B) 745185694Srrs{ 746185694Srrs return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 747185694Srrs (__v32hi)_mm512_avg_epu16(__A, __B), 748185694Srrs (__v32hi) _mm512_setzero_si512()); 749185694Srrs} 750185694Srrs 751185694Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 752185694Srrs_mm512_max_epi8 (__m512i __A, __m512i __B) 753185694Srrs{ 754185694Srrs return (__m512i)__builtin_ia32_pmaxsb512((__v64qi) __A, (__v64qi) __B); 755185694Srrs} 756185694Srrs 757185694Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 758185694Srrs_mm512_maskz_max_epi8 (__mmask64 __M, __m512i __A, __m512i __B) 759185694Srrs{ 760163953Srrs return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 761235360Stuexen (__v64qi)_mm512_max_epi8(__A, __B), 762163953Srrs (__v64qi)_mm512_setzero_si512()); 763163953Srrs} 764237565Stuexen 765172090Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 766172090Srrs_mm512_mask_max_epi8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) 767172090Srrs{ 768185694Srrs return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 769235360Stuexen (__v64qi)_mm512_max_epi8(__A, __B), 770172090Srrs (__v64qi)__W); 771169420Srrs} 772163953Srrs 773163953Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 774163953Srrs_mm512_max_epi16 (__m512i __A, __m512i __B) 775235360Stuexen{ 776185694Srrs return (__m512i)__builtin_ia32_pmaxsw512((__v32hi) __A, (__v32hi) __B); 777185694Srrs} 778185694Srrs 779185694Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 780185694Srrs_mm512_maskz_max_epi16 (__mmask32 __M, __m512i __A, __m512i __B) 781185694Srrs{ 782185694Srrs return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 783235360Stuexen (__v32hi)_mm512_max_epi16(__A, __B), 784235360Stuexen (__v32hi)_mm512_setzero_si512()); 785235360Stuexen} 786185694Srrs 787235360Stuexenstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 788185694Srrs_mm512_mask_max_epi16 (__m512i __W, __mmask32 __M, __m512i __A, 789185694Srrs __m512i __B) 790185694Srrs{ 791235360Stuexen return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 792185694Srrs (__v32hi)_mm512_max_epi16(__A, __B), 793235360Stuexen (__v32hi)__W); 794185694Srrs} 795185694Srrs 796185694Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 797185694Srrs_mm512_max_epu8 (__m512i __A, __m512i __B) 798235360Stuexen{ 799235360Stuexen return (__m512i)__builtin_ia32_pmaxub512((__v64qi)__A, (__v64qi)__B); 800185694Srrs} 801163953Srrs 802171440Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 803163953Srrs_mm512_maskz_max_epu8 (__mmask64 __M, __m512i __A, __m512i __B) 804235403Stuexen{ 805163953Srrs return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 806163953Srrs (__v64qi)_mm512_max_epu8(__A, __B), 807163953Srrs (__v64qi)_mm512_setzero_si512()); 808163953Srrs} 809163953Srrs 810163953Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 811168859Srrs_mm512_mask_max_epu8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) 812168859Srrs{ 813168859Srrs return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 814237565Stuexen (__v64qi)_mm512_max_epu8(__A, __B), 815172090Srrs (__v64qi)__W); 816172090Srrs} 817172090Srrs 818172090Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 819172090Srrs_mm512_max_epu16 (__m512i __A, __m512i __B) 820172090Srrs{ 821172090Srrs return (__m512i)__builtin_ia32_pmaxuw512((__v32hi)__A, (__v32hi)__B); 822171990Srrs} 823171943Srrs 824171943Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 825237565Stuexen_mm512_maskz_max_epu16 (__mmask32 __M, __m512i __A, __m512i __B) 826172090Srrs{ 827172090Srrs return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 828169420Srrs (__v32hi)_mm512_max_epu16(__A, __B), 829163953Srrs (__v32hi)_mm512_setzero_si512()); 830163953Srrs} 831163953Srrs 832224641Stuexenstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 833224641Stuexen_mm512_mask_max_epu16 (__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) 834224641Stuexen{ 835224641Stuexen return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 836224641Stuexen (__v32hi)_mm512_max_epu16(__A, __B), 837224641Stuexen (__v32hi)__W); 838224641Stuexen} 839224641Stuexen 840224641Stuexenstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 841224641Stuexen_mm512_min_epi8 (__m512i __A, __m512i __B) 842224641Stuexen{ 843224641Stuexen return (__m512i)__builtin_ia32_pminsb512((__v64qi) __A, (__v64qi) __B); 844224641Stuexen} 845224641Stuexen 846224641Stuexenstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 847224641Stuexen_mm512_maskz_min_epi8 (__mmask64 __M, __m512i __A, __m512i __B) 848224641Stuexen{ 849224641Stuexen return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 850224641Stuexen (__v64qi)_mm512_min_epi8(__A, __B), 851224641Stuexen (__v64qi)_mm512_setzero_si512()); 852224641Stuexen} 853224641Stuexen 854224641Stuexenstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 855224641Stuexen_mm512_mask_min_epi8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) 856224641Stuexen{ 857224641Stuexen return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 858224641Stuexen (__v64qi)_mm512_min_epi8(__A, __B), 859224641Stuexen (__v64qi)__W); 860224641Stuexen} 861163953Srrs 862163953Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 863163953Srrs_mm512_min_epi16 (__m512i __A, __m512i __B) 864163953Srrs{ 865163953Srrs return (__m512i)__builtin_ia32_pminsw512((__v32hi) __A, (__v32hi) __B); 866163953Srrs} 867237565Stuexen 868172090Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 869172090Srrs_mm512_maskz_min_epi16 (__mmask32 __M, __m512i __A, __m512i __B) 870172090Srrs{ 871172090Srrs return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 872169420Srrs (__v32hi)_mm512_min_epi16(__A, __B), 873169420Srrs (__v32hi)_mm512_setzero_si512()); 874163953Srrs} 875163953Srrs 876165220Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 877165220Srrs_mm512_mask_min_epi16 (__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) 878165220Srrs{ 879163953Srrs return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 880163953Srrs (__v32hi)_mm512_min_epi16(__A, __B), 881163953Srrs (__v32hi)__W); 882163953Srrs} 883163953Srrs 884163953Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 885228653Stuexen_mm512_min_epu8 (__m512i __A, __m512i __B) 886199477Stuexen{ 887199477Stuexen return (__m512i)__builtin_ia32_pminub512((__v64qi)__A, (__v64qi)__B); 888199477Stuexen} 889163953Srrs 890165220Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 891163953Srrs_mm512_maskz_min_epu8 (__mmask64 __M, __m512i __A, __m512i __B) 892163953Srrs{ 893163953Srrs return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 894163953Srrs (__v64qi)_mm512_min_epu8(__A, __B), 895165220Srrs (__v64qi)_mm512_setzero_si512()); 896165220Srrs} 897165220Srrs 898163953Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 899237565Stuexen_mm512_mask_min_epu8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) 900172090Srrs{ 901172090Srrs return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 902172090Srrs (__v64qi)_mm512_min_epu8(__A, __B), 903172090Srrs (__v64qi)__W); 904172090Srrs} 905172090Srrs 906172090Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 907172090Srrs_mm512_min_epu16 (__m512i __A, __m512i __B) 908172090Srrs{ 909172090Srrs return (__m512i)__builtin_ia32_pminuw512((__v32hi)__A, (__v32hi)__B); 910172090Srrs} 911172090Srrs 912163996Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 913237565Stuexen_mm512_maskz_min_epu16 (__mmask32 __M, __m512i __A, __m512i __B) 914172090Srrs{ 915172090Srrs return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 916163953Srrs (__v32hi)_mm512_min_epu16(__A, __B), 917163953Srrs (__v32hi)_mm512_setzero_si512()); 918163953Srrs} 919163953Srrs 920170056Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 921163953Srrs_mm512_mask_min_epu16 (__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) 922171943Srrs{ 923172703Srrs return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 924163953Srrs (__v32hi)_mm512_min_epu16(__A, __B), 925163953Srrs (__v32hi)__W); 926163953Srrs} 927163953Srrs 928172090Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 929163953Srrs_mm512_shuffle_epi8(__m512i __A, __m512i __B) 930163953Srrs{ 931169378Srrs return (__m512i)__builtin_ia32_pshufb512((__v64qi)__A,(__v64qi)__B); 932163953Srrs} 933163953Srrs 934163953Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 935163953Srrs_mm512_mask_shuffle_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) 936163953Srrs{ 937163953Srrs return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 938163953Srrs (__v64qi)_mm512_shuffle_epi8(__A, __B), 939171440Srrs (__v64qi)__W); 940163953Srrs} 941171158Srrs 942221627Stuexenstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 943163953Srrs_mm512_maskz_shuffle_epi8(__mmask64 __U, __m512i __A, __m512i __B) 944163953Srrs{ 945163953Srrs return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 946163953Srrs (__v64qi)_mm512_shuffle_epi8(__A, __B), 947163953Srrs (__v64qi)_mm512_setzero_si512()); 948163953Srrs} 949163953Srrs 950163953Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 951163953Srrs_mm512_subs_epi8 (__m512i __A, __m512i __B) 952224641Stuexen{ 953163953Srrs return (__m512i)__builtin_ia32_psubsb512((__v64qi)__A, (__v64qi)__B); 954166675Srrs} 955166675Srrs 956163953Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 957163953Srrs_mm512_mask_subs_epi8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) 958171943Srrs{ 959172703Srrs return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 960214918Stuexen (__v64qi)_mm512_subs_epi8(__A, __B), 961163953Srrs (__v64qi)__W); 962163953Srrs} 963163953Srrs 964163953Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 965163953Srrs_mm512_maskz_subs_epi8 (__mmask64 __U, __m512i __A, __m512i __B) 966163953Srrs{ 967228653Stuexen return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 968204141Stuexen (__v64qi)_mm512_subs_epi8(__A, __B), 969204141Stuexen (__v64qi)_mm512_setzero_si512()); 970163953Srrs} 971163953Srrs 972163953Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 973237565Stuexen_mm512_subs_epi16 (__m512i __A, __m512i __B) 974172090Srrs{ 975172090Srrs return (__m512i)__builtin_ia32_psubsw512((__v32hi)__A, (__v32hi)__B); 976172090Srrs} 977172090Srrs 978169420Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 979169420Srrs_mm512_mask_subs_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) 980163953Srrs{ 981163953Srrs return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 982163953Srrs (__v32hi)_mm512_subs_epi16(__A, __B), 983163953Srrs (__v32hi)__W); 984163953Srrs} 985204141Stuexen 986204141Stuexenstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 987204141Stuexen_mm512_maskz_subs_epi16 (__mmask32 __U, __m512i __A, __m512i __B) 988204141Stuexen{ 989204141Stuexen return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 990204141Stuexen (__v32hi)_mm512_subs_epi16(__A, __B), 991204141Stuexen (__v32hi)_mm512_setzero_si512()); 992163953Srrs} 993163953Srrs 994163953Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 995163953Srrs_mm512_subs_epu8 (__m512i __A, __m512i __B) 996163953Srrs{ 997163953Srrs return (__m512i)__builtin_ia32_psubusb512((__v64qi) __A, (__v64qi) __B); 998165220Srrs} 999163953Srrs 1000163953Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1001163953Srrs_mm512_mask_subs_epu8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) 1002163953Srrs{ 1003165220Srrs return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 1004165220Srrs (__v64qi)_mm512_subs_epu8(__A, __B), 1005165220Srrs (__v64qi)__W); 1006163953Srrs} 1007237565Stuexen 1008172090Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1009172090Srrs_mm512_maskz_subs_epu8 (__mmask64 __U, __m512i __A, __m512i __B) 1010172090Srrs{ 1011172090Srrs return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 1012172090Srrs (__v64qi)_mm512_subs_epu8(__A, __B), 1013172090Srrs (__v64qi)_mm512_setzero_si512()); 1014172090Srrs} 1015172090Srrs 1016172090Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1017172090Srrs_mm512_subs_epu16 (__m512i __A, __m512i __B) 1018172090Srrs{ 1019163996Srrs return (__m512i)__builtin_ia32_psubusw512((__v32hi) __A, (__v32hi) __B); 1020237565Stuexen} 1021172090Srrs 1022172090Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1023163953Srrs_mm512_mask_subs_epu16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) 1024163953Srrs{ 1025163953Srrs return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1026163953Srrs (__v32hi)_mm512_subs_epu16(__A, __B), 1027217760Stuexen (__v32hi)__W); 1028235416Stuexen} 1029163953Srrs 1030163953Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1031171440Srrs_mm512_maskz_subs_epu16 (__mmask32 __U, __m512i __A, __m512i __B) 1032163953Srrs{ 1033204141Stuexen return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1034163953Srrs (__v32hi)_mm512_subs_epu16(__A, __B), 1035163953Srrs (__v32hi)_mm512_setzero_si512()); 1036163953Srrs} 1037163953Srrs 1038229774Stuexenstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1039163953Srrs_mm512_permutex2var_epi16(__m512i __A, __m512i __I, __m512i __B) 1040230379Stuexen{ 1041163953Srrs return (__m512i)__builtin_ia32_vpermi2varhi512((__v32hi)__A, (__v32hi)__I, 1042163953Srrs (__v32hi)__B); 1043163953Srrs} 1044237565Stuexen 1045172090Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1046172090Srrs_mm512_mask_permutex2var_epi16(__m512i __A, __mmask32 __U, __m512i __I, 1047172090Srrs __m512i __B) 1048172090Srrs{ 1049172090Srrs return (__m512i)__builtin_ia32_selectw_512(__U, 1050172090Srrs (__v32hi)_mm512_permutex2var_epi16(__A, __I, __B), 1051171943Srrs (__v32hi)__A); 1052171440Srrs} 1053237565Stuexen 1054172090Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1055172090Srrs_mm512_mask2_permutex2var_epi16(__m512i __A, __m512i __I, __mmask32 __U, 1056163953Srrs __m512i __B) 1057163953Srrs{ 1058163953Srrs return (__m512i)__builtin_ia32_selectw_512(__U, 1059163953Srrs (__v32hi)_mm512_permutex2var_epi16(__A, __I, __B), 1060163953Srrs (__v32hi)__I); 1061163953Srrs} 1062163953Srrs 1063163953Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1064163953Srrs_mm512_maskz_permutex2var_epi16(__mmask32 __U, __m512i __A, __m512i __I, 1065163953Srrs __m512i __B) 1066163953Srrs{ 1067163953Srrs return (__m512i)__builtin_ia32_selectw_512(__U, 1068163953Srrs (__v32hi)_mm512_permutex2var_epi16(__A, __I, __B), 1069163953Srrs (__v32hi)_mm512_setzero_si512()); 1070163953Srrs} 1071163953Srrs 1072163953Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1073163953Srrs_mm512_mulhrs_epi16(__m512i __A, __m512i __B) 1074163953Srrs{ 1075163953Srrs return (__m512i)__builtin_ia32_pmulhrsw512((__v32hi)__A, (__v32hi)__B); 1076163953Srrs} 1077163953Srrs 1078163953Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1079169420Srrs_mm512_mask_mulhrs_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) 1080169420Srrs{ 1081169420Srrs return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1082163953Srrs (__v32hi)_mm512_mulhrs_epi16(__A, __B), 1083163953Srrs (__v32hi)__W); 1084163953Srrs} 1085163953Srrs 1086163953Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1087163953Srrs_mm512_maskz_mulhrs_epi16(__mmask32 __U, __m512i __A, __m512i __B) 1088163953Srrs{ 1089163953Srrs return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1090163953Srrs (__v32hi)_mm512_mulhrs_epi16(__A, __B), 1091163953Srrs (__v32hi)_mm512_setzero_si512()); 1092163953Srrs} 1093163953Srrs 1094163953Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1095163953Srrs_mm512_mulhi_epi16(__m512i __A, __m512i __B) 1096163953Srrs{ 1097163953Srrs return (__m512i)__builtin_ia32_pmulhw512((__v32hi) __A, (__v32hi) __B); 1098163953Srrs} 1099163953Srrs 1100163953Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1101163953Srrs_mm512_mask_mulhi_epi16(__m512i __W, __mmask32 __U, __m512i __A, 1102163953Srrs __m512i __B) 1103163953Srrs{ 1104163953Srrs return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1105185694Srrs (__v32hi)_mm512_mulhi_epi16(__A, __B), 1106185694Srrs (__v32hi)__W); 1107185694Srrs} 1108163953Srrs 1109163953Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1110163953Srrs_mm512_maskz_mulhi_epi16(__mmask32 __U, __m512i __A, __m512i __B) 1111163953Srrs{ 1112163953Srrs return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1113163953Srrs (__v32hi)_mm512_mulhi_epi16(__A, __B), 1114163953Srrs (__v32hi)_mm512_setzero_si512()); 1115169420Srrs} 1116169420Srrs 1117169420Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1118163953Srrs_mm512_mulhi_epu16(__m512i __A, __m512i __B) 1119163953Srrs{ 1120163953Srrs return (__m512i)__builtin_ia32_pmulhuw512((__v32hi) __A, (__v32hi) __B); 1121169420Srrs} 1122169420Srrs 1123169420Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1124163953Srrs_mm512_mask_mulhi_epu16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) 1125163953Srrs{ 1126163953Srrs return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1127163953Srrs (__v32hi)_mm512_mulhi_epu16(__A, __B), 1128163953Srrs (__v32hi)__W); 1129163953Srrs} 1130163953Srrs 1131163953Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1132163953Srrs_mm512_maskz_mulhi_epu16 (__mmask32 __U, __m512i __A, __m512i __B) 1133163953Srrs{ 1134235418Stuexen return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1135163953Srrs (__v32hi)_mm512_mulhi_epu16(__A, __B), 1136163953Srrs (__v32hi)_mm512_setzero_si512()); 1137163953Srrs} 1138163953Srrs 1139237565Stuexenstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1140172090Srrs_mm512_maddubs_epi16(__m512i __X, __m512i __Y) { 1141172090Srrs return (__m512i)__builtin_ia32_pmaddubsw512((__v64qi)__X, (__v64qi)__Y); 1142172090Srrs} 1143172090Srrs 1144163953Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1145163953Srrs_mm512_mask_maddubs_epi16(__m512i __W, __mmask32 __U, __m512i __X, 1146163953Srrs __m512i __Y) { 1147163953Srrs return (__m512i)__builtin_ia32_selectw_512((__mmask32) __U, 1148163953Srrs (__v32hi)_mm512_maddubs_epi16(__X, __Y), 1149235418Stuexen (__v32hi)__W); 1150163953Srrs} 1151163953Srrs 1152163953Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1153163953Srrs_mm512_maskz_maddubs_epi16(__mmask32 __U, __m512i __X, __m512i __Y) { 1154163953Srrs return (__m512i)__builtin_ia32_selectw_512((__mmask32) __U, 1155163953Srrs (__v32hi)_mm512_maddubs_epi16(__X, __Y), 1156169420Srrs (__v32hi)_mm512_setzero_si512()); 1157169420Srrs} 1158163953Srrs 1159163953Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1160235418Stuexen_mm512_madd_epi16(__m512i __A, __m512i __B) { 1161235418Stuexen return (__m512i)__builtin_ia32_pmaddwd512((__v32hi)__A, (__v32hi)__B); 1162235418Stuexen} 1163235418Stuexen 1164163953Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1165163953Srrs_mm512_mask_madd_epi16(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) { 1166163953Srrs return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 1167163953Srrs (__v16si)_mm512_madd_epi16(__A, __B), 1168163953Srrs (__v16si)__W); 1169169420Srrs} 1170169420Srrs 1171163953Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1172185694Srrs_mm512_maskz_madd_epi16(__mmask16 __U, __m512i __A, __m512i __B) { 1173185694Srrs return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 1174185694Srrs (__v16si)_mm512_madd_epi16(__A, __B), 1175185694Srrs (__v16si)_mm512_setzero_si512()); 1176185694Srrs} 1177185694Srrs 1178185694Srrsstatic __inline__ __m256i __DEFAULT_FN_ATTRS512 1179185694Srrs_mm512_cvtsepi16_epi8 (__m512i __A) { 1180185694Srrs return (__m256i) __builtin_ia32_pmovswb512_mask ((__v32hi) __A, 1181185694Srrs (__v32qi)_mm256_setzero_si256(), 1182185694Srrs (__mmask32) -1); 1183185694Srrs} 1184185694Srrs 1185185694Srrsstatic __inline__ __m256i __DEFAULT_FN_ATTRS512 1186163953Srrs_mm512_mask_cvtsepi16_epi8 (__m256i __O, __mmask32 __M, __m512i __A) { 1187163953Srrs return (__m256i) __builtin_ia32_pmovswb512_mask ((__v32hi) __A, 1188163953Srrs (__v32qi)__O, 1189163953Srrs __M); 1190163953Srrs} 1191163953Srrs 1192163953Srrsstatic __inline__ __m256i __DEFAULT_FN_ATTRS512 1193163953Srrs_mm512_maskz_cvtsepi16_epi8 (__mmask32 __M, __m512i __A) { 1194163953Srrs return (__m256i) __builtin_ia32_pmovswb512_mask ((__v32hi) __A, 1195163953Srrs (__v32qi) _mm256_setzero_si256(), 1196163953Srrs __M); 1197163953Srrs} 1198163953Srrs 1199163953Srrsstatic __inline__ __m256i __DEFAULT_FN_ATTRS512 1200235403Stuexen_mm512_cvtusepi16_epi8 (__m512i __A) { 1201163953Srrs return (__m256i) __builtin_ia32_pmovuswb512_mask ((__v32hi) __A, 1202237565Stuexen (__v32qi) _mm256_setzero_si256(), 1203172090Srrs (__mmask32) -1); 1204172090Srrs} 1205172090Srrs 1206172090Srrsstatic __inline__ __m256i __DEFAULT_FN_ATTRS512 1207172090Srrs_mm512_mask_cvtusepi16_epi8 (__m256i __O, __mmask32 __M, __m512i __A) { 1208172090Srrs return (__m256i) __builtin_ia32_pmovuswb512_mask ((__v32hi) __A, 1209172090Srrs (__v32qi) __O, 1210171943Srrs __M); 1211171943Srrs} 1212237565Stuexen 1213172090Srrsstatic __inline__ __m256i __DEFAULT_FN_ATTRS512 1214172090Srrs_mm512_maskz_cvtusepi16_epi8 (__mmask32 __M, __m512i __A) { 1215163953Srrs return (__m256i) __builtin_ia32_pmovuswb512_mask ((__v32hi) __A, 1216163953Srrs (__v32qi) _mm256_setzero_si256(), 1217163953Srrs __M); 1218185694Srrs} 1219163953Srrs 1220163953Srrsstatic __inline__ __m256i __DEFAULT_FN_ATTRS512 1221163953Srrs_mm512_cvtepi16_epi8 (__m512i __A) { 1222172090Srrs return (__m256i) __builtin_ia32_pmovwb512_mask ((__v32hi) __A, 1223163953Srrs (__v32qi) _mm256_undefined_si256(), 1224163953Srrs (__mmask32) -1); 1225163953Srrs} 1226163953Srrs 1227163953Srrsstatic __inline__ __m256i __DEFAULT_FN_ATTRS512 1228163953Srrs_mm512_mask_cvtepi16_epi8 (__m256i __O, __mmask32 __M, __m512i __A) { 1229163953Srrs return (__m256i) __builtin_ia32_pmovwb512_mask ((__v32hi) __A, 1230163953Srrs (__v32qi) __O, 1231163953Srrs __M); 1232163953Srrs} 1233163953Srrs 1234163953Srrsstatic __inline__ __m256i __DEFAULT_FN_ATTRS512 1235163953Srrs_mm512_maskz_cvtepi16_epi8 (__mmask32 __M, __m512i __A) { 1236163953Srrs return (__m256i) __builtin_ia32_pmovwb512_mask ((__v32hi) __A, 1237163953Srrs (__v32qi) _mm256_setzero_si256(), 1238163953Srrs __M); 1239163953Srrs} 1240163953Srrs 1241163953Srrsstatic __inline__ void __DEFAULT_FN_ATTRS512 1242163953Srrs_mm512_mask_cvtepi16_storeu_epi8 (void * __P, __mmask32 __M, __m512i __A) 1243163953Srrs{ 1244163953Srrs __builtin_ia32_pmovwb512mem_mask ((__v32qi *) __P, (__v32hi) __A, __M); 1245163953Srrs} 1246163953Srrs 1247163953Srrsstatic __inline__ void __DEFAULT_FN_ATTRS512 1248163953Srrs_mm512_mask_cvtsepi16_storeu_epi8 (void * __P, __mmask32 __M, __m512i __A) 1249163953Srrs{ 1250163953Srrs __builtin_ia32_pmovswb512mem_mask ((__v32qi *) __P, (__v32hi) __A, __M); 1251163953Srrs} 1252163953Srrs 1253163953Srrsstatic __inline__ void __DEFAULT_FN_ATTRS512 1254163953Srrs_mm512_mask_cvtusepi16_storeu_epi8 (void * __P, __mmask32 __M, __m512i __A) 1255163953Srrs{ 1256163953Srrs __builtin_ia32_pmovuswb512mem_mask ((__v32qi *) __P, (__v32hi) __A, __M); 1257166086Srrs} 1258163953Srrs 1259169420Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1260169420Srrs_mm512_unpackhi_epi8(__m512i __A, __m512i __B) { 1261163953Srrs return (__m512i)__builtin_shufflevector((__v64qi)__A, (__v64qi)__B, 1262163953Srrs 8, 64+8, 9, 64+9, 1263163953Srrs 10, 64+10, 11, 64+11, 1264163953Srrs 12, 64+12, 13, 64+13, 1265163953Srrs 14, 64+14, 15, 64+15, 1266166086Srrs 24, 64+24, 25, 64+25, 1267163953Srrs 26, 64+26, 27, 64+27, 1268163953Srrs 28, 64+28, 29, 64+29, 1269163953Srrs 30, 64+30, 31, 64+31, 1270163953Srrs 40, 64+40, 41, 64+41, 1271163953Srrs 42, 64+42, 43, 64+43, 1272169420Srrs 44, 64+44, 45, 64+45, 1273169420Srrs 46, 64+46, 47, 64+47, 1274163953Srrs 56, 64+56, 57, 64+57, 1275163953Srrs 58, 64+58, 59, 64+59, 1276163953Srrs 60, 64+60, 61, 64+61, 1277163953Srrs 62, 64+62, 63, 64+63); 1278163953Srrs} 1279163953Srrs 1280235418Stuexenstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1281163953Srrs_mm512_mask_unpackhi_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { 1282163953Srrs return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 1283163953Srrs (__v64qi)_mm512_unpackhi_epi8(__A, __B), 1284163953Srrs (__v64qi)__W); 1285237715Stuexen} 1286237715Stuexen 1287237049Stuexenstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1288237049Stuexen_mm512_maskz_unpackhi_epi8(__mmask64 __U, __m512i __A, __m512i __B) { 1289237049Stuexen return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 1290237049Stuexen (__v64qi)_mm512_unpackhi_epi8(__A, __B), 1291163953Srrs (__v64qi)_mm512_setzero_si512()); 1292163953Srrs} 1293163953Srrs 1294163953Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1295169420Srrs_mm512_unpackhi_epi16(__m512i __A, __m512i __B) { 1296169420Srrs return (__m512i)__builtin_shufflevector((__v32hi)__A, (__v32hi)__B, 1297169420Srrs 4, 32+4, 5, 32+5, 1298163953Srrs 6, 32+6, 7, 32+7, 1299169420Srrs 12, 32+12, 13, 32+13, 1300169420Srrs 14, 32+14, 15, 32+15, 1301163953Srrs 20, 32+20, 21, 32+21, 1302163953Srrs 22, 32+22, 23, 32+23, 1303163953Srrs 28, 32+28, 29, 32+29, 1304163953Srrs 30, 32+30, 31, 32+31); 1305163953Srrs} 1306237715Stuexen 1307237715Stuexenstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1308237049Stuexen_mm512_mask_unpackhi_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { 1309237049Stuexen return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1310168299Srrs (__v32hi)_mm512_unpackhi_epi16(__A, __B), 1311163953Srrs (__v32hi)__W); 1312163953Srrs} 1313163953Srrs 1314163953Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1315163953Srrs_mm512_maskz_unpackhi_epi16(__mmask32 __U, __m512i __A, __m512i __B) { 1316163953Srrs return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1317163953Srrs (__v32hi)_mm512_unpackhi_epi16(__A, __B), 1318237715Stuexen (__v32hi)_mm512_setzero_si512()); 1319237715Stuexen} 1320237049Stuexen 1321237049Stuexenstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1322168299Srrs_mm512_unpacklo_epi8(__m512i __A, __m512i __B) { 1323163953Srrs return (__m512i)__builtin_shufflevector((__v64qi)__A, (__v64qi)__B, 1324163953Srrs 0, 64+0, 1, 64+1, 1325163953Srrs 2, 64+2, 3, 64+3, 1326163953Srrs 4, 64+4, 5, 64+5, 1327163953Srrs 6, 64+6, 7, 64+7, 1328237715Stuexen 16, 64+16, 17, 64+17, 1329237715Stuexen 18, 64+18, 19, 64+19, 1330237049Stuexen 20, 64+20, 21, 64+21, 1331237049Stuexen 22, 64+22, 23, 64+23, 1332168299Srrs 32, 64+32, 33, 64+33, 1333163953Srrs 34, 64+34, 35, 64+35, 1334163953Srrs 36, 64+36, 37, 64+37, 1335163953Srrs 38, 64+38, 39, 64+39, 1336163953Srrs 48, 64+48, 49, 64+49, 1337163953Srrs 50, 64+50, 51, 64+51, 1338237715Stuexen 52, 64+52, 53, 64+53, 1339237715Stuexen 54, 64+54, 55, 64+55); 1340237049Stuexen} 1341237049Stuexen 1342168299Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1343163953Srrs_mm512_mask_unpacklo_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { 1344163953Srrs return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 1345163953Srrs (__v64qi)_mm512_unpacklo_epi8(__A, __B), 1346163953Srrs (__v64qi)__W); 1347163953Srrs} 1348237715Stuexen 1349237715Stuexenstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1350237049Stuexen_mm512_maskz_unpacklo_epi8(__mmask64 __U, __m512i __A, __m512i __B) { 1351237049Stuexen return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 1352168299Srrs (__v64qi)_mm512_unpacklo_epi8(__A, __B), 1353163953Srrs (__v64qi)_mm512_setzero_si512()); 1354163953Srrs} 1355163953Srrs 1356171943Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1357163953Srrs_mm512_unpacklo_epi16(__m512i __A, __m512i __B) { 1358163953Srrs return (__m512i)__builtin_shufflevector((__v32hi)__A, (__v32hi)__B, 1359163953Srrs 0, 32+0, 1, 32+1, 1360163953Srrs 2, 32+2, 3, 32+3, 1361163953Srrs 8, 32+8, 9, 32+9, 1362163953Srrs 10, 32+10, 11, 32+11, 1363163953Srrs 16, 32+16, 17, 32+17, 1364163953Srrs 18, 32+18, 19, 32+19, 1365163953Srrs 24, 32+24, 25, 32+25, 1366163953Srrs 26, 32+26, 27, 32+27); 1367163953Srrs} 1368163953Srrs 1369163953Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1370163953Srrs_mm512_mask_unpacklo_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { 1371172090Srrs return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1372163953Srrs (__v32hi)_mm512_unpacklo_epi16(__A, __B), 1373237715Stuexen (__v32hi)__W); 1374237049Stuexen} 1375237049Stuexen 1376237049Stuexenstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1377163953Srrs_mm512_maskz_unpacklo_epi16(__mmask32 __U, __m512i __A, __m512i __B) { 1378163953Srrs return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1379163953Srrs (__v32hi)_mm512_unpacklo_epi16(__A, __B), 1380163953Srrs (__v32hi)_mm512_setzero_si512()); 1381169420Srrs} 1382171943Srrs 1383163953Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1384163953Srrs_mm512_cvtepi8_epi16(__m256i __A) 1385179783Srrs{ 1386171943Srrs /* This function always performs a signed extension, but __v32qi is a char 1387171943Srrs which may be signed or unsigned, so use __v32qs. */ 1388171943Srrs return (__m512i)__builtin_convertvector((__v32qs)__A, __v32hi); 1389171943Srrs} 1390171943Srrs 1391171943Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1392163953Srrs_mm512_mask_cvtepi8_epi16(__m512i __W, __mmask32 __U, __m256i __A) 1393169378Srrs{ 1394163953Srrs return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1395163953Srrs (__v32hi)_mm512_cvtepi8_epi16(__A), 1396165220Srrs (__v32hi)__W); 1397163953Srrs} 1398163953Srrs 1399163953Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1400163953Srrs_mm512_maskz_cvtepi8_epi16(__mmask32 __U, __m256i __A) 1401163953Srrs{ 1402163953Srrs return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1403163953Srrs (__v32hi)_mm512_cvtepi8_epi16(__A), 1404163953Srrs (__v32hi)_mm512_setzero_si512()); 1405163953Srrs} 1406163953Srrs 1407163953Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1408163953Srrs_mm512_cvtepu8_epi16(__m256i __A) 1409163953Srrs{ 1410163953Srrs return (__m512i)__builtin_convertvector((__v32qu)__A, __v32hi); 1411163953Srrs} 1412163953Srrs 1413163953Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1414163953Srrs_mm512_mask_cvtepu8_epi16(__m512i __W, __mmask32 __U, __m256i __A) 1415163953Srrs{ 1416163953Srrs return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1417163953Srrs (__v32hi)_mm512_cvtepu8_epi16(__A), 1418163953Srrs (__v32hi)__W); 1419163953Srrs} 1420163953Srrs 1421169420Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1422163953Srrs_mm512_maskz_cvtepu8_epi16(__mmask32 __U, __m256i __A) 1423163953Srrs{ 1424163953Srrs return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1425185694Srrs (__v32hi)_mm512_cvtepu8_epi16(__A), 1426185694Srrs (__v32hi)_mm512_setzero_si512()); 1427237715Stuexen} 1428185694Srrs 1429185694Srrs 1430185694Srrs#define _mm512_shufflehi_epi16(A, imm) \ 1431185694Srrs (__m512i)__builtin_ia32_pshufhw512((__v32hi)(__m512i)(A), (int)(imm)) 1432237049Stuexen 1433185694Srrs#define _mm512_mask_shufflehi_epi16(W, U, A, imm) \ 1434163953Srrs (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ 1435185694Srrs (__v32hi)_mm512_shufflehi_epi16((A), \ 1436163953Srrs (imm)), \ 1437163953Srrs (__v32hi)(__m512i)(W)) 1438163953Srrs 1439163953Srrs#define _mm512_maskz_shufflehi_epi16(U, A, imm) \ 1440163953Srrs (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ 1441163953Srrs (__v32hi)_mm512_shufflehi_epi16((A), \ 1442163953Srrs (imm)), \ 1443163953Srrs (__v32hi)_mm512_setzero_si512()) 1444237715Stuexen 1445163953Srrs#define _mm512_shufflelo_epi16(A, imm) \ 1446185694Srrs (__m512i)__builtin_ia32_pshuflw512((__v32hi)(__m512i)(A), (int)(imm)) 1447228653Stuexen 1448237049Stuexen 1449237049Stuexen#define _mm512_mask_shufflelo_epi16(W, U, A, imm) \ 1450237049Stuexen (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ 1451163953Srrs (__v32hi)_mm512_shufflelo_epi16((A), \ 1452163953Srrs (imm)), \ 1453163953Srrs (__v32hi)(__m512i)(W)) 1454163953Srrs 1455185694Srrs 1456185694Srrs#define _mm512_maskz_shufflelo_epi16(U, A, imm) \ 1457185694Srrs (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ 1458165220Srrs (__v32hi)_mm512_shufflelo_epi16((A), \ 1459163953Srrs (imm)), \ 1460164205Srrs (__v32hi)_mm512_setzero_si512()) 1461170140Srrs 1462163953Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1463185694Srrs_mm512_sllv_epi16(__m512i __A, __m512i __B) 1464163953Srrs{ 1465163953Srrs return (__m512i)__builtin_ia32_psllv32hi((__v32hi) __A, (__v32hi) __B); 1466166675Srrs} 1467166023Srrs 1468166023Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1469166023Srrs_mm512_mask_sllv_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) 1470166023Srrs{ 1471166023Srrs return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1472166023Srrs (__v32hi)_mm512_sllv_epi16(__A, __B), 1473163953Srrs (__v32hi)__W); 1474163953Srrs} 1475163953Srrs 1476163953Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1477165647Srrs_mm512_maskz_sllv_epi16(__mmask32 __U, __m512i __A, __m512i __B) 1478163953Srrs{ 1479163953Srrs return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1480163953Srrs (__v32hi)_mm512_sllv_epi16(__A, __B), 1481163953Srrs (__v32hi)_mm512_setzero_si512()); 1482163953Srrs} 1483165647Srrs 1484163953Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1485163953Srrs_mm512_sll_epi16(__m512i __A, __m128i __B) 1486163953Srrs{ 1487237715Stuexen return (__m512i)__builtin_ia32_psllw512((__v32hi) __A, (__v8hi) __B); 1488237049Stuexen} 1489179157Srrs 1490166023Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1491166023Srrs_mm512_mask_sll_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B) 1492163953Srrs{ 1493163953Srrs return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1494163953Srrs (__v32hi)_mm512_sll_epi16(__A, __B), 1495163953Srrs (__v32hi)__W); 1496163953Srrs} 1497163953Srrs 1498163953Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1499163953Srrs_mm512_maskz_sll_epi16(__mmask32 __U, __m512i __A, __m128i __B) 1500163953Srrs{ 1501163953Srrs return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1502163953Srrs (__v32hi)_mm512_sll_epi16(__A, __B), 1503163953Srrs (__v32hi)_mm512_setzero_si512()); 1504163953Srrs} 1505163953Srrs 1506163953Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1507163953Srrs_mm512_slli_epi16(__m512i __A, int __B) 1508163953Srrs{ 1509163953Srrs return (__m512i)__builtin_ia32_psllwi512((__v32hi)__A, __B); 1510163953Srrs} 1511163953Srrs 1512163953Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1513163953Srrs_mm512_mask_slli_epi16(__m512i __W, __mmask32 __U, __m512i __A, int __B) 1514163953Srrs{ 1515228907Stuexen return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1516163953Srrs (__v32hi)_mm512_slli_epi16(__A, __B), 1517163953Srrs (__v32hi)__W); 1518163953Srrs} 1519163953Srrs 1520163953Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1521163953Srrs_mm512_maskz_slli_epi16(__mmask32 __U, __m512i __A, int __B) 1522163953Srrs{ 1523163953Srrs return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1524163953Srrs (__v32hi)_mm512_slli_epi16(__A, __B), 1525163953Srrs (__v32hi)_mm512_setzero_si512()); 1526163953Srrs} 1527163953Srrs 1528163953Srrs#define _mm512_bslli_epi128(a, imm) \ 1529163953Srrs (__m512i)__builtin_ia32_pslldqi512_byteshift((__v8di)(__m512i)(a), (int)(imm)) 1530163953Srrs 1531163953Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1532166023Srrs_mm512_srlv_epi16(__m512i __A, __m512i __B) 1533172091Srrs{ 1534172091Srrs return (__m512i)__builtin_ia32_psrlv32hi((__v32hi)__A, (__v32hi)__B); 1535172091Srrs} 1536172091Srrs 1537172091Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1538172091Srrs_mm512_mask_srlv_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) 1539172091Srrs{ 1540172091Srrs return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1541172091Srrs (__v32hi)_mm512_srlv_epi16(__A, __B), 1542172091Srrs (__v32hi)__W); 1543172091Srrs} 1544172091Srrs 1545172091Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1546172091Srrs_mm512_maskz_srlv_epi16(__mmask32 __U, __m512i __A, __m512i __B) 1547172091Srrs{ 1548172091Srrs return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1549172091Srrs (__v32hi)_mm512_srlv_epi16(__A, __B), 1550172091Srrs (__v32hi)_mm512_setzero_si512()); 1551166023Srrs} 1552163953Srrs 1553163953Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1554163953Srrs_mm512_srav_epi16(__m512i __A, __m512i __B) 1555166023Srrs{ 1556163953Srrs return (__m512i)__builtin_ia32_psrav32hi((__v32hi)__A, (__v32hi)__B); 1557169352Srrs} 1558166023Srrs 1559166023Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1560166023Srrs_mm512_mask_srav_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) 1561163953Srrs{ 1562163953Srrs return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1563228653Stuexen (__v32hi)_mm512_srav_epi16(__A, __B), 1564163953Srrs (__v32hi)__W); 1565166023Srrs} 1566166023Srrs 1567163953Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1568163953Srrs_mm512_maskz_srav_epi16(__mmask32 __U, __m512i __A, __m512i __B) 1569163953Srrs{ 1570163953Srrs return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1571171440Srrs (__v32hi)_mm512_srav_epi16(__A, __B), 1572171440Srrs (__v32hi)_mm512_setzero_si512()); 1573163953Srrs} 1574166675Srrs 1575166675Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1576166675Srrs_mm512_sra_epi16(__m512i __A, __m128i __B) 1577166675Srrs{ 1578171943Srrs return (__m512i)__builtin_ia32_psraw512((__v32hi) __A, (__v8hi) __B); 1579171943Srrs} 1580163953Srrs 1581163953Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1582163953Srrs_mm512_mask_sra_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B) 1583163953Srrs{ 1584166675Srrs return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1585165220Srrs (__v32hi)_mm512_sra_epi16(__A, __B), 1586163953Srrs (__v32hi)__W); 1587163953Srrs} 1588163953Srrs 1589163953Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1590237565Stuexen_mm512_maskz_sra_epi16(__mmask32 __U, __m512i __A, __m128i __B) 1591172090Srrs{ 1592172090Srrs return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1593172090Srrs (__v32hi)_mm512_sra_epi16(__A, __B), 1594163953Srrs (__v32hi)_mm512_setzero_si512()); 1595163953Srrs} 1596163953Srrs 1597163953Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1598163953Srrs_mm512_srai_epi16(__m512i __A, int __B) 1599163953Srrs{ 1600163953Srrs return (__m512i)__builtin_ia32_psrawi512((__v32hi)__A, __B); 1601163953Srrs} 1602237565Stuexen 1603172090Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1604172090Srrs_mm512_mask_srai_epi16(__m512i __W, __mmask32 __U, __m512i __A, int __B) 1605172090Srrs{ 1606172090Srrs return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1607172090Srrs (__v32hi)_mm512_srai_epi16(__A, __B), 1608172090Srrs (__v32hi)__W); 1609172090Srrs} 1610172090Srrs 1611172090Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1612172090Srrs_mm512_maskz_srai_epi16(__mmask32 __U, __m512i __A, int __B) 1613172090Srrs{ 1614172090Srrs return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1615237565Stuexen (__v32hi)_mm512_srai_epi16(__A, __B), 1616172090Srrs (__v32hi)_mm512_setzero_si512()); 1617172090Srrs} 1618163953Srrs 1619163953Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1620163953Srrs_mm512_srl_epi16(__m512i __A, __m128i __B) 1621163953Srrs{ 1622163953Srrs return (__m512i)__builtin_ia32_psrlw512((__v32hi) __A, (__v8hi) __B); 1623163953Srrs} 1624163953Srrs 1625163953Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1626170642Srrs_mm512_mask_srl_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B) 1627218186Srrs{ 1628218186Srrs return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1629219397Srrs (__v32hi)_mm512_srl_epi16(__A, __B), 1630163953Srrs (__v32hi)__W); 1631163953Srrs} 1632163953Srrs 1633163953Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1634163953Srrs_mm512_maskz_srl_epi16(__mmask32 __U, __m512i __A, __m128i __B) 1635163953Srrs{ 1636163953Srrs return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1637163953Srrs (__v32hi)_mm512_srl_epi16(__A, __B), 1638163953Srrs (__v32hi)_mm512_setzero_si512()); 1639163953Srrs} 1640163953Srrs 1641163953Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1642163953Srrs_mm512_srli_epi16(__m512i __A, int __B) 1643163953Srrs{ 1644165220Srrs return (__m512i)__builtin_ia32_psrlwi512((__v32hi)__A, __B); 1645163953Srrs} 1646163953Srrs 1647163953Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1648163953Srrs_mm512_mask_srli_epi16(__m512i __W, __mmask32 __U, __m512i __A, int __B) 1649163953Srrs{ 1650228653Stuexen return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1651163953Srrs (__v32hi)_mm512_srli_epi16(__A, __B), 1652237715Stuexen (__v32hi)__W); 1653166023Srrs} 1654166023Srrs 1655163953Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1656163953Srrs_mm512_maskz_srli_epi16(__mmask32 __U, __m512i __A, int __B) 1657163953Srrs{ 1658165220Srrs return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1659163953Srrs (__v32hi)_mm512_srli_epi16(__A, __B), 1660166023Srrs (__v32hi)_mm512_setzero_si512()); 1661166023Srrs} 1662163953Srrs 1663169352Srrs#define _mm512_bsrli_epi128(a, imm) \ 1664163953Srrs (__m512i)__builtin_ia32_psrldqi512_byteshift((__v8di)(__m512i)(a), (int)(imm)) 1665163953Srrs 1666163953Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1667163953Srrs_mm512_mask_mov_epi16 (__m512i __W, __mmask32 __U, __m512i __A) 1668163953Srrs{ 1669163953Srrs return (__m512i) __builtin_ia32_selectw_512 ((__mmask32) __U, 1670163953Srrs (__v32hi) __A, 1671166023Srrs (__v32hi) __W); 1672166023Srrs} 1673163953Srrs 1674163953Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1675185694Srrs_mm512_maskz_mov_epi16 (__mmask32 __U, __m512i __A) 1676185694Srrs{ 1677185694Srrs return (__m512i) __builtin_ia32_selectw_512 ((__mmask32) __U, 1678185694Srrs (__v32hi) __A, 1679185694Srrs (__v32hi) _mm512_setzero_si512 ()); 1680185694Srrs} 1681185694Srrs 1682185694Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1683185694Srrs_mm512_mask_mov_epi8 (__m512i __W, __mmask64 __U, __m512i __A) 1684163953Srrs{ 1685185694Srrs return (__m512i) __builtin_ia32_selectb_512 ((__mmask64) __U, 1686185694Srrs (__v64qi) __A, 1687185694Srrs (__v64qi) __W); 1688185694Srrs} 1689185694Srrs 1690185694Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1691185694Srrs_mm512_maskz_mov_epi8 (__mmask64 __U, __m512i __A) 1692185694Srrs{ 1693185694Srrs return (__m512i) __builtin_ia32_selectb_512 ((__mmask64) __U, 1694185694Srrs (__v64qi) __A, 1695185694Srrs (__v64qi) _mm512_setzero_si512 ()); 1696185694Srrs} 1697185694Srrs 1698185694Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1699185694Srrs_mm512_mask_set1_epi8 (__m512i __O, __mmask64 __M, char __A) 1700185694Srrs{ 1701185694Srrs return (__m512i) __builtin_ia32_selectb_512(__M, 1702185694Srrs (__v64qi)_mm512_set1_epi8(__A), 1703185694Srrs (__v64qi) __O); 1704185694Srrs} 1705185694Srrs 1706185694Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1707185694Srrs_mm512_maskz_set1_epi8 (__mmask64 __M, char __A) 1708185694Srrs{ 1709185694Srrs return (__m512i) __builtin_ia32_selectb_512(__M, 1710185694Srrs (__v64qi) _mm512_set1_epi8(__A), 1711185694Srrs (__v64qi) _mm512_setzero_si512()); 1712185694Srrs} 1713237715Stuexen 1714237049Stuexenstatic __inline__ __mmask64 __DEFAULT_FN_ATTRS 1715237049Stuexen_mm512_kunpackd (__mmask64 __A, __mmask64 __B) 1716185694Srrs{ 1717185694Srrs return (__mmask64) __builtin_ia32_kunpckdi ((__mmask64) __A, 1718185694Srrs (__mmask64) __B); 1719185694Srrs} 1720185694Srrs 1721185694Srrsstatic __inline__ __mmask32 __DEFAULT_FN_ATTRS 1722163953Srrs_mm512_kunpackw (__mmask32 __A, __mmask32 __B) 1723163953Srrs{ 1724163953Srrs return (__mmask32) __builtin_ia32_kunpcksi ((__mmask32) __A, 1725166023Srrs (__mmask32) __B); 1726166023Srrs} 1727166023Srrs 1728166023Srrsstatic __inline __m512i __DEFAULT_FN_ATTRS512 1729166023Srrs_mm512_loadu_epi16 (void const *__P) 1730166023Srrs{ 1731166023Srrs struct __loadu_epi16 { 1732166023Srrs __m512i_u __v; 1733166023Srrs } __attribute__((__packed__, __may_alias__)); 1734166023Srrs return ((struct __loadu_epi16*)__P)->__v; 1735166023Srrs} 1736166023Srrs 1737166023Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1738166023Srrs_mm512_mask_loadu_epi16 (__m512i __W, __mmask32 __U, void const *__P) 1739166023Srrs{ 1740166023Srrs return (__m512i) __builtin_ia32_loaddquhi512_mask ((__v32hi *) __P, 1741166023Srrs (__v32hi) __W, 1742166023Srrs (__mmask32) __U); 1743166023Srrs} 1744166023Srrs 1745166023Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1746166023Srrs_mm512_maskz_loadu_epi16 (__mmask32 __U, void const *__P) 1747166023Srrs{ 1748166023Srrs return (__m512i) __builtin_ia32_loaddquhi512_mask ((__v32hi *) __P, 1749166023Srrs (__v32hi) 1750171440Srrs _mm512_setzero_si512 (), 1751163953Srrs (__mmask32) __U); 1752163953Srrs} 1753163953Srrs 1754163953Srrsstatic __inline __m512i __DEFAULT_FN_ATTRS512 1755163953Srrs_mm512_loadu_epi8 (void const *__P) 1756163953Srrs{ 1757163953Srrs struct __loadu_epi8 { 1758163953Srrs __m512i_u __v; 1759163953Srrs } __attribute__((__packed__, __may_alias__)); 1760163953Srrs return ((struct __loadu_epi8*)__P)->__v; 1761163953Srrs} 1762163953Srrs 1763164205Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1764164205Srrs_mm512_mask_loadu_epi8 (__m512i __W, __mmask64 __U, void const *__P) 1765164205Srrs{ 1766164205Srrs return (__m512i) __builtin_ia32_loaddquqi512_mask ((__v64qi *) __P, 1767164205Srrs (__v64qi) __W, 1768164205Srrs (__mmask64) __U); 1769164205Srrs} 1770164205Srrs 1771164205Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1772164205Srrs_mm512_maskz_loadu_epi8 (__mmask64 __U, void const *__P) 1773164205Srrs{ 1774164205Srrs return (__m512i) __builtin_ia32_loaddquqi512_mask ((__v64qi *) __P, 1775163953Srrs (__v64qi) 1776164205Srrs _mm512_setzero_si512 (), 1777164205Srrs (__mmask64) __U); 1778164205Srrs} 1779171440Srrs 1780171440Srrsstatic __inline void __DEFAULT_FN_ATTRS512 1781168124Srrs_mm512_storeu_epi16 (void *__P, __m512i __A) 1782164205Srrs{ 1783164205Srrs struct __storeu_epi16 { 1784164205Srrs __m512i_u __v; 1785164205Srrs } __attribute__((__packed__, __may_alias__)); 1786164205Srrs ((struct __storeu_epi16*)__P)->__v = __A; 1787163953Srrs} 1788228653Stuexen 1789163953Srrsstatic __inline__ void __DEFAULT_FN_ATTRS512 1790166023Srrs_mm512_mask_storeu_epi16 (void *__P, __mmask32 __U, __m512i __A) 1791166023Srrs{ 1792163953Srrs __builtin_ia32_storedquhi512_mask ((__v32hi *) __P, 1793163953Srrs (__v32hi) __A, 1794228653Stuexen (__mmask32) __U); 1795163953Srrs} 1796237715Stuexen 1797166023Srrsstatic __inline void __DEFAULT_FN_ATTRS512 1798166023Srrs_mm512_storeu_epi8 (void *__P, __m512i __A) 1799163953Srrs{ 1800163953Srrs struct __storeu_epi8 { 1801163953Srrs __m512i_u __v; 1802163953Srrs } __attribute__((__packed__, __may_alias__)); 1803163953Srrs ((struct __storeu_epi8*)__P)->__v = __A; 1804163953Srrs} 1805163953Srrs 1806163953Srrsstatic __inline__ void __DEFAULT_FN_ATTRS512 1807163953Srrs_mm512_mask_storeu_epi8 (void *__P, __mmask64 __U, __m512i __A) 1808237565Stuexen{ 1809172090Srrs __builtin_ia32_storedquqi512_mask ((__v64qi *) __P, 1810172090Srrs (__v64qi) __A, 1811172090Srrs (__mmask64) __U); 1812163953Srrs} 1813163953Srrs 1814237565Stuexenstatic __inline__ __mmask64 __DEFAULT_FN_ATTRS512 1815172090Srrs_mm512_test_epi8_mask (__m512i __A, __m512i __B) 1816172090Srrs{ 1817172090Srrs return _mm512_cmpneq_epi8_mask (_mm512_and_epi32 (__A, __B), 1818172090Srrs _mm512_setzero_si512()); 1819172090Srrs} 1820172090Srrs 1821172090Srrsstatic __inline__ __mmask64 __DEFAULT_FN_ATTRS512 1822172090Srrs_mm512_mask_test_epi8_mask (__mmask64 __U, __m512i __A, __m512i __B) 1823172090Srrs{ 1824172090Srrs return _mm512_mask_cmpneq_epi8_mask (__U, _mm512_and_epi32 (__A, __B), 1825172090Srrs _mm512_setzero_si512()); 1826172090Srrs} 1827237565Stuexen 1828172090Srrsstatic __inline__ __mmask32 __DEFAULT_FN_ATTRS512 1829172090Srrs_mm512_test_epi16_mask (__m512i __A, __m512i __B) 1830163953Srrs{ 1831166675Srrs return _mm512_cmpneq_epi16_mask (_mm512_and_epi32 (__A, __B), 1832166675Srrs _mm512_setzero_si512()); 1833166675Srrs} 1834166675Srrs 1835166675Srrsstatic __inline__ __mmask32 __DEFAULT_FN_ATTRS512 1836166675Srrs_mm512_mask_test_epi16_mask (__mmask32 __U, __m512i __A, __m512i __B) 1837166675Srrs{ 1838166675Srrs return _mm512_mask_cmpneq_epi16_mask (__U, _mm512_and_epi32 (__A, __B), 1839166675Srrs _mm512_setzero_si512()); 1840163953Srrs} 1841171943Srrs 1842163953Srrsstatic __inline__ __mmask64 __DEFAULT_FN_ATTRS512 1843163953Srrs_mm512_testn_epi8_mask (__m512i __A, __m512i __B) 1844163953Srrs{ 1845163953Srrs return _mm512_cmpeq_epi8_mask (_mm512_and_epi32 (__A, __B), _mm512_setzero_si512()); 1846163953Srrs} 1847165220Srrs 1848163953Srrsstatic __inline__ __mmask64 __DEFAULT_FN_ATTRS512 1849164205Srrs_mm512_mask_testn_epi8_mask (__mmask64 __U, __m512i __A, __m512i __B) 1850164205Srrs{ 1851164205Srrs return _mm512_mask_cmpeq_epi8_mask (__U, _mm512_and_epi32 (__A, __B), 1852164205Srrs _mm512_setzero_si512()); 1853164205Srrs} 1854164205Srrs 1855164205Srrsstatic __inline__ __mmask32 __DEFAULT_FN_ATTRS512 1856164205Srrs_mm512_testn_epi16_mask (__m512i __A, __m512i __B) 1857172090Srrs{ 1858164205Srrs return _mm512_cmpeq_epi16_mask (_mm512_and_epi32 (__A, __B), 1859166023Srrs _mm512_setzero_si512()); 1860166023Srrs} 1861166023Srrs 1862163953Srrsstatic __inline__ __mmask32 __DEFAULT_FN_ATTRS512 1863163953Srrs_mm512_mask_testn_epi16_mask (__mmask32 __U, __m512i __A, __m512i __B) 1864163953Srrs{ 1865163953Srrs return _mm512_mask_cmpeq_epi16_mask (__U, _mm512_and_epi32 (__A, __B), 1866163953Srrs _mm512_setzero_si512()); 1867163953Srrs} 1868163953Srrs 1869163953Srrsstatic __inline__ __mmask64 __DEFAULT_FN_ATTRS512 1870163953Srrs_mm512_movepi8_mask (__m512i __A) 1871185694Srrs{ 1872185694Srrs return (__mmask64) __builtin_ia32_cvtb2mask512 ((__v64qi) __A); 1873185694Srrs} 1874185694Srrs 1875185694Srrsstatic __inline__ __mmask32 __DEFAULT_FN_ATTRS512 1876185694Srrs_mm512_movepi16_mask (__m512i __A) 1877237715Stuexen{ 1878237715Stuexen return (__mmask32) __builtin_ia32_cvtw2mask512 ((__v32hi) __A); 1879185694Srrs} 1880185694Srrs 1881237049Stuexenstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1882185694Srrs_mm512_movm_epi8 (__mmask64 __A) 1883185694Srrs{ 1884163953Srrs return (__m512i) __builtin_ia32_cvtmask2b512 (__A); 1885163953Srrs} 1886163953Srrs 1887165220Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1888166023Srrs_mm512_movm_epi16 (__mmask32 __A) 1889166023Srrs{ 1890171440Srrs return (__m512i) __builtin_ia32_cvtmask2w512 (__A); 1891171440Srrs} 1892165647Srrs 1893163953Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1894163953Srrs_mm512_broadcastb_epi8 (__m128i __A) 1895165220Srrs{ 1896166675Srrs return (__m512i)__builtin_shufflevector((__v16qi) __A, (__v16qi) __A, 1897166675Srrs 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1898166675Srrs 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1899166675Srrs 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1900166675Srrs 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); 1901166675Srrs} 1902166675Srrs 1903166675Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1904166675Srrs_mm512_mask_broadcastb_epi8 (__m512i __O, __mmask64 __M, __m128i __A) 1905166675Srrs{ 1906165647Srrs return (__m512i)__builtin_ia32_selectb_512(__M, 1907171943Srrs (__v64qi) _mm512_broadcastb_epi8(__A), 1908165647Srrs (__v64qi) __O); 1909165647Srrs} 1910165647Srrs 1911165647Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1912165647Srrs_mm512_maskz_broadcastb_epi8 (__mmask64 __M, __m128i __A) 1913171943Srrs{ 1914165647Srrs return (__m512i)__builtin_ia32_selectb_512(__M, 1915165647Srrs (__v64qi) _mm512_broadcastb_epi8(__A), 1916165647Srrs (__v64qi) _mm512_setzero_si512()); 1917165647Srrs} 1918165647Srrs 1919179157Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1920165647Srrs_mm512_mask_set1_epi16 (__m512i __O, __mmask32 __M, short __A) 1921165647Srrs{ 1922165647Srrs return (__m512i) __builtin_ia32_selectw_512(__M, 1923165647Srrs (__v32hi) _mm512_set1_epi16(__A), 1924165647Srrs (__v32hi) __O); 1925165647Srrs} 1926170138Srrs 1927165647Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1928165647Srrs_mm512_maskz_set1_epi16 (__mmask32 __M, short __A) 1929170138Srrs{ 1930205627Srrs return (__m512i) __builtin_ia32_selectw_512(__M, 1931185694Srrs (__v32hi) _mm512_set1_epi16(__A), 1932206137Stuexen (__v32hi) _mm512_setzero_si512()); 1933185694Srrs} 1934165220Srrs 1935165220Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1936165220Srrs_mm512_broadcastw_epi16 (__m128i __A) 1937165220Srrs{ 1938165220Srrs return (__m512i)__builtin_shufflevector((__v8hi) __A, (__v8hi) __A, 1939163953Srrs 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1940164205Srrs 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); 1941165220Srrs} 1942235416Stuexen 1943165220Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1944165220Srrs_mm512_mask_broadcastw_epi16 (__m512i __O, __mmask32 __M, __m128i __A) 1945165220Srrs{ 1946165220Srrs return (__m512i)__builtin_ia32_selectw_512(__M, 1947165220Srrs (__v32hi) _mm512_broadcastw_epi16(__A), 1948163953Srrs (__v32hi) __O); 1949163953Srrs} 1950163953Srrs 1951163953Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1952163953Srrs_mm512_maskz_broadcastw_epi16 (__mmask32 __M, __m128i __A) 1953163953Srrs{ 1954163953Srrs return (__m512i)__builtin_ia32_selectw_512(__M, 1955179783Srrs (__v32hi) _mm512_broadcastw_epi16(__A), 1956179783Srrs (__v32hi) _mm512_setzero_si512()); 1957163953Srrs} 1958163953Srrs 1959163953Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1960163953Srrs_mm512_permutexvar_epi16 (__m512i __A, __m512i __B) 1961163953Srrs{ 1962163953Srrs return (__m512i)__builtin_ia32_permvarhi512((__v32hi)__B, (__v32hi)__A); 1963163953Srrs} 1964164205Srrs 1965165220Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1966165220Srrs_mm512_maskz_permutexvar_epi16 (__mmask32 __M, __m512i __A, 1967165220Srrs __m512i __B) 1968228653Stuexen{ 1969163953Srrs return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 1970166023Srrs (__v32hi)_mm512_permutexvar_epi16(__A, __B), 1971166023Srrs (__v32hi)_mm512_setzero_si512()); 1972166023Srrs} 1973163953Srrs 1974163953Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 1975163953Srrs_mm512_mask_permutexvar_epi16 (__m512i __W, __mmask32 __M, __m512i __A, 1976163953Srrs __m512i __B) 1977163953Srrs{ 1978163953Srrs return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 1979163953Srrs (__v32hi)_mm512_permutexvar_epi16(__A, __B), 1980163953Srrs (__v32hi)__W); 1981228653Stuexen} 1982163953Srrs 1983237715Stuexen#define _mm512_alignr_epi8(A, B, N) \ 1984166023Srrs (__m512i)__builtin_ia32_palignr512((__v64qi)(__m512i)(A), \ 1985166023Srrs (__v64qi)(__m512i)(B), (int)(N)) 1986166023Srrs 1987163953Srrs#define _mm512_mask_alignr_epi8(W, U, A, B, N) \ 1988163953Srrs (__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \ 1989163953Srrs (__v64qi)_mm512_alignr_epi8((A), (B), (int)(N)), \ 1990163953Srrs (__v64qi)(__m512i)(W)) 1991165220Srrs 1992163953Srrs#define _mm512_maskz_alignr_epi8(U, A, B, N) \ 1993166023Srrs (__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \ 1994166023Srrs (__v64qi)_mm512_alignr_epi8((A), (B), (int)(N)), \ 1995163953Srrs (__v64qi)(__m512i)_mm512_setzero_si512()) 1996163953Srrs 1997163953Srrs#define _mm512_dbsad_epu8(A, B, imm) \ 1998166023Srrs (__m512i)__builtin_ia32_dbpsadbw512((__v64qi)(__m512i)(A), \ 1999166023Srrs (__v64qi)(__m512i)(B), (int)(imm)) 2000163953Srrs 2001163953Srrs#define _mm512_mask_dbsad_epu8(W, U, A, B, imm) \ 2002163953Srrs (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ 2003163953Srrs (__v32hi)_mm512_dbsad_epu8((A), (B), (imm)), \ 2004166086Srrs (__v32hi)(__m512i)(W)) 2005163953Srrs 2006163953Srrs#define _mm512_maskz_dbsad_epu8(U, A, B, imm) \ 2007163953Srrs (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ 2008163953Srrs (__v32hi)_mm512_dbsad_epu8((A), (B), (imm)), \ 2009163953Srrs (__v32hi)_mm512_setzero_si512()) 2010163953Srrs 2011163953Srrsstatic __inline__ __m512i __DEFAULT_FN_ATTRS512 2012237049Stuexen_mm512_sad_epu8 (__m512i __A, __m512i __B) 2013163953Srrs{ 2014237715Stuexen return (__m512i) __builtin_ia32_psadbw512 ((__v64qi) __A, 2015163953Srrs (__v64qi) __B); 2016163953Srrs} 2017163953Srrs 2018169352Srrs#undef __DEFAULT_FN_ATTRS512 2019237049Stuexen#undef __DEFAULT_FN_ATTRS 2020179157Srrs 2021163953Srrs#endif 2022163953Srrs