avx512bwintrin.h revision 341825
1/*===------------- avx512bwintrin.h - AVX512BW intrinsics ------------------=== 2 * 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a copy 5 * of this software and associated documentation files (the "Software"), to deal 6 * in the Software without restriction, including without limitation the rights 7 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 * copies of the Software, and to permit persons to whom the Software is 9 * furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 * THE SOFTWARE. 21 * 22 *===-----------------------------------------------------------------------=== 23 */ 24#ifndef __IMMINTRIN_H 25#error "Never use <avx512bwintrin.h> directly; include <immintrin.h> instead." 26#endif 27 28#ifndef __AVX512BWINTRIN_H 29#define __AVX512BWINTRIN_H 30 31typedef unsigned int __mmask32; 32typedef unsigned long long __mmask64; 33 34/* Define the default attributes for the functions in this file. */ 35#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512bw"), __min_vector_width__(512))) 36 37/* Integer compare */ 38 39#define _mm512_cmp_epi8_mask(a, b, p) \ 40 (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \ 41 (__v64qi)(__m512i)(b), (int)(p), \ 42 (__mmask64)-1) 43 44#define _mm512_mask_cmp_epi8_mask(m, a, b, p) \ 45 (__mmask64)__builtin_ia32_cmpb512_mask((__v64qi)(__m512i)(a), \ 46 (__v64qi)(__m512i)(b), (int)(p), \ 47 (__mmask64)(m)) 48 49#define _mm512_cmp_epu8_mask(a, b, p) \ 50 (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)(__m512i)(a), \ 51 (__v64qi)(__m512i)(b), (int)(p), \ 52 (__mmask64)-1) 53 54#define _mm512_mask_cmp_epu8_mask(m, a, b, p) \ 55 (__mmask64)__builtin_ia32_ucmpb512_mask((__v64qi)(__m512i)(a), \ 56 (__v64qi)(__m512i)(b), (int)(p), \ 57 (__mmask64)(m)) 58 59#define _mm512_cmp_epi16_mask(a, b, p) \ 60 (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)(__m512i)(a), \ 61 (__v32hi)(__m512i)(b), (int)(p), \ 62 (__mmask32)-1) 63 64#define _mm512_mask_cmp_epi16_mask(m, a, b, p) \ 65 (__mmask32)__builtin_ia32_cmpw512_mask((__v32hi)(__m512i)(a), \ 66 (__v32hi)(__m512i)(b), (int)(p), \ 67 (__mmask32)(m)) 68 69#define _mm512_cmp_epu16_mask(a, b, p) \ 70 (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)(__m512i)(a), \ 71 (__v32hi)(__m512i)(b), (int)(p), \ 72 (__mmask32)-1) 73 74#define _mm512_mask_cmp_epu16_mask(m, a, b, p) \ 75 (__mmask32)__builtin_ia32_ucmpw512_mask((__v32hi)(__m512i)(a), \ 76 (__v32hi)(__m512i)(b), (int)(p), \ 77 (__mmask32)(m)) 78 79#define _mm512_cmpeq_epi8_mask(A, B) \ 80 _mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_EQ) 81#define _mm512_mask_cmpeq_epi8_mask(k, A, B) \ 82 _mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_EQ) 83#define _mm512_cmpge_epi8_mask(A, B) \ 84 _mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_GE) 85#define _mm512_mask_cmpge_epi8_mask(k, A, B) \ 86 _mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_GE) 87#define _mm512_cmpgt_epi8_mask(A, B) \ 88 _mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_GT) 89#define _mm512_mask_cmpgt_epi8_mask(k, A, B) \ 90 _mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_GT) 91#define _mm512_cmple_epi8_mask(A, B) \ 92 _mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_LE) 93#define _mm512_mask_cmple_epi8_mask(k, A, B) \ 94 _mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_LE) 95#define _mm512_cmplt_epi8_mask(A, B) \ 96 _mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_LT) 97#define _mm512_mask_cmplt_epi8_mask(k, A, B) \ 98 _mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_LT) 99#define _mm512_cmpneq_epi8_mask(A, B) \ 100 _mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_NE) 101#define _mm512_mask_cmpneq_epi8_mask(k, A, B) \ 102 _mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_NE) 103 104#define _mm512_cmpeq_epu8_mask(A, B) \ 105 _mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_EQ) 106#define _mm512_mask_cmpeq_epu8_mask(k, A, B) \ 107 _mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_EQ) 108#define _mm512_cmpge_epu8_mask(A, B) \ 109 _mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_GE) 110#define _mm512_mask_cmpge_epu8_mask(k, A, B) \ 111 _mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_GE) 112#define _mm512_cmpgt_epu8_mask(A, B) \ 113 _mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_GT) 114#define _mm512_mask_cmpgt_epu8_mask(k, A, B) \ 115 _mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_GT) 116#define _mm512_cmple_epu8_mask(A, B) \ 117 _mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_LE) 118#define _mm512_mask_cmple_epu8_mask(k, A, B) \ 119 _mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_LE) 120#define _mm512_cmplt_epu8_mask(A, B) \ 121 _mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_LT) 122#define _mm512_mask_cmplt_epu8_mask(k, A, B) \ 123 _mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_LT) 124#define _mm512_cmpneq_epu8_mask(A, B) \ 125 _mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_NE) 126#define _mm512_mask_cmpneq_epu8_mask(k, A, B) \ 127 _mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_NE) 128 129#define _mm512_cmpeq_epi16_mask(A, B) \ 130 _mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_EQ) 131#define _mm512_mask_cmpeq_epi16_mask(k, A, B) \ 132 _mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_EQ) 133#define _mm512_cmpge_epi16_mask(A, B) \ 134 _mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_GE) 135#define _mm512_mask_cmpge_epi16_mask(k, A, B) \ 136 _mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_GE) 137#define _mm512_cmpgt_epi16_mask(A, B) \ 138 _mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_GT) 139#define _mm512_mask_cmpgt_epi16_mask(k, A, B) \ 140 _mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_GT) 141#define _mm512_cmple_epi16_mask(A, B) \ 142 _mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_LE) 143#define _mm512_mask_cmple_epi16_mask(k, A, B) \ 144 _mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_LE) 145#define _mm512_cmplt_epi16_mask(A, B) \ 146 _mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_LT) 147#define _mm512_mask_cmplt_epi16_mask(k, A, B) \ 148 _mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_LT) 149#define _mm512_cmpneq_epi16_mask(A, B) \ 150 _mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_NE) 151#define _mm512_mask_cmpneq_epi16_mask(k, A, B) \ 152 _mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_NE) 153 154#define _mm512_cmpeq_epu16_mask(A, B) \ 155 _mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_EQ) 156#define _mm512_mask_cmpeq_epu16_mask(k, A, B) \ 157 _mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_EQ) 158#define _mm512_cmpge_epu16_mask(A, B) \ 159 _mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_GE) 160#define _mm512_mask_cmpge_epu16_mask(k, A, B) \ 161 _mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_GE) 162#define _mm512_cmpgt_epu16_mask(A, B) \ 163 _mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_GT) 164#define _mm512_mask_cmpgt_epu16_mask(k, A, B) \ 165 _mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_GT) 166#define _mm512_cmple_epu16_mask(A, B) \ 167 _mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_LE) 168#define _mm512_mask_cmple_epu16_mask(k, A, B) \ 169 _mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_LE) 170#define _mm512_cmplt_epu16_mask(A, B) \ 171 _mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_LT) 172#define _mm512_mask_cmplt_epu16_mask(k, A, B) \ 173 _mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_LT) 174#define _mm512_cmpneq_epu16_mask(A, B) \ 175 _mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_NE) 176#define _mm512_mask_cmpneq_epu16_mask(k, A, B) \ 177 _mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_NE) 178 179static __inline__ __m512i __DEFAULT_FN_ATTRS 180_mm512_add_epi8 (__m512i __A, __m512i __B) { 181 return (__m512i) ((__v64qu) __A + (__v64qu) __B); 182} 183 184static __inline__ __m512i __DEFAULT_FN_ATTRS 185_mm512_mask_add_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { 186 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 187 (__v64qi)_mm512_add_epi8(__A, __B), 188 (__v64qi)__W); 189} 190 191static __inline__ __m512i __DEFAULT_FN_ATTRS 192_mm512_maskz_add_epi8(__mmask64 __U, __m512i __A, __m512i __B) { 193 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 194 (__v64qi)_mm512_add_epi8(__A, __B), 195 (__v64qi)_mm512_setzero_si512()); 196} 197 198static __inline__ __m512i __DEFAULT_FN_ATTRS 199_mm512_sub_epi8 (__m512i __A, __m512i __B) { 200 return (__m512i) ((__v64qu) __A - (__v64qu) __B); 201} 202 203static __inline__ __m512i __DEFAULT_FN_ATTRS 204_mm512_mask_sub_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { 205 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 206 (__v64qi)_mm512_sub_epi8(__A, __B), 207 (__v64qi)__W); 208} 209 210static __inline__ __m512i __DEFAULT_FN_ATTRS 211_mm512_maskz_sub_epi8(__mmask64 __U, __m512i __A, __m512i __B) { 212 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 213 (__v64qi)_mm512_sub_epi8(__A, __B), 214 (__v64qi)_mm512_setzero_si512()); 215} 216 217static __inline__ __m512i __DEFAULT_FN_ATTRS 218_mm512_add_epi16 (__m512i __A, __m512i __B) { 219 return (__m512i) ((__v32hu) __A + (__v32hu) __B); 220} 221 222static __inline__ __m512i __DEFAULT_FN_ATTRS 223_mm512_mask_add_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { 224 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 225 (__v32hi)_mm512_add_epi16(__A, __B), 226 (__v32hi)__W); 227} 228 229static __inline__ __m512i __DEFAULT_FN_ATTRS 230_mm512_maskz_add_epi16(__mmask32 __U, __m512i __A, __m512i __B) { 231 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 232 (__v32hi)_mm512_add_epi16(__A, __B), 233 (__v32hi)_mm512_setzero_si512()); 234} 235 236static __inline__ __m512i __DEFAULT_FN_ATTRS 237_mm512_sub_epi16 (__m512i __A, __m512i __B) { 238 return (__m512i) ((__v32hu) __A - (__v32hu) __B); 239} 240 241static __inline__ __m512i __DEFAULT_FN_ATTRS 242_mm512_mask_sub_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { 243 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 244 (__v32hi)_mm512_sub_epi16(__A, __B), 245 (__v32hi)__W); 246} 247 248static __inline__ __m512i __DEFAULT_FN_ATTRS 249_mm512_maskz_sub_epi16(__mmask32 __U, __m512i __A, __m512i __B) { 250 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 251 (__v32hi)_mm512_sub_epi16(__A, __B), 252 (__v32hi)_mm512_setzero_si512()); 253} 254 255static __inline__ __m512i __DEFAULT_FN_ATTRS 256_mm512_mullo_epi16 (__m512i __A, __m512i __B) { 257 return (__m512i) ((__v32hu) __A * (__v32hu) __B); 258} 259 260static __inline__ __m512i __DEFAULT_FN_ATTRS 261_mm512_mask_mullo_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { 262 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 263 (__v32hi)_mm512_mullo_epi16(__A, __B), 264 (__v32hi)__W); 265} 266 267static __inline__ __m512i __DEFAULT_FN_ATTRS 268_mm512_maskz_mullo_epi16(__mmask32 __U, __m512i __A, __m512i __B) { 269 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 270 (__v32hi)_mm512_mullo_epi16(__A, __B), 271 (__v32hi)_mm512_setzero_si512()); 272} 273 274static __inline__ __m512i __DEFAULT_FN_ATTRS 275_mm512_mask_blend_epi8 (__mmask64 __U, __m512i __A, __m512i __W) 276{ 277 return (__m512i) __builtin_ia32_selectb_512 ((__mmask64) __U, 278 (__v64qi) __W, 279 (__v64qi) __A); 280} 281 282static __inline__ __m512i __DEFAULT_FN_ATTRS 283_mm512_mask_blend_epi16 (__mmask32 __U, __m512i __A, __m512i __W) 284{ 285 return (__m512i) __builtin_ia32_selectw_512 ((__mmask32) __U, 286 (__v32hi) __W, 287 (__v32hi) __A); 288} 289 290static __inline__ __m512i __DEFAULT_FN_ATTRS 291_mm512_abs_epi8 (__m512i __A) 292{ 293 return (__m512i)__builtin_ia32_pabsb512((__v64qi)__A); 294} 295 296static __inline__ __m512i __DEFAULT_FN_ATTRS 297_mm512_mask_abs_epi8 (__m512i __W, __mmask64 __U, __m512i __A) 298{ 299 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 300 (__v64qi)_mm512_abs_epi8(__A), 301 (__v64qi)__W); 302} 303 304static __inline__ __m512i __DEFAULT_FN_ATTRS 305_mm512_maskz_abs_epi8 (__mmask64 __U, __m512i __A) 306{ 307 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 308 (__v64qi)_mm512_abs_epi8(__A), 309 (__v64qi)_mm512_setzero_si512()); 310} 311 312static __inline__ __m512i __DEFAULT_FN_ATTRS 313_mm512_abs_epi16 (__m512i __A) 314{ 315 return (__m512i)__builtin_ia32_pabsw512((__v32hi)__A); 316} 317 318static __inline__ __m512i __DEFAULT_FN_ATTRS 319_mm512_mask_abs_epi16 (__m512i __W, __mmask32 __U, __m512i __A) 320{ 321 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 322 (__v32hi)_mm512_abs_epi16(__A), 323 (__v32hi)__W); 324} 325 326static __inline__ __m512i __DEFAULT_FN_ATTRS 327_mm512_maskz_abs_epi16 (__mmask32 __U, __m512i __A) 328{ 329 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 330 (__v32hi)_mm512_abs_epi16(__A), 331 (__v32hi)_mm512_setzero_si512()); 332} 333 334static __inline__ __m512i __DEFAULT_FN_ATTRS 335_mm512_packs_epi32(__m512i __A, __m512i __B) 336{ 337 return (__m512i)__builtin_ia32_packssdw512((__v16si)__A, (__v16si)__B); 338} 339 340static __inline__ __m512i __DEFAULT_FN_ATTRS 341_mm512_maskz_packs_epi32(__mmask32 __M, __m512i __A, __m512i __B) 342{ 343 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 344 (__v32hi)_mm512_packs_epi32(__A, __B), 345 (__v32hi)_mm512_setzero_si512()); 346} 347 348static __inline__ __m512i __DEFAULT_FN_ATTRS 349_mm512_mask_packs_epi32(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) 350{ 351 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 352 (__v32hi)_mm512_packs_epi32(__A, __B), 353 (__v32hi)__W); 354} 355 356static __inline__ __m512i __DEFAULT_FN_ATTRS 357_mm512_packs_epi16(__m512i __A, __m512i __B) 358{ 359 return (__m512i)__builtin_ia32_packsswb512((__v32hi)__A, (__v32hi) __B); 360} 361 362static __inline__ __m512i __DEFAULT_FN_ATTRS 363_mm512_mask_packs_epi16(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) 364{ 365 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 366 (__v64qi)_mm512_packs_epi16(__A, __B), 367 (__v64qi)__W); 368} 369 370static __inline__ __m512i __DEFAULT_FN_ATTRS 371_mm512_maskz_packs_epi16(__mmask64 __M, __m512i __A, __m512i __B) 372{ 373 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 374 (__v64qi)_mm512_packs_epi16(__A, __B), 375 (__v64qi)_mm512_setzero_si512()); 376} 377 378static __inline__ __m512i __DEFAULT_FN_ATTRS 379_mm512_packus_epi32(__m512i __A, __m512i __B) 380{ 381 return (__m512i)__builtin_ia32_packusdw512((__v16si) __A, (__v16si) __B); 382} 383 384static __inline__ __m512i __DEFAULT_FN_ATTRS 385_mm512_maskz_packus_epi32(__mmask32 __M, __m512i __A, __m512i __B) 386{ 387 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 388 (__v32hi)_mm512_packus_epi32(__A, __B), 389 (__v32hi)_mm512_setzero_si512()); 390} 391 392static __inline__ __m512i __DEFAULT_FN_ATTRS 393_mm512_mask_packus_epi32(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) 394{ 395 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 396 (__v32hi)_mm512_packus_epi32(__A, __B), 397 (__v32hi)__W); 398} 399 400static __inline__ __m512i __DEFAULT_FN_ATTRS 401_mm512_packus_epi16(__m512i __A, __m512i __B) 402{ 403 return (__m512i)__builtin_ia32_packuswb512((__v32hi) __A, (__v32hi) __B); 404} 405 406static __inline__ __m512i __DEFAULT_FN_ATTRS 407_mm512_mask_packus_epi16(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) 408{ 409 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 410 (__v64qi)_mm512_packus_epi16(__A, __B), 411 (__v64qi)__W); 412} 413 414static __inline__ __m512i __DEFAULT_FN_ATTRS 415_mm512_maskz_packus_epi16(__mmask64 __M, __m512i __A, __m512i __B) 416{ 417 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 418 (__v64qi)_mm512_packus_epi16(__A, __B), 419 (__v64qi)_mm512_setzero_si512()); 420} 421 422static __inline__ __m512i __DEFAULT_FN_ATTRS 423_mm512_adds_epi8 (__m512i __A, __m512i __B) 424{ 425 return (__m512i) __builtin_ia32_paddsb512_mask ((__v64qi) __A, 426 (__v64qi) __B, 427 (__v64qi) _mm512_setzero_si512(), 428 (__mmask64) -1); 429} 430 431static __inline__ __m512i __DEFAULT_FN_ATTRS 432_mm512_mask_adds_epi8 (__m512i __W, __mmask64 __U, __m512i __A, 433 __m512i __B) 434{ 435 return (__m512i) __builtin_ia32_paddsb512_mask ((__v64qi) __A, 436 (__v64qi) __B, 437 (__v64qi) __W, 438 (__mmask64) __U); 439} 440 441static __inline__ __m512i __DEFAULT_FN_ATTRS 442_mm512_maskz_adds_epi8 (__mmask64 __U, __m512i __A, __m512i __B) 443{ 444 return (__m512i) __builtin_ia32_paddsb512_mask ((__v64qi) __A, 445 (__v64qi) __B, 446 (__v64qi) _mm512_setzero_si512(), 447 (__mmask64) __U); 448} 449 450static __inline__ __m512i __DEFAULT_FN_ATTRS 451_mm512_adds_epi16 (__m512i __A, __m512i __B) 452{ 453 return (__m512i) __builtin_ia32_paddsw512_mask ((__v32hi) __A, 454 (__v32hi) __B, 455 (__v32hi) _mm512_setzero_si512(), 456 (__mmask32) -1); 457} 458 459static __inline__ __m512i __DEFAULT_FN_ATTRS 460_mm512_mask_adds_epi16 (__m512i __W, __mmask32 __U, __m512i __A, 461 __m512i __B) 462{ 463 return (__m512i) __builtin_ia32_paddsw512_mask ((__v32hi) __A, 464 (__v32hi) __B, 465 (__v32hi) __W, 466 (__mmask32) __U); 467} 468 469static __inline__ __m512i __DEFAULT_FN_ATTRS 470_mm512_maskz_adds_epi16 (__mmask32 __U, __m512i __A, __m512i __B) 471{ 472 return (__m512i) __builtin_ia32_paddsw512_mask ((__v32hi) __A, 473 (__v32hi) __B, 474 (__v32hi) _mm512_setzero_si512(), 475 (__mmask32) __U); 476} 477 478static __inline__ __m512i __DEFAULT_FN_ATTRS 479_mm512_adds_epu8 (__m512i __A, __m512i __B) 480{ 481 return (__m512i) __builtin_ia32_paddusb512_mask ((__v64qi) __A, 482 (__v64qi) __B, 483 (__v64qi) _mm512_setzero_si512(), 484 (__mmask64) -1); 485} 486 487static __inline__ __m512i __DEFAULT_FN_ATTRS 488_mm512_mask_adds_epu8 (__m512i __W, __mmask64 __U, __m512i __A, 489 __m512i __B) 490{ 491 return (__m512i) __builtin_ia32_paddusb512_mask ((__v64qi) __A, 492 (__v64qi) __B, 493 (__v64qi) __W, 494 (__mmask64) __U); 495} 496 497static __inline__ __m512i __DEFAULT_FN_ATTRS 498_mm512_maskz_adds_epu8 (__mmask64 __U, __m512i __A, __m512i __B) 499{ 500 return (__m512i) __builtin_ia32_paddusb512_mask ((__v64qi) __A, 501 (__v64qi) __B, 502 (__v64qi) _mm512_setzero_si512(), 503 (__mmask64) __U); 504} 505 506static __inline__ __m512i __DEFAULT_FN_ATTRS 507_mm512_adds_epu16 (__m512i __A, __m512i __B) 508{ 509 return (__m512i) __builtin_ia32_paddusw512_mask ((__v32hi) __A, 510 (__v32hi) __B, 511 (__v32hi) _mm512_setzero_si512(), 512 (__mmask32) -1); 513} 514 515static __inline__ __m512i __DEFAULT_FN_ATTRS 516_mm512_mask_adds_epu16 (__m512i __W, __mmask32 __U, __m512i __A, 517 __m512i __B) 518{ 519 return (__m512i) __builtin_ia32_paddusw512_mask ((__v32hi) __A, 520 (__v32hi) __B, 521 (__v32hi) __W, 522 (__mmask32) __U); 523} 524 525static __inline__ __m512i __DEFAULT_FN_ATTRS 526_mm512_maskz_adds_epu16 (__mmask32 __U, __m512i __A, __m512i __B) 527{ 528 return (__m512i) __builtin_ia32_paddusw512_mask ((__v32hi) __A, 529 (__v32hi) __B, 530 (__v32hi) _mm512_setzero_si512(), 531 (__mmask32) __U); 532} 533 534static __inline__ __m512i __DEFAULT_FN_ATTRS 535_mm512_avg_epu8 (__m512i __A, __m512i __B) 536{ 537 typedef unsigned short __v64hu __attribute__((__vector_size__(128))); 538 return (__m512i)__builtin_convertvector( 539 ((__builtin_convertvector((__v64qu) __A, __v64hu) + 540 __builtin_convertvector((__v64qu) __B, __v64hu)) + 1) 541 >> 1, __v64qu); 542} 543 544static __inline__ __m512i __DEFAULT_FN_ATTRS 545_mm512_mask_avg_epu8 (__m512i __W, __mmask64 __U, __m512i __A, 546 __m512i __B) 547{ 548 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 549 (__v64qi)_mm512_avg_epu8(__A, __B), 550 (__v64qi)__W); 551} 552 553static __inline__ __m512i __DEFAULT_FN_ATTRS 554_mm512_maskz_avg_epu8 (__mmask64 __U, __m512i __A, __m512i __B) 555{ 556 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 557 (__v64qi)_mm512_avg_epu8(__A, __B), 558 (__v64qi)_mm512_setzero_si512()); 559} 560 561static __inline__ __m512i __DEFAULT_FN_ATTRS 562_mm512_avg_epu16 (__m512i __A, __m512i __B) 563{ 564 typedef unsigned int __v32su __attribute__((__vector_size__(128))); 565 return (__m512i)__builtin_convertvector( 566 ((__builtin_convertvector((__v32hu) __A, __v32su) + 567 __builtin_convertvector((__v32hu) __B, __v32su)) + 1) 568 >> 1, __v32hu); 569} 570 571static __inline__ __m512i __DEFAULT_FN_ATTRS 572_mm512_mask_avg_epu16 (__m512i __W, __mmask32 __U, __m512i __A, 573 __m512i __B) 574{ 575 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 576 (__v32hi)_mm512_avg_epu16(__A, __B), 577 (__v32hi)__W); 578} 579 580static __inline__ __m512i __DEFAULT_FN_ATTRS 581_mm512_maskz_avg_epu16 (__mmask32 __U, __m512i __A, __m512i __B) 582{ 583 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 584 (__v32hi)_mm512_avg_epu16(__A, __B), 585 (__v32hi) _mm512_setzero_si512()); 586} 587 588static __inline__ __m512i __DEFAULT_FN_ATTRS 589_mm512_max_epi8 (__m512i __A, __m512i __B) 590{ 591 return (__m512i)__builtin_ia32_pmaxsb512((__v64qi) __A, (__v64qi) __B); 592} 593 594static __inline__ __m512i __DEFAULT_FN_ATTRS 595_mm512_maskz_max_epi8 (__mmask64 __M, __m512i __A, __m512i __B) 596{ 597 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 598 (__v64qi)_mm512_max_epi8(__A, __B), 599 (__v64qi)_mm512_setzero_si512()); 600} 601 602static __inline__ __m512i __DEFAULT_FN_ATTRS 603_mm512_mask_max_epi8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) 604{ 605 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 606 (__v64qi)_mm512_max_epi8(__A, __B), 607 (__v64qi)__W); 608} 609 610static __inline__ __m512i __DEFAULT_FN_ATTRS 611_mm512_max_epi16 (__m512i __A, __m512i __B) 612{ 613 return (__m512i)__builtin_ia32_pmaxsw512((__v32hi) __A, (__v32hi) __B); 614} 615 616static __inline__ __m512i __DEFAULT_FN_ATTRS 617_mm512_maskz_max_epi16 (__mmask32 __M, __m512i __A, __m512i __B) 618{ 619 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 620 (__v32hi)_mm512_max_epi16(__A, __B), 621 (__v32hi)_mm512_setzero_si512()); 622} 623 624static __inline__ __m512i __DEFAULT_FN_ATTRS 625_mm512_mask_max_epi16 (__m512i __W, __mmask32 __M, __m512i __A, 626 __m512i __B) 627{ 628 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 629 (__v32hi)_mm512_max_epi16(__A, __B), 630 (__v32hi)__W); 631} 632 633static __inline__ __m512i __DEFAULT_FN_ATTRS 634_mm512_max_epu8 (__m512i __A, __m512i __B) 635{ 636 return (__m512i)__builtin_ia32_pmaxub512((__v64qi)__A, (__v64qi)__B); 637} 638 639static __inline__ __m512i __DEFAULT_FN_ATTRS 640_mm512_maskz_max_epu8 (__mmask64 __M, __m512i __A, __m512i __B) 641{ 642 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 643 (__v64qi)_mm512_max_epu8(__A, __B), 644 (__v64qi)_mm512_setzero_si512()); 645} 646 647static __inline__ __m512i __DEFAULT_FN_ATTRS 648_mm512_mask_max_epu8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) 649{ 650 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 651 (__v64qi)_mm512_max_epu8(__A, __B), 652 (__v64qi)__W); 653} 654 655static __inline__ __m512i __DEFAULT_FN_ATTRS 656_mm512_max_epu16 (__m512i __A, __m512i __B) 657{ 658 return (__m512i)__builtin_ia32_pmaxuw512((__v32hi)__A, (__v32hi)__B); 659} 660 661static __inline__ __m512i __DEFAULT_FN_ATTRS 662_mm512_maskz_max_epu16 (__mmask32 __M, __m512i __A, __m512i __B) 663{ 664 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 665 (__v32hi)_mm512_max_epu16(__A, __B), 666 (__v32hi)_mm512_setzero_si512()); 667} 668 669static __inline__ __m512i __DEFAULT_FN_ATTRS 670_mm512_mask_max_epu16 (__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) 671{ 672 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 673 (__v32hi)_mm512_max_epu16(__A, __B), 674 (__v32hi)__W); 675} 676 677static __inline__ __m512i __DEFAULT_FN_ATTRS 678_mm512_min_epi8 (__m512i __A, __m512i __B) 679{ 680 return (__m512i)__builtin_ia32_pminsb512((__v64qi) __A, (__v64qi) __B); 681} 682 683static __inline__ __m512i __DEFAULT_FN_ATTRS 684_mm512_maskz_min_epi8 (__mmask64 __M, __m512i __A, __m512i __B) 685{ 686 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 687 (__v64qi)_mm512_min_epi8(__A, __B), 688 (__v64qi)_mm512_setzero_si512()); 689} 690 691static __inline__ __m512i __DEFAULT_FN_ATTRS 692_mm512_mask_min_epi8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) 693{ 694 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 695 (__v64qi)_mm512_min_epi8(__A, __B), 696 (__v64qi)__W); 697} 698 699static __inline__ __m512i __DEFAULT_FN_ATTRS 700_mm512_min_epi16 (__m512i __A, __m512i __B) 701{ 702 return (__m512i)__builtin_ia32_pminsw512((__v32hi) __A, (__v32hi) __B); 703} 704 705static __inline__ __m512i __DEFAULT_FN_ATTRS 706_mm512_maskz_min_epi16 (__mmask32 __M, __m512i __A, __m512i __B) 707{ 708 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 709 (__v32hi)_mm512_min_epi16(__A, __B), 710 (__v32hi)_mm512_setzero_si512()); 711} 712 713static __inline__ __m512i __DEFAULT_FN_ATTRS 714_mm512_mask_min_epi16 (__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) 715{ 716 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 717 (__v32hi)_mm512_min_epi16(__A, __B), 718 (__v32hi)__W); 719} 720 721static __inline__ __m512i __DEFAULT_FN_ATTRS 722_mm512_min_epu8 (__m512i __A, __m512i __B) 723{ 724 return (__m512i)__builtin_ia32_pminub512((__v64qi)__A, (__v64qi)__B); 725} 726 727static __inline__ __m512i __DEFAULT_FN_ATTRS 728_mm512_maskz_min_epu8 (__mmask64 __M, __m512i __A, __m512i __B) 729{ 730 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 731 (__v64qi)_mm512_min_epu8(__A, __B), 732 (__v64qi)_mm512_setzero_si512()); 733} 734 735static __inline__ __m512i __DEFAULT_FN_ATTRS 736_mm512_mask_min_epu8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) 737{ 738 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M, 739 (__v64qi)_mm512_min_epu8(__A, __B), 740 (__v64qi)__W); 741} 742 743static __inline__ __m512i __DEFAULT_FN_ATTRS 744_mm512_min_epu16 (__m512i __A, __m512i __B) 745{ 746 return (__m512i)__builtin_ia32_pminuw512((__v32hi)__A, (__v32hi)__B); 747} 748 749static __inline__ __m512i __DEFAULT_FN_ATTRS 750_mm512_maskz_min_epu16 (__mmask32 __M, __m512i __A, __m512i __B) 751{ 752 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 753 (__v32hi)_mm512_min_epu16(__A, __B), 754 (__v32hi)_mm512_setzero_si512()); 755} 756 757static __inline__ __m512i __DEFAULT_FN_ATTRS 758_mm512_mask_min_epu16 (__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) 759{ 760 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 761 (__v32hi)_mm512_min_epu16(__A, __B), 762 (__v32hi)__W); 763} 764 765static __inline__ __m512i __DEFAULT_FN_ATTRS 766_mm512_shuffle_epi8(__m512i __A, __m512i __B) 767{ 768 return (__m512i)__builtin_ia32_pshufb512((__v64qi)__A,(__v64qi)__B); 769} 770 771static __inline__ __m512i __DEFAULT_FN_ATTRS 772_mm512_mask_shuffle_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) 773{ 774 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 775 (__v64qi)_mm512_shuffle_epi8(__A, __B), 776 (__v64qi)__W); 777} 778 779static __inline__ __m512i __DEFAULT_FN_ATTRS 780_mm512_maskz_shuffle_epi8(__mmask64 __U, __m512i __A, __m512i __B) 781{ 782 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 783 (__v64qi)_mm512_shuffle_epi8(__A, __B), 784 (__v64qi)_mm512_setzero_si512()); 785} 786 787static __inline__ __m512i __DEFAULT_FN_ATTRS 788_mm512_subs_epi8 (__m512i __A, __m512i __B) 789{ 790 return (__m512i) __builtin_ia32_psubsb512_mask ((__v64qi) __A, 791 (__v64qi) __B, 792 (__v64qi) _mm512_setzero_si512(), 793 (__mmask64) -1); 794} 795 796static __inline__ __m512i __DEFAULT_FN_ATTRS 797_mm512_mask_subs_epi8 (__m512i __W, __mmask64 __U, __m512i __A, 798 __m512i __B) 799{ 800 return (__m512i) __builtin_ia32_psubsb512_mask ((__v64qi) __A, 801 (__v64qi) __B, 802 (__v64qi) __W, 803 (__mmask64) __U); 804} 805 806static __inline__ __m512i __DEFAULT_FN_ATTRS 807_mm512_maskz_subs_epi8 (__mmask64 __U, __m512i __A, __m512i __B) 808{ 809 return (__m512i) __builtin_ia32_psubsb512_mask ((__v64qi) __A, 810 (__v64qi) __B, 811 (__v64qi) _mm512_setzero_si512(), 812 (__mmask64) __U); 813} 814 815static __inline__ __m512i __DEFAULT_FN_ATTRS 816_mm512_subs_epi16 (__m512i __A, __m512i __B) 817{ 818 return (__m512i) __builtin_ia32_psubsw512_mask ((__v32hi) __A, 819 (__v32hi) __B, 820 (__v32hi) _mm512_setzero_si512(), 821 (__mmask32) -1); 822} 823 824static __inline__ __m512i __DEFAULT_FN_ATTRS 825_mm512_mask_subs_epi16 (__m512i __W, __mmask32 __U, __m512i __A, 826 __m512i __B) 827{ 828 return (__m512i) __builtin_ia32_psubsw512_mask ((__v32hi) __A, 829 (__v32hi) __B, 830 (__v32hi) __W, 831 (__mmask32) __U); 832} 833 834static __inline__ __m512i __DEFAULT_FN_ATTRS 835_mm512_maskz_subs_epi16 (__mmask32 __U, __m512i __A, __m512i __B) 836{ 837 return (__m512i) __builtin_ia32_psubsw512_mask ((__v32hi) __A, 838 (__v32hi) __B, 839 (__v32hi) _mm512_setzero_si512(), 840 (__mmask32) __U); 841} 842 843static __inline__ __m512i __DEFAULT_FN_ATTRS 844_mm512_subs_epu8 (__m512i __A, __m512i __B) 845{ 846 return (__m512i) __builtin_ia32_psubusb512_mask ((__v64qi) __A, 847 (__v64qi) __B, 848 (__v64qi) _mm512_setzero_si512(), 849 (__mmask64) -1); 850} 851 852static __inline__ __m512i __DEFAULT_FN_ATTRS 853_mm512_mask_subs_epu8 (__m512i __W, __mmask64 __U, __m512i __A, 854 __m512i __B) 855{ 856 return (__m512i) __builtin_ia32_psubusb512_mask ((__v64qi) __A, 857 (__v64qi) __B, 858 (__v64qi) __W, 859 (__mmask64) __U); 860} 861 862static __inline__ __m512i __DEFAULT_FN_ATTRS 863_mm512_maskz_subs_epu8 (__mmask64 __U, __m512i __A, __m512i __B) 864{ 865 return (__m512i) __builtin_ia32_psubusb512_mask ((__v64qi) __A, 866 (__v64qi) __B, 867 (__v64qi) _mm512_setzero_si512(), 868 (__mmask64) __U); 869} 870 871static __inline__ __m512i __DEFAULT_FN_ATTRS 872_mm512_subs_epu16 (__m512i __A, __m512i __B) 873{ 874 return (__m512i) __builtin_ia32_psubusw512_mask ((__v32hi) __A, 875 (__v32hi) __B, 876 (__v32hi) _mm512_setzero_si512(), 877 (__mmask32) -1); 878} 879 880static __inline__ __m512i __DEFAULT_FN_ATTRS 881_mm512_mask_subs_epu16 (__m512i __W, __mmask32 __U, __m512i __A, 882 __m512i __B) 883{ 884 return (__m512i) __builtin_ia32_psubusw512_mask ((__v32hi) __A, 885 (__v32hi) __B, 886 (__v32hi) __W, 887 (__mmask32) __U); 888} 889 890static __inline__ __m512i __DEFAULT_FN_ATTRS 891_mm512_maskz_subs_epu16 (__mmask32 __U, __m512i __A, __m512i __B) 892{ 893 return (__m512i) __builtin_ia32_psubusw512_mask ((__v32hi) __A, 894 (__v32hi) __B, 895 (__v32hi) _mm512_setzero_si512(), 896 (__mmask32) __U); 897} 898 899static __inline__ __m512i __DEFAULT_FN_ATTRS 900_mm512_permutex2var_epi16(__m512i __A, __m512i __I, __m512i __B) 901{ 902 return (__m512i)__builtin_ia32_vpermi2varhi512((__v32hi)__A, (__v32hi)__I, 903 (__v32hi)__B); 904} 905 906static __inline__ __m512i __DEFAULT_FN_ATTRS 907_mm512_mask_permutex2var_epi16(__m512i __A, __mmask32 __U, __m512i __I, 908 __m512i __B) 909{ 910 return (__m512i)__builtin_ia32_selectw_512(__U, 911 (__v32hi)_mm512_permutex2var_epi16(__A, __I, __B), 912 (__v32hi)__A); 913} 914 915static __inline__ __m512i __DEFAULT_FN_ATTRS 916_mm512_mask2_permutex2var_epi16(__m512i __A, __m512i __I, __mmask32 __U, 917 __m512i __B) 918{ 919 return (__m512i)__builtin_ia32_selectw_512(__U, 920 (__v32hi)_mm512_permutex2var_epi16(__A, __I, __B), 921 (__v32hi)__I); 922} 923 924static __inline__ __m512i __DEFAULT_FN_ATTRS 925_mm512_maskz_permutex2var_epi16(__mmask32 __U, __m512i __A, __m512i __I, 926 __m512i __B) 927{ 928 return (__m512i)__builtin_ia32_selectw_512(__U, 929 (__v32hi)_mm512_permutex2var_epi16(__A, __I, __B), 930 (__v32hi)_mm512_setzero_si512()); 931} 932 933static __inline__ __m512i __DEFAULT_FN_ATTRS 934_mm512_mulhrs_epi16(__m512i __A, __m512i __B) 935{ 936 return (__m512i)__builtin_ia32_pmulhrsw512((__v32hi)__A, (__v32hi)__B); 937} 938 939static __inline__ __m512i __DEFAULT_FN_ATTRS 940_mm512_mask_mulhrs_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) 941{ 942 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 943 (__v32hi)_mm512_mulhrs_epi16(__A, __B), 944 (__v32hi)__W); 945} 946 947static __inline__ __m512i __DEFAULT_FN_ATTRS 948_mm512_maskz_mulhrs_epi16(__mmask32 __U, __m512i __A, __m512i __B) 949{ 950 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 951 (__v32hi)_mm512_mulhrs_epi16(__A, __B), 952 (__v32hi)_mm512_setzero_si512()); 953} 954 955static __inline__ __m512i __DEFAULT_FN_ATTRS 956_mm512_mulhi_epi16(__m512i __A, __m512i __B) 957{ 958 return (__m512i)__builtin_ia32_pmulhw512((__v32hi) __A, (__v32hi) __B); 959} 960 961static __inline__ __m512i __DEFAULT_FN_ATTRS 962_mm512_mask_mulhi_epi16(__m512i __W, __mmask32 __U, __m512i __A, 963 __m512i __B) 964{ 965 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 966 (__v32hi)_mm512_mulhi_epi16(__A, __B), 967 (__v32hi)__W); 968} 969 970static __inline__ __m512i __DEFAULT_FN_ATTRS 971_mm512_maskz_mulhi_epi16(__mmask32 __U, __m512i __A, __m512i __B) 972{ 973 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 974 (__v32hi)_mm512_mulhi_epi16(__A, __B), 975 (__v32hi)_mm512_setzero_si512()); 976} 977 978static __inline__ __m512i __DEFAULT_FN_ATTRS 979_mm512_mulhi_epu16(__m512i __A, __m512i __B) 980{ 981 return (__m512i)__builtin_ia32_pmulhuw512((__v32hi) __A, (__v32hi) __B); 982} 983 984static __inline__ __m512i __DEFAULT_FN_ATTRS 985_mm512_mask_mulhi_epu16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) 986{ 987 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 988 (__v32hi)_mm512_mulhi_epu16(__A, __B), 989 (__v32hi)__W); 990} 991 992static __inline__ __m512i __DEFAULT_FN_ATTRS 993_mm512_maskz_mulhi_epu16 (__mmask32 __U, __m512i __A, __m512i __B) 994{ 995 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 996 (__v32hi)_mm512_mulhi_epu16(__A, __B), 997 (__v32hi)_mm512_setzero_si512()); 998} 999 1000static __inline__ __m512i __DEFAULT_FN_ATTRS 1001_mm512_maddubs_epi16(__m512i __X, __m512i __Y) { 1002 return (__m512i)__builtin_ia32_pmaddubsw512((__v64qi)__X, (__v64qi)__Y); 1003} 1004 1005static __inline__ __m512i __DEFAULT_FN_ATTRS 1006_mm512_mask_maddubs_epi16(__m512i __W, __mmask32 __U, __m512i __X, 1007 __m512i __Y) { 1008 return (__m512i)__builtin_ia32_selectw_512((__mmask32) __U, 1009 (__v32hi)_mm512_maddubs_epi16(__X, __Y), 1010 (__v32hi)__W); 1011} 1012 1013static __inline__ __m512i __DEFAULT_FN_ATTRS 1014_mm512_maskz_maddubs_epi16(__mmask32 __U, __m512i __X, __m512i __Y) { 1015 return (__m512i)__builtin_ia32_selectw_512((__mmask32) __U, 1016 (__v32hi)_mm512_maddubs_epi16(__X, __Y), 1017 (__v32hi)_mm512_setzero_si512()); 1018} 1019 1020static __inline__ __m512i __DEFAULT_FN_ATTRS 1021_mm512_madd_epi16(__m512i __A, __m512i __B) { 1022 return (__m512i)__builtin_ia32_pmaddwd512((__v32hi)__A, (__v32hi)__B); 1023} 1024 1025static __inline__ __m512i __DEFAULT_FN_ATTRS 1026_mm512_mask_madd_epi16(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) { 1027 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 1028 (__v16si)_mm512_madd_epi16(__A, __B), 1029 (__v16si)__W); 1030} 1031 1032static __inline__ __m512i __DEFAULT_FN_ATTRS 1033_mm512_maskz_madd_epi16(__mmask16 __U, __m512i __A, __m512i __B) { 1034 return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, 1035 (__v16si)_mm512_madd_epi16(__A, __B), 1036 (__v16si)_mm512_setzero_si512()); 1037} 1038 1039static __inline__ __m256i __DEFAULT_FN_ATTRS 1040_mm512_cvtsepi16_epi8 (__m512i __A) { 1041 return (__m256i) __builtin_ia32_pmovswb512_mask ((__v32hi) __A, 1042 (__v32qi)_mm256_setzero_si256(), 1043 (__mmask32) -1); 1044} 1045 1046static __inline__ __m256i __DEFAULT_FN_ATTRS 1047_mm512_mask_cvtsepi16_epi8 (__m256i __O, __mmask32 __M, __m512i __A) { 1048 return (__m256i) __builtin_ia32_pmovswb512_mask ((__v32hi) __A, 1049 (__v32qi)__O, 1050 __M); 1051} 1052 1053static __inline__ __m256i __DEFAULT_FN_ATTRS 1054_mm512_maskz_cvtsepi16_epi8 (__mmask32 __M, __m512i __A) { 1055 return (__m256i) __builtin_ia32_pmovswb512_mask ((__v32hi) __A, 1056 (__v32qi) _mm256_setzero_si256(), 1057 __M); 1058} 1059 1060static __inline__ __m256i __DEFAULT_FN_ATTRS 1061_mm512_cvtusepi16_epi8 (__m512i __A) { 1062 return (__m256i) __builtin_ia32_pmovuswb512_mask ((__v32hi) __A, 1063 (__v32qi) _mm256_setzero_si256(), 1064 (__mmask32) -1); 1065} 1066 1067static __inline__ __m256i __DEFAULT_FN_ATTRS 1068_mm512_mask_cvtusepi16_epi8 (__m256i __O, __mmask32 __M, __m512i __A) { 1069 return (__m256i) __builtin_ia32_pmovuswb512_mask ((__v32hi) __A, 1070 (__v32qi) __O, 1071 __M); 1072} 1073 1074static __inline__ __m256i __DEFAULT_FN_ATTRS 1075_mm512_maskz_cvtusepi16_epi8 (__mmask32 __M, __m512i __A) { 1076 return (__m256i) __builtin_ia32_pmovuswb512_mask ((__v32hi) __A, 1077 (__v32qi) _mm256_setzero_si256(), 1078 __M); 1079} 1080 1081static __inline__ __m256i __DEFAULT_FN_ATTRS 1082_mm512_cvtepi16_epi8 (__m512i __A) { 1083 return (__m256i) __builtin_ia32_pmovwb512_mask ((__v32hi) __A, 1084 (__v32qi) _mm256_undefined_si256(), 1085 (__mmask32) -1); 1086} 1087 1088static __inline__ __m256i __DEFAULT_FN_ATTRS 1089_mm512_mask_cvtepi16_epi8 (__m256i __O, __mmask32 __M, __m512i __A) { 1090 return (__m256i) __builtin_ia32_pmovwb512_mask ((__v32hi) __A, 1091 (__v32qi) __O, 1092 __M); 1093} 1094 1095static __inline__ __m256i __DEFAULT_FN_ATTRS 1096_mm512_maskz_cvtepi16_epi8 (__mmask32 __M, __m512i __A) { 1097 return (__m256i) __builtin_ia32_pmovwb512_mask ((__v32hi) __A, 1098 (__v32qi) _mm256_setzero_si256(), 1099 __M); 1100} 1101 1102static __inline__ void __DEFAULT_FN_ATTRS 1103_mm512_mask_cvtepi16_storeu_epi8 (void * __P, __mmask32 __M, __m512i __A) 1104{ 1105 __builtin_ia32_pmovwb512mem_mask ((__v32qi *) __P, (__v32hi) __A, __M); 1106} 1107 1108static __inline__ void __DEFAULT_FN_ATTRS 1109_mm512_mask_cvtsepi16_storeu_epi8 (void * __P, __mmask32 __M, __m512i __A) 1110{ 1111 __builtin_ia32_pmovswb512mem_mask ((__v32qi *) __P, (__v32hi) __A, __M); 1112} 1113 1114static __inline__ void __DEFAULT_FN_ATTRS 1115_mm512_mask_cvtusepi16_storeu_epi8 (void * __P, __mmask32 __M, __m512i __A) 1116{ 1117 __builtin_ia32_pmovuswb512mem_mask ((__v32qi *) __P, (__v32hi) __A, __M); 1118} 1119 1120static __inline__ __m512i __DEFAULT_FN_ATTRS 1121_mm512_unpackhi_epi8(__m512i __A, __m512i __B) { 1122 return (__m512i)__builtin_shufflevector((__v64qi)__A, (__v64qi)__B, 1123 8, 64+8, 9, 64+9, 1124 10, 64+10, 11, 64+11, 1125 12, 64+12, 13, 64+13, 1126 14, 64+14, 15, 64+15, 1127 24, 64+24, 25, 64+25, 1128 26, 64+26, 27, 64+27, 1129 28, 64+28, 29, 64+29, 1130 30, 64+30, 31, 64+31, 1131 40, 64+40, 41, 64+41, 1132 42, 64+42, 43, 64+43, 1133 44, 64+44, 45, 64+45, 1134 46, 64+46, 47, 64+47, 1135 56, 64+56, 57, 64+57, 1136 58, 64+58, 59, 64+59, 1137 60, 64+60, 61, 64+61, 1138 62, 64+62, 63, 64+63); 1139} 1140 1141static __inline__ __m512i __DEFAULT_FN_ATTRS 1142_mm512_mask_unpackhi_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { 1143 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 1144 (__v64qi)_mm512_unpackhi_epi8(__A, __B), 1145 (__v64qi)__W); 1146} 1147 1148static __inline__ __m512i __DEFAULT_FN_ATTRS 1149_mm512_maskz_unpackhi_epi8(__mmask64 __U, __m512i __A, __m512i __B) { 1150 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 1151 (__v64qi)_mm512_unpackhi_epi8(__A, __B), 1152 (__v64qi)_mm512_setzero_si512()); 1153} 1154 1155static __inline__ __m512i __DEFAULT_FN_ATTRS 1156_mm512_unpackhi_epi16(__m512i __A, __m512i __B) { 1157 return (__m512i)__builtin_shufflevector((__v32hi)__A, (__v32hi)__B, 1158 4, 32+4, 5, 32+5, 1159 6, 32+6, 7, 32+7, 1160 12, 32+12, 13, 32+13, 1161 14, 32+14, 15, 32+15, 1162 20, 32+20, 21, 32+21, 1163 22, 32+22, 23, 32+23, 1164 28, 32+28, 29, 32+29, 1165 30, 32+30, 31, 32+31); 1166} 1167 1168static __inline__ __m512i __DEFAULT_FN_ATTRS 1169_mm512_mask_unpackhi_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { 1170 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1171 (__v32hi)_mm512_unpackhi_epi16(__A, __B), 1172 (__v32hi)__W); 1173} 1174 1175static __inline__ __m512i __DEFAULT_FN_ATTRS 1176_mm512_maskz_unpackhi_epi16(__mmask32 __U, __m512i __A, __m512i __B) { 1177 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1178 (__v32hi)_mm512_unpackhi_epi16(__A, __B), 1179 (__v32hi)_mm512_setzero_si512()); 1180} 1181 1182static __inline__ __m512i __DEFAULT_FN_ATTRS 1183_mm512_unpacklo_epi8(__m512i __A, __m512i __B) { 1184 return (__m512i)__builtin_shufflevector((__v64qi)__A, (__v64qi)__B, 1185 0, 64+0, 1, 64+1, 1186 2, 64+2, 3, 64+3, 1187 4, 64+4, 5, 64+5, 1188 6, 64+6, 7, 64+7, 1189 16, 64+16, 17, 64+17, 1190 18, 64+18, 19, 64+19, 1191 20, 64+20, 21, 64+21, 1192 22, 64+22, 23, 64+23, 1193 32, 64+32, 33, 64+33, 1194 34, 64+34, 35, 64+35, 1195 36, 64+36, 37, 64+37, 1196 38, 64+38, 39, 64+39, 1197 48, 64+48, 49, 64+49, 1198 50, 64+50, 51, 64+51, 1199 52, 64+52, 53, 64+53, 1200 54, 64+54, 55, 64+55); 1201} 1202 1203static __inline__ __m512i __DEFAULT_FN_ATTRS 1204_mm512_mask_unpacklo_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) { 1205 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 1206 (__v64qi)_mm512_unpacklo_epi8(__A, __B), 1207 (__v64qi)__W); 1208} 1209 1210static __inline__ __m512i __DEFAULT_FN_ATTRS 1211_mm512_maskz_unpacklo_epi8(__mmask64 __U, __m512i __A, __m512i __B) { 1212 return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, 1213 (__v64qi)_mm512_unpacklo_epi8(__A, __B), 1214 (__v64qi)_mm512_setzero_si512()); 1215} 1216 1217static __inline__ __m512i __DEFAULT_FN_ATTRS 1218_mm512_unpacklo_epi16(__m512i __A, __m512i __B) { 1219 return (__m512i)__builtin_shufflevector((__v32hi)__A, (__v32hi)__B, 1220 0, 32+0, 1, 32+1, 1221 2, 32+2, 3, 32+3, 1222 8, 32+8, 9, 32+9, 1223 10, 32+10, 11, 32+11, 1224 16, 32+16, 17, 32+17, 1225 18, 32+18, 19, 32+19, 1226 24, 32+24, 25, 32+25, 1227 26, 32+26, 27, 32+27); 1228} 1229 1230static __inline__ __m512i __DEFAULT_FN_ATTRS 1231_mm512_mask_unpacklo_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { 1232 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1233 (__v32hi)_mm512_unpacklo_epi16(__A, __B), 1234 (__v32hi)__W); 1235} 1236 1237static __inline__ __m512i __DEFAULT_FN_ATTRS 1238_mm512_maskz_unpacklo_epi16(__mmask32 __U, __m512i __A, __m512i __B) { 1239 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1240 (__v32hi)_mm512_unpacklo_epi16(__A, __B), 1241 (__v32hi)_mm512_setzero_si512()); 1242} 1243 1244static __inline__ __m512i __DEFAULT_FN_ATTRS 1245_mm512_cvtepi8_epi16(__m256i __A) 1246{ 1247 /* This function always performs a signed extension, but __v32qi is a char 1248 which may be signed or unsigned, so use __v32qs. */ 1249 return (__m512i)__builtin_convertvector((__v32qs)__A, __v32hi); 1250} 1251 1252static __inline__ __m512i __DEFAULT_FN_ATTRS 1253_mm512_mask_cvtepi8_epi16(__m512i __W, __mmask32 __U, __m256i __A) 1254{ 1255 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1256 (__v32hi)_mm512_cvtepi8_epi16(__A), 1257 (__v32hi)__W); 1258} 1259 1260static __inline__ __m512i __DEFAULT_FN_ATTRS 1261_mm512_maskz_cvtepi8_epi16(__mmask32 __U, __m256i __A) 1262{ 1263 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1264 (__v32hi)_mm512_cvtepi8_epi16(__A), 1265 (__v32hi)_mm512_setzero_si512()); 1266} 1267 1268static __inline__ __m512i __DEFAULT_FN_ATTRS 1269_mm512_cvtepu8_epi16(__m256i __A) 1270{ 1271 return (__m512i)__builtin_convertvector((__v32qu)__A, __v32hi); 1272} 1273 1274static __inline__ __m512i __DEFAULT_FN_ATTRS 1275_mm512_mask_cvtepu8_epi16(__m512i __W, __mmask32 __U, __m256i __A) 1276{ 1277 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1278 (__v32hi)_mm512_cvtepu8_epi16(__A), 1279 (__v32hi)__W); 1280} 1281 1282static __inline__ __m512i __DEFAULT_FN_ATTRS 1283_mm512_maskz_cvtepu8_epi16(__mmask32 __U, __m256i __A) 1284{ 1285 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1286 (__v32hi)_mm512_cvtepu8_epi16(__A), 1287 (__v32hi)_mm512_setzero_si512()); 1288} 1289 1290 1291#define _mm512_shufflehi_epi16(A, imm) \ 1292 (__m512i)__builtin_ia32_pshufhw512((__v32hi)(__m512i)(A), (int)(imm)) 1293 1294#define _mm512_mask_shufflehi_epi16(W, U, A, imm) \ 1295 (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ 1296 (__v32hi)_mm512_shufflehi_epi16((A), \ 1297 (imm)), \ 1298 (__v32hi)(__m512i)(W)) 1299 1300#define _mm512_maskz_shufflehi_epi16(U, A, imm) \ 1301 (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ 1302 (__v32hi)_mm512_shufflehi_epi16((A), \ 1303 (imm)), \ 1304 (__v32hi)_mm512_setzero_si512()) 1305 1306#define _mm512_shufflelo_epi16(A, imm) \ 1307 (__m512i)__builtin_ia32_pshuflw512((__v32hi)(__m512i)(A), (int)(imm)) 1308 1309 1310#define _mm512_mask_shufflelo_epi16(W, U, A, imm) \ 1311 (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ 1312 (__v32hi)_mm512_shufflelo_epi16((A), \ 1313 (imm)), \ 1314 (__v32hi)(__m512i)(W)) 1315 1316 1317#define _mm512_maskz_shufflelo_epi16(U, A, imm) \ 1318 (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ 1319 (__v32hi)_mm512_shufflelo_epi16((A), \ 1320 (imm)), \ 1321 (__v32hi)_mm512_setzero_si512()) 1322 1323static __inline__ __m512i __DEFAULT_FN_ATTRS 1324_mm512_sllv_epi16(__m512i __A, __m512i __B) 1325{ 1326 return (__m512i)__builtin_ia32_psllv32hi((__v32hi) __A, (__v32hi) __B); 1327} 1328 1329static __inline__ __m512i __DEFAULT_FN_ATTRS 1330_mm512_mask_sllv_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) 1331{ 1332 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1333 (__v32hi)_mm512_sllv_epi16(__A, __B), 1334 (__v32hi)__W); 1335} 1336 1337static __inline__ __m512i __DEFAULT_FN_ATTRS 1338_mm512_maskz_sllv_epi16(__mmask32 __U, __m512i __A, __m512i __B) 1339{ 1340 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1341 (__v32hi)_mm512_sllv_epi16(__A, __B), 1342 (__v32hi)_mm512_setzero_si512()); 1343} 1344 1345static __inline__ __m512i __DEFAULT_FN_ATTRS 1346_mm512_sll_epi16(__m512i __A, __m128i __B) 1347{ 1348 return (__m512i)__builtin_ia32_psllw512((__v32hi) __A, (__v8hi) __B); 1349} 1350 1351static __inline__ __m512i __DEFAULT_FN_ATTRS 1352_mm512_mask_sll_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B) 1353{ 1354 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1355 (__v32hi)_mm512_sll_epi16(__A, __B), 1356 (__v32hi)__W); 1357} 1358 1359static __inline__ __m512i __DEFAULT_FN_ATTRS 1360_mm512_maskz_sll_epi16(__mmask32 __U, __m512i __A, __m128i __B) 1361{ 1362 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1363 (__v32hi)_mm512_sll_epi16(__A, __B), 1364 (__v32hi)_mm512_setzero_si512()); 1365} 1366 1367static __inline__ __m512i __DEFAULT_FN_ATTRS 1368_mm512_slli_epi16(__m512i __A, int __B) 1369{ 1370 return (__m512i)__builtin_ia32_psllwi512((__v32hi)__A, __B); 1371} 1372 1373static __inline__ __m512i __DEFAULT_FN_ATTRS 1374_mm512_mask_slli_epi16(__m512i __W, __mmask32 __U, __m512i __A, int __B) 1375{ 1376 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1377 (__v32hi)_mm512_slli_epi16(__A, __B), 1378 (__v32hi)__W); 1379} 1380 1381static __inline__ __m512i __DEFAULT_FN_ATTRS 1382_mm512_maskz_slli_epi16(__mmask32 __U, __m512i __A, int __B) 1383{ 1384 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1385 (__v32hi)_mm512_slli_epi16(__A, __B), 1386 (__v32hi)_mm512_setzero_si512()); 1387} 1388 1389#define _mm512_bslli_epi128(a, imm) \ 1390 (__m512i)__builtin_ia32_pslldqi512_byteshift((__v8di)(__m512i)(a), (int)(imm)) 1391 1392static __inline__ __m512i __DEFAULT_FN_ATTRS 1393_mm512_srlv_epi16(__m512i __A, __m512i __B) 1394{ 1395 return (__m512i)__builtin_ia32_psrlv32hi((__v32hi)__A, (__v32hi)__B); 1396} 1397 1398static __inline__ __m512i __DEFAULT_FN_ATTRS 1399_mm512_mask_srlv_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) 1400{ 1401 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1402 (__v32hi)_mm512_srlv_epi16(__A, __B), 1403 (__v32hi)__W); 1404} 1405 1406static __inline__ __m512i __DEFAULT_FN_ATTRS 1407_mm512_maskz_srlv_epi16(__mmask32 __U, __m512i __A, __m512i __B) 1408{ 1409 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1410 (__v32hi)_mm512_srlv_epi16(__A, __B), 1411 (__v32hi)_mm512_setzero_si512()); 1412} 1413 1414static __inline__ __m512i __DEFAULT_FN_ATTRS 1415_mm512_srav_epi16(__m512i __A, __m512i __B) 1416{ 1417 return (__m512i)__builtin_ia32_psrav32hi((__v32hi)__A, (__v32hi)__B); 1418} 1419 1420static __inline__ __m512i __DEFAULT_FN_ATTRS 1421_mm512_mask_srav_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) 1422{ 1423 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1424 (__v32hi)_mm512_srav_epi16(__A, __B), 1425 (__v32hi)__W); 1426} 1427 1428static __inline__ __m512i __DEFAULT_FN_ATTRS 1429_mm512_maskz_srav_epi16(__mmask32 __U, __m512i __A, __m512i __B) 1430{ 1431 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1432 (__v32hi)_mm512_srav_epi16(__A, __B), 1433 (__v32hi)_mm512_setzero_si512()); 1434} 1435 1436static __inline__ __m512i __DEFAULT_FN_ATTRS 1437_mm512_sra_epi16(__m512i __A, __m128i __B) 1438{ 1439 return (__m512i)__builtin_ia32_psraw512((__v32hi) __A, (__v8hi) __B); 1440} 1441 1442static __inline__ __m512i __DEFAULT_FN_ATTRS 1443_mm512_mask_sra_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B) 1444{ 1445 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1446 (__v32hi)_mm512_sra_epi16(__A, __B), 1447 (__v32hi)__W); 1448} 1449 1450static __inline__ __m512i __DEFAULT_FN_ATTRS 1451_mm512_maskz_sra_epi16(__mmask32 __U, __m512i __A, __m128i __B) 1452{ 1453 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1454 (__v32hi)_mm512_sra_epi16(__A, __B), 1455 (__v32hi)_mm512_setzero_si512()); 1456} 1457 1458static __inline__ __m512i __DEFAULT_FN_ATTRS 1459_mm512_srai_epi16(__m512i __A, int __B) 1460{ 1461 return (__m512i)__builtin_ia32_psrawi512((__v32hi)__A, __B); 1462} 1463 1464static __inline__ __m512i __DEFAULT_FN_ATTRS 1465_mm512_mask_srai_epi16(__m512i __W, __mmask32 __U, __m512i __A, int __B) 1466{ 1467 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1468 (__v32hi)_mm512_srai_epi16(__A, __B), 1469 (__v32hi)__W); 1470} 1471 1472static __inline__ __m512i __DEFAULT_FN_ATTRS 1473_mm512_maskz_srai_epi16(__mmask32 __U, __m512i __A, int __B) 1474{ 1475 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1476 (__v32hi)_mm512_srai_epi16(__A, __B), 1477 (__v32hi)_mm512_setzero_si512()); 1478} 1479 1480static __inline__ __m512i __DEFAULT_FN_ATTRS 1481_mm512_srl_epi16(__m512i __A, __m128i __B) 1482{ 1483 return (__m512i)__builtin_ia32_psrlw512((__v32hi) __A, (__v8hi) __B); 1484} 1485 1486static __inline__ __m512i __DEFAULT_FN_ATTRS 1487_mm512_mask_srl_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B) 1488{ 1489 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1490 (__v32hi)_mm512_srl_epi16(__A, __B), 1491 (__v32hi)__W); 1492} 1493 1494static __inline__ __m512i __DEFAULT_FN_ATTRS 1495_mm512_maskz_srl_epi16(__mmask32 __U, __m512i __A, __m128i __B) 1496{ 1497 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1498 (__v32hi)_mm512_srl_epi16(__A, __B), 1499 (__v32hi)_mm512_setzero_si512()); 1500} 1501 1502static __inline__ __m512i __DEFAULT_FN_ATTRS 1503_mm512_srli_epi16(__m512i __A, int __B) 1504{ 1505 return (__m512i)__builtin_ia32_psrlwi512((__v32hi)__A, __B); 1506} 1507 1508static __inline__ __m512i __DEFAULT_FN_ATTRS 1509_mm512_mask_srli_epi16(__m512i __W, __mmask32 __U, __m512i __A, int __B) 1510{ 1511 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1512 (__v32hi)_mm512_srli_epi16(__A, __B), 1513 (__v32hi)__W); 1514} 1515 1516static __inline__ __m512i __DEFAULT_FN_ATTRS 1517_mm512_maskz_srli_epi16(__mmask32 __U, __m512i __A, int __B) 1518{ 1519 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, 1520 (__v32hi)_mm512_srli_epi16(__A, __B), 1521 (__v32hi)_mm512_setzero_si512()); 1522} 1523 1524#define _mm512_bsrli_epi128(a, imm) \ 1525 (__m512i)__builtin_ia32_psrldqi512_byteshift((__v8di)(__m512i)(a), (int)(imm)) 1526 1527static __inline__ __m512i __DEFAULT_FN_ATTRS 1528_mm512_mask_mov_epi16 (__m512i __W, __mmask32 __U, __m512i __A) 1529{ 1530 return (__m512i) __builtin_ia32_selectw_512 ((__mmask32) __U, 1531 (__v32hi) __A, 1532 (__v32hi) __W); 1533} 1534 1535static __inline__ __m512i __DEFAULT_FN_ATTRS 1536_mm512_maskz_mov_epi16 (__mmask32 __U, __m512i __A) 1537{ 1538 return (__m512i) __builtin_ia32_selectw_512 ((__mmask32) __U, 1539 (__v32hi) __A, 1540 (__v32hi) _mm512_setzero_si512 ()); 1541} 1542 1543static __inline__ __m512i __DEFAULT_FN_ATTRS 1544_mm512_mask_mov_epi8 (__m512i __W, __mmask64 __U, __m512i __A) 1545{ 1546 return (__m512i) __builtin_ia32_selectb_512 ((__mmask64) __U, 1547 (__v64qi) __A, 1548 (__v64qi) __W); 1549} 1550 1551static __inline__ __m512i __DEFAULT_FN_ATTRS 1552_mm512_maskz_mov_epi8 (__mmask64 __U, __m512i __A) 1553{ 1554 return (__m512i) __builtin_ia32_selectb_512 ((__mmask64) __U, 1555 (__v64qi) __A, 1556 (__v64qi) _mm512_setzero_si512 ()); 1557} 1558 1559static __inline__ __m512i __DEFAULT_FN_ATTRS 1560_mm512_mask_set1_epi8 (__m512i __O, __mmask64 __M, char __A) 1561{ 1562 return (__m512i) __builtin_ia32_selectb_512(__M, 1563 (__v64qi)_mm512_set1_epi8(__A), 1564 (__v64qi) __O); 1565} 1566 1567static __inline__ __m512i __DEFAULT_FN_ATTRS 1568_mm512_maskz_set1_epi8 (__mmask64 __M, char __A) 1569{ 1570 return (__m512i) __builtin_ia32_selectb_512(__M, 1571 (__v64qi) _mm512_set1_epi8(__A), 1572 (__v64qi) _mm512_setzero_si512()); 1573} 1574 1575static __inline__ __mmask64 __DEFAULT_FN_ATTRS 1576_mm512_kunpackd (__mmask64 __A, __mmask64 __B) 1577{ 1578 return (__mmask64) __builtin_ia32_kunpckdi ((__mmask64) __A, 1579 (__mmask64) __B); 1580} 1581 1582static __inline__ __mmask32 __DEFAULT_FN_ATTRS 1583_mm512_kunpackw (__mmask32 __A, __mmask32 __B) 1584{ 1585 return (__mmask32) __builtin_ia32_kunpcksi ((__mmask32) __A, 1586 (__mmask32) __B); 1587} 1588 1589static __inline__ __m512i __DEFAULT_FN_ATTRS 1590_mm512_mask_loadu_epi16 (__m512i __W, __mmask32 __U, void const *__P) 1591{ 1592 return (__m512i) __builtin_ia32_loaddquhi512_mask ((__v32hi *) __P, 1593 (__v32hi) __W, 1594 (__mmask32) __U); 1595} 1596 1597static __inline__ __m512i __DEFAULT_FN_ATTRS 1598_mm512_maskz_loadu_epi16 (__mmask32 __U, void const *__P) 1599{ 1600 return (__m512i) __builtin_ia32_loaddquhi512_mask ((__v32hi *) __P, 1601 (__v32hi) 1602 _mm512_setzero_si512 (), 1603 (__mmask32) __U); 1604} 1605 1606static __inline__ __m512i __DEFAULT_FN_ATTRS 1607_mm512_mask_loadu_epi8 (__m512i __W, __mmask64 __U, void const *__P) 1608{ 1609 return (__m512i) __builtin_ia32_loaddquqi512_mask ((__v64qi *) __P, 1610 (__v64qi) __W, 1611 (__mmask64) __U); 1612} 1613 1614static __inline__ __m512i __DEFAULT_FN_ATTRS 1615_mm512_maskz_loadu_epi8 (__mmask64 __U, void const *__P) 1616{ 1617 return (__m512i) __builtin_ia32_loaddquqi512_mask ((__v64qi *) __P, 1618 (__v64qi) 1619 _mm512_setzero_si512 (), 1620 (__mmask64) __U); 1621} 1622static __inline__ void __DEFAULT_FN_ATTRS 1623_mm512_mask_storeu_epi16 (void *__P, __mmask32 __U, __m512i __A) 1624{ 1625 __builtin_ia32_storedquhi512_mask ((__v32hi *) __P, 1626 (__v32hi) __A, 1627 (__mmask32) __U); 1628} 1629 1630static __inline__ void __DEFAULT_FN_ATTRS 1631_mm512_mask_storeu_epi8 (void *__P, __mmask64 __U, __m512i __A) 1632{ 1633 __builtin_ia32_storedquqi512_mask ((__v64qi *) __P, 1634 (__v64qi) __A, 1635 (__mmask64) __U); 1636} 1637 1638static __inline__ __mmask64 __DEFAULT_FN_ATTRS 1639_mm512_test_epi8_mask (__m512i __A, __m512i __B) 1640{ 1641 return _mm512_cmpneq_epi8_mask (_mm512_and_epi32 (__A, __B), 1642 _mm512_setzero_si512()); 1643} 1644 1645static __inline__ __mmask64 __DEFAULT_FN_ATTRS 1646_mm512_mask_test_epi8_mask (__mmask64 __U, __m512i __A, __m512i __B) 1647{ 1648 return _mm512_mask_cmpneq_epi8_mask (__U, _mm512_and_epi32 (__A, __B), 1649 _mm512_setzero_si512()); 1650} 1651 1652static __inline__ __mmask32 __DEFAULT_FN_ATTRS 1653_mm512_test_epi16_mask (__m512i __A, __m512i __B) 1654{ 1655 return _mm512_cmpneq_epi16_mask (_mm512_and_epi32 (__A, __B), 1656 _mm512_setzero_si512()); 1657} 1658 1659static __inline__ __mmask32 __DEFAULT_FN_ATTRS 1660_mm512_mask_test_epi16_mask (__mmask32 __U, __m512i __A, __m512i __B) 1661{ 1662 return _mm512_mask_cmpneq_epi16_mask (__U, _mm512_and_epi32 (__A, __B), 1663 _mm512_setzero_si512()); 1664} 1665 1666static __inline__ __mmask64 __DEFAULT_FN_ATTRS 1667_mm512_testn_epi8_mask (__m512i __A, __m512i __B) 1668{ 1669 return _mm512_cmpeq_epi8_mask (_mm512_and_epi32 (__A, __B), _mm512_setzero_si512()); 1670} 1671 1672static __inline__ __mmask64 __DEFAULT_FN_ATTRS 1673_mm512_mask_testn_epi8_mask (__mmask64 __U, __m512i __A, __m512i __B) 1674{ 1675 return _mm512_mask_cmpeq_epi8_mask (__U, _mm512_and_epi32 (__A, __B), 1676 _mm512_setzero_si512()); 1677} 1678 1679static __inline__ __mmask32 __DEFAULT_FN_ATTRS 1680_mm512_testn_epi16_mask (__m512i __A, __m512i __B) 1681{ 1682 return _mm512_cmpeq_epi16_mask (_mm512_and_epi32 (__A, __B), 1683 _mm512_setzero_si512()); 1684} 1685 1686static __inline__ __mmask32 __DEFAULT_FN_ATTRS 1687_mm512_mask_testn_epi16_mask (__mmask32 __U, __m512i __A, __m512i __B) 1688{ 1689 return _mm512_mask_cmpeq_epi16_mask (__U, _mm512_and_epi32 (__A, __B), 1690 _mm512_setzero_si512()); 1691} 1692 1693static __inline__ __mmask64 __DEFAULT_FN_ATTRS 1694_mm512_movepi8_mask (__m512i __A) 1695{ 1696 return (__mmask64) __builtin_ia32_cvtb2mask512 ((__v64qi) __A); 1697} 1698 1699static __inline__ __mmask32 __DEFAULT_FN_ATTRS 1700_mm512_movepi16_mask (__m512i __A) 1701{ 1702 return (__mmask32) __builtin_ia32_cvtw2mask512 ((__v32hi) __A); 1703} 1704 1705static __inline__ __m512i __DEFAULT_FN_ATTRS 1706_mm512_movm_epi8 (__mmask64 __A) 1707{ 1708 return (__m512i) __builtin_ia32_cvtmask2b512 (__A); 1709} 1710 1711static __inline__ __m512i __DEFAULT_FN_ATTRS 1712_mm512_movm_epi16 (__mmask32 __A) 1713{ 1714 return (__m512i) __builtin_ia32_cvtmask2w512 (__A); 1715} 1716 1717static __inline__ __m512i __DEFAULT_FN_ATTRS 1718_mm512_broadcastb_epi8 (__m128i __A) 1719{ 1720 return (__m512i)__builtin_shufflevector((__v16qi) __A, (__v16qi) __A, 1721 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1722 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1723 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1724 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); 1725} 1726 1727static __inline__ __m512i __DEFAULT_FN_ATTRS 1728_mm512_mask_broadcastb_epi8 (__m512i __O, __mmask64 __M, __m128i __A) 1729{ 1730 return (__m512i)__builtin_ia32_selectb_512(__M, 1731 (__v64qi) _mm512_broadcastb_epi8(__A), 1732 (__v64qi) __O); 1733} 1734 1735static __inline__ __m512i __DEFAULT_FN_ATTRS 1736_mm512_maskz_broadcastb_epi8 (__mmask64 __M, __m128i __A) 1737{ 1738 return (__m512i)__builtin_ia32_selectb_512(__M, 1739 (__v64qi) _mm512_broadcastb_epi8(__A), 1740 (__v64qi) _mm512_setzero_si512()); 1741} 1742 1743static __inline__ __m512i __DEFAULT_FN_ATTRS 1744_mm512_mask_set1_epi16 (__m512i __O, __mmask32 __M, short __A) 1745{ 1746 return (__m512i) __builtin_ia32_selectw_512(__M, 1747 (__v32hi) _mm512_set1_epi16(__A), 1748 (__v32hi) __O); 1749} 1750 1751static __inline__ __m512i __DEFAULT_FN_ATTRS 1752_mm512_maskz_set1_epi16 (__mmask32 __M, short __A) 1753{ 1754 return (__m512i) __builtin_ia32_selectw_512(__M, 1755 (__v32hi) _mm512_set1_epi16(__A), 1756 (__v32hi) _mm512_setzero_si512()); 1757} 1758 1759static __inline__ __m512i __DEFAULT_FN_ATTRS 1760_mm512_broadcastw_epi16 (__m128i __A) 1761{ 1762 return (__m512i)__builtin_shufflevector((__v8hi) __A, (__v8hi) __A, 1763 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1764 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); 1765} 1766 1767static __inline__ __m512i __DEFAULT_FN_ATTRS 1768_mm512_mask_broadcastw_epi16 (__m512i __O, __mmask32 __M, __m128i __A) 1769{ 1770 return (__m512i)__builtin_ia32_selectw_512(__M, 1771 (__v32hi) _mm512_broadcastw_epi16(__A), 1772 (__v32hi) __O); 1773} 1774 1775static __inline__ __m512i __DEFAULT_FN_ATTRS 1776_mm512_maskz_broadcastw_epi16 (__mmask32 __M, __m128i __A) 1777{ 1778 return (__m512i)__builtin_ia32_selectw_512(__M, 1779 (__v32hi) _mm512_broadcastw_epi16(__A), 1780 (__v32hi) _mm512_setzero_si512()); 1781} 1782 1783static __inline__ __m512i __DEFAULT_FN_ATTRS 1784_mm512_permutexvar_epi16 (__m512i __A, __m512i __B) 1785{ 1786 return (__m512i)__builtin_ia32_permvarhi512((__v32hi)__B, (__v32hi)__A); 1787} 1788 1789static __inline__ __m512i __DEFAULT_FN_ATTRS 1790_mm512_maskz_permutexvar_epi16 (__mmask32 __M, __m512i __A, 1791 __m512i __B) 1792{ 1793 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 1794 (__v32hi)_mm512_permutexvar_epi16(__A, __B), 1795 (__v32hi)_mm512_setzero_si512()); 1796} 1797 1798static __inline__ __m512i __DEFAULT_FN_ATTRS 1799_mm512_mask_permutexvar_epi16 (__m512i __W, __mmask32 __M, __m512i __A, 1800 __m512i __B) 1801{ 1802 return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, 1803 (__v32hi)_mm512_permutexvar_epi16(__A, __B), 1804 (__v32hi)__W); 1805} 1806 1807#define _mm512_alignr_epi8(A, B, N) \ 1808 (__m512i)__builtin_ia32_palignr512((__v64qi)(__m512i)(A), \ 1809 (__v64qi)(__m512i)(B), (int)(N)) 1810 1811#define _mm512_mask_alignr_epi8(W, U, A, B, N) \ 1812 (__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \ 1813 (__v64qi)_mm512_alignr_epi8((A), (B), (int)(N)), \ 1814 (__v64qi)(__m512i)(W)) 1815 1816#define _mm512_maskz_alignr_epi8(U, A, B, N) \ 1817 (__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \ 1818 (__v64qi)_mm512_alignr_epi8((A), (B), (int)(N)), \ 1819 (__v64qi)(__m512i)_mm512_setzero_si512()) 1820 1821#define _mm512_dbsad_epu8(A, B, imm) \ 1822 (__m512i)__builtin_ia32_dbpsadbw512((__v64qi)(__m512i)(A), \ 1823 (__v64qi)(__m512i)(B), (int)(imm)) 1824 1825#define _mm512_mask_dbsad_epu8(W, U, A, B, imm) \ 1826 (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ 1827 (__v32hi)_mm512_dbsad_epu8((A), (B), (imm)), \ 1828 (__v32hi)(__m512i)(W)) 1829 1830#define _mm512_maskz_dbsad_epu8(U, A, B, imm) \ 1831 (__m512i)__builtin_ia32_selectw_512((__mmask32)(U), \ 1832 (__v32hi)_mm512_dbsad_epu8((A), (B), (imm)), \ 1833 (__v32hi)_mm512_setzero_si512()) 1834 1835static __inline__ __m512i __DEFAULT_FN_ATTRS 1836_mm512_sad_epu8 (__m512i __A, __m512i __B) 1837{ 1838 return (__m512i) __builtin_ia32_psadbw512 ((__v64qi) __A, 1839 (__v64qi) __B); 1840} 1841 1842 1843 1844#undef __DEFAULT_FN_ATTRS 1845 1846#endif 1847