1/*===---- avx512vlbwintrin.h - AVX512VL and AVX512BW intrinsics ------------=== 2 * 3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 * See https://llvm.org/LICENSE.txt for license information. 5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 * 7 *===-----------------------------------------------------------------------=== 8 */ 9 10#ifndef __IMMINTRIN_H 11#error "Never use <avx512vlbwintrin.h> directly; include <immintrin.h> instead." 12#endif 13 14#ifndef __AVX512VLBWINTRIN_H 15#define __AVX512VLBWINTRIN_H 16 17/* Define the default attributes for the functions in this file. */ 18#define __DEFAULT_FN_ATTRS128 \ 19 __attribute__((__always_inline__, __nodebug__, \ 20 __target__("avx512vl,avx512bw,no-evex512"), \ 21 __min_vector_width__(128))) 22#define __DEFAULT_FN_ATTRS256 \ 23 __attribute__((__always_inline__, __nodebug__, \ 24 __target__("avx512vl,avx512bw,no-evex512"), \ 25 __min_vector_width__(256))) 26 27/* Integer compare */ 28 29#define _mm_cmp_epi8_mask(a, b, p) \ 30 ((__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)(__m128i)(a), \ 31 (__v16qi)(__m128i)(b), (int)(p), \ 32 (__mmask16)-1)) 33 34#define _mm_mask_cmp_epi8_mask(m, a, b, p) \ 35 ((__mmask16)__builtin_ia32_cmpb128_mask((__v16qi)(__m128i)(a), \ 36 (__v16qi)(__m128i)(b), (int)(p), \ 37 (__mmask16)(m))) 38 39#define _mm_cmp_epu8_mask(a, b, p) \ 40 ((__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)(__m128i)(a), \ 41 (__v16qi)(__m128i)(b), (int)(p), \ 42 (__mmask16)-1)) 43 44#define _mm_mask_cmp_epu8_mask(m, a, b, p) \ 45 ((__mmask16)__builtin_ia32_ucmpb128_mask((__v16qi)(__m128i)(a), \ 46 (__v16qi)(__m128i)(b), (int)(p), \ 47 (__mmask16)(m))) 48 49#define _mm256_cmp_epi8_mask(a, b, p) \ 50 ((__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)(__m256i)(a), \ 51 (__v32qi)(__m256i)(b), (int)(p), \ 52 (__mmask32)-1)) 53 54#define _mm256_mask_cmp_epi8_mask(m, a, b, p) \ 55 ((__mmask32)__builtin_ia32_cmpb256_mask((__v32qi)(__m256i)(a), \ 56 (__v32qi)(__m256i)(b), (int)(p), \ 57 (__mmask32)(m))) 58 59#define _mm256_cmp_epu8_mask(a, b, p) \ 60 ((__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)(__m256i)(a), \ 61 (__v32qi)(__m256i)(b), (int)(p), \ 62 (__mmask32)-1)) 63 64#define _mm256_mask_cmp_epu8_mask(m, a, b, p) \ 65 ((__mmask32)__builtin_ia32_ucmpb256_mask((__v32qi)(__m256i)(a), \ 66 (__v32qi)(__m256i)(b), (int)(p), \ 67 (__mmask32)(m))) 68 69#define _mm_cmp_epi16_mask(a, b, p) \ 70 ((__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)(__m128i)(a), \ 71 (__v8hi)(__m128i)(b), (int)(p), \ 72 (__mmask8)-1)) 73 74#define _mm_mask_cmp_epi16_mask(m, a, b, p) \ 75 ((__mmask8)__builtin_ia32_cmpw128_mask((__v8hi)(__m128i)(a), \ 76 (__v8hi)(__m128i)(b), (int)(p), \ 77 (__mmask8)(m))) 78 79#define _mm_cmp_epu16_mask(a, b, p) \ 80 ((__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)(__m128i)(a), \ 81 (__v8hi)(__m128i)(b), (int)(p), \ 82 (__mmask8)-1)) 83 84#define _mm_mask_cmp_epu16_mask(m, a, b, p) \ 85 ((__mmask8)__builtin_ia32_ucmpw128_mask((__v8hi)(__m128i)(a), \ 86 (__v8hi)(__m128i)(b), (int)(p), \ 87 (__mmask8)(m))) 88 89#define _mm256_cmp_epi16_mask(a, b, p) \ 90 ((__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)(__m256i)(a), \ 91 (__v16hi)(__m256i)(b), (int)(p), \ 92 (__mmask16)-1)) 93 94#define _mm256_mask_cmp_epi16_mask(m, a, b, p) \ 95 ((__mmask16)__builtin_ia32_cmpw256_mask((__v16hi)(__m256i)(a), \ 96 (__v16hi)(__m256i)(b), (int)(p), \ 97 (__mmask16)(m))) 98 99#define _mm256_cmp_epu16_mask(a, b, p) \ 100 ((__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)(__m256i)(a), \ 101 (__v16hi)(__m256i)(b), (int)(p), \ 102 (__mmask16)-1)) 103 104#define _mm256_mask_cmp_epu16_mask(m, a, b, p) \ 105 ((__mmask16)__builtin_ia32_ucmpw256_mask((__v16hi)(__m256i)(a), \ 106 (__v16hi)(__m256i)(b), (int)(p), \ 107 (__mmask16)(m))) 108 109#define _mm_cmpeq_epi8_mask(A, B) \ 110 _mm_cmp_epi8_mask((A), (B), _MM_CMPINT_EQ) 111#define _mm_mask_cmpeq_epi8_mask(k, A, B) \ 112 _mm_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_EQ) 113#define _mm_cmpge_epi8_mask(A, B) \ 114 _mm_cmp_epi8_mask((A), (B), _MM_CMPINT_GE) 115#define _mm_mask_cmpge_epi8_mask(k, A, B) \ 116 _mm_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_GE) 117#define _mm_cmpgt_epi8_mask(A, B) \ 118 _mm_cmp_epi8_mask((A), (B), _MM_CMPINT_GT) 119#define _mm_mask_cmpgt_epi8_mask(k, A, B) \ 120 _mm_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_GT) 121#define _mm_cmple_epi8_mask(A, B) \ 122 _mm_cmp_epi8_mask((A), (B), _MM_CMPINT_LE) 123#define _mm_mask_cmple_epi8_mask(k, A, B) \ 124 _mm_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_LE) 125#define _mm_cmplt_epi8_mask(A, B) \ 126 _mm_cmp_epi8_mask((A), (B), _MM_CMPINT_LT) 127#define _mm_mask_cmplt_epi8_mask(k, A, B) \ 128 _mm_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_LT) 129#define _mm_cmpneq_epi8_mask(A, B) \ 130 _mm_cmp_epi8_mask((A), (B), _MM_CMPINT_NE) 131#define _mm_mask_cmpneq_epi8_mask(k, A, B) \ 132 _mm_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_NE) 133 134#define _mm256_cmpeq_epi8_mask(A, B) \ 135 _mm256_cmp_epi8_mask((A), (B), _MM_CMPINT_EQ) 136#define _mm256_mask_cmpeq_epi8_mask(k, A, B) \ 137 _mm256_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_EQ) 138#define _mm256_cmpge_epi8_mask(A, B) \ 139 _mm256_cmp_epi8_mask((A), (B), _MM_CMPINT_GE) 140#define _mm256_mask_cmpge_epi8_mask(k, A, B) \ 141 _mm256_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_GE) 142#define _mm256_cmpgt_epi8_mask(A, B) \ 143 _mm256_cmp_epi8_mask((A), (B), _MM_CMPINT_GT) 144#define _mm256_mask_cmpgt_epi8_mask(k, A, B) \ 145 _mm256_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_GT) 146#define _mm256_cmple_epi8_mask(A, B) \ 147 _mm256_cmp_epi8_mask((A), (B), _MM_CMPINT_LE) 148#define _mm256_mask_cmple_epi8_mask(k, A, B) \ 149 _mm256_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_LE) 150#define _mm256_cmplt_epi8_mask(A, B) \ 151 _mm256_cmp_epi8_mask((A), (B), _MM_CMPINT_LT) 152#define _mm256_mask_cmplt_epi8_mask(k, A, B) \ 153 _mm256_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_LT) 154#define _mm256_cmpneq_epi8_mask(A, B) \ 155 _mm256_cmp_epi8_mask((A), (B), _MM_CMPINT_NE) 156#define _mm256_mask_cmpneq_epi8_mask(k, A, B) \ 157 _mm256_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_NE) 158 159#define _mm_cmpeq_epu8_mask(A, B) \ 160 _mm_cmp_epu8_mask((A), (B), _MM_CMPINT_EQ) 161#define _mm_mask_cmpeq_epu8_mask(k, A, B) \ 162 _mm_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_EQ) 163#define _mm_cmpge_epu8_mask(A, B) \ 164 _mm_cmp_epu8_mask((A), (B), _MM_CMPINT_GE) 165#define _mm_mask_cmpge_epu8_mask(k, A, B) \ 166 _mm_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_GE) 167#define _mm_cmpgt_epu8_mask(A, B) \ 168 _mm_cmp_epu8_mask((A), (B), _MM_CMPINT_GT) 169#define _mm_mask_cmpgt_epu8_mask(k, A, B) \ 170 _mm_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_GT) 171#define _mm_cmple_epu8_mask(A, B) \ 172 _mm_cmp_epu8_mask((A), (B), _MM_CMPINT_LE) 173#define _mm_mask_cmple_epu8_mask(k, A, B) \ 174 _mm_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_LE) 175#define _mm_cmplt_epu8_mask(A, B) \ 176 _mm_cmp_epu8_mask((A), (B), _MM_CMPINT_LT) 177#define _mm_mask_cmplt_epu8_mask(k, A, B) \ 178 _mm_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_LT) 179#define _mm_cmpneq_epu8_mask(A, B) \ 180 _mm_cmp_epu8_mask((A), (B), _MM_CMPINT_NE) 181#define _mm_mask_cmpneq_epu8_mask(k, A, B) \ 182 _mm_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_NE) 183 184#define _mm256_cmpeq_epu8_mask(A, B) \ 185 _mm256_cmp_epu8_mask((A), (B), _MM_CMPINT_EQ) 186#define _mm256_mask_cmpeq_epu8_mask(k, A, B) \ 187 _mm256_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_EQ) 188#define _mm256_cmpge_epu8_mask(A, B) \ 189 _mm256_cmp_epu8_mask((A), (B), _MM_CMPINT_GE) 190#define _mm256_mask_cmpge_epu8_mask(k, A, B) \ 191 _mm256_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_GE) 192#define _mm256_cmpgt_epu8_mask(A, B) \ 193 _mm256_cmp_epu8_mask((A), (B), _MM_CMPINT_GT) 194#define _mm256_mask_cmpgt_epu8_mask(k, A, B) \ 195 _mm256_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_GT) 196#define _mm256_cmple_epu8_mask(A, B) \ 197 _mm256_cmp_epu8_mask((A), (B), _MM_CMPINT_LE) 198#define _mm256_mask_cmple_epu8_mask(k, A, B) \ 199 _mm256_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_LE) 200#define _mm256_cmplt_epu8_mask(A, B) \ 201 _mm256_cmp_epu8_mask((A), (B), _MM_CMPINT_LT) 202#define _mm256_mask_cmplt_epu8_mask(k, A, B) \ 203 _mm256_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_LT) 204#define _mm256_cmpneq_epu8_mask(A, B) \ 205 _mm256_cmp_epu8_mask((A), (B), _MM_CMPINT_NE) 206#define _mm256_mask_cmpneq_epu8_mask(k, A, B) \ 207 _mm256_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_NE) 208 209#define _mm_cmpeq_epi16_mask(A, B) \ 210 _mm_cmp_epi16_mask((A), (B), _MM_CMPINT_EQ) 211#define _mm_mask_cmpeq_epi16_mask(k, A, B) \ 212 _mm_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_EQ) 213#define _mm_cmpge_epi16_mask(A, B) \ 214 _mm_cmp_epi16_mask((A), (B), _MM_CMPINT_GE) 215#define _mm_mask_cmpge_epi16_mask(k, A, B) \ 216 _mm_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_GE) 217#define _mm_cmpgt_epi16_mask(A, B) \ 218 _mm_cmp_epi16_mask((A), (B), _MM_CMPINT_GT) 219#define _mm_mask_cmpgt_epi16_mask(k, A, B) \ 220 _mm_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_GT) 221#define _mm_cmple_epi16_mask(A, B) \ 222 _mm_cmp_epi16_mask((A), (B), _MM_CMPINT_LE) 223#define _mm_mask_cmple_epi16_mask(k, A, B) \ 224 _mm_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_LE) 225#define _mm_cmplt_epi16_mask(A, B) \ 226 _mm_cmp_epi16_mask((A), (B), _MM_CMPINT_LT) 227#define _mm_mask_cmplt_epi16_mask(k, A, B) \ 228 _mm_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_LT) 229#define _mm_cmpneq_epi16_mask(A, B) \ 230 _mm_cmp_epi16_mask((A), (B), _MM_CMPINT_NE) 231#define _mm_mask_cmpneq_epi16_mask(k, A, B) \ 232 _mm_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_NE) 233 234#define _mm256_cmpeq_epi16_mask(A, B) \ 235 _mm256_cmp_epi16_mask((A), (B), _MM_CMPINT_EQ) 236#define _mm256_mask_cmpeq_epi16_mask(k, A, B) \ 237 _mm256_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_EQ) 238#define _mm256_cmpge_epi16_mask(A, B) \ 239 _mm256_cmp_epi16_mask((A), (B), _MM_CMPINT_GE) 240#define _mm256_mask_cmpge_epi16_mask(k, A, B) \ 241 _mm256_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_GE) 242#define _mm256_cmpgt_epi16_mask(A, B) \ 243 _mm256_cmp_epi16_mask((A), (B), _MM_CMPINT_GT) 244#define _mm256_mask_cmpgt_epi16_mask(k, A, B) \ 245 _mm256_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_GT) 246#define _mm256_cmple_epi16_mask(A, B) \ 247 _mm256_cmp_epi16_mask((A), (B), _MM_CMPINT_LE) 248#define _mm256_mask_cmple_epi16_mask(k, A, B) \ 249 _mm256_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_LE) 250#define _mm256_cmplt_epi16_mask(A, B) \ 251 _mm256_cmp_epi16_mask((A), (B), _MM_CMPINT_LT) 252#define _mm256_mask_cmplt_epi16_mask(k, A, B) \ 253 _mm256_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_LT) 254#define _mm256_cmpneq_epi16_mask(A, B) \ 255 _mm256_cmp_epi16_mask((A), (B), _MM_CMPINT_NE) 256#define _mm256_mask_cmpneq_epi16_mask(k, A, B) \ 257 _mm256_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_NE) 258 259#define _mm_cmpeq_epu16_mask(A, B) \ 260 _mm_cmp_epu16_mask((A), (B), _MM_CMPINT_EQ) 261#define _mm_mask_cmpeq_epu16_mask(k, A, B) \ 262 _mm_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_EQ) 263#define _mm_cmpge_epu16_mask(A, B) \ 264 _mm_cmp_epu16_mask((A), (B), _MM_CMPINT_GE) 265#define _mm_mask_cmpge_epu16_mask(k, A, B) \ 266 _mm_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_GE) 267#define _mm_cmpgt_epu16_mask(A, B) \ 268 _mm_cmp_epu16_mask((A), (B), _MM_CMPINT_GT) 269#define _mm_mask_cmpgt_epu16_mask(k, A, B) \ 270 _mm_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_GT) 271#define _mm_cmple_epu16_mask(A, B) \ 272 _mm_cmp_epu16_mask((A), (B), _MM_CMPINT_LE) 273#define _mm_mask_cmple_epu16_mask(k, A, B) \ 274 _mm_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_LE) 275#define _mm_cmplt_epu16_mask(A, B) \ 276 _mm_cmp_epu16_mask((A), (B), _MM_CMPINT_LT) 277#define _mm_mask_cmplt_epu16_mask(k, A, B) \ 278 _mm_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_LT) 279#define _mm_cmpneq_epu16_mask(A, B) \ 280 _mm_cmp_epu16_mask((A), (B), _MM_CMPINT_NE) 281#define _mm_mask_cmpneq_epu16_mask(k, A, B) \ 282 _mm_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_NE) 283 284#define _mm256_cmpeq_epu16_mask(A, B) \ 285 _mm256_cmp_epu16_mask((A), (B), _MM_CMPINT_EQ) 286#define _mm256_mask_cmpeq_epu16_mask(k, A, B) \ 287 _mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_EQ) 288#define _mm256_cmpge_epu16_mask(A, B) \ 289 _mm256_cmp_epu16_mask((A), (B), _MM_CMPINT_GE) 290#define _mm256_mask_cmpge_epu16_mask(k, A, B) \ 291 _mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_GE) 292#define _mm256_cmpgt_epu16_mask(A, B) \ 293 _mm256_cmp_epu16_mask((A), (B), _MM_CMPINT_GT) 294#define _mm256_mask_cmpgt_epu16_mask(k, A, B) \ 295 _mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_GT) 296#define _mm256_cmple_epu16_mask(A, B) \ 297 _mm256_cmp_epu16_mask((A), (B), _MM_CMPINT_LE) 298#define _mm256_mask_cmple_epu16_mask(k, A, B) \ 299 _mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_LE) 300#define _mm256_cmplt_epu16_mask(A, B) \ 301 _mm256_cmp_epu16_mask((A), (B), _MM_CMPINT_LT) 302#define _mm256_mask_cmplt_epu16_mask(k, A, B) \ 303 _mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_LT) 304#define _mm256_cmpneq_epu16_mask(A, B) \ 305 _mm256_cmp_epu16_mask((A), (B), _MM_CMPINT_NE) 306#define _mm256_mask_cmpneq_epu16_mask(k, A, B) \ 307 _mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_NE) 308 309static __inline__ __m256i __DEFAULT_FN_ATTRS256 310_mm256_mask_add_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B){ 311 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, 312 (__v32qi)_mm256_add_epi8(__A, __B), 313 (__v32qi)__W); 314} 315 316static __inline__ __m256i __DEFAULT_FN_ATTRS256 317_mm256_maskz_add_epi8(__mmask32 __U, __m256i __A, __m256i __B) { 318 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, 319 (__v32qi)_mm256_add_epi8(__A, __B), 320 (__v32qi)_mm256_setzero_si256()); 321} 322 323static __inline__ __m256i __DEFAULT_FN_ATTRS256 324_mm256_mask_add_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { 325 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 326 (__v16hi)_mm256_add_epi16(__A, __B), 327 (__v16hi)__W); 328} 329 330static __inline__ __m256i __DEFAULT_FN_ATTRS256 331_mm256_maskz_add_epi16(__mmask16 __U, __m256i __A, __m256i __B) { 332 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 333 (__v16hi)_mm256_add_epi16(__A, __B), 334 (__v16hi)_mm256_setzero_si256()); 335} 336 337static __inline__ __m256i __DEFAULT_FN_ATTRS256 338_mm256_mask_sub_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) { 339 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, 340 (__v32qi)_mm256_sub_epi8(__A, __B), 341 (__v32qi)__W); 342} 343 344static __inline__ __m256i __DEFAULT_FN_ATTRS256 345_mm256_maskz_sub_epi8(__mmask32 __U, __m256i __A, __m256i __B) { 346 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, 347 (__v32qi)_mm256_sub_epi8(__A, __B), 348 (__v32qi)_mm256_setzero_si256()); 349} 350 351static __inline__ __m256i __DEFAULT_FN_ATTRS256 352_mm256_mask_sub_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { 353 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 354 (__v16hi)_mm256_sub_epi16(__A, __B), 355 (__v16hi)__W); 356} 357 358static __inline__ __m256i __DEFAULT_FN_ATTRS256 359_mm256_maskz_sub_epi16(__mmask16 __U, __m256i __A, __m256i __B) { 360 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 361 (__v16hi)_mm256_sub_epi16(__A, __B), 362 (__v16hi)_mm256_setzero_si256()); 363} 364 365static __inline__ __m128i __DEFAULT_FN_ATTRS128 366_mm_mask_add_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) { 367 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, 368 (__v16qi)_mm_add_epi8(__A, __B), 369 (__v16qi)__W); 370} 371 372static __inline__ __m128i __DEFAULT_FN_ATTRS128 373_mm_maskz_add_epi8(__mmask16 __U, __m128i __A, __m128i __B) { 374 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, 375 (__v16qi)_mm_add_epi8(__A, __B), 376 (__v16qi)_mm_setzero_si128()); 377} 378 379static __inline__ __m128i __DEFAULT_FN_ATTRS128 380_mm_mask_add_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { 381 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 382 (__v8hi)_mm_add_epi16(__A, __B), 383 (__v8hi)__W); 384} 385 386static __inline__ __m128i __DEFAULT_FN_ATTRS128 387_mm_maskz_add_epi16(__mmask8 __U, __m128i __A, __m128i __B) { 388 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 389 (__v8hi)_mm_add_epi16(__A, __B), 390 (__v8hi)_mm_setzero_si128()); 391} 392 393static __inline__ __m128i __DEFAULT_FN_ATTRS128 394_mm_mask_sub_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) { 395 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, 396 (__v16qi)_mm_sub_epi8(__A, __B), 397 (__v16qi)__W); 398} 399 400static __inline__ __m128i __DEFAULT_FN_ATTRS128 401_mm_maskz_sub_epi8(__mmask16 __U, __m128i __A, __m128i __B) { 402 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, 403 (__v16qi)_mm_sub_epi8(__A, __B), 404 (__v16qi)_mm_setzero_si128()); 405} 406 407static __inline__ __m128i __DEFAULT_FN_ATTRS128 408_mm_mask_sub_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { 409 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 410 (__v8hi)_mm_sub_epi16(__A, __B), 411 (__v8hi)__W); 412} 413 414static __inline__ __m128i __DEFAULT_FN_ATTRS128 415_mm_maskz_sub_epi16(__mmask8 __U, __m128i __A, __m128i __B) { 416 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 417 (__v8hi)_mm_sub_epi16(__A, __B), 418 (__v8hi)_mm_setzero_si128()); 419} 420 421static __inline__ __m256i __DEFAULT_FN_ATTRS256 422_mm256_mask_mullo_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { 423 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 424 (__v16hi)_mm256_mullo_epi16(__A, __B), 425 (__v16hi)__W); 426} 427 428static __inline__ __m256i __DEFAULT_FN_ATTRS256 429_mm256_maskz_mullo_epi16(__mmask16 __U, __m256i __A, __m256i __B) { 430 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 431 (__v16hi)_mm256_mullo_epi16(__A, __B), 432 (__v16hi)_mm256_setzero_si256()); 433} 434 435static __inline__ __m128i __DEFAULT_FN_ATTRS128 436_mm_mask_mullo_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { 437 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 438 (__v8hi)_mm_mullo_epi16(__A, __B), 439 (__v8hi)__W); 440} 441 442static __inline__ __m128i __DEFAULT_FN_ATTRS128 443_mm_maskz_mullo_epi16(__mmask8 __U, __m128i __A, __m128i __B) { 444 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 445 (__v8hi)_mm_mullo_epi16(__A, __B), 446 (__v8hi)_mm_setzero_si128()); 447} 448 449static __inline__ __m128i __DEFAULT_FN_ATTRS128 450_mm_mask_blend_epi8 (__mmask16 __U, __m128i __A, __m128i __W) 451{ 452 return (__m128i) __builtin_ia32_selectb_128 ((__mmask16) __U, 453 (__v16qi) __W, 454 (__v16qi) __A); 455} 456 457static __inline__ __m256i __DEFAULT_FN_ATTRS256 458_mm256_mask_blend_epi8 (__mmask32 __U, __m256i __A, __m256i __W) 459{ 460 return (__m256i) __builtin_ia32_selectb_256 ((__mmask32) __U, 461 (__v32qi) __W, 462 (__v32qi) __A); 463} 464 465static __inline__ __m128i __DEFAULT_FN_ATTRS128 466_mm_mask_blend_epi16 (__mmask8 __U, __m128i __A, __m128i __W) 467{ 468 return (__m128i) __builtin_ia32_selectw_128 ((__mmask8) __U, 469 (__v8hi) __W, 470 (__v8hi) __A); 471} 472 473static __inline__ __m256i __DEFAULT_FN_ATTRS256 474_mm256_mask_blend_epi16 (__mmask16 __U, __m256i __A, __m256i __W) 475{ 476 return (__m256i) __builtin_ia32_selectw_256 ((__mmask16) __U, 477 (__v16hi) __W, 478 (__v16hi) __A); 479} 480 481static __inline__ __m128i __DEFAULT_FN_ATTRS128 482_mm_mask_abs_epi8(__m128i __W, __mmask16 __U, __m128i __A) 483{ 484 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, 485 (__v16qi)_mm_abs_epi8(__A), 486 (__v16qi)__W); 487} 488 489static __inline__ __m128i __DEFAULT_FN_ATTRS128 490_mm_maskz_abs_epi8(__mmask16 __U, __m128i __A) 491{ 492 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, 493 (__v16qi)_mm_abs_epi8(__A), 494 (__v16qi)_mm_setzero_si128()); 495} 496 497static __inline__ __m256i __DEFAULT_FN_ATTRS256 498_mm256_mask_abs_epi8(__m256i __W, __mmask32 __U, __m256i __A) 499{ 500 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, 501 (__v32qi)_mm256_abs_epi8(__A), 502 (__v32qi)__W); 503} 504 505static __inline__ __m256i __DEFAULT_FN_ATTRS256 506_mm256_maskz_abs_epi8 (__mmask32 __U, __m256i __A) 507{ 508 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, 509 (__v32qi)_mm256_abs_epi8(__A), 510 (__v32qi)_mm256_setzero_si256()); 511} 512 513static __inline__ __m128i __DEFAULT_FN_ATTRS128 514_mm_mask_abs_epi16(__m128i __W, __mmask8 __U, __m128i __A) 515{ 516 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 517 (__v8hi)_mm_abs_epi16(__A), 518 (__v8hi)__W); 519} 520 521static __inline__ __m128i __DEFAULT_FN_ATTRS128 522_mm_maskz_abs_epi16(__mmask8 __U, __m128i __A) 523{ 524 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 525 (__v8hi)_mm_abs_epi16(__A), 526 (__v8hi)_mm_setzero_si128()); 527} 528 529static __inline__ __m256i __DEFAULT_FN_ATTRS256 530_mm256_mask_abs_epi16(__m256i __W, __mmask16 __U, __m256i __A) 531{ 532 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 533 (__v16hi)_mm256_abs_epi16(__A), 534 (__v16hi)__W); 535} 536 537static __inline__ __m256i __DEFAULT_FN_ATTRS256 538_mm256_maskz_abs_epi16(__mmask16 __U, __m256i __A) 539{ 540 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 541 (__v16hi)_mm256_abs_epi16(__A), 542 (__v16hi)_mm256_setzero_si256()); 543} 544 545static __inline__ __m128i __DEFAULT_FN_ATTRS128 546_mm_maskz_packs_epi32(__mmask8 __M, __m128i __A, __m128i __B) { 547 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M, 548 (__v8hi)_mm_packs_epi32(__A, __B), 549 (__v8hi)_mm_setzero_si128()); 550} 551 552static __inline__ __m128i __DEFAULT_FN_ATTRS128 553_mm_mask_packs_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) 554{ 555 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M, 556 (__v8hi)_mm_packs_epi32(__A, __B), 557 (__v8hi)__W); 558} 559 560static __inline__ __m256i __DEFAULT_FN_ATTRS256 561_mm256_maskz_packs_epi32(__mmask16 __M, __m256i __A, __m256i __B) 562{ 563 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M, 564 (__v16hi)_mm256_packs_epi32(__A, __B), 565 (__v16hi)_mm256_setzero_si256()); 566} 567 568static __inline__ __m256i __DEFAULT_FN_ATTRS256 569_mm256_mask_packs_epi32(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B) 570{ 571 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M, 572 (__v16hi)_mm256_packs_epi32(__A, __B), 573 (__v16hi)__W); 574} 575 576static __inline__ __m128i __DEFAULT_FN_ATTRS128 577_mm_maskz_packs_epi16(__mmask16 __M, __m128i __A, __m128i __B) 578{ 579 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, 580 (__v16qi)_mm_packs_epi16(__A, __B), 581 (__v16qi)_mm_setzero_si128()); 582} 583 584static __inline__ __m128i __DEFAULT_FN_ATTRS128 585_mm_mask_packs_epi16(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B) 586{ 587 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, 588 (__v16qi)_mm_packs_epi16(__A, __B), 589 (__v16qi)__W); 590} 591 592static __inline__ __m256i __DEFAULT_FN_ATTRS256 593_mm256_maskz_packs_epi16(__mmask32 __M, __m256i __A, __m256i __B) 594{ 595 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, 596 (__v32qi)_mm256_packs_epi16(__A, __B), 597 (__v32qi)_mm256_setzero_si256()); 598} 599 600static __inline__ __m256i __DEFAULT_FN_ATTRS256 601_mm256_mask_packs_epi16(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B) 602{ 603 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, 604 (__v32qi)_mm256_packs_epi16(__A, __B), 605 (__v32qi)__W); 606} 607 608static __inline__ __m128i __DEFAULT_FN_ATTRS128 609_mm_maskz_packus_epi32(__mmask8 __M, __m128i __A, __m128i __B) 610{ 611 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M, 612 (__v8hi)_mm_packus_epi32(__A, __B), 613 (__v8hi)_mm_setzero_si128()); 614} 615 616static __inline__ __m128i __DEFAULT_FN_ATTRS128 617_mm_mask_packus_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) 618{ 619 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M, 620 (__v8hi)_mm_packus_epi32(__A, __B), 621 (__v8hi)__W); 622} 623 624static __inline__ __m256i __DEFAULT_FN_ATTRS256 625_mm256_maskz_packus_epi32(__mmask16 __M, __m256i __A, __m256i __B) 626{ 627 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M, 628 (__v16hi)_mm256_packus_epi32(__A, __B), 629 (__v16hi)_mm256_setzero_si256()); 630} 631 632static __inline__ __m256i __DEFAULT_FN_ATTRS256 633_mm256_mask_packus_epi32(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B) 634{ 635 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M, 636 (__v16hi)_mm256_packus_epi32(__A, __B), 637 (__v16hi)__W); 638} 639 640static __inline__ __m128i __DEFAULT_FN_ATTRS128 641_mm_maskz_packus_epi16(__mmask16 __M, __m128i __A, __m128i __B) 642{ 643 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, 644 (__v16qi)_mm_packus_epi16(__A, __B), 645 (__v16qi)_mm_setzero_si128()); 646} 647 648static __inline__ __m128i __DEFAULT_FN_ATTRS128 649_mm_mask_packus_epi16(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B) 650{ 651 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, 652 (__v16qi)_mm_packus_epi16(__A, __B), 653 (__v16qi)__W); 654} 655 656static __inline__ __m256i __DEFAULT_FN_ATTRS256 657_mm256_maskz_packus_epi16(__mmask32 __M, __m256i __A, __m256i __B) 658{ 659 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, 660 (__v32qi)_mm256_packus_epi16(__A, __B), 661 (__v32qi)_mm256_setzero_si256()); 662} 663 664static __inline__ __m256i __DEFAULT_FN_ATTRS256 665_mm256_mask_packus_epi16(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B) 666{ 667 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, 668 (__v32qi)_mm256_packus_epi16(__A, __B), 669 (__v32qi)__W); 670} 671 672static __inline__ __m128i __DEFAULT_FN_ATTRS128 673_mm_mask_adds_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) 674{ 675 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, 676 (__v16qi)_mm_adds_epi8(__A, __B), 677 (__v16qi)__W); 678} 679 680static __inline__ __m128i __DEFAULT_FN_ATTRS128 681_mm_maskz_adds_epi8(__mmask16 __U, __m128i __A, __m128i __B) 682{ 683 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, 684 (__v16qi)_mm_adds_epi8(__A, __B), 685 (__v16qi)_mm_setzero_si128()); 686} 687 688static __inline__ __m256i __DEFAULT_FN_ATTRS256 689_mm256_mask_adds_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) 690{ 691 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, 692 (__v32qi)_mm256_adds_epi8(__A, __B), 693 (__v32qi)__W); 694} 695 696static __inline__ __m256i __DEFAULT_FN_ATTRS256 697_mm256_maskz_adds_epi8(__mmask32 __U, __m256i __A, __m256i __B) 698{ 699 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, 700 (__v32qi)_mm256_adds_epi8(__A, __B), 701 (__v32qi)_mm256_setzero_si256()); 702} 703 704static __inline__ __m128i __DEFAULT_FN_ATTRS128 705_mm_mask_adds_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 706{ 707 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 708 (__v8hi)_mm_adds_epi16(__A, __B), 709 (__v8hi)__W); 710} 711 712static __inline__ __m128i __DEFAULT_FN_ATTRS128 713_mm_maskz_adds_epi16(__mmask8 __U, __m128i __A, __m128i __B) 714{ 715 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 716 (__v8hi)_mm_adds_epi16(__A, __B), 717 (__v8hi)_mm_setzero_si128()); 718} 719 720static __inline__ __m256i __DEFAULT_FN_ATTRS256 721_mm256_mask_adds_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) 722{ 723 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 724 (__v16hi)_mm256_adds_epi16(__A, __B), 725 (__v16hi)__W); 726} 727 728static __inline__ __m256i __DEFAULT_FN_ATTRS256 729_mm256_maskz_adds_epi16(__mmask16 __U, __m256i __A, __m256i __B) 730{ 731 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 732 (__v16hi)_mm256_adds_epi16(__A, __B), 733 (__v16hi)_mm256_setzero_si256()); 734} 735 736static __inline__ __m128i __DEFAULT_FN_ATTRS128 737_mm_mask_adds_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) 738{ 739 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, 740 (__v16qi)_mm_adds_epu8(__A, __B), 741 (__v16qi)__W); 742} 743 744static __inline__ __m128i __DEFAULT_FN_ATTRS128 745_mm_maskz_adds_epu8(__mmask16 __U, __m128i __A, __m128i __B) 746{ 747 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, 748 (__v16qi)_mm_adds_epu8(__A, __B), 749 (__v16qi)_mm_setzero_si128()); 750} 751 752static __inline__ __m256i __DEFAULT_FN_ATTRS256 753_mm256_mask_adds_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) 754{ 755 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, 756 (__v32qi)_mm256_adds_epu8(__A, __B), 757 (__v32qi)__W); 758} 759 760static __inline__ __m256i __DEFAULT_FN_ATTRS256 761_mm256_maskz_adds_epu8(__mmask32 __U, __m256i __A, __m256i __B) 762{ 763 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, 764 (__v32qi)_mm256_adds_epu8(__A, __B), 765 (__v32qi)_mm256_setzero_si256()); 766} 767 768static __inline__ __m128i __DEFAULT_FN_ATTRS128 769_mm_mask_adds_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 770{ 771 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 772 (__v8hi)_mm_adds_epu16(__A, __B), 773 (__v8hi)__W); 774} 775 776static __inline__ __m128i __DEFAULT_FN_ATTRS128 777_mm_maskz_adds_epu16(__mmask8 __U, __m128i __A, __m128i __B) 778{ 779 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 780 (__v8hi)_mm_adds_epu16(__A, __B), 781 (__v8hi)_mm_setzero_si128()); 782} 783 784static __inline__ __m256i __DEFAULT_FN_ATTRS256 785_mm256_mask_adds_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) 786{ 787 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 788 (__v16hi)_mm256_adds_epu16(__A, __B), 789 (__v16hi)__W); 790} 791 792static __inline__ __m256i __DEFAULT_FN_ATTRS256 793_mm256_maskz_adds_epu16(__mmask16 __U, __m256i __A, __m256i __B) 794{ 795 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 796 (__v16hi)_mm256_adds_epu16(__A, __B), 797 (__v16hi)_mm256_setzero_si256()); 798} 799 800static __inline__ __m128i __DEFAULT_FN_ATTRS128 801_mm_mask_avg_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) 802{ 803 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, 804 (__v16qi)_mm_avg_epu8(__A, __B), 805 (__v16qi)__W); 806} 807 808static __inline__ __m128i __DEFAULT_FN_ATTRS128 809_mm_maskz_avg_epu8(__mmask16 __U, __m128i __A, __m128i __B) 810{ 811 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, 812 (__v16qi)_mm_avg_epu8(__A, __B), 813 (__v16qi)_mm_setzero_si128()); 814} 815 816static __inline__ __m256i __DEFAULT_FN_ATTRS256 817_mm256_mask_avg_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) 818{ 819 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, 820 (__v32qi)_mm256_avg_epu8(__A, __B), 821 (__v32qi)__W); 822} 823 824static __inline__ __m256i __DEFAULT_FN_ATTRS256 825_mm256_maskz_avg_epu8(__mmask32 __U, __m256i __A, __m256i __B) 826{ 827 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, 828 (__v32qi)_mm256_avg_epu8(__A, __B), 829 (__v32qi)_mm256_setzero_si256()); 830} 831 832static __inline__ __m128i __DEFAULT_FN_ATTRS128 833_mm_mask_avg_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 834{ 835 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 836 (__v8hi)_mm_avg_epu16(__A, __B), 837 (__v8hi)__W); 838} 839 840static __inline__ __m128i __DEFAULT_FN_ATTRS128 841_mm_maskz_avg_epu16(__mmask8 __U, __m128i __A, __m128i __B) 842{ 843 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 844 (__v8hi)_mm_avg_epu16(__A, __B), 845 (__v8hi)_mm_setzero_si128()); 846} 847 848static __inline__ __m256i __DEFAULT_FN_ATTRS256 849_mm256_mask_avg_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) 850{ 851 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 852 (__v16hi)_mm256_avg_epu16(__A, __B), 853 (__v16hi)__W); 854} 855 856static __inline__ __m256i __DEFAULT_FN_ATTRS256 857_mm256_maskz_avg_epu16(__mmask16 __U, __m256i __A, __m256i __B) 858{ 859 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 860 (__v16hi)_mm256_avg_epu16(__A, __B), 861 (__v16hi)_mm256_setzero_si256()); 862} 863 864static __inline__ __m128i __DEFAULT_FN_ATTRS128 865_mm_maskz_max_epi8(__mmask16 __M, __m128i __A, __m128i __B) 866{ 867 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, 868 (__v16qi)_mm_max_epi8(__A, __B), 869 (__v16qi)_mm_setzero_si128()); 870} 871 872static __inline__ __m128i __DEFAULT_FN_ATTRS128 873_mm_mask_max_epi8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B) 874{ 875 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, 876 (__v16qi)_mm_max_epi8(__A, __B), 877 (__v16qi)__W); 878} 879 880static __inline__ __m256i __DEFAULT_FN_ATTRS256 881_mm256_maskz_max_epi8(__mmask32 __M, __m256i __A, __m256i __B) 882{ 883 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, 884 (__v32qi)_mm256_max_epi8(__A, __B), 885 (__v32qi)_mm256_setzero_si256()); 886} 887 888static __inline__ __m256i __DEFAULT_FN_ATTRS256 889_mm256_mask_max_epi8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B) 890{ 891 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, 892 (__v32qi)_mm256_max_epi8(__A, __B), 893 (__v32qi)__W); 894} 895 896static __inline__ __m128i __DEFAULT_FN_ATTRS128 897_mm_maskz_max_epi16(__mmask8 __M, __m128i __A, __m128i __B) 898{ 899 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M, 900 (__v8hi)_mm_max_epi16(__A, __B), 901 (__v8hi)_mm_setzero_si128()); 902} 903 904static __inline__ __m128i __DEFAULT_FN_ATTRS128 905_mm_mask_max_epi16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) 906{ 907 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M, 908 (__v8hi)_mm_max_epi16(__A, __B), 909 (__v8hi)__W); 910} 911 912static __inline__ __m256i __DEFAULT_FN_ATTRS256 913_mm256_maskz_max_epi16(__mmask16 __M, __m256i __A, __m256i __B) 914{ 915 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M, 916 (__v16hi)_mm256_max_epi16(__A, __B), 917 (__v16hi)_mm256_setzero_si256()); 918} 919 920static __inline__ __m256i __DEFAULT_FN_ATTRS256 921_mm256_mask_max_epi16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B) 922{ 923 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M, 924 (__v16hi)_mm256_max_epi16(__A, __B), 925 (__v16hi)__W); 926} 927 928static __inline__ __m128i __DEFAULT_FN_ATTRS128 929_mm_maskz_max_epu8(__mmask16 __M, __m128i __A, __m128i __B) 930{ 931 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, 932 (__v16qi)_mm_max_epu8(__A, __B), 933 (__v16qi)_mm_setzero_si128()); 934} 935 936static __inline__ __m128i __DEFAULT_FN_ATTRS128 937_mm_mask_max_epu8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B) 938{ 939 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, 940 (__v16qi)_mm_max_epu8(__A, __B), 941 (__v16qi)__W); 942} 943 944static __inline__ __m256i __DEFAULT_FN_ATTRS256 945_mm256_maskz_max_epu8 (__mmask32 __M, __m256i __A, __m256i __B) 946{ 947 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, 948 (__v32qi)_mm256_max_epu8(__A, __B), 949 (__v32qi)_mm256_setzero_si256()); 950} 951 952static __inline__ __m256i __DEFAULT_FN_ATTRS256 953_mm256_mask_max_epu8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B) 954{ 955 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, 956 (__v32qi)_mm256_max_epu8(__A, __B), 957 (__v32qi)__W); 958} 959 960static __inline__ __m128i __DEFAULT_FN_ATTRS128 961_mm_maskz_max_epu16(__mmask8 __M, __m128i __A, __m128i __B) 962{ 963 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M, 964 (__v8hi)_mm_max_epu16(__A, __B), 965 (__v8hi)_mm_setzero_si128()); 966} 967 968static __inline__ __m128i __DEFAULT_FN_ATTRS128 969_mm_mask_max_epu16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) 970{ 971 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M, 972 (__v8hi)_mm_max_epu16(__A, __B), 973 (__v8hi)__W); 974} 975 976static __inline__ __m256i __DEFAULT_FN_ATTRS256 977_mm256_maskz_max_epu16(__mmask16 __M, __m256i __A, __m256i __B) 978{ 979 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M, 980 (__v16hi)_mm256_max_epu16(__A, __B), 981 (__v16hi)_mm256_setzero_si256()); 982} 983 984static __inline__ __m256i __DEFAULT_FN_ATTRS256 985_mm256_mask_max_epu16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B) 986{ 987 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M, 988 (__v16hi)_mm256_max_epu16(__A, __B), 989 (__v16hi)__W); 990} 991 992static __inline__ __m128i __DEFAULT_FN_ATTRS128 993_mm_maskz_min_epi8(__mmask16 __M, __m128i __A, __m128i __B) 994{ 995 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, 996 (__v16qi)_mm_min_epi8(__A, __B), 997 (__v16qi)_mm_setzero_si128()); 998} 999 1000static __inline__ __m128i __DEFAULT_FN_ATTRS128 1001_mm_mask_min_epi8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B) 1002{ 1003 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, 1004 (__v16qi)_mm_min_epi8(__A, __B), 1005 (__v16qi)__W); 1006} 1007 1008static __inline__ __m256i __DEFAULT_FN_ATTRS256 1009_mm256_maskz_min_epi8(__mmask32 __M, __m256i __A, __m256i __B) 1010{ 1011 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, 1012 (__v32qi)_mm256_min_epi8(__A, __B), 1013 (__v32qi)_mm256_setzero_si256()); 1014} 1015 1016static __inline__ __m256i __DEFAULT_FN_ATTRS256 1017_mm256_mask_min_epi8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B) 1018{ 1019 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, 1020 (__v32qi)_mm256_min_epi8(__A, __B), 1021 (__v32qi)__W); 1022} 1023 1024static __inline__ __m128i __DEFAULT_FN_ATTRS128 1025_mm_maskz_min_epi16(__mmask8 __M, __m128i __A, __m128i __B) 1026{ 1027 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M, 1028 (__v8hi)_mm_min_epi16(__A, __B), 1029 (__v8hi)_mm_setzero_si128()); 1030} 1031 1032static __inline__ __m128i __DEFAULT_FN_ATTRS128 1033_mm_mask_min_epi16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) 1034{ 1035 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M, 1036 (__v8hi)_mm_min_epi16(__A, __B), 1037 (__v8hi)__W); 1038} 1039 1040static __inline__ __m256i __DEFAULT_FN_ATTRS256 1041_mm256_maskz_min_epi16(__mmask16 __M, __m256i __A, __m256i __B) 1042{ 1043 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M, 1044 (__v16hi)_mm256_min_epi16(__A, __B), 1045 (__v16hi)_mm256_setzero_si256()); 1046} 1047 1048static __inline__ __m256i __DEFAULT_FN_ATTRS256 1049_mm256_mask_min_epi16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B) 1050{ 1051 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M, 1052 (__v16hi)_mm256_min_epi16(__A, __B), 1053 (__v16hi)__W); 1054} 1055 1056static __inline__ __m128i __DEFAULT_FN_ATTRS128 1057_mm_maskz_min_epu8(__mmask16 __M, __m128i __A, __m128i __B) 1058{ 1059 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, 1060 (__v16qi)_mm_min_epu8(__A, __B), 1061 (__v16qi)_mm_setzero_si128()); 1062} 1063 1064static __inline__ __m128i __DEFAULT_FN_ATTRS128 1065_mm_mask_min_epu8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B) 1066{ 1067 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, 1068 (__v16qi)_mm_min_epu8(__A, __B), 1069 (__v16qi)__W); 1070} 1071 1072static __inline__ __m256i __DEFAULT_FN_ATTRS256 1073_mm256_maskz_min_epu8 (__mmask32 __M, __m256i __A, __m256i __B) 1074{ 1075 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, 1076 (__v32qi)_mm256_min_epu8(__A, __B), 1077 (__v32qi)_mm256_setzero_si256()); 1078} 1079 1080static __inline__ __m256i __DEFAULT_FN_ATTRS256 1081_mm256_mask_min_epu8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B) 1082{ 1083 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, 1084 (__v32qi)_mm256_min_epu8(__A, __B), 1085 (__v32qi)__W); 1086} 1087 1088static __inline__ __m128i __DEFAULT_FN_ATTRS128 1089_mm_maskz_min_epu16(__mmask8 __M, __m128i __A, __m128i __B) 1090{ 1091 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M, 1092 (__v8hi)_mm_min_epu16(__A, __B), 1093 (__v8hi)_mm_setzero_si128()); 1094} 1095 1096static __inline__ __m128i __DEFAULT_FN_ATTRS128 1097_mm_mask_min_epu16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) 1098{ 1099 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M, 1100 (__v8hi)_mm_min_epu16(__A, __B), 1101 (__v8hi)__W); 1102} 1103 1104static __inline__ __m256i __DEFAULT_FN_ATTRS256 1105_mm256_maskz_min_epu16(__mmask16 __M, __m256i __A, __m256i __B) 1106{ 1107 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M, 1108 (__v16hi)_mm256_min_epu16(__A, __B), 1109 (__v16hi)_mm256_setzero_si256()); 1110} 1111 1112static __inline__ __m256i __DEFAULT_FN_ATTRS256 1113_mm256_mask_min_epu16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B) 1114{ 1115 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M, 1116 (__v16hi)_mm256_min_epu16(__A, __B), 1117 (__v16hi)__W); 1118} 1119 1120static __inline__ __m128i __DEFAULT_FN_ATTRS128 1121_mm_mask_shuffle_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) 1122{ 1123 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, 1124 (__v16qi)_mm_shuffle_epi8(__A, __B), 1125 (__v16qi)__W); 1126} 1127 1128static __inline__ __m128i __DEFAULT_FN_ATTRS128 1129_mm_maskz_shuffle_epi8(__mmask16 __U, __m128i __A, __m128i __B) 1130{ 1131 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, 1132 (__v16qi)_mm_shuffle_epi8(__A, __B), 1133 (__v16qi)_mm_setzero_si128()); 1134} 1135 1136static __inline__ __m256i __DEFAULT_FN_ATTRS256 1137_mm256_mask_shuffle_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) 1138{ 1139 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, 1140 (__v32qi)_mm256_shuffle_epi8(__A, __B), 1141 (__v32qi)__W); 1142} 1143 1144static __inline__ __m256i __DEFAULT_FN_ATTRS256 1145_mm256_maskz_shuffle_epi8(__mmask32 __U, __m256i __A, __m256i __B) 1146{ 1147 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, 1148 (__v32qi)_mm256_shuffle_epi8(__A, __B), 1149 (__v32qi)_mm256_setzero_si256()); 1150} 1151 1152static __inline__ __m128i __DEFAULT_FN_ATTRS128 1153_mm_mask_subs_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) 1154{ 1155 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, 1156 (__v16qi)_mm_subs_epi8(__A, __B), 1157 (__v16qi)__W); 1158} 1159 1160static __inline__ __m128i __DEFAULT_FN_ATTRS128 1161_mm_maskz_subs_epi8(__mmask16 __U, __m128i __A, __m128i __B) 1162{ 1163 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, 1164 (__v16qi)_mm_subs_epi8(__A, __B), 1165 (__v16qi)_mm_setzero_si128()); 1166} 1167 1168static __inline__ __m256i __DEFAULT_FN_ATTRS256 1169_mm256_mask_subs_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) 1170{ 1171 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, 1172 (__v32qi)_mm256_subs_epi8(__A, __B), 1173 (__v32qi)__W); 1174} 1175 1176static __inline__ __m256i __DEFAULT_FN_ATTRS256 1177_mm256_maskz_subs_epi8(__mmask32 __U, __m256i __A, __m256i __B) 1178{ 1179 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, 1180 (__v32qi)_mm256_subs_epi8(__A, __B), 1181 (__v32qi)_mm256_setzero_si256()); 1182} 1183 1184static __inline__ __m128i __DEFAULT_FN_ATTRS128 1185_mm_mask_subs_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 1186{ 1187 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 1188 (__v8hi)_mm_subs_epi16(__A, __B), 1189 (__v8hi)__W); 1190} 1191 1192static __inline__ __m128i __DEFAULT_FN_ATTRS128 1193_mm_maskz_subs_epi16(__mmask8 __U, __m128i __A, __m128i __B) 1194{ 1195 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 1196 (__v8hi)_mm_subs_epi16(__A, __B), 1197 (__v8hi)_mm_setzero_si128()); 1198} 1199 1200static __inline__ __m256i __DEFAULT_FN_ATTRS256 1201_mm256_mask_subs_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) 1202{ 1203 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 1204 (__v16hi)_mm256_subs_epi16(__A, __B), 1205 (__v16hi)__W); 1206} 1207 1208static __inline__ __m256i __DEFAULT_FN_ATTRS256 1209_mm256_maskz_subs_epi16(__mmask16 __U, __m256i __A, __m256i __B) 1210{ 1211 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 1212 (__v16hi)_mm256_subs_epi16(__A, __B), 1213 (__v16hi)_mm256_setzero_si256()); 1214} 1215 1216static __inline__ __m128i __DEFAULT_FN_ATTRS128 1217_mm_mask_subs_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) 1218{ 1219 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, 1220 (__v16qi)_mm_subs_epu8(__A, __B), 1221 (__v16qi)__W); 1222} 1223 1224static __inline__ __m128i __DEFAULT_FN_ATTRS128 1225_mm_maskz_subs_epu8(__mmask16 __U, __m128i __A, __m128i __B) 1226{ 1227 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, 1228 (__v16qi)_mm_subs_epu8(__A, __B), 1229 (__v16qi)_mm_setzero_si128()); 1230} 1231 1232static __inline__ __m256i __DEFAULT_FN_ATTRS256 1233_mm256_mask_subs_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) 1234{ 1235 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, 1236 (__v32qi)_mm256_subs_epu8(__A, __B), 1237 (__v32qi)__W); 1238} 1239 1240static __inline__ __m256i __DEFAULT_FN_ATTRS256 1241_mm256_maskz_subs_epu8(__mmask32 __U, __m256i __A, __m256i __B) 1242{ 1243 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, 1244 (__v32qi)_mm256_subs_epu8(__A, __B), 1245 (__v32qi)_mm256_setzero_si256()); 1246} 1247 1248static __inline__ __m128i __DEFAULT_FN_ATTRS128 1249_mm_mask_subs_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 1250{ 1251 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 1252 (__v8hi)_mm_subs_epu16(__A, __B), 1253 (__v8hi)__W); 1254} 1255 1256static __inline__ __m128i __DEFAULT_FN_ATTRS128 1257_mm_maskz_subs_epu16(__mmask8 __U, __m128i __A, __m128i __B) 1258{ 1259 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 1260 (__v8hi)_mm_subs_epu16(__A, __B), 1261 (__v8hi)_mm_setzero_si128()); 1262} 1263 1264static __inline__ __m256i __DEFAULT_FN_ATTRS256 1265_mm256_mask_subs_epu16(__m256i __W, __mmask16 __U, __m256i __A, 1266 __m256i __B) { 1267 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 1268 (__v16hi)_mm256_subs_epu16(__A, __B), 1269 (__v16hi)__W); 1270} 1271 1272static __inline__ __m256i __DEFAULT_FN_ATTRS256 1273_mm256_maskz_subs_epu16(__mmask16 __U, __m256i __A, __m256i __B) 1274{ 1275 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 1276 (__v16hi)_mm256_subs_epu16(__A, __B), 1277 (__v16hi)_mm256_setzero_si256()); 1278} 1279 1280static __inline__ __m128i __DEFAULT_FN_ATTRS128 1281_mm_permutex2var_epi16(__m128i __A, __m128i __I, __m128i __B) 1282{ 1283 return (__m128i)__builtin_ia32_vpermi2varhi128((__v8hi)__A, (__v8hi)__I, 1284 (__v8hi) __B); 1285} 1286 1287static __inline__ __m128i __DEFAULT_FN_ATTRS128 1288_mm_mask_permutex2var_epi16(__m128i __A, __mmask8 __U, __m128i __I, 1289 __m128i __B) 1290{ 1291 return (__m128i)__builtin_ia32_selectw_128(__U, 1292 (__v8hi)_mm_permutex2var_epi16(__A, __I, __B), 1293 (__v8hi)__A); 1294} 1295 1296static __inline__ __m128i __DEFAULT_FN_ATTRS128 1297_mm_mask2_permutex2var_epi16(__m128i __A, __m128i __I, __mmask8 __U, 1298 __m128i __B) 1299{ 1300 return (__m128i)__builtin_ia32_selectw_128(__U, 1301 (__v8hi)_mm_permutex2var_epi16(__A, __I, __B), 1302 (__v8hi)__I); 1303} 1304 1305static __inline__ __m128i __DEFAULT_FN_ATTRS128 1306_mm_maskz_permutex2var_epi16 (__mmask8 __U, __m128i __A, __m128i __I, 1307 __m128i __B) 1308{ 1309 return (__m128i)__builtin_ia32_selectw_128(__U, 1310 (__v8hi)_mm_permutex2var_epi16(__A, __I, __B), 1311 (__v8hi)_mm_setzero_si128()); 1312} 1313 1314static __inline__ __m256i __DEFAULT_FN_ATTRS256 1315_mm256_permutex2var_epi16(__m256i __A, __m256i __I, __m256i __B) 1316{ 1317 return (__m256i)__builtin_ia32_vpermi2varhi256((__v16hi)__A, (__v16hi)__I, 1318 (__v16hi)__B); 1319} 1320 1321static __inline__ __m256i __DEFAULT_FN_ATTRS256 1322_mm256_mask_permutex2var_epi16(__m256i __A, __mmask16 __U, __m256i __I, 1323 __m256i __B) 1324{ 1325 return (__m256i)__builtin_ia32_selectw_256(__U, 1326 (__v16hi)_mm256_permutex2var_epi16(__A, __I, __B), 1327 (__v16hi)__A); 1328} 1329 1330static __inline__ __m256i __DEFAULT_FN_ATTRS256 1331_mm256_mask2_permutex2var_epi16(__m256i __A, __m256i __I, __mmask16 __U, 1332 __m256i __B) 1333{ 1334 return (__m256i)__builtin_ia32_selectw_256(__U, 1335 (__v16hi)_mm256_permutex2var_epi16(__A, __I, __B), 1336 (__v16hi)__I); 1337} 1338 1339static __inline__ __m256i __DEFAULT_FN_ATTRS256 1340_mm256_maskz_permutex2var_epi16 (__mmask16 __U, __m256i __A, __m256i __I, 1341 __m256i __B) 1342{ 1343 return (__m256i)__builtin_ia32_selectw_256(__U, 1344 (__v16hi)_mm256_permutex2var_epi16(__A, __I, __B), 1345 (__v16hi)_mm256_setzero_si256()); 1346} 1347 1348static __inline__ __m128i __DEFAULT_FN_ATTRS128 1349_mm_mask_maddubs_epi16(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) { 1350 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 1351 (__v8hi)_mm_maddubs_epi16(__X, __Y), 1352 (__v8hi)__W); 1353} 1354 1355static __inline__ __m128i __DEFAULT_FN_ATTRS128 1356_mm_maskz_maddubs_epi16(__mmask8 __U, __m128i __X, __m128i __Y) { 1357 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 1358 (__v8hi)_mm_maddubs_epi16(__X, __Y), 1359 (__v8hi)_mm_setzero_si128()); 1360} 1361 1362static __inline__ __m256i __DEFAULT_FN_ATTRS256 1363_mm256_mask_maddubs_epi16(__m256i __W, __mmask16 __U, __m256i __X, 1364 __m256i __Y) { 1365 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 1366 (__v16hi)_mm256_maddubs_epi16(__X, __Y), 1367 (__v16hi)__W); 1368} 1369 1370static __inline__ __m256i __DEFAULT_FN_ATTRS256 1371_mm256_maskz_maddubs_epi16(__mmask16 __U, __m256i __X, __m256i __Y) { 1372 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 1373 (__v16hi)_mm256_maddubs_epi16(__X, __Y), 1374 (__v16hi)_mm256_setzero_si256()); 1375} 1376 1377static __inline__ __m128i __DEFAULT_FN_ATTRS128 1378_mm_mask_madd_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { 1379 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 1380 (__v4si)_mm_madd_epi16(__A, __B), 1381 (__v4si)__W); 1382} 1383 1384static __inline__ __m128i __DEFAULT_FN_ATTRS128 1385_mm_maskz_madd_epi16(__mmask8 __U, __m128i __A, __m128i __B) { 1386 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 1387 (__v4si)_mm_madd_epi16(__A, __B), 1388 (__v4si)_mm_setzero_si128()); 1389} 1390 1391static __inline__ __m256i __DEFAULT_FN_ATTRS256 1392_mm256_mask_madd_epi16(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { 1393 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 1394 (__v8si)_mm256_madd_epi16(__A, __B), 1395 (__v8si)__W); 1396} 1397 1398static __inline__ __m256i __DEFAULT_FN_ATTRS256 1399_mm256_maskz_madd_epi16(__mmask8 __U, __m256i __A, __m256i __B) { 1400 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 1401 (__v8si)_mm256_madd_epi16(__A, __B), 1402 (__v8si)_mm256_setzero_si256()); 1403} 1404 1405static __inline__ __m128i __DEFAULT_FN_ATTRS128 1406_mm_cvtsepi16_epi8 (__m128i __A) { 1407 return (__m128i) __builtin_ia32_pmovswb128_mask ((__v8hi) __A, 1408 (__v16qi) _mm_setzero_si128(), 1409 (__mmask8) -1); 1410} 1411 1412static __inline__ __m128i __DEFAULT_FN_ATTRS128 1413_mm_mask_cvtsepi16_epi8 (__m128i __O, __mmask8 __M, __m128i __A) { 1414 return (__m128i) __builtin_ia32_pmovswb128_mask ((__v8hi) __A, 1415 (__v16qi) __O, 1416 __M); 1417} 1418 1419static __inline__ __m128i __DEFAULT_FN_ATTRS128 1420_mm_maskz_cvtsepi16_epi8 (__mmask8 __M, __m128i __A) { 1421 return (__m128i) __builtin_ia32_pmovswb128_mask ((__v8hi) __A, 1422 (__v16qi) _mm_setzero_si128(), 1423 __M); 1424} 1425 1426static __inline__ __m128i __DEFAULT_FN_ATTRS256 1427_mm256_cvtsepi16_epi8 (__m256i __A) { 1428 return (__m128i) __builtin_ia32_pmovswb256_mask ((__v16hi) __A, 1429 (__v16qi) _mm_setzero_si128(), 1430 (__mmask16) -1); 1431} 1432 1433static __inline__ __m128i __DEFAULT_FN_ATTRS256 1434_mm256_mask_cvtsepi16_epi8 (__m128i __O, __mmask16 __M, __m256i __A) { 1435 return (__m128i) __builtin_ia32_pmovswb256_mask ((__v16hi) __A, 1436 (__v16qi) __O, 1437 __M); 1438} 1439 1440static __inline__ __m128i __DEFAULT_FN_ATTRS256 1441_mm256_maskz_cvtsepi16_epi8 (__mmask16 __M, __m256i __A) { 1442 return (__m128i) __builtin_ia32_pmovswb256_mask ((__v16hi) __A, 1443 (__v16qi) _mm_setzero_si128(), 1444 __M); 1445} 1446 1447static __inline__ __m128i __DEFAULT_FN_ATTRS128 1448_mm_cvtusepi16_epi8 (__m128i __A) { 1449 return (__m128i) __builtin_ia32_pmovuswb128_mask ((__v8hi) __A, 1450 (__v16qi) _mm_setzero_si128(), 1451 (__mmask8) -1); 1452} 1453 1454static __inline__ __m128i __DEFAULT_FN_ATTRS128 1455_mm_mask_cvtusepi16_epi8 (__m128i __O, __mmask8 __M, __m128i __A) { 1456 return (__m128i) __builtin_ia32_pmovuswb128_mask ((__v8hi) __A, 1457 (__v16qi) __O, 1458 __M); 1459} 1460 1461static __inline__ __m128i __DEFAULT_FN_ATTRS128 1462_mm_maskz_cvtusepi16_epi8 (__mmask8 __M, __m128i __A) { 1463 return (__m128i) __builtin_ia32_pmovuswb128_mask ((__v8hi) __A, 1464 (__v16qi) _mm_setzero_si128(), 1465 __M); 1466} 1467 1468static __inline__ __m128i __DEFAULT_FN_ATTRS256 1469_mm256_cvtusepi16_epi8 (__m256i __A) { 1470 return (__m128i) __builtin_ia32_pmovuswb256_mask ((__v16hi) __A, 1471 (__v16qi) _mm_setzero_si128(), 1472 (__mmask16) -1); 1473} 1474 1475static __inline__ __m128i __DEFAULT_FN_ATTRS256 1476_mm256_mask_cvtusepi16_epi8 (__m128i __O, __mmask16 __M, __m256i __A) { 1477 return (__m128i) __builtin_ia32_pmovuswb256_mask ((__v16hi) __A, 1478 (__v16qi) __O, 1479 __M); 1480} 1481 1482static __inline__ __m128i __DEFAULT_FN_ATTRS256 1483_mm256_maskz_cvtusepi16_epi8 (__mmask16 __M, __m256i __A) { 1484 return (__m128i) __builtin_ia32_pmovuswb256_mask ((__v16hi) __A, 1485 (__v16qi) _mm_setzero_si128(), 1486 __M); 1487} 1488 1489static __inline__ __m128i __DEFAULT_FN_ATTRS128 1490_mm_cvtepi16_epi8 (__m128i __A) { 1491 return (__m128i)__builtin_shufflevector( 1492 __builtin_convertvector((__v8hi)__A, __v8qi), 1493 (__v8qi){0, 0, 0, 0, 0, 0, 0, 0}, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 1494 12, 13, 14, 15); 1495} 1496 1497static __inline__ __m128i __DEFAULT_FN_ATTRS128 1498_mm_mask_cvtepi16_epi8 (__m128i __O, __mmask8 __M, __m128i __A) { 1499 return (__m128i) __builtin_ia32_pmovwb128_mask ((__v8hi) __A, 1500 (__v16qi) __O, 1501 __M); 1502} 1503 1504static __inline__ __m128i __DEFAULT_FN_ATTRS128 1505_mm_maskz_cvtepi16_epi8 (__mmask8 __M, __m128i __A) { 1506 return (__m128i) __builtin_ia32_pmovwb128_mask ((__v8hi) __A, 1507 (__v16qi) _mm_setzero_si128(), 1508 __M); 1509} 1510 1511static __inline__ void __DEFAULT_FN_ATTRS128 1512_mm_mask_cvtepi16_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) 1513{ 1514 __builtin_ia32_pmovwb128mem_mask ((__v16qi *) __P, (__v8hi) __A, __M); 1515} 1516 1517 1518static __inline__ void __DEFAULT_FN_ATTRS128 1519_mm_mask_cvtsepi16_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) 1520{ 1521 __builtin_ia32_pmovswb128mem_mask ((__v16qi *) __P, (__v8hi) __A, __M); 1522} 1523 1524static __inline__ void __DEFAULT_FN_ATTRS128 1525_mm_mask_cvtusepi16_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) 1526{ 1527 __builtin_ia32_pmovuswb128mem_mask ((__v16qi *) __P, (__v8hi) __A, __M); 1528} 1529 1530static __inline__ __m128i __DEFAULT_FN_ATTRS256 1531_mm256_cvtepi16_epi8 (__m256i __A) { 1532 return (__m128i)__builtin_convertvector((__v16hi) __A, __v16qi); 1533} 1534 1535static __inline__ __m128i __DEFAULT_FN_ATTRS256 1536_mm256_mask_cvtepi16_epi8 (__m128i __O, __mmask16 __M, __m256i __A) { 1537 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, 1538 (__v16qi)_mm256_cvtepi16_epi8(__A), 1539 (__v16qi)__O); 1540} 1541 1542static __inline__ __m128i __DEFAULT_FN_ATTRS256 1543_mm256_maskz_cvtepi16_epi8 (__mmask16 __M, __m256i __A) { 1544 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, 1545 (__v16qi)_mm256_cvtepi16_epi8(__A), 1546 (__v16qi)_mm_setzero_si128()); 1547} 1548 1549static __inline__ void __DEFAULT_FN_ATTRS256 1550_mm256_mask_cvtepi16_storeu_epi8 (void * __P, __mmask16 __M, __m256i __A) 1551{ 1552 __builtin_ia32_pmovwb256mem_mask ((__v16qi *) __P, (__v16hi) __A, __M); 1553} 1554 1555static __inline__ void __DEFAULT_FN_ATTRS256 1556_mm256_mask_cvtsepi16_storeu_epi8 (void * __P, __mmask16 __M, __m256i __A) 1557{ 1558 __builtin_ia32_pmovswb256mem_mask ((__v16qi *) __P, (__v16hi) __A, __M); 1559} 1560 1561static __inline__ void __DEFAULT_FN_ATTRS256 1562_mm256_mask_cvtusepi16_storeu_epi8 (void * __P, __mmask16 __M, __m256i __A) 1563{ 1564 __builtin_ia32_pmovuswb256mem_mask ((__v16qi*) __P, (__v16hi) __A, __M); 1565} 1566 1567static __inline__ __m128i __DEFAULT_FN_ATTRS128 1568_mm_mask_mulhrs_epi16(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) { 1569 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 1570 (__v8hi)_mm_mulhrs_epi16(__X, __Y), 1571 (__v8hi)__W); 1572} 1573 1574static __inline__ __m128i __DEFAULT_FN_ATTRS128 1575_mm_maskz_mulhrs_epi16(__mmask8 __U, __m128i __X, __m128i __Y) { 1576 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 1577 (__v8hi)_mm_mulhrs_epi16(__X, __Y), 1578 (__v8hi)_mm_setzero_si128()); 1579} 1580 1581static __inline__ __m256i __DEFAULT_FN_ATTRS256 1582_mm256_mask_mulhrs_epi16(__m256i __W, __mmask16 __U, __m256i __X, __m256i __Y) { 1583 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 1584 (__v16hi)_mm256_mulhrs_epi16(__X, __Y), 1585 (__v16hi)__W); 1586} 1587 1588static __inline__ __m256i __DEFAULT_FN_ATTRS256 1589_mm256_maskz_mulhrs_epi16(__mmask16 __U, __m256i __X, __m256i __Y) { 1590 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 1591 (__v16hi)_mm256_mulhrs_epi16(__X, __Y), 1592 (__v16hi)_mm256_setzero_si256()); 1593} 1594 1595static __inline__ __m128i __DEFAULT_FN_ATTRS128 1596_mm_mask_mulhi_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { 1597 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 1598 (__v8hi)_mm_mulhi_epu16(__A, __B), 1599 (__v8hi)__W); 1600} 1601 1602static __inline__ __m128i __DEFAULT_FN_ATTRS128 1603_mm_maskz_mulhi_epu16(__mmask8 __U, __m128i __A, __m128i __B) { 1604 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 1605 (__v8hi)_mm_mulhi_epu16(__A, __B), 1606 (__v8hi)_mm_setzero_si128()); 1607} 1608 1609static __inline__ __m256i __DEFAULT_FN_ATTRS256 1610_mm256_mask_mulhi_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { 1611 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 1612 (__v16hi)_mm256_mulhi_epu16(__A, __B), 1613 (__v16hi)__W); 1614} 1615 1616static __inline__ __m256i __DEFAULT_FN_ATTRS256 1617_mm256_maskz_mulhi_epu16(__mmask16 __U, __m256i __A, __m256i __B) { 1618 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 1619 (__v16hi)_mm256_mulhi_epu16(__A, __B), 1620 (__v16hi)_mm256_setzero_si256()); 1621} 1622 1623static __inline__ __m128i __DEFAULT_FN_ATTRS128 1624_mm_mask_mulhi_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { 1625 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 1626 (__v8hi)_mm_mulhi_epi16(__A, __B), 1627 (__v8hi)__W); 1628} 1629 1630static __inline__ __m128i __DEFAULT_FN_ATTRS128 1631_mm_maskz_mulhi_epi16(__mmask8 __U, __m128i __A, __m128i __B) { 1632 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 1633 (__v8hi)_mm_mulhi_epi16(__A, __B), 1634 (__v8hi)_mm_setzero_si128()); 1635} 1636 1637static __inline__ __m256i __DEFAULT_FN_ATTRS256 1638_mm256_mask_mulhi_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { 1639 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 1640 (__v16hi)_mm256_mulhi_epi16(__A, __B), 1641 (__v16hi)__W); 1642} 1643 1644static __inline__ __m256i __DEFAULT_FN_ATTRS256 1645_mm256_maskz_mulhi_epi16(__mmask16 __U, __m256i __A, __m256i __B) { 1646 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 1647 (__v16hi)_mm256_mulhi_epi16(__A, __B), 1648 (__v16hi)_mm256_setzero_si256()); 1649} 1650 1651static __inline__ __m128i __DEFAULT_FN_ATTRS128 1652_mm_mask_unpackhi_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) { 1653 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, 1654 (__v16qi)_mm_unpackhi_epi8(__A, __B), 1655 (__v16qi)__W); 1656} 1657 1658static __inline__ __m128i __DEFAULT_FN_ATTRS128 1659_mm_maskz_unpackhi_epi8(__mmask16 __U, __m128i __A, __m128i __B) { 1660 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, 1661 (__v16qi)_mm_unpackhi_epi8(__A, __B), 1662 (__v16qi)_mm_setzero_si128()); 1663} 1664 1665static __inline__ __m256i __DEFAULT_FN_ATTRS256 1666_mm256_mask_unpackhi_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) { 1667 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, 1668 (__v32qi)_mm256_unpackhi_epi8(__A, __B), 1669 (__v32qi)__W); 1670} 1671 1672static __inline__ __m256i __DEFAULT_FN_ATTRS256 1673_mm256_maskz_unpackhi_epi8(__mmask32 __U, __m256i __A, __m256i __B) { 1674 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, 1675 (__v32qi)_mm256_unpackhi_epi8(__A, __B), 1676 (__v32qi)_mm256_setzero_si256()); 1677} 1678 1679static __inline__ __m128i __DEFAULT_FN_ATTRS128 1680_mm_mask_unpackhi_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { 1681 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 1682 (__v8hi)_mm_unpackhi_epi16(__A, __B), 1683 (__v8hi)__W); 1684} 1685 1686static __inline__ __m128i __DEFAULT_FN_ATTRS128 1687_mm_maskz_unpackhi_epi16(__mmask8 __U, __m128i __A, __m128i __B) { 1688 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 1689 (__v8hi)_mm_unpackhi_epi16(__A, __B), 1690 (__v8hi) _mm_setzero_si128()); 1691} 1692 1693static __inline__ __m256i __DEFAULT_FN_ATTRS256 1694_mm256_mask_unpackhi_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { 1695 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 1696 (__v16hi)_mm256_unpackhi_epi16(__A, __B), 1697 (__v16hi)__W); 1698} 1699 1700static __inline__ __m256i __DEFAULT_FN_ATTRS256 1701_mm256_maskz_unpackhi_epi16(__mmask16 __U, __m256i __A, __m256i __B) { 1702 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 1703 (__v16hi)_mm256_unpackhi_epi16(__A, __B), 1704 (__v16hi)_mm256_setzero_si256()); 1705} 1706 1707static __inline__ __m128i __DEFAULT_FN_ATTRS128 1708_mm_mask_unpacklo_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) { 1709 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, 1710 (__v16qi)_mm_unpacklo_epi8(__A, __B), 1711 (__v16qi)__W); 1712} 1713 1714static __inline__ __m128i __DEFAULT_FN_ATTRS128 1715_mm_maskz_unpacklo_epi8(__mmask16 __U, __m128i __A, __m128i __B) { 1716 return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, 1717 (__v16qi)_mm_unpacklo_epi8(__A, __B), 1718 (__v16qi)_mm_setzero_si128()); 1719} 1720 1721static __inline__ __m256i __DEFAULT_FN_ATTRS256 1722_mm256_mask_unpacklo_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) { 1723 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, 1724 (__v32qi)_mm256_unpacklo_epi8(__A, __B), 1725 (__v32qi)__W); 1726} 1727 1728static __inline__ __m256i __DEFAULT_FN_ATTRS256 1729_mm256_maskz_unpacklo_epi8(__mmask32 __U, __m256i __A, __m256i __B) { 1730 return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, 1731 (__v32qi)_mm256_unpacklo_epi8(__A, __B), 1732 (__v32qi)_mm256_setzero_si256()); 1733} 1734 1735static __inline__ __m128i __DEFAULT_FN_ATTRS128 1736_mm_mask_unpacklo_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { 1737 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 1738 (__v8hi)_mm_unpacklo_epi16(__A, __B), 1739 (__v8hi)__W); 1740} 1741 1742static __inline__ __m128i __DEFAULT_FN_ATTRS128 1743_mm_maskz_unpacklo_epi16(__mmask8 __U, __m128i __A, __m128i __B) { 1744 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 1745 (__v8hi)_mm_unpacklo_epi16(__A, __B), 1746 (__v8hi) _mm_setzero_si128()); 1747} 1748 1749static __inline__ __m256i __DEFAULT_FN_ATTRS256 1750_mm256_mask_unpacklo_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { 1751 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 1752 (__v16hi)_mm256_unpacklo_epi16(__A, __B), 1753 (__v16hi)__W); 1754} 1755 1756static __inline__ __m256i __DEFAULT_FN_ATTRS256 1757_mm256_maskz_unpacklo_epi16(__mmask16 __U, __m256i __A, __m256i __B) { 1758 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 1759 (__v16hi)_mm256_unpacklo_epi16(__A, __B), 1760 (__v16hi)_mm256_setzero_si256()); 1761} 1762 1763static __inline__ __m128i __DEFAULT_FN_ATTRS128 1764_mm_mask_cvtepi8_epi16(__m128i __W, __mmask8 __U, __m128i __A) 1765{ 1766 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 1767 (__v8hi)_mm_cvtepi8_epi16(__A), 1768 (__v8hi)__W); 1769} 1770 1771static __inline__ __m128i __DEFAULT_FN_ATTRS128 1772_mm_maskz_cvtepi8_epi16(__mmask8 __U, __m128i __A) 1773{ 1774 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 1775 (__v8hi)_mm_cvtepi8_epi16(__A), 1776 (__v8hi)_mm_setzero_si128()); 1777} 1778 1779static __inline__ __m256i __DEFAULT_FN_ATTRS256 1780_mm256_mask_cvtepi8_epi16(__m256i __W, __mmask16 __U, __m128i __A) 1781{ 1782 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 1783 (__v16hi)_mm256_cvtepi8_epi16(__A), 1784 (__v16hi)__W); 1785} 1786 1787static __inline__ __m256i __DEFAULT_FN_ATTRS256 1788_mm256_maskz_cvtepi8_epi16(__mmask16 __U, __m128i __A) 1789{ 1790 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 1791 (__v16hi)_mm256_cvtepi8_epi16(__A), 1792 (__v16hi)_mm256_setzero_si256()); 1793} 1794 1795 1796static __inline__ __m128i __DEFAULT_FN_ATTRS128 1797_mm_mask_cvtepu8_epi16(__m128i __W, __mmask8 __U, __m128i __A) 1798{ 1799 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 1800 (__v8hi)_mm_cvtepu8_epi16(__A), 1801 (__v8hi)__W); 1802} 1803 1804static __inline__ __m128i __DEFAULT_FN_ATTRS128 1805_mm_maskz_cvtepu8_epi16(__mmask8 __U, __m128i __A) 1806{ 1807 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 1808 (__v8hi)_mm_cvtepu8_epi16(__A), 1809 (__v8hi)_mm_setzero_si128()); 1810} 1811 1812static __inline__ __m256i __DEFAULT_FN_ATTRS256 1813_mm256_mask_cvtepu8_epi16(__m256i __W, __mmask16 __U, __m128i __A) 1814{ 1815 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 1816 (__v16hi)_mm256_cvtepu8_epi16(__A), 1817 (__v16hi)__W); 1818} 1819 1820static __inline__ __m256i __DEFAULT_FN_ATTRS256 1821_mm256_maskz_cvtepu8_epi16 (__mmask16 __U, __m128i __A) 1822{ 1823 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 1824 (__v16hi)_mm256_cvtepu8_epi16(__A), 1825 (__v16hi)_mm256_setzero_si256()); 1826} 1827 1828 1829#define _mm_mask_shufflehi_epi16(W, U, A, imm) \ 1830 ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ 1831 (__v8hi)_mm_shufflehi_epi16((A), (imm)), \ 1832 (__v8hi)(__m128i)(W))) 1833 1834#define _mm_maskz_shufflehi_epi16(U, A, imm) \ 1835 ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ 1836 (__v8hi)_mm_shufflehi_epi16((A), (imm)), \ 1837 (__v8hi)_mm_setzero_si128())) 1838 1839#define _mm256_mask_shufflehi_epi16(W, U, A, imm) \ 1840 ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ 1841 (__v16hi)_mm256_shufflehi_epi16((A), (imm)), \ 1842 (__v16hi)(__m256i)(W))) 1843 1844#define _mm256_maskz_shufflehi_epi16(U, A, imm) \ 1845 ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ 1846 (__v16hi)_mm256_shufflehi_epi16((A), (imm)), \ 1847 (__v16hi)_mm256_setzero_si256())) 1848 1849#define _mm_mask_shufflelo_epi16(W, U, A, imm) \ 1850 ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ 1851 (__v8hi)_mm_shufflelo_epi16((A), (imm)), \ 1852 (__v8hi)(__m128i)(W))) 1853 1854#define _mm_maskz_shufflelo_epi16(U, A, imm) \ 1855 ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ 1856 (__v8hi)_mm_shufflelo_epi16((A), (imm)), \ 1857 (__v8hi)_mm_setzero_si128())) 1858 1859#define _mm256_mask_shufflelo_epi16(W, U, A, imm) \ 1860 ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ 1861 (__v16hi)_mm256_shufflelo_epi16((A), \ 1862 (imm)), \ 1863 (__v16hi)(__m256i)(W))) 1864 1865#define _mm256_maskz_shufflelo_epi16(U, A, imm) \ 1866 ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ 1867 (__v16hi)_mm256_shufflelo_epi16((A), \ 1868 (imm)), \ 1869 (__v16hi)_mm256_setzero_si256())) 1870 1871static __inline__ __m256i __DEFAULT_FN_ATTRS256 1872_mm256_sllv_epi16(__m256i __A, __m256i __B) 1873{ 1874 return (__m256i)__builtin_ia32_psllv16hi((__v16hi)__A, (__v16hi)__B); 1875} 1876 1877static __inline__ __m256i __DEFAULT_FN_ATTRS256 1878_mm256_mask_sllv_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) 1879{ 1880 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 1881 (__v16hi)_mm256_sllv_epi16(__A, __B), 1882 (__v16hi)__W); 1883} 1884 1885static __inline__ __m256i __DEFAULT_FN_ATTRS256 1886_mm256_maskz_sllv_epi16(__mmask16 __U, __m256i __A, __m256i __B) 1887{ 1888 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 1889 (__v16hi)_mm256_sllv_epi16(__A, __B), 1890 (__v16hi)_mm256_setzero_si256()); 1891} 1892 1893static __inline__ __m128i __DEFAULT_FN_ATTRS128 1894_mm_sllv_epi16(__m128i __A, __m128i __B) 1895{ 1896 return (__m128i)__builtin_ia32_psllv8hi((__v8hi)__A, (__v8hi)__B); 1897} 1898 1899static __inline__ __m128i __DEFAULT_FN_ATTRS128 1900_mm_mask_sllv_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 1901{ 1902 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 1903 (__v8hi)_mm_sllv_epi16(__A, __B), 1904 (__v8hi)__W); 1905} 1906 1907static __inline__ __m128i __DEFAULT_FN_ATTRS128 1908_mm_maskz_sllv_epi16(__mmask8 __U, __m128i __A, __m128i __B) 1909{ 1910 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 1911 (__v8hi)_mm_sllv_epi16(__A, __B), 1912 (__v8hi)_mm_setzero_si128()); 1913} 1914 1915static __inline__ __m128i __DEFAULT_FN_ATTRS128 1916_mm_mask_sll_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 1917{ 1918 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 1919 (__v8hi)_mm_sll_epi16(__A, __B), 1920 (__v8hi)__W); 1921} 1922 1923static __inline__ __m128i __DEFAULT_FN_ATTRS128 1924_mm_maskz_sll_epi16 (__mmask8 __U, __m128i __A, __m128i __B) 1925{ 1926 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 1927 (__v8hi)_mm_sll_epi16(__A, __B), 1928 (__v8hi)_mm_setzero_si128()); 1929} 1930 1931static __inline__ __m256i __DEFAULT_FN_ATTRS256 1932_mm256_mask_sll_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m128i __B) 1933{ 1934 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 1935 (__v16hi)_mm256_sll_epi16(__A, __B), 1936 (__v16hi)__W); 1937} 1938 1939static __inline__ __m256i __DEFAULT_FN_ATTRS256 1940_mm256_maskz_sll_epi16(__mmask16 __U, __m256i __A, __m128i __B) 1941{ 1942 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 1943 (__v16hi)_mm256_sll_epi16(__A, __B), 1944 (__v16hi)_mm256_setzero_si256()); 1945} 1946 1947static __inline__ __m128i __DEFAULT_FN_ATTRS128 1948_mm_mask_slli_epi16(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B) 1949{ 1950 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 1951 (__v8hi)_mm_slli_epi16(__A, (int)__B), 1952 (__v8hi)__W); 1953} 1954 1955static __inline__ __m128i __DEFAULT_FN_ATTRS128 1956_mm_maskz_slli_epi16 (__mmask8 __U, __m128i __A, unsigned int __B) 1957{ 1958 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 1959 (__v8hi)_mm_slli_epi16(__A, (int)__B), 1960 (__v8hi)_mm_setzero_si128()); 1961} 1962 1963static __inline__ __m256i __DEFAULT_FN_ATTRS256 1964_mm256_mask_slli_epi16(__m256i __W, __mmask16 __U, __m256i __A, 1965 unsigned int __B) 1966{ 1967 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 1968 (__v16hi)_mm256_slli_epi16(__A, (int)__B), 1969 (__v16hi)__W); 1970} 1971 1972static __inline__ __m256i __DEFAULT_FN_ATTRS256 1973_mm256_maskz_slli_epi16(__mmask16 __U, __m256i __A, unsigned int __B) 1974{ 1975 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 1976 (__v16hi)_mm256_slli_epi16(__A, (int)__B), 1977 (__v16hi)_mm256_setzero_si256()); 1978} 1979 1980static __inline__ __m256i __DEFAULT_FN_ATTRS256 1981_mm256_srlv_epi16(__m256i __A, __m256i __B) 1982{ 1983 return (__m256i)__builtin_ia32_psrlv16hi((__v16hi)__A, (__v16hi)__B); 1984} 1985 1986static __inline__ __m256i __DEFAULT_FN_ATTRS256 1987_mm256_mask_srlv_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) 1988{ 1989 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 1990 (__v16hi)_mm256_srlv_epi16(__A, __B), 1991 (__v16hi)__W); 1992} 1993 1994static __inline__ __m256i __DEFAULT_FN_ATTRS256 1995_mm256_maskz_srlv_epi16(__mmask16 __U, __m256i __A, __m256i __B) 1996{ 1997 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 1998 (__v16hi)_mm256_srlv_epi16(__A, __B), 1999 (__v16hi)_mm256_setzero_si256()); 2000} 2001 2002static __inline__ __m128i __DEFAULT_FN_ATTRS128 2003_mm_srlv_epi16(__m128i __A, __m128i __B) 2004{ 2005 return (__m128i)__builtin_ia32_psrlv8hi((__v8hi)__A, (__v8hi)__B); 2006} 2007 2008static __inline__ __m128i __DEFAULT_FN_ATTRS128 2009_mm_mask_srlv_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 2010{ 2011 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 2012 (__v8hi)_mm_srlv_epi16(__A, __B), 2013 (__v8hi)__W); 2014} 2015 2016static __inline__ __m128i __DEFAULT_FN_ATTRS128 2017_mm_maskz_srlv_epi16(__mmask8 __U, __m128i __A, __m128i __B) 2018{ 2019 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 2020 (__v8hi)_mm_srlv_epi16(__A, __B), 2021 (__v8hi)_mm_setzero_si128()); 2022} 2023 2024static __inline__ __m256i __DEFAULT_FN_ATTRS256 2025_mm256_srav_epi16(__m256i __A, __m256i __B) 2026{ 2027 return (__m256i)__builtin_ia32_psrav16hi((__v16hi)__A, (__v16hi)__B); 2028} 2029 2030static __inline__ __m256i __DEFAULT_FN_ATTRS256 2031_mm256_mask_srav_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) 2032{ 2033 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 2034 (__v16hi)_mm256_srav_epi16(__A, __B), 2035 (__v16hi)__W); 2036} 2037 2038static __inline__ __m256i __DEFAULT_FN_ATTRS256 2039_mm256_maskz_srav_epi16(__mmask16 __U, __m256i __A, __m256i __B) 2040{ 2041 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 2042 (__v16hi)_mm256_srav_epi16(__A, __B), 2043 (__v16hi)_mm256_setzero_si256()); 2044} 2045 2046static __inline__ __m128i __DEFAULT_FN_ATTRS128 2047_mm_srav_epi16(__m128i __A, __m128i __B) 2048{ 2049 return (__m128i)__builtin_ia32_psrav8hi((__v8hi)__A, (__v8hi)__B); 2050} 2051 2052static __inline__ __m128i __DEFAULT_FN_ATTRS128 2053_mm_mask_srav_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 2054{ 2055 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 2056 (__v8hi)_mm_srav_epi16(__A, __B), 2057 (__v8hi)__W); 2058} 2059 2060static __inline__ __m128i __DEFAULT_FN_ATTRS128 2061_mm_maskz_srav_epi16(__mmask8 __U, __m128i __A, __m128i __B) 2062{ 2063 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 2064 (__v8hi)_mm_srav_epi16(__A, __B), 2065 (__v8hi)_mm_setzero_si128()); 2066} 2067 2068static __inline__ __m128i __DEFAULT_FN_ATTRS128 2069_mm_mask_sra_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 2070{ 2071 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 2072 (__v8hi)_mm_sra_epi16(__A, __B), 2073 (__v8hi)__W); 2074} 2075 2076static __inline__ __m128i __DEFAULT_FN_ATTRS128 2077_mm_maskz_sra_epi16(__mmask8 __U, __m128i __A, __m128i __B) 2078{ 2079 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 2080 (__v8hi)_mm_sra_epi16(__A, __B), 2081 (__v8hi)_mm_setzero_si128()); 2082} 2083 2084static __inline__ __m256i __DEFAULT_FN_ATTRS256 2085_mm256_mask_sra_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m128i __B) 2086{ 2087 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 2088 (__v16hi)_mm256_sra_epi16(__A, __B), 2089 (__v16hi)__W); 2090} 2091 2092static __inline__ __m256i __DEFAULT_FN_ATTRS256 2093_mm256_maskz_sra_epi16(__mmask16 __U, __m256i __A, __m128i __B) 2094{ 2095 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 2096 (__v16hi)_mm256_sra_epi16(__A, __B), 2097 (__v16hi)_mm256_setzero_si256()); 2098} 2099 2100static __inline__ __m128i __DEFAULT_FN_ATTRS128 2101_mm_mask_srai_epi16(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B) 2102{ 2103 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 2104 (__v8hi)_mm_srai_epi16(__A, (int)__B), 2105 (__v8hi)__W); 2106} 2107 2108static __inline__ __m128i __DEFAULT_FN_ATTRS128 2109_mm_maskz_srai_epi16(__mmask8 __U, __m128i __A, unsigned int __B) 2110{ 2111 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 2112 (__v8hi)_mm_srai_epi16(__A, (int)__B), 2113 (__v8hi)_mm_setzero_si128()); 2114} 2115 2116static __inline__ __m256i __DEFAULT_FN_ATTRS256 2117_mm256_mask_srai_epi16(__m256i __W, __mmask16 __U, __m256i __A, 2118 unsigned int __B) 2119{ 2120 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 2121 (__v16hi)_mm256_srai_epi16(__A, (int)__B), 2122 (__v16hi)__W); 2123} 2124 2125static __inline__ __m256i __DEFAULT_FN_ATTRS256 2126_mm256_maskz_srai_epi16(__mmask16 __U, __m256i __A, unsigned int __B) 2127{ 2128 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 2129 (__v16hi)_mm256_srai_epi16(__A, (int)__B), 2130 (__v16hi)_mm256_setzero_si256()); 2131} 2132 2133static __inline__ __m128i __DEFAULT_FN_ATTRS128 2134_mm_mask_srl_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 2135{ 2136 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 2137 (__v8hi)_mm_srl_epi16(__A, __B), 2138 (__v8hi)__W); 2139} 2140 2141static __inline__ __m128i __DEFAULT_FN_ATTRS128 2142_mm_maskz_srl_epi16 (__mmask8 __U, __m128i __A, __m128i __B) 2143{ 2144 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 2145 (__v8hi)_mm_srl_epi16(__A, __B), 2146 (__v8hi)_mm_setzero_si128()); 2147} 2148 2149static __inline__ __m256i __DEFAULT_FN_ATTRS256 2150_mm256_mask_srl_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m128i __B) 2151{ 2152 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 2153 (__v16hi)_mm256_srl_epi16(__A, __B), 2154 (__v16hi)__W); 2155} 2156 2157static __inline__ __m256i __DEFAULT_FN_ATTRS256 2158_mm256_maskz_srl_epi16(__mmask16 __U, __m256i __A, __m128i __B) 2159{ 2160 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 2161 (__v16hi)_mm256_srl_epi16(__A, __B), 2162 (__v16hi)_mm256_setzero_si256()); 2163} 2164 2165static __inline__ __m128i __DEFAULT_FN_ATTRS128 2166_mm_mask_srli_epi16(__m128i __W, __mmask8 __U, __m128i __A, int __B) 2167{ 2168 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 2169 (__v8hi)_mm_srli_epi16(__A, __B), 2170 (__v8hi)__W); 2171} 2172 2173static __inline__ __m128i __DEFAULT_FN_ATTRS128 2174_mm_maskz_srli_epi16 (__mmask8 __U, __m128i __A, int __B) 2175{ 2176 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, 2177 (__v8hi)_mm_srli_epi16(__A, __B), 2178 (__v8hi)_mm_setzero_si128()); 2179} 2180 2181static __inline__ __m256i __DEFAULT_FN_ATTRS256 2182_mm256_mask_srli_epi16(__m256i __W, __mmask16 __U, __m256i __A, int __B) 2183{ 2184 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 2185 (__v16hi)_mm256_srli_epi16(__A, __B), 2186 (__v16hi)__W); 2187} 2188 2189static __inline__ __m256i __DEFAULT_FN_ATTRS256 2190_mm256_maskz_srli_epi16(__mmask16 __U, __m256i __A, int __B) 2191{ 2192 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, 2193 (__v16hi)_mm256_srli_epi16(__A, __B), 2194 (__v16hi)_mm256_setzero_si256()); 2195} 2196 2197static __inline__ __m128i __DEFAULT_FN_ATTRS128 2198_mm_mask_mov_epi16 (__m128i __W, __mmask8 __U, __m128i __A) 2199{ 2200 return (__m128i) __builtin_ia32_selectw_128 ((__mmask8) __U, 2201 (__v8hi) __A, 2202 (__v8hi) __W); 2203} 2204 2205static __inline__ __m128i __DEFAULT_FN_ATTRS128 2206_mm_maskz_mov_epi16 (__mmask8 __U, __m128i __A) 2207{ 2208 return (__m128i) __builtin_ia32_selectw_128 ((__mmask8) __U, 2209 (__v8hi) __A, 2210 (__v8hi) _mm_setzero_si128 ()); 2211} 2212 2213static __inline__ __m256i __DEFAULT_FN_ATTRS256 2214_mm256_mask_mov_epi16 (__m256i __W, __mmask16 __U, __m256i __A) 2215{ 2216 return (__m256i) __builtin_ia32_selectw_256 ((__mmask16) __U, 2217 (__v16hi) __A, 2218 (__v16hi) __W); 2219} 2220 2221static __inline__ __m256i __DEFAULT_FN_ATTRS256 2222_mm256_maskz_mov_epi16 (__mmask16 __U, __m256i __A) 2223{ 2224 return (__m256i) __builtin_ia32_selectw_256 ((__mmask16) __U, 2225 (__v16hi) __A, 2226 (__v16hi) _mm256_setzero_si256 ()); 2227} 2228 2229static __inline__ __m128i __DEFAULT_FN_ATTRS128 2230_mm_mask_mov_epi8 (__m128i __W, __mmask16 __U, __m128i __A) 2231{ 2232 return (__m128i) __builtin_ia32_selectb_128 ((__mmask16) __U, 2233 (__v16qi) __A, 2234 (__v16qi) __W); 2235} 2236 2237static __inline__ __m128i __DEFAULT_FN_ATTRS128 2238_mm_maskz_mov_epi8 (__mmask16 __U, __m128i __A) 2239{ 2240 return (__m128i) __builtin_ia32_selectb_128 ((__mmask16) __U, 2241 (__v16qi) __A, 2242 (__v16qi) _mm_setzero_si128 ()); 2243} 2244 2245static __inline__ __m256i __DEFAULT_FN_ATTRS256 2246_mm256_mask_mov_epi8 (__m256i __W, __mmask32 __U, __m256i __A) 2247{ 2248 return (__m256i) __builtin_ia32_selectb_256 ((__mmask32) __U, 2249 (__v32qi) __A, 2250 (__v32qi) __W); 2251} 2252 2253static __inline__ __m256i __DEFAULT_FN_ATTRS256 2254_mm256_maskz_mov_epi8 (__mmask32 __U, __m256i __A) 2255{ 2256 return (__m256i) __builtin_ia32_selectb_256 ((__mmask32) __U, 2257 (__v32qi) __A, 2258 (__v32qi) _mm256_setzero_si256 ()); 2259} 2260 2261 2262static __inline__ __m128i __DEFAULT_FN_ATTRS128 2263_mm_mask_set1_epi8 (__m128i __O, __mmask16 __M, char __A) 2264{ 2265 return (__m128i) __builtin_ia32_selectb_128(__M, 2266 (__v16qi) _mm_set1_epi8(__A), 2267 (__v16qi) __O); 2268} 2269 2270static __inline__ __m128i __DEFAULT_FN_ATTRS128 2271_mm_maskz_set1_epi8 (__mmask16 __M, char __A) 2272{ 2273 return (__m128i) __builtin_ia32_selectb_128(__M, 2274 (__v16qi) _mm_set1_epi8(__A), 2275 (__v16qi) _mm_setzero_si128()); 2276} 2277 2278static __inline__ __m256i __DEFAULT_FN_ATTRS256 2279_mm256_mask_set1_epi8 (__m256i __O, __mmask32 __M, char __A) 2280{ 2281 return (__m256i) __builtin_ia32_selectb_256(__M, 2282 (__v32qi) _mm256_set1_epi8(__A), 2283 (__v32qi) __O); 2284} 2285 2286static __inline__ __m256i __DEFAULT_FN_ATTRS256 2287_mm256_maskz_set1_epi8 (__mmask32 __M, char __A) 2288{ 2289 return (__m256i) __builtin_ia32_selectb_256(__M, 2290 (__v32qi) _mm256_set1_epi8(__A), 2291 (__v32qi) _mm256_setzero_si256()); 2292} 2293 2294static __inline __m128i __DEFAULT_FN_ATTRS128 2295_mm_loadu_epi16 (void const *__P) 2296{ 2297 struct __loadu_epi16 { 2298 __m128i_u __v; 2299 } __attribute__((__packed__, __may_alias__)); 2300 return ((const struct __loadu_epi16*)__P)->__v; 2301} 2302 2303static __inline__ __m128i __DEFAULT_FN_ATTRS128 2304_mm_mask_loadu_epi16 (__m128i __W, __mmask8 __U, void const *__P) 2305{ 2306 return (__m128i) __builtin_ia32_loaddquhi128_mask ((const __v8hi *) __P, 2307 (__v8hi) __W, 2308 (__mmask8) __U); 2309} 2310 2311static __inline__ __m128i __DEFAULT_FN_ATTRS128 2312_mm_maskz_loadu_epi16 (__mmask8 __U, void const *__P) 2313{ 2314 return (__m128i) __builtin_ia32_loaddquhi128_mask ((const __v8hi *) __P, 2315 (__v8hi) 2316 _mm_setzero_si128 (), 2317 (__mmask8) __U); 2318} 2319 2320static __inline __m256i __DEFAULT_FN_ATTRS256 2321_mm256_loadu_epi16 (void const *__P) 2322{ 2323 struct __loadu_epi16 { 2324 __m256i_u __v; 2325 } __attribute__((__packed__, __may_alias__)); 2326 return ((const struct __loadu_epi16*)__P)->__v; 2327} 2328 2329static __inline__ __m256i __DEFAULT_FN_ATTRS256 2330_mm256_mask_loadu_epi16 (__m256i __W, __mmask16 __U, void const *__P) 2331{ 2332 return (__m256i) __builtin_ia32_loaddquhi256_mask ((const __v16hi *) __P, 2333 (__v16hi) __W, 2334 (__mmask16) __U); 2335} 2336 2337static __inline__ __m256i __DEFAULT_FN_ATTRS256 2338_mm256_maskz_loadu_epi16 (__mmask16 __U, void const *__P) 2339{ 2340 return (__m256i) __builtin_ia32_loaddquhi256_mask ((const __v16hi *) __P, 2341 (__v16hi) 2342 _mm256_setzero_si256 (), 2343 (__mmask16) __U); 2344} 2345 2346static __inline __m128i __DEFAULT_FN_ATTRS128 2347_mm_loadu_epi8 (void const *__P) 2348{ 2349 struct __loadu_epi8 { 2350 __m128i_u __v; 2351 } __attribute__((__packed__, __may_alias__)); 2352 return ((const struct __loadu_epi8*)__P)->__v; 2353} 2354 2355static __inline__ __m128i __DEFAULT_FN_ATTRS128 2356_mm_mask_loadu_epi8 (__m128i __W, __mmask16 __U, void const *__P) 2357{ 2358 return (__m128i) __builtin_ia32_loaddquqi128_mask ((const __v16qi *) __P, 2359 (__v16qi) __W, 2360 (__mmask16) __U); 2361} 2362 2363static __inline__ __m128i __DEFAULT_FN_ATTRS128 2364_mm_maskz_loadu_epi8 (__mmask16 __U, void const *__P) 2365{ 2366 return (__m128i) __builtin_ia32_loaddquqi128_mask ((const __v16qi *) __P, 2367 (__v16qi) 2368 _mm_setzero_si128 (), 2369 (__mmask16) __U); 2370} 2371 2372static __inline __m256i __DEFAULT_FN_ATTRS256 2373_mm256_loadu_epi8 (void const *__P) 2374{ 2375 struct __loadu_epi8 { 2376 __m256i_u __v; 2377 } __attribute__((__packed__, __may_alias__)); 2378 return ((const struct __loadu_epi8*)__P)->__v; 2379} 2380 2381static __inline__ __m256i __DEFAULT_FN_ATTRS256 2382_mm256_mask_loadu_epi8 (__m256i __W, __mmask32 __U, void const *__P) 2383{ 2384 return (__m256i) __builtin_ia32_loaddquqi256_mask ((const __v32qi *) __P, 2385 (__v32qi) __W, 2386 (__mmask32) __U); 2387} 2388 2389static __inline__ __m256i __DEFAULT_FN_ATTRS256 2390_mm256_maskz_loadu_epi8 (__mmask32 __U, void const *__P) 2391{ 2392 return (__m256i) __builtin_ia32_loaddquqi256_mask ((const __v32qi *) __P, 2393 (__v32qi) 2394 _mm256_setzero_si256 (), 2395 (__mmask32) __U); 2396} 2397 2398static __inline void __DEFAULT_FN_ATTRS128 2399_mm_storeu_epi16 (void *__P, __m128i __A) 2400{ 2401 struct __storeu_epi16 { 2402 __m128i_u __v; 2403 } __attribute__((__packed__, __may_alias__)); 2404 ((struct __storeu_epi16*)__P)->__v = __A; 2405} 2406 2407static __inline__ void __DEFAULT_FN_ATTRS128 2408_mm_mask_storeu_epi16 (void *__P, __mmask8 __U, __m128i __A) 2409{ 2410 __builtin_ia32_storedquhi128_mask ((__v8hi *) __P, 2411 (__v8hi) __A, 2412 (__mmask8) __U); 2413} 2414 2415static __inline void __DEFAULT_FN_ATTRS256 2416_mm256_storeu_epi16 (void *__P, __m256i __A) 2417{ 2418 struct __storeu_epi16 { 2419 __m256i_u __v; 2420 } __attribute__((__packed__, __may_alias__)); 2421 ((struct __storeu_epi16*)__P)->__v = __A; 2422} 2423 2424static __inline__ void __DEFAULT_FN_ATTRS256 2425_mm256_mask_storeu_epi16 (void *__P, __mmask16 __U, __m256i __A) 2426{ 2427 __builtin_ia32_storedquhi256_mask ((__v16hi *) __P, 2428 (__v16hi) __A, 2429 (__mmask16) __U); 2430} 2431 2432static __inline void __DEFAULT_FN_ATTRS128 2433_mm_storeu_epi8 (void *__P, __m128i __A) 2434{ 2435 struct __storeu_epi8 { 2436 __m128i_u __v; 2437 } __attribute__((__packed__, __may_alias__)); 2438 ((struct __storeu_epi8*)__P)->__v = __A; 2439} 2440 2441static __inline__ void __DEFAULT_FN_ATTRS128 2442_mm_mask_storeu_epi8 (void *__P, __mmask16 __U, __m128i __A) 2443{ 2444 __builtin_ia32_storedquqi128_mask ((__v16qi *) __P, 2445 (__v16qi) __A, 2446 (__mmask16) __U); 2447} 2448 2449static __inline void __DEFAULT_FN_ATTRS256 2450_mm256_storeu_epi8 (void *__P, __m256i __A) 2451{ 2452 struct __storeu_epi8 { 2453 __m256i_u __v; 2454 } __attribute__((__packed__, __may_alias__)); 2455 ((struct __storeu_epi8*)__P)->__v = __A; 2456} 2457 2458static __inline__ void __DEFAULT_FN_ATTRS256 2459_mm256_mask_storeu_epi8 (void *__P, __mmask32 __U, __m256i __A) 2460{ 2461 __builtin_ia32_storedquqi256_mask ((__v32qi *) __P, 2462 (__v32qi) __A, 2463 (__mmask32) __U); 2464} 2465 2466static __inline__ __mmask16 __DEFAULT_FN_ATTRS128 2467_mm_test_epi8_mask (__m128i __A, __m128i __B) 2468{ 2469 return _mm_cmpneq_epi8_mask (_mm_and_si128(__A, __B), _mm_setzero_si128()); 2470} 2471 2472static __inline__ __mmask16 __DEFAULT_FN_ATTRS128 2473_mm_mask_test_epi8_mask (__mmask16 __U, __m128i __A, __m128i __B) 2474{ 2475 return _mm_mask_cmpneq_epi8_mask (__U, _mm_and_si128 (__A, __B), 2476 _mm_setzero_si128()); 2477} 2478 2479static __inline__ __mmask32 __DEFAULT_FN_ATTRS256 2480_mm256_test_epi8_mask (__m256i __A, __m256i __B) 2481{ 2482 return _mm256_cmpneq_epi8_mask (_mm256_and_si256(__A, __B), 2483 _mm256_setzero_si256()); 2484} 2485 2486static __inline__ __mmask32 __DEFAULT_FN_ATTRS256 2487_mm256_mask_test_epi8_mask (__mmask32 __U, __m256i __A, __m256i __B) 2488{ 2489 return _mm256_mask_cmpneq_epi8_mask (__U, _mm256_and_si256(__A, __B), 2490 _mm256_setzero_si256()); 2491} 2492 2493static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 2494_mm_test_epi16_mask (__m128i __A, __m128i __B) 2495{ 2496 return _mm_cmpneq_epi16_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128()); 2497} 2498 2499static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 2500_mm_mask_test_epi16_mask (__mmask8 __U, __m128i __A, __m128i __B) 2501{ 2502 return _mm_mask_cmpneq_epi16_mask (__U, _mm_and_si128 (__A, __B), 2503 _mm_setzero_si128()); 2504} 2505 2506static __inline__ __mmask16 __DEFAULT_FN_ATTRS256 2507_mm256_test_epi16_mask (__m256i __A, __m256i __B) 2508{ 2509 return _mm256_cmpneq_epi16_mask (_mm256_and_si256 (__A, __B), 2510 _mm256_setzero_si256 ()); 2511} 2512 2513static __inline__ __mmask16 __DEFAULT_FN_ATTRS256 2514_mm256_mask_test_epi16_mask (__mmask16 __U, __m256i __A, __m256i __B) 2515{ 2516 return _mm256_mask_cmpneq_epi16_mask (__U, _mm256_and_si256(__A, __B), 2517 _mm256_setzero_si256()); 2518} 2519 2520static __inline__ __mmask16 __DEFAULT_FN_ATTRS128 2521_mm_testn_epi8_mask (__m128i __A, __m128i __B) 2522{ 2523 return _mm_cmpeq_epi8_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128()); 2524} 2525 2526static __inline__ __mmask16 __DEFAULT_FN_ATTRS128 2527_mm_mask_testn_epi8_mask (__mmask16 __U, __m128i __A, __m128i __B) 2528{ 2529 return _mm_mask_cmpeq_epi8_mask (__U, _mm_and_si128 (__A, __B), 2530 _mm_setzero_si128()); 2531} 2532 2533static __inline__ __mmask32 __DEFAULT_FN_ATTRS256 2534_mm256_testn_epi8_mask (__m256i __A, __m256i __B) 2535{ 2536 return _mm256_cmpeq_epi8_mask (_mm256_and_si256 (__A, __B), 2537 _mm256_setzero_si256()); 2538} 2539 2540static __inline__ __mmask32 __DEFAULT_FN_ATTRS256 2541_mm256_mask_testn_epi8_mask (__mmask32 __U, __m256i __A, __m256i __B) 2542{ 2543 return _mm256_mask_cmpeq_epi8_mask (__U, _mm256_and_si256 (__A, __B), 2544 _mm256_setzero_si256()); 2545} 2546 2547static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 2548_mm_testn_epi16_mask (__m128i __A, __m128i __B) 2549{ 2550 return _mm_cmpeq_epi16_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128()); 2551} 2552 2553static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 2554_mm_mask_testn_epi16_mask (__mmask8 __U, __m128i __A, __m128i __B) 2555{ 2556 return _mm_mask_cmpeq_epi16_mask (__U, _mm_and_si128(__A, __B), _mm_setzero_si128()); 2557} 2558 2559static __inline__ __mmask16 __DEFAULT_FN_ATTRS256 2560_mm256_testn_epi16_mask (__m256i __A, __m256i __B) 2561{ 2562 return _mm256_cmpeq_epi16_mask (_mm256_and_si256(__A, __B), 2563 _mm256_setzero_si256()); 2564} 2565 2566static __inline__ __mmask16 __DEFAULT_FN_ATTRS256 2567_mm256_mask_testn_epi16_mask (__mmask16 __U, __m256i __A, __m256i __B) 2568{ 2569 return _mm256_mask_cmpeq_epi16_mask (__U, _mm256_and_si256 (__A, __B), 2570 _mm256_setzero_si256()); 2571} 2572 2573static __inline__ __mmask16 __DEFAULT_FN_ATTRS128 2574_mm_movepi8_mask (__m128i __A) 2575{ 2576 return (__mmask16) __builtin_ia32_cvtb2mask128 ((__v16qi) __A); 2577} 2578 2579static __inline__ __mmask32 __DEFAULT_FN_ATTRS256 2580_mm256_movepi8_mask (__m256i __A) 2581{ 2582 return (__mmask32) __builtin_ia32_cvtb2mask256 ((__v32qi) __A); 2583} 2584 2585static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 2586_mm_movepi16_mask (__m128i __A) 2587{ 2588 return (__mmask8) __builtin_ia32_cvtw2mask128 ((__v8hi) __A); 2589} 2590 2591static __inline__ __mmask16 __DEFAULT_FN_ATTRS256 2592_mm256_movepi16_mask (__m256i __A) 2593{ 2594 return (__mmask16) __builtin_ia32_cvtw2mask256 ((__v16hi) __A); 2595} 2596 2597static __inline__ __m128i __DEFAULT_FN_ATTRS128 2598_mm_movm_epi8 (__mmask16 __A) 2599{ 2600 return (__m128i) __builtin_ia32_cvtmask2b128 (__A); 2601} 2602 2603static __inline__ __m256i __DEFAULT_FN_ATTRS256 2604_mm256_movm_epi8 (__mmask32 __A) 2605{ 2606 return (__m256i) __builtin_ia32_cvtmask2b256 (__A); 2607} 2608 2609static __inline__ __m128i __DEFAULT_FN_ATTRS128 2610_mm_movm_epi16 (__mmask8 __A) 2611{ 2612 return (__m128i) __builtin_ia32_cvtmask2w128 (__A); 2613} 2614 2615static __inline__ __m256i __DEFAULT_FN_ATTRS256 2616_mm256_movm_epi16 (__mmask16 __A) 2617{ 2618 return (__m256i) __builtin_ia32_cvtmask2w256 (__A); 2619} 2620 2621static __inline__ __m128i __DEFAULT_FN_ATTRS128 2622_mm_mask_broadcastb_epi8 (__m128i __O, __mmask16 __M, __m128i __A) 2623{ 2624 return (__m128i)__builtin_ia32_selectb_128(__M, 2625 (__v16qi) _mm_broadcastb_epi8(__A), 2626 (__v16qi) __O); 2627} 2628 2629static __inline__ __m128i __DEFAULT_FN_ATTRS128 2630_mm_maskz_broadcastb_epi8 (__mmask16 __M, __m128i __A) 2631{ 2632 return (__m128i)__builtin_ia32_selectb_128(__M, 2633 (__v16qi) _mm_broadcastb_epi8(__A), 2634 (__v16qi) _mm_setzero_si128()); 2635} 2636 2637static __inline__ __m256i __DEFAULT_FN_ATTRS256 2638_mm256_mask_broadcastb_epi8 (__m256i __O, __mmask32 __M, __m128i __A) 2639{ 2640 return (__m256i)__builtin_ia32_selectb_256(__M, 2641 (__v32qi) _mm256_broadcastb_epi8(__A), 2642 (__v32qi) __O); 2643} 2644 2645static __inline__ __m256i __DEFAULT_FN_ATTRS256 2646_mm256_maskz_broadcastb_epi8 (__mmask32 __M, __m128i __A) 2647{ 2648 return (__m256i)__builtin_ia32_selectb_256(__M, 2649 (__v32qi) _mm256_broadcastb_epi8(__A), 2650 (__v32qi) _mm256_setzero_si256()); 2651} 2652 2653static __inline__ __m128i __DEFAULT_FN_ATTRS128 2654_mm_mask_broadcastw_epi16 (__m128i __O, __mmask8 __M, __m128i __A) 2655{ 2656 return (__m128i)__builtin_ia32_selectw_128(__M, 2657 (__v8hi) _mm_broadcastw_epi16(__A), 2658 (__v8hi) __O); 2659} 2660 2661static __inline__ __m128i __DEFAULT_FN_ATTRS128 2662_mm_maskz_broadcastw_epi16 (__mmask8 __M, __m128i __A) 2663{ 2664 return (__m128i)__builtin_ia32_selectw_128(__M, 2665 (__v8hi) _mm_broadcastw_epi16(__A), 2666 (__v8hi) _mm_setzero_si128()); 2667} 2668 2669static __inline__ __m256i __DEFAULT_FN_ATTRS256 2670_mm256_mask_broadcastw_epi16 (__m256i __O, __mmask16 __M, __m128i __A) 2671{ 2672 return (__m256i)__builtin_ia32_selectw_256(__M, 2673 (__v16hi) _mm256_broadcastw_epi16(__A), 2674 (__v16hi) __O); 2675} 2676 2677static __inline__ __m256i __DEFAULT_FN_ATTRS256 2678_mm256_maskz_broadcastw_epi16 (__mmask16 __M, __m128i __A) 2679{ 2680 return (__m256i)__builtin_ia32_selectw_256(__M, 2681 (__v16hi) _mm256_broadcastw_epi16(__A), 2682 (__v16hi) _mm256_setzero_si256()); 2683} 2684 2685static __inline__ __m256i __DEFAULT_FN_ATTRS256 2686_mm256_mask_set1_epi16 (__m256i __O, __mmask16 __M, short __A) 2687{ 2688 return (__m256i) __builtin_ia32_selectw_256 (__M, 2689 (__v16hi) _mm256_set1_epi16(__A), 2690 (__v16hi) __O); 2691} 2692 2693static __inline__ __m256i __DEFAULT_FN_ATTRS256 2694_mm256_maskz_set1_epi16 (__mmask16 __M, short __A) 2695{ 2696 return (__m256i) __builtin_ia32_selectw_256(__M, 2697 (__v16hi)_mm256_set1_epi16(__A), 2698 (__v16hi) _mm256_setzero_si256()); 2699} 2700 2701static __inline__ __m128i __DEFAULT_FN_ATTRS128 2702_mm_mask_set1_epi16 (__m128i __O, __mmask8 __M, short __A) 2703{ 2704 return (__m128i) __builtin_ia32_selectw_128(__M, 2705 (__v8hi) _mm_set1_epi16(__A), 2706 (__v8hi) __O); 2707} 2708 2709static __inline__ __m128i __DEFAULT_FN_ATTRS128 2710_mm_maskz_set1_epi16 (__mmask8 __M, short __A) 2711{ 2712 return (__m128i) __builtin_ia32_selectw_128(__M, 2713 (__v8hi) _mm_set1_epi16(__A), 2714 (__v8hi) _mm_setzero_si128()); 2715} 2716 2717static __inline__ __m128i __DEFAULT_FN_ATTRS128 2718_mm_permutexvar_epi16 (__m128i __A, __m128i __B) 2719{ 2720 return (__m128i)__builtin_ia32_permvarhi128((__v8hi) __B, (__v8hi) __A); 2721} 2722 2723static __inline__ __m128i __DEFAULT_FN_ATTRS128 2724_mm_maskz_permutexvar_epi16 (__mmask8 __M, __m128i __A, __m128i __B) 2725{ 2726 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M, 2727 (__v8hi)_mm_permutexvar_epi16(__A, __B), 2728 (__v8hi) _mm_setzero_si128()); 2729} 2730 2731static __inline__ __m128i __DEFAULT_FN_ATTRS128 2732_mm_mask_permutexvar_epi16 (__m128i __W, __mmask8 __M, __m128i __A, 2733 __m128i __B) 2734{ 2735 return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M, 2736 (__v8hi)_mm_permutexvar_epi16(__A, __B), 2737 (__v8hi)__W); 2738} 2739 2740static __inline__ __m256i __DEFAULT_FN_ATTRS256 2741_mm256_permutexvar_epi16 (__m256i __A, __m256i __B) 2742{ 2743 return (__m256i)__builtin_ia32_permvarhi256((__v16hi) __B, (__v16hi) __A); 2744} 2745 2746static __inline__ __m256i __DEFAULT_FN_ATTRS256 2747_mm256_maskz_permutexvar_epi16 (__mmask16 __M, __m256i __A, 2748 __m256i __B) 2749{ 2750 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M, 2751 (__v16hi)_mm256_permutexvar_epi16(__A, __B), 2752 (__v16hi)_mm256_setzero_si256()); 2753} 2754 2755static __inline__ __m256i __DEFAULT_FN_ATTRS256 2756_mm256_mask_permutexvar_epi16 (__m256i __W, __mmask16 __M, __m256i __A, 2757 __m256i __B) 2758{ 2759 return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M, 2760 (__v16hi)_mm256_permutexvar_epi16(__A, __B), 2761 (__v16hi)__W); 2762} 2763 2764#define _mm_mask_alignr_epi8(W, U, A, B, N) \ 2765 ((__m128i)__builtin_ia32_selectb_128((__mmask16)(U), \ 2766 (__v16qi)_mm_alignr_epi8((A), (B), (int)(N)), \ 2767 (__v16qi)(__m128i)(W))) 2768 2769#define _mm_maskz_alignr_epi8(U, A, B, N) \ 2770 ((__m128i)__builtin_ia32_selectb_128((__mmask16)(U), \ 2771 (__v16qi)_mm_alignr_epi8((A), (B), (int)(N)), \ 2772 (__v16qi)_mm_setzero_si128())) 2773 2774#define _mm256_mask_alignr_epi8(W, U, A, B, N) \ 2775 ((__m256i)__builtin_ia32_selectb_256((__mmask32)(U), \ 2776 (__v32qi)_mm256_alignr_epi8((A), (B), (int)(N)), \ 2777 (__v32qi)(__m256i)(W))) 2778 2779#define _mm256_maskz_alignr_epi8(U, A, B, N) \ 2780 ((__m256i)__builtin_ia32_selectb_256((__mmask32)(U), \ 2781 (__v32qi)_mm256_alignr_epi8((A), (B), (int)(N)), \ 2782 (__v32qi)_mm256_setzero_si256())) 2783 2784#define _mm_dbsad_epu8(A, B, imm) \ 2785 ((__m128i)__builtin_ia32_dbpsadbw128((__v16qi)(__m128i)(A), \ 2786 (__v16qi)(__m128i)(B), (int)(imm))) 2787 2788#define _mm_mask_dbsad_epu8(W, U, A, B, imm) \ 2789 ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ 2790 (__v8hi)_mm_dbsad_epu8((A), (B), (imm)), \ 2791 (__v8hi)(__m128i)(W))) 2792 2793#define _mm_maskz_dbsad_epu8(U, A, B, imm) \ 2794 ((__m128i)__builtin_ia32_selectw_128((__mmask8)(U), \ 2795 (__v8hi)_mm_dbsad_epu8((A), (B), (imm)), \ 2796 (__v8hi)_mm_setzero_si128())) 2797 2798#define _mm256_dbsad_epu8(A, B, imm) \ 2799 ((__m256i)__builtin_ia32_dbpsadbw256((__v32qi)(__m256i)(A), \ 2800 (__v32qi)(__m256i)(B), (int)(imm))) 2801 2802#define _mm256_mask_dbsad_epu8(W, U, A, B, imm) \ 2803 ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ 2804 (__v16hi)_mm256_dbsad_epu8((A), (B), (imm)), \ 2805 (__v16hi)(__m256i)(W))) 2806 2807#define _mm256_maskz_dbsad_epu8(U, A, B, imm) \ 2808 ((__m256i)__builtin_ia32_selectw_256((__mmask16)(U), \ 2809 (__v16hi)_mm256_dbsad_epu8((A), (B), (imm)), \ 2810 (__v16hi)_mm256_setzero_si256())) 2811 2812static __inline__ short __DEFAULT_FN_ATTRS128 2813_mm_reduce_add_epi16(__m128i __W) { 2814 return __builtin_reduce_add((__v8hi)__W); 2815} 2816 2817static __inline__ short __DEFAULT_FN_ATTRS128 2818_mm_reduce_mul_epi16(__m128i __W) { 2819 return __builtin_reduce_mul((__v8hi)__W); 2820} 2821 2822static __inline__ short __DEFAULT_FN_ATTRS128 2823_mm_reduce_and_epi16(__m128i __W) { 2824 return __builtin_reduce_and((__v8hi)__W); 2825} 2826 2827static __inline__ short __DEFAULT_FN_ATTRS128 2828_mm_reduce_or_epi16(__m128i __W) { 2829 return __builtin_reduce_or((__v8hi)__W); 2830} 2831 2832static __inline__ short __DEFAULT_FN_ATTRS128 2833_mm_mask_reduce_add_epi16( __mmask8 __M, __m128i __W) { 2834 __W = _mm_maskz_mov_epi16(__M, __W); 2835 return __builtin_reduce_add((__v8hi)__W); 2836} 2837 2838static __inline__ short __DEFAULT_FN_ATTRS128 2839_mm_mask_reduce_mul_epi16( __mmask8 __M, __m128i __W) { 2840 __W = _mm_mask_mov_epi16(_mm_set1_epi16(1), __M, __W); 2841 return __builtin_reduce_mul((__v8hi)__W); 2842} 2843 2844static __inline__ short __DEFAULT_FN_ATTRS128 2845_mm_mask_reduce_and_epi16( __mmask8 __M, __m128i __W) { 2846 __W = _mm_mask_mov_epi16(_mm_set1_epi16(-1), __M, __W); 2847 return __builtin_reduce_and((__v8hi)__W); 2848} 2849 2850static __inline__ short __DEFAULT_FN_ATTRS128 2851_mm_mask_reduce_or_epi16(__mmask8 __M, __m128i __W) { 2852 __W = _mm_maskz_mov_epi16(__M, __W); 2853 return __builtin_reduce_or((__v8hi)__W); 2854} 2855 2856static __inline__ short __DEFAULT_FN_ATTRS128 2857_mm_reduce_max_epi16(__m128i __V) { 2858 return __builtin_reduce_max((__v8hi)__V); 2859} 2860 2861static __inline__ unsigned short __DEFAULT_FN_ATTRS128 2862_mm_reduce_max_epu16(__m128i __V) { 2863 return __builtin_reduce_max((__v8hu)__V); 2864} 2865 2866static __inline__ short __DEFAULT_FN_ATTRS128 2867_mm_reduce_min_epi16(__m128i __V) { 2868 return __builtin_reduce_min((__v8hi)__V); 2869} 2870 2871static __inline__ unsigned short __DEFAULT_FN_ATTRS128 2872_mm_reduce_min_epu16(__m128i __V) { 2873 return __builtin_reduce_min((__v8hu)__V); 2874} 2875 2876static __inline__ short __DEFAULT_FN_ATTRS128 2877_mm_mask_reduce_max_epi16(__mmask16 __M, __m128i __V) { 2878 __V = _mm_mask_mov_epi16(_mm_set1_epi16(-32767-1), __M, __V); 2879 return __builtin_reduce_max((__v8hi)__V); 2880} 2881 2882static __inline__ unsigned short __DEFAULT_FN_ATTRS128 2883_mm_mask_reduce_max_epu16(__mmask16 __M, __m128i __V) { 2884 __V = _mm_maskz_mov_epi16(__M, __V); 2885 return __builtin_reduce_max((__v8hu)__V); 2886} 2887 2888static __inline__ short __DEFAULT_FN_ATTRS128 2889_mm_mask_reduce_min_epi16(__mmask16 __M, __m128i __V) { 2890 __V = _mm_mask_mov_epi16(_mm_set1_epi16(32767), __M, __V); 2891 return __builtin_reduce_min((__v8hi)__V); 2892} 2893 2894static __inline__ unsigned short __DEFAULT_FN_ATTRS128 2895_mm_mask_reduce_min_epu16(__mmask16 __M, __m128i __V) { 2896 __V = _mm_mask_mov_epi16(_mm_set1_epi16(-1), __M, __V); 2897 return __builtin_reduce_min((__v8hu)__V); 2898} 2899 2900static __inline__ short __DEFAULT_FN_ATTRS256 2901_mm256_reduce_add_epi16(__m256i __W) { 2902 return __builtin_reduce_add((__v16hi)__W); 2903} 2904 2905static __inline__ short __DEFAULT_FN_ATTRS256 2906_mm256_reduce_mul_epi16(__m256i __W) { 2907 return __builtin_reduce_mul((__v16hi)__W); 2908} 2909 2910static __inline__ short __DEFAULT_FN_ATTRS256 2911_mm256_reduce_and_epi16(__m256i __W) { 2912 return __builtin_reduce_and((__v16hi)__W); 2913} 2914 2915static __inline__ short __DEFAULT_FN_ATTRS256 2916_mm256_reduce_or_epi16(__m256i __W) { 2917 return __builtin_reduce_or((__v16hi)__W); 2918} 2919 2920static __inline__ short __DEFAULT_FN_ATTRS256 2921_mm256_mask_reduce_add_epi16( __mmask16 __M, __m256i __W) { 2922 __W = _mm256_maskz_mov_epi16(__M, __W); 2923 return __builtin_reduce_add((__v16hi)__W); 2924} 2925 2926static __inline__ short __DEFAULT_FN_ATTRS256 2927_mm256_mask_reduce_mul_epi16( __mmask16 __M, __m256i __W) { 2928 __W = _mm256_mask_mov_epi16(_mm256_set1_epi16(1), __M, __W); 2929 return __builtin_reduce_mul((__v16hi)__W); 2930} 2931 2932static __inline__ short __DEFAULT_FN_ATTRS256 2933_mm256_mask_reduce_and_epi16( __mmask16 __M, __m256i __W) { 2934 __W = _mm256_mask_mov_epi16(_mm256_set1_epi16(-1), __M, __W); 2935 return __builtin_reduce_and((__v16hi)__W); 2936} 2937 2938static __inline__ short __DEFAULT_FN_ATTRS256 2939_mm256_mask_reduce_or_epi16(__mmask16 __M, __m256i __W) { 2940 __W = _mm256_maskz_mov_epi16(__M, __W); 2941 return __builtin_reduce_or((__v16hi)__W); 2942} 2943 2944static __inline__ short __DEFAULT_FN_ATTRS256 2945_mm256_reduce_max_epi16(__m256i __V) { 2946 return __builtin_reduce_max((__v16hi)__V); 2947} 2948 2949static __inline__ unsigned short __DEFAULT_FN_ATTRS256 2950_mm256_reduce_max_epu16(__m256i __V) { 2951 return __builtin_reduce_max((__v16hu)__V); 2952} 2953 2954static __inline__ short __DEFAULT_FN_ATTRS256 2955_mm256_reduce_min_epi16(__m256i __V) { 2956 return __builtin_reduce_min((__v16hi)__V); 2957} 2958 2959static __inline__ unsigned short __DEFAULT_FN_ATTRS256 2960_mm256_reduce_min_epu16(__m256i __V) { 2961 return __builtin_reduce_min((__v16hu)__V); 2962} 2963 2964static __inline__ short __DEFAULT_FN_ATTRS256 2965_mm256_mask_reduce_max_epi16(__mmask16 __M, __m256i __V) { 2966 __V = _mm256_mask_mov_epi16(_mm256_set1_epi16(-32767-1), __M, __V); 2967 return __builtin_reduce_max((__v16hi)__V); 2968} 2969 2970static __inline__ unsigned short __DEFAULT_FN_ATTRS256 2971_mm256_mask_reduce_max_epu16(__mmask16 __M, __m256i __V) { 2972 __V = _mm256_maskz_mov_epi16(__M, __V); 2973 return __builtin_reduce_max((__v16hu)__V); 2974} 2975 2976static __inline__ short __DEFAULT_FN_ATTRS256 2977_mm256_mask_reduce_min_epi16(__mmask16 __M, __m256i __V) { 2978 __V = _mm256_mask_mov_epi16(_mm256_set1_epi16(32767), __M, __V); 2979 return __builtin_reduce_min((__v16hi)__V); 2980} 2981 2982static __inline__ unsigned short __DEFAULT_FN_ATTRS256 2983_mm256_mask_reduce_min_epu16(__mmask16 __M, __m256i __V) { 2984 __V = _mm256_mask_mov_epi16(_mm256_set1_epi16(-1), __M, __V); 2985 return __builtin_reduce_min((__v16hu)__V); 2986} 2987 2988static __inline__ signed char __DEFAULT_FN_ATTRS128 2989_mm_reduce_add_epi8(__m128i __W) { 2990 return __builtin_reduce_add((__v16qs)__W); 2991} 2992 2993static __inline__ signed char __DEFAULT_FN_ATTRS128 2994_mm_reduce_mul_epi8(__m128i __W) { 2995 return __builtin_reduce_mul((__v16qs)__W); 2996} 2997 2998static __inline__ signed char __DEFAULT_FN_ATTRS128 2999_mm_reduce_and_epi8(__m128i __W) { 3000 return __builtin_reduce_and((__v16qs)__W); 3001} 3002 3003static __inline__ signed char __DEFAULT_FN_ATTRS128 3004_mm_reduce_or_epi8(__m128i __W) { 3005 return __builtin_reduce_or((__v16qs)__W); 3006} 3007 3008static __inline__ signed char __DEFAULT_FN_ATTRS128 3009_mm_mask_reduce_add_epi8(__mmask16 __M, __m128i __W) { 3010 __W = _mm_maskz_mov_epi8(__M, __W); 3011 return __builtin_reduce_add((__v16qs)__W); 3012} 3013 3014static __inline__ signed char __DEFAULT_FN_ATTRS128 3015_mm_mask_reduce_mul_epi8(__mmask16 __M, __m128i __W) { 3016 __W = _mm_mask_mov_epi8(_mm_set1_epi8(1), __M, __W); 3017 return __builtin_reduce_mul((__v16qs)__W); 3018} 3019 3020static __inline__ signed char __DEFAULT_FN_ATTRS128 3021_mm_mask_reduce_and_epi8(__mmask16 __M, __m128i __W) { 3022 __W = _mm_mask_mov_epi8(_mm_set1_epi8(-1), __M, __W); 3023 return __builtin_reduce_and((__v16qs)__W); 3024} 3025 3026static __inline__ signed char __DEFAULT_FN_ATTRS128 3027_mm_mask_reduce_or_epi8(__mmask16 __M, __m128i __W) { 3028 __W = _mm_maskz_mov_epi8(__M, __W); 3029 return __builtin_reduce_or((__v16qs)__W); 3030} 3031 3032static __inline__ signed char __DEFAULT_FN_ATTRS128 3033_mm_reduce_max_epi8(__m128i __V) { 3034 return __builtin_reduce_max((__v16qs)__V); 3035} 3036 3037static __inline__ unsigned char __DEFAULT_FN_ATTRS128 3038_mm_reduce_max_epu8(__m128i __V) { 3039 return __builtin_reduce_max((__v16qu)__V); 3040} 3041 3042static __inline__ signed char __DEFAULT_FN_ATTRS128 3043_mm_reduce_min_epi8(__m128i __V) { 3044 return __builtin_reduce_min((__v16qs)__V); 3045} 3046 3047static __inline__ unsigned char __DEFAULT_FN_ATTRS128 3048_mm_reduce_min_epu8(__m128i __V) { 3049 return __builtin_reduce_min((__v16qu)__V); 3050} 3051 3052static __inline__ signed char __DEFAULT_FN_ATTRS128 3053_mm_mask_reduce_max_epi8(__mmask16 __M, __m128i __V) { 3054 __V = _mm_mask_mov_epi8(_mm_set1_epi8(-127-1), __M, __V); 3055 return __builtin_reduce_max((__v16qs)__V); 3056} 3057 3058static __inline__ unsigned char __DEFAULT_FN_ATTRS128 3059_mm_mask_reduce_max_epu8(__mmask16 __M, __m128i __V) { 3060 __V = _mm_maskz_mov_epi8(__M, __V); 3061 return __builtin_reduce_max((__v16qu)__V); 3062} 3063 3064static __inline__ signed char __DEFAULT_FN_ATTRS128 3065_mm_mask_reduce_min_epi8(__mmask16 __M, __m128i __V) { 3066 __V = _mm_mask_mov_epi8(_mm_set1_epi8(127), __M, __V); 3067 return __builtin_reduce_min((__v16qs)__V); 3068} 3069 3070static __inline__ unsigned char __DEFAULT_FN_ATTRS128 3071_mm_mask_reduce_min_epu8(__mmask16 __M, __m128i __V) { 3072 __V = _mm_mask_mov_epi8(_mm_set1_epi8(-1), __M, __V); 3073 return __builtin_reduce_min((__v16qu)__V); 3074} 3075 3076static __inline__ signed char __DEFAULT_FN_ATTRS256 3077_mm256_reduce_add_epi8(__m256i __W) { 3078 return __builtin_reduce_add((__v32qs)__W); 3079} 3080 3081static __inline__ signed char __DEFAULT_FN_ATTRS256 3082_mm256_reduce_mul_epi8(__m256i __W) { 3083 return __builtin_reduce_mul((__v32qs)__W); 3084} 3085 3086static __inline__ signed char __DEFAULT_FN_ATTRS256 3087_mm256_reduce_and_epi8(__m256i __W) { 3088 return __builtin_reduce_and((__v32qs)__W); 3089} 3090 3091static __inline__ signed char __DEFAULT_FN_ATTRS256 3092_mm256_reduce_or_epi8(__m256i __W) { 3093 return __builtin_reduce_or((__v32qs)__W); 3094} 3095 3096static __inline__ signed char __DEFAULT_FN_ATTRS256 3097_mm256_mask_reduce_add_epi8(__mmask32 __M, __m256i __W) { 3098 __W = _mm256_maskz_mov_epi8(__M, __W); 3099 return __builtin_reduce_add((__v32qs)__W); 3100} 3101 3102static __inline__ signed char __DEFAULT_FN_ATTRS256 3103_mm256_mask_reduce_mul_epi8(__mmask32 __M, __m256i __W) { 3104 __W = _mm256_mask_mov_epi8(_mm256_set1_epi8(1), __M, __W); 3105 return __builtin_reduce_mul((__v32qs)__W); 3106} 3107 3108static __inline__ signed char __DEFAULT_FN_ATTRS256 3109_mm256_mask_reduce_and_epi8(__mmask32 __M, __m256i __W) { 3110 __W = _mm256_mask_mov_epi8(_mm256_set1_epi8(-1), __M, __W); 3111 return __builtin_reduce_and((__v32qs)__W); 3112} 3113 3114static __inline__ signed char __DEFAULT_FN_ATTRS256 3115_mm256_mask_reduce_or_epi8(__mmask32 __M, __m256i __W) { 3116 __W = _mm256_maskz_mov_epi8(__M, __W); 3117 return __builtin_reduce_or((__v32qs)__W); 3118} 3119 3120static __inline__ signed char __DEFAULT_FN_ATTRS256 3121_mm256_reduce_max_epi8(__m256i __V) { 3122 return __builtin_reduce_max((__v32qs)__V); 3123} 3124 3125static __inline__ unsigned char __DEFAULT_FN_ATTRS256 3126_mm256_reduce_max_epu8(__m256i __V) { 3127 return __builtin_reduce_max((__v32qu)__V); 3128} 3129 3130static __inline__ signed char __DEFAULT_FN_ATTRS256 3131_mm256_reduce_min_epi8(__m256i __V) { 3132 return __builtin_reduce_min((__v32qs)__V); 3133} 3134 3135static __inline__ unsigned char __DEFAULT_FN_ATTRS256 3136_mm256_reduce_min_epu8(__m256i __V) { 3137 return __builtin_reduce_min((__v32qu)__V); 3138} 3139 3140static __inline__ signed char __DEFAULT_FN_ATTRS256 3141_mm256_mask_reduce_max_epi8(__mmask32 __M, __m256i __V) { 3142 __V = _mm256_mask_mov_epi8(_mm256_set1_epi8(-127-1), __M, __V); 3143 return __builtin_reduce_max((__v32qs)__V); 3144} 3145 3146static __inline__ unsigned char __DEFAULT_FN_ATTRS256 3147_mm256_mask_reduce_max_epu8(__mmask32 __M, __m256i __V) { 3148 __V = _mm256_maskz_mov_epi8(__M, __V); 3149 return __builtin_reduce_max((__v32qu)__V); 3150} 3151 3152static __inline__ signed char __DEFAULT_FN_ATTRS256 3153_mm256_mask_reduce_min_epi8(__mmask32 __M, __m256i __V) { 3154 __V = _mm256_mask_mov_epi8(_mm256_set1_epi8(127), __M, __V); 3155 return __builtin_reduce_min((__v32qs)__V); 3156} 3157 3158static __inline__ unsigned char __DEFAULT_FN_ATTRS256 3159_mm256_mask_reduce_min_epu8(__mmask32 __M, __m256i __V) { 3160 __V = _mm256_mask_mov_epi8(_mm256_set1_epi8(-1), __M, __V); 3161 return __builtin_reduce_min((__v32qu)__V); 3162} 3163 3164#undef __DEFAULT_FN_ATTRS128 3165#undef __DEFAULT_FN_ATTRS256 3166 3167#endif /* __AVX512VLBWINTRIN_H */ 3168