1/*===---- avx512vlintrin.h - AVX512VL intrinsics ---------------------------=== 2 * 3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 * See https://llvm.org/LICENSE.txt for license information. 5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 * 7 *===-----------------------------------------------------------------------=== 8 */ 9 10#ifndef __IMMINTRIN_H 11#error "Never use <avx512vlintrin.h> directly; include <immintrin.h> instead." 12#endif 13 14#ifndef __AVX512VLINTRIN_H 15#define __AVX512VLINTRIN_H 16 17#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl"), __min_vector_width__(128))) 18#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl"), __min_vector_width__(256))) 19 20typedef short __v2hi __attribute__((__vector_size__(4))); 21typedef char __v4qi __attribute__((__vector_size__(4))); 22typedef char __v2qi __attribute__((__vector_size__(2))); 23 24/* Integer compare */ 25 26#define _mm_cmpeq_epi32_mask(A, B) \ 27 _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_EQ) 28#define _mm_mask_cmpeq_epi32_mask(k, A, B) \ 29 _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_EQ) 30#define _mm_cmpge_epi32_mask(A, B) \ 31 _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_GE) 32#define _mm_mask_cmpge_epi32_mask(k, A, B) \ 33 _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GE) 34#define _mm_cmpgt_epi32_mask(A, B) \ 35 _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_GT) 36#define _mm_mask_cmpgt_epi32_mask(k, A, B) \ 37 _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GT) 38#define _mm_cmple_epi32_mask(A, B) \ 39 _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_LE) 40#define _mm_mask_cmple_epi32_mask(k, A, B) \ 41 _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LE) 42#define _mm_cmplt_epi32_mask(A, B) \ 43 _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_LT) 44#define _mm_mask_cmplt_epi32_mask(k, A, B) \ 45 _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LT) 46#define 
_mm_cmpneq_epi32_mask(A, B) \ 47 _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_NE) 48#define _mm_mask_cmpneq_epi32_mask(k, A, B) \ 49 _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_NE) 50 51#define _mm256_cmpeq_epi32_mask(A, B) \ 52 _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_EQ) 53#define _mm256_mask_cmpeq_epi32_mask(k, A, B) \ 54 _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_EQ) 55#define _mm256_cmpge_epi32_mask(A, B) \ 56 _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_GE) 57#define _mm256_mask_cmpge_epi32_mask(k, A, B) \ 58 _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GE) 59#define _mm256_cmpgt_epi32_mask(A, B) \ 60 _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_GT) 61#define _mm256_mask_cmpgt_epi32_mask(k, A, B) \ 62 _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GT) 63#define _mm256_cmple_epi32_mask(A, B) \ 64 _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_LE) 65#define _mm256_mask_cmple_epi32_mask(k, A, B) \ 66 _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LE) 67#define _mm256_cmplt_epi32_mask(A, B) \ 68 _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_LT) 69#define _mm256_mask_cmplt_epi32_mask(k, A, B) \ 70 _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LT) 71#define _mm256_cmpneq_epi32_mask(A, B) \ 72 _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_NE) 73#define _mm256_mask_cmpneq_epi32_mask(k, A, B) \ 74 _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_NE) 75 76#define _mm_cmpeq_epu32_mask(A, B) \ 77 _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_EQ) 78#define _mm_mask_cmpeq_epu32_mask(k, A, B) \ 79 _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_EQ) 80#define _mm_cmpge_epu32_mask(A, B) \ 81 _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_GE) 82#define _mm_mask_cmpge_epu32_mask(k, A, B) \ 83 _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GE) 84#define _mm_cmpgt_epu32_mask(A, B) \ 85 _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_GT) 86#define _mm_mask_cmpgt_epu32_mask(k, A, B) \ 87 _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GT) 88#define 
_mm_cmple_epu32_mask(A, B) \ 89 _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_LE) 90#define _mm_mask_cmple_epu32_mask(k, A, B) \ 91 _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LE) 92#define _mm_cmplt_epu32_mask(A, B) \ 93 _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_LT) 94#define _mm_mask_cmplt_epu32_mask(k, A, B) \ 95 _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LT) 96#define _mm_cmpneq_epu32_mask(A, B) \ 97 _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_NE) 98#define _mm_mask_cmpneq_epu32_mask(k, A, B) \ 99 _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_NE) 100 101#define _mm256_cmpeq_epu32_mask(A, B) \ 102 _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_EQ) 103#define _mm256_mask_cmpeq_epu32_mask(k, A, B) \ 104 _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_EQ) 105#define _mm256_cmpge_epu32_mask(A, B) \ 106 _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_GE) 107#define _mm256_mask_cmpge_epu32_mask(k, A, B) \ 108 _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GE) 109#define _mm256_cmpgt_epu32_mask(A, B) \ 110 _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_GT) 111#define _mm256_mask_cmpgt_epu32_mask(k, A, B) \ 112 _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GT) 113#define _mm256_cmple_epu32_mask(A, B) \ 114 _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_LE) 115#define _mm256_mask_cmple_epu32_mask(k, A, B) \ 116 _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LE) 117#define _mm256_cmplt_epu32_mask(A, B) \ 118 _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_LT) 119#define _mm256_mask_cmplt_epu32_mask(k, A, B) \ 120 _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LT) 121#define _mm256_cmpneq_epu32_mask(A, B) \ 122 _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_NE) 123#define _mm256_mask_cmpneq_epu32_mask(k, A, B) \ 124 _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_NE) 125 126#define _mm_cmpeq_epi64_mask(A, B) \ 127 _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_EQ) 128#define _mm_mask_cmpeq_epi64_mask(k, A, B) \ 129 _mm_mask_cmp_epi64_mask((k), (A), (B), 
_MM_CMPINT_EQ) 130#define _mm_cmpge_epi64_mask(A, B) \ 131 _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_GE) 132#define _mm_mask_cmpge_epi64_mask(k, A, B) \ 133 _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GE) 134#define _mm_cmpgt_epi64_mask(A, B) \ 135 _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_GT) 136#define _mm_mask_cmpgt_epi64_mask(k, A, B) \ 137 _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GT) 138#define _mm_cmple_epi64_mask(A, B) \ 139 _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_LE) 140#define _mm_mask_cmple_epi64_mask(k, A, B) \ 141 _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LE) 142#define _mm_cmplt_epi64_mask(A, B) \ 143 _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_LT) 144#define _mm_mask_cmplt_epi64_mask(k, A, B) \ 145 _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LT) 146#define _mm_cmpneq_epi64_mask(A, B) \ 147 _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_NE) 148#define _mm_mask_cmpneq_epi64_mask(k, A, B) \ 149 _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_NE) 150 151#define _mm256_cmpeq_epi64_mask(A, B) \ 152 _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_EQ) 153#define _mm256_mask_cmpeq_epi64_mask(k, A, B) \ 154 _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_EQ) 155#define _mm256_cmpge_epi64_mask(A, B) \ 156 _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_GE) 157#define _mm256_mask_cmpge_epi64_mask(k, A, B) \ 158 _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GE) 159#define _mm256_cmpgt_epi64_mask(A, B) \ 160 _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_GT) 161#define _mm256_mask_cmpgt_epi64_mask(k, A, B) \ 162 _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GT) 163#define _mm256_cmple_epi64_mask(A, B) \ 164 _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_LE) 165#define _mm256_mask_cmple_epi64_mask(k, A, B) \ 166 _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LE) 167#define _mm256_cmplt_epi64_mask(A, B) \ 168 _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_LT) 169#define _mm256_mask_cmplt_epi64_mask(k, A, B) \ 170 _mm256_mask_cmp_epi64_mask((k), 
(A), (B), _MM_CMPINT_LT) 171#define _mm256_cmpneq_epi64_mask(A, B) \ 172 _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_NE) 173#define _mm256_mask_cmpneq_epi64_mask(k, A, B) \ 174 _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_NE) 175 176#define _mm_cmpeq_epu64_mask(A, B) \ 177 _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_EQ) 178#define _mm_mask_cmpeq_epu64_mask(k, A, B) \ 179 _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_EQ) 180#define _mm_cmpge_epu64_mask(A, B) \ 181 _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_GE) 182#define _mm_mask_cmpge_epu64_mask(k, A, B) \ 183 _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GE) 184#define _mm_cmpgt_epu64_mask(A, B) \ 185 _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_GT) 186#define _mm_mask_cmpgt_epu64_mask(k, A, B) \ 187 _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GT) 188#define _mm_cmple_epu64_mask(A, B) \ 189 _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_LE) 190#define _mm_mask_cmple_epu64_mask(k, A, B) \ 191 _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LE) 192#define _mm_cmplt_epu64_mask(A, B) \ 193 _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_LT) 194#define _mm_mask_cmplt_epu64_mask(k, A, B) \ 195 _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LT) 196#define _mm_cmpneq_epu64_mask(A, B) \ 197 _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_NE) 198#define _mm_mask_cmpneq_epu64_mask(k, A, B) \ 199 _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE) 200 201#define _mm256_cmpeq_epu64_mask(A, B) \ 202 _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_EQ) 203#define _mm256_mask_cmpeq_epu64_mask(k, A, B) \ 204 _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_EQ) 205#define _mm256_cmpge_epu64_mask(A, B) \ 206 _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_GE) 207#define _mm256_mask_cmpge_epu64_mask(k, A, B) \ 208 _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GE) 209#define _mm256_cmpgt_epu64_mask(A, B) \ 210 _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_GT) 211#define _mm256_mask_cmpgt_epu64_mask(k, A, B) \ 212 _mm256_mask_cmp_epu64_mask((k), 
(A), (B), _MM_CMPINT_GT) 213#define _mm256_cmple_epu64_mask(A, B) \ 214 _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_LE) 215#define _mm256_mask_cmple_epu64_mask(k, A, B) \ 216 _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LE) 217#define _mm256_cmplt_epu64_mask(A, B) \ 218 _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_LT) 219#define _mm256_mask_cmplt_epu64_mask(k, A, B) \ 220 _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LT) 221#define _mm256_cmpneq_epu64_mask(A, B) \ 222 _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_NE) 223#define _mm256_mask_cmpneq_epu64_mask(k, A, B) \ 224 _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE) 225 226static __inline__ __m256i __DEFAULT_FN_ATTRS256 227_mm256_mask_add_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 228{ 229 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 230 (__v8si)_mm256_add_epi32(__A, __B), 231 (__v8si)__W); 232} 233 234static __inline__ __m256i __DEFAULT_FN_ATTRS256 235_mm256_maskz_add_epi32(__mmask8 __U, __m256i __A, __m256i __B) 236{ 237 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 238 (__v8si)_mm256_add_epi32(__A, __B), 239 (__v8si)_mm256_setzero_si256()); 240} 241 242static __inline__ __m256i __DEFAULT_FN_ATTRS256 243_mm256_mask_add_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 244{ 245 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 246 (__v4di)_mm256_add_epi64(__A, __B), 247 (__v4di)__W); 248} 249 250static __inline__ __m256i __DEFAULT_FN_ATTRS256 251_mm256_maskz_add_epi64(__mmask8 __U, __m256i __A, __m256i __B) 252{ 253 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 254 (__v4di)_mm256_add_epi64(__A, __B), 255 (__v4di)_mm256_setzero_si256()); 256} 257 258static __inline__ __m256i __DEFAULT_FN_ATTRS256 259_mm256_mask_sub_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 260{ 261 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 262 (__v8si)_mm256_sub_epi32(__A, __B), 263 (__v8si)__W); 264} 265 266static __inline__ 
__m256i __DEFAULT_FN_ATTRS256 267_mm256_maskz_sub_epi32(__mmask8 __U, __m256i __A, __m256i __B) 268{ 269 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 270 (__v8si)_mm256_sub_epi32(__A, __B), 271 (__v8si)_mm256_setzero_si256()); 272} 273 274static __inline__ __m256i __DEFAULT_FN_ATTRS256 275_mm256_mask_sub_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 276{ 277 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 278 (__v4di)_mm256_sub_epi64(__A, __B), 279 (__v4di)__W); 280} 281 282static __inline__ __m256i __DEFAULT_FN_ATTRS256 283_mm256_maskz_sub_epi64(__mmask8 __U, __m256i __A, __m256i __B) 284{ 285 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 286 (__v4di)_mm256_sub_epi64(__A, __B), 287 (__v4di)_mm256_setzero_si256()); 288} 289 290static __inline__ __m128i __DEFAULT_FN_ATTRS128 291_mm_mask_add_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 292{ 293 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 294 (__v4si)_mm_add_epi32(__A, __B), 295 (__v4si)__W); 296} 297 298static __inline__ __m128i __DEFAULT_FN_ATTRS128 299_mm_maskz_add_epi32(__mmask8 __U, __m128i __A, __m128i __B) 300{ 301 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 302 (__v4si)_mm_add_epi32(__A, __B), 303 (__v4si)_mm_setzero_si128()); 304} 305 306static __inline__ __m128i __DEFAULT_FN_ATTRS128 307_mm_mask_add_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 308{ 309 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 310 (__v2di)_mm_add_epi64(__A, __B), 311 (__v2di)__W); 312} 313 314static __inline__ __m128i __DEFAULT_FN_ATTRS128 315_mm_maskz_add_epi64(__mmask8 __U, __m128i __A, __m128i __B) 316{ 317 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 318 (__v2di)_mm_add_epi64(__A, __B), 319 (__v2di)_mm_setzero_si128()); 320} 321 322static __inline__ __m128i __DEFAULT_FN_ATTRS128 323_mm_mask_sub_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 324{ 325 return 
(__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 326 (__v4si)_mm_sub_epi32(__A, __B), 327 (__v4si)__W); 328} 329 330static __inline__ __m128i __DEFAULT_FN_ATTRS128 331_mm_maskz_sub_epi32(__mmask8 __U, __m128i __A, __m128i __B) 332{ 333 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 334 (__v4si)_mm_sub_epi32(__A, __B), 335 (__v4si)_mm_setzero_si128()); 336} 337 338static __inline__ __m128i __DEFAULT_FN_ATTRS128 339_mm_mask_sub_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 340{ 341 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 342 (__v2di)_mm_sub_epi64(__A, __B), 343 (__v2di)__W); 344} 345 346static __inline__ __m128i __DEFAULT_FN_ATTRS128 347_mm_maskz_sub_epi64(__mmask8 __U, __m128i __A, __m128i __B) 348{ 349 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 350 (__v2di)_mm_sub_epi64(__A, __B), 351 (__v2di)_mm_setzero_si128()); 352} 353 354static __inline__ __m256i __DEFAULT_FN_ATTRS256 355_mm256_mask_mul_epi32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) 356{ 357 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 358 (__v4di)_mm256_mul_epi32(__X, __Y), 359 (__v4di)__W); 360} 361 362static __inline__ __m256i __DEFAULT_FN_ATTRS256 363_mm256_maskz_mul_epi32(__mmask8 __M, __m256i __X, __m256i __Y) 364{ 365 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 366 (__v4di)_mm256_mul_epi32(__X, __Y), 367 (__v4di)_mm256_setzero_si256()); 368} 369 370static __inline__ __m128i __DEFAULT_FN_ATTRS128 371_mm_mask_mul_epi32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) 372{ 373 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, 374 (__v2di)_mm_mul_epi32(__X, __Y), 375 (__v2di)__W); 376} 377 378static __inline__ __m128i __DEFAULT_FN_ATTRS128 379_mm_maskz_mul_epi32(__mmask8 __M, __m128i __X, __m128i __Y) 380{ 381 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, 382 (__v2di)_mm_mul_epi32(__X, __Y), 383 (__v2di)_mm_setzero_si128()); 384} 385 386static __inline__ __m256i __DEFAULT_FN_ATTRS256 
387_mm256_mask_mul_epu32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) 388{ 389 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 390 (__v4di)_mm256_mul_epu32(__X, __Y), 391 (__v4di)__W); 392} 393 394static __inline__ __m256i __DEFAULT_FN_ATTRS256 395_mm256_maskz_mul_epu32(__mmask8 __M, __m256i __X, __m256i __Y) 396{ 397 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 398 (__v4di)_mm256_mul_epu32(__X, __Y), 399 (__v4di)_mm256_setzero_si256()); 400} 401 402static __inline__ __m128i __DEFAULT_FN_ATTRS128 403_mm_mask_mul_epu32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) 404{ 405 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, 406 (__v2di)_mm_mul_epu32(__X, __Y), 407 (__v2di)__W); 408} 409 410static __inline__ __m128i __DEFAULT_FN_ATTRS128 411_mm_maskz_mul_epu32(__mmask8 __M, __m128i __X, __m128i __Y) 412{ 413 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, 414 (__v2di)_mm_mul_epu32(__X, __Y), 415 (__v2di)_mm_setzero_si128()); 416} 417 418static __inline__ __m256i __DEFAULT_FN_ATTRS256 419_mm256_maskz_mullo_epi32(__mmask8 __M, __m256i __A, __m256i __B) 420{ 421 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 422 (__v8si)_mm256_mullo_epi32(__A, __B), 423 (__v8si)_mm256_setzero_si256()); 424} 425 426static __inline__ __m256i __DEFAULT_FN_ATTRS256 427_mm256_mask_mullo_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) 428{ 429 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 430 (__v8si)_mm256_mullo_epi32(__A, __B), 431 (__v8si)__W); 432} 433 434static __inline__ __m128i __DEFAULT_FN_ATTRS128 435_mm_maskz_mullo_epi32(__mmask8 __M, __m128i __A, __m128i __B) 436{ 437 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 438 (__v4si)_mm_mullo_epi32(__A, __B), 439 (__v4si)_mm_setzero_si128()); 440} 441 442static __inline__ __m128i __DEFAULT_FN_ATTRS128 443_mm_mask_mullo_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) 444{ 445 return 
(__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 446 (__v4si)_mm_mullo_epi32(__A, __B), 447 (__v4si)__W); 448} 449 450static __inline__ __m256i __DEFAULT_FN_ATTRS256 451_mm256_and_epi32(__m256i __a, __m256i __b) 452{ 453 return (__m256i)((__v8su)__a & (__v8su)__b); 454} 455 456static __inline__ __m256i __DEFAULT_FN_ATTRS256 457_mm256_mask_and_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 458{ 459 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 460 (__v8si)_mm256_and_epi32(__A, __B), 461 (__v8si)__W); 462} 463 464static __inline__ __m256i __DEFAULT_FN_ATTRS256 465_mm256_maskz_and_epi32(__mmask8 __U, __m256i __A, __m256i __B) 466{ 467 return (__m256i)_mm256_mask_and_epi32(_mm256_setzero_si256(), __U, __A, __B); 468} 469 470static __inline__ __m128i __DEFAULT_FN_ATTRS128 471_mm_and_epi32(__m128i __a, __m128i __b) 472{ 473 return (__m128i)((__v4su)__a & (__v4su)__b); 474} 475 476static __inline__ __m128i __DEFAULT_FN_ATTRS128 477_mm_mask_and_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 478{ 479 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 480 (__v4si)_mm_and_epi32(__A, __B), 481 (__v4si)__W); 482} 483 484static __inline__ __m128i __DEFAULT_FN_ATTRS128 485_mm_maskz_and_epi32(__mmask8 __U, __m128i __A, __m128i __B) 486{ 487 return (__m128i)_mm_mask_and_epi32(_mm_setzero_si128(), __U, __A, __B); 488} 489 490static __inline__ __m256i __DEFAULT_FN_ATTRS256 491_mm256_andnot_epi32(__m256i __A, __m256i __B) 492{ 493 return (__m256i)(~(__v8su)__A & (__v8su)__B); 494} 495 496static __inline__ __m256i __DEFAULT_FN_ATTRS256 497_mm256_mask_andnot_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 498{ 499 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 500 (__v8si)_mm256_andnot_epi32(__A, __B), 501 (__v8si)__W); 502} 503 504static __inline__ __m256i __DEFAULT_FN_ATTRS256 505_mm256_maskz_andnot_epi32(__mmask8 __U, __m256i __A, __m256i __B) 506{ 507 return (__m256i)_mm256_mask_andnot_epi32(_mm256_setzero_si256(), 
508 __U, __A, __B); 509} 510 511static __inline__ __m128i __DEFAULT_FN_ATTRS128 512_mm_andnot_epi32(__m128i __A, __m128i __B) 513{ 514 return (__m128i)(~(__v4su)__A & (__v4su)__B); 515} 516 517static __inline__ __m128i __DEFAULT_FN_ATTRS128 518_mm_mask_andnot_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 519{ 520 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 521 (__v4si)_mm_andnot_epi32(__A, __B), 522 (__v4si)__W); 523} 524 525static __inline__ __m128i __DEFAULT_FN_ATTRS128 526_mm_maskz_andnot_epi32(__mmask8 __U, __m128i __A, __m128i __B) 527{ 528 return (__m128i)_mm_mask_andnot_epi32(_mm_setzero_si128(), __U, __A, __B); 529} 530 531static __inline__ __m256i __DEFAULT_FN_ATTRS256 532_mm256_or_epi32(__m256i __a, __m256i __b) 533{ 534 return (__m256i)((__v8su)__a | (__v8su)__b); 535} 536 537static __inline__ __m256i __DEFAULT_FN_ATTRS256 538_mm256_mask_or_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 539{ 540 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 541 (__v8si)_mm256_or_epi32(__A, __B), 542 (__v8si)__W); 543} 544 545static __inline__ __m256i __DEFAULT_FN_ATTRS256 546_mm256_maskz_or_epi32(__mmask8 __U, __m256i __A, __m256i __B) 547{ 548 return (__m256i)_mm256_mask_or_epi32(_mm256_setzero_si256(), __U, __A, __B); 549} 550 551static __inline__ __m128i __DEFAULT_FN_ATTRS128 552_mm_or_epi32(__m128i __a, __m128i __b) 553{ 554 return (__m128i)((__v4su)__a | (__v4su)__b); 555} 556 557static __inline__ __m128i __DEFAULT_FN_ATTRS128 558_mm_mask_or_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 559{ 560 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 561 (__v4si)_mm_or_epi32(__A, __B), 562 (__v4si)__W); 563} 564 565static __inline__ __m128i __DEFAULT_FN_ATTRS128 566_mm_maskz_or_epi32(__mmask8 __U, __m128i __A, __m128i __B) 567{ 568 return (__m128i)_mm_mask_or_epi32(_mm_setzero_si128(), __U, __A, __B); 569} 570 571static __inline__ __m256i __DEFAULT_FN_ATTRS256 572_mm256_xor_epi32(__m256i __a, __m256i 
__b) 573{ 574 return (__m256i)((__v8su)__a ^ (__v8su)__b); 575} 576 577static __inline__ __m256i __DEFAULT_FN_ATTRS256 578_mm256_mask_xor_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 579{ 580 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 581 (__v8si)_mm256_xor_epi32(__A, __B), 582 (__v8si)__W); 583} 584 585static __inline__ __m256i __DEFAULT_FN_ATTRS256 586_mm256_maskz_xor_epi32(__mmask8 __U, __m256i __A, __m256i __B) 587{ 588 return (__m256i)_mm256_mask_xor_epi32(_mm256_setzero_si256(), __U, __A, __B); 589} 590 591static __inline__ __m128i __DEFAULT_FN_ATTRS128 592_mm_xor_epi32(__m128i __a, __m128i __b) 593{ 594 return (__m128i)((__v4su)__a ^ (__v4su)__b); 595} 596 597static __inline__ __m128i __DEFAULT_FN_ATTRS128 598_mm_mask_xor_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 599{ 600 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 601 (__v4si)_mm_xor_epi32(__A, __B), 602 (__v4si)__W); 603} 604 605static __inline__ __m128i __DEFAULT_FN_ATTRS128 606_mm_maskz_xor_epi32(__mmask8 __U, __m128i __A, __m128i __B) 607{ 608 return (__m128i)_mm_mask_xor_epi32(_mm_setzero_si128(), __U, __A, __B); 609} 610 611static __inline__ __m256i __DEFAULT_FN_ATTRS256 612_mm256_and_epi64(__m256i __a, __m256i __b) 613{ 614 return (__m256i)((__v4du)__a & (__v4du)__b); 615} 616 617static __inline__ __m256i __DEFAULT_FN_ATTRS256 618_mm256_mask_and_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 619{ 620 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 621 (__v4di)_mm256_and_epi64(__A, __B), 622 (__v4di)__W); 623} 624 625static __inline__ __m256i __DEFAULT_FN_ATTRS256 626_mm256_maskz_and_epi64(__mmask8 __U, __m256i __A, __m256i __B) 627{ 628 return (__m256i)_mm256_mask_and_epi64(_mm256_setzero_si256(), __U, __A, __B); 629} 630 631static __inline__ __m128i __DEFAULT_FN_ATTRS128 632_mm_and_epi64(__m128i __a, __m128i __b) 633{ 634 return (__m128i)((__v2du)__a & (__v2du)__b); 635} 636 637static __inline__ __m128i 
__DEFAULT_FN_ATTRS128 638_mm_mask_and_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 639{ 640 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 641 (__v2di)_mm_and_epi64(__A, __B), 642 (__v2di)__W); 643} 644 645static __inline__ __m128i __DEFAULT_FN_ATTRS128 646_mm_maskz_and_epi64(__mmask8 __U, __m128i __A, __m128i __B) 647{ 648 return (__m128i)_mm_mask_and_epi64(_mm_setzero_si128(), __U, __A, __B); 649} 650 651static __inline__ __m256i __DEFAULT_FN_ATTRS256 652_mm256_andnot_epi64(__m256i __A, __m256i __B) 653{ 654 return (__m256i)(~(__v4du)__A & (__v4du)__B); 655} 656 657static __inline__ __m256i __DEFAULT_FN_ATTRS256 658_mm256_mask_andnot_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 659{ 660 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 661 (__v4di)_mm256_andnot_epi64(__A, __B), 662 (__v4di)__W); 663} 664 665static __inline__ __m256i __DEFAULT_FN_ATTRS256 666_mm256_maskz_andnot_epi64(__mmask8 __U, __m256i __A, __m256i __B) 667{ 668 return (__m256i)_mm256_mask_andnot_epi64(_mm256_setzero_si256(), 669 __U, __A, __B); 670} 671 672static __inline__ __m128i __DEFAULT_FN_ATTRS128 673_mm_andnot_epi64(__m128i __A, __m128i __B) 674{ 675 return (__m128i)(~(__v2du)__A & (__v2du)__B); 676} 677 678static __inline__ __m128i __DEFAULT_FN_ATTRS128 679_mm_mask_andnot_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 680{ 681 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 682 (__v2di)_mm_andnot_epi64(__A, __B), 683 (__v2di)__W); 684} 685 686static __inline__ __m128i __DEFAULT_FN_ATTRS128 687_mm_maskz_andnot_epi64(__mmask8 __U, __m128i __A, __m128i __B) 688{ 689 return (__m128i)_mm_mask_andnot_epi64(_mm_setzero_si128(), __U, __A, __B); 690} 691 692static __inline__ __m256i __DEFAULT_FN_ATTRS256 693_mm256_or_epi64(__m256i __a, __m256i __b) 694{ 695 return (__m256i)((__v4du)__a | (__v4du)__b); 696} 697 698static __inline__ __m256i __DEFAULT_FN_ATTRS256 699_mm256_mask_or_epi64(__m256i __W, __mmask8 __U, __m256i __A, 
__m256i __B) 700{ 701 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 702 (__v4di)_mm256_or_epi64(__A, __B), 703 (__v4di)__W); 704} 705 706static __inline__ __m256i __DEFAULT_FN_ATTRS256 707_mm256_maskz_or_epi64(__mmask8 __U, __m256i __A, __m256i __B) 708{ 709 return (__m256i)_mm256_mask_or_epi64(_mm256_setzero_si256(), __U, __A, __B); 710} 711 712static __inline__ __m128i __DEFAULT_FN_ATTRS128 713_mm_or_epi64(__m128i __a, __m128i __b) 714{ 715 return (__m128i)((__v2du)__a | (__v2du)__b); 716} 717 718static __inline__ __m128i __DEFAULT_FN_ATTRS128 719_mm_mask_or_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 720{ 721 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 722 (__v2di)_mm_or_epi64(__A, __B), 723 (__v2di)__W); 724} 725 726static __inline__ __m128i __DEFAULT_FN_ATTRS128 727_mm_maskz_or_epi64(__mmask8 __U, __m128i __A, __m128i __B) 728{ 729 return (__m128i)_mm_mask_or_epi64(_mm_setzero_si128(), __U, __A, __B); 730} 731 732static __inline__ __m256i __DEFAULT_FN_ATTRS256 733_mm256_xor_epi64(__m256i __a, __m256i __b) 734{ 735 return (__m256i)((__v4du)__a ^ (__v4du)__b); 736} 737 738static __inline__ __m256i __DEFAULT_FN_ATTRS256 739_mm256_mask_xor_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 740{ 741 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 742 (__v4di)_mm256_xor_epi64(__A, __B), 743 (__v4di)__W); 744} 745 746static __inline__ __m256i __DEFAULT_FN_ATTRS256 747_mm256_maskz_xor_epi64(__mmask8 __U, __m256i __A, __m256i __B) 748{ 749 return (__m256i)_mm256_mask_xor_epi64(_mm256_setzero_si256(), __U, __A, __B); 750} 751 752static __inline__ __m128i __DEFAULT_FN_ATTRS128 753_mm_xor_epi64(__m128i __a, __m128i __b) 754{ 755 return (__m128i)((__v2du)__a ^ (__v2du)__b); 756} 757 758static __inline__ __m128i __DEFAULT_FN_ATTRS128 759_mm_mask_xor_epi64(__m128i __W, __mmask8 __U, __m128i __A, 760 __m128i __B) 761{ 762 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 763 (__v2di)_mm_xor_epi64(__A, __B), 
764 (__v2di)__W); 765} 766 767static __inline__ __m128i __DEFAULT_FN_ATTRS128 768_mm_maskz_xor_epi64(__mmask8 __U, __m128i __A, __m128i __B) 769{ 770 return (__m128i)_mm_mask_xor_epi64(_mm_setzero_si128(), __U, __A, __B); 771} 772 773#define _mm_cmp_epi32_mask(a, b, p) \ 774 ((__mmask8)__builtin_ia32_cmpd128_mask((__v4si)(__m128i)(a), \ 775 (__v4si)(__m128i)(b), (int)(p), \ 776 (__mmask8)-1)) 777 778#define _mm_mask_cmp_epi32_mask(m, a, b, p) \ 779 ((__mmask8)__builtin_ia32_cmpd128_mask((__v4si)(__m128i)(a), \ 780 (__v4si)(__m128i)(b), (int)(p), \ 781 (__mmask8)(m))) 782 783#define _mm_cmp_epu32_mask(a, b, p) \ 784 ((__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)(__m128i)(a), \ 785 (__v4si)(__m128i)(b), (int)(p), \ 786 (__mmask8)-1)) 787 788#define _mm_mask_cmp_epu32_mask(m, a, b, p) \ 789 ((__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)(__m128i)(a), \ 790 (__v4si)(__m128i)(b), (int)(p), \ 791 (__mmask8)(m))) 792 793#define _mm256_cmp_epi32_mask(a, b, p) \ 794 ((__mmask8)__builtin_ia32_cmpd256_mask((__v8si)(__m256i)(a), \ 795 (__v8si)(__m256i)(b), (int)(p), \ 796 (__mmask8)-1)) 797 798#define _mm256_mask_cmp_epi32_mask(m, a, b, p) \ 799 ((__mmask8)__builtin_ia32_cmpd256_mask((__v8si)(__m256i)(a), \ 800 (__v8si)(__m256i)(b), (int)(p), \ 801 (__mmask8)(m))) 802 803#define _mm256_cmp_epu32_mask(a, b, p) \ 804 ((__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)(__m256i)(a), \ 805 (__v8si)(__m256i)(b), (int)(p), \ 806 (__mmask8)-1)) 807 808#define _mm256_mask_cmp_epu32_mask(m, a, b, p) \ 809 ((__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)(__m256i)(a), \ 810 (__v8si)(__m256i)(b), (int)(p), \ 811 (__mmask8)(m))) 812 813#define _mm_cmp_epi64_mask(a, b, p) \ 814 ((__mmask8)__builtin_ia32_cmpq128_mask((__v2di)(__m128i)(a), \ 815 (__v2di)(__m128i)(b), (int)(p), \ 816 (__mmask8)-1)) 817 818#define _mm_mask_cmp_epi64_mask(m, a, b, p) \ 819 ((__mmask8)__builtin_ia32_cmpq128_mask((__v2di)(__m128i)(a), \ 820 (__v2di)(__m128i)(b), (int)(p), \ 821 (__mmask8)(m))) 822 823#define 
_mm_cmp_epu64_mask(a, b, p) \ 824 ((__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)(__m128i)(a), \ 825 (__v2di)(__m128i)(b), (int)(p), \ 826 (__mmask8)-1)) 827 828#define _mm_mask_cmp_epu64_mask(m, a, b, p) \ 829 ((__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)(__m128i)(a), \ 830 (__v2di)(__m128i)(b), (int)(p), \ 831 (__mmask8)(m))) 832 833#define _mm256_cmp_epi64_mask(a, b, p) \ 834 ((__mmask8)__builtin_ia32_cmpq256_mask((__v4di)(__m256i)(a), \ 835 (__v4di)(__m256i)(b), (int)(p), \ 836 (__mmask8)-1)) 837 838#define _mm256_mask_cmp_epi64_mask(m, a, b, p) \ 839 ((__mmask8)__builtin_ia32_cmpq256_mask((__v4di)(__m256i)(a), \ 840 (__v4di)(__m256i)(b), (int)(p), \ 841 (__mmask8)(m))) 842 843#define _mm256_cmp_epu64_mask(a, b, p) \ 844 ((__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)(__m256i)(a), \ 845 (__v4di)(__m256i)(b), (int)(p), \ 846 (__mmask8)-1)) 847 848#define _mm256_mask_cmp_epu64_mask(m, a, b, p) \ 849 ((__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)(__m256i)(a), \ 850 (__v4di)(__m256i)(b), (int)(p), \ 851 (__mmask8)(m))) 852 853#define _mm256_cmp_ps_mask(a, b, p) \ 854 ((__mmask8)__builtin_ia32_cmpps256_mask((__v8sf)(__m256)(a), \ 855 (__v8sf)(__m256)(b), (int)(p), \ 856 (__mmask8)-1)) 857 858#define _mm256_mask_cmp_ps_mask(m, a, b, p) \ 859 ((__mmask8)__builtin_ia32_cmpps256_mask((__v8sf)(__m256)(a), \ 860 (__v8sf)(__m256)(b), (int)(p), \ 861 (__mmask8)(m))) 862 863#define _mm256_cmp_pd_mask(a, b, p) \ 864 ((__mmask8)__builtin_ia32_cmppd256_mask((__v4df)(__m256d)(a), \ 865 (__v4df)(__m256d)(b), (int)(p), \ 866 (__mmask8)-1)) 867 868#define _mm256_mask_cmp_pd_mask(m, a, b, p) \ 869 ((__mmask8)__builtin_ia32_cmppd256_mask((__v4df)(__m256d)(a), \ 870 (__v4df)(__m256d)(b), (int)(p), \ 871 (__mmask8)(m))) 872 873#define _mm_cmp_ps_mask(a, b, p) \ 874 ((__mmask8)__builtin_ia32_cmpps128_mask((__v4sf)(__m128)(a), \ 875 (__v4sf)(__m128)(b), (int)(p), \ 876 (__mmask8)-1)) 877 878#define _mm_mask_cmp_ps_mask(m, a, b, p) \ 879 
((__mmask8)__builtin_ia32_cmpps128_mask((__v4sf)(__m128)(a), \ 880 (__v4sf)(__m128)(b), (int)(p), \ 881 (__mmask8)(m))) 882 883#define _mm_cmp_pd_mask(a, b, p) \ 884 ((__mmask8)__builtin_ia32_cmppd128_mask((__v2df)(__m128d)(a), \ 885 (__v2df)(__m128d)(b), (int)(p), \ 886 (__mmask8)-1)) 887 888#define _mm_mask_cmp_pd_mask(m, a, b, p) \ 889 ((__mmask8)__builtin_ia32_cmppd128_mask((__v2df)(__m128d)(a), \ 890 (__v2df)(__m128d)(b), (int)(p), \ 891 (__mmask8)(m))) 892 893static __inline__ __m128d __DEFAULT_FN_ATTRS128 894_mm_mask_fmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) 895{ 896 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 897 __builtin_ia32_vfmaddpd ((__v2df) __A, 898 (__v2df) __B, 899 (__v2df) __C), 900 (__v2df) __A); 901} 902 903static __inline__ __m128d __DEFAULT_FN_ATTRS128 904_mm_mask3_fmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) 905{ 906 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 907 __builtin_ia32_vfmaddpd ((__v2df) __A, 908 (__v2df) __B, 909 (__v2df) __C), 910 (__v2df) __C); 911} 912 913static __inline__ __m128d __DEFAULT_FN_ATTRS128 914_mm_maskz_fmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) 915{ 916 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 917 __builtin_ia32_vfmaddpd ((__v2df) __A, 918 (__v2df) __B, 919 (__v2df) __C), 920 (__v2df)_mm_setzero_pd()); 921} 922 923static __inline__ __m128d __DEFAULT_FN_ATTRS128 924_mm_mask_fmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) 925{ 926 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 927 __builtin_ia32_vfmaddpd ((__v2df) __A, 928 (__v2df) __B, 929 -(__v2df) __C), 930 (__v2df) __A); 931} 932 933static __inline__ __m128d __DEFAULT_FN_ATTRS128 934_mm_maskz_fmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) 935{ 936 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 937 __builtin_ia32_vfmaddpd ((__v2df) __A, 938 (__v2df) __B, 939 -(__v2df) __C), 940 
(__v2df)_mm_setzero_pd()); 941} 942 943static __inline__ __m128d __DEFAULT_FN_ATTRS128 944_mm_mask3_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) 945{ 946 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 947 __builtin_ia32_vfmaddpd (-(__v2df) __A, 948 (__v2df) __B, 949 (__v2df) __C), 950 (__v2df) __C); 951} 952 953static __inline__ __m128d __DEFAULT_FN_ATTRS128 954_mm_maskz_fnmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) 955{ 956 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 957 __builtin_ia32_vfmaddpd (-(__v2df) __A, 958 (__v2df) __B, 959 (__v2df) __C), 960 (__v2df)_mm_setzero_pd()); 961} 962 963static __inline__ __m128d __DEFAULT_FN_ATTRS128 964_mm_maskz_fnmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) 965{ 966 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 967 __builtin_ia32_vfmaddpd (-(__v2df) __A, 968 (__v2df) __B, 969 -(__v2df) __C), 970 (__v2df)_mm_setzero_pd()); 971} 972 973static __inline__ __m256d __DEFAULT_FN_ATTRS256 974_mm256_mask_fmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) 975{ 976 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 977 __builtin_ia32_vfmaddpd256 ((__v4df) __A, 978 (__v4df) __B, 979 (__v4df) __C), 980 (__v4df) __A); 981} 982 983static __inline__ __m256d __DEFAULT_FN_ATTRS256 984_mm256_mask3_fmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) 985{ 986 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 987 __builtin_ia32_vfmaddpd256 ((__v4df) __A, 988 (__v4df) __B, 989 (__v4df) __C), 990 (__v4df) __C); 991} 992 993static __inline__ __m256d __DEFAULT_FN_ATTRS256 994_mm256_maskz_fmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) 995{ 996 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 997 __builtin_ia32_vfmaddpd256 ((__v4df) __A, 998 (__v4df) __B, 999 (__v4df) __C), 1000 (__v4df)_mm256_setzero_pd()); 1001} 1002 1003static __inline__ __m256d __DEFAULT_FN_ATTRS256 
1004_mm256_mask_fmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) 1005{ 1006 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 1007 __builtin_ia32_vfmaddpd256 ((__v4df) __A, 1008 (__v4df) __B, 1009 -(__v4df) __C), 1010 (__v4df) __A); 1011} 1012 1013static __inline__ __m256d __DEFAULT_FN_ATTRS256 1014_mm256_maskz_fmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) 1015{ 1016 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 1017 __builtin_ia32_vfmaddpd256 ((__v4df) __A, 1018 (__v4df) __B, 1019 -(__v4df) __C), 1020 (__v4df)_mm256_setzero_pd()); 1021} 1022 1023static __inline__ __m256d __DEFAULT_FN_ATTRS256 1024_mm256_mask3_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) 1025{ 1026 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 1027 __builtin_ia32_vfmaddpd256 (-(__v4df) __A, 1028 (__v4df) __B, 1029 (__v4df) __C), 1030 (__v4df) __C); 1031} 1032 1033static __inline__ __m256d __DEFAULT_FN_ATTRS256 1034_mm256_maskz_fnmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) 1035{ 1036 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 1037 __builtin_ia32_vfmaddpd256 (-(__v4df) __A, 1038 (__v4df) __B, 1039 (__v4df) __C), 1040 (__v4df)_mm256_setzero_pd()); 1041} 1042 1043static __inline__ __m256d __DEFAULT_FN_ATTRS256 1044_mm256_maskz_fnmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) 1045{ 1046 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 1047 __builtin_ia32_vfmaddpd256 (-(__v4df) __A, 1048 (__v4df) __B, 1049 -(__v4df) __C), 1050 (__v4df)_mm256_setzero_pd()); 1051} 1052 1053static __inline__ __m128 __DEFAULT_FN_ATTRS128 1054_mm_mask_fmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) 1055{ 1056 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 1057 __builtin_ia32_vfmaddps ((__v4sf) __A, 1058 (__v4sf) __B, 1059 (__v4sf) __C), 1060 (__v4sf) __A); 1061} 1062 1063static __inline__ __m128 __DEFAULT_FN_ATTRS128 1064_mm_mask3_fmadd_ps(__m128 __A, 
__m128 __B, __m128 __C, __mmask8 __U) 1065{ 1066 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 1067 __builtin_ia32_vfmaddps ((__v4sf) __A, 1068 (__v4sf) __B, 1069 (__v4sf) __C), 1070 (__v4sf) __C); 1071} 1072 1073static __inline__ __m128 __DEFAULT_FN_ATTRS128 1074_mm_maskz_fmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) 1075{ 1076 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 1077 __builtin_ia32_vfmaddps ((__v4sf) __A, 1078 (__v4sf) __B, 1079 (__v4sf) __C), 1080 (__v4sf)_mm_setzero_ps()); 1081} 1082 1083static __inline__ __m128 __DEFAULT_FN_ATTRS128 1084_mm_mask_fmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) 1085{ 1086 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 1087 __builtin_ia32_vfmaddps ((__v4sf) __A, 1088 (__v4sf) __B, 1089 -(__v4sf) __C), 1090 (__v4sf) __A); 1091} 1092 1093static __inline__ __m128 __DEFAULT_FN_ATTRS128 1094_mm_maskz_fmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) 1095{ 1096 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 1097 __builtin_ia32_vfmaddps ((__v4sf) __A, 1098 (__v4sf) __B, 1099 -(__v4sf) __C), 1100 (__v4sf)_mm_setzero_ps()); 1101} 1102 1103static __inline__ __m128 __DEFAULT_FN_ATTRS128 1104_mm_mask3_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) 1105{ 1106 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 1107 __builtin_ia32_vfmaddps (-(__v4sf) __A, 1108 (__v4sf) __B, 1109 (__v4sf) __C), 1110 (__v4sf) __C); 1111} 1112 1113static __inline__ __m128 __DEFAULT_FN_ATTRS128 1114_mm_maskz_fnmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) 1115{ 1116 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 1117 __builtin_ia32_vfmaddps (-(__v4sf) __A, 1118 (__v4sf) __B, 1119 (__v4sf) __C), 1120 (__v4sf)_mm_setzero_ps()); 1121} 1122 1123static __inline__ __m128 __DEFAULT_FN_ATTRS128 1124_mm_maskz_fnmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) 1125{ 1126 return (__m128) __builtin_ia32_selectps_128((__mmask8) 
__U, 1127 __builtin_ia32_vfmaddps (-(__v4sf) __A, 1128 (__v4sf) __B, 1129 -(__v4sf) __C), 1130 (__v4sf)_mm_setzero_ps()); 1131} 1132 1133static __inline__ __m256 __DEFAULT_FN_ATTRS256 1134_mm256_mask_fmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) 1135{ 1136 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 1137 __builtin_ia32_vfmaddps256 ((__v8sf) __A, 1138 (__v8sf) __B, 1139 (__v8sf) __C), 1140 (__v8sf) __A); 1141} 1142 1143static __inline__ __m256 __DEFAULT_FN_ATTRS256 1144_mm256_mask3_fmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) 1145{ 1146 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 1147 __builtin_ia32_vfmaddps256 ((__v8sf) __A, 1148 (__v8sf) __B, 1149 (__v8sf) __C), 1150 (__v8sf) __C); 1151} 1152 1153static __inline__ __m256 __DEFAULT_FN_ATTRS256 1154_mm256_maskz_fmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) 1155{ 1156 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 1157 __builtin_ia32_vfmaddps256 ((__v8sf) __A, 1158 (__v8sf) __B, 1159 (__v8sf) __C), 1160 (__v8sf)_mm256_setzero_ps()); 1161} 1162 1163static __inline__ __m256 __DEFAULT_FN_ATTRS256 1164_mm256_mask_fmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) 1165{ 1166 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 1167 __builtin_ia32_vfmaddps256 ((__v8sf) __A, 1168 (__v8sf) __B, 1169 -(__v8sf) __C), 1170 (__v8sf) __A); 1171} 1172 1173static __inline__ __m256 __DEFAULT_FN_ATTRS256 1174_mm256_maskz_fmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) 1175{ 1176 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 1177 __builtin_ia32_vfmaddps256 ((__v8sf) __A, 1178 (__v8sf) __B, 1179 -(__v8sf) __C), 1180 (__v8sf)_mm256_setzero_ps()); 1181} 1182 1183static __inline__ __m256 __DEFAULT_FN_ATTRS256 1184_mm256_mask3_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) 1185{ 1186 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 1187 __builtin_ia32_vfmaddps256 (-(__v8sf) __A, 1188 (__v8sf) 
__B, 1189 (__v8sf) __C), 1190 (__v8sf) __C); 1191} 1192 1193static __inline__ __m256 __DEFAULT_FN_ATTRS256 1194_mm256_maskz_fnmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) 1195{ 1196 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 1197 __builtin_ia32_vfmaddps256 (-(__v8sf) __A, 1198 (__v8sf) __B, 1199 (__v8sf) __C), 1200 (__v8sf)_mm256_setzero_ps()); 1201} 1202 1203static __inline__ __m256 __DEFAULT_FN_ATTRS256 1204_mm256_maskz_fnmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) 1205{ 1206 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 1207 __builtin_ia32_vfmaddps256 (-(__v8sf) __A, 1208 (__v8sf) __B, 1209 -(__v8sf) __C), 1210 (__v8sf)_mm256_setzero_ps()); 1211} 1212 1213static __inline__ __m128d __DEFAULT_FN_ATTRS128 1214_mm_mask_fmaddsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) 1215{ 1216 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 1217 __builtin_ia32_vfmaddsubpd ((__v2df) __A, 1218 (__v2df) __B, 1219 (__v2df) __C), 1220 (__v2df) __A); 1221} 1222 1223static __inline__ __m128d __DEFAULT_FN_ATTRS128 1224_mm_mask3_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) 1225{ 1226 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 1227 __builtin_ia32_vfmaddsubpd ((__v2df) __A, 1228 (__v2df) __B, 1229 (__v2df) __C), 1230 (__v2df) __C); 1231} 1232 1233static __inline__ __m128d __DEFAULT_FN_ATTRS128 1234_mm_maskz_fmaddsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) 1235{ 1236 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 1237 __builtin_ia32_vfmaddsubpd ((__v2df) __A, 1238 (__v2df) __B, 1239 (__v2df) __C), 1240 (__v2df)_mm_setzero_pd()); 1241} 1242 1243static __inline__ __m128d __DEFAULT_FN_ATTRS128 1244_mm_mask_fmsubadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) 1245{ 1246 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 1247 __builtin_ia32_vfmaddsubpd ((__v2df) __A, 1248 (__v2df) __B, 1249 -(__v2df) __C), 1250 (__v2df) 
__A); 1251} 1252 1253static __inline__ __m128d __DEFAULT_FN_ATTRS128 1254_mm_maskz_fmsubadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) 1255{ 1256 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 1257 __builtin_ia32_vfmaddsubpd ((__v2df) __A, 1258 (__v2df) __B, 1259 -(__v2df) __C), 1260 (__v2df)_mm_setzero_pd()); 1261} 1262 1263static __inline__ __m256d __DEFAULT_FN_ATTRS256 1264_mm256_mask_fmaddsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) 1265{ 1266 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 1267 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A, 1268 (__v4df) __B, 1269 (__v4df) __C), 1270 (__v4df) __A); 1271} 1272 1273static __inline__ __m256d __DEFAULT_FN_ATTRS256 1274_mm256_mask3_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) 1275{ 1276 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 1277 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A, 1278 (__v4df) __B, 1279 (__v4df) __C), 1280 (__v4df) __C); 1281} 1282 1283static __inline__ __m256d __DEFAULT_FN_ATTRS256 1284_mm256_maskz_fmaddsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) 1285{ 1286 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 1287 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A, 1288 (__v4df) __B, 1289 (__v4df) __C), 1290 (__v4df)_mm256_setzero_pd()); 1291} 1292 1293static __inline__ __m256d __DEFAULT_FN_ATTRS256 1294_mm256_mask_fmsubadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) 1295{ 1296 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 1297 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A, 1298 (__v4df) __B, 1299 -(__v4df) __C), 1300 (__v4df) __A); 1301} 1302 1303static __inline__ __m256d __DEFAULT_FN_ATTRS256 1304_mm256_maskz_fmsubadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) 1305{ 1306 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 1307 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A, 1308 (__v4df) __B, 1309 -(__v4df) __C), 1310 (__v4df)_mm256_setzero_pd()); 
1311} 1312 1313static __inline__ __m128 __DEFAULT_FN_ATTRS128 1314_mm_mask_fmaddsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) 1315{ 1316 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 1317 __builtin_ia32_vfmaddsubps ((__v4sf) __A, 1318 (__v4sf) __B, 1319 (__v4sf) __C), 1320 (__v4sf) __A); 1321} 1322 1323static __inline__ __m128 __DEFAULT_FN_ATTRS128 1324_mm_mask3_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) 1325{ 1326 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 1327 __builtin_ia32_vfmaddsubps ((__v4sf) __A, 1328 (__v4sf) __B, 1329 (__v4sf) __C), 1330 (__v4sf) __C); 1331} 1332 1333static __inline__ __m128 __DEFAULT_FN_ATTRS128 1334_mm_maskz_fmaddsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) 1335{ 1336 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 1337 __builtin_ia32_vfmaddsubps ((__v4sf) __A, 1338 (__v4sf) __B, 1339 (__v4sf) __C), 1340 (__v4sf)_mm_setzero_ps()); 1341} 1342 1343static __inline__ __m128 __DEFAULT_FN_ATTRS128 1344_mm_mask_fmsubadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) 1345{ 1346 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 1347 __builtin_ia32_vfmaddsubps ((__v4sf) __A, 1348 (__v4sf) __B, 1349 -(__v4sf) __C), 1350 (__v4sf) __A); 1351} 1352 1353static __inline__ __m128 __DEFAULT_FN_ATTRS128 1354_mm_maskz_fmsubadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) 1355{ 1356 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 1357 __builtin_ia32_vfmaddsubps ((__v4sf) __A, 1358 (__v4sf) __B, 1359 -(__v4sf) __C), 1360 (__v4sf)_mm_setzero_ps()); 1361} 1362 1363static __inline__ __m256 __DEFAULT_FN_ATTRS256 1364_mm256_mask_fmaddsub_ps(__m256 __A, __mmask8 __U, __m256 __B, 1365 __m256 __C) 1366{ 1367 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 1368 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A, 1369 (__v8sf) __B, 1370 (__v8sf) __C), 1371 (__v8sf) __A); 1372} 1373 1374static __inline__ __m256 __DEFAULT_FN_ATTRS256 
1375_mm256_mask3_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) 1376{ 1377 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 1378 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A, 1379 (__v8sf) __B, 1380 (__v8sf) __C), 1381 (__v8sf) __C); 1382} 1383 1384static __inline__ __m256 __DEFAULT_FN_ATTRS256 1385_mm256_maskz_fmaddsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) 1386{ 1387 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 1388 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A, 1389 (__v8sf) __B, 1390 (__v8sf) __C), 1391 (__v8sf)_mm256_setzero_ps()); 1392} 1393 1394static __inline__ __m256 __DEFAULT_FN_ATTRS256 1395_mm256_mask_fmsubadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) 1396{ 1397 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 1398 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A, 1399 (__v8sf) __B, 1400 -(__v8sf) __C), 1401 (__v8sf) __A); 1402} 1403 1404static __inline__ __m256 __DEFAULT_FN_ATTRS256 1405_mm256_maskz_fmsubadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) 1406{ 1407 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 1408 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A, 1409 (__v8sf) __B, 1410 -(__v8sf) __C), 1411 (__v8sf)_mm256_setzero_ps()); 1412} 1413 1414static __inline__ __m128d __DEFAULT_FN_ATTRS128 1415_mm_mask3_fmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) 1416{ 1417 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 1418 __builtin_ia32_vfmaddpd ((__v2df) __A, 1419 (__v2df) __B, 1420 -(__v2df) __C), 1421 (__v2df) __C); 1422} 1423 1424static __inline__ __m256d __DEFAULT_FN_ATTRS256 1425_mm256_mask3_fmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) 1426{ 1427 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 1428 __builtin_ia32_vfmaddpd256 ((__v4df) __A, 1429 (__v4df) __B, 1430 -(__v4df) __C), 1431 (__v4df) __C); 1432} 1433 1434static __inline__ __m128 __DEFAULT_FN_ATTRS128 1435_mm_mask3_fmsub_ps(__m128 __A, __m128 __B, 
__m128 __C, __mmask8 __U) 1436{ 1437 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 1438 __builtin_ia32_vfmaddps ((__v4sf) __A, 1439 (__v4sf) __B, 1440 -(__v4sf) __C), 1441 (__v4sf) __C); 1442} 1443 1444static __inline__ __m256 __DEFAULT_FN_ATTRS256 1445_mm256_mask3_fmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) 1446{ 1447 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 1448 __builtin_ia32_vfmaddps256 ((__v8sf) __A, 1449 (__v8sf) __B, 1450 -(__v8sf) __C), 1451 (__v8sf) __C); 1452} 1453 1454static __inline__ __m128d __DEFAULT_FN_ATTRS128 1455_mm_mask3_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) 1456{ 1457 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 1458 __builtin_ia32_vfmaddsubpd ((__v2df) __A, 1459 (__v2df) __B, 1460 -(__v2df) __C), 1461 (__v2df) __C); 1462} 1463 1464static __inline__ __m256d __DEFAULT_FN_ATTRS256 1465_mm256_mask3_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) 1466{ 1467 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 1468 __builtin_ia32_vfmaddsubpd256 ((__v4df) __A, 1469 (__v4df) __B, 1470 -(__v4df) __C), 1471 (__v4df) __C); 1472} 1473 1474static __inline__ __m128 __DEFAULT_FN_ATTRS128 1475_mm_mask3_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) 1476{ 1477 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 1478 __builtin_ia32_vfmaddsubps ((__v4sf) __A, 1479 (__v4sf) __B, 1480 -(__v4sf) __C), 1481 (__v4sf) __C); 1482} 1483 1484static __inline__ __m256 __DEFAULT_FN_ATTRS256 1485_mm256_mask3_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) 1486{ 1487 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 1488 __builtin_ia32_vfmaddsubps256 ((__v8sf) __A, 1489 (__v8sf) __B, 1490 -(__v8sf) __C), 1491 (__v8sf) __C); 1492} 1493 1494static __inline__ __m128d __DEFAULT_FN_ATTRS128 1495_mm_mask_fnmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) 1496{ 1497 return (__m128d) 
__builtin_ia32_selectpd_128((__mmask8) __U, 1498 __builtin_ia32_vfmaddpd ((__v2df) __A, 1499 -(__v2df) __B, 1500 (__v2df) __C), 1501 (__v2df) __A); 1502} 1503 1504static __inline__ __m256d __DEFAULT_FN_ATTRS256 1505_mm256_mask_fnmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) 1506{ 1507 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 1508 __builtin_ia32_vfmaddpd256 ((__v4df) __A, 1509 -(__v4df) __B, 1510 (__v4df) __C), 1511 (__v4df) __A); 1512} 1513 1514static __inline__ __m128 __DEFAULT_FN_ATTRS128 1515_mm_mask_fnmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) 1516{ 1517 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 1518 __builtin_ia32_vfmaddps ((__v4sf) __A, 1519 -(__v4sf) __B, 1520 (__v4sf) __C), 1521 (__v4sf) __A); 1522} 1523 1524static __inline__ __m256 __DEFAULT_FN_ATTRS256 1525_mm256_mask_fnmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) 1526{ 1527 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 1528 __builtin_ia32_vfmaddps256 ((__v8sf) __A, 1529 -(__v8sf) __B, 1530 (__v8sf) __C), 1531 (__v8sf) __A); 1532} 1533 1534static __inline__ __m128d __DEFAULT_FN_ATTRS128 1535_mm_mask_fnmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) 1536{ 1537 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 1538 __builtin_ia32_vfmaddpd ((__v2df) __A, 1539 -(__v2df) __B, 1540 -(__v2df) __C), 1541 (__v2df) __A); 1542} 1543 1544static __inline__ __m128d __DEFAULT_FN_ATTRS128 1545_mm_mask3_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) 1546{ 1547 return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, 1548 __builtin_ia32_vfmaddpd ((__v2df) __A, 1549 -(__v2df) __B, 1550 -(__v2df) __C), 1551 (__v2df) __C); 1552} 1553 1554static __inline__ __m256d __DEFAULT_FN_ATTRS256 1555_mm256_mask_fnmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) 1556{ 1557 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 1558 __builtin_ia32_vfmaddpd256 ((__v4df) __A, 1559 
-(__v4df) __B, 1560 -(__v4df) __C), 1561 (__v4df) __A); 1562} 1563 1564static __inline__ __m256d __DEFAULT_FN_ATTRS256 1565_mm256_mask3_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) 1566{ 1567 return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, 1568 __builtin_ia32_vfmaddpd256 ((__v4df) __A, 1569 -(__v4df) __B, 1570 -(__v4df) __C), 1571 (__v4df) __C); 1572} 1573 1574static __inline__ __m128 __DEFAULT_FN_ATTRS128 1575_mm_mask_fnmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) 1576{ 1577 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 1578 __builtin_ia32_vfmaddps ((__v4sf) __A, 1579 -(__v4sf) __B, 1580 -(__v4sf) __C), 1581 (__v4sf) __A); 1582} 1583 1584static __inline__ __m128 __DEFAULT_FN_ATTRS128 1585_mm_mask3_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) 1586{ 1587 return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, 1588 __builtin_ia32_vfmaddps ((__v4sf) __A, 1589 -(__v4sf) __B, 1590 -(__v4sf) __C), 1591 (__v4sf) __C); 1592} 1593 1594static __inline__ __m256 __DEFAULT_FN_ATTRS256 1595_mm256_mask_fnmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) 1596{ 1597 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 1598 __builtin_ia32_vfmaddps256 ((__v8sf) __A, 1599 -(__v8sf) __B, 1600 -(__v8sf) __C), 1601 (__v8sf) __A); 1602} 1603 1604static __inline__ __m256 __DEFAULT_FN_ATTRS256 1605_mm256_mask3_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) 1606{ 1607 return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, 1608 __builtin_ia32_vfmaddps256 ((__v8sf) __A, 1609 -(__v8sf) __B, 1610 -(__v8sf) __C), 1611 (__v8sf) __C); 1612} 1613 1614static __inline__ __m128d __DEFAULT_FN_ATTRS128 1615_mm_mask_add_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 1616 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 1617 (__v2df)_mm_add_pd(__A, __B), 1618 (__v2df)__W); 1619} 1620 1621static __inline__ __m128d __DEFAULT_FN_ATTRS128 1622_mm_maskz_add_pd(__mmask8 __U, __m128d 
__A, __m128d __B) { 1623 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 1624 (__v2df)_mm_add_pd(__A, __B), 1625 (__v2df)_mm_setzero_pd()); 1626} 1627 1628static __inline__ __m256d __DEFAULT_FN_ATTRS256 1629_mm256_mask_add_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { 1630 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 1631 (__v4df)_mm256_add_pd(__A, __B), 1632 (__v4df)__W); 1633} 1634 1635static __inline__ __m256d __DEFAULT_FN_ATTRS256 1636_mm256_maskz_add_pd(__mmask8 __U, __m256d __A, __m256d __B) { 1637 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 1638 (__v4df)_mm256_add_pd(__A, __B), 1639 (__v4df)_mm256_setzero_pd()); 1640} 1641 1642static __inline__ __m128 __DEFAULT_FN_ATTRS128 1643_mm_mask_add_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 1644 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 1645 (__v4sf)_mm_add_ps(__A, __B), 1646 (__v4sf)__W); 1647} 1648 1649static __inline__ __m128 __DEFAULT_FN_ATTRS128 1650_mm_maskz_add_ps(__mmask8 __U, __m128 __A, __m128 __B) { 1651 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 1652 (__v4sf)_mm_add_ps(__A, __B), 1653 (__v4sf)_mm_setzero_ps()); 1654} 1655 1656static __inline__ __m256 __DEFAULT_FN_ATTRS256 1657_mm256_mask_add_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 1658 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 1659 (__v8sf)_mm256_add_ps(__A, __B), 1660 (__v8sf)__W); 1661} 1662 1663static __inline__ __m256 __DEFAULT_FN_ATTRS256 1664_mm256_maskz_add_ps(__mmask8 __U, __m256 __A, __m256 __B) { 1665 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 1666 (__v8sf)_mm256_add_ps(__A, __B), 1667 (__v8sf)_mm256_setzero_ps()); 1668} 1669 1670static __inline__ __m128i __DEFAULT_FN_ATTRS128 1671_mm_mask_blend_epi32 (__mmask8 __U, __m128i __A, __m128i __W) { 1672 return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U, 1673 (__v4si) __W, 1674 (__v4si) __A); 1675} 1676 1677static __inline__ __m256i 
__DEFAULT_FN_ATTRS256 1678_mm256_mask_blend_epi32 (__mmask8 __U, __m256i __A, __m256i __W) { 1679 return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U, 1680 (__v8si) __W, 1681 (__v8si) __A); 1682} 1683 1684static __inline__ __m128d __DEFAULT_FN_ATTRS128 1685_mm_mask_blend_pd (__mmask8 __U, __m128d __A, __m128d __W) { 1686 return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U, 1687 (__v2df) __W, 1688 (__v2df) __A); 1689} 1690 1691static __inline__ __m256d __DEFAULT_FN_ATTRS256 1692_mm256_mask_blend_pd (__mmask8 __U, __m256d __A, __m256d __W) { 1693 return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U, 1694 (__v4df) __W, 1695 (__v4df) __A); 1696} 1697 1698static __inline__ __m128 __DEFAULT_FN_ATTRS128 1699_mm_mask_blend_ps (__mmask8 __U, __m128 __A, __m128 __W) { 1700 return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U, 1701 (__v4sf) __W, 1702 (__v4sf) __A); 1703} 1704 1705static __inline__ __m256 __DEFAULT_FN_ATTRS256 1706_mm256_mask_blend_ps (__mmask8 __U, __m256 __A, __m256 __W) { 1707 return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U, 1708 (__v8sf) __W, 1709 (__v8sf) __A); 1710} 1711 1712static __inline__ __m128i __DEFAULT_FN_ATTRS128 1713_mm_mask_blend_epi64 (__mmask8 __U, __m128i __A, __m128i __W) { 1714 return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U, 1715 (__v2di) __W, 1716 (__v2di) __A); 1717} 1718 1719static __inline__ __m256i __DEFAULT_FN_ATTRS256 1720_mm256_mask_blend_epi64 (__mmask8 __U, __m256i __A, __m256i __W) { 1721 return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U, 1722 (__v4di) __W, 1723 (__v4di) __A); 1724} 1725 1726static __inline__ __m128d __DEFAULT_FN_ATTRS128 1727_mm_mask_compress_pd (__m128d __W, __mmask8 __U, __m128d __A) { 1728 return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A, 1729 (__v2df) __W, 1730 (__mmask8) __U); 1731} 1732 1733static __inline__ __m128d __DEFAULT_FN_ATTRS128 1734_mm_maskz_compress_pd (__mmask8 __U, __m128d __A) { 1735 return (__m128d) 
__builtin_ia32_compressdf128_mask ((__v2df) __A, 1736 (__v2df) 1737 _mm_setzero_pd (), 1738 (__mmask8) __U); 1739} 1740 1741static __inline__ __m256d __DEFAULT_FN_ATTRS256 1742_mm256_mask_compress_pd (__m256d __W, __mmask8 __U, __m256d __A) { 1743 return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A, 1744 (__v4df) __W, 1745 (__mmask8) __U); 1746} 1747 1748static __inline__ __m256d __DEFAULT_FN_ATTRS256 1749_mm256_maskz_compress_pd (__mmask8 __U, __m256d __A) { 1750 return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A, 1751 (__v4df) 1752 _mm256_setzero_pd (), 1753 (__mmask8) __U); 1754} 1755 1756static __inline__ __m128i __DEFAULT_FN_ATTRS128 1757_mm_mask_compress_epi64 (__m128i __W, __mmask8 __U, __m128i __A) { 1758 return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A, 1759 (__v2di) __W, 1760 (__mmask8) __U); 1761} 1762 1763static __inline__ __m128i __DEFAULT_FN_ATTRS128 1764_mm_maskz_compress_epi64 (__mmask8 __U, __m128i __A) { 1765 return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A, 1766 (__v2di) 1767 _mm_setzero_si128 (), 1768 (__mmask8) __U); 1769} 1770 1771static __inline__ __m256i __DEFAULT_FN_ATTRS256 1772_mm256_mask_compress_epi64 (__m256i __W, __mmask8 __U, __m256i __A) { 1773 return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A, 1774 (__v4di) __W, 1775 (__mmask8) __U); 1776} 1777 1778static __inline__ __m256i __DEFAULT_FN_ATTRS256 1779_mm256_maskz_compress_epi64 (__mmask8 __U, __m256i __A) { 1780 return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A, 1781 (__v4di) 1782 _mm256_setzero_si256 (), 1783 (__mmask8) __U); 1784} 1785 1786static __inline__ __m128 __DEFAULT_FN_ATTRS128 1787_mm_mask_compress_ps (__m128 __W, __mmask8 __U, __m128 __A) { 1788 return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A, 1789 (__v4sf) __W, 1790 (__mmask8) __U); 1791} 1792 1793static __inline__ __m128 __DEFAULT_FN_ATTRS128 1794_mm_maskz_compress_ps (__mmask8 __U, __m128 __A) { 1795 return 
(__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A, 1796 (__v4sf) 1797 _mm_setzero_ps (), 1798 (__mmask8) __U); 1799} 1800 1801static __inline__ __m256 __DEFAULT_FN_ATTRS256 1802_mm256_mask_compress_ps (__m256 __W, __mmask8 __U, __m256 __A) { 1803 return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A, 1804 (__v8sf) __W, 1805 (__mmask8) __U); 1806} 1807 1808static __inline__ __m256 __DEFAULT_FN_ATTRS256 1809_mm256_maskz_compress_ps (__mmask8 __U, __m256 __A) { 1810 return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A, 1811 (__v8sf) 1812 _mm256_setzero_ps (), 1813 (__mmask8) __U); 1814} 1815 1816static __inline__ __m128i __DEFAULT_FN_ATTRS128 1817_mm_mask_compress_epi32 (__m128i __W, __mmask8 __U, __m128i __A) { 1818 return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A, 1819 (__v4si) __W, 1820 (__mmask8) __U); 1821} 1822 1823static __inline__ __m128i __DEFAULT_FN_ATTRS128 1824_mm_maskz_compress_epi32 (__mmask8 __U, __m128i __A) { 1825 return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A, 1826 (__v4si) 1827 _mm_setzero_si128 (), 1828 (__mmask8) __U); 1829} 1830 1831static __inline__ __m256i __DEFAULT_FN_ATTRS256 1832_mm256_mask_compress_epi32 (__m256i __W, __mmask8 __U, __m256i __A) { 1833 return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A, 1834 (__v8si) __W, 1835 (__mmask8) __U); 1836} 1837 1838static __inline__ __m256i __DEFAULT_FN_ATTRS256 1839_mm256_maskz_compress_epi32 (__mmask8 __U, __m256i __A) { 1840 return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A, 1841 (__v8si) 1842 _mm256_setzero_si256 (), 1843 (__mmask8) __U); 1844} 1845 1846static __inline__ void __DEFAULT_FN_ATTRS128 1847_mm_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m128d __A) { 1848 __builtin_ia32_compressstoredf128_mask ((__v2df *) __P, 1849 (__v2df) __A, 1850 (__mmask8) __U); 1851} 1852 1853static __inline__ void __DEFAULT_FN_ATTRS256 1854_mm256_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m256d __A) { 1855 
__builtin_ia32_compressstoredf256_mask ((__v4df *) __P, 1856 (__v4df) __A, 1857 (__mmask8) __U); 1858} 1859 1860static __inline__ void __DEFAULT_FN_ATTRS128 1861_mm_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m128i __A) { 1862 __builtin_ia32_compressstoredi128_mask ((__v2di *) __P, 1863 (__v2di) __A, 1864 (__mmask8) __U); 1865} 1866 1867static __inline__ void __DEFAULT_FN_ATTRS256 1868_mm256_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m256i __A) { 1869 __builtin_ia32_compressstoredi256_mask ((__v4di *) __P, 1870 (__v4di) __A, 1871 (__mmask8) __U); 1872} 1873 1874static __inline__ void __DEFAULT_FN_ATTRS128 1875_mm_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m128 __A) { 1876 __builtin_ia32_compressstoresf128_mask ((__v4sf *) __P, 1877 (__v4sf) __A, 1878 (__mmask8) __U); 1879} 1880 1881static __inline__ void __DEFAULT_FN_ATTRS256 1882_mm256_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m256 __A) { 1883 __builtin_ia32_compressstoresf256_mask ((__v8sf *) __P, 1884 (__v8sf) __A, 1885 (__mmask8) __U); 1886} 1887 1888static __inline__ void __DEFAULT_FN_ATTRS128 1889_mm_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m128i __A) { 1890 __builtin_ia32_compressstoresi128_mask ((__v4si *) __P, 1891 (__v4si) __A, 1892 (__mmask8) __U); 1893} 1894 1895static __inline__ void __DEFAULT_FN_ATTRS256 1896_mm256_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m256i __A) { 1897 __builtin_ia32_compressstoresi256_mask ((__v8si *) __P, 1898 (__v8si) __A, 1899 (__mmask8) __U); 1900} 1901 1902static __inline__ __m128d __DEFAULT_FN_ATTRS128 1903_mm_mask_cvtepi32_pd (__m128d __W, __mmask8 __U, __m128i __A) { 1904 return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U, 1905 (__v2df)_mm_cvtepi32_pd(__A), 1906 (__v2df)__W); 1907} 1908 1909static __inline__ __m128d __DEFAULT_FN_ATTRS128 1910_mm_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A) { 1911 return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U, 1912 (__v2df)_mm_cvtepi32_pd(__A), 1913 
(__v2df)_mm_setzero_pd()); 1914} 1915 1916static __inline__ __m256d __DEFAULT_FN_ATTRS256 1917_mm256_mask_cvtepi32_pd (__m256d __W, __mmask8 __U, __m128i __A) { 1918 return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U, 1919 (__v4df)_mm256_cvtepi32_pd(__A), 1920 (__v4df)__W); 1921} 1922 1923static __inline__ __m256d __DEFAULT_FN_ATTRS256 1924_mm256_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A) { 1925 return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U, 1926 (__v4df)_mm256_cvtepi32_pd(__A), 1927 (__v4df)_mm256_setzero_pd()); 1928} 1929 1930static __inline__ __m128 __DEFAULT_FN_ATTRS128 1931_mm_mask_cvtepi32_ps (__m128 __W, __mmask8 __U, __m128i __A) { 1932 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 1933 (__v4sf)_mm_cvtepi32_ps(__A), 1934 (__v4sf)__W); 1935} 1936 1937static __inline__ __m128 __DEFAULT_FN_ATTRS128 1938_mm_maskz_cvtepi32_ps (__mmask8 __U, __m128i __A) { 1939 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 1940 (__v4sf)_mm_cvtepi32_ps(__A), 1941 (__v4sf)_mm_setzero_ps()); 1942} 1943 1944static __inline__ __m256 __DEFAULT_FN_ATTRS256 1945_mm256_mask_cvtepi32_ps (__m256 __W, __mmask8 __U, __m256i __A) { 1946 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 1947 (__v8sf)_mm256_cvtepi32_ps(__A), 1948 (__v8sf)__W); 1949} 1950 1951static __inline__ __m256 __DEFAULT_FN_ATTRS256 1952_mm256_maskz_cvtepi32_ps (__mmask8 __U, __m256i __A) { 1953 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 1954 (__v8sf)_mm256_cvtepi32_ps(__A), 1955 (__v8sf)_mm256_setzero_ps()); 1956} 1957 1958static __inline__ __m128i __DEFAULT_FN_ATTRS128 1959_mm_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A) { 1960 return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A, 1961 (__v4si) __W, 1962 (__mmask8) __U); 1963} 1964 1965static __inline__ __m128i __DEFAULT_FN_ATTRS128 1966_mm_maskz_cvtpd_epi32 (__mmask8 __U, __m128d __A) { 1967 return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A, 1968 (__v4si) 1969 
_mm_setzero_si128 (), 1970 (__mmask8) __U); 1971} 1972 1973static __inline__ __m128i __DEFAULT_FN_ATTRS256 1974_mm256_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A) { 1975 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 1976 (__v4si)_mm256_cvtpd_epi32(__A), 1977 (__v4si)__W); 1978} 1979 1980static __inline__ __m128i __DEFAULT_FN_ATTRS256 1981_mm256_maskz_cvtpd_epi32 (__mmask8 __U, __m256d __A) { 1982 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 1983 (__v4si)_mm256_cvtpd_epi32(__A), 1984 (__v4si)_mm_setzero_si128()); 1985} 1986 1987static __inline__ __m128 __DEFAULT_FN_ATTRS128 1988_mm_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m128d __A) { 1989 return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A, 1990 (__v4sf) __W, 1991 (__mmask8) __U); 1992} 1993 1994static __inline__ __m128 __DEFAULT_FN_ATTRS128 1995_mm_maskz_cvtpd_ps (__mmask8 __U, __m128d __A) { 1996 return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A, 1997 (__v4sf) 1998 _mm_setzero_ps (), 1999 (__mmask8) __U); 2000} 2001 2002static __inline__ __m128 __DEFAULT_FN_ATTRS256 2003_mm256_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m256d __A) { 2004 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 2005 (__v4sf)_mm256_cvtpd_ps(__A), 2006 (__v4sf)__W); 2007} 2008 2009static __inline__ __m128 __DEFAULT_FN_ATTRS256 2010_mm256_maskz_cvtpd_ps (__mmask8 __U, __m256d __A) { 2011 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 2012 (__v4sf)_mm256_cvtpd_ps(__A), 2013 (__v4sf)_mm_setzero_ps()); 2014} 2015 2016static __inline__ __m128i __DEFAULT_FN_ATTRS128 2017_mm_cvtpd_epu32 (__m128d __A) { 2018 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A, 2019 (__v4si) 2020 _mm_setzero_si128 (), 2021 (__mmask8) -1); 2022} 2023 2024static __inline__ __m128i __DEFAULT_FN_ATTRS128 2025_mm_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A) { 2026 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A, 2027 (__v4si) __W, 2028 (__mmask8) __U); 
2029} 2030 2031static __inline__ __m128i __DEFAULT_FN_ATTRS128 2032_mm_maskz_cvtpd_epu32 (__mmask8 __U, __m128d __A) { 2033 return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A, 2034 (__v4si) 2035 _mm_setzero_si128 (), 2036 (__mmask8) __U); 2037} 2038 2039static __inline__ __m128i __DEFAULT_FN_ATTRS256 2040_mm256_cvtpd_epu32 (__m256d __A) { 2041 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A, 2042 (__v4si) 2043 _mm_setzero_si128 (), 2044 (__mmask8) -1); 2045} 2046 2047static __inline__ __m128i __DEFAULT_FN_ATTRS256 2048_mm256_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A) { 2049 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A, 2050 (__v4si) __W, 2051 (__mmask8) __U); 2052} 2053 2054static __inline__ __m128i __DEFAULT_FN_ATTRS256 2055_mm256_maskz_cvtpd_epu32 (__mmask8 __U, __m256d __A) { 2056 return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A, 2057 (__v4si) 2058 _mm_setzero_si128 (), 2059 (__mmask8) __U); 2060} 2061 2062static __inline__ __m128i __DEFAULT_FN_ATTRS128 2063_mm_mask_cvtps_epi32 (__m128i __W, __mmask8 __U, __m128 __A) { 2064 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 2065 (__v4si)_mm_cvtps_epi32(__A), 2066 (__v4si)__W); 2067} 2068 2069static __inline__ __m128i __DEFAULT_FN_ATTRS128 2070_mm_maskz_cvtps_epi32 (__mmask8 __U, __m128 __A) { 2071 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 2072 (__v4si)_mm_cvtps_epi32(__A), 2073 (__v4si)_mm_setzero_si128()); 2074} 2075 2076static __inline__ __m256i __DEFAULT_FN_ATTRS256 2077_mm256_mask_cvtps_epi32 (__m256i __W, __mmask8 __U, __m256 __A) { 2078 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 2079 (__v8si)_mm256_cvtps_epi32(__A), 2080 (__v8si)__W); 2081} 2082 2083static __inline__ __m256i __DEFAULT_FN_ATTRS256 2084_mm256_maskz_cvtps_epi32 (__mmask8 __U, __m256 __A) { 2085 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 2086 (__v8si)_mm256_cvtps_epi32(__A), 2087 (__v8si)_mm256_setzero_si256()); 
2088} 2089 2090static __inline__ __m128d __DEFAULT_FN_ATTRS128 2091_mm_mask_cvtps_pd (__m128d __W, __mmask8 __U, __m128 __A) { 2092 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 2093 (__v2df)_mm_cvtps_pd(__A), 2094 (__v2df)__W); 2095} 2096 2097static __inline__ __m128d __DEFAULT_FN_ATTRS128 2098_mm_maskz_cvtps_pd (__mmask8 __U, __m128 __A) { 2099 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 2100 (__v2df)_mm_cvtps_pd(__A), 2101 (__v2df)_mm_setzero_pd()); 2102} 2103 2104static __inline__ __m256d __DEFAULT_FN_ATTRS256 2105_mm256_mask_cvtps_pd (__m256d __W, __mmask8 __U, __m128 __A) { 2106 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 2107 (__v4df)_mm256_cvtps_pd(__A), 2108 (__v4df)__W); 2109} 2110 2111static __inline__ __m256d __DEFAULT_FN_ATTRS256 2112_mm256_maskz_cvtps_pd (__mmask8 __U, __m128 __A) { 2113 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 2114 (__v4df)_mm256_cvtps_pd(__A), 2115 (__v4df)_mm256_setzero_pd()); 2116} 2117 2118static __inline__ __m128i __DEFAULT_FN_ATTRS128 2119_mm_cvtps_epu32 (__m128 __A) { 2120 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A, 2121 (__v4si) 2122 _mm_setzero_si128 (), 2123 (__mmask8) -1); 2124} 2125 2126static __inline__ __m128i __DEFAULT_FN_ATTRS128 2127_mm_mask_cvtps_epu32 (__m128i __W, __mmask8 __U, __m128 __A) { 2128 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A, 2129 (__v4si) __W, 2130 (__mmask8) __U); 2131} 2132 2133static __inline__ __m128i __DEFAULT_FN_ATTRS128 2134_mm_maskz_cvtps_epu32 (__mmask8 __U, __m128 __A) { 2135 return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A, 2136 (__v4si) 2137 _mm_setzero_si128 (), 2138 (__mmask8) __U); 2139} 2140 2141static __inline__ __m256i __DEFAULT_FN_ATTRS256 2142_mm256_cvtps_epu32 (__m256 __A) { 2143 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A, 2144 (__v8si) 2145 _mm256_setzero_si256 (), 2146 (__mmask8) -1); 2147} 2148 2149static __inline__ __m256i 
__DEFAULT_FN_ATTRS256 2150_mm256_mask_cvtps_epu32 (__m256i __W, __mmask8 __U, __m256 __A) { 2151 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A, 2152 (__v8si) __W, 2153 (__mmask8) __U); 2154} 2155 2156static __inline__ __m256i __DEFAULT_FN_ATTRS256 2157_mm256_maskz_cvtps_epu32 (__mmask8 __U, __m256 __A) { 2158 return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A, 2159 (__v8si) 2160 _mm256_setzero_si256 (), 2161 (__mmask8) __U); 2162} 2163 2164static __inline__ __m128i __DEFAULT_FN_ATTRS128 2165_mm_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A) { 2166 return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A, 2167 (__v4si) __W, 2168 (__mmask8) __U); 2169} 2170 2171static __inline__ __m128i __DEFAULT_FN_ATTRS128 2172_mm_maskz_cvttpd_epi32 (__mmask8 __U, __m128d __A) { 2173 return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A, 2174 (__v4si) 2175 _mm_setzero_si128 (), 2176 (__mmask8) __U); 2177} 2178 2179static __inline__ __m128i __DEFAULT_FN_ATTRS256 2180_mm256_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A) { 2181 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 2182 (__v4si)_mm256_cvttpd_epi32(__A), 2183 (__v4si)__W); 2184} 2185 2186static __inline__ __m128i __DEFAULT_FN_ATTRS256 2187_mm256_maskz_cvttpd_epi32 (__mmask8 __U, __m256d __A) { 2188 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 2189 (__v4si)_mm256_cvttpd_epi32(__A), 2190 (__v4si)_mm_setzero_si128()); 2191} 2192 2193static __inline__ __m128i __DEFAULT_FN_ATTRS128 2194_mm_cvttpd_epu32 (__m128d __A) { 2195 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A, 2196 (__v4si) 2197 _mm_setzero_si128 (), 2198 (__mmask8) -1); 2199} 2200 2201static __inline__ __m128i __DEFAULT_FN_ATTRS128 2202_mm_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A) { 2203 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A, 2204 (__v4si) __W, 2205 (__mmask8) __U); 2206} 2207 2208static __inline__ __m128i 
__DEFAULT_FN_ATTRS128 2209_mm_maskz_cvttpd_epu32 (__mmask8 __U, __m128d __A) { 2210 return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A, 2211 (__v4si) 2212 _mm_setzero_si128 (), 2213 (__mmask8) __U); 2214} 2215 2216static __inline__ __m128i __DEFAULT_FN_ATTRS256 2217_mm256_cvttpd_epu32 (__m256d __A) { 2218 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A, 2219 (__v4si) 2220 _mm_setzero_si128 (), 2221 (__mmask8) -1); 2222} 2223 2224static __inline__ __m128i __DEFAULT_FN_ATTRS256 2225_mm256_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A) { 2226 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A, 2227 (__v4si) __W, 2228 (__mmask8) __U); 2229} 2230 2231static __inline__ __m128i __DEFAULT_FN_ATTRS256 2232_mm256_maskz_cvttpd_epu32 (__mmask8 __U, __m256d __A) { 2233 return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A, 2234 (__v4si) 2235 _mm_setzero_si128 (), 2236 (__mmask8) __U); 2237} 2238 2239static __inline__ __m128i __DEFAULT_FN_ATTRS128 2240_mm_mask_cvttps_epi32 (__m128i __W, __mmask8 __U, __m128 __A) { 2241 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 2242 (__v4si)_mm_cvttps_epi32(__A), 2243 (__v4si)__W); 2244} 2245 2246static __inline__ __m128i __DEFAULT_FN_ATTRS128 2247_mm_maskz_cvttps_epi32 (__mmask8 __U, __m128 __A) { 2248 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 2249 (__v4si)_mm_cvttps_epi32(__A), 2250 (__v4si)_mm_setzero_si128()); 2251} 2252 2253static __inline__ __m256i __DEFAULT_FN_ATTRS256 2254_mm256_mask_cvttps_epi32 (__m256i __W, __mmask8 __U, __m256 __A) { 2255 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 2256 (__v8si)_mm256_cvttps_epi32(__A), 2257 (__v8si)__W); 2258} 2259 2260static __inline__ __m256i __DEFAULT_FN_ATTRS256 2261_mm256_maskz_cvttps_epi32 (__mmask8 __U, __m256 __A) { 2262 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 2263 (__v8si)_mm256_cvttps_epi32(__A), 2264 (__v8si)_mm256_setzero_si256()); 2265} 2266 2267static 
__inline__ __m128i __DEFAULT_FN_ATTRS128 2268_mm_cvttps_epu32 (__m128 __A) { 2269 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A, 2270 (__v4si) 2271 _mm_setzero_si128 (), 2272 (__mmask8) -1); 2273} 2274 2275static __inline__ __m128i __DEFAULT_FN_ATTRS128 2276_mm_mask_cvttps_epu32 (__m128i __W, __mmask8 __U, __m128 __A) { 2277 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A, 2278 (__v4si) __W, 2279 (__mmask8) __U); 2280} 2281 2282static __inline__ __m128i __DEFAULT_FN_ATTRS128 2283_mm_maskz_cvttps_epu32 (__mmask8 __U, __m128 __A) { 2284 return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A, 2285 (__v4si) 2286 _mm_setzero_si128 (), 2287 (__mmask8) __U); 2288} 2289 2290static __inline__ __m256i __DEFAULT_FN_ATTRS256 2291_mm256_cvttps_epu32 (__m256 __A) { 2292 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A, 2293 (__v8si) 2294 _mm256_setzero_si256 (), 2295 (__mmask8) -1); 2296} 2297 2298static __inline__ __m256i __DEFAULT_FN_ATTRS256 2299_mm256_mask_cvttps_epu32 (__m256i __W, __mmask8 __U, __m256 __A) { 2300 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A, 2301 (__v8si) __W, 2302 (__mmask8) __U); 2303} 2304 2305static __inline__ __m256i __DEFAULT_FN_ATTRS256 2306_mm256_maskz_cvttps_epu32 (__mmask8 __U, __m256 __A) { 2307 return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A, 2308 (__v8si) 2309 _mm256_setzero_si256 (), 2310 (__mmask8) __U); 2311} 2312 2313static __inline__ __m128d __DEFAULT_FN_ATTRS128 2314_mm_cvtepu32_pd (__m128i __A) { 2315 return (__m128d) __builtin_convertvector( 2316 __builtin_shufflevector((__v4su)__A, (__v4su)__A, 0, 1), __v2df); 2317} 2318 2319static __inline__ __m128d __DEFAULT_FN_ATTRS128 2320_mm_mask_cvtepu32_pd (__m128d __W, __mmask8 __U, __m128i __A) { 2321 return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U, 2322 (__v2df)_mm_cvtepu32_pd(__A), 2323 (__v2df)__W); 2324} 2325 2326static __inline__ __m128d __DEFAULT_FN_ATTRS128 
2327_mm_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A) { 2328 return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U, 2329 (__v2df)_mm_cvtepu32_pd(__A), 2330 (__v2df)_mm_setzero_pd()); 2331} 2332 2333static __inline__ __m256d __DEFAULT_FN_ATTRS256 2334_mm256_cvtepu32_pd (__m128i __A) { 2335 return (__m256d)__builtin_convertvector((__v4su)__A, __v4df); 2336} 2337 2338static __inline__ __m256d __DEFAULT_FN_ATTRS256 2339_mm256_mask_cvtepu32_pd (__m256d __W, __mmask8 __U, __m128i __A) { 2340 return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U, 2341 (__v4df)_mm256_cvtepu32_pd(__A), 2342 (__v4df)__W); 2343} 2344 2345static __inline__ __m256d __DEFAULT_FN_ATTRS256 2346_mm256_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A) { 2347 return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U, 2348 (__v4df)_mm256_cvtepu32_pd(__A), 2349 (__v4df)_mm256_setzero_pd()); 2350} 2351 2352static __inline__ __m128 __DEFAULT_FN_ATTRS128 2353_mm_cvtepu32_ps (__m128i __A) { 2354 return (__m128)__builtin_convertvector((__v4su)__A, __v4sf); 2355} 2356 2357static __inline__ __m128 __DEFAULT_FN_ATTRS128 2358_mm_mask_cvtepu32_ps (__m128 __W, __mmask8 __U, __m128i __A) { 2359 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 2360 (__v4sf)_mm_cvtepu32_ps(__A), 2361 (__v4sf)__W); 2362} 2363 2364static __inline__ __m128 __DEFAULT_FN_ATTRS128 2365_mm_maskz_cvtepu32_ps (__mmask8 __U, __m128i __A) { 2366 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 2367 (__v4sf)_mm_cvtepu32_ps(__A), 2368 (__v4sf)_mm_setzero_ps()); 2369} 2370 2371static __inline__ __m256 __DEFAULT_FN_ATTRS256 2372_mm256_cvtepu32_ps (__m256i __A) { 2373 return (__m256)__builtin_convertvector((__v8su)__A, __v8sf); 2374} 2375 2376static __inline__ __m256 __DEFAULT_FN_ATTRS256 2377_mm256_mask_cvtepu32_ps (__m256 __W, __mmask8 __U, __m256i __A) { 2378 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 2379 (__v8sf)_mm256_cvtepu32_ps(__A), 2380 (__v8sf)__W); 2381} 2382 2383static __inline__ __m256 
__DEFAULT_FN_ATTRS256 2384_mm256_maskz_cvtepu32_ps (__mmask8 __U, __m256i __A) { 2385 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 2386 (__v8sf)_mm256_cvtepu32_ps(__A), 2387 (__v8sf)_mm256_setzero_ps()); 2388} 2389 2390static __inline__ __m128d __DEFAULT_FN_ATTRS128 2391_mm_mask_div_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 2392 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 2393 (__v2df)_mm_div_pd(__A, __B), 2394 (__v2df)__W); 2395} 2396 2397static __inline__ __m128d __DEFAULT_FN_ATTRS128 2398_mm_maskz_div_pd(__mmask8 __U, __m128d __A, __m128d __B) { 2399 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 2400 (__v2df)_mm_div_pd(__A, __B), 2401 (__v2df)_mm_setzero_pd()); 2402} 2403 2404static __inline__ __m256d __DEFAULT_FN_ATTRS256 2405_mm256_mask_div_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { 2406 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 2407 (__v4df)_mm256_div_pd(__A, __B), 2408 (__v4df)__W); 2409} 2410 2411static __inline__ __m256d __DEFAULT_FN_ATTRS256 2412_mm256_maskz_div_pd(__mmask8 __U, __m256d __A, __m256d __B) { 2413 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 2414 (__v4df)_mm256_div_pd(__A, __B), 2415 (__v4df)_mm256_setzero_pd()); 2416} 2417 2418static __inline__ __m128 __DEFAULT_FN_ATTRS128 2419_mm_mask_div_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 2420 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 2421 (__v4sf)_mm_div_ps(__A, __B), 2422 (__v4sf)__W); 2423} 2424 2425static __inline__ __m128 __DEFAULT_FN_ATTRS128 2426_mm_maskz_div_ps(__mmask8 __U, __m128 __A, __m128 __B) { 2427 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 2428 (__v4sf)_mm_div_ps(__A, __B), 2429 (__v4sf)_mm_setzero_ps()); 2430} 2431 2432static __inline__ __m256 __DEFAULT_FN_ATTRS256 2433_mm256_mask_div_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 2434 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 2435 (__v8sf)_mm256_div_ps(__A, __B), 
2436 (__v8sf)__W); 2437} 2438 2439static __inline__ __m256 __DEFAULT_FN_ATTRS256 2440_mm256_maskz_div_ps(__mmask8 __U, __m256 __A, __m256 __B) { 2441 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 2442 (__v8sf)_mm256_div_ps(__A, __B), 2443 (__v8sf)_mm256_setzero_ps()); 2444} 2445 2446static __inline__ __m128d __DEFAULT_FN_ATTRS128 2447_mm_mask_expand_pd (__m128d __W, __mmask8 __U, __m128d __A) { 2448 return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A, 2449 (__v2df) __W, 2450 (__mmask8) __U); 2451} 2452 2453static __inline__ __m128d __DEFAULT_FN_ATTRS128 2454_mm_maskz_expand_pd (__mmask8 __U, __m128d __A) { 2455 return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A, 2456 (__v2df) 2457 _mm_setzero_pd (), 2458 (__mmask8) __U); 2459} 2460 2461static __inline__ __m256d __DEFAULT_FN_ATTRS256 2462_mm256_mask_expand_pd (__m256d __W, __mmask8 __U, __m256d __A) { 2463 return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A, 2464 (__v4df) __W, 2465 (__mmask8) __U); 2466} 2467 2468static __inline__ __m256d __DEFAULT_FN_ATTRS256 2469_mm256_maskz_expand_pd (__mmask8 __U, __m256d __A) { 2470 return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A, 2471 (__v4df) 2472 _mm256_setzero_pd (), 2473 (__mmask8) __U); 2474} 2475 2476static __inline__ __m128i __DEFAULT_FN_ATTRS128 2477_mm_mask_expand_epi64 (__m128i __W, __mmask8 __U, __m128i __A) { 2478 return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A, 2479 (__v2di) __W, 2480 (__mmask8) __U); 2481} 2482 2483static __inline__ __m128i __DEFAULT_FN_ATTRS128 2484_mm_maskz_expand_epi64 (__mmask8 __U, __m128i __A) { 2485 return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A, 2486 (__v2di) 2487 _mm_setzero_si128 (), 2488 (__mmask8) __U); 2489} 2490 2491static __inline__ __m256i __DEFAULT_FN_ATTRS256 2492_mm256_mask_expand_epi64 (__m256i __W, __mmask8 __U, __m256i __A) { 2493 return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A, 2494 (__v4di) __W, 2495 (__mmask8) __U); 
2496} 2497 2498static __inline__ __m256i __DEFAULT_FN_ATTRS256 2499_mm256_maskz_expand_epi64 (__mmask8 __U, __m256i __A) { 2500 return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A, 2501 (__v4di) 2502 _mm256_setzero_si256 (), 2503 (__mmask8) __U); 2504} 2505 2506static __inline__ __m128d __DEFAULT_FN_ATTRS128 2507_mm_mask_expandloadu_pd (__m128d __W, __mmask8 __U, void const *__P) { 2508 return (__m128d) __builtin_ia32_expandloaddf128_mask ((const __v2df *) __P, 2509 (__v2df) __W, 2510 (__mmask8) 2511 __U); 2512} 2513 2514static __inline__ __m128d __DEFAULT_FN_ATTRS128 2515_mm_maskz_expandloadu_pd (__mmask8 __U, void const *__P) { 2516 return (__m128d) __builtin_ia32_expandloaddf128_mask ((const __v2df *) __P, 2517 (__v2df) 2518 _mm_setzero_pd (), 2519 (__mmask8) 2520 __U); 2521} 2522 2523static __inline__ __m256d __DEFAULT_FN_ATTRS256 2524_mm256_mask_expandloadu_pd (__m256d __W, __mmask8 __U, void const *__P) { 2525 return (__m256d) __builtin_ia32_expandloaddf256_mask ((const __v4df *) __P, 2526 (__v4df) __W, 2527 (__mmask8) 2528 __U); 2529} 2530 2531static __inline__ __m256d __DEFAULT_FN_ATTRS256 2532_mm256_maskz_expandloadu_pd (__mmask8 __U, void const *__P) { 2533 return (__m256d) __builtin_ia32_expandloaddf256_mask ((const __v4df *) __P, 2534 (__v4df) 2535 _mm256_setzero_pd (), 2536 (__mmask8) 2537 __U); 2538} 2539 2540static __inline__ __m128i __DEFAULT_FN_ATTRS128 2541_mm_mask_expandloadu_epi64 (__m128i __W, __mmask8 __U, void const *__P) { 2542 return (__m128i) __builtin_ia32_expandloaddi128_mask ((const __v2di *) __P, 2543 (__v2di) __W, 2544 (__mmask8) 2545 __U); 2546} 2547 2548static __inline__ __m128i __DEFAULT_FN_ATTRS128 2549_mm_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P) { 2550 return (__m128i) __builtin_ia32_expandloaddi128_mask ((const __v2di *) __P, 2551 (__v2di) 2552 _mm_setzero_si128 (), 2553 (__mmask8) 2554 __U); 2555} 2556 2557static __inline__ __m256i __DEFAULT_FN_ATTRS256 2558_mm256_mask_expandloadu_epi64 (__m256i __W, 
__mmask8 __U, 2559 void const *__P) { 2560 return (__m256i) __builtin_ia32_expandloaddi256_mask ((const __v4di *) __P, 2561 (__v4di) __W, 2562 (__mmask8) 2563 __U); 2564} 2565 2566static __inline__ __m256i __DEFAULT_FN_ATTRS256 2567_mm256_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P) { 2568 return (__m256i) __builtin_ia32_expandloaddi256_mask ((const __v4di *) __P, 2569 (__v4di) 2570 _mm256_setzero_si256 (), 2571 (__mmask8) 2572 __U); 2573} 2574 2575static __inline__ __m128 __DEFAULT_FN_ATTRS128 2576_mm_mask_expandloadu_ps (__m128 __W, __mmask8 __U, void const *__P) { 2577 return (__m128) __builtin_ia32_expandloadsf128_mask ((const __v4sf *) __P, 2578 (__v4sf) __W, 2579 (__mmask8) __U); 2580} 2581 2582static __inline__ __m128 __DEFAULT_FN_ATTRS128 2583_mm_maskz_expandloadu_ps (__mmask8 __U, void const *__P) { 2584 return (__m128) __builtin_ia32_expandloadsf128_mask ((const __v4sf *) __P, 2585 (__v4sf) 2586 _mm_setzero_ps (), 2587 (__mmask8) 2588 __U); 2589} 2590 2591static __inline__ __m256 __DEFAULT_FN_ATTRS256 2592_mm256_mask_expandloadu_ps (__m256 __W, __mmask8 __U, void const *__P) { 2593 return (__m256) __builtin_ia32_expandloadsf256_mask ((const __v8sf *) __P, 2594 (__v8sf) __W, 2595 (__mmask8) __U); 2596} 2597 2598static __inline__ __m256 __DEFAULT_FN_ATTRS256 2599_mm256_maskz_expandloadu_ps (__mmask8 __U, void const *__P) { 2600 return (__m256) __builtin_ia32_expandloadsf256_mask ((const __v8sf *) __P, 2601 (__v8sf) 2602 _mm256_setzero_ps (), 2603 (__mmask8) 2604 __U); 2605} 2606 2607static __inline__ __m128i __DEFAULT_FN_ATTRS128 2608_mm_mask_expandloadu_epi32 (__m128i __W, __mmask8 __U, void const *__P) { 2609 return (__m128i) __builtin_ia32_expandloadsi128_mask ((const __v4si *) __P, 2610 (__v4si) __W, 2611 (__mmask8) 2612 __U); 2613} 2614 2615static __inline__ __m128i __DEFAULT_FN_ATTRS128 2616_mm_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P) { 2617 return (__m128i) __builtin_ia32_expandloadsi128_mask ((const __v4si *) __P, 2618 
(__v4si) 2619 _mm_setzero_si128 (), 2620 (__mmask8) __U); 2621} 2622 2623static __inline__ __m256i __DEFAULT_FN_ATTRS256 2624_mm256_mask_expandloadu_epi32 (__m256i __W, __mmask8 __U, 2625 void const *__P) { 2626 return (__m256i) __builtin_ia32_expandloadsi256_mask ((const __v8si *) __P, 2627 (__v8si) __W, 2628 (__mmask8) 2629 __U); 2630} 2631 2632static __inline__ __m256i __DEFAULT_FN_ATTRS256 2633_mm256_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P) { 2634 return (__m256i) __builtin_ia32_expandloadsi256_mask ((const __v8si *) __P, 2635 (__v8si) 2636 _mm256_setzero_si256 (), 2637 (__mmask8) 2638 __U); 2639} 2640 2641static __inline__ __m128 __DEFAULT_FN_ATTRS128 2642_mm_mask_expand_ps (__m128 __W, __mmask8 __U, __m128 __A) { 2643 return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A, 2644 (__v4sf) __W, 2645 (__mmask8) __U); 2646} 2647 2648static __inline__ __m128 __DEFAULT_FN_ATTRS128 2649_mm_maskz_expand_ps (__mmask8 __U, __m128 __A) { 2650 return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A, 2651 (__v4sf) 2652 _mm_setzero_ps (), 2653 (__mmask8) __U); 2654} 2655 2656static __inline__ __m256 __DEFAULT_FN_ATTRS256 2657_mm256_mask_expand_ps (__m256 __W, __mmask8 __U, __m256 __A) { 2658 return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A, 2659 (__v8sf) __W, 2660 (__mmask8) __U); 2661} 2662 2663static __inline__ __m256 __DEFAULT_FN_ATTRS256 2664_mm256_maskz_expand_ps (__mmask8 __U, __m256 __A) { 2665 return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A, 2666 (__v8sf) 2667 _mm256_setzero_ps (), 2668 (__mmask8) __U); 2669} 2670 2671static __inline__ __m128i __DEFAULT_FN_ATTRS128 2672_mm_mask_expand_epi32 (__m128i __W, __mmask8 __U, __m128i __A) { 2673 return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A, 2674 (__v4si) __W, 2675 (__mmask8) __U); 2676} 2677 2678static __inline__ __m128i __DEFAULT_FN_ATTRS128 2679_mm_maskz_expand_epi32 (__mmask8 __U, __m128i __A) { 2680 return (__m128i) __builtin_ia32_expandsi128_mask 
((__v4si) __A, 2681 (__v4si) 2682 _mm_setzero_si128 (), 2683 (__mmask8) __U); 2684} 2685 2686static __inline__ __m256i __DEFAULT_FN_ATTRS256 2687_mm256_mask_expand_epi32 (__m256i __W, __mmask8 __U, __m256i __A) { 2688 return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A, 2689 (__v8si) __W, 2690 (__mmask8) __U); 2691} 2692 2693static __inline__ __m256i __DEFAULT_FN_ATTRS256 2694_mm256_maskz_expand_epi32 (__mmask8 __U, __m256i __A) { 2695 return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A, 2696 (__v8si) 2697 _mm256_setzero_si256 (), 2698 (__mmask8) __U); 2699} 2700 2701static __inline__ __m128d __DEFAULT_FN_ATTRS128 2702_mm_getexp_pd (__m128d __A) { 2703 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A, 2704 (__v2df) 2705 _mm_setzero_pd (), 2706 (__mmask8) -1); 2707} 2708 2709static __inline__ __m128d __DEFAULT_FN_ATTRS128 2710_mm_mask_getexp_pd (__m128d __W, __mmask8 __U, __m128d __A) { 2711 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A, 2712 (__v2df) __W, 2713 (__mmask8) __U); 2714} 2715 2716static __inline__ __m128d __DEFAULT_FN_ATTRS128 2717_mm_maskz_getexp_pd (__mmask8 __U, __m128d __A) { 2718 return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A, 2719 (__v2df) 2720 _mm_setzero_pd (), 2721 (__mmask8) __U); 2722} 2723 2724static __inline__ __m256d __DEFAULT_FN_ATTRS256 2725_mm256_getexp_pd (__m256d __A) { 2726 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A, 2727 (__v4df) 2728 _mm256_setzero_pd (), 2729 (__mmask8) -1); 2730} 2731 2732static __inline__ __m256d __DEFAULT_FN_ATTRS256 2733_mm256_mask_getexp_pd (__m256d __W, __mmask8 __U, __m256d __A) { 2734 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A, 2735 (__v4df) __W, 2736 (__mmask8) __U); 2737} 2738 2739static __inline__ __m256d __DEFAULT_FN_ATTRS256 2740_mm256_maskz_getexp_pd (__mmask8 __U, __m256d __A) { 2741 return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A, 2742 (__v4df) 2743 _mm256_setzero_pd (), 2744 
(__mmask8) __U); 2745} 2746 2747static __inline__ __m128 __DEFAULT_FN_ATTRS128 2748_mm_getexp_ps (__m128 __A) { 2749 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A, 2750 (__v4sf) 2751 _mm_setzero_ps (), 2752 (__mmask8) -1); 2753} 2754 2755static __inline__ __m128 __DEFAULT_FN_ATTRS128 2756_mm_mask_getexp_ps (__m128 __W, __mmask8 __U, __m128 __A) { 2757 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A, 2758 (__v4sf) __W, 2759 (__mmask8) __U); 2760} 2761 2762static __inline__ __m128 __DEFAULT_FN_ATTRS128 2763_mm_maskz_getexp_ps (__mmask8 __U, __m128 __A) { 2764 return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A, 2765 (__v4sf) 2766 _mm_setzero_ps (), 2767 (__mmask8) __U); 2768} 2769 2770static __inline__ __m256 __DEFAULT_FN_ATTRS256 2771_mm256_getexp_ps (__m256 __A) { 2772 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A, 2773 (__v8sf) 2774 _mm256_setzero_ps (), 2775 (__mmask8) -1); 2776} 2777 2778static __inline__ __m256 __DEFAULT_FN_ATTRS256 2779_mm256_mask_getexp_ps (__m256 __W, __mmask8 __U, __m256 __A) { 2780 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A, 2781 (__v8sf) __W, 2782 (__mmask8) __U); 2783} 2784 2785static __inline__ __m256 __DEFAULT_FN_ATTRS256 2786_mm256_maskz_getexp_ps (__mmask8 __U, __m256 __A) { 2787 return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A, 2788 (__v8sf) 2789 _mm256_setzero_ps (), 2790 (__mmask8) __U); 2791} 2792 2793static __inline__ __m128d __DEFAULT_FN_ATTRS128 2794_mm_mask_max_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 2795 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 2796 (__v2df)_mm_max_pd(__A, __B), 2797 (__v2df)__W); 2798} 2799 2800static __inline__ __m128d __DEFAULT_FN_ATTRS128 2801_mm_maskz_max_pd(__mmask8 __U, __m128d __A, __m128d __B) { 2802 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 2803 (__v2df)_mm_max_pd(__A, __B), 2804 (__v2df)_mm_setzero_pd()); 2805} 2806 2807static __inline__ __m256d 
__DEFAULT_FN_ATTRS256 2808_mm256_mask_max_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { 2809 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 2810 (__v4df)_mm256_max_pd(__A, __B), 2811 (__v4df)__W); 2812} 2813 2814static __inline__ __m256d __DEFAULT_FN_ATTRS256 2815_mm256_maskz_max_pd(__mmask8 __U, __m256d __A, __m256d __B) { 2816 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 2817 (__v4df)_mm256_max_pd(__A, __B), 2818 (__v4df)_mm256_setzero_pd()); 2819} 2820 2821static __inline__ __m128 __DEFAULT_FN_ATTRS128 2822_mm_mask_max_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 2823 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 2824 (__v4sf)_mm_max_ps(__A, __B), 2825 (__v4sf)__W); 2826} 2827 2828static __inline__ __m128 __DEFAULT_FN_ATTRS128 2829_mm_maskz_max_ps(__mmask8 __U, __m128 __A, __m128 __B) { 2830 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 2831 (__v4sf)_mm_max_ps(__A, __B), 2832 (__v4sf)_mm_setzero_ps()); 2833} 2834 2835static __inline__ __m256 __DEFAULT_FN_ATTRS256 2836_mm256_mask_max_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 2837 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 2838 (__v8sf)_mm256_max_ps(__A, __B), 2839 (__v8sf)__W); 2840} 2841 2842static __inline__ __m256 __DEFAULT_FN_ATTRS256 2843_mm256_maskz_max_ps(__mmask8 __U, __m256 __A, __m256 __B) { 2844 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 2845 (__v8sf)_mm256_max_ps(__A, __B), 2846 (__v8sf)_mm256_setzero_ps()); 2847} 2848 2849static __inline__ __m128d __DEFAULT_FN_ATTRS128 2850_mm_mask_min_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 2851 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 2852 (__v2df)_mm_min_pd(__A, __B), 2853 (__v2df)__W); 2854} 2855 2856static __inline__ __m128d __DEFAULT_FN_ATTRS128 2857_mm_maskz_min_pd(__mmask8 __U, __m128d __A, __m128d __B) { 2858 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 2859 (__v2df)_mm_min_pd(__A, __B), 2860 
(__v2df)_mm_setzero_pd()); 2861} 2862 2863static __inline__ __m256d __DEFAULT_FN_ATTRS256 2864_mm256_mask_min_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { 2865 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 2866 (__v4df)_mm256_min_pd(__A, __B), 2867 (__v4df)__W); 2868} 2869 2870static __inline__ __m256d __DEFAULT_FN_ATTRS256 2871_mm256_maskz_min_pd(__mmask8 __U, __m256d __A, __m256d __B) { 2872 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 2873 (__v4df)_mm256_min_pd(__A, __B), 2874 (__v4df)_mm256_setzero_pd()); 2875} 2876 2877static __inline__ __m128 __DEFAULT_FN_ATTRS128 2878_mm_mask_min_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 2879 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 2880 (__v4sf)_mm_min_ps(__A, __B), 2881 (__v4sf)__W); 2882} 2883 2884static __inline__ __m128 __DEFAULT_FN_ATTRS128 2885_mm_maskz_min_ps(__mmask8 __U, __m128 __A, __m128 __B) { 2886 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 2887 (__v4sf)_mm_min_ps(__A, __B), 2888 (__v4sf)_mm_setzero_ps()); 2889} 2890 2891static __inline__ __m256 __DEFAULT_FN_ATTRS256 2892_mm256_mask_min_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 2893 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 2894 (__v8sf)_mm256_min_ps(__A, __B), 2895 (__v8sf)__W); 2896} 2897 2898static __inline__ __m256 __DEFAULT_FN_ATTRS256 2899_mm256_maskz_min_ps(__mmask8 __U, __m256 __A, __m256 __B) { 2900 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 2901 (__v8sf)_mm256_min_ps(__A, __B), 2902 (__v8sf)_mm256_setzero_ps()); 2903} 2904 2905static __inline__ __m128d __DEFAULT_FN_ATTRS128 2906_mm_mask_mul_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 2907 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 2908 (__v2df)_mm_mul_pd(__A, __B), 2909 (__v2df)__W); 2910} 2911 2912static __inline__ __m128d __DEFAULT_FN_ATTRS128 2913_mm_maskz_mul_pd(__mmask8 __U, __m128d __A, __m128d __B) { 2914 return 
(__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 2915 (__v2df)_mm_mul_pd(__A, __B), 2916 (__v2df)_mm_setzero_pd()); 2917} 2918 2919static __inline__ __m256d __DEFAULT_FN_ATTRS256 2920_mm256_mask_mul_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { 2921 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 2922 (__v4df)_mm256_mul_pd(__A, __B), 2923 (__v4df)__W); 2924} 2925 2926static __inline__ __m256d __DEFAULT_FN_ATTRS256 2927_mm256_maskz_mul_pd(__mmask8 __U, __m256d __A, __m256d __B) { 2928 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 2929 (__v4df)_mm256_mul_pd(__A, __B), 2930 (__v4df)_mm256_setzero_pd()); 2931} 2932 2933static __inline__ __m128 __DEFAULT_FN_ATTRS128 2934_mm_mask_mul_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 2935 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 2936 (__v4sf)_mm_mul_ps(__A, __B), 2937 (__v4sf)__W); 2938} 2939 2940static __inline__ __m128 __DEFAULT_FN_ATTRS128 2941_mm_maskz_mul_ps(__mmask8 __U, __m128 __A, __m128 __B) { 2942 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 2943 (__v4sf)_mm_mul_ps(__A, __B), 2944 (__v4sf)_mm_setzero_ps()); 2945} 2946 2947static __inline__ __m256 __DEFAULT_FN_ATTRS256 2948_mm256_mask_mul_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 2949 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 2950 (__v8sf)_mm256_mul_ps(__A, __B), 2951 (__v8sf)__W); 2952} 2953 2954static __inline__ __m256 __DEFAULT_FN_ATTRS256 2955_mm256_maskz_mul_ps(__mmask8 __U, __m256 __A, __m256 __B) { 2956 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 2957 (__v8sf)_mm256_mul_ps(__A, __B), 2958 (__v8sf)_mm256_setzero_ps()); 2959} 2960 2961static __inline__ __m128i __DEFAULT_FN_ATTRS128 2962_mm_mask_abs_epi32(__m128i __W, __mmask8 __U, __m128i __A) { 2963 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 2964 (__v4si)_mm_abs_epi32(__A), 2965 (__v4si)__W); 2966} 2967 2968static __inline__ __m128i __DEFAULT_FN_ATTRS128 
2969_mm_maskz_abs_epi32(__mmask8 __U, __m128i __A) { 2970 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 2971 (__v4si)_mm_abs_epi32(__A), 2972 (__v4si)_mm_setzero_si128()); 2973} 2974 2975static __inline__ __m256i __DEFAULT_FN_ATTRS256 2976_mm256_mask_abs_epi32(__m256i __W, __mmask8 __U, __m256i __A) { 2977 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 2978 (__v8si)_mm256_abs_epi32(__A), 2979 (__v8si)__W); 2980} 2981 2982static __inline__ __m256i __DEFAULT_FN_ATTRS256 2983_mm256_maskz_abs_epi32(__mmask8 __U, __m256i __A) { 2984 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 2985 (__v8si)_mm256_abs_epi32(__A), 2986 (__v8si)_mm256_setzero_si256()); 2987} 2988 2989static __inline__ __m128i __DEFAULT_FN_ATTRS128 2990_mm_abs_epi64 (__m128i __A) { 2991 return (__m128i)__builtin_elementwise_abs((__v2di)__A); 2992} 2993 2994static __inline__ __m128i __DEFAULT_FN_ATTRS128 2995_mm_mask_abs_epi64 (__m128i __W, __mmask8 __U, __m128i __A) { 2996 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 2997 (__v2di)_mm_abs_epi64(__A), 2998 (__v2di)__W); 2999} 3000 3001static __inline__ __m128i __DEFAULT_FN_ATTRS128 3002_mm_maskz_abs_epi64 (__mmask8 __U, __m128i __A) { 3003 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 3004 (__v2di)_mm_abs_epi64(__A), 3005 (__v2di)_mm_setzero_si128()); 3006} 3007 3008static __inline__ __m256i __DEFAULT_FN_ATTRS256 3009_mm256_abs_epi64 (__m256i __A) { 3010 return (__m256i)__builtin_elementwise_abs((__v4di)__A); 3011} 3012 3013static __inline__ __m256i __DEFAULT_FN_ATTRS256 3014_mm256_mask_abs_epi64 (__m256i __W, __mmask8 __U, __m256i __A) { 3015 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 3016 (__v4di)_mm256_abs_epi64(__A), 3017 (__v4di)__W); 3018} 3019 3020static __inline__ __m256i __DEFAULT_FN_ATTRS256 3021_mm256_maskz_abs_epi64 (__mmask8 __U, __m256i __A) { 3022 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 3023 (__v4di)_mm256_abs_epi64(__A), 3024 
(__v4di)_mm256_setzero_si256()); 3025} 3026 3027static __inline__ __m128i __DEFAULT_FN_ATTRS128 3028_mm_maskz_max_epi32(__mmask8 __M, __m128i __A, __m128i __B) { 3029 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 3030 (__v4si)_mm_max_epi32(__A, __B), 3031 (__v4si)_mm_setzero_si128()); 3032} 3033 3034static __inline__ __m128i __DEFAULT_FN_ATTRS128 3035_mm_mask_max_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { 3036 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 3037 (__v4si)_mm_max_epi32(__A, __B), 3038 (__v4si)__W); 3039} 3040 3041static __inline__ __m256i __DEFAULT_FN_ATTRS256 3042_mm256_maskz_max_epi32(__mmask8 __M, __m256i __A, __m256i __B) { 3043 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 3044 (__v8si)_mm256_max_epi32(__A, __B), 3045 (__v8si)_mm256_setzero_si256()); 3046} 3047 3048static __inline__ __m256i __DEFAULT_FN_ATTRS256 3049_mm256_mask_max_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { 3050 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 3051 (__v8si)_mm256_max_epi32(__A, __B), 3052 (__v8si)__W); 3053} 3054 3055static __inline__ __m128i __DEFAULT_FN_ATTRS128 3056_mm_max_epi64 (__m128i __A, __m128i __B) { 3057 return (__m128i)__builtin_elementwise_max((__v2di)__A, (__v2di)__B); 3058} 3059 3060static __inline__ __m128i __DEFAULT_FN_ATTRS128 3061_mm_maskz_max_epi64 (__mmask8 __M, __m128i __A, __m128i __B) { 3062 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, 3063 (__v2di)_mm_max_epi64(__A, __B), 3064 (__v2di)_mm_setzero_si128()); 3065} 3066 3067static __inline__ __m128i __DEFAULT_FN_ATTRS128 3068_mm_mask_max_epi64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { 3069 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, 3070 (__v2di)_mm_max_epi64(__A, __B), 3071 (__v2di)__W); 3072} 3073 3074static __inline__ __m256i __DEFAULT_FN_ATTRS256 3075_mm256_max_epi64 (__m256i __A, __m256i __B) { 3076 return (__m256i)__builtin_elementwise_max((__v4di)__A, (__v4di)__B); 
3077} 3078 3079static __inline__ __m256i __DEFAULT_FN_ATTRS256 3080_mm256_maskz_max_epi64 (__mmask8 __M, __m256i __A, __m256i __B) { 3081 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 3082 (__v4di)_mm256_max_epi64(__A, __B), 3083 (__v4di)_mm256_setzero_si256()); 3084} 3085 3086static __inline__ __m256i __DEFAULT_FN_ATTRS256 3087_mm256_mask_max_epi64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { 3088 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 3089 (__v4di)_mm256_max_epi64(__A, __B), 3090 (__v4di)__W); 3091} 3092 3093static __inline__ __m128i __DEFAULT_FN_ATTRS128 3094_mm_maskz_max_epu32(__mmask8 __M, __m128i __A, __m128i __B) { 3095 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 3096 (__v4si)_mm_max_epu32(__A, __B), 3097 (__v4si)_mm_setzero_si128()); 3098} 3099 3100static __inline__ __m128i __DEFAULT_FN_ATTRS128 3101_mm_mask_max_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { 3102 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 3103 (__v4si)_mm_max_epu32(__A, __B), 3104 (__v4si)__W); 3105} 3106 3107static __inline__ __m256i __DEFAULT_FN_ATTRS256 3108_mm256_maskz_max_epu32(__mmask8 __M, __m256i __A, __m256i __B) { 3109 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 3110 (__v8si)_mm256_max_epu32(__A, __B), 3111 (__v8si)_mm256_setzero_si256()); 3112} 3113 3114static __inline__ __m256i __DEFAULT_FN_ATTRS256 3115_mm256_mask_max_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { 3116 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 3117 (__v8si)_mm256_max_epu32(__A, __B), 3118 (__v8si)__W); 3119} 3120 3121static __inline__ __m128i __DEFAULT_FN_ATTRS128 3122_mm_max_epu64 (__m128i __A, __m128i __B) { 3123 return (__m128i)__builtin_elementwise_max((__v2du)__A, (__v2du)__B); 3124} 3125 3126static __inline__ __m128i __DEFAULT_FN_ATTRS128 3127_mm_maskz_max_epu64 (__mmask8 __M, __m128i __A, __m128i __B) { 3128 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, 3129 
(__v2di)_mm_max_epu64(__A, __B), 3130 (__v2di)_mm_setzero_si128()); 3131} 3132 3133static __inline__ __m128i __DEFAULT_FN_ATTRS128 3134_mm_mask_max_epu64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { 3135 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, 3136 (__v2di)_mm_max_epu64(__A, __B), 3137 (__v2di)__W); 3138} 3139 3140static __inline__ __m256i __DEFAULT_FN_ATTRS256 3141_mm256_max_epu64 (__m256i __A, __m256i __B) { 3142 return (__m256i)__builtin_elementwise_max((__v4du)__A, (__v4du)__B); 3143} 3144 3145static __inline__ __m256i __DEFAULT_FN_ATTRS256 3146_mm256_maskz_max_epu64 (__mmask8 __M, __m256i __A, __m256i __B) { 3147 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 3148 (__v4di)_mm256_max_epu64(__A, __B), 3149 (__v4di)_mm256_setzero_si256()); 3150} 3151 3152static __inline__ __m256i __DEFAULT_FN_ATTRS256 3153_mm256_mask_max_epu64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { 3154 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 3155 (__v4di)_mm256_max_epu64(__A, __B), 3156 (__v4di)__W); 3157} 3158 3159static __inline__ __m128i __DEFAULT_FN_ATTRS128 3160_mm_maskz_min_epi32(__mmask8 __M, __m128i __A, __m128i __B) { 3161 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 3162 (__v4si)_mm_min_epi32(__A, __B), 3163 (__v4si)_mm_setzero_si128()); 3164} 3165 3166static __inline__ __m128i __DEFAULT_FN_ATTRS128 3167_mm_mask_min_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { 3168 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 3169 (__v4si)_mm_min_epi32(__A, __B), 3170 (__v4si)__W); 3171} 3172 3173static __inline__ __m256i __DEFAULT_FN_ATTRS256 3174_mm256_maskz_min_epi32(__mmask8 __M, __m256i __A, __m256i __B) { 3175 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 3176 (__v8si)_mm256_min_epi32(__A, __B), 3177 (__v8si)_mm256_setzero_si256()); 3178} 3179 3180static __inline__ __m256i __DEFAULT_FN_ATTRS256 3181_mm256_mask_min_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i 
__B) { 3182 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 3183 (__v8si)_mm256_min_epi32(__A, __B), 3184 (__v8si)__W); 3185} 3186 3187static __inline__ __m128i __DEFAULT_FN_ATTRS128 3188_mm_min_epi64 (__m128i __A, __m128i __B) { 3189 return (__m128i)__builtin_elementwise_min((__v2di)__A, (__v2di)__B); 3190} 3191 3192static __inline__ __m128i __DEFAULT_FN_ATTRS128 3193_mm_mask_min_epi64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { 3194 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, 3195 (__v2di)_mm_min_epi64(__A, __B), 3196 (__v2di)__W); 3197} 3198 3199static __inline__ __m128i __DEFAULT_FN_ATTRS128 3200_mm_maskz_min_epi64 (__mmask8 __M, __m128i __A, __m128i __B) { 3201 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, 3202 (__v2di)_mm_min_epi64(__A, __B), 3203 (__v2di)_mm_setzero_si128()); 3204} 3205 3206static __inline__ __m256i __DEFAULT_FN_ATTRS256 3207_mm256_min_epi64 (__m256i __A, __m256i __B) { 3208 return (__m256i)__builtin_elementwise_min((__v4di)__A, (__v4di)__B); 3209} 3210 3211static __inline__ __m256i __DEFAULT_FN_ATTRS256 3212_mm256_mask_min_epi64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { 3213 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 3214 (__v4di)_mm256_min_epi64(__A, __B), 3215 (__v4di)__W); 3216} 3217 3218static __inline__ __m256i __DEFAULT_FN_ATTRS256 3219_mm256_maskz_min_epi64 (__mmask8 __M, __m256i __A, __m256i __B) { 3220 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 3221 (__v4di)_mm256_min_epi64(__A, __B), 3222 (__v4di)_mm256_setzero_si256()); 3223} 3224 3225static __inline__ __m128i __DEFAULT_FN_ATTRS128 3226_mm_maskz_min_epu32(__mmask8 __M, __m128i __A, __m128i __B) { 3227 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 3228 (__v4si)_mm_min_epu32(__A, __B), 3229 (__v4si)_mm_setzero_si128()); 3230} 3231 3232static __inline__ __m128i __DEFAULT_FN_ATTRS128 3233_mm_mask_min_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { 3234 return 
(__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 3235 (__v4si)_mm_min_epu32(__A, __B), 3236 (__v4si)__W); 3237} 3238 3239static __inline__ __m256i __DEFAULT_FN_ATTRS256 3240_mm256_maskz_min_epu32(__mmask8 __M, __m256i __A, __m256i __B) { 3241 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 3242 (__v8si)_mm256_min_epu32(__A, __B), 3243 (__v8si)_mm256_setzero_si256()); 3244} 3245 3246static __inline__ __m256i __DEFAULT_FN_ATTRS256 3247_mm256_mask_min_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { 3248 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 3249 (__v8si)_mm256_min_epu32(__A, __B), 3250 (__v8si)__W); 3251} 3252 3253static __inline__ __m128i __DEFAULT_FN_ATTRS128 3254_mm_min_epu64 (__m128i __A, __m128i __B) { 3255 return (__m128i)__builtin_elementwise_min((__v2du)__A, (__v2du)__B); 3256} 3257 3258static __inline__ __m128i __DEFAULT_FN_ATTRS128 3259_mm_mask_min_epu64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { 3260 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, 3261 (__v2di)_mm_min_epu64(__A, __B), 3262 (__v2di)__W); 3263} 3264 3265static __inline__ __m128i __DEFAULT_FN_ATTRS128 3266_mm_maskz_min_epu64 (__mmask8 __M, __m128i __A, __m128i __B) { 3267 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, 3268 (__v2di)_mm_min_epu64(__A, __B), 3269 (__v2di)_mm_setzero_si128()); 3270} 3271 3272static __inline__ __m256i __DEFAULT_FN_ATTRS256 3273_mm256_min_epu64 (__m256i __A, __m256i __B) { 3274 return (__m256i)__builtin_elementwise_min((__v4du)__A, (__v4du)__B); 3275} 3276 3277static __inline__ __m256i __DEFAULT_FN_ATTRS256 3278_mm256_mask_min_epu64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { 3279 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 3280 (__v4di)_mm256_min_epu64(__A, __B), 3281 (__v4di)__W); 3282} 3283 3284static __inline__ __m256i __DEFAULT_FN_ATTRS256 3285_mm256_maskz_min_epu64 (__mmask8 __M, __m256i __A, __m256i __B) { 3286 return 
(__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 3287 (__v4di)_mm256_min_epu64(__A, __B), 3288 (__v4di)_mm256_setzero_si256()); 3289} 3290 3291#define _mm_roundscale_pd(A, imm) \ 3292 ((__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \ 3293 (int)(imm), \ 3294 (__v2df)_mm_setzero_pd(), \ 3295 (__mmask8)-1)) 3296 3297 3298#define _mm_mask_roundscale_pd(W, U, A, imm) \ 3299 ((__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \ 3300 (int)(imm), \ 3301 (__v2df)(__m128d)(W), \ 3302 (__mmask8)(U))) 3303 3304 3305#define _mm_maskz_roundscale_pd(U, A, imm) \ 3306 ((__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \ 3307 (int)(imm), \ 3308 (__v2df)_mm_setzero_pd(), \ 3309 (__mmask8)(U))) 3310 3311 3312#define _mm256_roundscale_pd(A, imm) \ 3313 ((__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \ 3314 (int)(imm), \ 3315 (__v4df)_mm256_setzero_pd(), \ 3316 (__mmask8)-1)) 3317 3318 3319#define _mm256_mask_roundscale_pd(W, U, A, imm) \ 3320 ((__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \ 3321 (int)(imm), \ 3322 (__v4df)(__m256d)(W), \ 3323 (__mmask8)(U))) 3324 3325 3326#define _mm256_maskz_roundscale_pd(U, A, imm) \ 3327 ((__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \ 3328 (int)(imm), \ 3329 (__v4df)_mm256_setzero_pd(), \ 3330 (__mmask8)(U))) 3331 3332#define _mm_roundscale_ps(A, imm) \ 3333 ((__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \ 3334 (__v4sf)_mm_setzero_ps(), \ 3335 (__mmask8)-1)) 3336 3337 3338#define _mm_mask_roundscale_ps(W, U, A, imm) \ 3339 ((__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \ 3340 (__v4sf)(__m128)(W), \ 3341 (__mmask8)(U))) 3342 3343 3344#define _mm_maskz_roundscale_ps(U, A, imm) \ 3345 ((__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \ 3346 (__v4sf)_mm_setzero_ps(), \ 3347 (__mmask8)(U))) 3348 3349#define _mm256_roundscale_ps(A, imm) \ 3350 
((__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \ 3351 (__v8sf)_mm256_setzero_ps(), \ 3352 (__mmask8)-1)) 3353 3354#define _mm256_mask_roundscale_ps(W, U, A, imm) \ 3355 ((__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \ 3356 (__v8sf)(__m256)(W), \ 3357 (__mmask8)(U))) 3358 3359 3360#define _mm256_maskz_roundscale_ps(U, A, imm) \ 3361 ((__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \ 3362 (__v8sf)_mm256_setzero_ps(), \ 3363 (__mmask8)(U))) 3364 3365static __inline__ __m128d __DEFAULT_FN_ATTRS128 3366_mm_scalef_pd (__m128d __A, __m128d __B) { 3367 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A, 3368 (__v2df) __B, 3369 (__v2df) 3370 _mm_setzero_pd (), 3371 (__mmask8) -1); 3372} 3373 3374static __inline__ __m128d __DEFAULT_FN_ATTRS128 3375_mm_mask_scalef_pd (__m128d __W, __mmask8 __U, __m128d __A, 3376 __m128d __B) { 3377 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A, 3378 (__v2df) __B, 3379 (__v2df) __W, 3380 (__mmask8) __U); 3381} 3382 3383static __inline__ __m128d __DEFAULT_FN_ATTRS128 3384_mm_maskz_scalef_pd (__mmask8 __U, __m128d __A, __m128d __B) { 3385 return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A, 3386 (__v2df) __B, 3387 (__v2df) 3388 _mm_setzero_pd (), 3389 (__mmask8) __U); 3390} 3391 3392static __inline__ __m256d __DEFAULT_FN_ATTRS256 3393_mm256_scalef_pd (__m256d __A, __m256d __B) { 3394 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A, 3395 (__v4df) __B, 3396 (__v4df) 3397 _mm256_setzero_pd (), 3398 (__mmask8) -1); 3399} 3400 3401static __inline__ __m256d __DEFAULT_FN_ATTRS256 3402_mm256_mask_scalef_pd (__m256d __W, __mmask8 __U, __m256d __A, 3403 __m256d __B) { 3404 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A, 3405 (__v4df) __B, 3406 (__v4df) __W, 3407 (__mmask8) __U); 3408} 3409 3410static __inline__ __m256d __DEFAULT_FN_ATTRS256 3411_mm256_maskz_scalef_pd (__mmask8 __U, __m256d __A, 
__m256d __B) { 3412 return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A, 3413 (__v4df) __B, 3414 (__v4df) 3415 _mm256_setzero_pd (), 3416 (__mmask8) __U); 3417} 3418 3419static __inline__ __m128 __DEFAULT_FN_ATTRS128 3420_mm_scalef_ps (__m128 __A, __m128 __B) { 3421 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A, 3422 (__v4sf) __B, 3423 (__v4sf) 3424 _mm_setzero_ps (), 3425 (__mmask8) -1); 3426} 3427 3428static __inline__ __m128 __DEFAULT_FN_ATTRS128 3429_mm_mask_scalef_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 3430 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A, 3431 (__v4sf) __B, 3432 (__v4sf) __W, 3433 (__mmask8) __U); 3434} 3435 3436static __inline__ __m128 __DEFAULT_FN_ATTRS128 3437_mm_maskz_scalef_ps (__mmask8 __U, __m128 __A, __m128 __B) { 3438 return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A, 3439 (__v4sf) __B, 3440 (__v4sf) 3441 _mm_setzero_ps (), 3442 (__mmask8) __U); 3443} 3444 3445static __inline__ __m256 __DEFAULT_FN_ATTRS256 3446_mm256_scalef_ps (__m256 __A, __m256 __B) { 3447 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A, 3448 (__v8sf) __B, 3449 (__v8sf) 3450 _mm256_setzero_ps (), 3451 (__mmask8) -1); 3452} 3453 3454static __inline__ __m256 __DEFAULT_FN_ATTRS256 3455_mm256_mask_scalef_ps (__m256 __W, __mmask8 __U, __m256 __A, 3456 __m256 __B) { 3457 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A, 3458 (__v8sf) __B, 3459 (__v8sf) __W, 3460 (__mmask8) __U); 3461} 3462 3463static __inline__ __m256 __DEFAULT_FN_ATTRS256 3464_mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) { 3465 return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A, 3466 (__v8sf) __B, 3467 (__v8sf) 3468 _mm256_setzero_ps (), 3469 (__mmask8) __U); 3470} 3471 3472#define _mm_i64scatter_pd(addr, index, v1, scale) \ 3473 __builtin_ia32_scatterdiv2df((void *)(addr), (__mmask8)-1, \ 3474 (__v2di)(__m128i)(index), \ 3475 (__v2df)(__m128d)(v1), (int)(scale)) 3476 3477#define 
_mm_mask_i64scatter_pd(addr, mask, index, v1, scale) \ 3478 __builtin_ia32_scatterdiv2df((void *)(addr), (__mmask8)(mask), \ 3479 (__v2di)(__m128i)(index), \ 3480 (__v2df)(__m128d)(v1), (int)(scale)) 3481 3482#define _mm_i64scatter_epi64(addr, index, v1, scale) \ 3483 __builtin_ia32_scatterdiv2di((void *)(addr), (__mmask8)-1, \ 3484 (__v2di)(__m128i)(index), \ 3485 (__v2di)(__m128i)(v1), (int)(scale)) 3486 3487#define _mm_mask_i64scatter_epi64(addr, mask, index, v1, scale) \ 3488 __builtin_ia32_scatterdiv2di((void *)(addr), (__mmask8)(mask), \ 3489 (__v2di)(__m128i)(index), \ 3490 (__v2di)(__m128i)(v1), (int)(scale)) 3491 3492#define _mm256_i64scatter_pd(addr, index, v1, scale) \ 3493 __builtin_ia32_scatterdiv4df((void *)(addr), (__mmask8)-1, \ 3494 (__v4di)(__m256i)(index), \ 3495 (__v4df)(__m256d)(v1), (int)(scale)) 3496 3497#define _mm256_mask_i64scatter_pd(addr, mask, index, v1, scale) \ 3498 __builtin_ia32_scatterdiv4df((void *)(addr), (__mmask8)(mask), \ 3499 (__v4di)(__m256i)(index), \ 3500 (__v4df)(__m256d)(v1), (int)(scale)) 3501 3502#define _mm256_i64scatter_epi64(addr, index, v1, scale) \ 3503 __builtin_ia32_scatterdiv4di((void *)(addr), (__mmask8)-1, \ 3504 (__v4di)(__m256i)(index), \ 3505 (__v4di)(__m256i)(v1), (int)(scale)) 3506 3507#define _mm256_mask_i64scatter_epi64(addr, mask, index, v1, scale) \ 3508 __builtin_ia32_scatterdiv4di((void *)(addr), (__mmask8)(mask), \ 3509 (__v4di)(__m256i)(index), \ 3510 (__v4di)(__m256i)(v1), (int)(scale)) 3511 3512#define _mm_i64scatter_ps(addr, index, v1, scale) \ 3513 __builtin_ia32_scatterdiv4sf((void *)(addr), (__mmask8)-1, \ 3514 (__v2di)(__m128i)(index), (__v4sf)(__m128)(v1), \ 3515 (int)(scale)) 3516 3517#define _mm_mask_i64scatter_ps(addr, mask, index, v1, scale) \ 3518 __builtin_ia32_scatterdiv4sf((void *)(addr), (__mmask8)(mask), \ 3519 (__v2di)(__m128i)(index), (__v4sf)(__m128)(v1), \ 3520 (int)(scale)) 3521 3522#define _mm_i64scatter_epi32(addr, index, v1, scale) \ 3523 
__builtin_ia32_scatterdiv4si((void *)(addr), (__mmask8)-1, \ 3524 (__v2di)(__m128i)(index), \ 3525 (__v4si)(__m128i)(v1), (int)(scale)) 3526 3527#define _mm_mask_i64scatter_epi32(addr, mask, index, v1, scale) \ 3528 __builtin_ia32_scatterdiv4si((void *)(addr), (__mmask8)(mask), \ 3529 (__v2di)(__m128i)(index), \ 3530 (__v4si)(__m128i)(v1), (int)(scale)) 3531 3532#define _mm256_i64scatter_ps(addr, index, v1, scale) \ 3533 __builtin_ia32_scatterdiv8sf((void *)(addr), (__mmask8)-1, \ 3534 (__v4di)(__m256i)(index), (__v4sf)(__m128)(v1), \ 3535 (int)(scale)) 3536 3537#define _mm256_mask_i64scatter_ps(addr, mask, index, v1, scale) \ 3538 __builtin_ia32_scatterdiv8sf((void *)(addr), (__mmask8)(mask), \ 3539 (__v4di)(__m256i)(index), (__v4sf)(__m128)(v1), \ 3540 (int)(scale)) 3541 3542#define _mm256_i64scatter_epi32(addr, index, v1, scale) \ 3543 __builtin_ia32_scatterdiv8si((void *)(addr), (__mmask8)-1, \ 3544 (__v4di)(__m256i)(index), \ 3545 (__v4si)(__m128i)(v1), (int)(scale)) 3546 3547#define _mm256_mask_i64scatter_epi32(addr, mask, index, v1, scale) \ 3548 __builtin_ia32_scatterdiv8si((void *)(addr), (__mmask8)(mask), \ 3549 (__v4di)(__m256i)(index), \ 3550 (__v4si)(__m128i)(v1), (int)(scale)) 3551 3552#define _mm_i32scatter_pd(addr, index, v1, scale) \ 3553 __builtin_ia32_scattersiv2df((void *)(addr), (__mmask8)-1, \ 3554 (__v4si)(__m128i)(index), \ 3555 (__v2df)(__m128d)(v1), (int)(scale)) 3556 3557#define _mm_mask_i32scatter_pd(addr, mask, index, v1, scale) \ 3558 __builtin_ia32_scattersiv2df((void *)(addr), (__mmask8)(mask), \ 3559 (__v4si)(__m128i)(index), \ 3560 (__v2df)(__m128d)(v1), (int)(scale)) 3561 3562#define _mm_i32scatter_epi64(addr, index, v1, scale) \ 3563 __builtin_ia32_scattersiv2di((void *)(addr), (__mmask8)-1, \ 3564 (__v4si)(__m128i)(index), \ 3565 (__v2di)(__m128i)(v1), (int)(scale)) 3566 3567#define _mm_mask_i32scatter_epi64(addr, mask, index, v1, scale) \ 3568 __builtin_ia32_scattersiv2di((void *)(addr), (__mmask8)(mask), \ 3569 
(__v4si)(__m128i)(index), \ 3570 (__v2di)(__m128i)(v1), (int)(scale)) 3571 3572#define _mm256_i32scatter_pd(addr, index, v1, scale) \ 3573 __builtin_ia32_scattersiv4df((void *)(addr), (__mmask8)-1, \ 3574 (__v4si)(__m128i)(index), \ 3575 (__v4df)(__m256d)(v1), (int)(scale)) 3576 3577#define _mm256_mask_i32scatter_pd(addr, mask, index, v1, scale) \ 3578 __builtin_ia32_scattersiv4df((void *)(addr), (__mmask8)(mask), \ 3579 (__v4si)(__m128i)(index), \ 3580 (__v4df)(__m256d)(v1), (int)(scale)) 3581 3582#define _mm256_i32scatter_epi64(addr, index, v1, scale) \ 3583 __builtin_ia32_scattersiv4di((void *)(addr), (__mmask8)-1, \ 3584 (__v4si)(__m128i)(index), \ 3585 (__v4di)(__m256i)(v1), (int)(scale)) 3586 3587#define _mm256_mask_i32scatter_epi64(addr, mask, index, v1, scale) \ 3588 __builtin_ia32_scattersiv4di((void *)(addr), (__mmask8)(mask), \ 3589 (__v4si)(__m128i)(index), \ 3590 (__v4di)(__m256i)(v1), (int)(scale)) 3591 3592#define _mm_i32scatter_ps(addr, index, v1, scale) \ 3593 __builtin_ia32_scattersiv4sf((void *)(addr), (__mmask8)-1, \ 3594 (__v4si)(__m128i)(index), (__v4sf)(__m128)(v1), \ 3595 (int)(scale)) 3596 3597#define _mm_mask_i32scatter_ps(addr, mask, index, v1, scale) \ 3598 __builtin_ia32_scattersiv4sf((void *)(addr), (__mmask8)(mask), \ 3599 (__v4si)(__m128i)(index), (__v4sf)(__m128)(v1), \ 3600 (int)(scale)) 3601 3602#define _mm_i32scatter_epi32(addr, index, v1, scale) \ 3603 __builtin_ia32_scattersiv4si((void *)(addr), (__mmask8)-1, \ 3604 (__v4si)(__m128i)(index), \ 3605 (__v4si)(__m128i)(v1), (int)(scale)) 3606 3607#define _mm_mask_i32scatter_epi32(addr, mask, index, v1, scale) \ 3608 __builtin_ia32_scattersiv4si((void *)(addr), (__mmask8)(mask), \ 3609 (__v4si)(__m128i)(index), \ 3610 (__v4si)(__m128i)(v1), (int)(scale)) 3611 3612#define _mm256_i32scatter_ps(addr, index, v1, scale) \ 3613 __builtin_ia32_scattersiv8sf((void *)(addr), (__mmask8)-1, \ 3614 (__v8si)(__m256i)(index), (__v8sf)(__m256)(v1), \ 3615 (int)(scale)) 3616 3617#define 
_mm256_mask_i32scatter_ps(addr, mask, index, v1, scale) \ 3618 __builtin_ia32_scattersiv8sf((void *)(addr), (__mmask8)(mask), \ 3619 (__v8si)(__m256i)(index), (__v8sf)(__m256)(v1), \ 3620 (int)(scale)) 3621 3622#define _mm256_i32scatter_epi32(addr, index, v1, scale) \ 3623 __builtin_ia32_scattersiv8si((void *)(addr), (__mmask8)-1, \ 3624 (__v8si)(__m256i)(index), \ 3625 (__v8si)(__m256i)(v1), (int)(scale)) 3626 3627#define _mm256_mask_i32scatter_epi32(addr, mask, index, v1, scale) \ 3628 __builtin_ia32_scattersiv8si((void *)(addr), (__mmask8)(mask), \ 3629 (__v8si)(__m256i)(index), \ 3630 (__v8si)(__m256i)(v1), (int)(scale)) 3631 3632 static __inline__ __m128d __DEFAULT_FN_ATTRS128 3633 _mm_mask_sqrt_pd(__m128d __W, __mmask8 __U, __m128d __A) { 3634 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 3635 (__v2df)_mm_sqrt_pd(__A), 3636 (__v2df)__W); 3637 } 3638 3639 static __inline__ __m128d __DEFAULT_FN_ATTRS128 3640 _mm_maskz_sqrt_pd(__mmask8 __U, __m128d __A) { 3641 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 3642 (__v2df)_mm_sqrt_pd(__A), 3643 (__v2df)_mm_setzero_pd()); 3644 } 3645 3646 static __inline__ __m256d __DEFAULT_FN_ATTRS256 3647 _mm256_mask_sqrt_pd(__m256d __W, __mmask8 __U, __m256d __A) { 3648 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 3649 (__v4df)_mm256_sqrt_pd(__A), 3650 (__v4df)__W); 3651 } 3652 3653 static __inline__ __m256d __DEFAULT_FN_ATTRS256 3654 _mm256_maskz_sqrt_pd(__mmask8 __U, __m256d __A) { 3655 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 3656 (__v4df)_mm256_sqrt_pd(__A), 3657 (__v4df)_mm256_setzero_pd()); 3658 } 3659 3660 static __inline__ __m128 __DEFAULT_FN_ATTRS128 3661 _mm_mask_sqrt_ps(__m128 __W, __mmask8 __U, __m128 __A) { 3662 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 3663 (__v4sf)_mm_sqrt_ps(__A), 3664 (__v4sf)__W); 3665 } 3666 3667 static __inline__ __m128 __DEFAULT_FN_ATTRS128 3668 _mm_maskz_sqrt_ps(__mmask8 __U, __m128 __A) { 3669 return 
(__m128)__builtin_ia32_selectps_128((__mmask8)__U, 3670 (__v4sf)_mm_sqrt_ps(__A), 3671 (__v4sf)_mm_setzero_ps()); 3672 } 3673 3674 static __inline__ __m256 __DEFAULT_FN_ATTRS256 3675 _mm256_mask_sqrt_ps(__m256 __W, __mmask8 __U, __m256 __A) { 3676 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 3677 (__v8sf)_mm256_sqrt_ps(__A), 3678 (__v8sf)__W); 3679 } 3680 3681 static __inline__ __m256 __DEFAULT_FN_ATTRS256 3682 _mm256_maskz_sqrt_ps(__mmask8 __U, __m256 __A) { 3683 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 3684 (__v8sf)_mm256_sqrt_ps(__A), 3685 (__v8sf)_mm256_setzero_ps()); 3686 } 3687 3688 static __inline__ __m128d __DEFAULT_FN_ATTRS128 3689 _mm_mask_sub_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { 3690 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 3691 (__v2df)_mm_sub_pd(__A, __B), 3692 (__v2df)__W); 3693 } 3694 3695 static __inline__ __m128d __DEFAULT_FN_ATTRS128 3696 _mm_maskz_sub_pd(__mmask8 __U, __m128d __A, __m128d __B) { 3697 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 3698 (__v2df)_mm_sub_pd(__A, __B), 3699 (__v2df)_mm_setzero_pd()); 3700 } 3701 3702 static __inline__ __m256d __DEFAULT_FN_ATTRS256 3703 _mm256_mask_sub_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { 3704 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 3705 (__v4df)_mm256_sub_pd(__A, __B), 3706 (__v4df)__W); 3707 } 3708 3709 static __inline__ __m256d __DEFAULT_FN_ATTRS256 3710 _mm256_maskz_sub_pd(__mmask8 __U, __m256d __A, __m256d __B) { 3711 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 3712 (__v4df)_mm256_sub_pd(__A, __B), 3713 (__v4df)_mm256_setzero_pd()); 3714 } 3715 3716 static __inline__ __m128 __DEFAULT_FN_ATTRS128 3717 _mm_mask_sub_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { 3718 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 3719 (__v4sf)_mm_sub_ps(__A, __B), 3720 (__v4sf)__W); 3721 } 3722 3723 static __inline__ __m128 __DEFAULT_FN_ATTRS128 3724 
_mm_maskz_sub_ps(__mmask8 __U, __m128 __A, __m128 __B) { 3725 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 3726 (__v4sf)_mm_sub_ps(__A, __B), 3727 (__v4sf)_mm_setzero_ps()); 3728 } 3729 3730 static __inline__ __m256 __DEFAULT_FN_ATTRS256 3731 _mm256_mask_sub_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { 3732 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 3733 (__v8sf)_mm256_sub_ps(__A, __B), 3734 (__v8sf)__W); 3735 } 3736 3737 static __inline__ __m256 __DEFAULT_FN_ATTRS256 3738 _mm256_maskz_sub_ps(__mmask8 __U, __m256 __A, __m256 __B) { 3739 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 3740 (__v8sf)_mm256_sub_ps(__A, __B), 3741 (__v8sf)_mm256_setzero_ps()); 3742 } 3743 3744 static __inline__ __m128i __DEFAULT_FN_ATTRS128 3745 _mm_permutex2var_epi32(__m128i __A, __m128i __I, __m128i __B) { 3746 return (__m128i)__builtin_ia32_vpermi2vard128((__v4si) __A, (__v4si)__I, 3747 (__v4si)__B); 3748 } 3749 3750 static __inline__ __m128i __DEFAULT_FN_ATTRS128 3751 _mm_mask_permutex2var_epi32(__m128i __A, __mmask8 __U, __m128i __I, 3752 __m128i __B) { 3753 return (__m128i)__builtin_ia32_selectd_128(__U, 3754 (__v4si)_mm_permutex2var_epi32(__A, __I, __B), 3755 (__v4si)__A); 3756 } 3757 3758 static __inline__ __m128i __DEFAULT_FN_ATTRS128 3759 _mm_mask2_permutex2var_epi32(__m128i __A, __m128i __I, __mmask8 __U, 3760 __m128i __B) { 3761 return (__m128i)__builtin_ia32_selectd_128(__U, 3762 (__v4si)_mm_permutex2var_epi32(__A, __I, __B), 3763 (__v4si)__I); 3764 } 3765 3766 static __inline__ __m128i __DEFAULT_FN_ATTRS128 3767 _mm_maskz_permutex2var_epi32(__mmask8 __U, __m128i __A, __m128i __I, 3768 __m128i __B) { 3769 return (__m128i)__builtin_ia32_selectd_128(__U, 3770 (__v4si)_mm_permutex2var_epi32(__A, __I, __B), 3771 (__v4si)_mm_setzero_si128()); 3772 } 3773 3774 static __inline__ __m256i __DEFAULT_FN_ATTRS256 3775 _mm256_permutex2var_epi32(__m256i __A, __m256i __I, __m256i __B) { 3776 return 
(__m256i)__builtin_ia32_vpermi2vard256((__v8si)__A, (__v8si) __I, 3777 (__v8si) __B); 3778 } 3779 3780 static __inline__ __m256i __DEFAULT_FN_ATTRS256 3781 _mm256_mask_permutex2var_epi32(__m256i __A, __mmask8 __U, __m256i __I, 3782 __m256i __B) { 3783 return (__m256i)__builtin_ia32_selectd_256(__U, 3784 (__v8si)_mm256_permutex2var_epi32(__A, __I, __B), 3785 (__v8si)__A); 3786 } 3787 3788 static __inline__ __m256i __DEFAULT_FN_ATTRS256 3789 _mm256_mask2_permutex2var_epi32(__m256i __A, __m256i __I, __mmask8 __U, 3790 __m256i __B) { 3791 return (__m256i)__builtin_ia32_selectd_256(__U, 3792 (__v8si)_mm256_permutex2var_epi32(__A, __I, __B), 3793 (__v8si)__I); 3794 } 3795 3796 static __inline__ __m256i __DEFAULT_FN_ATTRS256 3797 _mm256_maskz_permutex2var_epi32(__mmask8 __U, __m256i __A, __m256i __I, 3798 __m256i __B) { 3799 return (__m256i)__builtin_ia32_selectd_256(__U, 3800 (__v8si)_mm256_permutex2var_epi32(__A, __I, __B), 3801 (__v8si)_mm256_setzero_si256()); 3802 } 3803 3804 static __inline__ __m128d __DEFAULT_FN_ATTRS128 3805 _mm_permutex2var_pd(__m128d __A, __m128i __I, __m128d __B) { 3806 return (__m128d)__builtin_ia32_vpermi2varpd128((__v2df)__A, (__v2di)__I, 3807 (__v2df)__B); 3808 } 3809 3810 static __inline__ __m128d __DEFAULT_FN_ATTRS128 3811 _mm_mask_permutex2var_pd(__m128d __A, __mmask8 __U, __m128i __I, __m128d __B) { 3812 return (__m128d)__builtin_ia32_selectpd_128(__U, 3813 (__v2df)_mm_permutex2var_pd(__A, __I, __B), 3814 (__v2df)__A); 3815 } 3816 3817 static __inline__ __m128d __DEFAULT_FN_ATTRS128 3818 _mm_mask2_permutex2var_pd(__m128d __A, __m128i __I, __mmask8 __U, __m128d __B) { 3819 return (__m128d)__builtin_ia32_selectpd_128(__U, 3820 (__v2df)_mm_permutex2var_pd(__A, __I, __B), 3821 (__v2df)(__m128d)__I); 3822 } 3823 3824 static __inline__ __m128d __DEFAULT_FN_ATTRS128 3825 _mm_maskz_permutex2var_pd(__mmask8 __U, __m128d __A, __m128i __I, __m128d __B) { 3826 return (__m128d)__builtin_ia32_selectpd_128(__U, 3827 (__v2df)_mm_permutex2var_pd(__A, 
__I, __B), 3828 (__v2df)_mm_setzero_pd()); 3829 } 3830 3831 static __inline__ __m256d __DEFAULT_FN_ATTRS256 3832 _mm256_permutex2var_pd(__m256d __A, __m256i __I, __m256d __B) { 3833 return (__m256d)__builtin_ia32_vpermi2varpd256((__v4df)__A, (__v4di)__I, 3834 (__v4df)__B); 3835 } 3836 3837 static __inline__ __m256d __DEFAULT_FN_ATTRS256 3838 _mm256_mask_permutex2var_pd(__m256d __A, __mmask8 __U, __m256i __I, 3839 __m256d __B) { 3840 return (__m256d)__builtin_ia32_selectpd_256(__U, 3841 (__v4df)_mm256_permutex2var_pd(__A, __I, __B), 3842 (__v4df)__A); 3843 } 3844 3845 static __inline__ __m256d __DEFAULT_FN_ATTRS256 3846 _mm256_mask2_permutex2var_pd(__m256d __A, __m256i __I, __mmask8 __U, 3847 __m256d __B) { 3848 return (__m256d)__builtin_ia32_selectpd_256(__U, 3849 (__v4df)_mm256_permutex2var_pd(__A, __I, __B), 3850 (__v4df)(__m256d)__I); 3851 } 3852 3853 static __inline__ __m256d __DEFAULT_FN_ATTRS256 3854 _mm256_maskz_permutex2var_pd(__mmask8 __U, __m256d __A, __m256i __I, 3855 __m256d __B) { 3856 return (__m256d)__builtin_ia32_selectpd_256(__U, 3857 (__v4df)_mm256_permutex2var_pd(__A, __I, __B), 3858 (__v4df)_mm256_setzero_pd()); 3859 } 3860 3861 static __inline__ __m128 __DEFAULT_FN_ATTRS128 3862 _mm_permutex2var_ps(__m128 __A, __m128i __I, __m128 __B) { 3863 return (__m128)__builtin_ia32_vpermi2varps128((__v4sf)__A, (__v4si)__I, 3864 (__v4sf)__B); 3865 } 3866 3867 static __inline__ __m128 __DEFAULT_FN_ATTRS128 3868 _mm_mask_permutex2var_ps(__m128 __A, __mmask8 __U, __m128i __I, __m128 __B) { 3869 return (__m128)__builtin_ia32_selectps_128(__U, 3870 (__v4sf)_mm_permutex2var_ps(__A, __I, __B), 3871 (__v4sf)__A); 3872 } 3873 3874 static __inline__ __m128 __DEFAULT_FN_ATTRS128 3875 _mm_mask2_permutex2var_ps(__m128 __A, __m128i __I, __mmask8 __U, __m128 __B) { 3876 return (__m128)__builtin_ia32_selectps_128(__U, 3877 (__v4sf)_mm_permutex2var_ps(__A, __I, __B), 3878 (__v4sf)(__m128)__I); 3879 } 3880 3881 static __inline__ __m128 __DEFAULT_FN_ATTRS128 3882 
_mm_maskz_permutex2var_ps(__mmask8 __U, __m128 __A, __m128i __I, __m128 __B) { 3883 return (__m128)__builtin_ia32_selectps_128(__U, 3884 (__v4sf)_mm_permutex2var_ps(__A, __I, __B), 3885 (__v4sf)_mm_setzero_ps()); 3886 } 3887 3888 static __inline__ __m256 __DEFAULT_FN_ATTRS256 3889 _mm256_permutex2var_ps(__m256 __A, __m256i __I, __m256 __B) { 3890 return (__m256)__builtin_ia32_vpermi2varps256((__v8sf)__A, (__v8si)__I, 3891 (__v8sf) __B); 3892 } 3893 3894 static __inline__ __m256 __DEFAULT_FN_ATTRS256 3895 _mm256_mask_permutex2var_ps(__m256 __A, __mmask8 __U, __m256i __I, __m256 __B) { 3896 return (__m256)__builtin_ia32_selectps_256(__U, 3897 (__v8sf)_mm256_permutex2var_ps(__A, __I, __B), 3898 (__v8sf)__A); 3899 } 3900 3901 static __inline__ __m256 __DEFAULT_FN_ATTRS256 3902 _mm256_mask2_permutex2var_ps(__m256 __A, __m256i __I, __mmask8 __U, 3903 __m256 __B) { 3904 return (__m256)__builtin_ia32_selectps_256(__U, 3905 (__v8sf)_mm256_permutex2var_ps(__A, __I, __B), 3906 (__v8sf)(__m256)__I); 3907 } 3908 3909 static __inline__ __m256 __DEFAULT_FN_ATTRS256 3910 _mm256_maskz_permutex2var_ps(__mmask8 __U, __m256 __A, __m256i __I, 3911 __m256 __B) { 3912 return (__m256)__builtin_ia32_selectps_256(__U, 3913 (__v8sf)_mm256_permutex2var_ps(__A, __I, __B), 3914 (__v8sf)_mm256_setzero_ps()); 3915 } 3916 3917 static __inline__ __m128i __DEFAULT_FN_ATTRS128 3918 _mm_permutex2var_epi64(__m128i __A, __m128i __I, __m128i __B) { 3919 return (__m128i)__builtin_ia32_vpermi2varq128((__v2di)__A, (__v2di)__I, 3920 (__v2di)__B); 3921 } 3922 3923 static __inline__ __m128i __DEFAULT_FN_ATTRS128 3924 _mm_mask_permutex2var_epi64(__m128i __A, __mmask8 __U, __m128i __I, 3925 __m128i __B) { 3926 return (__m128i)__builtin_ia32_selectq_128(__U, 3927 (__v2di)_mm_permutex2var_epi64(__A, __I, __B), 3928 (__v2di)__A); 3929 } 3930 3931 static __inline__ __m128i __DEFAULT_FN_ATTRS128 3932 _mm_mask2_permutex2var_epi64(__m128i __A, __m128i __I, __mmask8 __U, 3933 __m128i __B) { 3934 return 
(__m128i)__builtin_ia32_selectq_128(__U, 3935 (__v2di)_mm_permutex2var_epi64(__A, __I, __B), 3936 (__v2di)__I); 3937 } 3938 3939 static __inline__ __m128i __DEFAULT_FN_ATTRS128 3940 _mm_maskz_permutex2var_epi64(__mmask8 __U, __m128i __A, __m128i __I, 3941 __m128i __B) { 3942 return (__m128i)__builtin_ia32_selectq_128(__U, 3943 (__v2di)_mm_permutex2var_epi64(__A, __I, __B), 3944 (__v2di)_mm_setzero_si128()); 3945 } 3946 3947 3948 static __inline__ __m256i __DEFAULT_FN_ATTRS256 3949 _mm256_permutex2var_epi64(__m256i __A, __m256i __I, __m256i __B) { 3950 return (__m256i)__builtin_ia32_vpermi2varq256((__v4di)__A, (__v4di) __I, 3951 (__v4di) __B); 3952 } 3953 3954 static __inline__ __m256i __DEFAULT_FN_ATTRS256 3955 _mm256_mask_permutex2var_epi64(__m256i __A, __mmask8 __U, __m256i __I, 3956 __m256i __B) { 3957 return (__m256i)__builtin_ia32_selectq_256(__U, 3958 (__v4di)_mm256_permutex2var_epi64(__A, __I, __B), 3959 (__v4di)__A); 3960 } 3961 3962 static __inline__ __m256i __DEFAULT_FN_ATTRS256 3963 _mm256_mask2_permutex2var_epi64(__m256i __A, __m256i __I, __mmask8 __U, 3964 __m256i __B) { 3965 return (__m256i)__builtin_ia32_selectq_256(__U, 3966 (__v4di)_mm256_permutex2var_epi64(__A, __I, __B), 3967 (__v4di)__I); 3968 } 3969 3970 static __inline__ __m256i __DEFAULT_FN_ATTRS256 3971 _mm256_maskz_permutex2var_epi64(__mmask8 __U, __m256i __A, __m256i __I, 3972 __m256i __B) { 3973 return (__m256i)__builtin_ia32_selectq_256(__U, 3974 (__v4di)_mm256_permutex2var_epi64(__A, __I, __B), 3975 (__v4di)_mm256_setzero_si256()); 3976 } 3977 3978 static __inline__ __m128i __DEFAULT_FN_ATTRS128 3979 _mm_mask_cvtepi8_epi32(__m128i __W, __mmask8 __U, __m128i __A) 3980 { 3981 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 3982 (__v4si)_mm_cvtepi8_epi32(__A), 3983 (__v4si)__W); 3984 } 3985 3986 static __inline__ __m128i __DEFAULT_FN_ATTRS128 3987 _mm_maskz_cvtepi8_epi32(__mmask8 __U, __m128i __A) 3988 { 3989 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 3990 
(__v4si)_mm_cvtepi8_epi32(__A), 3991 (__v4si)_mm_setzero_si128()); 3992 } 3993 3994 static __inline__ __m256i __DEFAULT_FN_ATTRS256 3995 _mm256_mask_cvtepi8_epi32 (__m256i __W, __mmask8 __U, __m128i __A) 3996 { 3997 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 3998 (__v8si)_mm256_cvtepi8_epi32(__A), 3999 (__v8si)__W); 4000 } 4001 4002 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4003 _mm256_maskz_cvtepi8_epi32 (__mmask8 __U, __m128i __A) 4004 { 4005 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4006 (__v8si)_mm256_cvtepi8_epi32(__A), 4007 (__v8si)_mm256_setzero_si256()); 4008 } 4009 4010 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4011 _mm_mask_cvtepi8_epi64(__m128i __W, __mmask8 __U, __m128i __A) 4012 { 4013 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4014 (__v2di)_mm_cvtepi8_epi64(__A), 4015 (__v2di)__W); 4016 } 4017 4018 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4019 _mm_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A) 4020 { 4021 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4022 (__v2di)_mm_cvtepi8_epi64(__A), 4023 (__v2di)_mm_setzero_si128()); 4024 } 4025 4026 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4027 _mm256_mask_cvtepi8_epi64(__m256i __W, __mmask8 __U, __m128i __A) 4028 { 4029 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4030 (__v4di)_mm256_cvtepi8_epi64(__A), 4031 (__v4di)__W); 4032 } 4033 4034 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4035 _mm256_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A) 4036 { 4037 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4038 (__v4di)_mm256_cvtepi8_epi64(__A), 4039 (__v4di)_mm256_setzero_si256()); 4040 } 4041 4042 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4043 _mm_mask_cvtepi32_epi64(__m128i __W, __mmask8 __U, __m128i __X) 4044 { 4045 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4046 (__v2di)_mm_cvtepi32_epi64(__X), 4047 (__v2di)__W); 4048 } 4049 4050 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4051 
_mm_maskz_cvtepi32_epi64(__mmask8 __U, __m128i __X) 4052 { 4053 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4054 (__v2di)_mm_cvtepi32_epi64(__X), 4055 (__v2di)_mm_setzero_si128()); 4056 } 4057 4058 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4059 _mm256_mask_cvtepi32_epi64(__m256i __W, __mmask8 __U, __m128i __X) 4060 { 4061 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4062 (__v4di)_mm256_cvtepi32_epi64(__X), 4063 (__v4di)__W); 4064 } 4065 4066 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4067 _mm256_maskz_cvtepi32_epi64(__mmask8 __U, __m128i __X) 4068 { 4069 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4070 (__v4di)_mm256_cvtepi32_epi64(__X), 4071 (__v4di)_mm256_setzero_si256()); 4072 } 4073 4074 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4075 _mm_mask_cvtepi16_epi32(__m128i __W, __mmask8 __U, __m128i __A) 4076 { 4077 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4078 (__v4si)_mm_cvtepi16_epi32(__A), 4079 (__v4si)__W); 4080 } 4081 4082 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4083 _mm_maskz_cvtepi16_epi32(__mmask8 __U, __m128i __A) 4084 { 4085 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4086 (__v4si)_mm_cvtepi16_epi32(__A), 4087 (__v4si)_mm_setzero_si128()); 4088 } 4089 4090 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4091 _mm256_mask_cvtepi16_epi32(__m256i __W, __mmask8 __U, __m128i __A) 4092 { 4093 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4094 (__v8si)_mm256_cvtepi16_epi32(__A), 4095 (__v8si)__W); 4096 } 4097 4098 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4099 _mm256_maskz_cvtepi16_epi32 (__mmask8 __U, __m128i __A) 4100 { 4101 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4102 (__v8si)_mm256_cvtepi16_epi32(__A), 4103 (__v8si)_mm256_setzero_si256()); 4104 } 4105 4106 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4107 _mm_mask_cvtepi16_epi64(__m128i __W, __mmask8 __U, __m128i __A) 4108 { 4109 return 
(__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4110 (__v2di)_mm_cvtepi16_epi64(__A), 4111 (__v2di)__W); 4112 } 4113 4114 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4115 _mm_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A) 4116 { 4117 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4118 (__v2di)_mm_cvtepi16_epi64(__A), 4119 (__v2di)_mm_setzero_si128()); 4120 } 4121 4122 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4123 _mm256_mask_cvtepi16_epi64(__m256i __W, __mmask8 __U, __m128i __A) 4124 { 4125 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4126 (__v4di)_mm256_cvtepi16_epi64(__A), 4127 (__v4di)__W); 4128 } 4129 4130 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4131 _mm256_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A) 4132 { 4133 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4134 (__v4di)_mm256_cvtepi16_epi64(__A), 4135 (__v4di)_mm256_setzero_si256()); 4136 } 4137 4138 4139 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4140 _mm_mask_cvtepu8_epi32(__m128i __W, __mmask8 __U, __m128i __A) 4141 { 4142 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4143 (__v4si)_mm_cvtepu8_epi32(__A), 4144 (__v4si)__W); 4145 } 4146 4147 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4148 _mm_maskz_cvtepu8_epi32(__mmask8 __U, __m128i __A) 4149 { 4150 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4151 (__v4si)_mm_cvtepu8_epi32(__A), 4152 (__v4si)_mm_setzero_si128()); 4153 } 4154 4155 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4156 _mm256_mask_cvtepu8_epi32(__m256i __W, __mmask8 __U, __m128i __A) 4157 { 4158 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4159 (__v8si)_mm256_cvtepu8_epi32(__A), 4160 (__v8si)__W); 4161 } 4162 4163 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4164 _mm256_maskz_cvtepu8_epi32(__mmask8 __U, __m128i __A) 4165 { 4166 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4167 (__v8si)_mm256_cvtepu8_epi32(__A), 4168 (__v8si)_mm256_setzero_si256()); 4169 } 4170 4171 
static __inline__ __m128i __DEFAULT_FN_ATTRS128 4172 _mm_mask_cvtepu8_epi64(__m128i __W, __mmask8 __U, __m128i __A) 4173 { 4174 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4175 (__v2di)_mm_cvtepu8_epi64(__A), 4176 (__v2di)__W); 4177 } 4178 4179 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4180 _mm_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A) 4181 { 4182 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4183 (__v2di)_mm_cvtepu8_epi64(__A), 4184 (__v2di)_mm_setzero_si128()); 4185 } 4186 4187 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4188 _mm256_mask_cvtepu8_epi64(__m256i __W, __mmask8 __U, __m128i __A) 4189 { 4190 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4191 (__v4di)_mm256_cvtepu8_epi64(__A), 4192 (__v4di)__W); 4193 } 4194 4195 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4196 _mm256_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A) 4197 { 4198 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4199 (__v4di)_mm256_cvtepu8_epi64(__A), 4200 (__v4di)_mm256_setzero_si256()); 4201 } 4202 4203 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4204 _mm_mask_cvtepu32_epi64(__m128i __W, __mmask8 __U, __m128i __X) 4205 { 4206 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4207 (__v2di)_mm_cvtepu32_epi64(__X), 4208 (__v2di)__W); 4209 } 4210 4211 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4212 _mm_maskz_cvtepu32_epi64(__mmask8 __U, __m128i __X) 4213 { 4214 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4215 (__v2di)_mm_cvtepu32_epi64(__X), 4216 (__v2di)_mm_setzero_si128()); 4217 } 4218 4219 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4220 _mm256_mask_cvtepu32_epi64(__m256i __W, __mmask8 __U, __m128i __X) 4221 { 4222 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4223 (__v4di)_mm256_cvtepu32_epi64(__X), 4224 (__v4di)__W); 4225 } 4226 4227 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4228 _mm256_maskz_cvtepu32_epi64(__mmask8 __U, __m128i __X) 4229 { 4230 return 
(__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4231 (__v4di)_mm256_cvtepu32_epi64(__X), 4232 (__v4di)_mm256_setzero_si256()); 4233 } 4234 4235 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4236 _mm_mask_cvtepu16_epi32(__m128i __W, __mmask8 __U, __m128i __A) 4237 { 4238 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4239 (__v4si)_mm_cvtepu16_epi32(__A), 4240 (__v4si)__W); 4241 } 4242 4243 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4244 _mm_maskz_cvtepu16_epi32(__mmask8 __U, __m128i __A) 4245 { 4246 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4247 (__v4si)_mm_cvtepu16_epi32(__A), 4248 (__v4si)_mm_setzero_si128()); 4249 } 4250 4251 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4252 _mm256_mask_cvtepu16_epi32(__m256i __W, __mmask8 __U, __m128i __A) 4253 { 4254 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4255 (__v8si)_mm256_cvtepu16_epi32(__A), 4256 (__v8si)__W); 4257 } 4258 4259 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4260 _mm256_maskz_cvtepu16_epi32(__mmask8 __U, __m128i __A) 4261 { 4262 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4263 (__v8si)_mm256_cvtepu16_epi32(__A), 4264 (__v8si)_mm256_setzero_si256()); 4265 } 4266 4267 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4268 _mm_mask_cvtepu16_epi64(__m128i __W, __mmask8 __U, __m128i __A) 4269 { 4270 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4271 (__v2di)_mm_cvtepu16_epi64(__A), 4272 (__v2di)__W); 4273 } 4274 4275 static __inline__ __m128i __DEFAULT_FN_ATTRS128 4276 _mm_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A) 4277 { 4278 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4279 (__v2di)_mm_cvtepu16_epi64(__A), 4280 (__v2di)_mm_setzero_si128()); 4281 } 4282 4283 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4284 _mm256_mask_cvtepu16_epi64(__m256i __W, __mmask8 __U, __m128i __A) 4285 { 4286 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4287 (__v4di)_mm256_cvtepu16_epi64(__A), 4288 (__v4di)__W); 4289 } 
4290 4291 static __inline__ __m256i __DEFAULT_FN_ATTRS256 4292 _mm256_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A) 4293 { 4294 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4295 (__v4di)_mm256_cvtepu16_epi64(__A), 4296 (__v4di)_mm256_setzero_si256()); 4297 } 4298 4299 4300#define _mm_rol_epi32(a, b) \ 4301 ((__m128i)__builtin_ia32_prold128((__v4si)(__m128i)(a), (int)(b))) 4302 4303#define _mm_mask_rol_epi32(w, u, a, b) \ 4304 ((__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \ 4305 (__v4si)_mm_rol_epi32((a), (b)), \ 4306 (__v4si)(__m128i)(w))) 4307 4308#define _mm_maskz_rol_epi32(u, a, b) \ 4309 ((__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \ 4310 (__v4si)_mm_rol_epi32((a), (b)), \ 4311 (__v4si)_mm_setzero_si128())) 4312 4313#define _mm256_rol_epi32(a, b) \ 4314 ((__m256i)__builtin_ia32_prold256((__v8si)(__m256i)(a), (int)(b))) 4315 4316#define _mm256_mask_rol_epi32(w, u, a, b) \ 4317 ((__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \ 4318 (__v8si)_mm256_rol_epi32((a), (b)), \ 4319 (__v8si)(__m256i)(w))) 4320 4321#define _mm256_maskz_rol_epi32(u, a, b) \ 4322 ((__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \ 4323 (__v8si)_mm256_rol_epi32((a), (b)), \ 4324 (__v8si)_mm256_setzero_si256())) 4325 4326#define _mm_rol_epi64(a, b) \ 4327 ((__m128i)__builtin_ia32_prolq128((__v2di)(__m128i)(a), (int)(b))) 4328 4329#define _mm_mask_rol_epi64(w, u, a, b) \ 4330 ((__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \ 4331 (__v2di)_mm_rol_epi64((a), (b)), \ 4332 (__v2di)(__m128i)(w))) 4333 4334#define _mm_maskz_rol_epi64(u, a, b) \ 4335 ((__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \ 4336 (__v2di)_mm_rol_epi64((a), (b)), \ 4337 (__v2di)_mm_setzero_si128())) 4338 4339#define _mm256_rol_epi64(a, b) \ 4340 ((__m256i)__builtin_ia32_prolq256((__v4di)(__m256i)(a), (int)(b))) 4341 4342#define _mm256_mask_rol_epi64(w, u, a, b) \ 4343 ((__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \ 4344 (__v4di)_mm256_rol_epi64((a), (b)), \ 4345 
(__v4di)(__m256i)(w))) 4346 4347#define _mm256_maskz_rol_epi64(u, a, b) \ 4348 ((__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \ 4349 (__v4di)_mm256_rol_epi64((a), (b)), \ 4350 (__v4di)_mm256_setzero_si256())) 4351 4352static __inline__ __m128i __DEFAULT_FN_ATTRS128 4353_mm_rolv_epi32 (__m128i __A, __m128i __B) 4354{ 4355 return (__m128i)__builtin_ia32_prolvd128((__v4si)__A, (__v4si)__B); 4356} 4357 4358static __inline__ __m128i __DEFAULT_FN_ATTRS128 4359_mm_mask_rolv_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 4360{ 4361 return (__m128i)__builtin_ia32_selectd_128(__U, 4362 (__v4si)_mm_rolv_epi32(__A, __B), 4363 (__v4si)__W); 4364} 4365 4366static __inline__ __m128i __DEFAULT_FN_ATTRS128 4367_mm_maskz_rolv_epi32 (__mmask8 __U, __m128i __A, __m128i __B) 4368{ 4369 return (__m128i)__builtin_ia32_selectd_128(__U, 4370 (__v4si)_mm_rolv_epi32(__A, __B), 4371 (__v4si)_mm_setzero_si128()); 4372} 4373 4374static __inline__ __m256i __DEFAULT_FN_ATTRS256 4375_mm256_rolv_epi32 (__m256i __A, __m256i __B) 4376{ 4377 return (__m256i)__builtin_ia32_prolvd256((__v8si)__A, (__v8si)__B); 4378} 4379 4380static __inline__ __m256i __DEFAULT_FN_ATTRS256 4381_mm256_mask_rolv_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 4382{ 4383 return (__m256i)__builtin_ia32_selectd_256(__U, 4384 (__v8si)_mm256_rolv_epi32(__A, __B), 4385 (__v8si)__W); 4386} 4387 4388static __inline__ __m256i __DEFAULT_FN_ATTRS256 4389_mm256_maskz_rolv_epi32 (__mmask8 __U, __m256i __A, __m256i __B) 4390{ 4391 return (__m256i)__builtin_ia32_selectd_256(__U, 4392 (__v8si)_mm256_rolv_epi32(__A, __B), 4393 (__v8si)_mm256_setzero_si256()); 4394} 4395 4396static __inline__ __m128i __DEFAULT_FN_ATTRS128 4397_mm_rolv_epi64 (__m128i __A, __m128i __B) 4398{ 4399 return (__m128i)__builtin_ia32_prolvq128((__v2di)__A, (__v2di)__B); 4400} 4401 4402static __inline__ __m128i __DEFAULT_FN_ATTRS128 4403_mm_mask_rolv_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 4404{ 4405 return 
(__m128i)__builtin_ia32_selectq_128(__U, 4406 (__v2di)_mm_rolv_epi64(__A, __B), 4407 (__v2di)__W); 4408} 4409 4410static __inline__ __m128i __DEFAULT_FN_ATTRS128 4411_mm_maskz_rolv_epi64 (__mmask8 __U, __m128i __A, __m128i __B) 4412{ 4413 return (__m128i)__builtin_ia32_selectq_128(__U, 4414 (__v2di)_mm_rolv_epi64(__A, __B), 4415 (__v2di)_mm_setzero_si128()); 4416} 4417 4418static __inline__ __m256i __DEFAULT_FN_ATTRS256 4419_mm256_rolv_epi64 (__m256i __A, __m256i __B) 4420{ 4421 return (__m256i)__builtin_ia32_prolvq256((__v4di)__A, (__v4di)__B); 4422} 4423 4424static __inline__ __m256i __DEFAULT_FN_ATTRS256 4425_mm256_mask_rolv_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 4426{ 4427 return (__m256i)__builtin_ia32_selectq_256(__U, 4428 (__v4di)_mm256_rolv_epi64(__A, __B), 4429 (__v4di)__W); 4430} 4431 4432static __inline__ __m256i __DEFAULT_FN_ATTRS256 4433_mm256_maskz_rolv_epi64 (__mmask8 __U, __m256i __A, __m256i __B) 4434{ 4435 return (__m256i)__builtin_ia32_selectq_256(__U, 4436 (__v4di)_mm256_rolv_epi64(__A, __B), 4437 (__v4di)_mm256_setzero_si256()); 4438} 4439 4440#define _mm_ror_epi32(a, b) \ 4441 ((__m128i)__builtin_ia32_prord128((__v4si)(__m128i)(a), (int)(b))) 4442 4443#define _mm_mask_ror_epi32(w, u, a, b) \ 4444 ((__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \ 4445 (__v4si)_mm_ror_epi32((a), (b)), \ 4446 (__v4si)(__m128i)(w))) 4447 4448#define _mm_maskz_ror_epi32(u, a, b) \ 4449 ((__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \ 4450 (__v4si)_mm_ror_epi32((a), (b)), \ 4451 (__v4si)_mm_setzero_si128())) 4452 4453#define _mm256_ror_epi32(a, b) \ 4454 ((__m256i)__builtin_ia32_prord256((__v8si)(__m256i)(a), (int)(b))) 4455 4456#define _mm256_mask_ror_epi32(w, u, a, b) \ 4457 ((__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \ 4458 (__v8si)_mm256_ror_epi32((a), (b)), \ 4459 (__v8si)(__m256i)(w))) 4460 4461#define _mm256_maskz_ror_epi32(u, a, b) \ 4462 ((__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \ 4463 
(__v8si)_mm256_ror_epi32((a), (b)), \ 4464 (__v8si)_mm256_setzero_si256())) 4465 4466#define _mm_ror_epi64(a, b) \ 4467 ((__m128i)__builtin_ia32_prorq128((__v2di)(__m128i)(a), (int)(b))) 4468 4469#define _mm_mask_ror_epi64(w, u, a, b) \ 4470 ((__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \ 4471 (__v2di)_mm_ror_epi64((a), (b)), \ 4472 (__v2di)(__m128i)(w))) 4473 4474#define _mm_maskz_ror_epi64(u, a, b) \ 4475 ((__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \ 4476 (__v2di)_mm_ror_epi64((a), (b)), \ 4477 (__v2di)_mm_setzero_si128())) 4478 4479#define _mm256_ror_epi64(a, b) \ 4480 ((__m256i)__builtin_ia32_prorq256((__v4di)(__m256i)(a), (int)(b))) 4481 4482#define _mm256_mask_ror_epi64(w, u, a, b) \ 4483 ((__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \ 4484 (__v4di)_mm256_ror_epi64((a), (b)), \ 4485 (__v4di)(__m256i)(w))) 4486 4487#define _mm256_maskz_ror_epi64(u, a, b) \ 4488 ((__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \ 4489 (__v4di)_mm256_ror_epi64((a), (b)), \ 4490 (__v4di)_mm256_setzero_si256())) 4491 4492static __inline__ __m128i __DEFAULT_FN_ATTRS128 4493_mm_mask_sll_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 4494{ 4495 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4496 (__v4si)_mm_sll_epi32(__A, __B), 4497 (__v4si)__W); 4498} 4499 4500static __inline__ __m128i __DEFAULT_FN_ATTRS128 4501_mm_maskz_sll_epi32(__mmask8 __U, __m128i __A, __m128i __B) 4502{ 4503 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4504 (__v4si)_mm_sll_epi32(__A, __B), 4505 (__v4si)_mm_setzero_si128()); 4506} 4507 4508static __inline__ __m256i __DEFAULT_FN_ATTRS256 4509_mm256_mask_sll_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) 4510{ 4511 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4512 (__v8si)_mm256_sll_epi32(__A, __B), 4513 (__v8si)__W); 4514} 4515 4516static __inline__ __m256i __DEFAULT_FN_ATTRS256 4517_mm256_maskz_sll_epi32(__mmask8 __U, __m256i __A, __m128i __B) 4518{ 4519 return 
(__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4520 (__v8si)_mm256_sll_epi32(__A, __B), 4521 (__v8si)_mm256_setzero_si256()); 4522} 4523 4524static __inline__ __m128i __DEFAULT_FN_ATTRS128 4525_mm_mask_slli_epi32(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B) 4526{ 4527 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4528 (__v4si)_mm_slli_epi32(__A, (int)__B), 4529 (__v4si)__W); 4530} 4531 4532static __inline__ __m128i __DEFAULT_FN_ATTRS128 4533_mm_maskz_slli_epi32(__mmask8 __U, __m128i __A, unsigned int __B) 4534{ 4535 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4536 (__v4si)_mm_slli_epi32(__A, (int)__B), 4537 (__v4si)_mm_setzero_si128()); 4538} 4539 4540static __inline__ __m256i __DEFAULT_FN_ATTRS256 4541_mm256_mask_slli_epi32(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B) 4542{ 4543 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4544 (__v8si)_mm256_slli_epi32(__A, (int)__B), 4545 (__v8si)__W); 4546} 4547 4548static __inline__ __m256i __DEFAULT_FN_ATTRS256 4549_mm256_maskz_slli_epi32(__mmask8 __U, __m256i __A, unsigned int __B) 4550{ 4551 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4552 (__v8si)_mm256_slli_epi32(__A, (int)__B), 4553 (__v8si)_mm256_setzero_si256()); 4554} 4555 4556static __inline__ __m128i __DEFAULT_FN_ATTRS128 4557_mm_mask_sll_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 4558{ 4559 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4560 (__v2di)_mm_sll_epi64(__A, __B), 4561 (__v2di)__W); 4562} 4563 4564static __inline__ __m128i __DEFAULT_FN_ATTRS128 4565_mm_maskz_sll_epi64(__mmask8 __U, __m128i __A, __m128i __B) 4566{ 4567 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4568 (__v2di)_mm_sll_epi64(__A, __B), 4569 (__v2di)_mm_setzero_si128()); 4570} 4571 4572static __inline__ __m256i __DEFAULT_FN_ATTRS256 4573_mm256_mask_sll_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) 4574{ 4575 return 
(__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4576 (__v4di)_mm256_sll_epi64(__A, __B), 4577 (__v4di)__W); 4578} 4579 4580static __inline__ __m256i __DEFAULT_FN_ATTRS256 4581_mm256_maskz_sll_epi64(__mmask8 __U, __m256i __A, __m128i __B) 4582{ 4583 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4584 (__v4di)_mm256_sll_epi64(__A, __B), 4585 (__v4di)_mm256_setzero_si256()); 4586} 4587 4588static __inline__ __m128i __DEFAULT_FN_ATTRS128 4589_mm_mask_slli_epi64(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B) 4590{ 4591 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4592 (__v2di)_mm_slli_epi64(__A, (int)__B), 4593 (__v2di)__W); 4594} 4595 4596static __inline__ __m128i __DEFAULT_FN_ATTRS128 4597_mm_maskz_slli_epi64(__mmask8 __U, __m128i __A, unsigned int __B) 4598{ 4599 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4600 (__v2di)_mm_slli_epi64(__A, (int)__B), 4601 (__v2di)_mm_setzero_si128()); 4602} 4603 4604static __inline__ __m256i __DEFAULT_FN_ATTRS256 4605_mm256_mask_slli_epi64(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B) 4606{ 4607 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4608 (__v4di)_mm256_slli_epi64(__A, (int)__B), 4609 (__v4di)__W); 4610} 4611 4612static __inline__ __m256i __DEFAULT_FN_ATTRS256 4613_mm256_maskz_slli_epi64(__mmask8 __U, __m256i __A, unsigned int __B) 4614{ 4615 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4616 (__v4di)_mm256_slli_epi64(__A, (int)__B), 4617 (__v4di)_mm256_setzero_si256()); 4618} 4619 4620static __inline__ __m128i __DEFAULT_FN_ATTRS128 4621_mm_rorv_epi32 (__m128i __A, __m128i __B) 4622{ 4623 return (__m128i)__builtin_ia32_prorvd128((__v4si)__A, (__v4si)__B); 4624} 4625 4626static __inline__ __m128i __DEFAULT_FN_ATTRS128 4627_mm_mask_rorv_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 4628{ 4629 return (__m128i)__builtin_ia32_selectd_128(__U, 4630 (__v4si)_mm_rorv_epi32(__A, __B), 4631 (__v4si)__W); 4632} 4633 4634static __inline__ 
__m128i __DEFAULT_FN_ATTRS128 4635_mm_maskz_rorv_epi32 (__mmask8 __U, __m128i __A, __m128i __B) 4636{ 4637 return (__m128i)__builtin_ia32_selectd_128(__U, 4638 (__v4si)_mm_rorv_epi32(__A, __B), 4639 (__v4si)_mm_setzero_si128()); 4640} 4641 4642static __inline__ __m256i __DEFAULT_FN_ATTRS256 4643_mm256_rorv_epi32 (__m256i __A, __m256i __B) 4644{ 4645 return (__m256i)__builtin_ia32_prorvd256((__v8si)__A, (__v8si)__B); 4646} 4647 4648static __inline__ __m256i __DEFAULT_FN_ATTRS256 4649_mm256_mask_rorv_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 4650{ 4651 return (__m256i)__builtin_ia32_selectd_256(__U, 4652 (__v8si)_mm256_rorv_epi32(__A, __B), 4653 (__v8si)__W); 4654} 4655 4656static __inline__ __m256i __DEFAULT_FN_ATTRS256 4657_mm256_maskz_rorv_epi32 (__mmask8 __U, __m256i __A, __m256i __B) 4658{ 4659 return (__m256i)__builtin_ia32_selectd_256(__U, 4660 (__v8si)_mm256_rorv_epi32(__A, __B), 4661 (__v8si)_mm256_setzero_si256()); 4662} 4663 4664static __inline__ __m128i __DEFAULT_FN_ATTRS128 4665_mm_rorv_epi64 (__m128i __A, __m128i __B) 4666{ 4667 return (__m128i)__builtin_ia32_prorvq128((__v2di)__A, (__v2di)__B); 4668} 4669 4670static __inline__ __m128i __DEFAULT_FN_ATTRS128 4671_mm_mask_rorv_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 4672{ 4673 return (__m128i)__builtin_ia32_selectq_128(__U, 4674 (__v2di)_mm_rorv_epi64(__A, __B), 4675 (__v2di)__W); 4676} 4677 4678static __inline__ __m128i __DEFAULT_FN_ATTRS128 4679_mm_maskz_rorv_epi64 (__mmask8 __U, __m128i __A, __m128i __B) 4680{ 4681 return (__m128i)__builtin_ia32_selectq_128(__U, 4682 (__v2di)_mm_rorv_epi64(__A, __B), 4683 (__v2di)_mm_setzero_si128()); 4684} 4685 4686static __inline__ __m256i __DEFAULT_FN_ATTRS256 4687_mm256_rorv_epi64 (__m256i __A, __m256i __B) 4688{ 4689 return (__m256i)__builtin_ia32_prorvq256((__v4di)__A, (__v4di)__B); 4690} 4691 4692static __inline__ __m256i __DEFAULT_FN_ATTRS256 4693_mm256_mask_rorv_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 
4694{ 4695 return (__m256i)__builtin_ia32_selectq_256(__U, 4696 (__v4di)_mm256_rorv_epi64(__A, __B), 4697 (__v4di)__W); 4698} 4699 4700static __inline__ __m256i __DEFAULT_FN_ATTRS256 4701_mm256_maskz_rorv_epi64 (__mmask8 __U, __m256i __A, __m256i __B) 4702{ 4703 return (__m256i)__builtin_ia32_selectq_256(__U, 4704 (__v4di)_mm256_rorv_epi64(__A, __B), 4705 (__v4di)_mm256_setzero_si256()); 4706} 4707 4708static __inline__ __m128i __DEFAULT_FN_ATTRS128 4709_mm_mask_sllv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) 4710{ 4711 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4712 (__v2di)_mm_sllv_epi64(__X, __Y), 4713 (__v2di)__W); 4714} 4715 4716static __inline__ __m128i __DEFAULT_FN_ATTRS128 4717_mm_maskz_sllv_epi64(__mmask8 __U, __m128i __X, __m128i __Y) 4718{ 4719 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4720 (__v2di)_mm_sllv_epi64(__X, __Y), 4721 (__v2di)_mm_setzero_si128()); 4722} 4723 4724static __inline__ __m256i __DEFAULT_FN_ATTRS256 4725_mm256_mask_sllv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) 4726{ 4727 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4728 (__v4di)_mm256_sllv_epi64(__X, __Y), 4729 (__v4di)__W); 4730} 4731 4732static __inline__ __m256i __DEFAULT_FN_ATTRS256 4733_mm256_maskz_sllv_epi64(__mmask8 __U, __m256i __X, __m256i __Y) 4734{ 4735 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4736 (__v4di)_mm256_sllv_epi64(__X, __Y), 4737 (__v4di)_mm256_setzero_si256()); 4738} 4739 4740static __inline__ __m128i __DEFAULT_FN_ATTRS128 4741_mm_mask_sllv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) 4742{ 4743 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4744 (__v4si)_mm_sllv_epi32(__X, __Y), 4745 (__v4si)__W); 4746} 4747 4748static __inline__ __m128i __DEFAULT_FN_ATTRS128 4749_mm_maskz_sllv_epi32(__mmask8 __U, __m128i __X, __m128i __Y) 4750{ 4751 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4752 (__v4si)_mm_sllv_epi32(__X, __Y), 4753 
(__v4si)_mm_setzero_si128()); 4754} 4755 4756static __inline__ __m256i __DEFAULT_FN_ATTRS256 4757_mm256_mask_sllv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) 4758{ 4759 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4760 (__v8si)_mm256_sllv_epi32(__X, __Y), 4761 (__v8si)__W); 4762} 4763 4764static __inline__ __m256i __DEFAULT_FN_ATTRS256 4765_mm256_maskz_sllv_epi32(__mmask8 __U, __m256i __X, __m256i __Y) 4766{ 4767 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4768 (__v8si)_mm256_sllv_epi32(__X, __Y), 4769 (__v8si)_mm256_setzero_si256()); 4770} 4771 4772static __inline__ __m128i __DEFAULT_FN_ATTRS128 4773_mm_mask_srlv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) 4774{ 4775 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4776 (__v2di)_mm_srlv_epi64(__X, __Y), 4777 (__v2di)__W); 4778} 4779 4780static __inline__ __m128i __DEFAULT_FN_ATTRS128 4781_mm_maskz_srlv_epi64(__mmask8 __U, __m128i __X, __m128i __Y) 4782{ 4783 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4784 (__v2di)_mm_srlv_epi64(__X, __Y), 4785 (__v2di)_mm_setzero_si128()); 4786} 4787 4788static __inline__ __m256i __DEFAULT_FN_ATTRS256 4789_mm256_mask_srlv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) 4790{ 4791 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4792 (__v4di)_mm256_srlv_epi64(__X, __Y), 4793 (__v4di)__W); 4794} 4795 4796static __inline__ __m256i __DEFAULT_FN_ATTRS256 4797_mm256_maskz_srlv_epi64(__mmask8 __U, __m256i __X, __m256i __Y) 4798{ 4799 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4800 (__v4di)_mm256_srlv_epi64(__X, __Y), 4801 (__v4di)_mm256_setzero_si256()); 4802} 4803 4804static __inline__ __m128i __DEFAULT_FN_ATTRS128 4805_mm_mask_srlv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) 4806{ 4807 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4808 (__v4si)_mm_srlv_epi32(__X, __Y), 4809 (__v4si)__W); 4810} 4811 4812static __inline__ __m128i 
__DEFAULT_FN_ATTRS128 4813_mm_maskz_srlv_epi32(__mmask8 __U, __m128i __X, __m128i __Y) 4814{ 4815 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4816 (__v4si)_mm_srlv_epi32(__X, __Y), 4817 (__v4si)_mm_setzero_si128()); 4818} 4819 4820static __inline__ __m256i __DEFAULT_FN_ATTRS256 4821_mm256_mask_srlv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) 4822{ 4823 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4824 (__v8si)_mm256_srlv_epi32(__X, __Y), 4825 (__v8si)__W); 4826} 4827 4828static __inline__ __m256i __DEFAULT_FN_ATTRS256 4829_mm256_maskz_srlv_epi32(__mmask8 __U, __m256i __X, __m256i __Y) 4830{ 4831 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4832 (__v8si)_mm256_srlv_epi32(__X, __Y), 4833 (__v8si)_mm256_setzero_si256()); 4834} 4835 4836static __inline__ __m128i __DEFAULT_FN_ATTRS128 4837_mm_mask_srl_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 4838{ 4839 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4840 (__v4si)_mm_srl_epi32(__A, __B), 4841 (__v4si)__W); 4842} 4843 4844static __inline__ __m128i __DEFAULT_FN_ATTRS128 4845_mm_maskz_srl_epi32(__mmask8 __U, __m128i __A, __m128i __B) 4846{ 4847 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4848 (__v4si)_mm_srl_epi32(__A, __B), 4849 (__v4si)_mm_setzero_si128()); 4850} 4851 4852static __inline__ __m256i __DEFAULT_FN_ATTRS256 4853_mm256_mask_srl_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) 4854{ 4855 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4856 (__v8si)_mm256_srl_epi32(__A, __B), 4857 (__v8si)__W); 4858} 4859 4860static __inline__ __m256i __DEFAULT_FN_ATTRS256 4861_mm256_maskz_srl_epi32(__mmask8 __U, __m256i __A, __m128i __B) 4862{ 4863 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4864 (__v8si)_mm256_srl_epi32(__A, __B), 4865 (__v8si)_mm256_setzero_si256()); 4866} 4867 4868static __inline__ __m128i __DEFAULT_FN_ATTRS128 4869_mm_mask_srli_epi32(__m128i __W, __mmask8 __U, __m128i __A, 
unsigned int __B) 4870{ 4871 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4872 (__v4si)_mm_srli_epi32(__A, (int)__B), 4873 (__v4si)__W); 4874} 4875 4876static __inline__ __m128i __DEFAULT_FN_ATTRS128 4877_mm_maskz_srli_epi32(__mmask8 __U, __m128i __A, unsigned int __B) 4878{ 4879 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4880 (__v4si)_mm_srli_epi32(__A, (int)__B), 4881 (__v4si)_mm_setzero_si128()); 4882} 4883 4884static __inline__ __m256i __DEFAULT_FN_ATTRS256 4885_mm256_mask_srli_epi32(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B) 4886{ 4887 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4888 (__v8si)_mm256_srli_epi32(__A, (int)__B), 4889 (__v8si)__W); 4890} 4891 4892static __inline__ __m256i __DEFAULT_FN_ATTRS256 4893_mm256_maskz_srli_epi32(__mmask8 __U, __m256i __A, unsigned int __B) 4894{ 4895 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4896 (__v8si)_mm256_srli_epi32(__A, (int)__B), 4897 (__v8si)_mm256_setzero_si256()); 4898} 4899 4900static __inline__ __m128i __DEFAULT_FN_ATTRS128 4901_mm_mask_srl_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 4902{ 4903 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4904 (__v2di)_mm_srl_epi64(__A, __B), 4905 (__v2di)__W); 4906} 4907 4908static __inline__ __m128i __DEFAULT_FN_ATTRS128 4909_mm_maskz_srl_epi64(__mmask8 __U, __m128i __A, __m128i __B) 4910{ 4911 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4912 (__v2di)_mm_srl_epi64(__A, __B), 4913 (__v2di)_mm_setzero_si128()); 4914} 4915 4916static __inline__ __m256i __DEFAULT_FN_ATTRS256 4917_mm256_mask_srl_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) 4918{ 4919 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4920 (__v4di)_mm256_srl_epi64(__A, __B), 4921 (__v4di)__W); 4922} 4923 4924static __inline__ __m256i __DEFAULT_FN_ATTRS256 4925_mm256_maskz_srl_epi64(__mmask8 __U, __m256i __A, __m128i __B) 4926{ 4927 return 
(__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4928 (__v4di)_mm256_srl_epi64(__A, __B), 4929 (__v4di)_mm256_setzero_si256()); 4930} 4931 4932static __inline__ __m128i __DEFAULT_FN_ATTRS128 4933_mm_mask_srli_epi64(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B) 4934{ 4935 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4936 (__v2di)_mm_srli_epi64(__A, (int)__B), 4937 (__v2di)__W); 4938} 4939 4940static __inline__ __m128i __DEFAULT_FN_ATTRS128 4941_mm_maskz_srli_epi64(__mmask8 __U, __m128i __A, unsigned int __B) 4942{ 4943 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 4944 (__v2di)_mm_srli_epi64(__A, (int)__B), 4945 (__v2di)_mm_setzero_si128()); 4946} 4947 4948static __inline__ __m256i __DEFAULT_FN_ATTRS256 4949_mm256_mask_srli_epi64(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B) 4950{ 4951 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4952 (__v4di)_mm256_srli_epi64(__A, (int)__B), 4953 (__v4di)__W); 4954} 4955 4956static __inline__ __m256i __DEFAULT_FN_ATTRS256 4957_mm256_maskz_srli_epi64(__mmask8 __U, __m256i __A, unsigned int __B) 4958{ 4959 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 4960 (__v4di)_mm256_srli_epi64(__A, (int)__B), 4961 (__v4di)_mm256_setzero_si256()); 4962} 4963 4964static __inline__ __m128i __DEFAULT_FN_ATTRS128 4965_mm_mask_srav_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) 4966{ 4967 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4968 (__v4si)_mm_srav_epi32(__X, __Y), 4969 (__v4si)__W); 4970} 4971 4972static __inline__ __m128i __DEFAULT_FN_ATTRS128 4973_mm_maskz_srav_epi32(__mmask8 __U, __m128i __X, __m128i __Y) 4974{ 4975 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 4976 (__v4si)_mm_srav_epi32(__X, __Y), 4977 (__v4si)_mm_setzero_si128()); 4978} 4979 4980static __inline__ __m256i __DEFAULT_FN_ATTRS256 4981_mm256_mask_srav_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) 4982{ 4983 return 
(__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4984 (__v8si)_mm256_srav_epi32(__X, __Y), 4985 (__v8si)__W); 4986} 4987 4988static __inline__ __m256i __DEFAULT_FN_ATTRS256 4989_mm256_maskz_srav_epi32(__mmask8 __U, __m256i __X, __m256i __Y) 4990{ 4991 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 4992 (__v8si)_mm256_srav_epi32(__X, __Y), 4993 (__v8si)_mm256_setzero_si256()); 4994} 4995 4996static __inline__ __m128i __DEFAULT_FN_ATTRS128 4997_mm_srav_epi64(__m128i __X, __m128i __Y) 4998{ 4999 return (__m128i)__builtin_ia32_psravq128((__v2di)__X, (__v2di)__Y); 5000} 5001 5002static __inline__ __m128i __DEFAULT_FN_ATTRS128 5003_mm_mask_srav_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) 5004{ 5005 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 5006 (__v2di)_mm_srav_epi64(__X, __Y), 5007 (__v2di)__W); 5008} 5009 5010static __inline__ __m128i __DEFAULT_FN_ATTRS128 5011_mm_maskz_srav_epi64(__mmask8 __U, __m128i __X, __m128i __Y) 5012{ 5013 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 5014 (__v2di)_mm_srav_epi64(__X, __Y), 5015 (__v2di)_mm_setzero_si128()); 5016} 5017 5018static __inline__ __m256i __DEFAULT_FN_ATTRS256 5019_mm256_srav_epi64(__m256i __X, __m256i __Y) 5020{ 5021 return (__m256i)__builtin_ia32_psravq256((__v4di)__X, (__v4di) __Y); 5022} 5023 5024static __inline__ __m256i __DEFAULT_FN_ATTRS256 5025_mm256_mask_srav_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) 5026{ 5027 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 5028 (__v4di)_mm256_srav_epi64(__X, __Y), 5029 (__v4di)__W); 5030} 5031 5032static __inline__ __m256i __DEFAULT_FN_ATTRS256 5033_mm256_maskz_srav_epi64 (__mmask8 __U, __m256i __X, __m256i __Y) 5034{ 5035 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 5036 (__v4di)_mm256_srav_epi64(__X, __Y), 5037 (__v4di)_mm256_setzero_si256()); 5038} 5039 5040static __inline__ __m128i __DEFAULT_FN_ATTRS128 5041_mm_mask_mov_epi32 (__m128i __W, __mmask8 __U, __m128i __A) 5042{ 5043 
return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U, 5044 (__v4si) __A, 5045 (__v4si) __W); 5046} 5047 5048static __inline__ __m128i __DEFAULT_FN_ATTRS128 5049_mm_maskz_mov_epi32 (__mmask8 __U, __m128i __A) 5050{ 5051 return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U, 5052 (__v4si) __A, 5053 (__v4si) _mm_setzero_si128 ()); 5054} 5055 5056 5057static __inline__ __m256i __DEFAULT_FN_ATTRS256 5058_mm256_mask_mov_epi32 (__m256i __W, __mmask8 __U, __m256i __A) 5059{ 5060 return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U, 5061 (__v8si) __A, 5062 (__v8si) __W); 5063} 5064 5065static __inline__ __m256i __DEFAULT_FN_ATTRS256 5066_mm256_maskz_mov_epi32 (__mmask8 __U, __m256i __A) 5067{ 5068 return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U, 5069 (__v8si) __A, 5070 (__v8si) _mm256_setzero_si256 ()); 5071} 5072 5073static __inline __m128i __DEFAULT_FN_ATTRS128 5074_mm_load_epi32 (void const *__P) 5075{ 5076 return *(const __m128i *) __P; 5077} 5078 5079static __inline__ __m128i __DEFAULT_FN_ATTRS128 5080_mm_mask_load_epi32 (__m128i __W, __mmask8 __U, void const *__P) 5081{ 5082 return (__m128i) __builtin_ia32_movdqa32load128_mask ((const __v4si *) __P, 5083 (__v4si) __W, 5084 (__mmask8) 5085 __U); 5086} 5087 5088static __inline__ __m128i __DEFAULT_FN_ATTRS128 5089_mm_maskz_load_epi32 (__mmask8 __U, void const *__P) 5090{ 5091 return (__m128i) __builtin_ia32_movdqa32load128_mask ((const __v4si *) __P, 5092 (__v4si) 5093 _mm_setzero_si128 (), 5094 (__mmask8) 5095 __U); 5096} 5097 5098static __inline __m256i __DEFAULT_FN_ATTRS256 5099_mm256_load_epi32 (void const *__P) 5100{ 5101 return *(const __m256i *) __P; 5102} 5103 5104static __inline__ __m256i __DEFAULT_FN_ATTRS256 5105_mm256_mask_load_epi32 (__m256i __W, __mmask8 __U, void const *__P) 5106{ 5107 return (__m256i) __builtin_ia32_movdqa32load256_mask ((const __v8si *) __P, 5108 (__v8si) __W, 5109 (__mmask8) 5110 __U); 5111} 5112 5113static __inline__ __m256i __DEFAULT_FN_ATTRS256 
5114_mm256_maskz_load_epi32 (__mmask8 __U, void const *__P) 5115{ 5116 return (__m256i) __builtin_ia32_movdqa32load256_mask ((const __v8si *) __P, 5117 (__v8si) 5118 _mm256_setzero_si256 (), 5119 (__mmask8) 5120 __U); 5121} 5122 5123static __inline void __DEFAULT_FN_ATTRS128 5124_mm_store_epi32 (void *__P, __m128i __A) 5125{ 5126 *(__m128i *) __P = __A; 5127} 5128 5129static __inline__ void __DEFAULT_FN_ATTRS128 5130_mm_mask_store_epi32 (void *__P, __mmask8 __U, __m128i __A) 5131{ 5132 __builtin_ia32_movdqa32store128_mask ((__v4si *) __P, 5133 (__v4si) __A, 5134 (__mmask8) __U); 5135} 5136 5137static __inline void __DEFAULT_FN_ATTRS256 5138_mm256_store_epi32 (void *__P, __m256i __A) 5139{ 5140 *(__m256i *) __P = __A; 5141} 5142 5143static __inline__ void __DEFAULT_FN_ATTRS256 5144_mm256_mask_store_epi32 (void *__P, __mmask8 __U, __m256i __A) 5145{ 5146 __builtin_ia32_movdqa32store256_mask ((__v8si *) __P, 5147 (__v8si) __A, 5148 (__mmask8) __U); 5149} 5150 5151static __inline__ __m128i __DEFAULT_FN_ATTRS128 5152_mm_mask_mov_epi64 (__m128i __W, __mmask8 __U, __m128i __A) 5153{ 5154 return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U, 5155 (__v2di) __A, 5156 (__v2di) __W); 5157} 5158 5159static __inline__ __m128i __DEFAULT_FN_ATTRS128 5160_mm_maskz_mov_epi64 (__mmask8 __U, __m128i __A) 5161{ 5162 return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U, 5163 (__v2di) __A, 5164 (__v2di) _mm_setzero_si128 ()); 5165} 5166 5167static __inline__ __m256i __DEFAULT_FN_ATTRS256 5168_mm256_mask_mov_epi64 (__m256i __W, __mmask8 __U, __m256i __A) 5169{ 5170 return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U, 5171 (__v4di) __A, 5172 (__v4di) __W); 5173} 5174 5175static __inline__ __m256i __DEFAULT_FN_ATTRS256 5176_mm256_maskz_mov_epi64 (__mmask8 __U, __m256i __A) 5177{ 5178 return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U, 5179 (__v4di) __A, 5180 (__v4di) _mm256_setzero_si256 ()); 5181} 5182 5183static __inline __m128i __DEFAULT_FN_ATTRS128 
5184_mm_load_epi64 (void const *__P) 5185{ 5186 return *(const __m128i *) __P; 5187} 5188 5189static __inline__ __m128i __DEFAULT_FN_ATTRS128 5190_mm_mask_load_epi64 (__m128i __W, __mmask8 __U, void const *__P) 5191{ 5192 return (__m128i) __builtin_ia32_movdqa64load128_mask ((const __v2di *) __P, 5193 (__v2di) __W, 5194 (__mmask8) 5195 __U); 5196} 5197 5198static __inline__ __m128i __DEFAULT_FN_ATTRS128 5199_mm_maskz_load_epi64 (__mmask8 __U, void const *__P) 5200{ 5201 return (__m128i) __builtin_ia32_movdqa64load128_mask ((const __v2di *) __P, 5202 (__v2di) 5203 _mm_setzero_si128 (), 5204 (__mmask8) 5205 __U); 5206} 5207 5208static __inline __m256i __DEFAULT_FN_ATTRS256 5209_mm256_load_epi64 (void const *__P) 5210{ 5211 return *(const __m256i *) __P; 5212} 5213 5214static __inline__ __m256i __DEFAULT_FN_ATTRS256 5215_mm256_mask_load_epi64 (__m256i __W, __mmask8 __U, void const *__P) 5216{ 5217 return (__m256i) __builtin_ia32_movdqa64load256_mask ((const __v4di *) __P, 5218 (__v4di) __W, 5219 (__mmask8) 5220 __U); 5221} 5222 5223static __inline__ __m256i __DEFAULT_FN_ATTRS256 5224_mm256_maskz_load_epi64 (__mmask8 __U, void const *__P) 5225{ 5226 return (__m256i) __builtin_ia32_movdqa64load256_mask ((const __v4di *) __P, 5227 (__v4di) 5228 _mm256_setzero_si256 (), 5229 (__mmask8) 5230 __U); 5231} 5232 5233static __inline void __DEFAULT_FN_ATTRS128 5234_mm_store_epi64 (void *__P, __m128i __A) 5235{ 5236 *(__m128i *) __P = __A; 5237} 5238 5239static __inline__ void __DEFAULT_FN_ATTRS128 5240_mm_mask_store_epi64 (void *__P, __mmask8 __U, __m128i __A) 5241{ 5242 __builtin_ia32_movdqa64store128_mask ((__v2di *) __P, 5243 (__v2di) __A, 5244 (__mmask8) __U); 5245} 5246 5247static __inline void __DEFAULT_FN_ATTRS256 5248_mm256_store_epi64 (void *__P, __m256i __A) 5249{ 5250 *(__m256i *) __P = __A; 5251} 5252 5253static __inline__ void __DEFAULT_FN_ATTRS256 5254_mm256_mask_store_epi64 (void *__P, __mmask8 __U, __m256i __A) 5255{ 5256 __builtin_ia32_movdqa64store256_mask 
((__v4di *) __P, 5257 (__v4di) __A, 5258 (__mmask8) __U); 5259} 5260 5261static __inline__ __m128d __DEFAULT_FN_ATTRS128 5262_mm_mask_movedup_pd (__m128d __W, __mmask8 __U, __m128d __A) 5263{ 5264 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 5265 (__v2df)_mm_movedup_pd(__A), 5266 (__v2df)__W); 5267} 5268 5269static __inline__ __m128d __DEFAULT_FN_ATTRS128 5270_mm_maskz_movedup_pd (__mmask8 __U, __m128d __A) 5271{ 5272 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 5273 (__v2df)_mm_movedup_pd(__A), 5274 (__v2df)_mm_setzero_pd()); 5275} 5276 5277static __inline__ __m256d __DEFAULT_FN_ATTRS256 5278_mm256_mask_movedup_pd (__m256d __W, __mmask8 __U, __m256d __A) 5279{ 5280 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 5281 (__v4df)_mm256_movedup_pd(__A), 5282 (__v4df)__W); 5283} 5284 5285static __inline__ __m256d __DEFAULT_FN_ATTRS256 5286_mm256_maskz_movedup_pd (__mmask8 __U, __m256d __A) 5287{ 5288 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 5289 (__v4df)_mm256_movedup_pd(__A), 5290 (__v4df)_mm256_setzero_pd()); 5291} 5292 5293static __inline__ __m128i __DEFAULT_FN_ATTRS128 5294_mm_mask_set1_epi32(__m128i __O, __mmask8 __M, int __A) 5295{ 5296 return (__m128i)__builtin_ia32_selectd_128(__M, 5297 (__v4si) _mm_set1_epi32(__A), 5298 (__v4si)__O); 5299} 5300 5301static __inline__ __m128i __DEFAULT_FN_ATTRS128 5302_mm_maskz_set1_epi32( __mmask8 __M, int __A) 5303{ 5304 return (__m128i)__builtin_ia32_selectd_128(__M, 5305 (__v4si) _mm_set1_epi32(__A), 5306 (__v4si)_mm_setzero_si128()); 5307} 5308 5309static __inline__ __m256i __DEFAULT_FN_ATTRS256 5310_mm256_mask_set1_epi32(__m256i __O, __mmask8 __M, int __A) 5311{ 5312 return (__m256i)__builtin_ia32_selectd_256(__M, 5313 (__v8si) _mm256_set1_epi32(__A), 5314 (__v8si)__O); 5315} 5316 5317static __inline__ __m256i __DEFAULT_FN_ATTRS256 5318_mm256_maskz_set1_epi32( __mmask8 __M, int __A) 5319{ 5320 return (__m256i)__builtin_ia32_selectd_256(__M, 5321 (__v8si) 
_mm256_set1_epi32(__A), 5322 (__v8si)_mm256_setzero_si256()); 5323} 5324 5325 5326static __inline__ __m128i __DEFAULT_FN_ATTRS128 5327_mm_mask_set1_epi64 (__m128i __O, __mmask8 __M, long long __A) 5328{ 5329 return (__m128i) __builtin_ia32_selectq_128(__M, 5330 (__v2di) _mm_set1_epi64x(__A), 5331 (__v2di) __O); 5332} 5333 5334static __inline__ __m128i __DEFAULT_FN_ATTRS128 5335_mm_maskz_set1_epi64 (__mmask8 __M, long long __A) 5336{ 5337 return (__m128i) __builtin_ia32_selectq_128(__M, 5338 (__v2di) _mm_set1_epi64x(__A), 5339 (__v2di) _mm_setzero_si128()); 5340} 5341 5342static __inline__ __m256i __DEFAULT_FN_ATTRS256 5343_mm256_mask_set1_epi64 (__m256i __O, __mmask8 __M, long long __A) 5344{ 5345 return (__m256i) __builtin_ia32_selectq_256(__M, 5346 (__v4di) _mm256_set1_epi64x(__A), 5347 (__v4di) __O) ; 5348} 5349 5350static __inline__ __m256i __DEFAULT_FN_ATTRS256 5351_mm256_maskz_set1_epi64 (__mmask8 __M, long long __A) 5352{ 5353 return (__m256i) __builtin_ia32_selectq_256(__M, 5354 (__v4di) _mm256_set1_epi64x(__A), 5355 (__v4di) _mm256_setzero_si256()); 5356} 5357 5358#define _mm_fixupimm_pd(A, B, C, imm) \ 5359 ((__m128d)__builtin_ia32_fixupimmpd128_mask((__v2df)(__m128d)(A), \ 5360 (__v2df)(__m128d)(B), \ 5361 (__v2di)(__m128i)(C), (int)(imm), \ 5362 (__mmask8)-1)) 5363 5364#define _mm_mask_fixupimm_pd(A, U, B, C, imm) \ 5365 ((__m128d)__builtin_ia32_fixupimmpd128_mask((__v2df)(__m128d)(A), \ 5366 (__v2df)(__m128d)(B), \ 5367 (__v2di)(__m128i)(C), (int)(imm), \ 5368 (__mmask8)(U))) 5369 5370#define _mm_maskz_fixupimm_pd(U, A, B, C, imm) \ 5371 ((__m128d)__builtin_ia32_fixupimmpd128_maskz((__v2df)(__m128d)(A), \ 5372 (__v2df)(__m128d)(B), \ 5373 (__v2di)(__m128i)(C), \ 5374 (int)(imm), (__mmask8)(U))) 5375 5376#define _mm256_fixupimm_pd(A, B, C, imm) \ 5377 ((__m256d)__builtin_ia32_fixupimmpd256_mask((__v4df)(__m256d)(A), \ 5378 (__v4df)(__m256d)(B), \ 5379 (__v4di)(__m256i)(C), (int)(imm), \ 5380 (__mmask8)-1)) 5381 5382#define _mm256_mask_fixupimm_pd(A, U, 
B, C, imm) \ 5383 ((__m256d)__builtin_ia32_fixupimmpd256_mask((__v4df)(__m256d)(A), \ 5384 (__v4df)(__m256d)(B), \ 5385 (__v4di)(__m256i)(C), (int)(imm), \ 5386 (__mmask8)(U))) 5387 5388#define _mm256_maskz_fixupimm_pd(U, A, B, C, imm) \ 5389 ((__m256d)__builtin_ia32_fixupimmpd256_maskz((__v4df)(__m256d)(A), \ 5390 (__v4df)(__m256d)(B), \ 5391 (__v4di)(__m256i)(C), \ 5392 (int)(imm), (__mmask8)(U))) 5393 5394#define _mm_fixupimm_ps(A, B, C, imm) \ 5395 ((__m128)__builtin_ia32_fixupimmps128_mask((__v4sf)(__m128)(A), \ 5396 (__v4sf)(__m128)(B), \ 5397 (__v4si)(__m128i)(C), (int)(imm), \ 5398 (__mmask8)-1)) 5399 5400#define _mm_mask_fixupimm_ps(A, U, B, C, imm) \ 5401 ((__m128)__builtin_ia32_fixupimmps128_mask((__v4sf)(__m128)(A), \ 5402 (__v4sf)(__m128)(B), \ 5403 (__v4si)(__m128i)(C), (int)(imm), \ 5404 (__mmask8)(U))) 5405 5406#define _mm_maskz_fixupimm_ps(U, A, B, C, imm) \ 5407 ((__m128)__builtin_ia32_fixupimmps128_maskz((__v4sf)(__m128)(A), \ 5408 (__v4sf)(__m128)(B), \ 5409 (__v4si)(__m128i)(C), (int)(imm), \ 5410 (__mmask8)(U))) 5411 5412#define _mm256_fixupimm_ps(A, B, C, imm) \ 5413 ((__m256)__builtin_ia32_fixupimmps256_mask((__v8sf)(__m256)(A), \ 5414 (__v8sf)(__m256)(B), \ 5415 (__v8si)(__m256i)(C), (int)(imm), \ 5416 (__mmask8)-1)) 5417 5418#define _mm256_mask_fixupimm_ps(A, U, B, C, imm) \ 5419 ((__m256)__builtin_ia32_fixupimmps256_mask((__v8sf)(__m256)(A), \ 5420 (__v8sf)(__m256)(B), \ 5421 (__v8si)(__m256i)(C), (int)(imm), \ 5422 (__mmask8)(U))) 5423 5424#define _mm256_maskz_fixupimm_ps(U, A, B, C, imm) \ 5425 ((__m256)__builtin_ia32_fixupimmps256_maskz((__v8sf)(__m256)(A), \ 5426 (__v8sf)(__m256)(B), \ 5427 (__v8si)(__m256i)(C), (int)(imm), \ 5428 (__mmask8)(U))) 5429 5430static __inline__ __m128d __DEFAULT_FN_ATTRS128 5431_mm_mask_load_pd (__m128d __W, __mmask8 __U, void const *__P) 5432{ 5433 return (__m128d) __builtin_ia32_loadapd128_mask ((const __v2df *) __P, 5434 (__v2df) __W, 5435 (__mmask8) __U); 5436} 5437 5438static __inline__ __m128d 
__DEFAULT_FN_ATTRS128 5439_mm_maskz_load_pd (__mmask8 __U, void const *__P) 5440{ 5441 return (__m128d) __builtin_ia32_loadapd128_mask ((const __v2df *) __P, 5442 (__v2df) 5443 _mm_setzero_pd (), 5444 (__mmask8) __U); 5445} 5446 5447static __inline__ __m256d __DEFAULT_FN_ATTRS256 5448_mm256_mask_load_pd (__m256d __W, __mmask8 __U, void const *__P) 5449{ 5450 return (__m256d) __builtin_ia32_loadapd256_mask ((const __v4df *) __P, 5451 (__v4df) __W, 5452 (__mmask8) __U); 5453} 5454 5455static __inline__ __m256d __DEFAULT_FN_ATTRS256 5456_mm256_maskz_load_pd (__mmask8 __U, void const *__P) 5457{ 5458 return (__m256d) __builtin_ia32_loadapd256_mask ((const __v4df *) __P, 5459 (__v4df) 5460 _mm256_setzero_pd (), 5461 (__mmask8) __U); 5462} 5463 5464static __inline__ __m128 __DEFAULT_FN_ATTRS128 5465_mm_mask_load_ps (__m128 __W, __mmask8 __U, void const *__P) 5466{ 5467 return (__m128) __builtin_ia32_loadaps128_mask ((const __v4sf *) __P, 5468 (__v4sf) __W, 5469 (__mmask8) __U); 5470} 5471 5472static __inline__ __m128 __DEFAULT_FN_ATTRS128 5473_mm_maskz_load_ps (__mmask8 __U, void const *__P) 5474{ 5475 return (__m128) __builtin_ia32_loadaps128_mask ((const __v4sf *) __P, 5476 (__v4sf) 5477 _mm_setzero_ps (), 5478 (__mmask8) __U); 5479} 5480 5481static __inline__ __m256 __DEFAULT_FN_ATTRS256 5482_mm256_mask_load_ps (__m256 __W, __mmask8 __U, void const *__P) 5483{ 5484 return (__m256) __builtin_ia32_loadaps256_mask ((const __v8sf *) __P, 5485 (__v8sf) __W, 5486 (__mmask8) __U); 5487} 5488 5489static __inline__ __m256 __DEFAULT_FN_ATTRS256 5490_mm256_maskz_load_ps (__mmask8 __U, void const *__P) 5491{ 5492 return (__m256) __builtin_ia32_loadaps256_mask ((const __v8sf *) __P, 5493 (__v8sf) 5494 _mm256_setzero_ps (), 5495 (__mmask8) __U); 5496} 5497 5498static __inline __m128i __DEFAULT_FN_ATTRS128 5499_mm_loadu_epi64 (void const *__P) 5500{ 5501 struct __loadu_epi64 { 5502 __m128i_u __v; 5503 } __attribute__((__packed__, __may_alias__)); 5504 return ((const struct 
__loadu_epi64*)__P)->__v; 5505} 5506 5507static __inline__ __m128i __DEFAULT_FN_ATTRS128 5508_mm_mask_loadu_epi64 (__m128i __W, __mmask8 __U, void const *__P) 5509{ 5510 return (__m128i) __builtin_ia32_loaddqudi128_mask ((const __v2di *) __P, 5511 (__v2di) __W, 5512 (__mmask8) __U); 5513} 5514 5515static __inline__ __m128i __DEFAULT_FN_ATTRS128 5516_mm_maskz_loadu_epi64 (__mmask8 __U, void const *__P) 5517{ 5518 return (__m128i) __builtin_ia32_loaddqudi128_mask ((const __v2di *) __P, 5519 (__v2di) 5520 _mm_setzero_si128 (), 5521 (__mmask8) __U); 5522} 5523 5524static __inline __m256i __DEFAULT_FN_ATTRS256 5525_mm256_loadu_epi64 (void const *__P) 5526{ 5527 struct __loadu_epi64 { 5528 __m256i_u __v; 5529 } __attribute__((__packed__, __may_alias__)); 5530 return ((const struct __loadu_epi64*)__P)->__v; 5531} 5532 5533static __inline__ __m256i __DEFAULT_FN_ATTRS256 5534_mm256_mask_loadu_epi64 (__m256i __W, __mmask8 __U, void const *__P) 5535{ 5536 return (__m256i) __builtin_ia32_loaddqudi256_mask ((const __v4di *) __P, 5537 (__v4di) __W, 5538 (__mmask8) __U); 5539} 5540 5541static __inline__ __m256i __DEFAULT_FN_ATTRS256 5542_mm256_maskz_loadu_epi64 (__mmask8 __U, void const *__P) 5543{ 5544 return (__m256i) __builtin_ia32_loaddqudi256_mask ((const __v4di *) __P, 5545 (__v4di) 5546 _mm256_setzero_si256 (), 5547 (__mmask8) __U); 5548} 5549 5550static __inline __m128i __DEFAULT_FN_ATTRS128 5551_mm_loadu_epi32 (void const *__P) 5552{ 5553 struct __loadu_epi32 { 5554 __m128i_u __v; 5555 } __attribute__((__packed__, __may_alias__)); 5556 return ((const struct __loadu_epi32*)__P)->__v; 5557} 5558 5559static __inline__ __m128i __DEFAULT_FN_ATTRS128 5560_mm_mask_loadu_epi32 (__m128i __W, __mmask8 __U, void const *__P) 5561{ 5562 return (__m128i) __builtin_ia32_loaddqusi128_mask ((const __v4si *) __P, 5563 (__v4si) __W, 5564 (__mmask8) __U); 5565} 5566 5567static __inline__ __m128i __DEFAULT_FN_ATTRS128 5568_mm_maskz_loadu_epi32 (__mmask8 __U, void const *__P) 5569{ 5570 
return (__m128i) __builtin_ia32_loaddqusi128_mask ((const __v4si *) __P, 5571 (__v4si) 5572 _mm_setzero_si128 (), 5573 (__mmask8) __U); 5574} 5575 5576static __inline __m256i __DEFAULT_FN_ATTRS256 5577_mm256_loadu_epi32 (void const *__P) 5578{ 5579 struct __loadu_epi32 { 5580 __m256i_u __v; 5581 } __attribute__((__packed__, __may_alias__)); 5582 return ((const struct __loadu_epi32*)__P)->__v; 5583} 5584 5585static __inline__ __m256i __DEFAULT_FN_ATTRS256 5586_mm256_mask_loadu_epi32 (__m256i __W, __mmask8 __U, void const *__P) 5587{ 5588 return (__m256i) __builtin_ia32_loaddqusi256_mask ((const __v8si *) __P, 5589 (__v8si) __W, 5590 (__mmask8) __U); 5591} 5592 5593static __inline__ __m256i __DEFAULT_FN_ATTRS256 5594_mm256_maskz_loadu_epi32 (__mmask8 __U, void const *__P) 5595{ 5596 return (__m256i) __builtin_ia32_loaddqusi256_mask ((const __v8si *) __P, 5597 (__v8si) 5598 _mm256_setzero_si256 (), 5599 (__mmask8) __U); 5600} 5601 5602static __inline__ __m128d __DEFAULT_FN_ATTRS128 5603_mm_mask_loadu_pd (__m128d __W, __mmask8 __U, void const *__P) 5604{ 5605 return (__m128d) __builtin_ia32_loadupd128_mask ((const __v2df *) __P, 5606 (__v2df) __W, 5607 (__mmask8) __U); 5608} 5609 5610static __inline__ __m128d __DEFAULT_FN_ATTRS128 5611_mm_maskz_loadu_pd (__mmask8 __U, void const *__P) 5612{ 5613 return (__m128d) __builtin_ia32_loadupd128_mask ((const __v2df *) __P, 5614 (__v2df) 5615 _mm_setzero_pd (), 5616 (__mmask8) __U); 5617} 5618 5619static __inline__ __m256d __DEFAULT_FN_ATTRS256 5620_mm256_mask_loadu_pd (__m256d __W, __mmask8 __U, void const *__P) 5621{ 5622 return (__m256d) __builtin_ia32_loadupd256_mask ((const __v4df *) __P, 5623 (__v4df) __W, 5624 (__mmask8) __U); 5625} 5626 5627static __inline__ __m256d __DEFAULT_FN_ATTRS256 5628_mm256_maskz_loadu_pd (__mmask8 __U, void const *__P) 5629{ 5630 return (__m256d) __builtin_ia32_loadupd256_mask ((const __v4df *) __P, 5631 (__v4df) 5632 _mm256_setzero_pd (), 5633 (__mmask8) __U); 5634} 5635 5636static __inline__ 
__m128 __DEFAULT_FN_ATTRS128 5637_mm_mask_loadu_ps (__m128 __W, __mmask8 __U, void const *__P) 5638{ 5639 return (__m128) __builtin_ia32_loadups128_mask ((const __v4sf *) __P, 5640 (__v4sf) __W, 5641 (__mmask8) __U); 5642} 5643 5644static __inline__ __m128 __DEFAULT_FN_ATTRS128 5645_mm_maskz_loadu_ps (__mmask8 __U, void const *__P) 5646{ 5647 return (__m128) __builtin_ia32_loadups128_mask ((const __v4sf *) __P, 5648 (__v4sf) 5649 _mm_setzero_ps (), 5650 (__mmask8) __U); 5651} 5652 5653static __inline__ __m256 __DEFAULT_FN_ATTRS256 5654_mm256_mask_loadu_ps (__m256 __W, __mmask8 __U, void const *__P) 5655{ 5656 return (__m256) __builtin_ia32_loadups256_mask ((const __v8sf *) __P, 5657 (__v8sf) __W, 5658 (__mmask8) __U); 5659} 5660 5661static __inline__ __m256 __DEFAULT_FN_ATTRS256 5662_mm256_maskz_loadu_ps (__mmask8 __U, void const *__P) 5663{ 5664 return (__m256) __builtin_ia32_loadups256_mask ((const __v8sf *) __P, 5665 (__v8sf) 5666 _mm256_setzero_ps (), 5667 (__mmask8) __U); 5668} 5669 5670static __inline__ void __DEFAULT_FN_ATTRS128 5671_mm_mask_store_pd (void *__P, __mmask8 __U, __m128d __A) 5672{ 5673 __builtin_ia32_storeapd128_mask ((__v2df *) __P, 5674 (__v2df) __A, 5675 (__mmask8) __U); 5676} 5677 5678static __inline__ void __DEFAULT_FN_ATTRS256 5679_mm256_mask_store_pd (void *__P, __mmask8 __U, __m256d __A) 5680{ 5681 __builtin_ia32_storeapd256_mask ((__v4df *) __P, 5682 (__v4df) __A, 5683 (__mmask8) __U); 5684} 5685 5686static __inline__ void __DEFAULT_FN_ATTRS128 5687_mm_mask_store_ps (void *__P, __mmask8 __U, __m128 __A) 5688{ 5689 __builtin_ia32_storeaps128_mask ((__v4sf *) __P, 5690 (__v4sf) __A, 5691 (__mmask8) __U); 5692} 5693 5694static __inline__ void __DEFAULT_FN_ATTRS256 5695_mm256_mask_store_ps (void *__P, __mmask8 __U, __m256 __A) 5696{ 5697 __builtin_ia32_storeaps256_mask ((__v8sf *) __P, 5698 (__v8sf) __A, 5699 (__mmask8) __U); 5700} 5701 5702static __inline void __DEFAULT_FN_ATTRS128 5703_mm_storeu_epi64 (void *__P, __m128i __A) 5704{ 5705 
struct __storeu_epi64 { 5706 __m128i_u __v; 5707 } __attribute__((__packed__, __may_alias__)); 5708 ((struct __storeu_epi64*)__P)->__v = __A; 5709} 5710 5711static __inline__ void __DEFAULT_FN_ATTRS128 5712_mm_mask_storeu_epi64 (void *__P, __mmask8 __U, __m128i __A) 5713{ 5714 __builtin_ia32_storedqudi128_mask ((__v2di *) __P, 5715 (__v2di) __A, 5716 (__mmask8) __U); 5717} 5718 5719static __inline void __DEFAULT_FN_ATTRS256 5720_mm256_storeu_epi64 (void *__P, __m256i __A) 5721{ 5722 struct __storeu_epi64 { 5723 __m256i_u __v; 5724 } __attribute__((__packed__, __may_alias__)); 5725 ((struct __storeu_epi64*)__P)->__v = __A; 5726} 5727 5728static __inline__ void __DEFAULT_FN_ATTRS256 5729_mm256_mask_storeu_epi64 (void *__P, __mmask8 __U, __m256i __A) 5730{ 5731 __builtin_ia32_storedqudi256_mask ((__v4di *) __P, 5732 (__v4di) __A, 5733 (__mmask8) __U); 5734} 5735 5736static __inline void __DEFAULT_FN_ATTRS128 5737_mm_storeu_epi32 (void *__P, __m128i __A) 5738{ 5739 struct __storeu_epi32 { 5740 __m128i_u __v; 5741 } __attribute__((__packed__, __may_alias__)); 5742 ((struct __storeu_epi32*)__P)->__v = __A; 5743} 5744 5745static __inline__ void __DEFAULT_FN_ATTRS128 5746_mm_mask_storeu_epi32 (void *__P, __mmask8 __U, __m128i __A) 5747{ 5748 __builtin_ia32_storedqusi128_mask ((__v4si *) __P, 5749 (__v4si) __A, 5750 (__mmask8) __U); 5751} 5752 5753static __inline void __DEFAULT_FN_ATTRS256 5754_mm256_storeu_epi32 (void *__P, __m256i __A) 5755{ 5756 struct __storeu_epi32 { 5757 __m256i_u __v; 5758 } __attribute__((__packed__, __may_alias__)); 5759 ((struct __storeu_epi32*)__P)->__v = __A; 5760} 5761 5762static __inline__ void __DEFAULT_FN_ATTRS256 5763_mm256_mask_storeu_epi32 (void *__P, __mmask8 __U, __m256i __A) 5764{ 5765 __builtin_ia32_storedqusi256_mask ((__v8si *) __P, 5766 (__v8si) __A, 5767 (__mmask8) __U); 5768} 5769 5770static __inline__ void __DEFAULT_FN_ATTRS128 5771_mm_mask_storeu_pd (void *__P, __mmask8 __U, __m128d __A) 5772{ 5773 
__builtin_ia32_storeupd128_mask ((__v2df *) __P, 5774 (__v2df) __A, 5775 (__mmask8) __U); 5776} 5777 5778static __inline__ void __DEFAULT_FN_ATTRS256 5779_mm256_mask_storeu_pd (void *__P, __mmask8 __U, __m256d __A) 5780{ 5781 __builtin_ia32_storeupd256_mask ((__v4df *) __P, 5782 (__v4df) __A, 5783 (__mmask8) __U); 5784} 5785 5786static __inline__ void __DEFAULT_FN_ATTRS128 5787_mm_mask_storeu_ps (void *__P, __mmask8 __U, __m128 __A) 5788{ 5789 __builtin_ia32_storeups128_mask ((__v4sf *) __P, 5790 (__v4sf) __A, 5791 (__mmask8) __U); 5792} 5793 5794static __inline__ void __DEFAULT_FN_ATTRS256 5795_mm256_mask_storeu_ps (void *__P, __mmask8 __U, __m256 __A) 5796{ 5797 __builtin_ia32_storeups256_mask ((__v8sf *) __P, 5798 (__v8sf) __A, 5799 (__mmask8) __U); 5800} 5801 5802 5803static __inline__ __m128d __DEFAULT_FN_ATTRS128 5804_mm_mask_unpackhi_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 5805{ 5806 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 5807 (__v2df)_mm_unpackhi_pd(__A, __B), 5808 (__v2df)__W); 5809} 5810 5811static __inline__ __m128d __DEFAULT_FN_ATTRS128 5812_mm_maskz_unpackhi_pd(__mmask8 __U, __m128d __A, __m128d __B) 5813{ 5814 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 5815 (__v2df)_mm_unpackhi_pd(__A, __B), 5816 (__v2df)_mm_setzero_pd()); 5817} 5818 5819static __inline__ __m256d __DEFAULT_FN_ATTRS256 5820_mm256_mask_unpackhi_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) 5821{ 5822 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 5823 (__v4df)_mm256_unpackhi_pd(__A, __B), 5824 (__v4df)__W); 5825} 5826 5827static __inline__ __m256d __DEFAULT_FN_ATTRS256 5828_mm256_maskz_unpackhi_pd(__mmask8 __U, __m256d __A, __m256d __B) 5829{ 5830 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 5831 (__v4df)_mm256_unpackhi_pd(__A, __B), 5832 (__v4df)_mm256_setzero_pd()); 5833} 5834 5835static __inline__ __m128 __DEFAULT_FN_ATTRS128 5836_mm_mask_unpackhi_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 
__B) 5837{ 5838 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 5839 (__v4sf)_mm_unpackhi_ps(__A, __B), 5840 (__v4sf)__W); 5841} 5842 5843static __inline__ __m128 __DEFAULT_FN_ATTRS128 5844_mm_maskz_unpackhi_ps(__mmask8 __U, __m128 __A, __m128 __B) 5845{ 5846 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 5847 (__v4sf)_mm_unpackhi_ps(__A, __B), 5848 (__v4sf)_mm_setzero_ps()); 5849} 5850 5851static __inline__ __m256 __DEFAULT_FN_ATTRS256 5852_mm256_mask_unpackhi_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) 5853{ 5854 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 5855 (__v8sf)_mm256_unpackhi_ps(__A, __B), 5856 (__v8sf)__W); 5857} 5858 5859static __inline__ __m256 __DEFAULT_FN_ATTRS256 5860_mm256_maskz_unpackhi_ps(__mmask8 __U, __m256 __A, __m256 __B) 5861{ 5862 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 5863 (__v8sf)_mm256_unpackhi_ps(__A, __B), 5864 (__v8sf)_mm256_setzero_ps()); 5865} 5866 5867static __inline__ __m128d __DEFAULT_FN_ATTRS128 5868_mm_mask_unpacklo_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) 5869{ 5870 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 5871 (__v2df)_mm_unpacklo_pd(__A, __B), 5872 (__v2df)__W); 5873} 5874 5875static __inline__ __m128d __DEFAULT_FN_ATTRS128 5876_mm_maskz_unpacklo_pd(__mmask8 __U, __m128d __A, __m128d __B) 5877{ 5878 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 5879 (__v2df)_mm_unpacklo_pd(__A, __B), 5880 (__v2df)_mm_setzero_pd()); 5881} 5882 5883static __inline__ __m256d __DEFAULT_FN_ATTRS256 5884_mm256_mask_unpacklo_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) 5885{ 5886 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 5887 (__v4df)_mm256_unpacklo_pd(__A, __B), 5888 (__v4df)__W); 5889} 5890 5891static __inline__ __m256d __DEFAULT_FN_ATTRS256 5892_mm256_maskz_unpacklo_pd(__mmask8 __U, __m256d __A, __m256d __B) 5893{ 5894 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 5895 
(__v4df)_mm256_unpacklo_pd(__A, __B), 5896 (__v4df)_mm256_setzero_pd()); 5897} 5898 5899static __inline__ __m128 __DEFAULT_FN_ATTRS128 5900_mm_mask_unpacklo_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) 5901{ 5902 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 5903 (__v4sf)_mm_unpacklo_ps(__A, __B), 5904 (__v4sf)__W); 5905} 5906 5907static __inline__ __m128 __DEFAULT_FN_ATTRS128 5908_mm_maskz_unpacklo_ps(__mmask8 __U, __m128 __A, __m128 __B) 5909{ 5910 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 5911 (__v4sf)_mm_unpacklo_ps(__A, __B), 5912 (__v4sf)_mm_setzero_ps()); 5913} 5914 5915static __inline__ __m256 __DEFAULT_FN_ATTRS256 5916_mm256_mask_unpacklo_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) 5917{ 5918 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 5919 (__v8sf)_mm256_unpacklo_ps(__A, __B), 5920 (__v8sf)__W); 5921} 5922 5923static __inline__ __m256 __DEFAULT_FN_ATTRS256 5924_mm256_maskz_unpacklo_ps(__mmask8 __U, __m256 __A, __m256 __B) 5925{ 5926 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 5927 (__v8sf)_mm256_unpacklo_ps(__A, __B), 5928 (__v8sf)_mm256_setzero_ps()); 5929} 5930 5931static __inline__ __m128d __DEFAULT_FN_ATTRS128 5932_mm_rcp14_pd (__m128d __A) 5933{ 5934 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A, 5935 (__v2df) 5936 _mm_setzero_pd (), 5937 (__mmask8) -1); 5938} 5939 5940static __inline__ __m128d __DEFAULT_FN_ATTRS128 5941_mm_mask_rcp14_pd (__m128d __W, __mmask8 __U, __m128d __A) 5942{ 5943 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A, 5944 (__v2df) __W, 5945 (__mmask8) __U); 5946} 5947 5948static __inline__ __m128d __DEFAULT_FN_ATTRS128 5949_mm_maskz_rcp14_pd (__mmask8 __U, __m128d __A) 5950{ 5951 return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A, 5952 (__v2df) 5953 _mm_setzero_pd (), 5954 (__mmask8) __U); 5955} 5956 5957static __inline__ __m256d __DEFAULT_FN_ATTRS256 5958_mm256_rcp14_pd (__m256d __A) 5959{ 5960 return (__m256d) 
__builtin_ia32_rcp14pd256_mask ((__v4df) __A, 5961 (__v4df) 5962 _mm256_setzero_pd (), 5963 (__mmask8) -1); 5964} 5965 5966static __inline__ __m256d __DEFAULT_FN_ATTRS256 5967_mm256_mask_rcp14_pd (__m256d __W, __mmask8 __U, __m256d __A) 5968{ 5969 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A, 5970 (__v4df) __W, 5971 (__mmask8) __U); 5972} 5973 5974static __inline__ __m256d __DEFAULT_FN_ATTRS256 5975_mm256_maskz_rcp14_pd (__mmask8 __U, __m256d __A) 5976{ 5977 return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A, 5978 (__v4df) 5979 _mm256_setzero_pd (), 5980 (__mmask8) __U); 5981} 5982 5983static __inline__ __m128 __DEFAULT_FN_ATTRS128 5984_mm_rcp14_ps (__m128 __A) 5985{ 5986 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A, 5987 (__v4sf) 5988 _mm_setzero_ps (), 5989 (__mmask8) -1); 5990} 5991 5992static __inline__ __m128 __DEFAULT_FN_ATTRS128 5993_mm_mask_rcp14_ps (__m128 __W, __mmask8 __U, __m128 __A) 5994{ 5995 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A, 5996 (__v4sf) __W, 5997 (__mmask8) __U); 5998} 5999 6000static __inline__ __m128 __DEFAULT_FN_ATTRS128 6001_mm_maskz_rcp14_ps (__mmask8 __U, __m128 __A) 6002{ 6003 return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A, 6004 (__v4sf) 6005 _mm_setzero_ps (), 6006 (__mmask8) __U); 6007} 6008 6009static __inline__ __m256 __DEFAULT_FN_ATTRS256 6010_mm256_rcp14_ps (__m256 __A) 6011{ 6012 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A, 6013 (__v8sf) 6014 _mm256_setzero_ps (), 6015 (__mmask8) -1); 6016} 6017 6018static __inline__ __m256 __DEFAULT_FN_ATTRS256 6019_mm256_mask_rcp14_ps (__m256 __W, __mmask8 __U, __m256 __A) 6020{ 6021 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A, 6022 (__v8sf) __W, 6023 (__mmask8) __U); 6024} 6025 6026static __inline__ __m256 __DEFAULT_FN_ATTRS256 6027_mm256_maskz_rcp14_ps (__mmask8 __U, __m256 __A) 6028{ 6029 return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A, 6030 (__v8sf) 6031 
_mm256_setzero_ps (), 6032 (__mmask8) __U); 6033} 6034 6035#define _mm_mask_permute_pd(W, U, X, C) \ 6036 ((__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \ 6037 (__v2df)_mm_permute_pd((X), (C)), \ 6038 (__v2df)(__m128d)(W))) 6039 6040#define _mm_maskz_permute_pd(U, X, C) \ 6041 ((__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \ 6042 (__v2df)_mm_permute_pd((X), (C)), \ 6043 (__v2df)_mm_setzero_pd())) 6044 6045#define _mm256_mask_permute_pd(W, U, X, C) \ 6046 ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ 6047 (__v4df)_mm256_permute_pd((X), (C)), \ 6048 (__v4df)(__m256d)(W))) 6049 6050#define _mm256_maskz_permute_pd(U, X, C) \ 6051 ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ 6052 (__v4df)_mm256_permute_pd((X), (C)), \ 6053 (__v4df)_mm256_setzero_pd())) 6054 6055#define _mm_mask_permute_ps(W, U, X, C) \ 6056 ((__m128)__builtin_ia32_selectps_128((__mmask8)(U), \ 6057 (__v4sf)_mm_permute_ps((X), (C)), \ 6058 (__v4sf)(__m128)(W))) 6059 6060#define _mm_maskz_permute_ps(U, X, C) \ 6061 ((__m128)__builtin_ia32_selectps_128((__mmask8)(U), \ 6062 (__v4sf)_mm_permute_ps((X), (C)), \ 6063 (__v4sf)_mm_setzero_ps())) 6064 6065#define _mm256_mask_permute_ps(W, U, X, C) \ 6066 ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ 6067 (__v8sf)_mm256_permute_ps((X), (C)), \ 6068 (__v8sf)(__m256)(W))) 6069 6070#define _mm256_maskz_permute_ps(U, X, C) \ 6071 ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ 6072 (__v8sf)_mm256_permute_ps((X), (C)), \ 6073 (__v8sf)_mm256_setzero_ps())) 6074 6075static __inline__ __m128d __DEFAULT_FN_ATTRS128 6076_mm_mask_permutevar_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128i __C) 6077{ 6078 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 6079 (__v2df)_mm_permutevar_pd(__A, __C), 6080 (__v2df)__W); 6081} 6082 6083static __inline__ __m128d __DEFAULT_FN_ATTRS128 6084_mm_maskz_permutevar_pd(__mmask8 __U, __m128d __A, __m128i __C) 6085{ 6086 return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, 6087 
(__v2df)_mm_permutevar_pd(__A, __C), 6088 (__v2df)_mm_setzero_pd()); 6089} 6090 6091static __inline__ __m256d __DEFAULT_FN_ATTRS256 6092_mm256_mask_permutevar_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256i __C) 6093{ 6094 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 6095 (__v4df)_mm256_permutevar_pd(__A, __C), 6096 (__v4df)__W); 6097} 6098 6099static __inline__ __m256d __DEFAULT_FN_ATTRS256 6100_mm256_maskz_permutevar_pd(__mmask8 __U, __m256d __A, __m256i __C) 6101{ 6102 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 6103 (__v4df)_mm256_permutevar_pd(__A, __C), 6104 (__v4df)_mm256_setzero_pd()); 6105} 6106 6107static __inline__ __m128 __DEFAULT_FN_ATTRS128 6108_mm_mask_permutevar_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128i __C) 6109{ 6110 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 6111 (__v4sf)_mm_permutevar_ps(__A, __C), 6112 (__v4sf)__W); 6113} 6114 6115static __inline__ __m128 __DEFAULT_FN_ATTRS128 6116_mm_maskz_permutevar_ps(__mmask8 __U, __m128 __A, __m128i __C) 6117{ 6118 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 6119 (__v4sf)_mm_permutevar_ps(__A, __C), 6120 (__v4sf)_mm_setzero_ps()); 6121} 6122 6123static __inline__ __m256 __DEFAULT_FN_ATTRS256 6124_mm256_mask_permutevar_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256i __C) 6125{ 6126 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 6127 (__v8sf)_mm256_permutevar_ps(__A, __C), 6128 (__v8sf)__W); 6129} 6130 6131static __inline__ __m256 __DEFAULT_FN_ATTRS256 6132_mm256_maskz_permutevar_ps(__mmask8 __U, __m256 __A, __m256i __C) 6133{ 6134 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 6135 (__v8sf)_mm256_permutevar_ps(__A, __C), 6136 (__v8sf)_mm256_setzero_ps()); 6137} 6138 6139static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 6140_mm_test_epi32_mask (__m128i __A, __m128i __B) 6141{ 6142 return _mm_cmpneq_epi32_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128()); 6143} 6144 6145static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 
6146_mm_mask_test_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B) 6147{ 6148 return _mm_mask_cmpneq_epi32_mask (__U, _mm_and_si128 (__A, __B), 6149 _mm_setzero_si128()); 6150} 6151 6152static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 6153_mm256_test_epi32_mask (__m256i __A, __m256i __B) 6154{ 6155 return _mm256_cmpneq_epi32_mask (_mm256_and_si256 (__A, __B), 6156 _mm256_setzero_si256()); 6157} 6158 6159static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 6160_mm256_mask_test_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B) 6161{ 6162 return _mm256_mask_cmpneq_epi32_mask (__U, _mm256_and_si256 (__A, __B), 6163 _mm256_setzero_si256()); 6164} 6165 6166static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 6167_mm_test_epi64_mask (__m128i __A, __m128i __B) 6168{ 6169 return _mm_cmpneq_epi64_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128()); 6170} 6171 6172static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 6173_mm_mask_test_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B) 6174{ 6175 return _mm_mask_cmpneq_epi64_mask (__U, _mm_and_si128 (__A, __B), 6176 _mm_setzero_si128()); 6177} 6178 6179static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 6180_mm256_test_epi64_mask (__m256i __A, __m256i __B) 6181{ 6182 return _mm256_cmpneq_epi64_mask (_mm256_and_si256 (__A, __B), 6183 _mm256_setzero_si256()); 6184} 6185 6186static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 6187_mm256_mask_test_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B) 6188{ 6189 return _mm256_mask_cmpneq_epi64_mask (__U, _mm256_and_si256 (__A, __B), 6190 _mm256_setzero_si256()); 6191} 6192 6193static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 6194_mm_testn_epi32_mask (__m128i __A, __m128i __B) 6195{ 6196 return _mm_cmpeq_epi32_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128()); 6197} 6198 6199static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 6200_mm_mask_testn_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B) 6201{ 6202 return _mm_mask_cmpeq_epi32_mask (__U, _mm_and_si128 (__A, __B), 6203 _mm_setzero_si128()); 
6204} 6205 6206static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 6207_mm256_testn_epi32_mask (__m256i __A, __m256i __B) 6208{ 6209 return _mm256_cmpeq_epi32_mask (_mm256_and_si256 (__A, __B), 6210 _mm256_setzero_si256()); 6211} 6212 6213static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 6214_mm256_mask_testn_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B) 6215{ 6216 return _mm256_mask_cmpeq_epi32_mask (__U, _mm256_and_si256 (__A, __B), 6217 _mm256_setzero_si256()); 6218} 6219 6220static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 6221_mm_testn_epi64_mask (__m128i __A, __m128i __B) 6222{ 6223 return _mm_cmpeq_epi64_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128()); 6224} 6225 6226static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 6227_mm_mask_testn_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B) 6228{ 6229 return _mm_mask_cmpeq_epi64_mask (__U, _mm_and_si128 (__A, __B), 6230 _mm_setzero_si128()); 6231} 6232 6233static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 6234_mm256_testn_epi64_mask (__m256i __A, __m256i __B) 6235{ 6236 return _mm256_cmpeq_epi64_mask (_mm256_and_si256 (__A, __B), 6237 _mm256_setzero_si256()); 6238} 6239 6240static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 6241_mm256_mask_testn_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B) 6242{ 6243 return _mm256_mask_cmpeq_epi64_mask (__U, _mm256_and_si256 (__A, __B), 6244 _mm256_setzero_si256()); 6245} 6246 6247static __inline__ __m128i __DEFAULT_FN_ATTRS128 6248_mm_mask_unpackhi_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 6249{ 6250 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 6251 (__v4si)_mm_unpackhi_epi32(__A, __B), 6252 (__v4si)__W); 6253} 6254 6255static __inline__ __m128i __DEFAULT_FN_ATTRS128 6256_mm_maskz_unpackhi_epi32(__mmask8 __U, __m128i __A, __m128i __B) 6257{ 6258 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 6259 (__v4si)_mm_unpackhi_epi32(__A, __B), 6260 (__v4si)_mm_setzero_si128()); 6261} 6262 6263static __inline__ __m256i 
__DEFAULT_FN_ATTRS256 6264_mm256_mask_unpackhi_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 6265{ 6266 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 6267 (__v8si)_mm256_unpackhi_epi32(__A, __B), 6268 (__v8si)__W); 6269} 6270 6271static __inline__ __m256i __DEFAULT_FN_ATTRS256 6272_mm256_maskz_unpackhi_epi32(__mmask8 __U, __m256i __A, __m256i __B) 6273{ 6274 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 6275 (__v8si)_mm256_unpackhi_epi32(__A, __B), 6276 (__v8si)_mm256_setzero_si256()); 6277} 6278 6279static __inline__ __m128i __DEFAULT_FN_ATTRS128 6280_mm_mask_unpackhi_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 6281{ 6282 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 6283 (__v2di)_mm_unpackhi_epi64(__A, __B), 6284 (__v2di)__W); 6285} 6286 6287static __inline__ __m128i __DEFAULT_FN_ATTRS128 6288_mm_maskz_unpackhi_epi64(__mmask8 __U, __m128i __A, __m128i __B) 6289{ 6290 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 6291 (__v2di)_mm_unpackhi_epi64(__A, __B), 6292 (__v2di)_mm_setzero_si128()); 6293} 6294 6295static __inline__ __m256i __DEFAULT_FN_ATTRS256 6296_mm256_mask_unpackhi_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 6297{ 6298 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 6299 (__v4di)_mm256_unpackhi_epi64(__A, __B), 6300 (__v4di)__W); 6301} 6302 6303static __inline__ __m256i __DEFAULT_FN_ATTRS256 6304_mm256_maskz_unpackhi_epi64(__mmask8 __U, __m256i __A, __m256i __B) 6305{ 6306 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 6307 (__v4di)_mm256_unpackhi_epi64(__A, __B), 6308 (__v4di)_mm256_setzero_si256()); 6309} 6310 6311static __inline__ __m128i __DEFAULT_FN_ATTRS128 6312_mm_mask_unpacklo_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 6313{ 6314 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 6315 (__v4si)_mm_unpacklo_epi32(__A, __B), 6316 (__v4si)__W); 6317} 6318 6319static __inline__ __m128i __DEFAULT_FN_ATTRS128 
6320_mm_maskz_unpacklo_epi32(__mmask8 __U, __m128i __A, __m128i __B) 6321{ 6322 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 6323 (__v4si)_mm_unpacklo_epi32(__A, __B), 6324 (__v4si)_mm_setzero_si128()); 6325} 6326 6327static __inline__ __m256i __DEFAULT_FN_ATTRS256 6328_mm256_mask_unpacklo_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 6329{ 6330 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 6331 (__v8si)_mm256_unpacklo_epi32(__A, __B), 6332 (__v8si)__W); 6333} 6334 6335static __inline__ __m256i __DEFAULT_FN_ATTRS256 6336_mm256_maskz_unpacklo_epi32(__mmask8 __U, __m256i __A, __m256i __B) 6337{ 6338 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 6339 (__v8si)_mm256_unpacklo_epi32(__A, __B), 6340 (__v8si)_mm256_setzero_si256()); 6341} 6342 6343static __inline__ __m128i __DEFAULT_FN_ATTRS128 6344_mm_mask_unpacklo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 6345{ 6346 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 6347 (__v2di)_mm_unpacklo_epi64(__A, __B), 6348 (__v2di)__W); 6349} 6350 6351static __inline__ __m128i __DEFAULT_FN_ATTRS128 6352_mm_maskz_unpacklo_epi64(__mmask8 __U, __m128i __A, __m128i __B) 6353{ 6354 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, 6355 (__v2di)_mm_unpacklo_epi64(__A, __B), 6356 (__v2di)_mm_setzero_si128()); 6357} 6358 6359static __inline__ __m256i __DEFAULT_FN_ATTRS256 6360_mm256_mask_unpacklo_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) 6361{ 6362 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 6363 (__v4di)_mm256_unpacklo_epi64(__A, __B), 6364 (__v4di)__W); 6365} 6366 6367static __inline__ __m256i __DEFAULT_FN_ATTRS256 6368_mm256_maskz_unpacklo_epi64(__mmask8 __U, __m256i __A, __m256i __B) 6369{ 6370 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, 6371 (__v4di)_mm256_unpacklo_epi64(__A, __B), 6372 (__v4di)_mm256_setzero_si256()); 6373} 6374 6375static __inline__ __m128i __DEFAULT_FN_ATTRS128 
6376_mm_mask_sra_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 6377{ 6378 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 6379 (__v4si)_mm_sra_epi32(__A, __B), 6380 (__v4si)__W); 6381} 6382 6383static __inline__ __m128i __DEFAULT_FN_ATTRS128 6384_mm_maskz_sra_epi32(__mmask8 __U, __m128i __A, __m128i __B) 6385{ 6386 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 6387 (__v4si)_mm_sra_epi32(__A, __B), 6388 (__v4si)_mm_setzero_si128()); 6389} 6390 6391static __inline__ __m256i __DEFAULT_FN_ATTRS256 6392_mm256_mask_sra_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) 6393{ 6394 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 6395 (__v8si)_mm256_sra_epi32(__A, __B), 6396 (__v8si)__W); 6397} 6398 6399static __inline__ __m256i __DEFAULT_FN_ATTRS256 6400_mm256_maskz_sra_epi32(__mmask8 __U, __m256i __A, __m128i __B) 6401{ 6402 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 6403 (__v8si)_mm256_sra_epi32(__A, __B), 6404 (__v8si)_mm256_setzero_si256()); 6405} 6406 6407static __inline__ __m128i __DEFAULT_FN_ATTRS128 6408_mm_mask_srai_epi32(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B) 6409{ 6410 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 6411 (__v4si)_mm_srai_epi32(__A, (int)__B), 6412 (__v4si)__W); 6413} 6414 6415static __inline__ __m128i __DEFAULT_FN_ATTRS128 6416_mm_maskz_srai_epi32(__mmask8 __U, __m128i __A, unsigned int __B) 6417{ 6418 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, 6419 (__v4si)_mm_srai_epi32(__A, (int)__B), 6420 (__v4si)_mm_setzero_si128()); 6421} 6422 6423static __inline__ __m256i __DEFAULT_FN_ATTRS256 6424_mm256_mask_srai_epi32(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B) 6425{ 6426 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 6427 (__v8si)_mm256_srai_epi32(__A, (int)__B), 6428 (__v8si)__W); 6429} 6430 6431static __inline__ __m256i __DEFAULT_FN_ATTRS256 6432_mm256_maskz_srai_epi32(__mmask8 __U, __m256i __A, unsigned int __B) 
6433{ 6434 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, 6435 (__v8si)_mm256_srai_epi32(__A, (int)__B), 6436 (__v8si)_mm256_setzero_si256()); 6437} 6438 6439static __inline__ __m128i __DEFAULT_FN_ATTRS128 6440_mm_sra_epi64(__m128i __A, __m128i __B) 6441{ 6442 return (__m128i)__builtin_ia32_psraq128((__v2di)__A, (__v2di)__B); 6443} 6444 6445static __inline__ __m128i __DEFAULT_FN_ATTRS128 6446_mm_mask_sra_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) 6447{ 6448 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \ 6449 (__v2di)_mm_sra_epi64(__A, __B), \ 6450 (__v2di)__W); 6451} 6452 6453static __inline__ __m128i __DEFAULT_FN_ATTRS128 6454_mm_maskz_sra_epi64(__mmask8 __U, __m128i __A, __m128i __B) 6455{ 6456 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \ 6457 (__v2di)_mm_sra_epi64(__A, __B), \ 6458 (__v2di)_mm_setzero_si128()); 6459} 6460 6461static __inline__ __m256i __DEFAULT_FN_ATTRS256 6462_mm256_sra_epi64(__m256i __A, __m128i __B) 6463{ 6464 return (__m256i)__builtin_ia32_psraq256((__v4di) __A, (__v2di) __B); 6465} 6466 6467static __inline__ __m256i __DEFAULT_FN_ATTRS256 6468_mm256_mask_sra_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) 6469{ 6470 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \ 6471 (__v4di)_mm256_sra_epi64(__A, __B), \ 6472 (__v4di)__W); 6473} 6474 6475static __inline__ __m256i __DEFAULT_FN_ATTRS256 6476_mm256_maskz_sra_epi64(__mmask8 __U, __m256i __A, __m128i __B) 6477{ 6478 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \ 6479 (__v4di)_mm256_sra_epi64(__A, __B), \ 6480 (__v4di)_mm256_setzero_si256()); 6481} 6482 6483static __inline__ __m128i __DEFAULT_FN_ATTRS128 6484_mm_srai_epi64(__m128i __A, unsigned int __imm) 6485{ 6486 return (__m128i)__builtin_ia32_psraqi128((__v2di)__A, (int)__imm); 6487} 6488 6489static __inline__ __m128i __DEFAULT_FN_ATTRS128 6490_mm_mask_srai_epi64(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __imm) 6491{ 6492 return 
(__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \ 6493 (__v2di)_mm_srai_epi64(__A, __imm), \ 6494 (__v2di)__W); 6495} 6496 6497static __inline__ __m128i __DEFAULT_FN_ATTRS128 6498_mm_maskz_srai_epi64(__mmask8 __U, __m128i __A, unsigned int __imm) 6499{ 6500 return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \ 6501 (__v2di)_mm_srai_epi64(__A, __imm), \ 6502 (__v2di)_mm_setzero_si128()); 6503} 6504 6505static __inline__ __m256i __DEFAULT_FN_ATTRS256 6506_mm256_srai_epi64(__m256i __A, unsigned int __imm) 6507{ 6508 return (__m256i)__builtin_ia32_psraqi256((__v4di)__A, (int)__imm); 6509} 6510 6511static __inline__ __m256i __DEFAULT_FN_ATTRS256 6512_mm256_mask_srai_epi64(__m256i __W, __mmask8 __U, __m256i __A, 6513 unsigned int __imm) 6514{ 6515 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \ 6516 (__v4di)_mm256_srai_epi64(__A, __imm), \ 6517 (__v4di)__W); 6518} 6519 6520static __inline__ __m256i __DEFAULT_FN_ATTRS256 6521_mm256_maskz_srai_epi64(__mmask8 __U, __m256i __A, unsigned int __imm) 6522{ 6523 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \ 6524 (__v4di)_mm256_srai_epi64(__A, __imm), \ 6525 (__v4di)_mm256_setzero_si256()); 6526} 6527 6528#define _mm_ternarylogic_epi32(A, B, C, imm) \ 6529 ((__m128i)__builtin_ia32_pternlogd128_mask( \ 6530 (__v4si)(__m128i)(A), (__v4si)(__m128i)(B), (__v4si)(__m128i)(C), \ 6531 (unsigned char)(imm), (__mmask8)-1)) 6532 6533#define _mm_mask_ternarylogic_epi32(A, U, B, C, imm) \ 6534 ((__m128i)__builtin_ia32_pternlogd128_mask( \ 6535 (__v4si)(__m128i)(A), (__v4si)(__m128i)(B), (__v4si)(__m128i)(C), \ 6536 (unsigned char)(imm), (__mmask8)(U))) 6537 6538#define _mm_maskz_ternarylogic_epi32(U, A, B, C, imm) \ 6539 ((__m128i)__builtin_ia32_pternlogd128_maskz( \ 6540 (__v4si)(__m128i)(A), (__v4si)(__m128i)(B), (__v4si)(__m128i)(C), \ 6541 (unsigned char)(imm), (__mmask8)(U))) 6542 6543#define _mm256_ternarylogic_epi32(A, B, C, imm) \ 6544 ((__m256i)__builtin_ia32_pternlogd256_mask( \ 6545 
(__v8si)(__m256i)(A), (__v8si)(__m256i)(B), (__v8si)(__m256i)(C), \ 6546 (unsigned char)(imm), (__mmask8)-1)) 6547 6548#define _mm256_mask_ternarylogic_epi32(A, U, B, C, imm) \ 6549 ((__m256i)__builtin_ia32_pternlogd256_mask( \ 6550 (__v8si)(__m256i)(A), (__v8si)(__m256i)(B), (__v8si)(__m256i)(C), \ 6551 (unsigned char)(imm), (__mmask8)(U))) 6552 6553#define _mm256_maskz_ternarylogic_epi32(U, A, B, C, imm) \ 6554 ((__m256i)__builtin_ia32_pternlogd256_maskz( \ 6555 (__v8si)(__m256i)(A), (__v8si)(__m256i)(B), (__v8si)(__m256i)(C), \ 6556 (unsigned char)(imm), (__mmask8)(U))) 6557 6558#define _mm_ternarylogic_epi64(A, B, C, imm) \ 6559 ((__m128i)__builtin_ia32_pternlogq128_mask( \ 6560 (__v2di)(__m128i)(A), (__v2di)(__m128i)(B), (__v2di)(__m128i)(C), \ 6561 (unsigned char)(imm), (__mmask8)-1)) 6562 6563#define _mm_mask_ternarylogic_epi64(A, U, B, C, imm) \ 6564 ((__m128i)__builtin_ia32_pternlogq128_mask( \ 6565 (__v2di)(__m128i)(A), (__v2di)(__m128i)(B), (__v2di)(__m128i)(C), \ 6566 (unsigned char)(imm), (__mmask8)(U))) 6567 6568#define _mm_maskz_ternarylogic_epi64(U, A, B, C, imm) \ 6569 ((__m128i)__builtin_ia32_pternlogq128_maskz( \ 6570 (__v2di)(__m128i)(A), (__v2di)(__m128i)(B), (__v2di)(__m128i)(C), \ 6571 (unsigned char)(imm), (__mmask8)(U))) 6572 6573#define _mm256_ternarylogic_epi64(A, B, C, imm) \ 6574 ((__m256i)__builtin_ia32_pternlogq256_mask( \ 6575 (__v4di)(__m256i)(A), (__v4di)(__m256i)(B), (__v4di)(__m256i)(C), \ 6576 (unsigned char)(imm), (__mmask8)-1)) 6577 6578#define _mm256_mask_ternarylogic_epi64(A, U, B, C, imm) \ 6579 ((__m256i)__builtin_ia32_pternlogq256_mask( \ 6580 (__v4di)(__m256i)(A), (__v4di)(__m256i)(B), (__v4di)(__m256i)(C), \ 6581 (unsigned char)(imm), (__mmask8)(U))) 6582 6583#define _mm256_maskz_ternarylogic_epi64(U, A, B, C, imm) \ 6584 ((__m256i)__builtin_ia32_pternlogq256_maskz( \ 6585 (__v4di)(__m256i)(A), (__v4di)(__m256i)(B), (__v4di)(__m256i)(C), \ 6586 (unsigned char)(imm), (__mmask8)(U))) 6587 6588#define 
_mm256_shuffle_f32x4(A, B, imm) \ 6589 ((__m256)__builtin_ia32_shuf_f32x4_256((__v8sf)(__m256)(A), \ 6590 (__v8sf)(__m256)(B), (int)(imm))) 6591 6592#define _mm256_mask_shuffle_f32x4(W, U, A, B, imm) \ 6593 ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ 6594 (__v8sf)_mm256_shuffle_f32x4((A), (B), (imm)), \ 6595 (__v8sf)(__m256)(W))) 6596 6597#define _mm256_maskz_shuffle_f32x4(U, A, B, imm) \ 6598 ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ 6599 (__v8sf)_mm256_shuffle_f32x4((A), (B), (imm)), \ 6600 (__v8sf)_mm256_setzero_ps())) 6601 6602#define _mm256_shuffle_f64x2(A, B, imm) \ 6603 ((__m256d)__builtin_ia32_shuf_f64x2_256((__v4df)(__m256d)(A), \ 6604 (__v4df)(__m256d)(B), (int)(imm))) 6605 6606#define _mm256_mask_shuffle_f64x2(W, U, A, B, imm) \ 6607 ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ 6608 (__v4df)_mm256_shuffle_f64x2((A), (B), (imm)), \ 6609 (__v4df)(__m256d)(W))) 6610 6611#define _mm256_maskz_shuffle_f64x2(U, A, B, imm) \ 6612 ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ 6613 (__v4df)_mm256_shuffle_f64x2((A), (B), (imm)), \ 6614 (__v4df)_mm256_setzero_pd())) 6615 6616#define _mm256_shuffle_i32x4(A, B, imm) \ 6617 ((__m256i)__builtin_ia32_shuf_i32x4_256((__v8si)(__m256i)(A), \ 6618 (__v8si)(__m256i)(B), (int)(imm))) 6619 6620#define _mm256_mask_shuffle_i32x4(W, U, A, B, imm) \ 6621 ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ 6622 (__v8si)_mm256_shuffle_i32x4((A), (B), (imm)), \ 6623 (__v8si)(__m256i)(W))) 6624 6625#define _mm256_maskz_shuffle_i32x4(U, A, B, imm) \ 6626 ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ 6627 (__v8si)_mm256_shuffle_i32x4((A), (B), (imm)), \ 6628 (__v8si)_mm256_setzero_si256())) 6629 6630#define _mm256_shuffle_i64x2(A, B, imm) \ 6631 ((__m256i)__builtin_ia32_shuf_i64x2_256((__v4di)(__m256i)(A), \ 6632 (__v4di)(__m256i)(B), (int)(imm))) 6633 6634#define _mm256_mask_shuffle_i64x2(W, U, A, B, imm) \ 6635 ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ 6636 
(__v4di)_mm256_shuffle_i64x2((A), (B), (imm)), \ 6637 (__v4di)(__m256i)(W))) 6638 6639 6640#define _mm256_maskz_shuffle_i64x2(U, A, B, imm) \ 6641 ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ 6642 (__v4di)_mm256_shuffle_i64x2((A), (B), (imm)), \ 6643 (__v4di)_mm256_setzero_si256())) 6644 6645#define _mm_mask_shuffle_pd(W, U, A, B, M) \ 6646 ((__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \ 6647 (__v2df)_mm_shuffle_pd((A), (B), (M)), \ 6648 (__v2df)(__m128d)(W))) 6649 6650#define _mm_maskz_shuffle_pd(U, A, B, M) \ 6651 ((__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \ 6652 (__v2df)_mm_shuffle_pd((A), (B), (M)), \ 6653 (__v2df)_mm_setzero_pd())) 6654 6655#define _mm256_mask_shuffle_pd(W, U, A, B, M) \ 6656 ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ 6657 (__v4df)_mm256_shuffle_pd((A), (B), (M)), \ 6658 (__v4df)(__m256d)(W))) 6659 6660#define _mm256_maskz_shuffle_pd(U, A, B, M) \ 6661 ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ 6662 (__v4df)_mm256_shuffle_pd((A), (B), (M)), \ 6663 (__v4df)_mm256_setzero_pd())) 6664 6665#define _mm_mask_shuffle_ps(W, U, A, B, M) \ 6666 ((__m128)__builtin_ia32_selectps_128((__mmask8)(U), \ 6667 (__v4sf)_mm_shuffle_ps((A), (B), (M)), \ 6668 (__v4sf)(__m128)(W))) 6669 6670#define _mm_maskz_shuffle_ps(U, A, B, M) \ 6671 ((__m128)__builtin_ia32_selectps_128((__mmask8)(U), \ 6672 (__v4sf)_mm_shuffle_ps((A), (B), (M)), \ 6673 (__v4sf)_mm_setzero_ps())) 6674 6675#define _mm256_mask_shuffle_ps(W, U, A, B, M) \ 6676 ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ 6677 (__v8sf)_mm256_shuffle_ps((A), (B), (M)), \ 6678 (__v8sf)(__m256)(W))) 6679 6680#define _mm256_maskz_shuffle_ps(U, A, B, M) \ 6681 ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ 6682 (__v8sf)_mm256_shuffle_ps((A), (B), (M)), \ 6683 (__v8sf)_mm256_setzero_ps())) 6684 6685static __inline__ __m128d __DEFAULT_FN_ATTRS128 6686_mm_rsqrt14_pd (__m128d __A) 6687{ 6688 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A, 6689 
(__v2df) 6690 _mm_setzero_pd (), 6691 (__mmask8) -1); 6692} 6693 6694static __inline__ __m128d __DEFAULT_FN_ATTRS128 6695_mm_mask_rsqrt14_pd (__m128d __W, __mmask8 __U, __m128d __A) 6696{ 6697 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A, 6698 (__v2df) __W, 6699 (__mmask8) __U); 6700} 6701 6702static __inline__ __m128d __DEFAULT_FN_ATTRS128 6703_mm_maskz_rsqrt14_pd (__mmask8 __U, __m128d __A) 6704{ 6705 return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A, 6706 (__v2df) 6707 _mm_setzero_pd (), 6708 (__mmask8) __U); 6709} 6710 6711static __inline__ __m256d __DEFAULT_FN_ATTRS256 6712_mm256_rsqrt14_pd (__m256d __A) 6713{ 6714 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A, 6715 (__v4df) 6716 _mm256_setzero_pd (), 6717 (__mmask8) -1); 6718} 6719 6720static __inline__ __m256d __DEFAULT_FN_ATTRS256 6721_mm256_mask_rsqrt14_pd (__m256d __W, __mmask8 __U, __m256d __A) 6722{ 6723 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A, 6724 (__v4df) __W, 6725 (__mmask8) __U); 6726} 6727 6728static __inline__ __m256d __DEFAULT_FN_ATTRS256 6729_mm256_maskz_rsqrt14_pd (__mmask8 __U, __m256d __A) 6730{ 6731 return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A, 6732 (__v4df) 6733 _mm256_setzero_pd (), 6734 (__mmask8) __U); 6735} 6736 6737static __inline__ __m128 __DEFAULT_FN_ATTRS128 6738_mm_rsqrt14_ps (__m128 __A) 6739{ 6740 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A, 6741 (__v4sf) 6742 _mm_setzero_ps (), 6743 (__mmask8) -1); 6744} 6745 6746static __inline__ __m128 __DEFAULT_FN_ATTRS128 6747_mm_mask_rsqrt14_ps (__m128 __W, __mmask8 __U, __m128 __A) 6748{ 6749 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A, 6750 (__v4sf) __W, 6751 (__mmask8) __U); 6752} 6753 6754static __inline__ __m128 __DEFAULT_FN_ATTRS128 6755_mm_maskz_rsqrt14_ps (__mmask8 __U, __m128 __A) 6756{ 6757 return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A, 6758 (__v4sf) 6759 _mm_setzero_ps (), 6760 
(__mmask8) __U); 6761} 6762 6763static __inline__ __m256 __DEFAULT_FN_ATTRS256 6764_mm256_rsqrt14_ps (__m256 __A) 6765{ 6766 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A, 6767 (__v8sf) 6768 _mm256_setzero_ps (), 6769 (__mmask8) -1); 6770} 6771 6772static __inline__ __m256 __DEFAULT_FN_ATTRS256 6773_mm256_mask_rsqrt14_ps (__m256 __W, __mmask8 __U, __m256 __A) 6774{ 6775 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A, 6776 (__v8sf) __W, 6777 (__mmask8) __U); 6778} 6779 6780static __inline__ __m256 __DEFAULT_FN_ATTRS256 6781_mm256_maskz_rsqrt14_ps (__mmask8 __U, __m256 __A) 6782{ 6783 return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A, 6784 (__v8sf) 6785 _mm256_setzero_ps (), 6786 (__mmask8) __U); 6787} 6788 6789static __inline__ __m256 __DEFAULT_FN_ATTRS256 6790_mm256_broadcast_f32x4(__m128 __A) 6791{ 6792 return (__m256)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A, 6793 0, 1, 2, 3, 0, 1, 2, 3); 6794} 6795 6796static __inline__ __m256 __DEFAULT_FN_ATTRS256 6797_mm256_mask_broadcast_f32x4(__m256 __O, __mmask8 __M, __m128 __A) 6798{ 6799 return (__m256)__builtin_ia32_selectps_256((__mmask8)__M, 6800 (__v8sf)_mm256_broadcast_f32x4(__A), 6801 (__v8sf)__O); 6802} 6803 6804static __inline__ __m256 __DEFAULT_FN_ATTRS256 6805_mm256_maskz_broadcast_f32x4 (__mmask8 __M, __m128 __A) 6806{ 6807 return (__m256)__builtin_ia32_selectps_256((__mmask8)__M, 6808 (__v8sf)_mm256_broadcast_f32x4(__A), 6809 (__v8sf)_mm256_setzero_ps()); 6810} 6811 6812static __inline__ __m256i __DEFAULT_FN_ATTRS256 6813_mm256_broadcast_i32x4(__m128i __A) 6814{ 6815 return (__m256i)__builtin_shufflevector((__v4si)__A, (__v4si)__A, 6816 0, 1, 2, 3, 0, 1, 2, 3); 6817} 6818 6819static __inline__ __m256i __DEFAULT_FN_ATTRS256 6820_mm256_mask_broadcast_i32x4(__m256i __O, __mmask8 __M, __m128i __A) 6821{ 6822 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 6823 (__v8si)_mm256_broadcast_i32x4(__A), 6824 (__v8si)__O); 6825} 6826 6827static __inline__ 
__m256i __DEFAULT_FN_ATTRS256 6828_mm256_maskz_broadcast_i32x4(__mmask8 __M, __m128i __A) 6829{ 6830 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 6831 (__v8si)_mm256_broadcast_i32x4(__A), 6832 (__v8si)_mm256_setzero_si256()); 6833} 6834 6835static __inline__ __m256d __DEFAULT_FN_ATTRS256 6836_mm256_mask_broadcastsd_pd (__m256d __O, __mmask8 __M, __m128d __A) 6837{ 6838 return (__m256d)__builtin_ia32_selectpd_256(__M, 6839 (__v4df) _mm256_broadcastsd_pd(__A), 6840 (__v4df) __O); 6841} 6842 6843static __inline__ __m256d __DEFAULT_FN_ATTRS256 6844_mm256_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A) 6845{ 6846 return (__m256d)__builtin_ia32_selectpd_256(__M, 6847 (__v4df) _mm256_broadcastsd_pd(__A), 6848 (__v4df) _mm256_setzero_pd()); 6849} 6850 6851static __inline__ __m128 __DEFAULT_FN_ATTRS128 6852_mm_mask_broadcastss_ps (__m128 __O, __mmask8 __M, __m128 __A) 6853{ 6854 return (__m128)__builtin_ia32_selectps_128(__M, 6855 (__v4sf) _mm_broadcastss_ps(__A), 6856 (__v4sf) __O); 6857} 6858 6859static __inline__ __m128 __DEFAULT_FN_ATTRS128 6860_mm_maskz_broadcastss_ps (__mmask8 __M, __m128 __A) 6861{ 6862 return (__m128)__builtin_ia32_selectps_128(__M, 6863 (__v4sf) _mm_broadcastss_ps(__A), 6864 (__v4sf) _mm_setzero_ps()); 6865} 6866 6867static __inline__ __m256 __DEFAULT_FN_ATTRS256 6868_mm256_mask_broadcastss_ps (__m256 __O, __mmask8 __M, __m128 __A) 6869{ 6870 return (__m256)__builtin_ia32_selectps_256(__M, 6871 (__v8sf) _mm256_broadcastss_ps(__A), 6872 (__v8sf) __O); 6873} 6874 6875static __inline__ __m256 __DEFAULT_FN_ATTRS256 6876_mm256_maskz_broadcastss_ps (__mmask8 __M, __m128 __A) 6877{ 6878 return (__m256)__builtin_ia32_selectps_256(__M, 6879 (__v8sf) _mm256_broadcastss_ps(__A), 6880 (__v8sf) _mm256_setzero_ps()); 6881} 6882 6883static __inline__ __m128i __DEFAULT_FN_ATTRS128 6884_mm_mask_broadcastd_epi32 (__m128i __O, __mmask8 __M, __m128i __A) 6885{ 6886 return (__m128i)__builtin_ia32_selectd_128(__M, 6887 (__v4si) _mm_broadcastd_epi32(__A), 
6888 (__v4si) __O); 6889} 6890 6891static __inline__ __m128i __DEFAULT_FN_ATTRS128 6892_mm_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A) 6893{ 6894 return (__m128i)__builtin_ia32_selectd_128(__M, 6895 (__v4si) _mm_broadcastd_epi32(__A), 6896 (__v4si) _mm_setzero_si128()); 6897} 6898 6899static __inline__ __m256i __DEFAULT_FN_ATTRS256 6900_mm256_mask_broadcastd_epi32 (__m256i __O, __mmask8 __M, __m128i __A) 6901{ 6902 return (__m256i)__builtin_ia32_selectd_256(__M, 6903 (__v8si) _mm256_broadcastd_epi32(__A), 6904 (__v8si) __O); 6905} 6906 6907static __inline__ __m256i __DEFAULT_FN_ATTRS256 6908_mm256_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A) 6909{ 6910 return (__m256i)__builtin_ia32_selectd_256(__M, 6911 (__v8si) _mm256_broadcastd_epi32(__A), 6912 (__v8si) _mm256_setzero_si256()); 6913} 6914 6915static __inline__ __m128i __DEFAULT_FN_ATTRS128 6916_mm_mask_broadcastq_epi64 (__m128i __O, __mmask8 __M, __m128i __A) 6917{ 6918 return (__m128i)__builtin_ia32_selectq_128(__M, 6919 (__v2di) _mm_broadcastq_epi64(__A), 6920 (__v2di) __O); 6921} 6922 6923static __inline__ __m128i __DEFAULT_FN_ATTRS128 6924_mm_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A) 6925{ 6926 return (__m128i)__builtin_ia32_selectq_128(__M, 6927 (__v2di) _mm_broadcastq_epi64(__A), 6928 (__v2di) _mm_setzero_si128()); 6929} 6930 6931static __inline__ __m256i __DEFAULT_FN_ATTRS256 6932_mm256_mask_broadcastq_epi64 (__m256i __O, __mmask8 __M, __m128i __A) 6933{ 6934 return (__m256i)__builtin_ia32_selectq_256(__M, 6935 (__v4di) _mm256_broadcastq_epi64(__A), 6936 (__v4di) __O); 6937} 6938 6939static __inline__ __m256i __DEFAULT_FN_ATTRS256 6940_mm256_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A) 6941{ 6942 return (__m256i)__builtin_ia32_selectq_256(__M, 6943 (__v4di) _mm256_broadcastq_epi64(__A), 6944 (__v4di) _mm256_setzero_si256()); 6945} 6946 6947static __inline__ __m128i __DEFAULT_FN_ATTRS128 6948_mm_cvtsepi32_epi8 (__m128i __A) 6949{ 6950 return (__m128i) 
__builtin_ia32_pmovsdb128_mask ((__v4si) __A, 6951 (__v16qi)_mm_undefined_si128(), 6952 (__mmask8) -1); 6953} 6954 6955static __inline__ __m128i __DEFAULT_FN_ATTRS128 6956_mm_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A) 6957{ 6958 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A, 6959 (__v16qi) __O, __M); 6960} 6961 6962static __inline__ __m128i __DEFAULT_FN_ATTRS128 6963_mm_maskz_cvtsepi32_epi8 (__mmask8 __M, __m128i __A) 6964{ 6965 return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A, 6966 (__v16qi) _mm_setzero_si128 (), 6967 __M); 6968} 6969 6970static __inline__ void __DEFAULT_FN_ATTRS128 6971_mm_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) 6972{ 6973 __builtin_ia32_pmovsdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M); 6974} 6975 6976static __inline__ __m128i __DEFAULT_FN_ATTRS256 6977_mm256_cvtsepi32_epi8 (__m256i __A) 6978{ 6979 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A, 6980 (__v16qi)_mm_undefined_si128(), 6981 (__mmask8) -1); 6982} 6983 6984static __inline__ __m128i __DEFAULT_FN_ATTRS256 6985_mm256_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A) 6986{ 6987 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A, 6988 (__v16qi) __O, __M); 6989} 6990 6991static __inline__ __m128i __DEFAULT_FN_ATTRS256 6992_mm256_maskz_cvtsepi32_epi8 (__mmask8 __M, __m256i __A) 6993{ 6994 return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A, 6995 (__v16qi) _mm_setzero_si128 (), 6996 __M); 6997} 6998 6999static __inline__ void __DEFAULT_FN_ATTRS256 7000_mm256_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) 7001{ 7002 __builtin_ia32_pmovsdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M); 7003} 7004 7005static __inline__ __m128i __DEFAULT_FN_ATTRS128 7006_mm_cvtsepi32_epi16 (__m128i __A) 7007{ 7008 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A, 7009 (__v8hi)_mm_setzero_si128 (), 7010 (__mmask8) -1); 7011} 7012 7013static __inline__ 
__m128i __DEFAULT_FN_ATTRS128 7014_mm_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A) 7015{ 7016 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A, 7017 (__v8hi)__O, 7018 __M); 7019} 7020 7021static __inline__ __m128i __DEFAULT_FN_ATTRS128 7022_mm_maskz_cvtsepi32_epi16 (__mmask8 __M, __m128i __A) 7023{ 7024 return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A, 7025 (__v8hi) _mm_setzero_si128 (), 7026 __M); 7027} 7028 7029static __inline__ void __DEFAULT_FN_ATTRS128 7030_mm_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) 7031{ 7032 __builtin_ia32_pmovsdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M); 7033} 7034 7035static __inline__ __m128i __DEFAULT_FN_ATTRS256 7036_mm256_cvtsepi32_epi16 (__m256i __A) 7037{ 7038 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A, 7039 (__v8hi)_mm_undefined_si128(), 7040 (__mmask8) -1); 7041} 7042 7043static __inline__ __m128i __DEFAULT_FN_ATTRS256 7044_mm256_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A) 7045{ 7046 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A, 7047 (__v8hi) __O, __M); 7048} 7049 7050static __inline__ __m128i __DEFAULT_FN_ATTRS256 7051_mm256_maskz_cvtsepi32_epi16 (__mmask8 __M, __m256i __A) 7052{ 7053 return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A, 7054 (__v8hi) _mm_setzero_si128 (), 7055 __M); 7056} 7057 7058static __inline__ void __DEFAULT_FN_ATTRS256 7059_mm256_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) 7060{ 7061 __builtin_ia32_pmovsdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M); 7062} 7063 7064static __inline__ __m128i __DEFAULT_FN_ATTRS128 7065_mm_cvtsepi64_epi8 (__m128i __A) 7066{ 7067 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A, 7068 (__v16qi)_mm_undefined_si128(), 7069 (__mmask8) -1); 7070} 7071 7072static __inline__ __m128i __DEFAULT_FN_ATTRS128 7073_mm_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A) 7074{ 7075 return (__m128i) 
__builtin_ia32_pmovsqb128_mask ((__v2di) __A, 7076 (__v16qi) __O, __M); 7077} 7078 7079static __inline__ __m128i __DEFAULT_FN_ATTRS128 7080_mm_maskz_cvtsepi64_epi8 (__mmask8 __M, __m128i __A) 7081{ 7082 return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A, 7083 (__v16qi) _mm_setzero_si128 (), 7084 __M); 7085} 7086 7087static __inline__ void __DEFAULT_FN_ATTRS128 7088_mm_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) 7089{ 7090 __builtin_ia32_pmovsqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M); 7091} 7092 7093static __inline__ __m128i __DEFAULT_FN_ATTRS256 7094_mm256_cvtsepi64_epi8 (__m256i __A) 7095{ 7096 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A, 7097 (__v16qi)_mm_undefined_si128(), 7098 (__mmask8) -1); 7099} 7100 7101static __inline__ __m128i __DEFAULT_FN_ATTRS256 7102_mm256_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A) 7103{ 7104 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A, 7105 (__v16qi) __O, __M); 7106} 7107 7108static __inline__ __m128i __DEFAULT_FN_ATTRS256 7109_mm256_maskz_cvtsepi64_epi8 (__mmask8 __M, __m256i __A) 7110{ 7111 return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A, 7112 (__v16qi) _mm_setzero_si128 (), 7113 __M); 7114} 7115 7116static __inline__ void __DEFAULT_FN_ATTRS256 7117_mm256_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) 7118{ 7119 __builtin_ia32_pmovsqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M); 7120} 7121 7122static __inline__ __m128i __DEFAULT_FN_ATTRS128 7123_mm_cvtsepi64_epi32 (__m128i __A) 7124{ 7125 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A, 7126 (__v4si)_mm_undefined_si128(), 7127 (__mmask8) -1); 7128} 7129 7130static __inline__ __m128i __DEFAULT_FN_ATTRS128 7131_mm_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A) 7132{ 7133 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A, 7134 (__v4si) __O, __M); 7135} 7136 7137static __inline__ __m128i __DEFAULT_FN_ATTRS128 
7138_mm_maskz_cvtsepi64_epi32 (__mmask8 __M, __m128i __A) 7139{ 7140 return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A, 7141 (__v4si) _mm_setzero_si128 (), 7142 __M); 7143} 7144 7145static __inline__ void __DEFAULT_FN_ATTRS128 7146_mm_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A) 7147{ 7148 __builtin_ia32_pmovsqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M); 7149} 7150 7151static __inline__ __m128i __DEFAULT_FN_ATTRS256 7152_mm256_cvtsepi64_epi32 (__m256i __A) 7153{ 7154 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A, 7155 (__v4si)_mm_undefined_si128(), 7156 (__mmask8) -1); 7157} 7158 7159static __inline__ __m128i __DEFAULT_FN_ATTRS256 7160_mm256_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A) 7161{ 7162 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A, 7163 (__v4si)__O, 7164 __M); 7165} 7166 7167static __inline__ __m128i __DEFAULT_FN_ATTRS256 7168_mm256_maskz_cvtsepi64_epi32 (__mmask8 __M, __m256i __A) 7169{ 7170 return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A, 7171 (__v4si) _mm_setzero_si128 (), 7172 __M); 7173} 7174 7175static __inline__ void __DEFAULT_FN_ATTRS256 7176_mm256_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A) 7177{ 7178 __builtin_ia32_pmovsqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M); 7179} 7180 7181static __inline__ __m128i __DEFAULT_FN_ATTRS128 7182_mm_cvtsepi64_epi16 (__m128i __A) 7183{ 7184 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A, 7185 (__v8hi)_mm_undefined_si128(), 7186 (__mmask8) -1); 7187} 7188 7189static __inline__ __m128i __DEFAULT_FN_ATTRS128 7190_mm_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A) 7191{ 7192 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A, 7193 (__v8hi) __O, __M); 7194} 7195 7196static __inline__ __m128i __DEFAULT_FN_ATTRS128 7197_mm_maskz_cvtsepi64_epi16 (__mmask8 __M, __m128i __A) 7198{ 7199 return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A, 
7200 (__v8hi) _mm_setzero_si128 (), 7201 __M); 7202} 7203 7204static __inline__ void __DEFAULT_FN_ATTRS128 7205_mm_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) 7206{ 7207 __builtin_ia32_pmovsqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M); 7208} 7209 7210static __inline__ __m128i __DEFAULT_FN_ATTRS256 7211_mm256_cvtsepi64_epi16 (__m256i __A) 7212{ 7213 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A, 7214 (__v8hi)_mm_undefined_si128(), 7215 (__mmask8) -1); 7216} 7217 7218static __inline__ __m128i __DEFAULT_FN_ATTRS256 7219_mm256_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A) 7220{ 7221 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A, 7222 (__v8hi) __O, __M); 7223} 7224 7225static __inline__ __m128i __DEFAULT_FN_ATTRS256 7226_mm256_maskz_cvtsepi64_epi16 (__mmask8 __M, __m256i __A) 7227{ 7228 return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A, 7229 (__v8hi) _mm_setzero_si128 (), 7230 __M); 7231} 7232 7233static __inline__ void __DEFAULT_FN_ATTRS256 7234_mm256_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) 7235{ 7236 __builtin_ia32_pmovsqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M); 7237} 7238 7239static __inline__ __m128i __DEFAULT_FN_ATTRS128 7240_mm_cvtusepi32_epi8 (__m128i __A) 7241{ 7242 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A, 7243 (__v16qi)_mm_undefined_si128(), 7244 (__mmask8) -1); 7245} 7246 7247static __inline__ __m128i __DEFAULT_FN_ATTRS128 7248_mm_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A) 7249{ 7250 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A, 7251 (__v16qi) __O, 7252 __M); 7253} 7254 7255static __inline__ __m128i __DEFAULT_FN_ATTRS128 7256_mm_maskz_cvtusepi32_epi8 (__mmask8 __M, __m128i __A) 7257{ 7258 return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A, 7259 (__v16qi) _mm_setzero_si128 (), 7260 __M); 7261} 7262 7263static __inline__ void __DEFAULT_FN_ATTRS128 
7264_mm_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) 7265{ 7266 __builtin_ia32_pmovusdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M); 7267} 7268 7269static __inline__ __m128i __DEFAULT_FN_ATTRS256 7270_mm256_cvtusepi32_epi8 (__m256i __A) 7271{ 7272 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A, 7273 (__v16qi)_mm_undefined_si128(), 7274 (__mmask8) -1); 7275} 7276 7277static __inline__ __m128i __DEFAULT_FN_ATTRS256 7278_mm256_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A) 7279{ 7280 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A, 7281 (__v16qi) __O, 7282 __M); 7283} 7284 7285static __inline__ __m128i __DEFAULT_FN_ATTRS256 7286_mm256_maskz_cvtusepi32_epi8 (__mmask8 __M, __m256i __A) 7287{ 7288 return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A, 7289 (__v16qi) _mm_setzero_si128 (), 7290 __M); 7291} 7292 7293static __inline__ void __DEFAULT_FN_ATTRS256 7294_mm256_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) 7295{ 7296 __builtin_ia32_pmovusdb256mem_mask ((__v16qi*) __P, (__v8si) __A, __M); 7297} 7298 7299static __inline__ __m128i __DEFAULT_FN_ATTRS128 7300_mm_cvtusepi32_epi16 (__m128i __A) 7301{ 7302 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A, 7303 (__v8hi)_mm_undefined_si128(), 7304 (__mmask8) -1); 7305} 7306 7307static __inline__ __m128i __DEFAULT_FN_ATTRS128 7308_mm_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A) 7309{ 7310 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A, 7311 (__v8hi) __O, __M); 7312} 7313 7314static __inline__ __m128i __DEFAULT_FN_ATTRS128 7315_mm_maskz_cvtusepi32_epi16 (__mmask8 __M, __m128i __A) 7316{ 7317 return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A, 7318 (__v8hi) _mm_setzero_si128 (), 7319 __M); 7320} 7321 7322static __inline__ void __DEFAULT_FN_ATTRS128 7323_mm_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) 7324{ 7325 
__builtin_ia32_pmovusdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M); 7326} 7327 7328static __inline__ __m128i __DEFAULT_FN_ATTRS256 7329_mm256_cvtusepi32_epi16 (__m256i __A) 7330{ 7331 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A, 7332 (__v8hi) _mm_undefined_si128(), 7333 (__mmask8) -1); 7334} 7335 7336static __inline__ __m128i __DEFAULT_FN_ATTRS256 7337_mm256_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A) 7338{ 7339 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A, 7340 (__v8hi) __O, __M); 7341} 7342 7343static __inline__ __m128i __DEFAULT_FN_ATTRS256 7344_mm256_maskz_cvtusepi32_epi16 (__mmask8 __M, __m256i __A) 7345{ 7346 return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A, 7347 (__v8hi) _mm_setzero_si128 (), 7348 __M); 7349} 7350 7351static __inline__ void __DEFAULT_FN_ATTRS256 7352_mm256_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) 7353{ 7354 __builtin_ia32_pmovusdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M); 7355} 7356 7357static __inline__ __m128i __DEFAULT_FN_ATTRS128 7358_mm_cvtusepi64_epi8 (__m128i __A) 7359{ 7360 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A, 7361 (__v16qi)_mm_undefined_si128(), 7362 (__mmask8) -1); 7363} 7364 7365static __inline__ __m128i __DEFAULT_FN_ATTRS128 7366_mm_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A) 7367{ 7368 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A, 7369 (__v16qi) __O, 7370 __M); 7371} 7372 7373static __inline__ __m128i __DEFAULT_FN_ATTRS128 7374_mm_maskz_cvtusepi64_epi8 (__mmask8 __M, __m128i __A) 7375{ 7376 return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A, 7377 (__v16qi) _mm_setzero_si128 (), 7378 __M); 7379} 7380 7381static __inline__ void __DEFAULT_FN_ATTRS128 7382_mm_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) 7383{ 7384 __builtin_ia32_pmovusqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M); 7385} 7386 7387static __inline__ __m128i 
__DEFAULT_FN_ATTRS256 7388_mm256_cvtusepi64_epi8 (__m256i __A) 7389{ 7390 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A, 7391 (__v16qi)_mm_undefined_si128(), 7392 (__mmask8) -1); 7393} 7394 7395static __inline__ __m128i __DEFAULT_FN_ATTRS256 7396_mm256_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A) 7397{ 7398 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A, 7399 (__v16qi) __O, 7400 __M); 7401} 7402 7403static __inline__ __m128i __DEFAULT_FN_ATTRS256 7404_mm256_maskz_cvtusepi64_epi8 (__mmask8 __M, __m256i __A) 7405{ 7406 return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A, 7407 (__v16qi) _mm_setzero_si128 (), 7408 __M); 7409} 7410 7411static __inline__ void __DEFAULT_FN_ATTRS256 7412_mm256_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) 7413{ 7414 __builtin_ia32_pmovusqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M); 7415} 7416 7417static __inline__ __m128i __DEFAULT_FN_ATTRS128 7418_mm_cvtusepi64_epi32 (__m128i __A) 7419{ 7420 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A, 7421 (__v4si)_mm_undefined_si128(), 7422 (__mmask8) -1); 7423} 7424 7425static __inline__ __m128i __DEFAULT_FN_ATTRS128 7426_mm_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A) 7427{ 7428 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A, 7429 (__v4si) __O, __M); 7430} 7431 7432static __inline__ __m128i __DEFAULT_FN_ATTRS128 7433_mm_maskz_cvtusepi64_epi32 (__mmask8 __M, __m128i __A) 7434{ 7435 return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A, 7436 (__v4si) _mm_setzero_si128 (), 7437 __M); 7438} 7439 7440static __inline__ void __DEFAULT_FN_ATTRS128 7441_mm_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A) 7442{ 7443 __builtin_ia32_pmovusqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M); 7444} 7445 7446static __inline__ __m128i __DEFAULT_FN_ATTRS256 7447_mm256_cvtusepi64_epi32 (__m256i __A) 7448{ 7449 return (__m128i) 
__builtin_ia32_pmovusqd256_mask ((__v4di) __A, 7450 (__v4si)_mm_undefined_si128(), 7451 (__mmask8) -1); 7452} 7453 7454static __inline__ __m128i __DEFAULT_FN_ATTRS256 7455_mm256_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A) 7456{ 7457 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A, 7458 (__v4si) __O, __M); 7459} 7460 7461static __inline__ __m128i __DEFAULT_FN_ATTRS256 7462_mm256_maskz_cvtusepi64_epi32 (__mmask8 __M, __m256i __A) 7463{ 7464 return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A, 7465 (__v4si) _mm_setzero_si128 (), 7466 __M); 7467} 7468 7469static __inline__ void __DEFAULT_FN_ATTRS256 7470_mm256_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A) 7471{ 7472 __builtin_ia32_pmovusqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M); 7473} 7474 7475static __inline__ __m128i __DEFAULT_FN_ATTRS128 7476_mm_cvtusepi64_epi16 (__m128i __A) 7477{ 7478 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A, 7479 (__v8hi)_mm_undefined_si128(), 7480 (__mmask8) -1); 7481} 7482 7483static __inline__ __m128i __DEFAULT_FN_ATTRS128 7484_mm_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A) 7485{ 7486 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A, 7487 (__v8hi) __O, __M); 7488} 7489 7490static __inline__ __m128i __DEFAULT_FN_ATTRS128 7491_mm_maskz_cvtusepi64_epi16 (__mmask8 __M, __m128i __A) 7492{ 7493 return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A, 7494 (__v8hi) _mm_setzero_si128 (), 7495 __M); 7496} 7497 7498static __inline__ void __DEFAULT_FN_ATTRS128 7499_mm_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) 7500{ 7501 __builtin_ia32_pmovusqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M); 7502} 7503 7504static __inline__ __m128i __DEFAULT_FN_ATTRS256 7505_mm256_cvtusepi64_epi16 (__m256i __A) 7506{ 7507 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A, 7508 (__v8hi)_mm_undefined_si128(), 7509 (__mmask8) -1); 7510} 7511 
7512static __inline__ __m128i __DEFAULT_FN_ATTRS256 7513_mm256_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A) 7514{ 7515 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A, 7516 (__v8hi) __O, __M); 7517} 7518 7519static __inline__ __m128i __DEFAULT_FN_ATTRS256 7520_mm256_maskz_cvtusepi64_epi16 (__mmask8 __M, __m256i __A) 7521{ 7522 return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A, 7523 (__v8hi) _mm_setzero_si128 (), 7524 __M); 7525} 7526 7527static __inline__ void __DEFAULT_FN_ATTRS256 7528_mm256_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) 7529{ 7530 __builtin_ia32_pmovusqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M); 7531} 7532 7533static __inline__ __m128i __DEFAULT_FN_ATTRS128 7534_mm_cvtepi32_epi8 (__m128i __A) 7535{ 7536 return (__m128i)__builtin_shufflevector( 7537 __builtin_convertvector((__v4si)__A, __v4qi), (__v4qi){0, 0, 0, 0}, 0, 1, 7538 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7); 7539} 7540 7541static __inline__ __m128i __DEFAULT_FN_ATTRS128 7542_mm_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A) 7543{ 7544 return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A, 7545 (__v16qi) __O, __M); 7546} 7547 7548static __inline__ __m128i __DEFAULT_FN_ATTRS128 7549_mm_maskz_cvtepi32_epi8 (__mmask8 __M, __m128i __A) 7550{ 7551 return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A, 7552 (__v16qi) 7553 _mm_setzero_si128 (), 7554 __M); 7555} 7556 7557static __inline__ void __DEFAULT_FN_ATTRS128 7558_mm_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) 7559{ 7560 __builtin_ia32_pmovdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M); 7561} 7562 7563static __inline__ __m128i __DEFAULT_FN_ATTRS256 7564_mm256_cvtepi32_epi8 (__m256i __A) 7565{ 7566 return (__m128i)__builtin_shufflevector( 7567 __builtin_convertvector((__v8si)__A, __v8qi), 7568 (__v8qi){0, 0, 0, 0, 0, 0, 0, 0}, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 7569 12, 13, 14, 15); 7570} 7571 7572static 
__inline__ __m128i __DEFAULT_FN_ATTRS256 7573_mm256_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A) 7574{ 7575 return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A, 7576 (__v16qi) __O, __M); 7577} 7578 7579static __inline__ __m128i __DEFAULT_FN_ATTRS256 7580_mm256_maskz_cvtepi32_epi8 (__mmask8 __M, __m256i __A) 7581{ 7582 return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A, 7583 (__v16qi) _mm_setzero_si128 (), 7584 __M); 7585} 7586 7587static __inline__ void __DEFAULT_FN_ATTRS256 7588_mm256_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) 7589{ 7590 __builtin_ia32_pmovdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M); 7591} 7592 7593static __inline__ __m128i __DEFAULT_FN_ATTRS128 7594_mm_cvtepi32_epi16 (__m128i __A) 7595{ 7596 return (__m128i)__builtin_shufflevector( 7597 __builtin_convertvector((__v4si)__A, __v4hi), (__v4hi){0, 0, 0, 0}, 0, 1, 7598 2, 3, 4, 5, 6, 7); 7599} 7600 7601static __inline__ __m128i __DEFAULT_FN_ATTRS128 7602_mm_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A) 7603{ 7604 return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A, 7605 (__v8hi) __O, __M); 7606} 7607 7608static __inline__ __m128i __DEFAULT_FN_ATTRS128 7609_mm_maskz_cvtepi32_epi16 (__mmask8 __M, __m128i __A) 7610{ 7611 return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A, 7612 (__v8hi) _mm_setzero_si128 (), 7613 __M); 7614} 7615 7616static __inline__ void __DEFAULT_FN_ATTRS128 7617_mm_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) 7618{ 7619 __builtin_ia32_pmovdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M); 7620} 7621 7622static __inline__ __m128i __DEFAULT_FN_ATTRS256 7623_mm256_cvtepi32_epi16 (__m256i __A) 7624{ 7625 return (__m128i)__builtin_convertvector((__v8si)__A, __v8hi); 7626} 7627 7628static __inline__ __m128i __DEFAULT_FN_ATTRS256 7629_mm256_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A) 7630{ 7631 return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A, 
7632 (__v8hi) __O, __M); 7633} 7634 7635static __inline__ __m128i __DEFAULT_FN_ATTRS256 7636_mm256_maskz_cvtepi32_epi16 (__mmask8 __M, __m256i __A) 7637{ 7638 return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A, 7639 (__v8hi) _mm_setzero_si128 (), 7640 __M); 7641} 7642 7643static __inline__ void __DEFAULT_FN_ATTRS256 7644_mm256_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) 7645{ 7646 __builtin_ia32_pmovdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M); 7647} 7648 7649static __inline__ __m128i __DEFAULT_FN_ATTRS128 7650_mm_cvtepi64_epi8 (__m128i __A) 7651{ 7652 return (__m128i)__builtin_shufflevector( 7653 __builtin_convertvector((__v2di)__A, __v2qi), (__v2qi){0, 0}, 0, 1, 2, 3, 7654 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3); 7655} 7656 7657static __inline__ __m128i __DEFAULT_FN_ATTRS128 7658_mm_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A) 7659{ 7660 return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A, 7661 (__v16qi) __O, __M); 7662} 7663 7664static __inline__ __m128i __DEFAULT_FN_ATTRS128 7665_mm_maskz_cvtepi64_epi8 (__mmask8 __M, __m128i __A) 7666{ 7667 return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A, 7668 (__v16qi) _mm_setzero_si128 (), 7669 __M); 7670} 7671 7672static __inline__ void __DEFAULT_FN_ATTRS128 7673_mm_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) 7674{ 7675 __builtin_ia32_pmovqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M); 7676} 7677 7678static __inline__ __m128i __DEFAULT_FN_ATTRS256 7679_mm256_cvtepi64_epi8 (__m256i __A) 7680{ 7681 return (__m128i)__builtin_shufflevector( 7682 __builtin_convertvector((__v4di)__A, __v4qi), (__v4qi){0, 0, 0, 0}, 0, 1, 7683 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7); 7684} 7685 7686static __inline__ __m128i __DEFAULT_FN_ATTRS256 7687_mm256_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A) 7688{ 7689 return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A, 7690 (__v16qi) __O, __M); 7691} 7692 7693static __inline__ __m128i 
__DEFAULT_FN_ATTRS256 7694_mm256_maskz_cvtepi64_epi8 (__mmask8 __M, __m256i __A) 7695{ 7696 return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A, 7697 (__v16qi) _mm_setzero_si128 (), 7698 __M); 7699} 7700 7701static __inline__ void __DEFAULT_FN_ATTRS256 7702_mm256_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) 7703{ 7704 __builtin_ia32_pmovqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M); 7705} 7706 7707static __inline__ __m128i __DEFAULT_FN_ATTRS128 7708_mm_cvtepi64_epi32 (__m128i __A) 7709{ 7710 return (__m128i)__builtin_shufflevector( 7711 __builtin_convertvector((__v2di)__A, __v2si), (__v2si){0, 0}, 0, 1, 2, 3); 7712} 7713 7714static __inline__ __m128i __DEFAULT_FN_ATTRS128 7715_mm_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A) 7716{ 7717 return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A, 7718 (__v4si) __O, __M); 7719} 7720 7721static __inline__ __m128i __DEFAULT_FN_ATTRS128 7722_mm_maskz_cvtepi64_epi32 (__mmask8 __M, __m128i __A) 7723{ 7724 return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A, 7725 (__v4si) _mm_setzero_si128 (), 7726 __M); 7727} 7728 7729static __inline__ void __DEFAULT_FN_ATTRS128 7730_mm_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A) 7731{ 7732 __builtin_ia32_pmovqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M); 7733} 7734 7735static __inline__ __m128i __DEFAULT_FN_ATTRS256 7736_mm256_cvtepi64_epi32 (__m256i __A) 7737{ 7738 return (__m128i)__builtin_convertvector((__v4di)__A, __v4si); 7739} 7740 7741static __inline__ __m128i __DEFAULT_FN_ATTRS256 7742_mm256_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A) 7743{ 7744 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 7745 (__v4si)_mm256_cvtepi64_epi32(__A), 7746 (__v4si)__O); 7747} 7748 7749static __inline__ __m128i __DEFAULT_FN_ATTRS256 7750_mm256_maskz_cvtepi64_epi32 (__mmask8 __M, __m256i __A) 7751{ 7752 return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, 7753 
(__v4si)_mm256_cvtepi64_epi32(__A), 7754 (__v4si)_mm_setzero_si128()); 7755} 7756 7757static __inline__ void __DEFAULT_FN_ATTRS256 7758_mm256_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A) 7759{ 7760 __builtin_ia32_pmovqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M); 7761} 7762 7763static __inline__ __m128i __DEFAULT_FN_ATTRS128 7764_mm_cvtepi64_epi16 (__m128i __A) 7765{ 7766 return (__m128i)__builtin_shufflevector( 7767 __builtin_convertvector((__v2di)__A, __v2hi), (__v2hi){0, 0}, 0, 1, 2, 3, 7768 3, 3, 3, 3); 7769} 7770 7771static __inline__ __m128i __DEFAULT_FN_ATTRS128 7772_mm_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A) 7773{ 7774 return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A, 7775 (__v8hi)__O, 7776 __M); 7777} 7778 7779static __inline__ __m128i __DEFAULT_FN_ATTRS128 7780_mm_maskz_cvtepi64_epi16 (__mmask8 __M, __m128i __A) 7781{ 7782 return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A, 7783 (__v8hi) _mm_setzero_si128 (), 7784 __M); 7785} 7786 7787static __inline__ void __DEFAULT_FN_ATTRS128 7788_mm_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) 7789{ 7790 __builtin_ia32_pmovqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M); 7791} 7792 7793static __inline__ __m128i __DEFAULT_FN_ATTRS256 7794_mm256_cvtepi64_epi16 (__m256i __A) 7795{ 7796 return (__m128i)__builtin_shufflevector( 7797 __builtin_convertvector((__v4di)__A, __v4hi), (__v4hi){0, 0, 0, 0}, 0, 1, 7798 2, 3, 4, 5, 6, 7); 7799} 7800 7801static __inline__ __m128i __DEFAULT_FN_ATTRS256 7802_mm256_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A) 7803{ 7804 return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A, 7805 (__v8hi) __O, __M); 7806} 7807 7808static __inline__ __m128i __DEFAULT_FN_ATTRS256 7809_mm256_maskz_cvtepi64_epi16 (__mmask8 __M, __m256i __A) 7810{ 7811 return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A, 7812 (__v8hi) _mm_setzero_si128 (), 7813 __M); 7814} 7815 7816static __inline__ 
void __DEFAULT_FN_ATTRS256 7817_mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) 7818{ 7819 __builtin_ia32_pmovqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M); 7820} 7821 7822#define _mm256_extractf32x4_ps(A, imm) \ 7823 ((__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \ 7824 (int)(imm), \ 7825 (__v4sf)_mm_undefined_ps(), \ 7826 (__mmask8)-1)) 7827 7828#define _mm256_mask_extractf32x4_ps(W, U, A, imm) \ 7829 ((__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \ 7830 (int)(imm), \ 7831 (__v4sf)(__m128)(W), \ 7832 (__mmask8)(U))) 7833 7834#define _mm256_maskz_extractf32x4_ps(U, A, imm) \ 7835 ((__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \ 7836 (int)(imm), \ 7837 (__v4sf)_mm_setzero_ps(), \ 7838 (__mmask8)(U))) 7839 7840#define _mm256_extracti32x4_epi32(A, imm) \ 7841 ((__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \ 7842 (int)(imm), \ 7843 (__v4si)_mm_undefined_si128(), \ 7844 (__mmask8)-1)) 7845 7846#define _mm256_mask_extracti32x4_epi32(W, U, A, imm) \ 7847 ((__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \ 7848 (int)(imm), \ 7849 (__v4si)(__m128i)(W), \ 7850 (__mmask8)(U))) 7851 7852#define _mm256_maskz_extracti32x4_epi32(U, A, imm) \ 7853 ((__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \ 7854 (int)(imm), \ 7855 (__v4si)_mm_setzero_si128(), \ 7856 (__mmask8)(U))) 7857 7858#define _mm256_insertf32x4(A, B, imm) \ 7859 ((__m256)__builtin_ia32_insertf32x4_256((__v8sf)(__m256)(A), \ 7860 (__v4sf)(__m128)(B), (int)(imm))) 7861 7862#define _mm256_mask_insertf32x4(W, U, A, B, imm) \ 7863 ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ 7864 (__v8sf)_mm256_insertf32x4((A), (B), (imm)), \ 7865 (__v8sf)(__m256)(W))) 7866 7867#define _mm256_maskz_insertf32x4(U, A, B, imm) \ 7868 ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \ 7869 (__v8sf)_mm256_insertf32x4((A), (B), (imm)), \ 7870 (__v8sf)_mm256_setzero_ps())) 7871 
/* ---- 128-bit insert into a 256-bit vector (integer) -------------------- */

#define _mm256_inserti32x4(A, B, imm) \
  ((__m256i)__builtin_ia32_inserti32x4_256( \
      (__v8si)(__m256i)(A), (__v4si)(__m128i)(B), (int)(imm)))

/* Masked form: selects between the insert result and W under mask U. */
#define _mm256_mask_inserti32x4(W, U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectd_256( \
      (__mmask8)(U), \
      (__v8si)_mm256_inserti32x4((A), (B), (imm)), \
      (__v8si)(__m256i)(W)))

/* Zero-masked form: selects between the insert result and zero under U. */
#define _mm256_maskz_inserti32x4(U, A, B, imm) \
  ((__m256i)__builtin_ia32_selectd_256( \
      (__mmask8)(U), \
      (__v8si)_mm256_inserti32x4((A), (B), (imm)), \
      (__v8si)_mm256_setzero_si256()))

/* ---- getmant, double precision -----------------------------------------
   In every variant the immediate handed to the builtin is ((C) << 2) | (B).
   The unmasked forms pass a zero vector and an all-ones mask; the mask forms
   pass W and U; the maskz forms pass a zero vector and U. */

#define _mm_getmant_pd(A, B, C) \
  ((__m128d)__builtin_ia32_getmantpd128_mask( \
      (__v2df)(__m128d)(A), (int)(((C)<<2) | (B)), \
      (__v2df)_mm_setzero_pd(), (__mmask8)-1))

#define _mm_mask_getmant_pd(W, U, A, B, C) \
  ((__m128d)__builtin_ia32_getmantpd128_mask( \
      (__v2df)(__m128d)(A), (int)(((C)<<2) | (B)), \
      (__v2df)(__m128d)(W), (__mmask8)(U)))

#define _mm_maskz_getmant_pd(U, A, B, C) \
  ((__m128d)__builtin_ia32_getmantpd128_mask( \
      (__v2df)(__m128d)(A), (int)(((C)<<2) | (B)), \
      (__v2df)_mm_setzero_pd(), (__mmask8)(U)))

#define _mm256_getmant_pd(A, B, C) \
  ((__m256d)__builtin_ia32_getmantpd256_mask( \
      (__v4df)(__m256d)(A), (int)(((C)<<2) | (B)), \
      (__v4df)_mm256_setzero_pd(), (__mmask8)-1))

#define _mm256_mask_getmant_pd(W, U, A, B, C) \
  ((__m256d)__builtin_ia32_getmantpd256_mask( \
      (__v4df)(__m256d)(A), (int)(((C)<<2) | (B)), \
      (__v4df)(__m256d)(W), (__mmask8)(U)))

#define _mm256_maskz_getmant_pd(U, A, B, C) \
  ((__m256d)__builtin_ia32_getmantpd256_mask( \
      (__v4df)(__m256d)(A), (int)(((C)<<2) | (B)), \
      (__v4df)_mm256_setzero_pd(), (__mmask8)(U)))

/* ---- getmant, single precision (unmasked 128-bit form) ----------------- */

#define _mm_getmant_ps(A, B, C) \
  ((__m128)__builtin_ia32_getmantps128_mask( \
      (__v4sf)(__m128)(A), (int)(((C)<<2) | (B)), \
      (__v4sf)_mm_setzero_ps(), (__mmask8)-1))

7928#define _mm_mask_getmant_ps(W, U, A, B, C) \ 7929 ((__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \ 7930 (int)(((C)<<2) | (B)), \ 7931 (__v4sf)(__m128)(W), \ 7932 (__mmask8)(U))) 7933 7934#define _mm_maskz_getmant_ps(U, A, B, C) \ 7935 ((__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \ 7936 (int)(((C)<<2) | (B)), \ 7937 (__v4sf)_mm_setzero_ps(), \ 7938 (__mmask8)(U))) 7939 7940#define _mm256_getmant_ps(A, B, C) \ 7941 ((__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \ 7942 (int)(((C)<<2) | (B)), \ 7943 (__v8sf)_mm256_setzero_ps(), \ 7944 (__mmask8)-1)) 7945 7946#define _mm256_mask_getmant_ps(W, U, A, B, C) \ 7947 ((__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \ 7948 (int)(((C)<<2) | (B)), \ 7949 (__v8sf)(__m256)(W), \ 7950 (__mmask8)(U))) 7951 7952#define _mm256_maskz_getmant_ps(U, A, B, C) \ 7953 ((__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \ 7954 (int)(((C)<<2) | (B)), \ 7955 (__v8sf)_mm256_setzero_ps(), \ 7956 (__mmask8)(U))) 7957 7958#define _mm_mmask_i64gather_pd(v1_old, mask, index, addr, scale) \ 7959 ((__m128d)__builtin_ia32_gather3div2df((__v2df)(__m128d)(v1_old), \ 7960 (void const *)(addr), \ 7961 (__v2di)(__m128i)(index), \ 7962 (__mmask8)(mask), (int)(scale))) 7963 7964#define _mm_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) \ 7965 ((__m128i)__builtin_ia32_gather3div2di((__v2di)(__m128i)(v1_old), \ 7966 (void const *)(addr), \ 7967 (__v2di)(__m128i)(index), \ 7968 (__mmask8)(mask), (int)(scale))) 7969 7970#define _mm256_mmask_i64gather_pd(v1_old, mask, index, addr, scale) \ 7971 ((__m256d)__builtin_ia32_gather3div4df((__v4df)(__m256d)(v1_old), \ 7972 (void const *)(addr), \ 7973 (__v4di)(__m256i)(index), \ 7974 (__mmask8)(mask), (int)(scale))) 7975 7976#define _mm256_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) \ 7977 ((__m256i)__builtin_ia32_gather3div4di((__v4di)(__m256i)(v1_old), \ 7978 (void const *)(addr), \ 7979 (__v4di)(__m256i)(index), \ 7980 
(__mmask8)(mask), (int)(scale))) 7981 7982#define _mm_mmask_i64gather_ps(v1_old, mask, index, addr, scale) \ 7983 ((__m128)__builtin_ia32_gather3div4sf((__v4sf)(__m128)(v1_old), \ 7984 (void const *)(addr), \ 7985 (__v2di)(__m128i)(index), \ 7986 (__mmask8)(mask), (int)(scale))) 7987 7988#define _mm_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) \ 7989 ((__m128i)__builtin_ia32_gather3div4si((__v4si)(__m128i)(v1_old), \ 7990 (void const *)(addr), \ 7991 (__v2di)(__m128i)(index), \ 7992 (__mmask8)(mask), (int)(scale))) 7993 7994#define _mm256_mmask_i64gather_ps(v1_old, mask, index, addr, scale) \ 7995 ((__m128)__builtin_ia32_gather3div8sf((__v4sf)(__m128)(v1_old), \ 7996 (void const *)(addr), \ 7997 (__v4di)(__m256i)(index), \ 7998 (__mmask8)(mask), (int)(scale))) 7999 8000#define _mm256_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) \ 8001 ((__m128i)__builtin_ia32_gather3div8si((__v4si)(__m128i)(v1_old), \ 8002 (void const *)(addr), \ 8003 (__v4di)(__m256i)(index), \ 8004 (__mmask8)(mask), (int)(scale))) 8005 8006#define _mm_mmask_i32gather_pd(v1_old, mask, index, addr, scale) \ 8007 ((__m128d)__builtin_ia32_gather3siv2df((__v2df)(__m128d)(v1_old), \ 8008 (void const *)(addr), \ 8009 (__v4si)(__m128i)(index), \ 8010 (__mmask8)(mask), (int)(scale))) 8011 8012#define _mm_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) \ 8013 ((__m128i)__builtin_ia32_gather3siv2di((__v2di)(__m128i)(v1_old), \ 8014 (void const *)(addr), \ 8015 (__v4si)(__m128i)(index), \ 8016 (__mmask8)(mask), (int)(scale))) 8017 8018#define _mm256_mmask_i32gather_pd(v1_old, mask, index, addr, scale) \ 8019 ((__m256d)__builtin_ia32_gather3siv4df((__v4df)(__m256d)(v1_old), \ 8020 (void const *)(addr), \ 8021 (__v4si)(__m128i)(index), \ 8022 (__mmask8)(mask), (int)(scale))) 8023 8024#define _mm256_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) \ 8025 ((__m256i)__builtin_ia32_gather3siv4di((__v4di)(__m256i)(v1_old), \ 8026 (void const *)(addr), \ 8027 
(__v4si)(__m128i)(index), \ 8028 (__mmask8)(mask), (int)(scale))) 8029 8030#define _mm_mmask_i32gather_ps(v1_old, mask, index, addr, scale) \ 8031 ((__m128)__builtin_ia32_gather3siv4sf((__v4sf)(__m128)(v1_old), \ 8032 (void const *)(addr), \ 8033 (__v4si)(__m128i)(index), \ 8034 (__mmask8)(mask), (int)(scale))) 8035 8036#define _mm_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) \ 8037 ((__m128i)__builtin_ia32_gather3siv4si((__v4si)(__m128i)(v1_old), \ 8038 (void const *)(addr), \ 8039 (__v4si)(__m128i)(index), \ 8040 (__mmask8)(mask), (int)(scale))) 8041 8042#define _mm256_mmask_i32gather_ps(v1_old, mask, index, addr, scale) \ 8043 ((__m256)__builtin_ia32_gather3siv8sf((__v8sf)(__m256)(v1_old), \ 8044 (void const *)(addr), \ 8045 (__v8si)(__m256i)(index), \ 8046 (__mmask8)(mask), (int)(scale))) 8047 8048#define _mm256_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) \ 8049 ((__m256i)__builtin_ia32_gather3siv8si((__v8si)(__m256i)(v1_old), \ 8050 (void const *)(addr), \ 8051 (__v8si)(__m256i)(index), \ 8052 (__mmask8)(mask), (int)(scale))) 8053 8054#define _mm256_permutex_pd(X, C) \ 8055 ((__m256d)__builtin_ia32_permdf256((__v4df)(__m256d)(X), (int)(C))) 8056 8057#define _mm256_mask_permutex_pd(W, U, X, C) \ 8058 ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ 8059 (__v4df)_mm256_permutex_pd((X), (C)), \ 8060 (__v4df)(__m256d)(W))) 8061 8062#define _mm256_maskz_permutex_pd(U, X, C) \ 8063 ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \ 8064 (__v4df)_mm256_permutex_pd((X), (C)), \ 8065 (__v4df)_mm256_setzero_pd())) 8066 8067#define _mm256_permutex_epi64(X, C) \ 8068 ((__m256i)__builtin_ia32_permdi256((__v4di)(__m256i)(X), (int)(C))) 8069 8070#define _mm256_mask_permutex_epi64(W, U, X, C) \ 8071 ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ 8072 (__v4di)_mm256_permutex_epi64((X), (C)), \ 8073 (__v4di)(__m256i)(W))) 8074 8075#define _mm256_maskz_permutex_epi64(U, X, C) \ 8076 ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ 
8077 (__v4di)_mm256_permutex_epi64((X), (C)), \ 8078 (__v4di)_mm256_setzero_si256())) 8079 8080static __inline__ __m256d __DEFAULT_FN_ATTRS256 8081_mm256_permutexvar_pd (__m256i __X, __m256d __Y) 8082{ 8083 return (__m256d)__builtin_ia32_permvardf256((__v4df)__Y, (__v4di)__X); 8084} 8085 8086static __inline__ __m256d __DEFAULT_FN_ATTRS256 8087_mm256_mask_permutexvar_pd (__m256d __W, __mmask8 __U, __m256i __X, 8088 __m256d __Y) 8089{ 8090 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 8091 (__v4df)_mm256_permutexvar_pd(__X, __Y), 8092 (__v4df)__W); 8093} 8094 8095static __inline__ __m256d __DEFAULT_FN_ATTRS256 8096_mm256_maskz_permutexvar_pd (__mmask8 __U, __m256i __X, __m256d __Y) 8097{ 8098 return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, 8099 (__v4df)_mm256_permutexvar_pd(__X, __Y), 8100 (__v4df)_mm256_setzero_pd()); 8101} 8102 8103static __inline__ __m256i __DEFAULT_FN_ATTRS256 8104_mm256_permutexvar_epi64 ( __m256i __X, __m256i __Y) 8105{ 8106 return (__m256i)__builtin_ia32_permvardi256((__v4di) __Y, (__v4di) __X); 8107} 8108 8109static __inline__ __m256i __DEFAULT_FN_ATTRS256 8110_mm256_maskz_permutexvar_epi64 (__mmask8 __M, __m256i __X, __m256i __Y) 8111{ 8112 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 8113 (__v4di)_mm256_permutexvar_epi64(__X, __Y), 8114 (__v4di)_mm256_setzero_si256()); 8115} 8116 8117static __inline__ __m256i __DEFAULT_FN_ATTRS256 8118_mm256_mask_permutexvar_epi64 (__m256i __W, __mmask8 __M, __m256i __X, 8119 __m256i __Y) 8120{ 8121 return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, 8122 (__v4di)_mm256_permutexvar_epi64(__X, __Y), 8123 (__v4di)__W); 8124} 8125 8126#define _mm256_permutexvar_ps(A, B) _mm256_permutevar8x32_ps((B), (A)) 8127 8128static __inline__ __m256 __DEFAULT_FN_ATTRS256 8129_mm256_mask_permutexvar_ps(__m256 __W, __mmask8 __U, __m256i __X, __m256 __Y) 8130{ 8131 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 8132 (__v8sf)_mm256_permutexvar_ps(__X, __Y), 8133 
(__v8sf)__W); 8134} 8135 8136static __inline__ __m256 __DEFAULT_FN_ATTRS256 8137_mm256_maskz_permutexvar_ps(__mmask8 __U, __m256i __X, __m256 __Y) 8138{ 8139 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 8140 (__v8sf)_mm256_permutexvar_ps(__X, __Y), 8141 (__v8sf)_mm256_setzero_ps()); 8142} 8143 8144#define _mm256_permutexvar_epi32(A, B) _mm256_permutevar8x32_epi32((B), (A)) 8145 8146static __inline__ __m256i __DEFAULT_FN_ATTRS256 8147_mm256_mask_permutexvar_epi32(__m256i __W, __mmask8 __M, __m256i __X, 8148 __m256i __Y) 8149{ 8150 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 8151 (__v8si)_mm256_permutexvar_epi32(__X, __Y), 8152 (__v8si)__W); 8153} 8154 8155static __inline__ __m256i __DEFAULT_FN_ATTRS256 8156_mm256_maskz_permutexvar_epi32(__mmask8 __M, __m256i __X, __m256i __Y) 8157{ 8158 return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, 8159 (__v8si)_mm256_permutexvar_epi32(__X, __Y), 8160 (__v8si)_mm256_setzero_si256()); 8161} 8162 8163#define _mm_alignr_epi32(A, B, imm) \ 8164 ((__m128i)__builtin_ia32_alignd128((__v4si)(__m128i)(A), \ 8165 (__v4si)(__m128i)(B), (int)(imm))) 8166 8167#define _mm_mask_alignr_epi32(W, U, A, B, imm) \ 8168 ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ 8169 (__v4si)_mm_alignr_epi32((A), (B), (imm)), \ 8170 (__v4si)(__m128i)(W))) 8171 8172#define _mm_maskz_alignr_epi32(U, A, B, imm) \ 8173 ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ 8174 (__v4si)_mm_alignr_epi32((A), (B), (imm)), \ 8175 (__v4si)_mm_setzero_si128())) 8176 8177#define _mm256_alignr_epi32(A, B, imm) \ 8178 ((__m256i)__builtin_ia32_alignd256((__v8si)(__m256i)(A), \ 8179 (__v8si)(__m256i)(B), (int)(imm))) 8180 8181#define _mm256_mask_alignr_epi32(W, U, A, B, imm) \ 8182 ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ 8183 (__v8si)_mm256_alignr_epi32((A), (B), (imm)), \ 8184 (__v8si)(__m256i)(W))) 8185 8186#define _mm256_maskz_alignr_epi32(U, A, B, imm) \ 8187 ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ 8188 
(__v8si)_mm256_alignr_epi32((A), (B), (imm)), \ 8189 (__v8si)_mm256_setzero_si256())) 8190 8191#define _mm_alignr_epi64(A, B, imm) \ 8192 ((__m128i)__builtin_ia32_alignq128((__v2di)(__m128i)(A), \ 8193 (__v2di)(__m128i)(B), (int)(imm))) 8194 8195#define _mm_mask_alignr_epi64(W, U, A, B, imm) \ 8196 ((__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \ 8197 (__v2di)_mm_alignr_epi64((A), (B), (imm)), \ 8198 (__v2di)(__m128i)(W))) 8199 8200#define _mm_maskz_alignr_epi64(U, A, B, imm) \ 8201 ((__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \ 8202 (__v2di)_mm_alignr_epi64((A), (B), (imm)), \ 8203 (__v2di)_mm_setzero_si128())) 8204 8205#define _mm256_alignr_epi64(A, B, imm) \ 8206 ((__m256i)__builtin_ia32_alignq256((__v4di)(__m256i)(A), \ 8207 (__v4di)(__m256i)(B), (int)(imm))) 8208 8209#define _mm256_mask_alignr_epi64(W, U, A, B, imm) \ 8210 ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ 8211 (__v4di)_mm256_alignr_epi64((A), (B), (imm)), \ 8212 (__v4di)(__m256i)(W))) 8213 8214#define _mm256_maskz_alignr_epi64(U, A, B, imm) \ 8215 ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \ 8216 (__v4di)_mm256_alignr_epi64((A), (B), (imm)), \ 8217 (__v4di)_mm256_setzero_si256())) 8218 8219static __inline__ __m128 __DEFAULT_FN_ATTRS128 8220_mm_mask_movehdup_ps (__m128 __W, __mmask8 __U, __m128 __A) 8221{ 8222 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 8223 (__v4sf)_mm_movehdup_ps(__A), 8224 (__v4sf)__W); 8225} 8226 8227static __inline__ __m128 __DEFAULT_FN_ATTRS128 8228_mm_maskz_movehdup_ps (__mmask8 __U, __m128 __A) 8229{ 8230 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 8231 (__v4sf)_mm_movehdup_ps(__A), 8232 (__v4sf)_mm_setzero_ps()); 8233} 8234 8235static __inline__ __m256 __DEFAULT_FN_ATTRS256 8236_mm256_mask_movehdup_ps (__m256 __W, __mmask8 __U, __m256 __A) 8237{ 8238 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 8239 (__v8sf)_mm256_movehdup_ps(__A), 8240 (__v8sf)__W); 8241} 8242 8243static __inline__ __m256 
__DEFAULT_FN_ATTRS256 8244_mm256_maskz_movehdup_ps (__mmask8 __U, __m256 __A) 8245{ 8246 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 8247 (__v8sf)_mm256_movehdup_ps(__A), 8248 (__v8sf)_mm256_setzero_ps()); 8249} 8250 8251static __inline__ __m128 __DEFAULT_FN_ATTRS128 8252_mm_mask_moveldup_ps (__m128 __W, __mmask8 __U, __m128 __A) 8253{ 8254 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 8255 (__v4sf)_mm_moveldup_ps(__A), 8256 (__v4sf)__W); 8257} 8258 8259static __inline__ __m128 __DEFAULT_FN_ATTRS128 8260_mm_maskz_moveldup_ps (__mmask8 __U, __m128 __A) 8261{ 8262 return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, 8263 (__v4sf)_mm_moveldup_ps(__A), 8264 (__v4sf)_mm_setzero_ps()); 8265} 8266 8267static __inline__ __m256 __DEFAULT_FN_ATTRS256 8268_mm256_mask_moveldup_ps (__m256 __W, __mmask8 __U, __m256 __A) 8269{ 8270 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 8271 (__v8sf)_mm256_moveldup_ps(__A), 8272 (__v8sf)__W); 8273} 8274 8275static __inline__ __m256 __DEFAULT_FN_ATTRS256 8276_mm256_maskz_moveldup_ps (__mmask8 __U, __m256 __A) 8277{ 8278 return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, 8279 (__v8sf)_mm256_moveldup_ps(__A), 8280 (__v8sf)_mm256_setzero_ps()); 8281} 8282 8283#define _mm256_mask_shuffle_epi32(W, U, A, I) \ 8284 ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ 8285 (__v8si)_mm256_shuffle_epi32((A), (I)), \ 8286 (__v8si)(__m256i)(W))) 8287 8288#define _mm256_maskz_shuffle_epi32(U, A, I) \ 8289 ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \ 8290 (__v8si)_mm256_shuffle_epi32((A), (I)), \ 8291 (__v8si)_mm256_setzero_si256())) 8292 8293#define _mm_mask_shuffle_epi32(W, U, A, I) \ 8294 ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ 8295 (__v4si)_mm_shuffle_epi32((A), (I)), \ 8296 (__v4si)(__m128i)(W))) 8297 8298#define _mm_maskz_shuffle_epi32(U, A, I) \ 8299 ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \ 8300 (__v4si)_mm_shuffle_epi32((A), (I)), \ 8301 
(__v4si)_mm_setzero_si128())) 8302 8303static __inline__ __m128d __DEFAULT_FN_ATTRS128 8304_mm_mask_mov_pd (__m128d __W, __mmask8 __U, __m128d __A) 8305{ 8306 return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U, 8307 (__v2df) __A, 8308 (__v2df) __W); 8309} 8310 8311static __inline__ __m128d __DEFAULT_FN_ATTRS128 8312_mm_maskz_mov_pd (__mmask8 __U, __m128d __A) 8313{ 8314 return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U, 8315 (__v2df) __A, 8316 (__v2df) _mm_setzero_pd ()); 8317} 8318 8319static __inline__ __m256d __DEFAULT_FN_ATTRS256 8320_mm256_mask_mov_pd (__m256d __W, __mmask8 __U, __m256d __A) 8321{ 8322 return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U, 8323 (__v4df) __A, 8324 (__v4df) __W); 8325} 8326 8327static __inline__ __m256d __DEFAULT_FN_ATTRS256 8328_mm256_maskz_mov_pd (__mmask8 __U, __m256d __A) 8329{ 8330 return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U, 8331 (__v4df) __A, 8332 (__v4df) _mm256_setzero_pd ()); 8333} 8334 8335static __inline__ __m128 __DEFAULT_FN_ATTRS128 8336_mm_mask_mov_ps (__m128 __W, __mmask8 __U, __m128 __A) 8337{ 8338 return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U, 8339 (__v4sf) __A, 8340 (__v4sf) __W); 8341} 8342 8343static __inline__ __m128 __DEFAULT_FN_ATTRS128 8344_mm_maskz_mov_ps (__mmask8 __U, __m128 __A) 8345{ 8346 return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U, 8347 (__v4sf) __A, 8348 (__v4sf) _mm_setzero_ps ()); 8349} 8350 8351static __inline__ __m256 __DEFAULT_FN_ATTRS256 8352_mm256_mask_mov_ps (__m256 __W, __mmask8 __U, __m256 __A) 8353{ 8354 return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U, 8355 (__v8sf) __A, 8356 (__v8sf) __W); 8357} 8358 8359static __inline__ __m256 __DEFAULT_FN_ATTRS256 8360_mm256_maskz_mov_ps (__mmask8 __U, __m256 __A) 8361{ 8362 return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U, 8363 (__v8sf) __A, 8364 (__v8sf) _mm256_setzero_ps ()); 8365} 8366 8367static __inline__ __m128 __DEFAULT_FN_ATTRS128 
8368_mm_mask_cvtph_ps (__m128 __W, __mmask8 __U, __m128i __A) 8369{ 8370 return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A, 8371 (__v4sf) __W, 8372 (__mmask8) __U); 8373} 8374 8375static __inline__ __m128 __DEFAULT_FN_ATTRS128 8376_mm_maskz_cvtph_ps (__mmask8 __U, __m128i __A) 8377{ 8378 return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A, 8379 (__v4sf) 8380 _mm_setzero_ps (), 8381 (__mmask8) __U); 8382} 8383 8384static __inline__ __m256 __DEFAULT_FN_ATTRS256 8385_mm256_mask_cvtph_ps (__m256 __W, __mmask8 __U, __m128i __A) 8386{ 8387 return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A, 8388 (__v8sf) __W, 8389 (__mmask8) __U); 8390} 8391 8392static __inline__ __m256 __DEFAULT_FN_ATTRS256 8393_mm256_maskz_cvtph_ps (__mmask8 __U, __m128i __A) 8394{ 8395 return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A, 8396 (__v8sf) 8397 _mm256_setzero_ps (), 8398 (__mmask8) __U); 8399} 8400 8401#define _mm_mask_cvt_roundps_ph(W, U, A, I) \ 8402 ((__m128i)__builtin_ia32_vcvtps2ph_mask((__v4sf)(__m128)(A), (int)(I), \ 8403 (__v8hi)(__m128i)(W), \ 8404 (__mmask8)(U))) 8405 8406#define _mm_maskz_cvt_roundps_ph(U, A, I) \ 8407 ((__m128i)__builtin_ia32_vcvtps2ph_mask((__v4sf)(__m128)(A), (int)(I), \ 8408 (__v8hi)_mm_setzero_si128(), \ 8409 (__mmask8)(U))) 8410 8411#define _mm_mask_cvtps_ph _mm_mask_cvt_roundps_ph 8412#define _mm_maskz_cvtps_ph _mm_maskz_cvt_roundps_ph 8413 8414#define _mm256_mask_cvt_roundps_ph(W, U, A, I) \ 8415 ((__m128i)__builtin_ia32_vcvtps2ph256_mask((__v8sf)(__m256)(A), (int)(I), \ 8416 (__v8hi)(__m128i)(W), \ 8417 (__mmask8)(U))) 8418 8419#define _mm256_maskz_cvt_roundps_ph(U, A, I) \ 8420 ((__m128i)__builtin_ia32_vcvtps2ph256_mask((__v8sf)(__m256)(A), (int)(I), \ 8421 (__v8hi)_mm_setzero_si128(), \ 8422 (__mmask8)(U))) 8423 8424#define _mm256_mask_cvtps_ph _mm256_mask_cvt_roundps_ph 8425#define _mm256_maskz_cvtps_ph _mm256_maskz_cvt_roundps_ph 8426 8427 8428#undef __DEFAULT_FN_ATTRS128 8429#undef __DEFAULT_FN_ATTRS256 8430 
8431#endif /* __AVX512VLINTRIN_H */ 8432