1212904Sdim/*===---- emmintrin.h - SSE2 intrinsics ------------------------------------=== 2193326Sed * 3193326Sed * Permission is hereby granted, free of charge, to any person obtaining a copy 4193326Sed * of this software and associated documentation files (the "Software"), to deal 5193326Sed * in the Software without restriction, including without limitation the rights 6193326Sed * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7193326Sed * copies of the Software, and to permit persons to whom the Software is 8193326Sed * furnished to do so, subject to the following conditions: 9193326Sed * 10193326Sed * The above copyright notice and this permission notice shall be included in 11193326Sed * all copies or substantial portions of the Software. 12193326Sed * 13193326Sed * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14193326Sed * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15193326Sed * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16193326Sed * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17193326Sed * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18193326Sed * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19193326Sed * THE SOFTWARE. 20193326Sed * 21193326Sed *===-----------------------------------------------------------------------=== 22193326Sed */ 23212904Sdim 24193326Sed#ifndef __EMMINTRIN_H 25193326Sed#define __EMMINTRIN_H 26193326Sed 27193326Sed#ifndef __SSE2__ 28193326Sed#error "SSE2 instruction set not enabled" 29193326Sed#else 30193326Sed 31193326Sed#include <xmmintrin.h> 32193326Sed 33193326Sedtypedef double __m128d __attribute__((__vector_size__(16))); 34193326Sedtypedef long long __m128i __attribute__((__vector_size__(16))); 35193326Sed 36212904Sdim/* Type defines. */ 37212904Sdimtypedef double __v2df __attribute__ ((__vector_size__ (16))); 38212904Sdimtypedef long long __v2di __attribute__ ((__vector_size__ (16))); 39193326Sedtypedef short __v8hi __attribute__((__vector_size__(16))); 40193326Sedtypedef char __v16qi __attribute__((__vector_size__(16))); 41193326Sed 42206084Srdivackystatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 43249423Sdim_mm_add_sd(__m128d __a, __m128d __b) 44193326Sed{ 45249423Sdim __a[0] += __b[0]; 46249423Sdim return __a; 47193326Sed} 48193326Sed 49206084Srdivackystatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 50249423Sdim_mm_add_pd(__m128d __a, __m128d __b) 51193326Sed{ 52249423Sdim return __a + __b; 53193326Sed} 54193326Sed 55206084Srdivackystatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 56249423Sdim_mm_sub_sd(__m128d __a, __m128d __b) 57193326Sed{ 58249423Sdim __a[0] -= __b[0]; 59249423Sdim return __a; 60193326Sed} 61193326Sed 62206084Srdivackystatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 63249423Sdim_mm_sub_pd(__m128d __a, __m128d __b) 64193326Sed{ 65249423Sdim return __a - __b; 66193326Sed} 67193326Sed 68206084Srdivackystatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 69249423Sdim_mm_mul_sd(__m128d __a, __m128d __b) 70193326Sed{ 71249423Sdim __a[0] *= __b[0]; 72249423Sdim return __a; 73193326Sed} 74193326Sed 75206084Srdivackystatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 76249423Sdim_mm_mul_pd(__m128d __a, __m128d __b) 77193326Sed{ 78249423Sdim return __a * __b; 79193326Sed} 80193326Sed 81206084Srdivackystatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 82249423Sdim_mm_div_sd(__m128d __a, __m128d __b) 83193326Sed{ 84249423Sdim __a[0] /= __b[0]; 85249423Sdim return __a; 86193326Sed} 87193326Sed 88206084Srdivackystatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 89249423Sdim_mm_div_pd(__m128d __a, __m128d __b) 90193326Sed{ 91249423Sdim return __a / __b; 92193326Sed} 93193326Sed 94206084Srdivackystatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 95249423Sdim_mm_sqrt_sd(__m128d __a, __m128d __b) 96193326Sed{ 97249423Sdim __m128d __c = __builtin_ia32_sqrtsd(__b); 98249423Sdim return (__m128d) { __c[0], __a[1] }; 99193326Sed} 100193326Sed 101206084Srdivackystatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 102249423Sdim_mm_sqrt_pd(__m128d __a) 103193326Sed{ 104249423Sdim return __builtin_ia32_sqrtpd(__a); 105193326Sed} 106193326Sed 107206084Srdivackystatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 108249423Sdim_mm_min_sd(__m128d __a, __m128d __b) 109193326Sed{ 110249423Sdim return __builtin_ia32_minsd(__a, __b); 111193326Sed} 112193326Sed 113206084Srdivackystatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 114249423Sdim_mm_min_pd(__m128d __a, __m128d __b) 115193326Sed{ 116249423Sdim return __builtin_ia32_minpd(__a, __b); 117193326Sed} 118193326Sed 119206084Srdivackystatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 120249423Sdim_mm_max_sd(__m128d __a, __m128d __b) 121193326Sed{ 122249423Sdim return __builtin_ia32_maxsd(__a, __b); 123193326Sed} 124193326Sed 125206084Srdivackystatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 126249423Sdim_mm_max_pd(__m128d __a, __m128d __b) 127193326Sed{ 128249423Sdim return __builtin_ia32_maxpd(__a, __b); 129193326Sed} 130193326Sed 131206084Srdivackystatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 132249423Sdim_mm_and_pd(__m128d __a, __m128d __b) 133193326Sed{ 134249423Sdim return (__m128d)((__v4si)__a & (__v4si)__b); 135193326Sed} 136193326Sed 137206084Srdivackystatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 138249423Sdim_mm_andnot_pd(__m128d __a, __m128d __b) 139193326Sed{ 140249423Sdim return (__m128d)(~(__v4si)__a & (__v4si)__b); 141193326Sed} 142193326Sed 143206084Srdivackystatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 144249423Sdim_mm_or_pd(__m128d __a, __m128d __b) 145193326Sed{ 146249423Sdim return (__m128d)((__v4si)__a | (__v4si)__b); 147193326Sed} 148193326Sed 149206084Srdivackystatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 150249423Sdim_mm_xor_pd(__m128d __a, __m128d __b) 151193326Sed{ 152249423Sdim return (__m128d)((__v4si)__a ^ (__v4si)__b); 153193326Sed} 154193326Sed 155206084Srdivackystatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 156249423Sdim_mm_cmpeq_pd(__m128d __a, __m128d __b) 157193326Sed{ 158249423Sdim return (__m128d)__builtin_ia32_cmppd(__a, __b, 0); 159193326Sed} 160193326Sed 161206084Srdivackystatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 162249423Sdim_mm_cmplt_pd(__m128d __a, __m128d __b) 163193326Sed{ 164249423Sdim return (__m128d)__builtin_ia32_cmppd(__a, __b, 1); 165193326Sed} 166193326Sed 167206084Srdivackystatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 168249423Sdim_mm_cmple_pd(__m128d __a, __m128d __b) 169193326Sed{ 170249423Sdim return (__m128d)__builtin_ia32_cmppd(__a, __b, 2); 171193326Sed} 172193326Sed 173206084Srdivackystatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 174249423Sdim_mm_cmpgt_pd(__m128d __a, __m128d __b) 175193326Sed{ 176249423Sdim return (__m128d)__builtin_ia32_cmppd(__b, __a, 1); 177193326Sed} 178193326Sed 179206084Srdivackystatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 180249423Sdim_mm_cmpge_pd(__m128d __a, __m128d __b) 181193326Sed{ 182249423Sdim return (__m128d)__builtin_ia32_cmppd(__b, __a, 2); 183193326Sed} 184193326Sed 185206084Srdivackystatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 186249423Sdim_mm_cmpord_pd(__m128d __a, __m128d __b) 187193326Sed{ 188249423Sdim return (__m128d)__builtin_ia32_cmppd(__a, __b, 7); 189193326Sed} 190193326Sed 191206084Srdivackystatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 192249423Sdim_mm_cmpunord_pd(__m128d __a, __m128d __b) 193193326Sed{ 194249423Sdim return (__m128d)__builtin_ia32_cmppd(__a, __b, 3); 195193326Sed} 196193326Sed 197206084Srdivackystatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 198249423Sdim_mm_cmpneq_pd(__m128d __a, __m128d __b) 199193326Sed{ 200249423Sdim return (__m128d)__builtin_ia32_cmppd(__a, __b, 4); 201193326Sed} 202193326Sed 203206084Srdivackystatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 204249423Sdim_mm_cmpnlt_pd(__m128d __a, __m128d __b) 205193326Sed{ 206249423Sdim return (__m128d)__builtin_ia32_cmppd(__a, __b, 5); 207193326Sed} 208193326Sed 209206084Srdivackystatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 210249423Sdim_mm_cmpnle_pd(__m128d __a, __m128d __b) 211193326Sed{ 212249423Sdim return (__m128d)__builtin_ia32_cmppd(__a, __b, 6); 213193326Sed} 214193326Sed 215206084Srdivackystatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 216249423Sdim_mm_cmpngt_pd(__m128d __a, __m128d __b) 217193326Sed{ 218249423Sdim return (__m128d)__builtin_ia32_cmppd(__b, __a, 5); 219193326Sed} 220193326Sed 221206084Srdivackystatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 222249423Sdim_mm_cmpnge_pd(__m128d __a, __m128d __b) 223193326Sed{ 224249423Sdim return (__m128d)__builtin_ia32_cmppd(__b, __a, 6); 225193326Sed} 226193326Sed 227206084Srdivackystatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 228249423Sdim_mm_cmpeq_sd(__m128d __a, __m128d __b) 229193326Sed{ 230249423Sdim return (__m128d)__builtin_ia32_cmpsd(__a, __b, 0); 231193326Sed} 232193326Sed 233206084Srdivackystatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 234249423Sdim_mm_cmplt_sd(__m128d __a, __m128d __b) 235193326Sed{ 236249423Sdim return (__m128d)__builtin_ia32_cmpsd(__a, __b, 1); 237193326Sed} 238193326Sed 239206084Srdivackystatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 240249423Sdim_mm_cmple_sd(__m128d __a, __m128d __b) 241193326Sed{ 242249423Sdim return (__m128d)__builtin_ia32_cmpsd(__a, __b, 2); 243193326Sed} 244193326Sed 245206084Srdivackystatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 246249423Sdim_mm_cmpgt_sd(__m128d __a, __m128d __b) 247193326Sed{ 248263508Sdim __m128d __c = __builtin_ia32_cmpsd(__b, __a, 1); 249263508Sdim return (__m128d) { __c[0], __a[1] }; 250193326Sed} 251193326Sed 252206084Srdivackystatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 253249423Sdim_mm_cmpge_sd(__m128d __a, __m128d __b) 254193326Sed{ 255263508Sdim __m128d __c = __builtin_ia32_cmpsd(__b, __a, 2); 256263508Sdim return (__m128d) { __c[0], __a[1] }; 257193326Sed} 258193326Sed 259206084Srdivackystatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 260249423Sdim_mm_cmpord_sd(__m128d __a, __m128d __b) 261193326Sed{ 262249423Sdim return (__m128d)__builtin_ia32_cmpsd(__a, __b, 7); 263193326Sed} 264193326Sed 265206084Srdivackystatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 266249423Sdim_mm_cmpunord_sd(__m128d __a, __m128d __b) 267193326Sed{ 268249423Sdim return (__m128d)__builtin_ia32_cmpsd(__a, __b, 3); 269193326Sed} 270193326Sed 271206084Srdivackystatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 272249423Sdim_mm_cmpneq_sd(__m128d __a, __m128d __b) 273193326Sed{ 274249423Sdim return (__m128d)__builtin_ia32_cmpsd(__a, __b, 4); 275193326Sed} 276193326Sed 277206084Srdivackystatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 278249423Sdim_mm_cmpnlt_sd(__m128d __a, __m128d __b) 279193326Sed{ 280249423Sdim return (__m128d)__builtin_ia32_cmpsd(__a, __b, 5); 281193326Sed} 282193326Sed 283206084Srdivackystatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 284249423Sdim_mm_cmpnle_sd(__m128d __a, __m128d __b) 285193326Sed{ 286249423Sdim return (__m128d)__builtin_ia32_cmpsd(__a, __b, 6); 287193326Sed} 288193326Sed 289206084Srdivackystatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 290249423Sdim_mm_cmpngt_sd(__m128d __a, __m128d __b) 291193326Sed{ 292263508Sdim __m128d __c = __builtin_ia32_cmpsd(__b, __a, 5); 293263508Sdim return (__m128d) { __c[0], __a[1] }; 294193326Sed} 295193326Sed 296206084Srdivackystatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 297249423Sdim_mm_cmpnge_sd(__m128d __a, __m128d __b) 298193326Sed{ 299263508Sdim __m128d __c = __builtin_ia32_cmpsd(__b, __a, 6); 300263508Sdim return (__m128d) { __c[0], __a[1] }; 301193326Sed} 302193326Sed 303206084Srdivackystatic __inline__ int __attribute__((__always_inline__, __nodebug__)) 304249423Sdim_mm_comieq_sd(__m128d __a, __m128d __b) 305193326Sed{ 306249423Sdim return __builtin_ia32_comisdeq(__a, __b); 307193326Sed} 308193326Sed 309206084Srdivackystatic __inline__ int __attribute__((__always_inline__, __nodebug__)) 310249423Sdim_mm_comilt_sd(__m128d __a, __m128d __b) 311193326Sed{ 312249423Sdim return __builtin_ia32_comisdlt(__a, __b); 313193326Sed} 314193326Sed 315206084Srdivackystatic __inline__ int __attribute__((__always_inline__, __nodebug__)) 316249423Sdim_mm_comile_sd(__m128d __a, __m128d __b) 317193326Sed{ 318249423Sdim return __builtin_ia32_comisdle(__a, __b); 319193326Sed} 320193326Sed 321206084Srdivackystatic __inline__ int __attribute__((__always_inline__, __nodebug__)) 322249423Sdim_mm_comigt_sd(__m128d __a, __m128d __b) 323193326Sed{ 324249423Sdim return __builtin_ia32_comisdgt(__a, __b); 325193326Sed} 326193326Sed 327206084Srdivackystatic __inline__ int __attribute__((__always_inline__, __nodebug__)) 328249423Sdim_mm_comige_sd(__m128d __a, __m128d __b) 329226633Sdim{ 330249423Sdim return __builtin_ia32_comisdge(__a, __b); 331226633Sdim} 332226633Sdim 333226633Sdimstatic __inline__ int __attribute__((__always_inline__, __nodebug__)) 334249423Sdim_mm_comineq_sd(__m128d __a, __m128d __b) 335193326Sed{ 336249423Sdim return __builtin_ia32_comisdneq(__a, __b); 337193326Sed} 338193326Sed 339206084Srdivackystatic __inline__ int __attribute__((__always_inline__, __nodebug__)) 340249423Sdim_mm_ucomieq_sd(__m128d __a, __m128d __b) 341193326Sed{ 342249423Sdim return __builtin_ia32_ucomisdeq(__a, __b); 343193326Sed} 344193326Sed 345206084Srdivackystatic __inline__ int __attribute__((__always_inline__, __nodebug__)) 346249423Sdim_mm_ucomilt_sd(__m128d __a, __m128d __b) 347193326Sed{ 348249423Sdim return __builtin_ia32_ucomisdlt(__a, __b); 349193326Sed} 350193326Sed 351206084Srdivackystatic __inline__ int __attribute__((__always_inline__, __nodebug__)) 352249423Sdim_mm_ucomile_sd(__m128d __a, __m128d __b) 353193326Sed{ 354249423Sdim return __builtin_ia32_ucomisdle(__a, __b); 355193326Sed} 356193326Sed 357206084Srdivackystatic __inline__ int __attribute__((__always_inline__, __nodebug__)) 358249423Sdim_mm_ucomigt_sd(__m128d __a, __m128d __b) 359193326Sed{ 360249423Sdim return __builtin_ia32_ucomisdgt(__a, __b); 361193326Sed} 362193326Sed 363206084Srdivackystatic __inline__ int __attribute__((__always_inline__, __nodebug__)) 364249423Sdim_mm_ucomige_sd(__m128d __a, __m128d __b) 365226633Sdim{ 366249423Sdim return __builtin_ia32_ucomisdge(__a, __b); 367226633Sdim} 368226633Sdim 369226633Sdimstatic __inline__ int __attribute__((__always_inline__, __nodebug__)) 370249423Sdim_mm_ucomineq_sd(__m128d __a, __m128d __b) 371193326Sed{ 372249423Sdim return __builtin_ia32_ucomisdneq(__a, __b); 373193326Sed} 374193326Sed 375206084Srdivackystatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 376249423Sdim_mm_cvtpd_ps(__m128d __a) 377193326Sed{ 378249423Sdim return __builtin_ia32_cvtpd2ps(__a); 379193326Sed} 380193326Sed 381206084Srdivackystatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 382249423Sdim_mm_cvtps_pd(__m128 __a) 383193326Sed{ 384249423Sdim return __builtin_ia32_cvtps2pd(__a); 385193326Sed} 386193326Sed 387206084Srdivackystatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 388249423Sdim_mm_cvtepi32_pd(__m128i __a) 389193326Sed{ 390249423Sdim return __builtin_ia32_cvtdq2pd((__v4si)__a); 391193326Sed} 392193326Sed 393206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 394249423Sdim_mm_cvtpd_epi32(__m128d __a) 395193326Sed{ 396249423Sdim return __builtin_ia32_cvtpd2dq(__a); 397193326Sed} 398193326Sed 399206084Srdivackystatic __inline__ int __attribute__((__always_inline__, __nodebug__)) 400249423Sdim_mm_cvtsd_si32(__m128d __a) 401193326Sed{ 402249423Sdim return __builtin_ia32_cvtsd2si(__a); 403193326Sed} 404193326Sed 405206084Srdivackystatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 406249423Sdim_mm_cvtsd_ss(__m128 __a, __m128d __b) 407193326Sed{ 408249423Sdim __a[0] = __b[0]; 409249423Sdim return __a; 410193326Sed} 411193326Sed 412206084Srdivackystatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 413249423Sdim_mm_cvtsi32_sd(__m128d __a, int __b) 414193326Sed{ 415249423Sdim __a[0] = __b; 416249423Sdim return __a; 417193326Sed} 418193326Sed 419206084Srdivackystatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 420249423Sdim_mm_cvtss_sd(__m128d __a, __m128 __b) 421193326Sed{ 422249423Sdim __a[0] = __b[0]; 423249423Sdim return __a; 424193326Sed} 425193326Sed 426206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 427249423Sdim_mm_cvttpd_epi32(__m128d __a) 428193326Sed{ 429249423Sdim return (__m128i)__builtin_ia32_cvttpd2dq(__a); 430193326Sed} 431193326Sed 432206084Srdivackystatic __inline__ int __attribute__((__always_inline__, __nodebug__)) 433249423Sdim_mm_cvttsd_si32(__m128d __a) 434193326Sed{ 435249423Sdim return __a[0]; 436193326Sed} 437193326Sed 438206084Srdivackystatic __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 439249423Sdim_mm_cvtpd_pi32(__m128d __a) 440193326Sed{ 441249423Sdim return (__m64)__builtin_ia32_cvtpd2pi(__a); 442193326Sed} 443193326Sed 444206084Srdivackystatic __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 445249423Sdim_mm_cvttpd_pi32(__m128d __a) 446193326Sed{ 447249423Sdim return (__m64)__builtin_ia32_cvttpd2pi(__a); 448193326Sed} 449193326Sed 450206084Srdivackystatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 451249423Sdim_mm_cvtpi32_pd(__m64 __a) 452193326Sed{ 453249423Sdim return __builtin_ia32_cvtpi2pd((__v2si)__a); 454193326Sed} 455193326Sed 456206084Srdivackystatic __inline__ double __attribute__((__always_inline__, __nodebug__)) 457249423Sdim_mm_cvtsd_f64(__m128d __a) 458193326Sed{ 459249423Sdim return __a[0]; 460193326Sed} 461193326Sed 462206084Srdivackystatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 463249423Sdim_mm_load_pd(double const *__dp) 464193326Sed{ 465249423Sdim return *(__m128d*)__dp; 466193326Sed} 467193326Sed 468206084Srdivackystatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 469249423Sdim_mm_load1_pd(double const *__dp) 470193326Sed{ 471226633Sdim struct __mm_load1_pd_struct { 472249423Sdim double __u; 473226633Sdim } __attribute__((__packed__, __may_alias__)); 474249423Sdim double __u = ((struct __mm_load1_pd_struct*)__dp)->__u; 475249423Sdim return (__m128d){ __u, __u }; 476193326Sed} 477193326Sed 478193326Sed#define _mm_load_pd1(dp) _mm_load1_pd(dp) 479193326Sed 480206084Srdivackystatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 481249423Sdim_mm_loadr_pd(double const *__dp) 482193326Sed{ 483249423Sdim __m128d __u = *(__m128d*)__dp; 484249423Sdim return __builtin_shufflevector(__u, __u, 1, 0); 485193326Sed} 486193326Sed 487206084Srdivackystatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 488249423Sdim_mm_loadu_pd(double const *__dp) 489193326Sed{ 490223017Sdim struct __loadu_pd { 491249423Sdim __m128d __v; 492223017Sdim } __attribute__((packed, may_alias)); 493249423Sdim return ((struct __loadu_pd*)__dp)->__v; 494193326Sed} 495193326Sed 496206084Srdivackystatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 497249423Sdim_mm_load_sd(double const *__dp) 498193326Sed{ 499226633Sdim struct __mm_load_sd_struct { 500249423Sdim double __u; 501226633Sdim } __attribute__((__packed__, __may_alias__)); 502249423Sdim double __u = ((struct __mm_load_sd_struct*)__dp)->__u; 503249423Sdim return (__m128d){ __u, 0 }; 504193326Sed} 505193326Sed 506206084Srdivackystatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 507249423Sdim_mm_loadh_pd(__m128d __a, double const *__dp) 508193326Sed{ 509226633Sdim struct __mm_loadh_pd_struct { 510249423Sdim double __u; 511226633Sdim } __attribute__((__packed__, __may_alias__)); 512249423Sdim double __u = ((struct __mm_loadh_pd_struct*)__dp)->__u; 513249423Sdim return (__m128d){ __a[0], __u }; 514193326Sed} 515193326Sed 516206084Srdivackystatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 517249423Sdim_mm_loadl_pd(__m128d __a, double const *__dp) 518193326Sed{ 519226633Sdim struct __mm_loadl_pd_struct { 520249423Sdim double __u; 521226633Sdim } __attribute__((__packed__, __may_alias__)); 522249423Sdim double __u = ((struct __mm_loadl_pd_struct*)__dp)->__u; 523249423Sdim return (__m128d){ __u, __a[1] }; 524193326Sed} 525193326Sed 526206084Srdivackystatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 527249423Sdim_mm_set_sd(double __w) 528193326Sed{ 529249423Sdim return (__m128d){ __w, 0 }; 530193326Sed} 531193326Sed 532206084Srdivackystatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 533249423Sdim_mm_set1_pd(double __w) 534193326Sed{ 535249423Sdim return (__m128d){ __w, __w }; 536193326Sed} 537193326Sed 538206084Srdivackystatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 539249423Sdim_mm_set_pd(double __w, double __x) 540193326Sed{ 541249423Sdim return (__m128d){ __x, __w }; 542193326Sed} 543193326Sed 544206084Srdivackystatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 545249423Sdim_mm_setr_pd(double __w, double __x) 546193326Sed{ 547249423Sdim return (__m128d){ __w, __x }; 548193326Sed} 549193326Sed 550206084Srdivackystatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 551193326Sed_mm_setzero_pd(void) 552193326Sed{ 553193326Sed return (__m128d){ 0, 0 }; 554193326Sed} 555193326Sed 556206084Srdivackystatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 557249423Sdim_mm_move_sd(__m128d __a, __m128d __b) 558193326Sed{ 559249423Sdim return (__m128d){ __b[0], __a[1] }; 560193326Sed} 561193326Sed 562206084Srdivackystatic __inline__ void __attribute__((__always_inline__, __nodebug__)) 563249423Sdim_mm_store_sd(double *__dp, __m128d __a) 564193326Sed{ 565226633Sdim struct __mm_store_sd_struct { 566249423Sdim double __u; 567226633Sdim } __attribute__((__packed__, __may_alias__)); 568249423Sdim ((struct __mm_store_sd_struct*)__dp)->__u = __a[0]; 569193326Sed} 570193326Sed 571206084Srdivackystatic __inline__ void __attribute__((__always_inline__, __nodebug__)) 572249423Sdim_mm_store1_pd(double *__dp, __m128d __a) 573193326Sed{ 574226633Sdim struct __mm_store1_pd_struct { 575249423Sdim double __u[2]; 576226633Sdim } __attribute__((__packed__, __may_alias__)); 577249423Sdim ((struct __mm_store1_pd_struct*)__dp)->__u[0] = __a[0]; 578249423Sdim ((struct __mm_store1_pd_struct*)__dp)->__u[1] = __a[0]; 579193326Sed} 580193326Sed 581206084Srdivackystatic __inline__ void __attribute__((__always_inline__, __nodebug__)) 582249423Sdim_mm_store_pd(double *__dp, __m128d __a) 583193326Sed{ 584249423Sdim *(__m128d *)__dp = __a; 585193326Sed} 586193326Sed 587206084Srdivackystatic __inline__ void __attribute__((__always_inline__, __nodebug__)) 588249423Sdim_mm_storeu_pd(double *__dp, __m128d __a) 589193326Sed{ 590249423Sdim __builtin_ia32_storeupd(__dp, __a); 591193326Sed} 592193326Sed 593206084Srdivackystatic __inline__ void __attribute__((__always_inline__, __nodebug__)) 594249423Sdim_mm_storer_pd(double *__dp, __m128d __a) 595193326Sed{ 596249423Sdim __a = __builtin_shufflevector(__a, __a, 1, 0); 597249423Sdim *(__m128d *)__dp = __a; 598193326Sed} 599193326Sed 600206084Srdivackystatic __inline__ void __attribute__((__always_inline__, __nodebug__)) 601249423Sdim_mm_storeh_pd(double *__dp, __m128d __a) 602193326Sed{ 603226633Sdim struct __mm_storeh_pd_struct { 604249423Sdim double __u; 605226633Sdim } __attribute__((__packed__, __may_alias__)); 606249423Sdim ((struct __mm_storeh_pd_struct*)__dp)->__u = __a[1]; 607193326Sed} 608193326Sed 609206084Srdivackystatic __inline__ void __attribute__((__always_inline__, __nodebug__)) 610249423Sdim_mm_storel_pd(double *__dp, __m128d __a) 611193326Sed{ 612226633Sdim struct __mm_storeh_pd_struct { 613249423Sdim double __u; 614226633Sdim } __attribute__((__packed__, __may_alias__)); 615249423Sdim ((struct __mm_storeh_pd_struct*)__dp)->__u = __a[0]; 616193326Sed} 617193326Sed 618206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 619249423Sdim_mm_add_epi8(__m128i __a, __m128i __b) 620193326Sed{ 621249423Sdim return (__m128i)((__v16qi)__a + (__v16qi)__b); 622193326Sed} 623193326Sed 624206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 625249423Sdim_mm_add_epi16(__m128i __a, __m128i __b) 626193326Sed{ 627249423Sdim return (__m128i)((__v8hi)__a + (__v8hi)__b); 628193326Sed} 629193326Sed 630206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 631249423Sdim_mm_add_epi32(__m128i __a, __m128i __b) 632193326Sed{ 633249423Sdim return (__m128i)((__v4si)__a + (__v4si)__b); 634193326Sed} 635193326Sed 636206084Srdivackystatic __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 637249423Sdim_mm_add_si64(__m64 __a, __m64 __b) 638193326Sed{ 639249423Sdim return __a + __b; 640193326Sed} 641193326Sed 642206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 643249423Sdim_mm_add_epi64(__m128i __a, __m128i __b) 644193326Sed{ 645249423Sdim return __a + __b; 646193326Sed} 647193326Sed 648206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 649249423Sdim_mm_adds_epi8(__m128i __a, __m128i __b) 650193326Sed{ 651249423Sdim return (__m128i)__builtin_ia32_paddsb128((__v16qi)__a, (__v16qi)__b); 652193326Sed} 653193326Sed 654206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 655249423Sdim_mm_adds_epi16(__m128i __a, __m128i __b) 656193326Sed{ 657249423Sdim return (__m128i)__builtin_ia32_paddsw128((__v8hi)__a, (__v8hi)__b); 658193326Sed} 659193326Sed 660206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 661249423Sdim_mm_adds_epu8(__m128i __a, __m128i __b) 662193326Sed{ 663249423Sdim return (__m128i)__builtin_ia32_paddusb128((__v16qi)__a, (__v16qi)__b); 664193326Sed} 665193326Sed 666206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 667249423Sdim_mm_adds_epu16(__m128i __a, __m128i __b) 668193326Sed{ 669249423Sdim return (__m128i)__builtin_ia32_paddusw128((__v8hi)__a, (__v8hi)__b); 670193326Sed} 671193326Sed 672206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 673249423Sdim_mm_avg_epu8(__m128i __a, __m128i __b) 674193326Sed{ 675249423Sdim return (__m128i)__builtin_ia32_pavgb128((__v16qi)__a, (__v16qi)__b); 676193326Sed} 677193326Sed 678206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 679249423Sdim_mm_avg_epu16(__m128i __a, __m128i __b) 680193326Sed{ 681249423Sdim return (__m128i)__builtin_ia32_pavgw128((__v8hi)__a, (__v8hi)__b); 682193326Sed} 683193326Sed 684206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 685249423Sdim_mm_madd_epi16(__m128i __a, __m128i __b) 686193326Sed{ 687249423Sdim return (__m128i)__builtin_ia32_pmaddwd128((__v8hi)__a, (__v8hi)__b); 688193326Sed} 689193326Sed 690206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 691249423Sdim_mm_max_epi16(__m128i __a, __m128i __b) 692193326Sed{ 693249423Sdim return (__m128i)__builtin_ia32_pmaxsw128((__v8hi)__a, (__v8hi)__b); 694193326Sed} 695193326Sed 696206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 697249423Sdim_mm_max_epu8(__m128i __a, __m128i __b) 698193326Sed{ 699249423Sdim return (__m128i)__builtin_ia32_pmaxub128((__v16qi)__a, (__v16qi)__b); 700193326Sed} 701193326Sed 702206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 703249423Sdim_mm_min_epi16(__m128i __a, __m128i __b) 704193326Sed{ 705249423Sdim return (__m128i)__builtin_ia32_pminsw128((__v8hi)__a, (__v8hi)__b); 706193326Sed} 707193326Sed 708206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 709249423Sdim_mm_min_epu8(__m128i __a, __m128i __b) 710193326Sed{ 711249423Sdim return (__m128i)__builtin_ia32_pminub128((__v16qi)__a, (__v16qi)__b); 712193326Sed} 713193326Sed 714206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 715249423Sdim_mm_mulhi_epi16(__m128i __a, __m128i __b) 716193326Sed{ 717249423Sdim return (__m128i)__builtin_ia32_pmulhw128((__v8hi)__a, (__v8hi)__b); 718193326Sed} 719193326Sed 720206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 721249423Sdim_mm_mulhi_epu16(__m128i __a, __m128i __b) 722193326Sed{ 723249423Sdim return (__m128i)__builtin_ia32_pmulhuw128((__v8hi)__a, (__v8hi)__b); 724193326Sed} 725193326Sed 726206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 727249423Sdim_mm_mullo_epi16(__m128i __a, __m128i __b) 728193326Sed{ 729249423Sdim return (__m128i)((__v8hi)__a * (__v8hi)__b); 730193326Sed} 731193326Sed 732206084Srdivackystatic __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 733249423Sdim_mm_mul_su32(__m64 __a, __m64 __b) 734193326Sed{ 735249423Sdim return __builtin_ia32_pmuludq((__v2si)__a, (__v2si)__b); 736193326Sed} 737193326Sed 738206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 739249423Sdim_mm_mul_epu32(__m128i __a, __m128i __b) 740193326Sed{ 741249423Sdim return __builtin_ia32_pmuludq128((__v4si)__a, (__v4si)__b); 742193326Sed} 743193326Sed 744206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 745249423Sdim_mm_sad_epu8(__m128i __a, __m128i __b) 746193326Sed{ 747249423Sdim return __builtin_ia32_psadbw128((__v16qi)__a, (__v16qi)__b); 748193326Sed} 749193326Sed 750206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 751249423Sdim_mm_sub_epi8(__m128i __a, __m128i __b) 752193326Sed{ 753249423Sdim return (__m128i)((__v16qi)__a - (__v16qi)__b); 754193326Sed} 755193326Sed 756206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 757249423Sdim_mm_sub_epi16(__m128i __a, __m128i __b) 758193326Sed{ 759249423Sdim return (__m128i)((__v8hi)__a - (__v8hi)__b); 760193326Sed} 761193326Sed 762206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 763249423Sdim_mm_sub_epi32(__m128i __a, __m128i __b) 764193326Sed{ 765249423Sdim return (__m128i)((__v4si)__a - (__v4si)__b); 766193326Sed} 767193326Sed 768206084Srdivackystatic __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 769249423Sdim_mm_sub_si64(__m64 __a, __m64 __b) 770193326Sed{ 771249423Sdim return __a - __b; 772193326Sed} 773193326Sed 774206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 775249423Sdim_mm_sub_epi64(__m128i __a, __m128i __b) 776193326Sed{ 777249423Sdim return __a - __b; 778193326Sed} 779193326Sed 780206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 781249423Sdim_mm_subs_epi8(__m128i __a, __m128i __b) 782193326Sed{ 783249423Sdim return (__m128i)__builtin_ia32_psubsb128((__v16qi)__a, (__v16qi)__b); 784193326Sed} 785193326Sed 786206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 787249423Sdim_mm_subs_epi16(__m128i __a, __m128i __b) 788193326Sed{ 789249423Sdim return (__m128i)__builtin_ia32_psubsw128((__v8hi)__a, (__v8hi)__b); 790193326Sed} 791193326Sed 792206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 793249423Sdim_mm_subs_epu8(__m128i __a, __m128i __b) 794193326Sed{ 795249423Sdim return (__m128i)__builtin_ia32_psubusb128((__v16qi)__a, (__v16qi)__b); 796193326Sed} 797193326Sed 798206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 799249423Sdim_mm_subs_epu16(__m128i __a, __m128i __b) 800193326Sed{ 801249423Sdim return (__m128i)__builtin_ia32_psubusw128((__v8hi)__a, (__v8hi)__b); 802193326Sed} 803193326Sed 804206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 805249423Sdim_mm_and_si128(__m128i __a, __m128i __b) 806193326Sed{ 807249423Sdim return __a & __b; 808193326Sed} 809193326Sed 810206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 811249423Sdim_mm_andnot_si128(__m128i __a, __m128i __b) 812193326Sed{ 813249423Sdim return ~__a & __b; 814193326Sed} 815193326Sed 816206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 817249423Sdim_mm_or_si128(__m128i __a, __m128i __b) 818193326Sed{ 819249423Sdim return __a | __b; 820193326Sed} 821193326Sed 822206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 823249423Sdim_mm_xor_si128(__m128i __a, __m128i __b) 824193326Sed{ 825249423Sdim return __a ^ __b; 826193326Sed} 827193326Sed 828234353Sdim#define _mm_slli_si128(a, count) __extension__ ({ \ 829263508Sdim _Pragma("clang diagnostic push") _Pragma("clang diagnostic ignored \"-Wshadow\""); \ 830234353Sdim __m128i __a = (a); \ 831263508Sdim _Pragma("clang diagnostic pop"); \ 832234353Sdim (__m128i)__builtin_ia32_pslldqi128(__a, (count)*8); }) 833193326Sed 834206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 835249423Sdim_mm_slli_epi16(__m128i __a, int __count) 836193326Sed{ 837249423Sdim return (__m128i)__builtin_ia32_psllwi128((__v8hi)__a, __count); 838193326Sed} 839193326Sed 840206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 841249423Sdim_mm_sll_epi16(__m128i __a, __m128i __count) 842193326Sed{ 843249423Sdim return (__m128i)__builtin_ia32_psllw128((__v8hi)__a, (__v8hi)__count); 844193326Sed} 845193326Sed 846206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 847249423Sdim_mm_slli_epi32(__m128i __a, int __count) 848193326Sed{ 849249423Sdim return (__m128i)__builtin_ia32_pslldi128((__v4si)__a, __count); 850193326Sed} 851193326Sed 852206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 853249423Sdim_mm_sll_epi32(__m128i __a, __m128i __count) 854193326Sed{ 855249423Sdim return (__m128i)__builtin_ia32_pslld128((__v4si)__a, (__v4si)__count); 856193326Sed} 857193326Sed 858206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 859249423Sdim_mm_slli_epi64(__m128i __a, int __count) 860193326Sed{ 861249423Sdim return __builtin_ia32_psllqi128(__a, __count); 862193326Sed} 863193326Sed 864206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 865249423Sdim_mm_sll_epi64(__m128i __a, __m128i __count) 866193326Sed{ 867249423Sdim return __builtin_ia32_psllq128(__a, __count); 868193326Sed} 869193326Sed 870206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 871249423Sdim_mm_srai_epi16(__m128i __a, int __count) 872193326Sed{ 873249423Sdim return (__m128i)__builtin_ia32_psrawi128((__v8hi)__a, __count); 874193326Sed} 875193326Sed 876206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 877249423Sdim_mm_sra_epi16(__m128i __a, __m128i __count) 878193326Sed{ 879249423Sdim return (__m128i)__builtin_ia32_psraw128((__v8hi)__a, (__v8hi)__count); 880193326Sed} 881193326Sed 882206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 883249423Sdim_mm_srai_epi32(__m128i __a, int __count) 884193326Sed{ 885249423Sdim return (__m128i)__builtin_ia32_psradi128((__v4si)__a, __count); 886193326Sed} 887193326Sed 888206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 889249423Sdim_mm_sra_epi32(__m128i __a, __m128i __count) 890193326Sed{ 891249423Sdim return (__m128i)__builtin_ia32_psrad128((__v4si)__a, (__v4si)__count); 892193326Sed} 893193326Sed 894193326Sed 895234353Sdim#define _mm_srli_si128(a, count) __extension__ ({ \ 896263508Sdim _Pragma("clang diagnostic push") _Pragma("clang diagnostic ignored \"-Wshadow\""); \ 897234353Sdim __m128i __a = (a); \ 898263508Sdim _Pragma("clang diagnostic pop"); \ 899234353Sdim (__m128i)__builtin_ia32_psrldqi128(__a, (count)*8); }) 900218893Sdim 901206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 902249423Sdim_mm_srli_epi16(__m128i __a, int __count) 903193326Sed{ 904249423Sdim return (__m128i)__builtin_ia32_psrlwi128((__v8hi)__a, __count); 905193326Sed} 906193326Sed 907206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 908249423Sdim_mm_srl_epi16(__m128i __a, __m128i __count) 909193326Sed{ 910249423Sdim return (__m128i)__builtin_ia32_psrlw128((__v8hi)__a, (__v8hi)__count); 911193326Sed} 912193326Sed 913206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 914249423Sdim_mm_srli_epi32(__m128i __a, int __count) 915193326Sed{ 916249423Sdim return (__m128i)__builtin_ia32_psrldi128((__v4si)__a, __count); 917193326Sed} 918193326Sed 919206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 920249423Sdim_mm_srl_epi32(__m128i __a, __m128i __count) 921193326Sed{ 922249423Sdim return (__m128i)__builtin_ia32_psrld128((__v4si)__a, (__v4si)__count); 923193326Sed} 924193326Sed 925206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 926249423Sdim_mm_srli_epi64(__m128i __a, int __count) 927193326Sed{ 928249423Sdim return __builtin_ia32_psrlqi128(__a, __count); 929193326Sed} 930193326Sed 931206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 932249423Sdim_mm_srl_epi64(__m128i __a, __m128i __count) 933193326Sed{ 934249423Sdim return __builtin_ia32_psrlq128(__a, __count); 935193326Sed} 936193326Sed 937206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 938249423Sdim_mm_cmpeq_epi8(__m128i __a, __m128i __b) 939193326Sed{ 940249423Sdim return (__m128i)((__v16qi)__a == (__v16qi)__b); 941193326Sed} 942193326Sed 943206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 944249423Sdim_mm_cmpeq_epi16(__m128i __a, __m128i __b) 945193326Sed{ 946249423Sdim return (__m128i)((__v8hi)__a == (__v8hi)__b); 947193326Sed} 948193326Sed 949206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 950249423Sdim_mm_cmpeq_epi32(__m128i __a, __m128i __b) 951193326Sed{ 952249423Sdim return (__m128i)((__v4si)__a == (__v4si)__b); 953193326Sed} 954193326Sed 955206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 956249423Sdim_mm_cmpgt_epi8(__m128i __a, __m128i __b) 957193326Sed{ 958234353Sdim /* This function always performs a signed comparison, but __v16qi is a char 959234353Sdim which may be signed or unsigned. */ 960234353Sdim typedef signed char __v16qs __attribute__((__vector_size__(16))); 961249423Sdim return (__m128i)((__v16qs)__a > (__v16qs)__b); 962193326Sed} 963193326Sed 964206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 965249423Sdim_mm_cmpgt_epi16(__m128i __a, __m128i __b) 966193326Sed{ 967249423Sdim return (__m128i)((__v8hi)__a > (__v8hi)__b); 968193326Sed} 969193326Sed 970206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 971249423Sdim_mm_cmpgt_epi32(__m128i __a, __m128i __b) 972193326Sed{ 973249423Sdim return (__m128i)((__v4si)__a > (__v4si)__b); 974193326Sed} 975193326Sed 976206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 977249423Sdim_mm_cmplt_epi8(__m128i __a, __m128i __b) 978193326Sed{ 979249423Sdim return _mm_cmpgt_epi8(__b, __a); 980193326Sed} 981193326Sed 982206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 983249423Sdim_mm_cmplt_epi16(__m128i __a, __m128i __b) 984193326Sed{ 985249423Sdim return _mm_cmpgt_epi16(__b, __a); 986193326Sed} 987193326Sed 988206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 989249423Sdim_mm_cmplt_epi32(__m128i __a, __m128i __b) 990193326Sed{ 991249423Sdim return _mm_cmpgt_epi32(__b, __a); 992193326Sed} 993193326Sed 994193326Sed#ifdef __x86_64__ 995206084Srdivackystatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 996249423Sdim_mm_cvtsi64_sd(__m128d __a, long long __b) 997193326Sed{ 998249423Sdim __a[0] = __b; 999249423Sdim return __a; 1000193326Sed} 1001193326Sed 1002206084Srdivackystatic __inline__ long long __attribute__((__always_inline__, __nodebug__)) 1003249423Sdim_mm_cvtsd_si64(__m128d __a) 1004193326Sed{ 1005249423Sdim return __builtin_ia32_cvtsd2si64(__a); 1006193326Sed} 1007193326Sed 1008206084Srdivackystatic __inline__ long long __attribute__((__always_inline__, __nodebug__)) 1009249423Sdim_mm_cvttsd_si64(__m128d __a) 1010193326Sed{ 1011249423Sdim return __a[0]; 1012193326Sed} 1013193326Sed#endif 1014193326Sed 1015206084Srdivackystatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 1016249423Sdim_mm_cvtepi32_ps(__m128i __a) 1017193326Sed{ 1018249423Sdim return __builtin_ia32_cvtdq2ps((__v4si)__a); 1019193326Sed} 1020193326Sed 1021206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 1022249423Sdim_mm_cvtps_epi32(__m128 __a) 1023193326Sed{ 1024249423Sdim return (__m128i)__builtin_ia32_cvtps2dq(__a); 1025193326Sed} 1026193326Sed 1027206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 1028249423Sdim_mm_cvttps_epi32(__m128 __a) 1029193326Sed{ 1030249423Sdim return (__m128i)__builtin_ia32_cvttps2dq(__a); 1031193326Sed} 1032193326Sed 1033206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 1034249423Sdim_mm_cvtsi32_si128(int __a) 1035193326Sed{ 1036249423Sdim return (__m128i)(__v4si){ __a, 0, 0, 0 }; 1037193326Sed} 1038193326Sed 1039193326Sed#ifdef __x86_64__ 1040206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 1041249423Sdim_mm_cvtsi64_si128(long long __a) 1042193326Sed{ 1043249423Sdim return (__m128i){ __a, 0 }; 1044193326Sed} 1045193326Sed#endif 1046193326Sed 1047206084Srdivackystatic __inline__ int __attribute__((__always_inline__, __nodebug__)) 1048249423Sdim_mm_cvtsi128_si32(__m128i __a) 1049193326Sed{ 1050249423Sdim __v4si __b = (__v4si)__a; 1051249423Sdim return __b[0]; 1052193326Sed} 1053193326Sed 1054193326Sed#ifdef __x86_64__ 1055206084Srdivackystatic __inline__ long long __attribute__((__always_inline__, __nodebug__)) 1056249423Sdim_mm_cvtsi128_si64(__m128i __a) 1057193326Sed{ 1058249423Sdim return __a[0]; 1059193326Sed} 1060193326Sed#endif 1061193326Sed 1062206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 1063249423Sdim_mm_load_si128(__m128i const *__p) 1064193326Sed{ 1065249423Sdim return *__p; 1066193326Sed} 1067193326Sed 1068206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 1069249423Sdim_mm_loadu_si128(__m128i const *__p) 1070193326Sed{ 1071223017Sdim struct __loadu_si128 { 1072249423Sdim __m128i __v; 1073223017Sdim } __attribute__((packed, may_alias)); 1074249423Sdim return ((struct __loadu_si128*)__p)->__v; 1075193326Sed} 1076193326Sed 1077206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 1078249423Sdim_mm_loadl_epi64(__m128i const *__p) 1079193326Sed{ 1080226633Sdim struct __mm_loadl_epi64_struct { 1081249423Sdim long long __u; 1082226633Sdim } __attribute__((__packed__, __may_alias__)); 1083249423Sdim return (__m128i) { ((struct __mm_loadl_epi64_struct*)__p)->__u, 0}; 1084193326Sed} 1085193326Sed 1086206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 1087198092Srdivacky_mm_set_epi64x(long long q1, long long q0) 1088198092Srdivacky{ 1089198092Srdivacky return (__m128i){ q0, q1 }; 1090198092Srdivacky} 1091198092Srdivacky 1092206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 1093193326Sed_mm_set_epi64(__m64 q1, __m64 q0) 1094193326Sed{ 1095193326Sed return (__m128i){ (long long)q0, (long long)q1 }; 1096193326Sed} 1097193326Sed 1098206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 1099193326Sed_mm_set_epi32(int i3, int i2, int i1, int i0) 1100193326Sed{ 1101193326Sed return (__m128i)(__v4si){ i0, i1, i2, i3}; 1102193326Sed} 1103193326Sed 1104206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 1105193326Sed_mm_set_epi16(short w7, short w6, short w5, short w4, short w3, short w2, short w1, short w0) 1106193326Sed{ 1107193326Sed return (__m128i)(__v8hi){ w0, w1, w2, w3, w4, w5, w6, w7 }; 1108193326Sed} 1109193326Sed 1110206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 1111193326Sed_mm_set_epi8(char b15, char b14, char b13, char b12, char b11, char b10, char b9, char b8, char b7, char b6, char b5, char b4, char b3, char b2, char b1, char b0) 1112193326Sed{ 1113193326Sed return (__m128i)(__v16qi){ b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15 }; 1114193326Sed} 1115193326Sed 1116206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 1117249423Sdim_mm_set1_epi64x(long long __q) 1118198092Srdivacky{ 1119249423Sdim return (__m128i){ __q, __q }; 1120198092Srdivacky} 1121198092Srdivacky 1122206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 1123249423Sdim_mm_set1_epi64(__m64 __q) 1124193326Sed{ 1125249423Sdim return (__m128i){ (long long)__q, (long long)__q }; 1126193326Sed} 1127193326Sed 1128206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 1129249423Sdim_mm_set1_epi32(int __i) 1130193326Sed{ 1131249423Sdim return (__m128i)(__v4si){ __i, __i, __i, __i }; 1132193326Sed} 1133193326Sed 1134206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 1135249423Sdim_mm_set1_epi16(short __w) 1136193326Sed{ 1137249423Sdim return (__m128i)(__v8hi){ __w, __w, __w, __w, __w, __w, __w, __w }; 1138193326Sed} 1139193326Sed 1140206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 1141249423Sdim_mm_set1_epi8(char __b) 1142193326Sed{ 1143249423Sdim return (__m128i)(__v16qi){ __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b, __b }; 1144193326Sed} 1145193326Sed 1146206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 1147193326Sed_mm_setr_epi64(__m64 q0, __m64 q1) 1148193326Sed{ 1149193326Sed return (__m128i){ (long long)q0, (long long)q1 }; 1150193326Sed} 1151193326Sed 1152206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 1153193326Sed_mm_setr_epi32(int i0, int i1, int i2, int i3) 1154193326Sed{ 1155193326Sed return (__m128i)(__v4si){ i0, i1, i2, i3}; 1156193326Sed} 1157193326Sed 1158206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 1159193326Sed_mm_setr_epi16(short w0, short w1, short w2, short w3, short w4, short w5, short w6, short w7) 1160193326Sed{ 1161193326Sed return (__m128i)(__v8hi){ w0, w1, w2, w3, w4, w5, w6, w7 }; 1162193326Sed} 1163193326Sed 1164206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 1165193326Sed_mm_setr_epi8(char b0, char b1, char b2, char b3, char b4, char b5, char b6, char b7, char b8, char b9, char b10, char b11, char b12, char b13, char b14, char b15) 1166193326Sed{ 1167193326Sed return (__m128i)(__v16qi){ b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15 }; 1168193326Sed} 1169193326Sed 1170206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 1171193326Sed_mm_setzero_si128(void) 1172193326Sed{ 1173193326Sed return (__m128i){ 0LL, 0LL }; 1174193326Sed} 1175193326Sed 1176206084Srdivackystatic __inline__ void __attribute__((__always_inline__, __nodebug__)) 1177249423Sdim_mm_store_si128(__m128i *__p, __m128i __b) 1178193326Sed{ 1179249423Sdim *__p = __b; 1180193326Sed} 1181193326Sed 1182206084Srdivackystatic __inline__ void __attribute__((__always_inline__, __nodebug__)) 1183249423Sdim_mm_storeu_si128(__m128i *__p, __m128i __b) 1184193326Sed{ 1185249423Sdim __builtin_ia32_storedqu((char *)__p, (__v16qi)__b); 1186193326Sed} 1187193326Sed 1188206084Srdivackystatic __inline__ void __attribute__((__always_inline__, __nodebug__)) 1189249423Sdim_mm_maskmoveu_si128(__m128i __d, __m128i __n, char *__p) 1190193326Sed{ 1191249423Sdim __builtin_ia32_maskmovdqu((__v16qi)__d, (__v16qi)__n, __p); 1192193326Sed} 1193193326Sed 1194206084Srdivackystatic __inline__ void __attribute__((__always_inline__, __nodebug__)) 1195249423Sdim_mm_storel_epi64(__m128i *__p, __m128i __a) 1196193326Sed{ 1197239462Sdim struct __mm_storel_epi64_struct { 1198249423Sdim long long __u; 1199239462Sdim } __attribute__((__packed__, __may_alias__)); 1200249423Sdim ((struct __mm_storel_epi64_struct*)__p)->__u = __a[0]; 1201193326Sed} 1202193326Sed 1203206084Srdivackystatic __inline__ void __attribute__((__always_inline__, __nodebug__)) 1204249423Sdim_mm_stream_pd(double *__p, __m128d __a) 1205193326Sed{ 1206249423Sdim __builtin_ia32_movntpd(__p, __a); 1207193326Sed} 1208193326Sed 1209206084Srdivackystatic __inline__ void __attribute__((__always_inline__, __nodebug__)) 1210249423Sdim_mm_stream_si128(__m128i *__p, __m128i __a) 1211193326Sed{ 1212249423Sdim __builtin_ia32_movntdq(__p, __a); 1213193326Sed} 1214193326Sed 1215206084Srdivackystatic __inline__ void __attribute__((__always_inline__, __nodebug__)) 1216249423Sdim_mm_stream_si32(int *__p, int __a) 1217193326Sed{ 1218249423Sdim __builtin_ia32_movnti(__p, __a); 1219193326Sed} 1220193326Sed 1221263508Sdim#ifdef __x86_64__ 1222206084Srdivackystatic __inline__ void __attribute__((__always_inline__, __nodebug__)) 1223263508Sdim_mm_stream_si64(long long *__p, long long __a) 1224263508Sdim{ 1225263508Sdim __builtin_ia32_movnti64(__p, __a); 1226263508Sdim} 1227263508Sdim#endif 1228263508Sdim 1229263508Sdimstatic __inline__ void __attribute__((__always_inline__, __nodebug__)) 1230249423Sdim_mm_clflush(void const *__p) 1231193326Sed{ 1232249423Sdim __builtin_ia32_clflush(__p); 1233193326Sed} 1234193326Sed 1235206084Srdivackystatic __inline__ void __attribute__((__always_inline__, __nodebug__)) 1236193326Sed_mm_lfence(void) 1237193326Sed{ 1238193326Sed __builtin_ia32_lfence(); 1239193326Sed} 1240193326Sed 1241206084Srdivackystatic __inline__ void __attribute__((__always_inline__, __nodebug__)) 1242193326Sed_mm_mfence(void) 1243193326Sed{ 1244193326Sed __builtin_ia32_mfence(); 1245193326Sed} 1246193326Sed 1247206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 1248249423Sdim_mm_packs_epi16(__m128i __a, __m128i __b) 1249193326Sed{ 1250249423Sdim return (__m128i)__builtin_ia32_packsswb128((__v8hi)__a, (__v8hi)__b); 1251193326Sed} 1252193326Sed 1253206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 1254249423Sdim_mm_packs_epi32(__m128i __a, __m128i __b) 1255193326Sed{ 1256249423Sdim return (__m128i)__builtin_ia32_packssdw128((__v4si)__a, (__v4si)__b); 1257193326Sed} 1258193326Sed 1259206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 1260249423Sdim_mm_packus_epi16(__m128i __a, __m128i __b) 1261193326Sed{ 1262249423Sdim return (__m128i)__builtin_ia32_packuswb128((__v8hi)__a, (__v8hi)__b); 1263193326Sed} 1264193326Sed 1265206084Srdivackystatic __inline__ int __attribute__((__always_inline__, __nodebug__)) 1266249423Sdim_mm_extract_epi16(__m128i __a, int __imm) 1267193326Sed{ 1268249423Sdim __v8hi __b = (__v8hi)__a; 1269263508Sdim return (unsigned short)__b[__imm & 7]; 1270193326Sed} 1271193326Sed 1272206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 1273249423Sdim_mm_insert_epi16(__m128i __a, int __b, int __imm) 1274193326Sed{ 1275249423Sdim __v8hi __c = (__v8hi)__a; 1276249423Sdim __c[__imm & 7] = __b; 1277249423Sdim return (__m128i)__c; 1278193326Sed} 1279193326Sed 1280206084Srdivackystatic __inline__ int __attribute__((__always_inline__, __nodebug__)) 1281249423Sdim_mm_movemask_epi8(__m128i __a) 1282193326Sed{ 1283249423Sdim return __builtin_ia32_pmovmskb128((__v16qi)__a); 1284193326Sed} 1285193326Sed 1286234353Sdim#define _mm_shuffle_epi32(a, imm) __extension__ ({ \ 1287263508Sdim _Pragma("clang diagnostic push") _Pragma("clang diagnostic ignored \"-Wshadow\""); \ 1288234353Sdim __m128i __a = (a); \ 1289263508Sdim _Pragma("clang diagnostic pop"); \ 1290234353Sdim (__m128i)__builtin_shufflevector((__v4si)__a, (__v4si) _mm_set1_epi32(0), \ 1291234353Sdim (imm) & 0x3, ((imm) & 0xc) >> 2, \ 1292234353Sdim ((imm) & 0x30) >> 4, ((imm) & 0xc0) >> 6); }) 1293221345Sdim 1294234353Sdim#define _mm_shufflelo_epi16(a, imm) __extension__ ({ \ 1295263508Sdim _Pragma("clang diagnostic push") _Pragma("clang diagnostic ignored \"-Wshadow\""); \ 1296234353Sdim __m128i __a = (a); \ 1297263508Sdim _Pragma("clang diagnostic pop"); \ 1298234353Sdim (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi) _mm_set1_epi16(0), \ 1299234353Sdim (imm) & 0x3, ((imm) & 0xc) >> 2, \ 1300234353Sdim ((imm) & 0x30) >> 4, ((imm) & 0xc0) >> 6, \ 1301234353Sdim 4, 5, 6, 7); }) 1302221345Sdim 1303234353Sdim#define _mm_shufflehi_epi16(a, imm) __extension__ ({ \ 1304263508Sdim _Pragma("clang diagnostic push") _Pragma("clang diagnostic ignored \"-Wshadow\""); \ 1305234353Sdim __m128i __a = (a); \ 1306263508Sdim _Pragma("clang diagnostic pop"); \ 1307234353Sdim (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi) _mm_set1_epi16(0), \ 1308234353Sdim 0, 1, 2, 3, \ 1309234353Sdim 4 + (((imm) & 0x03) >> 0), \ 1310234353Sdim 4 + (((imm) & 0x0c) >> 2), \ 1311234353Sdim 4 + (((imm) & 0x30) >> 4), \ 1312234353Sdim 4 + (((imm) & 0xc0) >> 6)); }) 1313193326Sed 1314206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 1315249423Sdim_mm_unpackhi_epi8(__m128i __a, __m128i __b) 1316193326Sed{ 1317249423Sdim return (__m128i)__builtin_shufflevector((__v16qi)__a, (__v16qi)__b, 8, 16+8, 9, 16+9, 10, 16+10, 11, 16+11, 12, 16+12, 13, 16+13, 14, 16+14, 15, 16+15); 1318193326Sed} 1319193326Sed 1320206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 1321249423Sdim_mm_unpackhi_epi16(__m128i __a, __m128i __b) 1322193326Sed{ 1323249423Sdim return (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi)__b, 4, 8+4, 5, 8+5, 6, 8+6, 7, 8+7); 1324193326Sed} 1325193326Sed 1326206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 1327249423Sdim_mm_unpackhi_epi32(__m128i __a, __m128i __b) 1328193326Sed{ 1329249423Sdim return (__m128i)__builtin_shufflevector((__v4si)__a, (__v4si)__b, 2, 4+2, 3, 4+3); 1330193326Sed} 1331193326Sed 1332206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 1333249423Sdim_mm_unpackhi_epi64(__m128i __a, __m128i __b) 1334193326Sed{ 1335249423Sdim return (__m128i)__builtin_shufflevector(__a, __b, 1, 2+1); 1336193326Sed} 1337193326Sed 1338206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 1339249423Sdim_mm_unpacklo_epi8(__m128i __a, __m128i __b) 1340193326Sed{ 1341249423Sdim return (__m128i)__builtin_shufflevector((__v16qi)__a, (__v16qi)__b, 0, 16+0, 1, 16+1, 2, 16+2, 3, 16+3, 4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7); 1342193326Sed} 1343193326Sed 1344206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 1345249423Sdim_mm_unpacklo_epi16(__m128i __a, __m128i __b) 1346193326Sed{ 1347249423Sdim return (__m128i)__builtin_shufflevector((__v8hi)__a, (__v8hi)__b, 0, 8+0, 1, 8+1, 2, 8+2, 3, 8+3); 1348193326Sed} 1349193326Sed 1350206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 1351249423Sdim_mm_unpacklo_epi32(__m128i __a, __m128i __b) 1352193326Sed{ 1353249423Sdim return (__m128i)__builtin_shufflevector((__v4si)__a, (__v4si)__b, 0, 4+0, 1, 4+1); 1354193326Sed} 1355193326Sed 1356206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 1357249423Sdim_mm_unpacklo_epi64(__m128i __a, __m128i __b) 1358193326Sed{ 1359249423Sdim return (__m128i)__builtin_shufflevector(__a, __b, 0, 2+0); 1360193326Sed} 1361193326Sed 1362206084Srdivackystatic __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 1363249423Sdim_mm_movepi64_pi64(__m128i __a) 1364193326Sed{ 1365249423Sdim return (__m64)__a[0]; 1366193326Sed} 1367193326Sed 1368206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 1369258747Sdim_mm_movpi64_epi64(__m64 __a) 1370193326Sed{ 1371249423Sdim return (__m128i){ (long long)__a, 0 }; 1372193326Sed} 1373193326Sed 1374206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 1375249423Sdim_mm_move_epi64(__m128i __a) 1376193326Sed{ 1377249423Sdim return __builtin_shufflevector(__a, (__m128i){ 0 }, 0, 2); 1378193326Sed} 1379193326Sed 1380206084Srdivackystatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 1381249423Sdim_mm_unpackhi_pd(__m128d __a, __m128d __b) 1382193326Sed{ 1383249423Sdim return __builtin_shufflevector(__a, __b, 1, 2+1); 1384193326Sed} 1385193326Sed 1386206084Srdivackystatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 1387249423Sdim_mm_unpacklo_pd(__m128d __a, __m128d __b) 1388193326Sed{ 1389249423Sdim return __builtin_shufflevector(__a, __b, 0, 2+0); 1390193326Sed} 1391193326Sed 1392206084Srdivackystatic __inline__ int __attribute__((__always_inline__, __nodebug__)) 1393249423Sdim_mm_movemask_pd(__m128d __a) 1394193326Sed{ 1395249423Sdim return __builtin_ia32_movmskpd(__a); 1396193326Sed} 1397193326Sed 1398234353Sdim#define _mm_shuffle_pd(a, b, i) __extension__ ({ \ 1399263508Sdim _Pragma("clang diagnostic push") _Pragma("clang diagnostic ignored \"-Wshadow\""); \ 1400234353Sdim __m128d __a = (a); \ 1401234353Sdim __m128d __b = (b); \ 1402263508Sdim _Pragma("clang diagnostic pop"); \ 1403234353Sdim __builtin_shufflevector(__a, __b, (i) & 1, (((i) & 2) >> 1) + 2); }) 1404193326Sed 1405206084Srdivackystatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 1406251662Sdim_mm_castpd_ps(__m128d __a) 1407193326Sed{ 1408251662Sdim return (__m128)__a; 1409193326Sed} 1410193326Sed 1411206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 1412251662Sdim_mm_castpd_si128(__m128d __a) 1413193326Sed{ 1414251662Sdim return (__m128i)__a; 1415193326Sed} 1416193326Sed 1417206084Srdivackystatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 1418251662Sdim_mm_castps_pd(__m128 __a) 1419193326Sed{ 1420251662Sdim return (__m128d)__a; 1421193326Sed} 1422193326Sed 1423206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 1424251662Sdim_mm_castps_si128(__m128 __a) 1425193326Sed{ 1426251662Sdim return (__m128i)__a; 1427193326Sed} 1428193326Sed 1429206084Srdivackystatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 1430251662Sdim_mm_castsi128_ps(__m128i __a) 1431193326Sed{ 1432251662Sdim return (__m128)__a; 1433193326Sed} 1434193326Sed 1435206084Srdivackystatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 1436251662Sdim_mm_castsi128_pd(__m128i __a) 1437193326Sed{ 1438251662Sdim return (__m128d)__a; 1439193326Sed} 1440193326Sed 1441206084Srdivackystatic __inline__ void __attribute__((__always_inline__, __nodebug__)) 1442193326Sed_mm_pause(void) 1443193326Sed{ 1444193326Sed __asm__ volatile ("pause"); 1445193326Sed} 1446193326Sed 1447193326Sed#define _MM_SHUFFLE2(x, y) (((x) << 1) | (y)) 1448193326Sed 1449193326Sed#endif /* __SSE2__ */ 1450193326Sed 1451193326Sed#endif /* __EMMINTRIN_H */ 1452