/* xmmintrin.h (FreeBSD contrib copy of clang's SSE intrinsics header, svn revision 234353).
   The text below was recovered from svn-annotate output. */
1193326Sed/*===---- xmmintrin.h - SSE intrinsics -------------------------------------=== 2193326Sed * 3193326Sed * Permission is hereby granted, free of charge, to any person obtaining a copy 4193326Sed * of this software and associated documentation files (the "Software"), to deal 5193326Sed * in the Software without restriction, including without limitation the rights 6193326Sed * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7193326Sed * copies of the Software, and to permit persons to whom the Software is 8193326Sed * furnished to do so, subject to the following conditions: 9193326Sed * 10193326Sed * The above copyright notice and this permission notice shall be included in 11193326Sed * all copies or substantial portions of the Software. 12193326Sed * 13193326Sed * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14193326Sed * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15193326Sed * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16193326Sed * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17193326Sed * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18193326Sed * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19193326Sed * THE SOFTWARE. 
20193326Sed * 21193326Sed *===-----------------------------------------------------------------------=== 22193326Sed */ 23193326Sed 24193326Sed#ifndef __XMMINTRIN_H 25193326Sed#define __XMMINTRIN_H 26193326Sed 27193326Sed#ifndef __SSE__ 28193326Sed#error "SSE instruction set not enabled" 29193326Sed#else 30193326Sed 31193326Sed#include <mmintrin.h> 32193326Sed 33205408Srdivackytypedef int __v4si __attribute__((__vector_size__(16))); 34193326Sedtypedef float __v4sf __attribute__((__vector_size__(16))); 35193326Sedtypedef float __m128 __attribute__((__vector_size__(16))); 36193326Sed 37218893Sdim// This header should only be included in a hosted environment as it depends on 38218893Sdim// a standard library to provide allocation routines. 39218893Sdim#if __STDC_HOSTED__ 40193326Sed#include <mm_malloc.h> 41218893Sdim#endif 42193326Sed 43206084Srdivackystatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 44193326Sed_mm_add_ss(__m128 a, __m128 b) 45193326Sed{ 46193576Sed a[0] += b[0]; 47193576Sed return a; 48193326Sed} 49193326Sed 50206084Srdivackystatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 51193326Sed_mm_add_ps(__m128 a, __m128 b) 52193326Sed{ 53193326Sed return a + b; 54193326Sed} 55193326Sed 56206084Srdivackystatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 57193326Sed_mm_sub_ss(__m128 a, __m128 b) 58193326Sed{ 59193576Sed a[0] -= b[0]; 60193576Sed return a; 61193326Sed} 62193326Sed 63206084Srdivackystatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 64193326Sed_mm_sub_ps(__m128 a, __m128 b) 65193326Sed{ 66193326Sed return a - b; 67193326Sed} 68193326Sed 69206084Srdivackystatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 70193326Sed_mm_mul_ss(__m128 a, __m128 b) 71193326Sed{ 72193576Sed a[0] *= b[0]; 73193576Sed return a; 74193326Sed} 75193326Sed 76206084Srdivackystatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 
77193326Sed_mm_mul_ps(__m128 a, __m128 b) 78193326Sed{ 79193326Sed return a * b; 80193326Sed} 81193326Sed 82206084Srdivackystatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 83193326Sed_mm_div_ss(__m128 a, __m128 b) 84193326Sed{ 85193576Sed a[0] /= b[0]; 86193576Sed return a; 87193326Sed} 88193326Sed 89206084Srdivackystatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 90193326Sed_mm_div_ps(__m128 a, __m128 b) 91193326Sed{ 92193326Sed return a / b; 93193326Sed} 94193326Sed 95206084Srdivackystatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 96193326Sed_mm_sqrt_ss(__m128 a) 97193326Sed{ 98193326Sed return __builtin_ia32_sqrtss(a); 99193326Sed} 100193326Sed 101206084Srdivackystatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 102193326Sed_mm_sqrt_ps(__m128 a) 103193326Sed{ 104193326Sed return __builtin_ia32_sqrtps(a); 105193326Sed} 106193326Sed 107206084Srdivackystatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 108193326Sed_mm_rcp_ss(__m128 a) 109193326Sed{ 110193326Sed return __builtin_ia32_rcpss(a); 111193326Sed} 112193326Sed 113206084Srdivackystatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 114193326Sed_mm_rcp_ps(__m128 a) 115193326Sed{ 116193326Sed return __builtin_ia32_rcpps(a); 117193326Sed} 118193326Sed 119206084Srdivackystatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 120193326Sed_mm_rsqrt_ss(__m128 a) 121193326Sed{ 122193326Sed return __builtin_ia32_rsqrtss(a); 123193326Sed} 124193326Sed 125206084Srdivackystatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 126193326Sed_mm_rsqrt_ps(__m128 a) 127193326Sed{ 128193326Sed return __builtin_ia32_rsqrtps(a); 129193326Sed} 130193326Sed 131206084Srdivackystatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 132193326Sed_mm_min_ss(__m128 a, __m128 b) 133193326Sed{ 134193326Sed return __builtin_ia32_minss(a, b); 135193326Sed} 
136193326Sed 137206084Srdivackystatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 138193326Sed_mm_min_ps(__m128 a, __m128 b) 139193326Sed{ 140193326Sed return __builtin_ia32_minps(a, b); 141193326Sed} 142193326Sed 143206084Srdivackystatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 144193326Sed_mm_max_ss(__m128 a, __m128 b) 145193326Sed{ 146193326Sed return __builtin_ia32_maxss(a, b); 147193326Sed} 148193326Sed 149206084Srdivackystatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 150193326Sed_mm_max_ps(__m128 a, __m128 b) 151193326Sed{ 152193326Sed return __builtin_ia32_maxps(a, b); 153193326Sed} 154193326Sed 155206084Srdivackystatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 156193326Sed_mm_and_ps(__m128 a, __m128 b) 157193326Sed{ 158193576Sed return (__m128)((__v4si)a & (__v4si)b); 159193326Sed} 160193326Sed 161206084Srdivackystatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 162193326Sed_mm_andnot_ps(__m128 a, __m128 b) 163193326Sed{ 164193576Sed return (__m128)(~(__v4si)a & (__v4si)b); 165193326Sed} 166193326Sed 167206084Srdivackystatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 168193326Sed_mm_or_ps(__m128 a, __m128 b) 169193326Sed{ 170193576Sed return (__m128)((__v4si)a | (__v4si)b); 171193326Sed} 172193326Sed 173206084Srdivackystatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 174193326Sed_mm_xor_ps(__m128 a, __m128 b) 175193326Sed{ 176202379Srdivacky return (__m128)((__v4si)a ^ (__v4si)b); 177193326Sed} 178193326Sed 179206084Srdivackystatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 180193326Sed_mm_cmpeq_ss(__m128 a, __m128 b) 181193326Sed{ 182193326Sed return (__m128)__builtin_ia32_cmpss(a, b, 0); 183193326Sed} 184193326Sed 185206084Srdivackystatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 186193326Sed_mm_cmpeq_ps(__m128 a, __m128 b) 187193326Sed{ 
188193326Sed return (__m128)__builtin_ia32_cmpps(a, b, 0); 189193326Sed} 190193326Sed 191206084Srdivackystatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 192193326Sed_mm_cmplt_ss(__m128 a, __m128 b) 193193326Sed{ 194193326Sed return (__m128)__builtin_ia32_cmpss(a, b, 1); 195193326Sed} 196193326Sed 197206084Srdivackystatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 198193326Sed_mm_cmplt_ps(__m128 a, __m128 b) 199193326Sed{ 200193326Sed return (__m128)__builtin_ia32_cmpps(a, b, 1); 201193326Sed} 202193326Sed 203206084Srdivackystatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 204193326Sed_mm_cmple_ss(__m128 a, __m128 b) 205193326Sed{ 206193326Sed return (__m128)__builtin_ia32_cmpss(a, b, 2); 207193326Sed} 208193326Sed 209206084Srdivackystatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 210193326Sed_mm_cmple_ps(__m128 a, __m128 b) 211193326Sed{ 212193326Sed return (__m128)__builtin_ia32_cmpps(a, b, 2); 213193326Sed} 214193326Sed 215206084Srdivackystatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 216193326Sed_mm_cmpgt_ss(__m128 a, __m128 b) 217193326Sed{ 218193326Sed return (__m128)__builtin_ia32_cmpss(b, a, 1); 219193326Sed} 220193326Sed 221206084Srdivackystatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 222193326Sed_mm_cmpgt_ps(__m128 a, __m128 b) 223193326Sed{ 224193326Sed return (__m128)__builtin_ia32_cmpps(b, a, 1); 225193326Sed} 226193326Sed 227206084Srdivackystatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 228193326Sed_mm_cmpge_ss(__m128 a, __m128 b) 229193326Sed{ 230193326Sed return (__m128)__builtin_ia32_cmpss(b, a, 2); 231193326Sed} 232193326Sed 233206084Srdivackystatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 234193326Sed_mm_cmpge_ps(__m128 a, __m128 b) 235193326Sed{ 236193326Sed return (__m128)__builtin_ia32_cmpps(b, a, 2); 237193326Sed} 238193326Sed 239206084Srdivackystatic 
__inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 240193326Sed_mm_cmpneq_ss(__m128 a, __m128 b) 241193326Sed{ 242193326Sed return (__m128)__builtin_ia32_cmpss(a, b, 4); 243193326Sed} 244193326Sed 245206084Srdivackystatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 246193326Sed_mm_cmpneq_ps(__m128 a, __m128 b) 247193326Sed{ 248193326Sed return (__m128)__builtin_ia32_cmpps(a, b, 4); 249193326Sed} 250193326Sed 251206084Srdivackystatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 252193326Sed_mm_cmpnlt_ss(__m128 a, __m128 b) 253193326Sed{ 254193326Sed return (__m128)__builtin_ia32_cmpss(a, b, 5); 255193326Sed} 256193326Sed 257206084Srdivackystatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 258193326Sed_mm_cmpnlt_ps(__m128 a, __m128 b) 259193326Sed{ 260193326Sed return (__m128)__builtin_ia32_cmpps(a, b, 5); 261193326Sed} 262193326Sed 263206084Srdivackystatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 264193326Sed_mm_cmpnle_ss(__m128 a, __m128 b) 265193326Sed{ 266193326Sed return (__m128)__builtin_ia32_cmpss(a, b, 6); 267193326Sed} 268193326Sed 269206084Srdivackystatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 270193326Sed_mm_cmpnle_ps(__m128 a, __m128 b) 271193326Sed{ 272193326Sed return (__m128)__builtin_ia32_cmpps(a, b, 6); 273193326Sed} 274193326Sed 275206084Srdivackystatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 276193326Sed_mm_cmpngt_ss(__m128 a, __m128 b) 277193326Sed{ 278193326Sed return (__m128)__builtin_ia32_cmpss(b, a, 5); 279193326Sed} 280193326Sed 281206084Srdivackystatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 282193326Sed_mm_cmpngt_ps(__m128 a, __m128 b) 283193326Sed{ 284193326Sed return (__m128)__builtin_ia32_cmpps(b, a, 5); 285193326Sed} 286193326Sed 287206084Srdivackystatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 288193326Sed_mm_cmpnge_ss(__m128 a, 
__m128 b) 289193326Sed{ 290193326Sed return (__m128)__builtin_ia32_cmpss(b, a, 6); 291193326Sed} 292193326Sed 293206084Srdivackystatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 294193326Sed_mm_cmpnge_ps(__m128 a, __m128 b) 295193326Sed{ 296193326Sed return (__m128)__builtin_ia32_cmpps(b, a, 6); 297193326Sed} 298193326Sed 299206084Srdivackystatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 300193326Sed_mm_cmpord_ss(__m128 a, __m128 b) 301193326Sed{ 302193326Sed return (__m128)__builtin_ia32_cmpss(a, b, 7); 303193326Sed} 304193326Sed 305206084Srdivackystatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 306193326Sed_mm_cmpord_ps(__m128 a, __m128 b) 307193326Sed{ 308193326Sed return (__m128)__builtin_ia32_cmpps(a, b, 7); 309193326Sed} 310193326Sed 311206084Srdivackystatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 312193326Sed_mm_cmpunord_ss(__m128 a, __m128 b) 313193326Sed{ 314193326Sed return (__m128)__builtin_ia32_cmpss(a, b, 3); 315193326Sed} 316193326Sed 317206084Srdivackystatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 318193326Sed_mm_cmpunord_ps(__m128 a, __m128 b) 319193326Sed{ 320193326Sed return (__m128)__builtin_ia32_cmpps(a, b, 3); 321193326Sed} 322193326Sed 323206084Srdivackystatic __inline__ int __attribute__((__always_inline__, __nodebug__)) 324193326Sed_mm_comieq_ss(__m128 a, __m128 b) 325193326Sed{ 326193326Sed return __builtin_ia32_comieq(a, b); 327193326Sed} 328193326Sed 329206084Srdivackystatic __inline__ int __attribute__((__always_inline__, __nodebug__)) 330193326Sed_mm_comilt_ss(__m128 a, __m128 b) 331193326Sed{ 332193326Sed return __builtin_ia32_comilt(a, b); 333193326Sed} 334193326Sed 335206084Srdivackystatic __inline__ int __attribute__((__always_inline__, __nodebug__)) 336193326Sed_mm_comile_ss(__m128 a, __m128 b) 337193326Sed{ 338193326Sed return __builtin_ia32_comile(a, b); 339193326Sed} 340193326Sed 341206084Srdivackystatic 
__inline__ int __attribute__((__always_inline__, __nodebug__)) 342193326Sed_mm_comigt_ss(__m128 a, __m128 b) 343193326Sed{ 344193326Sed return __builtin_ia32_comigt(a, b); 345193326Sed} 346193326Sed 347206084Srdivackystatic __inline__ int __attribute__((__always_inline__, __nodebug__)) 348193326Sed_mm_comige_ss(__m128 a, __m128 b) 349193326Sed{ 350193326Sed return __builtin_ia32_comige(a, b); 351193326Sed} 352193326Sed 353206084Srdivackystatic __inline__ int __attribute__((__always_inline__, __nodebug__)) 354193326Sed_mm_comineq_ss(__m128 a, __m128 b) 355193326Sed{ 356193326Sed return __builtin_ia32_comineq(a, b); 357193326Sed} 358193326Sed 359206084Srdivackystatic __inline__ int __attribute__((__always_inline__, __nodebug__)) 360193326Sed_mm_ucomieq_ss(__m128 a, __m128 b) 361193326Sed{ 362193326Sed return __builtin_ia32_ucomieq(a, b); 363193326Sed} 364193326Sed 365206084Srdivackystatic __inline__ int __attribute__((__always_inline__, __nodebug__)) 366193326Sed_mm_ucomilt_ss(__m128 a, __m128 b) 367193326Sed{ 368193326Sed return __builtin_ia32_ucomilt(a, b); 369193326Sed} 370193326Sed 371206084Srdivackystatic __inline__ int __attribute__((__always_inline__, __nodebug__)) 372193326Sed_mm_ucomile_ss(__m128 a, __m128 b) 373193326Sed{ 374193326Sed return __builtin_ia32_ucomile(a, b); 375193326Sed} 376193326Sed 377206084Srdivackystatic __inline__ int __attribute__((__always_inline__, __nodebug__)) 378193326Sed_mm_ucomigt_ss(__m128 a, __m128 b) 379193326Sed{ 380193326Sed return __builtin_ia32_ucomigt(a, b); 381193326Sed} 382193326Sed 383206084Srdivackystatic __inline__ int __attribute__((__always_inline__, __nodebug__)) 384193326Sed_mm_ucomige_ss(__m128 a, __m128 b) 385193326Sed{ 386193326Sed return __builtin_ia32_ucomige(a, b); 387193326Sed} 388193326Sed 389206084Srdivackystatic __inline__ int __attribute__((__always_inline__, __nodebug__)) 390193326Sed_mm_ucomineq_ss(__m128 a, __m128 b) 391193326Sed{ 392193326Sed return __builtin_ia32_ucomineq(a, b); 393193326Sed} 
394193326Sed 395206084Srdivackystatic __inline__ int __attribute__((__always_inline__, __nodebug__)) 396193326Sed_mm_cvtss_si32(__m128 a) 397193326Sed{ 398193326Sed return __builtin_ia32_cvtss2si(a); 399193326Sed} 400193326Sed 401206084Srdivackystatic __inline__ int __attribute__((__always_inline__, __nodebug__)) 402204643Srdivacky_mm_cvt_ss2si(__m128 a) 403204643Srdivacky{ 404204643Srdivacky return _mm_cvtss_si32(a); 405204643Srdivacky} 406204643Srdivacky 407193576Sed#ifdef __x86_64__ 408193576Sed 409206084Srdivackystatic __inline__ long long __attribute__((__always_inline__, __nodebug__)) 410193326Sed_mm_cvtss_si64(__m128 a) 411193326Sed{ 412193326Sed return __builtin_ia32_cvtss2si64(a); 413193326Sed} 414193326Sed 415193576Sed#endif 416193576Sed 417206084Srdivackystatic __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 418193326Sed_mm_cvtps_pi32(__m128 a) 419193326Sed{ 420193326Sed return (__m64)__builtin_ia32_cvtps2pi(a); 421193326Sed} 422193326Sed 423212904Sdimstatic __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 424212904Sdim_mm_cvt_ps2pi(__m128 a) 425212904Sdim{ 426212904Sdim return _mm_cvtps_pi32(a); 427212904Sdim} 428212904Sdim 429206084Srdivackystatic __inline__ int __attribute__((__always_inline__, __nodebug__)) 430193326Sed_mm_cvttss_si32(__m128 a) 431193326Sed{ 432193576Sed return a[0]; 433193326Sed} 434193326Sed 435206084Srdivackystatic __inline__ int __attribute__((__always_inline__, __nodebug__)) 436204643Srdivacky_mm_cvtt_ss2si(__m128 a) 437204643Srdivacky{ 438204643Srdivacky return _mm_cvttss_si32(a); 439204643Srdivacky} 440204643Srdivacky 441206084Srdivackystatic __inline__ long long __attribute__((__always_inline__, __nodebug__)) 442193326Sed_mm_cvttss_si64(__m128 a) 443193326Sed{ 444193576Sed return a[0]; 445193326Sed} 446193326Sed 447206084Srdivackystatic __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 448193326Sed_mm_cvttps_pi32(__m128 a) 449193326Sed{ 450193326Sed return 
(__m64)__builtin_ia32_cvttps2pi(a); 451193326Sed} 452193326Sed 453212904Sdimstatic __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 454212904Sdim_mm_cvtt_ps2pi(__m128 a) 455212904Sdim{ 456212904Sdim return _mm_cvttps_pi32(a); 457212904Sdim} 458212904Sdim 459206084Srdivackystatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 460193326Sed_mm_cvtsi32_ss(__m128 a, int b) 461193326Sed{ 462193576Sed a[0] = b; 463193576Sed return a; 464193326Sed} 465193326Sed 466212904Sdimstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 467212904Sdim_mm_cvt_si2ss(__m128 a, int b) 468212904Sdim{ 469212904Sdim return _mm_cvtsi32_ss(a, b); 470212904Sdim} 471212904Sdim 472193326Sed#ifdef __x86_64__ 473193326Sed 474206084Srdivackystatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 475193326Sed_mm_cvtsi64_ss(__m128 a, long long b) 476193326Sed{ 477193576Sed a[0] = b; 478193576Sed return a; 479193326Sed} 480193326Sed 481193326Sed#endif 482193326Sed 483206084Srdivackystatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 484193326Sed_mm_cvtpi32_ps(__m128 a, __m64 b) 485193326Sed{ 486193326Sed return __builtin_ia32_cvtpi2ps(a, (__v2si)b); 487193326Sed} 488193326Sed 489212904Sdimstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 490212904Sdim_mm_cvt_pi2ps(__m128 a, __m64 b) 491212904Sdim{ 492212904Sdim return _mm_cvtpi32_ps(a, b); 493212904Sdim} 494212904Sdim 495206084Srdivackystatic __inline__ float __attribute__((__always_inline__, __nodebug__)) 496193326Sed_mm_cvtss_f32(__m128 a) 497193326Sed{ 498193326Sed return a[0]; 499193326Sed} 500193326Sed 501206084Srdivackystatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 502203955Srdivacky_mm_loadh_pi(__m128 a, const __m64 *p) 503193326Sed{ 504226633Sdim typedef float __mm_loadh_pi_v2f32 __attribute__((__vector_size__(8))); 505226633Sdim struct __mm_loadh_pi_struct { 506226633Sdim __mm_loadh_pi_v2f32 u; 
507226633Sdim } __attribute__((__packed__, __may_alias__)); 508226633Sdim __mm_loadh_pi_v2f32 b = ((struct __mm_loadh_pi_struct*)p)->u; 509226633Sdim __m128 bb = __builtin_shufflevector(b, b, 0, 1, 0, 1); 510226633Sdim return __builtin_shufflevector(a, bb, 0, 1, 4, 5); 511193326Sed} 512193326Sed 513206084Srdivackystatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 514203955Srdivacky_mm_loadl_pi(__m128 a, const __m64 *p) 515193326Sed{ 516226633Sdim typedef float __mm_loadl_pi_v2f32 __attribute__((__vector_size__(8))); 517226633Sdim struct __mm_loadl_pi_struct { 518226633Sdim __mm_loadl_pi_v2f32 u; 519226633Sdim } __attribute__((__packed__, __may_alias__)); 520226633Sdim __mm_loadl_pi_v2f32 b = ((struct __mm_loadl_pi_struct*)p)->u; 521226633Sdim __m128 bb = __builtin_shufflevector(b, b, 0, 1, 0, 1); 522226633Sdim return __builtin_shufflevector(a, bb, 4, 5, 2, 3); 523193326Sed} 524193326Sed 525206084Srdivackystatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 526203955Srdivacky_mm_load_ss(const float *p) 527193326Sed{ 528226633Sdim struct __mm_load_ss_struct { 529226633Sdim float u; 530226633Sdim } __attribute__((__packed__, __may_alias__)); 531226633Sdim float u = ((struct __mm_load_ss_struct*)p)->u; 532226633Sdim return (__m128){ u, 0, 0, 0 }; 533193326Sed} 534193326Sed 535206084Srdivackystatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 536203955Srdivacky_mm_load1_ps(const float *p) 537193326Sed{ 538226633Sdim struct __mm_load1_ps_struct { 539226633Sdim float u; 540226633Sdim } __attribute__((__packed__, __may_alias__)); 541226633Sdim float u = ((struct __mm_load1_ps_struct*)p)->u; 542226633Sdim return (__m128){ u, u, u, u }; 543193326Sed} 544193326Sed 545193326Sed#define _mm_load_ps1(p) _mm_load1_ps(p) 546193326Sed 547206084Srdivackystatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 548203955Srdivacky_mm_load_ps(const float *p) 549193326Sed{ 550193326Sed return *(__m128*)p; 
551193326Sed} 552193326Sed 553206084Srdivackystatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 554203955Srdivacky_mm_loadu_ps(const float *p) 555193326Sed{ 556223017Sdim struct __loadu_ps { 557223017Sdim __m128 v; 558226633Sdim } __attribute__((__packed__, __may_alias__)); 559223017Sdim return ((struct __loadu_ps*)p)->v; 560193326Sed} 561193326Sed 562206084Srdivackystatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 563203955Srdivacky_mm_loadr_ps(const float *p) 564193326Sed{ 565193326Sed __m128 a = _mm_load_ps(p); 566193326Sed return __builtin_shufflevector(a, a, 3, 2, 1, 0); 567193326Sed} 568193326Sed 569206084Srdivackystatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 570193326Sed_mm_set_ss(float w) 571193326Sed{ 572193326Sed return (__m128){ w, 0, 0, 0 }; 573193326Sed} 574193326Sed 575206084Srdivackystatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 576193326Sed_mm_set1_ps(float w) 577193326Sed{ 578193326Sed return (__m128){ w, w, w, w }; 579193326Sed} 580193326Sed 581193326Sed// Microsoft specific. 
582206084Srdivackystatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 583193326Sed_mm_set_ps1(float w) 584193326Sed{ 585193326Sed return _mm_set1_ps(w); 586193326Sed} 587193326Sed 588206084Srdivackystatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 589193326Sed_mm_set_ps(float z, float y, float x, float w) 590193326Sed{ 591193326Sed return (__m128){ w, x, y, z }; 592193326Sed} 593193326Sed 594206084Srdivackystatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 595193326Sed_mm_setr_ps(float z, float y, float x, float w) 596193326Sed{ 597193326Sed return (__m128){ z, y, x, w }; 598193326Sed} 599193326Sed 600206084Srdivackystatic __inline__ __m128 __attribute__((__always_inline__)) 601193326Sed_mm_setzero_ps(void) 602193326Sed{ 603193326Sed return (__m128){ 0, 0, 0, 0 }; 604193326Sed} 605193326Sed 606206084Srdivackystatic __inline__ void __attribute__((__always_inline__)) 607193326Sed_mm_storeh_pi(__m64 *p, __m128 a) 608193326Sed{ 609193326Sed __builtin_ia32_storehps((__v2si *)p, a); 610193326Sed} 611193326Sed 612206084Srdivackystatic __inline__ void __attribute__((__always_inline__)) 613193326Sed_mm_storel_pi(__m64 *p, __m128 a) 614193326Sed{ 615193326Sed __builtin_ia32_storelps((__v2si *)p, a); 616193326Sed} 617193326Sed 618206084Srdivackystatic __inline__ void __attribute__((__always_inline__)) 619193326Sed_mm_store_ss(float *p, __m128 a) 620193326Sed{ 621226633Sdim struct __mm_store_ss_struct { 622226633Sdim float u; 623226633Sdim } __attribute__((__packed__, __may_alias__)); 624226633Sdim ((struct __mm_store_ss_struct*)p)->u = a[0]; 625193326Sed} 626193326Sed 627206084Srdivackystatic __inline__ void __attribute__((__always_inline__, __nodebug__)) 628193326Sed_mm_storeu_ps(float *p, __m128 a) 629193326Sed{ 630193326Sed __builtin_ia32_storeups(p, a); 631193326Sed} 632193326Sed 633206084Srdivackystatic __inline__ void __attribute__((__always_inline__, __nodebug__)) 634193326Sed_mm_store1_ps(float *p, 
__m128 a) 635193326Sed{ 636193326Sed a = __builtin_shufflevector(a, a, 0, 0, 0, 0); 637193326Sed _mm_storeu_ps(p, a); 638193326Sed} 639193326Sed 640206084Srdivackystatic __inline__ void __attribute__((__always_inline__, __nodebug__)) 641212904Sdim_mm_store_ps1(float *p, __m128 a) 642212904Sdim{ 643212904Sdim return _mm_store1_ps(p, a); 644212904Sdim} 645212904Sdim 646212904Sdimstatic __inline__ void __attribute__((__always_inline__, __nodebug__)) 647193326Sed_mm_store_ps(float *p, __m128 a) 648193326Sed{ 649193326Sed *(__m128 *)p = a; 650193326Sed} 651193326Sed 652206084Srdivackystatic __inline__ void __attribute__((__always_inline__, __nodebug__)) 653193326Sed_mm_storer_ps(float *p, __m128 a) 654193326Sed{ 655193326Sed a = __builtin_shufflevector(a, a, 3, 2, 1, 0); 656193326Sed _mm_store_ps(p, a); 657193326Sed} 658193326Sed 659212904Sdim#define _MM_HINT_T0 3 660193326Sed#define _MM_HINT_T1 2 661212904Sdim#define _MM_HINT_T2 1 662193326Sed#define _MM_HINT_NTA 0 663193326Sed 664210299Sed/* FIXME: We have to #define this because "sel" must be a constant integer, and 665193326Sed Sema doesn't do any form of constant propagation yet. 
*/ 666193326Sed 667234353Sdim#define _mm_prefetch(a, sel) (__builtin_prefetch((void *)(a), 0, (sel))) 668193326Sed 669206084Srdivackystatic __inline__ void __attribute__((__always_inline__, __nodebug__)) 670193326Sed_mm_stream_pi(__m64 *p, __m64 a) 671193326Sed{ 672193326Sed __builtin_ia32_movntq(p, a); 673193326Sed} 674193326Sed 675206084Srdivackystatic __inline__ void __attribute__((__always_inline__, __nodebug__)) 676193326Sed_mm_stream_ps(float *p, __m128 a) 677193326Sed{ 678193326Sed __builtin_ia32_movntps(p, a); 679193326Sed} 680193326Sed 681206084Srdivackystatic __inline__ void __attribute__((__always_inline__, __nodebug__)) 682193326Sed_mm_sfence(void) 683193326Sed{ 684193326Sed __builtin_ia32_sfence(); 685193326Sed} 686193326Sed 687206084Srdivackystatic __inline__ int __attribute__((__always_inline__, __nodebug__)) 688193326Sed_mm_extract_pi16(__m64 a, int n) 689193326Sed{ 690193326Sed __v4hi b = (__v4hi)a; 691193576Sed return (unsigned short)b[n & 3]; 692193326Sed} 693193326Sed 694206084Srdivackystatic __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 695193326Sed_mm_insert_pi16(__m64 a, int d, int n) 696193326Sed{ 697193576Sed __v4hi b = (__v4hi)a; 698193576Sed b[n & 3] = d; 699193576Sed return (__m64)b; 700193326Sed} 701193326Sed 702206084Srdivackystatic __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 703193326Sed_mm_max_pi16(__m64 a, __m64 b) 704193326Sed{ 705193326Sed return (__m64)__builtin_ia32_pmaxsw((__v4hi)a, (__v4hi)b); 706193326Sed} 707193326Sed 708206084Srdivackystatic __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 709193326Sed_mm_max_pu8(__m64 a, __m64 b) 710193326Sed{ 711193326Sed return (__m64)__builtin_ia32_pmaxub((__v8qi)a, (__v8qi)b); 712193326Sed} 713193326Sed 714206084Srdivackystatic __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 715193326Sed_mm_min_pi16(__m64 a, __m64 b) 716193326Sed{ 717193326Sed return (__m64)__builtin_ia32_pminsw((__v4hi)a, (__v4hi)b); 
718193326Sed} 719193326Sed 720206084Srdivackystatic __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 721193326Sed_mm_min_pu8(__m64 a, __m64 b) 722193326Sed{ 723193326Sed return (__m64)__builtin_ia32_pminub((__v8qi)a, (__v8qi)b); 724193326Sed} 725193326Sed 726206084Srdivackystatic __inline__ int __attribute__((__always_inline__, __nodebug__)) 727193326Sed_mm_movemask_pi8(__m64 a) 728193326Sed{ 729193326Sed return __builtin_ia32_pmovmskb((__v8qi)a); 730193326Sed} 731193326Sed 732206084Srdivackystatic __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 733193326Sed_mm_mulhi_pu16(__m64 a, __m64 b) 734193326Sed{ 735193326Sed return (__m64)__builtin_ia32_pmulhuw((__v4hi)a, (__v4hi)b); 736193326Sed} 737193326Sed 738234353Sdim#define _mm_shuffle_pi16(a, n) __extension__ ({ \ 739234353Sdim __m64 __a = (a); \ 740234353Sdim (__m64)__builtin_ia32_pshufw((__v4hi)__a, (n)); }) 741193326Sed 742206084Srdivackystatic __inline__ void __attribute__((__always_inline__, __nodebug__)) 743193326Sed_mm_maskmove_si64(__m64 d, __m64 n, char *p) 744193326Sed{ 745193326Sed __builtin_ia32_maskmovq((__v8qi)d, (__v8qi)n, p); 746193326Sed} 747193326Sed 748206084Srdivackystatic __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 749193326Sed_mm_avg_pu8(__m64 a, __m64 b) 750193326Sed{ 751193326Sed return (__m64)__builtin_ia32_pavgb((__v8qi)a, (__v8qi)b); 752193326Sed} 753193326Sed 754206084Srdivackystatic __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 755193326Sed_mm_avg_pu16(__m64 a, __m64 b) 756193326Sed{ 757193326Sed return (__m64)__builtin_ia32_pavgw((__v4hi)a, (__v4hi)b); 758193326Sed} 759193326Sed 760206084Srdivackystatic __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 761193326Sed_mm_sad_pu8(__m64 a, __m64 b) 762193326Sed{ 763193326Sed return (__m64)__builtin_ia32_psadbw((__v8qi)a, (__v8qi)b); 764193326Sed} 765193326Sed 766206084Srdivackystatic __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) 
767193326Sed_mm_getcsr(void) 768193326Sed{ 769193326Sed return __builtin_ia32_stmxcsr(); 770193326Sed} 771193326Sed 772206084Srdivackystatic __inline__ void __attribute__((__always_inline__, __nodebug__)) 773193326Sed_mm_setcsr(unsigned int i) 774193326Sed{ 775193326Sed __builtin_ia32_ldmxcsr(i); 776193326Sed} 777193326Sed 778234353Sdim#define _mm_shuffle_ps(a, b, mask) __extension__ ({ \ 779234353Sdim __m128 __a = (a); \ 780234353Sdim __m128 __b = (b); \ 781234353Sdim (__m128)__builtin_shufflevector((__v4sf)__a, (__v4sf)__b, \ 782234353Sdim (mask) & 0x3, ((mask) & 0xc) >> 2, \ 783234353Sdim (((mask) & 0x30) >> 4) + 4, \ 784234353Sdim (((mask) & 0xc0) >> 6) + 4); }) 785193326Sed 786206084Srdivackystatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 787193326Sed_mm_unpackhi_ps(__m128 a, __m128 b) 788193326Sed{ 789193326Sed return __builtin_shufflevector(a, b, 2, 6, 3, 7); 790193326Sed} 791193326Sed 792206084Srdivackystatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 793193326Sed_mm_unpacklo_ps(__m128 a, __m128 b) 794193326Sed{ 795193326Sed return __builtin_shufflevector(a, b, 0, 4, 1, 5); 796193326Sed} 797193326Sed 798206084Srdivackystatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 799193326Sed_mm_move_ss(__m128 a, __m128 b) 800193326Sed{ 801193326Sed return __builtin_shufflevector(a, b, 4, 1, 2, 3); 802193326Sed} 803193326Sed 804206084Srdivackystatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 805193326Sed_mm_movehl_ps(__m128 a, __m128 b) 806193326Sed{ 807193326Sed return __builtin_shufflevector(a, b, 6, 7, 2, 3); 808193326Sed} 809193326Sed 810206084Srdivackystatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 811193326Sed_mm_movelh_ps(__m128 a, __m128 b) 812193326Sed{ 813193326Sed return __builtin_shufflevector(a, b, 0, 1, 4, 5); 814193326Sed} 815193326Sed 816206084Srdivackystatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 
817193326Sed_mm_cvtpi16_ps(__m64 a) 818193326Sed{ 819193326Sed __m64 b, c; 820193326Sed __m128 r; 821193326Sed 822193326Sed b = _mm_setzero_si64(); 823193326Sed b = _mm_cmpgt_pi16(b, a); 824193326Sed c = _mm_unpackhi_pi16(a, b); 825193326Sed r = _mm_setzero_ps(); 826193326Sed r = _mm_cvtpi32_ps(r, c); 827193326Sed r = _mm_movelh_ps(r, r); 828193326Sed c = _mm_unpacklo_pi16(a, b); 829193326Sed r = _mm_cvtpi32_ps(r, c); 830193326Sed 831193326Sed return r; 832193326Sed} 833193326Sed 834206084Srdivackystatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 835193326Sed_mm_cvtpu16_ps(__m64 a) 836193326Sed{ 837193326Sed __m64 b, c; 838193326Sed __m128 r; 839193326Sed 840193326Sed b = _mm_setzero_si64(); 841193326Sed c = _mm_unpackhi_pi16(a, b); 842193326Sed r = _mm_setzero_ps(); 843193326Sed r = _mm_cvtpi32_ps(r, c); 844193326Sed r = _mm_movelh_ps(r, r); 845193326Sed c = _mm_unpacklo_pi16(a, b); 846193326Sed r = _mm_cvtpi32_ps(r, c); 847193326Sed 848193326Sed return r; 849193326Sed} 850193326Sed 851206084Srdivackystatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 852193326Sed_mm_cvtpi8_ps(__m64 a) 853193326Sed{ 854193326Sed __m64 b; 855193326Sed 856193326Sed b = _mm_setzero_si64(); 857193326Sed b = _mm_cmpgt_pi8(b, a); 858193326Sed b = _mm_unpacklo_pi8(a, b); 859193326Sed 860193326Sed return _mm_cvtpi16_ps(b); 861193326Sed} 862193326Sed 863206084Srdivackystatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 864193326Sed_mm_cvtpu8_ps(__m64 a) 865193326Sed{ 866193326Sed __m64 b; 867193326Sed 868193326Sed b = _mm_setzero_si64(); 869193326Sed b = _mm_unpacklo_pi8(a, b); 870193326Sed 871193326Sed return _mm_cvtpi16_ps(b); 872193326Sed} 873193326Sed 874206084Srdivackystatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 875193326Sed_mm_cvtpi32x2_ps(__m64 a, __m64 b) 876193326Sed{ 877193326Sed __m128 c; 878193326Sed 879193326Sed c = _mm_setzero_ps(); 880193326Sed c = _mm_cvtpi32_ps(c, b); 881193326Sed 
c = _mm_movelh_ps(c, c); 882193326Sed 883193326Sed return _mm_cvtpi32_ps(c, a); 884193326Sed} 885193326Sed 886206084Srdivackystatic __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 887193326Sed_mm_cvtps_pi16(__m128 a) 888193326Sed{ 889193326Sed __m64 b, c; 890193326Sed 891193326Sed b = _mm_cvtps_pi32(a); 892193326Sed a = _mm_movehl_ps(a, a); 893193326Sed c = _mm_cvtps_pi32(a); 894193326Sed 895193326Sed return _mm_packs_pi16(b, c); 896193326Sed} 897193326Sed 898206084Srdivackystatic __inline__ __m64 __attribute__((__always_inline__, __nodebug__)) 899193326Sed_mm_cvtps_pi8(__m128 a) 900193326Sed{ 901193326Sed __m64 b, c; 902193326Sed 903193326Sed b = _mm_cvtps_pi16(a); 904193326Sed c = _mm_setzero_si64(); 905193326Sed 906193326Sed return _mm_packs_pi16(b, c); 907193326Sed} 908193326Sed 909206084Srdivackystatic __inline__ int __attribute__((__always_inline__, __nodebug__)) 910193326Sed_mm_movemask_ps(__m128 a) 911193326Sed{ 912193326Sed return __builtin_ia32_movmskps(a); 913193326Sed} 914193326Sed 915193326Sed#define _MM_SHUFFLE(z, y, x, w) (((z) << 6) | ((y) << 4) | ((x) << 2) | (w)) 916193326Sed 917193326Sed#define _MM_EXCEPT_INVALID (0x0001) 918193326Sed#define _MM_EXCEPT_DENORM (0x0002) 919193326Sed#define _MM_EXCEPT_DIV_ZERO (0x0004) 920193326Sed#define _MM_EXCEPT_OVERFLOW (0x0008) 921193326Sed#define _MM_EXCEPT_UNDERFLOW (0x0010) 922193326Sed#define _MM_EXCEPT_INEXACT (0x0020) 923193326Sed#define _MM_EXCEPT_MASK (0x003f) 924193326Sed 925193326Sed#define _MM_MASK_INVALID (0x0080) 926193326Sed#define _MM_MASK_DENORM (0x0100) 927193326Sed#define _MM_MASK_DIV_ZERO (0x0200) 928193326Sed#define _MM_MASK_OVERFLOW (0x0400) 929193326Sed#define _MM_MASK_UNDERFLOW (0x0800) 930193326Sed#define _MM_MASK_INEXACT (0x1000) 931193326Sed#define _MM_MASK_MASK (0x1f80) 932193326Sed 933193326Sed#define _MM_ROUND_NEAREST (0x0000) 934193326Sed#define _MM_ROUND_DOWN (0x2000) 935193326Sed#define _MM_ROUND_UP (0x4000) 936193326Sed#define _MM_ROUND_TOWARD_ZERO (0x6000) 
#define _MM_ROUND_MASK (0x6000)

/* MXCSR flush-to-zero (denormals flushed on output) control bit.  */
#define _MM_FLUSH_ZERO_MASK (0x8000)
#define _MM_FLUSH_ZERO_ON (0x8000)
#define _MM_FLUSH_ZERO_OFF (0x0000)

/* Read one field (exception mask, exception state, flush-to-zero mode,
   rounding mode) out of the MXCSR control/status register.  */
#define _MM_GET_EXCEPTION_MASK() (_mm_getcsr() & _MM_MASK_MASK)
#define _MM_GET_EXCEPTION_STATE() (_mm_getcsr() & _MM_EXCEPT_MASK)
#define _MM_GET_FLUSH_ZERO_MODE() (_mm_getcsr() & _MM_FLUSH_ZERO_MASK)
#define _MM_GET_ROUNDING_MODE() (_mm_getcsr() & _MM_ROUND_MASK)

/* Replace one MXCSR field, leaving the others intact.  These expand to a
   read-modify-write (_mm_getcsr then _mm_setcsr) of the register.  */
#define _MM_SET_EXCEPTION_MASK(x) (_mm_setcsr((_mm_getcsr() & ~_MM_MASK_MASK) | (x)))
#define _MM_SET_EXCEPTION_STATE(x) (_mm_setcsr((_mm_getcsr() & ~_MM_EXCEPT_MASK) | (x)))
#define _MM_SET_FLUSH_ZERO_MODE(x) (_mm_setcsr((_mm_getcsr() & ~_MM_FLUSH_ZERO_MASK) | (x)))
#define _MM_SET_ROUNDING_MODE(x) (_mm_setcsr((_mm_getcsr() & ~_MM_ROUND_MASK) | (x)))

/* Transpose, in place, the 4x4 float matrix whose rows are the four __m128
   arguments.  The multi-statement body is wrapped in do { } while (0) so
   the macro behaves as a single statement.  */
#define _MM_TRANSPOSE4_PS(row0, row1, row2, row3) \
do { \
  __m128 tmp3, tmp2, tmp1, tmp0; \
  tmp0 = _mm_unpacklo_ps((row0), (row1)); /* { r0[0], r1[0], r0[1], r1[1] } */ \
  tmp2 = _mm_unpacklo_ps((row2), (row3)); /* { r2[0], r3[0], r2[1], r3[1] } */ \
  tmp1 = _mm_unpackhi_ps((row0), (row1)); /* { r0[2], r1[2], r0[3], r1[3] } */ \
  tmp3 = _mm_unpackhi_ps((row2), (row3)); /* { r2[2], r3[2], r2[3], r3[3] } */ \
  (row0) = _mm_movelh_ps(tmp0, tmp2); \
  (row1) = _mm_movehl_ps(tmp2, tmp0); \
  (row2) = _mm_movelh_ps(tmp1, tmp3); \
  (row3) = _mm_movehl_ps(tmp3, tmp1); \
} while (0)

/* Aliases for compatibility.
*/ 967212904Sdim#define _m_pextrw _mm_extract_pi16 968212904Sdim#define _m_pinsrw _mm_insert_pi16 969212904Sdim#define _m_pmaxsw _mm_max_pi16 970212904Sdim#define _m_pmaxub _mm_max_pu8 971212904Sdim#define _m_pminsw _mm_min_pi16 972212904Sdim#define _m_pminub _mm_min_pu8 973212904Sdim#define _m_pmovmskb _mm_movemask_pi8 974212904Sdim#define _m_pmulhuw _mm_mulhi_pu16 975212904Sdim#define _m_pshufw _mm_shuffle_pi16 976212904Sdim#define _m_maskmovq _mm_maskmove_si64 977212904Sdim#define _m_pavgb _mm_avg_pu8 978212904Sdim#define _m_pavgw _mm_avg_pu16 979212904Sdim#define _m_psadbw _mm_sad_pu8 980212904Sdim#define _m_ _mm_ 981212904Sdim#define _m_ _mm_ 982212904Sdim 983194179Sed/* Ugly hack for backwards-compatibility (compatible with gcc) */ 984194179Sed#ifdef __SSE2__ 985193326Sed#include <emmintrin.h> 986194179Sed#endif 987193326Sed 988193326Sed#endif /* __SSE__ */ 989193326Sed 990193326Sed#endif /* __XMMINTRIN_H */ 991