/* pmmintrin.h revision 193326 */
1193326Sed/*===---- pmmintrin.h - SSE3 intrinsics ------------------------------------=== 2193326Sed * 3193326Sed * Permission is hereby granted, free of charge, to any person obtaining a copy 4193326Sed * of this software and associated documentation files (the "Software"), to deal 5193326Sed * in the Software without restriction, including without limitation the rights 6193326Sed * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7193326Sed * copies of the Software, and to permit persons to whom the Software is 8193326Sed * furnished to do so, subject to the following conditions: 9193326Sed * 10193326Sed * The above copyright notice and this permission notice shall be included in 11193326Sed * all copies or substantial portions of the Software. 12193326Sed * 13193326Sed * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14193326Sed * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15193326Sed * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16193326Sed * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17193326Sed * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18193326Sed * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19193326Sed * THE SOFTWARE. 
20193326Sed * 21193326Sed *===-----------------------------------------------------------------------=== 22193326Sed */ 23193326Sed 24193326Sed#ifndef __PMMINTRIN_H 25193326Sed#define __PMMINTRIN_H 26193326Sed 27193326Sed#ifndef __SSE3__ 28193326Sed#error "SSE3 instruction set not enabled" 29193326Sed#else 30193326Sed 31193326Sed#include <emmintrin.h> 32193326Sed 33193326Sedstatic inline __m128i __attribute__((__always_inline__, __nodebug__)) 34193326Sed_mm_lddqu_si128(__m128i const *p) 35193326Sed{ 36193326Sed return (__m128i)__builtin_ia32_lddqu((char const *)p); 37193326Sed} 38193326Sed 39193326Sedstatic inline __m128 __attribute__((__always_inline__, __nodebug__)) 40193326Sed_mm_addsub_ps(__m128 a, __m128 b) 41193326Sed{ 42193326Sed return __builtin_ia32_addsubps(a, b); 43193326Sed} 44193326Sed 45193326Sedstatic inline __m128 __attribute__((__always_inline__, __nodebug__)) 46193326Sed_mm_hadd_ps(__m128 a, __m128 b) 47193326Sed{ 48193326Sed return __builtin_ia32_haddps(a, b); 49193326Sed} 50193326Sed 51193326Sedstatic inline __m128 __attribute__((__always_inline__, __nodebug__)) 52193326Sed_mm_hsub_ps(__m128 a, __m128 b) 53193326Sed{ 54193326Sed return __builtin_ia32_hsubps(a, b); 55193326Sed} 56193326Sed 57193326Sedstatic inline __m128 __attribute__((__always_inline__, __nodebug__)) 58193326Sed_mm_movehdup_ps(__m128 a) 59193326Sed{ 60193326Sed return __builtin_shufflevector(a, a, 1, 1, 3, 3); 61193326Sed} 62193326Sed 63193326Sedstatic inline __m128 __attribute__((__always_inline__, __nodebug__)) 64193326Sed_mm_moveldup_ps(__m128 a) 65193326Sed{ 66193326Sed return __builtin_shufflevector(a, a, 0, 0, 2, 2); 67193326Sed} 68193326Sed 69193326Sedstatic inline __m128d __attribute__((__always_inline__, __nodebug__)) 70193326Sed_mm_addsub_pd(__m128d a, __m128d b) 71193326Sed{ 72193326Sed return __builtin_ia32_addsubpd(a, b); 73193326Sed} 74193326Sed 75193326Sedstatic inline __m128d __attribute__((__always_inline__, __nodebug__)) 76193326Sed_mm_hadd_pd(__m128d a, 
__m128d b) 77193326Sed{ 78193326Sed return __builtin_ia32_haddpd(a, b); 79193326Sed} 80193326Sed 81193326Sedstatic inline __m128d __attribute__((__always_inline__, __nodebug__)) 82193326Sed_mm_hsub_pd(__m128d a, __m128d b) 83193326Sed{ 84193326Sed return __builtin_ia32_hsubpd(a, b); 85193326Sed} 86193326Sed 87193326Sedstatic inline __m128d __attribute__((__always_inline__, __nodebug__)) 88193326Sed_mm_loaddup_pd(double const *dp) 89193326Sed{ 90193326Sed return (__m128d){ *dp, *dp }; 91193326Sed} 92193326Sed 93193326Sedstatic inline __m128d __attribute__((__always_inline__, __nodebug__)) 94193326Sed_mm_movedup_pd(__m128d a) 95193326Sed{ 96193326Sed return __builtin_shufflevector(a, a, 0, 0); 97193326Sed} 98193326Sed 99193326Sed#define _MM_DENORMALS_ZERO_ON (0x0040) 100193326Sed#define _MM_DENORMALS_ZERO_OFF (0x0000) 101193326Sed 102193326Sed#define _MM_DENORMALS_ZERO_MASK (0x0040) 103193326Sed 104193326Sed#define _MM_GET_DENORMALS_ZERO_MODE() (_mm_getcsr() & _MM_DENORMALS_ZERO_MASK) 105193326Sed#define _MM_SET_DENORMALS_ZERO_MODE(x) (_mm_setcsr((_mm_getcsr() & ~_MM_DENORMALS_ZERO_MASK) | (x))) 106193326Sed 107193326Sedstatic inline void __attribute__((__always_inline__, __nodebug__)) 108193326Sed_mm_monitor(void const *p, unsigned extensions, unsigned hints) 109193326Sed{ 110193326Sed __builtin_ia32_monitor((void *)p, extensions, hints); 111193326Sed} 112193326Sed 113193326Sedstatic inline void __attribute__((__always_inline__, __nodebug__)) 114193326Sed_mm_mwait(unsigned extensions, unsigned hints) 115193326Sed{ 116193326Sed __builtin_ia32_mwait(extensions, hints); 117193326Sed} 118193326Sed 119193326Sed#endif /* __SSE3__ */ 120193326Sed 121193326Sed#endif /* __PMMINTRIN_H */ 122