pmmintrin.h revision 193326
1193326Sed/*===---- pmmintrin.h - SSE3 intrinsics ------------------------------------===
2193326Sed *
3193326Sed * Permission is hereby granted, free of charge, to any person obtaining a copy
4193326Sed * of this software and associated documentation files (the "Software"), to deal
5193326Sed * in the Software without restriction, including without limitation the rights
6193326Sed * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7193326Sed * copies of the Software, and to permit persons to whom the Software is
8193326Sed * furnished to do so, subject to the following conditions:
9193326Sed *
10193326Sed * The above copyright notice and this permission notice shall be included in
11193326Sed * all copies or substantial portions of the Software.
12193326Sed *
13193326Sed * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14193326Sed * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15193326Sed * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16193326Sed * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17193326Sed * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18193326Sed * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19193326Sed * THE SOFTWARE.
20193326Sed *
21193326Sed *===-----------------------------------------------------------------------===
22193326Sed */
23193326Sed
24193326Sed#ifndef __PMMINTRIN_H
25193326Sed#define __PMMINTRIN_H
26193326Sed
27193326Sed#ifndef __SSE3__
28193326Sed#error "SSE3 instruction set not enabled"
29193326Sed#else
30193326Sed
31193326Sed#include <emmintrin.h>
32193326Sed
33193326Sedstatic inline __m128i __attribute__((__always_inline__, __nodebug__))
34193326Sed_mm_lddqu_si128(__m128i const *p)
35193326Sed{
36193326Sed  return (__m128i)__builtin_ia32_lddqu((char const *)p);
37193326Sed}
38193326Sed
39193326Sedstatic inline __m128 __attribute__((__always_inline__, __nodebug__))
40193326Sed_mm_addsub_ps(__m128 a, __m128 b)
41193326Sed{
42193326Sed  return __builtin_ia32_addsubps(a, b);
43193326Sed}
44193326Sed
45193326Sedstatic inline __m128 __attribute__((__always_inline__, __nodebug__))
46193326Sed_mm_hadd_ps(__m128 a, __m128 b)
47193326Sed{
48193326Sed  return __builtin_ia32_haddps(a, b);
49193326Sed}
50193326Sed
51193326Sedstatic inline __m128 __attribute__((__always_inline__, __nodebug__))
52193326Sed_mm_hsub_ps(__m128 a, __m128 b)
53193326Sed{
54193326Sed  return __builtin_ia32_hsubps(a, b);
55193326Sed}
56193326Sed
57193326Sedstatic inline __m128 __attribute__((__always_inline__, __nodebug__))
58193326Sed_mm_movehdup_ps(__m128 a)
59193326Sed{
60193326Sed  return __builtin_shufflevector(a, a, 1, 1, 3, 3);
61193326Sed}
62193326Sed
63193326Sedstatic inline __m128 __attribute__((__always_inline__, __nodebug__))
64193326Sed_mm_moveldup_ps(__m128 a)
65193326Sed{
66193326Sed  return __builtin_shufflevector(a, a, 0, 0, 2, 2);
67193326Sed}
68193326Sed
69193326Sedstatic inline __m128d __attribute__((__always_inline__, __nodebug__))
70193326Sed_mm_addsub_pd(__m128d a, __m128d b)
71193326Sed{
72193326Sed  return __builtin_ia32_addsubpd(a, b);
73193326Sed}
74193326Sed
75193326Sedstatic inline __m128d __attribute__((__always_inline__, __nodebug__))
76193326Sed_mm_hadd_pd(__m128d a, __m128d b)
77193326Sed{
78193326Sed  return __builtin_ia32_haddpd(a, b);
79193326Sed}
80193326Sed
81193326Sedstatic inline __m128d __attribute__((__always_inline__, __nodebug__))
82193326Sed_mm_hsub_pd(__m128d a, __m128d b)
83193326Sed{
84193326Sed  return __builtin_ia32_hsubpd(a, b);
85193326Sed}
86193326Sed
/* Load one double from `__dp` and return it in both lanes of a 2 x double
 * vector, i.e. { *__dp, *__dp }.
 *
 * The value is read exactly once, through a packed, may_alias wrapper struct,
 * so the load neither assumes 8-byte alignment of `__dp` nor relies on the
 * pointed-to storage having effective type `double`.
 *
 * `__inline__` (rather than `inline`) keeps the header usable in C89 mode, and
 * the `__`-prefixed names cannot be rewritten by user macros.
 */
static __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
_mm_loaddup_pd(double const *__dp)
{
  struct __mm_loaddup_pd_struct {
    double __u;
  } __attribute__((__packed__, __may_alias__));
  double __u = ((struct __mm_loaddup_pd_struct const *)__dp)->__u;
  return (__m128d){ __u, __u };
}
92193326Sed
93193326Sedstatic inline __m128d __attribute__((__always_inline__, __nodebug__))
94193326Sed_mm_movedup_pd(__m128d a)
95193326Sed{
96193326Sed  return __builtin_shufflevector(a, a, 0, 0);
97193326Sed}
98193326Sed
/* Denormals-zero mode values. Both settings occupy bit 6 (0x0040) of the
 * word read by _mm_getcsr() and written by _mm_setcsr(). */
#define _MM_DENORMALS_ZERO_ON   (0x0040)
#define _MM_DENORMALS_ZERO_OFF  (0x0000)

/* Mask that isolates the denormals-zero bit. */
#define _MM_DENORMALS_ZERO_MASK (0x0040)

/* Read the current mode; evaluates to _MM_DENORMALS_ZERO_ON or
 * _MM_DENORMALS_ZERO_OFF. */
#define _MM_GET_DENORMALS_ZERO_MODE() \
  (_mm_getcsr() & _MM_DENORMALS_ZERO_MASK)

/* Select mode x: clear the denormals-zero bit, OR in x, and write the word
 * back. x is OR-ed in unmasked, so pass one of the two mode constants. */
#define _MM_SET_DENORMALS_ZERO_MODE(x) \
  (_mm_setcsr((_mm_getcsr() & ~_MM_DENORMALS_ZERO_MASK) | (x)))
106193326Sed
/* Issue the MONITOR builtin for the address `__p`.
 *
 * `__extensions` and `__hints` are passed through to the builtin untouched.
 * The const qualifier on `__p` is cast away at the call, presumably because
 * the builtin's prototype takes a plain `void *`.
 *
 * `__inline__` (rather than `inline`) keeps the header usable in C89 mode, and
 * the `__`-prefixed parameter names cannot be rewritten by user macros.
 */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
_mm_monitor(void const *__p, unsigned __extensions, unsigned __hints)
{
  __builtin_ia32_monitor((void *)__p, __extensions, __hints);
}
112193326Sed
/* Issue the MWAIT builtin.
 *
 * `__extensions` and `__hints` are passed through to the builtin untouched;
 * nothing is returned.
 *
 * `__inline__` (rather than `inline`) keeps the header usable in C89 mode, and
 * the `__`-prefixed parameter names cannot be rewritten by user macros.
 */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
_mm_mwait(unsigned __extensions, unsigned __hints)
{
  __builtin_ia32_mwait(__extensions, __hints);
}
118193326Sed
119193326Sed#endif /* __SSE3__ */
120193326Sed
121193326Sed#endif /* __PMMINTRIN_H */
122