/* tmmintrin.h revision 234353 */
/*===---- tmmintrin.h - SSSE3 intrinsics -----------------------------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */

24193326Sed#ifndef __TMMINTRIN_H
25193326Sed#define __TMMINTRIN_H
26193326Sed
27193326Sed#ifndef __SSSE3__
28193326Sed#error "SSSE3 instruction set not enabled"
29193326Sed#else
30193326Sed
31193326Sed#include <pmmintrin.h>
32193326Sed
33206084Srdivackystatic __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
34193326Sed_mm_abs_pi8(__m64 a)
35193326Sed{
36193326Sed    return (__m64)__builtin_ia32_pabsb((__v8qi)a);
37193326Sed}
38193326Sed
39206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
40193326Sed_mm_abs_epi8(__m128i a)
41193326Sed{
42193326Sed    return (__m128i)__builtin_ia32_pabsb128((__v16qi)a);
43193326Sed}
44193326Sed
45206084Srdivackystatic __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
46193326Sed_mm_abs_pi16(__m64 a)
47193326Sed{
48193326Sed    return (__m64)__builtin_ia32_pabsw((__v4hi)a);
49193326Sed}
50193326Sed
51206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
52193326Sed_mm_abs_epi16(__m128i a)
53193326Sed{
54193326Sed    return (__m128i)__builtin_ia32_pabsw128((__v8hi)a);
55193326Sed}
56193326Sed
57206084Srdivackystatic __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
58193326Sed_mm_abs_pi32(__m64 a)
59193326Sed{
60193326Sed    return (__m64)__builtin_ia32_pabsd((__v2si)a);
61193326Sed}
62193326Sed
63206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
64193326Sed_mm_abs_epi32(__m128i a)
65193326Sed{
66193326Sed    return (__m128i)__builtin_ia32_pabsd128((__v4si)a);
67193326Sed}
68193326Sed
/*
 * _mm_alignr_epi8 - 128-bit byte-alignment intrinsic.
 *
 * Expands to one call of __builtin_ia32_palignr128 on `a` and `b` viewed as
 * __v16qi, with `n` forwarded unchanged, and casts the result to __m128i.
 * The builtin is named after the SSSE3 PALIGNR instruction, which Intel
 * documents as concatenating the two operands and extracting a byte-shifted
 * window of the result.
 *
 * NOTE(review): presumably a macro rather than an inline function because
 * the builtin wants `n` as a compile-time constant -- confirm.
 *
 * The expansion is a plain expression that declares no temporaries, so an
 * argument expression can never collide with a macro-local name.  Each
 * argument is evaluated exactly once.
 */
#define _mm_alignr_epi8(a, b, n) \
  ((__m128i)__builtin_ia32_palignr128((__v16qi)(__m128i)(a), \
                                      (__v16qi)(__m128i)(b), (n)))

/*
 * _mm_alignr_pi8 - 64-bit byte-alignment intrinsic.
 *
 * Expands to one call of __builtin_ia32_palignr on `a` and `b` viewed as
 * __v8qi, with `n` forwarded unchanged, and casts the result to __m64.
 * The builtin is named after the SSSE3 PALIGNR instruction, which Intel
 * documents as concatenating the two operands and extracting a byte-shifted
 * window of the result.
 *
 * NOTE(review): presumably a macro rather than an inline function because
 * the builtin wants `n` as a compile-time constant -- confirm.
 *
 * The expansion is a plain expression that declares no temporaries, so an
 * argument expression can never collide with a macro-local name.  Each
 * argument is evaluated exactly once.
 */
#define _mm_alignr_pi8(a, b, n) \
  ((__m64)__builtin_ia32_palignr((__v8qi)(__m64)(a), \
                                 (__v8qi)(__m64)(b), (n)))

79206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
80193326Sed_mm_hadd_epi16(__m128i a, __m128i b)
81193326Sed{
82193326Sed    return (__m128i)__builtin_ia32_phaddw128((__v8hi)a, (__v8hi)b);
83193326Sed}
84193326Sed
85206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
86193326Sed_mm_hadd_epi32(__m128i a, __m128i b)
87193326Sed{
88193326Sed    return (__m128i)__builtin_ia32_phaddd128((__v4si)a, (__v4si)b);
89193326Sed}
90193326Sed
91206084Srdivackystatic __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
92193326Sed_mm_hadd_pi16(__m64 a, __m64 b)
93193326Sed{
94193326Sed    return (__m64)__builtin_ia32_phaddw((__v4hi)a, (__v4hi)b);
95193326Sed}
96193326Sed
97206084Srdivackystatic __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
98193326Sed_mm_hadd_pi32(__m64 a, __m64 b)
99193326Sed{
100193326Sed    return (__m64)__builtin_ia32_phaddd((__v2si)a, (__v2si)b);
101193326Sed}
102193326Sed
103206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
104193326Sed_mm_hadds_epi16(__m128i a, __m128i b)
105193326Sed{
106193326Sed    return (__m128i)__builtin_ia32_phaddsw128((__v8hi)a, (__v8hi)b);
107193326Sed}
108193326Sed
109206084Srdivackystatic __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
110193326Sed_mm_hadds_pi16(__m64 a, __m64 b)
111193326Sed{
112193326Sed    return (__m64)__builtin_ia32_phaddsw((__v4hi)a, (__v4hi)b);
113193326Sed}
114193326Sed
115206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
116193326Sed_mm_hsub_epi16(__m128i a, __m128i b)
117193326Sed{
118193326Sed    return (__m128i)__builtin_ia32_phsubw128((__v8hi)a, (__v8hi)b);
119193326Sed}
120193326Sed
121206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
122193326Sed_mm_hsub_epi32(__m128i a, __m128i b)
123193326Sed{
124193576Sed    return (__m128i)__builtin_ia32_phsubd128((__v4si)a, (__v4si)b);
125193326Sed}
126193326Sed
127206084Srdivackystatic __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
128193326Sed_mm_hsub_pi16(__m64 a, __m64 b)
129193326Sed{
130193576Sed    return (__m64)__builtin_ia32_phsubw((__v4hi)a, (__v4hi)b);
131193326Sed}
132193326Sed
133206084Srdivackystatic __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
134193326Sed_mm_hsub_pi32(__m64 a, __m64 b)
135193326Sed{
136193576Sed    return (__m64)__builtin_ia32_phsubd((__v2si)a, (__v2si)b);
137193326Sed}
138193326Sed
139206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
140193326Sed_mm_hsubs_epi16(__m128i a, __m128i b)
141193326Sed{
142193326Sed    return (__m128i)__builtin_ia32_phsubsw128((__v8hi)a, (__v8hi)b);
143193326Sed}
144193326Sed
145206084Srdivackystatic __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
146193326Sed_mm_hsubs_pi16(__m64 a, __m64 b)
147193326Sed{
148193326Sed    return (__m64)__builtin_ia32_phsubsw((__v4hi)a, (__v4hi)b);
149193326Sed}
150193326Sed
151206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
152193326Sed_mm_maddubs_epi16(__m128i a, __m128i b)
153193326Sed{
154193326Sed    return (__m128i)__builtin_ia32_pmaddubsw128((__v16qi)a, (__v16qi)b);
155193326Sed}
156193326Sed
157206084Srdivackystatic __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
158193326Sed_mm_maddubs_pi16(__m64 a, __m64 b)
159193326Sed{
160193326Sed    return (__m64)__builtin_ia32_pmaddubsw((__v8qi)a, (__v8qi)b);
161193326Sed}
162193326Sed
163206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
164193326Sed_mm_mulhrs_epi16(__m128i a, __m128i b)
165193326Sed{
166193326Sed    return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)a, (__v8hi)b);
167193326Sed}
168193326Sed
169206084Srdivackystatic __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
170193326Sed_mm_mulhrs_pi16(__m64 a, __m64 b)
171193326Sed{
172193326Sed    return (__m64)__builtin_ia32_pmulhrsw((__v4hi)a, (__v4hi)b);
173193326Sed}
174193326Sed
175206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
176193326Sed_mm_shuffle_epi8(__m128i a, __m128i b)
177193326Sed{
178193326Sed    return (__m128i)__builtin_ia32_pshufb128((__v16qi)a, (__v16qi)b);
179193326Sed}
180193326Sed
181206084Srdivackystatic __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
182193326Sed_mm_shuffle_pi8(__m64 a, __m64 b)
183193326Sed{
184193326Sed    return (__m64)__builtin_ia32_pshufb((__v8qi)a, (__v8qi)b);
185193326Sed}
186193326Sed
187206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
188193326Sed_mm_sign_epi8(__m128i a, __m128i b)
189193326Sed{
190193326Sed    return (__m128i)__builtin_ia32_psignb128((__v16qi)a, (__v16qi)b);
191193326Sed}
192193326Sed
193206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
194193326Sed_mm_sign_epi16(__m128i a, __m128i b)
195193326Sed{
196193326Sed    return (__m128i)__builtin_ia32_psignw128((__v8hi)a, (__v8hi)b);
197193326Sed}
198193326Sed
199206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
200193326Sed_mm_sign_epi32(__m128i a, __m128i b)
201193326Sed{
202193326Sed    return (__m128i)__builtin_ia32_psignd128((__v4si)a, (__v4si)b);
203193326Sed}
204193326Sed
205206084Srdivackystatic __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
206193326Sed_mm_sign_pi8(__m64 a, __m64 b)
207193326Sed{
208193326Sed    return (__m64)__builtin_ia32_psignb((__v8qi)a, (__v8qi)b);
209193326Sed}
210193326Sed
211206084Srdivackystatic __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
212193326Sed_mm_sign_pi16(__m64 a, __m64 b)
213193326Sed{
214193326Sed    return (__m64)__builtin_ia32_psignw((__v4hi)a, (__v4hi)b);
215193326Sed}
216193326Sed
217206084Srdivackystatic __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
218193326Sed_mm_sign_pi32(__m64 a, __m64 b)
219193326Sed{
220193326Sed    return (__m64)__builtin_ia32_psignd((__v2si)a, (__v2si)b);
221193326Sed}
222193326Sed
223193326Sed#endif /* __SSSE3__ */
224193326Sed
225193326Sed#endif /* __TMMINTRIN_H */
226