/* tmmintrin.h revision 249423 */
/*===---- tmmintrin.h - SSSE3 intrinsics -----------------------------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */

24193326Sed#ifndef __TMMINTRIN_H
25193326Sed#define __TMMINTRIN_H
26193326Sed
27193326Sed#ifndef __SSSE3__
28193326Sed#error "SSSE3 instruction set not enabled"
29193326Sed#else
30193326Sed
31193326Sed#include <pmmintrin.h>
32193326Sed
33206084Srdivackystatic __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
34249423Sdim_mm_abs_pi8(__m64 __a)
35193326Sed{
36249423Sdim    return (__m64)__builtin_ia32_pabsb((__v8qi)__a);
37193326Sed}
38193326Sed
39206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
40249423Sdim_mm_abs_epi8(__m128i __a)
41193326Sed{
42249423Sdim    return (__m128i)__builtin_ia32_pabsb128((__v16qi)__a);
43193326Sed}
44193326Sed
45206084Srdivackystatic __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
46249423Sdim_mm_abs_pi16(__m64 __a)
47193326Sed{
48249423Sdim    return (__m64)__builtin_ia32_pabsw((__v4hi)__a);
49193326Sed}
50193326Sed
51206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
52249423Sdim_mm_abs_epi16(__m128i __a)
53193326Sed{
54249423Sdim    return (__m128i)__builtin_ia32_pabsw128((__v8hi)__a);
55193326Sed}
56193326Sed
57206084Srdivackystatic __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
58249423Sdim_mm_abs_pi32(__m64 __a)
59193326Sed{
60249423Sdim    return (__m64)__builtin_ia32_pabsd((__v2si)__a);
61193326Sed}
62193326Sed
63206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
64249423Sdim_mm_abs_epi32(__m128i __a)
65193326Sed{
66249423Sdim    return (__m128i)__builtin_ia32_pabsd128((__v4si)__a);
67193326Sed}
68193326Sed
/*
 * PALIGNR, 128-bit form.  Intel documents the intrinsic as: concatenate a
 * (upper half) with b (lower half), shift that 32-byte value right by n bytes,
 * and keep the low 16 bytes.
 *
 * Written as a macro, so n reaches __builtin_ia32_palignr128 unchanged
 * (presumably it has to be a compile-time constant -- confirm against the
 * compiler's builtin documentation).  a and b are each evaluated exactly once,
 * into the statement-expression locals __a and __b.
 */
#define _mm_alignr_epi8(a, b, n) __extension__ ({ \
  __m128i __a = (a); \
  __m128i __b = (b); \
  (__m128i)__builtin_ia32_palignr128((__v16qi)__a, (__v16qi)__b, (n)); })

/*
 * PALIGNR, 64-bit form.  Intel documents the intrinsic as: concatenate a
 * (upper half) with b (lower half), shift that 16-byte value right by n bytes,
 * and keep the low 8 bytes.
 *
 * Written as a macro, so n reaches __builtin_ia32_palignr unchanged
 * (presumably it has to be a compile-time constant -- confirm against the
 * compiler's builtin documentation).  a and b are each evaluated exactly once,
 * into the statement-expression locals __a and __b.
 */
#define _mm_alignr_pi8(a, b, n) __extension__ ({ \
  __m64 __a = (a); \
  __m64 __b = (b); \
  (__m64)__builtin_ia32_palignr((__v8qi)__a, (__v8qi)__b, (n)); })

79206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
80249423Sdim_mm_hadd_epi16(__m128i __a, __m128i __b)
81193326Sed{
82249423Sdim    return (__m128i)__builtin_ia32_phaddw128((__v8hi)__a, (__v8hi)__b);
83193326Sed}
84193326Sed
85206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
86249423Sdim_mm_hadd_epi32(__m128i __a, __m128i __b)
87193326Sed{
88249423Sdim    return (__m128i)__builtin_ia32_phaddd128((__v4si)__a, (__v4si)__b);
89193326Sed}
90193326Sed
91206084Srdivackystatic __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
92249423Sdim_mm_hadd_pi16(__m64 __a, __m64 __b)
93193326Sed{
94249423Sdim    return (__m64)__builtin_ia32_phaddw((__v4hi)__a, (__v4hi)__b);
95193326Sed}
96193326Sed
97206084Srdivackystatic __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
98249423Sdim_mm_hadd_pi32(__m64 __a, __m64 __b)
99193326Sed{
100249423Sdim    return (__m64)__builtin_ia32_phaddd((__v2si)__a, (__v2si)__b);
101193326Sed}
102193326Sed
103206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
104249423Sdim_mm_hadds_epi16(__m128i __a, __m128i __b)
105193326Sed{
106249423Sdim    return (__m128i)__builtin_ia32_phaddsw128((__v8hi)__a, (__v8hi)__b);
107193326Sed}
108193326Sed
109206084Srdivackystatic __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
110249423Sdim_mm_hadds_pi16(__m64 __a, __m64 __b)
111193326Sed{
112249423Sdim    return (__m64)__builtin_ia32_phaddsw((__v4hi)__a, (__v4hi)__b);
113193326Sed}
114193326Sed
115206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
116249423Sdim_mm_hsub_epi16(__m128i __a, __m128i __b)
117193326Sed{
118249423Sdim    return (__m128i)__builtin_ia32_phsubw128((__v8hi)__a, (__v8hi)__b);
119193326Sed}
120193326Sed
121206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
122249423Sdim_mm_hsub_epi32(__m128i __a, __m128i __b)
123193326Sed{
124249423Sdim    return (__m128i)__builtin_ia32_phsubd128((__v4si)__a, (__v4si)__b);
125193326Sed}
126193326Sed
127206084Srdivackystatic __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
128249423Sdim_mm_hsub_pi16(__m64 __a, __m64 __b)
129193326Sed{
130249423Sdim    return (__m64)__builtin_ia32_phsubw((__v4hi)__a, (__v4hi)__b);
131193326Sed}
132193326Sed
133206084Srdivackystatic __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
134249423Sdim_mm_hsub_pi32(__m64 __a, __m64 __b)
135193326Sed{
136249423Sdim    return (__m64)__builtin_ia32_phsubd((__v2si)__a, (__v2si)__b);
137193326Sed}
138193326Sed
139206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
140249423Sdim_mm_hsubs_epi16(__m128i __a, __m128i __b)
141193326Sed{
142249423Sdim    return (__m128i)__builtin_ia32_phsubsw128((__v8hi)__a, (__v8hi)__b);
143193326Sed}
144193326Sed
145206084Srdivackystatic __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
146249423Sdim_mm_hsubs_pi16(__m64 __a, __m64 __b)
147193326Sed{
148249423Sdim    return (__m64)__builtin_ia32_phsubsw((__v4hi)__a, (__v4hi)__b);
149193326Sed}
150193326Sed
151206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
152249423Sdim_mm_maddubs_epi16(__m128i __a, __m128i __b)
153193326Sed{
154249423Sdim    return (__m128i)__builtin_ia32_pmaddubsw128((__v16qi)__a, (__v16qi)__b);
155193326Sed}
156193326Sed
157206084Srdivackystatic __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
158249423Sdim_mm_maddubs_pi16(__m64 __a, __m64 __b)
159193326Sed{
160249423Sdim    return (__m64)__builtin_ia32_pmaddubsw((__v8qi)__a, (__v8qi)__b);
161193326Sed}
162193326Sed
163206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
164249423Sdim_mm_mulhrs_epi16(__m128i __a, __m128i __b)
165193326Sed{
166249423Sdim    return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)__a, (__v8hi)__b);
167193326Sed}
168193326Sed
169206084Srdivackystatic __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
170249423Sdim_mm_mulhrs_pi16(__m64 __a, __m64 __b)
171193326Sed{
172249423Sdim    return (__m64)__builtin_ia32_pmulhrsw((__v4hi)__a, (__v4hi)__b);
173193326Sed}
174193326Sed
175206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
176249423Sdim_mm_shuffle_epi8(__m128i __a, __m128i __b)
177193326Sed{
178249423Sdim    return (__m128i)__builtin_ia32_pshufb128((__v16qi)__a, (__v16qi)__b);
179193326Sed}
180193326Sed
181206084Srdivackystatic __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
182249423Sdim_mm_shuffle_pi8(__m64 __a, __m64 __b)
183193326Sed{
184249423Sdim    return (__m64)__builtin_ia32_pshufb((__v8qi)__a, (__v8qi)__b);
185193326Sed}
186193326Sed
187206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
188249423Sdim_mm_sign_epi8(__m128i __a, __m128i __b)
189193326Sed{
190249423Sdim    return (__m128i)__builtin_ia32_psignb128((__v16qi)__a, (__v16qi)__b);
191193326Sed}
192193326Sed
193206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
194249423Sdim_mm_sign_epi16(__m128i __a, __m128i __b)
195193326Sed{
196249423Sdim    return (__m128i)__builtin_ia32_psignw128((__v8hi)__a, (__v8hi)__b);
197193326Sed}
198193326Sed
199206084Srdivackystatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
200249423Sdim_mm_sign_epi32(__m128i __a, __m128i __b)
201193326Sed{
202249423Sdim    return (__m128i)__builtin_ia32_psignd128((__v4si)__a, (__v4si)__b);
203193326Sed}
204193326Sed
205206084Srdivackystatic __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
206249423Sdim_mm_sign_pi8(__m64 __a, __m64 __b)
207193326Sed{
208249423Sdim    return (__m64)__builtin_ia32_psignb((__v8qi)__a, (__v8qi)__b);
209193326Sed}
210193326Sed
211206084Srdivackystatic __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
212249423Sdim_mm_sign_pi16(__m64 __a, __m64 __b)
213193326Sed{
214249423Sdim    return (__m64)__builtin_ia32_psignw((__v4hi)__a, (__v4hi)__b);
215193326Sed}
216193326Sed
217206084Srdivackystatic __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
218249423Sdim_mm_sign_pi32(__m64 __a, __m64 __b)
219193326Sed{
220249423Sdim    return (__m64)__builtin_ia32_psignd((__v2si)__a, (__v2si)__b);
221193326Sed}
222193326Sed
223193326Sed#endif /* __SSSE3__ */
224193326Sed
225193326Sed#endif /* __TMMINTRIN_H */
226