1/* Copyright (C) 2006, 2007 Free Software Foundation, Inc.
2
3   This file is part of GCC.
4
5   GCC is free software; you can redistribute it and/or modify
6   it under the terms of the GNU General Public License as published by
7   the Free Software Foundation; either version 2, or (at your option)
8   any later version.
9
10   GCC is distributed in the hope that it will be useful,
11   but WITHOUT ANY WARRANTY; without even the implied warranty of
12   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13   GNU General Public License for more details.
14
15   You should have received a copy of the GNU General Public License
16   along with GCC; see the file COPYING.  If not, write to
17   the Free Software Foundation, 59 Temple Place - Suite 330,
18   Boston, MA 02111-1307, USA.  */
19
20/* As a special exception, if you include this header file into source
21   files compiled by GCC, this header file does not by itself cause
22   the resulting executable to be covered by the GNU General Public
23   License.  This exception does not however invalidate any other
24   reasons why the executable file might be covered by the GNU General
25   Public License.  */
26
27/* Implemented from the specification included in the Intel C++ Compiler
28   User Guide and Reference, version 9.1.  */
29
30#ifndef _TMMINTRIN_H_INCLUDED
31#define _TMMINTRIN_H_INCLUDED
32
33#ifndef __SSSE3__
34# error "SSSE3 instruction set not enabled"
35#else
36
/* We need definitions from the SSE3, SSE2 and SSE header files.  */
38#include <pmmintrin.h>
39
40static __inline __m128i __attribute__((__always_inline__))
41_mm_hadd_epi16 (__m128i __X, __m128i __Y)
42{
43  return (__m128i) __builtin_ia32_phaddw128 ((__v8hi)__X, (__v8hi)__Y);
44}
45
46static __inline __m128i __attribute__((__always_inline__))
47_mm_hadd_epi32 (__m128i __X, __m128i __Y)
48{
49  return (__m128i) __builtin_ia32_phaddd128 ((__v4si)__X, (__v4si)__Y);
50}
51
52static __inline __m128i __attribute__((__always_inline__))
53_mm_hadds_epi16 (__m128i __X, __m128i __Y)
54{
55  return (__m128i) __builtin_ia32_phaddsw128 ((__v8hi)__X, (__v8hi)__Y);
56}
57
58static __inline __m64 __attribute__((__always_inline__))
59_mm_hadd_pi16 (__m64 __X, __m64 __Y)
60{
61  return (__m64) __builtin_ia32_phaddw ((__v4hi)__X, (__v4hi)__Y);
62}
63
64static __inline __m64 __attribute__((__always_inline__))
65_mm_hadd_pi32 (__m64 __X, __m64 __Y)
66{
67  return (__m64) __builtin_ia32_phaddd ((__v2si)__X, (__v2si)__Y);
68}
69
70static __inline __m64 __attribute__((__always_inline__))
71_mm_hadds_pi16 (__m64 __X, __m64 __Y)
72{
73  return (__m64) __builtin_ia32_phaddsw ((__v4hi)__X, (__v4hi)__Y);
74}
75
76static __inline __m128i __attribute__((__always_inline__))
77_mm_hsub_epi16 (__m128i __X, __m128i __Y)
78{
79  return (__m128i) __builtin_ia32_phsubw128 ((__v8hi)__X, (__v8hi)__Y);
80}
81
82static __inline __m128i __attribute__((__always_inline__))
83_mm_hsub_epi32 (__m128i __X, __m128i __Y)
84{
85  return (__m128i) __builtin_ia32_phsubd128 ((__v4si)__X, (__v4si)__Y);
86}
87
88static __inline __m128i __attribute__((__always_inline__))
89_mm_hsubs_epi16 (__m128i __X, __m128i __Y)
90{
91  return (__m128i) __builtin_ia32_phsubsw128 ((__v8hi)__X, (__v8hi)__Y);
92}
93
94static __inline __m64 __attribute__((__always_inline__))
95_mm_hsub_pi16 (__m64 __X, __m64 __Y)
96{
97  return (__m64) __builtin_ia32_phsubw ((__v4hi)__X, (__v4hi)__Y);
98}
99
100static __inline __m64 __attribute__((__always_inline__))
101_mm_hsub_pi32 (__m64 __X, __m64 __Y)
102{
103  return (__m64) __builtin_ia32_phsubd ((__v2si)__X, (__v2si)__Y);
104}
105
106static __inline __m64 __attribute__((__always_inline__))
107_mm_hsubs_pi16 (__m64 __X, __m64 __Y)
108{
109  return (__m64) __builtin_ia32_phsubsw ((__v4hi)__X, (__v4hi)__Y);
110}
111
112static __inline __m128i __attribute__((__always_inline__))
113_mm_maddubs_epi16 (__m128i __X, __m128i __Y)
114{
115  return (__m128i) __builtin_ia32_pmaddubsw128 ((__v16qi)__X, (__v16qi)__Y);
116}
117
118static __inline __m64 __attribute__((__always_inline__))
119_mm_maddubs_pi16 (__m64 __X, __m64 __Y)
120{
121  return (__m64) __builtin_ia32_pmaddubsw ((__v8qi)__X, (__v8qi)__Y);
122}
123
124static __inline __m128i __attribute__((__always_inline__))
125_mm_mulhrs_epi16 (__m128i __X, __m128i __Y)
126{
127  return (__m128i) __builtin_ia32_pmulhrsw128 ((__v8hi)__X, (__v8hi)__Y);
128}
129
130static __inline __m64 __attribute__((__always_inline__))
131_mm_mulhrs_pi16 (__m64 __X, __m64 __Y)
132{
133  return (__m64) __builtin_ia32_pmulhrsw ((__v4hi)__X, (__v4hi)__Y);
134}
135
136static __inline __m128i __attribute__((__always_inline__))
137_mm_shuffle_epi8 (__m128i __X, __m128i __Y)
138{
139  return (__m128i) __builtin_ia32_pshufb128 ((__v16qi)__X, (__v16qi)__Y);
140}
141
142static __inline __m64 __attribute__((__always_inline__))
143_mm_shuffle_pi8 (__m64 __X, __m64 __Y)
144{
145  return (__m64) __builtin_ia32_pshufb ((__v8qi)__X, (__v8qi)__Y);
146}
147
148static __inline __m128i __attribute__((__always_inline__))
149_mm_sign_epi8 (__m128i __X, __m128i __Y)
150{
151  return (__m128i) __builtin_ia32_psignb128 ((__v16qi)__X, (__v16qi)__Y);
152}
153
154static __inline __m128i __attribute__((__always_inline__))
155_mm_sign_epi16 (__m128i __X, __m128i __Y)
156{
157  return (__m128i) __builtin_ia32_psignw128 ((__v8hi)__X, (__v8hi)__Y);
158}
159
160static __inline __m128i __attribute__((__always_inline__))
161_mm_sign_epi32 (__m128i __X, __m128i __Y)
162{
163  return (__m128i) __builtin_ia32_psignd128 ((__v4si)__X, (__v4si)__Y);
164}
165
166static __inline __m64 __attribute__((__always_inline__))
167_mm_sign_pi8 (__m64 __X, __m64 __Y)
168{
169  return (__m64) __builtin_ia32_psignb ((__v8qi)__X, (__v8qi)__Y);
170}
171
172static __inline __m64 __attribute__((__always_inline__))
173_mm_sign_pi16 (__m64 __X, __m64 __Y)
174{
175  return (__m64) __builtin_ia32_psignw ((__v4hi)__X, (__v4hi)__Y);
176}
177
178static __inline __m64 __attribute__((__always_inline__))
179_mm_sign_pi32 (__m64 __X, __m64 __Y)
180{
181  return (__m64) __builtin_ia32_psignd ((__v2si)__X, (__v2si)__Y);
182}
183
/* PALIGNR, 128-bit form.  Implemented as a macro rather than an inline
   function, presumably so that __N reaches the builtin as a constant
   expression.  __X and __Y are reinterpreted as __v2di vectors and the
   builtin receives __N * 8 as its third operand (presumably converting
   a byte count into a bit count -- confirm against the builtin's
   documentation).

   Every macro argument is parenthesized before it is cast or
   multiplied: a cast binds tighter than any binary operator, so without
   the parentheses an argument such as `a + b' would have only `a'
   converted.  */
#define _mm_alignr_epi8(__X, __Y, __N) \
  ((__m128i)__builtin_ia32_palignr128 ((__v2di) (__X), (__v2di) (__Y), (__N) * 8))
186
/* PALIGNR, 64-bit form.  Implemented as a macro rather than an inline
   function, presumably so that the count reaches the builtin as a
   constant expression.  The first two arguments are converted to
   long long and the builtin receives the third argument multiplied
   by 8; its result is cast to __m64.  */
#define _mm_alignr_pi8(__A, __B, __COUNT)                      \
  ((__m64)__builtin_ia32_palignr ((long long) (__A),           \
                                  (long long) (__B),           \
                                  (__COUNT) * 8))
189
190static __inline __m128i __attribute__((__always_inline__))
191_mm_abs_epi8 (__m128i __X)
192{
193  return (__m128i) __builtin_ia32_pabsb128 ((__v16qi)__X);
194}
195
196static __inline __m128i __attribute__((__always_inline__))
197_mm_abs_epi16 (__m128i __X)
198{
199  return (__m128i) __builtin_ia32_pabsw128 ((__v8hi)__X);
200}
201
202static __inline __m128i __attribute__((__always_inline__))
203_mm_abs_epi32 (__m128i __X)
204{
205  return (__m128i) __builtin_ia32_pabsd128 ((__v4si)__X);
206}
207
208static __inline __m64 __attribute__((__always_inline__))
209_mm_abs_pi8 (__m64 __X)
210{
211  return (__m64) __builtin_ia32_pabsb ((__v8qi)__X);
212}
213
214static __inline __m64 __attribute__((__always_inline__))
215_mm_abs_pi16 (__m64 __X)
216{
217  return (__m64) __builtin_ia32_pabsw ((__v4hi)__X);
218}
219
220static __inline __m64 __attribute__((__always_inline__))
221_mm_abs_pi32 (__m64 __X)
222{
223  return (__m64) __builtin_ia32_pabsd ((__v2si)__X);
224}
225
226#endif /* __SSSE3__ */
227
228#endif /* _TMMINTRIN_H_INCLUDED */
229