Deleted Added
full compact
mmintrin.h (117395) mmintrin.h (122180)
1/* Copyright (C) 2002 Free Software Foundation, Inc.
1/* Copyright (C) 2002, 2003 Free Software Foundation, Inc.
2
3 This file is part of GNU CC.
4
5 GNU CC is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2, or (at your option)
8 any later version.
9

--- 10 unchanged lines hidden (view full) ---

20/* As a special exception, if you include this header file into source
21 files compiled by GCC, this header file does not by itself cause
22 the resulting executable to be covered by the GNU General Public
23 License. This exception does not however invalidate any other
24 reasons why the executable file might be covered by the GNU General
25 Public License. */
26
27/* Implemented from the specification included in the Intel C++ Compiler
2
3 This file is part of GNU CC.
4
5 GNU CC is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2, or (at your option)
8 any later version.
9

--- 10 unchanged lines hidden (view full) ---

20/* As a special exception, if you include this header file into source
21 files compiled by GCC, this header file does not by itself cause
22 the resulting executable to be covered by the GNU General Public
23 License. This exception does not however invalidate any other
24 reasons why the executable file might be covered by the GNU General
25 Public License. */
26
27/* Implemented from the specification included in the Intel C++ Compiler
28 User Guide and Reference, version 5.0. */
28 User Guide and Reference, version 8.0. */
29
30#ifndef _MMINTRIN_H_INCLUDED
31#define _MMINTRIN_H_INCLUDED
32
33#ifndef __MMX__
34# error "MMX instruction set not enabled"
35#else
36/* The data type intended for user use. */

--- 6 unchanged lines hidden (view full) ---

43
44/* Empty the multimedia state. */
45static __inline void
46_mm_empty (void)
47{
48 __builtin_ia32_emms ();
49}
50
29
30#ifndef _MMINTRIN_H_INCLUDED
31#define _MMINTRIN_H_INCLUDED
32
33#ifndef __MMX__
34# error "MMX instruction set not enabled"
35#else
36/* The data type intended for user use. */

--- 6 unchanged lines hidden (view full) ---

43
44/* Empty the multimedia state. */
45static __inline void
46_mm_empty (void)
47{
48 __builtin_ia32_emms ();
49}
50
51static __inline void
52_m_empty (void)
53{
54 _mm_empty ();
55}
56
51/* Convert I to a __m64 object. The integer is zero-extended to 64-bits. */
52static __inline __m64
53_mm_cvtsi32_si64 (int __i)
54{
55 long long __tmp = (unsigned int)__i;
56 return (__m64) __tmp;
57}
58
57/* Convert I to a __m64 object. The integer is zero-extended to 64-bits. */
58static __inline __m64
59_mm_cvtsi32_si64 (int __i)
60{
61 long long __tmp = (unsigned int)__i;
62 return (__m64) __tmp;
63}
64
65static __inline __m64
66_m_from_int (int __i)
67{
68 return _mm_cvtsi32_si64 (__i);
69}
70
59#ifdef __x86_64__
60/* Convert I to a __m64 object. */
61static __inline __m64
62_mm_cvtsi64x_si64 (long long __i)
63{
64 return (__m64) __i;
65}
66

--- 8 unchanged lines hidden (view full) ---

75/* Convert the lower 32 bits of the __m64 object into an integer. */
76static __inline int
77_mm_cvtsi64_si32 (__m64 __i)
78{
79 long long __tmp = (long long)__i;
80 return __tmp;
81}
82
71#ifdef __x86_64__
72/* Convert I to a __m64 object. */
73static __inline __m64
74_mm_cvtsi64x_si64 (long long __i)
75{
76 return (__m64) __i;
77}
78

--- 8 unchanged lines hidden (view full) ---

87/* Convert the lower 32 bits of the __m64 object into an integer. */
88static __inline int
89_mm_cvtsi64_si32 (__m64 __i)
90{
91 long long __tmp = (long long)__i;
92 return __tmp;
93}
94
95static __inline int
96_m_to_int (__m64 __i)
97{
98 return _mm_cvtsi64_si32 (__i);
99}
100
83#ifdef __x86_64__
84/* Convert the __m64 object to a 64-bit integer. */
85static __inline long long
86_mm_cvtsi64_si64x (__m64 __i)
87{
88 return (long long)__i;
89}
90#endif
91
92/* Pack the four 16-bit values from M1 into the lower four 8-bit values of
93 the result, and the four 16-bit values from M2 into the upper four 8-bit
94 values of the result, all with signed saturation. */
95static __inline __m64
96_mm_packs_pi16 (__m64 __m1, __m64 __m2)
97{
98 return (__m64) __builtin_ia32_packsswb ((__v4hi)__m1, (__v4hi)__m2);
99}
100
101#ifdef __x86_64__
102/* Convert the __m64 object to a 64-bit integer. */
103static __inline long long
104_mm_cvtsi64_si64x (__m64 __i)
105{
106 return (long long)__i;
107}
108#endif
109
110/* Pack the four 16-bit values from M1 into the lower four 8-bit values of
111 the result, and the four 16-bit values from M2 into the upper four 8-bit
112 values of the result, all with signed saturation. */
113static __inline __m64
114_mm_packs_pi16 (__m64 __m1, __m64 __m2)
115{
116 return (__m64) __builtin_ia32_packsswb ((__v4hi)__m1, (__v4hi)__m2);
117}
118
119static __inline __m64
120_m_packsswb (__m64 __m1, __m64 __m2)
121{
122 return _mm_packs_pi16 (__m1, __m2);
123}
124
101/* Pack the two 32-bit values from M1 into the lower two 16-bit values of
102 the result, and the two 32-bit values from M2 into the upper two 16-bit
103 values of the result, all with signed saturation. */
104static __inline __m64
105_mm_packs_pi32 (__m64 __m1, __m64 __m2)
106{
107 return (__m64) __builtin_ia32_packssdw ((__v2si)__m1, (__v2si)__m2);
108}
109
125/* Pack the two 32-bit values from M1 into the lower two 16-bit values of
126 the result, and the two 32-bit values from M2 into the upper two 16-bit
127 values of the result, all with signed saturation. */
128static __inline __m64
129_mm_packs_pi32 (__m64 __m1, __m64 __m2)
130{
131 return (__m64) __builtin_ia32_packssdw ((__v2si)__m1, (__v2si)__m2);
132}
133
134static __inline __m64
135_m_packssdw (__m64 __m1, __m64 __m2)
136{
137 return _mm_packs_pi32 (__m1, __m2);
138}
139
110/* Pack the four 16-bit values from M1 into the lower four 8-bit values of
111 the result, and the four 16-bit values from M2 into the upper four 8-bit
112 values of the result, all with unsigned saturation. */
113static __inline __m64
114_mm_packs_pu16 (__m64 __m1, __m64 __m2)
115{
116 return (__m64) __builtin_ia32_packuswb ((__v4hi)__m1, (__v4hi)__m2);
117}
118
140/* Pack the four 16-bit values from M1 into the lower four 8-bit values of
141 the result, and the four 16-bit values from M2 into the upper four 8-bit
142 values of the result, all with unsigned saturation. */
143static __inline __m64
144_mm_packs_pu16 (__m64 __m1, __m64 __m2)
145{
146 return (__m64) __builtin_ia32_packuswb ((__v4hi)__m1, (__v4hi)__m2);
147}
148
149static __inline __m64
150_m_packuswb (__m64 __m1, __m64 __m2)
151{
152 return _mm_packs_pu16 (__m1, __m2);
153}
154
119/* Interleave the four 8-bit values from the high half of M1 with the four
120 8-bit values from the high half of M2. */
121static __inline __m64
122_mm_unpackhi_pi8 (__m64 __m1, __m64 __m2)
123{
124 return (__m64) __builtin_ia32_punpckhbw ((__v8qi)__m1, (__v8qi)__m2);
125}
126
155/* Interleave the four 8-bit values from the high half of M1 with the four
156 8-bit values from the high half of M2. */
157static __inline __m64
158_mm_unpackhi_pi8 (__m64 __m1, __m64 __m2)
159{
160 return (__m64) __builtin_ia32_punpckhbw ((__v8qi)__m1, (__v8qi)__m2);
161}
162
163static __inline __m64
164_m_punpckhbw (__m64 __m1, __m64 __m2)
165{
166 return _mm_unpackhi_pi8 (__m1, __m2);
167}
168
127/* Interleave the two 16-bit values from the high half of M1 with the two
128 16-bit values from the high half of M2. */
129static __inline __m64
130_mm_unpackhi_pi16 (__m64 __m1, __m64 __m2)
131{
132 return (__m64) __builtin_ia32_punpckhwd ((__v4hi)__m1, (__v4hi)__m2);
133}
134
169/* Interleave the two 16-bit values from the high half of M1 with the two
170 16-bit values from the high half of M2. */
171static __inline __m64
172_mm_unpackhi_pi16 (__m64 __m1, __m64 __m2)
173{
174 return (__m64) __builtin_ia32_punpckhwd ((__v4hi)__m1, (__v4hi)__m2);
175}
176
177static __inline __m64
178_m_punpckhwd (__m64 __m1, __m64 __m2)
179{
180 return _mm_unpackhi_pi16 (__m1, __m2);
181}
182
135/* Interleave the 32-bit value from the high half of M1 with the 32-bit
136 value from the high half of M2. */
137static __inline __m64
138_mm_unpackhi_pi32 (__m64 __m1, __m64 __m2)
139{
140 return (__m64) __builtin_ia32_punpckhdq ((__v2si)__m1, (__v2si)__m2);
141}
142
183/* Interleave the 32-bit value from the high half of M1 with the 32-bit
184 value from the high half of M2. */
185static __inline __m64
186_mm_unpackhi_pi32 (__m64 __m1, __m64 __m2)
187{
188 return (__m64) __builtin_ia32_punpckhdq ((__v2si)__m1, (__v2si)__m2);
189}
190
191static __inline __m64
192_m_punpckhdq (__m64 __m1, __m64 __m2)
193{
194 return _mm_unpackhi_pi32 (__m1, __m2);
195}
196
143/* Interleave the four 8-bit values from the low half of M1 with the four
144 8-bit values from the low half of M2. */
145static __inline __m64
146_mm_unpacklo_pi8 (__m64 __m1, __m64 __m2)
147{
148 return (__m64) __builtin_ia32_punpcklbw ((__v8qi)__m1, (__v8qi)__m2);
149}
150
197/* Interleave the four 8-bit values from the low half of M1 with the four
198 8-bit values from the low half of M2. */
199static __inline __m64
200_mm_unpacklo_pi8 (__m64 __m1, __m64 __m2)
201{
202 return (__m64) __builtin_ia32_punpcklbw ((__v8qi)__m1, (__v8qi)__m2);
203}
204
205static __inline __m64
206_m_punpcklbw (__m64 __m1, __m64 __m2)
207{
208 return _mm_unpacklo_pi8 (__m1, __m2);
209}
210
151/* Interleave the two 16-bit values from the low half of M1 with the two
152 16-bit values from the low half of M2. */
153static __inline __m64
154_mm_unpacklo_pi16 (__m64 __m1, __m64 __m2)
155{
156 return (__m64) __builtin_ia32_punpcklwd ((__v4hi)__m1, (__v4hi)__m2);
157}
158
211/* Interleave the two 16-bit values from the low half of M1 with the two
212 16-bit values from the low half of M2. */
213static __inline __m64
214_mm_unpacklo_pi16 (__m64 __m1, __m64 __m2)
215{
216 return (__m64) __builtin_ia32_punpcklwd ((__v4hi)__m1, (__v4hi)__m2);
217}
218
219static __inline __m64
220_m_punpcklwd (__m64 __m1, __m64 __m2)
221{
222 return _mm_unpacklo_pi16 (__m1, __m2);
223}
224
159/* Interleave the 32-bit value from the low half of M1 with the 32-bit
160 value from the low half of M2. */
161static __inline __m64
162_mm_unpacklo_pi32 (__m64 __m1, __m64 __m2)
163{
164 return (__m64) __builtin_ia32_punpckldq ((__v2si)__m1, (__v2si)__m2);
165}
166
225/* Interleave the 32-bit value from the low half of M1 with the 32-bit
226 value from the low half of M2. */
227static __inline __m64
228_mm_unpacklo_pi32 (__m64 __m1, __m64 __m2)
229{
230 return (__m64) __builtin_ia32_punpckldq ((__v2si)__m1, (__v2si)__m2);
231}
232
233static __inline __m64
234_m_punpckldq (__m64 __m1, __m64 __m2)
235{
236 return _mm_unpacklo_pi32 (__m1, __m2);
237}
238
167/* Add the 8-bit values in M1 to the 8-bit values in M2. */
168static __inline __m64
169_mm_add_pi8 (__m64 __m1, __m64 __m2)
170{
171 return (__m64) __builtin_ia32_paddb ((__v8qi)__m1, (__v8qi)__m2);
172}
173
239/* Add the 8-bit values in M1 to the 8-bit values in M2. */
240static __inline __m64
241_mm_add_pi8 (__m64 __m1, __m64 __m2)
242{
243 return (__m64) __builtin_ia32_paddb ((__v8qi)__m1, (__v8qi)__m2);
244}
245
246static __inline __m64
247_m_paddb (__m64 __m1, __m64 __m2)
248{
249 return _mm_add_pi8 (__m1, __m2);
250}
251
174/* Add the 16-bit values in M1 to the 16-bit values in M2. */
175static __inline __m64
176_mm_add_pi16 (__m64 __m1, __m64 __m2)
177{
178 return (__m64) __builtin_ia32_paddw ((__v4hi)__m1, (__v4hi)__m2);
179}
180
252/* Add the 16-bit values in M1 to the 16-bit values in M2. */
253static __inline __m64
254_mm_add_pi16 (__m64 __m1, __m64 __m2)
255{
256 return (__m64) __builtin_ia32_paddw ((__v4hi)__m1, (__v4hi)__m2);
257}
258
259static __inline __m64
260_m_paddw (__m64 __m1, __m64 __m2)
261{
262 return _mm_add_pi16 (__m1, __m2);
263}
264
181/* Add the 32-bit values in M1 to the 32-bit values in M2. */
182static __inline __m64
183_mm_add_pi32 (__m64 __m1, __m64 __m2)
184{
185 return (__m64) __builtin_ia32_paddd ((__v2si)__m1, (__v2si)__m2);
186}
187
265/* Add the 32-bit values in M1 to the 32-bit values in M2. */
266static __inline __m64
267_mm_add_pi32 (__m64 __m1, __m64 __m2)
268{
269 return (__m64) __builtin_ia32_paddd ((__v2si)__m1, (__v2si)__m2);
270}
271
272static __inline __m64
273_m_paddd (__m64 __m1, __m64 __m2)
274{
275 return _mm_add_pi32 (__m1, __m2);
276}
277
188/* Add the 64-bit values in M1 to the 64-bit values in M2. */
189static __inline __m64
190_mm_add_si64 (__m64 __m1, __m64 __m2)
191{
192 return (__m64) __builtin_ia32_paddq ((long long)__m1, (long long)__m2);
193}
194
195/* Add the 8-bit values in M1 to the 8-bit values in M2 using signed
196 saturated arithmetic. */
197static __inline __m64
198_mm_adds_pi8 (__m64 __m1, __m64 __m2)
199{
200 return (__m64) __builtin_ia32_paddsb ((__v8qi)__m1, (__v8qi)__m2);
201}
202
278/* Add the 64-bit values in M1 to the 64-bit values in M2. */
279static __inline __m64
280_mm_add_si64 (__m64 __m1, __m64 __m2)
281{
282 return (__m64) __builtin_ia32_paddq ((long long)__m1, (long long)__m2);
283}
284
285/* Add the 8-bit values in M1 to the 8-bit values in M2 using signed
286 saturated arithmetic. */
287static __inline __m64
288_mm_adds_pi8 (__m64 __m1, __m64 __m2)
289{
290 return (__m64) __builtin_ia32_paddsb ((__v8qi)__m1, (__v8qi)__m2);
291}
292
293static __inline __m64
294_m_paddsb (__m64 __m1, __m64 __m2)
295{
296 return _mm_adds_pi8 (__m1, __m2);
297}
298
203/* Add the 16-bit values in M1 to the 16-bit values in M2 using signed
204 saturated arithmetic. */
205static __inline __m64
206_mm_adds_pi16 (__m64 __m1, __m64 __m2)
207{
208 return (__m64) __builtin_ia32_paddsw ((__v4hi)__m1, (__v4hi)__m2);
209}
210
299/* Add the 16-bit values in M1 to the 16-bit values in M2 using signed
300 saturated arithmetic. */
301static __inline __m64
302_mm_adds_pi16 (__m64 __m1, __m64 __m2)
303{
304 return (__m64) __builtin_ia32_paddsw ((__v4hi)__m1, (__v4hi)__m2);
305}
306
307static __inline __m64
308_m_paddsw (__m64 __m1, __m64 __m2)
309{
310 return _mm_adds_pi16 (__m1, __m2);
311}
312
211/* Add the 8-bit values in M1 to the 8-bit values in M2 using unsigned
212 saturated arithmetic. */
213static __inline __m64
214_mm_adds_pu8 (__m64 __m1, __m64 __m2)
215{
216 return (__m64) __builtin_ia32_paddusb ((__v8qi)__m1, (__v8qi)__m2);
217}
218
313/* Add the 8-bit values in M1 to the 8-bit values in M2 using unsigned
314 saturated arithmetic. */
315static __inline __m64
316_mm_adds_pu8 (__m64 __m1, __m64 __m2)
317{
318 return (__m64) __builtin_ia32_paddusb ((__v8qi)__m1, (__v8qi)__m2);
319}
320
321static __inline __m64
322_m_paddusb (__m64 __m1, __m64 __m2)
323{
324 return _mm_adds_pu8 (__m1, __m2);
325}
326
219/* Add the 16-bit values in M1 to the 16-bit values in M2 using unsigned
220 saturated arithmetic. */
221static __inline __m64
222_mm_adds_pu16 (__m64 __m1, __m64 __m2)
223{
224 return (__m64) __builtin_ia32_paddusw ((__v4hi)__m1, (__v4hi)__m2);
225}
226
327/* Add the 16-bit values in M1 to the 16-bit values in M2 using unsigned
328 saturated arithmetic. */
329static __inline __m64
330_mm_adds_pu16 (__m64 __m1, __m64 __m2)
331{
332 return (__m64) __builtin_ia32_paddusw ((__v4hi)__m1, (__v4hi)__m2);
333}
334
335static __inline __m64
336_m_paddusw (__m64 __m1, __m64 __m2)
337{
338 return _mm_adds_pu16 (__m1, __m2);
339}
340
227/* Subtract the 8-bit values in M2 from the 8-bit values in M1. */
228static __inline __m64
229_mm_sub_pi8 (__m64 __m1, __m64 __m2)
230{
231 return (__m64) __builtin_ia32_psubb ((__v8qi)__m1, (__v8qi)__m2);
232}
233
341/* Subtract the 8-bit values in M2 from the 8-bit values in M1. */
342static __inline __m64
343_mm_sub_pi8 (__m64 __m1, __m64 __m2)
344{
345 return (__m64) __builtin_ia32_psubb ((__v8qi)__m1, (__v8qi)__m2);
346}
347
348static __inline __m64
349_m_psubb (__m64 __m1, __m64 __m2)
350{
351 return _mm_sub_pi8 (__m1, __m2);
352}
353
234/* Subtract the 16-bit values in M2 from the 16-bit values in M1. */
235static __inline __m64
236_mm_sub_pi16 (__m64 __m1, __m64 __m2)
237{
238 return (__m64) __builtin_ia32_psubw ((__v4hi)__m1, (__v4hi)__m2);
239}
240
354/* Subtract the 16-bit values in M2 from the 16-bit values in M1. */
355static __inline __m64
356_mm_sub_pi16 (__m64 __m1, __m64 __m2)
357{
358 return (__m64) __builtin_ia32_psubw ((__v4hi)__m1, (__v4hi)__m2);
359}
360
361static __inline __m64
362_m_psubw (__m64 __m1, __m64 __m2)
363{
364 return _mm_sub_pi16 (__m1, __m2);
365}
366
241/* Subtract the 32-bit values in M2 from the 32-bit values in M1. */
242static __inline __m64
243_mm_sub_pi32 (__m64 __m1, __m64 __m2)
244{
245 return (__m64) __builtin_ia32_psubd ((__v2si)__m1, (__v2si)__m2);
246}
247
367/* Subtract the 32-bit values in M2 from the 32-bit values in M1. */
368static __inline __m64
369_mm_sub_pi32 (__m64 __m1, __m64 __m2)
370{
371 return (__m64) __builtin_ia32_psubd ((__v2si)__m1, (__v2si)__m2);
372}
373
374static __inline __m64
375_m_psubd (__m64 __m1, __m64 __m2)
376{
377 return _mm_sub_pi32 (__m1, __m2);
378}
379
248/* Subtract the 64-bit value in M2 from the 64-bit value in M1. */
249static __inline __m64
250_mm_sub_si64 (__m64 __m1, __m64 __m2)
251{
252 return (__m64) __builtin_ia32_psubq ((long long)__m1, (long long)__m2);
253}
254
255/* Subtract the 8-bit values in M2 from the 8-bit values in M1 using signed
256 saturating arithmetic. */
257static __inline __m64
258_mm_subs_pi8 (__m64 __m1, __m64 __m2)
259{
260 return (__m64) __builtin_ia32_psubsb ((__v8qi)__m1, (__v8qi)__m2);
261}
262
380/* Subtract the 64-bit value in M2 from the 64-bit value in M1. */
381static __inline __m64
382_mm_sub_si64 (__m64 __m1, __m64 __m2)
383{
384 return (__m64) __builtin_ia32_psubq ((long long)__m1, (long long)__m2);
385}
386
387/* Subtract the 8-bit values in M2 from the 8-bit values in M1 using signed
388 saturating arithmetic. */
389static __inline __m64
390_mm_subs_pi8 (__m64 __m1, __m64 __m2)
391{
392 return (__m64) __builtin_ia32_psubsb ((__v8qi)__m1, (__v8qi)__m2);
393}
394
395static __inline __m64
396_m_psubsb (__m64 __m1, __m64 __m2)
397{
398 return _mm_subs_pi8 (__m1, __m2);
399}
400
263/* Subtract the 16-bit values in M2 from the 16-bit values in M1 using
264 signed saturating arithmetic. */
265static __inline __m64
266_mm_subs_pi16 (__m64 __m1, __m64 __m2)
267{
268 return (__m64) __builtin_ia32_psubsw ((__v4hi)__m1, (__v4hi)__m2);
269}
270
401/* Subtract the 16-bit values in M2 from the 16-bit values in M1 using
402 signed saturating arithmetic. */
403static __inline __m64
404_mm_subs_pi16 (__m64 __m1, __m64 __m2)
405{
406 return (__m64) __builtin_ia32_psubsw ((__v4hi)__m1, (__v4hi)__m2);
407}
408
409static __inline __m64
410_m_psubsw (__m64 __m1, __m64 __m2)
411{
412 return _mm_subs_pi16 (__m1, __m2);
413}
414
271/* Subtract the 8-bit values in M2 from the 8-bit values in M1 using
272 unsigned saturating arithmetic. */
273static __inline __m64
274_mm_subs_pu8 (__m64 __m1, __m64 __m2)
275{
276 return (__m64) __builtin_ia32_psubusb ((__v8qi)__m1, (__v8qi)__m2);
277}
278
415/* Subtract the 8-bit values in M2 from the 8-bit values in M1 using
416 unsigned saturating arithmetic. */
417static __inline __m64
418_mm_subs_pu8 (__m64 __m1, __m64 __m2)
419{
420 return (__m64) __builtin_ia32_psubusb ((__v8qi)__m1, (__v8qi)__m2);
421}
422
423static __inline __m64
424_m_psubusb (__m64 __m1, __m64 __m2)
425{
426 return _mm_subs_pu8 (__m1, __m2);
427}
428
279/* Subtract the 16-bit values in M2 from the 16-bit values in M1 using
280 unsigned saturating arithmetic. */
281static __inline __m64
282_mm_subs_pu16 (__m64 __m1, __m64 __m2)
283{
284 return (__m64) __builtin_ia32_psubusw ((__v4hi)__m1, (__v4hi)__m2);
285}
286
429/* Subtract the 16-bit values in M2 from the 16-bit values in M1 using
430 unsigned saturating arithmetic. */
431static __inline __m64
432_mm_subs_pu16 (__m64 __m1, __m64 __m2)
433{
434 return (__m64) __builtin_ia32_psubusw ((__v4hi)__m1, (__v4hi)__m2);
435}
436
437static __inline __m64
438_m_psubusw (__m64 __m1, __m64 __m2)
439{
440 return _mm_subs_pu16 (__m1, __m2);
441}
442
287/* Multiply four 16-bit values in M1 by four 16-bit values in M2 producing
288 four 32-bit intermediate results, which are then summed by pairs to
289 produce two 32-bit results. */
290static __inline __m64
291_mm_madd_pi16 (__m64 __m1, __m64 __m2)
292{
293 return (__m64) __builtin_ia32_pmaddwd ((__v4hi)__m1, (__v4hi)__m2);
294}
295
443/* Multiply four 16-bit values in M1 by four 16-bit values in M2 producing
444 four 32-bit intermediate results, which are then summed by pairs to
445 produce two 32-bit results. */
446static __inline __m64
447_mm_madd_pi16 (__m64 __m1, __m64 __m2)
448{
449 return (__m64) __builtin_ia32_pmaddwd ((__v4hi)__m1, (__v4hi)__m2);
450}
451
452static __inline __m64
453_m_pmaddwd (__m64 __m1, __m64 __m2)
454{
455 return _mm_madd_pi16 (__m1, __m2);
456}
457
296/* Multiply four signed 16-bit values in M1 by four signed 16-bit values in
297 M2 and produce the high 16 bits of the 32-bit results. */
298static __inline __m64
299_mm_mulhi_pi16 (__m64 __m1, __m64 __m2)
300{
301 return (__m64) __builtin_ia32_pmulhw ((__v4hi)__m1, (__v4hi)__m2);
302}
303
458/* Multiply four signed 16-bit values in M1 by four signed 16-bit values in
459 M2 and produce the high 16 bits of the 32-bit results. */
460static __inline __m64
461_mm_mulhi_pi16 (__m64 __m1, __m64 __m2)
462{
463 return (__m64) __builtin_ia32_pmulhw ((__v4hi)__m1, (__v4hi)__m2);
464}
465
466static __inline __m64
467_m_pmulhw (__m64 __m1, __m64 __m2)
468{
469 return _mm_mulhi_pi16 (__m1, __m2);
470}
471
304/* Multiply four 16-bit values in M1 by four 16-bit values in M2 and produce
305 the low 16 bits of the results. */
306static __inline __m64
307_mm_mullo_pi16 (__m64 __m1, __m64 __m2)
308{
309 return (__m64) __builtin_ia32_pmullw ((__v4hi)__m1, (__v4hi)__m2);
310}
311
472/* Multiply four 16-bit values in M1 by four 16-bit values in M2 and produce
473 the low 16 bits of the results. */
474static __inline __m64
475_mm_mullo_pi16 (__m64 __m1, __m64 __m2)
476{
477 return (__m64) __builtin_ia32_pmullw ((__v4hi)__m1, (__v4hi)__m2);
478}
479
480static __inline __m64
481_m_pmullw (__m64 __m1, __m64 __m2)
482{
483 return _mm_mullo_pi16 (__m1, __m2);
484}
485
312/* Shift four 16-bit values in M left by COUNT. */
313static __inline __m64
314_mm_sll_pi16 (__m64 __m, __m64 __count)
315{
316 return (__m64) __builtin_ia32_psllw ((__v4hi)__m, (long long)__count);
317}
318
319static __inline __m64
486/* Shift four 16-bit values in M left by COUNT. */
487static __inline __m64
488_mm_sll_pi16 (__m64 __m, __m64 __count)
489{
490 return (__m64) __builtin_ia32_psllw ((__v4hi)__m, (long long)__count);
491}
492
493static __inline __m64
494_m_psllw (__m64 __m, __m64 __count)
495{
496 return _mm_sll_pi16 (__m, __count);
497}
498
499static __inline __m64
320_mm_slli_pi16 (__m64 __m, int __count)
321{
322 return (__m64) __builtin_ia32_psllw ((__v4hi)__m, __count);
323}
324
500_mm_slli_pi16 (__m64 __m, int __count)
501{
502 return (__m64) __builtin_ia32_psllw ((__v4hi)__m, __count);
503}
504
505static __inline __m64
506_m_psllwi (__m64 __m, int __count)
507{
508 return _mm_slli_pi16 (__m, __count);
509}
510
325/* Shift two 32-bit values in M left by COUNT. */
326static __inline __m64
327_mm_sll_pi32 (__m64 __m, __m64 __count)
328{
329 return (__m64) __builtin_ia32_pslld ((__v2si)__m, (long long)__count);
330}
331
332static __inline __m64
511/* Shift two 32-bit values in M left by COUNT. */
512static __inline __m64
513_mm_sll_pi32 (__m64 __m, __m64 __count)
514{
515 return (__m64) __builtin_ia32_pslld ((__v2si)__m, (long long)__count);
516}
517
518static __inline __m64
519_m_pslld (__m64 __m, __m64 __count)
520{
521 return _mm_sll_pi32 (__m, __count);
522}
523
524static __inline __m64
333_mm_slli_pi32 (__m64 __m, int __count)
334{
335 return (__m64) __builtin_ia32_pslld ((__v2si)__m, __count);
336}
337
525_mm_slli_pi32 (__m64 __m, int __count)
526{
527 return (__m64) __builtin_ia32_pslld ((__v2si)__m, __count);
528}
529
530static __inline __m64
531_m_pslldi (__m64 __m, int __count)
532{
533 return _mm_slli_pi32 (__m, __count);
534}
535
338/* Shift the 64-bit value in M left by COUNT. */
339static __inline __m64
340_mm_sll_si64 (__m64 __m, __m64 __count)
341{
342 return (__m64) __builtin_ia32_psllq ((long long)__m, (long long)__count);
343}
344
345static __inline __m64
536/* Shift the 64-bit value in M left by COUNT. */
537static __inline __m64
538_mm_sll_si64 (__m64 __m, __m64 __count)
539{
540 return (__m64) __builtin_ia32_psllq ((long long)__m, (long long)__count);
541}
542
543static __inline __m64
544_m_psllq (__m64 __m, __m64 __count)
545{
546 return _mm_sll_si64 (__m, __count);
547}
548
549static __inline __m64
346_mm_slli_si64 (__m64 __m, int __count)
347{
348 return (__m64) __builtin_ia32_psllq ((long long)__m, (long long)__count);
349}
350
550_mm_slli_si64 (__m64 __m, int __count)
551{
552 return (__m64) __builtin_ia32_psllq ((long long)__m, (long long)__count);
553}
554
555static __inline __m64
556_m_psllqi (__m64 __m, int __count)
557{
558 return _mm_slli_si64 (__m, __count);
559}
560
351/* Shift four 16-bit values in M right by COUNT; shift in the sign bit. */
352static __inline __m64
353_mm_sra_pi16 (__m64 __m, __m64 __count)
354{
355 return (__m64) __builtin_ia32_psraw ((__v4hi)__m, (long long)__count);
356}
357
358static __inline __m64
561/* Shift four 16-bit values in M right by COUNT; shift in the sign bit. */
562static __inline __m64
563_mm_sra_pi16 (__m64 __m, __m64 __count)
564{
565 return (__m64) __builtin_ia32_psraw ((__v4hi)__m, (long long)__count);
566}
567
568static __inline __m64
569_m_psraw (__m64 __m, __m64 __count)
570{
571 return _mm_sra_pi16 (__m, __count);
572}
573
574static __inline __m64
359_mm_srai_pi16 (__m64 __m, int __count)
360{
361 return (__m64) __builtin_ia32_psraw ((__v4hi)__m, __count);
362}
363
575_mm_srai_pi16 (__m64 __m, int __count)
576{
577 return (__m64) __builtin_ia32_psraw ((__v4hi)__m, __count);
578}
579
580static __inline __m64
581_m_psrawi (__m64 __m, int __count)
582{
583 return _mm_srai_pi16 (__m, __count);
584}
585
364/* Shift two 32-bit values in M right by COUNT; shift in the sign bit. */
365static __inline __m64
366_mm_sra_pi32 (__m64 __m, __m64 __count)
367{
368 return (__m64) __builtin_ia32_psrad ((__v2si)__m, (long long)__count);
369}
370
371static __inline __m64
586/* Shift two 32-bit values in M right by COUNT; shift in the sign bit. */
587static __inline __m64
588_mm_sra_pi32 (__m64 __m, __m64 __count)
589{
590 return (__m64) __builtin_ia32_psrad ((__v2si)__m, (long long)__count);
591}
592
593static __inline __m64
594_m_psrad (__m64 __m, __m64 __count)
595{
596 return _mm_sra_pi32 (__m, __count);
597}
598
599static __inline __m64
372_mm_srai_pi32 (__m64 __m, int __count)
373{
374 return (__m64) __builtin_ia32_psrad ((__v2si)__m, __count);
375}
376
600_mm_srai_pi32 (__m64 __m, int __count)
601{
602 return (__m64) __builtin_ia32_psrad ((__v2si)__m, __count);
603}
604
605static __inline __m64
606_m_psradi (__m64 __m, int __count)
607{
608 return _mm_srai_pi32 (__m, __count);
609}
610
377/* Shift four 16-bit values in M right by COUNT; shift in zeros. */
378static __inline __m64
379_mm_srl_pi16 (__m64 __m, __m64 __count)
380{
381 return (__m64) __builtin_ia32_psrlw ((__v4hi)__m, (long long)__count);
382}
383
384static __inline __m64
611/* Shift four 16-bit values in M right by COUNT; shift in zeros. */
612static __inline __m64
613_mm_srl_pi16 (__m64 __m, __m64 __count)
614{
615 return (__m64) __builtin_ia32_psrlw ((__v4hi)__m, (long long)__count);
616}
617
618static __inline __m64
619_m_psrlw (__m64 __m, __m64 __count)
620{
621 return _mm_srl_pi16 (__m, __count);
622}
623
624static __inline __m64
385_mm_srli_pi16 (__m64 __m, int __count)
386{
387 return (__m64) __builtin_ia32_psrlw ((__v4hi)__m, __count);
388}
389
625_mm_srli_pi16 (__m64 __m, int __count)
626{
627 return (__m64) __builtin_ia32_psrlw ((__v4hi)__m, __count);
628}
629
630static __inline __m64
631_m_psrlwi (__m64 __m, int __count)
632{
633 return _mm_srli_pi16 (__m, __count);
634}
635
390/* Shift two 32-bit values in M right by COUNT; shift in zeros. */
391static __inline __m64
392_mm_srl_pi32 (__m64 __m, __m64 __count)
393{
394 return (__m64) __builtin_ia32_psrld ((__v2si)__m, (long long)__count);
395}
396
397static __inline __m64
636/* Shift two 32-bit values in M right by COUNT; shift in zeros. */
637static __inline __m64
638_mm_srl_pi32 (__m64 __m, __m64 __count)
639{
640 return (__m64) __builtin_ia32_psrld ((__v2si)__m, (long long)__count);
641}
642
643static __inline __m64
644_m_psrld (__m64 __m, __m64 __count)
645{
646 return _mm_srl_pi32 (__m, __count);
647}
648
649static __inline __m64
398_mm_srli_pi32 (__m64 __m, int __count)
399{
400 return (__m64) __builtin_ia32_psrld ((__v2si)__m, __count);
401}
402
650_mm_srli_pi32 (__m64 __m, int __count)
651{
652 return (__m64) __builtin_ia32_psrld ((__v2si)__m, __count);
653}
654
655static __inline __m64
656_m_psrldi (__m64 __m, int __count)
657{
658 return _mm_srli_pi32 (__m, __count);
659}
660
403/* Shift the 64-bit value in M right by COUNT; shift in zeros. */
404static __inline __m64
405_mm_srl_si64 (__m64 __m, __m64 __count)
406{
407 return (__m64) __builtin_ia32_psrlq ((long long)__m, (long long)__count);
408}
409
410static __inline __m64
661/* Shift the 64-bit value in M right by COUNT; shift in zeros. */
662static __inline __m64
663_mm_srl_si64 (__m64 __m, __m64 __count)
664{
665 return (__m64) __builtin_ia32_psrlq ((long long)__m, (long long)__count);
666}
667
668static __inline __m64
669_m_psrlq (__m64 __m, __m64 __count)
670{
671 return _mm_srl_si64 (__m, __count);
672}
673
674static __inline __m64
411_mm_srli_si64 (__m64 __m, int __count)
412{
413 return (__m64) __builtin_ia32_psrlq ((long long)__m, (long long)__count);
414}
415
675_mm_srli_si64 (__m64 __m, int __count)
676{
677 return (__m64) __builtin_ia32_psrlq ((long long)__m, (long long)__count);
678}
679
680static __inline __m64
681_m_psrlqi (__m64 __m, int __count)
682{
683 return _mm_srli_si64 (__m, __count);
684}
685
416/* Bit-wise AND the 64-bit values in M1 and M2. */
417static __inline __m64
418_mm_and_si64 (__m64 __m1, __m64 __m2)
419{
420 return (__m64) __builtin_ia32_pand ((long long)__m1, (long long)__m2);
421}
422
686/* Bit-wise AND the 64-bit values in M1 and M2. */
687static __inline __m64
688_mm_and_si64 (__m64 __m1, __m64 __m2)
689{
690 return (__m64) __builtin_ia32_pand ((long long)__m1, (long long)__m2);
691}
692
693static __inline __m64
694_m_pand (__m64 __m1, __m64 __m2)
695{
696 return _mm_and_si64 (__m1, __m2);
697}
698
423/* Bit-wise complement the 64-bit value in M1 and bit-wise AND it with the
424 64-bit value in M2. */
425static __inline __m64
426_mm_andnot_si64 (__m64 __m1, __m64 __m2)
427{
428 return (__m64) __builtin_ia32_pandn ((long long)__m1, (long long)__m2);
429}
430
699/* Bit-wise complement the 64-bit value in M1 and bit-wise AND it with the
700 64-bit value in M2. */
701static __inline __m64
702_mm_andnot_si64 (__m64 __m1, __m64 __m2)
703{
704 return (__m64) __builtin_ia32_pandn ((long long)__m1, (long long)__m2);
705}
706
707static __inline __m64
708_m_pandn (__m64 __m1, __m64 __m2)
709{
710 return _mm_andnot_si64 (__m1, __m2);
711}
712
431/* Bit-wise inclusive OR the 64-bit values in M1 and M2. */
432static __inline __m64
433_mm_or_si64 (__m64 __m1, __m64 __m2)
434{
435 return (__m64)__builtin_ia32_por ((long long)__m1, (long long)__m2);
436}
437
713/* Bit-wise inclusive OR the 64-bit values in M1 and M2. */
714static __inline __m64
715_mm_or_si64 (__m64 __m1, __m64 __m2)
716{
717 return (__m64)__builtin_ia32_por ((long long)__m1, (long long)__m2);
718}
719
720static __inline __m64
721_m_por (__m64 __m1, __m64 __m2)
722{
723 return _mm_or_si64 (__m1, __m2);
724}
725
438/* Bit-wise exclusive OR the 64-bit values in M1 and M2. */
/* Both operands are cast to long long for __builtin_ia32_pxor, and the
   builtin's result is cast back to __m64. */
439static __inline __m64
440_mm_xor_si64 (__m64 __m1, __m64 __m2)
441{
442 return (__m64)__builtin_ia32_pxor ((long long)__m1, (long long)__m2);
443}
444
726/* Bit-wise exclusive OR the 64-bit values in M1 and M2. */
/* Each operand is handed to __builtin_ia32_pxor as a long long; the value
   the builtin returns is converted back to __m64 for the caller. */
727static __inline __m64
728_mm_xor_si64 (__m64 __m1, __m64 __m2)
729{
730 return (__m64)__builtin_ia32_pxor ((long long)__m1, (long long)__m2);
731}
732
/* Alternate name for _mm_xor_si64: passes __m1 and __m2 through unchanged
   and returns its result. */
733static __inline __m64
734_m_pxor (__m64 __m1, __m64 __m2)
735{
736 return _mm_xor_si64 (__m1, __m2);
737}
738
445/* Compare eight 8-bit values. The result of the comparison is 0xFF if the
446 test is true and zero if false. */
/* This variant delegates to __builtin_ia32_pcmpeqb (the equality test,
   judging by the builtin's name) with both operands cast to __v8qi; the
   builtin's result is cast back to __m64. */
447static __inline __m64
448_mm_cmpeq_pi8 (__m64 __m1, __m64 __m2)
449{
450 return (__m64) __builtin_ia32_pcmpeqb ((__v8qi)__m1, (__v8qi)__m2);
451}
452
453static __inline __m64
739/* Compare eight 8-bit values. The result of the comparison is 0xFF if the
740 test is true and zero if false. */
/* Implemented by viewing each operand as a __v8qi and calling
   __builtin_ia32_pcmpeqb (by its name, the equality comparison); the
   value returned by the builtin is converted to __m64. */
741static __inline __m64
742_mm_cmpeq_pi8 (__m64 __m1, __m64 __m2)
743{
744 return (__m64) __builtin_ia32_pcmpeqb ((__v8qi)__m1, (__v8qi)__m2);
745}
746
/* Alternate name for _mm_cmpeq_pi8: passes __m1 and __m2 through unchanged
   and returns its result. */
747static __inline __m64
748_m_pcmpeqb (__m64 __m1, __m64 __m2)
749{
750 return _mm_cmpeq_pi8 (__m1, __m2);
751}
752
/* Delegates to __builtin_ia32_pcmpgtb with both operands cast to __v8qi;
   the builtin's result is cast back to __m64.  Judging by the builtin's
   name this is the greater-than compare of the eight 8-bit elements, with
   __m1 as the left-hand operand; whether the compare is signed is not
   visible here -- TODO confirm against the builtin's documentation. */
753static __inline __m64
454_mm_cmpgt_pi8 (__m64 __m1, __m64 __m2)
455{
456 return (__m64) __builtin_ia32_pcmpgtb ((__v8qi)__m1, (__v8qi)__m2);
457}
458
754_mm_cmpgt_pi8 (__m64 __m1, __m64 __m2)
755{
 /* Both operands are viewed as __v8qi for __builtin_ia32_pcmpgtb (by its
    name, a greater-than compare of 8-bit elements); the builtin's result
    is returned as an __m64. */
756 return (__m64) __builtin_ia32_pcmpgtb ((__v8qi)__m1, (__v8qi)__m2);
757}
758
/* Alternate name for _mm_cmpgt_pi8: passes __m1 and __m2 through in the
   same order and returns its result. */
759static __inline __m64
760_m_pcmpgtb (__m64 __m1, __m64 __m2)
761{
762 return _mm_cmpgt_pi8 (__m1, __m2);
763}
764
459/* Compare four 16-bit values. The result of the comparison is 0xFFFF if
460 the test is true and zero if false. */
/* This variant delegates to __builtin_ia32_pcmpeqw (the equality test,
   judging by the builtin's name) with both operands cast to __v4hi; the
   builtin's result is cast back to __m64. */
461static __inline __m64
462_mm_cmpeq_pi16 (__m64 __m1, __m64 __m2)
463{
464 return (__m64) __builtin_ia32_pcmpeqw ((__v4hi)__m1, (__v4hi)__m2);
465}
466
467static __inline __m64
765/* Compare four 16-bit values. The result of the comparison is 0xFFFF if
766 the test is true and zero if false. */
/* Implemented by viewing each operand as a __v4hi and calling
   __builtin_ia32_pcmpeqw (by its name, the equality comparison); the
   value returned by the builtin is converted to __m64. */
767static __inline __m64
768_mm_cmpeq_pi16 (__m64 __m1, __m64 __m2)
769{
770 return (__m64) __builtin_ia32_pcmpeqw ((__v4hi)__m1, (__v4hi)__m2);
771}
772
/* Alternate name for _mm_cmpeq_pi16: passes __m1 and __m2 through
   unchanged and returns its result. */
773static __inline __m64
774_m_pcmpeqw (__m64 __m1, __m64 __m2)
775{
776 return _mm_cmpeq_pi16 (__m1, __m2);
777}
778
/* Delegates to __builtin_ia32_pcmpgtw with both operands cast to __v4hi;
   the builtin's result is cast back to __m64.  Judging by the builtin's
   name this is the greater-than compare of the four 16-bit elements, with
   __m1 as the left-hand operand; whether the compare is signed is not
   visible here -- TODO confirm against the builtin's documentation. */
779static __inline __m64
468_mm_cmpgt_pi16 (__m64 __m1, __m64 __m2)
469{
470 return (__m64) __builtin_ia32_pcmpgtw ((__v4hi)__m1, (__v4hi)__m2);
471}
472
780_mm_cmpgt_pi16 (__m64 __m1, __m64 __m2)
781{
 /* Both operands are viewed as __v4hi for __builtin_ia32_pcmpgtw (by its
    name, a greater-than compare of 16-bit elements); the builtin's result
    is returned as an __m64. */
782 return (__m64) __builtin_ia32_pcmpgtw ((__v4hi)__m1, (__v4hi)__m2);
783}
784
/* Alternate name for _mm_cmpgt_pi16: passes __m1 and __m2 through in the
   same order and returns its result. */
785static __inline __m64
786_m_pcmpgtw (__m64 __m1, __m64 __m2)
787{
788 return _mm_cmpgt_pi16 (__m1, __m2);
789}
790
473/* Compare two 32-bit values. The result of the comparison is 0xFFFFFFFF if
474 the test is true and zero if false. */
/* This variant delegates to __builtin_ia32_pcmpeqd (the equality test,
   judging by the builtin's name) with both operands cast to __v2si; the
   builtin's result is cast back to __m64. */
475static __inline __m64
476_mm_cmpeq_pi32 (__m64 __m1, __m64 __m2)
477{
478 return (__m64) __builtin_ia32_pcmpeqd ((__v2si)__m1, (__v2si)__m2);
479}
480
481static __inline __m64
791/* Compare two 32-bit values. The result of the comparison is 0xFFFFFFFF if
792 the test is true and zero if false. */
/* Implemented by viewing each operand as a __v2si and calling
   __builtin_ia32_pcmpeqd (by its name, the equality comparison); the
   value returned by the builtin is converted to __m64. */
793static __inline __m64
794_mm_cmpeq_pi32 (__m64 __m1, __m64 __m2)
795{
796 return (__m64) __builtin_ia32_pcmpeqd ((__v2si)__m1, (__v2si)__m2);
797}
798
/* Alternate name for _mm_cmpeq_pi32: passes __m1 and __m2 through
   unchanged and returns its result. */
799static __inline __m64
800_m_pcmpeqd (__m64 __m1, __m64 __m2)
801{
802 return _mm_cmpeq_pi32 (__m1, __m2);
803}
804
/* Delegates to __builtin_ia32_pcmpgtd with both operands cast to __v2si;
   the builtin's result is cast back to __m64.  Judging by the builtin's
   name this is the greater-than compare of the two 32-bit elements, with
   __m1 as the left-hand operand; whether the compare is signed is not
   visible here -- TODO confirm against the builtin's documentation. */
805static __inline __m64
482_mm_cmpgt_pi32 (__m64 __m1, __m64 __m2)
483{
484 return (__m64) __builtin_ia32_pcmpgtd ((__v2si)__m1, (__v2si)__m2);
485}
486
806_mm_cmpgt_pi32 (__m64 __m1, __m64 __m2)
807{
 /* Both operands are viewed as __v2si for __builtin_ia32_pcmpgtd (by its
    name, a greater-than compare of 32-bit elements); the builtin's result
    is returned as an __m64. */
808 return (__m64) __builtin_ia32_pcmpgtd ((__v2si)__m1, (__v2si)__m2);
809}
810
/* Alternate name for _mm_cmpgt_pi32: passes __m1 and __m2 through in the
   same order and returns its result. */
811static __inline __m64
812_m_pcmpgtd (__m64 __m1, __m64 __m2)
813{
814 return _mm_cmpgt_pi32 (__m1, __m2);
815}
816
487/* Creates a 64-bit zero. */
/* Takes no arguments; returns the value of __builtin_ia32_mmx_zero cast
   to __m64. */
488static __inline __m64
489_mm_setzero_si64 (void)
490{
491 return (__m64)__builtin_ia32_mmx_zero ();
492}
493
494/* Creates a vector of two 32-bit values; I0 is least significant. */

--- 74 unchanged lines hidden (view full) ---

569/* Creates a vector of four 16-bit values, all elements containing W. */
/* W is first replicated into both 16-bit halves of a 32-bit word, which is
   then passed to _mm_set1_pi32 (presumably to fill both 32-bit lanes --
   confirm against that function).  The high half is widened to unsigned
   int before the shift: a bare (unsigned short) operand is promoted to
   int, and shifting a value with bit 15 set left by 16 is not
   representable in a 32-bit int, which is undefined behaviour. */
570static __inline __m64
571_mm_set1_pi16 (short __w)
572{
573 unsigned int __i = (unsigned int)(unsigned short)__w << 16 | (unsigned short)__w;
574 return _mm_set1_pi32 (__i);
575}
576
817/* Creates a 64-bit zero. */
/* No inputs; the result is whatever __builtin_ia32_mmx_zero returns,
   converted to __m64. */
818static __inline __m64
819_mm_setzero_si64 (void)
820{
821 return (__m64)__builtin_ia32_mmx_zero ();
822}
823
824/* Creates a vector of two 32-bit values; I0 is least significant. */

--- 74 unchanged lines hidden (view full) ---

899/* Creates a vector of four 16-bit values, all elements containing W. */
/* Builds a 32-bit word holding W in both of its 16-bit halves and hands it
   to _mm_set1_pi32 (presumably that fills both 32-bit lanes -- confirm
   against that function).  The operand shifted into the high half is
   converted to unsigned int first: left on its own, (unsigned short)
   promotes to int, and a left shift by 16 of a value with bit 15 set
   overflows a 32-bit int, which is undefined behaviour. */
900static __inline __m64
901_mm_set1_pi16 (short __w)
902{
903 unsigned int __i = (unsigned int)(unsigned short)__w << 16 | (unsigned short)__w;
904 return _mm_set1_pi32 (__i);
905}
906
577/* Creates a vector of eight 8-bit values, all elements containing B. */
907/* Creates a vector of eight 8-bit values, all elements containing B. */
/* Replicates the byte __b: first into both bytes of a 16-bit pattern
   (__w), then into both halves of a 32-bit word (__i), which is passed to
   _mm_set1_pi32 (presumably to fill both 32-bit lanes -- confirm against
   that function). */
578static __inline __m64
579_mm_set1_pi8 (char __b)
580{
 /* The (unsigned char) casts keep a negative char from sign-extending
    into the upper bits of the pattern. */
581 unsigned int __w = (unsigned char)__b << 8 | (unsigned char)__b;
582 unsigned int __i = __w << 16 | __w;
583 return _mm_set1_pi32 (__i);
584}
585
586#endif /* __MMX__ */
587#endif /* _MMINTRIN_H_INCLUDED */
/* Spreads the byte __b across a 32-bit word in two doubling steps (8 -> 16
   bits in __w, 16 -> 32 bits in __i) and passes that word to
   _mm_set1_pi32 (presumably to fill both 32-bit lanes -- confirm against
   that function). */
908static __inline __m64
909_mm_set1_pi8 (char __b)
910{
 /* Casting to unsigned char first prevents sign extension when __b is
    negative. */
911 unsigned int __w = (unsigned char)__b << 8 | (unsigned char)__b;
912 unsigned int __i = __w << 16 | __w;
913 return _mm_set1_pi32 (__i);
914}
915
916#endif /* __MMX__ */
917#endif /* _MMINTRIN_H_INCLUDED */