/* mmintrin.h revision 206084 */
1/*===---- mmintrin.h - MMX intrinsics --------------------------------------===
2 *
3 * Permission is hereby granted, free of charge, to any person obtaining a copy
4 * of this software and associated documentation files (the "Software"), to deal
5 * in the Software without restriction, including without limitation the rights
6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 * copies of the Software, and to permit persons to whom the Software is
8 * furnished to do so, subject to the following conditions:
9 *
10 * The above copyright notice and this permission notice shall be included in
11 * all copies or substantial portions of the Software.
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 * THE SOFTWARE.
20 *
21 *===-----------------------------------------------------------------------===
22 */
23
#ifndef __MMINTRIN_H
#define __MMINTRIN_H

#ifndef __MMX__
#error "MMX instruction set not enabled"
#else

/* The 64-bit MMX register type.  Declared as a one-element vector of
 * long long; the intrinsics below reinterpret it through the packed
 * element views that follow. */
typedef long long __m64 __attribute__((__vector_size__(8)));

/* Internal element-wise views of the same 64 bits. */
typedef int __v2si __attribute__((__vector_size__(8)));   /* 2 x 32-bit int   */
typedef short __v4hi __attribute__((__vector_size__(8))); /* 4 x 16-bit short */
typedef char __v8qi __attribute__((__vector_size__(8)));  /* 8 x  8-bit char  */
36
/* Empties the MMX state (EMMS instruction): clears the x87/MMX tag word so
 * x87 floating-point code can run after MMX computation. */
static __inline__ void __attribute__((__always_inline__, __nodebug__))
_mm_empty(void)
{
    __builtin_ia32_emms();
}
42
43static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
44_mm_cvtsi32_si64(int __i)
45{
46    return (__m64)(__v2si){__i, 0};
47}
48
49static __inline__ int __attribute__((__always_inline__, __nodebug__))
50_mm_cvtsi64_si32(__m64 __m)
51{
52    __v2si __mmx_var2 = (__v2si)__m;
53    return __mmx_var2[0];
54}
55
56static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
57_mm_cvtsi64_m64(long long __i)
58{
59    return (__m64)__i;
60}
61
62static __inline__ long long __attribute__((__always_inline__, __nodebug__))
63_mm_cvtm64_si64(__m64 __m)
64{
65    return (long long)__m;
66}
67
/* Packs eight 16-bit values (four from each operand) into eight 8-bit
 * values with signed saturation (PACKSSWB); __m1 supplies the low half of
 * the result, __m2 the high half. */
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_packs_pi16(__m64 __m1, __m64 __m2)
{
    return (__m64)__builtin_ia32_packsswb((__v4hi)__m1, (__v4hi)__m2);
}

/* Packs four 32-bit values (two from each operand) into four 16-bit
 * values with signed saturation (PACKSSDW). */
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_packs_pi32(__m64 __m1, __m64 __m2)
{
    return (__m64)__builtin_ia32_packssdw((__v2si)__m1, (__v2si)__m2);
}

/* Packs eight signed 16-bit values into eight 8-bit values with UNSIGNED
 * saturation (PACKUSWB): negatives clamp to 0, values > 255 clamp to 255. */
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_packs_pu16(__m64 __m1, __m64 __m2)
{
    return (__m64)__builtin_ia32_packuswb((__v4hi)__m1, (__v4hi)__m2);
}
85
86static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
87_mm_unpackhi_pi8(__m64 __m1, __m64 __m2)
88{
89    return (__m64)__builtin_shufflevector((__v8qi)__m1, (__v8qi)__m2, 4, 8+4, 5,
90                                          8+5, 6, 8+6, 7, 8+7);
91}
92
93static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
94_mm_unpackhi_pi16(__m64 __m1, __m64 __m2)
95{
96    return (__m64)__builtin_shufflevector((__v4hi)__m1, (__v4hi)__m2, 2, 4+2, 3,
97                                          4+3);
98}
99
100static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
101_mm_unpackhi_pi32(__m64 __m1, __m64 __m2)
102{
103    return (__m64)__builtin_shufflevector((__v2si)__m1, (__v2si)__m2, 1, 2+1);
104}
105
106static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
107_mm_unpacklo_pi8(__m64 __m1, __m64 __m2)
108{
109    return (__m64)__builtin_shufflevector((__v8qi)__m1, (__v8qi)__m2, 0, 8+0, 1,
110                                          8+1, 2, 8+2, 3, 8+3);
111}
112
113static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
114_mm_unpacklo_pi16(__m64 __m1, __m64 __m2)
115{
116    return (__m64)__builtin_shufflevector((__v4hi)__m1, (__v4hi)__m2, 0, 4+0, 1,
117                                          4+1);
118}
119
120static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
121_mm_unpacklo_pi32(__m64 __m1, __m64 __m2)
122{
123    return (__m64)__builtin_shufflevector((__v2si)__m1, (__v2si)__m2, 0, 2+0);
124}
125
126static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
127_mm_add_pi8(__m64 __m1, __m64 __m2)
128{
129    return (__m64)((__v8qi)__m1 + (__v8qi)__m2);
130}
131
132static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
133_mm_add_pi16(__m64 __m1, __m64 __m2)
134{
135    return (__m64)((__v4hi)__m1 + (__v4hi)__m2);
136}
137
138static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
139_mm_add_pi32(__m64 __m1, __m64 __m2)
140{
141    return (__m64)((__v2si)__m1 + (__v2si)__m2);
142}
143
/* Adds packed signed 8-bit integers with signed saturation (PADDSB):
 * results clamp to [-128, 127] instead of wrapping. */
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_adds_pi8(__m64 __m1, __m64 __m2)
{
    return (__m64)__builtin_ia32_paddsb((__v8qi)__m1, (__v8qi)__m2);
}

/* Adds packed signed 16-bit integers with signed saturation (PADDSW). */
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_adds_pi16(__m64 __m1, __m64 __m2)
{
    return (__m64)__builtin_ia32_paddsw((__v4hi)__m1, (__v4hi)__m2);
}

/* Adds packed unsigned 8-bit integers with unsigned saturation (PADDUSB):
 * results clamp to 255. */
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_adds_pu8(__m64 __m1, __m64 __m2)
{
    return (__m64)__builtin_ia32_paddusb((__v8qi)__m1, (__v8qi)__m2);
}

/* Adds packed unsigned 16-bit integers with unsigned saturation (PADDUSW). */
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_adds_pu16(__m64 __m1, __m64 __m2)
{
    return (__m64)__builtin_ia32_paddusw((__v4hi)__m1, (__v4hi)__m2);
}
167
168static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
169_mm_sub_pi8(__m64 __m1, __m64 __m2)
170{
171    return (__m64)((__v8qi)__m1 - (__v8qi)__m2);
172}
173
174static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
175_mm_sub_pi16(__m64 __m1, __m64 __m2)
176{
177    return (__m64)((__v4hi)__m1 - (__v4hi)__m2);
178}
179
180static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
181_mm_sub_pi32(__m64 __m1, __m64 __m2)
182{
183    return (__m64)((__v2si)__m1 - (__v2si)__m2);
184}
185
/* Subtracts packed signed 8-bit integers (__m1 - __m2) with signed
 * saturation (PSUBSB): results clamp to [-128, 127]. */
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_subs_pi8(__m64 __m1, __m64 __m2)
{
    return (__m64)__builtin_ia32_psubsb((__v8qi)__m1, (__v8qi)__m2);
}

/* Subtracts packed signed 16-bit integers with signed saturation (PSUBSW). */
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_subs_pi16(__m64 __m1, __m64 __m2)
{
    return (__m64)__builtin_ia32_psubsw((__v4hi)__m1, (__v4hi)__m2);
}

/* Subtracts packed unsigned 8-bit integers with unsigned saturation
 * (PSUBUSB): results clamp at 0. */
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_subs_pu8(__m64 __m1, __m64 __m2)
{
    return (__m64)__builtin_ia32_psubusb((__v8qi)__m1, (__v8qi)__m2);
}

/* Subtracts packed unsigned 16-bit integers with unsigned saturation
 * (PSUBUSW). */
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_subs_pu16(__m64 __m1, __m64 __m2)
{
    return (__m64)__builtin_ia32_psubusw((__v4hi)__m1, (__v4hi)__m2);
}
209
/* Multiplies corresponding 16-bit elements to 32-bit products, then adds
 * adjacent product pairs, yielding two 32-bit sums (PMADDWD). */
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_madd_pi16(__m64 __m1, __m64 __m2)
{
    return (__m64)__builtin_ia32_pmaddwd((__v4hi)__m1, (__v4hi)__m2);
}

/* Multiplies corresponding signed 16-bit elements and keeps the HIGH 16
 * bits of each 32-bit product (PMULHW). */
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_mulhi_pi16(__m64 __m1, __m64 __m2)
{
    return (__m64)__builtin_ia32_pmulhw((__v4hi)__m1, (__v4hi)__m2);
}
221
222static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
223_mm_mullo_pi16(__m64 __m1, __m64 __m2)
224{
225    return (__m64)((__v4hi)__m1 * (__v4hi)__m2);
226}
227
/* Left-shifts each 16-bit element by the count held in __count (PSLLW). */
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_sll_pi16(__m64 __m, __m64 __count)
{
    return (__m64)__builtin_ia32_psllw((__v4hi)__m, __count);
}

/* Left-shifts each 16-bit element by the immediate __count (PSLLW). */
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_slli_pi16(__m64 __m, int __count)
{
    return (__m64)__builtin_ia32_psllwi((__v4hi)__m, __count);
}

/* Left-shifts each 32-bit element by the count held in __count (PSLLD). */
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_sll_pi32(__m64 __m, __m64 __count)
{
    return (__m64)__builtin_ia32_pslld((__v2si)__m, __count);
}

/* Left-shifts each 32-bit element by the immediate __count (PSLLD). */
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_slli_pi32(__m64 __m, int __count)
{
    return (__m64)__builtin_ia32_pslldi((__v2si)__m, __count);
}

/* Left-shifts the entire 64-bit value by the count in __count (PSLLQ). */
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_sll_si64(__m64 __m, __m64 __count)
{
    return __builtin_ia32_psllq(__m, __count);
}

/* Left-shifts the entire 64-bit value by the immediate __count (PSLLQ). */
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_slli_si64(__m64 __m, int __count)
{
    return __builtin_ia32_psllqi(__m, __count);
}

/* Arithmetic (sign-filling) right shift of each 16-bit element (PSRAW). */
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_sra_pi16(__m64 __m, __m64 __count)
{
    return (__m64)__builtin_ia32_psraw((__v4hi)__m, __count);
}

/* Arithmetic right shift of each 16-bit element by the immediate (PSRAW). */
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_srai_pi16(__m64 __m, int __count)
{
    return (__m64)__builtin_ia32_psrawi((__v4hi)__m, __count);
}

/* Arithmetic right shift of each 32-bit element (PSRAD). */
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_sra_pi32(__m64 __m, __m64 __count)
{
    return (__m64)__builtin_ia32_psrad((__v2si)__m, __count);
}

/* Arithmetic right shift of each 32-bit element by the immediate (PSRAD). */
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_srai_pi32(__m64 __m, int __count)
{
    return (__m64)__builtin_ia32_psradi((__v2si)__m, __count);
}

/* Logical (zero-filling) right shift of each 16-bit element (PSRLW). */
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_srl_pi16(__m64 __m, __m64 __count)
{
    return (__m64)__builtin_ia32_psrlw((__v4hi)__m, __count);
}

/* Logical right shift of each 16-bit element by the immediate (PSRLW). */
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_srli_pi16(__m64 __m, int __count)
{
    return (__m64)__builtin_ia32_psrlwi((__v4hi)__m, __count);
}

/* Logical right shift of each 32-bit element (PSRLD). */
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_srl_pi32(__m64 __m, __m64 __count)
{
    return (__m64)__builtin_ia32_psrld((__v2si)__m, __count);
}

/* Logical right shift of each 32-bit element by the immediate (PSRLD). */
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_srli_pi32(__m64 __m, int __count)
{
    return (__m64)__builtin_ia32_psrldi((__v2si)__m, __count);
}

/* Logical right shift of the entire 64-bit value (PSRLQ). */
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_srl_si64(__m64 __m, __m64 __count)
{
    return (__m64)__builtin_ia32_psrlq(__m, __count);
}

/* Logical right shift of the entire 64-bit value by the immediate (PSRLQ). */
static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
_mm_srli_si64(__m64 __m, int __count)
{
    return __builtin_ia32_psrlqi(__m, __count);
}
323
324static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
325_mm_and_si64(__m64 __m1, __m64 __m2)
326{
327    return __m1 & __m2;
328}
329
330static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
331_mm_andnot_si64(__m64 __m1, __m64 __m2)
332{
333    return ~__m1 & __m2;
334}
335
336static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
337_mm_or_si64(__m64 __m1, __m64 __m2)
338{
339    return __m1 | __m2;
340}
341
342static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
343_mm_xor_si64(__m64 __m1, __m64 __m2)
344{
345    return __m1 ^ __m2;
346}
347
348static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
349_mm_cmpeq_pi8(__m64 __m1, __m64 __m2)
350{
351    return (__m64)((__v8qi)__m1 == (__v8qi)__m2);
352}
353
354static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
355_mm_cmpeq_pi16(__m64 __m1, __m64 __m2)
356{
357    return (__m64)((__v4hi)__m1 == (__v4hi)__m2);
358}
359
360static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
361_mm_cmpeq_pi32(__m64 __m1, __m64 __m2)
362{
363    return (__m64)((__v2si)__m1 == (__v2si)__m2);
364}
365
366static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
367_mm_cmpgt_pi8(__m64 __m1, __m64 __m2)
368{
369    return (__m64)((__v8qi)__m1 > (__v8qi)__m2);
370}
371
372static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
373_mm_cmpgt_pi16(__m64 __m1, __m64 __m2)
374{
375    return (__m64)((__v4hi)__m1 > (__v4hi)__m2);
376}
377
378static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
379_mm_cmpgt_pi32(__m64 __m1, __m64 __m2)
380{
381    return (__m64)((__v2si)__m1 > (__v2si)__m2);
382}
383
384static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
385_mm_setzero_si64(void)
386{
387    return (__m64){ 0LL };
388}
389
390static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
391_mm_set_pi32(int __i1, int __i0)
392{
393    return (__m64)(__v2si){ __i0, __i1 };
394}
395
396static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
397_mm_set_pi16(short __s3, short __s2, short __s1, short __s0)
398{
399    return (__m64)(__v4hi){ __s0, __s1, __s2, __s3 };
400}
401
402static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
403_mm_set_pi8(char __b7, char __b6, char __b5, char __b4, char __b3, char __b2,
404            char __b1, char __b0)
405{
406    return (__m64)(__v8qi){ __b0, __b1, __b2, __b3, __b4, __b5, __b6, __b7 };
407}
408
409static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
410_mm_set1_pi32(int __i)
411{
412    return (__m64)(__v2si){ __i, __i };
413}
414
415static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
416_mm_set1_pi16(short __s)
417{
418    return (__m64)(__v4hi){ __s, __s, __s, __s };
419}
420
421static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
422_mm_set1_pi8(char __b)
423{
424    return (__m64)(__v8qi){ __b, __b, __b, __b, __b, __b, __b, __b };
425}
426
427static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
428_mm_setr_pi32(int __i1, int __i0)
429{
430    return (__m64)(__v2si){ __i1, __i0 };
431}
432
433static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
434_mm_setr_pi16(short __s3, short __s2, short __s1, short __s0)
435{
436    return (__m64)(__v4hi){ __s3, __s2, __s1, __s0 };
437}
438
439static __inline__ __m64 __attribute__((__always_inline__, __nodebug__))
440_mm_setr_pi8(char __b7, char __b6, char __b5, char __b4, char __b3, char __b2,
441             char __b1, char __b0)
442{
443    return (__m64)(__v8qi){ __b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0 };
444}
445
446#endif /* __MMX__ */
447
448#endif /* __MMINTRIN_H */
449
450