1169689Skan/* Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
2122180Skan
3132718Skan   This file is part of GCC.
4122180Skan
5132718Skan   GCC is free software; you can redistribute it and/or modify
6122180Skan   it under the terms of the GNU General Public License as published by
7122180Skan   the Free Software Foundation; either version 2, or (at your option)
8122180Skan   any later version.
9122180Skan
10132718Skan   GCC is distributed in the hope that it will be useful,
11122180Skan   but WITHOUT ANY WARRANTY; without even the implied warranty of
12122180Skan   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13122180Skan   GNU General Public License for more details.
14122180Skan
15122180Skan   You should have received a copy of the GNU General Public License
16132718Skan   along with GCC; see the file COPYING.  If not, write to
17169689Skan   the Free Software Foundation, 51 Franklin Street, Fifth Floor,
18169689Skan   Boston, MA 02110-1301, USA.  */
19122180Skan
20122180Skan/* As a special exception, if you include this header file into source
21122180Skan   files compiled by GCC, this header file does not by itself cause
22122180Skan   the resulting executable to be covered by the GNU General Public
23122180Skan   License.  This exception does not however invalidate any other
24122180Skan   reasons why the executable file might be covered by the GNU General
25122180Skan   Public License.  */
26122180Skan
27122180Skan/* Implemented from the specification included in the Intel C++ Compiler
28169689Skan   User Guide and Reference, version 9.0.  */
29122180Skan
30122180Skan#ifndef _EMMINTRIN_H_INCLUDED
31122180Skan#define _EMMINTRIN_H_INCLUDED
32122180Skan
33251212Spfg#ifndef __SSE2__
34251212Spfg# error "SSE2 instruction set not enabled"
35251212Spfg#else
36251212Spfg
/* We need definitions from the SSE header files.  */
38122180Skan#include <xmmintrin.h>
39122180Skan
/* SSE2 */

/* Internal 16-byte vector types, one per element type.  */
typedef double    __v2df  __attribute__ ((__vector_size__ (16)));
typedef long long __v2di  __attribute__ ((__vector_size__ (16)));
typedef int       __v4si  __attribute__ ((__vector_size__ (16)));
typedef short     __v8hi  __attribute__ ((__vector_size__ (16)));
typedef char      __v16qi __attribute__ ((__vector_size__ (16)));

/* Public vector types.  The Intel API lets these be accessed through other
   vector types and through their scalar components, so both carry
   __may_alias__.  */
typedef long long __m128i
  __attribute__ ((__vector_size__ (16), __may_alias__));
typedef double __m128d
  __attribute__ ((__vector_size__ (16), __may_alias__));
51169689Skan
/* Build the immediate selector used with the SHUFPD builtin: FP1 goes
   into bit 1 and FP0 into bit 0.  */
#define _MM_SHUFFLE2(fp1,fp0) (((fp1) << 1) | (fp0))
55122180Skan
56169689Skan/* Create a vector with element 0 as F and the rest zero.  */
57169689Skanstatic __inline __m128d __attribute__((__always_inline__))
58169689Skan_mm_set_sd (double __F)
59169689Skan{
60169689Skan  return __extension__ (__m128d){ __F, 0 };
61169689Skan}
62122180Skan
63169689Skan/* Create a vector with both elements equal to F.  */
64169689Skanstatic __inline __m128d __attribute__((__always_inline__))
65169689Skan_mm_set1_pd (double __F)
66122180Skan{
67169689Skan  return __extension__ (__m128d){ __F, __F };
68122180Skan}
69122180Skan
70169689Skanstatic __inline __m128d __attribute__((__always_inline__))
71169689Skan_mm_set_pd1 (double __F)
72122180Skan{
73169689Skan  return _mm_set1_pd (__F);
74122180Skan}
75122180Skan
76169689Skan/* Create a vector with the lower value X and upper value W.  */
77169689Skanstatic __inline __m128d __attribute__((__always_inline__))
78169689Skan_mm_set_pd (double __W, double __X)
79122180Skan{
80169689Skan  return __extension__ (__m128d){ __X, __W };
81122180Skan}
82122180Skan
83169689Skan/* Create a vector with the lower value W and upper value X.  */
84169689Skanstatic __inline __m128d __attribute__((__always_inline__))
85169689Skan_mm_setr_pd (double __W, double __X)
86169689Skan{
87169689Skan  return __extension__ (__m128d){ __W, __X };
88169689Skan}
89169689Skan
90169689Skan/* Create a vector of zeros.  */
91169689Skanstatic __inline __m128d __attribute__((__always_inline__))
92169689Skan_mm_setzero_pd (void)
93169689Skan{
94169689Skan  return __extension__ (__m128d){ 0.0, 0.0 };
95169689Skan}
96169689Skan
97169689Skan/* Sets the low DPFP value of A from the low value of B.  */
98169689Skanstatic __inline __m128d __attribute__((__always_inline__))
99169689Skan_mm_move_sd (__m128d __A, __m128d __B)
100169689Skan{
101169689Skan  return (__m128d) __builtin_ia32_movsd ((__v2df)__A, (__v2df)__B);
102169689Skan}
103169689Skan
104122180Skan/* Load two DPFP values from P.  The address must be 16-byte aligned.  */
105169689Skanstatic __inline __m128d __attribute__((__always_inline__))
106122180Skan_mm_load_pd (double const *__P)
107122180Skan{
108169689Skan  return *(__m128d *)__P;
109122180Skan}
110122180Skan
111122180Skan/* Load two DPFP values from P.  The address need not be 16-byte aligned.  */
112169689Skanstatic __inline __m128d __attribute__((__always_inline__))
113122180Skan_mm_loadu_pd (double const *__P)
114122180Skan{
115169689Skan  return __builtin_ia32_loadupd (__P);
116122180Skan}
117122180Skan
118169689Skan/* Create a vector with all two elements equal to *P.  */
119169689Skanstatic __inline __m128d __attribute__((__always_inline__))
120169689Skan_mm_load1_pd (double const *__P)
121122180Skan{
122169689Skan  return _mm_set1_pd (*__P);
123122180Skan}
124122180Skan
125169689Skan/* Create a vector with element 0 as *P and the rest zero.  */
126169689Skanstatic __inline __m128d __attribute__((__always_inline__))
127169689Skan_mm_load_sd (double const *__P)
128122180Skan{
129169689Skan  return _mm_set_sd (*__P);
130122180Skan}
131122180Skan
132169689Skanstatic __inline __m128d __attribute__((__always_inline__))
133169689Skan_mm_load_pd1 (double const *__P)
134122180Skan{
135169689Skan  return _mm_load1_pd (__P);
136122180Skan}
137122180Skan
138169689Skan/* Load two DPFP values in reverse order.  The address must be aligned.  */
139169689Skanstatic __inline __m128d __attribute__((__always_inline__))
140169689Skan_mm_loadr_pd (double const *__P)
141122180Skan{
142169689Skan  __m128d __tmp = _mm_load_pd (__P);
143169689Skan  return __builtin_ia32_shufpd (__tmp, __tmp, _MM_SHUFFLE2 (0,1));
144122180Skan}
145122180Skan
146169689Skan/* Store two DPFP values.  The address must be 16-byte aligned.  */
147169689Skanstatic __inline void __attribute__((__always_inline__))
148169689Skan_mm_store_pd (double *__P, __m128d __A)
149122180Skan{
150169689Skan  *(__m128d *)__P = __A;
151122180Skan}
152122180Skan
153169689Skan/* Store two DPFP values.  The address need not be 16-byte aligned.  */
154169689Skanstatic __inline void __attribute__((__always_inline__))
155169689Skan_mm_storeu_pd (double *__P, __m128d __A)
156122180Skan{
157169689Skan  __builtin_ia32_storeupd (__P, __A);
158122180Skan}
159122180Skan
160169689Skan/* Stores the lower DPFP value.  */
161169689Skanstatic __inline void __attribute__((__always_inline__))
162169689Skan_mm_store_sd (double *__P, __m128d __A)
163122180Skan{
164169689Skan  *__P = __builtin_ia32_vec_ext_v2df (__A, 0);
165122180Skan}
166122180Skan
167169689Skanstatic __inline double __attribute__((__always_inline__))
168169689Skan_mm_cvtsd_f64 (__m128d __A)
169122180Skan{
170169689Skan  return __builtin_ia32_vec_ext_v2df (__A, 0);
171122180Skan}
172122180Skan
173169689Skanstatic __inline void __attribute__((__always_inline__))
174169689Skan_mm_storel_pd (double *__P, __m128d __A)
175169689Skan{
176169689Skan  _mm_store_sd (__P, __A);
177169689Skan}
178169689Skan
179169689Skan/* Stores the upper DPFP value.  */
180169689Skanstatic __inline void __attribute__((__always_inline__))
181169689Skan_mm_storeh_pd (double *__P, __m128d __A)
182169689Skan{
183169689Skan  *__P = __builtin_ia32_vec_ext_v2df (__A, 1);
184169689Skan}
185169689Skan
186169689Skan/* Store the lower DPFP value across two words.
187169689Skan   The address must be 16-byte aligned.  */
188169689Skanstatic __inline void __attribute__((__always_inline__))
189122180Skan_mm_store1_pd (double *__P, __m128d __A)
190122180Skan{
191169689Skan  _mm_store_pd (__P, __builtin_ia32_shufpd (__A, __A, _MM_SHUFFLE2 (0,0)));
192122180Skan}
193122180Skan
194169689Skanstatic __inline void __attribute__((__always_inline__))
195122180Skan_mm_store_pd1 (double *__P, __m128d __A)
196122180Skan{
197122180Skan  _mm_store1_pd (__P, __A);
198122180Skan}
199122180Skan
200169689Skan/* Store two DPFP values in reverse order.  The address must be aligned.  */
201169689Skanstatic __inline void __attribute__((__always_inline__))
202169689Skan_mm_storer_pd (double *__P, __m128d __A)
203122180Skan{
204169689Skan  _mm_store_pd (__P, __builtin_ia32_shufpd (__A, __A, _MM_SHUFFLE2 (0,1)));
205122180Skan}
206122180Skan
207169689Skanstatic __inline int __attribute__((__always_inline__))
208169689Skan_mm_cvtsi128_si32 (__m128i __A)
209122180Skan{
210169689Skan  return __builtin_ia32_vec_ext_v4si ((__v4si)__A, 0);
211122180Skan}
212122180Skan
213169689Skan#ifdef __x86_64__
214169689Skan/* Intel intrinsic.  */
215169689Skanstatic __inline long long __attribute__((__always_inline__))
216169689Skan_mm_cvtsi128_si64 (__m128i __A)
217122180Skan{
218169689Skan  return __builtin_ia32_vec_ext_v2di ((__v2di)__A, 0);
219122180Skan}
220122180Skan
221169689Skan/* Microsoft intrinsic.  */
222169689Skanstatic __inline long long __attribute__((__always_inline__))
223169689Skan_mm_cvtsi128_si64x (__m128i __A)
224122180Skan{
225169689Skan  return __builtin_ia32_vec_ext_v2di ((__v2di)__A, 0);
226122180Skan}
227169689Skan#endif
228122180Skan
229169689Skanstatic __inline __m128d __attribute__((__always_inline__))
230122180Skan_mm_add_pd (__m128d __A, __m128d __B)
231122180Skan{
232122180Skan  return (__m128d)__builtin_ia32_addpd ((__v2df)__A, (__v2df)__B);
233122180Skan}
234122180Skan
235169689Skanstatic __inline __m128d __attribute__((__always_inline__))
236122180Skan_mm_add_sd (__m128d __A, __m128d __B)
237122180Skan{
238122180Skan  return (__m128d)__builtin_ia32_addsd ((__v2df)__A, (__v2df)__B);
239122180Skan}
240122180Skan
241169689Skanstatic __inline __m128d __attribute__((__always_inline__))
242122180Skan_mm_sub_pd (__m128d __A, __m128d __B)
243122180Skan{
244122180Skan  return (__m128d)__builtin_ia32_subpd ((__v2df)__A, (__v2df)__B);
245122180Skan}
246122180Skan
247169689Skanstatic __inline __m128d __attribute__((__always_inline__))
248122180Skan_mm_sub_sd (__m128d __A, __m128d __B)
249122180Skan{
250122180Skan  return (__m128d)__builtin_ia32_subsd ((__v2df)__A, (__v2df)__B);
251122180Skan}
252122180Skan
253169689Skanstatic __inline __m128d __attribute__((__always_inline__))
254122180Skan_mm_mul_pd (__m128d __A, __m128d __B)
255122180Skan{
256122180Skan  return (__m128d)__builtin_ia32_mulpd ((__v2df)__A, (__v2df)__B);
257122180Skan}
258122180Skan
259169689Skanstatic __inline __m128d __attribute__((__always_inline__))
260122180Skan_mm_mul_sd (__m128d __A, __m128d __B)
261122180Skan{
262122180Skan  return (__m128d)__builtin_ia32_mulsd ((__v2df)__A, (__v2df)__B);
263122180Skan}
264122180Skan
265169689Skanstatic __inline __m128d __attribute__((__always_inline__))
266122180Skan_mm_div_pd (__m128d __A, __m128d __B)
267122180Skan{
268122180Skan  return (__m128d)__builtin_ia32_divpd ((__v2df)__A, (__v2df)__B);
269122180Skan}
270122180Skan
271169689Skanstatic __inline __m128d __attribute__((__always_inline__))
272122180Skan_mm_div_sd (__m128d __A, __m128d __B)
273122180Skan{
274122180Skan  return (__m128d)__builtin_ia32_divsd ((__v2df)__A, (__v2df)__B);
275122180Skan}
276122180Skan
277169689Skanstatic __inline __m128d __attribute__((__always_inline__))
278122180Skan_mm_sqrt_pd (__m128d __A)
279122180Skan{
280122180Skan  return (__m128d)__builtin_ia32_sqrtpd ((__v2df)__A);
281122180Skan}
282122180Skan
283122180Skan/* Return pair {sqrt (A[0), B[1]}.  */
284169689Skanstatic __inline __m128d __attribute__((__always_inline__))
285122180Skan_mm_sqrt_sd (__m128d __A, __m128d __B)
286122180Skan{
287122180Skan  __v2df __tmp = __builtin_ia32_movsd ((__v2df)__A, (__v2df)__B);
288122180Skan  return (__m128d)__builtin_ia32_sqrtsd ((__v2df)__tmp);
289122180Skan}
290122180Skan
291169689Skanstatic __inline __m128d __attribute__((__always_inline__))
292122180Skan_mm_min_pd (__m128d __A, __m128d __B)
293122180Skan{
294122180Skan  return (__m128d)__builtin_ia32_minpd ((__v2df)__A, (__v2df)__B);
295122180Skan}
296122180Skan
297169689Skanstatic __inline __m128d __attribute__((__always_inline__))
298122180Skan_mm_min_sd (__m128d __A, __m128d __B)
299122180Skan{
300122180Skan  return (__m128d)__builtin_ia32_minsd ((__v2df)__A, (__v2df)__B);
301122180Skan}
302122180Skan
303169689Skanstatic __inline __m128d __attribute__((__always_inline__))
304122180Skan_mm_max_pd (__m128d __A, __m128d __B)
305122180Skan{
306122180Skan  return (__m128d)__builtin_ia32_maxpd ((__v2df)__A, (__v2df)__B);
307122180Skan}
308122180Skan
309169689Skanstatic __inline __m128d __attribute__((__always_inline__))
310122180Skan_mm_max_sd (__m128d __A, __m128d __B)
311122180Skan{
312122180Skan  return (__m128d)__builtin_ia32_maxsd ((__v2df)__A, (__v2df)__B);
313122180Skan}
314122180Skan
315169689Skanstatic __inline __m128d __attribute__((__always_inline__))
316122180Skan_mm_and_pd (__m128d __A, __m128d __B)
317122180Skan{
318122180Skan  return (__m128d)__builtin_ia32_andpd ((__v2df)__A, (__v2df)__B);
319122180Skan}
320122180Skan
321169689Skanstatic __inline __m128d __attribute__((__always_inline__))
322122180Skan_mm_andnot_pd (__m128d __A, __m128d __B)
323122180Skan{
324122180Skan  return (__m128d)__builtin_ia32_andnpd ((__v2df)__A, (__v2df)__B);
325122180Skan}
326122180Skan
327169689Skanstatic __inline __m128d __attribute__((__always_inline__))
328122180Skan_mm_or_pd (__m128d __A, __m128d __B)
329122180Skan{
330122180Skan  return (__m128d)__builtin_ia32_orpd ((__v2df)__A, (__v2df)__B);
331122180Skan}
332122180Skan
333169689Skanstatic __inline __m128d __attribute__((__always_inline__))
334122180Skan_mm_xor_pd (__m128d __A, __m128d __B)
335122180Skan{
336122180Skan  return (__m128d)__builtin_ia32_xorpd ((__v2df)__A, (__v2df)__B);
337122180Skan}
338122180Skan
339169689Skanstatic __inline __m128d __attribute__((__always_inline__))
340122180Skan_mm_cmpeq_pd (__m128d __A, __m128d __B)
341122180Skan{
342122180Skan  return (__m128d)__builtin_ia32_cmpeqpd ((__v2df)__A, (__v2df)__B);
343122180Skan}
344122180Skan
345169689Skanstatic __inline __m128d __attribute__((__always_inline__))
346122180Skan_mm_cmplt_pd (__m128d __A, __m128d __B)
347122180Skan{
348122180Skan  return (__m128d)__builtin_ia32_cmpltpd ((__v2df)__A, (__v2df)__B);
349122180Skan}
350122180Skan
351169689Skanstatic __inline __m128d __attribute__((__always_inline__))
352122180Skan_mm_cmple_pd (__m128d __A, __m128d __B)
353122180Skan{
354122180Skan  return (__m128d)__builtin_ia32_cmplepd ((__v2df)__A, (__v2df)__B);
355122180Skan}
356122180Skan
357169689Skanstatic __inline __m128d __attribute__((__always_inline__))
358122180Skan_mm_cmpgt_pd (__m128d __A, __m128d __B)
359122180Skan{
360122180Skan  return (__m128d)__builtin_ia32_cmpgtpd ((__v2df)__A, (__v2df)__B);
361122180Skan}
362122180Skan
363169689Skanstatic __inline __m128d __attribute__((__always_inline__))
364122180Skan_mm_cmpge_pd (__m128d __A, __m128d __B)
365122180Skan{
366122180Skan  return (__m128d)__builtin_ia32_cmpgepd ((__v2df)__A, (__v2df)__B);
367122180Skan}
368122180Skan
369169689Skanstatic __inline __m128d __attribute__((__always_inline__))
370122180Skan_mm_cmpneq_pd (__m128d __A, __m128d __B)
371122180Skan{
372122180Skan  return (__m128d)__builtin_ia32_cmpneqpd ((__v2df)__A, (__v2df)__B);
373122180Skan}
374122180Skan
375169689Skanstatic __inline __m128d __attribute__((__always_inline__))
376122180Skan_mm_cmpnlt_pd (__m128d __A, __m128d __B)
377122180Skan{
378122180Skan  return (__m128d)__builtin_ia32_cmpnltpd ((__v2df)__A, (__v2df)__B);
379122180Skan}
380122180Skan
381169689Skanstatic __inline __m128d __attribute__((__always_inline__))
382122180Skan_mm_cmpnle_pd (__m128d __A, __m128d __B)
383122180Skan{
384122180Skan  return (__m128d)__builtin_ia32_cmpnlepd ((__v2df)__A, (__v2df)__B);
385122180Skan}
386122180Skan
387169689Skanstatic __inline __m128d __attribute__((__always_inline__))
388122180Skan_mm_cmpngt_pd (__m128d __A, __m128d __B)
389122180Skan{
390122180Skan  return (__m128d)__builtin_ia32_cmpngtpd ((__v2df)__A, (__v2df)__B);
391122180Skan}
392122180Skan
393169689Skanstatic __inline __m128d __attribute__((__always_inline__))
394122180Skan_mm_cmpnge_pd (__m128d __A, __m128d __B)
395122180Skan{
396122180Skan  return (__m128d)__builtin_ia32_cmpngepd ((__v2df)__A, (__v2df)__B);
397122180Skan}
398122180Skan
399169689Skanstatic __inline __m128d __attribute__((__always_inline__))
400122180Skan_mm_cmpord_pd (__m128d __A, __m128d __B)
401122180Skan{
402122180Skan  return (__m128d)__builtin_ia32_cmpordpd ((__v2df)__A, (__v2df)__B);
403122180Skan}
404122180Skan
405169689Skanstatic __inline __m128d __attribute__((__always_inline__))
406122180Skan_mm_cmpunord_pd (__m128d __A, __m128d __B)
407122180Skan{
408122180Skan  return (__m128d)__builtin_ia32_cmpunordpd ((__v2df)__A, (__v2df)__B);
409122180Skan}
410122180Skan
411169689Skanstatic __inline __m128d __attribute__((__always_inline__))
412122180Skan_mm_cmpeq_sd (__m128d __A, __m128d __B)
413122180Skan{
414122180Skan  return (__m128d)__builtin_ia32_cmpeqsd ((__v2df)__A, (__v2df)__B);
415122180Skan}
416122180Skan
417169689Skanstatic __inline __m128d __attribute__((__always_inline__))
418122180Skan_mm_cmplt_sd (__m128d __A, __m128d __B)
419122180Skan{
420122180Skan  return (__m128d)__builtin_ia32_cmpltsd ((__v2df)__A, (__v2df)__B);
421122180Skan}
422122180Skan
423169689Skanstatic __inline __m128d __attribute__((__always_inline__))
424122180Skan_mm_cmple_sd (__m128d __A, __m128d __B)
425122180Skan{
426122180Skan  return (__m128d)__builtin_ia32_cmplesd ((__v2df)__A, (__v2df)__B);
427122180Skan}
428122180Skan
429169689Skanstatic __inline __m128d __attribute__((__always_inline__))
430122180Skan_mm_cmpgt_sd (__m128d __A, __m128d __B)
431122180Skan{
432122180Skan  return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
433122180Skan					 (__v2df)
434122180Skan					 __builtin_ia32_cmpltsd ((__v2df) __B,
435122180Skan								 (__v2df)
436122180Skan								 __A));
437122180Skan}
438122180Skan
439169689Skanstatic __inline __m128d __attribute__((__always_inline__))
440122180Skan_mm_cmpge_sd (__m128d __A, __m128d __B)
441122180Skan{
442122180Skan  return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
443122180Skan					 (__v2df)
444122180Skan					 __builtin_ia32_cmplesd ((__v2df) __B,
445122180Skan								 (__v2df)
446122180Skan								 __A));
447122180Skan}
448122180Skan
449169689Skanstatic __inline __m128d __attribute__((__always_inline__))
450122180Skan_mm_cmpneq_sd (__m128d __A, __m128d __B)
451122180Skan{
452122180Skan  return (__m128d)__builtin_ia32_cmpneqsd ((__v2df)__A, (__v2df)__B);
453122180Skan}
454122180Skan
455169689Skanstatic __inline __m128d __attribute__((__always_inline__))
456122180Skan_mm_cmpnlt_sd (__m128d __A, __m128d __B)
457122180Skan{
458122180Skan  return (__m128d)__builtin_ia32_cmpnltsd ((__v2df)__A, (__v2df)__B);
459122180Skan}
460122180Skan
461169689Skanstatic __inline __m128d __attribute__((__always_inline__))
462122180Skan_mm_cmpnle_sd (__m128d __A, __m128d __B)
463122180Skan{
464122180Skan  return (__m128d)__builtin_ia32_cmpnlesd ((__v2df)__A, (__v2df)__B);
465122180Skan}
466122180Skan
467169689Skanstatic __inline __m128d __attribute__((__always_inline__))
468122180Skan_mm_cmpngt_sd (__m128d __A, __m128d __B)
469122180Skan{
470122180Skan  return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
471122180Skan					 (__v2df)
472122180Skan					 __builtin_ia32_cmpnltsd ((__v2df) __B,
473122180Skan								  (__v2df)
474122180Skan								  __A));
475122180Skan}
476122180Skan
477169689Skanstatic __inline __m128d __attribute__((__always_inline__))
478122180Skan_mm_cmpnge_sd (__m128d __A, __m128d __B)
479122180Skan{
480122180Skan  return (__m128d) __builtin_ia32_movsd ((__v2df) __A,
481122180Skan					 (__v2df)
482122180Skan					 __builtin_ia32_cmpnlesd ((__v2df) __B,
483122180Skan								  (__v2df)
484122180Skan								  __A));
485122180Skan}
486122180Skan
487169689Skanstatic __inline __m128d __attribute__((__always_inline__))
488122180Skan_mm_cmpord_sd (__m128d __A, __m128d __B)
489122180Skan{
490122180Skan  return (__m128d)__builtin_ia32_cmpordsd ((__v2df)__A, (__v2df)__B);
491122180Skan}
492122180Skan
493169689Skanstatic __inline __m128d __attribute__((__always_inline__))
494122180Skan_mm_cmpunord_sd (__m128d __A, __m128d __B)
495122180Skan{
496122180Skan  return (__m128d)__builtin_ia32_cmpunordsd ((__v2df)__A, (__v2df)__B);
497122180Skan}
498122180Skan
499169689Skanstatic __inline int __attribute__((__always_inline__))
500122180Skan_mm_comieq_sd (__m128d __A, __m128d __B)
501122180Skan{
502122180Skan  return __builtin_ia32_comisdeq ((__v2df)__A, (__v2df)__B);
503122180Skan}
504122180Skan
505169689Skanstatic __inline int __attribute__((__always_inline__))
506122180Skan_mm_comilt_sd (__m128d __A, __m128d __B)
507122180Skan{
508122180Skan  return __builtin_ia32_comisdlt ((__v2df)__A, (__v2df)__B);
509122180Skan}
510122180Skan
511169689Skanstatic __inline int __attribute__((__always_inline__))
512122180Skan_mm_comile_sd (__m128d __A, __m128d __B)
513122180Skan{
514122180Skan  return __builtin_ia32_comisdle ((__v2df)__A, (__v2df)__B);
515122180Skan}
516122180Skan
517169689Skanstatic __inline int __attribute__((__always_inline__))
518122180Skan_mm_comigt_sd (__m128d __A, __m128d __B)
519122180Skan{
520122180Skan  return __builtin_ia32_comisdgt ((__v2df)__A, (__v2df)__B);
521122180Skan}
522122180Skan
523169689Skanstatic __inline int __attribute__((__always_inline__))
524122180Skan_mm_comige_sd (__m128d __A, __m128d __B)
525122180Skan{
526122180Skan  return __builtin_ia32_comisdge ((__v2df)__A, (__v2df)__B);
527122180Skan}
528122180Skan
529169689Skanstatic __inline int __attribute__((__always_inline__))
530122180Skan_mm_comineq_sd (__m128d __A, __m128d __B)
531122180Skan{
532122180Skan  return __builtin_ia32_comisdneq ((__v2df)__A, (__v2df)__B);
533122180Skan}
534122180Skan
535169689Skanstatic __inline int __attribute__((__always_inline__))
536122180Skan_mm_ucomieq_sd (__m128d __A, __m128d __B)
537122180Skan{
538122180Skan  return __builtin_ia32_ucomisdeq ((__v2df)__A, (__v2df)__B);
539122180Skan}
540122180Skan
541169689Skanstatic __inline int __attribute__((__always_inline__))
542122180Skan_mm_ucomilt_sd (__m128d __A, __m128d __B)
543122180Skan{
544122180Skan  return __builtin_ia32_ucomisdlt ((__v2df)__A, (__v2df)__B);
545122180Skan}
546122180Skan
547169689Skanstatic __inline int __attribute__((__always_inline__))
548122180Skan_mm_ucomile_sd (__m128d __A, __m128d __B)
549122180Skan{
550122180Skan  return __builtin_ia32_ucomisdle ((__v2df)__A, (__v2df)__B);
551122180Skan}
552122180Skan
553169689Skanstatic __inline int __attribute__((__always_inline__))
554122180Skan_mm_ucomigt_sd (__m128d __A, __m128d __B)
555122180Skan{
556122180Skan  return __builtin_ia32_ucomisdgt ((__v2df)__A, (__v2df)__B);
557122180Skan}
558122180Skan
559169689Skanstatic __inline int __attribute__((__always_inline__))
560122180Skan_mm_ucomige_sd (__m128d __A, __m128d __B)
561122180Skan{
562122180Skan  return __builtin_ia32_ucomisdge ((__v2df)__A, (__v2df)__B);
563122180Skan}
564122180Skan
565169689Skanstatic __inline int __attribute__((__always_inline__))
566122180Skan_mm_ucomineq_sd (__m128d __A, __m128d __B)
567122180Skan{
568122180Skan  return __builtin_ia32_ucomisdneq ((__v2df)__A, (__v2df)__B);
569122180Skan}
570122180Skan
571169689Skan/* Create a vector of Qi, where i is the element number.  */
572122180Skan
573169689Skanstatic __inline __m128i __attribute__((__always_inline__))
574169689Skan_mm_set_epi64x (long long __q1, long long __q0)
575122180Skan{
576169689Skan  return __extension__ (__m128i)(__v2di){ __q0, __q1 };
577122180Skan}
578122180Skan
579169689Skanstatic __inline __m128i __attribute__((__always_inline__))
580169689Skan_mm_set_epi64 (__m64 __q1,  __m64 __q0)
581122180Skan{
582169689Skan  return _mm_set_epi64x ((long long)__q1, (long long)__q0);
583122180Skan}
584122180Skan
585169689Skanstatic __inline __m128i __attribute__((__always_inline__))
586169689Skan_mm_set_epi32 (int __q3, int __q2, int __q1, int __q0)
587122180Skan{
588169689Skan  return __extension__ (__m128i)(__v4si){ __q0, __q1, __q2, __q3 };
589122180Skan}
590122180Skan
591169689Skanstatic __inline __m128i __attribute__((__always_inline__))
592169689Skan_mm_set_epi16 (short __q7, short __q6, short __q5, short __q4,
593169689Skan	       short __q3, short __q2, short __q1, short __q0)
594122180Skan{
595169689Skan  return __extension__ (__m128i)(__v8hi){
596169689Skan    __q0, __q1, __q2, __q3, __q4, __q5, __q6, __q7 };
597122180Skan}
598122180Skan
599169689Skanstatic __inline __m128i __attribute__((__always_inline__))
600169689Skan_mm_set_epi8 (char __q15, char __q14, char __q13, char __q12,
601169689Skan	      char __q11, char __q10, char __q09, char __q08,
602169689Skan	      char __q07, char __q06, char __q05, char __q04,
603169689Skan	      char __q03, char __q02, char __q01, char __q00)
604122180Skan{
605169689Skan  return __extension__ (__m128i)(__v16qi){
606169689Skan    __q00, __q01, __q02, __q03, __q04, __q05, __q06, __q07,
607169689Skan    __q08, __q09, __q10, __q11, __q12, __q13, __q14, __q15
608169689Skan  };
609122180Skan}
610122180Skan
611169689Skan/* Set all of the elements of the vector to A.  */
612169689Skan
613169689Skanstatic __inline __m128i __attribute__((__always_inline__))
614169689Skan_mm_set1_epi64x (long long __A)
615122180Skan{
616169689Skan  return _mm_set_epi64x (__A, __A);
617122180Skan}
618122180Skan
619169689Skanstatic __inline __m128i __attribute__((__always_inline__))
620169689Skan_mm_set1_epi64 (__m64 __A)
621122180Skan{
622169689Skan  return _mm_set_epi64 (__A, __A);
623122180Skan}
624122180Skan
625169689Skanstatic __inline __m128i __attribute__((__always_inline__))
626169689Skan_mm_set1_epi32 (int __A)
627122180Skan{
628169689Skan  return _mm_set_epi32 (__A, __A, __A, __A);
629122180Skan}
630122180Skan
631169689Skanstatic __inline __m128i __attribute__((__always_inline__))
632169689Skan_mm_set1_epi16 (short __A)
633122180Skan{
634169689Skan  return _mm_set_epi16 (__A, __A, __A, __A, __A, __A, __A, __A);
635122180Skan}
636122180Skan
637169689Skanstatic __inline __m128i __attribute__((__always_inline__))
638169689Skan_mm_set1_epi8 (char __A)
639122180Skan{
640169689Skan  return _mm_set_epi8 (__A, __A, __A, __A, __A, __A, __A, __A,
641169689Skan		       __A, __A, __A, __A, __A, __A, __A, __A);
642122180Skan}
643122180Skan
644169689Skan/* Create a vector of Qi, where i is the element number.
645169689Skan   The parameter order is reversed from the _mm_set_epi* functions.  */
646169689Skan
647169689Skanstatic __inline __m128i __attribute__((__always_inline__))
648169689Skan_mm_setr_epi64 (__m64 __q0, __m64 __q1)
649122180Skan{
650169689Skan  return _mm_set_epi64 (__q1, __q0);
651122180Skan}
652122180Skan
653169689Skanstatic __inline __m128i __attribute__((__always_inline__))
654169689Skan_mm_setr_epi32 (int __q0, int __q1, int __q2, int __q3)
655122180Skan{
656169689Skan  return _mm_set_epi32 (__q3, __q2, __q1, __q0);
657122180Skan}
658122180Skan
659169689Skanstatic __inline __m128i __attribute__((__always_inline__))
660169689Skan_mm_setr_epi16 (short __q0, short __q1, short __q2, short __q3,
661169689Skan	        short __q4, short __q5, short __q6, short __q7)
662122180Skan{
663169689Skan  return _mm_set_epi16 (__q7, __q6, __q5, __q4, __q3, __q2, __q1, __q0);
664122180Skan}
665122180Skan
666169689Skanstatic __inline __m128i __attribute__((__always_inline__))
667169689Skan_mm_setr_epi8 (char __q00, char __q01, char __q02, char __q03,
668169689Skan	       char __q04, char __q05, char __q06, char __q07,
669169689Skan	       char __q08, char __q09, char __q10, char __q11,
670169689Skan	       char __q12, char __q13, char __q14, char __q15)
671122180Skan{
672169689Skan  return _mm_set_epi8 (__q15, __q14, __q13, __q12, __q11, __q10, __q09, __q08,
673169689Skan		       __q07, __q06, __q05, __q04, __q03, __q02, __q01, __q00);
674169689Skan}
675122180Skan
676169689Skan/* Create a vector with element 0 as *P and the rest zero.  */
677122180Skan
678169689Skanstatic __inline __m128i __attribute__((__always_inline__))
679169689Skan_mm_load_si128 (__m128i const *__P)
680169689Skan{
681169689Skan  return *__P;
682122180Skan}
683122180Skan
684169689Skanstatic __inline __m128i __attribute__((__always_inline__))
685169689Skan_mm_loadu_si128 (__m128i const *__P)
686122180Skan{
687169689Skan  return (__m128i) __builtin_ia32_loaddqu ((char const *)__P);
688122180Skan}
689122180Skan
690169689Skanstatic __inline __m128i __attribute__((__always_inline__))
691169689Skan_mm_loadl_epi64 (__m128i const *__P)
692122180Skan{
693169689Skan  return _mm_set_epi64 ((__m64)0LL, *(__m64 *)__P);
694122180Skan}
695122180Skan
696169689Skanstatic __inline void __attribute__((__always_inline__))
697169689Skan_mm_store_si128 (__m128i *__P, __m128i __B)
698122180Skan{
699169689Skan  *__P = __B;
700122180Skan}
701122180Skan
702169689Skanstatic __inline void __attribute__((__always_inline__))
703169689Skan_mm_storeu_si128 (__m128i *__P, __m128i __B)
704122180Skan{
705169689Skan  __builtin_ia32_storedqu ((char *)__P, (__v16qi)__B);
706122180Skan}
707122180Skan
708169689Skanstatic __inline void __attribute__((__always_inline__))
709169689Skan_mm_storel_epi64 (__m128i *__P, __m128i __B)
710122180Skan{
711169689Skan  *(long long *)__P = __builtin_ia32_vec_ext_v2di ((__v2di)__B, 0);
712122180Skan}
713122180Skan
714169689Skanstatic __inline __m64 __attribute__((__always_inline__))
715169689Skan_mm_movepi64_pi64 (__m128i __B)
716122180Skan{
717169689Skan  return (__m64) __builtin_ia32_vec_ext_v2di ((__v2di)__B, 0);
718122180Skan}
719122180Skan
720169689Skanstatic __inline __m128i __attribute__((__always_inline__))
721169689Skan_mm_movpi64_epi64 (__m64 __A)
722122180Skan{
723169689Skan  return _mm_set_epi64 ((__m64)0LL, __A);
724169689Skan}
725122180Skan
726169689Skanstatic __inline __m128i __attribute__((__always_inline__))
727169689Skan_mm_move_epi64 (__m128i __A)
728122180Skan{
729169689Skan  return _mm_set_epi64 ((__m64)0LL, _mm_movepi64_pi64 (__A));
730122180Skan}
731122180Skan
732169689Skan/* Create a vector of zeros.  */
733169689Skanstatic __inline __m128i __attribute__((__always_inline__))
734169689Skan_mm_setzero_si128 (void)
735122180Skan{
736169689Skan  return __extension__ (__m128i)(__v4si){ 0, 0, 0, 0 };
737122180Skan}
738122180Skan
739169689Skanstatic __inline __m128d __attribute__((__always_inline__))
740122180Skan_mm_cvtepi32_pd (__m128i __A)
741122180Skan{
742122180Skan  return (__m128d)__builtin_ia32_cvtdq2pd ((__v4si) __A);
743122180Skan}
744122180Skan
745169689Skanstatic __inline __m128 __attribute__((__always_inline__))
746122180Skan_mm_cvtepi32_ps (__m128i __A)
747122180Skan{
748122180Skan  return (__m128)__builtin_ia32_cvtdq2ps ((__v4si) __A);
749122180Skan}
750122180Skan
751169689Skanstatic __inline __m128i __attribute__((__always_inline__))
752122180Skan_mm_cvtpd_epi32 (__m128d __A)
753122180Skan{
754122180Skan  return (__m128i)__builtin_ia32_cvtpd2dq ((__v2df) __A);
755122180Skan}
756122180Skan
757169689Skanstatic __inline __m64 __attribute__((__always_inline__))
758122180Skan_mm_cvtpd_pi32 (__m128d __A)
759122180Skan{
760122180Skan  return (__m64)__builtin_ia32_cvtpd2pi ((__v2df) __A);
761122180Skan}
762122180Skan
763169689Skanstatic __inline __m128 __attribute__((__always_inline__))
764122180Skan_mm_cvtpd_ps (__m128d __A)
765122180Skan{
766122180Skan  return (__m128)__builtin_ia32_cvtpd2ps ((__v2df) __A);
767122180Skan}
768122180Skan
769169689Skanstatic __inline __m128i __attribute__((__always_inline__))
770122180Skan_mm_cvttpd_epi32 (__m128d __A)
771122180Skan{
772122180Skan  return (__m128i)__builtin_ia32_cvttpd2dq ((__v2df) __A);
773122180Skan}
774122180Skan
775169689Skanstatic __inline __m64 __attribute__((__always_inline__))
776122180Skan_mm_cvttpd_pi32 (__m128d __A)
777122180Skan{
778122180Skan  return (__m64)__builtin_ia32_cvttpd2pi ((__v2df) __A);
779122180Skan}
780122180Skan
781169689Skanstatic __inline __m128d __attribute__((__always_inline__))
782122180Skan_mm_cvtpi32_pd (__m64 __A)
783122180Skan{
784122180Skan  return (__m128d)__builtin_ia32_cvtpi2pd ((__v2si) __A);
785122180Skan}
786122180Skan
787169689Skanstatic __inline __m128i __attribute__((__always_inline__))
788122180Skan_mm_cvtps_epi32 (__m128 __A)
789122180Skan{
790122180Skan  return (__m128i)__builtin_ia32_cvtps2dq ((__v4sf) __A);
791122180Skan}
792122180Skan
793169689Skanstatic __inline __m128i __attribute__((__always_inline__))
794122180Skan_mm_cvttps_epi32 (__m128 __A)
795122180Skan{
796122180Skan  return (__m128i)__builtin_ia32_cvttps2dq ((__v4sf) __A);
797122180Skan}
798122180Skan
799169689Skanstatic __inline __m128d __attribute__((__always_inline__))
800122180Skan_mm_cvtps_pd (__m128 __A)
801122180Skan{
802122180Skan  return (__m128d)__builtin_ia32_cvtps2pd ((__v4sf) __A);
803122180Skan}
804122180Skan
805169689Skanstatic __inline int __attribute__((__always_inline__))
806122180Skan_mm_cvtsd_si32 (__m128d __A)
807122180Skan{
808122180Skan  return __builtin_ia32_cvtsd2si ((__v2df) __A);
809122180Skan}
810122180Skan
811122180Skan#ifdef __x86_64__
812169689Skan/* Intel intrinsic.  */
813169689Skanstatic __inline long long __attribute__((__always_inline__))
814169689Skan_mm_cvtsd_si64 (__m128d __A)
815169689Skan{
816169689Skan  return __builtin_ia32_cvtsd2si64 ((__v2df) __A);
817169689Skan}
818169689Skan
819169689Skan/* Microsoft intrinsic.  */
820169689Skanstatic __inline long long __attribute__((__always_inline__))
821122180Skan_mm_cvtsd_si64x (__m128d __A)
822122180Skan{
823122180Skan  return __builtin_ia32_cvtsd2si64 ((__v2df) __A);
824122180Skan}
825122180Skan#endif
826122180Skan
827169689Skanstatic __inline int __attribute__((__always_inline__))
828122180Skan_mm_cvttsd_si32 (__m128d __A)
829122180Skan{
830122180Skan  return __builtin_ia32_cvttsd2si ((__v2df) __A);
831122180Skan}
832122180Skan
833122180Skan#ifdef __x86_64__
834169689Skan/* Intel intrinsic.  */
835169689Skanstatic __inline long long __attribute__((__always_inline__))
836169689Skan_mm_cvttsd_si64 (__m128d __A)
837169689Skan{
838169689Skan  return __builtin_ia32_cvttsd2si64 ((__v2df) __A);
839169689Skan}
840169689Skan
841169689Skan/* Microsoft intrinsic.  */
842169689Skanstatic __inline long long __attribute__((__always_inline__))
843122180Skan_mm_cvttsd_si64x (__m128d __A)
844122180Skan{
845122180Skan  return __builtin_ia32_cvttsd2si64 ((__v2df) __A);
846122180Skan}
847122180Skan#endif
848122180Skan
849169689Skanstatic __inline __m128 __attribute__((__always_inline__))
850122180Skan_mm_cvtsd_ss (__m128 __A, __m128d __B)
851122180Skan{
852122180Skan  return (__m128)__builtin_ia32_cvtsd2ss ((__v4sf) __A, (__v2df) __B);
853122180Skan}
854122180Skan
855169689Skanstatic __inline __m128d __attribute__((__always_inline__))
856122180Skan_mm_cvtsi32_sd (__m128d __A, int __B)
857122180Skan{
858122180Skan  return (__m128d)__builtin_ia32_cvtsi2sd ((__v2df) __A, __B);
859122180Skan}
860122180Skan
861122180Skan#ifdef __x86_64__
862169689Skan/* Intel intrinsic.  */
863169689Skanstatic __inline __m128d __attribute__((__always_inline__))
864169689Skan_mm_cvtsi64_sd (__m128d __A, long long __B)
865169689Skan{
866169689Skan  return (__m128d)__builtin_ia32_cvtsi642sd ((__v2df) __A, __B);
867169689Skan}
868169689Skan
869169689Skan/* Microsoft intrinsic.  */
870169689Skanstatic __inline __m128d __attribute__((__always_inline__))
871122180Skan_mm_cvtsi64x_sd (__m128d __A, long long __B)
872122180Skan{
873122180Skan  return (__m128d)__builtin_ia32_cvtsi642sd ((__v2df) __A, __B);
874122180Skan}
875122180Skan#endif
876122180Skan
877169689Skanstatic __inline __m128d __attribute__((__always_inline__))
878122180Skan_mm_cvtss_sd (__m128d __A, __m128 __B)
879122180Skan{
880122180Skan  return (__m128d)__builtin_ia32_cvtss2sd ((__v2df) __A, (__v4sf)__B);
881122180Skan}
882122180Skan
/* Forward __A and __B, each cast to __v2df, and the selector __C to the
   shufpd builtin.  The vector arguments are parenthesized before the cast
   so that an expression argument is cast as a whole rather than only its
   first operand.  */
#define _mm_shuffle_pd(__A, __B, __C) \
  ((__m128d)__builtin_ia32_shufpd ((__v2df)(__A), (__v2df)(__B), (__C)))
884122180Skan
885169689Skanstatic __inline __m128d __attribute__((__always_inline__))
886122180Skan_mm_unpackhi_pd (__m128d __A, __m128d __B)
887122180Skan{
888122180Skan  return (__m128d)__builtin_ia32_unpckhpd ((__v2df)__A, (__v2df)__B);
889122180Skan}
890122180Skan
891169689Skanstatic __inline __m128d __attribute__((__always_inline__))
892122180Skan_mm_unpacklo_pd (__m128d __A, __m128d __B)
893122180Skan{
894122180Skan  return (__m128d)__builtin_ia32_unpcklpd ((__v2df)__A, (__v2df)__B);
895122180Skan}
896122180Skan
897169689Skanstatic __inline __m128d __attribute__((__always_inline__))
898122180Skan_mm_loadh_pd (__m128d __A, double const *__B)
899122180Skan{
900169689Skan  return (__m128d)__builtin_ia32_loadhpd ((__v2df)__A, __B);
901122180Skan}
902122180Skan
903169689Skanstatic __inline __m128d __attribute__((__always_inline__))
904122180Skan_mm_loadl_pd (__m128d __A, double const *__B)
905122180Skan{
906169689Skan  return (__m128d)__builtin_ia32_loadlpd ((__v2df)__A, __B);
907122180Skan}
908122180Skan
909169689Skanstatic __inline int __attribute__((__always_inline__))
910122180Skan_mm_movemask_pd (__m128d __A)
911122180Skan{
912122180Skan  return __builtin_ia32_movmskpd ((__v2df)__A);
913122180Skan}
914122180Skan
915169689Skanstatic __inline __m128i __attribute__((__always_inline__))
916122180Skan_mm_packs_epi16 (__m128i __A, __m128i __B)
917122180Skan{
918122180Skan  return (__m128i)__builtin_ia32_packsswb128 ((__v8hi)__A, (__v8hi)__B);
919122180Skan}
920122180Skan
921169689Skanstatic __inline __m128i __attribute__((__always_inline__))
922122180Skan_mm_packs_epi32 (__m128i __A, __m128i __B)
923122180Skan{
924122180Skan  return (__m128i)__builtin_ia32_packssdw128 ((__v4si)__A, (__v4si)__B);
925122180Skan}
926122180Skan
927169689Skanstatic __inline __m128i __attribute__((__always_inline__))
928122180Skan_mm_packus_epi16 (__m128i __A, __m128i __B)
929122180Skan{
930122180Skan  return (__m128i)__builtin_ia32_packuswb128 ((__v8hi)__A, (__v8hi)__B);
931122180Skan}
932122180Skan
933169689Skanstatic __inline __m128i __attribute__((__always_inline__))
934122180Skan_mm_unpackhi_epi8 (__m128i __A, __m128i __B)
935122180Skan{
936122180Skan  return (__m128i)__builtin_ia32_punpckhbw128 ((__v16qi)__A, (__v16qi)__B);
937122180Skan}
938122180Skan
939169689Skanstatic __inline __m128i __attribute__((__always_inline__))
940122180Skan_mm_unpackhi_epi16 (__m128i __A, __m128i __B)
941122180Skan{
942122180Skan  return (__m128i)__builtin_ia32_punpckhwd128 ((__v8hi)__A, (__v8hi)__B);
943122180Skan}
944122180Skan
945169689Skanstatic __inline __m128i __attribute__((__always_inline__))
946122180Skan_mm_unpackhi_epi32 (__m128i __A, __m128i __B)
947122180Skan{
948122180Skan  return (__m128i)__builtin_ia32_punpckhdq128 ((__v4si)__A, (__v4si)__B);
949122180Skan}
950122180Skan
951169689Skanstatic __inline __m128i __attribute__((__always_inline__))
952122180Skan_mm_unpackhi_epi64 (__m128i __A, __m128i __B)
953122180Skan{
954122180Skan  return (__m128i)__builtin_ia32_punpckhqdq128 ((__v2di)__A, (__v2di)__B);
955122180Skan}
956122180Skan
957169689Skanstatic __inline __m128i __attribute__((__always_inline__))
958122180Skan_mm_unpacklo_epi8 (__m128i __A, __m128i __B)
959122180Skan{
960122180Skan  return (__m128i)__builtin_ia32_punpcklbw128 ((__v16qi)__A, (__v16qi)__B);
961122180Skan}
962122180Skan
963169689Skanstatic __inline __m128i __attribute__((__always_inline__))
964122180Skan_mm_unpacklo_epi16 (__m128i __A, __m128i __B)
965122180Skan{
966122180Skan  return (__m128i)__builtin_ia32_punpcklwd128 ((__v8hi)__A, (__v8hi)__B);
967122180Skan}
968122180Skan
969169689Skanstatic __inline __m128i __attribute__((__always_inline__))
970122180Skan_mm_unpacklo_epi32 (__m128i __A, __m128i __B)
971122180Skan{
972122180Skan  return (__m128i)__builtin_ia32_punpckldq128 ((__v4si)__A, (__v4si)__B);
973122180Skan}
974122180Skan
975169689Skanstatic __inline __m128i __attribute__((__always_inline__))
976122180Skan_mm_unpacklo_epi64 (__m128i __A, __m128i __B)
977122180Skan{
978122180Skan  return (__m128i)__builtin_ia32_punpcklqdq128 ((__v2di)__A, (__v2di)__B);
979122180Skan}
980122180Skan
981169689Skanstatic __inline __m128i __attribute__((__always_inline__))
982122180Skan_mm_add_epi8 (__m128i __A, __m128i __B)
983122180Skan{
984122180Skan  return (__m128i)__builtin_ia32_paddb128 ((__v16qi)__A, (__v16qi)__B);
985122180Skan}
986122180Skan
987169689Skanstatic __inline __m128i __attribute__((__always_inline__))
988122180Skan_mm_add_epi16 (__m128i __A, __m128i __B)
989122180Skan{
990122180Skan  return (__m128i)__builtin_ia32_paddw128 ((__v8hi)__A, (__v8hi)__B);
991122180Skan}
992122180Skan
993169689Skanstatic __inline __m128i __attribute__((__always_inline__))
994122180Skan_mm_add_epi32 (__m128i __A, __m128i __B)
995122180Skan{
996122180Skan  return (__m128i)__builtin_ia32_paddd128 ((__v4si)__A, (__v4si)__B);
997122180Skan}
998122180Skan
999169689Skanstatic __inline __m128i __attribute__((__always_inline__))
1000122180Skan_mm_add_epi64 (__m128i __A, __m128i __B)
1001122180Skan{
1002122180Skan  return (__m128i)__builtin_ia32_paddq128 ((__v2di)__A, (__v2di)__B);
1003122180Skan}
1004122180Skan
1005169689Skanstatic __inline __m128i __attribute__((__always_inline__))
1006122180Skan_mm_adds_epi8 (__m128i __A, __m128i __B)
1007122180Skan{
1008122180Skan  return (__m128i)__builtin_ia32_paddsb128 ((__v16qi)__A, (__v16qi)__B);
1009122180Skan}
1010122180Skan
1011169689Skanstatic __inline __m128i __attribute__((__always_inline__))
1012122180Skan_mm_adds_epi16 (__m128i __A, __m128i __B)
1013122180Skan{
1014122180Skan  return (__m128i)__builtin_ia32_paddsw128 ((__v8hi)__A, (__v8hi)__B);
1015122180Skan}
1016122180Skan
1017169689Skanstatic __inline __m128i __attribute__((__always_inline__))
1018122180Skan_mm_adds_epu8 (__m128i __A, __m128i __B)
1019122180Skan{
1020122180Skan  return (__m128i)__builtin_ia32_paddusb128 ((__v16qi)__A, (__v16qi)__B);
1021122180Skan}
1022122180Skan
1023169689Skanstatic __inline __m128i __attribute__((__always_inline__))
1024122180Skan_mm_adds_epu16 (__m128i __A, __m128i __B)
1025122180Skan{
1026122180Skan  return (__m128i)__builtin_ia32_paddusw128 ((__v8hi)__A, (__v8hi)__B);
1027122180Skan}
1028122180Skan
1029169689Skanstatic __inline __m128i __attribute__((__always_inline__))
1030122180Skan_mm_sub_epi8 (__m128i __A, __m128i __B)
1031122180Skan{
1032122180Skan  return (__m128i)__builtin_ia32_psubb128 ((__v16qi)__A, (__v16qi)__B);
1033122180Skan}
1034122180Skan
1035169689Skanstatic __inline __m128i __attribute__((__always_inline__))
1036122180Skan_mm_sub_epi16 (__m128i __A, __m128i __B)
1037122180Skan{
1038122180Skan  return (__m128i)__builtin_ia32_psubw128 ((__v8hi)__A, (__v8hi)__B);
1039122180Skan}
1040122180Skan
1041169689Skanstatic __inline __m128i __attribute__((__always_inline__))
1042122180Skan_mm_sub_epi32 (__m128i __A, __m128i __B)
1043122180Skan{
1044122180Skan  return (__m128i)__builtin_ia32_psubd128 ((__v4si)__A, (__v4si)__B);
1045122180Skan}
1046122180Skan
1047169689Skanstatic __inline __m128i __attribute__((__always_inline__))
1048122180Skan_mm_sub_epi64 (__m128i __A, __m128i __B)
1049122180Skan{
1050122180Skan  return (__m128i)__builtin_ia32_psubq128 ((__v2di)__A, (__v2di)__B);
1051122180Skan}
1052122180Skan
1053169689Skanstatic __inline __m128i __attribute__((__always_inline__))
1054122180Skan_mm_subs_epi8 (__m128i __A, __m128i __B)
1055122180Skan{
1056122180Skan  return (__m128i)__builtin_ia32_psubsb128 ((__v16qi)__A, (__v16qi)__B);
1057122180Skan}
1058122180Skan
1059169689Skanstatic __inline __m128i __attribute__((__always_inline__))
1060122180Skan_mm_subs_epi16 (__m128i __A, __m128i __B)
1061122180Skan{
1062122180Skan  return (__m128i)__builtin_ia32_psubsw128 ((__v8hi)__A, (__v8hi)__B);
1063122180Skan}
1064122180Skan
1065169689Skanstatic __inline __m128i __attribute__((__always_inline__))
1066122180Skan_mm_subs_epu8 (__m128i __A, __m128i __B)
1067122180Skan{
1068122180Skan  return (__m128i)__builtin_ia32_psubusb128 ((__v16qi)__A, (__v16qi)__B);
1069122180Skan}
1070122180Skan
1071169689Skanstatic __inline __m128i __attribute__((__always_inline__))
1072122180Skan_mm_subs_epu16 (__m128i __A, __m128i __B)
1073122180Skan{
1074122180Skan  return (__m128i)__builtin_ia32_psubusw128 ((__v8hi)__A, (__v8hi)__B);
1075122180Skan}
1076122180Skan
1077169689Skanstatic __inline __m128i __attribute__((__always_inline__))
1078122180Skan_mm_madd_epi16 (__m128i __A, __m128i __B)
1079122180Skan{
1080122180Skan  return (__m128i)__builtin_ia32_pmaddwd128 ((__v8hi)__A, (__v8hi)__B);
1081122180Skan}
1082122180Skan
1083169689Skanstatic __inline __m128i __attribute__((__always_inline__))
1084122180Skan_mm_mulhi_epi16 (__m128i __A, __m128i __B)
1085122180Skan{
1086122180Skan  return (__m128i)__builtin_ia32_pmulhw128 ((__v8hi)__A, (__v8hi)__B);
1087122180Skan}
1088122180Skan
1089169689Skanstatic __inline __m128i __attribute__((__always_inline__))
1090122180Skan_mm_mullo_epi16 (__m128i __A, __m128i __B)
1091122180Skan{
1092122180Skan  return (__m128i)__builtin_ia32_pmullw128 ((__v8hi)__A, (__v8hi)__B);
1093122180Skan}
1094122180Skan
1095169689Skanstatic __inline __m64 __attribute__((__always_inline__))
1096122180Skan_mm_mul_su32 (__m64 __A, __m64 __B)
1097122180Skan{
1098122180Skan  return (__m64)__builtin_ia32_pmuludq ((__v2si)__A, (__v2si)__B);
1099122180Skan}
1100122180Skan
1101169689Skanstatic __inline __m128i __attribute__((__always_inline__))
1102122180Skan_mm_mul_epu32 (__m128i __A, __m128i __B)
1103122180Skan{
1104122180Skan  return (__m128i)__builtin_ia32_pmuludq128 ((__v4si)__A, (__v4si)__B);
1105122180Skan}
1106122180Skan
/* Left shifts by an integer count: __A is cast to the element vector type
   and forwarded with __B to the psllwi128 / pslldi128 / psllqi128 builtins.
   The function forms are disabled with "#if 0" and kept for reference; the
   macros in the "#else" branch are the live definitions.
   NOTE(review): presumably macros are used so that __B reaches the builtin
   as a constant expression -- confirm.  */
#if 0
static __inline __m128i __attribute__((__always_inline__))
_mm_slli_epi16 (__m128i __A, int __B)
{
  __v8hi __src = (__v8hi) __A;

  return (__m128i) __builtin_ia32_psllwi128 (__src, __B);
}

static __inline __m128i __attribute__((__always_inline__))
_mm_slli_epi32 (__m128i __A, int __B)
{
  __v4si __src = (__v4si) __A;

  return (__m128i) __builtin_ia32_pslldi128 (__src, __B);
}

static __inline __m128i __attribute__((__always_inline__))
_mm_slli_epi64 (__m128i __A, int __B)
{
  __v2di __src = (__v2di) __A;

  return (__m128i) __builtin_ia32_psllqi128 (__src, __B);
}
#else
#define _mm_slli_epi16(__A, __B) \
  ((__m128i)__builtin_ia32_psllwi128 ((__v8hi)(__A), (__B)))
#define _mm_slli_epi32(__A, __B) \
  ((__m128i)__builtin_ia32_pslldi128 ((__v4si)(__A), (__B)))
#define _mm_slli_epi64(__A, __B) \
  ((__m128i)__builtin_ia32_psllqi128 ((__v2di)(__A), (__B)))
#endif
1133122180Skan
/* Shifts forwarded to the psrawi128 / psradi128 builtins: __A is cast to
   the element vector type and passed along with the integer count __B.
   The function forms are disabled with "#if 0" and kept for reference; the
   macros in the "#else" branch are the live definitions.  */
#if 0
static __inline __m128i __attribute__((__always_inline__))
_mm_srai_epi16 (__m128i __A, int __B)
{
  __v8hi __src = (__v8hi) __A;

  return (__m128i) __builtin_ia32_psrawi128 (__src, __B);
}

static __inline __m128i __attribute__((__always_inline__))
_mm_srai_epi32 (__m128i __A, int __B)
{
  __v4si __src = (__v4si) __A;

  return (__m128i) __builtin_ia32_psradi128 (__src, __B);
}
#else
#define _mm_srai_epi16(__A, __B) \
  ((__m128i)__builtin_ia32_psrawi128 ((__v8hi)(__A), (__B)))
#define _mm_srai_epi32(__A, __B) \
  ((__m128i)__builtin_ia32_psradi128 ((__v4si)(__A), (__B)))
#endif
1152122180Skan
/* Whole-vector shifts forwarded to the psrldqi128 / pslldqi128 builtins.
   __B is multiplied by 8 before it reaches the builtin (presumably a byte
   count converted to bits -- confirm against the builtin's contract).
   The function forms are disabled with "#if 0" and kept for reference; the
   macros in the "#else" branch are the live definitions.  The reference
   functions now carry distinct names (the left-shift one was previously
   also spelled _mm_srli_si128) and the same __inline qualifier as the rest
   of the file, so the branch would compile if it were ever enabled.  */
#if 0
static __inline __m128i __attribute__((__always_inline__))
_mm_srli_si128 (__m128i __A, int __B)
{
  return ((__m128i)__builtin_ia32_psrldqi128 (__A, __B * 8));
}

static __inline __m128i __attribute__((__always_inline__))
_mm_slli_si128 (__m128i __A, int __B)
{
  return ((__m128i)__builtin_ia32_pslldqi128 (__A, __B * 8));
}
#else
#define _mm_srli_si128(__A, __B) \
  ((__m128i)__builtin_ia32_psrldqi128 ((__A), (__B) * 8))
#define _mm_slli_si128(__A, __B) \
  ((__m128i)__builtin_ia32_pslldqi128 ((__A), (__B) * 8))
#endif
1171122180Skan
/* Shifts forwarded to the psrlwi128 / psrldi128 / psrlqi128 builtins: __A
   is cast to the element vector type and passed along with the integer
   count __B.  The function forms are disabled with "#if 0" and kept for
   reference; the macros in the "#else" branch are the live definitions.  */
#if 0
static __inline __m128i __attribute__((__always_inline__))
_mm_srli_epi16 (__m128i __A, int __B)
{
  __v8hi __src = (__v8hi) __A;

  return (__m128i) __builtin_ia32_psrlwi128 (__src, __B);
}

static __inline __m128i __attribute__((__always_inline__))
_mm_srli_epi32 (__m128i __A, int __B)
{
  __v4si __src = (__v4si) __A;

  return (__m128i) __builtin_ia32_psrldi128 (__src, __B);
}

static __inline __m128i __attribute__((__always_inline__))
_mm_srli_epi64 (__m128i __A, int __B)
{
  __v2di __src = (__v2di) __A;

  return (__m128i) __builtin_ia32_psrlqi128 (__src, __B);
}
#else
#define _mm_srli_epi16(__A, __B) \
  ((__m128i)__builtin_ia32_psrlwi128 ((__v8hi)(__A), (__B)))
#define _mm_srli_epi32(__A, __B) \
  ((__m128i)__builtin_ia32_psrldi128 ((__v4si)(__A), (__B)))
#define _mm_srli_epi64(__A, __B) \
  ((__m128i)__builtin_ia32_psrlqi128 ((__v2di)(__A), (__B)))
#endif
1198122180Skan
1199169689Skanstatic __inline __m128i __attribute__((__always_inline__))
1200169689Skan_mm_sll_epi16 (__m128i __A, __m128i __B)
1201122180Skan{
1202169689Skan  return (__m128i)__builtin_ia32_psllw128((__v8hi)__A, (__v8hi)__B);
1203122180Skan}
1204122180Skan
1205169689Skanstatic __inline __m128i __attribute__((__always_inline__))
1206169689Skan_mm_sll_epi32 (__m128i __A, __m128i __B)
1207122180Skan{
1208169689Skan  return (__m128i)__builtin_ia32_pslld128((__v4si)__A, (__v4si)__B);
1209122180Skan}
1210122180Skan
1211169689Skanstatic __inline __m128i __attribute__((__always_inline__))
1212169689Skan_mm_sll_epi64 (__m128i __A, __m128i __B)
1213122180Skan{
1214169689Skan  return (__m128i)__builtin_ia32_psllq128((__v2di)__A, (__v2di)__B);
1215122180Skan}
1216122180Skan
1217169689Skanstatic __inline __m128i __attribute__((__always_inline__))
1218169689Skan_mm_sra_epi16 (__m128i __A, __m128i __B)
1219122180Skan{
1220169689Skan  return (__m128i)__builtin_ia32_psraw128 ((__v8hi)__A, (__v8hi)__B);
1221122180Skan}
1222122180Skan
1223169689Skanstatic __inline __m128i __attribute__((__always_inline__))
1224169689Skan_mm_sra_epi32 (__m128i __A, __m128i __B)
1225122180Skan{
1226169689Skan  return (__m128i)__builtin_ia32_psrad128 ((__v4si)__A, (__v4si)__B);
1227122180Skan}
1228122180Skan
1229169689Skanstatic __inline __m128i __attribute__((__always_inline__))
1230169689Skan_mm_srl_epi16 (__m128i __A, __m128i __B)
1231122180Skan{
1232169689Skan  return (__m128i)__builtin_ia32_psrlw128 ((__v8hi)__A, (__v8hi)__B);
1233122180Skan}
1234122180Skan
1235169689Skanstatic __inline __m128i __attribute__((__always_inline__))
1236169689Skan_mm_srl_epi32 (__m128i __A, __m128i __B)
1237122180Skan{
1238169689Skan  return (__m128i)__builtin_ia32_psrld128 ((__v4si)__A, (__v4si)__B);
1239122180Skan}
1240122180Skan
1241169689Skanstatic __inline __m128i __attribute__((__always_inline__))
1242169689Skan_mm_srl_epi64 (__m128i __A, __m128i __B)
1243122180Skan{
1244169689Skan  return (__m128i)__builtin_ia32_psrlq128 ((__v2di)__A, (__v2di)__B);
1245122180Skan}
1246122180Skan
1247169689Skanstatic __inline __m128i __attribute__((__always_inline__))
1248122180Skan_mm_and_si128 (__m128i __A, __m128i __B)
1249122180Skan{
1250122180Skan  return (__m128i)__builtin_ia32_pand128 ((__v2di)__A, (__v2di)__B);
1251122180Skan}
1252122180Skan
1253169689Skanstatic __inline __m128i __attribute__((__always_inline__))
1254122180Skan_mm_andnot_si128 (__m128i __A, __m128i __B)
1255122180Skan{
1256122180Skan  return (__m128i)__builtin_ia32_pandn128 ((__v2di)__A, (__v2di)__B);
1257122180Skan}
1258122180Skan
1259169689Skanstatic __inline __m128i __attribute__((__always_inline__))
1260122180Skan_mm_or_si128 (__m128i __A, __m128i __B)
1261122180Skan{
1262122180Skan  return (__m128i)__builtin_ia32_por128 ((__v2di)__A, (__v2di)__B);
1263122180Skan}
1264122180Skan
1265169689Skanstatic __inline __m128i __attribute__((__always_inline__))
1266122180Skan_mm_xor_si128 (__m128i __A, __m128i __B)
1267122180Skan{
1268122180Skan  return (__m128i)__builtin_ia32_pxor128 ((__v2di)__A, (__v2di)__B);
1269122180Skan}
1270122180Skan
1271169689Skanstatic __inline __m128i __attribute__((__always_inline__))
1272122180Skan_mm_cmpeq_epi8 (__m128i __A, __m128i __B)
1273122180Skan{
1274122180Skan  return (__m128i)__builtin_ia32_pcmpeqb128 ((__v16qi)__A, (__v16qi)__B);
1275122180Skan}
1276122180Skan
1277169689Skanstatic __inline __m128i __attribute__((__always_inline__))
1278122180Skan_mm_cmpeq_epi16 (__m128i __A, __m128i __B)
1279122180Skan{
1280122180Skan  return (__m128i)__builtin_ia32_pcmpeqw128 ((__v8hi)__A, (__v8hi)__B);
1281122180Skan}
1282122180Skan
1283169689Skanstatic __inline __m128i __attribute__((__always_inline__))
1284122180Skan_mm_cmpeq_epi32 (__m128i __A, __m128i __B)
1285122180Skan{
1286122180Skan  return (__m128i)__builtin_ia32_pcmpeqd128 ((__v4si)__A, (__v4si)__B);
1287122180Skan}
1288122180Skan
1289169689Skanstatic __inline __m128i __attribute__((__always_inline__))
1290122180Skan_mm_cmplt_epi8 (__m128i __A, __m128i __B)
1291122180Skan{
1292122180Skan  return (__m128i)__builtin_ia32_pcmpgtb128 ((__v16qi)__B, (__v16qi)__A);
1293122180Skan}
1294122180Skan
1295169689Skanstatic __inline __m128i __attribute__((__always_inline__))
1296122180Skan_mm_cmplt_epi16 (__m128i __A, __m128i __B)
1297122180Skan{
1298122180Skan  return (__m128i)__builtin_ia32_pcmpgtw128 ((__v8hi)__B, (__v8hi)__A);
1299122180Skan}
1300122180Skan
1301169689Skanstatic __inline __m128i __attribute__((__always_inline__))
1302122180Skan_mm_cmplt_epi32 (__m128i __A, __m128i __B)
1303122180Skan{
1304122180Skan  return (__m128i)__builtin_ia32_pcmpgtd128 ((__v4si)__B, (__v4si)__A);
1305122180Skan}
1306122180Skan
1307169689Skanstatic __inline __m128i __attribute__((__always_inline__))
1308122180Skan_mm_cmpgt_epi8 (__m128i __A, __m128i __B)
1309122180Skan{
1310122180Skan  return (__m128i)__builtin_ia32_pcmpgtb128 ((__v16qi)__A, (__v16qi)__B);
1311122180Skan}
1312122180Skan
1313169689Skanstatic __inline __m128i __attribute__((__always_inline__))
1314122180Skan_mm_cmpgt_epi16 (__m128i __A, __m128i __B)
1315122180Skan{
1316122180Skan  return (__m128i)__builtin_ia32_pcmpgtw128 ((__v8hi)__A, (__v8hi)__B);
1317122180Skan}
1318122180Skan
1319169689Skanstatic __inline __m128i __attribute__((__always_inline__))
1320122180Skan_mm_cmpgt_epi32 (__m128i __A, __m128i __B)
1321122180Skan{
1322122180Skan  return (__m128i)__builtin_ia32_pcmpgtd128 ((__v4si)__A, (__v4si)__B);
1323122180Skan}
1324122180Skan
/* Element access on a __v8hi view of a __m128i, via the vec_ext_v8hi and
   vec_set_v8hi builtins.  __N is passed as the builtin's index argument and
   __D as the value to set.  The function forms are disabled with "#if 0"
   and kept for reference; the macros in the "#else" branch are the live
   definitions.  */
#if 0
static __inline int __attribute__((__always_inline__))
_mm_extract_epi16 (__m128i const __A, int const __N)
{
  __v8hi __src = (__v8hi) __A;

  return __builtin_ia32_vec_ext_v8hi (__src, __N);
}

static __inline __m128i __attribute__((__always_inline__))
_mm_insert_epi16 (__m128i const __A, int const __D, int const __N)
{
  __v8hi __src = (__v8hi) __A;

  return (__m128i) __builtin_ia32_vec_set_v8hi (__src, __D, __N);
}
#else
#define _mm_extract_epi16(__A, __N) \
  ((int) __builtin_ia32_vec_ext_v8hi ((__v8hi)(__A), (__N)))
#define _mm_insert_epi16(__A, __D, __N) \
  ((__m128i) __builtin_ia32_vec_set_v8hi ((__v8hi)(__A), (__D), (__N)))
#endif
1343122180Skan
1344169689Skanstatic __inline __m128i __attribute__((__always_inline__))
1345122180Skan_mm_max_epi16 (__m128i __A, __m128i __B)
1346122180Skan{
1347122180Skan  return (__m128i)__builtin_ia32_pmaxsw128 ((__v8hi)__A, (__v8hi)__B);
1348122180Skan}
1349122180Skan
1350169689Skanstatic __inline __m128i __attribute__((__always_inline__))
1351122180Skan_mm_max_epu8 (__m128i __A, __m128i __B)
1352122180Skan{
1353122180Skan  return (__m128i)__builtin_ia32_pmaxub128 ((__v16qi)__A, (__v16qi)__B);
1354122180Skan}
1355122180Skan
1356169689Skanstatic __inline __m128i __attribute__((__always_inline__))
1357122180Skan_mm_min_epi16 (__m128i __A, __m128i __B)
1358122180Skan{
1359122180Skan  return (__m128i)__builtin_ia32_pminsw128 ((__v8hi)__A, (__v8hi)__B);
1360122180Skan}
1361122180Skan
1362169689Skanstatic __inline __m128i __attribute__((__always_inline__))
1363122180Skan_mm_min_epu8 (__m128i __A, __m128i __B)
1364122180Skan{
1365122180Skan  return (__m128i)__builtin_ia32_pminub128 ((__v16qi)__A, (__v16qi)__B);
1366122180Skan}
1367122180Skan
1368169689Skanstatic __inline int __attribute__((__always_inline__))
1369122180Skan_mm_movemask_epi8 (__m128i __A)
1370122180Skan{
1371122180Skan  return __builtin_ia32_pmovmskb128 ((__v16qi)__A);
1372122180Skan}
1373122180Skan
1374169689Skanstatic __inline __m128i __attribute__((__always_inline__))
1375122180Skan_mm_mulhi_epu16 (__m128i __A, __m128i __B)
1376122180Skan{
1377122180Skan  return (__m128i)__builtin_ia32_pmulhuw128 ((__v8hi)__A, (__v8hi)__B);
1378122180Skan}
1379122180Skan
/* Shuffle macros.  Each casts __A to the vector type its builtin takes
   (pshufhw / pshuflw: __v8hi, pshufd: __v4si) and forwards the selector
   __B.  The arguments are parenthesized so that an expression argument is
   cast as a whole rather than only its first operand.  */
#define _mm_shufflehi_epi16(__A, __B) \
  ((__m128i)__builtin_ia32_pshufhw ((__v8hi)(__A), (__B)))
#define _mm_shufflelo_epi16(__A, __B) \
  ((__m128i)__builtin_ia32_pshuflw ((__v8hi)(__A), (__B)))
#define _mm_shuffle_epi32(__A, __B) \
  ((__m128i)__builtin_ia32_pshufd ((__v4si)(__A), (__B)))
1383122180Skan
1384169689Skanstatic __inline void __attribute__((__always_inline__))
1385122180Skan_mm_maskmoveu_si128 (__m128i __A, __m128i __B, char *__C)
1386122180Skan{
1387122180Skan  __builtin_ia32_maskmovdqu ((__v16qi)__A, (__v16qi)__B, __C);
1388122180Skan}
1389122180Skan
1390169689Skanstatic __inline __m128i __attribute__((__always_inline__))
1391122180Skan_mm_avg_epu8 (__m128i __A, __m128i __B)
1392122180Skan{
1393122180Skan  return (__m128i)__builtin_ia32_pavgb128 ((__v16qi)__A, (__v16qi)__B);
1394122180Skan}
1395122180Skan
1396169689Skanstatic __inline __m128i __attribute__((__always_inline__))
1397122180Skan_mm_avg_epu16 (__m128i __A, __m128i __B)
1398122180Skan{
1399122180Skan  return (__m128i)__builtin_ia32_pavgw128 ((__v8hi)__A, (__v8hi)__B);
1400122180Skan}
1401122180Skan
1402169689Skanstatic __inline __m128i __attribute__((__always_inline__))
1403122180Skan_mm_sad_epu8 (__m128i __A, __m128i __B)
1404122180Skan{
1405122180Skan  return (__m128i)__builtin_ia32_psadbw128 ((__v16qi)__A, (__v16qi)__B);
1406122180Skan}
1407122180Skan
/* Hand the destination pointer __A and the int value __B to the movnti
   builtin.  Nothing is returned.  */
static __inline void __attribute__((__always_inline__))
_mm_stream_si32 (int *__A, int __B)
{
  int *__dst = __A;

  __builtin_ia32_movnti (__dst, __B);
}
1413122180Skan
1414169689Skanstatic __inline void __attribute__((__always_inline__))
1415122180Skan_mm_stream_si128 (__m128i *__A, __m128i __B)
1416122180Skan{
1417122180Skan  __builtin_ia32_movntdq ((__v2di *)__A, (__v2di)__B);
1418122180Skan}
1419122180Skan
1420169689Skanstatic __inline void __attribute__((__always_inline__))
1421122180Skan_mm_stream_pd (double *__A, __m128d __B)
1422122180Skan{
1423122180Skan  __builtin_ia32_movntpd (__A, (__v2df)__B);
1424122180Skan}
1425122180Skan
/* Hand the address __A to the clflush builtin.  Nothing is returned.  */
static __inline void __attribute__((__always_inline__))
_mm_clflush (void const *__A)
{
  void const *__addr = __A;

  __builtin_ia32_clflush (__addr);
}
1431122180Skan
/* Invoke the lfence builtin.  Takes no arguments and returns nothing.  */
static __inline void __attribute__((__always_inline__))
_mm_lfence (void)
{
  __builtin_ia32_lfence ();
}
1437122180Skan
/* Invoke the mfence builtin.  Takes no arguments and returns nothing.  */
static __inline void __attribute__((__always_inline__))
_mm_mfence (void)
{
  __builtin_ia32_mfence ();
}
1443122180Skan
1444169689Skanstatic __inline __m128i __attribute__((__always_inline__))
1445122180Skan_mm_cvtsi32_si128 (int __A)
1446122180Skan{
1447169689Skan  return _mm_set_epi32 (0, 0, 0, __A);
1448122180Skan}
1449122180Skan
1450122180Skan#ifdef __x86_64__
1451169689Skan/* Intel intrinsic.  */
1452169689Skanstatic __inline __m128i __attribute__((__always_inline__))
1453169689Skan_mm_cvtsi64_si128 (long long __A)
1454169689Skan{
1455169689Skan  return _mm_set_epi64x (0, __A);
1456169689Skan}
1457169689Skan
1458169689Skan/* Microsoft intrinsic.  */
1459169689Skanstatic __inline __m128i __attribute__((__always_inline__))
1460122180Skan_mm_cvtsi64x_si128 (long long __A)
1461122180Skan{
1462169689Skan  return _mm_set_epi64x (0, __A);
1463122180Skan}
1464122180Skan#endif
1465122180Skan
1466169689Skan/* Casts between various SP, DP, INT vector types.  Note that these do no
1467169689Skan   conversion of values, they just change the type.  */
1468169689Skanstatic __inline __m128 __attribute__((__always_inline__))
1469169689Skan_mm_castpd_ps(__m128d __A)
1470122180Skan{
1471169689Skan  return (__m128) __A;
1472122180Skan}
1473122180Skan
1474169689Skanstatic __inline __m128i __attribute__((__always_inline__))
1475169689Skan_mm_castpd_si128(__m128d __A)
1476122180Skan{
1477169689Skan  return (__m128i) __A;
1478122180Skan}
1479122180Skan
1480169689Skanstatic __inline __m128d __attribute__((__always_inline__))
1481169689Skan_mm_castps_pd(__m128 __A)
1482169689Skan{
1483169689Skan  return (__m128d) __A;
1484169689Skan}
1485169689Skan
1486169689Skanstatic __inline __m128i __attribute__((__always_inline__))
1487169689Skan_mm_castps_si128(__m128 __A)
1488169689Skan{
1489169689Skan  return (__m128i) __A;
1490169689Skan}
1491169689Skan
1492169689Skanstatic __inline __m128 __attribute__((__always_inline__))
1493169689Skan_mm_castsi128_ps(__m128i __A)
1494169689Skan{
1495169689Skan  return (__m128) __A;
1496169689Skan}
1497169689Skan
1498169689Skanstatic __inline __m128d __attribute__((__always_inline__))
1499169689Skan_mm_castsi128_pd(__m128i __A)
1500169689Skan{
1501169689Skan  return (__m128d) __A;
1502169689Skan}
1503169689Skan
1504122180Skan#endif /* __SSE2__  */
1505122180Skan
1506122180Skan#endif /* _EMMINTRIN_H_INCLUDED */
1507