1193326Sed/*===---- mmintrin.h - MMX intrinsics --------------------------------------===
2193326Sed *
3353358Sdim * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4353358Sdim * See https://llvm.org/LICENSE.txt for license information.
5353358Sdim * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6193326Sed *
7193326Sed *===-----------------------------------------------------------------------===
8193326Sed */
9193326Sed
10193326Sed#ifndef __MMINTRIN_H
11193326Sed#define __MMINTRIN_H
12193326Sed
/* Public MMX vector type: 8 bytes of storage with 8-byte alignment. */
typedef long long __m64 __attribute__((__vector_size__(8), __aligned__(8)));

/* Internal 8-byte vector types, one per lane layout. The intrinsics in this
 * header cast their __m64 operands to one of these before calling a builtin. */
typedef char __v8qi __attribute__((__vector_size__(8)));      /* 8 x char      */
typedef short __v4hi __attribute__((__vector_size__(8)));     /* 4 x short     */
typedef int __v2si __attribute__((__vector_size__(8)));       /* 2 x int       */
typedef long long __v1di __attribute__((__vector_size__(8))); /* 1 x long long */
19193326Sed
/* Attribute set applied to the intrinsic definitions in this file: always
 * inline, no debug info, "mmx" target feature, minimum vector width of 64. */
#define __DEFAULT_FN_ATTRS                                                     \
    __attribute__((__always_inline__, __nodebug__, __target__("mmx"),          \
                   __min_vector_width__(64)))
22288943Sdim
/// Empties the MMX state by marking the x87 stack registers as empty.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> EMMS </c> instruction.
///
/// The attribute list is written out explicitly here and does not include
/// \c __min_vector_width__.
static __inline__ void
__attribute__((__always_inline__, __nodebug__, __target__("mmx")))
_mm_empty(void)
{
    __builtin_ia32_emms();
}
35193326Sed
36341825Sdim/// Constructs a 64-bit integer vector, setting the lower 32 bits to the
37309124Sdim///    value of the 32-bit integer parameter and setting the upper 32 bits to 0.
38309124Sdim///
39309124Sdim/// \headerfile <x86intrin.h>
40309124Sdim///
41341825Sdim/// This intrinsic corresponds to the <c> MOVD </c> instruction.
42309124Sdim///
43309124Sdim/// \param __i
44309124Sdim///    A 32-bit integer value.
45309124Sdim/// \returns A 64-bit integer vector. The lower 32 bits contain the value of the
46309124Sdim///    parameter. The upper 32 bits are set to 0.
47288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS
48193326Sed_mm_cvtsi32_si64(int __i)
49193326Sed{
50218893Sdim    return (__m64)__builtin_ia32_vec_init_v2si(__i, 0);
51193326Sed}
52193326Sed
53341825Sdim/// Returns the lower 32 bits of a 64-bit integer vector as a 32-bit
54309124Sdim///    signed integer.
55309124Sdim///
56309124Sdim/// \headerfile <x86intrin.h>
57309124Sdim///
58341825Sdim/// This intrinsic corresponds to the <c> MOVD </c> instruction.
59309124Sdim///
60309124Sdim/// \param __m
61309124Sdim///    A 64-bit integer vector.
62309124Sdim/// \returns A 32-bit signed integer value containing the lower 32 bits of the
63309124Sdim///    parameter.
64288943Sdimstatic __inline__ int __DEFAULT_FN_ATTRS
65193326Sed_mm_cvtsi64_si32(__m64 __m)
66193326Sed{
67218893Sdim    return __builtin_ia32_vec_ext_v2si((__v2si)__m, 0);
68193326Sed}
69193326Sed
70341825Sdim/// Casts a 64-bit signed integer value into a 64-bit integer vector.
71309124Sdim///
72309124Sdim/// \headerfile <x86intrin.h>
73309124Sdim///
74341825Sdim/// This intrinsic corresponds to the <c> MOVQ </c> instruction.
75309124Sdim///
76309124Sdim/// \param __i
77309124Sdim///    A 64-bit signed integer.
78309124Sdim/// \returns A 64-bit integer vector containing the same bitwise pattern as the
79309124Sdim///    parameter.
80288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS
81193326Sed_mm_cvtsi64_m64(long long __i)
82193326Sed{
83193326Sed    return (__m64)__i;
84193326Sed}
85193326Sed
86341825Sdim/// Casts a 64-bit integer vector into a 64-bit signed integer value.
87309124Sdim///
88309124Sdim/// \headerfile <x86intrin.h>
89309124Sdim///
90341825Sdim/// This intrinsic corresponds to the <c> MOVQ </c> instruction.
91309124Sdim///
92309124Sdim/// \param __m
93309124Sdim///    A 64-bit integer vector.
94309124Sdim/// \returns A 64-bit signed integer containing the same bitwise pattern as the
95309124Sdim///    parameter.
96288943Sdimstatic __inline__ long long __DEFAULT_FN_ATTRS
97193326Sed_mm_cvtm64_si64(__m64 __m)
98193326Sed{
99193326Sed    return (long long)__m;
100193326Sed}
101193326Sed
102341825Sdim/// Converts 16-bit signed integers from both 64-bit integer vector
103309124Sdim///    parameters of [4 x i16] into 8-bit signed integer values, and constructs
104309124Sdim///    a 64-bit integer vector of [8 x i8] as the result. Positive values
105309124Sdim///    greater than 0x7F are saturated to 0x7F. Negative values less than 0x80
106309124Sdim///    are saturated to 0x80.
107309124Sdim///
108309124Sdim/// \headerfile <x86intrin.h>
109309124Sdim///
110314564Sdim/// This intrinsic corresponds to the <c> PACKSSWB </c> instruction.
111309124Sdim///
112309124Sdim/// \param __m1
113309124Sdim///    A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a
114309124Sdim///    16-bit signed integer and is converted to an 8-bit signed integer with
115309124Sdim///    saturation. Positive values greater than 0x7F are saturated to 0x7F.
116309124Sdim///    Negative values less than 0x80 are saturated to 0x80. The converted
117309124Sdim///    [4 x i8] values are written to the lower 32 bits of the result.
118309124Sdim/// \param __m2
119309124Sdim///    A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a
120309124Sdim///    16-bit signed integer and is converted to an 8-bit signed integer with
121309124Sdim///    saturation. Positive values greater than 0x7F are saturated to 0x7F.
122309124Sdim///    Negative values less than 0x80 are saturated to 0x80. The converted
123309124Sdim///    [4 x i8] values are written to the upper 32 bits of the result.
124309124Sdim/// \returns A 64-bit integer vector of [8 x i8] containing the converted
125309124Sdim///    values.
126288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS
127193326Sed_mm_packs_pi16(__m64 __m1, __m64 __m2)
128193326Sed{
129193326Sed    return (__m64)__builtin_ia32_packsswb((__v4hi)__m1, (__v4hi)__m2);
130193326Sed}
131193326Sed
132341825Sdim/// Converts 32-bit signed integers from both 64-bit integer vector
133309124Sdim///    parameters of [2 x i32] into 16-bit signed integer values, and constructs
134309124Sdim///    a 64-bit integer vector of [4 x i16] as the result. Positive values
135309124Sdim///    greater than 0x7FFF are saturated to 0x7FFF. Negative values less than
136309124Sdim///    0x8000 are saturated to 0x8000.
137309124Sdim///
138309124Sdim/// \headerfile <x86intrin.h>
139309124Sdim///
140314564Sdim/// This intrinsic corresponds to the <c> PACKSSDW </c> instruction.
141309124Sdim///
142309124Sdim/// \param __m1
143309124Sdim///    A 64-bit integer vector of [2 x i32]. Each 32-bit element is treated as a
144309124Sdim///    32-bit signed integer and is converted to a 16-bit signed integer with
145309124Sdim///    saturation. Positive values greater than 0x7FFF are saturated to 0x7FFF.
146309124Sdim///    Negative values less than 0x8000 are saturated to 0x8000. The converted
147309124Sdim///    [2 x i16] values are written to the lower 32 bits of the result.
148309124Sdim/// \param __m2
149309124Sdim///    A 64-bit integer vector of [2 x i32]. Each 32-bit element is treated as a
150309124Sdim///    32-bit signed integer and is converted to a 16-bit signed integer with
151309124Sdim///    saturation. Positive values greater than 0x7FFF are saturated to 0x7FFF.
152309124Sdim///    Negative values less than 0x8000 are saturated to 0x8000. The converted
153309124Sdim///    [2 x i16] values are written to the upper 32 bits of the result.
154309124Sdim/// \returns A 64-bit integer vector of [4 x i16] containing the converted
155309124Sdim///    values.
156288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS
157193326Sed_mm_packs_pi32(__m64 __m1, __m64 __m2)
158193326Sed{
159193326Sed    return (__m64)__builtin_ia32_packssdw((__v2si)__m1, (__v2si)__m2);
160193326Sed}
161193326Sed
162341825Sdim/// Converts 16-bit signed integers from both 64-bit integer vector
163309124Sdim///    parameters of [4 x i16] into 8-bit unsigned integer values, and
164309124Sdim///    constructs a 64-bit integer vector of [8 x i8] as the result. Values
165309124Sdim///    greater than 0xFF are saturated to 0xFF. Values less than 0 are saturated
166309124Sdim///    to 0.
167309124Sdim///
168309124Sdim/// \headerfile <x86intrin.h>
169309124Sdim///
170314564Sdim/// This intrinsic corresponds to the <c> PACKUSWB </c> instruction.
171309124Sdim///
172309124Sdim/// \param __m1
173309124Sdim///    A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a
174309124Sdim///    16-bit signed integer and is converted to an 8-bit unsigned integer with
175309124Sdim///    saturation. Values greater than 0xFF are saturated to 0xFF. Values less
176309124Sdim///    than 0 are saturated to 0. The converted [4 x i8] values are written to
177309124Sdim///    the lower 32 bits of the result.
178309124Sdim/// \param __m2
179309124Sdim///    A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a
180309124Sdim///    16-bit signed integer and is converted to an 8-bit unsigned integer with
181309124Sdim///    saturation. Values greater than 0xFF are saturated to 0xFF. Values less
182309124Sdim///    than 0 are saturated to 0. The converted [4 x i8] values are written to
183309124Sdim///    the upper 32 bits of the result.
184309124Sdim/// \returns A 64-bit integer vector of [8 x i8] containing the converted
185309124Sdim///    values.
186288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS
187193326Sed_mm_packs_pu16(__m64 __m1, __m64 __m2)
188193326Sed{
189193326Sed    return (__m64)__builtin_ia32_packuswb((__v4hi)__m1, (__v4hi)__m2);
190193326Sed}
191193326Sed
192341825Sdim/// Unpacks the upper 32 bits from two 64-bit integer vectors of [8 x i8]
193309124Sdim///    and interleaves them into a 64-bit integer vector of [8 x i8].
194309124Sdim///
195309124Sdim/// \headerfile <x86intrin.h>
196309124Sdim///
197314564Sdim/// This intrinsic corresponds to the <c> PUNPCKHBW </c> instruction.
198309124Sdim///
199309124Sdim/// \param __m1
200321369Sdim///    A 64-bit integer vector of [8 x i8]. \n
201314564Sdim///    Bits [39:32] are written to bits [7:0] of the result. \n
202314564Sdim///    Bits [47:40] are written to bits [23:16] of the result. \n
203314564Sdim///    Bits [55:48] are written to bits [39:32] of the result. \n
204309124Sdim///    Bits [63:56] are written to bits [55:48] of the result.
205309124Sdim/// \param __m2
206309124Sdim///    A 64-bit integer vector of [8 x i8].
207314564Sdim///    Bits [39:32] are written to bits [15:8] of the result. \n
208314564Sdim///    Bits [47:40] are written to bits [31:24] of the result. \n
209314564Sdim///    Bits [55:48] are written to bits [47:40] of the result. \n
210309124Sdim///    Bits [63:56] are written to bits [63:56] of the result.
211309124Sdim/// \returns A 64-bit integer vector of [8 x i8] containing the interleaved
212309124Sdim///    values.
213288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS
214193326Sed_mm_unpackhi_pi8(__m64 __m1, __m64 __m2)
215193326Sed{
216218893Sdim    return (__m64)__builtin_ia32_punpckhbw((__v8qi)__m1, (__v8qi)__m2);
217193326Sed}
218193326Sed
219341825Sdim/// Unpacks the upper 32 bits from two 64-bit integer vectors of
220309124Sdim///    [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16].
221309124Sdim///
222309124Sdim/// \headerfile <x86intrin.h>
223309124Sdim///
224314564Sdim/// This intrinsic corresponds to the <c> PUNPCKHWD </c> instruction.
225309124Sdim///
226309124Sdim/// \param __m1
227309124Sdim///    A 64-bit integer vector of [4 x i16].
228314564Sdim///    Bits [47:32] are written to bits [15:0] of the result. \n
229309124Sdim///    Bits [63:48] are written to bits [47:32] of the result.
230309124Sdim/// \param __m2
231309124Sdim///    A 64-bit integer vector of [4 x i16].
232314564Sdim///    Bits [47:32] are written to bits [31:16] of the result. \n
233309124Sdim///    Bits [63:48] are written to bits [63:48] of the result.
234309124Sdim/// \returns A 64-bit integer vector of [4 x i16] containing the interleaved
235309124Sdim///    values.
236288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS
237193326Sed_mm_unpackhi_pi16(__m64 __m1, __m64 __m2)
238193326Sed{
239218893Sdim    return (__m64)__builtin_ia32_punpckhwd((__v4hi)__m1, (__v4hi)__m2);
240193326Sed}
241193326Sed
242341825Sdim/// Unpacks the upper 32 bits from two 64-bit integer vectors of
243309124Sdim///    [2 x i32] and interleaves them into a 64-bit integer vector of [2 x i32].
244309124Sdim///
245309124Sdim/// \headerfile <x86intrin.h>
246309124Sdim///
247314564Sdim/// This intrinsic corresponds to the <c> PUNPCKHDQ </c> instruction.
248309124Sdim///
249309124Sdim/// \param __m1
250309124Sdim///    A 64-bit integer vector of [2 x i32]. The upper 32 bits are written to
251309124Sdim///    the lower 32 bits of the result.
252309124Sdim/// \param __m2
253309124Sdim///    A 64-bit integer vector of [2 x i32]. The upper 32 bits are written to
254309124Sdim///    the upper 32 bits of the result.
255309124Sdim/// \returns A 64-bit integer vector of [2 x i32] containing the interleaved
256309124Sdim///    values.
257288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS
258193326Sed_mm_unpackhi_pi32(__m64 __m1, __m64 __m2)
259193326Sed{
260218893Sdim    return (__m64)__builtin_ia32_punpckhdq((__v2si)__m1, (__v2si)__m2);
261193326Sed}
262193326Sed
263341825Sdim/// Unpacks the lower 32 bits from two 64-bit integer vectors of [8 x i8]
264309124Sdim///    and interleaves them into a 64-bit integer vector of [8 x i8].
265309124Sdim///
266309124Sdim/// \headerfile <x86intrin.h>
267309124Sdim///
268314564Sdim/// This intrinsic corresponds to the <c> PUNPCKLBW </c> instruction.
269309124Sdim///
270309124Sdim/// \param __m1
271309124Sdim///    A 64-bit integer vector of [8 x i8].
272314564Sdim///    Bits [7:0] are written to bits [7:0] of the result. \n
273314564Sdim///    Bits [15:8] are written to bits [23:16] of the result. \n
274314564Sdim///    Bits [23:16] are written to bits [39:32] of the result. \n
275309124Sdim///    Bits [31:24] are written to bits [55:48] of the result.
276309124Sdim/// \param __m2
277309124Sdim///    A 64-bit integer vector of [8 x i8].
278314564Sdim///    Bits [7:0] are written to bits [15:8] of the result. \n
279314564Sdim///    Bits [15:8] are written to bits [31:24] of the result. \n
280314564Sdim///    Bits [23:16] are written to bits [47:40] of the result. \n
281309124Sdim///    Bits [31:24] are written to bits [63:56] of the result.
282309124Sdim/// \returns A 64-bit integer vector of [8 x i8] containing the interleaved
283309124Sdim///    values.
284288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS
285193326Sed_mm_unpacklo_pi8(__m64 __m1, __m64 __m2)
286193326Sed{
287218893Sdim    return (__m64)__builtin_ia32_punpcklbw((__v8qi)__m1, (__v8qi)__m2);
288193326Sed}
289193326Sed
290341825Sdim/// Unpacks the lower 32 bits from two 64-bit integer vectors of
291309124Sdim///    [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16].
292309124Sdim///
293309124Sdim/// \headerfile <x86intrin.h>
294309124Sdim///
295314564Sdim/// This intrinsic corresponds to the <c> PUNPCKLWD </c> instruction.
296309124Sdim///
297309124Sdim/// \param __m1
298309124Sdim///    A 64-bit integer vector of [4 x i16].
299314564Sdim///    Bits [15:0] are written to bits [15:0] of the result. \n
300309124Sdim///    Bits [31:16] are written to bits [47:32] of the result.
301309124Sdim/// \param __m2
302309124Sdim///    A 64-bit integer vector of [4 x i16].
303314564Sdim///    Bits [15:0] are written to bits [31:16] of the result. \n
304309124Sdim///    Bits [31:16] are written to bits [63:48] of the result.
305309124Sdim/// \returns A 64-bit integer vector of [4 x i16] containing the interleaved
306309124Sdim///    values.
307288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS
308193326Sed_mm_unpacklo_pi16(__m64 __m1, __m64 __m2)
309193326Sed{
310218893Sdim    return (__m64)__builtin_ia32_punpcklwd((__v4hi)__m1, (__v4hi)__m2);
311193326Sed}
312193326Sed
313341825Sdim/// Unpacks the lower 32 bits from two 64-bit integer vectors of
314309124Sdim///    [2 x i32] and interleaves them into a 64-bit integer vector of [2 x i32].
315309124Sdim///
316309124Sdim/// \headerfile <x86intrin.h>
317309124Sdim///
318314564Sdim/// This intrinsic corresponds to the <c> PUNPCKLDQ </c> instruction.
319309124Sdim///
320309124Sdim/// \param __m1
321309124Sdim///    A 64-bit integer vector of [2 x i32]. The lower 32 bits are written to
322309124Sdim///    the lower 32 bits of the result.
323309124Sdim/// \param __m2
324309124Sdim///    A 64-bit integer vector of [2 x i32]. The lower 32 bits are written to
325309124Sdim///    the upper 32 bits of the result.
326309124Sdim/// \returns A 64-bit integer vector of [2 x i32] containing the interleaved
327309124Sdim///    values.
328288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS
329193326Sed_mm_unpacklo_pi32(__m64 __m1, __m64 __m2)
330193326Sed{
331218893Sdim    return (__m64)__builtin_ia32_punpckldq((__v2si)__m1, (__v2si)__m2);
332193326Sed}
333193326Sed
334341825Sdim/// Adds each 8-bit integer element of the first 64-bit integer vector
335309124Sdim///    of [8 x i8] to the corresponding 8-bit integer element of the second
336309124Sdim///    64-bit integer vector of [8 x i8]. The lower 8 bits of the results are
337309124Sdim///    packed into a 64-bit integer vector of [8 x i8].
338309124Sdim///
339309124Sdim/// \headerfile <x86intrin.h>
340309124Sdim///
341314564Sdim/// This intrinsic corresponds to the <c> PADDB </c> instruction.
342309124Sdim///
343309124Sdim/// \param __m1
344309124Sdim///    A 64-bit integer vector of [8 x i8].
345309124Sdim/// \param __m2
346309124Sdim///    A 64-bit integer vector of [8 x i8].
347309124Sdim/// \returns A 64-bit integer vector of [8 x i8] containing the sums of both
348309124Sdim///    parameters.
349288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS
350193326Sed_mm_add_pi8(__m64 __m1, __m64 __m2)
351193326Sed{
352218893Sdim    return (__m64)__builtin_ia32_paddb((__v8qi)__m1, (__v8qi)__m2);
353193326Sed}
354193326Sed
355341825Sdim/// Adds each 16-bit integer element of the first 64-bit integer vector
356309124Sdim///    of [4 x i16] to the corresponding 16-bit integer element of the second
357309124Sdim///    64-bit integer vector of [4 x i16]. The lower 16 bits of the results are
358309124Sdim///    packed into a 64-bit integer vector of [4 x i16].
359309124Sdim///
360309124Sdim/// \headerfile <x86intrin.h>
361309124Sdim///
362314564Sdim/// This intrinsic corresponds to the <c> PADDW </c> instruction.
363309124Sdim///
364309124Sdim/// \param __m1
365309124Sdim///    A 64-bit integer vector of [4 x i16].
366309124Sdim/// \param __m2
367309124Sdim///    A 64-bit integer vector of [4 x i16].
368309124Sdim/// \returns A 64-bit integer vector of [4 x i16] containing the sums of both
369309124Sdim///    parameters.
370288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS
371193326Sed_mm_add_pi16(__m64 __m1, __m64 __m2)
372193326Sed{
373218893Sdim    return (__m64)__builtin_ia32_paddw((__v4hi)__m1, (__v4hi)__m2);
374193326Sed}
375193326Sed
376341825Sdim/// Adds each 32-bit integer element of the first 64-bit integer vector
377309124Sdim///    of [2 x i32] to the corresponding 32-bit integer element of the second
378309124Sdim///    64-bit integer vector of [2 x i32]. The lower 32 bits of the results are
379309124Sdim///    packed into a 64-bit integer vector of [2 x i32].
380309124Sdim///
381309124Sdim/// \headerfile <x86intrin.h>
382309124Sdim///
383314564Sdim/// This intrinsic corresponds to the <c> PADDD </c> instruction.
384309124Sdim///
385309124Sdim/// \param __m1
386309124Sdim///    A 64-bit integer vector of [2 x i32].
387309124Sdim/// \param __m2
388309124Sdim///    A 64-bit integer vector of [2 x i32].
389309124Sdim/// \returns A 64-bit integer vector of [2 x i32] containing the sums of both
390309124Sdim///    parameters.
391288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS
392193326Sed_mm_add_pi32(__m64 __m1, __m64 __m2)
393193326Sed{
394218893Sdim    return (__m64)__builtin_ia32_paddd((__v2si)__m1, (__v2si)__m2);
395193326Sed}
396193326Sed
397341825Sdim/// Adds each 8-bit signed integer element of the first 64-bit integer
398309124Sdim///    vector of [8 x i8] to the corresponding 8-bit signed integer element of
399309124Sdim///    the second 64-bit integer vector of [8 x i8]. Positive sums greater than
400309124Sdim///    0x7F are saturated to 0x7F. Negative sums less than 0x80 are saturated to
401309124Sdim///    0x80. The results are packed into a 64-bit integer vector of [8 x i8].
402309124Sdim///
403309124Sdim/// \headerfile <x86intrin.h>
404309124Sdim///
405314564Sdim/// This intrinsic corresponds to the <c> PADDSB </c> instruction.
406309124Sdim///
407309124Sdim/// \param __m1
408309124Sdim///    A 64-bit integer vector of [8 x i8].
409309124Sdim/// \param __m2
410309124Sdim///    A 64-bit integer vector of [8 x i8].
411309124Sdim/// \returns A 64-bit integer vector of [8 x i8] containing the saturated sums
412309124Sdim///    of both parameters.
413288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS
414296417Sdim_mm_adds_pi8(__m64 __m1, __m64 __m2)
415193326Sed{
416193326Sed    return (__m64)__builtin_ia32_paddsb((__v8qi)__m1, (__v8qi)__m2);
417193326Sed}
418193326Sed
419341825Sdim/// Adds each 16-bit signed integer element of the first 64-bit integer
420309124Sdim///    vector of [4 x i16] to the corresponding 16-bit signed integer element of
421309124Sdim///    the second 64-bit integer vector of [4 x i16]. Positive sums greater than
422309124Sdim///    0x7FFF are saturated to 0x7FFF. Negative sums less than 0x8000 are
423309124Sdim///    saturated to 0x8000. The results are packed into a 64-bit integer vector
424309124Sdim///    of [4 x i16].
425309124Sdim///
426309124Sdim/// \headerfile <x86intrin.h>
427309124Sdim///
428314564Sdim/// This intrinsic corresponds to the <c> PADDSW </c> instruction.
429309124Sdim///
430309124Sdim/// \param __m1
431309124Sdim///    A 64-bit integer vector of [4 x i16].
432309124Sdim/// \param __m2
433309124Sdim///    A 64-bit integer vector of [4 x i16].
434309124Sdim/// \returns A 64-bit integer vector of [4 x i16] containing the saturated sums
435309124Sdim///    of both parameters.
436288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS
437193326Sed_mm_adds_pi16(__m64 __m1, __m64 __m2)
438193326Sed{
439296417Sdim    return (__m64)__builtin_ia32_paddsw((__v4hi)__m1, (__v4hi)__m2);
440193326Sed}
441193326Sed
442341825Sdim/// Adds each 8-bit unsigned integer element of the first 64-bit integer
443309124Sdim///    vector of [8 x i8] to the corresponding 8-bit unsigned integer element of
444309124Sdim///    the second 64-bit integer vector of [8 x i8]. Sums greater than 0xFF are
445309124Sdim///    saturated to 0xFF. The results are packed into a 64-bit integer vector of
446309124Sdim///    [8 x i8].
447309124Sdim///
448309124Sdim/// \headerfile <x86intrin.h>
449309124Sdim///
450314564Sdim/// This intrinsic corresponds to the <c> PADDUSB </c> instruction.
451309124Sdim///
452309124Sdim/// \param __m1
453309124Sdim///    A 64-bit integer vector of [8 x i8].
454309124Sdim/// \param __m2
455309124Sdim///    A 64-bit integer vector of [8 x i8].
456309124Sdim/// \returns A 64-bit integer vector of [8 x i8] containing the saturated
457309124Sdim///    unsigned sums of both parameters.
458288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS
459296417Sdim_mm_adds_pu8(__m64 __m1, __m64 __m2)
460193326Sed{
461193326Sed    return (__m64)__builtin_ia32_paddusb((__v8qi)__m1, (__v8qi)__m2);
462193326Sed}
463296417Sdim
464341825Sdim/// Adds each 16-bit unsigned integer element of the first 64-bit integer
465309124Sdim///    vector of [4 x i16] to the corresponding 16-bit unsigned integer element
466309124Sdim///    of the second 64-bit integer vector of [4 x i16]. Sums greater than
467309124Sdim///    0xFFFF are saturated to 0xFFFF. The results are packed into a 64-bit
468309124Sdim///    integer vector of [4 x i16].
469309124Sdim///
470309124Sdim/// \headerfile <x86intrin.h>
471309124Sdim///
472314564Sdim/// This intrinsic corresponds to the <c> PADDUSW </c> instruction.
473309124Sdim///
474309124Sdim/// \param __m1
475309124Sdim///    A 64-bit integer vector of [4 x i16].
476309124Sdim/// \param __m2
477309124Sdim///    A 64-bit integer vector of [4 x i16].
478309124Sdim/// \returns A 64-bit integer vector of [4 x i16] containing the saturated
479309124Sdim///    unsigned sums of both parameters.
480288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS
481296417Sdim_mm_adds_pu16(__m64 __m1, __m64 __m2)
482193326Sed{
483193326Sed    return (__m64)__builtin_ia32_paddusw((__v4hi)__m1, (__v4hi)__m2);
484193326Sed}
485193326Sed
486341825Sdim/// Subtracts each 8-bit integer element of the second 64-bit integer
487309124Sdim///    vector of [8 x i8] from the corresponding 8-bit integer element of the
488309124Sdim///    first 64-bit integer vector of [8 x i8]. The lower 8 bits of the results
489309124Sdim///    are packed into a 64-bit integer vector of [8 x i8].
490309124Sdim///
491309124Sdim/// \headerfile <x86intrin.h>
492309124Sdim///
493314564Sdim/// This intrinsic corresponds to the <c> PSUBB </c> instruction.
494309124Sdim///
495309124Sdim/// \param __m1
496309124Sdim///    A 64-bit integer vector of [8 x i8] containing the minuends.
497309124Sdim/// \param __m2
498309124Sdim///    A 64-bit integer vector of [8 x i8] containing the subtrahends.
499309124Sdim/// \returns A 64-bit integer vector of [8 x i8] containing the differences of
500309124Sdim///    both parameters.
501288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS
502193326Sed_mm_sub_pi8(__m64 __m1, __m64 __m2)
503193326Sed{
504218893Sdim    return (__m64)__builtin_ia32_psubb((__v8qi)__m1, (__v8qi)__m2);
505193326Sed}
506296417Sdim
507341825Sdim/// Subtracts each 16-bit integer element of the second 64-bit integer
508309124Sdim///    vector of [4 x i16] from the corresponding 16-bit integer element of the
509309124Sdim///    first 64-bit integer vector of [4 x i16]. The lower 16 bits of the
510309124Sdim///    results are packed into a 64-bit integer vector of [4 x i16].
511309124Sdim///
512309124Sdim/// \headerfile <x86intrin.h>
513309124Sdim///
514314564Sdim/// This intrinsic corresponds to the <c> PSUBW </c> instruction.
515309124Sdim///
516309124Sdim/// \param __m1
517309124Sdim///    A 64-bit integer vector of [4 x i16] containing the minuends.
518309124Sdim/// \param __m2
519309124Sdim///    A 64-bit integer vector of [4 x i16] containing the subtrahends.
520309124Sdim/// \returns A 64-bit integer vector of [4 x i16] containing the differences of
521309124Sdim///    both parameters.
522288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS
523193326Sed_mm_sub_pi16(__m64 __m1, __m64 __m2)
524193326Sed{
525218893Sdim    return (__m64)__builtin_ia32_psubw((__v4hi)__m1, (__v4hi)__m2);
526193326Sed}
527296417Sdim
528341825Sdim/// Subtracts each 32-bit integer element of the second 64-bit integer
529309124Sdim///    vector of [2 x i32] from the corresponding 32-bit integer element of the
530309124Sdim///    first 64-bit integer vector of [2 x i32]. The lower 32 bits of the
531309124Sdim///    results are packed into a 64-bit integer vector of [2 x i32].
532309124Sdim///
533309124Sdim/// \headerfile <x86intrin.h>
534309124Sdim///
535314564Sdim/// This intrinsic corresponds to the <c> PSUBD </c> instruction.
536309124Sdim///
537309124Sdim/// \param __m1
538309124Sdim///    A 64-bit integer vector of [2 x i32] containing the minuends.
539309124Sdim/// \param __m2
540309124Sdim///    A 64-bit integer vector of [2 x i32] containing the subtrahends.
541309124Sdim/// \returns A 64-bit integer vector of [2 x i32] containing the differences of
542309124Sdim///    both parameters.
543288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS
544193326Sed_mm_sub_pi32(__m64 __m1, __m64 __m2)
545193326Sed{
546218893Sdim    return (__m64)__builtin_ia32_psubd((__v2si)__m1, (__v2si)__m2);
547193326Sed}
548193326Sed
549341825Sdim/// Subtracts each 8-bit signed integer element of the second 64-bit
550309124Sdim///    integer vector of [8 x i8] from the corresponding 8-bit signed integer
551309124Sdim///    element of the first 64-bit integer vector of [8 x i8]. Positive results
552309124Sdim///    greater than 0x7F are saturated to 0x7F. Negative results less than 0x80
553309124Sdim///    are saturated to 0x80. The results are packed into a 64-bit integer
554309124Sdim///    vector of [8 x i8].
555309124Sdim///
556309124Sdim/// \headerfile <x86intrin.h>
557309124Sdim///
558314564Sdim/// This intrinsic corresponds to the <c> PSUBSB </c> instruction.
559309124Sdim///
560309124Sdim/// \param __m1
561309124Sdim///    A 64-bit integer vector of [8 x i8] containing the minuends.
562309124Sdim/// \param __m2
563309124Sdim///    A 64-bit integer vector of [8 x i8] containing the subtrahends.
564309124Sdim/// \returns A 64-bit integer vector of [8 x i8] containing the saturated
565309124Sdim///    differences of both parameters.
566288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS
567193326Sed_mm_subs_pi8(__m64 __m1, __m64 __m2)
568193326Sed{
569193326Sed    return (__m64)__builtin_ia32_psubsb((__v8qi)__m1, (__v8qi)__m2);
570193326Sed}
571193326Sed
572341825Sdim/// Subtracts each 16-bit signed integer element of the second 64-bit
573309124Sdim///    integer vector of [4 x i16] from the corresponding 16-bit signed integer
574309124Sdim///    element of the first 64-bit integer vector of [4 x i16]. Positive results
575309124Sdim///    greater than 0x7FFF are saturated to 0x7FFF. Negative results less than
576309124Sdim///    0x8000 are saturated to 0x8000. The results are packed into a 64-bit
577309124Sdim///    integer vector of [4 x i16].
578309124Sdim///
579309124Sdim/// \headerfile <x86intrin.h>
580309124Sdim///
581314564Sdim/// This intrinsic corresponds to the <c> PSUBSW </c> instruction.
582309124Sdim///
583309124Sdim/// \param __m1
584309124Sdim///    A 64-bit integer vector of [4 x i16] containing the minuends.
585309124Sdim/// \param __m2
586309124Sdim///    A 64-bit integer vector of [4 x i16] containing the subtrahends.
587309124Sdim/// \returns A 64-bit integer vector of [4 x i16] containing the saturated
588309124Sdim///    differences of both parameters.
589288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS
590193326Sed_mm_subs_pi16(__m64 __m1, __m64 __m2)
591193326Sed{
592193326Sed    return (__m64)__builtin_ia32_psubsw((__v4hi)__m1, (__v4hi)__m2);
593193326Sed}
594193326Sed
595341825Sdim/// Subtracts each 8-bit unsigned integer element of the second 64-bit
596309124Sdim///    integer vector of [8 x i8] from the corresponding 8-bit unsigned integer
597321369Sdim///    element of the first 64-bit integer vector of [8 x i8].
598309124Sdim///
599321369Sdim///    If an element of the first vector is less than the corresponding element
600321369Sdim///    of the second vector, the result is saturated to 0. The results are
601321369Sdim///    packed into a 64-bit integer vector of [8 x i8].
602321369Sdim///
603309124Sdim/// \headerfile <x86intrin.h>
604309124Sdim///
605314564Sdim/// This intrinsic corresponds to the <c> PSUBUSB </c> instruction.
606309124Sdim///
607309124Sdim/// \param __m1
608309124Sdim///    A 64-bit integer vector of [8 x i8] containing the minuends.
609309124Sdim/// \param __m2
610309124Sdim///    A 64-bit integer vector of [8 x i8] containing the subtrahends.
611309124Sdim/// \returns A 64-bit integer vector of [8 x i8] containing the saturated
612309124Sdim///    differences of both parameters.
613288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS
614193326Sed_mm_subs_pu8(__m64 __m1, __m64 __m2)
615193326Sed{
616193326Sed    return (__m64)__builtin_ia32_psubusb((__v8qi)__m1, (__v8qi)__m2);
617193326Sed}
618296417Sdim
619341825Sdim/// Subtracts each 16-bit unsigned integer element of the second 64-bit
620309124Sdim///    integer vector of [4 x i16] from the corresponding 16-bit unsigned
621321369Sdim///    integer element of the first 64-bit integer vector of [4 x i16].
622309124Sdim///
623321369Sdim///    If an element of the first vector is less than the corresponding element
624321369Sdim///    of the second vector, the result is saturated to 0. The results are
625321369Sdim///    packed into a 64-bit integer vector of [4 x i16].
626321369Sdim///
627309124Sdim/// \headerfile <x86intrin.h>
628309124Sdim///
629314564Sdim/// This intrinsic corresponds to the <c> PSUBUSW </c> instruction.
630309124Sdim///
631309124Sdim/// \param __m1
632309124Sdim///    A 64-bit integer vector of [4 x i16] containing the minuends.
633309124Sdim/// \param __m2
634309124Sdim///    A 64-bit integer vector of [4 x i16] containing the subtrahends.
635309124Sdim/// \returns A 64-bit integer vector of [4 x i16] containing the saturated
636309124Sdim///    differences of both parameters.
637288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS
638193326Sed_mm_subs_pu16(__m64 __m1, __m64 __m2)
639193326Sed{
640193326Sed    return (__m64)__builtin_ia32_psubusw((__v4hi)__m1, (__v4hi)__m2);
641193326Sed}
642193326Sed
643341825Sdim/// Multiplies each 16-bit signed integer element of the first 64-bit
644309124Sdim///    integer vector of [4 x i16] by the corresponding 16-bit signed integer
645309124Sdim///    element of the second 64-bit integer vector of [4 x i16] and get four
646309124Sdim///    32-bit products. Adds adjacent pairs of products to get two 32-bit sums.
647309124Sdim///    The lower 32 bits of these two sums are packed into a 64-bit integer
648321369Sdim///    vector of [2 x i32].
649309124Sdim///
650321369Sdim///    For example, bits [15:0] of both parameters are multiplied, bits [31:16]
651321369Sdim///    of both parameters are multiplied, and the sum of both results is written
652321369Sdim///    to bits [31:0] of the result.
653321369Sdim///
654309124Sdim/// \headerfile <x86intrin.h>
655309124Sdim///
656314564Sdim/// This intrinsic corresponds to the <c> PMADDWD </c> instruction.
657309124Sdim///
658309124Sdim/// \param __m1
659309124Sdim///    A 64-bit integer vector of [4 x i16].
660309124Sdim/// \param __m2
661309124Sdim///    A 64-bit integer vector of [4 x i16].
662309124Sdim/// \returns A 64-bit integer vector of [2 x i32] containing the sums of
663309124Sdim///    products of both parameters.
664288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS
665193326Sed_mm_madd_pi16(__m64 __m1, __m64 __m2)
666193326Sed{
667193326Sed    return (__m64)__builtin_ia32_pmaddwd((__v4hi)__m1, (__v4hi)__m2);
668193326Sed}
669193326Sed
670341825Sdim/// Multiplies each 16-bit signed integer element of the first 64-bit
671309124Sdim///    integer vector of [4 x i16] by the corresponding 16-bit signed integer
672309124Sdim///    element of the second 64-bit integer vector of [4 x i16]. Packs the upper
673309124Sdim///    16 bits of the 32-bit products into a 64-bit integer vector of [4 x i16].
674309124Sdim///
675309124Sdim/// \headerfile <x86intrin.h>
676309124Sdim///
677314564Sdim/// This intrinsic corresponds to the <c> PMULHW </c> instruction.
678309124Sdim///
679309124Sdim/// \param __m1
680309124Sdim///    A 64-bit integer vector of [4 x i16].
681309124Sdim/// \param __m2
682309124Sdim///    A 64-bit integer vector of [4 x i16].
683309124Sdim/// \returns A 64-bit integer vector of [4 x i16] containing the upper 16 bits
684309124Sdim///    of the products of both parameters.
685288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS
686193326Sed_mm_mulhi_pi16(__m64 __m1, __m64 __m2)
687193326Sed{
688193326Sed    return (__m64)__builtin_ia32_pmulhw((__v4hi)__m1, (__v4hi)__m2);
689193326Sed}
690296417Sdim
691341825Sdim/// Multiplies each 16-bit signed integer element of the first 64-bit
692309124Sdim///    integer vector of [4 x i16] by the corresponding 16-bit signed integer
693309124Sdim///    element of the second 64-bit integer vector of [4 x i16]. Packs the lower
694309124Sdim///    16 bits of the 32-bit products into a 64-bit integer vector of [4 x i16].
695309124Sdim///
696309124Sdim/// \headerfile <x86intrin.h>
697309124Sdim///
698314564Sdim/// This intrinsic corresponds to the <c> PMULLW </c> instruction.
699309124Sdim///
700309124Sdim/// \param __m1
701309124Sdim///    A 64-bit integer vector of [4 x i16].
702309124Sdim/// \param __m2
703309124Sdim///    A 64-bit integer vector of [4 x i16].
704309124Sdim/// \returns A 64-bit integer vector of [4 x i16] containing the lower 16 bits
705309124Sdim///    of the products of both parameters.
706288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS
707296417Sdim_mm_mullo_pi16(__m64 __m1, __m64 __m2)
708193326Sed{
709218893Sdim    return (__m64)__builtin_ia32_pmullw((__v4hi)__m1, (__v4hi)__m2);
710193326Sed}
711193326Sed
712341825Sdim/// Left-shifts each 16-bit signed integer element of the first
713309124Sdim///    parameter, which is a 64-bit integer vector of [4 x i16], by the number
714309124Sdim///    of bits specified by the second parameter, which is a 64-bit integer. The
715309124Sdim///    lower 16 bits of the results are packed into a 64-bit integer vector of
716309124Sdim///    [4 x i16].
717309124Sdim///
718309124Sdim/// \headerfile <x86intrin.h>
719309124Sdim///
720314564Sdim/// This intrinsic corresponds to the <c> PSLLW </c> instruction.
721309124Sdim///
722309124Sdim/// \param __m
723309124Sdim///    A 64-bit integer vector of [4 x i16].
724309124Sdim/// \param __count
725309124Sdim///    A 64-bit integer vector interpreted as a single 64-bit integer.
726309124Sdim/// \returns A 64-bit integer vector of [4 x i16] containing the left-shifted
727314564Sdim///    values. If \a __count is greater or equal to 16, the result is set to all
728314564Sdim///    0.
729288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS
730193326Sed_mm_sll_pi16(__m64 __m, __m64 __count)
731193326Sed{
732193326Sed    return (__m64)__builtin_ia32_psllw((__v4hi)__m, __count);
733193326Sed}
734193326Sed
735341825Sdim/// Left-shifts each 16-bit signed integer element of a 64-bit integer
736309124Sdim///    vector of [4 x i16] by the number of bits specified by a 32-bit integer.
737309124Sdim///    The lower 16 bits of the results are packed into a 64-bit integer vector
738309124Sdim///    of [4 x i16].
739309124Sdim///
740309124Sdim/// \headerfile <x86intrin.h>
741309124Sdim///
742314564Sdim/// This intrinsic corresponds to the <c> PSLLW </c> instruction.
743309124Sdim///
744309124Sdim/// \param __m
745309124Sdim///    A 64-bit integer vector of [4 x i16].
746309124Sdim/// \param __count
747309124Sdim///    A 32-bit integer value.
748309124Sdim/// \returns A 64-bit integer vector of [4 x i16] containing the left-shifted
749314564Sdim///    values. If \a __count is greater or equal to 16, the result is set to all
750314564Sdim///    0.
751288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS
752193326Sed_mm_slli_pi16(__m64 __m, int __count)
753193326Sed{
754296417Sdim    return (__m64)__builtin_ia32_psllwi((__v4hi)__m, __count);
755193326Sed}
756193326Sed
757341825Sdim/// Left-shifts each 32-bit signed integer element of the first
758309124Sdim///    parameter, which is a 64-bit integer vector of [2 x i32], by the number
759309124Sdim///    of bits specified by the second parameter, which is a 64-bit integer. The
760309124Sdim///    lower 32 bits of the results are packed into a 64-bit integer vector of
761309124Sdim///    [2 x i32].
762309124Sdim///
763309124Sdim/// \headerfile <x86intrin.h>
764309124Sdim///
765314564Sdim/// This intrinsic corresponds to the <c> PSLLD </c> instruction.
766309124Sdim///
767309124Sdim/// \param __m
768309124Sdim///    A 64-bit integer vector of [2 x i32].
769309124Sdim/// \param __count
770309124Sdim///    A 64-bit integer vector interpreted as a single 64-bit integer.
771309124Sdim/// \returns A 64-bit integer vector of [2 x i32] containing the left-shifted
772314564Sdim///    values. If \a __count is greater or equal to 32, the result is set to all
773314564Sdim///    0.
774288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS
775193326Sed_mm_sll_pi32(__m64 __m, __m64 __count)
776193326Sed{
777193326Sed    return (__m64)__builtin_ia32_pslld((__v2si)__m, __count);
778193326Sed}
779193326Sed
780341825Sdim/// Left-shifts each 32-bit signed integer element of a 64-bit integer
781309124Sdim///    vector of [2 x i32] by the number of bits specified by a 32-bit integer.
782309124Sdim///    The lower 32 bits of the results are packed into a 64-bit integer vector
783309124Sdim///    of [2 x i32].
784309124Sdim///
785309124Sdim/// \headerfile <x86intrin.h>
786309124Sdim///
787314564Sdim/// This intrinsic corresponds to the <c> PSLLD </c> instruction.
788309124Sdim///
789309124Sdim/// \param __m
790309124Sdim///    A 64-bit integer vector of [2 x i32].
791309124Sdim/// \param __count
792309124Sdim///    A 32-bit integer value.
793309124Sdim/// \returns A 64-bit integer vector of [2 x i32] containing the left-shifted
794314564Sdim///    values. If \a __count is greater or equal to 32, the result is set to all
795314564Sdim///    0.
796288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS
797193326Sed_mm_slli_pi32(__m64 __m, int __count)
798193326Sed{
799193326Sed    return (__m64)__builtin_ia32_pslldi((__v2si)__m, __count);
800193326Sed}
801193326Sed
802341825Sdim/// Left-shifts the first 64-bit integer parameter by the number of bits
803309124Sdim///    specified by the second 64-bit integer parameter. The lower 64 bits of
804309124Sdim///    result are returned.
805309124Sdim///
806309124Sdim/// \headerfile <x86intrin.h>
807309124Sdim///
808314564Sdim/// This intrinsic corresponds to the <c> PSLLQ </c> instruction.
809309124Sdim///
810309124Sdim/// \param __m
811309124Sdim///    A 64-bit integer vector interpreted as a single 64-bit integer.
812309124Sdim/// \param __count
813309124Sdim///    A 64-bit integer vector interpreted as a single 64-bit integer.
814309124Sdim/// \returns A 64-bit integer vector containing the left-shifted value. If
815314564Sdim///     \a __count is greater or equal to 64, the result is set to 0.
816288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS
817193326Sed_mm_sll_si64(__m64 __m, __m64 __count)
818193326Sed{
819309124Sdim    return (__m64)__builtin_ia32_psllq((__v1di)__m, __count);
820193326Sed}
821193326Sed
822341825Sdim/// Left-shifts the first parameter, which is a 64-bit integer, by the
823309124Sdim///    number of bits specified by the second parameter, which is a 32-bit
824309124Sdim///    integer. The lower 64 bits of result are returned.
825309124Sdim///
826309124Sdim/// \headerfile <x86intrin.h>
827309124Sdim///
828314564Sdim/// This intrinsic corresponds to the <c> PSLLQ </c> instruction.
829309124Sdim///
830309124Sdim/// \param __m
831309124Sdim///    A 64-bit integer vector interpreted as a single 64-bit integer.
832309124Sdim/// \param __count
833309124Sdim///    A 32-bit integer value.
834309124Sdim/// \returns A 64-bit integer vector containing the left-shifted value. If
835314564Sdim///     \a __count is greater or equal to 64, the result is set to 0.
836288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS
837193326Sed_mm_slli_si64(__m64 __m, int __count)
838193326Sed{
839309124Sdim    return (__m64)__builtin_ia32_psllqi((__v1di)__m, __count);
840193326Sed}
841193326Sed
842341825Sdim/// Right-shifts each 16-bit integer element of the first parameter,
843309124Sdim///    which is a 64-bit integer vector of [4 x i16], by the number of bits
844321369Sdim///    specified by the second parameter, which is a 64-bit integer.
845309124Sdim///
846321369Sdim///    High-order bits are filled with the sign bit of the initial value of each
847321369Sdim///    16-bit element. The 16-bit results are packed into a 64-bit integer
848321369Sdim///    vector of [4 x i16].
849321369Sdim///
850309124Sdim/// \headerfile <x86intrin.h>
851309124Sdim///
852314564Sdim/// This intrinsic corresponds to the <c> PSRAW </c> instruction.
853309124Sdim///
854309124Sdim/// \param __m
855309124Sdim///    A 64-bit integer vector of [4 x i16].
856309124Sdim/// \param __count
857309124Sdim///    A 64-bit integer vector interpreted as a single 64-bit integer.
858309124Sdim/// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted
859309124Sdim///    values.
860288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS
861193326Sed_mm_sra_pi16(__m64 __m, __m64 __count)
862193326Sed{
863296417Sdim    return (__m64)__builtin_ia32_psraw((__v4hi)__m, __count);
864193326Sed}
865193326Sed
866341825Sdim/// Right-shifts each 16-bit integer element of a 64-bit integer vector
867309124Sdim///    of [4 x i16] by the number of bits specified by a 32-bit integer.
868321369Sdim///
869309124Sdim///    High-order bits are filled with the sign bit of the initial value of each
870309124Sdim///    16-bit element. The 16-bit results are packed into a 64-bit integer
871309124Sdim///    vector of [4 x i16].
872309124Sdim///
873309124Sdim/// \headerfile <x86intrin.h>
874309124Sdim///
875314564Sdim/// This intrinsic corresponds to the <c> PSRAW </c> instruction.
876309124Sdim///
877309124Sdim/// \param __m
878309124Sdim///    A 64-bit integer vector of [4 x i16].
879309124Sdim/// \param __count
880309124Sdim///    A 32-bit integer value.
881309124Sdim/// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted
882309124Sdim///    values.
883288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS
884193326Sed_mm_srai_pi16(__m64 __m, int __count)
885193326Sed{
886193326Sed    return (__m64)__builtin_ia32_psrawi((__v4hi)__m, __count);
887193326Sed}
888193326Sed
889341825Sdim/// Right-shifts each 32-bit integer element of the first parameter,
890309124Sdim///    which is a 64-bit integer vector of [2 x i32], by the number of bits
891321369Sdim///    specified by the second parameter, which is a 64-bit integer.
892309124Sdim///
893321369Sdim///    High-order bits are filled with the sign bit of the initial value of each
894321369Sdim///    32-bit element. The 32-bit results are packed into a 64-bit integer
895321369Sdim///    vector of [2 x i32].
896321369Sdim///
897309124Sdim/// \headerfile <x86intrin.h>
898309124Sdim///
899314564Sdim/// This intrinsic corresponds to the <c> PSRAD </c> instruction.
900309124Sdim///
901309124Sdim/// \param __m
902309124Sdim///    A 64-bit integer vector of [2 x i32].
903309124Sdim/// \param __count
904309124Sdim///    A 64-bit integer vector interpreted as a single 64-bit integer.
905309124Sdim/// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted
906309124Sdim///    values.
907288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS
908193326Sed_mm_sra_pi32(__m64 __m, __m64 __count)
909193326Sed{
910296417Sdim    return (__m64)__builtin_ia32_psrad((__v2si)__m, __count);
911193326Sed}
912193326Sed
913341825Sdim/// Right-shifts each 32-bit integer element of a 64-bit integer vector
914309124Sdim///    of [2 x i32] by the number of bits specified by a 32-bit integer.
915321369Sdim///
916309124Sdim///    High-order bits are filled with the sign bit of the initial value of each
917309124Sdim///    32-bit element. The 32-bit results are packed into a 64-bit integer
918309124Sdim///    vector of [2 x i32].
919309124Sdim///
920309124Sdim/// \headerfile <x86intrin.h>
921309124Sdim///
922314564Sdim/// This intrinsic corresponds to the <c> PSRAD </c> instruction.
923309124Sdim///
924309124Sdim/// \param __m
925309124Sdim///    A 64-bit integer vector of [2 x i32].
926309124Sdim/// \param __count
927309124Sdim///    A 32-bit integer value.
928309124Sdim/// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted
929309124Sdim///    values.
930288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS
931193326Sed_mm_srai_pi32(__m64 __m, int __count)
932193326Sed{
933193326Sed    return (__m64)__builtin_ia32_psradi((__v2si)__m, __count);
934193326Sed}
935193326Sed
936341825Sdim/// Right-shifts each 16-bit integer element of the first parameter,
937309124Sdim///    which is a 64-bit integer vector of [4 x i16], by the number of bits
938321369Sdim///    specified by the second parameter, which is a 64-bit integer.
939309124Sdim///
940321369Sdim///    High-order bits are cleared. The 16-bit results are packed into a 64-bit
941321369Sdim///    integer vector of [4 x i16].
942321369Sdim///
943309124Sdim/// \headerfile <x86intrin.h>
944309124Sdim///
945314564Sdim/// This intrinsic corresponds to the <c> PSRLW </c> instruction.
946309124Sdim///
947309124Sdim/// \param __m
948309124Sdim///    A 64-bit integer vector of [4 x i16].
949309124Sdim/// \param __count
950309124Sdim///    A 64-bit integer vector interpreted as a single 64-bit integer.
951309124Sdim/// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted
952309124Sdim///    values.
953288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS
954193326Sed_mm_srl_pi16(__m64 __m, __m64 __count)
955193326Sed{
956296417Sdim    return (__m64)__builtin_ia32_psrlw((__v4hi)__m, __count);
957193326Sed}
958193326Sed
959341825Sdim/// Right-shifts each 16-bit integer element of a 64-bit integer vector
960309124Sdim///    of [4 x i16] by the number of bits specified by a 32-bit integer.
961321369Sdim///
962309124Sdim///    High-order bits are cleared. The 16-bit results are packed into a 64-bit
963309124Sdim///    integer vector of [4 x i16].
964309124Sdim///
965309124Sdim/// \headerfile <x86intrin.h>
966309124Sdim///
967314564Sdim/// This intrinsic corresponds to the <c> PSRLW </c> instruction.
968309124Sdim///
969309124Sdim/// \param __m
970309124Sdim///    A 64-bit integer vector of [4 x i16].
971309124Sdim/// \param __count
972309124Sdim///    A 32-bit integer value.
973309124Sdim/// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted
974309124Sdim///    values.
975288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS
976193326Sed_mm_srli_pi16(__m64 __m, int __count)
977193326Sed{
978296417Sdim    return (__m64)__builtin_ia32_psrlwi((__v4hi)__m, __count);
979193326Sed}
980193326Sed
981341825Sdim/// Right-shifts each 32-bit integer element of the first parameter,
982309124Sdim///    which is a 64-bit integer vector of [2 x i32], by the number of bits
983321369Sdim///    specified by the second parameter, which is a 64-bit integer.
984309124Sdim///
985321369Sdim///    High-order bits are cleared. The 32-bit results are packed into a 64-bit
986321369Sdim///    integer vector of [2 x i32].
987321369Sdim///
988309124Sdim/// \headerfile <x86intrin.h>
989309124Sdim///
990314564Sdim/// This intrinsic corresponds to the <c> PSRLD </c> instruction.
991309124Sdim///
992309124Sdim/// \param __m
993309124Sdim///    A 64-bit integer vector of [2 x i32].
994309124Sdim/// \param __count
995309124Sdim///    A 64-bit integer vector interpreted as a single 64-bit integer.
996309124Sdim/// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted
997309124Sdim///    values.
998288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS
999193326Sed_mm_srl_pi32(__m64 __m, __m64 __count)
1000193326Sed{
1001296417Sdim    return (__m64)__builtin_ia32_psrld((__v2si)__m, __count);
1002193326Sed}
1003193326Sed
1004341825Sdim/// Right-shifts each 32-bit integer element of a 64-bit integer vector
1005309124Sdim///    of [2 x i32] by the number of bits specified by a 32-bit integer.
1006321369Sdim///
1007309124Sdim///    High-order bits are cleared. The 32-bit results are packed into a 64-bit
1008309124Sdim///    integer vector of [2 x i32].
1009309124Sdim///
1010309124Sdim/// \headerfile <x86intrin.h>
1011309124Sdim///
1012314564Sdim/// This intrinsic corresponds to the <c> PSRLD </c> instruction.
1013309124Sdim///
1014309124Sdim/// \param __m
1015309124Sdim///    A 64-bit integer vector of [2 x i32].
1016309124Sdim/// \param __count
1017309124Sdim///    A 32-bit integer value.
1018309124Sdim/// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted
1019309124Sdim///    values.
1020288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS
1021193326Sed_mm_srli_pi32(__m64 __m, int __count)
1022193326Sed{
1023193326Sed    return (__m64)__builtin_ia32_psrldi((__v2si)__m, __count);
1024193326Sed}
1025193326Sed
1026341825Sdim/// Right-shifts the first 64-bit integer parameter by the number of bits
1027321369Sdim///    specified by the second 64-bit integer parameter.
1028309124Sdim///
1029321369Sdim///    High-order bits are cleared.
1030321369Sdim///
1031309124Sdim/// \headerfile <x86intrin.h>
1032309124Sdim///
1033314564Sdim/// This intrinsic corresponds to the <c> PSRLQ </c> instruction.
1034309124Sdim///
1035309124Sdim/// \param __m
1036309124Sdim///    A 64-bit integer vector interpreted as a single 64-bit integer.
1037309124Sdim/// \param __count
1038309124Sdim///    A 64-bit integer vector interpreted as a single 64-bit integer.
1039309124Sdim/// \returns A 64-bit integer vector containing the right-shifted value.
1040288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS
1041193326Sed_mm_srl_si64(__m64 __m, __m64 __count)
1042193326Sed{
1043309124Sdim    return (__m64)__builtin_ia32_psrlq((__v1di)__m, __count);
1044193326Sed}
1045193326Sed
1046341825Sdim/// Right-shifts the first parameter, which is a 64-bit integer, by the
1047309124Sdim///    number of bits specified by the second parameter, which is a 32-bit
1048321369Sdim///    integer.
1049309124Sdim///
1050321369Sdim///    High-order bits are cleared.
1051321369Sdim///
1052309124Sdim/// \headerfile <x86intrin.h>
1053309124Sdim///
1054314564Sdim/// This intrinsic corresponds to the <c> PSRLQ </c> instruction.
1055309124Sdim///
1056309124Sdim/// \param __m
1057309124Sdim///    A 64-bit integer vector interpreted as a single 64-bit integer.
1058309124Sdim/// \param __count
1059309124Sdim///    A 32-bit integer value.
1060309124Sdim/// \returns A 64-bit integer vector containing the right-shifted value.
1061288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS
1062193326Sed_mm_srli_si64(__m64 __m, int __count)
1063193326Sed{
1064309124Sdim    return (__m64)__builtin_ia32_psrlqi((__v1di)__m, __count);
1065193326Sed}
1066193326Sed
1067341825Sdim/// Performs a bitwise AND of two 64-bit integer vectors.
1068309124Sdim///
1069309124Sdim/// \headerfile <x86intrin.h>
1070309124Sdim///
1071314564Sdim/// This intrinsic corresponds to the <c> PAND </c> instruction.
1072309124Sdim///
1073309124Sdim/// \param __m1
1074309124Sdim///    A 64-bit integer vector.
1075309124Sdim/// \param __m2
1076309124Sdim///    A 64-bit integer vector.
1077309124Sdim/// \returns A 64-bit integer vector containing the bitwise AND of both
1078309124Sdim///    parameters.
1079288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS
1080193326Sed_mm_and_si64(__m64 __m1, __m64 __m2)
1081193326Sed{
1082309124Sdim    return __builtin_ia32_pand((__v1di)__m1, (__v1di)__m2);
1083193326Sed}
1084193326Sed
1085341825Sdim/// Performs a bitwise NOT of the first 64-bit integer vector, and then
1086309124Sdim///    performs a bitwise AND of the intermediate result and the second 64-bit
1087309124Sdim///    integer vector.
1088309124Sdim///
1089309124Sdim/// \headerfile <x86intrin.h>
1090309124Sdim///
1091314564Sdim/// This intrinsic corresponds to the <c> PANDN </c> instruction.
1092309124Sdim///
1093309124Sdim/// \param __m1
1094309124Sdim///    A 64-bit integer vector. The one's complement of this parameter is used
1095309124Sdim///    in the bitwise AND.
1096309124Sdim/// \param __m2
1097309124Sdim///    A 64-bit integer vector.
1098309124Sdim/// \returns A 64-bit integer vector containing the bitwise AND of the second
1099309124Sdim///    parameter and the one's complement of the first parameter.
1100288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS
1101193326Sed_mm_andnot_si64(__m64 __m1, __m64 __m2)
1102193326Sed{
1103309124Sdim    return __builtin_ia32_pandn((__v1di)__m1, (__v1di)__m2);
1104193326Sed}
1105193326Sed
1106341825Sdim/// Performs a bitwise OR of two 64-bit integer vectors.
1107309124Sdim///
1108309124Sdim/// \headerfile <x86intrin.h>
1109309124Sdim///
1110314564Sdim/// This intrinsic corresponds to the <c> POR </c> instruction.
1111309124Sdim///
1112309124Sdim/// \param __m1
1113309124Sdim///    A 64-bit integer vector.
1114309124Sdim/// \param __m2
1115309124Sdim///    A 64-bit integer vector.
1116309124Sdim/// \returns A 64-bit integer vector containing the bitwise OR of both
1117309124Sdim///    parameters.
1118288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS
1119193326Sed_mm_or_si64(__m64 __m1, __m64 __m2)
1120193326Sed{
1121309124Sdim    return __builtin_ia32_por((__v1di)__m1, (__v1di)__m2);
1122193326Sed}
1123193326Sed
1124341825Sdim/// Performs a bitwise exclusive OR of two 64-bit integer vectors.
1125309124Sdim///
1126309124Sdim/// \headerfile <x86intrin.h>
1127309124Sdim///
1128314564Sdim/// This intrinsic corresponds to the <c> PXOR </c> instruction.
1129309124Sdim///
1130309124Sdim/// \param __m1
1131309124Sdim///    A 64-bit integer vector.
1132309124Sdim/// \param __m2
1133309124Sdim///    A 64-bit integer vector.
1134309124Sdim/// \returns A 64-bit integer vector containing the bitwise exclusive OR of both
1135309124Sdim///    parameters.
1136288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS
1137193326Sed_mm_xor_si64(__m64 __m1, __m64 __m2)
1138193326Sed{
1139309124Sdim    return __builtin_ia32_pxor((__v1di)__m1, (__v1di)__m2);
1140193326Sed}
1141193326Sed
1142341825Sdim/// Compares the 8-bit integer elements of two 64-bit integer vectors of
1143309124Sdim///    [8 x i8] to determine if the element of the first vector is equal to the
1144321369Sdim///    corresponding element of the second vector.
1145309124Sdim///
1146321369Sdim///    The comparison yields 0 for false, 0xFF for true.
1147321369Sdim///
1148309124Sdim/// \headerfile <x86intrin.h>
1149309124Sdim///
1150314564Sdim/// This intrinsic corresponds to the <c> PCMPEQB </c> instruction.
1151309124Sdim///
1152309124Sdim/// \param __m1
1153309124Sdim///    A 64-bit integer vector of [8 x i8].
1154309124Sdim/// \param __m2
1155309124Sdim///    A 64-bit integer vector of [8 x i8].
1156309124Sdim/// \returns A 64-bit integer vector of [8 x i8] containing the comparison
1157309124Sdim///    results.
1158288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS
1159193326Sed_mm_cmpeq_pi8(__m64 __m1, __m64 __m2)
1160193326Sed{
1161218893Sdim    return (__m64)__builtin_ia32_pcmpeqb((__v8qi)__m1, (__v8qi)__m2);
1162193326Sed}
1163193326Sed
1164341825Sdim/// Compares the 16-bit integer elements of two 64-bit integer vectors of
1165309124Sdim///    [4 x i16] to determine if the element of the first vector is equal to the
1166321369Sdim///    corresponding element of the second vector.
1167309124Sdim///
1168321369Sdim///    The comparison yields 0 for false, 0xFFFF for true.
1169321369Sdim///
1170309124Sdim/// \headerfile <x86intrin.h>
1171309124Sdim///
1172314564Sdim/// This intrinsic corresponds to the <c> PCMPEQW </c> instruction.
1173309124Sdim///
1174309124Sdim/// \param __m1
1175309124Sdim///    A 64-bit integer vector of [4 x i16].
1176309124Sdim/// \param __m2
1177309124Sdim///    A 64-bit integer vector of [4 x i16].
1178309124Sdim/// \returns A 64-bit integer vector of [4 x i16] containing the comparison
1179309124Sdim///    results.
1180288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS
1181193326Sed_mm_cmpeq_pi16(__m64 __m1, __m64 __m2)
1182193326Sed{
1183218893Sdim    return (__m64)__builtin_ia32_pcmpeqw((__v4hi)__m1, (__v4hi)__m2);
1184193326Sed}
1185193326Sed
1186341825Sdim/// Compares the 32-bit integer elements of two 64-bit integer vectors of
1187309124Sdim///    [2 x i32] to determine if the element of the first vector is equal to the
1188321369Sdim///    corresponding element of the second vector.
1189309124Sdim///
1190321369Sdim///    The comparison yields 0 for false, 0xFFFFFFFF for true.
1191321369Sdim///
1192309124Sdim/// \headerfile <x86intrin.h>
1193309124Sdim///
1194314564Sdim/// This intrinsic corresponds to the <c> PCMPEQD </c> instruction.
1195309124Sdim///
1196309124Sdim/// \param __m1
1197309124Sdim///    A 64-bit integer vector of [2 x i32].
1198309124Sdim/// \param __m2
1199309124Sdim///    A 64-bit integer vector of [2 x i32].
1200309124Sdim/// \returns A 64-bit integer vector of [2 x i32] containing the comparison
1201309124Sdim///    results.
1202288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS
1203193326Sed_mm_cmpeq_pi32(__m64 __m1, __m64 __m2)
1204193326Sed{
1205218893Sdim    return (__m64)__builtin_ia32_pcmpeqd((__v2si)__m1, (__v2si)__m2);
1206193326Sed}
1207193326Sed
1208341825Sdim/// Compares the 8-bit integer elements of two 64-bit integer vectors of
1209309124Sdim///    [8 x i8] to determine if the element of the first vector is greater than
1210321369Sdim///    the corresponding element of the second vector.
1211309124Sdim///
1212321369Sdim///    The comparison yields 0 for false, 0xFF for true.
1213321369Sdim///
1214309124Sdim/// \headerfile <x86intrin.h>
1215309124Sdim///
1216314564Sdim/// This intrinsic corresponds to the <c> PCMPGTB </c> instruction.
1217309124Sdim///
1218309124Sdim/// \param __m1
1219309124Sdim///    A 64-bit integer vector of [8 x i8].
1220309124Sdim/// \param __m2
1221309124Sdim///    A 64-bit integer vector of [8 x i8].
1222309124Sdim/// \returns A 64-bit integer vector of [8 x i8] containing the comparison
1223309124Sdim///    results.
1224288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS
1225193326Sed_mm_cmpgt_pi8(__m64 __m1, __m64 __m2)
1226193326Sed{
1227218893Sdim    return (__m64)__builtin_ia32_pcmpgtb((__v8qi)__m1, (__v8qi)__m2);
1228193326Sed}
1229193326Sed
1230341825Sdim/// Compares the 16-bit integer elements of two 64-bit integer vectors of
1231309124Sdim///    [4 x i16] to determine if the element of the first vector is greater than
1232321369Sdim///    the corresponding element of the second vector.
1233309124Sdim///
1234321369Sdim///    The comparison yields 0 for false, 0xFFFF for true.
1235321369Sdim///
1236309124Sdim/// \headerfile <x86intrin.h>
1237309124Sdim///
1238314564Sdim/// This intrinsic corresponds to the <c> PCMPGTW </c> instruction.
1239309124Sdim///
1240309124Sdim/// \param __m1
1241309124Sdim///    A 64-bit integer vector of [4 x i16].
1242309124Sdim/// \param __m2
1243309124Sdim///    A 64-bit integer vector of [4 x i16].
1244309124Sdim/// \returns A 64-bit integer vector of [4 x i16] containing the comparison
1245309124Sdim///    results.
1246288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS
1247193326Sed_mm_cmpgt_pi16(__m64 __m1, __m64 __m2)
1248193326Sed{
1249218893Sdim    return (__m64)__builtin_ia32_pcmpgtw((__v4hi)__m1, (__v4hi)__m2);
1250193326Sed}
1251193326Sed
1252341825Sdim/// Compares the 32-bit integer elements of two 64-bit integer vectors of
1253309124Sdim///    [2 x i32] to determine if the element of the first vector is greater than
1254321369Sdim///    the corresponding element of the second vector.
1255309124Sdim///
1256321369Sdim///    The comparison yields 0 for false, 0xFFFFFFFF for true.
1257321369Sdim///
1258309124Sdim/// \headerfile <x86intrin.h>
1259309124Sdim///
1260314564Sdim/// This intrinsic corresponds to the <c> PCMPGTD </c> instruction.
1261309124Sdim///
1262309124Sdim/// \param __m1
1263309124Sdim///    A 64-bit integer vector of [2 x i32].
1264309124Sdim/// \param __m2
1265309124Sdim///    A 64-bit integer vector of [2 x i32].
1266309124Sdim/// \returns A 64-bit integer vector of [2 x i32] containing the comparison
1267309124Sdim///    results.
1268288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS
1269193326Sed_mm_cmpgt_pi32(__m64 __m1, __m64 __m2)
1270193326Sed{
1271218893Sdim    return (__m64)__builtin_ia32_pcmpgtd((__v2si)__m1, (__v2si)__m2);
1272193326Sed}
1273193326Sed
1274341825Sdim/// Constructs a 64-bit integer vector initialized to zero.
1275309124Sdim///
1276309124Sdim/// \headerfile <x86intrin.h>
1277309124Sdim///
1278341825Sdim/// This intrinsic corresponds to the <c> PXOR </c> instruction.
1279309124Sdim///
1280309124Sdim/// \returns An initialized 64-bit integer vector with all elements set to zero.
1281288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS
1282193326Sed_mm_setzero_si64(void)
1283193326Sed{
1284341825Sdim    return __extension__ (__m64){ 0LL };
1285193326Sed}
1286193326Sed
1287341825Sdim/// Constructs a 64-bit integer vector initialized with the specified
1288309124Sdim///    32-bit integer values.
1289309124Sdim///
1290309124Sdim/// \headerfile <x86intrin.h>
1291309124Sdim///
1292309124Sdim/// This intrinsic is a utility function and does not correspond to a specific
1293309124Sdim///    instruction.
1294309124Sdim///
1295309124Sdim/// \param __i1
1296309124Sdim///    A 32-bit integer value used to initialize the upper 32 bits of the
1297309124Sdim///    result.
1298309124Sdim/// \param __i0
1299309124Sdim///    A 32-bit integer value used to initialize the lower 32 bits of the
1300309124Sdim///    result.
1301309124Sdim/// \returns An initialized 64-bit integer vector.
1302288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS
1303193326Sed_mm_set_pi32(int __i1, int __i0)
1304193326Sed{
1305218893Sdim    return (__m64)__builtin_ia32_vec_init_v2si(__i0, __i1);
1306193326Sed}
1307193326Sed
1308341825Sdim/// Constructs a 64-bit integer vector initialized with the specified
1309309124Sdim///    16-bit integer values.
1310309124Sdim///
1311309124Sdim/// \headerfile <x86intrin.h>
1312309124Sdim///
1313309124Sdim/// This intrinsic is a utility function and does not correspond to a specific
1314309124Sdim///    instruction.
1315309124Sdim///
1316309124Sdim/// \param __s3
1317309124Sdim///    A 16-bit integer value used to initialize bits [63:48] of the result.
1318309124Sdim/// \param __s2
1319309124Sdim///    A 16-bit integer value used to initialize bits [47:32] of the result.
1320309124Sdim/// \param __s1
1321309124Sdim///    A 16-bit integer value used to initialize bits [31:16] of the result.
1322309124Sdim/// \param __s0
1323309124Sdim///    A 16-bit integer value used to initialize bits [15:0] of the result.
1324309124Sdim/// \returns An initialized 64-bit integer vector.
1325288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS
1326193326Sed_mm_set_pi16(short __s3, short __s2, short __s1, short __s0)
1327193326Sed{
1328218893Sdim    return (__m64)__builtin_ia32_vec_init_v4hi(__s0, __s1, __s2, __s3);
1329193326Sed}
1330193326Sed
1331341825Sdim/// Constructs a 64-bit integer vector initialized with the specified
1332309124Sdim///    8-bit integer values.
1333309124Sdim///
1334309124Sdim/// \headerfile <x86intrin.h>
1335309124Sdim///
1336309124Sdim/// This intrinsic is a utility function and does not correspond to a specific
1337309124Sdim///    instruction.
1338309124Sdim///
1339309124Sdim/// \param __b7
1340309124Sdim///    An 8-bit integer value used to initialize bits [63:56] of the result.
1341309124Sdim/// \param __b6
1342309124Sdim///    An 8-bit integer value used to initialize bits [55:48] of the result.
1343309124Sdim/// \param __b5
1344309124Sdim///    An 8-bit integer value used to initialize bits [47:40] of the result.
1345309124Sdim/// \param __b4
1346309124Sdim///    An 8-bit integer value used to initialize bits [39:32] of the result.
1347309124Sdim/// \param __b3
1348309124Sdim///    An 8-bit integer value used to initialize bits [31:24] of the result.
1349309124Sdim/// \param __b2
1350309124Sdim///    An 8-bit integer value used to initialize bits [23:16] of the result.
1351309124Sdim/// \param __b1
1352309124Sdim///    An 8-bit integer value used to initialize bits [15:8] of the result.
1353309124Sdim/// \param __b0
1354309124Sdim///    An 8-bit integer value used to initialize bits [7:0] of the result.
1355309124Sdim/// \returns An initialized 64-bit integer vector.
1356288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS
1357193326Sed_mm_set_pi8(char __b7, char __b6, char __b5, char __b4, char __b3, char __b2,
1358193326Sed            char __b1, char __b0)
1359193326Sed{
1360218893Sdim    return (__m64)__builtin_ia32_vec_init_v8qi(__b0, __b1, __b2, __b3,
1361218893Sdim                                               __b4, __b5, __b6, __b7);
1362193326Sed}
1363193326Sed
1364341825Sdim/// Constructs a 64-bit integer vector of [2 x i32], with each of the
1365309124Sdim///    32-bit integer vector elements set to the specified 32-bit integer
1366309124Sdim///    value.
1367309124Sdim///
1368309124Sdim/// \headerfile <x86intrin.h>
1369309124Sdim///
1370341825Sdim/// This intrinsic is a utility function and does not correspond to a specific
1371341825Sdim///    instruction.
1372309124Sdim///
1373309124Sdim/// \param __i
1374309124Sdim///    A 32-bit integer value used to initialize each vector element of the
1375309124Sdim///    result.
1376309124Sdim/// \returns An initialized 64-bit integer vector of [2 x i32].
1377288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS
1378193326Sed_mm_set1_pi32(int __i)
1379193326Sed{
1380218893Sdim    return _mm_set_pi32(__i, __i);
1381193326Sed}
1382193326Sed
1383341825Sdim/// Constructs a 64-bit integer vector of [4 x i16], with each of the
1384309124Sdim///    16-bit integer vector elements set to the specified 16-bit integer
1385309124Sdim///    value.
1386309124Sdim///
1387309124Sdim/// \headerfile <x86intrin.h>
1388309124Sdim///
1389341825Sdim/// This intrinsic is a utility function and does not correspond to a specific
1390341825Sdim///    instruction.
1391309124Sdim///
1392309124Sdim/// \param __w
1393309124Sdim///    A 16-bit integer value used to initialize each vector element of the
1394309124Sdim///    result.
1395309124Sdim/// \returns An initialized 64-bit integer vector of [4 x i16].
1396288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS
1397218893Sdim_mm_set1_pi16(short __w)
1398193326Sed{
1399218893Sdim    return _mm_set_pi16(__w, __w, __w, __w);
1400193326Sed}
1401193326Sed
1402341825Sdim/// Constructs a 64-bit integer vector of [8 x i8], with each of the
1403309124Sdim///    8-bit integer vector elements set to the specified 8-bit integer value.
1404309124Sdim///
1405309124Sdim/// \headerfile <x86intrin.h>
1406309124Sdim///
1407341825Sdim/// This intrinsic is a utility function and does not correspond to a specific
1408341825Sdim///    instruction.
1409309124Sdim///
1410309124Sdim/// \param __b
1411309124Sdim///    An 8-bit integer value used to initialize each vector element of the
1412309124Sdim///    result.
1413309124Sdim/// \returns An initialized 64-bit integer vector of [8 x i8].
1414288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS
1415193326Sed_mm_set1_pi8(char __b)
1416193326Sed{
1417218893Sdim    return _mm_set_pi8(__b, __b, __b, __b, __b, __b, __b, __b);
1418193326Sed}
1419193326Sed
1420341825Sdim/// Constructs a 64-bit integer vector, initialized in reverse order with
1421309124Sdim///    the specified 32-bit integer values.
1422309124Sdim///
1423309124Sdim/// \headerfile <x86intrin.h>
1424309124Sdim///
1425309124Sdim/// This intrinsic is a utility function and does not correspond to a specific
1426309124Sdim///    instruction.
1427309124Sdim///
1428309124Sdim/// \param __i0
1429309124Sdim///    A 32-bit integer value used to initialize the lower 32 bits of the
1430309124Sdim///    result.
1431309124Sdim/// \param __i1
1432309124Sdim///    A 32-bit integer value used to initialize the upper 32 bits of the
1433309124Sdim///    result.
1434309124Sdim/// \returns An initialized 64-bit integer vector.
1435288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS
1436223017Sdim_mm_setr_pi32(int __i0, int __i1)
1437193326Sed{
1438218893Sdim    return _mm_set_pi32(__i1, __i0);
1439193326Sed}
1440193326Sed
1441341825Sdim/// Constructs a 64-bit integer vector, initialized in reverse order with
1442309124Sdim///    the specified 16-bit integer values.
1443309124Sdim///
1444309124Sdim/// \headerfile <x86intrin.h>
1445309124Sdim///
1446309124Sdim/// This intrinsic is a utility function and does not correspond to a specific
1447309124Sdim///    instruction.
1448309124Sdim///
1449309124Sdim/// \param __w0
1450309124Sdim///    A 16-bit integer value used to initialize bits [15:0] of the result.
1451309124Sdim/// \param __w1
1452309124Sdim///    A 16-bit integer value used to initialize bits [31:16] of the result.
1453309124Sdim/// \param __w2
1454309124Sdim///    A 16-bit integer value used to initialize bits [47:32] of the result.
1455309124Sdim/// \param __w3
1456309124Sdim///    A 16-bit integer value used to initialize bits [63:48] of the result.
1457309124Sdim/// \returns An initialized 64-bit integer vector.
1458288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS
1459223017Sdim_mm_setr_pi16(short __w0, short __w1, short __w2, short __w3)
1460193326Sed{
1461218893Sdim    return _mm_set_pi16(__w3, __w2, __w1, __w0);
1462193326Sed}
1463193326Sed
1464341825Sdim/// Constructs a 64-bit integer vector, initialized in reverse order with
1465309124Sdim///    the specified 8-bit integer values.
1466309124Sdim///
1467309124Sdim/// \headerfile <x86intrin.h>
1468309124Sdim///
1469309124Sdim/// This intrinsic is a utility function and does not correspond to a specific
1470309124Sdim///    instruction.
1471309124Sdim///
1472309124Sdim/// \param __b0
1473309124Sdim///    An 8-bit integer value used to initialize bits [7:0] of the result.
1474309124Sdim/// \param __b1
1475309124Sdim///    An 8-bit integer value used to initialize bits [15:8] of the result.
1476309124Sdim/// \param __b2
1477309124Sdim///    An 8-bit integer value used to initialize bits [23:16] of the result.
1478309124Sdim/// \param __b3
1479309124Sdim///    An 8-bit integer value used to initialize bits [31:24] of the result.
1480309124Sdim/// \param __b4
1481309124Sdim///    An 8-bit integer value used to initialize bits [39:32] of the result.
1482309124Sdim/// \param __b5
1483309124Sdim///    An 8-bit integer value used to initialize bits [47:40] of the result.
1484309124Sdim/// \param __b6
1485309124Sdim///    An 8-bit integer value used to initialize bits [55:48] of the result.
1486309124Sdim/// \param __b7
1487309124Sdim///    An 8-bit integer value used to initialize bits [63:56] of the result.
1488309124Sdim/// \returns An initialized 64-bit integer vector.
1489288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS
1490223017Sdim_mm_setr_pi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5,
1491223017Sdim             char __b6, char __b7)
1492193326Sed{
1493218893Sdim    return _mm_set_pi8(__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0);
1494193326Sed}
1495193326Sed
/* The default function attributes are only meant for the functions in this
   file, so drop the macro now that they have all been defined. */
#undef __DEFAULT_FN_ATTRS

/* Compatibility aliases: each _m_* name expands to the corresponding _mm_*
   intrinsic. */

/* State management and conversions. */
#define _m_empty      _mm_empty
#define _m_from_int   _mm_cvtsi32_si64
#define _m_from_int64 _mm_cvtsi64_m64
#define _m_to_int     _mm_cvtsi64_si32
#define _m_to_int64   _mm_cvtm64_si64

/* Pack. */
#define _m_packsswb   _mm_packs_pi16
#define _m_packssdw   _mm_packs_pi32
#define _m_packuswb   _mm_packs_pu16

/* Unpack. */
#define _m_punpckhbw  _mm_unpackhi_pi8
#define _m_punpckhwd  _mm_unpackhi_pi16
#define _m_punpckhdq  _mm_unpackhi_pi32
#define _m_punpcklbw  _mm_unpacklo_pi8
#define _m_punpcklwd  _mm_unpacklo_pi16
#define _m_punpckldq  _mm_unpacklo_pi32

/* Add. */
#define _m_paddb      _mm_add_pi8
#define _m_paddw      _mm_add_pi16
#define _m_paddd      _mm_add_pi32
#define _m_paddsb     _mm_adds_pi8
#define _m_paddsw     _mm_adds_pi16
#define _m_paddusb    _mm_adds_pu8
#define _m_paddusw    _mm_adds_pu16

/* Subtract. */
#define _m_psubb      _mm_sub_pi8
#define _m_psubw      _mm_sub_pi16
#define _m_psubd      _mm_sub_pi32
#define _m_psubsb     _mm_subs_pi8
#define _m_psubsw     _mm_subs_pi16
#define _m_psubusb    _mm_subs_pu8
#define _m_psubusw    _mm_subs_pu16

/* Multiply. */
#define _m_pmaddwd    _mm_madd_pi16
#define _m_pmulhw     _mm_mulhi_pi16
#define _m_pmullw     _mm_mullo_pi16

/* Shift. */
#define _m_psllw      _mm_sll_pi16
#define _m_psllwi     _mm_slli_pi16
#define _m_pslld      _mm_sll_pi32
#define _m_pslldi     _mm_slli_pi32
#define _m_psllq      _mm_sll_si64
#define _m_psllqi     _mm_slli_si64
#define _m_psraw      _mm_sra_pi16
#define _m_psrawi     _mm_srai_pi16
#define _m_psrad      _mm_sra_pi32
#define _m_psradi     _mm_srai_pi32
#define _m_psrlw      _mm_srl_pi16
#define _m_psrlwi     _mm_srli_pi16
#define _m_psrld      _mm_srl_pi32
#define _m_psrldi     _mm_srli_pi32
#define _m_psrlq      _mm_srl_si64
#define _m_psrlqi     _mm_srli_si64

/* Bitwise logic. */
#define _m_pand       _mm_and_si64
#define _m_pandn      _mm_andnot_si64
#define _m_por        _mm_or_si64
#define _m_pxor       _mm_xor_si64

/* Compare. */
#define _m_pcmpeqb    _mm_cmpeq_pi8
#define _m_pcmpeqw    _mm_cmpeq_pi16
#define _m_pcmpeqd    _mm_cmpeq_pi32
#define _m_pcmpgtb    _mm_cmpgt_pi8
#define _m_pcmpgtw    _mm_cmpgt_pi16
#define _m_pcmpgtd    _mm_cmpgt_pi32
1556212904Sdim
1557193326Sed#endif /* __MMINTRIN_H */
1558193326Sed
1559