1193326Sed/*===---- tmmintrin.h - SSSE3 intrinsics -----------------------------------===
2193326Sed *
3353358Sdim * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4353358Sdim * See https://llvm.org/LICENSE.txt for license information.
5353358Sdim * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6193326Sed *
7193326Sed *===-----------------------------------------------------------------------===
8193326Sed */
9296417Sdim
10193326Sed#ifndef __TMMINTRIN_H
11193326Sed#define __TMMINTRIN_H
12193326Sed
13193326Sed#include <pmmintrin.h>
14193326Sed
/* Default attribute sets applied to every function defined in this file.
 * __DEFAULT_FN_ATTRS targets "ssse3" and is used by the 128-bit intrinsics;
 * __DEFAULT_FN_ATTRS_MMX targets "mmx,ssse3" and is used by the 64-bit ones.
 */
#define __DEFAULT_FN_ATTRS \
  __attribute__((__always_inline__, __nodebug__, __target__("ssse3"), \
                 __min_vector_width__(64)))
#define __DEFAULT_FN_ATTRS_MMX \
  __attribute__((__always_inline__, __nodebug__, __target__("mmx,ssse3"), \
                 __min_vector_width__(64)))
18288943Sdim
19341825Sdim/// Computes the absolute value of each of the packed 8-bit signed
20309124Sdim///    integers in the source operand and stores the 8-bit unsigned integer
21309124Sdim///    results in the destination.
22309124Sdim///
23309124Sdim/// \headerfile <x86intrin.h>
24309124Sdim///
25309124Sdim/// This intrinsic corresponds to the \c PABSB instruction.
26309124Sdim///
27309124Sdim/// \param __a
28309124Sdim///    A 64-bit vector of [8 x i8].
29309124Sdim/// \returns A 64-bit integer vector containing the absolute values of the
30309124Sdim///    elements in the operand.
31341825Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
32249423Sdim_mm_abs_pi8(__m64 __a)
33193326Sed{
34249423Sdim    return (__m64)__builtin_ia32_pabsb((__v8qi)__a);
35193326Sed}
36193326Sed
37341825Sdim/// Computes the absolute value of each of the packed 8-bit signed
38309124Sdim///    integers in the source operand and stores the 8-bit unsigned integer
39309124Sdim///    results in the destination.
40309124Sdim///
41309124Sdim/// \headerfile <x86intrin.h>
42309124Sdim///
43309124Sdim/// This intrinsic corresponds to the \c VPABSB instruction.
44309124Sdim///
45309124Sdim/// \param __a
46309124Sdim///    A 128-bit vector of [16 x i8].
47309124Sdim/// \returns A 128-bit integer vector containing the absolute values of the
48309124Sdim///    elements in the operand.
49288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
50249423Sdim_mm_abs_epi8(__m128i __a)
51193326Sed{
52249423Sdim    return (__m128i)__builtin_ia32_pabsb128((__v16qi)__a);
53193326Sed}
54193326Sed
55341825Sdim/// Computes the absolute value of each of the packed 16-bit signed
56309124Sdim///    integers in the source operand and stores the 16-bit unsigned integer
57309124Sdim///    results in the destination.
58309124Sdim///
59309124Sdim/// \headerfile <x86intrin.h>
60309124Sdim///
61309124Sdim/// This intrinsic corresponds to the \c PABSW instruction.
62309124Sdim///
63309124Sdim/// \param __a
64309124Sdim///    A 64-bit vector of [4 x i16].
65309124Sdim/// \returns A 64-bit integer vector containing the absolute values of the
66309124Sdim///    elements in the operand.
67341825Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
68249423Sdim_mm_abs_pi16(__m64 __a)
69193326Sed{
70249423Sdim    return (__m64)__builtin_ia32_pabsw((__v4hi)__a);
71193326Sed}
72193326Sed
73341825Sdim/// Computes the absolute value of each of the packed 16-bit signed
74309124Sdim///    integers in the source operand and stores the 16-bit unsigned integer
75309124Sdim///    results in the destination.
76309124Sdim///
77309124Sdim/// \headerfile <x86intrin.h>
78309124Sdim///
79309124Sdim/// This intrinsic corresponds to the \c VPABSW instruction.
80309124Sdim///
81309124Sdim/// \param __a
82309124Sdim///    A 128-bit vector of [8 x i16].
83309124Sdim/// \returns A 128-bit integer vector containing the absolute values of the
84309124Sdim///    elements in the operand.
85288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
86249423Sdim_mm_abs_epi16(__m128i __a)
87193326Sed{
88249423Sdim    return (__m128i)__builtin_ia32_pabsw128((__v8hi)__a);
89193326Sed}
90193326Sed
91341825Sdim/// Computes the absolute value of each of the packed 32-bit signed
92309124Sdim///    integers in the source operand and stores the 32-bit unsigned integer
93309124Sdim///    results in the destination.
94309124Sdim///
95309124Sdim/// \headerfile <x86intrin.h>
96309124Sdim///
97309124Sdim/// This intrinsic corresponds to the \c PABSD instruction.
98309124Sdim///
99309124Sdim/// \param __a
100309124Sdim///    A 64-bit vector of [2 x i32].
101309124Sdim/// \returns A 64-bit integer vector containing the absolute values of the
102309124Sdim///    elements in the operand.
103341825Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
104249423Sdim_mm_abs_pi32(__m64 __a)
105193326Sed{
106249423Sdim    return (__m64)__builtin_ia32_pabsd((__v2si)__a);
107193326Sed}
108193326Sed
109341825Sdim/// Computes the absolute value of each of the packed 32-bit signed
110309124Sdim///    integers in the source operand and stores the 32-bit unsigned integer
111309124Sdim///    results in the destination.
112309124Sdim///
113309124Sdim/// \headerfile <x86intrin.h>
114309124Sdim///
115309124Sdim/// This intrinsic corresponds to the \c VPABSD instruction.
116309124Sdim///
117309124Sdim/// \param __a
118309124Sdim///    A 128-bit vector of [4 x i32].
119309124Sdim/// \returns A 128-bit integer vector containing the absolute values of the
120309124Sdim///    elements in the operand.
121288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
122249423Sdim_mm_abs_epi32(__m128i __a)
123193326Sed{
124249423Sdim    return (__m128i)__builtin_ia32_pabsd128((__v4si)__a);
125193326Sed}
126193326Sed
/// Concatenates the two 128-bit integer vector operands, and
///    right-shifts the result by the number of bytes specified in the immediate
///    operand.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm_alignr_epi8(__m128i a, __m128i b, const int n);
/// \endcode
///
/// This intrinsic corresponds to the \c PALIGNR instruction.
///
/// \param a
///    A 128-bit vector of [16 x i8] containing one of the source operands.
/// \param b
///    A 128-bit vector of [16 x i8] containing one of the source operands.
/// \param n
///    An immediate operand specifying how many bytes to right-shift the result.
/// \returns A 128-bit integer vector containing the concatenated right-shifted
///    value.
/* The whole expansion is parenthesized so that the result-type cast cannot be
 * split from the builtin call by a higher-precedence operator at the use site
 * (for example a subscript applied directly to the macro invocation). */
#define _mm_alignr_epi8(a, b, n) \
  ((__m128i)__builtin_ia32_palignr128((__v16qi)(__m128i)(a), \
                                      (__v16qi)(__m128i)(b), (n)))
150193326Sed
/// Concatenates the two 64-bit integer vector operands, and right-shifts
///    the result by the number of bytes specified in the immediate operand.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m64 _mm_alignr_pi8(__m64 a, __m64 b, const int n);
/// \endcode
///
/// This intrinsic corresponds to the \c PALIGNR instruction.
///
/// \param a
///    A 64-bit vector of [8 x i8] containing one of the source operands.
/// \param b
///    A 64-bit vector of [8 x i8] containing one of the source operands.
/// \param n
///    An immediate operand specifying how many bytes to right-shift the result.
/// \returns A 64-bit integer vector containing the concatenated right-shifted
///    value.
/* The whole expansion is parenthesized so that the result-type cast cannot be
 * split from the builtin call by a higher-precedence operator at the use site
 * (for example a subscript applied directly to the macro invocation). */
#define _mm_alignr_pi8(a, b, n) \
  ((__m64)__builtin_ia32_palignr((__v8qi)(__m64)(a), (__v8qi)(__m64)(b), \
                                 (n)))
172234353Sdim
173341825Sdim/// Horizontally adds the adjacent pairs of values contained in 2 packed
174309124Sdim///    128-bit vectors of [8 x i16].
175309124Sdim///
176309124Sdim/// \headerfile <x86intrin.h>
177309124Sdim///
178309124Sdim/// This intrinsic corresponds to the \c VPHADDW instruction.
179309124Sdim///
180309124Sdim/// \param __a
181309124Sdim///    A 128-bit vector of [8 x i16] containing one of the source operands. The
182309124Sdim///    horizontal sums of the values are stored in the lower bits of the
183309124Sdim///    destination.
184309124Sdim/// \param __b
185309124Sdim///    A 128-bit vector of [8 x i16] containing one of the source operands. The
186309124Sdim///    horizontal sums of the values are stored in the upper bits of the
187309124Sdim///    destination.
188309124Sdim/// \returns A 128-bit vector of [8 x i16] containing the horizontal sums of
189309124Sdim///    both operands.
190288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
191249423Sdim_mm_hadd_epi16(__m128i __a, __m128i __b)
192193326Sed{
193249423Sdim    return (__m128i)__builtin_ia32_phaddw128((__v8hi)__a, (__v8hi)__b);
194193326Sed}
195193326Sed
196341825Sdim/// Horizontally adds the adjacent pairs of values contained in 2 packed
197309124Sdim///    128-bit vectors of [4 x i32].
198309124Sdim///
199309124Sdim/// \headerfile <x86intrin.h>
200309124Sdim///
201309124Sdim/// This intrinsic corresponds to the \c VPHADDD instruction.
202309124Sdim///
203309124Sdim/// \param __a
204309124Sdim///    A 128-bit vector of [4 x i32] containing one of the source operands. The
205309124Sdim///    horizontal sums of the values are stored in the lower bits of the
206309124Sdim///    destination.
207309124Sdim/// \param __b
208309124Sdim///    A 128-bit vector of [4 x i32] containing one of the source operands. The
209309124Sdim///    horizontal sums of the values are stored in the upper bits of the
210309124Sdim///    destination.
211309124Sdim/// \returns A 128-bit vector of [4 x i32] containing the horizontal sums of
212309124Sdim///    both operands.
213288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
214249423Sdim_mm_hadd_epi32(__m128i __a, __m128i __b)
215193326Sed{
216249423Sdim    return (__m128i)__builtin_ia32_phaddd128((__v4si)__a, (__v4si)__b);
217193326Sed}
218193326Sed
219341825Sdim/// Horizontally adds the adjacent pairs of values contained in 2 packed
220309124Sdim///    64-bit vectors of [4 x i16].
221309124Sdim///
222309124Sdim/// \headerfile <x86intrin.h>
223309124Sdim///
224309124Sdim/// This intrinsic corresponds to the \c PHADDW instruction.
225309124Sdim///
226309124Sdim/// \param __a
227309124Sdim///    A 64-bit vector of [4 x i16] containing one of the source operands. The
228309124Sdim///    horizontal sums of the values are stored in the lower bits of the
229309124Sdim///    destination.
230309124Sdim/// \param __b
231309124Sdim///    A 64-bit vector of [4 x i16] containing one of the source operands. The
232309124Sdim///    horizontal sums of the values are stored in the upper bits of the
233309124Sdim///    destination.
234309124Sdim/// \returns A 64-bit vector of [4 x i16] containing the horizontal sums of both
235309124Sdim///    operands.
236341825Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
237249423Sdim_mm_hadd_pi16(__m64 __a, __m64 __b)
238193326Sed{
239249423Sdim    return (__m64)__builtin_ia32_phaddw((__v4hi)__a, (__v4hi)__b);
240193326Sed}
241193326Sed
242341825Sdim/// Horizontally adds the adjacent pairs of values contained in 2 packed
243309124Sdim///    64-bit vectors of [2 x i32].
244309124Sdim///
245309124Sdim/// \headerfile <x86intrin.h>
246309124Sdim///
247309124Sdim/// This intrinsic corresponds to the \c PHADDD instruction.
248309124Sdim///
249309124Sdim/// \param __a
250309124Sdim///    A 64-bit vector of [2 x i32] containing one of the source operands. The
251309124Sdim///    horizontal sums of the values are stored in the lower bits of the
252309124Sdim///    destination.
253309124Sdim/// \param __b
254309124Sdim///    A 64-bit vector of [2 x i32] containing one of the source operands. The
255309124Sdim///    horizontal sums of the values are stored in the upper bits of the
256309124Sdim///    destination.
257309124Sdim/// \returns A 64-bit vector of [2 x i32] containing the horizontal sums of both
258309124Sdim///    operands.
259341825Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
260249423Sdim_mm_hadd_pi32(__m64 __a, __m64 __b)
261193326Sed{
262249423Sdim    return (__m64)__builtin_ia32_phaddd((__v2si)__a, (__v2si)__b);
263193326Sed}
264193326Sed
265341825Sdim/// Horizontally adds the adjacent pairs of values contained in 2 packed
266341825Sdim///    128-bit vectors of [8 x i16]. Positive sums greater than 0x7FFF are
267341825Sdim///    saturated to 0x7FFF. Negative sums less than 0x8000 are saturated to
268341825Sdim///    0x8000.
269309124Sdim///
270309124Sdim/// \headerfile <x86intrin.h>
271309124Sdim///
272309124Sdim/// This intrinsic corresponds to the \c VPHADDSW instruction.
273309124Sdim///
274309124Sdim/// \param __a
275309124Sdim///    A 128-bit vector of [8 x i16] containing one of the source operands. The
276309124Sdim///    horizontal sums of the values are stored in the lower bits of the
277309124Sdim///    destination.
278309124Sdim/// \param __b
279309124Sdim///    A 128-bit vector of [8 x i16] containing one of the source operands. The
280309124Sdim///    horizontal sums of the values are stored in the upper bits of the
281309124Sdim///    destination.
282309124Sdim/// \returns A 128-bit vector of [8 x i16] containing the horizontal saturated
283309124Sdim///    sums of both operands.
284288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
285249423Sdim_mm_hadds_epi16(__m128i __a, __m128i __b)
286193326Sed{
287249423Sdim    return (__m128i)__builtin_ia32_phaddsw128((__v8hi)__a, (__v8hi)__b);
288193326Sed}
289193326Sed
290341825Sdim/// Horizontally adds the adjacent pairs of values contained in 2 packed
291341825Sdim///    64-bit vectors of [4 x i16]. Positive sums greater than 0x7FFF are
292341825Sdim///    saturated to 0x7FFF. Negative sums less than 0x8000 are saturated to
293341825Sdim///    0x8000.
294309124Sdim///
295309124Sdim/// \headerfile <x86intrin.h>
296309124Sdim///
297309124Sdim/// This intrinsic corresponds to the \c PHADDSW instruction.
298309124Sdim///
299309124Sdim/// \param __a
300309124Sdim///    A 64-bit vector of [4 x i16] containing one of the source operands. The
301309124Sdim///    horizontal sums of the values are stored in the lower bits of the
302309124Sdim///    destination.
303309124Sdim/// \param __b
304309124Sdim///    A 64-bit vector of [4 x i16] containing one of the source operands. The
305309124Sdim///    horizontal sums of the values are stored in the upper bits of the
306309124Sdim///    destination.
307309124Sdim/// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated
308309124Sdim///    sums of both operands.
309341825Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
310249423Sdim_mm_hadds_pi16(__m64 __a, __m64 __b)
311193326Sed{
312249423Sdim    return (__m64)__builtin_ia32_phaddsw((__v4hi)__a, (__v4hi)__b);
313193326Sed}
314193326Sed
315341825Sdim/// Horizontally subtracts the adjacent pairs of values contained in 2
316309124Sdim///    packed 128-bit vectors of [8 x i16].
317309124Sdim///
318309124Sdim/// \headerfile <x86intrin.h>
319309124Sdim///
320309124Sdim/// This intrinsic corresponds to the \c VPHSUBW instruction.
321309124Sdim///
322309124Sdim/// \param __a
323309124Sdim///    A 128-bit vector of [8 x i16] containing one of the source operands. The
324309124Sdim///    horizontal differences between the values are stored in the lower bits of
325309124Sdim///    the destination.
326309124Sdim/// \param __b
327309124Sdim///    A 128-bit vector of [8 x i16] containing one of the source operands. The
328309124Sdim///    horizontal differences between the values are stored in the upper bits of
329309124Sdim///    the destination.
330309124Sdim/// \returns A 128-bit vector of [8 x i16] containing the horizontal differences
331309124Sdim///    of both operands.
332288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
333249423Sdim_mm_hsub_epi16(__m128i __a, __m128i __b)
334193326Sed{
335249423Sdim    return (__m128i)__builtin_ia32_phsubw128((__v8hi)__a, (__v8hi)__b);
336193326Sed}
337193326Sed
338341825Sdim/// Horizontally subtracts the adjacent pairs of values contained in 2
339309124Sdim///    packed 128-bit vectors of [4 x i32].
340309124Sdim///
341309124Sdim/// \headerfile <x86intrin.h>
342309124Sdim///
343309124Sdim/// This intrinsic corresponds to the \c VPHSUBD instruction.
344309124Sdim///
345309124Sdim/// \param __a
346309124Sdim///    A 128-bit vector of [4 x i32] containing one of the source operands. The
347309124Sdim///    horizontal differences between the values are stored in the lower bits of
348309124Sdim///    the destination.
349309124Sdim/// \param __b
350309124Sdim///    A 128-bit vector of [4 x i32] containing one of the source operands. The
351309124Sdim///    horizontal differences between the values are stored in the upper bits of
352309124Sdim///    the destination.
353309124Sdim/// \returns A 128-bit vector of [4 x i32] containing the horizontal differences
354309124Sdim///    of both operands.
355288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
356249423Sdim_mm_hsub_epi32(__m128i __a, __m128i __b)
357193326Sed{
358249423Sdim    return (__m128i)__builtin_ia32_phsubd128((__v4si)__a, (__v4si)__b);
359193326Sed}
360193326Sed
361341825Sdim/// Horizontally subtracts the adjacent pairs of values contained in 2
362309124Sdim///    packed 64-bit vectors of [4 x i16].
363309124Sdim///
364309124Sdim/// \headerfile <x86intrin.h>
365309124Sdim///
366309124Sdim/// This intrinsic corresponds to the \c PHSUBW instruction.
367309124Sdim///
368309124Sdim/// \param __a
369309124Sdim///    A 64-bit vector of [4 x i16] containing one of the source operands. The
370309124Sdim///    horizontal differences between the values are stored in the lower bits of
371309124Sdim///    the destination.
372309124Sdim/// \param __b
373309124Sdim///    A 64-bit vector of [4 x i16] containing one of the source operands. The
374309124Sdim///    horizontal differences between the values are stored in the upper bits of
375309124Sdim///    the destination.
376309124Sdim/// \returns A 64-bit vector of [4 x i16] containing the horizontal differences
377309124Sdim///    of both operands.
378341825Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
379249423Sdim_mm_hsub_pi16(__m64 __a, __m64 __b)
380193326Sed{
381249423Sdim    return (__m64)__builtin_ia32_phsubw((__v4hi)__a, (__v4hi)__b);
382193326Sed}
383193326Sed
384341825Sdim/// Horizontally subtracts the adjacent pairs of values contained in 2
385309124Sdim///    packed 64-bit vectors of [2 x i32].
386309124Sdim///
387309124Sdim/// \headerfile <x86intrin.h>
388309124Sdim///
389309124Sdim/// This intrinsic corresponds to the \c PHSUBD instruction.
390309124Sdim///
391309124Sdim/// \param __a
392309124Sdim///    A 64-bit vector of [2 x i32] containing one of the source operands. The
393309124Sdim///    horizontal differences between the values are stored in the lower bits of
394309124Sdim///    the destination.
395309124Sdim/// \param __b
396309124Sdim///    A 64-bit vector of [2 x i32] containing one of the source operands. The
397309124Sdim///    horizontal differences between the values are stored in the upper bits of
398309124Sdim///    the destination.
399309124Sdim/// \returns A 64-bit vector of [2 x i32] containing the horizontal differences
400309124Sdim///    of both operands.
401341825Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
402249423Sdim_mm_hsub_pi32(__m64 __a, __m64 __b)
403193326Sed{
404249423Sdim    return (__m64)__builtin_ia32_phsubd((__v2si)__a, (__v2si)__b);
405193326Sed}
406193326Sed
407341825Sdim/// Horizontally subtracts the adjacent pairs of values contained in 2
408309124Sdim///    packed 128-bit vectors of [8 x i16]. Positive differences greater than
409341825Sdim///    0x7FFF are saturated to 0x7FFF. Negative differences less than 0x8000 are
410341825Sdim///    saturated to 0x8000.
411309124Sdim///
412309124Sdim/// \headerfile <x86intrin.h>
413309124Sdim///
414309124Sdim/// This intrinsic corresponds to the \c VPHSUBSW instruction.
415309124Sdim///
416309124Sdim/// \param __a
417309124Sdim///    A 128-bit vector of [8 x i16] containing one of the source operands. The
418309124Sdim///    horizontal differences between the values are stored in the lower bits of
419309124Sdim///    the destination.
420309124Sdim/// \param __b
421309124Sdim///    A 128-bit vector of [8 x i16] containing one of the source operands. The
422309124Sdim///    horizontal differences between the values are stored in the upper bits of
423309124Sdim///    the destination.
424309124Sdim/// \returns A 128-bit vector of [8 x i16] containing the horizontal saturated
425309124Sdim///    differences of both operands.
426288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
427249423Sdim_mm_hsubs_epi16(__m128i __a, __m128i __b)
428193326Sed{
429249423Sdim    return (__m128i)__builtin_ia32_phsubsw128((__v8hi)__a, (__v8hi)__b);
430193326Sed}
431193326Sed
432341825Sdim/// Horizontally subtracts the adjacent pairs of values contained in 2
433309124Sdim///    packed 64-bit vectors of [4 x i16]. Positive differences greater than
434341825Sdim///    0x7FFF are saturated to 0x7FFF. Negative differences less than 0x8000 are
435341825Sdim///    saturated to 0x8000.
436309124Sdim///
437309124Sdim/// \headerfile <x86intrin.h>
438309124Sdim///
439309124Sdim/// This intrinsic corresponds to the \c PHSUBSW instruction.
440309124Sdim///
441309124Sdim/// \param __a
442309124Sdim///    A 64-bit vector of [4 x i16] containing one of the source operands. The
443309124Sdim///    horizontal differences between the values are stored in the lower bits of
444309124Sdim///    the destination.
445309124Sdim/// \param __b
446309124Sdim///    A 64-bit vector of [4 x i16] containing one of the source operands. The
447309124Sdim///    horizontal differences between the values are stored in the upper bits of
448309124Sdim///    the destination.
449309124Sdim/// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated
450309124Sdim///    differences of both operands.
451341825Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
452249423Sdim_mm_hsubs_pi16(__m64 __a, __m64 __b)
453193326Sed{
454249423Sdim    return (__m64)__builtin_ia32_phsubsw((__v4hi)__a, (__v4hi)__b);
455193326Sed}
456193326Sed
457341825Sdim/// Multiplies corresponding pairs of packed 8-bit unsigned integer
458309124Sdim///    values contained in the first source operand and packed 8-bit signed
459309124Sdim///    integer values contained in the second source operand, adds pairs of
460309124Sdim///    contiguous products with signed saturation, and writes the 16-bit sums to
461321369Sdim///    the corresponding bits in the destination.
462309124Sdim///
463321369Sdim///    For example, bits [7:0] of both operands are multiplied, bits [15:8] of
464321369Sdim///    both operands are multiplied, and the sum of both results is written to
465321369Sdim///    bits [15:0] of the destination.
466321369Sdim///
467309124Sdim/// \headerfile <x86intrin.h>
468309124Sdim///
469309124Sdim/// This intrinsic corresponds to the \c VPMADDUBSW instruction.
470309124Sdim///
471309124Sdim/// \param __a
472309124Sdim///    A 128-bit integer vector containing the first source operand.
473309124Sdim/// \param __b
474309124Sdim///    A 128-bit integer vector containing the second source operand.
475309124Sdim/// \returns A 128-bit integer vector containing the sums of products of both
476314564Sdim///    operands: \n
477314564Sdim///    \a R0 := (\a __a0 * \a __b0) + (\a __a1 * \a __b1) \n
478314564Sdim///    \a R1 := (\a __a2 * \a __b2) + (\a __a3 * \a __b3) \n
479314564Sdim///    \a R2 := (\a __a4 * \a __b4) + (\a __a5 * \a __b5) \n
480314564Sdim///    \a R3 := (\a __a6 * \a __b6) + (\a __a7 * \a __b7) \n
481314564Sdim///    \a R4 := (\a __a8 * \a __b8) + (\a __a9 * \a __b9) \n
482314564Sdim///    \a R5 := (\a __a10 * \a __b10) + (\a __a11 * \a __b11) \n
483314564Sdim///    \a R6 := (\a __a12 * \a __b12) + (\a __a13 * \a __b13) \n
484314564Sdim///    \a R7 := (\a __a14 * \a __b14) + (\a __a15 * \a __b15)
485288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
486249423Sdim_mm_maddubs_epi16(__m128i __a, __m128i __b)
487193326Sed{
488249423Sdim    return (__m128i)__builtin_ia32_pmaddubsw128((__v16qi)__a, (__v16qi)__b);
489193326Sed}
490193326Sed
491341825Sdim/// Multiplies corresponding pairs of packed 8-bit unsigned integer
492309124Sdim///    values contained in the first source operand and packed 8-bit signed
493309124Sdim///    integer values contained in the second source operand, adds pairs of
494309124Sdim///    contiguous products with signed saturation, and writes the 16-bit sums to
495321369Sdim///    the corresponding bits in the destination.
496309124Sdim///
497321369Sdim///    For example, bits [7:0] of both operands are multiplied, bits [15:8] of
498321369Sdim///    both operands are multiplied, and the sum of both results is written to
499321369Sdim///    bits [15:0] of the destination.
500321369Sdim///
501309124Sdim/// \headerfile <x86intrin.h>
502309124Sdim///
503309124Sdim/// This intrinsic corresponds to the \c PMADDUBSW instruction.
504309124Sdim///
505309124Sdim/// \param __a
506309124Sdim///    A 64-bit integer vector containing the first source operand.
507309124Sdim/// \param __b
508309124Sdim///    A 64-bit integer vector containing the second source operand.
509309124Sdim/// \returns A 64-bit integer vector containing the sums of products of both
510314564Sdim///    operands: \n
511314564Sdim///    \a R0 := (\a __a0 * \a __b0) + (\a __a1 * \a __b1) \n
512314564Sdim///    \a R1 := (\a __a2 * \a __b2) + (\a __a3 * \a __b3) \n
513314564Sdim///    \a R2 := (\a __a4 * \a __b4) + (\a __a5 * \a __b5) \n
514314564Sdim///    \a R3 := (\a __a6 * \a __b6) + (\a __a7 * \a __b7)
515341825Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
516249423Sdim_mm_maddubs_pi16(__m64 __a, __m64 __b)
517193326Sed{
518249423Sdim    return (__m64)__builtin_ia32_pmaddubsw((__v8qi)__a, (__v8qi)__b);
519193326Sed}
520193326Sed
521341825Sdim/// Multiplies packed 16-bit signed integer values, truncates the 32-bit
522309124Sdim///    products to the 18 most significant bits by right-shifting, rounds the
523309124Sdim///    truncated value by adding 1, and writes bits [16:1] to the destination.
524309124Sdim///
525309124Sdim/// \headerfile <x86intrin.h>
526309124Sdim///
527309124Sdim/// This intrinsic corresponds to the \c VPMULHRSW instruction.
528309124Sdim///
529309124Sdim/// \param __a
530309124Sdim///    A 128-bit vector of [8 x i16] containing one of the source operands.
531309124Sdim/// \param __b
532309124Sdim///    A 128-bit vector of [8 x i16] containing one of the source operands.
533309124Sdim/// \returns A 128-bit vector of [8 x i16] containing the rounded and scaled
534309124Sdim///    products of both operands.
535288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
536249423Sdim_mm_mulhrs_epi16(__m128i __a, __m128i __b)
537193326Sed{
538249423Sdim    return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)__a, (__v8hi)__b);
539193326Sed}
540193326Sed
541341825Sdim/// Multiplies packed 16-bit signed integer values, truncates the 32-bit
542309124Sdim///    products to the 18 most significant bits by right-shifting, rounds the
543309124Sdim///    truncated value by adding 1, and writes bits [16:1] to the destination.
544309124Sdim///
545309124Sdim/// \headerfile <x86intrin.h>
546309124Sdim///
547309124Sdim/// This intrinsic corresponds to the \c PMULHRSW instruction.
548309124Sdim///
549309124Sdim/// \param __a
550309124Sdim///    A 64-bit vector of [4 x i16] containing one of the source operands.
551309124Sdim/// \param __b
552309124Sdim///    A 64-bit vector of [4 x i16] containing one of the source operands.
553309124Sdim/// \returns A 64-bit vector of [4 x i16] containing the rounded and scaled
554309124Sdim///    products of both operands.
555341825Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
556249423Sdim_mm_mulhrs_pi16(__m64 __a, __m64 __b)
557193326Sed{
558249423Sdim    return (__m64)__builtin_ia32_pmulhrsw((__v4hi)__a, (__v4hi)__b);
559193326Sed}
560193326Sed
561341825Sdim/// Copies the 8-bit integers from a 128-bit integer vector to the
562309124Sdim///    destination or clears 8-bit values in the destination, as specified by
563309124Sdim///    the second source operand.
564309124Sdim///
565309124Sdim/// \headerfile <x86intrin.h>
566309124Sdim///
567309124Sdim/// This intrinsic corresponds to the \c VPSHUFB instruction.
568309124Sdim///
569309124Sdim/// \param __a
570309124Sdim///    A 128-bit integer vector containing the values to be copied.
571309124Sdim/// \param __b
572309124Sdim///    A 128-bit integer vector containing control bytes corresponding to
573309124Sdim///    positions in the destination:
574314564Sdim///    Bit 7: \n
575314564Sdim///    1: Clear the corresponding byte in the destination. \n
576309124Sdim///    0: Copy the selected source byte to the corresponding byte in the
577314564Sdim///    destination. \n
578314564Sdim///    Bits [6:4] Reserved.  \n
579309124Sdim///    Bits [3:0] select the source byte to be copied.
580309124Sdim/// \returns A 128-bit integer vector containing the copied or cleared values.
581288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
582249423Sdim_mm_shuffle_epi8(__m128i __a, __m128i __b)
583193326Sed{
584249423Sdim    return (__m128i)__builtin_ia32_pshufb128((__v16qi)__a, (__v16qi)__b);
585193326Sed}
586193326Sed
587341825Sdim/// Copies the 8-bit integers from a 64-bit integer vector to the
588309124Sdim///    destination or clears 8-bit values in the destination, as specified by
589309124Sdim///    the second source operand.
590309124Sdim///
591309124Sdim/// \headerfile <x86intrin.h>
592309124Sdim///
593309124Sdim/// This intrinsic corresponds to the \c PSHUFB instruction.
594309124Sdim///
595309124Sdim/// \param __a
596309124Sdim///    A 64-bit integer vector containing the values to be copied.
597309124Sdim/// \param __b
598309124Sdim///    A 64-bit integer vector containing control bytes corresponding to
599309124Sdim///    positions in the destination:
600314564Sdim///    Bit 7: \n
601314564Sdim///    1: Clear the corresponding byte in the destination. \n
602309124Sdim///    0: Copy the selected source byte to the corresponding byte in the
603314564Sdim///    destination. \n
604309124Sdim///    Bits [3:0] select the source byte to be copied.
605309124Sdim/// \returns A 64-bit integer vector containing the copied or cleared values.
606341825Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
607249423Sdim_mm_shuffle_pi8(__m64 __a, __m64 __b)
608193326Sed{
609249423Sdim    return (__m64)__builtin_ia32_pshufb((__v8qi)__a, (__v8qi)__b);
610193326Sed}
611193326Sed
612341825Sdim/// For each 8-bit integer in the first source operand, perform one of
613321369Sdim///    the following actions as specified by the second source operand.
614309124Sdim///
615321369Sdim///    If the byte in the second source is negative, calculate the two's
616321369Sdim///    complement of the corresponding byte in the first source, and write that
617321369Sdim///    value to the destination. If the byte in the second source is positive,
618321369Sdim///    copy the corresponding byte from the first source to the destination. If
619321369Sdim///    the byte in the second source is zero, clear the corresponding byte in
620321369Sdim///    the destination.
621321369Sdim///
622309124Sdim/// \headerfile <x86intrin.h>
623309124Sdim///
624309124Sdim/// This intrinsic corresponds to the <c> VPSIGNB / PSIGNB </c> instruction.
625309124Sdim///
626309124Sdim/// \param __a
627309124Sdim///    A 128-bit integer vector containing the values to be copied or negated.
628309124Sdim/// \param __b
629309124Sdim///    A 128-bit integer vector containing control bytes corresponding to
630309124Sdim///    positions in the destination.
631309124Sdim/// \returns A 128-bit integer vector containing the resultant values.
632288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
633249423Sdim_mm_sign_epi8(__m128i __a, __m128i __b)
634193326Sed{
635249423Sdim    return (__m128i)__builtin_ia32_psignb128((__v16qi)__a, (__v16qi)__b);
636193326Sed}
637193326Sed
638341825Sdim/// For each 16-bit integer in the first source operand, perform one of
639321369Sdim///    the following actions as specified by the second source operand.
640309124Sdim///
641321369Sdim///    If the word in the second source is negative, calculate the two's
642321369Sdim///    complement of the corresponding word in the first source, and write that
643321369Sdim///    value to the destination. If the word in the second source is positive,
644321369Sdim///    copy the corresponding word from the first source to the destination. If
645321369Sdim///    the word in the second source is zero, clear the corresponding word in
646321369Sdim///    the destination.
647321369Sdim///
648309124Sdim/// \headerfile <x86intrin.h>
649309124Sdim///
650309124Sdim/// This intrinsic corresponds to the <c> VPSIGNW / PSIGNW </c> instruction.
651309124Sdim///
652309124Sdim/// \param __a
653309124Sdim///    A 128-bit integer vector containing the values to be copied or negated.
654309124Sdim/// \param __b
655309124Sdim///    A 128-bit integer vector containing control words corresponding to
656309124Sdim///    positions in the destination.
657309124Sdim/// \returns A 128-bit integer vector containing the resultant values.
658288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
659249423Sdim_mm_sign_epi16(__m128i __a, __m128i __b)
660193326Sed{
661249423Sdim    return (__m128i)__builtin_ia32_psignw128((__v8hi)__a, (__v8hi)__b);
662193326Sed}
663193326Sed
664341825Sdim/// For each 32-bit integer in the first source operand, perform one of
665321369Sdim///    the following actions as specified by the second source operand.
666321369Sdim///
667321369Sdim///    If the doubleword in the second source is negative, calculate the two's
668309124Sdim///    complement of the corresponding doubleword in the first source, and
669309124Sdim///    write that value to the destination. If the doubleword in the second
670309124Sdim///    source is positive, copy the corresponding doubleword from the first
671309124Sdim///    source to the destination. If the doubleword in the second source is
672309124Sdim///    zero, clear the corresponding doubleword in the destination.
673309124Sdim///
674309124Sdim/// \headerfile <x86intrin.h>
675309124Sdim///
676309124Sdim/// This intrinsic corresponds to the <c> VPSIGND / PSIGND </c> instruction.
677309124Sdim///
678309124Sdim/// \param __a
679309124Sdim///    A 128-bit integer vector containing the values to be copied or negated.
680309124Sdim/// \param __b
681309124Sdim///    A 128-bit integer vector containing control doublewords corresponding to
682309124Sdim///    positions in the destination.
683309124Sdim/// \returns A 128-bit integer vector containing the resultant values.
684288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
685249423Sdim_mm_sign_epi32(__m128i __a, __m128i __b)
686193326Sed{
687249423Sdim    return (__m128i)__builtin_ia32_psignd128((__v4si)__a, (__v4si)__b);
688193326Sed}
689193326Sed
690341825Sdim/// For each 8-bit integer in the first source operand, perform one of
691321369Sdim///    the following actions as specified by the second source operand.
692309124Sdim///
693321369Sdim///    If the byte in the second source is negative, calculate the two's
694321369Sdim///    complement of the corresponding byte in the first source, and write that
695321369Sdim///    value to the destination. If the byte in the second source is positive,
696321369Sdim///    copy the corresponding byte from the first source to the destination. If
697321369Sdim///    the byte in the second source is zero, clear the corresponding byte in
698321369Sdim///    the destination.
699321369Sdim///
700309124Sdim/// \headerfile <x86intrin.h>
701309124Sdim///
702309124Sdim/// This intrinsic corresponds to the \c PSIGNB instruction.
703309124Sdim///
704309124Sdim/// \param __a
705309124Sdim///    A 64-bit integer vector containing the values to be copied or negated.
706309124Sdim/// \param __b
707309124Sdim///    A 64-bit integer vector containing control bytes corresponding to
708309124Sdim///    positions in the destination.
709309124Sdim/// \returns A 64-bit integer vector containing the resultant values.
710341825Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
711249423Sdim_mm_sign_pi8(__m64 __a, __m64 __b)
712193326Sed{
713249423Sdim    return (__m64)__builtin_ia32_psignb((__v8qi)__a, (__v8qi)__b);
714193326Sed}
715193326Sed
716341825Sdim/// For each 16-bit integer in the first source operand, perform one of
717321369Sdim///    the following actions as specified by the second source operand.
718309124Sdim///
719321369Sdim///    If the word in the second source is negative, calculate the two's
720321369Sdim///    complement of the corresponding word in the first source, and write that
721321369Sdim///    value to the destination. If the word in the second source is positive,
722321369Sdim///    copy the corresponding word from the first source to the destination. If
723321369Sdim///    the word in the second source is zero, clear the corresponding word in
724321369Sdim///    the destination.
725321369Sdim///
726309124Sdim/// \headerfile <x86intrin.h>
727309124Sdim///
728309124Sdim/// This intrinsic corresponds to the \c PSIGNW instruction.
729309124Sdim///
730309124Sdim/// \param __a
731309124Sdim///    A 64-bit integer vector containing the values to be copied or negated.
732309124Sdim/// \param __b
733309124Sdim///    A 64-bit integer vector containing control words corresponding to
734309124Sdim///    positions in the destination.
735309124Sdim/// \returns A 64-bit integer vector containing the resultant values.
736341825Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
737249423Sdim_mm_sign_pi16(__m64 __a, __m64 __b)
738193326Sed{
739249423Sdim    return (__m64)__builtin_ia32_psignw((__v4hi)__a, (__v4hi)__b);
740193326Sed}
741193326Sed
742341825Sdim/// For each 32-bit integer in the first source operand, perform one of
743321369Sdim///    the following actions as specified by the second source operand.
744321369Sdim///
745321369Sdim///    If the doubleword in the second source is negative, calculate the two's
746309124Sdim///    complement of the corresponding doubleword in the first source, and
747309124Sdim///    write that value to the destination. If the doubleword in the second
748309124Sdim///    source is positive, copy the corresponding doubleword from the first
749309124Sdim///    source to the destination. If the doubleword in the second source is
750309124Sdim///    zero, clear the corresponding doubleword in the destination.
751309124Sdim///
752309124Sdim/// \headerfile <x86intrin.h>
753309124Sdim///
754309124Sdim/// This intrinsic corresponds to the \c PSIGND instruction.
755309124Sdim///
756309124Sdim/// \param __a
757309124Sdim///    A 64-bit integer vector containing the values to be copied or negated.
758309124Sdim/// \param __b
759309124Sdim///    A 64-bit integer vector containing two control doublewords corresponding
760309124Sdim///    to positions in the destination.
761309124Sdim/// \returns A 64-bit integer vector containing the resultant values.
762341825Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
763249423Sdim_mm_sign_pi32(__m64 __a, __m64 __b)
764193326Sed{
765249423Sdim    return (__m64)__builtin_ia32_psignd((__v2si)__a, (__v2si)__b);
766193326Sed}
767193326Sed
768288943Sdim#undef __DEFAULT_FN_ATTRS /* attribute shorthand defined at the top of this file; not used past this point */
769341825Sdim#undef __DEFAULT_FN_ATTRS_MMX /* same, for the variant that also enables the "mmx" target feature */
770288943Sdim
771193326Sed#endif /* __TMMINTRIN_H */
772