tmmintrin.h revision 341825
1/*===---- tmmintrin.h - SSSE3 intrinsics -----------------------------------===
2 *
3 * Permission is hereby granted, free of charge, to any person obtaining a copy
4 * of this software and associated documentation files (the "Software"), to deal
5 * in the Software without restriction, including without limitation the rights
6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 * copies of the Software, and to permit persons to whom the Software is
8 * furnished to do so, subject to the following conditions:
9 *
10 * The above copyright notice and this permission notice shall be included in
11 * all copies or substantial portions of the Software.
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 * THE SOFTWARE.
20 *
21 *===-----------------------------------------------------------------------===
22 */
23
24#ifndef __TMMINTRIN_H
25#define __TMMINTRIN_H
26
27#include <pmmintrin.h>
28
/*
 * Attribute sets applied to the intrinsics defined in this header.
 * __DEFAULT_FN_ATTRS targets "ssse3"; __DEFAULT_FN_ATTRS_MMX targets
 * "mmx,ssse3" and is the set used by the intrinsics operating on __m64.
 */
#define __DEFAULT_FN_ATTRS                                                     \
  __attribute__((__always_inline__, __nodebug__, __target__("ssse3"),          \
                 __min_vector_width__(64)))
#define __DEFAULT_FN_ATTRS_MMX                                                 \
  __attribute__((__always_inline__, __nodebug__, __target__("mmx,ssse3"),      \
                 __min_vector_width__(64)))
32
33/// Computes the absolute value of each of the packed 8-bit signed
34///    integers in the source operand and stores the 8-bit unsigned integer
35///    results in the destination.
36///
37/// \headerfile <x86intrin.h>
38///
39/// This intrinsic corresponds to the \c PABSB instruction.
40///
41/// \param __a
42///    A 64-bit vector of [8 x i8].
43/// \returns A 64-bit integer vector containing the absolute values of the
44///    elements in the operand.
45static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
46_mm_abs_pi8(__m64 __a)
47{
48    return (__m64)__builtin_ia32_pabsb((__v8qi)__a);
49}
50
51/// Computes the absolute value of each of the packed 8-bit signed
52///    integers in the source operand and stores the 8-bit unsigned integer
53///    results in the destination.
54///
55/// \headerfile <x86intrin.h>
56///
57/// This intrinsic corresponds to the \c VPABSB instruction.
58///
59/// \param __a
60///    A 128-bit vector of [16 x i8].
61/// \returns A 128-bit integer vector containing the absolute values of the
62///    elements in the operand.
63static __inline__ __m128i __DEFAULT_FN_ATTRS
64_mm_abs_epi8(__m128i __a)
65{
66    return (__m128i)__builtin_ia32_pabsb128((__v16qi)__a);
67}
68
69/// Computes the absolute value of each of the packed 16-bit signed
70///    integers in the source operand and stores the 16-bit unsigned integer
71///    results in the destination.
72///
73/// \headerfile <x86intrin.h>
74///
75/// This intrinsic corresponds to the \c PABSW instruction.
76///
77/// \param __a
78///    A 64-bit vector of [4 x i16].
79/// \returns A 64-bit integer vector containing the absolute values of the
80///    elements in the operand.
81static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
82_mm_abs_pi16(__m64 __a)
83{
84    return (__m64)__builtin_ia32_pabsw((__v4hi)__a);
85}
86
87/// Computes the absolute value of each of the packed 16-bit signed
88///    integers in the source operand and stores the 16-bit unsigned integer
89///    results in the destination.
90///
91/// \headerfile <x86intrin.h>
92///
93/// This intrinsic corresponds to the \c VPABSW instruction.
94///
95/// \param __a
96///    A 128-bit vector of [8 x i16].
97/// \returns A 128-bit integer vector containing the absolute values of the
98///    elements in the operand.
99static __inline__ __m128i __DEFAULT_FN_ATTRS
100_mm_abs_epi16(__m128i __a)
101{
102    return (__m128i)__builtin_ia32_pabsw128((__v8hi)__a);
103}
104
105/// Computes the absolute value of each of the packed 32-bit signed
106///    integers in the source operand and stores the 32-bit unsigned integer
107///    results in the destination.
108///
109/// \headerfile <x86intrin.h>
110///
111/// This intrinsic corresponds to the \c PABSD instruction.
112///
113/// \param __a
114///    A 64-bit vector of [2 x i32].
115/// \returns A 64-bit integer vector containing the absolute values of the
116///    elements in the operand.
117static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
118_mm_abs_pi32(__m64 __a)
119{
120    return (__m64)__builtin_ia32_pabsd((__v2si)__a);
121}
122
123/// Computes the absolute value of each of the packed 32-bit signed
124///    integers in the source operand and stores the 32-bit unsigned integer
125///    results in the destination.
126///
127/// \headerfile <x86intrin.h>
128///
129/// This intrinsic corresponds to the \c VPABSD instruction.
130///
131/// \param __a
132///    A 128-bit vector of [4 x i32].
133/// \returns A 128-bit integer vector containing the absolute values of the
134///    elements in the operand.
135static __inline__ __m128i __DEFAULT_FN_ATTRS
136_mm_abs_epi32(__m128i __a)
137{
138    return (__m128i)__builtin_ia32_pabsd128((__v4si)__a);
139}
140
/// Concatenates the two 128-bit integer vector operands, and
///    right-shifts the result by the number of bytes specified in the immediate
///    operand.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm_alignr_epi8(__m128i a, __m128i b, const int n);
/// \endcode
///
/// This intrinsic corresponds to the \c PALIGNR instruction.
///
/// \param a
///    A 128-bit vector of [16 x i8] containing one of the source operands.
/// \param b
///    A 128-bit vector of [16 x i8] containing one of the source operands.
/// \param n
///    An immediate operand specifying how many bytes to right-shift the result.
/// \returns A 128-bit integer vector containing the concatenated right-shifted
///    value.
/* The whole expansion is parenthesized so the (__m128i) cast applies to the
 * builtin's result even when the macro is followed by a postfix operator
 * (for example a subscript), which would otherwise bind to the call first. */
#define _mm_alignr_epi8(a, b, n) \
  ((__m128i)__builtin_ia32_palignr128((__v16qi)(__m128i)(a), \
                                      (__v16qi)(__m128i)(b), (n)))
164
/// Concatenates the two 64-bit integer vector operands, and right-shifts
///    the result by the number of bytes specified in the immediate operand.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m64 _mm_alignr_pi8(__m64 a, __m64 b, const int n);
/// \endcode
///
/// This intrinsic corresponds to the \c PALIGNR instruction.
///
/// \param a
///    A 64-bit vector of [8 x i8] containing one of the source operands.
/// \param b
///    A 64-bit vector of [8 x i8] containing one of the source operands.
/// \param n
///    An immediate operand specifying how many bytes to right-shift the result.
/// \returns A 64-bit integer vector containing the concatenated right-shifted
///    value.
/* The whole expansion is parenthesized so the (__m64) cast applies to the
 * builtin's result even when the macro is followed by a postfix operator
 * (for example a subscript), which would otherwise bind to the call first. */
#define _mm_alignr_pi8(a, b, n) \
  ((__m64)__builtin_ia32_palignr((__v8qi)(__m64)(a), (__v8qi)(__m64)(b), (n)))
186
187/// Horizontally adds the adjacent pairs of values contained in 2 packed
188///    128-bit vectors of [8 x i16].
189///
190/// \headerfile <x86intrin.h>
191///
192/// This intrinsic corresponds to the \c VPHADDW instruction.
193///
194/// \param __a
195///    A 128-bit vector of [8 x i16] containing one of the source operands. The
196///    horizontal sums of the values are stored in the lower bits of the
197///    destination.
198/// \param __b
199///    A 128-bit vector of [8 x i16] containing one of the source operands. The
200///    horizontal sums of the values are stored in the upper bits of the
201///    destination.
202/// \returns A 128-bit vector of [8 x i16] containing the horizontal sums of
203///    both operands.
204static __inline__ __m128i __DEFAULT_FN_ATTRS
205_mm_hadd_epi16(__m128i __a, __m128i __b)
206{
207    return (__m128i)__builtin_ia32_phaddw128((__v8hi)__a, (__v8hi)__b);
208}
209
210/// Horizontally adds the adjacent pairs of values contained in 2 packed
211///    128-bit vectors of [4 x i32].
212///
213/// \headerfile <x86intrin.h>
214///
215/// This intrinsic corresponds to the \c VPHADDD instruction.
216///
217/// \param __a
218///    A 128-bit vector of [4 x i32] containing one of the source operands. The
219///    horizontal sums of the values are stored in the lower bits of the
220///    destination.
221/// \param __b
222///    A 128-bit vector of [4 x i32] containing one of the source operands. The
223///    horizontal sums of the values are stored in the upper bits of the
224///    destination.
225/// \returns A 128-bit vector of [4 x i32] containing the horizontal sums of
226///    both operands.
227static __inline__ __m128i __DEFAULT_FN_ATTRS
228_mm_hadd_epi32(__m128i __a, __m128i __b)
229{
230    return (__m128i)__builtin_ia32_phaddd128((__v4si)__a, (__v4si)__b);
231}
232
233/// Horizontally adds the adjacent pairs of values contained in 2 packed
234///    64-bit vectors of [4 x i16].
235///
236/// \headerfile <x86intrin.h>
237///
238/// This intrinsic corresponds to the \c PHADDW instruction.
239///
240/// \param __a
241///    A 64-bit vector of [4 x i16] containing one of the source operands. The
242///    horizontal sums of the values are stored in the lower bits of the
243///    destination.
244/// \param __b
245///    A 64-bit vector of [4 x i16] containing one of the source operands. The
246///    horizontal sums of the values are stored in the upper bits of the
247///    destination.
248/// \returns A 64-bit vector of [4 x i16] containing the horizontal sums of both
249///    operands.
250static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
251_mm_hadd_pi16(__m64 __a, __m64 __b)
252{
253    return (__m64)__builtin_ia32_phaddw((__v4hi)__a, (__v4hi)__b);
254}
255
256/// Horizontally adds the adjacent pairs of values contained in 2 packed
257///    64-bit vectors of [2 x i32].
258///
259/// \headerfile <x86intrin.h>
260///
261/// This intrinsic corresponds to the \c PHADDD instruction.
262///
263/// \param __a
264///    A 64-bit vector of [2 x i32] containing one of the source operands. The
265///    horizontal sums of the values are stored in the lower bits of the
266///    destination.
267/// \param __b
268///    A 64-bit vector of [2 x i32] containing one of the source operands. The
269///    horizontal sums of the values are stored in the upper bits of the
270///    destination.
271/// \returns A 64-bit vector of [2 x i32] containing the horizontal sums of both
272///    operands.
273static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
274_mm_hadd_pi32(__m64 __a, __m64 __b)
275{
276    return (__m64)__builtin_ia32_phaddd((__v2si)__a, (__v2si)__b);
277}
278
279/// Horizontally adds the adjacent pairs of values contained in 2 packed
280///    128-bit vectors of [8 x i16]. Positive sums greater than 0x7FFF are
281///    saturated to 0x7FFF. Negative sums less than 0x8000 are saturated to
282///    0x8000.
283///
284/// \headerfile <x86intrin.h>
285///
286/// This intrinsic corresponds to the \c VPHADDSW instruction.
287///
288/// \param __a
289///    A 128-bit vector of [8 x i16] containing one of the source operands. The
290///    horizontal sums of the values are stored in the lower bits of the
291///    destination.
292/// \param __b
293///    A 128-bit vector of [8 x i16] containing one of the source operands. The
294///    horizontal sums of the values are stored in the upper bits of the
295///    destination.
296/// \returns A 128-bit vector of [8 x i16] containing the horizontal saturated
297///    sums of both operands.
298static __inline__ __m128i __DEFAULT_FN_ATTRS
299_mm_hadds_epi16(__m128i __a, __m128i __b)
300{
301    return (__m128i)__builtin_ia32_phaddsw128((__v8hi)__a, (__v8hi)__b);
302}
303
304/// Horizontally adds the adjacent pairs of values contained in 2 packed
305///    64-bit vectors of [4 x i16]. Positive sums greater than 0x7FFF are
306///    saturated to 0x7FFF. Negative sums less than 0x8000 are saturated to
307///    0x8000.
308///
309/// \headerfile <x86intrin.h>
310///
311/// This intrinsic corresponds to the \c PHADDSW instruction.
312///
313/// \param __a
314///    A 64-bit vector of [4 x i16] containing one of the source operands. The
315///    horizontal sums of the values are stored in the lower bits of the
316///    destination.
317/// \param __b
318///    A 64-bit vector of [4 x i16] containing one of the source operands. The
319///    horizontal sums of the values are stored in the upper bits of the
320///    destination.
321/// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated
322///    sums of both operands.
323static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
324_mm_hadds_pi16(__m64 __a, __m64 __b)
325{
326    return (__m64)__builtin_ia32_phaddsw((__v4hi)__a, (__v4hi)__b);
327}
328
329/// Horizontally subtracts the adjacent pairs of values contained in 2
330///    packed 128-bit vectors of [8 x i16].
331///
332/// \headerfile <x86intrin.h>
333///
334/// This intrinsic corresponds to the \c VPHSUBW instruction.
335///
336/// \param __a
337///    A 128-bit vector of [8 x i16] containing one of the source operands. The
338///    horizontal differences between the values are stored in the lower bits of
339///    the destination.
340/// \param __b
341///    A 128-bit vector of [8 x i16] containing one of the source operands. The
342///    horizontal differences between the values are stored in the upper bits of
343///    the destination.
344/// \returns A 128-bit vector of [8 x i16] containing the horizontal differences
345///    of both operands.
346static __inline__ __m128i __DEFAULT_FN_ATTRS
347_mm_hsub_epi16(__m128i __a, __m128i __b)
348{
349    return (__m128i)__builtin_ia32_phsubw128((__v8hi)__a, (__v8hi)__b);
350}
351
352/// Horizontally subtracts the adjacent pairs of values contained in 2
353///    packed 128-bit vectors of [4 x i32].
354///
355/// \headerfile <x86intrin.h>
356///
357/// This intrinsic corresponds to the \c VPHSUBD instruction.
358///
359/// \param __a
360///    A 128-bit vector of [4 x i32] containing one of the source operands. The
361///    horizontal differences between the values are stored in the lower bits of
362///    the destination.
363/// \param __b
364///    A 128-bit vector of [4 x i32] containing one of the source operands. The
365///    horizontal differences between the values are stored in the upper bits of
366///    the destination.
367/// \returns A 128-bit vector of [4 x i32] containing the horizontal differences
368///    of both operands.
369static __inline__ __m128i __DEFAULT_FN_ATTRS
370_mm_hsub_epi32(__m128i __a, __m128i __b)
371{
372    return (__m128i)__builtin_ia32_phsubd128((__v4si)__a, (__v4si)__b);
373}
374
375/// Horizontally subtracts the adjacent pairs of values contained in 2
376///    packed 64-bit vectors of [4 x i16].
377///
378/// \headerfile <x86intrin.h>
379///
380/// This intrinsic corresponds to the \c PHSUBW instruction.
381///
382/// \param __a
383///    A 64-bit vector of [4 x i16] containing one of the source operands. The
384///    horizontal differences between the values are stored in the lower bits of
385///    the destination.
386/// \param __b
387///    A 64-bit vector of [4 x i16] containing one of the source operands. The
388///    horizontal differences between the values are stored in the upper bits of
389///    the destination.
390/// \returns A 64-bit vector of [4 x i16] containing the horizontal differences
391///    of both operands.
392static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
393_mm_hsub_pi16(__m64 __a, __m64 __b)
394{
395    return (__m64)__builtin_ia32_phsubw((__v4hi)__a, (__v4hi)__b);
396}
397
398/// Horizontally subtracts the adjacent pairs of values contained in 2
399///    packed 64-bit vectors of [2 x i32].
400///
401/// \headerfile <x86intrin.h>
402///
403/// This intrinsic corresponds to the \c PHSUBD instruction.
404///
405/// \param __a
406///    A 64-bit vector of [2 x i32] containing one of the source operands. The
407///    horizontal differences between the values are stored in the lower bits of
408///    the destination.
409/// \param __b
410///    A 64-bit vector of [2 x i32] containing one of the source operands. The
411///    horizontal differences between the values are stored in the upper bits of
412///    the destination.
413/// \returns A 64-bit vector of [2 x i32] containing the horizontal differences
414///    of both operands.
415static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
416_mm_hsub_pi32(__m64 __a, __m64 __b)
417{
418    return (__m64)__builtin_ia32_phsubd((__v2si)__a, (__v2si)__b);
419}
420
421/// Horizontally subtracts the adjacent pairs of values contained in 2
422///    packed 128-bit vectors of [8 x i16]. Positive differences greater than
423///    0x7FFF are saturated to 0x7FFF. Negative differences less than 0x8000 are
424///    saturated to 0x8000.
425///
426/// \headerfile <x86intrin.h>
427///
428/// This intrinsic corresponds to the \c VPHSUBSW instruction.
429///
430/// \param __a
431///    A 128-bit vector of [8 x i16] containing one of the source operands. The
432///    horizontal differences between the values are stored in the lower bits of
433///    the destination.
434/// \param __b
435///    A 128-bit vector of [8 x i16] containing one of the source operands. The
436///    horizontal differences between the values are stored in the upper bits of
437///    the destination.
438/// \returns A 128-bit vector of [8 x i16] containing the horizontal saturated
439///    differences of both operands.
440static __inline__ __m128i __DEFAULT_FN_ATTRS
441_mm_hsubs_epi16(__m128i __a, __m128i __b)
442{
443    return (__m128i)__builtin_ia32_phsubsw128((__v8hi)__a, (__v8hi)__b);
444}
445
446/// Horizontally subtracts the adjacent pairs of values contained in 2
447///    packed 64-bit vectors of [4 x i16]. Positive differences greater than
448///    0x7FFF are saturated to 0x7FFF. Negative differences less than 0x8000 are
449///    saturated to 0x8000.
450///
451/// \headerfile <x86intrin.h>
452///
453/// This intrinsic corresponds to the \c PHSUBSW instruction.
454///
455/// \param __a
456///    A 64-bit vector of [4 x i16] containing one of the source operands. The
457///    horizontal differences between the values are stored in the lower bits of
458///    the destination.
459/// \param __b
460///    A 64-bit vector of [4 x i16] containing one of the source operands. The
461///    horizontal differences between the values are stored in the upper bits of
462///    the destination.
463/// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated
464///    differences of both operands.
465static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
466_mm_hsubs_pi16(__m64 __a, __m64 __b)
467{
468    return (__m64)__builtin_ia32_phsubsw((__v4hi)__a, (__v4hi)__b);
469}
470
471/// Multiplies corresponding pairs of packed 8-bit unsigned integer
472///    values contained in the first source operand and packed 8-bit signed
473///    integer values contained in the second source operand, adds pairs of
474///    contiguous products with signed saturation, and writes the 16-bit sums to
475///    the corresponding bits in the destination.
476///
477///    For example, bits [7:0] of both operands are multiplied, bits [15:8] of
478///    both operands are multiplied, and the sum of both results is written to
479///    bits [15:0] of the destination.
480///
481/// \headerfile <x86intrin.h>
482///
483/// This intrinsic corresponds to the \c VPMADDUBSW instruction.
484///
485/// \param __a
486///    A 128-bit integer vector containing the first source operand.
487/// \param __b
488///    A 128-bit integer vector containing the second source operand.
489/// \returns A 128-bit integer vector containing the sums of products of both
490///    operands: \n
491///    \a R0 := (\a __a0 * \a __b0) + (\a __a1 * \a __b1) \n
492///    \a R1 := (\a __a2 * \a __b2) + (\a __a3 * \a __b3) \n
493///    \a R2 := (\a __a4 * \a __b4) + (\a __a5 * \a __b5) \n
494///    \a R3 := (\a __a6 * \a __b6) + (\a __a7 * \a __b7) \n
495///    \a R4 := (\a __a8 * \a __b8) + (\a __a9 * \a __b9) \n
496///    \a R5 := (\a __a10 * \a __b10) + (\a __a11 * \a __b11) \n
497///    \a R6 := (\a __a12 * \a __b12) + (\a __a13 * \a __b13) \n
498///    \a R7 := (\a __a14 * \a __b14) + (\a __a15 * \a __b15)
499static __inline__ __m128i __DEFAULT_FN_ATTRS
500_mm_maddubs_epi16(__m128i __a, __m128i __b)
501{
502    return (__m128i)__builtin_ia32_pmaddubsw128((__v16qi)__a, (__v16qi)__b);
503}
504
505/// Multiplies corresponding pairs of packed 8-bit unsigned integer
506///    values contained in the first source operand and packed 8-bit signed
507///    integer values contained in the second source operand, adds pairs of
508///    contiguous products with signed saturation, and writes the 16-bit sums to
509///    the corresponding bits in the destination.
510///
511///    For example, bits [7:0] of both operands are multiplied, bits [15:8] of
512///    both operands are multiplied, and the sum of both results is written to
513///    bits [15:0] of the destination.
514///
515/// \headerfile <x86intrin.h>
516///
517/// This intrinsic corresponds to the \c PMADDUBSW instruction.
518///
519/// \param __a
520///    A 64-bit integer vector containing the first source operand.
521/// \param __b
522///    A 64-bit integer vector containing the second source operand.
523/// \returns A 64-bit integer vector containing the sums of products of both
524///    operands: \n
525///    \a R0 := (\a __a0 * \a __b0) + (\a __a1 * \a __b1) \n
526///    \a R1 := (\a __a2 * \a __b2) + (\a __a3 * \a __b3) \n
527///    \a R2 := (\a __a4 * \a __b4) + (\a __a5 * \a __b5) \n
528///    \a R3 := (\a __a6 * \a __b6) + (\a __a7 * \a __b7)
529static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
530_mm_maddubs_pi16(__m64 __a, __m64 __b)
531{
532    return (__m64)__builtin_ia32_pmaddubsw((__v8qi)__a, (__v8qi)__b);
533}
534
535/// Multiplies packed 16-bit signed integer values, truncates the 32-bit
536///    products to the 18 most significant bits by right-shifting, rounds the
537///    truncated value by adding 1, and writes bits [16:1] to the destination.
538///
539/// \headerfile <x86intrin.h>
540///
541/// This intrinsic corresponds to the \c VPMULHRSW instruction.
542///
543/// \param __a
544///    A 128-bit vector of [8 x i16] containing one of the source operands.
545/// \param __b
546///    A 128-bit vector of [8 x i16] containing one of the source operands.
547/// \returns A 128-bit vector of [8 x i16] containing the rounded and scaled
548///    products of both operands.
549static __inline__ __m128i __DEFAULT_FN_ATTRS
550_mm_mulhrs_epi16(__m128i __a, __m128i __b)
551{
552    return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)__a, (__v8hi)__b);
553}
554
555/// Multiplies packed 16-bit signed integer values, truncates the 32-bit
556///    products to the 18 most significant bits by right-shifting, rounds the
557///    truncated value by adding 1, and writes bits [16:1] to the destination.
558///
559/// \headerfile <x86intrin.h>
560///
561/// This intrinsic corresponds to the \c PMULHRSW instruction.
562///
563/// \param __a
564///    A 64-bit vector of [4 x i16] containing one of the source operands.
565/// \param __b
566///    A 64-bit vector of [4 x i16] containing one of the source operands.
567/// \returns A 64-bit vector of [4 x i16] containing the rounded and scaled
568///    products of both operands.
569static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
570_mm_mulhrs_pi16(__m64 __a, __m64 __b)
571{
572    return (__m64)__builtin_ia32_pmulhrsw((__v4hi)__a, (__v4hi)__b);
573}
574
575/// Copies the 8-bit integers from a 128-bit integer vector to the
576///    destination or clears 8-bit values in the destination, as specified by
577///    the second source operand.
578///
579/// \headerfile <x86intrin.h>
580///
581/// This intrinsic corresponds to the \c VPSHUFB instruction.
582///
583/// \param __a
584///    A 128-bit integer vector containing the values to be copied.
585/// \param __b
586///    A 128-bit integer vector containing control bytes corresponding to
587///    positions in the destination:
588///    Bit 7: \n
589///    1: Clear the corresponding byte in the destination. \n
590///    0: Copy the selected source byte to the corresponding byte in the
591///    destination. \n
592///    Bits [6:4] Reserved.  \n
593///    Bits [3:0] select the source byte to be copied.
594/// \returns A 128-bit integer vector containing the copied or cleared values.
595static __inline__ __m128i __DEFAULT_FN_ATTRS
596_mm_shuffle_epi8(__m128i __a, __m128i __b)
597{
598    return (__m128i)__builtin_ia32_pshufb128((__v16qi)__a, (__v16qi)__b);
599}
600
601/// Copies the 8-bit integers from a 64-bit integer vector to the
602///    destination or clears 8-bit values in the destination, as specified by
603///    the second source operand.
604///
605/// \headerfile <x86intrin.h>
606///
607/// This intrinsic corresponds to the \c PSHUFB instruction.
608///
609/// \param __a
610///    A 64-bit integer vector containing the values to be copied.
611/// \param __b
612///    A 64-bit integer vector containing control bytes corresponding to
613///    positions in the destination:
614///    Bit 7: \n
615///    1: Clear the corresponding byte in the destination. \n
616///    0: Copy the selected source byte to the corresponding byte in the
617///    destination. \n
618///    Bits [3:0] select the source byte to be copied.
619/// \returns A 64-bit integer vector containing the copied or cleared values.
620static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
621_mm_shuffle_pi8(__m64 __a, __m64 __b)
622{
623    return (__m64)__builtin_ia32_pshufb((__v8qi)__a, (__v8qi)__b);
624}
625
626/// For each 8-bit integer in the first source operand, perform one of
627///    the following actions as specified by the second source operand.
628///
629///    If the byte in the second source is negative, calculate the two's
630///    complement of the corresponding byte in the first source, and write that
631///    value to the destination. If the byte in the second source is positive,
632///    copy the corresponding byte from the first source to the destination. If
633///    the byte in the second source is zero, clear the corresponding byte in
634///    the destination.
635///
636/// \headerfile <x86intrin.h>
637///
638/// This intrinsic corresponds to the \c VPSIGNB instruction.
639///
640/// \param __a
641///    A 128-bit integer vector containing the values to be copied.
642/// \param __b
643///    A 128-bit integer vector containing control bytes corresponding to
644///    positions in the destination.
645/// \returns A 128-bit integer vector containing the resultant values.
646static __inline__ __m128i __DEFAULT_FN_ATTRS
647_mm_sign_epi8(__m128i __a, __m128i __b)
648{
649    return (__m128i)__builtin_ia32_psignb128((__v16qi)__a, (__v16qi)__b);
650}
651
652/// For each 16-bit integer in the first source operand, perform one of
653///    the following actions as specified by the second source operand.
654///
655///    If the word in the second source is negative, calculate the two's
656///    complement of the corresponding word in the first source, and write that
657///    value to the destination. If the word in the second source is positive,
658///    copy the corresponding word from the first source to the destination. If
659///    the word in the second source is zero, clear the corresponding word in
660///    the destination.
661///
662/// \headerfile <x86intrin.h>
663///
664/// This intrinsic corresponds to the \c VPSIGNW instruction.
665///
666/// \param __a
667///    A 128-bit integer vector containing the values to be copied.
668/// \param __b
669///    A 128-bit integer vector containing control words corresponding to
670///    positions in the destination.
671/// \returns A 128-bit integer vector containing the resultant values.
672static __inline__ __m128i __DEFAULT_FN_ATTRS
673_mm_sign_epi16(__m128i __a, __m128i __b)
674{
675    return (__m128i)__builtin_ia32_psignw128((__v8hi)__a, (__v8hi)__b);
676}
677
678/// For each 32-bit integer in the first source operand, perform one of
679///    the following actions as specified by the second source operand.
680///
681///    If the doubleword in the second source is negative, calculate the two's
682///    complement of the corresponding word in the first source, and write that
683///    value to the destination. If the doubleword in the second source is
684///    positive, copy the corresponding word from the first source to the
685///    destination. If the doubleword in the second source is zero, clear the
686///    corresponding word in the destination.
687///
688/// \headerfile <x86intrin.h>
689///
690/// This intrinsic corresponds to the \c VPSIGND instruction.
691///
692/// \param __a
693///    A 128-bit integer vector containing the values to be copied.
694/// \param __b
695///    A 128-bit integer vector containing control doublewords corresponding to
696///    positions in the destination.
697/// \returns A 128-bit integer vector containing the resultant values.
698static __inline__ __m128i __DEFAULT_FN_ATTRS
699_mm_sign_epi32(__m128i __a, __m128i __b)
700{
701    return (__m128i)__builtin_ia32_psignd128((__v4si)__a, (__v4si)__b);
702}
703
704/// For each 8-bit integer in the first source operand, perform one of
705///    the following actions as specified by the second source operand.
706///
707///    If the byte in the second source is negative, calculate the two's
708///    complement of the corresponding byte in the first source, and write that
709///    value to the destination. If the byte in the second source is positive,
710///    copy the corresponding byte from the first source to the destination. If
711///    the byte in the second source is zero, clear the corresponding byte in
712///    the destination.
713///
714/// \headerfile <x86intrin.h>
715///
716/// This intrinsic corresponds to the \c PSIGNB instruction.
717///
718/// \param __a
719///    A 64-bit integer vector containing the values to be copied.
720/// \param __b
721///    A 64-bit integer vector containing control bytes corresponding to
722///    positions in the destination.
723/// \returns A 64-bit integer vector containing the resultant values.
724static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
725_mm_sign_pi8(__m64 __a, __m64 __b)
726{
727    return (__m64)__builtin_ia32_psignb((__v8qi)__a, (__v8qi)__b);
728}
729
730/// For each 16-bit integer in the first source operand, perform one of
731///    the following actions as specified by the second source operand.
732///
733///    If the word in the second source is negative, calculate the two's
734///    complement of the corresponding word in the first source, and write that
735///    value to the destination. If the word in the second source is positive,
736///    copy the corresponding word from the first source to the destination. If
737///    the word in the second source is zero, clear the corresponding word in
738///    the destination.
739///
740/// \headerfile <x86intrin.h>
741///
742/// This intrinsic corresponds to the \c PSIGNW instruction.
743///
744/// \param __a
745///    A 64-bit integer vector containing the values to be copied.
746/// \param __b
747///    A 64-bit integer vector containing control words corresponding to
748///    positions in the destination.
749/// \returns A 64-bit integer vector containing the resultant values.
750static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
751_mm_sign_pi16(__m64 __a, __m64 __b)
752{
753    return (__m64)__builtin_ia32_psignw((__v4hi)__a, (__v4hi)__b);
754}
755
756/// For each 32-bit integer in the first source operand, perform one of
757///    the following actions as specified by the second source operand.
758///
759///    If the doubleword in the second source is negative, calculate the two's
760///    complement of the corresponding doubleword in the first source, and
761///    write that value to the destination. If the doubleword in the second
762///    source is positive, copy the corresponding doubleword from the first
763///    source to the destination. If the doubleword in the second source is
764///    zero, clear the corresponding doubleword in the destination.
765///
766/// \headerfile <x86intrin.h>
767///
768/// This intrinsic corresponds to the \c PSIGND instruction.
769///
770/// \param __a
771///    A 64-bit integer vector containing the values to be copied.
772/// \param __b
773///    A 64-bit integer vector containing two control doublewords corresponding
774///    to positions in the destination.
775/// \returns A 64-bit integer vector containing the resultant values.
776static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX
777_mm_sign_pi32(__m64 __a, __m64 __b)
778{
779    return (__m64)__builtin_ia32_psignd((__v2si)__a, (__v2si)__b);
780}
781
/* Remove the attribute helper macros so they are not visible past this point. */
#undef __DEFAULT_FN_ATTRS_MMX
#undef __DEFAULT_FN_ATTRS
784
785#endif /* __TMMINTRIN_H */
786