/* tmmintrin.h revision 309124 */
132516Sgibbs/*===---- tmmintrin.h - SSSE3 intrinsics -----------------------------------=== 240029Sgibbs * 332516Sgibbs * Permission is hereby granted, free of charge, to any person obtaining a copy 432516Sgibbs * of this software and associated documentation files (the "Software"), to deal 532516Sgibbs * in the Software without restriction, including without limitation the rights 632516Sgibbs * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 732516Sgibbs * copies of the Software, and to permit persons to whom the Software is 832516Sgibbs * furnished to do so, subject to the following conditions: 932516Sgibbs * 1032516Sgibbs * The above copyright notice and this permission notice shall be included in 1132516Sgibbs * all copies or substantial portions of the Software. 1232516Sgibbs * 1332516Sgibbs * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 1432516Sgibbs * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 1532516Sgibbs * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 1632516Sgibbs * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 1732516Sgibbs * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 1832516Sgibbs * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 1932516Sgibbs * THE SOFTWARE. 2032516Sgibbs * 2132516Sgibbs *===-----------------------------------------------------------------------=== 2232516Sgibbs */ 2332516Sgibbs 2432516Sgibbs#ifndef __TMMINTRIN_H 2532516Sgibbs#define __TMMINTRIN_H 2650477Speter 2732516Sgibbs#include <pmmintrin.h> 2832516Sgibbs 2932516Sgibbs/* Define the default attributes for the functions in this file. 
*/ 3032516Sgibbs#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("ssse3"))) 3132516Sgibbs 3267551Sjhb/// \brief Computes the absolute value of each of the packed 8-bit signed 3367551Sjhb/// integers in the source operand and stores the 8-bit unsigned integer 34112346Smux/// results in the destination. 3576827Salfred/// 3679224Sdillon/// \headerfile <x86intrin.h> 3776827Salfred/// 38104486Ssam/// This intrinsic corresponds to the \c PABSB instruction. 39104486Ssam/// 4032516Sgibbs/// \param __a 4132516Sgibbs/// A 64-bit vector of [8 x i8]. 4232516Sgibbs/// \returns A 64-bit integer vector containing the absolute values of the 43104486Ssam/// elements in the operand. 4432516Sgibbsstatic __inline__ __m64 __DEFAULT_FN_ATTRS 45112436Smux_mm_abs_pi8(__m64 __a) 4632516Sgibbs{ 4732516Sgibbs return (__m64)__builtin_ia32_pabsb((__v8qi)__a); 4832516Sgibbs} 49113228Sjake 5032516Sgibbs/// \brief Computes the absolute value of each of the packed 8-bit signed 5132516Sgibbs/// integers in the source operand and stores the 8-bit unsigned integer 5232516Sgibbs/// results in the destination. 5335767Sgibbs/// 5432516Sgibbs/// \headerfile <x86intrin.h> 5532516Sgibbs/// 5632516Sgibbs/// This intrinsic corresponds to the \c VPABSB instruction. 5732516Sgibbs/// 5832516Sgibbs/// \param __a 5932516Sgibbs/// A 128-bit vector of [16 x i8]. 6035767Sgibbs/// \returns A 128-bit integer vector containing the absolute values of the 6132516Sgibbs/// elements in the operand. 6232516Sgibbsstatic __inline__ __m128i __DEFAULT_FN_ATTRS 6332516Sgibbs_mm_abs_epi8(__m128i __a) 6432516Sgibbs{ 6532516Sgibbs return (__m128i)__builtin_ia32_pabsb128((__v16qi)__a); 6632516Sgibbs} 6732516Sgibbs 6832516Sgibbs/// \brief Computes the absolute value of each of the packed 16-bit signed 6932516Sgibbs/// integers in the source operand and stores the 16-bit unsigned integer 7032516Sgibbs/// results in the destination. 
7132516Sgibbs/// 7260938Sjake/// \headerfile <x86intrin.h> 7332516Sgibbs/// 7432516Sgibbs/// This intrinsic corresponds to the \c PABSW instruction. 7532516Sgibbs/// 7632516Sgibbs/// \param __a 7760938Sjake/// A 64-bit vector of [4 x i16]. 7832516Sgibbs/// \returns A 64-bit integer vector containing the absolute values of the 7932516Sgibbs/// elements in the operand. 8032516Sgibbsstatic __inline__ __m64 __DEFAULT_FN_ATTRS 8132516Sgibbs_mm_abs_pi16(__m64 __a) 8232516Sgibbs{ 8332516Sgibbs return (__m64)__builtin_ia32_pabsw((__v4hi)__a); 8432516Sgibbs} 8532516Sgibbs 8632516Sgibbs/// \brief Computes the absolute value of each of the packed 16-bit signed 8732516Sgibbs/// integers in the source operand and stores the 16-bit unsigned integer 8832516Sgibbs/// results in the destination. 8932516Sgibbs/// 9032516Sgibbs/// \headerfile <x86intrin.h> 9132516Sgibbs/// 9232516Sgibbs/// This intrinsic corresponds to the \c VPABSW instruction. 9360938Sjake/// 9432516Sgibbs/// \param __a 9532516Sgibbs/// A 128-bit vector of [8 x i16]. 9660938Sjake/// \returns A 128-bit integer vector containing the absolute values of the 9760938Sjake/// elements in the operand. 9832516Sgibbsstatic __inline__ __m128i __DEFAULT_FN_ATTRS 9932516Sgibbs_mm_abs_epi16(__m128i __a) 100112346Smux{ 10132516Sgibbs return (__m128i)__builtin_ia32_pabsw128((__v8hi)__a); 102113228Sjake} 103113228Sjake 104112569Sjake/// \brief Computes the absolute value of each of the packed 32-bit signed 10532516Sgibbs/// integers in the source operand and stores the 32-bit unsigned integer 10632516Sgibbs/// results in the destination. 10732516Sgibbs/// 10832516Sgibbs/// \headerfile <x86intrin.h> 109112346Smux/// 110112346Smux/// This intrinsic corresponds to the \c PABSD instruction. 111112346Smux/// 11295076Salfred/// \param __a 11395076Salfred/// A 64-bit vector of [2 x i32]. 11495076Salfred/// \returns A 64-bit integer vector containing the absolute values of the 11595076Salfred/// elements in the operand. 
11695076Salfredstatic __inline__ __m64 __DEFAULT_FN_ATTRS 11795076Salfred_mm_abs_pi32(__m64 __a) 11895076Salfred{ 11995076Salfred return (__m64)__builtin_ia32_pabsd((__v2si)__a); 12032516Sgibbs} 12132516Sgibbs 12232516Sgibbs/// \brief Computes the absolute value of each of the packed 32-bit signed 12332516Sgibbs/// integers in the source operand and stores the 32-bit unsigned integer 12432516Sgibbs/// results in the destination. 12532516Sgibbs/// 12632516Sgibbs/// \headerfile <x86intrin.h> 12732516Sgibbs/// 12832516Sgibbs/// This intrinsic corresponds to the \c VPABSD instruction. 12932516Sgibbs/// 13032516Sgibbs/// \param __a 13132516Sgibbs/// A 128-bit vector of [4 x i32]. 13232516Sgibbs/// \returns A 128-bit integer vector containing the absolute values of the 13332516Sgibbs/// elements in the operand. 13432516Sgibbsstatic __inline__ __m128i __DEFAULT_FN_ATTRS 13532516Sgibbs_mm_abs_epi32(__m128i __a) 13632516Sgibbs{ 13732516Sgibbs return (__m128i)__builtin_ia32_pabsd128((__v4si)__a); 13835767Sgibbs} 13932516Sgibbs 14032516Sgibbs/// \brief Concatenates the two 128-bit integer vector operands, and 14132516Sgibbs/// right-shifts the result by the number of bytes specified in the immediate 14232516Sgibbs/// operand. 14335767Sgibbs/// 14435767Sgibbs/// \headerfile <x86intrin.h> 14535767Sgibbs/// 14635767Sgibbs/// \code 14735767Sgibbs/// __m128i _mm_alignr_epi8(__m128i a, __m128i b, const int n); 14832516Sgibbs/// \endcode 14932516Sgibbs/// 15032516Sgibbs/// This intrinsic corresponds to the \c PALIGNR instruction. 15132516Sgibbs/// 15232516Sgibbs/// \param a 15332516Sgibbs/// A 128-bit vector of [16 x i8] containing one of the source operands. 15432516Sgibbs/// \param b 15532516Sgibbs/// A 128-bit vector of [16 x i8] containing one of the source operands. 15632516Sgibbs/// \param n 15732516Sgibbs/// An immediate operand specifying how many bytes to right-shift the result. 
/// \returns A 128-bit integer vector containing the concatenated right-shifted
///    value.
#define _mm_alignr_epi8(a, b, n) __extension__ ({ \
  (__m128i)__builtin_ia32_palignr128((__v16qi)(__m128i)(a), \
                                     (__v16qi)(__m128i)(b), (n)); })

/// \brief Concatenates the two 64-bit integer vector operands, and right-shifts
///    the result by the number of bytes specified in the immediate operand.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m64 _mm_alignr_pi8(__m64 a, __m64 b, const int n);
/// \endcode
///
/// This intrinsic corresponds to the \c PALIGNR instruction.
///
/// \param a
///    A 64-bit vector of [8 x i8] containing one of the source operands.
/// \param b
///    A 64-bit vector of [8 x i8] containing one of the source operands.
/// \param n
///    An immediate operand specifying how many bytes to right-shift the result.
/// \returns A 64-bit integer vector containing the concatenated right-shifted
///    value.
#define _mm_alignr_pi8(a, b, n) __extension__ ({ \
  (__m64)__builtin_ia32_palignr((__v8qi)(__m64)(a), (__v8qi)(__m64)(b), (n)); })

/// \brief Horizontally adds the adjacent pairs of values contained in 2 packed
///    128-bit vectors of [8 x i16].
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the \c VPHADDW instruction.
///
/// \param __a
///    A 128-bit vector of [8 x i16] containing one of the source operands.
The 19532516Sgibbs/// horizontal sums of the values are stored in the lower bits of the 196112569Sjake/// destination. 197112569Sjake/// \param __b 19832516Sgibbs/// A 128-bit vector of [8 x i16] containing one of the source operands. The 19932516Sgibbs/// horizontal sums of the values are stored in the upper bits of the 20032516Sgibbs/// destination. 20132516Sgibbs/// \returns A 128-bit vector of [8 x i16] containing the horizontal sums of 20232516Sgibbs/// both operands. 20332516Sgibbsstatic __inline__ __m128i __DEFAULT_FN_ATTRS 20432516Sgibbs_mm_hadd_epi16(__m128i __a, __m128i __b) 20535767Sgibbs{ 20632516Sgibbs return (__m128i)__builtin_ia32_phaddw128((__v8hi)__a, (__v8hi)__b); 20732516Sgibbs} 20832516Sgibbs 20932516Sgibbs/// \brief Horizontally adds the adjacent pairs of values contained in 2 packed 21032516Sgibbs/// 128-bit vectors of [4 x i32]. 21132516Sgibbs/// 21232516Sgibbs/// \headerfile <x86intrin.h> 21332516Sgibbs/// 21432516Sgibbs/// This intrinsic corresponds to the \c VPHADDD instruction. 21532516Sgibbs/// 21632516Sgibbs/// \param __a 21735767Sgibbs/// A 128-bit vector of [4 x i32] containing one of the source operands. The 21835767Sgibbs/// horizontal sums of the values are stored in the lower bits of the 21932516Sgibbs/// destination. 22032516Sgibbs/// \param __b 22132516Sgibbs/// A 128-bit vector of [4 x i32] containing one of the source operands. The 22232516Sgibbs/// horizontal sums of the values are stored in the upper bits of the 22332516Sgibbs/// destination. 22432516Sgibbs/// \returns A 128-bit vector of [4 x i32] containing the horizontal sums of 22532516Sgibbs/// both operands. 
22632516Sgibbsstatic __inline__ __m128i __DEFAULT_FN_ATTRS 22732516Sgibbs_mm_hadd_epi32(__m128i __a, __m128i __b) 22832516Sgibbs{ 22932516Sgibbs return (__m128i)__builtin_ia32_phaddd128((__v4si)__a, (__v4si)__b); 23032516Sgibbs} 23132516Sgibbs 23232516Sgibbs/// \brief Horizontally adds the adjacent pairs of values contained in 2 packed 23332516Sgibbs/// 64-bit vectors of [4 x i16]. 23432516Sgibbs/// 23532516Sgibbs/// \headerfile <x86intrin.h> 23632516Sgibbs/// 23732516Sgibbs/// This intrinsic corresponds to the \c PHADDW instruction. 23832516Sgibbs/// 23932516Sgibbs/// \param __a 24032516Sgibbs/// A 64-bit vector of [4 x i16] containing one of the source operands. The 241112436Smux/// horizontal sums of the values are stored in the lower bits of the 24232516Sgibbs/// destination. 24332516Sgibbs/// \param __b 24440029Sgibbs/// A 64-bit vector of [4 x i16] containing one of the source operands. The 24540029Sgibbs/// horizontal sums of the values are stored in the upper bits of the 24640029Sgibbs/// destination. 24740029Sgibbs/// \returns A 64-bit vector of [4 x i16] containing the horizontal sums of both 24840029Sgibbs/// operands. 24940029Sgibbsstatic __inline__ __m64 __DEFAULT_FN_ATTRS 25040029Sgibbs_mm_hadd_pi16(__m64 __a, __m64 __b) 25140029Sgibbs{ 25232516Sgibbs return (__m64)__builtin_ia32_phaddw((__v4hi)__a, (__v4hi)__b); 25332516Sgibbs} 25432516Sgibbs 25532516Sgibbs/// \brief Horizontally adds the adjacent pairs of values contained in 2 packed 25632516Sgibbs/// 64-bit vectors of [2 x i32]. 25732516Sgibbs/// 25832516Sgibbs/// \headerfile <x86intrin.h> 25932516Sgibbs/// 26032516Sgibbs/// This intrinsic corresponds to the \c PHADDD instruction. 26132516Sgibbs/// 26232516Sgibbs/// \param __a 26332516Sgibbs/// A 64-bit vector of [2 x i32] containing one of the source operands. The 26432516Sgibbs/// horizontal sums of the values are stored in the lower bits of the 26532516Sgibbs/// destination. 
26632516Sgibbs/// \param __b 26732516Sgibbs/// A 64-bit vector of [2 x i32] containing one of the source operands. The 268112569Sjake/// horizontal sums of the values are stored in the upper bits of the 26932516Sgibbs/// destination. 27032516Sgibbs/// \returns A 64-bit vector of [2 x i32] containing the horizontal sums of both 27132516Sgibbs/// operands. 27232516Sgibbsstatic __inline__ __m64 __DEFAULT_FN_ATTRS 27369781Sdwmalone_mm_hadd_pi32(__m64 __a, __m64 __b) 27469781Sdwmalone{ 27535767Sgibbs return (__m64)__builtin_ia32_phaddd((__v2si)__a, (__v2si)__b); 27669781Sdwmalone} 27769781Sdwmalone 27869781Sdwmalone/// \brief Horizontally adds the adjacent pairs of values contained in 2 packed 27969781Sdwmalone/// 128-bit vectors of [8 x i16]. Positive sums greater than 7FFFh are 28032516Sgibbs/// saturated to 7FFFh. Negative sums less than 8000h are saturated to 8000h. 28132516Sgibbs/// 28232516Sgibbs/// \headerfile <x86intrin.h> 28332516Sgibbs/// 28432516Sgibbs/// This intrinsic corresponds to the \c VPHADDSW instruction. 28535767Sgibbs/// 28635767Sgibbs/// \param __a 28735767Sgibbs/// A 128-bit vector of [8 x i16] containing one of the source operands. The 28832516Sgibbs/// horizontal sums of the values are stored in the lower bits of the 28932516Sgibbs/// destination. 29035767Sgibbs/// \param __b 29135767Sgibbs/// A 128-bit vector of [8 x i16] containing one of the source operands. The 29235767Sgibbs/// horizontal sums of the values are stored in the upper bits of the 29335767Sgibbs/// destination. 29435767Sgibbs/// \returns A 128-bit vector of [8 x i16] containing the horizontal saturated 29535767Sgibbs/// sums of both operands. 
29635767Sgibbsstatic __inline__ __m128i __DEFAULT_FN_ATTRS 29735767Sgibbs_mm_hadds_epi16(__m128i __a, __m128i __b) 298113228Sjake{ 29932516Sgibbs return (__m128i)__builtin_ia32_phaddsw128((__v8hi)__a, (__v8hi)__b); 300113228Sjake} 301113228Sjake 30235767Sgibbs/// \brief Horizontally adds the adjacent pairs of values contained in 2 packed 30335767Sgibbs/// 64-bit vectors of [4 x i16]. Positive sums greater than 7FFFh are 30435767Sgibbs/// saturated to 7FFFh. Negative sums less than 8000h are saturated to 8000h. 30535767Sgibbs/// 30635767Sgibbs/// \headerfile <x86intrin.h> 30735767Sgibbs/// 30835767Sgibbs/// This intrinsic corresponds to the \c PHADDSW instruction. 30932516Sgibbs/// 31032516Sgibbs/// \param __a 31140029Sgibbs/// A 64-bit vector of [4 x i16] containing one of the source operands. The 31232516Sgibbs/// horizontal sums of the values are stored in the lower bits of the 31332516Sgibbs/// destination. 31432516Sgibbs/// \param __b 31532516Sgibbs/// A 64-bit vector of [4 x i16] containing one of the source operands. The 31632516Sgibbs/// horizontal sums of the values are stored in the upper bits of the 31732516Sgibbs/// destination. 31832516Sgibbs/// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated 31932516Sgibbs/// sums of both operands. 32032516Sgibbsstatic __inline__ __m64 __DEFAULT_FN_ATTRS 32132516Sgibbs_mm_hadds_pi16(__m64 __a, __m64 __b) 32232516Sgibbs{ 32332516Sgibbs return (__m64)__builtin_ia32_phaddsw((__v4hi)__a, (__v4hi)__b); 32432516Sgibbs} 32532516Sgibbs 32632516Sgibbs/// \brief Horizontally subtracts the adjacent pairs of values contained in 2 32732516Sgibbs/// packed 128-bit vectors of [8 x i16]. 32832516Sgibbs/// 32932516Sgibbs/// \headerfile <x86intrin.h> 33032516Sgibbs/// 33132516Sgibbs/// This intrinsic corresponds to the \c VPHSUBW instruction. 33232516Sgibbs/// 33332516Sgibbs/// \param __a 33435767Sgibbs/// A 128-bit vector of [8 x i16] containing one of the source operands. 
The 33535767Sgibbs/// horizontal differences between the values are stored in the lower bits of 33635767Sgibbs/// the destination. 33735767Sgibbs/// \param __b 33835767Sgibbs/// A 128-bit vector of [8 x i16] containing one of the source operands. The 33935767Sgibbs/// horizontal differences between the values are stored in the upper bits of 34035767Sgibbs/// the destination. 341110030Sscottl/// \returns A 128-bit vector of [8 x i16] containing the horizontal differences 342110030Sscottl/// of both operands. 34335767Sgibbsstatic __inline__ __m128i __DEFAULT_FN_ATTRS 344110030Sscottl_mm_hsub_epi16(__m128i __a, __m128i __b) 345110030Sscottl{ 346110030Sscottl return (__m128i)__builtin_ia32_phsubw128((__v8hi)__a, (__v8hi)__b); 347110030Sscottl} 34835767Sgibbs 34940029Sgibbs/// \brief Horizontally subtracts the adjacent pairs of values contained in 2 35035767Sgibbs/// packed 128-bit vectors of [4 x i32]. 351112569Sjake/// 352112569Sjake/// \headerfile <x86intrin.h> 353110030Sscottl/// 354111119Simp/// This intrinsic corresponds to the \c VPHSUBD instruction. 35535767Sgibbs/// 35635767Sgibbs/// \param __a 35735767Sgibbs/// A 128-bit vector of [4 x i32] containing one of the source operands. The 35835767Sgibbs/// horizontal differences between the values are stored in the lower bits of 35935767Sgibbs/// the destination. 36035767Sgibbs/// \param __b 361112196Smux/// A 128-bit vector of [4 x i32] containing one of the source operands. The 362110030Sscottl/// horizontal differences between the values are stored in the upper bits of 363111119Simp/// the destination. 36448449Smjacob/// \returns A 128-bit vector of [4 x i32] containing the horizontal differences 36548449Smjacob/// of both operands. 
366112196Smuxstatic __inline__ __m128i __DEFAULT_FN_ATTRS 36735767Sgibbs_mm_hsub_epi32(__m128i __a, __m128i __b) 36835767Sgibbs{ 36935767Sgibbs return (__m128i)__builtin_ia32_phsubd128((__v4si)__a, (__v4si)__b); 37035767Sgibbs} 37135767Sgibbs 37235767Sgibbs/// \brief Horizontally subtracts the adjacent pairs of values contained in 2 373110030Sscottl/// packed 64-bit vectors of [4 x i16]. 374110030Sscottl/// 375110030Sscottl/// \headerfile <x86intrin.h> 376110030Sscottl/// 377110030Sscottl/// This intrinsic corresponds to the \c PHSUBW instruction. 378110030Sscottl/// 379110030Sscottl/// \param __a 38035767Sgibbs/// A 64-bit vector of [4 x i16] containing one of the source operands. The 38135767Sgibbs/// horizontal differences between the values are stored in the lower bits of 38295076Salfred/// the destination. 38335767Sgibbs/// \param __b 38435767Sgibbs/// A 64-bit vector of [4 x i16] containing one of the source operands. The 385110030Sscottl/// horizontal differences between the values are stored in the upper bits of 386110030Sscottl/// the destination. 38735767Sgibbs/// \returns A 64-bit vector of [4 x i16] containing the horizontal differences 38835767Sgibbs/// of both operands. 38935767Sgibbsstatic __inline__ __m64 __DEFAULT_FN_ATTRS 39035767Sgibbs_mm_hsub_pi16(__m64 __a, __m64 __b) 39135767Sgibbs{ 39249859Sgibbs return (__m64)__builtin_ia32_phsubw((__v4hi)__a, (__v4hi)__b); 39335767Sgibbs} 394112569Sjake 39540029Sgibbs/// \brief Horizontally subtracts the adjacent pairs of values contained in 2 396112196Smux/// packed 64-bit vectors of [2 x i32]. 397112196Smux/// 398110030Sscottl/// \headerfile <x86intrin.h> 399112196Smux/// 400112196Smux/// This intrinsic corresponds to the \c PHSUBD instruction. 40135767Sgibbs/// 40235767Sgibbs/// \param __a 403110030Sscottl/// A 64-bit vector of [2 x i32] containing one of the source operands. 
The 404110030Sscottl/// horizontal differences between the values are stored in the lower bits of 405110030Sscottl/// the destination. 406110030Sscottl/// \param __b 407110030Sscottl/// A 64-bit vector of [2 x i32] containing one of the source operands. The 408110030Sscottl/// horizontal differences between the values are stored in the upper bits of 40932516Sgibbs/// the destination. 410104486Ssam/// \returns A 64-bit vector of [2 x i32] containing the horizontal differences 411104486Ssam/// of both operands. 412104486Ssamstatic __inline__ __m64 __DEFAULT_FN_ATTRS 413104486Ssam_mm_hsub_pi32(__m64 __a, __m64 __b) 414104486Ssam{ 415104486Ssam return (__m64)__builtin_ia32_phsubd((__v2si)__a, (__v2si)__b); 416104486Ssam} 417113228Sjake 418104486Ssam/// \brief Horizontally subtracts the adjacent pairs of values contained in 2 419104486Ssam/// packed 128-bit vectors of [8 x i16]. Positive differences greater than 420104486Ssam/// 7FFFh are saturated to 7FFFh. Negative differences less than 8000h are 421104486Ssam/// saturated to 8000h. 422113228Sjake/// 423104486Ssam/// \headerfile <x86intrin.h> 424104486Ssam/// 425104486Ssam/// This intrinsic corresponds to the \c VPHSUBSW instruction. 426104486Ssam/// 427104486Ssam/// \param __a 428113228Sjake/// A 128-bit vector of [8 x i16] containing one of the source operands. The 429113228Sjake/// horizontal differences between the values are stored in the lower bits of 430113228Sjake/// the destination. 431104486Ssam/// \param __b 432104486Ssam/// A 128-bit vector of [8 x i16] containing one of the source operands. The 433104486Ssam/// horizontal differences between the values are stored in the upper bits of 434113228Sjake/// the destination. 435113228Sjake/// \returns A 128-bit vector of [8 x i16] containing the horizontal saturated 436113228Sjake/// differences of both operands. 
437104486Ssamstatic __inline__ __m128i __DEFAULT_FN_ATTRS 438104486Ssam_mm_hsubs_epi16(__m128i __a, __m128i __b) 439104486Ssam{ 440104486Ssam return (__m128i)__builtin_ia32_phsubsw128((__v8hi)__a, (__v8hi)__b); 441104486Ssam} 442113228Sjake 443113228Sjake/// \brief Horizontally subtracts the adjacent pairs of values contained in 2 444113228Sjake/// packed 64-bit vectors of [4 x i16]. Positive differences greater than 445113228Sjake/// 7FFFh are saturated to 7FFFh. Negative differences less than 8000h are 446113228Sjake/// saturated to 8000h. 447113228Sjake/// 448113228Sjake/// \headerfile <x86intrin.h> 449113228Sjake/// 450113228Sjake/// This intrinsic corresponds to the \c PHSUBSW instruction. 451113228Sjake/// 452113228Sjake/// \param __a 453113228Sjake/// A 64-bit vector of [4 x i16] containing one of the source operands. The 454113228Sjake/// horizontal differences between the values are stored in the lower bits of 455113228Sjake/// the destination. 456113228Sjake/// \param __b 457113228Sjake/// A 64-bit vector of [4 x i16] containing one of the source operands. The 458113228Sjake/// horizontal differences between the values are stored in the upper bits of 459113228Sjake/// the destination. 460113228Sjake/// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated 461113228Sjake/// differences of both operands. 
462113228Sjakestatic __inline__ __m64 __DEFAULT_FN_ATTRS 463113228Sjake_mm_hsubs_pi16(__m64 __a, __m64 __b) 464113228Sjake{ 465113228Sjake return (__m64)__builtin_ia32_phsubsw((__v4hi)__a, (__v4hi)__b); 466113228Sjake} 467113472Ssimokawa 468113472Ssimokawa/// \brief Multiplies corresponding pairs of packed 8-bit unsigned integer 469113472Ssimokawa/// values contained in the first source operand and packed 8-bit signed 470113472Ssimokawa/// integer values contained in the second source operand, adds pairs of 471113472Ssimokawa/// contiguous products with signed saturation, and writes the 16-bit sums to 472113472Ssimokawa/// the corresponding bits in the destination. For example, bits [7:0] of 473113472Ssimokawa/// both operands are multiplied, bits [15:8] of both operands are 474113472Ssimokawa/// multiplied, and the sum of both results is written to bits [15:0] of the 475113472Ssimokawa/// destination. 476113472Ssimokawa/// 477113472Ssimokawa/// \headerfile <x86intrin.h> 478113472Ssimokawa/// 479113472Ssimokawa/// This intrinsic corresponds to the \c VPMADDUBSW instruction. 480113472Ssimokawa/// 481113472Ssimokawa/// \param __a 482113472Ssimokawa/// A 128-bit integer vector containing the first source operand. 483113228Sjake/// \param __b 484113228Sjake/// A 128-bit integer vector containing the second source operand. 
485113228Sjake/// \returns A 128-bit integer vector containing the sums of products of both 486113228Sjake/// operands: 487104486Ssam/// R0 := (__a0 * __b0) + (__a1 * __b1) 488113228Sjake/// R1 := (__a2 * __b2) + (__a3 * __b3) 489104486Ssam/// R2 := (__a4 * __b4) + (__a5 * __b5) 490104486Ssam/// R3 := (__a6 * __b6) + (__a7 * __b7) 491104486Ssam/// R4 := (__a8 * __b8) + (__a9 * __b9) 492104486Ssam/// R5 := (__a10 * __b10) + (__a11 * __b11) 493104486Ssam/// R6 := (__a12 * __b12) + (__a13 * __b13) 494104486Ssam/// R7 := (__a14 * __b14) + (__a15 * __b15) 495104486Ssamstatic __inline__ __m128i __DEFAULT_FN_ATTRS 496104486Ssam_mm_maddubs_epi16(__m128i __a, __m128i __b) 497104486Ssam{ 498104486Ssam return (__m128i)__builtin_ia32_pmaddubsw128((__v16qi)__a, (__v16qi)__b); 499104486Ssam} 500104486Ssam 501104486Ssam/// \brief Multiplies corresponding pairs of packed 8-bit unsigned integer 502104486Ssam/// values contained in the first source operand and packed 8-bit signed 503104486Ssam/// integer values contained in the second source operand, adds pairs of 504104486Ssam/// contiguous products with signed saturation, and writes the 16-bit sums to 505104486Ssam/// the corresponding bits in the destination. For example, bits [7:0] of 506104486Ssam/// both operands are multiplied, bits [15:8] of both operands are 507104486Ssam/// multiplied, and the sum of both results is written to bits [15:0] of the 508104486Ssam/// destination. 509104486Ssam/// 510104486Ssam/// \headerfile <x86intrin.h> 511104486Ssam/// 512104486Ssam/// This intrinsic corresponds to the \c PMADDUBSW instruction. 513104486Ssam/// 514104486Ssam/// \param __a 515113228Sjake/// A 64-bit integer vector containing the first source operand. 516113228Sjake/// \param __b 517113228Sjake/// A 64-bit integer vector containing the second source operand. 
518104486Ssam/// \returns A 64-bit integer vector containing the sums of products of both 519104486Ssam/// operands: 520104486Ssam/// R0 := (__a0 * __b0) + (__a1 * __b1) 521104486Ssam/// R1 := (__a2 * __b2) + (__a3 * __b3) 522104486Ssam/// R2 := (__a4 * __b4) + (__a5 * __b5) 523104486Ssam/// R3 := (__a6 * __b6) + (__a7 * __b7) 524104486Ssamstatic __inline__ __m64 __DEFAULT_FN_ATTRS 525104486Ssam_mm_maddubs_pi16(__m64 __a, __m64 __b) 526104486Ssam{ 527113228Sjake return (__m64)__builtin_ia32_pmaddubsw((__v8qi)__a, (__v8qi)__b); 528104486Ssam} 529104486Ssam 530104486Ssam/// \brief Multiplies packed 16-bit signed integer values, truncates the 32-bit 531104486Ssam/// products to the 18 most significant bits by right-shifting, rounds the 532104486Ssam/// truncated value by adding 1, and writes bits [16:1] to the destination. 533104486Ssam/// 534104486Ssam/// \headerfile <x86intrin.h> 535104486Ssam/// 536104486Ssam/// This intrinsic corresponds to the \c VPMULHRSW instruction. 537104486Ssam/// 538104486Ssam/// \param __a 539104486Ssam/// A 128-bit vector of [8 x i16] containing one of the source operands. 540104486Ssam/// \param __b 541104486Ssam/// A 128-bit vector of [8 x i16] containing one of the source operands. 542104486Ssam/// \returns A 128-bit vector of [8 x i16] containing the rounded and scaled 543104486Ssam/// products of both operands. 544104486Ssamstatic __inline__ __m128i __DEFAULT_FN_ATTRS 545104486Ssam_mm_mulhrs_epi16(__m128i __a, __m128i __b) 546104486Ssam{ 547104486Ssam return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)__a, (__v8hi)__b); 548104486Ssam} 549104486Ssam 550104486Ssam/// \brief Multiplies packed 16-bit signed integer values, truncates the 32-bit 551104486Ssam/// products to the 18 most significant bits by right-shifting, rounds the 552104486Ssam/// truncated value by adding 1, and writes bits [16:1] to the destination. 
553104486Ssam/// 554113459Ssimokawa/// \headerfile <x86intrin.h> 555113459Ssimokawa/// 556104486Ssam/// This intrinsic corresponds to the \c PMULHRSW instruction. 557113459Ssimokawa/// 558113459Ssimokawa/// \param __a 559113459Ssimokawa/// A 64-bit vector of [4 x i16] containing one of the source operands. 560113459Ssimokawa/// \param __b 561113459Ssimokawa/// A 64-bit vector of [4 x i16] containing one of the source operands. 562113459Ssimokawa/// \returns A 64-bit vector of [4 x i16] containing the rounded and scaled 563113459Ssimokawa/// products of both operands. 564113459Ssimokawastatic __inline__ __m64 __DEFAULT_FN_ATTRS 565113459Ssimokawa_mm_mulhrs_pi16(__m64 __a, __m64 __b) 566113459Ssimokawa{ 567113459Ssimokawa return (__m64)__builtin_ia32_pmulhrsw((__v4hi)__a, (__v4hi)__b); 568113459Ssimokawa} 569113492Smux 570113459Ssimokawa/// \brief Copies the 8-bit integers from a 128-bit integer vector to the 571113459Ssimokawa/// destination or clears 8-bit values in the destination, as specified by 572113472Ssimokawa/// the second source operand. 573113472Ssimokawa/// 574113472Ssimokawa/// \headerfile <x86intrin.h> 575113472Ssimokawa/// 576113472Ssimokawa/// This intrinsic corresponds to the \c VPSHUFB instruction. 577113472Ssimokawa/// 578113459Ssimokawa/// \param __a 579113459Ssimokawa/// A 128-bit integer vector containing the values to be copied. 580113459Ssimokawa/// \param __b 581113472Ssimokawa/// A 128-bit integer vector containing control bytes corresponding to 582113492Smux/// positions in the destination: 583113472Ssimokawa/// Bit 7: 584113459Ssimokawa/// 1: Clear the corresponding byte in the destination. 585113459Ssimokawa/// 0: Copy the selected source byte to the corresponding byte in the 586113459Ssimokawa/// destination. 587113459Ssimokawa/// Bits [6:4] Reserved. 588113459Ssimokawa/// Bits [3:0] select the source byte to be copied. 589113459Ssimokawa/// \returns A 128-bit integer vector containing the copied or cleared values. 
590113459Ssimokawastatic __inline__ __m128i __DEFAULT_FN_ATTRS 591113459Ssimokawa_mm_shuffle_epi8(__m128i __a, __m128i __b) 592113459Ssimokawa{ 593113459Ssimokawa return (__m128i)__builtin_ia32_pshufb128((__v16qi)__a, (__v16qi)__b); 594104486Ssam} 595104486Ssam 596104486Ssam/// \brief Copies the 8-bit integers from a 64-bit integer vector to the 597104486Ssam/// destination or clears 8-bit values in the destination, as specified by 598104486Ssam/// the second source operand. 599104486Ssam/// 600104486Ssam/// \headerfile <x86intrin.h> 601104486Ssam/// 602104486Ssam/// This intrinsic corresponds to the \c PSHUFB instruction. 603104486Ssam/// 604104486Ssam/// \param __a 605104486Ssam/// A 64-bit integer vector containing the values to be copied. 606104486Ssam/// \param __b 607104486Ssam/// A 64-bit integer vector containing control bytes corresponding to 608104486Ssam/// positions in the destination: 609104486Ssam/// Bit 7: 610104486Ssam/// 1: Clear the corresponding byte in the destination. 611104486Ssam/// 0: Copy the selected source byte to the corresponding byte in the 612113472Ssimokawa/// destination. 613104486Ssam/// Bits [3:0] select the source byte to be copied. 614104486Ssam/// \returns A 64-bit integer vector containing the copied or cleared values. 615104486Ssamstatic __inline__ __m64 __DEFAULT_FN_ATTRS 616104486Ssam_mm_shuffle_pi8(__m64 __a, __m64 __b) 617113228Sjake{ 618104486Ssam return (__m64)__builtin_ia32_pshufb((__v8qi)__a, (__v8qi)__b); 619104486Ssam} 620104486Ssam 621110335Sharti/// \brief For each 8-bit integer in the first source operand, perform one of 622113228Sjake/// the following actions as specified by the second source operand: If the 623110335Sharti/// byte in the second source is negative, calculate the two's complement of 624110335Sharti/// the corresponding byte in the first source, and write that value to the 625110335Sharti/// destination. 
If the byte in the second source is positive, copy the 626110335Sharti/// corresponding byte from the first source to the destination. If the byte 627110335Sharti/// in the second source is zero, clear the corresponding byte in the 628110335Sharti/// destination. 629104486Ssam/// 630104486Ssam/// \headerfile <x86intrin.h> 631104486Ssam/// 632104486Ssam/// This intrinsic corresponds to the \c VPSIGNB instruction. 633104486Ssam/// 634104486Ssam/// \param __a 635104486Ssam/// A 128-bit integer vector containing the values to be copied. 636104486Ssam/// \param __b 637104486Ssam/// A 128-bit integer vector containing control bytes corresponding to 638104486Ssam/// positions in the destination. 639104486Ssam/// \returns A 128-bit integer vector containing the resultant values. 640104486Ssamstatic __inline__ __m128i __DEFAULT_FN_ATTRS 641104486Ssam_mm_sign_epi8(__m128i __a, __m128i __b) 642104486Ssam{ 643104486Ssam return (__m128i)__builtin_ia32_psignb128((__v16qi)__a, (__v16qi)__b); 644104486Ssam} 645104486Ssam 646104486Ssam/// \brief For each 16-bit integer in the first source operand, perform one of 647104486Ssam/// the following actions as specified by the second source operand: If the 648104486Ssam/// word in the second source is negative, calculate the two's complement of 649104486Ssam/// the corresponding word in the first source, and write that value to the 650104486Ssam/// destination. If the word in the second source is positive, copy the 651104486Ssam/// corresponding word from the first source to the destination. If the word 652104486Ssam/// in the second source is zero, clear the corresponding word in the 653113228Sjake/// destination. 654104486Ssam/// 655104486Ssam/// \headerfile <x86intrin.h> 656104486Ssam/// 657104486Ssam/// This intrinsic corresponds to the \c VPSIGNW instruction. 658104486Ssam/// 659104486Ssam/// \param __a 660104486Ssam/// A 128-bit integer vector containing the values to be copied. 
661104486Ssam/// \param __b 662104486Ssam/// A 128-bit integer vector containing control words corresponding to 663104486Ssam/// positions in the destination. 664113472Ssimokawa/// \returns A 128-bit integer vector containing the resultant values. 665104486Ssamstatic __inline__ __m128i __DEFAULT_FN_ATTRS 666104486Ssam_mm_sign_epi16(__m128i __a, __m128i __b) 667104486Ssam{ 668104486Ssam return (__m128i)__builtin_ia32_psignw128((__v8hi)__a, (__v8hi)__b); 669104486Ssam} 670104486Ssam 671104486Ssam/// \brief For each 32-bit integer in the first source operand, perform one of 672104486Ssam/// the following actions as specified by the second source operand: If the 673104486Ssam/// doubleword in the second source is negative, calculate the two's 674104486Ssam/// complement of the corresponding word in the first source, and write that 675104486Ssam/// value to the destination. If the doubleword in the second source is 676104486Ssam/// positive, copy the corresponding word from the first source to the 677104486Ssam/// destination. If the doubleword in the second source is zero, clear the 678104486Ssam/// corresponding word in the destination. 679104486Ssam/// 680104486Ssam/// \headerfile <x86intrin.h> 681104486Ssam/// 682104486Ssam/// This intrinsic corresponds to the \c VPSIGND instruction. 683104486Ssam/// 684104486Ssam/// \param __a 685104486Ssam/// A 128-bit integer vector containing the values to be copied. 686110335Sharti/// \param __b 687113228Sjake/// A 128-bit integer vector containing control doublewords corresponding to 688110335Sharti/// positions in the destination. 689110335Sharti/// \returns A 128-bit integer vector containing the resultant values. 
690110335Shartistatic __inline__ __m128i __DEFAULT_FN_ATTRS 691110335Sharti_mm_sign_epi32(__m128i __a, __m128i __b) 692104486Ssam{ 693110335Sharti return (__m128i)__builtin_ia32_psignd128((__v4si)__a, (__v4si)__b); 694110335Sharti} 695104486Ssam 696104486Ssam/// \brief For each 8-bit integer in the first source operand, perform one of 697104486Ssam/// the following actions as specified by the second source operand: If the 698104486Ssam/// byte in the second source is negative, calculate the two's complement of 699104486Ssam/// the corresponding byte in the first source, and write that value to the 700104486Ssam/// destination. If the byte in the second source is positive, copy the 701104486Ssam/// corresponding byte from the first source to the destination. If the byte 702104486Ssam/// in the second source is zero, clear the corresponding byte in the 703104486Ssam/// destination. 704104486Ssam/// 705104486Ssam/// \headerfile <x86intrin.h> 706104486Ssam/// 707104486Ssam/// This intrinsic corresponds to the \c PSIGNB instruction. 70832516Sgibbs/// 70932516Sgibbs/// \param __a 71032516Sgibbs/// A 64-bit integer vector containing the values to be copied. 71132516Sgibbs/// \param __b 71232516Sgibbs/// A 64-bit integer vector containing control bytes corresponding to 71332516Sgibbs/// positions in the destination. 71432516Sgibbs/// \returns A 64-bit integer vector containing the resultant values. 
71532516Sgibbsstatic __inline__ __m64 __DEFAULT_FN_ATTRS 71632516Sgibbs_mm_sign_pi8(__m64 __a, __m64 __b) 71732516Sgibbs{ 71832516Sgibbs return (__m64)__builtin_ia32_psignb((__v8qi)__a, (__v8qi)__b); 71932516Sgibbs} 72032516Sgibbs 72132516Sgibbs/// \brief For each 16-bit integer in the first source operand, perform one of 722113347Smux/// the following actions as specified by the second source operand: If the 72332516Sgibbs/// word in the second source is negative, calculate the two's complement of 72432516Sgibbs/// the corresponding word in the first source, and write that value to the 72532516Sgibbs/// destination. If the word in the second source is positive, copy the 72632516Sgibbs/// corresponding word from the first source to the destination. If the word 72732516Sgibbs/// in the second source is zero, clear the corresponding word in the 72832516Sgibbs/// destination. 72932516Sgibbs/// 73032516Sgibbs/// \headerfile <x86intrin.h> 73132516Sgibbs/// 732113347Smux/// This intrinsic corresponds to the \c PSIGNW instruction. 73332516Sgibbs/// 73432516Sgibbs/// \param __a 73532516Sgibbs/// A 64-bit integer vector containing the values to be copied. 73632516Sgibbs/// \param __b 73732516Sgibbs/// A 64-bit integer vector containing control words corresponding to 73832516Sgibbs/// positions in the destination. 739113347Smux/// \returns A 64-bit integer vector containing the resultant values. 
74032516Sgibbsstatic __inline__ __m64 __DEFAULT_FN_ATTRS 741113347Smux_mm_sign_pi16(__m64 __a, __m64 __b) 74232516Sgibbs{ 74332516Sgibbs return (__m64)__builtin_ia32_psignw((__v4hi)__a, (__v4hi)__b); 74432516Sgibbs} 74532516Sgibbs 74632516Sgibbs/// \brief For each 32-bit integer in the first source operand, perform one of 74732516Sgibbs/// the following actions as specified by the second source operand: If the 74832516Sgibbs/// doubleword in the second source is negative, calculate the two's 74932516Sgibbs/// complement of the corresponding doubleword in the first source, and 75032516Sgibbs/// write that value to the destination. If the doubleword in the second 75132516Sgibbs/// source is positive, copy the corresponding doubleword from the first 752112346Smux/// source to the destination. If the doubleword in the second source is 753112346Smux/// zero, clear the corresponding doubleword in the destination. 754112346Smux/// 755112346Smux/// \headerfile <x86intrin.h> 756112346Smux/// 757112346Smux/// This intrinsic corresponds to the \c PSIGND instruction. 758112346Smux/// 759112346Smux/// \param __a 760112346Smux/// A 64-bit integer vector containing the values to be copied. 761112346Smux/// \param __b 762112346Smux/// A 64-bit integer vector containing two control doublewords corresponding 763112346Smux/// to positions in the destination. 764112346Smux/// \returns A 64-bit integer vector containing the resultant values. 765112346Smuxstatic __inline__ __m64 __DEFAULT_FN_ATTRS 766112346Smux_mm_sign_pi32(__m64 __a, __m64 __b) 76732516Sgibbs{ 76832516Sgibbs return (__m64)__builtin_ia32_psignd((__v2si)__a, (__v2si)__b); 76932516Sgibbs} 77032516Sgibbs 77132516Sgibbs#undef __DEFAULT_FN_ATTRS 77232516Sgibbs 77332516Sgibbs#endif /* __TMMINTRIN_H */ 77432516Sgibbs