tmmintrin.h revision 341825
1/*===---- tmmintrin.h - SSSE3 intrinsics -----------------------------------=== 2 * 3 * Permission is hereby granted, free of charge, to any person obtaining a copy 4 * of this software and associated documentation files (the "Software"), to deal 5 * in the Software without restriction, including without limitation the rights 6 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 * copies of the Software, and to permit persons to whom the Software is 8 * furnished to do so, subject to the following conditions: 9 * 10 * The above copyright notice and this permission notice shall be included in 11 * all copies or substantial portions of the Software. 12 * 13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 * THE SOFTWARE. 20 * 21 *===-----------------------------------------------------------------------=== 22 */ 23 24#ifndef __TMMINTRIN_H 25#define __TMMINTRIN_H 26 27#include <pmmintrin.h> 28 29/* Define the default attributes for the functions in this file. */ 30#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("ssse3"), __min_vector_width__(64))) 31#define __DEFAULT_FN_ATTRS_MMX __attribute__((__always_inline__, __nodebug__, __target__("mmx,ssse3"), __min_vector_width__(64))) 32 33/// Computes the absolute value of each of the packed 8-bit signed 34/// integers in the source operand and stores the 8-bit unsigned integer 35/// results in the destination. 36/// 37/// \headerfile <x86intrin.h> 38/// 39/// This intrinsic corresponds to the \c PABSB instruction. 
40/// 41/// \param __a 42/// A 64-bit vector of [8 x i8]. 43/// \returns A 64-bit integer vector containing the absolute values of the 44/// elements in the operand. 45static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX 46_mm_abs_pi8(__m64 __a) 47{ 48 return (__m64)__builtin_ia32_pabsb((__v8qi)__a); 49} 50 51/// Computes the absolute value of each of the packed 8-bit signed 52/// integers in the source operand and stores the 8-bit unsigned integer 53/// results in the destination. 54/// 55/// \headerfile <x86intrin.h> 56/// 57/// This intrinsic corresponds to the \c VPABSB instruction. 58/// 59/// \param __a 60/// A 128-bit vector of [16 x i8]. 61/// \returns A 128-bit integer vector containing the absolute values of the 62/// elements in the operand. 63static __inline__ __m128i __DEFAULT_FN_ATTRS 64_mm_abs_epi8(__m128i __a) 65{ 66 return (__m128i)__builtin_ia32_pabsb128((__v16qi)__a); 67} 68 69/// Computes the absolute value of each of the packed 16-bit signed 70/// integers in the source operand and stores the 16-bit unsigned integer 71/// results in the destination. 72/// 73/// \headerfile <x86intrin.h> 74/// 75/// This intrinsic corresponds to the \c PABSW instruction. 76/// 77/// \param __a 78/// A 64-bit vector of [4 x i16]. 79/// \returns A 64-bit integer vector containing the absolute values of the 80/// elements in the operand. 81static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX 82_mm_abs_pi16(__m64 __a) 83{ 84 return (__m64)__builtin_ia32_pabsw((__v4hi)__a); 85} 86 87/// Computes the absolute value of each of the packed 16-bit signed 88/// integers in the source operand and stores the 16-bit unsigned integer 89/// results in the destination. 90/// 91/// \headerfile <x86intrin.h> 92/// 93/// This intrinsic corresponds to the \c VPABSW instruction. 94/// 95/// \param __a 96/// A 128-bit vector of [8 x i16]. 97/// \returns A 128-bit integer vector containing the absolute values of the 98/// elements in the operand. 
99static __inline__ __m128i __DEFAULT_FN_ATTRS 100_mm_abs_epi16(__m128i __a) 101{ 102 return (__m128i)__builtin_ia32_pabsw128((__v8hi)__a); 103} 104 105/// Computes the absolute value of each of the packed 32-bit signed 106/// integers in the source operand and stores the 32-bit unsigned integer 107/// results in the destination. 108/// 109/// \headerfile <x86intrin.h> 110/// 111/// This intrinsic corresponds to the \c PABSD instruction. 112/// 113/// \param __a 114/// A 64-bit vector of [2 x i32]. 115/// \returns A 64-bit integer vector containing the absolute values of the 116/// elements in the operand. 117static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX 118_mm_abs_pi32(__m64 __a) 119{ 120 return (__m64)__builtin_ia32_pabsd((__v2si)__a); 121} 122 123/// Computes the absolute value of each of the packed 32-bit signed 124/// integers in the source operand and stores the 32-bit unsigned integer 125/// results in the destination. 126/// 127/// \headerfile <x86intrin.h> 128/// 129/// This intrinsic corresponds to the \c VPABSD instruction. 130/// 131/// \param __a 132/// A 128-bit vector of [4 x i32]. 133/// \returns A 128-bit integer vector containing the absolute values of the 134/// elements in the operand. 135static __inline__ __m128i __DEFAULT_FN_ATTRS 136_mm_abs_epi32(__m128i __a) 137{ 138 return (__m128i)__builtin_ia32_pabsd128((__v4si)__a); 139} 140 141/// Concatenates the two 128-bit integer vector operands, and 142/// right-shifts the result by the number of bytes specified in the immediate 143/// operand. 144/// 145/// \headerfile <x86intrin.h> 146/// 147/// \code 148/// __m128i _mm_alignr_epi8(__m128i a, __m128i b, const int n); 149/// \endcode 150/// 151/// This intrinsic corresponds to the \c PALIGNR instruction. 152/// 153/// \param a 154/// A 128-bit vector of [16 x i8] containing one of the source operands. 155/// \param b 156/// A 128-bit vector of [16 x i8] containing one of the source operands. 
/// \param n
///    An immediate operand specifying how many bytes to right-shift the result.
/// \returns A 128-bit integer vector containing the concatenated right-shifted
///    value.
// The whole expansion is parenthesized so that operators applied to the macro
// invocation (subscripts, member access, etc.) act on the cast result rather
// than binding to the builtin call before the cast.
#define _mm_alignr_epi8(a, b, n) \
  ((__m128i)__builtin_ia32_palignr128((__v16qi)(__m128i)(a), \
                                      (__v16qi)(__m128i)(b), (n)))

/// Concatenates the two 64-bit integer vector operands, and right-shifts
///    the result by the number of bytes specified in the immediate operand.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m64 _mm_alignr_pi8(__m64 a, __m64 b, const int n);
/// \endcode
///
/// This intrinsic corresponds to the \c PALIGNR instruction.
///
/// \param a
///    A 64-bit vector of [8 x i8] containing one of the source operands.
/// \param b
///    A 64-bit vector of [8 x i8] containing one of the source operands.
/// \param n
///    An immediate operand specifying how many bytes to right-shift the result.
/// \returns A 64-bit integer vector containing the concatenated right-shifted
///    value.
// Fully parenthesized for the same reason as _mm_alignr_epi8: the cast must
// apply before any operator the caller writes after the macro invocation.
#define _mm_alignr_pi8(a, b, n) \
  ((__m64)__builtin_ia32_palignr((__v8qi)(__m64)(a), (__v8qi)(__m64)(b), (n)))

/// Horizontally adds the adjacent pairs of values contained in 2 packed
///    128-bit vectors of [8 x i16].
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the \c VPHADDW instruction.
///
/// \param __a
///    A 128-bit vector of [8 x i16] containing one of the source operands. The
///    horizontal sums of the values are stored in the lower bits of the
///    destination.
/// \param __b
///    A 128-bit vector of [8 x i16] containing one of the source operands. The
///    horizontal sums of the values are stored in the upper bits of the
///    destination.
/// \returns A 128-bit vector of [8 x i16] containing the horizontal sums of
///    both operands.
204static __inline__ __m128i __DEFAULT_FN_ATTRS 205_mm_hadd_epi16(__m128i __a, __m128i __b) 206{ 207 return (__m128i)__builtin_ia32_phaddw128((__v8hi)__a, (__v8hi)__b); 208} 209 210/// Horizontally adds the adjacent pairs of values contained in 2 packed 211/// 128-bit vectors of [4 x i32]. 212/// 213/// \headerfile <x86intrin.h> 214/// 215/// This intrinsic corresponds to the \c VPHADDD instruction. 216/// 217/// \param __a 218/// A 128-bit vector of [4 x i32] containing one of the source operands. The 219/// horizontal sums of the values are stored in the lower bits of the 220/// destination. 221/// \param __b 222/// A 128-bit vector of [4 x i32] containing one of the source operands. The 223/// horizontal sums of the values are stored in the upper bits of the 224/// destination. 225/// \returns A 128-bit vector of [4 x i32] containing the horizontal sums of 226/// both operands. 227static __inline__ __m128i __DEFAULT_FN_ATTRS 228_mm_hadd_epi32(__m128i __a, __m128i __b) 229{ 230 return (__m128i)__builtin_ia32_phaddd128((__v4si)__a, (__v4si)__b); 231} 232 233/// Horizontally adds the adjacent pairs of values contained in 2 packed 234/// 64-bit vectors of [4 x i16]. 235/// 236/// \headerfile <x86intrin.h> 237/// 238/// This intrinsic corresponds to the \c PHADDW instruction. 239/// 240/// \param __a 241/// A 64-bit vector of [4 x i16] containing one of the source operands. The 242/// horizontal sums of the values are stored in the lower bits of the 243/// destination. 244/// \param __b 245/// A 64-bit vector of [4 x i16] containing one of the source operands. The 246/// horizontal sums of the values are stored in the upper bits of the 247/// destination. 248/// \returns A 64-bit vector of [4 x i16] containing the horizontal sums of both 249/// operands. 
250static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX 251_mm_hadd_pi16(__m64 __a, __m64 __b) 252{ 253 return (__m64)__builtin_ia32_phaddw((__v4hi)__a, (__v4hi)__b); 254} 255 256/// Horizontally adds the adjacent pairs of values contained in 2 packed 257/// 64-bit vectors of [2 x i32]. 258/// 259/// \headerfile <x86intrin.h> 260/// 261/// This intrinsic corresponds to the \c PHADDD instruction. 262/// 263/// \param __a 264/// A 64-bit vector of [2 x i32] containing one of the source operands. The 265/// horizontal sums of the values are stored in the lower bits of the 266/// destination. 267/// \param __b 268/// A 64-bit vector of [2 x i32] containing one of the source operands. The 269/// horizontal sums of the values are stored in the upper bits of the 270/// destination. 271/// \returns A 64-bit vector of [2 x i32] containing the horizontal sums of both 272/// operands. 273static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX 274_mm_hadd_pi32(__m64 __a, __m64 __b) 275{ 276 return (__m64)__builtin_ia32_phaddd((__v2si)__a, (__v2si)__b); 277} 278 279/// Horizontally adds the adjacent pairs of values contained in 2 packed 280/// 128-bit vectors of [8 x i16]. Positive sums greater than 0x7FFF are 281/// saturated to 0x7FFF. Negative sums less than 0x8000 are saturated to 282/// 0x8000. 283/// 284/// \headerfile <x86intrin.h> 285/// 286/// This intrinsic corresponds to the \c VPHADDSW instruction. 287/// 288/// \param __a 289/// A 128-bit vector of [8 x i16] containing one of the source operands. The 290/// horizontal sums of the values are stored in the lower bits of the 291/// destination. 292/// \param __b 293/// A 128-bit vector of [8 x i16] containing one of the source operands. The 294/// horizontal sums of the values are stored in the upper bits of the 295/// destination. 296/// \returns A 128-bit vector of [8 x i16] containing the horizontal saturated 297/// sums of both operands. 
298static __inline__ __m128i __DEFAULT_FN_ATTRS 299_mm_hadds_epi16(__m128i __a, __m128i __b) 300{ 301 return (__m128i)__builtin_ia32_phaddsw128((__v8hi)__a, (__v8hi)__b); 302} 303 304/// Horizontally adds the adjacent pairs of values contained in 2 packed 305/// 64-bit vectors of [4 x i16]. Positive sums greater than 0x7FFF are 306/// saturated to 0x7FFF. Negative sums less than 0x8000 are saturated to 307/// 0x8000. 308/// 309/// \headerfile <x86intrin.h> 310/// 311/// This intrinsic corresponds to the \c PHADDSW instruction. 312/// 313/// \param __a 314/// A 64-bit vector of [4 x i16] containing one of the source operands. The 315/// horizontal sums of the values are stored in the lower bits of the 316/// destination. 317/// \param __b 318/// A 64-bit vector of [4 x i16] containing one of the source operands. The 319/// horizontal sums of the values are stored in the upper bits of the 320/// destination. 321/// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated 322/// sums of both operands. 323static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX 324_mm_hadds_pi16(__m64 __a, __m64 __b) 325{ 326 return (__m64)__builtin_ia32_phaddsw((__v4hi)__a, (__v4hi)__b); 327} 328 329/// Horizontally subtracts the adjacent pairs of values contained in 2 330/// packed 128-bit vectors of [8 x i16]. 331/// 332/// \headerfile <x86intrin.h> 333/// 334/// This intrinsic corresponds to the \c VPHSUBW instruction. 335/// 336/// \param __a 337/// A 128-bit vector of [8 x i16] containing one of the source operands. The 338/// horizontal differences between the values are stored in the lower bits of 339/// the destination. 340/// \param __b 341/// A 128-bit vector of [8 x i16] containing one of the source operands. The 342/// horizontal differences between the values are stored in the upper bits of 343/// the destination. 344/// \returns A 128-bit vector of [8 x i16] containing the horizontal differences 345/// of both operands. 
346static __inline__ __m128i __DEFAULT_FN_ATTRS 347_mm_hsub_epi16(__m128i __a, __m128i __b) 348{ 349 return (__m128i)__builtin_ia32_phsubw128((__v8hi)__a, (__v8hi)__b); 350} 351 352/// Horizontally subtracts the adjacent pairs of values contained in 2 353/// packed 128-bit vectors of [4 x i32]. 354/// 355/// \headerfile <x86intrin.h> 356/// 357/// This intrinsic corresponds to the \c VPHSUBD instruction. 358/// 359/// \param __a 360/// A 128-bit vector of [4 x i32] containing one of the source operands. The 361/// horizontal differences between the values are stored in the lower bits of 362/// the destination. 363/// \param __b 364/// A 128-bit vector of [4 x i32] containing one of the source operands. The 365/// horizontal differences between the values are stored in the upper bits of 366/// the destination. 367/// \returns A 128-bit vector of [4 x i32] containing the horizontal differences 368/// of both operands. 369static __inline__ __m128i __DEFAULT_FN_ATTRS 370_mm_hsub_epi32(__m128i __a, __m128i __b) 371{ 372 return (__m128i)__builtin_ia32_phsubd128((__v4si)__a, (__v4si)__b); 373} 374 375/// Horizontally subtracts the adjacent pairs of values contained in 2 376/// packed 64-bit vectors of [4 x i16]. 377/// 378/// \headerfile <x86intrin.h> 379/// 380/// This intrinsic corresponds to the \c PHSUBW instruction. 381/// 382/// \param __a 383/// A 64-bit vector of [4 x i16] containing one of the source operands. The 384/// horizontal differences between the values are stored in the lower bits of 385/// the destination. 386/// \param __b 387/// A 64-bit vector of [4 x i16] containing one of the source operands. The 388/// horizontal differences between the values are stored in the upper bits of 389/// the destination. 390/// \returns A 64-bit vector of [4 x i16] containing the horizontal differences 391/// of both operands. 
392static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX 393_mm_hsub_pi16(__m64 __a, __m64 __b) 394{ 395 return (__m64)__builtin_ia32_phsubw((__v4hi)__a, (__v4hi)__b); 396} 397 398/// Horizontally subtracts the adjacent pairs of values contained in 2 399/// packed 64-bit vectors of [2 x i32]. 400/// 401/// \headerfile <x86intrin.h> 402/// 403/// This intrinsic corresponds to the \c PHSUBD instruction. 404/// 405/// \param __a 406/// A 64-bit vector of [2 x i32] containing one of the source operands. The 407/// horizontal differences between the values are stored in the lower bits of 408/// the destination. 409/// \param __b 410/// A 64-bit vector of [2 x i32] containing one of the source operands. The 411/// horizontal differences between the values are stored in the upper bits of 412/// the destination. 413/// \returns A 64-bit vector of [2 x i32] containing the horizontal differences 414/// of both operands. 415static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX 416_mm_hsub_pi32(__m64 __a, __m64 __b) 417{ 418 return (__m64)__builtin_ia32_phsubd((__v2si)__a, (__v2si)__b); 419} 420 421/// Horizontally subtracts the adjacent pairs of values contained in 2 422/// packed 128-bit vectors of [8 x i16]. Positive differences greater than 423/// 0x7FFF are saturated to 0x7FFF. Negative differences less than 0x8000 are 424/// saturated to 0x8000. 425/// 426/// \headerfile <x86intrin.h> 427/// 428/// This intrinsic corresponds to the \c VPHSUBSW instruction. 429/// 430/// \param __a 431/// A 128-bit vector of [8 x i16] containing one of the source operands. The 432/// horizontal differences between the values are stored in the lower bits of 433/// the destination. 434/// \param __b 435/// A 128-bit vector of [8 x i16] containing one of the source operands. The 436/// horizontal differences between the values are stored in the upper bits of 437/// the destination. 438/// \returns A 128-bit vector of [8 x i16] containing the horizontal saturated 439/// differences of both operands. 
440static __inline__ __m128i __DEFAULT_FN_ATTRS 441_mm_hsubs_epi16(__m128i __a, __m128i __b) 442{ 443 return (__m128i)__builtin_ia32_phsubsw128((__v8hi)__a, (__v8hi)__b); 444} 445 446/// Horizontally subtracts the adjacent pairs of values contained in 2 447/// packed 64-bit vectors of [4 x i16]. Positive differences greater than 448/// 0x7FFF are saturated to 0x7FFF. Negative differences less than 0x8000 are 449/// saturated to 0x8000. 450/// 451/// \headerfile <x86intrin.h> 452/// 453/// This intrinsic corresponds to the \c PHSUBSW instruction. 454/// 455/// \param __a 456/// A 64-bit vector of [4 x i16] containing one of the source operands. The 457/// horizontal differences between the values are stored in the lower bits of 458/// the destination. 459/// \param __b 460/// A 64-bit vector of [4 x i16] containing one of the source operands. The 461/// horizontal differences between the values are stored in the upper bits of 462/// the destination. 463/// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated 464/// differences of both operands. 465static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX 466_mm_hsubs_pi16(__m64 __a, __m64 __b) 467{ 468 return (__m64)__builtin_ia32_phsubsw((__v4hi)__a, (__v4hi)__b); 469} 470 471/// Multiplies corresponding pairs of packed 8-bit unsigned integer 472/// values contained in the first source operand and packed 8-bit signed 473/// integer values contained in the second source operand, adds pairs of 474/// contiguous products with signed saturation, and writes the 16-bit sums to 475/// the corresponding bits in the destination. 476/// 477/// For example, bits [7:0] of both operands are multiplied, bits [15:8] of 478/// both operands are multiplied, and the sum of both results is written to 479/// bits [15:0] of the destination. 480/// 481/// \headerfile <x86intrin.h> 482/// 483/// This intrinsic corresponds to the \c VPMADDUBSW instruction. 
484/// 485/// \param __a 486/// A 128-bit integer vector containing the first source operand. 487/// \param __b 488/// A 128-bit integer vector containing the second source operand. 489/// \returns A 128-bit integer vector containing the sums of products of both 490/// operands: \n 491/// \a R0 := (\a __a0 * \a __b0) + (\a __a1 * \a __b1) \n 492/// \a R1 := (\a __a2 * \a __b2) + (\a __a3 * \a __b3) \n 493/// \a R2 := (\a __a4 * \a __b4) + (\a __a5 * \a __b5) \n 494/// \a R3 := (\a __a6 * \a __b6) + (\a __a7 * \a __b7) \n 495/// \a R4 := (\a __a8 * \a __b8) + (\a __a9 * \a __b9) \n 496/// \a R5 := (\a __a10 * \a __b10) + (\a __a11 * \a __b11) \n 497/// \a R6 := (\a __a12 * \a __b12) + (\a __a13 * \a __b13) \n 498/// \a R7 := (\a __a14 * \a __b14) + (\a __a15 * \a __b15) 499static __inline__ __m128i __DEFAULT_FN_ATTRS 500_mm_maddubs_epi16(__m128i __a, __m128i __b) 501{ 502 return (__m128i)__builtin_ia32_pmaddubsw128((__v16qi)__a, (__v16qi)__b); 503} 504 505/// Multiplies corresponding pairs of packed 8-bit unsigned integer 506/// values contained in the first source operand and packed 8-bit signed 507/// integer values contained in the second source operand, adds pairs of 508/// contiguous products with signed saturation, and writes the 16-bit sums to 509/// the corresponding bits in the destination. 510/// 511/// For example, bits [7:0] of both operands are multiplied, bits [15:8] of 512/// both operands are multiplied, and the sum of both results is written to 513/// bits [15:0] of the destination. 514/// 515/// \headerfile <x86intrin.h> 516/// 517/// This intrinsic corresponds to the \c PMADDUBSW instruction. 518/// 519/// \param __a 520/// A 64-bit integer vector containing the first source operand. 521/// \param __b 522/// A 64-bit integer vector containing the second source operand. 
523/// \returns A 64-bit integer vector containing the sums of products of both 524/// operands: \n 525/// \a R0 := (\a __a0 * \a __b0) + (\a __a1 * \a __b1) \n 526/// \a R1 := (\a __a2 * \a __b2) + (\a __a3 * \a __b3) \n 527/// \a R2 := (\a __a4 * \a __b4) + (\a __a5 * \a __b5) \n 528/// \a R3 := (\a __a6 * \a __b6) + (\a __a7 * \a __b7) 529static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX 530_mm_maddubs_pi16(__m64 __a, __m64 __b) 531{ 532 return (__m64)__builtin_ia32_pmaddubsw((__v8qi)__a, (__v8qi)__b); 533} 534 535/// Multiplies packed 16-bit signed integer values, truncates the 32-bit 536/// products to the 18 most significant bits by right-shifting, rounds the 537/// truncated value by adding 1, and writes bits [16:1] to the destination. 538/// 539/// \headerfile <x86intrin.h> 540/// 541/// This intrinsic corresponds to the \c VPMULHRSW instruction. 542/// 543/// \param __a 544/// A 128-bit vector of [8 x i16] containing one of the source operands. 545/// \param __b 546/// A 128-bit vector of [8 x i16] containing one of the source operands. 547/// \returns A 128-bit vector of [8 x i16] containing the rounded and scaled 548/// products of both operands. 549static __inline__ __m128i __DEFAULT_FN_ATTRS 550_mm_mulhrs_epi16(__m128i __a, __m128i __b) 551{ 552 return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)__a, (__v8hi)__b); 553} 554 555/// Multiplies packed 16-bit signed integer values, truncates the 32-bit 556/// products to the 18 most significant bits by right-shifting, rounds the 557/// truncated value by adding 1, and writes bits [16:1] to the destination. 558/// 559/// \headerfile <x86intrin.h> 560/// 561/// This intrinsic corresponds to the \c PMULHRSW instruction. 562/// 563/// \param __a 564/// A 64-bit vector of [4 x i16] containing one of the source operands. 565/// \param __b 566/// A 64-bit vector of [4 x i16] containing one of the source operands. 
567/// \returns A 64-bit vector of [4 x i16] containing the rounded and scaled 568/// products of both operands. 569static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX 570_mm_mulhrs_pi16(__m64 __a, __m64 __b) 571{ 572 return (__m64)__builtin_ia32_pmulhrsw((__v4hi)__a, (__v4hi)__b); 573} 574 575/// Copies the 8-bit integers from a 128-bit integer vector to the 576/// destination or clears 8-bit values in the destination, as specified by 577/// the second source operand. 578/// 579/// \headerfile <x86intrin.h> 580/// 581/// This intrinsic corresponds to the \c VPSHUFB instruction. 582/// 583/// \param __a 584/// A 128-bit integer vector containing the values to be copied. 585/// \param __b 586/// A 128-bit integer vector containing control bytes corresponding to 587/// positions in the destination: 588/// Bit 7: \n 589/// 1: Clear the corresponding byte in the destination. \n 590/// 0: Copy the selected source byte to the corresponding byte in the 591/// destination. \n 592/// Bits [6:4] Reserved. \n 593/// Bits [3:0] select the source byte to be copied. 594/// \returns A 128-bit integer vector containing the copied or cleared values. 595static __inline__ __m128i __DEFAULT_FN_ATTRS 596_mm_shuffle_epi8(__m128i __a, __m128i __b) 597{ 598 return (__m128i)__builtin_ia32_pshufb128((__v16qi)__a, (__v16qi)__b); 599} 600 601/// Copies the 8-bit integers from a 64-bit integer vector to the 602/// destination or clears 8-bit values in the destination, as specified by 603/// the second source operand. 604/// 605/// \headerfile <x86intrin.h> 606/// 607/// This intrinsic corresponds to the \c PSHUFB instruction. 608/// 609/// \param __a 610/// A 64-bit integer vector containing the values to be copied. 611/// \param __b 612/// A 64-bit integer vector containing control bytes corresponding to 613/// positions in the destination: 614/// Bit 7: \n 615/// 1: Clear the corresponding byte in the destination. 
\n 616/// 0: Copy the selected source byte to the corresponding byte in the 617/// destination. \n 618/// Bits [3:0] select the source byte to be copied. 619/// \returns A 64-bit integer vector containing the copied or cleared values. 620static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX 621_mm_shuffle_pi8(__m64 __a, __m64 __b) 622{ 623 return (__m64)__builtin_ia32_pshufb((__v8qi)__a, (__v8qi)__b); 624} 625 626/// For each 8-bit integer in the first source operand, perform one of 627/// the following actions as specified by the second source operand. 628/// 629/// If the byte in the second source is negative, calculate the two's 630/// complement of the corresponding byte in the first source, and write that 631/// value to the destination. If the byte in the second source is positive, 632/// copy the corresponding byte from the first source to the destination. If 633/// the byte in the second source is zero, clear the corresponding byte in 634/// the destination. 635/// 636/// \headerfile <x86intrin.h> 637/// 638/// This intrinsic corresponds to the \c VPSIGNB instruction. 639/// 640/// \param __a 641/// A 128-bit integer vector containing the values to be copied. 642/// \param __b 643/// A 128-bit integer vector containing control bytes corresponding to 644/// positions in the destination. 645/// \returns A 128-bit integer vector containing the resultant values. 646static __inline__ __m128i __DEFAULT_FN_ATTRS 647_mm_sign_epi8(__m128i __a, __m128i __b) 648{ 649 return (__m128i)__builtin_ia32_psignb128((__v16qi)__a, (__v16qi)__b); 650} 651 652/// For each 16-bit integer in the first source operand, perform one of 653/// the following actions as specified by the second source operand. 654/// 655/// If the word in the second source is negative, calculate the two's 656/// complement of the corresponding word in the first source, and write that 657/// value to the destination. 
If the word in the second source is positive, 658/// copy the corresponding word from the first source to the destination. If 659/// the word in the second source is zero, clear the corresponding word in 660/// the destination. 661/// 662/// \headerfile <x86intrin.h> 663/// 664/// This intrinsic corresponds to the \c VPSIGNW instruction. 665/// 666/// \param __a 667/// A 128-bit integer vector containing the values to be copied. 668/// \param __b 669/// A 128-bit integer vector containing control words corresponding to 670/// positions in the destination. 671/// \returns A 128-bit integer vector containing the resultant values. 672static __inline__ __m128i __DEFAULT_FN_ATTRS 673_mm_sign_epi16(__m128i __a, __m128i __b) 674{ 675 return (__m128i)__builtin_ia32_psignw128((__v8hi)__a, (__v8hi)__b); 676} 677 678/// For each 32-bit integer in the first source operand, perform one of 679/// the following actions as specified by the second source operand. 680/// 681/// If the doubleword in the second source is negative, calculate the two's 682/// complement of the corresponding word in the first source, and write that 683/// value to the destination. If the doubleword in the second source is 684/// positive, copy the corresponding word from the first source to the 685/// destination. If the doubleword in the second source is zero, clear the 686/// corresponding word in the destination. 687/// 688/// \headerfile <x86intrin.h> 689/// 690/// This intrinsic corresponds to the \c VPSIGND instruction. 691/// 692/// \param __a 693/// A 128-bit integer vector containing the values to be copied. 694/// \param __b 695/// A 128-bit integer vector containing control doublewords corresponding to 696/// positions in the destination. 697/// \returns A 128-bit integer vector containing the resultant values. 
698static __inline__ __m128i __DEFAULT_FN_ATTRS 699_mm_sign_epi32(__m128i __a, __m128i __b) 700{ 701 return (__m128i)__builtin_ia32_psignd128((__v4si)__a, (__v4si)__b); 702} 703 704/// For each 8-bit integer in the first source operand, perform one of 705/// the following actions as specified by the second source operand. 706/// 707/// If the byte in the second source is negative, calculate the two's 708/// complement of the corresponding byte in the first source, and write that 709/// value to the destination. If the byte in the second source is positive, 710/// copy the corresponding byte from the first source to the destination. If 711/// the byte in the second source is zero, clear the corresponding byte in 712/// the destination. 713/// 714/// \headerfile <x86intrin.h> 715/// 716/// This intrinsic corresponds to the \c PSIGNB instruction. 717/// 718/// \param __a 719/// A 64-bit integer vector containing the values to be copied. 720/// \param __b 721/// A 64-bit integer vector containing control bytes corresponding to 722/// positions in the destination. 723/// \returns A 64-bit integer vector containing the resultant values. 724static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX 725_mm_sign_pi8(__m64 __a, __m64 __b) 726{ 727 return (__m64)__builtin_ia32_psignb((__v8qi)__a, (__v8qi)__b); 728} 729 730/// For each 16-bit integer in the first source operand, perform one of 731/// the following actions as specified by the second source operand. 732/// 733/// If the word in the second source is negative, calculate the two's 734/// complement of the corresponding word in the first source, and write that 735/// value to the destination. If the word in the second source is positive, 736/// copy the corresponding word from the first source to the destination. If 737/// the word in the second source is zero, clear the corresponding word in 738/// the destination. 739/// 740/// \headerfile <x86intrin.h> 741/// 742/// This intrinsic corresponds to the \c PSIGNW instruction. 
743/// 744/// \param __a 745/// A 64-bit integer vector containing the values to be copied. 746/// \param __b 747/// A 64-bit integer vector containing control words corresponding to 748/// positions in the destination. 749/// \returns A 64-bit integer vector containing the resultant values. 750static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX 751_mm_sign_pi16(__m64 __a, __m64 __b) 752{ 753 return (__m64)__builtin_ia32_psignw((__v4hi)__a, (__v4hi)__b); 754} 755 756/// For each 32-bit integer in the first source operand, perform one of 757/// the following actions as specified by the second source operand. 758/// 759/// If the doubleword in the second source is negative, calculate the two's 760/// complement of the corresponding doubleword in the first source, and 761/// write that value to the destination. If the doubleword in the second 762/// source is positive, copy the corresponding doubleword from the first 763/// source to the destination. If the doubleword in the second source is 764/// zero, clear the corresponding doubleword in the destination. 765/// 766/// \headerfile <x86intrin.h> 767/// 768/// This intrinsic corresponds to the \c PSIGND instruction. 769/// 770/// \param __a 771/// A 64-bit integer vector containing the values to be copied. 772/// \param __b 773/// A 64-bit integer vector containing two control doublewords corresponding 774/// to positions in the destination. 775/// \returns A 64-bit integer vector containing the resultant values. 776static __inline__ __m64 __DEFAULT_FN_ATTRS_MMX 777_mm_sign_pi32(__m64 __a, __m64 __b) 778{ 779 return (__m64)__builtin_ia32_psignd((__v2si)__a, (__v2si)__b); 780} 781 782#undef __DEFAULT_FN_ATTRS 783#undef __DEFAULT_FN_ATTRS_MMX 784 785#endif /* __TMMINTRIN_H */ 786