1193326Sed/*===---- tmmintrin.h - SSSE3 intrinsics -----------------------------------=== 2193326Sed * 3353358Sdim * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4353358Sdim * See https://llvm.org/LICENSE.txt for license information. 5353358Sdim * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6193326Sed * 7193326Sed *===-----------------------------------------------------------------------=== 8193326Sed */ 9296417Sdim 10193326Sed#ifndef __TMMINTRIN_H 11193326Sed#define __TMMINTRIN_H 12193326Sed 13193326Sed#include <pmmintrin.h> 14193326Sed 15288943Sdim/* Define the default attributes for the functions in this file. */ 16341825Sdim#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("ssse3"), __min_vector_width__(64))) 17341825Sdim#define __DEFAULT_FN_ATTRS_MMX __attribute__((__always_inline__, __nodebug__, __target__("mmx,ssse3"), __min_vector_width__(64))) 18288943Sdim 19341825Sdim/// Computes the absolute value of each of the packed 8-bit signed 20309124Sdim/// integers in the source operand and stores the 8-bit unsigned integer 21309124Sdim/// results in the destination. 22309124Sdim/// 23309124Sdim/// \headerfile <x86intrin.h> 24309124Sdim/// 25309124Sdim/// This intrinsic corresponds to the \c PABSB instruction. 26309124Sdim/// 27309124Sdim/// \param __a 28309124Sdim/// A 64-bit vector of [8 x i8]. 29309124Sdim/// \returns A 64-bit integer vector containing the absolute values of the 30309124Sdim/// elements in the operand. 31341825Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS_MMX 32249423Sdim_mm_abs_pi8(__m64 __a) 33193326Sed{ 34249423Sdim return (__m64)__builtin_ia32_pabsb((__v8qi)__a); 35193326Sed} 36193326Sed 37341825Sdim/// Computes the absolute value of each of the packed 8-bit signed 38309124Sdim/// integers in the source operand and stores the 8-bit unsigned integer 39309124Sdim/// results in the destination. 
40309124Sdim/// 41309124Sdim/// \headerfile <x86intrin.h> 42309124Sdim/// 43309124Sdim/// This intrinsic corresponds to the \c VPABSB instruction. 44309124Sdim/// 45309124Sdim/// \param __a 46309124Sdim/// A 128-bit vector of [16 x i8]. 47309124Sdim/// \returns A 128-bit integer vector containing the absolute values of the 48309124Sdim/// elements in the operand. 49288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 50249423Sdim_mm_abs_epi8(__m128i __a) 51193326Sed{ 52249423Sdim return (__m128i)__builtin_ia32_pabsb128((__v16qi)__a); 53193326Sed} 54193326Sed 55341825Sdim/// Computes the absolute value of each of the packed 16-bit signed 56309124Sdim/// integers in the source operand and stores the 16-bit unsigned integer 57309124Sdim/// results in the destination. 58309124Sdim/// 59309124Sdim/// \headerfile <x86intrin.h> 60309124Sdim/// 61309124Sdim/// This intrinsic corresponds to the \c PABSW instruction. 62309124Sdim/// 63309124Sdim/// \param __a 64309124Sdim/// A 64-bit vector of [4 x i16]. 65309124Sdim/// \returns A 64-bit integer vector containing the absolute values of the 66309124Sdim/// elements in the operand. 67341825Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS_MMX 68249423Sdim_mm_abs_pi16(__m64 __a) 69193326Sed{ 70249423Sdim return (__m64)__builtin_ia32_pabsw((__v4hi)__a); 71193326Sed} 72193326Sed 73341825Sdim/// Computes the absolute value of each of the packed 16-bit signed 74309124Sdim/// integers in the source operand and stores the 16-bit unsigned integer 75309124Sdim/// results in the destination. 76309124Sdim/// 77309124Sdim/// \headerfile <x86intrin.h> 78309124Sdim/// 79309124Sdim/// This intrinsic corresponds to the \c VPABSW instruction. 80309124Sdim/// 81309124Sdim/// \param __a 82309124Sdim/// A 128-bit vector of [8 x i16]. 83309124Sdim/// \returns A 128-bit integer vector containing the absolute values of the 84309124Sdim/// elements in the operand. 
85288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 86249423Sdim_mm_abs_epi16(__m128i __a) 87193326Sed{ 88249423Sdim return (__m128i)__builtin_ia32_pabsw128((__v8hi)__a); 89193326Sed} 90193326Sed 91341825Sdim/// Computes the absolute value of each of the packed 32-bit signed 92309124Sdim/// integers in the source operand and stores the 32-bit unsigned integer 93309124Sdim/// results in the destination. 94309124Sdim/// 95309124Sdim/// \headerfile <x86intrin.h> 96309124Sdim/// 97309124Sdim/// This intrinsic corresponds to the \c PABSD instruction. 98309124Sdim/// 99309124Sdim/// \param __a 100309124Sdim/// A 64-bit vector of [2 x i32]. 101309124Sdim/// \returns A 64-bit integer vector containing the absolute values of the 102309124Sdim/// elements in the operand. 103341825Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS_MMX 104249423Sdim_mm_abs_pi32(__m64 __a) 105193326Sed{ 106249423Sdim return (__m64)__builtin_ia32_pabsd((__v2si)__a); 107193326Sed} 108193326Sed 109341825Sdim/// Computes the absolute value of each of the packed 32-bit signed 110309124Sdim/// integers in the source operand and stores the 32-bit unsigned integer 111309124Sdim/// results in the destination. 112309124Sdim/// 113309124Sdim/// \headerfile <x86intrin.h> 114309124Sdim/// 115309124Sdim/// This intrinsic corresponds to the \c VPABSD instruction. 116309124Sdim/// 117309124Sdim/// \param __a 118309124Sdim/// A 128-bit vector of [4 x i32]. 119309124Sdim/// \returns A 128-bit integer vector containing the absolute values of the 120309124Sdim/// elements in the operand. 121288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 122249423Sdim_mm_abs_epi32(__m128i __a) 123193326Sed{ 124249423Sdim return (__m128i)__builtin_ia32_pabsd128((__v4si)__a); 125193326Sed} 126193326Sed 127341825Sdim/// Concatenates the two 128-bit integer vector operands, and 128309124Sdim/// right-shifts the result by the number of bytes specified in the immediate 129309124Sdim/// operand. 
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm_alignr_epi8(__m128i a, __m128i b, const int n);
/// \endcode
///
/// This intrinsic corresponds to the \c PALIGNR instruction.
///
/// \param a
///    A 128-bit vector of [16 x i8] containing one of the source operands.
/// \param b
///    A 128-bit vector of [16 x i8] containing one of the source operands.
/// \param n
///    An immediate operand specifying how many bytes to right-shift the result.
/// \returns A 128-bit integer vector containing the concatenated right-shifted
///    value.
/* The whole expansion is parenthesized so that the (__m128i) cast applies to
 * the builtin's result even when the macro is followed by a postfix operator
 * or embedded in a larger expression. */
#define _mm_alignr_epi8(a, b, n) \
  ((__m128i)__builtin_ia32_palignr128((__v16qi)(__m128i)(a), \
                                      (__v16qi)(__m128i)(b), (n)))

/// Concatenates the two 64-bit integer vector operands, and right-shifts
///    the result by the number of bytes specified in the immediate operand.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m64 _mm_alignr_pi8(__m64 a, __m64 b, const int n);
/// \endcode
///
/// This intrinsic corresponds to the \c PALIGNR instruction.
///
/// \param a
///    A 64-bit vector of [8 x i8] containing one of the source operands.
/// \param b
///    A 64-bit vector of [8 x i8] containing one of the source operands.
/// \param n
///    An immediate operand specifying how many bytes to right-shift the result.
/// \returns A 64-bit integer vector containing the concatenated right-shifted
///    value.
170341825Sdim#define _mm_alignr_pi8(a, b, n) \ 171341825Sdim (__m64)__builtin_ia32_palignr((__v8qi)(__m64)(a), (__v8qi)(__m64)(b), (n)) 172234353Sdim 173341825Sdim/// Horizontally adds the adjacent pairs of values contained in 2 packed 174309124Sdim/// 128-bit vectors of [8 x i16]. 175309124Sdim/// 176309124Sdim/// \headerfile <x86intrin.h> 177309124Sdim/// 178309124Sdim/// This intrinsic corresponds to the \c VPHADDW instruction. 179309124Sdim/// 180309124Sdim/// \param __a 181309124Sdim/// A 128-bit vector of [8 x i16] containing one of the source operands. The 182309124Sdim/// horizontal sums of the values are stored in the lower bits of the 183309124Sdim/// destination. 184309124Sdim/// \param __b 185309124Sdim/// A 128-bit vector of [8 x i16] containing one of the source operands. The 186309124Sdim/// horizontal sums of the values are stored in the upper bits of the 187309124Sdim/// destination. 188309124Sdim/// \returns A 128-bit vector of [8 x i16] containing the horizontal sums of 189309124Sdim/// both operands. 190288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 191249423Sdim_mm_hadd_epi16(__m128i __a, __m128i __b) 192193326Sed{ 193249423Sdim return (__m128i)__builtin_ia32_phaddw128((__v8hi)__a, (__v8hi)__b); 194193326Sed} 195193326Sed 196341825Sdim/// Horizontally adds the adjacent pairs of values contained in 2 packed 197309124Sdim/// 128-bit vectors of [4 x i32]. 198309124Sdim/// 199309124Sdim/// \headerfile <x86intrin.h> 200309124Sdim/// 201309124Sdim/// This intrinsic corresponds to the \c VPHADDD instruction. 202309124Sdim/// 203309124Sdim/// \param __a 204309124Sdim/// A 128-bit vector of [4 x i32] containing one of the source operands. The 205309124Sdim/// horizontal sums of the values are stored in the lower bits of the 206309124Sdim/// destination. 207309124Sdim/// \param __b 208309124Sdim/// A 128-bit vector of [4 x i32] containing one of the source operands. 
The 209309124Sdim/// horizontal sums of the values are stored in the upper bits of the 210309124Sdim/// destination. 211309124Sdim/// \returns A 128-bit vector of [4 x i32] containing the horizontal sums of 212309124Sdim/// both operands. 213288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 214249423Sdim_mm_hadd_epi32(__m128i __a, __m128i __b) 215193326Sed{ 216249423Sdim return (__m128i)__builtin_ia32_phaddd128((__v4si)__a, (__v4si)__b); 217193326Sed} 218193326Sed 219341825Sdim/// Horizontally adds the adjacent pairs of values contained in 2 packed 220309124Sdim/// 64-bit vectors of [4 x i16]. 221309124Sdim/// 222309124Sdim/// \headerfile <x86intrin.h> 223309124Sdim/// 224309124Sdim/// This intrinsic corresponds to the \c PHADDW instruction. 225309124Sdim/// 226309124Sdim/// \param __a 227309124Sdim/// A 64-bit vector of [4 x i16] containing one of the source operands. The 228309124Sdim/// horizontal sums of the values are stored in the lower bits of the 229309124Sdim/// destination. 230309124Sdim/// \param __b 231309124Sdim/// A 64-bit vector of [4 x i16] containing one of the source operands. The 232309124Sdim/// horizontal sums of the values are stored in the upper bits of the 233309124Sdim/// destination. 234309124Sdim/// \returns A 64-bit vector of [4 x i16] containing the horizontal sums of both 235309124Sdim/// operands. 236341825Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS_MMX 237249423Sdim_mm_hadd_pi16(__m64 __a, __m64 __b) 238193326Sed{ 239249423Sdim return (__m64)__builtin_ia32_phaddw((__v4hi)__a, (__v4hi)__b); 240193326Sed} 241193326Sed 242341825Sdim/// Horizontally adds the adjacent pairs of values contained in 2 packed 243309124Sdim/// 64-bit vectors of [2 x i32]. 244309124Sdim/// 245309124Sdim/// \headerfile <x86intrin.h> 246309124Sdim/// 247309124Sdim/// This intrinsic corresponds to the \c PHADDD instruction. 248309124Sdim/// 249309124Sdim/// \param __a 250309124Sdim/// A 64-bit vector of [2 x i32] containing one of the source operands. 
The 251309124Sdim/// horizontal sums of the values are stored in the lower bits of the 252309124Sdim/// destination. 253309124Sdim/// \param __b 254309124Sdim/// A 64-bit vector of [2 x i32] containing one of the source operands. The 255309124Sdim/// horizontal sums of the values are stored in the upper bits of the 256309124Sdim/// destination. 257309124Sdim/// \returns A 64-bit vector of [2 x i32] containing the horizontal sums of both 258309124Sdim/// operands. 259341825Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS_MMX 260249423Sdim_mm_hadd_pi32(__m64 __a, __m64 __b) 261193326Sed{ 262249423Sdim return (__m64)__builtin_ia32_phaddd((__v2si)__a, (__v2si)__b); 263193326Sed} 264193326Sed 265341825Sdim/// Horizontally adds the adjacent pairs of values contained in 2 packed 266341825Sdim/// 128-bit vectors of [8 x i16]. Positive sums greater than 0x7FFF are 267341825Sdim/// saturated to 0x7FFF. Negative sums less than 0x8000 are saturated to 268341825Sdim/// 0x8000. 269309124Sdim/// 270309124Sdim/// \headerfile <x86intrin.h> 271309124Sdim/// 272309124Sdim/// This intrinsic corresponds to the \c VPHADDSW instruction. 273309124Sdim/// 274309124Sdim/// \param __a 275309124Sdim/// A 128-bit vector of [8 x i16] containing one of the source operands. The 276309124Sdim/// horizontal sums of the values are stored in the lower bits of the 277309124Sdim/// destination. 278309124Sdim/// \param __b 279309124Sdim/// A 128-bit vector of [8 x i16] containing one of the source operands. The 280309124Sdim/// horizontal sums of the values are stored in the upper bits of the 281309124Sdim/// destination. 282309124Sdim/// \returns A 128-bit vector of [8 x i16] containing the horizontal saturated 283309124Sdim/// sums of both operands. 
284288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 285249423Sdim_mm_hadds_epi16(__m128i __a, __m128i __b) 286193326Sed{ 287249423Sdim return (__m128i)__builtin_ia32_phaddsw128((__v8hi)__a, (__v8hi)__b); 288193326Sed} 289193326Sed 290341825Sdim/// Horizontally adds the adjacent pairs of values contained in 2 packed 291341825Sdim/// 64-bit vectors of [4 x i16]. Positive sums greater than 0x7FFF are 292341825Sdim/// saturated to 0x7FFF. Negative sums less than 0x8000 are saturated to 293341825Sdim/// 0x8000. 294309124Sdim/// 295309124Sdim/// \headerfile <x86intrin.h> 296309124Sdim/// 297309124Sdim/// This intrinsic corresponds to the \c PHADDSW instruction. 298309124Sdim/// 299309124Sdim/// \param __a 300309124Sdim/// A 64-bit vector of [4 x i16] containing one of the source operands. The 301309124Sdim/// horizontal sums of the values are stored in the lower bits of the 302309124Sdim/// destination. 303309124Sdim/// \param __b 304309124Sdim/// A 64-bit vector of [4 x i16] containing one of the source operands. The 305309124Sdim/// horizontal sums of the values are stored in the upper bits of the 306309124Sdim/// destination. 307309124Sdim/// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated 308309124Sdim/// sums of both operands. 309341825Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS_MMX 310249423Sdim_mm_hadds_pi16(__m64 __a, __m64 __b) 311193326Sed{ 312249423Sdim return (__m64)__builtin_ia32_phaddsw((__v4hi)__a, (__v4hi)__b); 313193326Sed} 314193326Sed 315341825Sdim/// Horizontally subtracts the adjacent pairs of values contained in 2 316309124Sdim/// packed 128-bit vectors of [8 x i16]. 317309124Sdim/// 318309124Sdim/// \headerfile <x86intrin.h> 319309124Sdim/// 320309124Sdim/// This intrinsic corresponds to the \c VPHSUBW instruction. 321309124Sdim/// 322309124Sdim/// \param __a 323309124Sdim/// A 128-bit vector of [8 x i16] containing one of the source operands. 
The 324309124Sdim/// horizontal differences between the values are stored in the lower bits of 325309124Sdim/// the destination. 326309124Sdim/// \param __b 327309124Sdim/// A 128-bit vector of [8 x i16] containing one of the source operands. The 328309124Sdim/// horizontal differences between the values are stored in the upper bits of 329309124Sdim/// the destination. 330309124Sdim/// \returns A 128-bit vector of [8 x i16] containing the horizontal differences 331309124Sdim/// of both operands. 332288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 333249423Sdim_mm_hsub_epi16(__m128i __a, __m128i __b) 334193326Sed{ 335249423Sdim return (__m128i)__builtin_ia32_phsubw128((__v8hi)__a, (__v8hi)__b); 336193326Sed} 337193326Sed 338341825Sdim/// Horizontally subtracts the adjacent pairs of values contained in 2 339309124Sdim/// packed 128-bit vectors of [4 x i32]. 340309124Sdim/// 341309124Sdim/// \headerfile <x86intrin.h> 342309124Sdim/// 343309124Sdim/// This intrinsic corresponds to the \c VPHSUBD instruction. 344309124Sdim/// 345309124Sdim/// \param __a 346309124Sdim/// A 128-bit vector of [4 x i32] containing one of the source operands. The 347309124Sdim/// horizontal differences between the values are stored in the lower bits of 348309124Sdim/// the destination. 349309124Sdim/// \param __b 350309124Sdim/// A 128-bit vector of [4 x i32] containing one of the source operands. The 351309124Sdim/// horizontal differences between the values are stored in the upper bits of 352309124Sdim/// the destination. 353309124Sdim/// \returns A 128-bit vector of [4 x i32] containing the horizontal differences 354309124Sdim/// of both operands. 
355288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 356249423Sdim_mm_hsub_epi32(__m128i __a, __m128i __b) 357193326Sed{ 358249423Sdim return (__m128i)__builtin_ia32_phsubd128((__v4si)__a, (__v4si)__b); 359193326Sed} 360193326Sed 361341825Sdim/// Horizontally subtracts the adjacent pairs of values contained in 2 362309124Sdim/// packed 64-bit vectors of [4 x i16]. 363309124Sdim/// 364309124Sdim/// \headerfile <x86intrin.h> 365309124Sdim/// 366309124Sdim/// This intrinsic corresponds to the \c PHSUBW instruction. 367309124Sdim/// 368309124Sdim/// \param __a 369309124Sdim/// A 64-bit vector of [4 x i16] containing one of the source operands. The 370309124Sdim/// horizontal differences between the values are stored in the lower bits of 371309124Sdim/// the destination. 372309124Sdim/// \param __b 373309124Sdim/// A 64-bit vector of [4 x i16] containing one of the source operands. The 374309124Sdim/// horizontal differences between the values are stored in the upper bits of 375309124Sdim/// the destination. 376309124Sdim/// \returns A 64-bit vector of [4 x i16] containing the horizontal differences 377309124Sdim/// of both operands. 378341825Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS_MMX 379249423Sdim_mm_hsub_pi16(__m64 __a, __m64 __b) 380193326Sed{ 381249423Sdim return (__m64)__builtin_ia32_phsubw((__v4hi)__a, (__v4hi)__b); 382193326Sed} 383193326Sed 384341825Sdim/// Horizontally subtracts the adjacent pairs of values contained in 2 385309124Sdim/// packed 64-bit vectors of [2 x i32]. 386309124Sdim/// 387309124Sdim/// \headerfile <x86intrin.h> 388309124Sdim/// 389309124Sdim/// This intrinsic corresponds to the \c PHSUBD instruction. 390309124Sdim/// 391309124Sdim/// \param __a 392309124Sdim/// A 64-bit vector of [2 x i32] containing one of the source operands. The 393309124Sdim/// horizontal differences between the values are stored in the lower bits of 394309124Sdim/// the destination. 
395309124Sdim/// \param __b 396309124Sdim/// A 64-bit vector of [2 x i32] containing one of the source operands. The 397309124Sdim/// horizontal differences between the values are stored in the upper bits of 398309124Sdim/// the destination. 399309124Sdim/// \returns A 64-bit vector of [2 x i32] containing the horizontal differences 400309124Sdim/// of both operands. 401341825Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS_MMX 402249423Sdim_mm_hsub_pi32(__m64 __a, __m64 __b) 403193326Sed{ 404249423Sdim return (__m64)__builtin_ia32_phsubd((__v2si)__a, (__v2si)__b); 405193326Sed} 406193326Sed 407341825Sdim/// Horizontally subtracts the adjacent pairs of values contained in 2 408309124Sdim/// packed 128-bit vectors of [8 x i16]. Positive differences greater than 409341825Sdim/// 0x7FFF are saturated to 0x7FFF. Negative differences less than 0x8000 are 410341825Sdim/// saturated to 0x8000. 411309124Sdim/// 412309124Sdim/// \headerfile <x86intrin.h> 413309124Sdim/// 414309124Sdim/// This intrinsic corresponds to the \c VPHSUBSW instruction. 415309124Sdim/// 416309124Sdim/// \param __a 417309124Sdim/// A 128-bit vector of [8 x i16] containing one of the source operands. The 418309124Sdim/// horizontal differences between the values are stored in the lower bits of 419309124Sdim/// the destination. 420309124Sdim/// \param __b 421309124Sdim/// A 128-bit vector of [8 x i16] containing one of the source operands. The 422309124Sdim/// horizontal differences between the values are stored in the upper bits of 423309124Sdim/// the destination. 424309124Sdim/// \returns A 128-bit vector of [8 x i16] containing the horizontal saturated 425309124Sdim/// differences of both operands. 
426288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 427249423Sdim_mm_hsubs_epi16(__m128i __a, __m128i __b) 428193326Sed{ 429249423Sdim return (__m128i)__builtin_ia32_phsubsw128((__v8hi)__a, (__v8hi)__b); 430193326Sed} 431193326Sed 432341825Sdim/// Horizontally subtracts the adjacent pairs of values contained in 2 433309124Sdim/// packed 64-bit vectors of [4 x i16]. Positive differences greater than 434341825Sdim/// 0x7FFF are saturated to 0x7FFF. Negative differences less than 0x8000 are 435341825Sdim/// saturated to 0x8000. 436309124Sdim/// 437309124Sdim/// \headerfile <x86intrin.h> 438309124Sdim/// 439309124Sdim/// This intrinsic corresponds to the \c PHSUBSW instruction. 440309124Sdim/// 441309124Sdim/// \param __a 442309124Sdim/// A 64-bit vector of [4 x i16] containing one of the source operands. The 443309124Sdim/// horizontal differences between the values are stored in the lower bits of 444309124Sdim/// the destination. 445309124Sdim/// \param __b 446309124Sdim/// A 64-bit vector of [4 x i16] containing one of the source operands. The 447309124Sdim/// horizontal differences between the values are stored in the upper bits of 448309124Sdim/// the destination. 449309124Sdim/// \returns A 64-bit vector of [4 x i16] containing the horizontal saturated 450309124Sdim/// differences of both operands. 451341825Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS_MMX 452249423Sdim_mm_hsubs_pi16(__m64 __a, __m64 __b) 453193326Sed{ 454249423Sdim return (__m64)__builtin_ia32_phsubsw((__v4hi)__a, (__v4hi)__b); 455193326Sed} 456193326Sed 457341825Sdim/// Multiplies corresponding pairs of packed 8-bit unsigned integer 458309124Sdim/// values contained in the first source operand and packed 8-bit signed 459309124Sdim/// integer values contained in the second source operand, adds pairs of 460309124Sdim/// contiguous products with signed saturation, and writes the 16-bit sums to 461321369Sdim/// the corresponding bits in the destination. 
462309124Sdim/// 463321369Sdim/// For example, bits [7:0] of both operands are multiplied, bits [15:8] of 464321369Sdim/// both operands are multiplied, and the sum of both results is written to 465321369Sdim/// bits [15:0] of the destination. 466321369Sdim/// 467309124Sdim/// \headerfile <x86intrin.h> 468309124Sdim/// 469309124Sdim/// This intrinsic corresponds to the \c VPMADDUBSW instruction. 470309124Sdim/// 471309124Sdim/// \param __a 472309124Sdim/// A 128-bit integer vector containing the first source operand. 473309124Sdim/// \param __b 474309124Sdim/// A 128-bit integer vector containing the second source operand. 475309124Sdim/// \returns A 128-bit integer vector containing the sums of products of both 476314564Sdim/// operands: \n 477314564Sdim/// \a R0 := (\a __a0 * \a __b0) + (\a __a1 * \a __b1) \n 478314564Sdim/// \a R1 := (\a __a2 * \a __b2) + (\a __a3 * \a __b3) \n 479314564Sdim/// \a R2 := (\a __a4 * \a __b4) + (\a __a5 * \a __b5) \n 480314564Sdim/// \a R3 := (\a __a6 * \a __b6) + (\a __a7 * \a __b7) \n 481314564Sdim/// \a R4 := (\a __a8 * \a __b8) + (\a __a9 * \a __b9) \n 482314564Sdim/// \a R5 := (\a __a10 * \a __b10) + (\a __a11 * \a __b11) \n 483314564Sdim/// \a R6 := (\a __a12 * \a __b12) + (\a __a13 * \a __b13) \n 484314564Sdim/// \a R7 := (\a __a14 * \a __b14) + (\a __a15 * \a __b15) 485288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 486249423Sdim_mm_maddubs_epi16(__m128i __a, __m128i __b) 487193326Sed{ 488249423Sdim return (__m128i)__builtin_ia32_pmaddubsw128((__v16qi)__a, (__v16qi)__b); 489193326Sed} 490193326Sed 491341825Sdim/// Multiplies corresponding pairs of packed 8-bit unsigned integer 492309124Sdim/// values contained in the first source operand and packed 8-bit signed 493309124Sdim/// integer values contained in the second source operand, adds pairs of 494309124Sdim/// contiguous products with signed saturation, and writes the 16-bit sums to 495321369Sdim/// the corresponding bits in the destination. 
496309124Sdim/// 497321369Sdim/// For example, bits [7:0] of both operands are multiplied, bits [15:8] of 498321369Sdim/// both operands are multiplied, and the sum of both results is written to 499321369Sdim/// bits [15:0] of the destination. 500321369Sdim/// 501309124Sdim/// \headerfile <x86intrin.h> 502309124Sdim/// 503309124Sdim/// This intrinsic corresponds to the \c PMADDUBSW instruction. 504309124Sdim/// 505309124Sdim/// \param __a 506309124Sdim/// A 64-bit integer vector containing the first source operand. 507309124Sdim/// \param __b 508309124Sdim/// A 64-bit integer vector containing the second source operand. 509309124Sdim/// \returns A 64-bit integer vector containing the sums of products of both 510314564Sdim/// operands: \n 511314564Sdim/// \a R0 := (\a __a0 * \a __b0) + (\a __a1 * \a __b1) \n 512314564Sdim/// \a R1 := (\a __a2 * \a __b2) + (\a __a3 * \a __b3) \n 513314564Sdim/// \a R2 := (\a __a4 * \a __b4) + (\a __a5 * \a __b5) \n 514314564Sdim/// \a R3 := (\a __a6 * \a __b6) + (\a __a7 * \a __b7) 515341825Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS_MMX 516249423Sdim_mm_maddubs_pi16(__m64 __a, __m64 __b) 517193326Sed{ 518249423Sdim return (__m64)__builtin_ia32_pmaddubsw((__v8qi)__a, (__v8qi)__b); 519193326Sed} 520193326Sed 521341825Sdim/// Multiplies packed 16-bit signed integer values, truncates the 32-bit 522309124Sdim/// products to the 18 most significant bits by right-shifting, rounds the 523309124Sdim/// truncated value by adding 1, and writes bits [16:1] to the destination. 524309124Sdim/// 525309124Sdim/// \headerfile <x86intrin.h> 526309124Sdim/// 527309124Sdim/// This intrinsic corresponds to the \c VPMULHRSW instruction. 528309124Sdim/// 529309124Sdim/// \param __a 530309124Sdim/// A 128-bit vector of [8 x i16] containing one of the source operands. 531309124Sdim/// \param __b 532309124Sdim/// A 128-bit vector of [8 x i16] containing one of the source operands. 
533309124Sdim/// \returns A 128-bit vector of [8 x i16] containing the rounded and scaled 534309124Sdim/// products of both operands. 535288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 536249423Sdim_mm_mulhrs_epi16(__m128i __a, __m128i __b) 537193326Sed{ 538249423Sdim return (__m128i)__builtin_ia32_pmulhrsw128((__v8hi)__a, (__v8hi)__b); 539193326Sed} 540193326Sed 541341825Sdim/// Multiplies packed 16-bit signed integer values, truncates the 32-bit 542309124Sdim/// products to the 18 most significant bits by right-shifting, rounds the 543309124Sdim/// truncated value by adding 1, and writes bits [16:1] to the destination. 544309124Sdim/// 545309124Sdim/// \headerfile <x86intrin.h> 546309124Sdim/// 547309124Sdim/// This intrinsic corresponds to the \c PMULHRSW instruction. 548309124Sdim/// 549309124Sdim/// \param __a 550309124Sdim/// A 64-bit vector of [4 x i16] containing one of the source operands. 551309124Sdim/// \param __b 552309124Sdim/// A 64-bit vector of [4 x i16] containing one of the source operands. 553309124Sdim/// \returns A 64-bit vector of [4 x i16] containing the rounded and scaled 554309124Sdim/// products of both operands. 555341825Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS_MMX 556249423Sdim_mm_mulhrs_pi16(__m64 __a, __m64 __b) 557193326Sed{ 558249423Sdim return (__m64)__builtin_ia32_pmulhrsw((__v4hi)__a, (__v4hi)__b); 559193326Sed} 560193326Sed 561341825Sdim/// Copies the 8-bit integers from a 128-bit integer vector to the 562309124Sdim/// destination or clears 8-bit values in the destination, as specified by 563309124Sdim/// the second source operand. 564309124Sdim/// 565309124Sdim/// \headerfile <x86intrin.h> 566309124Sdim/// 567309124Sdim/// This intrinsic corresponds to the \c VPSHUFB instruction. 568309124Sdim/// 569309124Sdim/// \param __a 570309124Sdim/// A 128-bit integer vector containing the values to be copied. 
571309124Sdim/// \param __b 572309124Sdim/// A 128-bit integer vector containing control bytes corresponding to 573309124Sdim/// positions in the destination: 574314564Sdim/// Bit 7: \n 575314564Sdim/// 1: Clear the corresponding byte in the destination. \n 576309124Sdim/// 0: Copy the selected source byte to the corresponding byte in the 577314564Sdim/// destination. \n 578314564Sdim/// Bits [6:4] Reserved. \n 579309124Sdim/// Bits [3:0] select the source byte to be copied. 580309124Sdim/// \returns A 128-bit integer vector containing the copied or cleared values. 581288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 582249423Sdim_mm_shuffle_epi8(__m128i __a, __m128i __b) 583193326Sed{ 584249423Sdim return (__m128i)__builtin_ia32_pshufb128((__v16qi)__a, (__v16qi)__b); 585193326Sed} 586193326Sed 587341825Sdim/// Copies the 8-bit integers from a 64-bit integer vector to the 588309124Sdim/// destination or clears 8-bit values in the destination, as specified by 589309124Sdim/// the second source operand. 590309124Sdim/// 591309124Sdim/// \headerfile <x86intrin.h> 592309124Sdim/// 593309124Sdim/// This intrinsic corresponds to the \c PSHUFB instruction. 594309124Sdim/// 595309124Sdim/// \param __a 596309124Sdim/// A 64-bit integer vector containing the values to be copied. 597309124Sdim/// \param __b 598309124Sdim/// A 64-bit integer vector containing control bytes corresponding to 599309124Sdim/// positions in the destination: 600314564Sdim/// Bit 7: \n 601314564Sdim/// 1: Clear the corresponding byte in the destination. \n 602309124Sdim/// 0: Copy the selected source byte to the corresponding byte in the 603314564Sdim/// destination. \n 604309124Sdim/// Bits [3:0] select the source byte to be copied. 605309124Sdim/// \returns A 64-bit integer vector containing the copied or cleared values. 
606341825Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS_MMX 607249423Sdim_mm_shuffle_pi8(__m64 __a, __m64 __b) 608193326Sed{ 609249423Sdim return (__m64)__builtin_ia32_pshufb((__v8qi)__a, (__v8qi)__b); 610193326Sed} 611193326Sed 612341825Sdim/// For each 8-bit integer in the first source operand, perform one of 613321369Sdim/// the following actions as specified by the second source operand. 614309124Sdim/// 615321369Sdim/// If the byte in the second source is negative, calculate the two's 616321369Sdim/// complement of the corresponding byte in the first source, and write that 617321369Sdim/// value to the destination. If the byte in the second source is positive, 618321369Sdim/// copy the corresponding byte from the first source to the destination. If 619321369Sdim/// the byte in the second source is zero, clear the corresponding byte in 620321369Sdim/// the destination. 621321369Sdim/// 622309124Sdim/// \headerfile <x86intrin.h> 623309124Sdim/// 624309124Sdim/// This intrinsic corresponds to the \c VPSIGNB instruction. 625309124Sdim/// 626309124Sdim/// \param __a 627309124Sdim/// A 128-bit integer vector containing the values to be copied. 628309124Sdim/// \param __b 629309124Sdim/// A 128-bit integer vector containing control bytes corresponding to 630309124Sdim/// positions in the destination. 631309124Sdim/// \returns A 128-bit integer vector containing the resultant values. 632288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 633249423Sdim_mm_sign_epi8(__m128i __a, __m128i __b) 634193326Sed{ 635249423Sdim return (__m128i)__builtin_ia32_psignb128((__v16qi)__a, (__v16qi)__b); 636193326Sed} 637193326Sed 638341825Sdim/// For each 16-bit integer in the first source operand, perform one of 639321369Sdim/// the following actions as specified by the second source operand. 
640309124Sdim/// 641321369Sdim/// If the word in the second source is negative, calculate the two's 642321369Sdim/// complement of the corresponding word in the first source, and write that 643321369Sdim/// value to the destination. If the word in the second source is positive, 644321369Sdim/// copy the corresponding word from the first source to the destination. If 645321369Sdim/// the word in the second source is zero, clear the corresponding word in 646321369Sdim/// the destination. 647321369Sdim/// 648309124Sdim/// \headerfile <x86intrin.h> 649309124Sdim/// 650309124Sdim/// This intrinsic corresponds to the \c VPSIGNW instruction. 651309124Sdim/// 652309124Sdim/// \param __a 653309124Sdim/// A 128-bit integer vector containing the values to be copied. 654309124Sdim/// \param __b 655309124Sdim/// A 128-bit integer vector containing control words corresponding to 656309124Sdim/// positions in the destination. 657309124Sdim/// \returns A 128-bit integer vector containing the resultant values. 658288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 659249423Sdim_mm_sign_epi16(__m128i __a, __m128i __b) 660193326Sed{ 661249423Sdim return (__m128i)__builtin_ia32_psignw128((__v8hi)__a, (__v8hi)__b); 662193326Sed} 663193326Sed 664341825Sdim/// For each 32-bit integer in the first source operand, perform one of 665321369Sdim/// the following actions as specified by the second source operand. 666321369Sdim/// 667321369Sdim/// If the doubleword in the second source is negative, calculate the two's 668309124Sdim/// complement of the corresponding word in the first source, and write that 669309124Sdim/// value to the destination. If the doubleword in the second source is 670309124Sdim/// positive, copy the corresponding word from the first source to the 671309124Sdim/// destination. If the doubleword in the second source is zero, clear the 672309124Sdim/// corresponding word in the destination. 
673309124Sdim/// 674309124Sdim/// \headerfile <x86intrin.h> 675309124Sdim/// 676309124Sdim/// This intrinsic corresponds to the \c VPSIGND instruction. 677309124Sdim/// 678309124Sdim/// \param __a 679309124Sdim/// A 128-bit integer vector containing the values to be copied. 680309124Sdim/// \param __b 681309124Sdim/// A 128-bit integer vector containing control doublewords corresponding to 682309124Sdim/// positions in the destination. 683309124Sdim/// \returns A 128-bit integer vector containing the resultant values. 684288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS 685249423Sdim_mm_sign_epi32(__m128i __a, __m128i __b) 686193326Sed{ 687249423Sdim return (__m128i)__builtin_ia32_psignd128((__v4si)__a, (__v4si)__b); 688193326Sed} 689193326Sed 690341825Sdim/// For each 8-bit integer in the first source operand, perform one of 691321369Sdim/// the following actions as specified by the second source operand. 692309124Sdim/// 693321369Sdim/// If the byte in the second source is negative, calculate the two's 694321369Sdim/// complement of the corresponding byte in the first source, and write that 695321369Sdim/// value to the destination. If the byte in the second source is positive, 696321369Sdim/// copy the corresponding byte from the first source to the destination. If 697321369Sdim/// the byte in the second source is zero, clear the corresponding byte in 698321369Sdim/// the destination. 699321369Sdim/// 700309124Sdim/// \headerfile <x86intrin.h> 701309124Sdim/// 702309124Sdim/// This intrinsic corresponds to the \c PSIGNB instruction. 703309124Sdim/// 704309124Sdim/// \param __a 705309124Sdim/// A 64-bit integer vector containing the values to be copied. 706309124Sdim/// \param __b 707309124Sdim/// A 64-bit integer vector containing control bytes corresponding to 708309124Sdim/// positions in the destination. 709309124Sdim/// \returns A 64-bit integer vector containing the resultant values. 
710341825Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS_MMX 711249423Sdim_mm_sign_pi8(__m64 __a, __m64 __b) 712193326Sed{ 713249423Sdim return (__m64)__builtin_ia32_psignb((__v8qi)__a, (__v8qi)__b); 714193326Sed} 715193326Sed 716341825Sdim/// For each 16-bit integer in the first source operand, perform one of 717321369Sdim/// the following actions as specified by the second source operand. 718309124Sdim/// 719321369Sdim/// If the word in the second source is negative, calculate the two's 720321369Sdim/// complement of the corresponding word in the first source, and write that 721321369Sdim/// value to the destination. If the word in the second source is positive, 722321369Sdim/// copy the corresponding word from the first source to the destination. If 723321369Sdim/// the word in the second source is zero, clear the corresponding word in 724321369Sdim/// the destination. 725321369Sdim/// 726309124Sdim/// \headerfile <x86intrin.h> 727309124Sdim/// 728309124Sdim/// This intrinsic corresponds to the \c PSIGNW instruction. 729309124Sdim/// 730309124Sdim/// \param __a 731309124Sdim/// A 64-bit integer vector containing the values to be copied. 732309124Sdim/// \param __b 733309124Sdim/// A 64-bit integer vector containing control words corresponding to 734309124Sdim/// positions in the destination. 735309124Sdim/// \returns A 64-bit integer vector containing the resultant values. 736341825Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS_MMX 737249423Sdim_mm_sign_pi16(__m64 __a, __m64 __b) 738193326Sed{ 739249423Sdim return (__m64)__builtin_ia32_psignw((__v4hi)__a, (__v4hi)__b); 740193326Sed} 741193326Sed 742341825Sdim/// For each 32-bit integer in the first source operand, perform one of 743321369Sdim/// the following actions as specified by the second source operand. 
744321369Sdim/// 745321369Sdim/// If the doubleword in the second source is negative, calculate the two's 746309124Sdim/// complement of the corresponding doubleword in the first source, and 747309124Sdim/// write that value to the destination. If the doubleword in the second 748309124Sdim/// source is positive, copy the corresponding doubleword from the first 749309124Sdim/// source to the destination. If the doubleword in the second source is 750309124Sdim/// zero, clear the corresponding doubleword in the destination. 751309124Sdim/// 752309124Sdim/// \headerfile <x86intrin.h> 753309124Sdim/// 754309124Sdim/// This intrinsic corresponds to the \c PSIGND instruction. 755309124Sdim/// 756309124Sdim/// \param __a 757309124Sdim/// A 64-bit integer vector containing the values to be copied. 758309124Sdim/// \param __b 759309124Sdim/// A 64-bit integer vector containing two control doublewords corresponding 760309124Sdim/// to positions in the destination. 761309124Sdim/// \returns A 64-bit integer vector containing the resultant values. 762341825Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS_MMX 763249423Sdim_mm_sign_pi32(__m64 __a, __m64 __b) 764193326Sed{ 765249423Sdim return (__m64)__builtin_ia32_psignd((__v2si)__a, (__v2si)__b); 766193326Sed} 767193326Sed 768288943Sdim#undef __DEFAULT_FN_ATTRS 769341825Sdim#undef __DEFAULT_FN_ATTRS_MMX 770288943Sdim 771193326Sed#endif /* __TMMINTRIN_H */ 772