1/*===---- mmintrin.h - MMX intrinsics --------------------------------------=== 2 * 3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 * See https://llvm.org/LICENSE.txt for license information. 5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 * 7 *===-----------------------------------------------------------------------=== 8 */ 9 10#ifndef __MMINTRIN_H 11#define __MMINTRIN_H 12 13typedef long long __m64 __attribute__((__vector_size__(8), __aligned__(8))); 14 15typedef long long __v1di __attribute__((__vector_size__(8))); 16typedef int __v2si __attribute__((__vector_size__(8))); 17typedef short __v4hi __attribute__((__vector_size__(8))); 18typedef char __v8qi __attribute__((__vector_size__(8))); 19 20/* Define the default attributes for the functions in this file. */ 21#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("mmx"), __min_vector_width__(64))) 22 23/// Clears the MMX state by setting the state of the x87 stack registers 24/// to empty. 25/// 26/// \headerfile <x86intrin.h> 27/// 28/// This intrinsic corresponds to the <c> EMMS </c> instruction. 29/// 30static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("mmx"))) 31_mm_empty(void) 32{ 33 __builtin_ia32_emms(); 34} 35 36/// Constructs a 64-bit integer vector, setting the lower 32 bits to the 37/// value of the 32-bit integer parameter and setting the upper 32 bits to 0. 38/// 39/// \headerfile <x86intrin.h> 40/// 41/// This intrinsic corresponds to the <c> MOVD </c> instruction. 42/// 43/// \param __i 44/// A 32-bit integer value. 45/// \returns A 64-bit integer vector. The lower 32 bits contain the value of the 46/// parameter. The upper 32 bits are set to 0. 47static __inline__ __m64 __DEFAULT_FN_ATTRS 48_mm_cvtsi32_si64(int __i) 49{ 50 return (__m64)__builtin_ia32_vec_init_v2si(__i, 0); 51} 52 53/// Returns the lower 32 bits of a 64-bit integer vector as a 32-bit 54/// signed integer. 
55/// 56/// \headerfile <x86intrin.h> 57/// 58/// This intrinsic corresponds to the <c> MOVD </c> instruction. 59/// 60/// \param __m 61/// A 64-bit integer vector. 62/// \returns A 32-bit signed integer value containing the lower 32 bits of the 63/// parameter. 64static __inline__ int __DEFAULT_FN_ATTRS 65_mm_cvtsi64_si32(__m64 __m) 66{ 67 return __builtin_ia32_vec_ext_v2si((__v2si)__m, 0); 68} 69 70/// Casts a 64-bit signed integer value into a 64-bit integer vector. 71/// 72/// \headerfile <x86intrin.h> 73/// 74/// This intrinsic corresponds to the <c> MOVQ </c> instruction. 75/// 76/// \param __i 77/// A 64-bit signed integer. 78/// \returns A 64-bit integer vector containing the same bitwise pattern as the 79/// parameter. 80static __inline__ __m64 __DEFAULT_FN_ATTRS 81_mm_cvtsi64_m64(long long __i) 82{ 83 return (__m64)__i; 84} 85 86/// Casts a 64-bit integer vector into a 64-bit signed integer value. 87/// 88/// \headerfile <x86intrin.h> 89/// 90/// This intrinsic corresponds to the <c> MOVQ </c> instruction. 91/// 92/// \param __m 93/// A 64-bit integer vector. 94/// \returns A 64-bit signed integer containing the same bitwise pattern as the 95/// parameter. 96static __inline__ long long __DEFAULT_FN_ATTRS 97_mm_cvtm64_si64(__m64 __m) 98{ 99 return (long long)__m; 100} 101 102/// Converts 16-bit signed integers from both 64-bit integer vector 103/// parameters of [4 x i16] into 8-bit signed integer values, and constructs 104/// a 64-bit integer vector of [8 x i8] as the result. Positive values 105/// greater than 0x7F are saturated to 0x7F. Negative values less than 0x80 106/// are saturated to 0x80. 107/// 108/// \headerfile <x86intrin.h> 109/// 110/// This intrinsic corresponds to the <c> PACKSSWB </c> instruction. 111/// 112/// \param __m1 113/// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a 114/// 16-bit signed integer and is converted to an 8-bit signed integer with 115/// saturation. 
Positive values greater than 0x7F are saturated to 0x7F. 116/// Negative values less than 0x80 are saturated to 0x80. The converted 117/// [4 x i8] values are written to the lower 32 bits of the result. 118/// \param __m2 119/// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a 120/// 16-bit signed integer and is converted to an 8-bit signed integer with 121/// saturation. Positive values greater than 0x7F are saturated to 0x7F. 122/// Negative values less than 0x80 are saturated to 0x80. The converted 123/// [4 x i8] values are written to the upper 32 bits of the result. 124/// \returns A 64-bit integer vector of [8 x i8] containing the converted 125/// values. 126static __inline__ __m64 __DEFAULT_FN_ATTRS 127_mm_packs_pi16(__m64 __m1, __m64 __m2) 128{ 129 return (__m64)__builtin_ia32_packsswb((__v4hi)__m1, (__v4hi)__m2); 130} 131 132/// Converts 32-bit signed integers from both 64-bit integer vector 133/// parameters of [2 x i32] into 16-bit signed integer values, and constructs 134/// a 64-bit integer vector of [4 x i16] as the result. Positive values 135/// greater than 0x7FFF are saturated to 0x7FFF. Negative values less than 136/// 0x8000 are saturated to 0x8000. 137/// 138/// \headerfile <x86intrin.h> 139/// 140/// This intrinsic corresponds to the <c> PACKSSDW </c> instruction. 141/// 142/// \param __m1 143/// A 64-bit integer vector of [2 x i32]. Each 32-bit element is treated as a 144/// 32-bit signed integer and is converted to a 16-bit signed integer with 145/// saturation. Positive values greater than 0x7FFF are saturated to 0x7FFF. 146/// Negative values less than 0x8000 are saturated to 0x8000. The converted 147/// [2 x i16] values are written to the lower 32 bits of the result. 148/// \param __m2 149/// A 64-bit integer vector of [2 x i32]. Each 32-bit element is treated as a 150/// 32-bit signed integer and is converted to a 16-bit signed integer with 151/// saturation. 
Positive values greater than 0x7FFF are saturated to 0x7FFF. 152/// Negative values less than 0x8000 are saturated to 0x8000. The converted 153/// [2 x i16] values are written to the upper 32 bits of the result. 154/// \returns A 64-bit integer vector of [4 x i16] containing the converted 155/// values. 156static __inline__ __m64 __DEFAULT_FN_ATTRS 157_mm_packs_pi32(__m64 __m1, __m64 __m2) 158{ 159 return (__m64)__builtin_ia32_packssdw((__v2si)__m1, (__v2si)__m2); 160} 161 162/// Converts 16-bit signed integers from both 64-bit integer vector 163/// parameters of [4 x i16] into 8-bit unsigned integer values, and 164/// constructs a 64-bit integer vector of [8 x i8] as the result. Values 165/// greater than 0xFF are saturated to 0xFF. Values less than 0 are saturated 166/// to 0. 167/// 168/// \headerfile <x86intrin.h> 169/// 170/// This intrinsic corresponds to the <c> PACKUSWB </c> instruction. 171/// 172/// \param __m1 173/// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a 174/// 16-bit signed integer and is converted to an 8-bit unsigned integer with 175/// saturation. Values greater than 0xFF are saturated to 0xFF. Values less 176/// than 0 are saturated to 0. The converted [4 x i8] values are written to 177/// the lower 32 bits of the result. 178/// \param __m2 179/// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a 180/// 16-bit signed integer and is converted to an 8-bit unsigned integer with 181/// saturation. Values greater than 0xFF are saturated to 0xFF. Values less 182/// than 0 are saturated to 0. The converted [4 x i8] values are written to 183/// the upper 32 bits of the result. 184/// \returns A 64-bit integer vector of [8 x i8] containing the converted 185/// values. 
186static __inline__ __m64 __DEFAULT_FN_ATTRS 187_mm_packs_pu16(__m64 __m1, __m64 __m2) 188{ 189 return (__m64)__builtin_ia32_packuswb((__v4hi)__m1, (__v4hi)__m2); 190} 191 192/// Unpacks the upper 32 bits from two 64-bit integer vectors of [8 x i8] 193/// and interleaves them into a 64-bit integer vector of [8 x i8]. 194/// 195/// \headerfile <x86intrin.h> 196/// 197/// This intrinsic corresponds to the <c> PUNPCKHBW </c> instruction. 198/// 199/// \param __m1 200/// A 64-bit integer vector of [8 x i8]. \n 201/// Bits [39:32] are written to bits [7:0] of the result. \n 202/// Bits [47:40] are written to bits [23:16] of the result. \n 203/// Bits [55:48] are written to bits [39:32] of the result. \n 204/// Bits [63:56] are written to bits [55:48] of the result. 205/// \param __m2 206/// A 64-bit integer vector of [8 x i8]. 207/// Bits [39:32] are written to bits [15:8] of the result. \n 208/// Bits [47:40] are written to bits [31:24] of the result. \n 209/// Bits [55:48] are written to bits [47:40] of the result. \n 210/// Bits [63:56] are written to bits [63:56] of the result. 211/// \returns A 64-bit integer vector of [8 x i8] containing the interleaved 212/// values. 213static __inline__ __m64 __DEFAULT_FN_ATTRS 214_mm_unpackhi_pi8(__m64 __m1, __m64 __m2) 215{ 216 return (__m64)__builtin_ia32_punpckhbw((__v8qi)__m1, (__v8qi)__m2); 217} 218 219/// Unpacks the upper 32 bits from two 64-bit integer vectors of 220/// [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16]. 221/// 222/// \headerfile <x86intrin.h> 223/// 224/// This intrinsic corresponds to the <c> PUNPCKHWD </c> instruction. 225/// 226/// \param __m1 227/// A 64-bit integer vector of [4 x i16]. 228/// Bits [47:32] are written to bits [15:0] of the result. \n 229/// Bits [63:48] are written to bits [47:32] of the result. 230/// \param __m2 231/// A 64-bit integer vector of [4 x i16]. 232/// Bits [47:32] are written to bits [31:16] of the result. 
\n 233/// Bits [63:48] are written to bits [63:48] of the result. 234/// \returns A 64-bit integer vector of [4 x i16] containing the interleaved 235/// values. 236static __inline__ __m64 __DEFAULT_FN_ATTRS 237_mm_unpackhi_pi16(__m64 __m1, __m64 __m2) 238{ 239 return (__m64)__builtin_ia32_punpckhwd((__v4hi)__m1, (__v4hi)__m2); 240} 241 242/// Unpacks the upper 32 bits from two 64-bit integer vectors of 243/// [2 x i32] and interleaves them into a 64-bit integer vector of [2 x i32]. 244/// 245/// \headerfile <x86intrin.h> 246/// 247/// This intrinsic corresponds to the <c> PUNPCKHDQ </c> instruction. 248/// 249/// \param __m1 250/// A 64-bit integer vector of [2 x i32]. The upper 32 bits are written to 251/// the lower 32 bits of the result. 252/// \param __m2 253/// A 64-bit integer vector of [2 x i32]. The upper 32 bits are written to 254/// the upper 32 bits of the result. 255/// \returns A 64-bit integer vector of [2 x i32] containing the interleaved 256/// values. 257static __inline__ __m64 __DEFAULT_FN_ATTRS 258_mm_unpackhi_pi32(__m64 __m1, __m64 __m2) 259{ 260 return (__m64)__builtin_ia32_punpckhdq((__v2si)__m1, (__v2si)__m2); 261} 262 263/// Unpacks the lower 32 bits from two 64-bit integer vectors of [8 x i8] 264/// and interleaves them into a 64-bit integer vector of [8 x i8]. 265/// 266/// \headerfile <x86intrin.h> 267/// 268/// This intrinsic corresponds to the <c> PUNPCKLBW </c> instruction. 269/// 270/// \param __m1 271/// A 64-bit integer vector of [8 x i8]. 272/// Bits [7:0] are written to bits [7:0] of the result. \n 273/// Bits [15:8] are written to bits [23:16] of the result. \n 274/// Bits [23:16] are written to bits [39:32] of the result. \n 275/// Bits [31:24] are written to bits [55:48] of the result. 276/// \param __m2 277/// A 64-bit integer vector of [8 x i8]. 278/// Bits [7:0] are written to bits [15:8] of the result. \n 279/// Bits [15:8] are written to bits [31:24] of the result. 
\n 280/// Bits [23:16] are written to bits [47:40] of the result. \n 281/// Bits [31:24] are written to bits [63:56] of the result. 282/// \returns A 64-bit integer vector of [8 x i8] containing the interleaved 283/// values. 284static __inline__ __m64 __DEFAULT_FN_ATTRS 285_mm_unpacklo_pi8(__m64 __m1, __m64 __m2) 286{ 287 return (__m64)__builtin_ia32_punpcklbw((__v8qi)__m1, (__v8qi)__m2); 288} 289 290/// Unpacks the lower 32 bits from two 64-bit integer vectors of 291/// [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16]. 292/// 293/// \headerfile <x86intrin.h> 294/// 295/// This intrinsic corresponds to the <c> PUNPCKLWD </c> instruction. 296/// 297/// \param __m1 298/// A 64-bit integer vector of [4 x i16]. 299/// Bits [15:0] are written to bits [15:0] of the result. \n 300/// Bits [31:16] are written to bits [47:32] of the result. 301/// \param __m2 302/// A 64-bit integer vector of [4 x i16]. 303/// Bits [15:0] are written to bits [31:16] of the result. \n 304/// Bits [31:16] are written to bits [63:48] of the result. 305/// \returns A 64-bit integer vector of [4 x i16] containing the interleaved 306/// values. 307static __inline__ __m64 __DEFAULT_FN_ATTRS 308_mm_unpacklo_pi16(__m64 __m1, __m64 __m2) 309{ 310 return (__m64)__builtin_ia32_punpcklwd((__v4hi)__m1, (__v4hi)__m2); 311} 312 313/// Unpacks the lower 32 bits from two 64-bit integer vectors of 314/// [2 x i32] and interleaves them into a 64-bit integer vector of [2 x i32]. 315/// 316/// \headerfile <x86intrin.h> 317/// 318/// This intrinsic corresponds to the <c> PUNPCKLDQ </c> instruction. 319/// 320/// \param __m1 321/// A 64-bit integer vector of [2 x i32]. The lower 32 bits are written to 322/// the lower 32 bits of the result. 323/// \param __m2 324/// A 64-bit integer vector of [2 x i32]. The lower 32 bits are written to 325/// the upper 32 bits of the result. 326/// \returns A 64-bit integer vector of [2 x i32] containing the interleaved 327/// values. 
328static __inline__ __m64 __DEFAULT_FN_ATTRS 329_mm_unpacklo_pi32(__m64 __m1, __m64 __m2) 330{ 331 return (__m64)__builtin_ia32_punpckldq((__v2si)__m1, (__v2si)__m2); 332} 333 334/// Adds each 8-bit integer element of the first 64-bit integer vector 335/// of [8 x i8] to the corresponding 8-bit integer element of the second 336/// 64-bit integer vector of [8 x i8]. The lower 8 bits of the results are 337/// packed into a 64-bit integer vector of [8 x i8]. 338/// 339/// \headerfile <x86intrin.h> 340/// 341/// This intrinsic corresponds to the <c> PADDB </c> instruction. 342/// 343/// \param __m1 344/// A 64-bit integer vector of [8 x i8]. 345/// \param __m2 346/// A 64-bit integer vector of [8 x i8]. 347/// \returns A 64-bit integer vector of [8 x i8] containing the sums of both 348/// parameters. 349static __inline__ __m64 __DEFAULT_FN_ATTRS 350_mm_add_pi8(__m64 __m1, __m64 __m2) 351{ 352 return (__m64)__builtin_ia32_paddb((__v8qi)__m1, (__v8qi)__m2); 353} 354 355/// Adds each 16-bit integer element of the first 64-bit integer vector 356/// of [4 x i16] to the corresponding 16-bit integer element of the second 357/// 64-bit integer vector of [4 x i16]. The lower 16 bits of the results are 358/// packed into a 64-bit integer vector of [4 x i16]. 359/// 360/// \headerfile <x86intrin.h> 361/// 362/// This intrinsic corresponds to the <c> PADDW </c> instruction. 363/// 364/// \param __m1 365/// A 64-bit integer vector of [4 x i16]. 366/// \param __m2 367/// A 64-bit integer vector of [4 x i16]. 368/// \returns A 64-bit integer vector of [4 x i16] containing the sums of both 369/// parameters. 370static __inline__ __m64 __DEFAULT_FN_ATTRS 371_mm_add_pi16(__m64 __m1, __m64 __m2) 372{ 373 return (__m64)__builtin_ia32_paddw((__v4hi)__m1, (__v4hi)__m2); 374} 375 376/// Adds each 32-bit integer element of the first 64-bit integer vector 377/// of [2 x i32] to the corresponding 32-bit integer element of the second 378/// 64-bit integer vector of [2 x i32]. 
The lower 32 bits of the results are 379/// packed into a 64-bit integer vector of [2 x i32]. 380/// 381/// \headerfile <x86intrin.h> 382/// 383/// This intrinsic corresponds to the <c> PADDD </c> instruction. 384/// 385/// \param __m1 386/// A 64-bit integer vector of [2 x i32]. 387/// \param __m2 388/// A 64-bit integer vector of [2 x i32]. 389/// \returns A 64-bit integer vector of [2 x i32] containing the sums of both 390/// parameters. 391static __inline__ __m64 __DEFAULT_FN_ATTRS 392_mm_add_pi32(__m64 __m1, __m64 __m2) 393{ 394 return (__m64)__builtin_ia32_paddd((__v2si)__m1, (__v2si)__m2); 395} 396 397/// Adds each 8-bit signed integer element of the first 64-bit integer 398/// vector of [8 x i8] to the corresponding 8-bit signed integer element of 399/// the second 64-bit integer vector of [8 x i8]. Positive sums greater than 400/// 0x7F are saturated to 0x7F. Negative sums less than 0x80 are saturated to 401/// 0x80. The results are packed into a 64-bit integer vector of [8 x i8]. 402/// 403/// \headerfile <x86intrin.h> 404/// 405/// This intrinsic corresponds to the <c> PADDSB </c> instruction. 406/// 407/// \param __m1 408/// A 64-bit integer vector of [8 x i8]. 409/// \param __m2 410/// A 64-bit integer vector of [8 x i8]. 411/// \returns A 64-bit integer vector of [8 x i8] containing the saturated sums 412/// of both parameters. 413static __inline__ __m64 __DEFAULT_FN_ATTRS 414_mm_adds_pi8(__m64 __m1, __m64 __m2) 415{ 416 return (__m64)__builtin_ia32_paddsb((__v8qi)__m1, (__v8qi)__m2); 417} 418 419/// Adds each 16-bit signed integer element of the first 64-bit integer 420/// vector of [4 x i16] to the corresponding 16-bit signed integer element of 421/// the second 64-bit integer vector of [4 x i16]. Positive sums greater than 422/// 0x7FFF are saturated to 0x7FFF. Negative sums less than 0x8000 are 423/// saturated to 0x8000. The results are packed into a 64-bit integer vector 424/// of [4 x i16]. 
425/// 426/// \headerfile <x86intrin.h> 427/// 428/// This intrinsic corresponds to the <c> PADDSW </c> instruction. 429/// 430/// \param __m1 431/// A 64-bit integer vector of [4 x i16]. 432/// \param __m2 433/// A 64-bit integer vector of [4 x i16]. 434/// \returns A 64-bit integer vector of [4 x i16] containing the saturated sums 435/// of both parameters. 436static __inline__ __m64 __DEFAULT_FN_ATTRS 437_mm_adds_pi16(__m64 __m1, __m64 __m2) 438{ 439 return (__m64)__builtin_ia32_paddsw((__v4hi)__m1, (__v4hi)__m2); 440} 441 442/// Adds each 8-bit unsigned integer element of the first 64-bit integer 443/// vector of [8 x i8] to the corresponding 8-bit unsigned integer element of 444/// the second 64-bit integer vector of [8 x i8]. Sums greater than 0xFF are 445/// saturated to 0xFF. The results are packed into a 64-bit integer vector of 446/// [8 x i8]. 447/// 448/// \headerfile <x86intrin.h> 449/// 450/// This intrinsic corresponds to the <c> PADDUSB </c> instruction. 451/// 452/// \param __m1 453/// A 64-bit integer vector of [8 x i8]. 454/// \param __m2 455/// A 64-bit integer vector of [8 x i8]. 456/// \returns A 64-bit integer vector of [8 x i8] containing the saturated 457/// unsigned sums of both parameters. 458static __inline__ __m64 __DEFAULT_FN_ATTRS 459_mm_adds_pu8(__m64 __m1, __m64 __m2) 460{ 461 return (__m64)__builtin_ia32_paddusb((__v8qi)__m1, (__v8qi)__m2); 462} 463 464/// Adds each 16-bit unsigned integer element of the first 64-bit integer 465/// vector of [4 x i16] to the corresponding 16-bit unsigned integer element 466/// of the second 64-bit integer vector of [4 x i16]. Sums greater than 467/// 0xFFFF are saturated to 0xFFFF. The results are packed into a 64-bit 468/// integer vector of [4 x i16]. 469/// 470/// \headerfile <x86intrin.h> 471/// 472/// This intrinsic corresponds to the <c> PADDUSW </c> instruction. 473/// 474/// \param __m1 475/// A 64-bit integer vector of [4 x i16]. 
476/// \param __m2 477/// A 64-bit integer vector of [4 x i16]. 478/// \returns A 64-bit integer vector of [4 x i16] containing the saturated 479/// unsigned sums of both parameters. 480static __inline__ __m64 __DEFAULT_FN_ATTRS 481_mm_adds_pu16(__m64 __m1, __m64 __m2) 482{ 483 return (__m64)__builtin_ia32_paddusw((__v4hi)__m1, (__v4hi)__m2); 484} 485 486/// Subtracts each 8-bit integer element of the second 64-bit integer 487/// vector of [8 x i8] from the corresponding 8-bit integer element of the 488/// first 64-bit integer vector of [8 x i8]. The lower 8 bits of the results 489/// are packed into a 64-bit integer vector of [8 x i8]. 490/// 491/// \headerfile <x86intrin.h> 492/// 493/// This intrinsic corresponds to the <c> PSUBB </c> instruction. 494/// 495/// \param __m1 496/// A 64-bit integer vector of [8 x i8] containing the minuends. 497/// \param __m2 498/// A 64-bit integer vector of [8 x i8] containing the subtrahends. 499/// \returns A 64-bit integer vector of [8 x i8] containing the differences of 500/// both parameters. 501static __inline__ __m64 __DEFAULT_FN_ATTRS 502_mm_sub_pi8(__m64 __m1, __m64 __m2) 503{ 504 return (__m64)__builtin_ia32_psubb((__v8qi)__m1, (__v8qi)__m2); 505} 506 507/// Subtracts each 16-bit integer element of the second 64-bit integer 508/// vector of [4 x i16] from the corresponding 16-bit integer element of the 509/// first 64-bit integer vector of [4 x i16]. The lower 16 bits of the 510/// results are packed into a 64-bit integer vector of [4 x i16]. 511/// 512/// \headerfile <x86intrin.h> 513/// 514/// This intrinsic corresponds to the <c> PSUBW </c> instruction. 515/// 516/// \param __m1 517/// A 64-bit integer vector of [4 x i16] containing the minuends. 518/// \param __m2 519/// A 64-bit integer vector of [4 x i16] containing the subtrahends. 520/// \returns A 64-bit integer vector of [4 x i16] containing the differences of 521/// both parameters. 
522static __inline__ __m64 __DEFAULT_FN_ATTRS 523_mm_sub_pi16(__m64 __m1, __m64 __m2) 524{ 525 return (__m64)__builtin_ia32_psubw((__v4hi)__m1, (__v4hi)__m2); 526} 527 528/// Subtracts each 32-bit integer element of the second 64-bit integer 529/// vector of [2 x i32] from the corresponding 32-bit integer element of the 530/// first 64-bit integer vector of [2 x i32]. The lower 32 bits of the 531/// results are packed into a 64-bit integer vector of [2 x i32]. 532/// 533/// \headerfile <x86intrin.h> 534/// 535/// This intrinsic corresponds to the <c> PSUBD </c> instruction. 536/// 537/// \param __m1 538/// A 64-bit integer vector of [2 x i32] containing the minuends. 539/// \param __m2 540/// A 64-bit integer vector of [2 x i32] containing the subtrahends. 541/// \returns A 64-bit integer vector of [2 x i32] containing the differences of 542/// both parameters. 543static __inline__ __m64 __DEFAULT_FN_ATTRS 544_mm_sub_pi32(__m64 __m1, __m64 __m2) 545{ 546 return (__m64)__builtin_ia32_psubd((__v2si)__m1, (__v2si)__m2); 547} 548 549/// Subtracts each 8-bit signed integer element of the second 64-bit 550/// integer vector of [8 x i8] from the corresponding 8-bit signed integer 551/// element of the first 64-bit integer vector of [8 x i8]. Positive results 552/// greater than 0x7F are saturated to 0x7F. Negative results less than 0x80 553/// are saturated to 0x80. The results are packed into a 64-bit integer 554/// vector of [8 x i8]. 555/// 556/// \headerfile <x86intrin.h> 557/// 558/// This intrinsic corresponds to the <c> PSUBSB </c> instruction. 559/// 560/// \param __m1 561/// A 64-bit integer vector of [8 x i8] containing the minuends. 562/// \param __m2 563/// A 64-bit integer vector of [8 x i8] containing the subtrahends. 564/// \returns A 64-bit integer vector of [8 x i8] containing the saturated 565/// differences of both parameters. 
566static __inline__ __m64 __DEFAULT_FN_ATTRS 567_mm_subs_pi8(__m64 __m1, __m64 __m2) 568{ 569 return (__m64)__builtin_ia32_psubsb((__v8qi)__m1, (__v8qi)__m2); 570} 571 572/// Subtracts each 16-bit signed integer element of the second 64-bit 573/// integer vector of [4 x i16] from the corresponding 16-bit signed integer 574/// element of the first 64-bit integer vector of [4 x i16]. Positive results 575/// greater than 0x7FFF are saturated to 0x7FFF. Negative results less than 576/// 0x8000 are saturated to 0x8000. The results are packed into a 64-bit 577/// integer vector of [4 x i16]. 578/// 579/// \headerfile <x86intrin.h> 580/// 581/// This intrinsic corresponds to the <c> PSUBSW </c> instruction. 582/// 583/// \param __m1 584/// A 64-bit integer vector of [4 x i16] containing the minuends. 585/// \param __m2 586/// A 64-bit integer vector of [4 x i16] containing the subtrahends. 587/// \returns A 64-bit integer vector of [4 x i16] containing the saturated 588/// differences of both parameters. 589static __inline__ __m64 __DEFAULT_FN_ATTRS 590_mm_subs_pi16(__m64 __m1, __m64 __m2) 591{ 592 return (__m64)__builtin_ia32_psubsw((__v4hi)__m1, (__v4hi)__m2); 593} 594 595/// Subtracts each 8-bit unsigned integer element of the second 64-bit 596/// integer vector of [8 x i8] from the corresponding 8-bit unsigned integer 597/// element of the first 64-bit integer vector of [8 x i8]. 598/// 599/// If an element of the first vector is less than the corresponding element 600/// of the second vector, the result is saturated to 0. The results are 601/// packed into a 64-bit integer vector of [8 x i8]. 602/// 603/// \headerfile <x86intrin.h> 604/// 605/// This intrinsic corresponds to the <c> PSUBUSB </c> instruction. 606/// 607/// \param __m1 608/// A 64-bit integer vector of [8 x i8] containing the minuends. 609/// \param __m2 610/// A 64-bit integer vector of [8 x i8] containing the subtrahends. 
611/// \returns A 64-bit integer vector of [8 x i8] containing the saturated 612/// differences of both parameters. 613static __inline__ __m64 __DEFAULT_FN_ATTRS 614_mm_subs_pu8(__m64 __m1, __m64 __m2) 615{ 616 return (__m64)__builtin_ia32_psubusb((__v8qi)__m1, (__v8qi)__m2); 617} 618 619/// Subtracts each 16-bit unsigned integer element of the second 64-bit 620/// integer vector of [4 x i16] from the corresponding 16-bit unsigned 621/// integer element of the first 64-bit integer vector of [4 x i16]. 622/// 623/// If an element of the first vector is less than the corresponding element 624/// of the second vector, the result is saturated to 0. The results are 625/// packed into a 64-bit integer vector of [4 x i16]. 626/// 627/// \headerfile <x86intrin.h> 628/// 629/// This intrinsic corresponds to the <c> PSUBUSW </c> instruction. 630/// 631/// \param __m1 632/// A 64-bit integer vector of [4 x i16] containing the minuends. 633/// \param __m2 634/// A 64-bit integer vector of [4 x i16] containing the subtrahends. 635/// \returns A 64-bit integer vector of [4 x i16] containing the saturated 636/// differences of both parameters. 637static __inline__ __m64 __DEFAULT_FN_ATTRS 638_mm_subs_pu16(__m64 __m1, __m64 __m2) 639{ 640 return (__m64)__builtin_ia32_psubusw((__v4hi)__m1, (__v4hi)__m2); 641} 642 643/// Multiplies each 16-bit signed integer element of the first 64-bit 644/// integer vector of [4 x i16] by the corresponding 16-bit signed integer 645/// element of the second 64-bit integer vector of [4 x i16] and get four 646/// 32-bit products. Adds adjacent pairs of products to get two 32-bit sums. 647/// The lower 32 bits of these two sums are packed into a 64-bit integer 648/// vector of [2 x i32]. 649/// 650/// For example, bits [15:0] of both parameters are multiplied, bits [31:16] 651/// of both parameters are multiplied, and the sum of both results is written 652/// to bits [31:0] of the result. 
653/// 654/// \headerfile <x86intrin.h> 655/// 656/// This intrinsic corresponds to the <c> PMADDWD </c> instruction. 657/// 658/// \param __m1 659/// A 64-bit integer vector of [4 x i16]. 660/// \param __m2 661/// A 64-bit integer vector of [4 x i16]. 662/// \returns A 64-bit integer vector of [2 x i32] containing the sums of 663/// products of both parameters. 664static __inline__ __m64 __DEFAULT_FN_ATTRS 665_mm_madd_pi16(__m64 __m1, __m64 __m2) 666{ 667 return (__m64)__builtin_ia32_pmaddwd((__v4hi)__m1, (__v4hi)__m2); 668} 669 670/// Multiplies each 16-bit signed integer element of the first 64-bit 671/// integer vector of [4 x i16] by the corresponding 16-bit signed integer 672/// element of the second 64-bit integer vector of [4 x i16]. Packs the upper 673/// 16 bits of the 32-bit products into a 64-bit integer vector of [4 x i16]. 674/// 675/// \headerfile <x86intrin.h> 676/// 677/// This intrinsic corresponds to the <c> PMULHW </c> instruction. 678/// 679/// \param __m1 680/// A 64-bit integer vector of [4 x i16]. 681/// \param __m2 682/// A 64-bit integer vector of [4 x i16]. 683/// \returns A 64-bit integer vector of [4 x i16] containing the upper 16 bits 684/// of the products of both parameters. 685static __inline__ __m64 __DEFAULT_FN_ATTRS 686_mm_mulhi_pi16(__m64 __m1, __m64 __m2) 687{ 688 return (__m64)__builtin_ia32_pmulhw((__v4hi)__m1, (__v4hi)__m2); 689} 690 691/// Multiplies each 16-bit signed integer element of the first 64-bit 692/// integer vector of [4 x i16] by the corresponding 16-bit signed integer 693/// element of the second 64-bit integer vector of [4 x i16]. Packs the lower 694/// 16 bits of the 32-bit products into a 64-bit integer vector of [4 x i16]. 695/// 696/// \headerfile <x86intrin.h> 697/// 698/// This intrinsic corresponds to the <c> PMULLW </c> instruction. 699/// 700/// \param __m1 701/// A 64-bit integer vector of [4 x i16]. 702/// \param __m2 703/// A 64-bit integer vector of [4 x i16]. 
704/// \returns A 64-bit integer vector of [4 x i16] containing the lower 16 bits 705/// of the products of both parameters. 706static __inline__ __m64 __DEFAULT_FN_ATTRS 707_mm_mullo_pi16(__m64 __m1, __m64 __m2) 708{ 709 return (__m64)__builtin_ia32_pmullw((__v4hi)__m1, (__v4hi)__m2); 710} 711 712/// Left-shifts each 16-bit signed integer element of the first 713/// parameter, which is a 64-bit integer vector of [4 x i16], by the number 714/// of bits specified by the second parameter, which is a 64-bit integer. The 715/// lower 16 bits of the results are packed into a 64-bit integer vector of 716/// [4 x i16]. 717/// 718/// \headerfile <x86intrin.h> 719/// 720/// This intrinsic corresponds to the <c> PSLLW </c> instruction. 721/// 722/// \param __m 723/// A 64-bit integer vector of [4 x i16]. 724/// \param __count 725/// A 64-bit integer vector interpreted as a single 64-bit integer. 726/// \returns A 64-bit integer vector of [4 x i16] containing the left-shifted 727/// values. If \a __count is greater or equal to 16, the result is set to all 728/// 0. 729static __inline__ __m64 __DEFAULT_FN_ATTRS 730_mm_sll_pi16(__m64 __m, __m64 __count) 731{ 732 return (__m64)__builtin_ia32_psllw((__v4hi)__m, __count); 733} 734 735/// Left-shifts each 16-bit signed integer element of a 64-bit integer 736/// vector of [4 x i16] by the number of bits specified by a 32-bit integer. 737/// The lower 16 bits of the results are packed into a 64-bit integer vector 738/// of [4 x i16]. 739/// 740/// \headerfile <x86intrin.h> 741/// 742/// This intrinsic corresponds to the <c> PSLLW </c> instruction. 743/// 744/// \param __m 745/// A 64-bit integer vector of [4 x i16]. 746/// \param __count 747/// A 32-bit integer value. 748/// \returns A 64-bit integer vector of [4 x i16] containing the left-shifted 749/// values. If \a __count is greater or equal to 16, the result is set to all 750/// 0. 
751static __inline__ __m64 __DEFAULT_FN_ATTRS 752_mm_slli_pi16(__m64 __m, int __count) 753{ 754 return (__m64)__builtin_ia32_psllwi((__v4hi)__m, __count); 755} 756 757/// Left-shifts each 32-bit signed integer element of the first 758/// parameter, which is a 64-bit integer vector of [2 x i32], by the number 759/// of bits specified by the second parameter, which is a 64-bit integer. The 760/// lower 32 bits of the results are packed into a 64-bit integer vector of 761/// [2 x i32]. 762/// 763/// \headerfile <x86intrin.h> 764/// 765/// This intrinsic corresponds to the <c> PSLLD </c> instruction. 766/// 767/// \param __m 768/// A 64-bit integer vector of [2 x i32]. 769/// \param __count 770/// A 64-bit integer vector interpreted as a single 64-bit integer. 771/// \returns A 64-bit integer vector of [2 x i32] containing the left-shifted 772/// values. If \a __count is greater or equal to 32, the result is set to all 773/// 0. 774static __inline__ __m64 __DEFAULT_FN_ATTRS 775_mm_sll_pi32(__m64 __m, __m64 __count) 776{ 777 return (__m64)__builtin_ia32_pslld((__v2si)__m, __count); 778} 779 780/// Left-shifts each 32-bit signed integer element of a 64-bit integer 781/// vector of [2 x i32] by the number of bits specified by a 32-bit integer. 782/// The lower 32 bits of the results are packed into a 64-bit integer vector 783/// of [2 x i32]. 784/// 785/// \headerfile <x86intrin.h> 786/// 787/// This intrinsic corresponds to the <c> PSLLD </c> instruction. 788/// 789/// \param __m 790/// A 64-bit integer vector of [2 x i32]. 791/// \param __count 792/// A 32-bit integer value. 793/// \returns A 64-bit integer vector of [2 x i32] containing the left-shifted 794/// values. If \a __count is greater or equal to 32, the result is set to all 795/// 0. 
796static __inline__ __m64 __DEFAULT_FN_ATTRS 797_mm_slli_pi32(__m64 __m, int __count) 798{ 799 return (__m64)__builtin_ia32_pslldi((__v2si)__m, __count); 800} 801 802/// Left-shifts the first 64-bit integer parameter by the number of bits 803/// specified by the second 64-bit integer parameter. The lower 64 bits of 804/// result are returned. 805/// 806/// \headerfile <x86intrin.h> 807/// 808/// This intrinsic corresponds to the <c> PSLLQ </c> instruction. 809/// 810/// \param __m 811/// A 64-bit integer vector interpreted as a single 64-bit integer. 812/// \param __count 813/// A 64-bit integer vector interpreted as a single 64-bit integer. 814/// \returns A 64-bit integer vector containing the left-shifted value. If 815/// \a __count is greater or equal to 64, the result is set to 0. 816static __inline__ __m64 __DEFAULT_FN_ATTRS 817_mm_sll_si64(__m64 __m, __m64 __count) 818{ 819 return (__m64)__builtin_ia32_psllq((__v1di)__m, __count); 820} 821 822/// Left-shifts the first parameter, which is a 64-bit integer, by the 823/// number of bits specified by the second parameter, which is a 32-bit 824/// integer. The lower 64 bits of result are returned. 825/// 826/// \headerfile <x86intrin.h> 827/// 828/// This intrinsic corresponds to the <c> PSLLQ </c> instruction. 829/// 830/// \param __m 831/// A 64-bit integer vector interpreted as a single 64-bit integer. 832/// \param __count 833/// A 32-bit integer value. 834/// \returns A 64-bit integer vector containing the left-shifted value. If 835/// \a __count is greater or equal to 64, the result is set to 0. 836static __inline__ __m64 __DEFAULT_FN_ATTRS 837_mm_slli_si64(__m64 __m, int __count) 838{ 839 return (__m64)__builtin_ia32_psllqi((__v1di)__m, __count); 840} 841 842/// Right-shifts each 16-bit integer element of the first parameter, 843/// which is a 64-bit integer vector of [4 x i16], by the number of bits 844/// specified by the second parameter, which is a 64-bit integer. 
845/// 846/// High-order bits are filled with the sign bit of the initial value of each 847/// 16-bit element. The 16-bit results are packed into a 64-bit integer 848/// vector of [4 x i16]. 849/// 850/// \headerfile <x86intrin.h> 851/// 852/// This intrinsic corresponds to the <c> PSRAW </c> instruction. 853/// 854/// \param __m 855/// A 64-bit integer vector of [4 x i16]. 856/// \param __count 857/// A 64-bit integer vector interpreted as a single 64-bit integer. 858/// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted 859/// values. 860static __inline__ __m64 __DEFAULT_FN_ATTRS 861_mm_sra_pi16(__m64 __m, __m64 __count) 862{ 863 return (__m64)__builtin_ia32_psraw((__v4hi)__m, __count); 864} 865 866/// Right-shifts each 16-bit integer element of a 64-bit integer vector 867/// of [4 x i16] by the number of bits specified by a 32-bit integer. 868/// 869/// High-order bits are filled with the sign bit of the initial value of each 870/// 16-bit element. The 16-bit results are packed into a 64-bit integer 871/// vector of [4 x i16]. 872/// 873/// \headerfile <x86intrin.h> 874/// 875/// This intrinsic corresponds to the <c> PSRAW </c> instruction. 876/// 877/// \param __m 878/// A 64-bit integer vector of [4 x i16]. 879/// \param __count 880/// A 32-bit integer value. 881/// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted 882/// values. 883static __inline__ __m64 __DEFAULT_FN_ATTRS 884_mm_srai_pi16(__m64 __m, int __count) 885{ 886 return (__m64)__builtin_ia32_psrawi((__v4hi)__m, __count); 887} 888 889/// Right-shifts each 32-bit integer element of the first parameter, 890/// which is a 64-bit integer vector of [2 x i32], by the number of bits 891/// specified by the second parameter, which is a 64-bit integer. 892/// 893/// High-order bits are filled with the sign bit of the initial value of each 894/// 32-bit element. The 32-bit results are packed into a 64-bit integer 895/// vector of [2 x i32]. 
896/// 897/// \headerfile <x86intrin.h> 898/// 899/// This intrinsic corresponds to the <c> PSRAD </c> instruction. 900/// 901/// \param __m 902/// A 64-bit integer vector of [2 x i32]. 903/// \param __count 904/// A 64-bit integer vector interpreted as a single 64-bit integer. 905/// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted 906/// values. 907static __inline__ __m64 __DEFAULT_FN_ATTRS 908_mm_sra_pi32(__m64 __m, __m64 __count) 909{ 910 return (__m64)__builtin_ia32_psrad((__v2si)__m, __count); 911} 912 913/// Right-shifts each 32-bit integer element of a 64-bit integer vector 914/// of [2 x i32] by the number of bits specified by a 32-bit integer. 915/// 916/// High-order bits are filled with the sign bit of the initial value of each 917/// 32-bit element. The 32-bit results are packed into a 64-bit integer 918/// vector of [2 x i32]. 919/// 920/// \headerfile <x86intrin.h> 921/// 922/// This intrinsic corresponds to the <c> PSRAD </c> instruction. 923/// 924/// \param __m 925/// A 64-bit integer vector of [2 x i32]. 926/// \param __count 927/// A 32-bit integer value. 928/// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted 929/// values. 930static __inline__ __m64 __DEFAULT_FN_ATTRS 931_mm_srai_pi32(__m64 __m, int __count) 932{ 933 return (__m64)__builtin_ia32_psradi((__v2si)__m, __count); 934} 935 936/// Right-shifts each 16-bit integer element of the first parameter, 937/// which is a 64-bit integer vector of [4 x i16], by the number of bits 938/// specified by the second parameter, which is a 64-bit integer. 939/// 940/// High-order bits are cleared. The 16-bit results are packed into a 64-bit 941/// integer vector of [4 x i16]. 942/// 943/// \headerfile <x86intrin.h> 944/// 945/// This intrinsic corresponds to the <c> PSRLW </c> instruction. 946/// 947/// \param __m 948/// A 64-bit integer vector of [4 x i16]. 949/// \param __count 950/// A 64-bit integer vector interpreted as a single 64-bit integer. 
951/// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted 952/// values. 953static __inline__ __m64 __DEFAULT_FN_ATTRS 954_mm_srl_pi16(__m64 __m, __m64 __count) 955{ 956 return (__m64)__builtin_ia32_psrlw((__v4hi)__m, __count); 957} 958 959/// Right-shifts each 16-bit integer element of a 64-bit integer vector 960/// of [4 x i16] by the number of bits specified by a 32-bit integer. 961/// 962/// High-order bits are cleared. The 16-bit results are packed into a 64-bit 963/// integer vector of [4 x i16]. 964/// 965/// \headerfile <x86intrin.h> 966/// 967/// This intrinsic corresponds to the <c> PSRLW </c> instruction. 968/// 969/// \param __m 970/// A 64-bit integer vector of [4 x i16]. 971/// \param __count 972/// A 32-bit integer value. 973/// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted 974/// values. 975static __inline__ __m64 __DEFAULT_FN_ATTRS 976_mm_srli_pi16(__m64 __m, int __count) 977{ 978 return (__m64)__builtin_ia32_psrlwi((__v4hi)__m, __count); 979} 980 981/// Right-shifts each 32-bit integer element of the first parameter, 982/// which is a 64-bit integer vector of [2 x i32], by the number of bits 983/// specified by the second parameter, which is a 64-bit integer. 984/// 985/// High-order bits are cleared. The 32-bit results are packed into a 64-bit 986/// integer vector of [2 x i32]. 987/// 988/// \headerfile <x86intrin.h> 989/// 990/// This intrinsic corresponds to the <c> PSRLD </c> instruction. 991/// 992/// \param __m 993/// A 64-bit integer vector of [2 x i32]. 994/// \param __count 995/// A 64-bit integer vector interpreted as a single 64-bit integer. 996/// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted 997/// values. 
998static __inline__ __m64 __DEFAULT_FN_ATTRS 999_mm_srl_pi32(__m64 __m, __m64 __count) 1000{ 1001 return (__m64)__builtin_ia32_psrld((__v2si)__m, __count); 1002} 1003 1004/// Right-shifts each 32-bit integer element of a 64-bit integer vector 1005/// of [2 x i32] by the number of bits specified by a 32-bit integer. 1006/// 1007/// High-order bits are cleared. The 32-bit results are packed into a 64-bit 1008/// integer vector of [2 x i32]. 1009/// 1010/// \headerfile <x86intrin.h> 1011/// 1012/// This intrinsic corresponds to the <c> PSRLD </c> instruction. 1013/// 1014/// \param __m 1015/// A 64-bit integer vector of [2 x i32]. 1016/// \param __count 1017/// A 32-bit integer value. 1018/// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted 1019/// values. 1020static __inline__ __m64 __DEFAULT_FN_ATTRS 1021_mm_srli_pi32(__m64 __m, int __count) 1022{ 1023 return (__m64)__builtin_ia32_psrldi((__v2si)__m, __count); 1024} 1025 1026/// Right-shifts the first 64-bit integer parameter by the number of bits 1027/// specified by the second 64-bit integer parameter. 1028/// 1029/// High-order bits are cleared. 1030/// 1031/// \headerfile <x86intrin.h> 1032/// 1033/// This intrinsic corresponds to the <c> PSRLQ </c> instruction. 1034/// 1035/// \param __m 1036/// A 64-bit integer vector interpreted as a single 64-bit integer. 1037/// \param __count 1038/// A 64-bit integer vector interpreted as a single 64-bit integer. 1039/// \returns A 64-bit integer vector containing the right-shifted value. 1040static __inline__ __m64 __DEFAULT_FN_ATTRS 1041_mm_srl_si64(__m64 __m, __m64 __count) 1042{ 1043 return (__m64)__builtin_ia32_psrlq((__v1di)__m, __count); 1044} 1045 1046/// Right-shifts the first parameter, which is a 64-bit integer, by the 1047/// number of bits specified by the second parameter, which is a 32-bit 1048/// integer. 1049/// 1050/// High-order bits are cleared. 
1051/// 1052/// \headerfile <x86intrin.h> 1053/// 1054/// This intrinsic corresponds to the <c> PSRLQ </c> instruction. 1055/// 1056/// \param __m 1057/// A 64-bit integer vector interpreted as a single 64-bit integer. 1058/// \param __count 1059/// A 32-bit integer value. 1060/// \returns A 64-bit integer vector containing the right-shifted value. 1061static __inline__ __m64 __DEFAULT_FN_ATTRS 1062_mm_srli_si64(__m64 __m, int __count) 1063{ 1064 return (__m64)__builtin_ia32_psrlqi((__v1di)__m, __count); 1065} 1066 1067/// Performs a bitwise AND of two 64-bit integer vectors. 1068/// 1069/// \headerfile <x86intrin.h> 1070/// 1071/// This intrinsic corresponds to the <c> PAND </c> instruction. 1072/// 1073/// \param __m1 1074/// A 64-bit integer vector. 1075/// \param __m2 1076/// A 64-bit integer vector. 1077/// \returns A 64-bit integer vector containing the bitwise AND of both 1078/// parameters. 1079static __inline__ __m64 __DEFAULT_FN_ATTRS 1080_mm_and_si64(__m64 __m1, __m64 __m2) 1081{ 1082 return __builtin_ia32_pand((__v1di)__m1, (__v1di)__m2); 1083} 1084 1085/// Performs a bitwise NOT of the first 64-bit integer vector, and then 1086/// performs a bitwise AND of the intermediate result and the second 64-bit 1087/// integer vector. 1088/// 1089/// \headerfile <x86intrin.h> 1090/// 1091/// This intrinsic corresponds to the <c> PANDN </c> instruction. 1092/// 1093/// \param __m1 1094/// A 64-bit integer vector. The one's complement of this parameter is used 1095/// in the bitwise AND. 1096/// \param __m2 1097/// A 64-bit integer vector. 1098/// \returns A 64-bit integer vector containing the bitwise AND of the second 1099/// parameter and the one's complement of the first parameter. 1100static __inline__ __m64 __DEFAULT_FN_ATTRS 1101_mm_andnot_si64(__m64 __m1, __m64 __m2) 1102{ 1103 return __builtin_ia32_pandn((__v1di)__m1, (__v1di)__m2); 1104} 1105 1106/// Performs a bitwise OR of two 64-bit integer vectors. 
1107/// 1108/// \headerfile <x86intrin.h> 1109/// 1110/// This intrinsic corresponds to the <c> POR </c> instruction. 1111/// 1112/// \param __m1 1113/// A 64-bit integer vector. 1114/// \param __m2 1115/// A 64-bit integer vector. 1116/// \returns A 64-bit integer vector containing the bitwise OR of both 1117/// parameters. 1118static __inline__ __m64 __DEFAULT_FN_ATTRS 1119_mm_or_si64(__m64 __m1, __m64 __m2) 1120{ 1121 return __builtin_ia32_por((__v1di)__m1, (__v1di)__m2); 1122} 1123 1124/// Performs a bitwise exclusive OR of two 64-bit integer vectors. 1125/// 1126/// \headerfile <x86intrin.h> 1127/// 1128/// This intrinsic corresponds to the <c> PXOR </c> instruction. 1129/// 1130/// \param __m1 1131/// A 64-bit integer vector. 1132/// \param __m2 1133/// A 64-bit integer vector. 1134/// \returns A 64-bit integer vector containing the bitwise exclusive OR of both 1135/// parameters. 1136static __inline__ __m64 __DEFAULT_FN_ATTRS 1137_mm_xor_si64(__m64 __m1, __m64 __m2) 1138{ 1139 return __builtin_ia32_pxor((__v1di)__m1, (__v1di)__m2); 1140} 1141 1142/// Compares the 8-bit integer elements of two 64-bit integer vectors of 1143/// [8 x i8] to determine if the element of the first vector is equal to the 1144/// corresponding element of the second vector. 1145/// 1146/// The comparison yields 0 for false, 0xFF for true. 1147/// 1148/// \headerfile <x86intrin.h> 1149/// 1150/// This intrinsic corresponds to the <c> PCMPEQB </c> instruction. 1151/// 1152/// \param __m1 1153/// A 64-bit integer vector of [8 x i8]. 1154/// \param __m2 1155/// A 64-bit integer vector of [8 x i8]. 1156/// \returns A 64-bit integer vector of [8 x i8] containing the comparison 1157/// results. 
1158static __inline__ __m64 __DEFAULT_FN_ATTRS 1159_mm_cmpeq_pi8(__m64 __m1, __m64 __m2) 1160{ 1161 return (__m64)__builtin_ia32_pcmpeqb((__v8qi)__m1, (__v8qi)__m2); 1162} 1163 1164/// Compares the 16-bit integer elements of two 64-bit integer vectors of 1165/// [4 x i16] to determine if the element of the first vector is equal to the 1166/// corresponding element of the second vector. 1167/// 1168/// The comparison yields 0 for false, 0xFFFF for true. 1169/// 1170/// \headerfile <x86intrin.h> 1171/// 1172/// This intrinsic corresponds to the <c> PCMPEQW </c> instruction. 1173/// 1174/// \param __m1 1175/// A 64-bit integer vector of [4 x i16]. 1176/// \param __m2 1177/// A 64-bit integer vector of [4 x i16]. 1178/// \returns A 64-bit integer vector of [4 x i16] containing the comparison 1179/// results. 1180static __inline__ __m64 __DEFAULT_FN_ATTRS 1181_mm_cmpeq_pi16(__m64 __m1, __m64 __m2) 1182{ 1183 return (__m64)__builtin_ia32_pcmpeqw((__v4hi)__m1, (__v4hi)__m2); 1184} 1185 1186/// Compares the 32-bit integer elements of two 64-bit integer vectors of 1187/// [2 x i32] to determine if the element of the first vector is equal to the 1188/// corresponding element of the second vector. 1189/// 1190/// The comparison yields 0 for false, 0xFFFFFFFF for true. 1191/// 1192/// \headerfile <x86intrin.h> 1193/// 1194/// This intrinsic corresponds to the <c> PCMPEQD </c> instruction. 1195/// 1196/// \param __m1 1197/// A 64-bit integer vector of [2 x i32]. 1198/// \param __m2 1199/// A 64-bit integer vector of [2 x i32]. 1200/// \returns A 64-bit integer vector of [2 x i32] containing the comparison 1201/// results. 
1202static __inline__ __m64 __DEFAULT_FN_ATTRS 1203_mm_cmpeq_pi32(__m64 __m1, __m64 __m2) 1204{ 1205 return (__m64)__builtin_ia32_pcmpeqd((__v2si)__m1, (__v2si)__m2); 1206} 1207 1208/// Compares the 8-bit integer elements of two 64-bit integer vectors of 1209/// [8 x i8] to determine if the element of the first vector is greater than 1210/// the corresponding element of the second vector. 1211/// 1212/// The comparison yields 0 for false, 0xFF for true. 1213/// 1214/// \headerfile <x86intrin.h> 1215/// 1216/// This intrinsic corresponds to the <c> PCMPGTB </c> instruction. 1217/// 1218/// \param __m1 1219/// A 64-bit integer vector of [8 x i8]. 1220/// \param __m2 1221/// A 64-bit integer vector of [8 x i8]. 1222/// \returns A 64-bit integer vector of [8 x i8] containing the comparison 1223/// results. 1224static __inline__ __m64 __DEFAULT_FN_ATTRS 1225_mm_cmpgt_pi8(__m64 __m1, __m64 __m2) 1226{ 1227 return (__m64)__builtin_ia32_pcmpgtb((__v8qi)__m1, (__v8qi)__m2); 1228} 1229 1230/// Compares the 16-bit integer elements of two 64-bit integer vectors of 1231/// [4 x i16] to determine if the element of the first vector is greater than 1232/// the corresponding element of the second vector. 1233/// 1234/// The comparison yields 0 for false, 0xFFFF for true. 1235/// 1236/// \headerfile <x86intrin.h> 1237/// 1238/// This intrinsic corresponds to the <c> PCMPGTW </c> instruction. 1239/// 1240/// \param __m1 1241/// A 64-bit integer vector of [4 x i16]. 1242/// \param __m2 1243/// A 64-bit integer vector of [4 x i16]. 1244/// \returns A 64-bit integer vector of [4 x i16] containing the comparison 1245/// results. 
1246static __inline__ __m64 __DEFAULT_FN_ATTRS 1247_mm_cmpgt_pi16(__m64 __m1, __m64 __m2) 1248{ 1249 return (__m64)__builtin_ia32_pcmpgtw((__v4hi)__m1, (__v4hi)__m2); 1250} 1251 1252/// Compares the 32-bit integer elements of two 64-bit integer vectors of 1253/// [2 x i32] to determine if the element of the first vector is greater than 1254/// the corresponding element of the second vector. 1255/// 1256/// The comparison yields 0 for false, 0xFFFFFFFF for true. 1257/// 1258/// \headerfile <x86intrin.h> 1259/// 1260/// This intrinsic corresponds to the <c> PCMPGTD </c> instruction. 1261/// 1262/// \param __m1 1263/// A 64-bit integer vector of [2 x i32]. 1264/// \param __m2 1265/// A 64-bit integer vector of [2 x i32]. 1266/// \returns A 64-bit integer vector of [2 x i32] containing the comparison 1267/// results. 1268static __inline__ __m64 __DEFAULT_FN_ATTRS 1269_mm_cmpgt_pi32(__m64 __m1, __m64 __m2) 1270{ 1271 return (__m64)__builtin_ia32_pcmpgtd((__v2si)__m1, (__v2si)__m2); 1272} 1273 1274/// Constructs a 64-bit integer vector initialized to zero. 1275/// 1276/// \headerfile <x86intrin.h> 1277/// 1278/// This intrinsic corresponds to the <c> PXOR </c> instruction. 1279/// 1280/// \returns An initialized 64-bit integer vector with all elements set to zero. 1281static __inline__ __m64 __DEFAULT_FN_ATTRS 1282_mm_setzero_si64(void) 1283{ 1284 return __extension__ (__m64){ 0LL }; 1285} 1286 1287/// Constructs a 64-bit integer vector initialized with the specified 1288/// 32-bit integer values. 1289/// 1290/// \headerfile <x86intrin.h> 1291/// 1292/// This intrinsic is a utility function and does not correspond to a specific 1293/// instruction. 1294/// 1295/// \param __i1 1296/// A 32-bit integer value used to initialize the upper 32 bits of the 1297/// result. 1298/// \param __i0 1299/// A 32-bit integer value used to initialize the lower 32 bits of the 1300/// result. 1301/// \returns An initialized 64-bit integer vector. 
1302static __inline__ __m64 __DEFAULT_FN_ATTRS 1303_mm_set_pi32(int __i1, int __i0) 1304{ 1305 return (__m64)__builtin_ia32_vec_init_v2si(__i0, __i1); 1306} 1307 1308/// Constructs a 64-bit integer vector initialized with the specified 1309/// 16-bit integer values. 1310/// 1311/// \headerfile <x86intrin.h> 1312/// 1313/// This intrinsic is a utility function and does not correspond to a specific 1314/// instruction. 1315/// 1316/// \param __s3 1317/// A 16-bit integer value used to initialize bits [63:48] of the result. 1318/// \param __s2 1319/// A 16-bit integer value used to initialize bits [47:32] of the result. 1320/// \param __s1 1321/// A 16-bit integer value used to initialize bits [31:16] of the result. 1322/// \param __s0 1323/// A 16-bit integer value used to initialize bits [15:0] of the result. 1324/// \returns An initialized 64-bit integer vector. 1325static __inline__ __m64 __DEFAULT_FN_ATTRS 1326_mm_set_pi16(short __s3, short __s2, short __s1, short __s0) 1327{ 1328 return (__m64)__builtin_ia32_vec_init_v4hi(__s0, __s1, __s2, __s3); 1329} 1330 1331/// Constructs a 64-bit integer vector initialized with the specified 1332/// 8-bit integer values. 1333/// 1334/// \headerfile <x86intrin.h> 1335/// 1336/// This intrinsic is a utility function and does not correspond to a specific 1337/// instruction. 1338/// 1339/// \param __b7 1340/// An 8-bit integer value used to initialize bits [63:56] of the result. 1341/// \param __b6 1342/// An 8-bit integer value used to initialize bits [55:48] of the result. 1343/// \param __b5 1344/// An 8-bit integer value used to initialize bits [47:40] of the result. 1345/// \param __b4 1346/// An 8-bit integer value used to initialize bits [39:32] of the result. 1347/// \param __b3 1348/// An 8-bit integer value used to initialize bits [31:24] of the result. 1349/// \param __b2 1350/// An 8-bit integer value used to initialize bits [23:16] of the result. 
1351/// \param __b1 1352/// An 8-bit integer value used to initialize bits [15:8] of the result. 1353/// \param __b0 1354/// An 8-bit integer value used to initialize bits [7:0] of the result. 1355/// \returns An initialized 64-bit integer vector. 1356static __inline__ __m64 __DEFAULT_FN_ATTRS 1357_mm_set_pi8(char __b7, char __b6, char __b5, char __b4, char __b3, char __b2, 1358 char __b1, char __b0) 1359{ 1360 return (__m64)__builtin_ia32_vec_init_v8qi(__b0, __b1, __b2, __b3, 1361 __b4, __b5, __b6, __b7); 1362} 1363 1364/// Constructs a 64-bit integer vector of [2 x i32], with each of the 1365/// 32-bit integer vector elements set to the specified 32-bit integer 1366/// value. 1367/// 1368/// \headerfile <x86intrin.h> 1369/// 1370/// This intrinsic is a utility function and does not correspond to a specific 1371/// instruction. 1372/// 1373/// \param __i 1374/// A 32-bit integer value used to initialize each vector element of the 1375/// result. 1376/// \returns An initialized 64-bit integer vector of [2 x i32]. 1377static __inline__ __m64 __DEFAULT_FN_ATTRS 1378_mm_set1_pi32(int __i) 1379{ 1380 return _mm_set_pi32(__i, __i); 1381} 1382 1383/// Constructs a 64-bit integer vector of [4 x i16], with each of the 1384/// 16-bit integer vector elements set to the specified 16-bit integer 1385/// value. 1386/// 1387/// \headerfile <x86intrin.h> 1388/// 1389/// This intrinsic is a utility function and does not correspond to a specific 1390/// instruction. 1391/// 1392/// \param __w 1393/// A 16-bit integer value used to initialize each vector element of the 1394/// result. 1395/// \returns An initialized 64-bit integer vector of [4 x i16]. 1396static __inline__ __m64 __DEFAULT_FN_ATTRS 1397_mm_set1_pi16(short __w) 1398{ 1399 return _mm_set_pi16(__w, __w, __w, __w); 1400} 1401 1402/// Constructs a 64-bit integer vector of [8 x i8], with each of the 1403/// 8-bit integer vector elements set to the specified 8-bit integer value. 
1404/// 1405/// \headerfile <x86intrin.h> 1406/// 1407/// This intrinsic is a utility function and does not correspond to a specific 1408/// instruction. 1409/// 1410/// \param __b 1411/// An 8-bit integer value used to initialize each vector element of the 1412/// result. 1413/// \returns An initialized 64-bit integer vector of [8 x i8]. 1414static __inline__ __m64 __DEFAULT_FN_ATTRS 1415_mm_set1_pi8(char __b) 1416{ 1417 return _mm_set_pi8(__b, __b, __b, __b, __b, __b, __b, __b); 1418} 1419 1420/// Constructs a 64-bit integer vector, initialized in reverse order with 1421/// the specified 32-bit integer values. 1422/// 1423/// \headerfile <x86intrin.h> 1424/// 1425/// This intrinsic is a utility function and does not correspond to a specific 1426/// instruction. 1427/// 1428/// \param __i0 1429/// A 32-bit integer value used to initialize the lower 32 bits of the 1430/// result. 1431/// \param __i1 1432/// A 32-bit integer value used to initialize the upper 32 bits of the 1433/// result. 1434/// \returns An initialized 64-bit integer vector. 1435static __inline__ __m64 __DEFAULT_FN_ATTRS 1436_mm_setr_pi32(int __i0, int __i1) 1437{ 1438 return _mm_set_pi32(__i1, __i0); 1439} 1440 1441/// Constructs a 64-bit integer vector, initialized in reverse order with 1442/// the specified 16-bit integer values. 1443/// 1444/// \headerfile <x86intrin.h> 1445/// 1446/// This intrinsic is a utility function and does not correspond to a specific 1447/// instruction. 1448/// 1449/// \param __w0 1450/// A 16-bit integer value used to initialize bits [15:0] of the result. 1451/// \param __w1 1452/// A 16-bit integer value used to initialize bits [31:16] of the result. 1453/// \param __w2 1454/// A 16-bit integer value used to initialize bits [47:32] of the result. 1455/// \param __w3 1456/// A 16-bit integer value used to initialize bits [63:48] of the result. 1457/// \returns An initialized 64-bit integer vector. 
1458static __inline__ __m64 __DEFAULT_FN_ATTRS 1459_mm_setr_pi16(short __w0, short __w1, short __w2, short __w3) 1460{ 1461 return _mm_set_pi16(__w3, __w2, __w1, __w0); 1462} 1463 1464/// Constructs a 64-bit integer vector, initialized in reverse order with 1465/// the specified 8-bit integer values. 1466/// 1467/// \headerfile <x86intrin.h> 1468/// 1469/// This intrinsic is a utility function and does not correspond to a specific 1470/// instruction. 1471/// 1472/// \param __b0 1473/// An 8-bit integer value used to initialize bits [7:0] of the result. 1474/// \param __b1 1475/// An 8-bit integer value used to initialize bits [15:8] of the result. 1476/// \param __b2 1477/// An 8-bit integer value used to initialize bits [23:16] of the result. 1478/// \param __b3 1479/// An 8-bit integer value used to initialize bits [31:24] of the result. 1480/// \param __b4 1481/// An 8-bit integer value used to initialize bits [39:32] of the result. 1482/// \param __b5 1483/// An 8-bit integer value used to initialize bits [47:40] of the result. 1484/// \param __b6 1485/// An 8-bit integer value used to initialize bits [55:48] of the result. 1486/// \param __b7 1487/// An 8-bit integer value used to initialize bits [63:56] of the result. 1488/// \returns An initialized 64-bit integer vector. 1489static __inline__ __m64 __DEFAULT_FN_ATTRS 1490_mm_setr_pi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5, 1491 char __b6, char __b7) 1492{ 1493 return _mm_set_pi8(__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0); 1494} 1495 1496#undef __DEFAULT_FN_ATTRS 1497 1498/* Aliases for compatibility. 
*/

/*
 * Every macro below is object-like: the _m_* name expands to the _mm_*
 * intrinsic written next to it, so either spelling calls the same function.
 */

/* _mm_empty and the _mm_cvt* scalar/vector conversions. */
#define _m_empty _mm_empty
#define _m_from_int _mm_cvtsi32_si64
#define _m_from_int64 _mm_cvtsi64_m64
#define _m_to_int _mm_cvtsi64_si32
#define _m_to_int64 _mm_cvtm64_si64

/* The _mm_packs_* family. */
#define _m_packsswb _mm_packs_pi16
#define _m_packssdw _mm_packs_pi32
#define _m_packuswb _mm_packs_pu16

/* The _mm_unpackhi_* and _mm_unpacklo_* families. */
#define _m_punpckhbw _mm_unpackhi_pi8
#define _m_punpckhwd _mm_unpackhi_pi16
#define _m_punpckhdq _mm_unpackhi_pi32
#define _m_punpcklbw _mm_unpacklo_pi8
#define _m_punpcklwd _mm_unpacklo_pi16
#define _m_punpckldq _mm_unpacklo_pi32

/* The _mm_add_* and _mm_adds_* families. */
#define _m_paddb _mm_add_pi8
#define _m_paddw _mm_add_pi16
#define _m_paddd _mm_add_pi32
#define _m_paddsb _mm_adds_pi8
#define _m_paddsw _mm_adds_pi16
#define _m_paddusb _mm_adds_pu8
#define _m_paddusw _mm_adds_pu16

/* The _mm_sub_* and _mm_subs_* families. */
#define _m_psubb _mm_sub_pi8
#define _m_psubw _mm_sub_pi16
#define _m_psubd _mm_sub_pi32
#define _m_psubsb _mm_subs_pi8
#define _m_psubsw _mm_subs_pi16
#define _m_psubusb _mm_subs_pu8
#define _m_psubusw _mm_subs_pu16

/* _mm_madd_pi16, _mm_mulhi_pi16 and _mm_mullo_pi16. */
#define _m_pmaddwd _mm_madd_pi16
#define _m_pmulhw _mm_mulhi_pi16
#define _m_pmullw _mm_mullo_pi16

/*
 * Shifts. An alias ending in "i" maps to the variant whose count is a 32-bit
 * integer; the alias without it maps to the variant whose count is an __m64.
 */
#define _m_psllw _mm_sll_pi16
#define _m_psllwi _mm_slli_pi16
#define _m_pslld _mm_sll_pi32
#define _m_pslldi _mm_slli_pi32
#define _m_psllq _mm_sll_si64
#define _m_psllqi _mm_slli_si64
#define _m_psraw _mm_sra_pi16
#define _m_psrawi _mm_srai_pi16
#define _m_psrad _mm_sra_pi32
#define _m_psradi _mm_srai_pi32
#define _m_psrlw _mm_srl_pi16
#define _m_psrlwi _mm_srli_pi16
#define _m_psrld _mm_srl_pi32
#define _m_psrldi _mm_srli_pi32
#define _m_psrlq _mm_srl_si64
#define _m_psrlqi _mm_srli_si64

/* Bitwise AND, AND-NOT, OR and exclusive OR on 64-bit integer vectors. */
#define _m_pand _mm_and_si64
#define _m_pandn _mm_andnot_si64
#define _m_por _mm_or_si64
#define _m_pxor _mm_xor_si64

/* Element-wise equality and greater-than comparisons. */
#define _m_pcmpeqb _mm_cmpeq_pi8
#define _m_pcmpeqw _mm_cmpeq_pi16
#define _m_pcmpeqd _mm_cmpeq_pi32
#define _m_pcmpgtb _mm_cmpgt_pi8
#define _m_pcmpgtw _mm_cmpgt_pi16
#define _m_pcmpgtd _mm_cmpgt_pi32

#endif /* __MMINTRIN_H */