1/*===---- mmintrin.h - MMX intrinsics --------------------------------------=== 2 * 3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 * See https://llvm.org/LICENSE.txt for license information. 5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 * 7 *===-----------------------------------------------------------------------=== 8 */ 9 10#ifndef __MMINTRIN_H 11#define __MMINTRIN_H 12 13#if !defined(__i386__) && !defined(__x86_64__) 14#error "This header is only meant to be used on x86 and x64 architecture" 15#endif 16 17typedef long long __m64 __attribute__((__vector_size__(8), __aligned__(8))); 18 19typedef long long __v1di __attribute__((__vector_size__(8))); 20typedef int __v2si __attribute__((__vector_size__(8))); 21typedef short __v4hi __attribute__((__vector_size__(8))); 22typedef char __v8qi __attribute__((__vector_size__(8))); 23 24/* Define the default attributes for the functions in this file. */ 25#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("mmx"), __min_vector_width__(64))) 26 27/// Clears the MMX state by setting the state of the x87 stack registers 28/// to empty. 29/// 30/// \headerfile <x86intrin.h> 31/// 32/// This intrinsic corresponds to the <c> EMMS </c> instruction. 33/// 34static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("mmx"))) 35_mm_empty(void) 36{ 37 __builtin_ia32_emms(); 38} 39 40/// Constructs a 64-bit integer vector, setting the lower 32 bits to the 41/// value of the 32-bit integer parameter and setting the upper 32 bits to 0. 42/// 43/// \headerfile <x86intrin.h> 44/// 45/// This intrinsic corresponds to the <c> MOVD </c> instruction. 46/// 47/// \param __i 48/// A 32-bit integer value. 49/// \returns A 64-bit integer vector. The lower 32 bits contain the value of the 50/// parameter. The upper 32 bits are set to 0. 51static __inline__ __m64 __DEFAULT_FN_ATTRS 52_mm_cvtsi32_si64(int __i) 53{ 54 return (__m64)__builtin_ia32_vec_init_v2si(__i, 0); 55} 56 57/// Returns the lower 32 bits of a 64-bit integer vector as a 32-bit 58/// signed integer. 59/// 60/// \headerfile <x86intrin.h> 61/// 62/// This intrinsic corresponds to the <c> MOVD </c> instruction. 63/// 64/// \param __m 65/// A 64-bit integer vector. 66/// \returns A 32-bit signed integer value containing the lower 32 bits of the 67/// parameter. 68static __inline__ int __DEFAULT_FN_ATTRS 69_mm_cvtsi64_si32(__m64 __m) 70{ 71 return __builtin_ia32_vec_ext_v2si((__v2si)__m, 0); 72} 73 74/// Casts a 64-bit signed integer value into a 64-bit integer vector. 75/// 76/// \headerfile <x86intrin.h> 77/// 78/// This intrinsic corresponds to the <c> MOVQ </c> instruction. 79/// 80/// \param __i 81/// A 64-bit signed integer. 82/// \returns A 64-bit integer vector containing the same bitwise pattern as the 83/// parameter. 84static __inline__ __m64 __DEFAULT_FN_ATTRS 85_mm_cvtsi64_m64(long long __i) 86{ 87 return (__m64)__i; 88} 89 90/// Casts a 64-bit integer vector into a 64-bit signed integer value. 91/// 92/// \headerfile <x86intrin.h> 93/// 94/// This intrinsic corresponds to the <c> MOVQ </c> instruction. 95/// 96/// \param __m 97/// A 64-bit integer vector. 98/// \returns A 64-bit signed integer containing the same bitwise pattern as the 99/// parameter. 100static __inline__ long long __DEFAULT_FN_ATTRS 101_mm_cvtm64_si64(__m64 __m) 102{ 103 return (long long)__m; 104} 105 106/// Converts 16-bit signed integers from both 64-bit integer vector 107/// parameters of [4 x i16] into 8-bit signed integer values, and constructs 108/// a 64-bit integer vector of [8 x i8] as the result. Positive values 109/// greater than 0x7F are saturated to 0x7F. Negative values less than 0x80 110/// are saturated to 0x80. 111/// 112/// \headerfile <x86intrin.h> 113/// 114/// This intrinsic corresponds to the <c> PACKSSWB </c> instruction. 115/// 116/// \param __m1 117/// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a 118/// 16-bit signed integer and is converted to an 8-bit signed integer with 119/// saturation. Positive values greater than 0x7F are saturated to 0x7F. 120/// Negative values less than 0x80 are saturated to 0x80. The converted 121/// [4 x i8] values are written to the lower 32 bits of the result. 122/// \param __m2 123/// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a 124/// 16-bit signed integer and is converted to an 8-bit signed integer with 125/// saturation. Positive values greater than 0x7F are saturated to 0x7F. 126/// Negative values less than 0x80 are saturated to 0x80. The converted 127/// [4 x i8] values are written to the upper 32 bits of the result. 128/// \returns A 64-bit integer vector of [8 x i8] containing the converted 129/// values. 130static __inline__ __m64 __DEFAULT_FN_ATTRS 131_mm_packs_pi16(__m64 __m1, __m64 __m2) 132{ 133 return (__m64)__builtin_ia32_packsswb((__v4hi)__m1, (__v4hi)__m2); 134} 135 136/// Converts 32-bit signed integers from both 64-bit integer vector 137/// parameters of [2 x i32] into 16-bit signed integer values, and constructs 138/// a 64-bit integer vector of [4 x i16] as the result. Positive values 139/// greater than 0x7FFF are saturated to 0x7FFF. Negative values less than 140/// 0x8000 are saturated to 0x8000. 141/// 142/// \headerfile <x86intrin.h> 143/// 144/// This intrinsic corresponds to the <c> PACKSSDW </c> instruction. 145/// 146/// \param __m1 147/// A 64-bit integer vector of [2 x i32]. Each 32-bit element is treated as a 148/// 32-bit signed integer and is converted to a 16-bit signed integer with 149/// saturation. Positive values greater than 0x7FFF are saturated to 0x7FFF. 150/// Negative values less than 0x8000 are saturated to 0x8000. The converted 151/// [2 x i16] values are written to the lower 32 bits of the result. 152/// \param __m2 153/// A 64-bit integer vector of [2 x i32]. Each 32-bit element is treated as a 154/// 32-bit signed integer and is converted to a 16-bit signed integer with 155/// saturation. Positive values greater than 0x7FFF are saturated to 0x7FFF. 156/// Negative values less than 0x8000 are saturated to 0x8000. The converted 157/// [2 x i16] values are written to the upper 32 bits of the result. 158/// \returns A 64-bit integer vector of [4 x i16] containing the converted 159/// values. 160static __inline__ __m64 __DEFAULT_FN_ATTRS 161_mm_packs_pi32(__m64 __m1, __m64 __m2) 162{ 163 return (__m64)__builtin_ia32_packssdw((__v2si)__m1, (__v2si)__m2); 164} 165 166/// Converts 16-bit signed integers from both 64-bit integer vector 167/// parameters of [4 x i16] into 8-bit unsigned integer values, and 168/// constructs a 64-bit integer vector of [8 x i8] as the result. Values 169/// greater than 0xFF are saturated to 0xFF. Values less than 0 are saturated 170/// to 0. 171/// 172/// \headerfile <x86intrin.h> 173/// 174/// This intrinsic corresponds to the <c> PACKUSWB </c> instruction. 175/// 176/// \param __m1 177/// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a 178/// 16-bit signed integer and is converted to an 8-bit unsigned integer with 179/// saturation. Values greater than 0xFF are saturated to 0xFF. Values less 180/// than 0 are saturated to 0. The converted [4 x i8] values are written to 181/// the lower 32 bits of the result. 182/// \param __m2 183/// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a 184/// 16-bit signed integer and is converted to an 8-bit unsigned integer with 185/// saturation. Values greater than 0xFF are saturated to 0xFF. Values less 186/// than 0 are saturated to 0. The converted [4 x i8] values are written to 187/// the upper 32 bits of the result. 188/// \returns A 64-bit integer vector of [8 x i8] containing the converted 189/// values. 190static __inline__ __m64 __DEFAULT_FN_ATTRS 191_mm_packs_pu16(__m64 __m1, __m64 __m2) 192{ 193 return (__m64)__builtin_ia32_packuswb((__v4hi)__m1, (__v4hi)__m2); 194} 195 196/// Unpacks the upper 32 bits from two 64-bit integer vectors of [8 x i8] 197/// and interleaves them into a 64-bit integer vector of [8 x i8]. 198/// 199/// \headerfile <x86intrin.h> 200/// 201/// This intrinsic corresponds to the <c> PUNPCKHBW </c> instruction. 202/// 203/// \param __m1 204/// A 64-bit integer vector of [8 x i8]. \n 205/// Bits [39:32] are written to bits [7:0] of the result. \n 206/// Bits [47:40] are written to bits [23:16] of the result. \n 207/// Bits [55:48] are written to bits [39:32] of the result. \n 208/// Bits [63:56] are written to bits [55:48] of the result. 209/// \param __m2 210/// A 64-bit integer vector of [8 x i8]. 211/// Bits [39:32] are written to bits [15:8] of the result. \n 212/// Bits [47:40] are written to bits [31:24] of the result. \n 213/// Bits [55:48] are written to bits [47:40] of the result. \n 214/// Bits [63:56] are written to bits [63:56] of the result. 215/// \returns A 64-bit integer vector of [8 x i8] containing the interleaved 216/// values. 217static __inline__ __m64 __DEFAULT_FN_ATTRS 218_mm_unpackhi_pi8(__m64 __m1, __m64 __m2) 219{ 220 return (__m64)__builtin_ia32_punpckhbw((__v8qi)__m1, (__v8qi)__m2); 221} 222 223/// Unpacks the upper 32 bits from two 64-bit integer vectors of 224/// [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16]. 225/// 226/// \headerfile <x86intrin.h> 227/// 228/// This intrinsic corresponds to the <c> PUNPCKHWD </c> instruction. 229/// 230/// \param __m1 231/// A 64-bit integer vector of [4 x i16]. 232/// Bits [47:32] are written to bits [15:0] of the result. \n 233/// Bits [63:48] are written to bits [47:32] of the result. 234/// \param __m2 235/// A 64-bit integer vector of [4 x i16]. 236/// Bits [47:32] are written to bits [31:16] of the result. \n 237/// Bits [63:48] are written to bits [63:48] of the result. 238/// \returns A 64-bit integer vector of [4 x i16] containing the interleaved 239/// values. 240static __inline__ __m64 __DEFAULT_FN_ATTRS 241_mm_unpackhi_pi16(__m64 __m1, __m64 __m2) 242{ 243 return (__m64)__builtin_ia32_punpckhwd((__v4hi)__m1, (__v4hi)__m2); 244} 245 246/// Unpacks the upper 32 bits from two 64-bit integer vectors of 247/// [2 x i32] and interleaves them into a 64-bit integer vector of [2 x i32]. 248/// 249/// \headerfile <x86intrin.h> 250/// 251/// This intrinsic corresponds to the <c> PUNPCKHDQ </c> instruction. 252/// 253/// \param __m1 254/// A 64-bit integer vector of [2 x i32]. The upper 32 bits are written to 255/// the lower 32 bits of the result. 256/// \param __m2 257/// A 64-bit integer vector of [2 x i32]. The upper 32 bits are written to 258/// the upper 32 bits of the result. 259/// \returns A 64-bit integer vector of [2 x i32] containing the interleaved 260/// values. 261static __inline__ __m64 __DEFAULT_FN_ATTRS 262_mm_unpackhi_pi32(__m64 __m1, __m64 __m2) 263{ 264 return (__m64)__builtin_ia32_punpckhdq((__v2si)__m1, (__v2si)__m2); 265} 266 267/// Unpacks the lower 32 bits from two 64-bit integer vectors of [8 x i8] 268/// and interleaves them into a 64-bit integer vector of [8 x i8]. 269/// 270/// \headerfile <x86intrin.h> 271/// 272/// This intrinsic corresponds to the <c> PUNPCKLBW </c> instruction. 273/// 274/// \param __m1 275/// A 64-bit integer vector of [8 x i8]. 276/// Bits [7:0] are written to bits [7:0] of the result. \n 277/// Bits [15:8] are written to bits [23:16] of the result. \n 278/// Bits [23:16] are written to bits [39:32] of the result. \n 279/// Bits [31:24] are written to bits [55:48] of the result. 280/// \param __m2 281/// A 64-bit integer vector of [8 x i8]. 282/// Bits [7:0] are written to bits [15:8] of the result. \n 283/// Bits [15:8] are written to bits [31:24] of the result. \n 284/// Bits [23:16] are written to bits [47:40] of the result. \n 285/// Bits [31:24] are written to bits [63:56] of the result. 286/// \returns A 64-bit integer vector of [8 x i8] containing the interleaved 287/// values. 288static __inline__ __m64 __DEFAULT_FN_ATTRS 289_mm_unpacklo_pi8(__m64 __m1, __m64 __m2) 290{ 291 return (__m64)__builtin_ia32_punpcklbw((__v8qi)__m1, (__v8qi)__m2); 292} 293 294/// Unpacks the lower 32 bits from two 64-bit integer vectors of 295/// [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16]. 296/// 297/// \headerfile <x86intrin.h> 298/// 299/// This intrinsic corresponds to the <c> PUNPCKLWD </c> instruction. 300/// 301/// \param __m1 302/// A 64-bit integer vector of [4 x i16]. 303/// Bits [15:0] are written to bits [15:0] of the result. \n 304/// Bits [31:16] are written to bits [47:32] of the result. 305/// \param __m2 306/// A 64-bit integer vector of [4 x i16]. 307/// Bits [15:0] are written to bits [31:16] of the result. \n 308/// Bits [31:16] are written to bits [63:48] of the result. 309/// \returns A 64-bit integer vector of [4 x i16] containing the interleaved 310/// values. 311static __inline__ __m64 __DEFAULT_FN_ATTRS 312_mm_unpacklo_pi16(__m64 __m1, __m64 __m2) 313{ 314 return (__m64)__builtin_ia32_punpcklwd((__v4hi)__m1, (__v4hi)__m2); 315} 316 317/// Unpacks the lower 32 bits from two 64-bit integer vectors of 318/// [2 x i32] and interleaves them into a 64-bit integer vector of [2 x i32]. 319/// 320/// \headerfile <x86intrin.h> 321/// 322/// This intrinsic corresponds to the <c> PUNPCKLDQ </c> instruction. 323/// 324/// \param __m1 325/// A 64-bit integer vector of [2 x i32]. The lower 32 bits are written to 326/// the lower 32 bits of the result. 327/// \param __m2 328/// A 64-bit integer vector of [2 x i32]. The lower 32 bits are written to 329/// the upper 32 bits of the result. 330/// \returns A 64-bit integer vector of [2 x i32] containing the interleaved 331/// values. 332static __inline__ __m64 __DEFAULT_FN_ATTRS 333_mm_unpacklo_pi32(__m64 __m1, __m64 __m2) 334{ 335 return (__m64)__builtin_ia32_punpckldq((__v2si)__m1, (__v2si)__m2); 336} 337 338/// Adds each 8-bit integer element of the first 64-bit integer vector 339/// of [8 x i8] to the corresponding 8-bit integer element of the second 340/// 64-bit integer vector of [8 x i8]. The lower 8 bits of the results are 341/// packed into a 64-bit integer vector of [8 x i8]. 342/// 343/// \headerfile <x86intrin.h> 344/// 345/// This intrinsic corresponds to the <c> PADDB </c> instruction. 346/// 347/// \param __m1 348/// A 64-bit integer vector of [8 x i8]. 349/// \param __m2 350/// A 64-bit integer vector of [8 x i8]. 351/// \returns A 64-bit integer vector of [8 x i8] containing the sums of both 352/// parameters. 353static __inline__ __m64 __DEFAULT_FN_ATTRS 354_mm_add_pi8(__m64 __m1, __m64 __m2) 355{ 356 return (__m64)__builtin_ia32_paddb((__v8qi)__m1, (__v8qi)__m2); 357} 358 359/// Adds each 16-bit integer element of the first 64-bit integer vector 360/// of [4 x i16] to the corresponding 16-bit integer element of the second 361/// 64-bit integer vector of [4 x i16]. The lower 16 bits of the results are 362/// packed into a 64-bit integer vector of [4 x i16]. 363/// 364/// \headerfile <x86intrin.h> 365/// 366/// This intrinsic corresponds to the <c> PADDW </c> instruction. 367/// 368/// \param __m1 369/// A 64-bit integer vector of [4 x i16]. 370/// \param __m2 371/// A 64-bit integer vector of [4 x i16]. 372/// \returns A 64-bit integer vector of [4 x i16] containing the sums of both 373/// parameters. 374static __inline__ __m64 __DEFAULT_FN_ATTRS 375_mm_add_pi16(__m64 __m1, __m64 __m2) 376{ 377 return (__m64)__builtin_ia32_paddw((__v4hi)__m1, (__v4hi)__m2); 378} 379 380/// Adds each 32-bit integer element of the first 64-bit integer vector 381/// of [2 x i32] to the corresponding 32-bit integer element of the second 382/// 64-bit integer vector of [2 x i32]. The lower 32 bits of the results are 383/// packed into a 64-bit integer vector of [2 x i32]. 384/// 385/// \headerfile <x86intrin.h> 386/// 387/// This intrinsic corresponds to the <c> PADDD </c> instruction. 388/// 389/// \param __m1 390/// A 64-bit integer vector of [2 x i32]. 391/// \param __m2 392/// A 64-bit integer vector of [2 x i32]. 393/// \returns A 64-bit integer vector of [2 x i32] containing the sums of both 394/// parameters. 395static __inline__ __m64 __DEFAULT_FN_ATTRS 396_mm_add_pi32(__m64 __m1, __m64 __m2) 397{ 398 return (__m64)__builtin_ia32_paddd((__v2si)__m1, (__v2si)__m2); 399} 400 401/// Adds each 8-bit signed integer element of the first 64-bit integer 402/// vector of [8 x i8] to the corresponding 8-bit signed integer element of 403/// the second 64-bit integer vector of [8 x i8]. Positive sums greater than 404/// 0x7F are saturated to 0x7F. Negative sums less than 0x80 are saturated to 405/// 0x80. The results are packed into a 64-bit integer vector of [8 x i8]. 406/// 407/// \headerfile <x86intrin.h> 408/// 409/// This intrinsic corresponds to the <c> PADDSB </c> instruction. 410/// 411/// \param __m1 412/// A 64-bit integer vector of [8 x i8]. 413/// \param __m2 414/// A 64-bit integer vector of [8 x i8]. 415/// \returns A 64-bit integer vector of [8 x i8] containing the saturated sums 416/// of both parameters. 417static __inline__ __m64 __DEFAULT_FN_ATTRS 418_mm_adds_pi8(__m64 __m1, __m64 __m2) 419{ 420 return (__m64)__builtin_ia32_paddsb((__v8qi)__m1, (__v8qi)__m2); 421} 422 423/// Adds each 16-bit signed integer element of the first 64-bit integer 424/// vector of [4 x i16] to the corresponding 16-bit signed integer element of 425/// the second 64-bit integer vector of [4 x i16]. Positive sums greater than 426/// 0x7FFF are saturated to 0x7FFF. Negative sums less than 0x8000 are 427/// saturated to 0x8000. The results are packed into a 64-bit integer vector 428/// of [4 x i16]. 429/// 430/// \headerfile <x86intrin.h> 431/// 432/// This intrinsic corresponds to the <c> PADDSW </c> instruction. 433/// 434/// \param __m1 435/// A 64-bit integer vector of [4 x i16]. 436/// \param __m2 437/// A 64-bit integer vector of [4 x i16]. 438/// \returns A 64-bit integer vector of [4 x i16] containing the saturated sums 439/// of both parameters. 440static __inline__ __m64 __DEFAULT_FN_ATTRS 441_mm_adds_pi16(__m64 __m1, __m64 __m2) 442{ 443 return (__m64)__builtin_ia32_paddsw((__v4hi)__m1, (__v4hi)__m2); 444} 445 446/// Adds each 8-bit unsigned integer element of the first 64-bit integer 447/// vector of [8 x i8] to the corresponding 8-bit unsigned integer element of 448/// the second 64-bit integer vector of [8 x i8]. Sums greater than 0xFF are 449/// saturated to 0xFF. The results are packed into a 64-bit integer vector of 450/// [8 x i8]. 451/// 452/// \headerfile <x86intrin.h> 453/// 454/// This intrinsic corresponds to the <c> PADDUSB </c> instruction. 455/// 456/// \param __m1 457/// A 64-bit integer vector of [8 x i8]. 458/// \param __m2 459/// A 64-bit integer vector of [8 x i8]. 460/// \returns A 64-bit integer vector of [8 x i8] containing the saturated 461/// unsigned sums of both parameters. 462static __inline__ __m64 __DEFAULT_FN_ATTRS 463_mm_adds_pu8(__m64 __m1, __m64 __m2) 464{ 465 return (__m64)__builtin_ia32_paddusb((__v8qi)__m1, (__v8qi)__m2); 466} 467 468/// Adds each 16-bit unsigned integer element of the first 64-bit integer 469/// vector of [4 x i16] to the corresponding 16-bit unsigned integer element 470/// of the second 64-bit integer vector of [4 x i16]. Sums greater than 471/// 0xFFFF are saturated to 0xFFFF. The results are packed into a 64-bit 472/// integer vector of [4 x i16]. 473/// 474/// \headerfile <x86intrin.h> 475/// 476/// This intrinsic corresponds to the <c> PADDUSW </c> instruction. 477/// 478/// \param __m1 479/// A 64-bit integer vector of [4 x i16]. 480/// \param __m2 481/// A 64-bit integer vector of [4 x i16]. 482/// \returns A 64-bit integer vector of [4 x i16] containing the saturated 483/// unsigned sums of both parameters. 484static __inline__ __m64 __DEFAULT_FN_ATTRS 485_mm_adds_pu16(__m64 __m1, __m64 __m2) 486{ 487 return (__m64)__builtin_ia32_paddusw((__v4hi)__m1, (__v4hi)__m2); 488} 489 490/// Subtracts each 8-bit integer element of the second 64-bit integer 491/// vector of [8 x i8] from the corresponding 8-bit integer element of the 492/// first 64-bit integer vector of [8 x i8]. The lower 8 bits of the results 493/// are packed into a 64-bit integer vector of [8 x i8]. 494/// 495/// \headerfile <x86intrin.h> 496/// 497/// This intrinsic corresponds to the <c> PSUBB </c> instruction. 498/// 499/// \param __m1 500/// A 64-bit integer vector of [8 x i8] containing the minuends. 501/// \param __m2 502/// A 64-bit integer vector of [8 x i8] containing the subtrahends. 503/// \returns A 64-bit integer vector of [8 x i8] containing the differences of 504/// both parameters. 505static __inline__ __m64 __DEFAULT_FN_ATTRS 506_mm_sub_pi8(__m64 __m1, __m64 __m2) 507{ 508 return (__m64)__builtin_ia32_psubb((__v8qi)__m1, (__v8qi)__m2); 509} 510 511/// Subtracts each 16-bit integer element of the second 64-bit integer 512/// vector of [4 x i16] from the corresponding 16-bit integer element of the 513/// first 64-bit integer vector of [4 x i16]. The lower 16 bits of the 514/// results are packed into a 64-bit integer vector of [4 x i16]. 515/// 516/// \headerfile <x86intrin.h> 517/// 518/// This intrinsic corresponds to the <c> PSUBW </c> instruction. 519/// 520/// \param __m1 521/// A 64-bit integer vector of [4 x i16] containing the minuends. 522/// \param __m2 523/// A 64-bit integer vector of [4 x i16] containing the subtrahends. 524/// \returns A 64-bit integer vector of [4 x i16] containing the differences of 525/// both parameters. 526static __inline__ __m64 __DEFAULT_FN_ATTRS 527_mm_sub_pi16(__m64 __m1, __m64 __m2) 528{ 529 return (__m64)__builtin_ia32_psubw((__v4hi)__m1, (__v4hi)__m2); 530} 531 532/// Subtracts each 32-bit integer element of the second 64-bit integer 533/// vector of [2 x i32] from the corresponding 32-bit integer element of the 534/// first 64-bit integer vector of [2 x i32]. The lower 32 bits of the 535/// results are packed into a 64-bit integer vector of [2 x i32]. 536/// 537/// \headerfile <x86intrin.h> 538/// 539/// This intrinsic corresponds to the <c> PSUBD </c> instruction. 540/// 541/// \param __m1 542/// A 64-bit integer vector of [2 x i32] containing the minuends. 543/// \param __m2 544/// A 64-bit integer vector of [2 x i32] containing the subtrahends. 545/// \returns A 64-bit integer vector of [2 x i32] containing the differences of 546/// both parameters. 547static __inline__ __m64 __DEFAULT_FN_ATTRS 548_mm_sub_pi32(__m64 __m1, __m64 __m2) 549{ 550 return (__m64)__builtin_ia32_psubd((__v2si)__m1, (__v2si)__m2); 551} 552 553/// Subtracts each 8-bit signed integer element of the second 64-bit 554/// integer vector of [8 x i8] from the corresponding 8-bit signed integer 555/// element of the first 64-bit integer vector of [8 x i8]. Positive results 556/// greater than 0x7F are saturated to 0x7F. Negative results less than 0x80 557/// are saturated to 0x80. The results are packed into a 64-bit integer 558/// vector of [8 x i8]. 559/// 560/// \headerfile <x86intrin.h> 561/// 562/// This intrinsic corresponds to the <c> PSUBSB </c> instruction. 563/// 564/// \param __m1 565/// A 64-bit integer vector of [8 x i8] containing the minuends. 566/// \param __m2 567/// A 64-bit integer vector of [8 x i8] containing the subtrahends. 568/// \returns A 64-bit integer vector of [8 x i8] containing the saturated 569/// differences of both parameters. 570static __inline__ __m64 __DEFAULT_FN_ATTRS 571_mm_subs_pi8(__m64 __m1, __m64 __m2) 572{ 573 return (__m64)__builtin_ia32_psubsb((__v8qi)__m1, (__v8qi)__m2); 574} 575 576/// Subtracts each 16-bit signed integer element of the second 64-bit 577/// integer vector of [4 x i16] from the corresponding 16-bit signed integer 578/// element of the first 64-bit integer vector of [4 x i16]. Positive results 579/// greater than 0x7FFF are saturated to 0x7FFF. Negative results less than 580/// 0x8000 are saturated to 0x8000. The results are packed into a 64-bit 581/// integer vector of [4 x i16]. 582/// 583/// \headerfile <x86intrin.h> 584/// 585/// This intrinsic corresponds to the <c> PSUBSW </c> instruction. 586/// 587/// \param __m1 588/// A 64-bit integer vector of [4 x i16] containing the minuends. 589/// \param __m2 590/// A 64-bit integer vector of [4 x i16] containing the subtrahends. 591/// \returns A 64-bit integer vector of [4 x i16] containing the saturated 592/// differences of both parameters. 593static __inline__ __m64 __DEFAULT_FN_ATTRS 594_mm_subs_pi16(__m64 __m1, __m64 __m2) 595{ 596 return (__m64)__builtin_ia32_psubsw((__v4hi)__m1, (__v4hi)__m2); 597} 598 599/// Subtracts each 8-bit unsigned integer element of the second 64-bit 600/// integer vector of [8 x i8] from the corresponding 8-bit unsigned integer 601/// element of the first 64-bit integer vector of [8 x i8]. 602/// 603/// If an element of the first vector is less than the corresponding element 604/// of the second vector, the result is saturated to 0. The results are 605/// packed into a 64-bit integer vector of [8 x i8]. 606/// 607/// \headerfile <x86intrin.h> 608/// 609/// This intrinsic corresponds to the <c> PSUBUSB </c> instruction. 610/// 611/// \param __m1 612/// A 64-bit integer vector of [8 x i8] containing the minuends. 613/// \param __m2 614/// A 64-bit integer vector of [8 x i8] containing the subtrahends. 615/// \returns A 64-bit integer vector of [8 x i8] containing the saturated 616/// differences of both parameters. 617static __inline__ __m64 __DEFAULT_FN_ATTRS 618_mm_subs_pu8(__m64 __m1, __m64 __m2) 619{ 620 return (__m64)__builtin_ia32_psubusb((__v8qi)__m1, (__v8qi)__m2); 621} 622 623/// Subtracts each 16-bit unsigned integer element of the second 64-bit 624/// integer vector of [4 x i16] from the corresponding 16-bit unsigned 625/// integer element of the first 64-bit integer vector of [4 x i16]. 626/// 627/// If an element of the first vector is less than the corresponding element 628/// of the second vector, the result is saturated to 0. The results are 629/// packed into a 64-bit integer vector of [4 x i16]. 630/// 631/// \headerfile <x86intrin.h> 632/// 633/// This intrinsic corresponds to the <c> PSUBUSW </c> instruction. 634/// 635/// \param __m1 636/// A 64-bit integer vector of [4 x i16] containing the minuends. 637/// \param __m2 638/// A 64-bit integer vector of [4 x i16] containing the subtrahends. 639/// \returns A 64-bit integer vector of [4 x i16] containing the saturated 640/// differences of both parameters. 641static __inline__ __m64 __DEFAULT_FN_ATTRS 642_mm_subs_pu16(__m64 __m1, __m64 __m2) 643{ 644 return (__m64)__builtin_ia32_psubusw((__v4hi)__m1, (__v4hi)__m2); 645} 646 647/// Multiplies each 16-bit signed integer element of the first 64-bit 648/// integer vector of [4 x i16] by the corresponding 16-bit signed integer 649/// element of the second 64-bit integer vector of [4 x i16] and get four 650/// 32-bit products. Adds adjacent pairs of products to get two 32-bit sums. 651/// The lower 32 bits of these two sums are packed into a 64-bit integer 652/// vector of [2 x i32]. 653/// 654/// For example, bits [15:0] of both parameters are multiplied, bits [31:16] 655/// of both parameters are multiplied, and the sum of both results is written 656/// to bits [31:0] of the result. 657/// 658/// \headerfile <x86intrin.h> 659/// 660/// This intrinsic corresponds to the <c> PMADDWD </c> instruction. 661/// 662/// \param __m1 663/// A 64-bit integer vector of [4 x i16]. 664/// \param __m2 665/// A 64-bit integer vector of [4 x i16]. 666/// \returns A 64-bit integer vector of [2 x i32] containing the sums of 667/// products of both parameters. 668static __inline__ __m64 __DEFAULT_FN_ATTRS 669_mm_madd_pi16(__m64 __m1, __m64 __m2) 670{ 671 return (__m64)__builtin_ia32_pmaddwd((__v4hi)__m1, (__v4hi)__m2); 672} 673 674/// Multiplies each 16-bit signed integer element of the first 64-bit 675/// integer vector of [4 x i16] by the corresponding 16-bit signed integer 676/// element of the second 64-bit integer vector of [4 x i16]. Packs the upper 677/// 16 bits of the 32-bit products into a 64-bit integer vector of [4 x i16]. 678/// 679/// \headerfile <x86intrin.h> 680/// 681/// This intrinsic corresponds to the <c> PMULHW </c> instruction. 682/// 683/// \param __m1 684/// A 64-bit integer vector of [4 x i16]. 685/// \param __m2 686/// A 64-bit integer vector of [4 x i16]. 687/// \returns A 64-bit integer vector of [4 x i16] containing the upper 16 bits 688/// of the products of both parameters. 689static __inline__ __m64 __DEFAULT_FN_ATTRS 690_mm_mulhi_pi16(__m64 __m1, __m64 __m2) 691{ 692 return (__m64)__builtin_ia32_pmulhw((__v4hi)__m1, (__v4hi)__m2); 693} 694 695/// Multiplies each 16-bit signed integer element of the first 64-bit 696/// integer vector of [4 x i16] by the corresponding 16-bit signed integer 697/// element of the second 64-bit integer vector of [4 x i16]. Packs the lower 698/// 16 bits of the 32-bit products into a 64-bit integer vector of [4 x i16]. 699/// 700/// \headerfile <x86intrin.h> 701/// 702/// This intrinsic corresponds to the <c> PMULLW </c> instruction. 703/// 704/// \param __m1 705/// A 64-bit integer vector of [4 x i16]. 706/// \param __m2 707/// A 64-bit integer vector of [4 x i16]. 708/// \returns A 64-bit integer vector of [4 x i16] containing the lower 16 bits 709/// of the products of both parameters. 710static __inline__ __m64 __DEFAULT_FN_ATTRS 711_mm_mullo_pi16(__m64 __m1, __m64 __m2) 712{ 713 return (__m64)__builtin_ia32_pmullw((__v4hi)__m1, (__v4hi)__m2); 714} 715 716/// Left-shifts each 16-bit signed integer element of the first 717/// parameter, which is a 64-bit integer vector of [4 x i16], by the number 718/// of bits specified by the second parameter, which is a 64-bit integer. The 719/// lower 16 bits of the results are packed into a 64-bit integer vector of 720/// [4 x i16]. 721/// 722/// \headerfile <x86intrin.h> 723/// 724/// This intrinsic corresponds to the <c> PSLLW </c> instruction. 725/// 726/// \param __m 727/// A 64-bit integer vector of [4 x i16]. 728/// \param __count 729/// A 64-bit integer vector interpreted as a single 64-bit integer. 730/// \returns A 64-bit integer vector of [4 x i16] containing the left-shifted 731/// values. If \a __count is greater or equal to 16, the result is set to all 732/// 0. 733static __inline__ __m64 __DEFAULT_FN_ATTRS 734_mm_sll_pi16(__m64 __m, __m64 __count) 735{ 736 return (__m64)__builtin_ia32_psllw((__v4hi)__m, __count); 737} 738 739/// Left-shifts each 16-bit signed integer element of a 64-bit integer 740/// vector of [4 x i16] by the number of bits specified by a 32-bit integer. 741/// The lower 16 bits of the results are packed into a 64-bit integer vector 742/// of [4 x i16]. 743/// 744/// \headerfile <x86intrin.h> 745/// 746/// This intrinsic corresponds to the <c> PSLLW </c> instruction. 747/// 748/// \param __m 749/// A 64-bit integer vector of [4 x i16]. 750/// \param __count 751/// A 32-bit integer value. 752/// \returns A 64-bit integer vector of [4 x i16] containing the left-shifted 753/// values. If \a __count is greater or equal to 16, the result is set to all 754/// 0. 755static __inline__ __m64 __DEFAULT_FN_ATTRS 756_mm_slli_pi16(__m64 __m, int __count) 757{ 758 return (__m64)__builtin_ia32_psllwi((__v4hi)__m, __count); 759} 760 761/// Left-shifts each 32-bit signed integer element of the first 762/// parameter, which is a 64-bit integer vector of [2 x i32], by the number 763/// of bits specified by the second parameter, which is a 64-bit integer. The 764/// lower 32 bits of the results are packed into a 64-bit integer vector of 765/// [2 x i32]. 766/// 767/// \headerfile <x86intrin.h> 768/// 769/// This intrinsic corresponds to the <c> PSLLD </c> instruction. 770/// 771/// \param __m 772/// A 64-bit integer vector of [2 x i32]. 773/// \param __count 774/// A 64-bit integer vector interpreted as a single 64-bit integer. 775/// \returns A 64-bit integer vector of [2 x i32] containing the left-shifted 776/// values. If \a __count is greater or equal to 32, the result is set to all 777/// 0. 778static __inline__ __m64 __DEFAULT_FN_ATTRS 779_mm_sll_pi32(__m64 __m, __m64 __count) 780{ 781 return (__m64)__builtin_ia32_pslld((__v2si)__m, __count); 782} 783 784/// Left-shifts each 32-bit signed integer element of a 64-bit integer 785/// vector of [2 x i32] by the number of bits specified by a 32-bit integer. 786/// The lower 32 bits of the results are packed into a 64-bit integer vector 787/// of [2 x i32]. 788/// 789/// \headerfile <x86intrin.h> 790/// 791/// This intrinsic corresponds to the <c> PSLLD </c> instruction. 792/// 793/// \param __m 794/// A 64-bit integer vector of [2 x i32]. 795/// \param __count 796/// A 32-bit integer value. 797/// \returns A 64-bit integer vector of [2 x i32] containing the left-shifted 798/// values. If \a __count is greater or equal to 32, the result is set to all 799/// 0. 800static __inline__ __m64 __DEFAULT_FN_ATTRS 801_mm_slli_pi32(__m64 __m, int __count) 802{ 803 return (__m64)__builtin_ia32_pslldi((__v2si)__m, __count); 804} 805 806/// Left-shifts the first 64-bit integer parameter by the number of bits 807/// specified by the second 64-bit integer parameter. The lower 64 bits of 808/// result are returned. 809/// 810/// \headerfile <x86intrin.h> 811/// 812/// This intrinsic corresponds to the <c> PSLLQ </c> instruction. 813/// 814/// \param __m 815/// A 64-bit integer vector interpreted as a single 64-bit integer. 816/// \param __count 817/// A 64-bit integer vector interpreted as a single 64-bit integer. 818/// \returns A 64-bit integer vector containing the left-shifted value. If 819/// \a __count is greater or equal to 64, the result is set to 0. 820static __inline__ __m64 __DEFAULT_FN_ATTRS 821_mm_sll_si64(__m64 __m, __m64 __count) 822{ 823 return (__m64)__builtin_ia32_psllq((__v1di)__m, __count); 824} 825 826/// Left-shifts the first parameter, which is a 64-bit integer, by the 827/// number of bits specified by the second parameter, which is a 32-bit 828/// integer. The lower 64 bits of result are returned. 829/// 830/// \headerfile <x86intrin.h> 831/// 832/// This intrinsic corresponds to the <c> PSLLQ </c> instruction. 833/// 834/// \param __m 835/// A 64-bit integer vector interpreted as a single 64-bit integer. 836/// \param __count 837/// A 32-bit integer value. 838/// \returns A 64-bit integer vector containing the left-shifted value. If 839/// \a __count is greater or equal to 64, the result is set to 0. 840static __inline__ __m64 __DEFAULT_FN_ATTRS 841_mm_slli_si64(__m64 __m, int __count) 842{ 843 return (__m64)__builtin_ia32_psllqi((__v1di)__m, __count); 844} 845 846/// Right-shifts each 16-bit integer element of the first parameter, 847/// which is a 64-bit integer vector of [4 x i16], by the number of bits 848/// specified by the second parameter, which is a 64-bit integer. 849/// 850/// High-order bits are filled with the sign bit of the initial value of each 851/// 16-bit element. The 16-bit results are packed into a 64-bit integer 852/// vector of [4 x i16]. 853/// 854/// \headerfile <x86intrin.h> 855/// 856/// This intrinsic corresponds to the <c> PSRAW </c> instruction. 857/// 858/// \param __m 859/// A 64-bit integer vector of [4 x i16]. 860/// \param __count 861/// A 64-bit integer vector interpreted as a single 64-bit integer. 862/// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted 863/// values. 864static __inline__ __m64 __DEFAULT_FN_ATTRS 865_mm_sra_pi16(__m64 __m, __m64 __count) 866{ 867 return (__m64)__builtin_ia32_psraw((__v4hi)__m, __count); 868} 869 870/// Right-shifts each 16-bit integer element of a 64-bit integer vector 871/// of [4 x i16] by the number of bits specified by a 32-bit integer. 872/// 873/// High-order bits are filled with the sign bit of the initial value of each 874/// 16-bit element. The 16-bit results are packed into a 64-bit integer 875/// vector of [4 x i16]. 876/// 877/// \headerfile <x86intrin.h> 878/// 879/// This intrinsic corresponds to the <c> PSRAW </c> instruction. 880/// 881/// \param __m 882/// A 64-bit integer vector of [4 x i16]. 883/// \param __count 884/// A 32-bit integer value. 885/// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted 886/// values. 887static __inline__ __m64 __DEFAULT_FN_ATTRS 888_mm_srai_pi16(__m64 __m, int __count) 889{ 890 return (__m64)__builtin_ia32_psrawi((__v4hi)__m, __count); 891} 892 893/// Right-shifts each 32-bit integer element of the first parameter, 894/// which is a 64-bit integer vector of [2 x i32], by the number of bits 895/// specified by the second parameter, which is a 64-bit integer. 896/// 897/// High-order bits are filled with the sign bit of the initial value of each 898/// 32-bit element. The 32-bit results are packed into a 64-bit integer 899/// vector of [2 x i32]. 900/// 901/// \headerfile <x86intrin.h> 902/// 903/// This intrinsic corresponds to the <c> PSRAD </c> instruction. 904/// 905/// \param __m 906/// A 64-bit integer vector of [2 x i32]. 907/// \param __count 908/// A 64-bit integer vector interpreted as a single 64-bit integer. 909/// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted 910/// values. 911static __inline__ __m64 __DEFAULT_FN_ATTRS 912_mm_sra_pi32(__m64 __m, __m64 __count) 913{ 914 return (__m64)__builtin_ia32_psrad((__v2si)__m, __count); 915} 916 917/// Right-shifts each 32-bit integer element of a 64-bit integer vector 918/// of [2 x i32] by the number of bits specified by a 32-bit integer. 919/// 920/// High-order bits are filled with the sign bit of the initial value of each 921/// 32-bit element. The 32-bit results are packed into a 64-bit integer 922/// vector of [2 x i32]. 923/// 924/// \headerfile <x86intrin.h> 925/// 926/// This intrinsic corresponds to the <c> PSRAD </c> instruction. 927/// 928/// \param __m 929/// A 64-bit integer vector of [2 x i32]. 930/// \param __count 931/// A 32-bit integer value. 932/// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted 933/// values. 934static __inline__ __m64 __DEFAULT_FN_ATTRS 935_mm_srai_pi32(__m64 __m, int __count) 936{ 937 return (__m64)__builtin_ia32_psradi((__v2si)__m, __count); 938} 939 940/// Right-shifts each 16-bit integer element of the first parameter, 941/// which is a 64-bit integer vector of [4 x i16], by the number of bits 942/// specified by the second parameter, which is a 64-bit integer. 943/// 944/// High-order bits are cleared. The 16-bit results are packed into a 64-bit 945/// integer vector of [4 x i16]. 946/// 947/// \headerfile <x86intrin.h> 948/// 949/// This intrinsic corresponds to the <c> PSRLW </c> instruction. 950/// 951/// \param __m 952/// A 64-bit integer vector of [4 x i16]. 953/// \param __count 954/// A 64-bit integer vector interpreted as a single 64-bit integer. 955/// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted 956/// values. 957static __inline__ __m64 __DEFAULT_FN_ATTRS 958_mm_srl_pi16(__m64 __m, __m64 __count) 959{ 960 return (__m64)__builtin_ia32_psrlw((__v4hi)__m, __count); 961} 962 963/// Right-shifts each 16-bit integer element of a 64-bit integer vector 964/// of [4 x i16] by the number of bits specified by a 32-bit integer. 965/// 966/// High-order bits are cleared. The 16-bit results are packed into a 64-bit 967/// integer vector of [4 x i16]. 968/// 969/// \headerfile <x86intrin.h> 970/// 971/// This intrinsic corresponds to the <c> PSRLW </c> instruction. 972/// 973/// \param __m 974/// A 64-bit integer vector of [4 x i16]. 975/// \param __count 976/// A 32-bit integer value. 977/// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted 978/// values. 979static __inline__ __m64 __DEFAULT_FN_ATTRS 980_mm_srli_pi16(__m64 __m, int __count) 981{ 982 return (__m64)__builtin_ia32_psrlwi((__v4hi)__m, __count); 983} 984 985/// Right-shifts each 32-bit integer element of the first parameter, 986/// which is a 64-bit integer vector of [2 x i32], by the number of bits 987/// specified by the second parameter, which is a 64-bit integer. 988/// 989/// High-order bits are cleared. The 32-bit results are packed into a 64-bit 990/// integer vector of [2 x i32]. 991/// 992/// \headerfile <x86intrin.h> 993/// 994/// This intrinsic corresponds to the <c> PSRLD </c> instruction. 995/// 996/// \param __m 997/// A 64-bit integer vector of [2 x i32]. 998/// \param __count 999/// A 64-bit integer vector interpreted as a single 64-bit integer. 1000/// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted 1001/// values. 1002static __inline__ __m64 __DEFAULT_FN_ATTRS 1003_mm_srl_pi32(__m64 __m, __m64 __count) 1004{ 1005 return (__m64)__builtin_ia32_psrld((__v2si)__m, __count); 1006} 1007 1008/// Right-shifts each 32-bit integer element of a 64-bit integer vector 1009/// of [2 x i32] by the number of bits specified by a 32-bit integer. 1010/// 1011/// High-order bits are cleared. The 32-bit results are packed into a 64-bit 1012/// integer vector of [2 x i32]. 1013/// 1014/// \headerfile <x86intrin.h> 1015/// 1016/// This intrinsic corresponds to the <c> PSRLD </c> instruction. 1017/// 1018/// \param __m 1019/// A 64-bit integer vector of [2 x i32]. 1020/// \param __count 1021/// A 32-bit integer value. 1022/// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted 1023/// values. 1024static __inline__ __m64 __DEFAULT_FN_ATTRS 1025_mm_srli_pi32(__m64 __m, int __count) 1026{ 1027 return (__m64)__builtin_ia32_psrldi((__v2si)__m, __count); 1028} 1029 1030/// Right-shifts the first 64-bit integer parameter by the number of bits 1031/// specified by the second 64-bit integer parameter. 1032/// 1033/// High-order bits are cleared. 1034/// 1035/// \headerfile <x86intrin.h> 1036/// 1037/// This intrinsic corresponds to the <c> PSRLQ </c> instruction. 1038/// 1039/// \param __m 1040/// A 64-bit integer vector interpreted as a single 64-bit integer. 1041/// \param __count 1042/// A 64-bit integer vector interpreted as a single 64-bit integer. 1043/// \returns A 64-bit integer vector containing the right-shifted value. 1044static __inline__ __m64 __DEFAULT_FN_ATTRS 1045_mm_srl_si64(__m64 __m, __m64 __count) 1046{ 1047 return (__m64)__builtin_ia32_psrlq((__v1di)__m, __count); 1048} 1049 1050/// Right-shifts the first parameter, which is a 64-bit integer, by the 1051/// number of bits specified by the second parameter, which is a 32-bit 1052/// integer. 1053/// 1054/// High-order bits are cleared. 1055/// 1056/// \headerfile <x86intrin.h> 1057/// 1058/// This intrinsic corresponds to the <c> PSRLQ </c> instruction. 1059/// 1060/// \param __m 1061/// A 64-bit integer vector interpreted as a single 64-bit integer. 1062/// \param __count 1063/// A 32-bit integer value. 1064/// \returns A 64-bit integer vector containing the right-shifted value. 1065static __inline__ __m64 __DEFAULT_FN_ATTRS 1066_mm_srli_si64(__m64 __m, int __count) 1067{ 1068 return (__m64)__builtin_ia32_psrlqi((__v1di)__m, __count); 1069} 1070 1071/// Performs a bitwise AND of two 64-bit integer vectors. 1072/// 1073/// \headerfile <x86intrin.h> 1074/// 1075/// This intrinsic corresponds to the <c> PAND </c> instruction. 1076/// 1077/// \param __m1 1078/// A 64-bit integer vector. 1079/// \param __m2 1080/// A 64-bit integer vector. 1081/// \returns A 64-bit integer vector containing the bitwise AND of both 1082/// parameters. 1083static __inline__ __m64 __DEFAULT_FN_ATTRS 1084_mm_and_si64(__m64 __m1, __m64 __m2) 1085{ 1086 return __builtin_ia32_pand((__v1di)__m1, (__v1di)__m2); 1087} 1088 1089/// Performs a bitwise NOT of the first 64-bit integer vector, and then 1090/// performs a bitwise AND of the intermediate result and the second 64-bit 1091/// integer vector. 1092/// 1093/// \headerfile <x86intrin.h> 1094/// 1095/// This intrinsic corresponds to the <c> PANDN </c> instruction. 1096/// 1097/// \param __m1 1098/// A 64-bit integer vector. The one's complement of this parameter is used 1099/// in the bitwise AND. 1100/// \param __m2 1101/// A 64-bit integer vector. 1102/// \returns A 64-bit integer vector containing the bitwise AND of the second 1103/// parameter and the one's complement of the first parameter. 1104static __inline__ __m64 __DEFAULT_FN_ATTRS 1105_mm_andnot_si64(__m64 __m1, __m64 __m2) 1106{ 1107 return __builtin_ia32_pandn((__v1di)__m1, (__v1di)__m2); 1108} 1109 1110/// Performs a bitwise OR of two 64-bit integer vectors. 1111/// 1112/// \headerfile <x86intrin.h> 1113/// 1114/// This intrinsic corresponds to the <c> POR </c> instruction. 1115/// 1116/// \param __m1 1117/// A 64-bit integer vector. 1118/// \param __m2 1119/// A 64-bit integer vector. 1120/// \returns A 64-bit integer vector containing the bitwise OR of both 1121/// parameters. 1122static __inline__ __m64 __DEFAULT_FN_ATTRS 1123_mm_or_si64(__m64 __m1, __m64 __m2) 1124{ 1125 return __builtin_ia32_por((__v1di)__m1, (__v1di)__m2); 1126} 1127 1128/// Performs a bitwise exclusive OR of two 64-bit integer vectors. 1129/// 1130/// \headerfile <x86intrin.h> 1131/// 1132/// This intrinsic corresponds to the <c> PXOR </c> instruction. 1133/// 1134/// \param __m1 1135/// A 64-bit integer vector. 1136/// \param __m2 1137/// A 64-bit integer vector. 1138/// \returns A 64-bit integer vector containing the bitwise exclusive OR of both 1139/// parameters. 1140static __inline__ __m64 __DEFAULT_FN_ATTRS 1141_mm_xor_si64(__m64 __m1, __m64 __m2) 1142{ 1143 return __builtin_ia32_pxor((__v1di)__m1, (__v1di)__m2); 1144} 1145 1146/// Compares the 8-bit integer elements of two 64-bit integer vectors of 1147/// [8 x i8] to determine if the element of the first vector is equal to the 1148/// corresponding element of the second vector. 1149/// 1150/// The comparison yields 0 for false, 0xFF for true. 1151/// 1152/// \headerfile <x86intrin.h> 1153/// 1154/// This intrinsic corresponds to the <c> PCMPEQB </c> instruction. 1155/// 1156/// \param __m1 1157/// A 64-bit integer vector of [8 x i8]. 1158/// \param __m2 1159/// A 64-bit integer vector of [8 x i8]. 1160/// \returns A 64-bit integer vector of [8 x i8] containing the comparison 1161/// results. 1162static __inline__ __m64 __DEFAULT_FN_ATTRS 1163_mm_cmpeq_pi8(__m64 __m1, __m64 __m2) 1164{ 1165 return (__m64)__builtin_ia32_pcmpeqb((__v8qi)__m1, (__v8qi)__m2); 1166} 1167 1168/// Compares the 16-bit integer elements of two 64-bit integer vectors of 1169/// [4 x i16] to determine if the element of the first vector is equal to the 1170/// corresponding element of the second vector. 1171/// 1172/// The comparison yields 0 for false, 0xFFFF for true. 1173/// 1174/// \headerfile <x86intrin.h> 1175/// 1176/// This intrinsic corresponds to the <c> PCMPEQW </c> instruction. 1177/// 1178/// \param __m1 1179/// A 64-bit integer vector of [4 x i16]. 1180/// \param __m2 1181/// A 64-bit integer vector of [4 x i16]. 1182/// \returns A 64-bit integer vector of [4 x i16] containing the comparison 1183/// results. 1184static __inline__ __m64 __DEFAULT_FN_ATTRS 1185_mm_cmpeq_pi16(__m64 __m1, __m64 __m2) 1186{ 1187 return (__m64)__builtin_ia32_pcmpeqw((__v4hi)__m1, (__v4hi)__m2); 1188} 1189 1190/// Compares the 32-bit integer elements of two 64-bit integer vectors of 1191/// [2 x i32] to determine if the element of the first vector is equal to the 1192/// corresponding element of the second vector. 1193/// 1194/// The comparison yields 0 for false, 0xFFFFFFFF for true. 1195/// 1196/// \headerfile <x86intrin.h> 1197/// 1198/// This intrinsic corresponds to the <c> PCMPEQD </c> instruction. 1199/// 1200/// \param __m1 1201/// A 64-bit integer vector of [2 x i32]. 1202/// \param __m2 1203/// A 64-bit integer vector of [2 x i32]. 1204/// \returns A 64-bit integer vector of [2 x i32] containing the comparison 1205/// results. 1206static __inline__ __m64 __DEFAULT_FN_ATTRS 1207_mm_cmpeq_pi32(__m64 __m1, __m64 __m2) 1208{ 1209 return (__m64)__builtin_ia32_pcmpeqd((__v2si)__m1, (__v2si)__m2); 1210} 1211 1212/// Compares the 8-bit integer elements of two 64-bit integer vectors of 1213/// [8 x i8] to determine if the element of the first vector is greater than 1214/// the corresponding element of the second vector. 1215/// 1216/// The comparison yields 0 for false, 0xFF for true. 1217/// 1218/// \headerfile <x86intrin.h> 1219/// 1220/// This intrinsic corresponds to the <c> PCMPGTB </c> instruction. 1221/// 1222/// \param __m1 1223/// A 64-bit integer vector of [8 x i8]. 1224/// \param __m2 1225/// A 64-bit integer vector of [8 x i8]. 1226/// \returns A 64-bit integer vector of [8 x i8] containing the comparison 1227/// results. 1228static __inline__ __m64 __DEFAULT_FN_ATTRS 1229_mm_cmpgt_pi8(__m64 __m1, __m64 __m2) 1230{ 1231 return (__m64)__builtin_ia32_pcmpgtb((__v8qi)__m1, (__v8qi)__m2); 1232} 1233 1234/// Compares the 16-bit integer elements of two 64-bit integer vectors of 1235/// [4 x i16] to determine if the element of the first vector is greater than 1236/// the corresponding element of the second vector. 1237/// 1238/// The comparison yields 0 for false, 0xFFFF for true. 1239/// 1240/// \headerfile <x86intrin.h> 1241/// 1242/// This intrinsic corresponds to the <c> PCMPGTW </c> instruction. 1243/// 1244/// \param __m1 1245/// A 64-bit integer vector of [4 x i16]. 1246/// \param __m2 1247/// A 64-bit integer vector of [4 x i16]. 1248/// \returns A 64-bit integer vector of [4 x i16] containing the comparison 1249/// results. 1250static __inline__ __m64 __DEFAULT_FN_ATTRS 1251_mm_cmpgt_pi16(__m64 __m1, __m64 __m2) 1252{ 1253 return (__m64)__builtin_ia32_pcmpgtw((__v4hi)__m1, (__v4hi)__m2); 1254} 1255 1256/// Compares the 32-bit integer elements of two 64-bit integer vectors of 1257/// [2 x i32] to determine if the element of the first vector is greater than 1258/// the corresponding element of the second vector. 1259/// 1260/// The comparison yields 0 for false, 0xFFFFFFFF for true. 1261/// 1262/// \headerfile <x86intrin.h> 1263/// 1264/// This intrinsic corresponds to the <c> PCMPGTD </c> instruction. 1265/// 1266/// \param __m1 1267/// A 64-bit integer vector of [2 x i32]. 1268/// \param __m2 1269/// A 64-bit integer vector of [2 x i32]. 1270/// \returns A 64-bit integer vector of [2 x i32] containing the comparison 1271/// results. 1272static __inline__ __m64 __DEFAULT_FN_ATTRS 1273_mm_cmpgt_pi32(__m64 __m1, __m64 __m2) 1274{ 1275 return (__m64)__builtin_ia32_pcmpgtd((__v2si)__m1, (__v2si)__m2); 1276} 1277 1278/// Constructs a 64-bit integer vector initialized to zero. 1279/// 1280/// \headerfile <x86intrin.h> 1281/// 1282/// This intrinsic corresponds to the <c> PXOR </c> instruction. 1283/// 1284/// \returns An initialized 64-bit integer vector with all elements set to zero. 1285static __inline__ __m64 __DEFAULT_FN_ATTRS 1286_mm_setzero_si64(void) 1287{ 1288 return __extension__ (__m64){ 0LL }; 1289} 1290 1291/// Constructs a 64-bit integer vector initialized with the specified 1292/// 32-bit integer values. 1293/// 1294/// \headerfile <x86intrin.h> 1295/// 1296/// This intrinsic is a utility function and does not correspond to a specific 1297/// instruction. 1298/// 1299/// \param __i1 1300/// A 32-bit integer value used to initialize the upper 32 bits of the 1301/// result. 1302/// \param __i0 1303/// A 32-bit integer value used to initialize the lower 32 bits of the 1304/// result. 1305/// \returns An initialized 64-bit integer vector. 1306static __inline__ __m64 __DEFAULT_FN_ATTRS 1307_mm_set_pi32(int __i1, int __i0) 1308{ 1309 return (__m64)__builtin_ia32_vec_init_v2si(__i0, __i1); 1310} 1311 1312/// Constructs a 64-bit integer vector initialized with the specified 1313/// 16-bit integer values. 1314/// 1315/// \headerfile <x86intrin.h> 1316/// 1317/// This intrinsic is a utility function and does not correspond to a specific 1318/// instruction. 1319/// 1320/// \param __s3 1321/// A 16-bit integer value used to initialize bits [63:48] of the result. 1322/// \param __s2 1323/// A 16-bit integer value used to initialize bits [47:32] of the result. 1324/// \param __s1 1325/// A 16-bit integer value used to initialize bits [31:16] of the result. 1326/// \param __s0 1327/// A 16-bit integer value used to initialize bits [15:0] of the result. 1328/// \returns An initialized 64-bit integer vector. 1329static __inline__ __m64 __DEFAULT_FN_ATTRS 1330_mm_set_pi16(short __s3, short __s2, short __s1, short __s0) 1331{ 1332 return (__m64)__builtin_ia32_vec_init_v4hi(__s0, __s1, __s2, __s3); 1333} 1334 1335/// Constructs a 64-bit integer vector initialized with the specified 1336/// 8-bit integer values. 1337/// 1338/// \headerfile <x86intrin.h> 1339/// 1340/// This intrinsic is a utility function and does not correspond to a specific 1341/// instruction. 1342/// 1343/// \param __b7 1344/// An 8-bit integer value used to initialize bits [63:56] of the result. 1345/// \param __b6 1346/// An 8-bit integer value used to initialize bits [55:48] of the result. 1347/// \param __b5 1348/// An 8-bit integer value used to initialize bits [47:40] of the result. 1349/// \param __b4 1350/// An 8-bit integer value used to initialize bits [39:32] of the result. 1351/// \param __b3 1352/// An 8-bit integer value used to initialize bits [31:24] of the result. 1353/// \param __b2 1354/// An 8-bit integer value used to initialize bits [23:16] of the result. 1355/// \param __b1 1356/// An 8-bit integer value used to initialize bits [15:8] of the result. 1357/// \param __b0 1358/// An 8-bit integer value used to initialize bits [7:0] of the result. 1359/// \returns An initialized 64-bit integer vector. 1360static __inline__ __m64 __DEFAULT_FN_ATTRS 1361_mm_set_pi8(char __b7, char __b6, char __b5, char __b4, char __b3, char __b2, 1362 char __b1, char __b0) 1363{ 1364 return (__m64)__builtin_ia32_vec_init_v8qi(__b0, __b1, __b2, __b3, 1365 __b4, __b5, __b6, __b7); 1366} 1367 1368/// Constructs a 64-bit integer vector of [2 x i32], with each of the 1369/// 32-bit integer vector elements set to the specified 32-bit integer 1370/// value. 1371/// 1372/// \headerfile <x86intrin.h> 1373/// 1374/// This intrinsic is a utility function and does not correspond to a specific 1375/// instruction. 1376/// 1377/// \param __i 1378/// A 32-bit integer value used to initialize each vector element of the 1379/// result. 1380/// \returns An initialized 64-bit integer vector of [2 x i32]. 1381static __inline__ __m64 __DEFAULT_FN_ATTRS 1382_mm_set1_pi32(int __i) 1383{ 1384 return _mm_set_pi32(__i, __i); 1385} 1386 1387/// Constructs a 64-bit integer vector of [4 x i16], with each of the 1388/// 16-bit integer vector elements set to the specified 16-bit integer 1389/// value. 1390/// 1391/// \headerfile <x86intrin.h> 1392/// 1393/// This intrinsic is a utility function and does not correspond to a specific 1394/// instruction. 1395/// 1396/// \param __w 1397/// A 16-bit integer value used to initialize each vector element of the 1398/// result. 1399/// \returns An initialized 64-bit integer vector of [4 x i16]. 1400static __inline__ __m64 __DEFAULT_FN_ATTRS 1401_mm_set1_pi16(short __w) 1402{ 1403 return _mm_set_pi16(__w, __w, __w, __w); 1404} 1405 1406/// Constructs a 64-bit integer vector of [8 x i8], with each of the 1407/// 8-bit integer vector elements set to the specified 8-bit integer value. 1408/// 1409/// \headerfile <x86intrin.h> 1410/// 1411/// This intrinsic is a utility function and does not correspond to a specific 1412/// instruction. 1413/// 1414/// \param __b 1415/// An 8-bit integer value used to initialize each vector element of the 1416/// result. 1417/// \returns An initialized 64-bit integer vector of [8 x i8]. 1418static __inline__ __m64 __DEFAULT_FN_ATTRS 1419_mm_set1_pi8(char __b) 1420{ 1421 return _mm_set_pi8(__b, __b, __b, __b, __b, __b, __b, __b); 1422} 1423 1424/// Constructs a 64-bit integer vector, initialized in reverse order with 1425/// the specified 32-bit integer values. 1426/// 1427/// \headerfile <x86intrin.h> 1428/// 1429/// This intrinsic is a utility function and does not correspond to a specific 1430/// instruction. 1431/// 1432/// \param __i0 1433/// A 32-bit integer value used to initialize the lower 32 bits of the 1434/// result. 1435/// \param __i1 1436/// A 32-bit integer value used to initialize the upper 32 bits of the 1437/// result. 1438/// \returns An initialized 64-bit integer vector. 1439static __inline__ __m64 __DEFAULT_FN_ATTRS 1440_mm_setr_pi32(int __i0, int __i1) 1441{ 1442 return _mm_set_pi32(__i1, __i0); 1443} 1444 1445/// Constructs a 64-bit integer vector, initialized in reverse order with 1446/// the specified 16-bit integer values. 1447/// 1448/// \headerfile <x86intrin.h> 1449/// 1450/// This intrinsic is a utility function and does not correspond to a specific 1451/// instruction. 1452/// 1453/// \param __w0 1454/// A 16-bit integer value used to initialize bits [15:0] of the result. 1455/// \param __w1 1456/// A 16-bit integer value used to initialize bits [31:16] of the result. 1457/// \param __w2 1458/// A 16-bit integer value used to initialize bits [47:32] of the result. 1459/// \param __w3 1460/// A 16-bit integer value used to initialize bits [63:48] of the result. 1461/// \returns An initialized 64-bit integer vector. 1462static __inline__ __m64 __DEFAULT_FN_ATTRS 1463_mm_setr_pi16(short __w0, short __w1, short __w2, short __w3) 1464{ 1465 return _mm_set_pi16(__w3, __w2, __w1, __w0); 1466} 1467 1468/// Constructs a 64-bit integer vector, initialized in reverse order with 1469/// the specified 8-bit integer values. 1470/// 1471/// \headerfile <x86intrin.h> 1472/// 1473/// This intrinsic is a utility function and does not correspond to a specific 1474/// instruction. 1475/// 1476/// \param __b0 1477/// An 8-bit integer value used to initialize bits [7:0] of the result. 1478/// \param __b1 1479/// An 8-bit integer value used to initialize bits [15:8] of the result. 1480/// \param __b2 1481/// An 8-bit integer value used to initialize bits [23:16] of the result. 1482/// \param __b3 1483/// An 8-bit integer value used to initialize bits [31:24] of the result. 1484/// \param __b4 1485/// An 8-bit integer value used to initialize bits [39:32] of the result. 1486/// \param __b5 1487/// An 8-bit integer value used to initialize bits [47:40] of the result. 1488/// \param __b6 1489/// An 8-bit integer value used to initialize bits [55:48] of the result. 1490/// \param __b7 1491/// An 8-bit integer value used to initialize bits [63:56] of the result. 1492/// \returns An initialized 64-bit integer vector. 1493static __inline__ __m64 __DEFAULT_FN_ATTRS 1494_mm_setr_pi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5, 1495 char __b6, char __b7) 1496{ 1497 return _mm_set_pi8(__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0); 1498} 1499 1500#undef __DEFAULT_FN_ATTRS 1501 1502/* Aliases for compatibility. */ 1503#define _m_empty _mm_empty 1504#define _m_from_int _mm_cvtsi32_si64 1505#define _m_from_int64 _mm_cvtsi64_m64 1506#define _m_to_int _mm_cvtsi64_si32 1507#define _m_to_int64 _mm_cvtm64_si64 1508#define _m_packsswb _mm_packs_pi16 1509#define _m_packssdw _mm_packs_pi32 1510#define _m_packuswb _mm_packs_pu16 1511#define _m_punpckhbw _mm_unpackhi_pi8 1512#define _m_punpckhwd _mm_unpackhi_pi16 1513#define _m_punpckhdq _mm_unpackhi_pi32 1514#define _m_punpcklbw _mm_unpacklo_pi8 1515#define _m_punpcklwd _mm_unpacklo_pi16 1516#define _m_punpckldq _mm_unpacklo_pi32 1517#define _m_paddb _mm_add_pi8 1518#define _m_paddw _mm_add_pi16 1519#define _m_paddd _mm_add_pi32 1520#define _m_paddsb _mm_adds_pi8 1521#define _m_paddsw _mm_adds_pi16 1522#define _m_paddusb _mm_adds_pu8 1523#define _m_paddusw _mm_adds_pu16 1524#define _m_psubb _mm_sub_pi8 1525#define _m_psubw _mm_sub_pi16 1526#define _m_psubd _mm_sub_pi32 1527#define _m_psubsb _mm_subs_pi8 1528#define _m_psubsw _mm_subs_pi16 1529#define _m_psubusb _mm_subs_pu8 1530#define _m_psubusw _mm_subs_pu16 1531#define _m_pmaddwd _mm_madd_pi16 1532#define _m_pmulhw _mm_mulhi_pi16 1533#define _m_pmullw _mm_mullo_pi16 1534#define _m_psllw _mm_sll_pi16 1535#define _m_psllwi _mm_slli_pi16 1536#define _m_pslld _mm_sll_pi32 1537#define _m_pslldi _mm_slli_pi32 1538#define _m_psllq _mm_sll_si64 1539#define _m_psllqi _mm_slli_si64 1540#define _m_psraw _mm_sra_pi16 1541#define _m_psrawi _mm_srai_pi16 1542#define _m_psrad _mm_sra_pi32 1543#define _m_psradi _mm_srai_pi32 1544#define _m_psrlw _mm_srl_pi16 1545#define _m_psrlwi _mm_srli_pi16 1546#define _m_psrld _mm_srl_pi32 1547#define _m_psrldi _mm_srli_pi32 1548#define _m_psrlq _mm_srl_si64 1549#define _m_psrlqi _mm_srli_si64 1550#define _m_pand _mm_and_si64 1551#define _m_pandn _mm_andnot_si64 1552#define _m_por _mm_or_si64 1553#define _m_pxor _mm_xor_si64 1554#define _m_pcmpeqb _mm_cmpeq_pi8 1555#define _m_pcmpeqw _mm_cmpeq_pi16 1556#define _m_pcmpeqd _mm_cmpeq_pi32 1557#define _m_pcmpgtb _mm_cmpgt_pi8 1558#define _m_pcmpgtw _mm_cmpgt_pi16 1559#define _m_pcmpgtd _mm_cmpgt_pi32 1560 1561#endif /* __MMINTRIN_H */ 1562 1563