1193326Sed/*===---- mmintrin.h - MMX intrinsics --------------------------------------=== 2193326Sed * 3353358Sdim * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4353358Sdim * See https://llvm.org/LICENSE.txt for license information. 5353358Sdim * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6193326Sed * 7193326Sed *===-----------------------------------------------------------------------=== 8193326Sed */ 9193326Sed 10193326Sed#ifndef __MMINTRIN_H 11193326Sed#define __MMINTRIN_H 12193326Sed 13353358Sdimtypedef long long __m64 __attribute__((__vector_size__(8), __aligned__(8))); 14193326Sed 15309124Sdimtypedef long long __v1di __attribute__((__vector_size__(8))); 16193326Sedtypedef int __v2si __attribute__((__vector_size__(8))); 17193326Sedtypedef short __v4hi __attribute__((__vector_size__(8))); 18193326Sedtypedef char __v8qi __attribute__((__vector_size__(8))); 19193326Sed 20288943Sdim/* Define the default attributes for the functions in this file. */ 21341825Sdim#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("mmx"), __min_vector_width__(64))) 22288943Sdim 23341825Sdim/// Clears the MMX state by setting the state of the x87 stack registers 24309124Sdim/// to empty. 25309124Sdim/// 26309124Sdim/// \headerfile <x86intrin.h> 27309124Sdim/// 28314564Sdim/// This intrinsic corresponds to the <c> EMMS </c> instruction. 29309124Sdim/// 30341825Sdimstatic __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("mmx"))) 31193326Sed_mm_empty(void) 32193326Sed{ 33193326Sed __builtin_ia32_emms(); 34193326Sed} 35193326Sed 36341825Sdim/// Constructs a 64-bit integer vector, setting the lower 32 bits to the 37309124Sdim/// value of the 32-bit integer parameter and setting the upper 32 bits to 0. 38309124Sdim/// 39309124Sdim/// \headerfile <x86intrin.h> 40309124Sdim/// 41341825Sdim/// This intrinsic corresponds to the <c> MOVD </c> instruction. 42309124Sdim/// 43309124Sdim/// \param __i 44309124Sdim/// A 32-bit integer value. 45309124Sdim/// \returns A 64-bit integer vector. The lower 32 bits contain the value of the 46309124Sdim/// parameter. The upper 32 bits are set to 0. 47288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS 48193326Sed_mm_cvtsi32_si64(int __i) 49193326Sed{ 50218893Sdim return (__m64)__builtin_ia32_vec_init_v2si(__i, 0); 51193326Sed} 52193326Sed 53341825Sdim/// Returns the lower 32 bits of a 64-bit integer vector as a 32-bit 54309124Sdim/// signed integer. 55309124Sdim/// 56309124Sdim/// \headerfile <x86intrin.h> 57309124Sdim/// 58341825Sdim/// This intrinsic corresponds to the <c> MOVD </c> instruction. 59309124Sdim/// 60309124Sdim/// \param __m 61309124Sdim/// A 64-bit integer vector. 62309124Sdim/// \returns A 32-bit signed integer value containing the lower 32 bits of the 63309124Sdim/// parameter. 64288943Sdimstatic __inline__ int __DEFAULT_FN_ATTRS 65193326Sed_mm_cvtsi64_si32(__m64 __m) 66193326Sed{ 67218893Sdim return __builtin_ia32_vec_ext_v2si((__v2si)__m, 0); 68193326Sed} 69193326Sed 70341825Sdim/// Casts a 64-bit signed integer value into a 64-bit integer vector. 71309124Sdim/// 72309124Sdim/// \headerfile <x86intrin.h> 73309124Sdim/// 74341825Sdim/// This intrinsic corresponds to the <c> MOVQ </c> instruction. 75309124Sdim/// 76309124Sdim/// \param __i 77309124Sdim/// A 64-bit signed integer. 78309124Sdim/// \returns A 64-bit integer vector containing the same bitwise pattern as the 79309124Sdim/// parameter. 80288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS 81193326Sed_mm_cvtsi64_m64(long long __i) 82193326Sed{ 83193326Sed return (__m64)__i; 84193326Sed} 85193326Sed 86341825Sdim/// Casts a 64-bit integer vector into a 64-bit signed integer value. 87309124Sdim/// 88309124Sdim/// \headerfile <x86intrin.h> 89309124Sdim/// 90341825Sdim/// This intrinsic corresponds to the <c> MOVQ </c> instruction. 91309124Sdim/// 92309124Sdim/// \param __m 93309124Sdim/// A 64-bit integer vector. 94309124Sdim/// \returns A 64-bit signed integer containing the same bitwise pattern as the 95309124Sdim/// parameter. 96288943Sdimstatic __inline__ long long __DEFAULT_FN_ATTRS 97193326Sed_mm_cvtm64_si64(__m64 __m) 98193326Sed{ 99193326Sed return (long long)__m; 100193326Sed} 101193326Sed 102341825Sdim/// Converts 16-bit signed integers from both 64-bit integer vector 103309124Sdim/// parameters of [4 x i16] into 8-bit signed integer values, and constructs 104309124Sdim/// a 64-bit integer vector of [8 x i8] as the result. Positive values 105309124Sdim/// greater than 0x7F are saturated to 0x7F. Negative values less than 0x80 106309124Sdim/// are saturated to 0x80. 107309124Sdim/// 108309124Sdim/// \headerfile <x86intrin.h> 109309124Sdim/// 110314564Sdim/// This intrinsic corresponds to the <c> PACKSSWB </c> instruction. 111309124Sdim/// 112309124Sdim/// \param __m1 113309124Sdim/// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a 114309124Sdim/// 16-bit signed integer and is converted to an 8-bit signed integer with 115309124Sdim/// saturation. Positive values greater than 0x7F are saturated to 0x7F. 116309124Sdim/// Negative values less than 0x80 are saturated to 0x80. The converted 117309124Sdim/// [4 x i8] values are written to the lower 32 bits of the result. 118309124Sdim/// \param __m2 119309124Sdim/// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a 120309124Sdim/// 16-bit signed integer and is converted to an 8-bit signed integer with 121309124Sdim/// saturation. Positive values greater than 0x7F are saturated to 0x7F. 122309124Sdim/// Negative values less than 0x80 are saturated to 0x80. The converted 123309124Sdim/// [4 x i8] values are written to the upper 32 bits of the result. 124309124Sdim/// \returns A 64-bit integer vector of [8 x i8] containing the converted 125309124Sdim/// values. 126288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS 127193326Sed_mm_packs_pi16(__m64 __m1, __m64 __m2) 128193326Sed{ 129193326Sed return (__m64)__builtin_ia32_packsswb((__v4hi)__m1, (__v4hi)__m2); 130193326Sed} 131193326Sed 132341825Sdim/// Converts 32-bit signed integers from both 64-bit integer vector 133309124Sdim/// parameters of [2 x i32] into 16-bit signed integer values, and constructs 134309124Sdim/// a 64-bit integer vector of [4 x i16] as the result. Positive values 135309124Sdim/// greater than 0x7FFF are saturated to 0x7FFF. Negative values less than 136309124Sdim/// 0x8000 are saturated to 0x8000. 137309124Sdim/// 138309124Sdim/// \headerfile <x86intrin.h> 139309124Sdim/// 140314564Sdim/// This intrinsic corresponds to the <c> PACKSSDW </c> instruction. 141309124Sdim/// 142309124Sdim/// \param __m1 143309124Sdim/// A 64-bit integer vector of [2 x i32]. Each 32-bit element is treated as a 144309124Sdim/// 32-bit signed integer and is converted to a 16-bit signed integer with 145309124Sdim/// saturation. Positive values greater than 0x7FFF are saturated to 0x7FFF. 146309124Sdim/// Negative values less than 0x8000 are saturated to 0x8000. The converted 147309124Sdim/// [2 x i16] values are written to the lower 32 bits of the result. 148309124Sdim/// \param __m2 149309124Sdim/// A 64-bit integer vector of [2 x i32]. Each 32-bit element is treated as a 150309124Sdim/// 32-bit signed integer and is converted to a 16-bit signed integer with 151309124Sdim/// saturation. Positive values greater than 0x7FFF are saturated to 0x7FFF. 152309124Sdim/// Negative values less than 0x8000 are saturated to 0x8000. The converted 153309124Sdim/// [2 x i16] values are written to the upper 32 bits of the result. 154309124Sdim/// \returns A 64-bit integer vector of [4 x i16] containing the converted 155309124Sdim/// values. 156288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS 157193326Sed_mm_packs_pi32(__m64 __m1, __m64 __m2) 158193326Sed{ 159193326Sed return (__m64)__builtin_ia32_packssdw((__v2si)__m1, (__v2si)__m2); 160193326Sed} 161193326Sed 162341825Sdim/// Converts 16-bit signed integers from both 64-bit integer vector 163309124Sdim/// parameters of [4 x i16] into 8-bit unsigned integer values, and 164309124Sdim/// constructs a 64-bit integer vector of [8 x i8] as the result. Values 165309124Sdim/// greater than 0xFF are saturated to 0xFF. Values less than 0 are saturated 166309124Sdim/// to 0. 167309124Sdim/// 168309124Sdim/// \headerfile <x86intrin.h> 169309124Sdim/// 170314564Sdim/// This intrinsic corresponds to the <c> PACKUSWB </c> instruction. 171309124Sdim/// 172309124Sdim/// \param __m1 173309124Sdim/// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a 174309124Sdim/// 16-bit signed integer and is converted to an 8-bit unsigned integer with 175309124Sdim/// saturation. Values greater than 0xFF are saturated to 0xFF. Values less 176309124Sdim/// than 0 are saturated to 0. The converted [4 x i8] values are written to 177309124Sdim/// the lower 32 bits of the result. 178309124Sdim/// \param __m2 179309124Sdim/// A 64-bit integer vector of [4 x i16]. Each 16-bit element is treated as a 180309124Sdim/// 16-bit signed integer and is converted to an 8-bit unsigned integer with 181309124Sdim/// saturation. Values greater than 0xFF are saturated to 0xFF. Values less 182309124Sdim/// than 0 are saturated to 0. The converted [4 x i8] values are written to 183309124Sdim/// the upper 32 bits of the result. 184309124Sdim/// \returns A 64-bit integer vector of [8 x i8] containing the converted 185309124Sdim/// values. 186288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS 187193326Sed_mm_packs_pu16(__m64 __m1, __m64 __m2) 188193326Sed{ 189193326Sed return (__m64)__builtin_ia32_packuswb((__v4hi)__m1, (__v4hi)__m2); 190193326Sed} 191193326Sed 192341825Sdim/// Unpacks the upper 32 bits from two 64-bit integer vectors of [8 x i8] 193309124Sdim/// and interleaves them into a 64-bit integer vector of [8 x i8]. 194309124Sdim/// 195309124Sdim/// \headerfile <x86intrin.h> 196309124Sdim/// 197314564Sdim/// This intrinsic corresponds to the <c> PUNPCKHBW </c> instruction. 198309124Sdim/// 199309124Sdim/// \param __m1 200321369Sdim/// A 64-bit integer vector of [8 x i8]. \n 201314564Sdim/// Bits [39:32] are written to bits [7:0] of the result. \n 202314564Sdim/// Bits [47:40] are written to bits [23:16] of the result. \n 203314564Sdim/// Bits [55:48] are written to bits [39:32] of the result. \n 204309124Sdim/// Bits [63:56] are written to bits [55:48] of the result. 205309124Sdim/// \param __m2 206309124Sdim/// A 64-bit integer vector of [8 x i8]. 207314564Sdim/// Bits [39:32] are written to bits [15:8] of the result. \n 208314564Sdim/// Bits [47:40] are written to bits [31:24] of the result. \n 209314564Sdim/// Bits [55:48] are written to bits [47:40] of the result. \n 210309124Sdim/// Bits [63:56] are written to bits [63:56] of the result. 211309124Sdim/// \returns A 64-bit integer vector of [8 x i8] containing the interleaved 212309124Sdim/// values. 213288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS 214193326Sed_mm_unpackhi_pi8(__m64 __m1, __m64 __m2) 215193326Sed{ 216218893Sdim return (__m64)__builtin_ia32_punpckhbw((__v8qi)__m1, (__v8qi)__m2); 217193326Sed} 218193326Sed 219341825Sdim/// Unpacks the upper 32 bits from two 64-bit integer vectors of 220309124Sdim/// [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16]. 221309124Sdim/// 222309124Sdim/// \headerfile <x86intrin.h> 223309124Sdim/// 224314564Sdim/// This intrinsic corresponds to the <c> PUNPCKHWD </c> instruction. 225309124Sdim/// 226309124Sdim/// \param __m1 227309124Sdim/// A 64-bit integer vector of [4 x i16]. 228314564Sdim/// Bits [47:32] are written to bits [15:0] of the result. \n 229309124Sdim/// Bits [63:48] are written to bits [47:32] of the result. 230309124Sdim/// \param __m2 231309124Sdim/// A 64-bit integer vector of [4 x i16]. 232314564Sdim/// Bits [47:32] are written to bits [31:16] of the result. \n 233309124Sdim/// Bits [63:48] are written to bits [63:48] of the result. 234309124Sdim/// \returns A 64-bit integer vector of [4 x i16] containing the interleaved 235309124Sdim/// values. 236288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS 237193326Sed_mm_unpackhi_pi16(__m64 __m1, __m64 __m2) 238193326Sed{ 239218893Sdim return (__m64)__builtin_ia32_punpckhwd((__v4hi)__m1, (__v4hi)__m2); 240193326Sed} 241193326Sed 242341825Sdim/// Unpacks the upper 32 bits from two 64-bit integer vectors of 243309124Sdim/// [2 x i32] and interleaves them into a 64-bit integer vector of [2 x i32]. 244309124Sdim/// 245309124Sdim/// \headerfile <x86intrin.h> 246309124Sdim/// 247314564Sdim/// This intrinsic corresponds to the <c> PUNPCKHDQ </c> instruction. 248309124Sdim/// 249309124Sdim/// \param __m1 250309124Sdim/// A 64-bit integer vector of [2 x i32]. The upper 32 bits are written to 251309124Sdim/// the lower 32 bits of the result. 252309124Sdim/// \param __m2 253309124Sdim/// A 64-bit integer vector of [2 x i32]. The upper 32 bits are written to 254309124Sdim/// the upper 32 bits of the result. 255309124Sdim/// \returns A 64-bit integer vector of [2 x i32] containing the interleaved 256309124Sdim/// values. 257288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS 258193326Sed_mm_unpackhi_pi32(__m64 __m1, __m64 __m2) 259193326Sed{ 260218893Sdim return (__m64)__builtin_ia32_punpckhdq((__v2si)__m1, (__v2si)__m2); 261193326Sed} 262193326Sed 263341825Sdim/// Unpacks the lower 32 bits from two 64-bit integer vectors of [8 x i8] 264309124Sdim/// and interleaves them into a 64-bit integer vector of [8 x i8]. 265309124Sdim/// 266309124Sdim/// \headerfile <x86intrin.h> 267309124Sdim/// 268314564Sdim/// This intrinsic corresponds to the <c> PUNPCKLBW </c> instruction. 269309124Sdim/// 270309124Sdim/// \param __m1 271309124Sdim/// A 64-bit integer vector of [8 x i8]. 272314564Sdim/// Bits [7:0] are written to bits [7:0] of the result. \n 273314564Sdim/// Bits [15:8] are written to bits [23:16] of the result. \n 274314564Sdim/// Bits [23:16] are written to bits [39:32] of the result. \n 275309124Sdim/// Bits [31:24] are written to bits [55:48] of the result. 276309124Sdim/// \param __m2 277309124Sdim/// A 64-bit integer vector of [8 x i8]. 278314564Sdim/// Bits [7:0] are written to bits [15:8] of the result. \n 279314564Sdim/// Bits [15:8] are written to bits [31:24] of the result. \n 280314564Sdim/// Bits [23:16] are written to bits [47:40] of the result. \n 281309124Sdim/// Bits [31:24] are written to bits [63:56] of the result. 282309124Sdim/// \returns A 64-bit integer vector of [8 x i8] containing the interleaved 283309124Sdim/// values. 284288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS 285193326Sed_mm_unpacklo_pi8(__m64 __m1, __m64 __m2) 286193326Sed{ 287218893Sdim return (__m64)__builtin_ia32_punpcklbw((__v8qi)__m1, (__v8qi)__m2); 288193326Sed} 289193326Sed 290341825Sdim/// Unpacks the lower 32 bits from two 64-bit integer vectors of 291309124Sdim/// [4 x i16] and interleaves them into a 64-bit integer vector of [4 x i16]. 292309124Sdim/// 293309124Sdim/// \headerfile <x86intrin.h> 294309124Sdim/// 295314564Sdim/// This intrinsic corresponds to the <c> PUNPCKLWD </c> instruction. 296309124Sdim/// 297309124Sdim/// \param __m1 298309124Sdim/// A 64-bit integer vector of [4 x i16]. 299314564Sdim/// Bits [15:0] are written to bits [15:0] of the result. \n 300309124Sdim/// Bits [31:16] are written to bits [47:32] of the result. 301309124Sdim/// \param __m2 302309124Sdim/// A 64-bit integer vector of [4 x i16]. 303314564Sdim/// Bits [15:0] are written to bits [31:16] of the result. \n 304309124Sdim/// Bits [31:16] are written to bits [63:48] of the result. 305309124Sdim/// \returns A 64-bit integer vector of [4 x i16] containing the interleaved 306309124Sdim/// values. 307288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS 308193326Sed_mm_unpacklo_pi16(__m64 __m1, __m64 __m2) 309193326Sed{ 310218893Sdim return (__m64)__builtin_ia32_punpcklwd((__v4hi)__m1, (__v4hi)__m2); 311193326Sed} 312193326Sed 313341825Sdim/// Unpacks the lower 32 bits from two 64-bit integer vectors of 314309124Sdim/// [2 x i32] and interleaves them into a 64-bit integer vector of [2 x i32]. 315309124Sdim/// 316309124Sdim/// \headerfile <x86intrin.h> 317309124Sdim/// 318314564Sdim/// This intrinsic corresponds to the <c> PUNPCKLDQ </c> instruction. 319309124Sdim/// 320309124Sdim/// \param __m1 321309124Sdim/// A 64-bit integer vector of [2 x i32]. The lower 32 bits are written to 322309124Sdim/// the lower 32 bits of the result. 323309124Sdim/// \param __m2 324309124Sdim/// A 64-bit integer vector of [2 x i32]. The lower 32 bits are written to 325309124Sdim/// the upper 32 bits of the result. 326309124Sdim/// \returns A 64-bit integer vector of [2 x i32] containing the interleaved 327309124Sdim/// values. 328288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS 329193326Sed_mm_unpacklo_pi32(__m64 __m1, __m64 __m2) 330193326Sed{ 331218893Sdim return (__m64)__builtin_ia32_punpckldq((__v2si)__m1, (__v2si)__m2); 332193326Sed} 333193326Sed 334341825Sdim/// Adds each 8-bit integer element of the first 64-bit integer vector 335309124Sdim/// of [8 x i8] to the corresponding 8-bit integer element of the second 336309124Sdim/// 64-bit integer vector of [8 x i8]. The lower 8 bits of the results are 337309124Sdim/// packed into a 64-bit integer vector of [8 x i8]. 338309124Sdim/// 339309124Sdim/// \headerfile <x86intrin.h> 340309124Sdim/// 341314564Sdim/// This intrinsic corresponds to the <c> PADDB </c> instruction. 342309124Sdim/// 343309124Sdim/// \param __m1 344309124Sdim/// A 64-bit integer vector of [8 x i8]. 345309124Sdim/// \param __m2 346309124Sdim/// A 64-bit integer vector of [8 x i8]. 347309124Sdim/// \returns A 64-bit integer vector of [8 x i8] containing the sums of both 348309124Sdim/// parameters. 349288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS 350193326Sed_mm_add_pi8(__m64 __m1, __m64 __m2) 351193326Sed{ 352218893Sdim return (__m64)__builtin_ia32_paddb((__v8qi)__m1, (__v8qi)__m2); 353193326Sed} 354193326Sed 355341825Sdim/// Adds each 16-bit integer element of the first 64-bit integer vector 356309124Sdim/// of [4 x i16] to the corresponding 16-bit integer element of the second 357309124Sdim/// 64-bit integer vector of [4 x i16]. The lower 16 bits of the results are 358309124Sdim/// packed into a 64-bit integer vector of [4 x i16]. 359309124Sdim/// 360309124Sdim/// \headerfile <x86intrin.h> 361309124Sdim/// 362314564Sdim/// This intrinsic corresponds to the <c> PADDW </c> instruction. 363309124Sdim/// 364309124Sdim/// \param __m1 365309124Sdim/// A 64-bit integer vector of [4 x i16]. 366309124Sdim/// \param __m2 367309124Sdim/// A 64-bit integer vector of [4 x i16]. 368309124Sdim/// \returns A 64-bit integer vector of [4 x i16] containing the sums of both 369309124Sdim/// parameters. 370288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS 371193326Sed_mm_add_pi16(__m64 __m1, __m64 __m2) 372193326Sed{ 373218893Sdim return (__m64)__builtin_ia32_paddw((__v4hi)__m1, (__v4hi)__m2); 374193326Sed} 375193326Sed 376341825Sdim/// Adds each 32-bit integer element of the first 64-bit integer vector 377309124Sdim/// of [2 x i32] to the corresponding 32-bit integer element of the second 378309124Sdim/// 64-bit integer vector of [2 x i32]. The lower 32 bits of the results are 379309124Sdim/// packed into a 64-bit integer vector of [2 x i32]. 380309124Sdim/// 381309124Sdim/// \headerfile <x86intrin.h> 382309124Sdim/// 383314564Sdim/// This intrinsic corresponds to the <c> PADDD </c> instruction. 384309124Sdim/// 385309124Sdim/// \param __m1 386309124Sdim/// A 64-bit integer vector of [2 x i32]. 387309124Sdim/// \param __m2 388309124Sdim/// A 64-bit integer vector of [2 x i32]. 389309124Sdim/// \returns A 64-bit integer vector of [2 x i32] containing the sums of both 390309124Sdim/// parameters. 391288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS 392193326Sed_mm_add_pi32(__m64 __m1, __m64 __m2) 393193326Sed{ 394218893Sdim return (__m64)__builtin_ia32_paddd((__v2si)__m1, (__v2si)__m2); 395193326Sed} 396193326Sed 397341825Sdim/// Adds each 8-bit signed integer element of the first 64-bit integer 398309124Sdim/// vector of [8 x i8] to the corresponding 8-bit signed integer element of 399309124Sdim/// the second 64-bit integer vector of [8 x i8]. Positive sums greater than 400309124Sdim/// 0x7F are saturated to 0x7F. Negative sums less than 0x80 are saturated to 401309124Sdim/// 0x80. The results are packed into a 64-bit integer vector of [8 x i8]. 402309124Sdim/// 403309124Sdim/// \headerfile <x86intrin.h> 404309124Sdim/// 405314564Sdim/// This intrinsic corresponds to the <c> PADDSB </c> instruction. 406309124Sdim/// 407309124Sdim/// \param __m1 408309124Sdim/// A 64-bit integer vector of [8 x i8]. 409309124Sdim/// \param __m2 410309124Sdim/// A 64-bit integer vector of [8 x i8]. 411309124Sdim/// \returns A 64-bit integer vector of [8 x i8] containing the saturated sums 412309124Sdim/// of both parameters. 413288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS 414296417Sdim_mm_adds_pi8(__m64 __m1, __m64 __m2) 415193326Sed{ 416193326Sed return (__m64)__builtin_ia32_paddsb((__v8qi)__m1, (__v8qi)__m2); 417193326Sed} 418193326Sed 419341825Sdim/// Adds each 16-bit signed integer element of the first 64-bit integer 420309124Sdim/// vector of [4 x i16] to the corresponding 16-bit signed integer element of 421309124Sdim/// the second 64-bit integer vector of [4 x i16]. Positive sums greater than 422309124Sdim/// 0x7FFF are saturated to 0x7FFF. Negative sums less than 0x8000 are 423309124Sdim/// saturated to 0x8000. The results are packed into a 64-bit integer vector 424309124Sdim/// of [4 x i16]. 425309124Sdim/// 426309124Sdim/// \headerfile <x86intrin.h> 427309124Sdim/// 428314564Sdim/// This intrinsic corresponds to the <c> PADDSW </c> instruction. 429309124Sdim/// 430309124Sdim/// \param __m1 431309124Sdim/// A 64-bit integer vector of [4 x i16]. 432309124Sdim/// \param __m2 433309124Sdim/// A 64-bit integer vector of [4 x i16]. 434309124Sdim/// \returns A 64-bit integer vector of [4 x i16] containing the saturated sums 435309124Sdim/// of both parameters. 436288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS 437193326Sed_mm_adds_pi16(__m64 __m1, __m64 __m2) 438193326Sed{ 439296417Sdim return (__m64)__builtin_ia32_paddsw((__v4hi)__m1, (__v4hi)__m2); 440193326Sed} 441193326Sed 442341825Sdim/// Adds each 8-bit unsigned integer element of the first 64-bit integer 443309124Sdim/// vector of [8 x i8] to the corresponding 8-bit unsigned integer element of 444309124Sdim/// the second 64-bit integer vector of [8 x i8]. Sums greater than 0xFF are 445309124Sdim/// saturated to 0xFF. The results are packed into a 64-bit integer vector of 446309124Sdim/// [8 x i8]. 447309124Sdim/// 448309124Sdim/// \headerfile <x86intrin.h> 449309124Sdim/// 450314564Sdim/// This intrinsic corresponds to the <c> PADDUSB </c> instruction. 451309124Sdim/// 452309124Sdim/// \param __m1 453309124Sdim/// A 64-bit integer vector of [8 x i8]. 454309124Sdim/// \param __m2 455309124Sdim/// A 64-bit integer vector of [8 x i8]. 456309124Sdim/// \returns A 64-bit integer vector of [8 x i8] containing the saturated 457309124Sdim/// unsigned sums of both parameters. 458288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS 459296417Sdim_mm_adds_pu8(__m64 __m1, __m64 __m2) 460193326Sed{ 461193326Sed return (__m64)__builtin_ia32_paddusb((__v8qi)__m1, (__v8qi)__m2); 462193326Sed} 463296417Sdim 464341825Sdim/// Adds each 16-bit unsigned integer element of the first 64-bit integer 465309124Sdim/// vector of [4 x i16] to the corresponding 16-bit unsigned integer element 466309124Sdim/// of the second 64-bit integer vector of [4 x i16]. Sums greater than 467309124Sdim/// 0xFFFF are saturated to 0xFFFF. The results are packed into a 64-bit 468309124Sdim/// integer vector of [4 x i16]. 469309124Sdim/// 470309124Sdim/// \headerfile <x86intrin.h> 471309124Sdim/// 472314564Sdim/// This intrinsic corresponds to the <c> PADDUSW </c> instruction. 473309124Sdim/// 474309124Sdim/// \param __m1 475309124Sdim/// A 64-bit integer vector of [4 x i16]. 476309124Sdim/// \param __m2 477309124Sdim/// A 64-bit integer vector of [4 x i16]. 478309124Sdim/// \returns A 64-bit integer vector of [4 x i16] containing the saturated 479309124Sdim/// unsigned sums of both parameters. 480288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS 481296417Sdim_mm_adds_pu16(__m64 __m1, __m64 __m2) 482193326Sed{ 483193326Sed return (__m64)__builtin_ia32_paddusw((__v4hi)__m1, (__v4hi)__m2); 484193326Sed} 485193326Sed 486341825Sdim/// Subtracts each 8-bit integer element of the second 64-bit integer 487309124Sdim/// vector of [8 x i8] from the corresponding 8-bit integer element of the 488309124Sdim/// first 64-bit integer vector of [8 x i8]. The lower 8 bits of the results 489309124Sdim/// are packed into a 64-bit integer vector of [8 x i8]. 490309124Sdim/// 491309124Sdim/// \headerfile <x86intrin.h> 492309124Sdim/// 493314564Sdim/// This intrinsic corresponds to the <c> PSUBB </c> instruction. 494309124Sdim/// 495309124Sdim/// \param __m1 496309124Sdim/// A 64-bit integer vector of [8 x i8] containing the minuends. 497309124Sdim/// \param __m2 498309124Sdim/// A 64-bit integer vector of [8 x i8] containing the subtrahends. 499309124Sdim/// \returns A 64-bit integer vector of [8 x i8] containing the differences of 500309124Sdim/// both parameters. 501288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS 502193326Sed_mm_sub_pi8(__m64 __m1, __m64 __m2) 503193326Sed{ 504218893Sdim return (__m64)__builtin_ia32_psubb((__v8qi)__m1, (__v8qi)__m2); 505193326Sed} 506296417Sdim 507341825Sdim/// Subtracts each 16-bit integer element of the second 64-bit integer 508309124Sdim/// vector of [4 x i16] from the corresponding 16-bit integer element of the 509309124Sdim/// first 64-bit integer vector of [4 x i16]. The lower 16 bits of the 510309124Sdim/// results are packed into a 64-bit integer vector of [4 x i16]. 511309124Sdim/// 512309124Sdim/// \headerfile <x86intrin.h> 513309124Sdim/// 514314564Sdim/// This intrinsic corresponds to the <c> PSUBW </c> instruction. 515309124Sdim/// 516309124Sdim/// \param __m1 517309124Sdim/// A 64-bit integer vector of [4 x i16] containing the minuends. 518309124Sdim/// \param __m2 519309124Sdim/// A 64-bit integer vector of [4 x i16] containing the subtrahends. 520309124Sdim/// \returns A 64-bit integer vector of [4 x i16] containing the differences of 521309124Sdim/// both parameters. 522288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS 523193326Sed_mm_sub_pi16(__m64 __m1, __m64 __m2) 524193326Sed{ 525218893Sdim return (__m64)__builtin_ia32_psubw((__v4hi)__m1, (__v4hi)__m2); 526193326Sed} 527296417Sdim 528341825Sdim/// Subtracts each 32-bit integer element of the second 64-bit integer 529309124Sdim/// vector of [2 x i32] from the corresponding 32-bit integer element of the 530309124Sdim/// first 64-bit integer vector of [2 x i32]. The lower 32 bits of the 531309124Sdim/// results are packed into a 64-bit integer vector of [2 x i32]. 532309124Sdim/// 533309124Sdim/// \headerfile <x86intrin.h> 534309124Sdim/// 535314564Sdim/// This intrinsic corresponds to the <c> PSUBD </c> instruction. 536309124Sdim/// 537309124Sdim/// \param __m1 538309124Sdim/// A 64-bit integer vector of [2 x i32] containing the minuends. 539309124Sdim/// \param __m2 540309124Sdim/// A 64-bit integer vector of [2 x i32] containing the subtrahends. 541309124Sdim/// \returns A 64-bit integer vector of [2 x i32] containing the differences of 542309124Sdim/// both parameters. 543288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS 544193326Sed_mm_sub_pi32(__m64 __m1, __m64 __m2) 545193326Sed{ 546218893Sdim return (__m64)__builtin_ia32_psubd((__v2si)__m1, (__v2si)__m2); 547193326Sed} 548193326Sed 549341825Sdim/// Subtracts each 8-bit signed integer element of the second 64-bit 550309124Sdim/// integer vector of [8 x i8] from the corresponding 8-bit signed integer 551309124Sdim/// element of the first 64-bit integer vector of [8 x i8]. Positive results 552309124Sdim/// greater than 0x7F are saturated to 0x7F. Negative results less than 0x80 553309124Sdim/// are saturated to 0x80. The results are packed into a 64-bit integer 554309124Sdim/// vector of [8 x i8]. 555309124Sdim/// 556309124Sdim/// \headerfile <x86intrin.h> 557309124Sdim/// 558314564Sdim/// This intrinsic corresponds to the <c> PSUBSB </c> instruction. 559309124Sdim/// 560309124Sdim/// \param __m1 561309124Sdim/// A 64-bit integer vector of [8 x i8] containing the minuends. 562309124Sdim/// \param __m2 563309124Sdim/// A 64-bit integer vector of [8 x i8] containing the subtrahends. 564309124Sdim/// \returns A 64-bit integer vector of [8 x i8] containing the saturated 565309124Sdim/// differences of both parameters. 566288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS 567193326Sed_mm_subs_pi8(__m64 __m1, __m64 __m2) 568193326Sed{ 569193326Sed return (__m64)__builtin_ia32_psubsb((__v8qi)__m1, (__v8qi)__m2); 570193326Sed} 571193326Sed 572341825Sdim/// Subtracts each 16-bit signed integer element of the second 64-bit 573309124Sdim/// integer vector of [4 x i16] from the corresponding 16-bit signed integer 574309124Sdim/// element of the first 64-bit integer vector of [4 x i16]. Positive results 575309124Sdim/// greater than 0x7FFF are saturated to 0x7FFF. Negative results less than 576309124Sdim/// 0x8000 are saturated to 0x8000. The results are packed into a 64-bit 577309124Sdim/// integer vector of [4 x i16]. 578309124Sdim/// 579309124Sdim/// \headerfile <x86intrin.h> 580309124Sdim/// 581314564Sdim/// This intrinsic corresponds to the <c> PSUBSW </c> instruction. 582309124Sdim/// 583309124Sdim/// \param __m1 584309124Sdim/// A 64-bit integer vector of [4 x i16] containing the minuends. 585309124Sdim/// \param __m2 586309124Sdim/// A 64-bit integer vector of [4 x i16] containing the subtrahends. 587309124Sdim/// \returns A 64-bit integer vector of [4 x i16] containing the saturated 588309124Sdim/// differences of both parameters. 589288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS 590193326Sed_mm_subs_pi16(__m64 __m1, __m64 __m2) 591193326Sed{ 592193326Sed return (__m64)__builtin_ia32_psubsw((__v4hi)__m1, (__v4hi)__m2); 593193326Sed} 594193326Sed 595341825Sdim/// Subtracts each 8-bit unsigned integer element of the second 64-bit 596309124Sdim/// integer vector of [8 x i8] from the corresponding 8-bit unsigned integer 597321369Sdim/// element of the first 64-bit integer vector of [8 x i8]. 598309124Sdim/// 599321369Sdim/// If an element of the first vector is less than the corresponding element 600321369Sdim/// of the second vector, the result is saturated to 0. The results are 601321369Sdim/// packed into a 64-bit integer vector of [8 x i8]. 602321369Sdim/// 603309124Sdim/// \headerfile <x86intrin.h> 604309124Sdim/// 605314564Sdim/// This intrinsic corresponds to the <c> PSUBUSB </c> instruction. 606309124Sdim/// 607309124Sdim/// \param __m1 608309124Sdim/// A 64-bit integer vector of [8 x i8] containing the minuends. 609309124Sdim/// \param __m2 610309124Sdim/// A 64-bit integer vector of [8 x i8] containing the subtrahends. 611309124Sdim/// \returns A 64-bit integer vector of [8 x i8] containing the saturated 612309124Sdim/// differences of both parameters. 613288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS 614193326Sed_mm_subs_pu8(__m64 __m1, __m64 __m2) 615193326Sed{ 616193326Sed return (__m64)__builtin_ia32_psubusb((__v8qi)__m1, (__v8qi)__m2); 617193326Sed} 618296417Sdim 619341825Sdim/// Subtracts each 16-bit unsigned integer element of the second 64-bit 620309124Sdim/// integer vector of [4 x i16] from the corresponding 16-bit unsigned 621321369Sdim/// integer element of the first 64-bit integer vector of [4 x i16]. 622309124Sdim/// 623321369Sdim/// If an element of the first vector is less than the corresponding element 624321369Sdim/// of the second vector, the result is saturated to 0. The results are 625321369Sdim/// packed into a 64-bit integer vector of [4 x i16]. 626321369Sdim/// 627309124Sdim/// \headerfile <x86intrin.h> 628309124Sdim/// 629314564Sdim/// This intrinsic corresponds to the <c> PSUBUSW </c> instruction. 630309124Sdim/// 631309124Sdim/// \param __m1 632309124Sdim/// A 64-bit integer vector of [4 x i16] containing the minuends. 633309124Sdim/// \param __m2 634309124Sdim/// A 64-bit integer vector of [4 x i16] containing the subtrahends. 635309124Sdim/// \returns A 64-bit integer vector of [4 x i16] containing the saturated 636309124Sdim/// differences of both parameters. 637288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS 638193326Sed_mm_subs_pu16(__m64 __m1, __m64 __m2) 639193326Sed{ 640193326Sed return (__m64)__builtin_ia32_psubusw((__v4hi)__m1, (__v4hi)__m2); 641193326Sed} 642193326Sed 643341825Sdim/// Multiplies each 16-bit signed integer element of the first 64-bit 644309124Sdim/// integer vector of [4 x i16] by the corresponding 16-bit signed integer 645309124Sdim/// element of the second 64-bit integer vector of [4 x i16] and get four 646309124Sdim/// 32-bit products. Adds adjacent pairs of products to get two 32-bit sums. 647309124Sdim/// The lower 32 bits of these two sums are packed into a 64-bit integer 648321369Sdim/// vector of [2 x i32]. 649309124Sdim/// 650321369Sdim/// For example, bits [15:0] of both parameters are multiplied, bits [31:16] 651321369Sdim/// of both parameters are multiplied, and the sum of both results is written 652321369Sdim/// to bits [31:0] of the result. 653321369Sdim/// 654309124Sdim/// \headerfile <x86intrin.h> 655309124Sdim/// 656314564Sdim/// This intrinsic corresponds to the <c> PMADDWD </c> instruction. 657309124Sdim/// 658309124Sdim/// \param __m1 659309124Sdim/// A 64-bit integer vector of [4 x i16]. 660309124Sdim/// \param __m2 661309124Sdim/// A 64-bit integer vector of [4 x i16]. 662309124Sdim/// \returns A 64-bit integer vector of [2 x i32] containing the sums of 663309124Sdim/// products of both parameters. 664288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS 665193326Sed_mm_madd_pi16(__m64 __m1, __m64 __m2) 666193326Sed{ 667193326Sed return (__m64)__builtin_ia32_pmaddwd((__v4hi)__m1, (__v4hi)__m2); 668193326Sed} 669193326Sed 670341825Sdim/// Multiplies each 16-bit signed integer element of the first 64-bit 671309124Sdim/// integer vector of [4 x i16] by the corresponding 16-bit signed integer 672309124Sdim/// element of the second 64-bit integer vector of [4 x i16]. Packs the upper 673309124Sdim/// 16 bits of the 32-bit products into a 64-bit integer vector of [4 x i16]. 674309124Sdim/// 675309124Sdim/// \headerfile <x86intrin.h> 676309124Sdim/// 677314564Sdim/// This intrinsic corresponds to the <c> PMULHW </c> instruction. 678309124Sdim/// 679309124Sdim/// \param __m1 680309124Sdim/// A 64-bit integer vector of [4 x i16]. 681309124Sdim/// \param __m2 682309124Sdim/// A 64-bit integer vector of [4 x i16]. 683309124Sdim/// \returns A 64-bit integer vector of [4 x i16] containing the upper 16 bits 684309124Sdim/// of the products of both parameters. 685288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS 686193326Sed_mm_mulhi_pi16(__m64 __m1, __m64 __m2) 687193326Sed{ 688193326Sed return (__m64)__builtin_ia32_pmulhw((__v4hi)__m1, (__v4hi)__m2); 689193326Sed} 690296417Sdim 691341825Sdim/// Multiplies each 16-bit signed integer element of the first 64-bit 692309124Sdim/// integer vector of [4 x i16] by the corresponding 16-bit signed integer 693309124Sdim/// element of the second 64-bit integer vector of [4 x i16]. Packs the lower 694309124Sdim/// 16 bits of the 32-bit products into a 64-bit integer vector of [4 x i16]. 695309124Sdim/// 696309124Sdim/// \headerfile <x86intrin.h> 697309124Sdim/// 698314564Sdim/// This intrinsic corresponds to the <c> PMULLW </c> instruction. 699309124Sdim/// 700309124Sdim/// \param __m1 701309124Sdim/// A 64-bit integer vector of [4 x i16]. 702309124Sdim/// \param __m2 703309124Sdim/// A 64-bit integer vector of [4 x i16]. 704309124Sdim/// \returns A 64-bit integer vector of [4 x i16] containing the lower 16 bits 705309124Sdim/// of the products of both parameters. 706288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS 707296417Sdim_mm_mullo_pi16(__m64 __m1, __m64 __m2) 708193326Sed{ 709218893Sdim return (__m64)__builtin_ia32_pmullw((__v4hi)__m1, (__v4hi)__m2); 710193326Sed} 711193326Sed 712341825Sdim/// Left-shifts each 16-bit signed integer element of the first 713309124Sdim/// parameter, which is a 64-bit integer vector of [4 x i16], by the number 714309124Sdim/// of bits specified by the second parameter, which is a 64-bit integer. The 715309124Sdim/// lower 16 bits of the results are packed into a 64-bit integer vector of 716309124Sdim/// [4 x i16]. 717309124Sdim/// 718309124Sdim/// \headerfile <x86intrin.h> 719309124Sdim/// 720314564Sdim/// This intrinsic corresponds to the <c> PSLLW </c> instruction. 721309124Sdim/// 722309124Sdim/// \param __m 723309124Sdim/// A 64-bit integer vector of [4 x i16]. 724309124Sdim/// \param __count 725309124Sdim/// A 64-bit integer vector interpreted as a single 64-bit integer. 726309124Sdim/// \returns A 64-bit integer vector of [4 x i16] containing the left-shifted 727314564Sdim/// values. If \a __count is greater or equal to 16, the result is set to all 728314564Sdim/// 0. 729288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS 730193326Sed_mm_sll_pi16(__m64 __m, __m64 __count) 731193326Sed{ 732193326Sed return (__m64)__builtin_ia32_psllw((__v4hi)__m, __count); 733193326Sed} 734193326Sed 735341825Sdim/// Left-shifts each 16-bit signed integer element of a 64-bit integer 736309124Sdim/// vector of [4 x i16] by the number of bits specified by a 32-bit integer. 737309124Sdim/// The lower 16 bits of the results are packed into a 64-bit integer vector 738309124Sdim/// of [4 x i16]. 739309124Sdim/// 740309124Sdim/// \headerfile <x86intrin.h> 741309124Sdim/// 742314564Sdim/// This intrinsic corresponds to the <c> PSLLW </c> instruction. 743309124Sdim/// 744309124Sdim/// \param __m 745309124Sdim/// A 64-bit integer vector of [4 x i16]. 746309124Sdim/// \param __count 747309124Sdim/// A 32-bit integer value. 748309124Sdim/// \returns A 64-bit integer vector of [4 x i16] containing the left-shifted 749314564Sdim/// values. If \a __count is greater or equal to 16, the result is set to all 750314564Sdim/// 0. 751288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS 752193326Sed_mm_slli_pi16(__m64 __m, int __count) 753193326Sed{ 754296417Sdim return (__m64)__builtin_ia32_psllwi((__v4hi)__m, __count); 755193326Sed} 756193326Sed 757341825Sdim/// Left-shifts each 32-bit signed integer element of the first 758309124Sdim/// parameter, which is a 64-bit integer vector of [2 x i32], by the number 759309124Sdim/// of bits specified by the second parameter, which is a 64-bit integer. The 760309124Sdim/// lower 32 bits of the results are packed into a 64-bit integer vector of 761309124Sdim/// [2 x i32]. 762309124Sdim/// 763309124Sdim/// \headerfile <x86intrin.h> 764309124Sdim/// 765314564Sdim/// This intrinsic corresponds to the <c> PSLLD </c> instruction. 766309124Sdim/// 767309124Sdim/// \param __m 768309124Sdim/// A 64-bit integer vector of [2 x i32]. 769309124Sdim/// \param __count 770309124Sdim/// A 64-bit integer vector interpreted as a single 64-bit integer. 771309124Sdim/// \returns A 64-bit integer vector of [2 x i32] containing the left-shifted 772314564Sdim/// values. If \a __count is greater or equal to 32, the result is set to all 773314564Sdim/// 0. 774288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS 775193326Sed_mm_sll_pi32(__m64 __m, __m64 __count) 776193326Sed{ 777193326Sed return (__m64)__builtin_ia32_pslld((__v2si)__m, __count); 778193326Sed} 779193326Sed 780341825Sdim/// Left-shifts each 32-bit signed integer element of a 64-bit integer 781309124Sdim/// vector of [2 x i32] by the number of bits specified by a 32-bit integer. 782309124Sdim/// The lower 32 bits of the results are packed into a 64-bit integer vector 783309124Sdim/// of [2 x i32]. 784309124Sdim/// 785309124Sdim/// \headerfile <x86intrin.h> 786309124Sdim/// 787314564Sdim/// This intrinsic corresponds to the <c> PSLLD </c> instruction. 788309124Sdim/// 789309124Sdim/// \param __m 790309124Sdim/// A 64-bit integer vector of [2 x i32]. 791309124Sdim/// \param __count 792309124Sdim/// A 32-bit integer value. 793309124Sdim/// \returns A 64-bit integer vector of [2 x i32] containing the left-shifted 794314564Sdim/// values. If \a __count is greater or equal to 32, the result is set to all 795314564Sdim/// 0. 796288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS 797193326Sed_mm_slli_pi32(__m64 __m, int __count) 798193326Sed{ 799193326Sed return (__m64)__builtin_ia32_pslldi((__v2si)__m, __count); 800193326Sed} 801193326Sed 802341825Sdim/// Left-shifts the first 64-bit integer parameter by the number of bits 803309124Sdim/// specified by the second 64-bit integer parameter. The lower 64 bits of 804309124Sdim/// result are returned. 805309124Sdim/// 806309124Sdim/// \headerfile <x86intrin.h> 807309124Sdim/// 808314564Sdim/// This intrinsic corresponds to the <c> PSLLQ </c> instruction. 809309124Sdim/// 810309124Sdim/// \param __m 811309124Sdim/// A 64-bit integer vector interpreted as a single 64-bit integer. 812309124Sdim/// \param __count 813309124Sdim/// A 64-bit integer vector interpreted as a single 64-bit integer. 814309124Sdim/// \returns A 64-bit integer vector containing the left-shifted value. If 815314564Sdim/// \a __count is greater or equal to 64, the result is set to 0. 816288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS 817193326Sed_mm_sll_si64(__m64 __m, __m64 __count) 818193326Sed{ 819309124Sdim return (__m64)__builtin_ia32_psllq((__v1di)__m, __count); 820193326Sed} 821193326Sed 822341825Sdim/// Left-shifts the first parameter, which is a 64-bit integer, by the 823309124Sdim/// number of bits specified by the second parameter, which is a 32-bit 824309124Sdim/// integer. The lower 64 bits of result are returned. 825309124Sdim/// 826309124Sdim/// \headerfile <x86intrin.h> 827309124Sdim/// 828314564Sdim/// This intrinsic corresponds to the <c> PSLLQ </c> instruction. 829309124Sdim/// 830309124Sdim/// \param __m 831309124Sdim/// A 64-bit integer vector interpreted as a single 64-bit integer. 832309124Sdim/// \param __count 833309124Sdim/// A 32-bit integer value. 834309124Sdim/// \returns A 64-bit integer vector containing the left-shifted value. If 835314564Sdim/// \a __count is greater or equal to 64, the result is set to 0. 836288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS 837193326Sed_mm_slli_si64(__m64 __m, int __count) 838193326Sed{ 839309124Sdim return (__m64)__builtin_ia32_psllqi((__v1di)__m, __count); 840193326Sed} 841193326Sed 842341825Sdim/// Right-shifts each 16-bit integer element of the first parameter, 843309124Sdim/// which is a 64-bit integer vector of [4 x i16], by the number of bits 844321369Sdim/// specified by the second parameter, which is a 64-bit integer. 845309124Sdim/// 846321369Sdim/// High-order bits are filled with the sign bit of the initial value of each 847321369Sdim/// 16-bit element. The 16-bit results are packed into a 64-bit integer 848321369Sdim/// vector of [4 x i16]. 849321369Sdim/// 850309124Sdim/// \headerfile <x86intrin.h> 851309124Sdim/// 852314564Sdim/// This intrinsic corresponds to the <c> PSRAW </c> instruction. 853309124Sdim/// 854309124Sdim/// \param __m 855309124Sdim/// A 64-bit integer vector of [4 x i16]. 856309124Sdim/// \param __count 857309124Sdim/// A 64-bit integer vector interpreted as a single 64-bit integer. 858309124Sdim/// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted 859309124Sdim/// values. 860288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS 861193326Sed_mm_sra_pi16(__m64 __m, __m64 __count) 862193326Sed{ 863296417Sdim return (__m64)__builtin_ia32_psraw((__v4hi)__m, __count); 864193326Sed} 865193326Sed 866341825Sdim/// Right-shifts each 16-bit integer element of a 64-bit integer vector 867309124Sdim/// of [4 x i16] by the number of bits specified by a 32-bit integer. 868321369Sdim/// 869309124Sdim/// High-order bits are filled with the sign bit of the initial value of each 870309124Sdim/// 16-bit element. The 16-bit results are packed into a 64-bit integer 871309124Sdim/// vector of [4 x i16]. 872309124Sdim/// 873309124Sdim/// \headerfile <x86intrin.h> 874309124Sdim/// 875314564Sdim/// This intrinsic corresponds to the <c> PSRAW </c> instruction. 876309124Sdim/// 877309124Sdim/// \param __m 878309124Sdim/// A 64-bit integer vector of [4 x i16]. 879309124Sdim/// \param __count 880309124Sdim/// A 32-bit integer value. 881309124Sdim/// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted 882309124Sdim/// values. 883288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS 884193326Sed_mm_srai_pi16(__m64 __m, int __count) 885193326Sed{ 886193326Sed return (__m64)__builtin_ia32_psrawi((__v4hi)__m, __count); 887193326Sed} 888193326Sed 889341825Sdim/// Right-shifts each 32-bit integer element of the first parameter, 890309124Sdim/// which is a 64-bit integer vector of [2 x i32], by the number of bits 891321369Sdim/// specified by the second parameter, which is a 64-bit integer. 892309124Sdim/// 893321369Sdim/// High-order bits are filled with the sign bit of the initial value of each 894321369Sdim/// 32-bit element. The 32-bit results are packed into a 64-bit integer 895321369Sdim/// vector of [2 x i32]. 896321369Sdim/// 897309124Sdim/// \headerfile <x86intrin.h> 898309124Sdim/// 899314564Sdim/// This intrinsic corresponds to the <c> PSRAD </c> instruction. 900309124Sdim/// 901309124Sdim/// \param __m 902309124Sdim/// A 64-bit integer vector of [2 x i32]. 903309124Sdim/// \param __count 904309124Sdim/// A 64-bit integer vector interpreted as a single 64-bit integer. 905309124Sdim/// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted 906309124Sdim/// values. 907288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS 908193326Sed_mm_sra_pi32(__m64 __m, __m64 __count) 909193326Sed{ 910296417Sdim return (__m64)__builtin_ia32_psrad((__v2si)__m, __count); 911193326Sed} 912193326Sed 913341825Sdim/// Right-shifts each 32-bit integer element of a 64-bit integer vector 914309124Sdim/// of [2 x i32] by the number of bits specified by a 32-bit integer. 915321369Sdim/// 916309124Sdim/// High-order bits are filled with the sign bit of the initial value of each 917309124Sdim/// 32-bit element. The 32-bit results are packed into a 64-bit integer 918309124Sdim/// vector of [2 x i32]. 919309124Sdim/// 920309124Sdim/// \headerfile <x86intrin.h> 921309124Sdim/// 922314564Sdim/// This intrinsic corresponds to the <c> PSRAD </c> instruction. 923309124Sdim/// 924309124Sdim/// \param __m 925309124Sdim/// A 64-bit integer vector of [2 x i32]. 926309124Sdim/// \param __count 927309124Sdim/// A 32-bit integer value. 928309124Sdim/// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted 929309124Sdim/// values. 930288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS 931193326Sed_mm_srai_pi32(__m64 __m, int __count) 932193326Sed{ 933193326Sed return (__m64)__builtin_ia32_psradi((__v2si)__m, __count); 934193326Sed} 935193326Sed 936341825Sdim/// Right-shifts each 16-bit integer element of the first parameter, 937309124Sdim/// which is a 64-bit integer vector of [4 x i16], by the number of bits 938321369Sdim/// specified by the second parameter, which is a 64-bit integer. 939309124Sdim/// 940321369Sdim/// High-order bits are cleared. The 16-bit results are packed into a 64-bit 941321369Sdim/// integer vector of [4 x i16]. 942321369Sdim/// 943309124Sdim/// \headerfile <x86intrin.h> 944309124Sdim/// 945314564Sdim/// This intrinsic corresponds to the <c> PSRLW </c> instruction. 946309124Sdim/// 947309124Sdim/// \param __m 948309124Sdim/// A 64-bit integer vector of [4 x i16]. 949309124Sdim/// \param __count 950309124Sdim/// A 64-bit integer vector interpreted as a single 64-bit integer. 951309124Sdim/// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted 952309124Sdim/// values. 953288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS 954193326Sed_mm_srl_pi16(__m64 __m, __m64 __count) 955193326Sed{ 956296417Sdim return (__m64)__builtin_ia32_psrlw((__v4hi)__m, __count); 957193326Sed} 958193326Sed 959341825Sdim/// Right-shifts each 16-bit integer element of a 64-bit integer vector 960309124Sdim/// of [4 x i16] by the number of bits specified by a 32-bit integer. 961321369Sdim/// 962309124Sdim/// High-order bits are cleared. The 16-bit results are packed into a 64-bit 963309124Sdim/// integer vector of [4 x i16]. 964309124Sdim/// 965309124Sdim/// \headerfile <x86intrin.h> 966309124Sdim/// 967314564Sdim/// This intrinsic corresponds to the <c> PSRLW </c> instruction. 968309124Sdim/// 969309124Sdim/// \param __m 970309124Sdim/// A 64-bit integer vector of [4 x i16]. 971309124Sdim/// \param __count 972309124Sdim/// A 32-bit integer value. 973309124Sdim/// \returns A 64-bit integer vector of [4 x i16] containing the right-shifted 974309124Sdim/// values. 975288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS 976193326Sed_mm_srli_pi16(__m64 __m, int __count) 977193326Sed{ 978296417Sdim return (__m64)__builtin_ia32_psrlwi((__v4hi)__m, __count); 979193326Sed} 980193326Sed 981341825Sdim/// Right-shifts each 32-bit integer element of the first parameter, 982309124Sdim/// which is a 64-bit integer vector of [2 x i32], by the number of bits 983321369Sdim/// specified by the second parameter, which is a 64-bit integer. 984309124Sdim/// 985321369Sdim/// High-order bits are cleared. The 32-bit results are packed into a 64-bit 986321369Sdim/// integer vector of [2 x i32]. 987321369Sdim/// 988309124Sdim/// \headerfile <x86intrin.h> 989309124Sdim/// 990314564Sdim/// This intrinsic corresponds to the <c> PSRLD </c> instruction. 991309124Sdim/// 992309124Sdim/// \param __m 993309124Sdim/// A 64-bit integer vector of [2 x i32]. 994309124Sdim/// \param __count 995309124Sdim/// A 64-bit integer vector interpreted as a single 64-bit integer. 996309124Sdim/// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted 997309124Sdim/// values. 998288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS 999193326Sed_mm_srl_pi32(__m64 __m, __m64 __count) 1000193326Sed{ 1001296417Sdim return (__m64)__builtin_ia32_psrld((__v2si)__m, __count); 1002193326Sed} 1003193326Sed 1004341825Sdim/// Right-shifts each 32-bit integer element of a 64-bit integer vector 1005309124Sdim/// of [2 x i32] by the number of bits specified by a 32-bit integer. 1006321369Sdim/// 1007309124Sdim/// High-order bits are cleared. The 32-bit results are packed into a 64-bit 1008309124Sdim/// integer vector of [2 x i32]. 1009309124Sdim/// 1010309124Sdim/// \headerfile <x86intrin.h> 1011309124Sdim/// 1012314564Sdim/// This intrinsic corresponds to the <c> PSRLD </c> instruction. 1013309124Sdim/// 1014309124Sdim/// \param __m 1015309124Sdim/// A 64-bit integer vector of [2 x i32]. 1016309124Sdim/// \param __count 1017309124Sdim/// A 32-bit integer value. 1018309124Sdim/// \returns A 64-bit integer vector of [2 x i32] containing the right-shifted 1019309124Sdim/// values. 1020288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS 1021193326Sed_mm_srli_pi32(__m64 __m, int __count) 1022193326Sed{ 1023193326Sed return (__m64)__builtin_ia32_psrldi((__v2si)__m, __count); 1024193326Sed} 1025193326Sed 1026341825Sdim/// Right-shifts the first 64-bit integer parameter by the number of bits 1027321369Sdim/// specified by the second 64-bit integer parameter. 1028309124Sdim/// 1029321369Sdim/// High-order bits are cleared. 1030321369Sdim/// 1031309124Sdim/// \headerfile <x86intrin.h> 1032309124Sdim/// 1033314564Sdim/// This intrinsic corresponds to the <c> PSRLQ </c> instruction. 1034309124Sdim/// 1035309124Sdim/// \param __m 1036309124Sdim/// A 64-bit integer vector interpreted as a single 64-bit integer. 1037309124Sdim/// \param __count 1038309124Sdim/// A 64-bit integer vector interpreted as a single 64-bit integer. 1039309124Sdim/// \returns A 64-bit integer vector containing the right-shifted value. 1040288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS 1041193326Sed_mm_srl_si64(__m64 __m, __m64 __count) 1042193326Sed{ 1043309124Sdim return (__m64)__builtin_ia32_psrlq((__v1di)__m, __count); 1044193326Sed} 1045193326Sed 1046341825Sdim/// Right-shifts the first parameter, which is a 64-bit integer, by the 1047309124Sdim/// number of bits specified by the second parameter, which is a 32-bit 1048321369Sdim/// integer. 1049309124Sdim/// 1050321369Sdim/// High-order bits are cleared. 1051321369Sdim/// 1052309124Sdim/// \headerfile <x86intrin.h> 1053309124Sdim/// 1054314564Sdim/// This intrinsic corresponds to the <c> PSRLQ </c> instruction. 1055309124Sdim/// 1056309124Sdim/// \param __m 1057309124Sdim/// A 64-bit integer vector interpreted as a single 64-bit integer. 1058309124Sdim/// \param __count 1059309124Sdim/// A 32-bit integer value. 1060309124Sdim/// \returns A 64-bit integer vector containing the right-shifted value. 1061288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS 1062193326Sed_mm_srli_si64(__m64 __m, int __count) 1063193326Sed{ 1064309124Sdim return (__m64)__builtin_ia32_psrlqi((__v1di)__m, __count); 1065193326Sed} 1066193326Sed 1067341825Sdim/// Performs a bitwise AND of two 64-bit integer vectors. 1068309124Sdim/// 1069309124Sdim/// \headerfile <x86intrin.h> 1070309124Sdim/// 1071314564Sdim/// This intrinsic corresponds to the <c> PAND </c> instruction. 1072309124Sdim/// 1073309124Sdim/// \param __m1 1074309124Sdim/// A 64-bit integer vector. 1075309124Sdim/// \param __m2 1076309124Sdim/// A 64-bit integer vector. 1077309124Sdim/// \returns A 64-bit integer vector containing the bitwise AND of both 1078309124Sdim/// parameters. 1079288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS 1080193326Sed_mm_and_si64(__m64 __m1, __m64 __m2) 1081193326Sed{ 1082309124Sdim return __builtin_ia32_pand((__v1di)__m1, (__v1di)__m2); 1083193326Sed} 1084193326Sed 1085341825Sdim/// Performs a bitwise NOT of the first 64-bit integer vector, and then 1086309124Sdim/// performs a bitwise AND of the intermediate result and the second 64-bit 1087309124Sdim/// integer vector. 1088309124Sdim/// 1089309124Sdim/// \headerfile <x86intrin.h> 1090309124Sdim/// 1091314564Sdim/// This intrinsic corresponds to the <c> PANDN </c> instruction. 1092309124Sdim/// 1093309124Sdim/// \param __m1 1094309124Sdim/// A 64-bit integer vector. The one's complement of this parameter is used 1095309124Sdim/// in the bitwise AND. 1096309124Sdim/// \param __m2 1097309124Sdim/// A 64-bit integer vector. 1098309124Sdim/// \returns A 64-bit integer vector containing the bitwise AND of the second 1099309124Sdim/// parameter and the one's complement of the first parameter. 1100288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS 1101193326Sed_mm_andnot_si64(__m64 __m1, __m64 __m2) 1102193326Sed{ 1103309124Sdim return __builtin_ia32_pandn((__v1di)__m1, (__v1di)__m2); 1104193326Sed} 1105193326Sed 1106341825Sdim/// Performs a bitwise OR of two 64-bit integer vectors. 1107309124Sdim/// 1108309124Sdim/// \headerfile <x86intrin.h> 1109309124Sdim/// 1110314564Sdim/// This intrinsic corresponds to the <c> POR </c> instruction. 1111309124Sdim/// 1112309124Sdim/// \param __m1 1113309124Sdim/// A 64-bit integer vector. 1114309124Sdim/// \param __m2 1115309124Sdim/// A 64-bit integer vector. 1116309124Sdim/// \returns A 64-bit integer vector containing the bitwise OR of both 1117309124Sdim/// parameters. 1118288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS 1119193326Sed_mm_or_si64(__m64 __m1, __m64 __m2) 1120193326Sed{ 1121309124Sdim return __builtin_ia32_por((__v1di)__m1, (__v1di)__m2); 1122193326Sed} 1123193326Sed 1124341825Sdim/// Performs a bitwise exclusive OR of two 64-bit integer vectors. 1125309124Sdim/// 1126309124Sdim/// \headerfile <x86intrin.h> 1127309124Sdim/// 1128314564Sdim/// This intrinsic corresponds to the <c> PXOR </c> instruction. 1129309124Sdim/// 1130309124Sdim/// \param __m1 1131309124Sdim/// A 64-bit integer vector. 1132309124Sdim/// \param __m2 1133309124Sdim/// A 64-bit integer vector. 1134309124Sdim/// \returns A 64-bit integer vector containing the bitwise exclusive OR of both 1135309124Sdim/// parameters. 1136288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS 1137193326Sed_mm_xor_si64(__m64 __m1, __m64 __m2) 1138193326Sed{ 1139309124Sdim return __builtin_ia32_pxor((__v1di)__m1, (__v1di)__m2); 1140193326Sed} 1141193326Sed 1142341825Sdim/// Compares the 8-bit integer elements of two 64-bit integer vectors of 1143309124Sdim/// [8 x i8] to determine if the element of the first vector is equal to the 1144321369Sdim/// corresponding element of the second vector. 1145309124Sdim/// 1146321369Sdim/// The comparison yields 0 for false, 0xFF for true. 1147321369Sdim/// 1148309124Sdim/// \headerfile <x86intrin.h> 1149309124Sdim/// 1150314564Sdim/// This intrinsic corresponds to the <c> PCMPEQB </c> instruction. 1151309124Sdim/// 1152309124Sdim/// \param __m1 1153309124Sdim/// A 64-bit integer vector of [8 x i8]. 1154309124Sdim/// \param __m2 1155309124Sdim/// A 64-bit integer vector of [8 x i8]. 1156309124Sdim/// \returns A 64-bit integer vector of [8 x i8] containing the comparison 1157309124Sdim/// results. 1158288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS 1159193326Sed_mm_cmpeq_pi8(__m64 __m1, __m64 __m2) 1160193326Sed{ 1161218893Sdim return (__m64)__builtin_ia32_pcmpeqb((__v8qi)__m1, (__v8qi)__m2); 1162193326Sed} 1163193326Sed 1164341825Sdim/// Compares the 16-bit integer elements of two 64-bit integer vectors of 1165309124Sdim/// [4 x i16] to determine if the element of the first vector is equal to the 1166321369Sdim/// corresponding element of the second vector. 1167309124Sdim/// 1168321369Sdim/// The comparison yields 0 for false, 0xFFFF for true. 1169321369Sdim/// 1170309124Sdim/// \headerfile <x86intrin.h> 1171309124Sdim/// 1172314564Sdim/// This intrinsic corresponds to the <c> PCMPEQW </c> instruction. 1173309124Sdim/// 1174309124Sdim/// \param __m1 1175309124Sdim/// A 64-bit integer vector of [4 x i16]. 1176309124Sdim/// \param __m2 1177309124Sdim/// A 64-bit integer vector of [4 x i16]. 1178309124Sdim/// \returns A 64-bit integer vector of [4 x i16] containing the comparison 1179309124Sdim/// results. 1180288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS 1181193326Sed_mm_cmpeq_pi16(__m64 __m1, __m64 __m2) 1182193326Sed{ 1183218893Sdim return (__m64)__builtin_ia32_pcmpeqw((__v4hi)__m1, (__v4hi)__m2); 1184193326Sed} 1185193326Sed 1186341825Sdim/// Compares the 32-bit integer elements of two 64-bit integer vectors of 1187309124Sdim/// [2 x i32] to determine if the element of the first vector is equal to the 1188321369Sdim/// corresponding element of the second vector. 1189309124Sdim/// 1190321369Sdim/// The comparison yields 0 for false, 0xFFFFFFFF for true. 1191321369Sdim/// 1192309124Sdim/// \headerfile <x86intrin.h> 1193309124Sdim/// 1194314564Sdim/// This intrinsic corresponds to the <c> PCMPEQD </c> instruction. 1195309124Sdim/// 1196309124Sdim/// \param __m1 1197309124Sdim/// A 64-bit integer vector of [2 x i32]. 1198309124Sdim/// \param __m2 1199309124Sdim/// A 64-bit integer vector of [2 x i32]. 1200309124Sdim/// \returns A 64-bit integer vector of [2 x i32] containing the comparison 1201309124Sdim/// results. 1202288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS 1203193326Sed_mm_cmpeq_pi32(__m64 __m1, __m64 __m2) 1204193326Sed{ 1205218893Sdim return (__m64)__builtin_ia32_pcmpeqd((__v2si)__m1, (__v2si)__m2); 1206193326Sed} 1207193326Sed 1208341825Sdim/// Compares the 8-bit integer elements of two 64-bit integer vectors of 1209309124Sdim/// [8 x i8] to determine if the element of the first vector is greater than 1210321369Sdim/// the corresponding element of the second vector. 1211309124Sdim/// 1212321369Sdim/// The comparison yields 0 for false, 0xFF for true. 1213321369Sdim/// 1214309124Sdim/// \headerfile <x86intrin.h> 1215309124Sdim/// 1216314564Sdim/// This intrinsic corresponds to the <c> PCMPGTB </c> instruction. 1217309124Sdim/// 1218309124Sdim/// \param __m1 1219309124Sdim/// A 64-bit integer vector of [8 x i8]. 1220309124Sdim/// \param __m2 1221309124Sdim/// A 64-bit integer vector of [8 x i8]. 1222309124Sdim/// \returns A 64-bit integer vector of [8 x i8] containing the comparison 1223309124Sdim/// results. 1224288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS 1225193326Sed_mm_cmpgt_pi8(__m64 __m1, __m64 __m2) 1226193326Sed{ 1227218893Sdim return (__m64)__builtin_ia32_pcmpgtb((__v8qi)__m1, (__v8qi)__m2); 1228193326Sed} 1229193326Sed 1230341825Sdim/// Compares the 16-bit integer elements of two 64-bit integer vectors of 1231309124Sdim/// [4 x i16] to determine if the element of the first vector is greater than 1232321369Sdim/// the corresponding element of the second vector. 1233309124Sdim/// 1234321369Sdim/// The comparison yields 0 for false, 0xFFFF for true. 1235321369Sdim/// 1236309124Sdim/// \headerfile <x86intrin.h> 1237309124Sdim/// 1238314564Sdim/// This intrinsic corresponds to the <c> PCMPGTW </c> instruction. 1239309124Sdim/// 1240309124Sdim/// \param __m1 1241309124Sdim/// A 64-bit integer vector of [4 x i16]. 1242309124Sdim/// \param __m2 1243309124Sdim/// A 64-bit integer vector of [4 x i16]. 1244309124Sdim/// \returns A 64-bit integer vector of [4 x i16] containing the comparison 1245309124Sdim/// results. 1246288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS 1247193326Sed_mm_cmpgt_pi16(__m64 __m1, __m64 __m2) 1248193326Sed{ 1249218893Sdim return (__m64)__builtin_ia32_pcmpgtw((__v4hi)__m1, (__v4hi)__m2); 1250193326Sed} 1251193326Sed 1252341825Sdim/// Compares the 32-bit integer elements of two 64-bit integer vectors of 1253309124Sdim/// [2 x i32] to determine if the element of the first vector is greater than 1254321369Sdim/// the corresponding element of the second vector. 1255309124Sdim/// 1256321369Sdim/// The comparison yields 0 for false, 0xFFFFFFFF for true. 1257321369Sdim/// 1258309124Sdim/// \headerfile <x86intrin.h> 1259309124Sdim/// 1260314564Sdim/// This intrinsic corresponds to the <c> PCMPGTD </c> instruction. 1261309124Sdim/// 1262309124Sdim/// \param __m1 1263309124Sdim/// A 64-bit integer vector of [2 x i32]. 1264309124Sdim/// \param __m2 1265309124Sdim/// A 64-bit integer vector of [2 x i32]. 1266309124Sdim/// \returns A 64-bit integer vector of [2 x i32] containing the comparison 1267309124Sdim/// results. 1268288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS 1269193326Sed_mm_cmpgt_pi32(__m64 __m1, __m64 __m2) 1270193326Sed{ 1271218893Sdim return (__m64)__builtin_ia32_pcmpgtd((__v2si)__m1, (__v2si)__m2); 1272193326Sed} 1273193326Sed 1274341825Sdim/// Constructs a 64-bit integer vector initialized to zero. 1275309124Sdim/// 1276309124Sdim/// \headerfile <x86intrin.h> 1277309124Sdim/// 1278341825Sdim/// This intrinsic corresponds to the <c> PXOR </c> instruction. 1279309124Sdim/// 1280309124Sdim/// \returns An initialized 64-bit integer vector with all elements set to zero. 1281288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS 1282193326Sed_mm_setzero_si64(void) 1283193326Sed{ 1284341825Sdim return __extension__ (__m64){ 0LL }; 1285193326Sed} 1286193326Sed 1287341825Sdim/// Constructs a 64-bit integer vector initialized with the specified 1288309124Sdim/// 32-bit integer values. 1289309124Sdim/// 1290309124Sdim/// \headerfile <x86intrin.h> 1291309124Sdim/// 1292309124Sdim/// This intrinsic is a utility function and does not correspond to a specific 1293309124Sdim/// instruction. 1294309124Sdim/// 1295309124Sdim/// \param __i1 1296309124Sdim/// A 32-bit integer value used to initialize the upper 32 bits of the 1297309124Sdim/// result. 1298309124Sdim/// \param __i0 1299309124Sdim/// A 32-bit integer value used to initialize the lower 32 bits of the 1300309124Sdim/// result. 1301309124Sdim/// \returns An initialized 64-bit integer vector. 1302288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS 1303193326Sed_mm_set_pi32(int __i1, int __i0) 1304193326Sed{ 1305218893Sdim return (__m64)__builtin_ia32_vec_init_v2si(__i0, __i1); 1306193326Sed} 1307193326Sed 1308341825Sdim/// Constructs a 64-bit integer vector initialized with the specified 1309309124Sdim/// 16-bit integer values. 1310309124Sdim/// 1311309124Sdim/// \headerfile <x86intrin.h> 1312309124Sdim/// 1313309124Sdim/// This intrinsic is a utility function and does not correspond to a specific 1314309124Sdim/// instruction. 1315309124Sdim/// 1316309124Sdim/// \param __s3 1317309124Sdim/// A 16-bit integer value used to initialize bits [63:48] of the result. 1318309124Sdim/// \param __s2 1319309124Sdim/// A 16-bit integer value used to initialize bits [47:32] of the result. 1320309124Sdim/// \param __s1 1321309124Sdim/// A 16-bit integer value used to initialize bits [31:16] of the result. 1322309124Sdim/// \param __s0 1323309124Sdim/// A 16-bit integer value used to initialize bits [15:0] of the result. 1324309124Sdim/// \returns An initialized 64-bit integer vector. 1325288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS 1326193326Sed_mm_set_pi16(short __s3, short __s2, short __s1, short __s0) 1327193326Sed{ 1328218893Sdim return (__m64)__builtin_ia32_vec_init_v4hi(__s0, __s1, __s2, __s3); 1329193326Sed} 1330193326Sed 1331341825Sdim/// Constructs a 64-bit integer vector initialized with the specified 1332309124Sdim/// 8-bit integer values. 1333309124Sdim/// 1334309124Sdim/// \headerfile <x86intrin.h> 1335309124Sdim/// 1336309124Sdim/// This intrinsic is a utility function and does not correspond to a specific 1337309124Sdim/// instruction. 1338309124Sdim/// 1339309124Sdim/// \param __b7 1340309124Sdim/// An 8-bit integer value used to initialize bits [63:56] of the result. 1341309124Sdim/// \param __b6 1342309124Sdim/// An 8-bit integer value used to initialize bits [55:48] of the result. 1343309124Sdim/// \param __b5 1344309124Sdim/// An 8-bit integer value used to initialize bits [47:40] of the result. 1345309124Sdim/// \param __b4 1346309124Sdim/// An 8-bit integer value used to initialize bits [39:32] of the result. 1347309124Sdim/// \param __b3 1348309124Sdim/// An 8-bit integer value used to initialize bits [31:24] of the result. 1349309124Sdim/// \param __b2 1350309124Sdim/// An 8-bit integer value used to initialize bits [23:16] of the result. 1351309124Sdim/// \param __b1 1352309124Sdim/// An 8-bit integer value used to initialize bits [15:8] of the result. 1353309124Sdim/// \param __b0 1354309124Sdim/// An 8-bit integer value used to initialize bits [7:0] of the result. 1355309124Sdim/// \returns An initialized 64-bit integer vector. 1356288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS 1357193326Sed_mm_set_pi8(char __b7, char __b6, char __b5, char __b4, char __b3, char __b2, 1358193326Sed char __b1, char __b0) 1359193326Sed{ 1360218893Sdim return (__m64)__builtin_ia32_vec_init_v8qi(__b0, __b1, __b2, __b3, 1361218893Sdim __b4, __b5, __b6, __b7); 1362193326Sed} 1363193326Sed 1364341825Sdim/// Constructs a 64-bit integer vector of [2 x i32], with each of the 1365309124Sdim/// 32-bit integer vector elements set to the specified 32-bit integer 1366309124Sdim/// value. 1367309124Sdim/// 1368309124Sdim/// \headerfile <x86intrin.h> 1369309124Sdim/// 1370341825Sdim/// This intrinsic is a utility function and does not correspond to a specific 1371341825Sdim/// instruction. 1372309124Sdim/// 1373309124Sdim/// \param __i 1374309124Sdim/// A 32-bit integer value used to initialize each vector element of the 1375309124Sdim/// result. 1376309124Sdim/// \returns An initialized 64-bit integer vector of [2 x i32]. 1377288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS 1378193326Sed_mm_set1_pi32(int __i) 1379193326Sed{ 1380218893Sdim return _mm_set_pi32(__i, __i); 1381193326Sed} 1382193326Sed 1383341825Sdim/// Constructs a 64-bit integer vector of [4 x i16], with each of the 1384309124Sdim/// 16-bit integer vector elements set to the specified 16-bit integer 1385309124Sdim/// value. 1386309124Sdim/// 1387309124Sdim/// \headerfile <x86intrin.h> 1388309124Sdim/// 1389341825Sdim/// This intrinsic is a utility function and does not correspond to a specific 1390341825Sdim/// instruction. 1391309124Sdim/// 1392309124Sdim/// \param __w 1393309124Sdim/// A 16-bit integer value used to initialize each vector element of the 1394309124Sdim/// result. 1395309124Sdim/// \returns An initialized 64-bit integer vector of [4 x i16]. 1396288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS 1397218893Sdim_mm_set1_pi16(short __w) 1398193326Sed{ 1399218893Sdim return _mm_set_pi16(__w, __w, __w, __w); 1400193326Sed} 1401193326Sed 1402341825Sdim/// Constructs a 64-bit integer vector of [8 x i8], with each of the 1403309124Sdim/// 8-bit integer vector elements set to the specified 8-bit integer value. 1404309124Sdim/// 1405309124Sdim/// \headerfile <x86intrin.h> 1406309124Sdim/// 1407341825Sdim/// This intrinsic is a utility function and does not correspond to a specific 1408341825Sdim/// instruction. 1409309124Sdim/// 1410309124Sdim/// \param __b 1411309124Sdim/// An 8-bit integer value used to initialize each vector element of the 1412309124Sdim/// result. 1413309124Sdim/// \returns An initialized 64-bit integer vector of [8 x i8]. 1414288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS 1415193326Sed_mm_set1_pi8(char __b) 1416193326Sed{ 1417218893Sdim return _mm_set_pi8(__b, __b, __b, __b, __b, __b, __b, __b); 1418193326Sed} 1419193326Sed 1420341825Sdim/// Constructs a 64-bit integer vector, initialized in reverse order with 1421309124Sdim/// the specified 32-bit integer values. 1422309124Sdim/// 1423309124Sdim/// \headerfile <x86intrin.h> 1424309124Sdim/// 1425309124Sdim/// This intrinsic is a utility function and does not correspond to a specific 1426309124Sdim/// instruction. 1427309124Sdim/// 1428309124Sdim/// \param __i0 1429309124Sdim/// A 32-bit integer value used to initialize the lower 32 bits of the 1430309124Sdim/// result. 1431309124Sdim/// \param __i1 1432309124Sdim/// A 32-bit integer value used to initialize the upper 32 bits of the 1433309124Sdim/// result. 1434309124Sdim/// \returns An initialized 64-bit integer vector. 1435288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS 1436223017Sdim_mm_setr_pi32(int __i0, int __i1) 1437193326Sed{ 1438218893Sdim return _mm_set_pi32(__i1, __i0); 1439193326Sed} 1440193326Sed 1441341825Sdim/// Constructs a 64-bit integer vector, initialized in reverse order with 1442309124Sdim/// the specified 16-bit integer values. 1443309124Sdim/// 1444309124Sdim/// \headerfile <x86intrin.h> 1445309124Sdim/// 1446309124Sdim/// This intrinsic is a utility function and does not correspond to a specific 1447309124Sdim/// instruction. 1448309124Sdim/// 1449309124Sdim/// \param __w0 1450309124Sdim/// A 16-bit integer value used to initialize bits [15:0] of the result. 1451309124Sdim/// \param __w1 1452309124Sdim/// A 16-bit integer value used to initialize bits [31:16] of the result. 1453309124Sdim/// \param __w2 1454309124Sdim/// A 16-bit integer value used to initialize bits [47:32] of the result. 1455309124Sdim/// \param __w3 1456309124Sdim/// A 16-bit integer value used to initialize bits [63:48] of the result. 1457309124Sdim/// \returns An initialized 64-bit integer vector. 1458288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS 1459223017Sdim_mm_setr_pi16(short __w0, short __w1, short __w2, short __w3) 1460193326Sed{ 1461218893Sdim return _mm_set_pi16(__w3, __w2, __w1, __w0); 1462193326Sed} 1463193326Sed 1464341825Sdim/// Constructs a 64-bit integer vector, initialized in reverse order with 1465309124Sdim/// the specified 8-bit integer values. 1466309124Sdim/// 1467309124Sdim/// \headerfile <x86intrin.h> 1468309124Sdim/// 1469309124Sdim/// This intrinsic is a utility function and does not correspond to a specific 1470309124Sdim/// instruction. 1471309124Sdim/// 1472309124Sdim/// \param __b0 1473309124Sdim/// An 8-bit integer value used to initialize bits [7:0] of the result. 1474309124Sdim/// \param __b1 1475309124Sdim/// An 8-bit integer value used to initialize bits [15:8] of the result. 1476309124Sdim/// \param __b2 1477309124Sdim/// An 8-bit integer value used to initialize bits [23:16] of the result. 1478309124Sdim/// \param __b3 1479309124Sdim/// An 8-bit integer value used to initialize bits [31:24] of the result. 1480309124Sdim/// \param __b4 1481309124Sdim/// An 8-bit integer value used to initialize bits [39:32] of the result. 1482309124Sdim/// \param __b5 1483309124Sdim/// An 8-bit integer value used to initialize bits [47:40] of the result. 1484309124Sdim/// \param __b6 1485309124Sdim/// An 8-bit integer value used to initialize bits [55:48] of the result. 1486309124Sdim/// \param __b7 1487309124Sdim/// An 8-bit integer value used to initialize bits [63:56] of the result. 1488309124Sdim/// \returns An initialized 64-bit integer vector. 1489288943Sdimstatic __inline__ __m64 __DEFAULT_FN_ATTRS 1490223017Sdim_mm_setr_pi8(char __b0, char __b1, char __b2, char __b3, char __b4, char __b5, 1491223017Sdim char __b6, char __b7) 1492193326Sed{ 1493218893Sdim return _mm_set_pi8(__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0); 1494193326Sed} 1495193326Sed 1496288943Sdim#undef __DEFAULT_FN_ATTRS 1497212904Sdim 1498212904Sdim/* Aliases for compatibility. */ 1499212904Sdim#define _m_empty _mm_empty 1500212904Sdim#define _m_from_int _mm_cvtsi32_si64 1501296417Sdim#define _m_from_int64 _mm_cvtsi64_m64 1502212904Sdim#define _m_to_int _mm_cvtsi64_si32 1503296417Sdim#define _m_to_int64 _mm_cvtm64_si64 1504212904Sdim#define _m_packsswb _mm_packs_pi16 1505212904Sdim#define _m_packssdw _mm_packs_pi32 1506212904Sdim#define _m_packuswb _mm_packs_pu16 1507212904Sdim#define _m_punpckhbw _mm_unpackhi_pi8 1508212904Sdim#define _m_punpckhwd _mm_unpackhi_pi16 1509212904Sdim#define _m_punpckhdq _mm_unpackhi_pi32 1510212904Sdim#define _m_punpcklbw _mm_unpacklo_pi8 1511212904Sdim#define _m_punpcklwd _mm_unpacklo_pi16 1512212904Sdim#define _m_punpckldq _mm_unpacklo_pi32 1513212904Sdim#define _m_paddb _mm_add_pi8 1514212904Sdim#define _m_paddw _mm_add_pi16 1515212904Sdim#define _m_paddd _mm_add_pi32 1516212904Sdim#define _m_paddsb _mm_adds_pi8 1517212904Sdim#define _m_paddsw _mm_adds_pi16 1518212904Sdim#define _m_paddusb _mm_adds_pu8 1519212904Sdim#define _m_paddusw _mm_adds_pu16 1520212904Sdim#define _m_psubb _mm_sub_pi8 1521212904Sdim#define _m_psubw _mm_sub_pi16 1522212904Sdim#define _m_psubd _mm_sub_pi32 1523212904Sdim#define _m_psubsb _mm_subs_pi8 1524212904Sdim#define _m_psubsw _mm_subs_pi16 1525212904Sdim#define _m_psubusb _mm_subs_pu8 1526212904Sdim#define _m_psubusw _mm_subs_pu16 1527212904Sdim#define _m_pmaddwd _mm_madd_pi16 1528212904Sdim#define _m_pmulhw _mm_mulhi_pi16 1529212904Sdim#define _m_pmullw _mm_mullo_pi16 1530212904Sdim#define _m_psllw _mm_sll_pi16 1531212904Sdim#define _m_psllwi _mm_slli_pi16 1532212904Sdim#define _m_pslld _mm_sll_pi32 1533212904Sdim#define _m_pslldi _mm_slli_pi32 1534212904Sdim#define _m_psllq _mm_sll_si64 1535212904Sdim#define _m_psllqi _mm_slli_si64 1536212904Sdim#define _m_psraw _mm_sra_pi16 1537212904Sdim#define _m_psrawi _mm_srai_pi16 1538212904Sdim#define _m_psrad _mm_sra_pi32 1539212904Sdim#define _m_psradi _mm_srai_pi32 1540212904Sdim#define _m_psrlw _mm_srl_pi16 1541212904Sdim#define _m_psrlwi _mm_srli_pi16 1542212904Sdim#define _m_psrld _mm_srl_pi32 1543212904Sdim#define _m_psrldi _mm_srli_pi32 1544212904Sdim#define _m_psrlq _mm_srl_si64 1545212904Sdim#define _m_psrlqi _mm_srli_si64 1546212904Sdim#define _m_pand _mm_and_si64 1547212904Sdim#define _m_pandn _mm_andnot_si64 1548212904Sdim#define _m_por _mm_or_si64 1549212904Sdim#define _m_pxor _mm_xor_si64 1550212904Sdim#define _m_pcmpeqb _mm_cmpeq_pi8 1551212904Sdim#define _m_pcmpeqw _mm_cmpeq_pi16 1552212904Sdim#define _m_pcmpeqd _mm_cmpeq_pi32 1553212904Sdim#define _m_pcmpgtb _mm_cmpgt_pi8 1554212904Sdim#define _m_pcmpgtw _mm_cmpgt_pi16 1555212904Sdim#define _m_pcmpgtd _mm_cmpgt_pi32 1556212904Sdim 1557193326Sed#endif /* __MMINTRIN_H */ 1558193326Sed 1559