/* smmintrin.h revision 327952 */
1168404Spjd/*===---- smmintrin.h - SSE4 intrinsics ------------------------------------=== 2168404Spjd * 3168404Spjd * Permission is hereby granted, free of charge, to any person obtaining a copy 4168404Spjd * of this software and associated documentation files (the "Software"), to deal 5168404Spjd * in the Software without restriction, including without limitation the rights 6168404Spjd * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7168404Spjd * copies of the Software, and to permit persons to whom the Software is 8168404Spjd * furnished to do so, subject to the following conditions: 9168404Spjd * 10168404Spjd * The above copyright notice and this permission notice shall be included in 11168404Spjd * all copies or substantial portions of the Software. 12168404Spjd * 13168404Spjd * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14168404Spjd * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15168404Spjd * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16168404Spjd * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17168404Spjd * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18168404Spjd * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19168404Spjd * THE SOFTWARE. 20168404Spjd * 21168404Spjd *===-----------------------------------------------------------------------=== 22168404Spjd */ 23219089Spjd 24227497Smm#ifndef _SMMINTRIN_H 25268720Sdelphij#define _SMMINTRIN_H 26236145Smm 27236155Smm#include <tmmintrin.h> 28254758Sdelphij 29168404Spjd/* Define the default attributes for the functions in this file. */ 30168404Spjd#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse4.1"))) 31168404Spjd 32168404Spjd/* SSE4 Rounding macros. 
*/ 33168404Spjd#define _MM_FROUND_TO_NEAREST_INT 0x00 34168404Spjd#define _MM_FROUND_TO_NEG_INF 0x01 35168404Spjd#define _MM_FROUND_TO_POS_INF 0x02 36168404Spjd#define _MM_FROUND_TO_ZERO 0x03 37168404Spjd#define _MM_FROUND_CUR_DIRECTION 0x04 38168404Spjd 39168404Spjd#define _MM_FROUND_RAISE_EXC 0x00 40168404Spjd#define _MM_FROUND_NO_EXC 0x08 41168404Spjd 42168404Spjd#define _MM_FROUND_NINT (_MM_FROUND_RAISE_EXC | _MM_FROUND_TO_NEAREST_INT) 43168404Spjd#define _MM_FROUND_FLOOR (_MM_FROUND_RAISE_EXC | _MM_FROUND_TO_NEG_INF) 44168404Spjd#define _MM_FROUND_CEIL (_MM_FROUND_RAISE_EXC | _MM_FROUND_TO_POS_INF) 45168404Spjd#define _MM_FROUND_TRUNC (_MM_FROUND_RAISE_EXC | _MM_FROUND_TO_ZERO) 46168404Spjd#define _MM_FROUND_RINT (_MM_FROUND_RAISE_EXC | _MM_FROUND_CUR_DIRECTION) 47185029Spjd#define _MM_FROUND_NEARBYINT (_MM_FROUND_NO_EXC | _MM_FROUND_CUR_DIRECTION) 48185029Spjd 49168404Spjd/// \brief Rounds up each element of the 128-bit vector of [4 x float] to an 50236155Smm/// integer and returns the rounded values in a 128-bit vector of 51168404Spjd/// [4 x float]. 52168404Spjd/// 53168404Spjd/// \headerfile <x86intrin.h> 54168404Spjd/// 55168404Spjd/// \code 56168404Spjd/// __m128 _mm_ceil_ps(__m128 X); 57185029Spjd/// \endcode 58236884Smm/// 59168404Spjd/// This intrinsic corresponds to the <c> VROUNDPS / ROUNDPS </c> instruction. 60219089Spjd/// 61219089Spjd/// \param X 62168404Spjd/// A 128-bit vector of [4 x float] values to be rounded up. 63168404Spjd/// \returns A 128-bit vector of [4 x float] containing the rounded values. 64168404Spjd#define _mm_ceil_ps(X) _mm_round_ps((X), _MM_FROUND_CEIL) 65168404Spjd 66168404Spjd/// \brief Rounds up each element of the 128-bit vector of [2 x double] to an 67224171Sgibbs/// integer and returns the rounded values in a 128-bit vector of 68168404Spjd/// [2 x double]. 
69168404Spjd/// 70168404Spjd/// \headerfile <x86intrin.h> 71168404Spjd/// 72168404Spjd/// \code 73168404Spjd/// __m128d _mm_ceil_pd(__m128d X); 74168404Spjd/// \endcode 75168404Spjd/// 76236155Smm/// This intrinsic corresponds to the <c> VROUNDPD / ROUNDPD </c> instruction. 77168404Spjd/// 78228103Smm/// \param X 79228103Smm/// A 128-bit vector of [2 x double] values to be rounded up. 80168404Spjd/// \returns A 128-bit vector of [2 x double] containing the rounded values. 81168404Spjd#define _mm_ceil_pd(X) _mm_round_pd((X), _MM_FROUND_CEIL) 82168404Spjd 83219089Spjd/// \brief Copies three upper elements of the first 128-bit vector operand to 84168404Spjd/// the corresponding three upper elements of the 128-bit result vector of 85168404Spjd/// [4 x float]. Rounds up the lowest element of the second 128-bit vector 86168404Spjd/// operand to an integer and copies it to the lowest element of the 128-bit 87168404Spjd/// result vector of [4 x float]. 88168404Spjd/// 89168404Spjd/// \headerfile <x86intrin.h> 90168404Spjd/// 91168404Spjd/// \code 92168404Spjd/// __m128 _mm_ceil_ss(__m128 X, __m128 Y); 93168404Spjd/// \endcode 94168404Spjd/// 95168404Spjd/// This intrinsic corresponds to the <c> VROUNDSS / ROUNDSS </c> instruction. 96168404Spjd/// 97168404Spjd/// \param X 98168404Spjd/// A 128-bit vector of [4 x float]. The values stored in bits [127:32] are 99168404Spjd/// copied to the corresponding bits of the result. 100168404Spjd/// \param Y 101185029Spjd/// A 128-bit vector of [4 x float]. The value stored in bits [31:0] is 102185029Spjd/// rounded up to the nearest integer and copied to the corresponding bits 103168404Spjd/// of the result. 104168404Spjd/// \returns A 128-bit vector of [4 x float] containing the copied and rounded 105168404Spjd/// values. 
#define _mm_ceil_ss(X, Y)    _mm_round_ss((X), (Y), _MM_FROUND_CEIL)

/// \brief Copies the upper element of the first 128-bit vector operand to the
///    corresponding upper element of the 128-bit result vector of [2 x double].
///    Rounds up the lower element of the second 128-bit vector operand to an
///    integer and copies it to the lower element of the 128-bit result vector
///    of [2 x double].
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128d _mm_ceil_sd(__m128d X, __m128d Y);
/// \endcode
///
/// This intrinsic corresponds to the <c> VROUNDSD / ROUNDSD </c> instruction.
///
/// \param X
///    A 128-bit vector of [2 x double]. The value stored in bits [127:64] is
///    copied to the corresponding bits of the result.
/// \param Y
///    A 128-bit vector of [2 x double]. The value stored in bits [63:0] is
///    rounded up to the nearest integer and copied to the corresponding bits
///    of the result.
/// \returns A 128-bit vector of [2 x double] containing the copied and rounded
///    values.
#define _mm_ceil_sd(X, Y)    _mm_round_sd((X), (Y), _MM_FROUND_CEIL)

/// \brief Rounds down each element of the 128-bit vector of [4 x float] to an
///    integer and returns the rounded values in a 128-bit vector of
///    [4 x float].
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128 _mm_floor_ps(__m128 X);
/// \endcode
///
/// This intrinsic corresponds to the <c> VROUNDPS / ROUNDPS </c> instruction.
///
/// \param X
///    A 128-bit vector of [4 x float] values to be rounded down.
/// \returns A 128-bit vector of [4 x float] containing the rounded values.
#define _mm_floor_ps(X)      _mm_round_ps((X), _MM_FROUND_FLOOR)

/// \brief Rounds down each element of the 128-bit vector of [2 x double] to an
///    integer and returns the rounded values in a 128-bit vector of
///    [2 x double].
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128d _mm_floor_pd(__m128d X);
/// \endcode
///
/// This intrinsic corresponds to the <c> VROUNDPD / ROUNDPD </c> instruction.
///
/// \param X
///    A 128-bit vector of [2 x double].
/// \returns A 128-bit vector of [2 x double] containing the rounded values.
#define _mm_floor_pd(X)      _mm_round_pd((X), _MM_FROUND_FLOOR)

/// \brief Copies three upper elements of the first 128-bit vector operand to
///    the corresponding three upper elements of the 128-bit result vector of
///    [4 x float]. Rounds down the lowest element of the second 128-bit vector
///    operand to an integer and copies it to the lowest element of the 128-bit
///    result vector of [4 x float].
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128 _mm_floor_ss(__m128 X, __m128 Y);
/// \endcode
///
/// This intrinsic corresponds to the <c> VROUNDSS / ROUNDSS </c> instruction.
///
/// \param X
///    A 128-bit vector of [4 x float]. The values stored in bits [127:32] are
///    copied to the corresponding bits of the result.
/// \param Y
///    A 128-bit vector of [4 x float]. The value stored in bits [31:0] is
///    rounded down to the nearest integer and copied to the corresponding bits
///    of the result.
/// \returns A 128-bit vector of [4 x float] containing the copied and rounded
///    values.
#define _mm_floor_ss(X, Y)   _mm_round_ss((X), (Y), _MM_FROUND_FLOOR)

/// \brief Copies the upper element of the first 128-bit vector operand to the
///    corresponding upper element of the 128-bit result vector of [2 x double].
///    Rounds down the lower element of the second 128-bit vector operand to an
///    integer and copies it to the lower element of the 128-bit result vector
///    of [2 x double].
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128d _mm_floor_sd(__m128d X, __m128d Y);
/// \endcode
///
/// This intrinsic corresponds to the <c> VROUNDSD / ROUNDSD </c> instruction.
///
/// \param X
///    A 128-bit vector of [2 x double]. The value stored in bits [127:64] is
///    copied to the corresponding bits of the result.
/// \param Y
///    A 128-bit vector of [2 x double]. The value stored in bits [63:0] is
///    rounded down to the nearest integer and copied to the corresponding bits
///    of the result.
/// \returns A 128-bit vector of [2 x double] containing the copied and rounded
///    values.
#define _mm_floor_sd(X, Y)   _mm_round_sd((X), (Y), _MM_FROUND_FLOOR)

/// \brief Rounds each element of the 128-bit vector of [4 x float] to an
///    integer value according to the rounding control specified by the second
///    argument and returns the rounded values in a 128-bit vector of
///    [4 x float].
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128 _mm_round_ps(__m128 X, const int M);
/// \endcode
///
/// This intrinsic corresponds to the <c> VROUNDPS / ROUNDPS </c> instruction.
///
/// \param X
///    A 128-bit vector of [4 x float].
/// \param M
///    An integer value that specifies the rounding operation. \n
///    Bits [7:4] are reserved. \n
///    Bit [3] is a precision exception value: \n
///      0: A normal PE exception is used \n
///      1: The PE field is not updated \n
///    Bit [2] is the rounding control source: \n
///      0: Use bits [1:0] of \a M \n
///      1: Use the current MXCSR setting \n
///    Bits [1:0] contain the rounding control definition: \n
///      00: Nearest \n
///      01: Downward (toward negative infinity) \n
///      10: Upward (toward positive infinity) \n
///      11: Truncated
/// \returns A 128-bit vector of [4 x float] containing the rounded values.
#define _mm_round_ps(X, M) __extension__ ({ \
  (__m128)__builtin_ia32_roundps((__v4sf)(__m128)(X), (M)); })

/// \brief Copies three upper elements of the first 128-bit vector operand to
///    the corresponding three upper elements of the 128-bit result vector of
///    [4 x float]. Rounds the lowest element of the second 128-bit vector
///    operand to an integer value according to the rounding control specified
///    by the third argument and copies it to the lowest element of the 128-bit
///    result vector of [4 x float].
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128 _mm_round_ss(__m128 X, __m128 Y, const int M);
/// \endcode
///
/// This intrinsic corresponds to the <c> VROUNDSS / ROUNDSS </c> instruction.
///
/// \param X
///    A 128-bit vector of [4 x float]. The values stored in bits [127:32] are
///    copied to the corresponding bits of the result.
/// \param Y
///    A 128-bit vector of [4 x float]. The value stored in bits [31:0] is
///    rounded to the nearest integer using the specified rounding control and
///    copied to the corresponding bits of the result.
/// \param M
///    An integer value that specifies the rounding operation. \n
///    Bits [7:4] are reserved. \n
///    Bit [3] is a precision exception value: \n
///      0: A normal PE exception is used \n
///      1: The PE field is not updated \n
///    Bit [2] is the rounding control source: \n
///      0: Use bits [1:0] of \a M \n
///      1: Use the current MXCSR setting \n
///    Bits [1:0] contain the rounding control definition: \n
///      00: Nearest \n
///      01: Downward (toward negative infinity) \n
///      10: Upward (toward positive infinity) \n
///      11: Truncated
/// \returns A 128-bit vector of [4 x float] containing the copied and rounded
///    values.
#define _mm_round_ss(X, Y, M) __extension__ ({ \
  (__m128)__builtin_ia32_roundss((__v4sf)(__m128)(X), \
                                 (__v4sf)(__m128)(Y), (M)); })

/// \brief Rounds each element of the 128-bit vector of [2 x double] to an
///    integer value according to the rounding control specified by the second
///    argument and returns the rounded values in a 128-bit vector of
///    [2 x double].
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128d _mm_round_pd(__m128d X, const int M);
/// \endcode
///
/// This intrinsic corresponds to the <c> VROUNDPD / ROUNDPD </c> instruction.
///
/// \param X
///    A 128-bit vector of [2 x double].
/// \param M
///    An integer value that specifies the rounding operation. \n
///    Bits [7:4] are reserved. \n
///    Bit [3] is a precision exception value: \n
///      0: A normal PE exception is used \n
///      1: The PE field is not updated \n
///    Bit [2] is the rounding control source: \n
///      0: Use bits [1:0] of \a M \n
///      1: Use the current MXCSR setting \n
///    Bits [1:0] contain the rounding control definition: \n
///      00: Nearest \n
///      01: Downward (toward negative infinity) \n
///      10: Upward (toward positive infinity) \n
///      11: Truncated
/// \returns A 128-bit vector of [2 x double] containing the rounded values.
#define _mm_round_pd(X, M) __extension__ ({ \
  (__m128d)__builtin_ia32_roundpd((__v2df)(__m128d)(X), (M)); })

/// \brief Copies the upper element of the first 128-bit vector operand to the
///    corresponding upper element of the 128-bit result vector of [2 x double].
///    Rounds the lower element of the second 128-bit vector operand to an
///    integer value according to the rounding control specified by the third
///    argument and copies it to the lower element of the 128-bit result vector
///    of [2 x double].
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128d _mm_round_sd(__m128d X, __m128d Y, const int M);
/// \endcode
///
/// This intrinsic corresponds to the <c> VROUNDSD / ROUNDSD </c> instruction.
///
/// \param X
///    A 128-bit vector of [2 x double]. The value stored in bits [127:64] is
///    copied to the corresponding bits of the result.
/// \param Y
///    A 128-bit vector of [2 x double]. The value stored in bits [63:0] is
///    rounded to the nearest integer using the specified rounding control and
///    copied to the corresponding bits of the result.
/// \param M
///    An integer value that specifies the rounding operation. \n
///    Bits [7:4] are reserved. \n
///    Bit [3] is a precision exception value: \n
///      0: A normal PE exception is used \n
///      1: The PE field is not updated \n
///    Bit [2] is the rounding control source: \n
///      0: Use bits [1:0] of \a M \n
///      1: Use the current MXCSR setting \n
///    Bits [1:0] contain the rounding control definition: \n
///      00: Nearest \n
///      01: Downward (toward negative infinity) \n
///      10: Upward (toward positive infinity) \n
///      11: Truncated
/// \returns A 128-bit vector of [2 x double] containing the copied and rounded
///    values.
#define _mm_round_sd(X, Y, M) __extension__ ({ \
  (__m128d)__builtin_ia32_roundsd((__v2df)(__m128d)(X), \
                                  (__v2df)(__m128d)(Y), (M)); })

/* SSE4 Packed Blending Intrinsics. */
/// \brief Returns a 128-bit vector of [2 x double] where the values are
///    selected from either the first or second operand as specified by the
///    third operand, the control mask.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128d _mm_blend_pd(__m128d V1, __m128d V2, const int M);
/// \endcode
///
/// This intrinsic corresponds to the <c> VBLENDPD / BLENDPD </c> instruction.
///
/// \param V1
///    A 128-bit vector of [2 x double].
/// \param V2
///    A 128-bit vector of [2 x double].
/// \param M
///    An immediate integer operand, with mask bits [1:0] specifying how the
///    values are to be copied. The position of the mask bit corresponds to the
///    index of a copied value. When a mask bit is 0, the corresponding 64-bit
///    element in operand \a V1 is copied to the same position in the result.
///    When a mask bit is 1, the corresponding 64-bit element in operand \a V2
///    is copied to the same position in the result.
/// \returns A 128-bit vector of [2 x double] containing the copied values.
#define _mm_blend_pd(V1, V2, M) __extension__ ({ \
  (__m128d)__builtin_shufflevector((__v2df)(__m128d)(V1), \
                                   (__v2df)(__m128d)(V2), \
                                   (((M) & 0x01) ? 2 : 0), \
                                   (((M) & 0x02) ? 3 : 1)); })

/// \brief Returns a 128-bit vector of [4 x float] where the values are selected
///    from either the first or second operand as specified by the third
///    operand, the control mask.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128 _mm_blend_ps(__m128 V1, __m128 V2, const int M);
/// \endcode
///
/// This intrinsic corresponds to the <c> VBLENDPS / BLENDPS </c> instruction.
///
/// \param V1
///    A 128-bit vector of [4 x float].
/// \param V2
///    A 128-bit vector of [4 x float].
/// \param M
///    An immediate integer operand, with mask bits [3:0] specifying how the
///    values are to be copied. The position of the mask bit corresponds to the
///    index of a copied value. When a mask bit is 0, the corresponding 32-bit
///    element in operand \a V1 is copied to the same position in the result.
///    When a mask bit is 1, the corresponding 32-bit element in operand \a V2
///    is copied to the same position in the result.
/// \returns A 128-bit vector of [4 x float] containing the copied values.
#define _mm_blend_ps(V1, V2, M) __extension__ ({ \
  (__m128)__builtin_shufflevector((__v4sf)(__m128)(V1), (__v4sf)(__m128)(V2), \
                                  (((M) & 0x01) ? 4 : 0), \
                                  (((M) & 0x02) ? 5 : 1), \
                                  (((M) & 0x04) ? 6 : 2), \
                                  (((M) & 0x08) ? 7 : 3)); })

/// \brief Returns a 128-bit vector of [2 x double] where the values are
///    selected from either the first or second operand as specified by the
///    third operand, the control mask.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VBLENDVPD / BLENDVPD </c> instruction.
///
/// \param __V1
///    A 128-bit vector of [2 x double].
/// \param __V2
///    A 128-bit vector of [2 x double].
/// \param __M
///    A 128-bit vector operand, with mask bits 127 and 63 specifying how the
///    values are to be copied. The position of the mask bit corresponds to the
///    most significant bit of a copied value. When a mask bit is 0, the
///    corresponding 64-bit element in operand \a __V1 is copied to the same
///    position in the result. When a mask bit is 1, the corresponding 64-bit
///    element in operand \a __V2 is copied to the same position in the result.
/// \returns A 128-bit vector of [2 x double] containing the copied values.
449238926Smmstatic __inline__ __m128d __DEFAULT_FN_ATTRS 450238926Smm_mm_blendv_pd (__m128d __V1, __m128d __V2, __m128d __M) 451238926Smm{ 452236884Smm return (__m128d) __builtin_ia32_blendvpd ((__v2df)__V1, (__v2df)__V2, 453236884Smm (__v2df)__M); 454236884Smm} 455236884Smm 456185029Spjd/// \brief Returns a 128-bit vector of [4 x float] where the values are 457209962Smm/// selected from either the first or second operand as specified by the 458209962Smm/// third operand, the control mask. 459209962Smm/// 460209962Smm/// \headerfile <x86intrin.h> 461185029Spjd/// 462185029Spjd/// This intrinsic corresponds to the <c> VBLENDVPS / BLENDVPS </c> instruction. 463185029Spjd/// 464185029Spjd/// \param __V1 465185029Spjd/// A 128-bit vector of [4 x float]. 466185029Spjd/// \param __V2 467185029Spjd/// A 128-bit vector of [4 x float]. 468185029Spjd/// \param __M 469185029Spjd/// A 128-bit vector operand, with mask bits 127, 95, 63, and 31 specifying 470185029Spjd/// how the values are to be copied. The position of the mask bit corresponds 471185029Spjd/// to the most significant bit of a copied value. When a mask bit is 0, the 472185029Spjd/// corresponding 32-bit element in operand \a __V1 is copied to the same 473185029Spjd/// position in the result. When a mask bit is 1, the corresponding 32-bit 474185029Spjd/// element in operand \a __V2 is copied to the same position in the result. 475185029Spjd/// \returns A 128-bit vector of [4 x float] containing the copied values. 476185029Spjdstatic __inline__ __m128 __DEFAULT_FN_ATTRS 477185029Spjd_mm_blendv_ps (__m128 __V1, __m128 __V2, __m128 __M) 478185029Spjd{ 479185029Spjd return (__m128) __builtin_ia32_blendvps ((__v4sf)__V1, (__v4sf)__V2, 480185029Spjd (__v4sf)__M); 481168404Spjd} 482168404Spjd 483168404Spjd/// \brief Returns a 128-bit vector of [16 x i8] where the values are selected 484168404Spjd/// from either of the first or second operand as specified by the third 485168404Spjd/// operand, the control mask. 
486168404Spjd/// 487168404Spjd/// \headerfile <x86intrin.h> 488168404Spjd/// 489168404Spjd/// This intrinsic corresponds to the <c> VPBLENDVB / PBLENDVB </c> instruction. 490168404Spjd/// 491168404Spjd/// \param __V1 492168404Spjd/// A 128-bit vector of [16 x i8]. 493168404Spjd/// \param __V2 494168404Spjd/// A 128-bit vector of [16 x i8]. 495168404Spjd/// \param __M 496168404Spjd/// A 128-bit vector operand, with mask bits 127, 119, 111 ... 7 specifying 497168404Spjd/// how the values are to be copied. The position of the mask bit corresponds 498168404Spjd/// to the most significant bit of a copied value. When a mask bit is 0, the 499168404Spjd/// corresponding 8-bit element in operand \a __V1 is copied to the same 500168404Spjd/// position in the result. When a mask bit is 1, the corresponding 8-bit 501168404Spjd/// element in operand \a __V2 is copied to the same position in the result. 502168404Spjd/// \returns A 128-bit vector of [16 x i8] containing the copied values. 503168404Spjdstatic __inline__ __m128i __DEFAULT_FN_ATTRS 504168404Spjd_mm_blendv_epi8 (__m128i __V1, __m128i __V2, __m128i __M) 505168404Spjd{ 506168404Spjd return (__m128i) __builtin_ia32_pblendvb128 ((__v16qi)__V1, (__v16qi)__V2, 507168404Spjd (__v16qi)__M); 508168404Spjd} 509168404Spjd 510168404Spjd/// \brief Returns a 128-bit vector of [8 x i16] where the values are selected 511168404Spjd/// from either of the first or second operand as specified by the third 512168404Spjd/// operand, the control mask. 513168404Spjd/// 514168404Spjd/// \headerfile <x86intrin.h> 515168404Spjd/// 516168404Spjd/// \code 517168404Spjd/// __m128i _mm_blend_epi16(__m128i V1, __m128i V2, const int M); 518168404Spjd/// \endcode 519168404Spjd/// 520168404Spjd/// This intrinsic corresponds to the <c> VPBLENDW / PBLENDW </c> instruction. 521168404Spjd/// 522168404Spjd/// \param V1 523168404Spjd/// A 128-bit vector of [8 x i16]. 524168404Spjd/// \param V2 525168404Spjd/// A 128-bit vector of [8 x i16]. 
526168404Spjd/// \param M 527168404Spjd/// An immediate integer operand, with mask bits [7:0] specifying how the 528168404Spjd/// values are to be copied. The position of the mask bit corresponds to the 529168404Spjd/// index of a copied value. When a mask bit is 0, the corresponding 16-bit 530168404Spjd/// element in operand \a V1 is copied to the same position in the result. 531168404Spjd/// When a mask bit is 1, the corresponding 16-bit element in operand \a V2 532168404Spjd/// is copied to the same position in the result. 533168404Spjd/// \returns A 128-bit vector of [8 x i16] containing the copied values. 534168404Spjd#define _mm_blend_epi16(V1, V2, M) __extension__ ({ \ 535168404Spjd (__m128i)__builtin_shufflevector((__v8hi)(__m128i)(V1), \ 536168404Spjd (__v8hi)(__m128i)(V2), \ 537168404Spjd (((M) & 0x01) ? 8 : 0), \ 538168404Spjd (((M) & 0x02) ? 9 : 1), \ 539168404Spjd (((M) & 0x04) ? 10 : 2), \ 540168404Spjd (((M) & 0x08) ? 11 : 3), \ 541168404Spjd (((M) & 0x10) ? 12 : 4), \ 542168404Spjd (((M) & 0x20) ? 13 : 5), \ 543168404Spjd (((M) & 0x40) ? 14 : 6), \ 544168404Spjd (((M) & 0x80) ? 15 : 7)); }) 545168404Spjd 546168404Spjd/* SSE4 Dword Multiply Instructions. */ 547168404Spjd/// \brief Multiples corresponding elements of two 128-bit vectors of [4 x i32] 548185029Spjd/// and returns the lower 32 bits of the each product in a 128-bit vector of 549185029Spjd/// [4 x i32]. 550168404Spjd/// 551168404Spjd/// \headerfile <x86intrin.h> 552168404Spjd/// 553168404Spjd/// This intrinsic corresponds to the <c> VPMULLD / PMULLD </c> instruction. 554168404Spjd/// 555168404Spjd/// \param __V1 556168404Spjd/// A 128-bit integer vector. 557168404Spjd/// \param __V2 558168404Spjd/// A 128-bit integer vector. 559168404Spjd/// \returns A 128-bit integer vector containing the products of both operands. 
560168404Spjdstatic __inline__ __m128i __DEFAULT_FN_ATTRS 561168404Spjd_mm_mullo_epi32 (__m128i __V1, __m128i __V2) 562168404Spjd{ 563168404Spjd return (__m128i) ((__v4su)__V1 * (__v4su)__V2); 564185029Spjd} 565185029Spjd 566185029Spjd/// \brief Multiplies corresponding even-indexed elements of two 128-bit 567168404Spjd/// vectors of [4 x i32] and returns a 128-bit vector of [2 x i64] 568185029Spjd/// containing the products. 569185029Spjd/// 570185029Spjd/// \headerfile <x86intrin.h> 571185029Spjd/// 572185029Spjd/// This intrinsic corresponds to the <c> VPMULDQ / PMULDQ </c> instruction. 573185029Spjd/// 574185029Spjd/// \param __V1 575185029Spjd/// A 128-bit vector of [4 x i32]. 576168404Spjd/// \param __V2 577168404Spjd/// A 128-bit vector of [4 x i32]. 578168404Spjd/// \returns A 128-bit vector of [2 x i64] containing the products of both 579168404Spjd/// operands. 580168404Spjdstatic __inline__ __m128i __DEFAULT_FN_ATTRS 581168404Spjd_mm_mul_epi32 (__m128i __V1, __m128i __V2) 582168404Spjd{ 583168404Spjd return (__m128i) __builtin_ia32_pmuldq128 ((__v4si)__V1, (__v4si)__V2); 584168404Spjd} 585168404Spjd 586168404Spjd/* SSE4 Floating Point Dot Product Instructions. */ 587168404Spjd/// \brief Computes the dot product of the two 128-bit vectors of [4 x float] 588219089Spjd/// and returns it in the elements of the 128-bit result vector of 589168404Spjd/// [4 x float]. 590219089Spjd/// 591219089Spjd/// The immediate integer operand controls which input elements 592168404Spjd/// will contribute to the dot product, and where the final results are 593168404Spjd/// returned. 594168404Spjd/// 595168404Spjd/// \headerfile <x86intrin.h> 596168404Spjd/// 597185029Spjd/// \code 598168404Spjd/// __m128 _mm_dp_ps(__m128 X, __m128 Y, const int M); 599168404Spjd/// \endcode 600168404Spjd/// 601168404Spjd/// This intrinsic corresponds to the <c> VDPPS / DPPS </c> instruction. 602168404Spjd/// 603168404Spjd/// \param X 604168404Spjd/// A 128-bit vector of [4 x float]. 
605168404Spjd/// \param Y 606168404Spjd/// A 128-bit vector of [4 x float]. 607168404Spjd/// \param M 608168404Spjd/// An immediate integer operand. Mask bits [7:4] determine which elements 609168404Spjd/// of the input vectors are used, with bit [4] corresponding to the lowest 610168404Spjd/// element and bit [7] corresponding to the highest element of each [4 x 611168404Spjd/// float] vector. If a bit is set, the corresponding elements from the two 612168404Spjd/// input vectors are used as an input for dot product; otherwise that input 613168404Spjd/// is treated as zero. Bits [3:0] determine which elements of the result 614168404Spjd/// will receive a copy of the final dot product, with bit [0] corresponding 615168404Spjd/// to the lowest element and bit [3] corresponding to the highest element of 616168404Spjd/// each [4 x float] subvector. If a bit is set, the dot product is returned 617168404Spjd/// in the corresponding element; otherwise that element is set to zero. 618185029Spjd/// \returns A 128-bit vector of [4 x float] containing the dot product. 619185029Spjd#define _mm_dp_ps(X, Y, M) __extension__ ({ \ 620185029Spjd (__m128) __builtin_ia32_dpps((__v4sf)(__m128)(X), \ 621168404Spjd (__v4sf)(__m128)(Y), (M)); }) 622168404Spjd 623168404Spjd/// \brief Computes the dot product of the two 128-bit vectors of [2 x double] 624168404Spjd/// and returns it in the elements of the 128-bit result vector of 625168404Spjd/// [2 x double]. 626168404Spjd/// 627224171Sgibbs/// The immediate integer operand controls which input 628224171Sgibbs/// elements will contribute to the dot product, and where the final results 629224171Sgibbs/// are returned. 630224171Sgibbs/// 631224171Sgibbs/// \headerfile <x86intrin.h> 632224171Sgibbs/// 633224171Sgibbs/// \code 634224171Sgibbs/// __m128d _mm_dp_pd(__m128d X, __m128d Y, const int M); 635224171Sgibbs/// \endcode 636224171Sgibbs/// 637224171Sgibbs/// This intrinsic corresponds to the <c> VDPPD / DPPD </c> instruction. 
638224171Sgibbs/// 639224171Sgibbs/// \param X 640224171Sgibbs/// A 128-bit vector of [2 x double]. 641224171Sgibbs/// \param Y 642224171Sgibbs/// A 128-bit vector of [2 x double]. 643224171Sgibbs/// \param M 644224171Sgibbs/// An immediate integer operand. Mask bits [5:4] determine which elements 645224171Sgibbs/// of the input vectors are used, with bit [4] corresponding to the lowest 646224171Sgibbs/// element and bit [5] corresponding to the highest element of each of [2 x 647224171Sgibbs/// double] vector. If a bit is set, the corresponding elements from the two 648224171Sgibbs/// input vectors are used as an input for dot product; otherwise that input 649224171Sgibbs/// is treated as zero. Bits [1:0] determine which elements of the result 650224171Sgibbs/// will receive a copy of the final dot product, with bit [0] corresponding 651224171Sgibbs/// to the lowest element and bit [1] corresponding to the highest element of 652224171Sgibbs/// each [2 x double] vector. If a bit is set, the dot product is returned in 653224171Sgibbs/// the corresponding element; otherwise that element is set to zero. 654224171Sgibbs#define _mm_dp_pd(X, Y, M) __extension__ ({\ 655224171Sgibbs (__m128d) __builtin_ia32_dppd((__v2df)(__m128d)(X), \ 656224171Sgibbs (__v2df)(__m128d)(Y), (M)); }) 657224171Sgibbs 658224171Sgibbs/* SSE4 Streaming Load Hint Instruction. */ 659224171Sgibbs/// \brief Loads integer values from a 128-bit aligned memory location to a 660224171Sgibbs/// 128-bit integer vector. 661224171Sgibbs/// 662224171Sgibbs/// \headerfile <x86intrin.h> 663224171Sgibbs/// 664224171Sgibbs/// This intrinsic corresponds to the <c> VMOVNTDQA / MOVNTDQA </c> instruction. 665224171Sgibbs/// 666224171Sgibbs/// \param __V 667224171Sgibbs/// A pointer to a 128-bit aligned memory location that contains the integer 668224171Sgibbs/// values. 669224171Sgibbs/// \returns A 128-bit integer vector containing the data stored at the 670224171Sgibbs/// specified memory location. 
671224171Sgibbsstatic __inline__ __m128i __DEFAULT_FN_ATTRS 672224171Sgibbs_mm_stream_load_si128 (__m128i const *__V) 673224171Sgibbs{ 674224171Sgibbs return (__m128i) __builtin_nontemporal_load ((const __v2di *) __V); 675224171Sgibbs} 676224171Sgibbs 677224171Sgibbs/* SSE4 Packed Integer Min/Max Instructions. */ 678224171Sgibbs/// \brief Compares the corresponding elements of two 128-bit vectors of 679224171Sgibbs/// [16 x i8] and returns a 128-bit vector of [16 x i8] containing the lesser 680224171Sgibbs/// of the two values. 681224171Sgibbs/// 682224171Sgibbs/// \headerfile <x86intrin.h> 683224171Sgibbs/// 684224171Sgibbs/// This intrinsic corresponds to the <c> VPMINSB / PMINSB </c> instruction. 685224171Sgibbs/// 686224171Sgibbs/// \param __V1 687224171Sgibbs/// A 128-bit vector of [16 x i8]. 688224171Sgibbs/// \param __V2 689224171Sgibbs/// A 128-bit vector of [16 x i8] 690224171Sgibbs/// \returns A 128-bit vector of [16 x i8] containing the lesser values. 691224171Sgibbsstatic __inline__ __m128i __DEFAULT_FN_ATTRS 692224171Sgibbs_mm_min_epi8 (__m128i __V1, __m128i __V2) 693224171Sgibbs{ 694224171Sgibbs return (__m128i) __builtin_ia32_pminsb128 ((__v16qi) __V1, (__v16qi) __V2); 695224171Sgibbs} 696224171Sgibbs 697224171Sgibbs/// \brief Compares the corresponding elements of two 128-bit vectors of 698224171Sgibbs/// [16 x i8] and returns a 128-bit vector of [16 x i8] containing the 699224171Sgibbs/// greater value of the two. 700224171Sgibbs/// 701224171Sgibbs/// \headerfile <x86intrin.h> 702224171Sgibbs/// 703224171Sgibbs/// This intrinsic corresponds to the <c> VPMAXSB / PMAXSB </c> instruction. 704224171Sgibbs/// 705224171Sgibbs/// \param __V1 706224171Sgibbs/// A 128-bit vector of [16 x i8]. 707224171Sgibbs/// \param __V2 708224171Sgibbs/// A 128-bit vector of [16 x i8]. 709224171Sgibbs/// \returns A 128-bit vector of [16 x i8] containing the greater values. 
710224171Sgibbsstatic __inline__ __m128i __DEFAULT_FN_ATTRS 711224171Sgibbs_mm_max_epi8 (__m128i __V1, __m128i __V2) 712224171Sgibbs{ 713224171Sgibbs return (__m128i) __builtin_ia32_pmaxsb128 ((__v16qi) __V1, (__v16qi) __V2); 714224171Sgibbs} 715224171Sgibbs 716224171Sgibbs/// \brief Compares the corresponding elements of two 128-bit vectors of 717224171Sgibbs/// [8 x u16] and returns a 128-bit vector of [8 x u16] containing the lesser 718224171Sgibbs/// value of the two. 719224171Sgibbs/// 720224171Sgibbs/// \headerfile <x86intrin.h> 721224171Sgibbs/// 722224171Sgibbs/// This intrinsic corresponds to the <c> VPMINUW / PMINUW </c> instruction. 723224171Sgibbs/// 724224171Sgibbs/// \param __V1 725224171Sgibbs/// A 128-bit vector of [8 x u16]. 726224171Sgibbs/// \param __V2 727224171Sgibbs/// A 128-bit vector of [8 x u16]. 728224171Sgibbs/// \returns A 128-bit vector of [8 x u16] containing the lesser values. 729224171Sgibbsstatic __inline__ __m128i __DEFAULT_FN_ATTRS 730224171Sgibbs_mm_min_epu16 (__m128i __V1, __m128i __V2) 731224171Sgibbs{ 732224171Sgibbs return (__m128i) __builtin_ia32_pminuw128 ((__v8hi) __V1, (__v8hi) __V2); 733224171Sgibbs} 734224171Sgibbs 735224171Sgibbs/// \brief Compares the corresponding elements of two 128-bit vectors of 736224171Sgibbs/// [8 x u16] and returns a 128-bit vector of [8 x u16] containing the 737224171Sgibbs/// greater value of the two. 738224171Sgibbs/// 739224171Sgibbs/// \headerfile <x86intrin.h> 740224171Sgibbs/// 741224171Sgibbs/// This intrinsic corresponds to the <c> VPMAXUW / PMAXUW </c> instruction. 742224171Sgibbs/// 743224171Sgibbs/// \param __V1 744224171Sgibbs/// A 128-bit vector of [8 x u16]. 745224171Sgibbs/// \param __V2 746236884Smm/// A 128-bit vector of [8 x u16]. 747185029Spjd/// \returns A 128-bit vector of [8 x u16] containing the greater values. 
748185029Spjdstatic __inline__ __m128i __DEFAULT_FN_ATTRS 749168404Spjd_mm_max_epu16 (__m128i __V1, __m128i __V2) 750168404Spjd{ 751168404Spjd return (__m128i) __builtin_ia32_pmaxuw128 ((__v8hi) __V1, (__v8hi) __V2); 752168404Spjd} 753168404Spjd 754168404Spjd/// \brief Compares the corresponding elements of two 128-bit vectors of 755236884Smm/// [4 x i32] and returns a 128-bit vector of [4 x i32] containing the lesser 756185029Spjd/// value of the two. 757236884Smm/// 758236884Smm/// \headerfile <x86intrin.h> 759185029Spjd/// 760168404Spjd/// This intrinsic corresponds to the <c> VPMINSD / PMINSD </c> instruction. 761168404Spjd/// 762168404Spjd/// \param __V1 763168404Spjd/// A 128-bit vector of [4 x i32]. 764168404Spjd/// \param __V2 765168404Spjd/// A 128-bit vector of [4 x i32]. 766168404Spjd/// \returns A 128-bit vector of [4 x i32] containing the lesser values. 767168404Spjdstatic __inline__ __m128i __DEFAULT_FN_ATTRS 768168404Spjd_mm_min_epi32 (__m128i __V1, __m128i __V2) 769168404Spjd{ 770168404Spjd return (__m128i) __builtin_ia32_pminsd128 ((__v4si) __V1, (__v4si) __V2); 771236884Smm} 772168404Spjd 773185029Spjd/// \brief Compares the corresponding elements of two 128-bit vectors of 774168404Spjd/// [4 x i32] and returns a 128-bit vector of [4 x i32] containing the 775185029Spjd/// greater value of the two. 776168404Spjd/// 777168404Spjd/// \headerfile <x86intrin.h> 778185029Spjd/// 779185029Spjd/// This intrinsic corresponds to the <c> VPMAXSD / PMAXSD </c> instruction. 780185029Spjd/// 781168404Spjd/// \param __V1 782168404Spjd/// A 128-bit vector of [4 x i32]. 783236884Smm/// \param __V2 784168404Spjd/// A 128-bit vector of [4 x i32]. 785168404Spjd/// \returns A 128-bit vector of [4 x i32] containing the greater values. 
786168404Spjdstatic __inline__ __m128i __DEFAULT_FN_ATTRS 787168404Spjd_mm_max_epi32 (__m128i __V1, __m128i __V2) 788168404Spjd{ 789168404Spjd return (__m128i) __builtin_ia32_pmaxsd128 ((__v4si) __V1, (__v4si) __V2); 790168404Spjd} 791236884Smm 792236884Smm/// \brief Compares the corresponding elements of two 128-bit vectors of 793236884Smm/// [4 x u32] and returns a 128-bit vector of [4 x u32] containing the lesser 794168404Spjd/// value of the two. 795168404Spjd/// 796185029Spjd/// \headerfile <x86intrin.h> 797185029Spjd/// 798185029Spjd/// This intrinsic corresponds to the <c> VPMINUD / PMINUD </c> instruction. 799185029Spjd/// 800185029Spjd/// \param __V1 801185029Spjd/// A 128-bit vector of [4 x u32]. 802185029Spjd/// \param __V2 803185029Spjd/// A 128-bit vector of [4 x u32]. 804185029Spjd/// \returns A 128-bit vector of [4 x u32] containing the lesser values. 805185029Spjdstatic __inline__ __m128i __DEFAULT_FN_ATTRS 806168404Spjd_mm_min_epu32 (__m128i __V1, __m128i __V2) 807168404Spjd{ 808251634Sdelphij return (__m128i) __builtin_ia32_pminud128((__v4si) __V1, (__v4si) __V2); 809168404Spjd} 810168404Spjd 811185029Spjd/// \brief Compares the corresponding elements of two 128-bit vectors of 812185029Spjd/// [4 x u32] and returns a 128-bit vector of [4 x u32] containing the 813185029Spjd/// greater value of the two. 814185029Spjd/// 815185029Spjd/// \headerfile <x86intrin.h> 816185029Spjd/// 817185029Spjd/// This intrinsic corresponds to the <c> VPMAXUD / PMAXUD </c> instruction. 818185029Spjd/// 819185029Spjd/// \param __V1 820185029Spjd/// A 128-bit vector of [4 x u32]. 821185029Spjd/// \param __V2 822236884Smm/// A 128-bit vector of [4 x u32]. 823236884Smm/// \returns A 128-bit vector of [4 x u32] containing the greater values. 
824236884Smmstatic __inline__ __m128i __DEFAULT_FN_ATTRS 825236884Smm_mm_max_epu32 (__m128i __V1, __m128i __V2) 826236884Smm{ 827236884Smm return (__m128i) __builtin_ia32_pmaxud128((__v4si) __V1, (__v4si) __V2); 828236884Smm} 829236884Smm 830236884Smm/* SSE4 Insertion and Extraction from XMM Register Instructions. */ 831236884Smm/// \brief Takes the first argument \a X and inserts an element from the second 832236884Smm/// argument \a Y as selected by the third argument \a N. That result then 833236884Smm/// has elements zeroed out also as selected by the third argument \a N. The 834236884Smm/// resulting 128-bit vector of [4 x float] is then returned. 835236884Smm/// 836236884Smm/// \headerfile <x86intrin.h> 837185029Spjd/// 838185029Spjd/// \code 839185029Spjd/// __m128 _mm_insert_ps(__m128 X, __m128 Y, const int N); 840185029Spjd/// \endcode 841185029Spjd/// 842185029Spjd/// This intrinsic corresponds to the <c> VINSERTPS </c> instruction. 843185029Spjd/// 844185029Spjd/// \param X 845185029Spjd/// A 128-bit vector source operand of [4 x float]. With the exception of 846185029Spjd/// those bits in the result copied from parameter \a Y and zeroed by bits 847251634Sdelphij/// [3:0] of \a N, all bits from this parameter are copied to the result. 848251634Sdelphij/// \param Y 849251634Sdelphij/// A 128-bit vector source operand of [4 x float]. One single-precision 850251634Sdelphij/// floating-point element from this source, as determined by the immediate 851251634Sdelphij/// parameter, is copied to the result. 852251634Sdelphij/// \param N 853251634Sdelphij/// Specifies which bits from operand \a Y will be copied, which bits in the 854251634Sdelphij/// result they will be be copied to, and which bits in the result will be 855251634Sdelphij/// cleared. The following assignments are made: \n 856251634Sdelphij/// Bits [7:6] specify the bits to copy from operand \a Y: \n 857185029Spjd/// 00: Selects bits [31:0] from operand \a Y. 
\n 858251634Sdelphij/// 01: Selects bits [63:32] from operand \a Y. \n 859185029Spjd/// 10: Selects bits [95:64] from operand \a Y. \n 860168404Spjd/// 11: Selects bits [127:96] from operand \a Y. \n 861168404Spjd/// Bits [5:4] specify the bits in the result to which the selected bits 862168404Spjd/// from operand \a Y are copied: \n 863185029Spjd/// 00: Copies the selected bits from \a Y to result bits [31:0]. \n 864168404Spjd/// 01: Copies the selected bits from \a Y to result bits [63:32]. \n 865168404Spjd/// 10: Copies the selected bits from \a Y to result bits [95:64]. \n 866168404Spjd/// 11: Copies the selected bits from \a Y to result bits [127:96]. \n 867185029Spjd/// Bits[3:0]: If any of these bits are set, the corresponding result 868168404Spjd/// element is cleared. 869168404Spjd/// \returns A 128-bit vector of [4 x float] containing the copied 870168404Spjd/// single-precision floating point elements from the operands. 871168404Spjd#define _mm_insert_ps(X, Y, N) __builtin_ia32_insertps128((X), (Y), (N)) 872168404Spjd 873168404Spjd/// \brief Extracts a 32-bit integer from a 128-bit vector of [4 x float] and 874168404Spjd/// returns it, using the immediate value parameter \a N as a selector. 875168404Spjd/// 876168404Spjd/// \headerfile <x86intrin.h> 877185029Spjd/// 878168404Spjd/// \code 879168404Spjd/// int _mm_extract_ps(__m128 X, const int N); 880168404Spjd/// \endcode 881185029Spjd/// 882168404Spjd/// This intrinsic corresponds to the <c> VEXTRACTPS / EXTRACTPS </c> 883168404Spjd/// instruction. 884168404Spjd/// 885168404Spjd/// \param X 886168404Spjd/// A 128-bit vector of [4 x float]. 887168404Spjd/// \param N 888168404Spjd/// An immediate value. Bits [1:0] determines which bits from the argument 889168404Spjd/// \a X are extracted and returned: \n 890168404Spjd/// 00: Bits [31:0] of parameter \a X are returned. \n 891168404Spjd/// 01: Bits [63:32] of parameter \a X are returned. \n 892168404Spjd/// 10: Bits [95:64] of parameter \a X are returned. 
\n 893168404Spjd/// 11: Bits [127:96] of parameter \a X are returned. 894168404Spjd/// \returns A 32-bit integer containing the extracted 32 bits of float data. 895185029Spjd#define _mm_extract_ps(X, N) (__extension__ \ 896168404Spjd ({ union { int __i; float __f; } __t; \ 897168404Spjd __v4sf __a = (__v4sf)(__m128)(X); \ 898168404Spjd __t.__f = __a[(N) & 3]; \ 899185029Spjd __t.__i;})) 900185029Spjd 901168404Spjd/* Miscellaneous insert and extract macros. */ 902185029Spjd/* Extract a single-precision float from X at index N into D. */ 903168404Spjd#define _MM_EXTRACT_FLOAT(D, X, N) (__extension__ ({ __v4sf __a = (__v4sf)(X); \ 904168404Spjd (D) = __a[N]; })) 905185029Spjd 906168404Spjd/* Or together 2 sets of indexes (X and Y) with the zeroing bits (Z) to create 907168404Spjd an index suitable for _mm_insert_ps. */ 908168404Spjd#define _MM_MK_INSERTPS_NDX(X, Y, Z) (((X) << 6) | ((Y) << 4) | (Z)) 909185029Spjd 910168404Spjd/* Extract a float from X at index N into the first index of the return. */ 911168404Spjd#define _MM_PICK_OUT_PS(X, N) _mm_insert_ps (_mm_setzero_ps(), (X), \ 912168404Spjd _MM_MK_INSERTPS_NDX((N), 0, 0x0e)) 913168404Spjd 914168404Spjd/* Insert int into packed integer array at index. */ 915185029Spjd/// \brief Constructs a 128-bit vector of [16 x i8] by first making a copy of 916168404Spjd/// the 128-bit integer vector parameter, and then inserting the lower 8 bits 917168404Spjd/// of an integer parameter \a I into an offset specified by the immediate 918168404Spjd/// value parameter \a N. 919168404Spjd/// 920168404Spjd/// \headerfile <x86intrin.h> 921244857Spjd/// 922168404Spjd/// \code 923244857Spjd/// __m128i _mm_insert_epi8(__m128i X, int I, const int N); 924168404Spjd/// \endcode 925244857Spjd/// 926168404Spjd/// This intrinsic corresponds to the <c> VPINSRB / PINSRB </c> instruction. 927185029Spjd/// 928168404Spjd/// \param X 929168404Spjd/// A 128-bit integer vector of [16 x i8]. 
This vector is copied to the 930168404Spjd/// result and then one of the sixteen elements in the result vector is 931168404Spjd/// replaced by the lower 8 bits of \a I. 932168404Spjd/// \param I 933185029Spjd/// An integer. The lower 8 bits of this operand are written to the result 934168404Spjd/// beginning at the offset specified by \a N. 935168404Spjd/// \param N 936168404Spjd/// An immediate value. Bits [3:0] specify the bit offset in the result at 937168404Spjd/// which the lower 8 bits of \a I are written. \n 938168404Spjd/// 0000: Bits [7:0] of the result are used for insertion. \n 939168404Spjd/// 0001: Bits [15:8] of the result are used for insertion. \n 940168404Spjd/// 0010: Bits [23:16] of the result are used for insertion. \n 941168404Spjd/// 0011: Bits [31:24] of the result are used for insertion. \n 942168404Spjd/// 0100: Bits [39:32] of the result are used for insertion. \n 943168404Spjd/// 0101: Bits [47:40] of the result are used for insertion. \n 944168404Spjd/// 0110: Bits [55:48] of the result are used for insertion. \n 945168404Spjd/// 0111: Bits [63:56] of the result are used for insertion. \n 946168404Spjd/// 1000: Bits [71:64] of the result are used for insertion. \n 947168404Spjd/// 1001: Bits [79:72] of the result are used for insertion. \n 948168404Spjd/// 1010: Bits [87:80] of the result are used for insertion. \n 949168404Spjd/// 1011: Bits [95:88] of the result are used for insertion. \n 950168404Spjd/// 1100: Bits [103:96] of the result are used for insertion. \n 951168404Spjd/// 1101: Bits [111:104] of the result are used for insertion. \n 952185029Spjd/// 1110: Bits [119:112] of the result are used for insertion. \n 953185029Spjd/// 1111: Bits [127:120] of the result are used for insertion. 954185029Spjd/// \returns A 128-bit integer vector containing the constructed values. 
/* The vector is copied into a local, lane (N & 15) is overwritten with I,
   and the modified copy is the value of the expression.  */
#define _mm_insert_epi8(X, I, N) (__extension__                           \
                                  ({ __v16qi __a = (__v16qi)(__m128i)(X); \
                                     __a[(N) & 15] = (I);                 \
                                     (__m128i)__a;}))

/// \brief Constructs a 128-bit vector of [4 x i32] by first making a copy of
///    the 128-bit integer vector parameter, and then inserting the 32-bit
///    integer parameter \a I at the offset specified by the immediate value
///    parameter \a N.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm_insert_epi32(__m128i X, int I, const int N);
/// \endcode
///
/// This intrinsic corresponds to the <c> VPINSRD / PINSRD </c> instruction.
///
/// \param X
///    A 128-bit integer vector of [4 x i32]. This vector is copied to the
///    result and then one of the four elements in the result vector is
///    replaced by \a I.
/// \param I
///    A 32-bit integer that is written to the result beginning at the offset
///    specified by \a N.
/// \param N
///    An immediate value. Bits [1:0] specify the bit offset in the result at
///    which the integer \a I is written. \n
///    00: Bits [31:0] of the result are used for insertion. \n
///    01: Bits [63:32] of the result are used for insertion. \n
///    10: Bits [95:64] of the result are used for insertion. \n
///    11: Bits [127:96] of the result are used for insertion.
/// \returns A 128-bit integer vector containing the constructed values.
/* The vector is copied into a local, lane (N & 3) is overwritten with I,
   and the modified copy is the value of the expression.  */
#define _mm_insert_epi32(X, I, N) (__extension__                         \
                                   ({ __v4si __a = (__v4si)(__m128i)(X); \
                                      __a[(N) & 3] = (I);                \
                                      (__m128i)__a;}))

#ifdef __x86_64__
/// \brief Constructs a 128-bit vector of [2 x i64] by first making a copy of
///    the 128-bit integer vector parameter, and then inserting the 64-bit
///    integer parameter \a I, using the immediate value parameter \a N as an
///    insertion location selector.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm_insert_epi64(__m128i X, long long I, const int N);
/// \endcode
///
/// This intrinsic corresponds to the <c> VPINSRQ / PINSRQ </c> instruction.
///
/// \param X
///    A 128-bit integer vector of [2 x i64]. This vector is copied to the
///    result and then one of the two elements in the result vector is replaced
///    by \a I.
/// \param I
///    A 64-bit integer that is written to the result beginning at the offset
///    specified by \a N.
/// \param N
///    An immediate value. Bit [0] specifies the bit offset in the result at
///    which the integer \a I is written. \n
///    0: Bits [63:0] of the result are used for insertion. \n
///    1: Bits [127:64] of the result are used for insertion. \n
/// \returns A 128-bit integer vector containing the constructed values.
#define _mm_insert_epi64(X, I, N) (__extension__                         \
                                   ({ __v2di __a = (__v2di)(__m128i)(X); \
                                      __a[(N) & 1] = (I);                \
                                      (__m128i)__a;}))
#endif /* __x86_64__ */

/* Extract int from packed integer array at index.  This returns the element
 * as a zero extended value, so it is unsigned.
 */
/// \brief Extracts an 8-bit element from the 128-bit integer vector of
///    [16 x i8], using the immediate value parameter \a N as a selector.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// int _mm_extract_epi8(__m128i X, const int N);
/// \endcode
///
/// This intrinsic corresponds to the <c> VPEXTRB / PEXTRB </c> instruction.
///
/// \param X
///    A 128-bit integer vector.
/// \param N
///    An immediate value. Bits [3:0] specify which 8-bit vector element from
///    the argument \a X to extract and copy to the result. \n
///    0000: Bits [7:0] of parameter \a X are extracted. \n
///    0001: Bits [15:8] of the parameter \a X are extracted. \n
///    0010: Bits [23:16] of the parameter \a X are extracted. \n
///    0011: Bits [31:24] of the parameter \a X are extracted. \n
///    0100: Bits [39:32] of the parameter \a X are extracted. \n
///    0101: Bits [47:40] of the parameter \a X are extracted. \n
///    0110: Bits [55:48] of the parameter \a X are extracted. \n
///    0111: Bits [63:56] of the parameter \a X are extracted. \n
///    1000: Bits [71:64] of the parameter \a X are extracted. \n
///    1001: Bits [79:72] of the parameter \a X are extracted. \n
///    1010: Bits [87:80] of the parameter \a X are extracted. \n
///    1011: Bits [95:88] of the parameter \a X are extracted. \n
///    1100: Bits [103:96] of the parameter \a X are extracted. \n
///    1101: Bits [111:104] of the parameter \a X are extracted. \n
///    1110: Bits [119:112] of the parameter \a X are extracted. \n
///    1111: Bits [127:120] of the parameter \a X are extracted.
/// \returns  An unsigned integer, whose lower 8 bits are selected from the
///    128-bit integer vector parameter and the remaining bits are assigned
///    zeros.
#define _mm_extract_epi8(X, N) (__extension__                           \
                                ({ __v16qi __a = (__v16qi)(__m128i)(X); \
                                   (int)(unsigned char) __a[(N) & 15];}))

/// \brief Extracts a 32-bit element from the 128-bit integer vector of
///    [4 x i32], using the immediate value parameter \a N as a selector.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// int _mm_extract_epi32(__m128i X, const int N);
/// \endcode
///
/// This intrinsic corresponds to the <c> VPEXTRD / PEXTRD </c> instruction.
///
/// \param X
///    A 128-bit integer vector.
/// \param N
///    An immediate value. Bits [1:0] specify which 32-bit vector element from
///    the argument \a X to extract and copy to the result. \n
///    00: Bits [31:0] of the parameter \a X are extracted. \n
///    01: Bits [63:32] of the parameter \a X are extracted. \n
///    10: Bits [95:64] of the parameter \a X are extracted. \n
///    11: Bits [127:96] of the parameter \a X are extracted.
/// \returns  An integer, whose lower 32 bits are selected from the 128-bit
///    integer vector parameter and the remaining bits are assigned zeros.
/* The vector is copied into a local and lane (N & 3) is the value of the
   expression.  */
#define _mm_extract_epi32(X, N) (__extension__                         \
                                 ({ __v4si __a = (__v4si)(__m128i)(X); \
                                    (int)__a[(N) & 3];}))

#ifdef __x86_64__
/// \brief Extracts a 64-bit element from the 128-bit integer vector of
///    [2 x i64], using the immediate value parameter \a N as a selector.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// long long _mm_extract_epi64(__m128i X, const int N);
/// \endcode
///
/// This intrinsic corresponds to the <c> VPEXTRQ / PEXTRQ </c> instruction.
///
/// \param X
///    A 128-bit integer vector.
/// \param N
///    An immediate value. Bit [0] specifies which 64-bit vector element from
///    the argument \a X to return. \n
///    0: Bits [63:0] are returned. \n
///    1: Bits [127:64] are returned. \n
/// \returns  A 64-bit integer.
#define _mm_extract_epi64(X, N) (__extension__                         \
                                 ({ __v2di __a = (__v2di)(__m128i)(X); \
                                    (long long)__a[(N) & 1];}))
#endif /* __x86_64__ */

/* SSE4 128-bit Packed Integer Comparisons.  */
/// \brief Tests whether the specified bits in a 128-bit integer vector are all
///    zeros.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VPTEST / PTEST </c> instruction.
///
/// \param __M
///    A 128-bit integer vector containing the bits to be tested.
/// \param __V
///    A 128-bit integer vector selecting which bits to test in operand \a __M.
/// \returns TRUE if the specified bits are all zeros; FALSE otherwise.
1132168404Spjdstatic __inline__ int __DEFAULT_FN_ATTRS 1133168404Spjd_mm_testz_si128(__m128i __M, __m128i __V) 1134168404Spjd{ 1135168404Spjd return __builtin_ia32_ptestz128((__v2di)__M, (__v2di)__V); 1136168404Spjd} 1137168404Spjd 1138168404Spjd/// \brief Tests whether the specified bits in a 128-bit integer vector are all 1139168404Spjd/// ones. 1140168404Spjd/// 1141207670Smm/// \headerfile <x86intrin.h> 1142168404Spjd/// 1143168404Spjd/// This intrinsic corresponds to the <c> VPTEST / PTEST </c> instruction. 1144168404Spjd/// 1145168404Spjd/// \param __M 1146168404Spjd/// A 128-bit integer vector containing the bits to be tested. 1147168404Spjd/// \param __V 1148207670Smm/// A 128-bit integer vector selecting which bits to test in operand \a __M. 1149168404Spjd/// \returns TRUE if the specified bits are all ones; FALSE otherwise. 1150168404Spjdstatic __inline__ int __DEFAULT_FN_ATTRS 1151168404Spjd_mm_testc_si128(__m128i __M, __m128i __V) 1152168404Spjd{ 1153207670Smm return __builtin_ia32_ptestc128((__v2di)__M, (__v2di)__V); 1154207670Smm} 1155207670Smm 1156168404Spjd/// \brief Tests whether the specified bits in a 128-bit integer vector are 1157168404Spjd/// neither all zeros nor all ones. 1158168404Spjd/// 1159168404Spjd/// \headerfile <x86intrin.h> 1160168404Spjd/// 1161168404Spjd/// This intrinsic corresponds to the <c> VPTEST / PTEST </c> instruction. 1162168404Spjd/// 1163168404Spjd/// \param __M 1164168404Spjd/// A 128-bit integer vector containing the bits to be tested. 1165168404Spjd/// \param __V 1166168404Spjd/// A 128-bit integer vector selecting which bits to test in operand \a __M. 1167168404Spjd/// \returns TRUE if the specified bits are neither all zeros nor all ones; 1168168404Spjd/// FALSE otherwise. 
1169168404Spjdstatic __inline__ int __DEFAULT_FN_ATTRS 1170168404Spjd_mm_testnzc_si128(__m128i __M, __m128i __V) 1171168404Spjd{ 1172168404Spjd return __builtin_ia32_ptestnzc128((__v2di)__M, (__v2di)__V); 1173168404Spjd} 1174168404Spjd 1175168404Spjd/// \brief Tests whether the specified bits in a 128-bit integer vector are all 1176168404Spjd/// ones. 1177168404Spjd/// 1178168404Spjd/// \headerfile <x86intrin.h> 1179168404Spjd/// 1180168404Spjd/// \code 1181168404Spjd/// int _mm_test_all_ones(__m128i V); 1182168404Spjd/// \endcode 1183168404Spjd/// 1184168404Spjd/// This intrinsic corresponds to the <c> VPTEST / PTEST </c> instruction. 1185248571Smm/// 1186248571Smm/// \param V 1187248571Smm/// A 128-bit integer vector containing the bits to be tested. 1188207670Smm/// \returns TRUE if the bits specified in the operand are all set to 1; FALSE 1189248571Smm/// otherwise. 1190207670Smm#define _mm_test_all_ones(V) _mm_testc_si128((V), _mm_cmpeq_epi32((V), (V))) 1191248571Smm 1192168404Spjd/// \brief Tests whether the specified bits in a 128-bit integer vector are 1193207670Smm/// neither all zeros nor all ones. 1194168404Spjd/// 1195168404Spjd/// \headerfile <x86intrin.h> 1196168404Spjd/// 1197168404Spjd/// \code 1198168404Spjd/// int _mm_test_mix_ones_zeros(__m128i M, __m128i V); 1199168404Spjd/// \endcode 1200168404Spjd/// 1201168404Spjd/// This intrinsic corresponds to the <c> VPTEST / PTEST </c> instruction. 1202168404Spjd/// 1203168404Spjd/// \param M 1204168404Spjd/// A 128-bit integer vector containing the bits to be tested. 1205168404Spjd/// \param V 1206168404Spjd/// A 128-bit integer vector selecting which bits to test in operand \a M. 1207168404Spjd/// \returns TRUE if the specified bits are neither all zeros nor all ones; 1208219089Spjd/// FALSE otherwise. 
1209168404Spjd#define _mm_test_mix_ones_zeros(M, V) _mm_testnzc_si128((M), (V)) 1210168404Spjd 1211168404Spjd/// \brief Tests whether the specified bits in a 128-bit integer vector are all 1212168404Spjd/// zeros. 1213168404Spjd/// 1214168404Spjd/// \headerfile <x86intrin.h> 1215168404Spjd/// 1216168404Spjd/// \code 1217168404Spjd/// int _mm_test_all_zeros(__m128i M, __m128i V); 1218168404Spjd/// \endcode 1219168404Spjd/// 1220168404Spjd/// This intrinsic corresponds to the <c> VPTEST / PTEST </c> instruction. 1221168404Spjd/// 1222168404Spjd/// \param M 1223168404Spjd/// A 128-bit integer vector containing the bits to be tested. 1224168404Spjd/// \param V 1225168404Spjd/// A 128-bit integer vector selecting which bits to test in operand \a M. 1226185029Spjd/// \returns TRUE if the specified bits are all zeros; FALSE otherwise. 1227185029Spjd#define _mm_test_all_zeros(M, V) _mm_testz_si128 ((M), (V)) 1228185029Spjd 1229185029Spjd/* SSE4 64-bit Packed Integer Comparisons. */ 1230185029Spjd/// \brief Compares each of the corresponding 64-bit values of the 128-bit 1231185029Spjd/// integer vectors for equality. 1232185029Spjd/// 1233185029Spjd/// \headerfile <x86intrin.h> 1234168404Spjd/// 1235168404Spjd/// This intrinsic corresponds to the <c> VPCMPEQQ / PCMPEQQ </c> instruction. 1236168404Spjd/// 1237168404Spjd/// \param __V1 1238168404Spjd/// A 128-bit integer vector. 1239168404Spjd/// \param __V2 1240168404Spjd/// A 128-bit integer vector. 1241168404Spjd/// \returns A 128-bit integer vector containing the comparison results. 1242168404Spjdstatic __inline__ __m128i __DEFAULT_FN_ATTRS 1243168404Spjd_mm_cmpeq_epi64(__m128i __V1, __m128i __V2) 1244168404Spjd{ 1245168404Spjd return (__m128i)((__v2di)__V1 == (__v2di)__V2); 1246213197Smm} 1247213197Smm 1248213197Smm/* SSE4 Packed Integer Sign-Extension. 
*/ 1249213197Smm/// \brief Sign-extends each of the lower eight 8-bit integer elements of a 1250168404Spjd/// 128-bit vector of [16 x i8] to 16-bit values and returns them in a 1251213197Smm/// 128-bit vector of [8 x i16]. The upper eight elements of the input vector 1252213197Smm/// are unused. 1253213197Smm/// 1254213197Smm/// \headerfile <x86intrin.h> 1255213197Smm/// 1256213197Smm/// This intrinsic corresponds to the <c> VPMOVSXBW / PMOVSXBW </c> instruction. 1257213197Smm/// 1258213197Smm/// \param __V 1259213197Smm/// A 128-bit vector of [16 x i8]. The lower eight 8-bit elements are sign- 1260213197Smm/// extended to 16-bit values. 1261213197Smm/// \returns A 128-bit vector of [8 x i16] containing the sign-extended values. 1262213197Smmstatic __inline__ __m128i __DEFAULT_FN_ATTRS 1263213197Smm_mm_cvtepi8_epi16(__m128i __V) 1264213197Smm{ 1265213197Smm /* This function always performs a signed extension, but __v16qi is a char 1266213197Smm which may be signed or unsigned, so use __v16qs. */ 1267213197Smm return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1, 2, 3, 4, 5, 6, 7), __v8hi); 1268213197Smm} 1269213197Smm 1270213197Smm/// \brief Sign-extends each of the lower four 8-bit integer elements of a 1271213197Smm/// 128-bit vector of [16 x i8] to 32-bit values and returns them in a 1272213197Smm/// 128-bit vector of [4 x i32]. The upper twelve elements of the input 1273213197Smm/// vector are unused. 1274213197Smm/// 1275213197Smm/// \headerfile <x86intrin.h> 1276213197Smm/// 1277213197Smm/// This intrinsic corresponds to the <c> VPMOVSXBD / PMOVSXBD </c> instruction. 1278213197Smm/// 1279213197Smm/// \param __V 1280213197Smm/// A 128-bit vector of [16 x i8]. The lower four 8-bit elements are sign- 1281213197Smm/// extended to 32-bit values. 1282213197Smm/// \returns A 128-bit vector of [4 x i32] containing the sign-extended values. 
1283213197Smmstatic __inline__ __m128i __DEFAULT_FN_ATTRS 1284213197Smm_mm_cvtepi8_epi32(__m128i __V) 1285213197Smm{ 1286213197Smm /* This function always performs a signed extension, but __v16qi is a char 1287213197Smm which may be signed or unsigned, so use __v16qs. */ 1288213197Smm return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1, 2, 3), __v4si); 1289213197Smm} 1290213197Smm 1291168404Spjd/// \brief Sign-extends each of the lower two 8-bit integer elements of a 1292213197Smm/// 128-bit integer vector of [16 x i8] to 64-bit values and returns them in 1293213197Smm/// a 128-bit vector of [2 x i64]. The upper fourteen elements of the input 1294213197Smm/// vector are unused. 1295213197Smm/// 1296213197Smm/// \headerfile <x86intrin.h> 1297213197Smm/// 1298213197Smm/// This intrinsic corresponds to the <c> VPMOVSXBQ / PMOVSXBQ </c> instruction. 1299254591Sgibbs/// 1300219089Spjd/// \param __V 1301213197Smm/// A 128-bit vector of [16 x i8]. The lower two 8-bit elements are sign- 1302219089Spjd/// extended to 64-bit values. 1303213197Smm/// \returns A 128-bit vector of [2 x i64] containing the sign-extended values. 1304213197Smmstatic __inline__ __m128i __DEFAULT_FN_ATTRS 1305254591Sgibbs_mm_cvtepi8_epi64(__m128i __V) 1306213197Smm{ 1307224169Sgibbs /* This function always performs a signed extension, but __v16qi is a char 1308213197Smm which may be signed or unsigned, so use __v16qs. */ 1309213197Smm return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1), __v2di); 1310213197Smm} 1311213197Smm 1312213197Smm/// \brief Sign-extends each of the lower four 16-bit integer elements of a 1313219089Spjd/// 128-bit integer vector of [8 x i16] to 32-bit values and returns them in 1314254591Sgibbs/// a 128-bit vector of [4 x i32]. The upper four elements of the input 1315219089Spjd/// vector are unused. 
1316213197Smm/// 1317213197Smm/// \headerfile <x86intrin.h> 1318213197Smm/// 1319213197Smm/// This intrinsic corresponds to the <c> VPMOVSXWD / PMOVSXWD </c> instruction. 1320213197Smm/// 1321213197Smm/// \param __V 1322213197Smm/// A 128-bit vector of [8 x i16]. The lower four 16-bit elements are sign- 1323213197Smm/// extended to 32-bit values. 1324213197Smm/// \returns A 128-bit vector of [4 x i32] containing the sign-extended values. 1325213197Smmstatic __inline__ __m128i __DEFAULT_FN_ATTRS 1326213197Smm_mm_cvtepi16_epi32(__m128i __V) 1327213197Smm{ 1328213197Smm return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v8hi)__V, (__v8hi)__V, 0, 1, 2, 3), __v4si); 1329213197Smm} 1330213197Smm 1331213197Smm/// \brief Sign-extends each of the lower two 16-bit integer elements of a 1332213197Smm/// 128-bit integer vector of [8 x i16] to 64-bit values and returns them in 1333213197Smm/// a 128-bit vector of [2 x i64]. The upper six elements of the input 1334213197Smm/// vector are unused. 1335213197Smm/// 1336213197Smm/// \headerfile <x86intrin.h> 1337213197Smm/// 1338213197Smm/// This intrinsic corresponds to the <c> VPMOVSXWQ / PMOVSXWQ </c> instruction. 1339224170Sgibbs/// 1340224170Sgibbs/// \param __V 1341213197Smm/// A 128-bit vector of [8 x i16]. The lower two 16-bit elements are sign- 1342224170Sgibbs/// extended to 64-bit values. 1343224170Sgibbs/// \returns A 128-bit vector of [2 x i64] containing the sign-extended values. 1344213197Smmstatic __inline__ __m128i __DEFAULT_FN_ATTRS 1345213197Smm_mm_cvtepi16_epi64(__m128i __V) 1346213197Smm{ 1347213197Smm return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v8hi)__V, (__v8hi)__V, 0, 1), __v2di); 1348213197Smm} 1349213197Smm 1350213197Smm/// \brief Sign-extends each of the lower two 32-bit integer elements of a 1351213197Smm/// 128-bit integer vector of [4 x i32] to 64-bit values and returns them in 1352213197Smm/// a 128-bit vector of [2 x i64]. 
The upper two elements of the input vector 1353213197Smm/// are unused. 1354213197Smm/// 1355213197Smm/// \headerfile <x86intrin.h> 1356213197Smm/// 1357213197Smm/// This intrinsic corresponds to the <c> VPMOVSXDQ / PMOVSXDQ </c> instruction. 1358213197Smm/// 1359213197Smm/// \param __V 1360213197Smm/// A 128-bit vector of [4 x i32]. The lower two 32-bit elements are sign- 1361213197Smm/// extended to 64-bit values. 1362213197Smm/// \returns A 128-bit vector of [2 x i64] containing the sign-extended values. 1363213197Smmstatic __inline__ __m128i __DEFAULT_FN_ATTRS 1364236884Smm_mm_cvtepi32_epi64(__m128i __V) 1365236884Smm{ 1366236884Smm return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v4si)__V, (__v4si)__V, 0, 1), __v2di); 1367236884Smm} 1368254591Sgibbs 1369254591Sgibbs/* SSE4 Packed Integer Zero-Extension. */ 1370254591Sgibbs/// \brief Zero-extends each of the lower eight 8-bit integer elements of a 1371254591Sgibbs/// 128-bit vector of [16 x i8] to 16-bit values and returns them in a 1372213197Smm/// 128-bit vector of [8 x i16]. The upper eight elements of the input vector 1373213197Smm/// are unused. 1374213197Smm/// 1375213197Smm/// \headerfile <x86intrin.h> 1376213197Smm/// 1377213197Smm/// This intrinsic corresponds to the <c> VPMOVZXBW / PMOVZXBW </c> instruction. 1378213197Smm/// 1379213197Smm/// \param __V 1380213197Smm/// A 128-bit vector of [16 x i8]. The lower eight 8-bit elements are zero- 1381213197Smm/// extended to 16-bit values. 1382213197Smm/// \returns A 128-bit vector of [8 x i16] containing the zero-extended values. 
1383213197Smmstatic __inline__ __m128i __DEFAULT_FN_ATTRS 1384213197Smm_mm_cvtepu8_epi16(__m128i __V) 1385213197Smm{ 1386213197Smm return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1, 2, 3, 4, 5, 6, 7), __v8hi); 1387213197Smm} 1388213197Smm 1389213197Smm/// \brief Zero-extends each of the lower four 8-bit integer elements of a 1390213197Smm/// 128-bit vector of [16 x i8] to 32-bit values and returns them in a 1391213197Smm/// 128-bit vector of [4 x i32]. The upper twelve elements of the input 1392213197Smm/// vector are unused. 1393213197Smm/// 1394213197Smm/// \headerfile <x86intrin.h> 1395213197Smm/// 1396213197Smm/// This intrinsic corresponds to the <c> VPMOVZXBD / PMOVZXBD </c> instruction. 1397213197Smm/// 1398213197Smm/// \param __V 1399213197Smm/// A 128-bit vector of [16 x i8]. The lower four 8-bit elements are zero- 1400213197Smm/// extended to 32-bit values. 1401213197Smm/// \returns A 128-bit vector of [4 x i32] containing the zero-extended values. 1402219089Spjdstatic __inline__ __m128i __DEFAULT_FN_ATTRS 1403219089Spjd_mm_cvtepu8_epi32(__m128i __V) 1404219089Spjd{ 1405219089Spjd return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1, 2, 3), __v4si); 1406219089Spjd} 1407219089Spjd 1408219089Spjd/// \brief Zero-extends each of the lower two 8-bit integer elements of a 1409219089Spjd/// 128-bit integer vector of [16 x i8] to 64-bit values and returns them in 1410213197Smm/// a 128-bit vector of [2 x i64]. The upper fourteen elements of the input 1411213197Smm/// vector are unused. 1412213197Smm/// 1413213197Smm/// \headerfile <x86intrin.h> 1414254591Sgibbs/// 1415254591Sgibbs/// This intrinsic corresponds to the <c> VPMOVZXBQ / PMOVZXBQ </c> instruction. 1416254591Sgibbs/// 1417254591Sgibbs/// \param __V 1418254591Sgibbs/// A 128-bit vector of [16 x i8]. The lower two 8-bit elements are zero- 1419254591Sgibbs/// extended to 64-bit values. 
1420213197Smm/// \returns A 128-bit vector of [2 x i64] containing the zero-extended values. 1421213197Smmstatic __inline__ __m128i __DEFAULT_FN_ATTRS 1422219089Spjd_mm_cvtepu8_epi64(__m128i __V) 1423219089Spjd{ 1424219089Spjd return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1), __v2di); 1425219089Spjd} 1426219089Spjd 1427219089Spjd/// \brief Zero-extends each of the lower four 16-bit integer elements of a 1428219089Spjd/// 128-bit integer vector of [8 x i16] to 32-bit values and returns them in 1429219089Spjd/// a 128-bit vector of [4 x i32]. The upper four elements of the input 1430219089Spjd/// vector are unused. 1431219089Spjd/// 1432213197Smm/// \headerfile <x86intrin.h> 1433213197Smm/// 1434213197Smm/// This intrinsic corresponds to the <c> VPMOVZXWD / PMOVZXWD </c> instruction. 1435219089Spjd/// 1436213197Smm/// \param __V 1437219089Spjd/// A 128-bit vector of [8 x i16]. The lower four 16-bit elements are zero- 1438213197Smm/// extended to 32-bit values. 1439219089Spjd/// \returns A 128-bit vector of [4 x i32] containing the zero-extended values. 1440219089Spjdstatic __inline__ __m128i __DEFAULT_FN_ATTRS 1441219089Spjd_mm_cvtepu16_epi32(__m128i __V) 1442219089Spjd{ 1443213197Smm return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v8hu)__V, (__v8hu)__V, 0, 1, 2, 3), __v4si); 1444219089Spjd} 1445213197Smm 1446213197Smm/// \brief Zero-extends each of the lower two 16-bit integer elements of a 1447213197Smm/// 128-bit integer vector of [8 x i16] to 64-bit values and returns them in 1448213197Smm/// a 128-bit vector of [2 x i64]. The upper six elements of the input vector 1449213197Smm/// are unused. 1450213197Smm/// 1451213197Smm/// \headerfile <x86intrin.h> 1452213197Smm/// 1453168404Spjd/// This intrinsic corresponds to the <c> VPMOVZXWQ / PMOVZXWQ </c> instruction. 1454168404Spjd/// 1455168404Spjd/// \param __V 1456168404Spjd/// A 128-bit vector of [8 x i16]. 
The lower two 16-bit elements are zero- 1457213197Smm/// extended to 64-bit values. 1458168404Spjd/// \returns A 128-bit vector of [2 x i64] containing the zero-extended values. 1459168404Spjdstatic __inline__ __m128i __DEFAULT_FN_ATTRS 1460168404Spjd_mm_cvtepu16_epi64(__m128i __V) 1461168404Spjd{ 1462168404Spjd return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v8hu)__V, (__v8hu)__V, 0, 1), __v2di); 1463168404Spjd} 1464168404Spjd 1465219089Spjd/// \brief Zero-extends each of the lower two 32-bit integer elements of a 1466219089Spjd/// 128-bit integer vector of [4 x i32] to 64-bit values and returns them in 1467168404Spjd/// a 128-bit vector of [2 x i64]. The upper two elements of the input vector 1468168404Spjd/// are unused. 1469219089Spjd/// 1470168404Spjd/// \headerfile <x86intrin.h> 1471168404Spjd/// 1472168404Spjd/// This intrinsic corresponds to the <c> VPMOVZXDQ / PMOVZXDQ </c> instruction. 1473185029Spjd/// 1474168404Spjd/// \param __V 1475168404Spjd/// A 128-bit vector of [4 x i32]. The lower two 32-bit elements are zero- 1476185029Spjd/// extended to 64-bit values. 1477168404Spjd/// \returns A 128-bit vector of [2 x i64] containing the zero-extended values. 1478168404Spjdstatic __inline__ __m128i __DEFAULT_FN_ATTRS 1479168404Spjd_mm_cvtepu32_epi64(__m128i __V) 1480168404Spjd{ 1481168404Spjd return (__m128i)__builtin_convertvector(__builtin_shufflevector((__v4su)__V, (__v4su)__V, 0, 1), __v2di); 1482168404Spjd} 1483168404Spjd 1484168404Spjd/* SSE4 Pack with Unsigned Saturation. */ 1485168404Spjd/// \brief Converts 32-bit signed integers from both 128-bit integer vector 1486168404Spjd/// operands into 16-bit unsigned integers, and returns the packed result. 1487168404Spjd/// Values greater than 0xFFFF are saturated to 0xFFFF. Values less than 1488168404Spjd/// 0x0000 are saturated to 0x0000. 
1489168404Spjd/// 1490168404Spjd/// \headerfile <x86intrin.h> 1491168404Spjd/// 1492168404Spjd/// This intrinsic corresponds to the <c> VPACKUSDW / PACKUSDW </c> instruction. 1493168404Spjd/// 1494168404Spjd/// \param __V1 1495236884Smm/// A 128-bit vector of [4 x i32]. Each 32-bit element is treated as a 1496236884Smm/// signed integer and is converted to a 16-bit unsigned integer with 1497236884Smm/// saturation. Values greater than 0xFFFF are saturated to 0xFFFF. Values 1498236884Smm/// less than 0x0000 are saturated to 0x0000. The converted [4 x i16] values 1499185029Spjd/// are written to the lower 64 bits of the result. 1500185029Spjd/// \param __V2 1501185029Spjd/// A 128-bit vector of [4 x i32]. Each 32-bit element is treated as a 1502185029Spjd/// signed integer and is converted to a 16-bit unsigned integer with 1503168404Spjd/// saturation. Values greater than 0xFFFF are saturated to 0xFFFF. Values 1504168404Spjd/// less than 0x0000 are saturated to 0x0000. The converted [4 x i16] values 1505168404Spjd/// are written to the higher 64 bits of the result. 1506168404Spjd/// \returns A 128-bit vector of [8 x i16] containing the converted values. 1507168404Spjdstatic __inline__ __m128i __DEFAULT_FN_ATTRS 1508168404Spjd_mm_packus_epi32(__m128i __V1, __m128i __V2) 1509168404Spjd{ 1510168404Spjd return (__m128i) __builtin_ia32_packusdw128((__v4si)__V1, (__v4si)__V2); 1511168404Spjd} 1512168404Spjd 1513168404Spjd/* SSE4 Multiple Packed Sums of Absolute Difference. */ 1514168404Spjd/// \brief Subtracts 8-bit unsigned integer values and computes the absolute 1515185029Spjd/// values of the differences to the corresponding bits in the destination. 1516185029Spjd/// Then sums of the absolute differences are returned according to the bit 1517185029Spjd/// fields in the immediate operand. 
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm_mpsadbw_epu8(__m128i X, __m128i Y, const int M);
/// \endcode
///
/// This intrinsic corresponds to the <c> VMPSADBW / MPSADBW </c> instruction.
///
/// \param X
///    A 128-bit vector of [16 x i8].
/// \param Y
///    A 128-bit vector of [16 x i8].
/// \param M
///    An 8-bit immediate operand specifying how the absolute differences are to
///    be calculated, according to the following algorithm:
///    \code
///    // M2 represents bit 2 of the immediate operand
///    // M10 represents bits [1:0] of the immediate operand
///    i = M2 * 4
///    j = M10 * 4
///    for (k = 0; k < 8; k = k + 1) {
///      d0 = abs(X[i + k + 0] - Y[j + 0])
///      d1 = abs(X[i + k + 1] - Y[j + 1])
///      d2 = abs(X[i + k + 2] - Y[j + 2])
///      d3 = abs(X[i + k + 3] - Y[j + 3])
///      r[k] = d0 + d1 + d2 + d3
///    }
///    \endcode
/// \returns A 128-bit integer vector containing the sums of the sets of
///    absolute differences between both operands.
/* Expands to a plain parenthesized expression rather than a GNU statement
   expression so the macro can be used anywhere an expression is allowed. */
#define _mm_mpsadbw_epu8(X, Y, M) \
  ((__m128i) __builtin_ia32_mpsadbw128((__v16qi)(__m128i)(X), \
                                       (__v16qi)(__m128i)(Y), (M)))

/// \brief Finds the minimum unsigned 16-bit element in the input 128-bit
///    vector of [8 x u16] and returns it along with its index.
///
/// \headerfile <x86intrin.h>
///
/// This intrinsic corresponds to the <c> VPHMINPOSUW / PHMINPOSUW </c>
/// instruction.
1560213197Smm/// 1561213197Smm/// \param __V 1562213197Smm/// A 128-bit vector of [8 x u16]. 1563213197Smm/// \returns A 128-bit value where bits [15:0] contain the minimum value found 1564213197Smm/// in parameter \a __V, bits [18:16] contain the index of the minimum value 1565213197Smm/// and the remaining bits are set to 0. 1566213197Smmstatic __inline__ __m128i __DEFAULT_FN_ATTRS 1567213197Smm_mm_minpos_epu16(__m128i __V) 1568213197Smm{ 1569213197Smm return (__m128i) __builtin_ia32_phminposuw128((__v8hi)__V); 1570213197Smm} 1571213197Smm 1572213197Smm/* Handle the sse4.2 definitions here. */ 1573213197Smm 1574213197Smm/* These definitions are normally in nmmintrin.h, but gcc puts them in here 1575213197Smm so we'll do the same. */ 1576219089Spjd 1577213197Smm#undef __DEFAULT_FN_ATTRS 1578213197Smm#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse4.2"))) 1579213197Smm 1580213197Smm/* These specify the type of data that we're comparing. */ 1581213197Smm#define _SIDD_UBYTE_OPS 0x00 1582213197Smm#define _SIDD_UWORD_OPS 0x01 1583213197Smm#define _SIDD_SBYTE_OPS 0x02 1584213197Smm#define _SIDD_SWORD_OPS 0x03 1585219089Spjd 1586213197Smm/* These specify the type of comparison operation. */ 1587168404Spjd#define _SIDD_CMP_EQUAL_ANY 0x00 1588168404Spjd#define _SIDD_CMP_RANGES 0x04 1589168404Spjd#define _SIDD_CMP_EQUAL_EACH 0x08 1590168404Spjd#define _SIDD_CMP_EQUAL_ORDERED 0x0c 1591168404Spjd 1592168404Spjd/* These macros specify the polarity of the operation. */ 1593168404Spjd#define _SIDD_POSITIVE_POLARITY 0x00 1594168404Spjd#define _SIDD_NEGATIVE_POLARITY 0x10 1595168404Spjd#define _SIDD_MASKED_POSITIVE_POLARITY 0x20 1596168404Spjd#define _SIDD_MASKED_NEGATIVE_POLARITY 0x30 1597168404Spjd 1598168404Spjd/* These macros are used in _mm_cmpXstri() to specify the return. 
*/ 1599168404Spjd#define _SIDD_LEAST_SIGNIFICANT 0x00 1600168404Spjd#define _SIDD_MOST_SIGNIFICANT 0x40 1601168404Spjd 1602228103Smm/* These macros are used in _mm_cmpXstri() to specify the return. */ 1603168404Spjd#define _SIDD_BIT_MASK 0x00 1604168404Spjd#define _SIDD_UNIT_MASK 0x40 1605168404Spjd 1606168404Spjd/* SSE4.2 Packed Comparison Intrinsics. */ 1607168404Spjd/// \brief Uses the immediate operand \a M to perform a comparison of string 1608168404Spjd/// data with implicitly defined lengths that is contained in source operands 1609168404Spjd/// \a A and \a B. Returns a 128-bit integer vector representing the result 1610168404Spjd/// mask of the comparison. 1611168404Spjd/// 1612168404Spjd/// \headerfile <x86intrin.h> 1613219089Spjd/// 1614168404Spjd/// \code 1615185029Spjd/// __m128i _mm_cmpistrm(__m128i A, __m128i B, const int M); 1616168404Spjd/// \endcode 1617168404Spjd/// 1618168404Spjd/// This intrinsic corresponds to the <c> VPCMPISTRM / PCMPISTRM </c> 1619228103Smm/// instruction. 1620228103Smm/// 1621228103Smm/// \param A 1622168404Spjd/// A 128-bit integer vector containing one of the source operands to be 1623168404Spjd/// compared. 1624168404Spjd/// \param B 1625168404Spjd/// A 128-bit integer vector containing one of the source operands to be 1626168404Spjd/// compared. 1627168404Spjd/// \param M 1628168404Spjd/// An 8-bit immediate operand specifying whether the characters are bytes or 1629168404Spjd/// words, the type of comparison to perform, and the format of the return 1630228103Smm/// value. \n 1631228103Smm/// Bits [1:0]: Determine source data format. \n 1632168404Spjd/// 00: 16 unsigned bytes \n 1633168404Spjd/// 01: 8 unsigned words \n 1634168404Spjd/// 10: 16 signed bytes \n 1635168404Spjd/// 11: 8 signed words \n 1636228103Smm/// Bits [3:2]: Determine comparison type and aggregation method. \n 1637168404Spjd/// 00: Subset: Each character in \a B is compared for equality with all 1638168404Spjd/// the characters in \a A. 
\n 1639168404Spjd/// 01: Ranges: Each character in \a B is compared to \a A. The comparison 1640168404Spjd/// basis is greater than or equal for even-indexed elements in \a A, 1641228103Smm/// and less than or equal for odd-indexed elements in \a A. \n 1642228103Smm/// 10: Match: Compare each pair of corresponding characters in \a A and 1643168404Spjd/// \a B for equality. \n 1644168404Spjd/// 11: Substring: Search \a B for substring matches of \a A. \n 1645168404Spjd/// Bits [5:4]: Determine whether to perform a one's complement on the bit 1646228103Smm/// mask of the comparison results. \n 1647168404Spjd/// 00: No effect. \n 1648168404Spjd/// 01: Negate the bit mask. \n 1649168404Spjd/// 10: No effect. \n 1650168404Spjd/// 11: Negate the bit mask only for bits with an index less than or equal 1651228103Smm/// to the size of \a A or \a B. \n 1652168404Spjd/// Bit [6]: Determines whether the result is zero-extended or expanded to 16 1653168404Spjd/// bytes. \n 1654168404Spjd/// 0: The result is zero-extended to 16 bytes. \n 1655168404Spjd/// 1: The result is expanded to 16 bytes (this expansion is performed by 1656238926Smm/// repeating each bit 8 or 16 times). 1657238926Smm/// \returns Returns a 128-bit integer vector representing the result mask of 1658168404Spjd/// the comparison. 1659168404Spjd#define _mm_cmpistrm(A, B, M) \ 1660168404Spjd (__m128i)__builtin_ia32_pcmpistrm128((__v16qi)(__m128i)(A), \ 1661228103Smm (__v16qi)(__m128i)(B), (int)(M)) 1662168404Spjd 1663168404Spjd/// \brief Uses the immediate operand \a M to perform a comparison of string 1664168404Spjd/// data with implicitly defined lengths that is contained in source operands 1665238926Smm/// \a A and \a B. Returns an integer representing the result index of the 1666238926Smm/// comparison. 
///
/// \headerfile <x86intrin.h>
///
/// \code
/// int _mm_cmpistri(__m128i A, __m128i B, const int M);
/// \endcode
///
/// This intrinsic corresponds to the <c> VPCMPISTRI / PCMPISTRI </c>
/// instruction.
///
/// \param A
///    A 128-bit integer vector containing one of the source operands to be
///    compared.
/// \param B
///    A 128-bit integer vector containing one of the source operands to be
///    compared.
/// \param M
///    An 8-bit immediate operand specifying whether the characters are bytes or
///    words, the type of comparison to perform, and the format of the return
///    value. \n
///    Bits [1:0]: Determine source data format. \n
///      00: 16 unsigned bytes \n
///      01: 8 unsigned words \n
///      10: 16 signed bytes \n
///      11: 8 signed words \n
///    Bits [3:2]: Determine comparison type and aggregation method. \n
///      00: Subset: Each character in \a B is compared for equality with all
///          the characters in \a A. \n
///      01: Ranges: Each character in \a B is compared to \a A. The comparison
///          basis is greater than or equal for even-indexed elements in \a A,
///          and less than or equal for odd-indexed elements in \a A. \n
///      10: Match: Compare each pair of corresponding characters in \a A and
///          \a B for equality. \n
///      11: Substring: Search \a B for substring matches of \a A. \n
///    Bits [5:4]: Determine whether to perform a one's complement on the bit
///                mask of the comparison results. \n
///      00: No effect. \n
///      01: Negate the bit mask. \n
///      10: No effect. \n
///      11: Negate the bit mask only for bits with an index less than or equal
///          to the size of \a A or \a B. \n
///    Bit [6]: Determines whether the index of the lowest set bit or the
///             highest set bit is returned. \n
///      0: The index of the least significant set bit. \n
///      1: The index of the most significant set bit. \n
/// \returns Returns an integer representing the result index of the comparison.
/* The whole expansion is parenthesized so that operators written after the
   macro invocation apply to the cast result, not to the builtin call. */
#define _mm_cmpistri(A, B, M) \
  ((int)__builtin_ia32_pcmpistri128((__v16qi)(__m128i)(A), \
                                    (__v16qi)(__m128i)(B), (int)(M)))

/// \brief Uses the immediate operand \a M to perform a comparison of string
///    data with explicitly defined lengths that is contained in source operands
///    \a A and \a B. Returns a 128-bit integer vector representing the result
///    mask of the comparison.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// __m128i _mm_cmpestrm(__m128i A, int LA, __m128i B, int LB, const int M);
/// \endcode
///
/// This intrinsic corresponds to the <c> VPCMPESTRM / PCMPESTRM </c>
/// instruction.
///
/// \param A
///    A 128-bit integer vector containing one of the source operands to be
///    compared.
/// \param LA
///    An integer that specifies the length of the string in \a A.
/// \param B
///    A 128-bit integer vector containing one of the source operands to be
///    compared.
/// \param LB
///    An integer that specifies the length of the string in \a B.
/// \param M
///    An 8-bit immediate operand specifying whether the characters are bytes or
///    words, the type of comparison to perform, and the format of the return
///    value. \n
///    Bits [1:0]: Determine source data format. \n
///      00: 16 unsigned bytes \n
///      01: 8 unsigned words \n
///      10: 16 signed bytes \n
///      11: 8 signed words \n
///    Bits [3:2]: Determine comparison type and aggregation method. \n
///      00: Subset: Each character in \a B is compared for equality with all
///          the characters in \a A. \n
///      01: Ranges: Each character in \a B is compared to \a A. The comparison
///          basis is greater than or equal for even-indexed elements in \a A,
///          and less than or equal for odd-indexed elements in \a A. \n
///      10: Match: Compare each pair of corresponding characters in \a A and
///          \a B for equality. \n
///      11: Substring: Search \a B for substring matches of \a A. \n
///    Bits [5:4]: Determine whether to perform a one's complement on the bit
///                mask of the comparison results. \n
///      00: No effect. \n
///      01: Negate the bit mask. \n
///      10: No effect. \n
///      11: Negate the bit mask only for bits with an index less than or equal
///          to the size of \a A or \a B. \n
///    Bit [6]: Determines whether the result is zero-extended or expanded to 16
///             bytes. \n
///      0: The result is zero-extended to 16 bytes. \n
///      1: The result is expanded to 16 bytes (this expansion is performed by
///         repeating each bit 8 or 16 times). \n
/// \returns Returns a 128-bit integer vector representing the result mask of
///    the comparison.
/* The whole expansion is parenthesized so that operators written after the
   macro invocation apply to the cast result, not to the builtin call. */
#define _mm_cmpestrm(A, LA, B, LB, M) \
  ((__m128i)__builtin_ia32_pcmpestrm128((__v16qi)(__m128i)(A), (int)(LA), \
                                        (__v16qi)(__m128i)(B), (int)(LB), \
                                        (int)(M)))

/// \brief Uses the immediate operand \a M to perform a comparison of string
///    data with explicitly defined lengths that is contained in source operands
///    \a A and \a B. Returns an integer representing the result index of the
///    comparison.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// int _mm_cmpestri(__m128i A, int LA, __m128i B, int LB, const int M);
/// \endcode
///
/// This intrinsic corresponds to the <c> VPCMPESTRI / PCMPESTRI </c>
/// instruction.
///
/// \param A
///    A 128-bit integer vector containing one of the source operands to be
///    compared.
/// \param LA
///    An integer that specifies the length of the string in \a A.
/// \param B
///    A 128-bit integer vector containing one of the source operands to be
///    compared.
/// \param LB
///    An integer that specifies the length of the string in \a B.
/// \param M
///    An 8-bit immediate operand specifying whether the characters are bytes or
///    words, the type of comparison to perform, and the format of the return
///    value. \n
///    Bits [1:0]: Determine source data format. \n
///      00: 16 unsigned bytes \n
///      01: 8 unsigned words \n
///      10: 16 signed bytes \n
///      11: 8 signed words \n
///    Bits [3:2]: Determine comparison type and aggregation method. \n
///      00: Subset: Each character in \a B is compared for equality with all
///          the characters in \a A. \n
///      01: Ranges: Each character in \a B is compared to \a A. The comparison
///          basis is greater than or equal for even-indexed elements in \a A,
///          and less than or equal for odd-indexed elements in \a A. \n
///      10: Match: Compare each pair of corresponding characters in \a A and
///          \a B for equality. \n
///      11: Substring: Search \a B for substring matches of \a A. \n
///    Bits [5:4]: Determine whether to perform a one's complement on the bit
///                mask of the comparison results. \n
///      00: No effect. \n
///      01: Negate the bit mask. \n
///      10: No effect. \n
///      11: Negate the bit mask only for bits with an index less than or equal
///          to the size of \a A or \a B. \n
///    Bit [6]: Determines whether the index of the lowest set bit or the
///             highest set bit is returned. \n
///      0: The index of the least significant set bit. \n
///      1: The index of the most significant set bit. \n
/// \returns Returns an integer representing the result index of the comparison.
/* Parenthesized for the same reason as _mm_cmpestrm above. */
#define _mm_cmpestri(A, LA, B, LB, M) \
  ((int)__builtin_ia32_pcmpestri128((__v16qi)(__m128i)(A), (int)(LA), \
                                    (__v16qi)(__m128i)(B), (int)(LB), \
                                    (int)(M)))

/* SSE4.2 Packed Comparison Intrinsics and EFlag Reading.  */
/// \brief Uses the immediate operand \a M to perform a comparison of string
///    data with implicitly defined lengths that is contained in source operands
///    \a A and \a B. Returns 1 if the bit mask is zero and the length of the
///    string in \a B is the maximum, otherwise, returns 0.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// int _mm_cmpistra(__m128i A, __m128i B, const int M);
/// \endcode
///
/// This intrinsic corresponds to the <c> VPCMPISTRI / PCMPISTRI </c>
/// instruction.
///
/// \param A
///    A 128-bit integer vector containing one of the source operands to be
///    compared.
/// \param B
///    A 128-bit integer vector containing one of the source operands to be
///    compared.
/// \param M
///    An 8-bit immediate operand specifying whether the characters are bytes or
///    words and the type of comparison to perform. \n
///    Bits [1:0]: Determine source data format. \n
///      00: 16 unsigned bytes \n
///      01: 8 unsigned words \n
///      10: 16 signed bytes \n
///      11: 8 signed words \n
///    Bits [3:2]: Determine comparison type and aggregation method. \n
///      00: Subset: Each character in \a B is compared for equality with all
///          the characters in \a A. \n
///      01: Ranges: Each character in \a B is compared to \a A. The comparison
///          basis is greater than or equal for even-indexed elements in \a A,
///          and less than or equal for odd-indexed elements in \a A. \n
///      10: Match: Compare each pair of corresponding characters in \a A and
///          \a B for equality. \n
///      11: Substring: Search \a B for substring matches of \a A. \n
///    Bits [5:4]: Determine whether to perform a one's complement on the bit
///                mask of the comparison results. \n
///      00: No effect. \n
///      01: Negate the bit mask. \n
///      10: No effect. \n
///      11: Negate the bit mask only for bits with an index less than or equal
///          to the size of \a A or \a B. \n
/// \returns Returns 1 if the bit mask is zero and the length of the string in
///    \a B is the maximum; otherwise, returns 0.
/* Fully parenthesized so the expansion is a single operand (e.g. sizeof). */
#define _mm_cmpistra(A, B, M) \
  ((int)__builtin_ia32_pcmpistria128((__v16qi)(__m128i)(A), \
                                     (__v16qi)(__m128i)(B), (int)(M)))

/// \brief Uses the immediate operand \a M to perform a comparison of string
///    data with implicitly defined lengths that is contained in source operands
///    \a A and \a B. Returns 1 if the bit mask is non-zero, otherwise, returns
///    0.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// int _mm_cmpistrc(__m128i A, __m128i B, const int M);
/// \endcode
///
/// This intrinsic corresponds to the <c> VPCMPISTRI / PCMPISTRI </c>
/// instruction.
///
/// \param A
///    A 128-bit integer vector containing one of the source operands to be
///    compared.
/// \param B
///    A 128-bit integer vector containing one of the source operands to be
///    compared.
/// \param M
///    An 8-bit immediate operand specifying whether the characters are bytes or
///    words and the type of comparison to perform. \n
///    Bits [1:0]: Determine source data format. \n
///      00: 16 unsigned bytes \n
///      01: 8 unsigned words \n
///      10: 16 signed bytes \n
///      11: 8 signed words \n
///    Bits [3:2]: Determine comparison type and aggregation method. \n
///      00: Subset: Each character in \a B is compared for equality with all
///          the characters in \a A. \n
///      01: Ranges: Each character in \a B is compared to \a A. The comparison
///          basis is greater than or equal for even-indexed elements in \a A,
///          and less than or equal for odd-indexed elements in \a A. \n
///      10: Match: Compare each pair of corresponding characters in \a A and
///          \a B for equality. \n
///      11: Substring: Search \a B for substring matches of \a A. \n
///    Bits [5:4]: Determine whether to perform a one's complement on the bit
///                mask of the comparison results. \n
///      00: No effect. \n
///      01: Negate the bit mask. \n
///      10: No effect. \n
///      11: Negate the bit mask only for bits with an index less than or equal
///          to the size of \a A or \a B.
/// \returns Returns 1 if the bit mask is non-zero, otherwise, returns 0.
/* Fully parenthesized so the expansion is a single operand (e.g. sizeof). */
#define _mm_cmpistrc(A, B, M) \
  ((int)__builtin_ia32_pcmpistric128((__v16qi)(__m128i)(A), \
                                     (__v16qi)(__m128i)(B), (int)(M)))

/// \brief Uses the immediate operand \a M to perform a comparison of string
///    data with implicitly defined lengths that is contained in source operands
///    \a A and \a B. Returns bit 0 of the resulting bit mask.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// int _mm_cmpistro(__m128i A, __m128i B, const int M);
/// \endcode
///
/// This intrinsic corresponds to the <c> VPCMPISTRI / PCMPISTRI </c>
/// instruction.
///
/// \param A
///    A 128-bit integer vector containing one of the source operands to be
///    compared.
/// \param B
///    A 128-bit integer vector containing one of the source operands to be
///    compared.
/// \param M
///    An 8-bit immediate operand specifying whether the characters are bytes or
///    words and the type of comparison to perform. \n
///    Bits [1:0]: Determine source data format. \n
///      00: 16 unsigned bytes \n
///      01: 8 unsigned words \n
///      10: 16 signed bytes \n
///      11: 8 signed words \n
///    Bits [3:2]: Determine comparison type and aggregation method. \n
///      00: Subset: Each character in \a B is compared for equality with all
///          the characters in \a A. \n
///      01: Ranges: Each character in \a B is compared to \a A. The comparison
///          basis is greater than or equal for even-indexed elements in \a A,
///          and less than or equal for odd-indexed elements in \a A. \n
///      10: Match: Compare each pair of corresponding characters in \a A and
///          \a B for equality. \n
///      11: Substring: Search \a B for substring matches of \a A. \n
///    Bits [5:4]: Determine whether to perform a one's complement on the bit
///                mask of the comparison results. \n
///      00: No effect. \n
///      01: Negate the bit mask. \n
///      10: No effect. \n
///      11: Negate the bit mask only for bits with an index less than or equal
///          to the size of \a A or \a B. \n
/// \returns Returns bit 0 of the resulting bit mask.
/* Fully parenthesized so the expansion is a single operand (e.g. sizeof). */
#define _mm_cmpistro(A, B, M) \
  ((int)__builtin_ia32_pcmpistrio128((__v16qi)(__m128i)(A), \
                                     (__v16qi)(__m128i)(B), (int)(M)))

/// \brief Uses the immediate operand \a M to perform a comparison of string
///    data with implicitly defined lengths that is contained in source operands
///    \a A and \a B. Returns 1 if the length of the string in \a A is less than
///    the maximum, otherwise, returns 0.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// int _mm_cmpistrs(__m128i A, __m128i B, const int M);
/// \endcode
///
/// This intrinsic corresponds to the <c> VPCMPISTRI / PCMPISTRI </c>
/// instruction.
///
/// \param A
///    A 128-bit integer vector containing one of the source operands to be
///    compared.
/// \param B
///    A 128-bit integer vector containing one of the source operands to be
///    compared.
/// \param M
///    An 8-bit immediate operand specifying whether the characters are bytes or
///    words and the type of comparison to perform. \n
///    Bits [1:0]: Determine source data format. \n
///      00: 16 unsigned bytes \n
///      01: 8 unsigned words \n
///      10: 16 signed bytes \n
///      11: 8 signed words \n
///    Bits [3:2]: Determine comparison type and aggregation method. \n
///      00: Subset: Each character in \a B is compared for equality with all
///          the characters in \a A. \n
///      01: Ranges: Each character in \a B is compared to \a A. The comparison
///          basis is greater than or equal for even-indexed elements in \a A,
///          and less than or equal for odd-indexed elements in \a A. \n
///      10: Match: Compare each pair of corresponding characters in \a A and
///          \a B for equality. \n
///      11: Substring: Search \a B for substring matches of \a A. \n
///    Bits [5:4]: Determine whether to perform a one's complement on the bit
///                mask of the comparison results. \n
///      00: No effect. \n
///      01: Negate the bit mask. \n
///      10: No effect. \n
///      11: Negate the bit mask only for bits with an index less than or equal
///          to the size of \a A or \a B. \n
/// \returns Returns 1 if the length of the string in \a A is less than the
///    maximum, otherwise, returns 0.
/* Fully parenthesized so the expansion is a single operand (e.g. sizeof). */
#define _mm_cmpistrs(A, B, M) \
  ((int)__builtin_ia32_pcmpistris128((__v16qi)(__m128i)(A), \
                                     (__v16qi)(__m128i)(B), (int)(M)))

/// \brief Uses the immediate operand \a M to perform a comparison of string
///    data with implicitly defined lengths that is contained in source operands
///    \a A and \a B. Returns 1 if the length of the string in \a B is less than
///    the maximum, otherwise, returns 0.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// int _mm_cmpistrz(__m128i A, __m128i B, const int M);
/// \endcode
///
/// This intrinsic corresponds to the <c> VPCMPISTRI / PCMPISTRI </c>
/// instruction.
///
/// \param A
///    A 128-bit integer vector containing one of the source operands to be
///    compared.
/// \param B
///    A 128-bit integer vector containing one of the source operands to be
///    compared.
/// \param M
///    An 8-bit immediate operand specifying whether the characters are bytes or
///    words and the type of comparison to perform. \n
///    Bits [1:0]: Determine source data format. \n
///      00: 16 unsigned bytes \n
///      01: 8 unsigned words \n
///      10: 16 signed bytes \n
///      11: 8 signed words \n
///    Bits [3:2]: Determine comparison type and aggregation method. \n
///      00: Subset: Each character in \a B is compared for equality with all
///          the characters in \a A. \n
///      01: Ranges: Each character in \a B is compared to \a A. The comparison
///          basis is greater than or equal for even-indexed elements in \a A,
///          and less than or equal for odd-indexed elements in \a A. \n
///      10: Match: Compare each pair of corresponding characters in \a A and
///          \a B for equality. \n
///      11: Substring: Search \a B for substring matches of \a A. \n
///    Bits [5:4]: Determine whether to perform a one's complement on the bit
///                mask of the comparison results. \n
///      00: No effect. \n
///      01: Negate the bit mask. \n
///      10: No effect. \n
///      11: Negate the bit mask only for bits with an index less than or equal
///          to the size of \a A or \a B.
/// \returns Returns 1 if the length of the string in \a B is less than the
///    maximum, otherwise, returns 0.
/* Fully parenthesized so the expansion is a single operand (e.g. sizeof). */
#define _mm_cmpistrz(A, B, M) \
  ((int)__builtin_ia32_pcmpistriz128((__v16qi)(__m128i)(A), \
                                     (__v16qi)(__m128i)(B), (int)(M)))

/// \brief Uses the immediate operand \a M to perform a comparison of string
///    data with explicitly defined lengths that is contained in source operands
///    \a A and \a B. Returns 1 if the bit mask is zero and the length of the
///    string in \a B is the maximum, otherwise, returns 0.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// int _mm_cmpestra(__m128i A, int LA, __m128i B, int LB, const int M);
/// \endcode
///
/// This intrinsic corresponds to the <c> VPCMPESTRI / PCMPESTRI </c>
/// instruction.
///
/// \param A
///    A 128-bit integer vector containing one of the source operands to be
///    compared.
/// \param LA
///    An integer that specifies the length of the string in \a A.
/// \param B
///    A 128-bit integer vector containing one of the source operands to be
///    compared.
/// \param LB
///    An integer that specifies the length of the string in \a B.
/// \param M
///    An 8-bit immediate operand specifying whether the characters are bytes or
///    words and the type of comparison to perform. \n
///    Bits [1:0]: Determine source data format. \n
///      00: 16 unsigned bytes \n
///      01: 8 unsigned words \n
///      10: 16 signed bytes \n
///      11: 8 signed words \n
///    Bits [3:2]: Determine comparison type and aggregation method. \n
///      00: Subset: Each character in \a B is compared for equality with all
///          the characters in \a A. \n
///      01: Ranges: Each character in \a B is compared to \a A. The comparison
///          basis is greater than or equal for even-indexed elements in \a A,
///          and less than or equal for odd-indexed elements in \a A. \n
///      10: Match: Compare each pair of corresponding characters in \a A and
///          \a B for equality. \n
///      11: Substring: Search \a B for substring matches of \a A. \n
///    Bits [5:4]: Determine whether to perform a one's complement on the bit
///                mask of the comparison results. \n
///      00: No effect. \n
///      01: Negate the bit mask. \n
///      10: No effect. \n
///      11: Negate the bit mask only for bits with an index less than or equal
///          to the size of \a A or \a B.
/// \returns Returns 1 if the bit mask is zero and the length of the string in
///    \a B is the maximum, otherwise, returns 0.
/* Fully parenthesized so the expansion is a single operand (e.g. sizeof). */
#define _mm_cmpestra(A, LA, B, LB, M) \
  ((int)__builtin_ia32_pcmpestria128((__v16qi)(__m128i)(A), (int)(LA), \
                                     (__v16qi)(__m128i)(B), (int)(LB), \
                                     (int)(M)))

/// \brief Uses the immediate operand \a M to perform a comparison of string
///    data with explicitly defined lengths that is contained in source operands
///    \a A and \a B. Returns 1 if the resulting mask is non-zero, otherwise,
///    returns 0.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// int _mm_cmpestrc(__m128i A, int LA, __m128i B, int LB, const int M);
/// \endcode
///
/// This intrinsic corresponds to the <c> VPCMPESTRI / PCMPESTRI </c>
/// instruction.
///
/// \param A
///    A 128-bit integer vector containing one of the source operands to be
///    compared.
/// \param LA
///    An integer that specifies the length of the string in \a A.
/// \param B
///    A 128-bit integer vector containing one of the source operands to be
///    compared.
/// \param LB
///    An integer that specifies the length of the string in \a B.
/// \param M
///    An 8-bit immediate operand specifying whether the characters are bytes or
///    words and the type of comparison to perform. \n
///    Bits [1:0]: Determine source data format. \n
///      00: 16 unsigned bytes \n
///      01: 8 unsigned words \n
///      10: 16 signed bytes \n
///      11: 8 signed words \n
///    Bits [3:2]: Determine comparison type and aggregation method. \n
///      00: Subset: Each character in \a B is compared for equality with all
///          the characters in \a A. \n
///      01: Ranges: Each character in \a B is compared to \a A. The comparison
///          basis is greater than or equal for even-indexed elements in \a A,
///          and less than or equal for odd-indexed elements in \a A. \n
///      10: Match: Compare each pair of corresponding characters in \a A and
///          \a B for equality. \n
///      11: Substring: Search \a B for substring matches of \a A. \n
///    Bits [5:4]: Determine whether to perform a one's complement on the bit
///                mask of the comparison results. \n
///      00: No effect. \n
///      01: Negate the bit mask. \n
///      10: No effect. \n
///      11: Negate the bit mask only for bits with an index less than or equal
///          to the size of \a A or \a B. \n
/// \returns Returns 1 if the resulting mask is non-zero, otherwise, returns 0.
/* Fully parenthesized so the expansion is a single operand (e.g. sizeof). */
#define _mm_cmpestrc(A, LA, B, LB, M) \
  ((int)__builtin_ia32_pcmpestric128((__v16qi)(__m128i)(A), (int)(LA), \
                                     (__v16qi)(__m128i)(B), (int)(LB), \
                                     (int)(M)))

/// \brief Uses the immediate operand \a M to perform a comparison of string
///    data with explicitly defined lengths that is contained in source operands
///    \a A and \a B. Returns bit 0 of the resulting bit mask.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// int _mm_cmpestro(__m128i A, int LA, __m128i B, int LB, const int M);
/// \endcode
///
/// This intrinsic corresponds to the <c> VPCMPESTRI / PCMPESTRI </c>
/// instruction.
///
/// \param A
///    A 128-bit integer vector containing one of the source operands to be
///    compared.
/// \param LA
///    An integer that specifies the length of the string in \a A.
/// \param B
///    A 128-bit integer vector containing one of the source operands to be
///    compared.
/// \param LB
///    An integer that specifies the length of the string in \a B.
/// \param M
///    An 8-bit immediate operand specifying whether the characters are bytes or
///    words and the type of comparison to perform. \n
///    Bits [1:0]: Determine source data format. \n
///      00: 16 unsigned bytes \n
///      01: 8 unsigned words \n
///      10: 16 signed bytes \n
///      11: 8 signed words \n
///    Bits [3:2]: Determine comparison type and aggregation method. \n
///      00: Subset: Each character in \a B is compared for equality with all
///          the characters in \a A. \n
///      01: Ranges: Each character in \a B is compared to \a A. The comparison
///          basis is greater than or equal for even-indexed elements in \a A,
///          and less than or equal for odd-indexed elements in \a A. \n
///      10: Match: Compare each pair of corresponding characters in \a A and
///          \a B for equality. \n
///      11: Substring: Search \a B for substring matches of \a A. \n
///    Bits [5:4]: Determine whether to perform a one's complement on the bit
///                mask of the comparison results. \n
///      00: No effect. \n
///      01: Negate the bit mask. \n
///      10: No effect. \n
///      11: Negate the bit mask only for bits with an index less than or equal
///          to the size of \a A or \a B.
/// \returns Returns bit 0 of the resulting bit mask.
/* Fully parenthesized so the expansion is a single operand (e.g. sizeof). */
#define _mm_cmpestro(A, LA, B, LB, M) \
  ((int)__builtin_ia32_pcmpestrio128((__v16qi)(__m128i)(A), (int)(LA), \
                                     (__v16qi)(__m128i)(B), (int)(LB), \
                                     (int)(M)))

/// \brief Uses the immediate operand \a M to perform a comparison of string
///    data with explicitly defined lengths that is contained in source operands
///    \a A and \a B. Returns 1 if the length of the string in \a A is less than
///    the maximum, otherwise, returns 0.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// int _mm_cmpestrs(__m128i A, int LA, __m128i B, int LB, const int M);
/// \endcode
///
/// This intrinsic corresponds to the <c> VPCMPESTRI / PCMPESTRI </c>
/// instruction.
///
/// \param A
///    A 128-bit integer vector containing one of the source operands to be
///    compared.
/// \param LA
///    An integer that specifies the length of the string in \a A.
/// \param B
///    A 128-bit integer vector containing one of the source operands to be
///    compared.
/// \param LB
///    An integer that specifies the length of the string in \a B.
/// \param M
///    An 8-bit immediate operand specifying whether the characters are bytes or
///    words and the type of comparison to perform. \n
///    Bits [1:0]: Determine source data format. \n
///      00: 16 unsigned bytes \n
///      01: 8 unsigned words \n
///      10: 16 signed bytes \n
///      11: 8 signed words \n
///    Bits [3:2]: Determine comparison type and aggregation method. \n
///      00: Subset: Each character in \a B is compared for equality with all
///          the characters in \a A. \n
///      01: Ranges: Each character in \a B is compared to \a A. The comparison
///          basis is greater than or equal for even-indexed elements in \a A,
///          and less than or equal for odd-indexed elements in \a A. \n
///      10: Match: Compare each pair of corresponding characters in \a A and
///          \a B for equality. \n
///      11: Substring: Search \a B for substring matches of \a A. \n
///    Bits [5:4]: Determine whether to perform a one's complement on the bit
///                mask of the comparison results. \n
///      00: No effect. \n
///      01: Negate the bit mask. \n
///      10: No effect. \n
///      11: Negate the bit mask only for bits with an index less than or equal
///          to the size of \a A or \a B. \n
/// \returns Returns 1 if the length of the string in \a A is less than the
///    maximum, otherwise, returns 0.
/* Fully parenthesized so the expansion is a single operand (e.g. sizeof). */
#define _mm_cmpestrs(A, LA, B, LB, M) \
  ((int)__builtin_ia32_pcmpestris128((__v16qi)(__m128i)(A), (int)(LA), \
                                     (__v16qi)(__m128i)(B), (int)(LB), \
                                     (int)(M)))

/// \brief Uses the immediate operand \a M to perform a comparison of string
///    data with explicitly defined lengths that is contained in source operands
///    \a A and \a B. Returns 1 if the length of the string in \a B is less than
///    the maximum, otherwise, returns 0.
///
/// \headerfile <x86intrin.h>
///
/// \code
/// int _mm_cmpestrz(__m128i A, int LA, __m128i B, int LB, const int M);
/// \endcode
///
/// This intrinsic corresponds to the <c> VPCMPESTRI / PCMPESTRI </c>
/// instruction.
///
/// \param A
///    A 128-bit integer vector containing one of the source operands to be
///    compared.
/// \param LA
///    An integer that specifies the length of the string in \a A.
/// \param B
///    A 128-bit integer vector containing one of the source operands to be
///    compared.
2323168404Spjd/// \param LB 2324168404Spjd/// An integer that specifies the length of the string in \a B. 2325168404Spjd/// \param M 2326168404Spjd/// An 8-bit immediate operand specifying whether the characters are bytes or 2327168404Spjd/// words and the type of comparison to perform. \n 2328168404Spjd/// Bits [1:0]: Determine source data format. \n 2329168404Spjd/// 00: 16 unsigned bytes \n 2330168404Spjd/// 01: 8 unsigned words \n 2331168404Spjd/// 10: 16 signed bytes \n 2332168404Spjd/// 11: 8 signed words \n 2333168404Spjd/// Bits [3:2]: Determine comparison type and aggregation method. \n 2334168404Spjd/// 00: Subset: Each character in \a B is compared for equality with all 2335168404Spjd/// the characters in \a A. \n 2336168404Spjd/// 01: Ranges: Each character in \a B is compared to \a A. The comparison 2337168404Spjd/// basis is greater than or equal for even-indexed elements in \a A, 2338168404Spjd/// and less than or equal for odd-indexed elements in \a A. \n 2339168404Spjd/// 10: Match: Compare each pair of corresponding characters in \a A and 2340168404Spjd/// \a B for equality. \n 2341168404Spjd/// 11: Substring: Search \a B for substring matches of \a A. \n 2342168404Spjd/// Bits [5:4]: Determine whether to perform a one's complement on the bit 2343168404Spjd/// mask of the comparison results. \n 2344168404Spjd/// 00: No effect. \n 2345168404Spjd/// 01: Negate the bit mask. \n 2346168404Spjd/// 10: No effect. \n 2347219089Spjd/// 11: Negate the bit mask only for bits with an index less than or equal 2348219089Spjd/// to the size of \a A or \a B. 2349168404Spjd/// \returns Returns 1 if the length of the string in \a B is less than the 2350168404Spjd/// maximum, otherwise, returns 0. 
2351168404Spjd#define _mm_cmpestrz(A, LA, B, LB, M) \ 2352168404Spjd (int)__builtin_ia32_pcmpestriz128((__v16qi)(__m128i)(A), (int)(LA), \ 2353219089Spjd (__v16qi)(__m128i)(B), (int)(LB), \ 2354168404Spjd (int)(M)) 2355168404Spjd 2356168404Spjd/* SSE4.2 Compare Packed Data -- Greater Than. */ 2357168404Spjd/// \brief Compares each of the corresponding 64-bit values of the 128-bit 2358168404Spjd/// integer vectors to determine if the values in the first operand are 2359168404Spjd/// greater than those in the second operand. 2360168404Spjd/// 2361168404Spjd/// \headerfile <x86intrin.h> 2362168404Spjd/// 2363168404Spjd/// This intrinsic corresponds to the <c> VPCMPGTQ / PCMPGTQ </c> instruction. 2364168404Spjd/// 2365168404Spjd/// \param __V1 2366168404Spjd/// A 128-bit integer vector. 2367168404Spjd/// \param __V2 2368168404Spjd/// A 128-bit integer vector. 2369168404Spjd/// \returns A 128-bit integer vector containing the comparison results. 2370168404Spjdstatic __inline__ __m128i __DEFAULT_FN_ATTRS 2371168404Spjd_mm_cmpgt_epi64(__m128i __V1, __m128i __V2) 2372168404Spjd{ 2373168404Spjd return (__m128i)((__v2di)__V1 > (__v2di)__V2); 2374168404Spjd} 2375168404Spjd 2376168404Spjd/* SSE4.2 Accumulate CRC32. */ 2377168404Spjd/// \brief Adds the unsigned integer operand to the CRC-32C checksum of the 2378168404Spjd/// unsigned char operand. 2379168404Spjd/// 2380168404Spjd/// \headerfile <x86intrin.h> 2381168404Spjd/// 2382168404Spjd/// This intrinsic corresponds to the <c> CRC32B </c> instruction. 2383168404Spjd/// 2384168404Spjd/// \param __C 2385168404Spjd/// An unsigned integer operand to add to the CRC-32C checksum of operand 2386168404Spjd/// \a __D. 2387168404Spjd/// \param __D 2388168404Spjd/// An unsigned 8-bit integer operand used to compute the CRC-32C checksum. 2389168404Spjd/// \returns The result of adding operand \a __C to the CRC-32C checksum of 2390168404Spjd/// operand \a __D. 
2391168404Spjdstatic __inline__ unsigned int __DEFAULT_FN_ATTRS 2392168404Spjd_mm_crc32_u8(unsigned int __C, unsigned char __D) 2393168404Spjd{ 2394168404Spjd return __builtin_ia32_crc32qi(__C, __D); 2395168404Spjd} 2396168404Spjd 2397168404Spjd/// \brief Adds the unsigned integer operand to the CRC-32C checksum of the 2398168404Spjd/// unsigned short operand. 2399168404Spjd/// 2400168404Spjd/// \headerfile <x86intrin.h> 2401168404Spjd/// 2402168404Spjd/// This intrinsic corresponds to the <c> CRC32W </c> instruction. 2403227497Smm/// 2404219089Spjd/// \param __C 2405227497Smm/// An unsigned integer operand to add to the CRC-32C checksum of operand 2406227497Smm/// \a __D. 2407227497Smm/// \param __D 2408227497Smm/// An unsigned 16-bit integer operand used to compute the CRC-32C checksum. 2409227497Smm/// \returns The result of adding operand \a __C to the CRC-32C checksum of 2410227497Smm/// operand \a __D. 2411227497Smmstatic __inline__ unsigned int __DEFAULT_FN_ATTRS 2412219089Spjd_mm_crc32_u16(unsigned int __C, unsigned short __D) 2413219089Spjd{ 2414219089Spjd return __builtin_ia32_crc32hi(__C, __D); 2415168404Spjd} 2416168404Spjd 2417168404Spjd/// \brief Adds the first unsigned integer operand to the CRC-32C checksum of 2418168404Spjd/// the second unsigned integer operand. 2419185029Spjd/// 2420185029Spjd/// \headerfile <x86intrin.h> 2421227497Smm/// 2422227497Smm/// This intrinsic corresponds to the <c> CRC32L </c> instruction. 2423227497Smm/// 2424227497Smm/// \param __C 2425227497Smm/// An unsigned integer operand to add to the CRC-32C checksum of operand 2426227497Smm/// \a __D. 2427227497Smm/// \param __D 2428227497Smm/// An unsigned 32-bit integer operand used to compute the CRC-32C checksum. 2429227497Smm/// \returns The result of adding operand \a __C to the CRC-32C checksum of 2430227497Smm/// operand \a __D. 
2431227497Smmstatic __inline__ unsigned int __DEFAULT_FN_ATTRS 2432227497Smm_mm_crc32_u32(unsigned int __C, unsigned int __D) 2433227497Smm{ 2434227497Smm return __builtin_ia32_crc32si(__C, __D); 2435227497Smm} 2436227497Smm 2437227497Smm#ifdef __x86_64__ 2438227497Smm/// \brief Adds the unsigned integer operand to the CRC-32C checksum of the 2439227497Smm/// unsigned 64-bit integer operand. 2440227497Smm/// 2441227497Smm/// \headerfile <x86intrin.h> 2442227497Smm/// 2443227497Smm/// This intrinsic corresponds to the <c> CRC32Q </c> instruction. 2444227497Smm/// 2445227497Smm/// \param __C 2446185029Spjd/// An unsigned integer operand to add to the CRC-32C checksum of operand 2447185029Spjd/// \a __D. 2448185029Spjd/// \param __D 2449185029Spjd/// An unsigned 64-bit integer operand used to compute the CRC-32C checksum. 2450185029Spjd/// \returns The result of adding operand \a __C to the CRC-32C checksum of 2451185029Spjd/// operand \a __D. 2452185029Spjdstatic __inline__ unsigned long long __DEFAULT_FN_ATTRS 2453185029Spjd_mm_crc32_u64(unsigned long long __C, unsigned long long __D) 2454185029Spjd{ 2455185029Spjd return __builtin_ia32_crc32di(__C, __D); 2456185029Spjd} 2457185029Spjd#endif /* __x86_64__ */ 2458185029Spjd 2459185029Spjd#undef __DEFAULT_FN_ATTRS 2460219089Spjd 2461219089Spjd#ifdef __POPCNT__ 2462185029Spjd#include <popcntintrin.h> 2463185029Spjd#endif 2464185029Spjd 2465185029Spjd#endif /* _SMMINTRIN_H */ 2466185029Spjd