smmintrin.h revision 208954
150276Speter/*===---- smmintrin.h - SSE4 intrinsics ------------------------------------=== 2178866Srafan * 3166124Srafan * Permission is hereby granted, free of charge, to any person obtaining a copy 4178866Srafan * of this software and associated documentation files (the "Software"), to deal 5166124Srafan * in the Software without restriction, including without limitation the rights 6166124Srafan * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7166124Srafan * copies of the Software, and to permit persons to whom the Software is 8166124Srafan * furnished to do so, subject to the following conditions: 9166124Srafan * 1050276Speter * The above copyright notice and this permission notice shall be included in 11166124Srafan * all copies or substantial portions of the Software. 12166124Srafan * 13166124Srafan * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14166124Srafan * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15166124Srafan * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16166124Srafan * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 1750276Speter * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18166124Srafan * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19166124Srafan * THE SOFTWARE. 2050276Speter * 21166124Srafan *===-----------------------------------------------------------------------=== 22166124Srafan */ 2362449Speter 24166124Srafan#ifndef _SMMINTRIN_H 25166124Srafan#define _SMMINTRIN_H 26166124Srafan 27166124Srafan#ifndef __SSE4_1__ 28166124Srafan#error "SSE4.1 instruction set not enabled" 29166124Srafan#else 30166124Srafan 31166124Srafan#include <tmmintrin.h> 32166124Srafan 33166124Srafan/* Type defines. 
*/
typedef double __v2df __attribute__ ((__vector_size__ (16)));
typedef long long __v2di __attribute__ ((__vector_size__ (16)));

/* SSE4 Rounding macros.
   Values 0x00-0x04 select the rounding direction.  */
#define _MM_FROUND_TO_NEAREST_INT    0x00
#define _MM_FROUND_TO_NEG_INF        0x01
#define _MM_FROUND_TO_POS_INF        0x02
#define _MM_FROUND_TO_ZERO           0x03
#define _MM_FROUND_CUR_DIRECTION     0x04

/* Exception flag, OR-ed together with one of the direction values above.  */
#define _MM_FROUND_RAISE_EXC         0x00
#define _MM_FROUND_NO_EXC            0x08

/* Ready-made direction/exception combinations.  */
#define _MM_FROUND_NINT      (_MM_FROUND_RAISE_EXC | _MM_FROUND_TO_NEAREST_INT)
#define _MM_FROUND_FLOOR     (_MM_FROUND_RAISE_EXC | _MM_FROUND_TO_NEG_INF)
#define _MM_FROUND_CEIL      (_MM_FROUND_RAISE_EXC | _MM_FROUND_TO_POS_INF)
#define _MM_FROUND_TRUNC     (_MM_FROUND_RAISE_EXC | _MM_FROUND_TO_ZERO)
#define _MM_FROUND_RINT      (_MM_FROUND_RAISE_EXC | _MM_FROUND_CUR_DIRECTION)
#define _MM_FROUND_NEARBYINT (_MM_FROUND_NO_EXC | _MM_FROUND_CUR_DIRECTION)

/* Ceiling: the _mm_round_* macros with the mode fixed to _MM_FROUND_CEIL.  */
#define _mm_ceil_ps(X)       _mm_round_ps((X), _MM_FROUND_CEIL)
#define _mm_ceil_pd(X)       _mm_round_pd((X), _MM_FROUND_CEIL)
#define _mm_ceil_ss(X, Y)    _mm_round_ss((X), (Y), _MM_FROUND_CEIL)
#define _mm_ceil_sd(X, Y)    _mm_round_sd((X), (Y), _MM_FROUND_CEIL)

/* Floor: the _mm_round_* macros with the mode fixed to _MM_FROUND_FLOOR.  */
#define _mm_floor_ps(X)      _mm_round_ps((X), _MM_FROUND_FLOOR)
#define _mm_floor_pd(X)      _mm_round_pd((X), _MM_FROUND_FLOOR)
#define _mm_floor_ss(X, Y)   _mm_round_ss((X), (Y), _MM_FROUND_FLOOR)
#define _mm_floor_sd(X, Y)   _mm_round_sd((X), (Y), _MM_FROUND_FLOOR)

/* Generic rounding: operands and the mode M are handed to the round builtins
   unchanged.  */
#define _mm_round_ps(X, M)      __builtin_ia32_roundps((X), (M))
#define _mm_round_ss(X, Y, M)   __builtin_ia32_roundss((X), (Y), (M))
#define _mm_round_pd(X, M)      __builtin_ia32_roundpd((X), (M))
67166124Srafan#define _mm_round_sd(X, Y, M) __builtin_ia32_roundsd((X), (Y), (M)) 68166124Srafan 69166124Srafan/* SSE4 Packed Blending Intrinsics. */ 70166124Srafanstatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 71166124Srafan_mm_blend_pd (__m128d __V1, __m128d __V2, const int __M) 72166124Srafan{ 73166124Srafan return (__m128d) __builtin_ia32_blendpd ((__v2df)__V1, (__v2df)__V2, __M); 74166124Srafan} 75166124Srafan 76166124Srafanstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 77166124Srafan_mm_blend_ps (__m128 __V1, __m128 __V2, const int __M) 78166124Srafan{ 79166124Srafan return (__m128) __builtin_ia32_blendps ((__v4sf)__V1, (__v4sf)__V2, __M); 80166124Srafan} 81166124Srafan 82166124Srafanstatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__)) 83166124Srafan_mm_blendv_pd (__m128d __V1, __m128d __V2, __m128d __M) 84166124Srafan{ 85166124Srafan return (__m128d) __builtin_ia32_blendvpd ((__v2df)__V1, (__v2df)__V2, 86166124Srafan (__v2df)__M); 87166124Srafan} 88166124Srafan 89166124Srafanstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__)) 90166124Srafan_mm_blendv_ps (__m128 __V1, __m128 __V2, __m128 __M) 91166124Srafan{ 92166124Srafan return (__m128) __builtin_ia32_blendvps ((__v4sf)__V1, (__v4sf)__V2, 93166124Srafan (__v4sf)__M); 9450276Speter} 95166124Srafan 96166124Srafanstatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 9750276Speter_mm_blendv_epi8 (__m128i __V1, __m128i __V2, __m128i __M) 98166124Srafan{ 99166124Srafan return (__m128i) __builtin_ia32_pblendvb128 ((__v16qi)__V1, (__v16qi)__V2, 100166124Srafan (__v16qi)__M); 101166124Srafan} 10250276Speter 103166124Srafanstatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 104166124Srafan_mm_blend_epi16 (__m128i __V1, __m128i __V2, const int __M) 105166124Srafan{ 106166124Srafan return (__m128i) __builtin_ia32_pblendw128 ((__v8hi)__V1, (__v8hi)__V2, __M); 107166124Srafan} 
108166124Srafan 109166124Srafan/* SSE4 Dword Multiply Instructions. */ 110166124Srafanstatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 111166124Srafan_mm_mullo_epi32 (__m128i __V1, __m128i __V2) 112166124Srafan{ 113166124Srafan return (__m128i) ((__v4si)__V1 * (__v4si)__V2); 114166124Srafan} 115166124Srafan 116166124Srafanstatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 117166124Srafan_mm_mul_epi32 (__m128i __V1, __m128i __V2) 118166124Srafan{ 119166124Srafan return (__m128i) __builtin_ia32_pmuldq128 ((__v4si)__V1, (__v4si)__V2); 120166124Srafan} 121166124Srafan 122166124Srafan/* SSE4 Floating Point Dot Product Instructions. */ 123166124Srafan#define _mm_dp_ps(X, Y, M) __builtin_ia32_dpps ((X), (Y), (M)) 124166124Srafan#define _mm_dp_pd(X, Y, M) __builtin_ia32_dppd ((X), (Y), (M)) 125166124Srafan 126166124Srafan/* SSE4 Streaming Load Hint Instruction. */ 127166124Srafanstatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 128166124Srafan_mm_stream_load_si128 (__m128i *__V) 129166124Srafan{ 130166124Srafan return (__m128i) __builtin_ia32_movntdqa ((__v2di *) __V); 131166124Srafan} 132166124Srafan 133166124Srafan/* SSE4 Packed Integer Min/Max Instructions. 
*/ 134166124Srafanstatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 135166124Srafan_mm_min_epi8 (__m128i __V1, __m128i __V2) 136166124Srafan{ 137166124Srafan return (__m128i) __builtin_ia32_pminsb128 ((__v16qi) __V1, (__v16qi) __V2); 138166124Srafan} 139166124Srafan 140166124Srafanstatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 141166124Srafan_mm_max_epi8 (__m128i __V1, __m128i __V2) 142166124Srafan{ 143166124Srafan return (__m128i) __builtin_ia32_pmaxsb128 ((__v16qi) __V1, (__v16qi) __V2); 144166124Srafan} 145166124Srafan 14650276Speterstatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 147166124Srafan_mm_min_epu16 (__m128i __V1, __m128i __V2) 148166124Srafan{ 14950276Speter return (__m128i) __builtin_ia32_pminuw128 ((__v8hi) __V1, (__v8hi) __V2); 15050276Speter} 151166124Srafan 15250276Speterstatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 15350276Speter_mm_max_epu16 (__m128i __V1, __m128i __V2) 15450276Speter{ 15550276Speter return (__m128i) __builtin_ia32_pmaxuw128 ((__v8hi) __V1, (__v8hi) __V2); 15650276Speter} 15750276Speter 15850276Speterstatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 15950276Speter_mm_min_epi32 (__m128i __V1, __m128i __V2) 16050276Speter{ 16150276Speter return (__m128i) __builtin_ia32_pminsd128 ((__v4si) __V1, (__v4si) __V2); 16250276Speter} 16350276Speter 16450276Speterstatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 165166124Srafan_mm_max_epi32 (__m128i __V1, __m128i __V2) 166166124Srafan{ 167166124Srafan return (__m128i) __builtin_ia32_pmaxsd128 ((__v4si) __V1, (__v4si) __V2); 168166124Srafan} 169166124Srafan 170166124Srafanstatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 17150276Speter_mm_min_epu32 (__m128i __V1, __m128i __V2) 17250276Speter{ 17350276Speter return (__m128i) __builtin_ia32_pminud128((__v4si) __V1, (__v4si) __V2); 17450276Speter} 17550276Speter 
17650276Speterstatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 17750276Speter_mm_max_epu32 (__m128i __V1, __m128i __V2) 17850276Speter{ 17950276Speter return (__m128i) __builtin_ia32_pmaxud128((__v4si) __V1, (__v4si) __V2); 18050276Speter} 18150276Speter 18250276Speter/* SSE4 Insertion and Extraction from XMM Register Instructions. */ 18350276Speter#define _mm_insert_ps(X, Y, N) __builtin_ia32_insertps128((X), (Y), (N)) 184166124Srafan#define _mm_extract_ps(X, N) (__extension__ \ 185166124Srafan ({ union { int i; float f; } __t; \ 186166124Srafan __v4sf __a = (__v4sf)X; \ 187166124Srafan __t.f = __a[N]; \ 188166124Srafan __t.i;})) 189166124Srafan 19050276Speter/* Miscellaneous insert and extract macros. */ 19150276Speter/* Extract a single-precision float from X at index N into D. */ 19250276Speter#define _MM_EXTRACT_FLOAT(D, X, N) (__extension__ ({ __v4sf __a = (__v4sf)X; \ 19350276Speter (D) = __a[N]; })) 19450276Speter 19550276Speter/* Or together 2 sets of indexes (X and Y) with the zeroing bits (Z) to create 19650276Speter an index suitable for _mm_insert_ps. */ 19750276Speter#define _MM_MK_INSERTPS_NDX(X, Y, Z) (((X) << 6) | ((Y) << 4) | (Z)) 19850276Speter 19950276Speter/* Extract a float from X at index N into the first index of the return. */ 20050276Speter#define _MM_PICK_OUT_PS(X, N) _mm_insert_ps (_mm_setzero_ps(), (X), \ 201166124Srafan _MM_MK_INSERTPS_NDX((N), 0, 0x0e)) 20250276Speter 20350276Speter/* Insert int into packed integer array at index. 
*/ 20450276Speter#define _mm_insert_epi8(X, I, N) (__extension__ ({ __v16qi __a = (__v16qi)X; \ 205166124Srafan __a[N] = I; \ 20650276Speter __a;})) 20750276Speter#define _mm_insert_epi32(X, I, N) (__extension__ ({ __v4si __a = (__v4si)X; \ 20850276Speter __a[N] = I; \ 20950276Speter __a;})) 210166124Srafan#ifdef __x86_64__ 21150276Speter#define _mm_insert_epi64(X, I, N) (__extension__ ({ __v2di __a = (__v2di)X; \ 21250276Speter __a[N] = I; \ 213166124Srafan __a;})) 21450276Speter#endif /* __x86_64__ */ 215166124Srafan 21650276Speter/* Extract int from packed integer array at index. */ 21750276Speter#define _mm_extract_epi8(X, N) (__extension__ ({ __v16qi __a = (__v16qi)X; \ 21850276Speter __a[N];})) 21950276Speter#define _mm_extract_epi32(X, N) (__extension__ ({ __v4si __a = (__v4si)X; \ 22050276Speter __a[N];})) 22150276Speter#ifdef __x86_64__ 222166124Srafan#define _mm_extract_epi64(X, N) (__extension__ ({ __v2di __a = (__v2di)X; \ 22350276Speter __a[N];})) 224166124Srafan#endif /* __x86_64 */ 225166124Srafan 226166124Srafan/* SSE4 128-bit Packed Integer Comparisons. 
*/ 22750276Speterstatic __inline__ int __attribute__((__always_inline__, __nodebug__)) 22850276Speter_mm_testz_si128(__m128i __M, __m128i __V) 22950276Speter{ 23050276Speter return __builtin_ia32_ptestz128((__v2di)__M, (__v2di)__V); 231166124Srafan} 23250276Speter 23350276Speterstatic __inline__ int __attribute__((__always_inline__, __nodebug__)) 234166124Srafan_mm_testc_si128(__m128i __M, __m128i __V) 23550276Speter{ 236166124Srafan return __builtin_ia32_ptestc128((__v2di)__M, (__v2di)__V); 237166124Srafan} 238166124Srafan 239166124Srafanstatic __inline__ int __attribute__((__always_inline__, __nodebug__)) 240166124Srafan_mm_testnzc_si128(__m128i __M, __m128i __V) 24150276Speter{ 24250276Speter return __builtin_ia32_ptestnzc128((__v2di)__M, (__v2di)__V); 243166124Srafan} 24450276Speter 245166124Srafan#define _mm_test_all_ones(V) _mm_testc_si128((V), _mm_cmpeq_epi32((V), (V))) 246166124Srafan#define _mm_test_mix_ones_zeros(M, V) _mm_testnzc_si128((M), (V)) 247166124Srafan#define _mm_test_all_zeros(M, V) _mm_testz_si128 ((V), (V)) 248166124Srafan 249166124Srafan/* SSE4 64-bit Packed Integer Comparisons. */ 250166124Srafanstatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 25150276Speter_mm_cmpeq_epi64(__m128i __V1, __m128i __V2) 25250276Speter{ 253166124Srafan return (__m128i) __builtin_ia32_pcmpeqq((__v2di)__V1, (__v2di)__V2); 25450276Speter} 25550276Speter 25650276Speter/* SSE4 Packed Integer Sign-Extension. 
*/ 25750276Speterstatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 25850276Speter_mm_cvtepi8_epi16(__m128i __V) 25950276Speter{ 26050276Speter return (__m128i) __builtin_ia32_pmovsxbw128((__v16qi) __V); 26150276Speter} 262166124Srafan 26350276Speterstatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 26450276Speter_mm_cvtepi8_epi32(__m128i __V) 26550276Speter{ 26650276Speter return (__m128i) __builtin_ia32_pmovsxbd128((__v16qi) __V); 26750276Speter} 268166124Srafan 269166124Srafanstatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 270166124Srafan_mm_cvtepi8_epi64(__m128i __V) 271166124Srafan{ 272166124Srafan return (__m128i) __builtin_ia32_pmovsxbq128((__v16qi) __V); 273166124Srafan} 27450276Speter 27550276Speterstatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 276166124Srafan_mm_cvtepi16_epi32(__m128i __V) 27750276Speter{ 278166124Srafan return (__m128i) __builtin_ia32_pmovsxwd128((__v8hi) __V); 27950276Speter} 28050276Speter 28150276Speterstatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 28250276Speter_mm_cvtepi16_epi64(__m128i __V) 28350276Speter{ 28450276Speter return (__m128i) __builtin_ia32_pmovsxwq128((__v8hi)__V); 285166124Srafan} 28650276Speter 28750276Speterstatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 28850276Speter_mm_cvtepi32_epi64(__m128i __V) 28950276Speter{ 290166124Srafan return (__m128i) __builtin_ia32_pmovsxdq128((__v4si)__V); 29150276Speter} 29250276Speter 29350276Speter/* SSE4 Packed Integer Zero-Extension. 
*/ 29450276Speterstatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 295166124Srafan_mm_cvtepu8_epi16(__m128i __V) 29650276Speter{ 29750276Speter return (__m128i) __builtin_ia32_pmovzxbw128((__v16qi) __V); 29850276Speter} 29950276Speter 30050276Speterstatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 30150276Speter_mm_cvtepu8_epi32(__m128i __V) 302166124Srafan{ 30350276Speter return (__m128i) __builtin_ia32_pmovzxbd128((__v16qi)__V); 30450276Speter} 30550276Speter 30650276Speterstatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 30750276Speter_mm_cvtepu8_epi64(__m128i __V) 30850276Speter{ 30950276Speter return (__m128i) __builtin_ia32_pmovzxbq128((__v16qi)__V); 31050276Speter} 311166124Srafan 31250276Speterstatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 31350276Speter_mm_cvtepu16_epi32(__m128i __V) 31450276Speter{ 31550276Speter return (__m128i) __builtin_ia32_pmovzxwd128((__v8hi)__V); 316166124Srafan} 31750276Speter 31850276Speterstatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 31950276Speter_mm_cvtepu16_epi64(__m128i __V) 32050276Speter{ 32150276Speter return (__m128i) __builtin_ia32_pmovzxwq128((__v8hi)__V); 32250276Speter} 32350276Speter 32450276Speterstatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 32550276Speter_mm_cvtepu32_epi64(__m128i __V) 32650276Speter{ 32750276Speter return (__m128i) __builtin_ia32_pmovzxdq128((__v4si)__V); 32850276Speter} 32950276Speter 33050276Speter/* SSE4 Pack with Unsigned Saturation. */ 33150276Speterstatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 33250276Speter_mm_packus_epi32(__m128i __V1, __m128i __V2) 33350276Speter{ 33450276Speter return (__m128i) __builtin_ia32_packusdw128((__v4si)__V1, (__v4si)__V2); 33550276Speter} 33650276Speter 337166124Srafan/* SSE4 Multiple Packed Sums of Absolute Difference. 
*/ 33850276Speter#define _mm_mpsadbw_epu8(X, Y, M) __builtin_ia32_mpsadbw128((X), (Y), (M)) 33950276Speter 34050276Speter/* These definitions are normally in nmmintrin.h, but gcc puts them in here 34150276Speter so we'll do the same. */ 342166124Srafan#ifdef __SSE4_2__ 34350276Speter 34450276Speter/* These specify the type of data that we're comparing. */ 34550276Speter#define _SIDD_UBYTE_OPS 0x00 34650276Speter#define _SIDD_UWORD_OPS 0x01 34750276Speter#define _SIDD_SBYTE_OPS 0x02 34850276Speter#define _SIDD_SWORD_OPS 0x03 349166124Srafan 35050276Speter/* These specify the type of comparison operation. */ 35150276Speter#define _SIDD_CMP_EQUAL_ANY 0x00 35250276Speter#define _SIDD_CMP_RANGES 0x04 35350276Speter#define _SIDD_CMP_EQUAL_EACH 0x08 35450276Speter#define _SIDD_CMP_EQUAL_ORDERED 0x0c 35550276Speter 356166124Srafan/* These macros specify the polarity of the operation. */ 35750276Speter#define _SIDD_POSITIVE_POLARITY 0x00 35850276Speter#define _SIDD_NEGATIVE_POLARITY 0x10 35950276Speter#define _SIDD_MASKED_POSITIVE_POLARITY 0x20 36050276Speter#define _SIDD_MASKED_NEGATIVE_POLARITY 0x30 36150276Speter 36250276Speter/* These macros are used in _mm_cmpXstri() to specify the return. */ 36350276Speter#define _SIDD_LEAST_SIGNIFICANT 0x00 36450276Speter#define _SIDD_MOST_SIGNIFICANT 0x40 36550276Speter 36650276Speter/* These macros are used in _mm_cmpXstri() to specify the return. */ 36750276Speter#define _SIDD_BIT_MASK 0x00 36850276Speter#define _SIDD_UNIT_MASK 0x40 36950276Speter 37050276Speter/* SSE4.2 Packed Comparison Intrinsics. 
*/ 37150276Speter#define _mm_cmpistrm(A, B, M) __builtin_ia32_pcmpistrm128((A), (B), (M)) 37250276Speter#define _mm_cmpistri(A, B, M) __builtin_ia32_pcmpistri128((A), (B), (M)) 373166124Srafan 37450276Speter#define _mm_cmpestrm(A, LA, B, LB, M) \ 37550276Speter __builtin_ia32_pcmpestrm128((A), (LA), (B), (LB), (M)) 37650276Speter#define _mm_cmpestri(X, LX, Y, LY, M) \ 37750276Speter __builtin_ia32_pcmpestri128((A), (LA), (B), (LB), (M)) 37850276Speter 37950276Speter/* SSE4.2 Packed Comparison Intrinsics and EFlag Reading. */ 38050276Speter#define _mm_cmpistra(A, LA, B, LB, M) \ 38150276Speter __builtin_ia32_pcmpistria128((A), (LA), (B), (LB), (M)) 38250276Speter#define _mm_cmpistrc(A, LA, B, LB, M) \ 383166124Srafan __builtin_ia32_pcmpistric128((A), (LA), (B), (LB), (M)) 38450276Speter#define _mm_cmpistro(A, LA, B, LB, M) \ 38550276Speter __builtin_ia32_pcmpistrio128((A), (LA), (B), (LB), (M)) 38650276Speter#define _mm_cmpistrs(A, LA, B, LB, M) \ 38750276Speter __builtin_ia32_pcmpistris128((A), (LA), (B), (LB), (M)) 38850276Speter#define _mm_cmpistrz(A, LA, B, LB, M) \ 38950276Speter __builtin_ia32_pcmpistriz128((A), (LA), (B), (LB), (M)) 39050276Speter 39150276Speter#define _mm_cmpestra(A, LA, B, LB, M) \ 39250276Speter __builtin_ia32_pcmpestria128((A), (LA), (B), (LB), (M)) 39350276Speter#define _mm_cmpestrc(A, LA, B, LB, M) \ 394166124Srafan __builtin_ia32_pcmpestric128((A), (LA), (B), (LB), (M)) 39550276Speter#define _mm_cmpestro(A, LA, B, LB, M) \ 39650276Speter __builtin_ia32_pcmpestrio128((A), (LA), (B), (LB), (M)) 39750276Speter#define _mm_cmpestrs(A, LA, B, LB, M) \ 39850276Speter __builtin_ia32_pcmpestris128((A), (LA), (B), (LB), (M)) 399166124Srafan#define _mm_cmpestrz(A, LA, B, LB, M) \ 40050276Speter __builtin_ia32_pcmpestriz128((A), (LA), (B), (LB), (M)) 40150276Speter 40250276Speter/* SSE4.2 Compare Packed Data -- Greater Than. 
*/ 40350276Speterstatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__)) 404166124Srafan_mm_cmpgt_epi64(__m128i __V1, __m128i __V2) 40550276Speter{ 40650276Speter return __builtin_ia32_pcmpgtq((__v2di)__V1, (__v2di)__V2); 40750276Speter} 40850276Speter 40950276Speter/* SSE4.2 Accumulate CRC32. */ 41050276Speterstatic __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) 411166124Srafan_mm_crc32_u8(unsigned int __C, unsigned char __D) 41250276Speter{ 41350276Speter return __builtin_ia32_crc32qi(__C, __D); 414166124Srafan} 41550276Speter 416166124Srafanstatic __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) 41750276Speter_mm_crc32_u16(unsigned int __C, unsigned short __D) 41850276Speter{ 41950276Speter return __builtin_ia32_crc32hi(__C, __D); 42050276Speter} 421166124Srafan 422166124Srafanstatic __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) 42350276Speter_mm_crc32_u32(unsigned int __C, unsigned int __D) 42450276Speter{ 425166124Srafan return __builtin_ia32_crc32si(__C, __D); 42650276Speter} 427166124Srafan 428166124Srafan#ifdef __x86_64__ 429166124Srafanstatic __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__)) 43050276Speter_mm_crc32_u64(unsigned long long __C, unsigned long long __D) 431166124Srafan{ 432166124Srafan return __builtin_ia32_crc32di(__C, __D); 43350276Speter} 43450276Speter#endif /* __x86_64__ */ 435166124Srafan 43650276Speter/* SSE4.2 Population Count. 
*/ 43750276Speterstatic __inline__ int __attribute__((__always_inline__, __nodebug__)) 438166124Srafan_mm_popcnt_u32(unsigned int __A) 43950276Speter{ 440166124Srafan return __builtin_popcount(__A); 441166124Srafan} 442166124Srafan 443166124Srafan#ifdef __x86_64__ 444166124Srafanstatic __inline__ long long __attribute__((__always_inline__, __nodebug__)) 44550276Speter_mm_popcnt_u64(unsigned long long __A) 44650276Speter{ 44750276Speter return __builtin_popcountll(__A); 44850276Speter} 44950276Speter#endif /* __x86_64__ */ 45050276Speter 45150276Speter#endif /* __SSE4_2__ */ 45250276Speter#endif /* __SSE4_1__ */ 45350276Speter 45450276Speter#endif /* _SMMINTRIN_H */ 455166124Srafan