/* smmintrin.h revision 208954 */
/*===---- smmintrin.h - SSE4 intrinsics ------------------------------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */

24166124Srafan#ifndef _SMMINTRIN_H
25166124Srafan#define _SMMINTRIN_H
26166124Srafan
27166124Srafan#ifndef __SSE4_1__
28166124Srafan#error "SSE4.1 instruction set not enabled"
29166124Srafan#else
30166124Srafan
31166124Srafan#include <tmmintrin.h>
32166124Srafan
/* Type defines.  */
/* 16-byte vector of double (two lanes), used when casting operands for the
   double-precision builtins below.  */
typedef double __v2df __attribute__ ((__vector_size__ (16)));
/* 16-byte vector of long long (two lanes), used when casting operands for the
   64-bit integer builtins below.  */
typedef long long __v2di __attribute__ ((__vector_size__ (16)));

/* SSE4 Rounding macros. */
/* Rounding-direction selectors (low bits of the rounding-control operand
   passed to the _mm_round_* macros).  */
#define _MM_FROUND_TO_NEAREST_INT    0x00
#define _MM_FROUND_TO_NEG_INF        0x01
#define _MM_FROUND_TO_POS_INF        0x02
#define _MM_FROUND_TO_ZERO           0x03
#define _MM_FROUND_CUR_DIRECTION     0x04

/* Exception-control bit, OR-ed with one of the direction selectors.  */
#define _MM_FROUND_RAISE_EXC         0x00
#define _MM_FROUND_NO_EXC            0x08

/* Ready-made combinations of an exception-control value and a direction.  */
#define _MM_FROUND_NINT      (_MM_FROUND_RAISE_EXC | _MM_FROUND_TO_NEAREST_INT)
#define _MM_FROUND_FLOOR     (_MM_FROUND_RAISE_EXC | _MM_FROUND_TO_NEG_INF)
#define _MM_FROUND_CEIL      (_MM_FROUND_RAISE_EXC | _MM_FROUND_TO_POS_INF)
#define _MM_FROUND_TRUNC     (_MM_FROUND_RAISE_EXC | _MM_FROUND_TO_ZERO)
#define _MM_FROUND_RINT      (_MM_FROUND_RAISE_EXC | _MM_FROUND_CUR_DIRECTION)
#define _MM_FROUND_NEARBYINT (_MM_FROUND_NO_EXC | _MM_FROUND_CUR_DIRECTION)

/* Ceiling: the _mm_round_* macros with the mode fixed to _MM_FROUND_CEIL.  */
#define _mm_ceil_ps(X)       _mm_round_ps((X), _MM_FROUND_CEIL)
#define _mm_ceil_pd(X)       _mm_round_pd((X), _MM_FROUND_CEIL)
#define _mm_ceil_ss(X, Y)    _mm_round_ss((X), (Y), _MM_FROUND_CEIL)
#define _mm_ceil_sd(X, Y)    _mm_round_sd((X), (Y), _MM_FROUND_CEIL)

/* Floor: the _mm_round_* macros with the mode fixed to _MM_FROUND_FLOOR.  */
#define _mm_floor_ps(X)      _mm_round_ps((X), _MM_FROUND_FLOOR)
#define _mm_floor_pd(X)      _mm_round_pd((X), _MM_FROUND_FLOOR)
#define _mm_floor_ss(X, Y)   _mm_round_ss((X), (Y), _MM_FROUND_FLOOR)
#define _mm_floor_sd(X, Y)   _mm_round_sd((X), (Y), _MM_FROUND_FLOOR)

/* General rounding.  X (and Y for the scalar forms) are the vector operands;
   M is the rounding-control value, normally one of the _MM_FROUND_* macros.
   Each macro forwards its arguments unchanged to the matching builtin.  */
#define _mm_round_ps(X, M)      __builtin_ia32_roundps((X), (M))
#define _mm_round_ss(X, Y, M)   __builtin_ia32_roundss((X), (Y), (M))
#define _mm_round_pd(X, M)      __builtin_ia32_roundpd((X), (M))
#define _mm_round_sd(X, Y, M)   __builtin_ia32_roundsd((X), (Y), (M))

69166124Srafan/* SSE4 Packed Blending Intrinsics.  */
70166124Srafanstatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
71166124Srafan_mm_blend_pd (__m128d __V1, __m128d __V2, const int __M)
72166124Srafan{
73166124Srafan  return (__m128d) __builtin_ia32_blendpd ((__v2df)__V1, (__v2df)__V2, __M);
74166124Srafan}
75166124Srafan
76166124Srafanstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
77166124Srafan_mm_blend_ps (__m128 __V1, __m128 __V2, const int __M)
78166124Srafan{
79166124Srafan  return (__m128) __builtin_ia32_blendps ((__v4sf)__V1, (__v4sf)__V2, __M);
80166124Srafan}
81166124Srafan
82166124Srafanstatic __inline__ __m128d __attribute__((__always_inline__, __nodebug__))
83166124Srafan_mm_blendv_pd (__m128d __V1, __m128d __V2, __m128d __M)
84166124Srafan{
85166124Srafan  return (__m128d) __builtin_ia32_blendvpd ((__v2df)__V1, (__v2df)__V2,
86166124Srafan                                            (__v2df)__M);
87166124Srafan}
88166124Srafan
89166124Srafanstatic __inline__ __m128 __attribute__((__always_inline__, __nodebug__))
90166124Srafan_mm_blendv_ps (__m128 __V1, __m128 __V2, __m128 __M)
91166124Srafan{
92166124Srafan  return (__m128) __builtin_ia32_blendvps ((__v4sf)__V1, (__v4sf)__V2,
93166124Srafan                                           (__v4sf)__M);
9450276Speter}
95166124Srafan
96166124Srafanstatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
9750276Speter_mm_blendv_epi8 (__m128i __V1, __m128i __V2, __m128i __M)
98166124Srafan{
99166124Srafan  return (__m128i) __builtin_ia32_pblendvb128 ((__v16qi)__V1, (__v16qi)__V2,
100166124Srafan                                               (__v16qi)__M);
101166124Srafan}
10250276Speter
103166124Srafanstatic __inline__  __m128i __attribute__((__always_inline__, __nodebug__))
104166124Srafan_mm_blend_epi16 (__m128i __V1, __m128i __V2, const int __M)
105166124Srafan{
106166124Srafan  return (__m128i) __builtin_ia32_pblendw128 ((__v8hi)__V1, (__v8hi)__V2, __M);
107166124Srafan}
108166124Srafan
109166124Srafan/* SSE4 Dword Multiply Instructions.  */
110166124Srafanstatic __inline__  __m128i __attribute__((__always_inline__, __nodebug__))
111166124Srafan_mm_mullo_epi32 (__m128i __V1, __m128i __V2)
112166124Srafan{
113166124Srafan  return (__m128i) ((__v4si)__V1 * (__v4si)__V2);
114166124Srafan}
115166124Srafan
116166124Srafanstatic __inline__  __m128i __attribute__((__always_inline__, __nodebug__))
117166124Srafan_mm_mul_epi32 (__m128i __V1, __m128i __V2)
118166124Srafan{
119166124Srafan  return (__m128i) __builtin_ia32_pmuldq128 ((__v4si)__V1, (__v4si)__V2);
120166124Srafan}
121166124Srafan
/* SSE4 Floating Point Dot Product Instructions.  */
/* X and Y are the vector operands and M the control mask; all three are
   forwarded unchanged to the single- (dpps) or double-precision (dppd)
   builtin.  */
#define _mm_dp_ps(X, Y, M) __builtin_ia32_dpps ((X), (Y), (M))
#define _mm_dp_pd(X, Y, M) __builtin_ia32_dppd ((X), (Y), (M))

126166124Srafan/* SSE4 Streaming Load Hint Instruction.  */
127166124Srafanstatic __inline__  __m128i __attribute__((__always_inline__, __nodebug__))
128166124Srafan_mm_stream_load_si128 (__m128i *__V)
129166124Srafan{
130166124Srafan  return (__m128i) __builtin_ia32_movntdqa ((__v2di *) __V);
131166124Srafan}
132166124Srafan
133166124Srafan/* SSE4 Packed Integer Min/Max Instructions.  */
134166124Srafanstatic __inline__  __m128i __attribute__((__always_inline__, __nodebug__))
135166124Srafan_mm_min_epi8 (__m128i __V1, __m128i __V2)
136166124Srafan{
137166124Srafan  return (__m128i) __builtin_ia32_pminsb128 ((__v16qi) __V1, (__v16qi) __V2);
138166124Srafan}
139166124Srafan
140166124Srafanstatic __inline__  __m128i __attribute__((__always_inline__, __nodebug__))
141166124Srafan_mm_max_epi8 (__m128i __V1, __m128i __V2)
142166124Srafan{
143166124Srafan  return (__m128i) __builtin_ia32_pmaxsb128 ((__v16qi) __V1, (__v16qi) __V2);
144166124Srafan}
145166124Srafan
14650276Speterstatic __inline__  __m128i __attribute__((__always_inline__, __nodebug__))
147166124Srafan_mm_min_epu16 (__m128i __V1, __m128i __V2)
148166124Srafan{
14950276Speter  return (__m128i) __builtin_ia32_pminuw128 ((__v8hi) __V1, (__v8hi) __V2);
15050276Speter}
151166124Srafan
15250276Speterstatic __inline__  __m128i __attribute__((__always_inline__, __nodebug__))
15350276Speter_mm_max_epu16 (__m128i __V1, __m128i __V2)
15450276Speter{
15550276Speter  return (__m128i) __builtin_ia32_pmaxuw128 ((__v8hi) __V1, (__v8hi) __V2);
15650276Speter}
15750276Speter
15850276Speterstatic __inline__  __m128i __attribute__((__always_inline__, __nodebug__))
15950276Speter_mm_min_epi32 (__m128i __V1, __m128i __V2)
16050276Speter{
16150276Speter  return (__m128i) __builtin_ia32_pminsd128 ((__v4si) __V1, (__v4si) __V2);
16250276Speter}
16350276Speter
16450276Speterstatic __inline__  __m128i __attribute__((__always_inline__, __nodebug__))
165166124Srafan_mm_max_epi32 (__m128i __V1, __m128i __V2)
166166124Srafan{
167166124Srafan  return (__m128i) __builtin_ia32_pmaxsd128 ((__v4si) __V1, (__v4si) __V2);
168166124Srafan}
169166124Srafan
170166124Srafanstatic __inline__  __m128i __attribute__((__always_inline__, __nodebug__))
17150276Speter_mm_min_epu32 (__m128i __V1, __m128i __V2)
17250276Speter{
17350276Speter  return (__m128i) __builtin_ia32_pminud128((__v4si) __V1, (__v4si) __V2);
17450276Speter}
17550276Speter
17650276Speterstatic __inline__  __m128i __attribute__((__always_inline__, __nodebug__))
17750276Speter_mm_max_epu32 (__m128i __V1, __m128i __V2)
17850276Speter{
17950276Speter  return (__m128i) __builtin_ia32_pmaxud128((__v4si) __V1, (__v4si) __V2);
18050276Speter}
18150276Speter
/* SSE4 Insertion and Extraction from XMM Register Instructions.  */
/* Forward X, Y and the control value N unchanged to the insertps builtin.  */
#define _mm_insert_ps(X, Y, N) __builtin_ia32_insertps128((X), (Y), (N))
/* Yield, as an int, the raw bit pattern of float lane N of X.  The lane is
   stored into the float member of a union and read back through the int
   member, so no numeric conversion takes place.  X and N are parenthesized
   so that arbitrary expressions can be passed as arguments.  */
#define _mm_extract_ps(X, N) (__extension__                      \
                              ({ union { int i; float f; } __t;  \
                                 __v4sf __a = (__v4sf)(X);       \
                                 __t.f = __a[(N)];               \
                                 __t.i;}))

/* Miscellaneous insert and extract macros.  */
/* Extract a single-precision float from X at index N into D.  X and N are
   parenthesized so that arbitrary expressions can be passed as arguments.  */
#define _MM_EXTRACT_FLOAT(D, X, N) (__extension__ ({ __v4sf __a = (__v4sf)(X); \
                                                    (D) = __a[(N)]; }))

/* Or together 2 sets of indexes (X and Y) with the zeroing bits (Z) to create
   an index suitable for _mm_insert_ps.  X lands in bits 7:6, Y in bits 5:4
   and Z in the low bits.  */
#define _MM_MK_INSERTPS_NDX(X, Y, Z) (((X) << 6) | ((Y) << 4) | (Z))

/* Extract a float from X at index N into the first index of the return.
   Built from _mm_insert_ps on a zero vector with source index N,
   destination index 0 and zeroing bits 0x0e.  */
#define _MM_PICK_OUT_PS(X, N) _mm_insert_ps (_mm_setzero_ps(), (X),   \
                                             _MM_MK_INSERTPS_NDX((N), 0, 0x0e))

/* Insert int into packed integer array at index.  */
/* Each macro copies X into a local vector of the matching lane width, stores
   I into lane N of that copy and yields the copy; X itself is not modified.
   All macro arguments are parenthesized so that arbitrary expressions can be
   passed.  */
#define _mm_insert_epi8(X, I, N) (__extension__ ({ __v16qi __a = (__v16qi)(X); \
                                                   __a[(N)] = (I);             \
                                                   __a;}))
#define _mm_insert_epi32(X, I, N) (__extension__ ({ __v4si __a = (__v4si)(X); \
                                                    __a[(N)] = (I);           \
                                                    __a;}))
/* The 64-bit lane form is only provided on x86-64.  */
#ifdef __x86_64__
#define _mm_insert_epi64(X, I, N) (__extension__ ({ __v2di __a = (__v2di)(X); \
                                                    __a[(N)] = (I);           \
                                                    __a;}))
#endif /* __x86_64__ */

/* Extract int from packed integer array at index.  */
/* Each macro copies X into a local vector of the matching lane width and
   yields lane N.  All macro arguments are parenthesized so that arbitrary
   expressions can be passed.
   The byte form converts through unsigned char so the extracted byte is
   zero-extended to int (0..255) instead of inheriting the signedness of
   the __v16qi element type.  */
#define _mm_extract_epi8(X, N) (__extension__ ({ __v16qi __a = (__v16qi)(X); \
                                                 (int)(unsigned char)__a[(N)];}))
#define _mm_extract_epi32(X, N) (__extension__ ({ __v4si __a = (__v4si)(X); \
                                                  __a[(N)];}))
/* The 64-bit lane form is only provided on x86-64.  */
#ifdef __x86_64__
#define _mm_extract_epi64(X, N) (__extension__ ({ __v2di __a = (__v2di)(X); \
                                                  __a[(N)];}))
#endif /* __x86_64__ */

226166124Srafan/* SSE4 128-bit Packed Integer Comparisons.  */
22750276Speterstatic __inline__ int __attribute__((__always_inline__, __nodebug__))
22850276Speter_mm_testz_si128(__m128i __M, __m128i __V)
22950276Speter{
23050276Speter  return __builtin_ia32_ptestz128((__v2di)__M, (__v2di)__V);
231166124Srafan}
23250276Speter
23350276Speterstatic __inline__ int __attribute__((__always_inline__, __nodebug__))
234166124Srafan_mm_testc_si128(__m128i __M, __m128i __V)
23550276Speter{
236166124Srafan  return __builtin_ia32_ptestc128((__v2di)__M, (__v2di)__V);
237166124Srafan}
238166124Srafan
239166124Srafanstatic __inline__ int __attribute__((__always_inline__, __nodebug__))
240166124Srafan_mm_testnzc_si128(__m128i __M, __m128i __V)
24150276Speter{
24250276Speter  return __builtin_ia32_ptestnzc128((__v2di)__M, (__v2di)__V);
243166124Srafan}
24450276Speter
/* Convenience forms of the test intrinsics above.
   _mm_test_all_ones(V) runs _mm_testc_si128 on V against the result of
   comparing V with itself lane by lane.
   _mm_test_mix_ones_zeros(M, V) and _mm_test_all_zeros(M, V) forward the
   mask M and the value V, in that order, to _mm_testnzc_si128 and
   _mm_testz_si128 respectively.  */
#define _mm_test_all_ones(V) _mm_testc_si128((V), _mm_cmpeq_epi32((V), (V)))
#define _mm_test_mix_ones_zeros(M, V) _mm_testnzc_si128((M), (V))
#define _mm_test_all_zeros(M, V) _mm_testz_si128 ((M), (V))

249166124Srafan/* SSE4 64-bit Packed Integer Comparisons.  */
250166124Srafanstatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
25150276Speter_mm_cmpeq_epi64(__m128i __V1, __m128i __V2)
25250276Speter{
253166124Srafan  return (__m128i) __builtin_ia32_pcmpeqq((__v2di)__V1, (__v2di)__V2);
25450276Speter}
25550276Speter
25650276Speter/* SSE4 Packed Integer Sign-Extension.  */
25750276Speterstatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
25850276Speter_mm_cvtepi8_epi16(__m128i __V)
25950276Speter{
26050276Speter  return (__m128i) __builtin_ia32_pmovsxbw128((__v16qi) __V);
26150276Speter}
262166124Srafan
26350276Speterstatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
26450276Speter_mm_cvtepi8_epi32(__m128i __V)
26550276Speter{
26650276Speter  return (__m128i) __builtin_ia32_pmovsxbd128((__v16qi) __V);
26750276Speter}
268166124Srafan
269166124Srafanstatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
270166124Srafan_mm_cvtepi8_epi64(__m128i __V)
271166124Srafan{
272166124Srafan  return (__m128i) __builtin_ia32_pmovsxbq128((__v16qi) __V);
273166124Srafan}
27450276Speter
27550276Speterstatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
276166124Srafan_mm_cvtepi16_epi32(__m128i __V)
27750276Speter{
278166124Srafan  return (__m128i) __builtin_ia32_pmovsxwd128((__v8hi) __V);
27950276Speter}
28050276Speter
28150276Speterstatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
28250276Speter_mm_cvtepi16_epi64(__m128i __V)
28350276Speter{
28450276Speter  return (__m128i) __builtin_ia32_pmovsxwq128((__v8hi)__V);
285166124Srafan}
28650276Speter
28750276Speterstatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
28850276Speter_mm_cvtepi32_epi64(__m128i __V)
28950276Speter{
290166124Srafan  return (__m128i) __builtin_ia32_pmovsxdq128((__v4si)__V);
29150276Speter}
29250276Speter
29350276Speter/* SSE4 Packed Integer Zero-Extension.  */
29450276Speterstatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
295166124Srafan_mm_cvtepu8_epi16(__m128i __V)
29650276Speter{
29750276Speter  return (__m128i) __builtin_ia32_pmovzxbw128((__v16qi) __V);
29850276Speter}
29950276Speter
30050276Speterstatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
30150276Speter_mm_cvtepu8_epi32(__m128i __V)
302166124Srafan{
30350276Speter  return (__m128i) __builtin_ia32_pmovzxbd128((__v16qi)__V);
30450276Speter}
30550276Speter
30650276Speterstatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
30750276Speter_mm_cvtepu8_epi64(__m128i __V)
30850276Speter{
30950276Speter  return (__m128i) __builtin_ia32_pmovzxbq128((__v16qi)__V);
31050276Speter}
311166124Srafan
31250276Speterstatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
31350276Speter_mm_cvtepu16_epi32(__m128i __V)
31450276Speter{
31550276Speter  return (__m128i) __builtin_ia32_pmovzxwd128((__v8hi)__V);
316166124Srafan}
31750276Speter
31850276Speterstatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
31950276Speter_mm_cvtepu16_epi64(__m128i __V)
32050276Speter{
32150276Speter  return (__m128i) __builtin_ia32_pmovzxwq128((__v8hi)__V);
32250276Speter}
32350276Speter
32450276Speterstatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
32550276Speter_mm_cvtepu32_epi64(__m128i __V)
32650276Speter{
32750276Speter  return (__m128i) __builtin_ia32_pmovzxdq128((__v4si)__V);
32850276Speter}
32950276Speter
33050276Speter/* SSE4 Pack with Unsigned Saturation.  */
33150276Speterstatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
33250276Speter_mm_packus_epi32(__m128i __V1, __m128i __V2)
33350276Speter{
33450276Speter  return (__m128i) __builtin_ia32_packusdw128((__v4si)__V1, (__v4si)__V2);
33550276Speter}
33650276Speter
/* SSE4 Multiple Packed Sums of Absolute Difference.  */
/* X and Y are the vector operands and M the control mask; all three are
   forwarded unchanged to __builtin_ia32_mpsadbw128.  */
#define _mm_mpsadbw_epu8(X, Y, M) __builtin_ia32_mpsadbw128((X), (Y), (M))

34050276Speter/* These definitions are normally in nmmintrin.h, but gcc puts them in here
34150276Speter   so we'll do the same.  */
342166124Srafan#ifdef __SSE4_2__
34350276Speter
/* Control-word fields for the string-comparison macros below; one value
   from each group is OR-ed together to form the M argument.  */

/* These specify the type of data that we're comparing.  */
#define _SIDD_UBYTE_OPS                 0x00
#define _SIDD_UWORD_OPS                 0x01
#define _SIDD_SBYTE_OPS                 0x02
#define _SIDD_SWORD_OPS                 0x03

/* These specify the type of comparison operation.  */
#define _SIDD_CMP_EQUAL_ANY             0x00
#define _SIDD_CMP_RANGES                0x04
#define _SIDD_CMP_EQUAL_EACH            0x08
#define _SIDD_CMP_EQUAL_ORDERED         0x0c

/* These macros specify the polarity of the operation.  */
#define _SIDD_POSITIVE_POLARITY         0x00
#define _SIDD_NEGATIVE_POLARITY         0x10
#define _SIDD_MASKED_POSITIVE_POLARITY  0x20
#define _SIDD_MASKED_NEGATIVE_POLARITY  0x30

/* These macros are used in _mm_cmpXstri() to specify the return.  */
#define _SIDD_LEAST_SIGNIFICANT         0x00
#define _SIDD_MOST_SIGNIFICANT          0x40

/* These macros are used in _mm_cmpXstrm() to specify the return.  They share
   bit 0x40 with the index selectors above.  */
#define _SIDD_BIT_MASK                  0x00
#define _SIDD_UNIT_MASK                 0x40

/* SSE4.2 Packed Comparison Intrinsics.  */
/* Implicit-length forms: A and B are the string operands and M the control
   word (see the _SIDD_* macros).  The "m" form forwards to the mask-returning
   builtin, the "i" form to the index-returning one.  */
#define _mm_cmpistrm(A, B, M) __builtin_ia32_pcmpistrm128((A), (B), (M))
#define _mm_cmpistri(A, B, M) __builtin_ia32_pcmpistri128((A), (B), (M))

/* Explicit-length forms: LA and LB carry the lengths of A and B.  The
   parameter names match the names used in the expansion, so every argument
   is actually forwarded to the builtin.  */
#define _mm_cmpestrm(A, LA, B, LB, M) \
     __builtin_ia32_pcmpestrm128((A), (LA), (B), (LB), (M))
#define _mm_cmpestri(A, LA, B, LB, M) \
     __builtin_ia32_pcmpestri128((A), (LA), (B), (LB), (M))

/* SSE4.2 Packed Comparison Intrinsics and EFlag Reading.  */
/* Each macro forwards to the builtin whose name ends in the same letter
   (a, c, o, s or z); the letter selects which flag result of the comparison
   is returned.

   Implicit-length forms take the two string operands A and B and the control
   word M only, matching _mm_cmpistri/_mm_cmpistrm: the pcmpistri* builtins
   have no length operands.  */
#define _mm_cmpistra(A, B, M) \
     __builtin_ia32_pcmpistria128((A), (B), (M))
#define _mm_cmpistrc(A, B, M) \
     __builtin_ia32_pcmpistric128((A), (B), (M))
#define _mm_cmpistro(A, B, M) \
     __builtin_ia32_pcmpistrio128((A), (B), (M))
#define _mm_cmpistrs(A, B, M) \
     __builtin_ia32_pcmpistris128((A), (B), (M))
#define _mm_cmpistrz(A, B, M) \
     __builtin_ia32_pcmpistriz128((A), (B), (M))

/* Explicit-length forms: LA and LB carry the lengths of A and B.  */
#define _mm_cmpestra(A, LA, B, LB, M) \
     __builtin_ia32_pcmpestria128((A), (LA), (B), (LB), (M))
#define _mm_cmpestrc(A, LA, B, LB, M) \
     __builtin_ia32_pcmpestric128((A), (LA), (B), (LB), (M))
#define _mm_cmpestro(A, LA, B, LB, M) \
     __builtin_ia32_pcmpestrio128((A), (LA), (B), (LB), (M))
#define _mm_cmpestrs(A, LA, B, LB, M) \
     __builtin_ia32_pcmpestris128((A), (LA), (B), (LB), (M))
#define _mm_cmpestrz(A, LA, B, LB, M) \
     __builtin_ia32_pcmpestriz128((A), (LA), (B), (LB), (M))

40250276Speter/* SSE4.2 Compare Packed Data -- Greater Than.  */
40350276Speterstatic __inline__ __m128i __attribute__((__always_inline__, __nodebug__))
404166124Srafan_mm_cmpgt_epi64(__m128i __V1, __m128i __V2)
40550276Speter{
40650276Speter  return __builtin_ia32_pcmpgtq((__v2di)__V1, (__v2di)__V2);
40750276Speter}
40850276Speter
/* SSE4.2 Accumulate CRC32.  */

/* Fold the 8-bit value __D into the running CRC __C via
   __builtin_ia32_crc32qi and return the updated CRC.  */
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
_mm_crc32_u8(unsigned int __C, unsigned char __D)
{
  return __builtin_ia32_crc32qi(__C, __D);
}

/* Fold the 16-bit value __D into the running CRC __C via
   __builtin_ia32_crc32hi and return the updated CRC.  */
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
_mm_crc32_u16(unsigned int __C, unsigned short __D)
{
  return __builtin_ia32_crc32hi(__C, __D);
}

/* Fold the 32-bit value __D into the running CRC __C via
   __builtin_ia32_crc32si and return the updated CRC.  */
static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__))
_mm_crc32_u32(unsigned int __C, unsigned int __D)
{
  return __builtin_ia32_crc32si(__C, __D);
}

/* The 64-bit CRC form is only provided on x86-64.  */
#ifdef __x86_64__
/* Fold the 64-bit value __D into the running CRC __C via
   __builtin_ia32_crc32di and return the updated CRC.  */
static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__))
_mm_crc32_u64(unsigned long long __C, unsigned long long __D)
{
  return __builtin_ia32_crc32di(__C, __D);
}
#endif /* __x86_64__ */

/* SSE4.2 Population Count.  */

/* Return the number of set bits in the 32-bit value __A, computed with
   __builtin_popcount.  */
static __inline__ int __attribute__((__always_inline__, __nodebug__))
_mm_popcnt_u32(unsigned int __A)
{
  return __builtin_popcount(__A);
}

/* The 64-bit population count is only provided on x86-64.  */
#ifdef __x86_64__
/* Return the number of set bits in the 64-bit value __A, computed with
   __builtin_popcountll.  */
static __inline__ long long __attribute__((__always_inline__, __nodebug__))
_mm_popcnt_u64(unsigned long long __A)
{
  return __builtin_popcountll(__A);
}
#endif /* __x86_64__ */

45150276Speter#endif /* __SSE4_2__ */
45250276Speter#endif /* __SSE4_1__ */
45350276Speter
45450276Speter#endif /* _SMMINTRIN_H */
455166124Srafan