xopintrin.h revision 341825
/*===---- xopintrin.h - XOP intrinsics -------------------------------------===
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 *
 *===-----------------------------------------------------------------------===
 */

24239313Sdim#ifndef __X86INTRIN_H
25251662Sdim#error "Never use <xopintrin.h> directly; include <x86intrin.h> instead."
26239313Sdim#endif
27239313Sdim
28239313Sdim#ifndef __XOPINTRIN_H
29239313Sdim#define __XOPINTRIN_H
30239313Sdim
#include <fma4intrin.h>

/* Default attributes for the functions in this file.  Both sets request
 * always_inline, nodebug and the "xop" target feature; they differ only in
 * the __min_vector_width__ value (128 for the __m128* functions, 256 for the
 * __m256* ones).  Both macros are #undef'd again at the end of the header. */
#define __DEFAULT_FN_ATTRS \
  __attribute__((__always_inline__, __nodebug__, __target__("xop"), \
                 __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 \
  __attribute__((__always_inline__, __nodebug__, __target__("xop"), \
                 __min_vector_width__(256)))

37288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
38239313Sdim_mm_maccs_epi16(__m128i __A, __m128i __B, __m128i __C)
39239313Sdim{
40239313Sdim  return (__m128i)__builtin_ia32_vpmacssww((__v8hi)__A, (__v8hi)__B, (__v8hi)__C);
41239313Sdim}
42239313Sdim
43288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
44239313Sdim_mm_macc_epi16(__m128i __A, __m128i __B, __m128i __C)
45239313Sdim{
46239313Sdim  return (__m128i)__builtin_ia32_vpmacsww((__v8hi)__A, (__v8hi)__B, (__v8hi)__C);
47239313Sdim}
48239313Sdim
49288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
50239313Sdim_mm_maccsd_epi16(__m128i __A, __m128i __B, __m128i __C)
51239313Sdim{
52239313Sdim  return (__m128i)__builtin_ia32_vpmacsswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
53239313Sdim}
54239313Sdim
55288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
56239313Sdim_mm_maccd_epi16(__m128i __A, __m128i __B, __m128i __C)
57239313Sdim{
58239313Sdim  return (__m128i)__builtin_ia32_vpmacswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
59239313Sdim}
60239313Sdim
61288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
62239313Sdim_mm_maccs_epi32(__m128i __A, __m128i __B, __m128i __C)
63239313Sdim{
64239313Sdim  return (__m128i)__builtin_ia32_vpmacssdd((__v4si)__A, (__v4si)__B, (__v4si)__C);
65239313Sdim}
66239313Sdim
67288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
68239313Sdim_mm_macc_epi32(__m128i __A, __m128i __B, __m128i __C)
69239313Sdim{
70239313Sdim  return (__m128i)__builtin_ia32_vpmacsdd((__v4si)__A, (__v4si)__B, (__v4si)__C);
71239313Sdim}
72239313Sdim
73288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
74239313Sdim_mm_maccslo_epi32(__m128i __A, __m128i __B, __m128i __C)
75239313Sdim{
76239313Sdim  return (__m128i)__builtin_ia32_vpmacssdql((__v4si)__A, (__v4si)__B, (__v2di)__C);
77239313Sdim}
78239313Sdim
79288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
80239313Sdim_mm_macclo_epi32(__m128i __A, __m128i __B, __m128i __C)
81239313Sdim{
82239313Sdim  return (__m128i)__builtin_ia32_vpmacsdql((__v4si)__A, (__v4si)__B, (__v2di)__C);
83239313Sdim}
84239313Sdim
85288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
86239313Sdim_mm_maccshi_epi32(__m128i __A, __m128i __B, __m128i __C)
87239313Sdim{
88239313Sdim  return (__m128i)__builtin_ia32_vpmacssdqh((__v4si)__A, (__v4si)__B, (__v2di)__C);
89239313Sdim}
90239313Sdim
91288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
92239313Sdim_mm_macchi_epi32(__m128i __A, __m128i __B, __m128i __C)
93239313Sdim{
94239313Sdim  return (__m128i)__builtin_ia32_vpmacsdqh((__v4si)__A, (__v4si)__B, (__v2di)__C);
95239313Sdim}
96239313Sdim
97288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
98239313Sdim_mm_maddsd_epi16(__m128i __A, __m128i __B, __m128i __C)
99239313Sdim{
100239313Sdim  return (__m128i)__builtin_ia32_vpmadcsswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
101239313Sdim}
102239313Sdim
103288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
104239313Sdim_mm_maddd_epi16(__m128i __A, __m128i __B, __m128i __C)
105239313Sdim{
106239313Sdim  return (__m128i)__builtin_ia32_vpmadcswd((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
107239313Sdim}
108239313Sdim
109288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
110239313Sdim_mm_haddw_epi8(__m128i __A)
111239313Sdim{
112239313Sdim  return (__m128i)__builtin_ia32_vphaddbw((__v16qi)__A);
113239313Sdim}
114239313Sdim
115288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
116239313Sdim_mm_haddd_epi8(__m128i __A)
117239313Sdim{
118239313Sdim  return (__m128i)__builtin_ia32_vphaddbd((__v16qi)__A);
119239313Sdim}
120239313Sdim
121288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
122239313Sdim_mm_haddq_epi8(__m128i __A)
123239313Sdim{
124239313Sdim  return (__m128i)__builtin_ia32_vphaddbq((__v16qi)__A);
125239313Sdim}
126239313Sdim
127288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
128239313Sdim_mm_haddd_epi16(__m128i __A)
129239313Sdim{
130239313Sdim  return (__m128i)__builtin_ia32_vphaddwd((__v8hi)__A);
131239313Sdim}
132239313Sdim
133288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
134239313Sdim_mm_haddq_epi16(__m128i __A)
135239313Sdim{
136239313Sdim  return (__m128i)__builtin_ia32_vphaddwq((__v8hi)__A);
137239313Sdim}
138239313Sdim
139288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
140239313Sdim_mm_haddq_epi32(__m128i __A)
141239313Sdim{
142239313Sdim  return (__m128i)__builtin_ia32_vphadddq((__v4si)__A);
143239313Sdim}
144239313Sdim
145288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
146239313Sdim_mm_haddw_epu8(__m128i __A)
147239313Sdim{
148239313Sdim  return (__m128i)__builtin_ia32_vphaddubw((__v16qi)__A);
149239313Sdim}
150239313Sdim
151288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
152239313Sdim_mm_haddd_epu8(__m128i __A)
153239313Sdim{
154239313Sdim  return (__m128i)__builtin_ia32_vphaddubd((__v16qi)__A);
155239313Sdim}
156239313Sdim
157288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
158239313Sdim_mm_haddq_epu8(__m128i __A)
159239313Sdim{
160239313Sdim  return (__m128i)__builtin_ia32_vphaddubq((__v16qi)__A);
161239313Sdim}
162239313Sdim
163288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
164239313Sdim_mm_haddd_epu16(__m128i __A)
165239313Sdim{
166239313Sdim  return (__m128i)__builtin_ia32_vphadduwd((__v8hi)__A);
167239313Sdim}
168239313Sdim
169288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
170239313Sdim_mm_haddq_epu16(__m128i __A)
171239313Sdim{
172239313Sdim  return (__m128i)__builtin_ia32_vphadduwq((__v8hi)__A);
173239313Sdim}
174239313Sdim
175288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
176239313Sdim_mm_haddq_epu32(__m128i __A)
177239313Sdim{
178239313Sdim  return (__m128i)__builtin_ia32_vphaddudq((__v4si)__A);
179239313Sdim}
180239313Sdim
181288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
182239313Sdim_mm_hsubw_epi8(__m128i __A)
183239313Sdim{
184239313Sdim  return (__m128i)__builtin_ia32_vphsubbw((__v16qi)__A);
185239313Sdim}
186239313Sdim
187288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
188239313Sdim_mm_hsubd_epi16(__m128i __A)
189239313Sdim{
190239313Sdim  return (__m128i)__builtin_ia32_vphsubwd((__v8hi)__A);
191239313Sdim}
192239313Sdim
193288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
194239313Sdim_mm_hsubq_epi32(__m128i __A)
195239313Sdim{
196239313Sdim  return (__m128i)__builtin_ia32_vphsubdq((__v4si)__A);
197239313Sdim}
198239313Sdim
199288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
200239313Sdim_mm_cmov_si128(__m128i __A, __m128i __B, __m128i __C)
201239313Sdim{
202321369Sdim  return (__m128i)(((__v2du)__A & (__v2du)__C) | ((__v2du)__B & ~(__v2du)__C));
203239313Sdim}
204239313Sdim
205341825Sdimstatic __inline__ __m256i __DEFAULT_FN_ATTRS256
206239313Sdim_mm256_cmov_si256(__m256i __A, __m256i __B, __m256i __C)
207239313Sdim{
208321369Sdim  return (__m256i)(((__v4du)__A & (__v4du)__C) | ((__v4du)__B & ~(__v4du)__C));
209239313Sdim}
210239313Sdim
211288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
212239313Sdim_mm_perm_epi8(__m128i __A, __m128i __B, __m128i __C)
213239313Sdim{
214239313Sdim  return (__m128i)__builtin_ia32_vpperm((__v16qi)__A, (__v16qi)__B, (__v16qi)__C);
215239313Sdim}
216239313Sdim
217288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
218239313Sdim_mm_rot_epi8(__m128i __A, __m128i __B)
219239313Sdim{
220239313Sdim  return (__m128i)__builtin_ia32_vprotb((__v16qi)__A, (__v16qi)__B);
221239313Sdim}
222239313Sdim
223288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
224239313Sdim_mm_rot_epi16(__m128i __A, __m128i __B)
225239313Sdim{
226239313Sdim  return (__m128i)__builtin_ia32_vprotw((__v8hi)__A, (__v8hi)__B);
227239313Sdim}
228239313Sdim
229288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
230239313Sdim_mm_rot_epi32(__m128i __A, __m128i __B)
231239313Sdim{
232239313Sdim  return (__m128i)__builtin_ia32_vprotd((__v4si)__A, (__v4si)__B);
233239313Sdim}
234239313Sdim
235288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
236239313Sdim_mm_rot_epi64(__m128i __A, __m128i __B)
237239313Sdim{
238239313Sdim  return (__m128i)__builtin_ia32_vprotq((__v2di)__A, (__v2di)__B);
239239313Sdim}
240239313Sdim
/* Expands to __builtin_ia32_vprotbi applied to A (viewed as __v16qi) and N,
 * cast to __m128i.  The whole expansion is parenthesized so the leading cast
 * applies to the complete call even when a postfix operator follows the
 * macro invocation. */
#define _mm_roti_epi8(A, N) \
  ((__m128i)__builtin_ia32_vprotbi((__v16qi)(__m128i)(A), (N)))

/* Expands to __builtin_ia32_vprotwi applied to A (viewed as __v8hi) and N,
 * cast to __m128i.  The whole expansion is parenthesized so the leading cast
 * applies to the complete call even when a postfix operator follows the
 * macro invocation. */
#define _mm_roti_epi16(A, N) \
  ((__m128i)__builtin_ia32_vprotwi((__v8hi)(__m128i)(A), (N)))

/* Expands to __builtin_ia32_vprotdi applied to A (viewed as __v4si) and N,
 * cast to __m128i.  The whole expansion is parenthesized so the leading cast
 * applies to the complete call even when a postfix operator follows the
 * macro invocation. */
#define _mm_roti_epi32(A, N) \
  ((__m128i)__builtin_ia32_vprotdi((__v4si)(__m128i)(A), (N)))

/* Expands to __builtin_ia32_vprotqi applied to A (viewed as __v2di) and N,
 * cast to __m128i.  The whole expansion is parenthesized so the leading cast
 * applies to the complete call even when a postfix operator follows the
 * macro invocation. */
#define _mm_roti_epi64(A, N) \
  ((__m128i)__builtin_ia32_vprotqi((__v2di)(__m128i)(A), (N)))

253288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
254239313Sdim_mm_shl_epi8(__m128i __A, __m128i __B)
255239313Sdim{
256239313Sdim  return (__m128i)__builtin_ia32_vpshlb((__v16qi)__A, (__v16qi)__B);
257239313Sdim}
258239313Sdim
259288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
260239313Sdim_mm_shl_epi16(__m128i __A, __m128i __B)
261239313Sdim{
262239313Sdim  return (__m128i)__builtin_ia32_vpshlw((__v8hi)__A, (__v8hi)__B);
263239313Sdim}
264239313Sdim
265288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
266239313Sdim_mm_shl_epi32(__m128i __A, __m128i __B)
267239313Sdim{
268239313Sdim  return (__m128i)__builtin_ia32_vpshld((__v4si)__A, (__v4si)__B);
269239313Sdim}
270239313Sdim
271288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
272239313Sdim_mm_shl_epi64(__m128i __A, __m128i __B)
273239313Sdim{
274239313Sdim  return (__m128i)__builtin_ia32_vpshlq((__v2di)__A, (__v2di)__B);
275239313Sdim}
276239313Sdim
277288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
278239313Sdim_mm_sha_epi8(__m128i __A, __m128i __B)
279239313Sdim{
280239313Sdim  return (__m128i)__builtin_ia32_vpshab((__v16qi)__A, (__v16qi)__B);
281239313Sdim}
282239313Sdim
283288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
284239313Sdim_mm_sha_epi16(__m128i __A, __m128i __B)
285239313Sdim{
286239313Sdim  return (__m128i)__builtin_ia32_vpshaw((__v8hi)__A, (__v8hi)__B);
287239313Sdim}
288239313Sdim
289288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
290239313Sdim_mm_sha_epi32(__m128i __A, __m128i __B)
291239313Sdim{
292239313Sdim  return (__m128i)__builtin_ia32_vpshad((__v4si)__A, (__v4si)__B);
293239313Sdim}
294239313Sdim
295288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
296239313Sdim_mm_sha_epi64(__m128i __A, __m128i __B)
297239313Sdim{
298239313Sdim  return (__m128i)__builtin_ia32_vpshaq((__v2di)__A, (__v2di)__B);
299239313Sdim}
300239313Sdim
/* Expands to __builtin_ia32_vpcomub applied to A and B (both viewed as
 * __v16qi) and the selector N, cast to __m128i.  The expansion is fully
 * parenthesized so the cast covers the complete call in any context. */
#define _mm_com_epu8(A, B, N) \
  ((__m128i)__builtin_ia32_vpcomub((__v16qi)(__m128i)(A), \
                                   (__v16qi)(__m128i)(B), (N)))

/* Expands to __builtin_ia32_vpcomuw applied to A and B (both viewed as
 * __v8hi) and the selector N, cast to __m128i.  The expansion is fully
 * parenthesized so the cast covers the complete call in any context. */
#define _mm_com_epu16(A, B, N) \
  ((__m128i)__builtin_ia32_vpcomuw((__v8hi)(__m128i)(A), \
                                   (__v8hi)(__m128i)(B), (N)))

/* Expands to __builtin_ia32_vpcomud applied to A and B (both viewed as
 * __v4si) and the selector N, cast to __m128i.  The expansion is fully
 * parenthesized so the cast covers the complete call in any context. */
#define _mm_com_epu32(A, B, N) \
  ((__m128i)__builtin_ia32_vpcomud((__v4si)(__m128i)(A), \
                                   (__v4si)(__m128i)(B), (N)))

/* Expands to __builtin_ia32_vpcomuq applied to A and B (both viewed as
 * __v2di) and the selector N, cast to __m128i.  The expansion is fully
 * parenthesized so the cast covers the complete call in any context. */
#define _mm_com_epu64(A, B, N) \
  ((__m128i)__builtin_ia32_vpcomuq((__v2di)(__m128i)(A), \
                                   (__v2di)(__m128i)(B), (N)))

/* Expands to __builtin_ia32_vpcomb applied to A and B (both viewed as
 * __v16qi) and the selector N, cast to __m128i.  The expansion is fully
 * parenthesized so the cast covers the complete call in any context. */
#define _mm_com_epi8(A, B, N) \
  ((__m128i)__builtin_ia32_vpcomb((__v16qi)(__m128i)(A), \
                                  (__v16qi)(__m128i)(B), (N)))

/* Expands to __builtin_ia32_vpcomw applied to A and B (both viewed as
 * __v8hi) and the selector N, cast to __m128i.  The expansion is fully
 * parenthesized so the cast covers the complete call in any context. */
#define _mm_com_epi16(A, B, N) \
  ((__m128i)__builtin_ia32_vpcomw((__v8hi)(__m128i)(A), \
                                  (__v8hi)(__m128i)(B), (N)))

/* Expands to __builtin_ia32_vpcomd applied to A and B (both viewed as
 * __v4si) and the selector N, cast to __m128i.  The expansion is fully
 * parenthesized so the cast covers the complete call in any context. */
#define _mm_com_epi32(A, B, N) \
  ((__m128i)__builtin_ia32_vpcomd((__v4si)(__m128i)(A), \
                                  (__v4si)(__m128i)(B), (N)))

/* Expands to __builtin_ia32_vpcomq applied to A and B (both viewed as
 * __v2di) and the selector N, cast to __m128i.  The expansion is fully
 * parenthesized so the cast covers the complete call in any context. */
#define _mm_com_epi64(A, B, N) \
  ((__m128i)__builtin_ia32_vpcomq((__v2di)(__m128i)(A), \
                                  (__v2di)(__m128i)(B), (N)))

/* Selector values passed as the N argument of the _mm_com_* macros by the
 * _mm_com<pred>_* wrapper functions in this header.
 * NOTE(review): the names suggest the predicates less-than, less-or-equal,
 * greater-than, greater-or-equal, equal, not-equal, always-false and
 * always-true; confirm against the AMD XOP VPCOM documentation. */
#define _MM_PCOMCTRL_LT    0
#define _MM_PCOMCTRL_LE    1
#define _MM_PCOMCTRL_GT    2
#define _MM_PCOMCTRL_GE    3
#define _MM_PCOMCTRL_EQ    4
#define _MM_PCOMCTRL_NEQ   5
#define _MM_PCOMCTRL_FALSE 6
#define _MM_PCOMCTRL_TRUE  7

342288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
343261991Sdim_mm_comlt_epu8(__m128i __A, __m128i __B)
344261991Sdim{
345261991Sdim  return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_LT);
346261991Sdim}
347261991Sdim
348288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
349261991Sdim_mm_comle_epu8(__m128i __A, __m128i __B)
350261991Sdim{
351261991Sdim  return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_LE);
352261991Sdim}
353261991Sdim
354288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
355261991Sdim_mm_comgt_epu8(__m128i __A, __m128i __B)
356261991Sdim{
357261991Sdim  return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_GT);
358261991Sdim}
359261991Sdim
360288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
361261991Sdim_mm_comge_epu8(__m128i __A, __m128i __B)
362261991Sdim{
363261991Sdim  return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_GE);
364261991Sdim}
365261991Sdim
366288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
367261991Sdim_mm_comeq_epu8(__m128i __A, __m128i __B)
368261991Sdim{
369261991Sdim  return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_EQ);
370261991Sdim}
371261991Sdim
372288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
373261991Sdim_mm_comneq_epu8(__m128i __A, __m128i __B)
374261991Sdim{
375261991Sdim  return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_NEQ);
376261991Sdim}
377261991Sdim
378288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
379261991Sdim_mm_comfalse_epu8(__m128i __A, __m128i __B)
380261991Sdim{
381261991Sdim  return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_FALSE);
382261991Sdim}
383261991Sdim
384288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
385261991Sdim_mm_comtrue_epu8(__m128i __A, __m128i __B)
386261991Sdim{
387261991Sdim  return _mm_com_epu8(__A, __B, _MM_PCOMCTRL_TRUE);
388261991Sdim}
389261991Sdim
390288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
391261991Sdim_mm_comlt_epu16(__m128i __A, __m128i __B)
392261991Sdim{
393261991Sdim  return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_LT);
394261991Sdim}
395261991Sdim
396288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
397261991Sdim_mm_comle_epu16(__m128i __A, __m128i __B)
398261991Sdim{
399261991Sdim  return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_LE);
400261991Sdim}
401261991Sdim
402288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
403261991Sdim_mm_comgt_epu16(__m128i __A, __m128i __B)
404261991Sdim{
405261991Sdim  return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_GT);
406261991Sdim}
407261991Sdim
408288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
409261991Sdim_mm_comge_epu16(__m128i __A, __m128i __B)
410261991Sdim{
411261991Sdim  return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_GE);
412261991Sdim}
413261991Sdim
414288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
415261991Sdim_mm_comeq_epu16(__m128i __A, __m128i __B)
416261991Sdim{
417261991Sdim  return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_EQ);
418261991Sdim}
419261991Sdim
420288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
421261991Sdim_mm_comneq_epu16(__m128i __A, __m128i __B)
422261991Sdim{
423261991Sdim  return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_NEQ);
424261991Sdim}
425261991Sdim
426288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
427261991Sdim_mm_comfalse_epu16(__m128i __A, __m128i __B)
428261991Sdim{
429261991Sdim  return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_FALSE);
430261991Sdim}
431261991Sdim
432288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
433261991Sdim_mm_comtrue_epu16(__m128i __A, __m128i __B)
434261991Sdim{
435261991Sdim  return _mm_com_epu16(__A, __B, _MM_PCOMCTRL_TRUE);
436261991Sdim}
437261991Sdim
438288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
439261991Sdim_mm_comlt_epu32(__m128i __A, __m128i __B)
440261991Sdim{
441261991Sdim  return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_LT);
442261991Sdim}
443261991Sdim
444288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
445261991Sdim_mm_comle_epu32(__m128i __A, __m128i __B)
446261991Sdim{
447261991Sdim  return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_LE);
448261991Sdim}
449261991Sdim
450288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
451261991Sdim_mm_comgt_epu32(__m128i __A, __m128i __B)
452261991Sdim{
453261991Sdim  return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_GT);
454261991Sdim}
455261991Sdim
456288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
457261991Sdim_mm_comge_epu32(__m128i __A, __m128i __B)
458261991Sdim{
459261991Sdim  return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_GE);
460261991Sdim}
461261991Sdim
462288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
463261991Sdim_mm_comeq_epu32(__m128i __A, __m128i __B)
464261991Sdim{
465261991Sdim  return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_EQ);
466261991Sdim}
467261991Sdim
468288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
469261991Sdim_mm_comneq_epu32(__m128i __A, __m128i __B)
470261991Sdim{
471261991Sdim  return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_NEQ);
472261991Sdim}
473261991Sdim
474288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
475261991Sdim_mm_comfalse_epu32(__m128i __A, __m128i __B)
476261991Sdim{
477261991Sdim  return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_FALSE);
478261991Sdim}
479261991Sdim
480288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
481261991Sdim_mm_comtrue_epu32(__m128i __A, __m128i __B)
482261991Sdim{
483261991Sdim  return _mm_com_epu32(__A, __B, _MM_PCOMCTRL_TRUE);
484261991Sdim}
485261991Sdim
486288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
487261991Sdim_mm_comlt_epu64(__m128i __A, __m128i __B)
488261991Sdim{
489261991Sdim  return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_LT);
490261991Sdim}
491261991Sdim
492288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
493261991Sdim_mm_comle_epu64(__m128i __A, __m128i __B)
494261991Sdim{
495261991Sdim  return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_LE);
496261991Sdim}
497261991Sdim
498288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
499261991Sdim_mm_comgt_epu64(__m128i __A, __m128i __B)
500261991Sdim{
501261991Sdim  return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_GT);
502261991Sdim}
503261991Sdim
504288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
505261991Sdim_mm_comge_epu64(__m128i __A, __m128i __B)
506261991Sdim{
507261991Sdim  return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_GE);
508261991Sdim}
509261991Sdim
510288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
511261991Sdim_mm_comeq_epu64(__m128i __A, __m128i __B)
512261991Sdim{
513261991Sdim  return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_EQ);
514261991Sdim}
515261991Sdim
516288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
517261991Sdim_mm_comneq_epu64(__m128i __A, __m128i __B)
518261991Sdim{
519261991Sdim  return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_NEQ);
520261991Sdim}
521261991Sdim
522288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
523261991Sdim_mm_comfalse_epu64(__m128i __A, __m128i __B)
524261991Sdim{
525261991Sdim  return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_FALSE);
526261991Sdim}
527261991Sdim
528288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
529261991Sdim_mm_comtrue_epu64(__m128i __A, __m128i __B)
530261991Sdim{
531261991Sdim  return _mm_com_epu64(__A, __B, _MM_PCOMCTRL_TRUE);
532261991Sdim}
533261991Sdim
534288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
535261991Sdim_mm_comlt_epi8(__m128i __A, __m128i __B)
536261991Sdim{
537261991Sdim  return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_LT);
538261991Sdim}
539261991Sdim
540288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
541261991Sdim_mm_comle_epi8(__m128i __A, __m128i __B)
542261991Sdim{
543261991Sdim  return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_LE);
544261991Sdim}
545261991Sdim
546288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
547261991Sdim_mm_comgt_epi8(__m128i __A, __m128i __B)
548261991Sdim{
549261991Sdim  return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_GT);
550261991Sdim}
551261991Sdim
552288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
553261991Sdim_mm_comge_epi8(__m128i __A, __m128i __B)
554261991Sdim{
555261991Sdim  return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_GE);
556261991Sdim}
557261991Sdim
558288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
559261991Sdim_mm_comeq_epi8(__m128i __A, __m128i __B)
560261991Sdim{
561261991Sdim  return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_EQ);
562261991Sdim}
563261991Sdim
564288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
565261991Sdim_mm_comneq_epi8(__m128i __A, __m128i __B)
566261991Sdim{
567261991Sdim  return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_NEQ);
568261991Sdim}
569261991Sdim
570288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
571261991Sdim_mm_comfalse_epi8(__m128i __A, __m128i __B)
572261991Sdim{
573261991Sdim  return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_FALSE);
574261991Sdim}
575261991Sdim
576288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
577261991Sdim_mm_comtrue_epi8(__m128i __A, __m128i __B)
578261991Sdim{
579261991Sdim  return _mm_com_epi8(__A, __B, _MM_PCOMCTRL_TRUE);
580261991Sdim}
581261991Sdim
582288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
583261991Sdim_mm_comlt_epi16(__m128i __A, __m128i __B)
584261991Sdim{
585261991Sdim  return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_LT);
586261991Sdim}
587261991Sdim
588288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
589261991Sdim_mm_comle_epi16(__m128i __A, __m128i __B)
590261991Sdim{
591261991Sdim  return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_LE);
592261991Sdim}
593261991Sdim
594288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
595261991Sdim_mm_comgt_epi16(__m128i __A, __m128i __B)
596261991Sdim{
597261991Sdim  return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_GT);
598261991Sdim}
599261991Sdim
600288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
601261991Sdim_mm_comge_epi16(__m128i __A, __m128i __B)
602261991Sdim{
603261991Sdim  return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_GE);
604261991Sdim}
605261991Sdim
606288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
607261991Sdim_mm_comeq_epi16(__m128i __A, __m128i __B)
608261991Sdim{
609261991Sdim  return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_EQ);
610261991Sdim}
611261991Sdim
612288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
613261991Sdim_mm_comneq_epi16(__m128i __A, __m128i __B)
614261991Sdim{
615261991Sdim  return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_NEQ);
616261991Sdim}
617261991Sdim
618288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
619261991Sdim_mm_comfalse_epi16(__m128i __A, __m128i __B)
620261991Sdim{
621261991Sdim  return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_FALSE);
622261991Sdim}
623261991Sdim
624288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
625261991Sdim_mm_comtrue_epi16(__m128i __A, __m128i __B)
626261991Sdim{
627261991Sdim  return _mm_com_epi16(__A, __B, _MM_PCOMCTRL_TRUE);
628261991Sdim}
629261991Sdim
630288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
631261991Sdim_mm_comlt_epi32(__m128i __A, __m128i __B)
632261991Sdim{
633261991Sdim  return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_LT);
634261991Sdim}
635261991Sdim
636288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
637261991Sdim_mm_comle_epi32(__m128i __A, __m128i __B)
638261991Sdim{
639261991Sdim  return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_LE);
640261991Sdim}
641261991Sdim
642288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
643261991Sdim_mm_comgt_epi32(__m128i __A, __m128i __B)
644261991Sdim{
645261991Sdim  return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_GT);
646261991Sdim}
647261991Sdim
648288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
649261991Sdim_mm_comge_epi32(__m128i __A, __m128i __B)
650261991Sdim{
651261991Sdim  return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_GE);
652261991Sdim}
653261991Sdim
654288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
655261991Sdim_mm_comeq_epi32(__m128i __A, __m128i __B)
656261991Sdim{
657261991Sdim  return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_EQ);
658261991Sdim}
659261991Sdim
660288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
661261991Sdim_mm_comneq_epi32(__m128i __A, __m128i __B)
662261991Sdim{
663261991Sdim  return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_NEQ);
664261991Sdim}
665261991Sdim
666288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
667261991Sdim_mm_comfalse_epi32(__m128i __A, __m128i __B)
668261991Sdim{
669261991Sdim  return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_FALSE);
670261991Sdim}
671261991Sdim
672288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
673261991Sdim_mm_comtrue_epi32(__m128i __A, __m128i __B)
674261991Sdim{
675261991Sdim  return _mm_com_epi32(__A, __B, _MM_PCOMCTRL_TRUE);
676261991Sdim}
677261991Sdim
678288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
679261991Sdim_mm_comlt_epi64(__m128i __A, __m128i __B)
680261991Sdim{
681261991Sdim  return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_LT);
682261991Sdim}
683261991Sdim
684288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
685261991Sdim_mm_comle_epi64(__m128i __A, __m128i __B)
686261991Sdim{
687261991Sdim  return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_LE);
688261991Sdim}
689261991Sdim
690288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
691261991Sdim_mm_comgt_epi64(__m128i __A, __m128i __B)
692261991Sdim{
693261991Sdim  return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_GT);
694261991Sdim}
695261991Sdim
696288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
697261991Sdim_mm_comge_epi64(__m128i __A, __m128i __B)
698261991Sdim{
699261991Sdim  return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_GE);
700261991Sdim}
701261991Sdim
702288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
703261991Sdim_mm_comeq_epi64(__m128i __A, __m128i __B)
704261991Sdim{
705261991Sdim  return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_EQ);
706261991Sdim}
707261991Sdim
708288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
709261991Sdim_mm_comneq_epi64(__m128i __A, __m128i __B)
710261991Sdim{
711261991Sdim  return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_NEQ);
712261991Sdim}
713261991Sdim
714288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
715261991Sdim_mm_comfalse_epi64(__m128i __A, __m128i __B)
716261991Sdim{
717261991Sdim  return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_FALSE);
718261991Sdim}
719261991Sdim
720288943Sdimstatic __inline__ __m128i __DEFAULT_FN_ATTRS
721261991Sdim_mm_comtrue_epi64(__m128i __A, __m128i __B)
722261991Sdim{
723261991Sdim  return _mm_com_epi64(__A, __B, _MM_PCOMCTRL_TRUE);
724261991Sdim}
725261991Sdim
/* Expands to __builtin_ia32_vpermil2pd applied to X and Y (viewed as
 * __v2df), C (viewed as __v2di) and I, cast to __m128d.  The expansion is
 * fully parenthesized so the cast covers the complete call in any context. */
#define _mm_permute2_pd(X, Y, C, I) \
  ((__m128d)__builtin_ia32_vpermil2pd((__v2df)(__m128d)(X), \
                                      (__v2df)(__m128d)(Y), \
                                      (__v2di)(__m128i)(C), (I)))

/* Expands to __builtin_ia32_vpermil2pd256 applied to X and Y (viewed as
 * __v4df), C (viewed as __v4di) and I, cast to __m256d.  The expansion is
 * fully parenthesized so the cast covers the complete call in any context. */
#define _mm256_permute2_pd(X, Y, C, I) \
  ((__m256d)__builtin_ia32_vpermil2pd256((__v4df)(__m256d)(X), \
                                         (__v4df)(__m256d)(Y), \
                                         (__v4di)(__m256i)(C), (I)))

/* Expands to __builtin_ia32_vpermil2ps applied to X and Y (viewed as
 * __v4sf), C (viewed as __v4si) and I, cast to __m128.  The expansion is
 * fully parenthesized so the cast covers the complete call in any context. */
#define _mm_permute2_ps(X, Y, C, I) \
  ((__m128)__builtin_ia32_vpermil2ps((__v4sf)(__m128)(X), (__v4sf)(__m128)(Y), \
                                     (__v4si)(__m128i)(C), (I)))

/* Expands to __builtin_ia32_vpermil2ps256 applied to X and Y (viewed as
 * __v8sf), C (viewed as __v8si) and I, cast to __m256.  The expansion is
 * fully parenthesized so the cast covers the complete call in any context. */
#define _mm256_permute2_ps(X, Y, C, I) \
  ((__m256)__builtin_ia32_vpermil2ps256((__v8sf)(__m256)(X), \
                                        (__v8sf)(__m256)(Y), \
                                        (__v8si)(__m256i)(C), (I)))

745288943Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS
746239313Sdim_mm_frcz_ss(__m128 __A)
747239313Sdim{
748239313Sdim  return (__m128)__builtin_ia32_vfrczss((__v4sf)__A);
749239313Sdim}
750239313Sdim
751288943Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS
752239313Sdim_mm_frcz_sd(__m128d __A)
753239313Sdim{
754239313Sdim  return (__m128d)__builtin_ia32_vfrczsd((__v2df)__A);
755239313Sdim}
756239313Sdim
757288943Sdimstatic __inline__ __m128 __DEFAULT_FN_ATTRS
758239313Sdim_mm_frcz_ps(__m128 __A)
759239313Sdim{
760239313Sdim  return (__m128)__builtin_ia32_vfrczps((__v4sf)__A);
761239313Sdim}
762239313Sdim
763288943Sdimstatic __inline__ __m128d __DEFAULT_FN_ATTRS
764239313Sdim_mm_frcz_pd(__m128d __A)
765239313Sdim{
766239313Sdim  return (__m128d)__builtin_ia32_vfrczpd((__v2df)__A);
767239313Sdim}
768239313Sdim
769341825Sdimstatic __inline__ __m256 __DEFAULT_FN_ATTRS256
770239313Sdim_mm256_frcz_ps(__m256 __A)
771239313Sdim{
772239313Sdim  return (__m256)__builtin_ia32_vfrczps256((__v8sf)__A);
773239313Sdim}
774239313Sdim
775341825Sdimstatic __inline__ __m256d __DEFAULT_FN_ATTRS256
776239313Sdim_mm256_frcz_pd(__m256d __A)
777239313Sdim{
778239313Sdim  return (__m256d)__builtin_ia32_vfrczpd256((__v4df)__A);
779239313Sdim}
780239313Sdim
781288943Sdim#undef __DEFAULT_FN_ATTRS
782341825Sdim#undef __DEFAULT_FN_ATTRS256
783288943Sdim
784239313Sdim#endif /* __XOPINTRIN_H */
785