1/* Copyright (C) 2007-2022 Free Software Foundation, Inc.
2
3   This file is part of GCC.
4
5   GCC is free software; you can redistribute it and/or modify
6   it under the terms of the GNU General Public License as published by
7   the Free Software Foundation; either version 3, or (at your option)
8   any later version.
9
10   GCC is distributed in the hope that it will be useful,
11   but WITHOUT ANY WARRANTY; without even the implied warranty of
12   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13   GNU General Public License for more details.
14
15   Under Section 7 of GPL version 3, you are granted additional
16   permissions described in the GCC Runtime Library Exception, version
17   3.1, as published by the Free Software Foundation.
18
19   You should have received a copy of the GNU General Public License and
20   a copy of the GCC Runtime Library Exception along with this program;
21   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
22   <http://www.gnu.org/licenses/>.  */
23
24#ifndef _X86INTRIN_H_INCLUDED
25# error "Never use <xopintrin.h> directly; include <x86intrin.h> instead."
26#endif
27
28#ifndef _XOPMMINTRIN_H_INCLUDED
29#define _XOPMMINTRIN_H_INCLUDED
30
31#include <fma4intrin.h>
32
33#ifndef __XOP__
34#pragma GCC push_options
35#pragma GCC target("xop")
36#define __DISABLE_XOP__
37#endif /* __XOP__ */
38
39/* Integer multiply/add instructions. */
40extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
41_mm_maccs_epi16(__m128i __A, __m128i __B, __m128i __C)
42{
43  return (__m128i) __builtin_ia32_vpmacssww ((__v8hi)__A,(__v8hi)__B, (__v8hi)__C);
44}
45
46extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
47_mm_macc_epi16(__m128i __A, __m128i __B, __m128i __C)
48{
49  return (__m128i) __builtin_ia32_vpmacsww ((__v8hi)__A, (__v8hi)__B, (__v8hi)__C);
50}
51
52extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
53_mm_maccsd_epi16(__m128i __A, __m128i __B, __m128i __C)
54{
55  return  (__m128i) __builtin_ia32_vpmacsswd ((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
56}
57
58extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
59_mm_maccd_epi16(__m128i __A, __m128i __B, __m128i __C)
60{
61  return  (__m128i) __builtin_ia32_vpmacswd ((__v8hi)__A, (__v8hi)__B, (__v4si)__C);
62}
63
64extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
65_mm_maccs_epi32(__m128i __A, __m128i __B, __m128i __C)
66{
67  return  (__m128i) __builtin_ia32_vpmacssdd ((__v4si)__A, (__v4si)__B, (__v4si)__C);
68}
69
70extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
71_mm_macc_epi32(__m128i __A, __m128i __B, __m128i __C)
72{
73  return  (__m128i) __builtin_ia32_vpmacsdd ((__v4si)__A, (__v4si)__B, (__v4si)__C);
74}
75
76extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
77_mm_maccslo_epi32(__m128i __A, __m128i __B, __m128i __C)
78{
79  return  (__m128i) __builtin_ia32_vpmacssdql ((__v4si)__A, (__v4si)__B, (__v2di)__C);
80}
81
82extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
83_mm_macclo_epi32(__m128i __A, __m128i __B, __m128i __C)
84{
85  return  (__m128i) __builtin_ia32_vpmacsdql ((__v4si)__A, (__v4si)__B, (__v2di)__C);
86}
87
88extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
89_mm_maccshi_epi32(__m128i __A, __m128i __B, __m128i __C)
90{
91  return  (__m128i) __builtin_ia32_vpmacssdqh ((__v4si)__A, (__v4si)__B, (__v2di)__C);
92}
93
94extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
95_mm_macchi_epi32(__m128i __A, __m128i __B, __m128i __C)
96{
97  return  (__m128i) __builtin_ia32_vpmacsdqh ((__v4si)__A, (__v4si)__B, (__v2di)__C);
98}
99
100extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
101_mm_maddsd_epi16(__m128i __A, __m128i __B, __m128i __C)
102{
103  return  (__m128i) __builtin_ia32_vpmadcsswd ((__v8hi)__A,(__v8hi)__B,(__v4si)__C);
104}
105
106extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
107_mm_maddd_epi16(__m128i __A, __m128i __B, __m128i __C)
108{
109  return  (__m128i) __builtin_ia32_vpmadcswd ((__v8hi)__A,(__v8hi)__B,(__v4si)__C);
110}
111
112/* Packed Integer Horizontal Add and Subtract */
113extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
114_mm_haddw_epi8(__m128i __A)
115{
116  return  (__m128i) __builtin_ia32_vphaddbw ((__v16qi)__A);
117}
118
119extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
120_mm_haddd_epi8(__m128i __A)
121{
122  return  (__m128i) __builtin_ia32_vphaddbd ((__v16qi)__A);
123}
124
125extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
126_mm_haddq_epi8(__m128i __A)
127{
128  return  (__m128i) __builtin_ia32_vphaddbq ((__v16qi)__A);
129}
130
131extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
132_mm_haddd_epi16(__m128i __A)
133{
134  return  (__m128i) __builtin_ia32_vphaddwd ((__v8hi)__A);
135}
136
137extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
138_mm_haddq_epi16(__m128i __A)
139{
140  return  (__m128i) __builtin_ia32_vphaddwq ((__v8hi)__A);
141}
142
143extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
144_mm_haddq_epi32(__m128i __A)
145{
146  return  (__m128i) __builtin_ia32_vphadddq ((__v4si)__A);
147}
148
149extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
150_mm_haddw_epu8(__m128i __A)
151{
152  return  (__m128i) __builtin_ia32_vphaddubw ((__v16qi)__A);
153}
154
155extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
156_mm_haddd_epu8(__m128i __A)
157{
158  return  (__m128i) __builtin_ia32_vphaddubd ((__v16qi)__A);
159}
160
161extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
162_mm_haddq_epu8(__m128i __A)
163{
164  return  (__m128i) __builtin_ia32_vphaddubq ((__v16qi)__A);
165}
166
167extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
168_mm_haddd_epu16(__m128i __A)
169{
170  return  (__m128i) __builtin_ia32_vphadduwd ((__v8hi)__A);
171}
172
173extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
174_mm_haddq_epu16(__m128i __A)
175{
176  return  (__m128i) __builtin_ia32_vphadduwq ((__v8hi)__A);
177}
178
179extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
180_mm_haddq_epu32(__m128i __A)
181{
182  return  (__m128i) __builtin_ia32_vphaddudq ((__v4si)__A);
183}
184
185extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
186_mm_hsubw_epi8(__m128i __A)
187{
188  return  (__m128i) __builtin_ia32_vphsubbw ((__v16qi)__A);
189}
190
191extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
192_mm_hsubd_epi16(__m128i __A)
193{
194  return  (__m128i) __builtin_ia32_vphsubwd ((__v8hi)__A);
195}
196
197extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
198_mm_hsubq_epi32(__m128i __A)
199{
200  return  (__m128i) __builtin_ia32_vphsubdq ((__v4si)__A);
201}
202
203/* Vector conditional move and permute */
204
205extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
206_mm_cmov_si128(__m128i __A, __m128i __B, __m128i __C)
207{
208  return  (__m128i) __builtin_ia32_vpcmov (__A, __B, __C);
209}
210
211extern __inline __m256i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
212_mm256_cmov_si256(__m256i __A, __m256i __B, __m256i __C)
213{
214  return  (__m256i) __builtin_ia32_vpcmov256 (__A, __B, __C);
215}
216
217extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
218_mm_perm_epi8(__m128i __A, __m128i __B, __m128i __C)
219{
220  return  (__m128i) __builtin_ia32_vpperm ((__v16qi)__A, (__v16qi)__B, (__v16qi)__C);
221}
222
223/* Packed Integer Rotates and Shifts
224   Rotates - Non-Immediate form */
225
226extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
227_mm_rot_epi8(__m128i __A,  __m128i __B)
228{
229  return  (__m128i) __builtin_ia32_vprotb ((__v16qi)__A, (__v16qi)__B);
230}
231
232extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
233_mm_rot_epi16(__m128i __A,  __m128i __B)
234{
235  return  (__m128i) __builtin_ia32_vprotw ((__v8hi)__A, (__v8hi)__B);
236}
237
238extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
239_mm_rot_epi32(__m128i __A,  __m128i __B)
240{
241  return  (__m128i) __builtin_ia32_vprotd ((__v4si)__A, (__v4si)__B);
242}
243
244extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
245_mm_rot_epi64(__m128i __A,  __m128i __B)
246{
247  return (__m128i)  __builtin_ia32_vprotq ((__v2di)__A, (__v2di)__B);
248}
249
250/* Rotates - Immediate form */
251
252#ifdef __OPTIMIZE__
253extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
254_mm_roti_epi8(__m128i __A, const int __B)
255{
256  return  (__m128i) __builtin_ia32_vprotbi ((__v16qi)__A, __B);
257}
258
259extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
260_mm_roti_epi16(__m128i __A, const int __B)
261{
262  return  (__m128i) __builtin_ia32_vprotwi ((__v8hi)__A, __B);
263}
264
265extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
266_mm_roti_epi32(__m128i __A, const int __B)
267{
268  return  (__m128i) __builtin_ia32_vprotdi ((__v4si)__A, __B);
269}
270
271extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
272_mm_roti_epi64(__m128i __A, const int __B)
273{
274  return  (__m128i) __builtin_ia32_vprotqi ((__v2di)__A, __B);
275}
276#else
/* Macro variant of _mm_roti_epi8, used when __OPTIMIZE__ is not defined:
   expands to a direct vprotbi builtin call.  */
#define _mm_roti_epi8(__V, __N) \
  ((__m128i) __builtin_ia32_vprotbi ((__v16qi)(__m128i)(__V), \
                                     (int)(__N)))
/* Macro variant of _mm_roti_epi16, used when __OPTIMIZE__ is not defined:
   expands to a direct vprotwi builtin call.  */
#define _mm_roti_epi16(__V, __N) \
  ((__m128i) __builtin_ia32_vprotwi ((__v8hi)(__m128i)(__V), \
                                     (int)(__N)))
/* Macro variant of _mm_roti_epi32, used when __OPTIMIZE__ is not defined:
   expands to a direct vprotdi builtin call.  */
#define _mm_roti_epi32(__V, __N) \
  ((__m128i) __builtin_ia32_vprotdi ((__v4si)(__m128i)(__V), \
                                     (int)(__N)))
/* Macro variant of _mm_roti_epi64, used when __OPTIMIZE__ is not defined:
   expands to a direct vprotqi builtin call.  */
#define _mm_roti_epi64(__V, __N) \
  ((__m128i) __builtin_ia32_vprotqi ((__v2di)(__m128i)(__V), \
                                     (int)(__N)))
285#endif
286
287/* Shifts */
288
289extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
290_mm_shl_epi8(__m128i __A,  __m128i __B)
291{
292  return  (__m128i) __builtin_ia32_vpshlb ((__v16qi)__A, (__v16qi)__B);
293}
294
295extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
296_mm_shl_epi16(__m128i __A,  __m128i __B)
297{
298  return  (__m128i) __builtin_ia32_vpshlw ((__v8hi)__A, (__v8hi)__B);
299}
300
301extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
302_mm_shl_epi32(__m128i __A,  __m128i __B)
303{
304  return  (__m128i) __builtin_ia32_vpshld ((__v4si)__A, (__v4si)__B);
305}
306
307extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
308_mm_shl_epi64(__m128i __A,  __m128i __B)
309{
310  return  (__m128i) __builtin_ia32_vpshlq ((__v2di)__A, (__v2di)__B);
311}
312
313
314extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
315_mm_sha_epi8(__m128i __A,  __m128i __B)
316{
317  return  (__m128i) __builtin_ia32_vpshab ((__v16qi)__A, (__v16qi)__B);
318}
319
320extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
321_mm_sha_epi16(__m128i __A,  __m128i __B)
322{
323  return  (__m128i) __builtin_ia32_vpshaw ((__v8hi)__A, (__v8hi)__B);
324}
325
326extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
327_mm_sha_epi32(__m128i __A,  __m128i __B)
328{
329  return  (__m128i) __builtin_ia32_vpshad ((__v4si)__A, (__v4si)__B);
330}
331
332extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
333_mm_sha_epi64(__m128i __A,  __m128i __B)
334{
335  return  (__m128i) __builtin_ia32_vpshaq ((__v2di)__A, (__v2di)__B);
336}
337
338/* Compare and Predicate Generation
339   pcom (integer, unsigned bytes) */
340
341extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
342_mm_comlt_epu8(__m128i __A, __m128i __B)
343{
344  return (__m128i) __builtin_ia32_vpcomltub ((__v16qi)__A, (__v16qi)__B);
345}
346
347extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
348_mm_comle_epu8(__m128i __A, __m128i __B)
349{
350  return (__m128i) __builtin_ia32_vpcomleub ((__v16qi)__A, (__v16qi)__B);
351}
352
353extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
354_mm_comgt_epu8(__m128i __A, __m128i __B)
355{
356  return (__m128i) __builtin_ia32_vpcomgtub ((__v16qi)__A, (__v16qi)__B);
357}
358
359extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
360_mm_comge_epu8(__m128i __A, __m128i __B)
361{
362  return (__m128i) __builtin_ia32_vpcomgeub ((__v16qi)__A, (__v16qi)__B);
363}
364
365extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
366_mm_comeq_epu8(__m128i __A, __m128i __B)
367{
368  return (__m128i) __builtin_ia32_vpcomequb ((__v16qi)__A, (__v16qi)__B);
369}
370
371extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
372_mm_comneq_epu8(__m128i __A, __m128i __B)
373{
374  return (__m128i) __builtin_ia32_vpcomnequb ((__v16qi)__A, (__v16qi)__B);
375}
376
377extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
378_mm_comfalse_epu8(__m128i __A, __m128i __B)
379{
380  return (__m128i) __builtin_ia32_vpcomfalseub ((__v16qi)__A, (__v16qi)__B);
381}
382
383extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
384_mm_comtrue_epu8(__m128i __A, __m128i __B)
385{
386  return (__m128i) __builtin_ia32_vpcomtrueub ((__v16qi)__A, (__v16qi)__B);
387}
388
/* pcom (integer, unsigned words) */
390
391extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
392_mm_comlt_epu16(__m128i __A, __m128i __B)
393{
394  return (__m128i) __builtin_ia32_vpcomltuw ((__v8hi)__A, (__v8hi)__B);
395}
396
397extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
398_mm_comle_epu16(__m128i __A, __m128i __B)
399{
400  return (__m128i) __builtin_ia32_vpcomleuw ((__v8hi)__A, (__v8hi)__B);
401}
402
403extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
404_mm_comgt_epu16(__m128i __A, __m128i __B)
405{
406  return (__m128i) __builtin_ia32_vpcomgtuw ((__v8hi)__A, (__v8hi)__B);
407}
408
409extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
410_mm_comge_epu16(__m128i __A, __m128i __B)
411{
412  return (__m128i) __builtin_ia32_vpcomgeuw ((__v8hi)__A, (__v8hi)__B);
413}
414
415extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
416_mm_comeq_epu16(__m128i __A, __m128i __B)
417{
418  return (__m128i) __builtin_ia32_vpcomequw ((__v8hi)__A, (__v8hi)__B);
419}
420
421extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
422_mm_comneq_epu16(__m128i __A, __m128i __B)
423{
424  return (__m128i) __builtin_ia32_vpcomnequw ((__v8hi)__A, (__v8hi)__B);
425}
426
427extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
428_mm_comfalse_epu16(__m128i __A, __m128i __B)
429{
430  return (__m128i) __builtin_ia32_vpcomfalseuw ((__v8hi)__A, (__v8hi)__B);
431}
432
433extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
434_mm_comtrue_epu16(__m128i __A, __m128i __B)
435{
436  return (__m128i) __builtin_ia32_vpcomtrueuw ((__v8hi)__A, (__v8hi)__B);
437}
438
/* pcom (integer, unsigned double words) */
440
441extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
442_mm_comlt_epu32(__m128i __A, __m128i __B)
443{
444  return (__m128i) __builtin_ia32_vpcomltud ((__v4si)__A, (__v4si)__B);
445}
446
447extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
448_mm_comle_epu32(__m128i __A, __m128i __B)
449{
450  return (__m128i) __builtin_ia32_vpcomleud ((__v4si)__A, (__v4si)__B);
451}
452
453extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
454_mm_comgt_epu32(__m128i __A, __m128i __B)
455{
456  return (__m128i) __builtin_ia32_vpcomgtud ((__v4si)__A, (__v4si)__B);
457}
458
459extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
460_mm_comge_epu32(__m128i __A, __m128i __B)
461{
462  return (__m128i) __builtin_ia32_vpcomgeud ((__v4si)__A, (__v4si)__B);
463}
464
465extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
466_mm_comeq_epu32(__m128i __A, __m128i __B)
467{
468  return (__m128i) __builtin_ia32_vpcomequd ((__v4si)__A, (__v4si)__B);
469}
470
471extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
472_mm_comneq_epu32(__m128i __A, __m128i __B)
473{
474  return (__m128i) __builtin_ia32_vpcomnequd ((__v4si)__A, (__v4si)__B);
475}
476
477extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
478_mm_comfalse_epu32(__m128i __A, __m128i __B)
479{
480  return (__m128i) __builtin_ia32_vpcomfalseud ((__v4si)__A, (__v4si)__B);
481}
482
483extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
484_mm_comtrue_epu32(__m128i __A, __m128i __B)
485{
486  return (__m128i) __builtin_ia32_vpcomtrueud ((__v4si)__A, (__v4si)__B);
487}
488
/* pcom (integer, unsigned quad words) */
490
491extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
492_mm_comlt_epu64(__m128i __A, __m128i __B)
493{
494  return (__m128i) __builtin_ia32_vpcomltuq ((__v2di)__A, (__v2di)__B);
495}
496
497extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
498_mm_comle_epu64(__m128i __A, __m128i __B)
499{
500  return (__m128i) __builtin_ia32_vpcomleuq ((__v2di)__A, (__v2di)__B);
501}
502
503extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
504_mm_comgt_epu64(__m128i __A, __m128i __B)
505{
506  return (__m128i) __builtin_ia32_vpcomgtuq ((__v2di)__A, (__v2di)__B);
507}
508
509extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
510_mm_comge_epu64(__m128i __A, __m128i __B)
511{
512  return (__m128i) __builtin_ia32_vpcomgeuq ((__v2di)__A, (__v2di)__B);
513}
514
515extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
516_mm_comeq_epu64(__m128i __A, __m128i __B)
517{
518  return (__m128i) __builtin_ia32_vpcomequq ((__v2di)__A, (__v2di)__B);
519}
520
521extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
522_mm_comneq_epu64(__m128i __A, __m128i __B)
523{
524  return (__m128i) __builtin_ia32_vpcomnequq ((__v2di)__A, (__v2di)__B);
525}
526
527extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
528_mm_comfalse_epu64(__m128i __A, __m128i __B)
529{
530  return (__m128i) __builtin_ia32_vpcomfalseuq ((__v2di)__A, (__v2di)__B);
531}
532
533extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
534_mm_comtrue_epu64(__m128i __A, __m128i __B)
535{
536  return (__m128i) __builtin_ia32_vpcomtrueuq ((__v2di)__A, (__v2di)__B);
537}
538
/* pcom (integer, signed bytes) */
540
541extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
542_mm_comlt_epi8(__m128i __A, __m128i __B)
543{
544  return (__m128i) __builtin_ia32_vpcomltb ((__v16qi)__A, (__v16qi)__B);
545}
546
547extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
548_mm_comle_epi8(__m128i __A, __m128i __B)
549{
550  return (__m128i) __builtin_ia32_vpcomleb ((__v16qi)__A, (__v16qi)__B);
551}
552
553extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
554_mm_comgt_epi8(__m128i __A, __m128i __B)
555{
556  return (__m128i) __builtin_ia32_vpcomgtb ((__v16qi)__A, (__v16qi)__B);
557}
558
559extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
560_mm_comge_epi8(__m128i __A, __m128i __B)
561{
562  return (__m128i) __builtin_ia32_vpcomgeb ((__v16qi)__A, (__v16qi)__B);
563}
564
565extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
566_mm_comeq_epi8(__m128i __A, __m128i __B)
567{
568  return (__m128i) __builtin_ia32_vpcomeqb ((__v16qi)__A, (__v16qi)__B);
569}
570
571extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
572_mm_comneq_epi8(__m128i __A, __m128i __B)
573{
574  return (__m128i) __builtin_ia32_vpcomneqb ((__v16qi)__A, (__v16qi)__B);
575}
576
577extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
578_mm_comfalse_epi8(__m128i __A, __m128i __B)
579{
580  return (__m128i) __builtin_ia32_vpcomfalseb ((__v16qi)__A, (__v16qi)__B);
581}
582
583extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
584_mm_comtrue_epi8(__m128i __A, __m128i __B)
585{
586  return (__m128i) __builtin_ia32_vpcomtrueb ((__v16qi)__A, (__v16qi)__B);
587}
588
/* pcom (integer, signed words) */
590
591extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
592_mm_comlt_epi16(__m128i __A, __m128i __B)
593{
594  return (__m128i) __builtin_ia32_vpcomltw ((__v8hi)__A, (__v8hi)__B);
595}
596
597extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
598_mm_comle_epi16(__m128i __A, __m128i __B)
599{
600  return (__m128i) __builtin_ia32_vpcomlew ((__v8hi)__A, (__v8hi)__B);
601}
602
603extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
604_mm_comgt_epi16(__m128i __A, __m128i __B)
605{
606  return (__m128i) __builtin_ia32_vpcomgtw ((__v8hi)__A, (__v8hi)__B);
607}
608
609extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
610_mm_comge_epi16(__m128i __A, __m128i __B)
611{
612  return (__m128i) __builtin_ia32_vpcomgew ((__v8hi)__A, (__v8hi)__B);
613}
614
615extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
616_mm_comeq_epi16(__m128i __A, __m128i __B)
617{
618  return (__m128i) __builtin_ia32_vpcomeqw ((__v8hi)__A, (__v8hi)__B);
619}
620
621extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
622_mm_comneq_epi16(__m128i __A, __m128i __B)
623{
624  return (__m128i) __builtin_ia32_vpcomneqw ((__v8hi)__A, (__v8hi)__B);
625}
626
627extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
628_mm_comfalse_epi16(__m128i __A, __m128i __B)
629{
630  return (__m128i) __builtin_ia32_vpcomfalsew ((__v8hi)__A, (__v8hi)__B);
631}
632
633extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
634_mm_comtrue_epi16(__m128i __A, __m128i __B)
635{
636  return (__m128i) __builtin_ia32_vpcomtruew ((__v8hi)__A, (__v8hi)__B);
637}
638
/* pcom (integer, signed double words) */
640
641extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
642_mm_comlt_epi32(__m128i __A, __m128i __B)
643{
644  return (__m128i) __builtin_ia32_vpcomltd ((__v4si)__A, (__v4si)__B);
645}
646
647extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
648_mm_comle_epi32(__m128i __A, __m128i __B)
649{
650  return (__m128i) __builtin_ia32_vpcomled ((__v4si)__A, (__v4si)__B);
651}
652
653extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
654_mm_comgt_epi32(__m128i __A, __m128i __B)
655{
656  return (__m128i) __builtin_ia32_vpcomgtd ((__v4si)__A, (__v4si)__B);
657}
658
659extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
660_mm_comge_epi32(__m128i __A, __m128i __B)
661{
662  return (__m128i) __builtin_ia32_vpcomged ((__v4si)__A, (__v4si)__B);
663}
664
665extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
666_mm_comeq_epi32(__m128i __A, __m128i __B)
667{
668  return (__m128i) __builtin_ia32_vpcomeqd ((__v4si)__A, (__v4si)__B);
669}
670
671extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
672_mm_comneq_epi32(__m128i __A, __m128i __B)
673{
674  return (__m128i) __builtin_ia32_vpcomneqd ((__v4si)__A, (__v4si)__B);
675}
676
677extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
678_mm_comfalse_epi32(__m128i __A, __m128i __B)
679{
680  return (__m128i) __builtin_ia32_vpcomfalsed ((__v4si)__A, (__v4si)__B);
681}
682
683extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
684_mm_comtrue_epi32(__m128i __A, __m128i __B)
685{
686  return (__m128i) __builtin_ia32_vpcomtrued ((__v4si)__A, (__v4si)__B);
687}
688
/* pcom (integer, signed quad words) */
690
691extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
692_mm_comlt_epi64(__m128i __A, __m128i __B)
693{
694  return (__m128i) __builtin_ia32_vpcomltq ((__v2di)__A, (__v2di)__B);
695}
696
697extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
698_mm_comle_epi64(__m128i __A, __m128i __B)
699{
700  return (__m128i) __builtin_ia32_vpcomleq ((__v2di)__A, (__v2di)__B);
701}
702
703extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
704_mm_comgt_epi64(__m128i __A, __m128i __B)
705{
706  return (__m128i) __builtin_ia32_vpcomgtq ((__v2di)__A, (__v2di)__B);
707}
708
709extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
710_mm_comge_epi64(__m128i __A, __m128i __B)
711{
712  return (__m128i) __builtin_ia32_vpcomgeq ((__v2di)__A, (__v2di)__B);
713}
714
715extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
716_mm_comeq_epi64(__m128i __A, __m128i __B)
717{
718  return (__m128i) __builtin_ia32_vpcomeqq ((__v2di)__A, (__v2di)__B);
719}
720
721extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
722_mm_comneq_epi64(__m128i __A, __m128i __B)
723{
724  return (__m128i) __builtin_ia32_vpcomneqq ((__v2di)__A, (__v2di)__B);
725}
726
727extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
728_mm_comfalse_epi64(__m128i __A, __m128i __B)
729{
730  return (__m128i) __builtin_ia32_vpcomfalseq ((__v2di)__A, (__v2di)__B);
731}
732
733extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
734_mm_comtrue_epi64(__m128i __A, __m128i __B)
735{
736  return (__m128i) __builtin_ia32_vpcomtrueq ((__v2di)__A, (__v2di)__B);
737}
738
739/* FRCZ */
740
741extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
742_mm_frcz_ps (__m128 __A)
743{
744  return (__m128) __builtin_ia32_vfrczps ((__v4sf)__A);
745}
746
747extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
748_mm_frcz_pd (__m128d __A)
749{
750  return (__m128d) __builtin_ia32_vfrczpd ((__v2df)__A);
751}
752
753extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
754_mm_frcz_ss (__m128 __A, __m128 __B)
755{
756  return (__m128) __builtin_ia32_movss ((__v4sf)__A,
757					(__v4sf)
758					__builtin_ia32_vfrczss ((__v4sf)__B));
759}
760
761extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
762_mm_frcz_sd (__m128d __A, __m128d __B)
763{
764  return (__m128d) __builtin_ia32_movsd ((__v2df)__A,
765					 (__v2df)
766					 __builtin_ia32_vfrczsd ((__v2df)__B));
767}
768
769extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
770_mm256_frcz_ps (__m256 __A)
771{
772  return (__m256) __builtin_ia32_vfrczps256 ((__v8sf)__A);
773}
774
775extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
776_mm256_frcz_pd (__m256d __A)
777{
778  return (__m256d) __builtin_ia32_vfrczpd256 ((__v4df)__A);
779}
780
781/* PERMIL2 */
782
783#ifdef __OPTIMIZE__
784extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
785_mm_permute2_pd (__m128d __X, __m128d __Y, __m128i __C, const int __I)
786{
787  return (__m128d) __builtin_ia32_vpermil2pd ((__v2df)__X,
788					      (__v2df)__Y,
789					      (__v2di)__C,
790					      __I);
791}
792
793extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
794_mm256_permute2_pd (__m256d __X, __m256d __Y, __m256i __C, const int __I)
795{
796  return (__m256d) __builtin_ia32_vpermil2pd256 ((__v4df)__X,
797						 (__v4df)__Y,
798						 (__v4di)__C,
799						 __I);
800}
801
802extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
803_mm_permute2_ps (__m128 __X, __m128 __Y, __m128i __C, const int __I)
804{
805  return (__m128) __builtin_ia32_vpermil2ps ((__v4sf)__X,
806					     (__v4sf)__Y,
807					     (__v4si)__C,
808					     __I);
809}
810
811extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
812_mm256_permute2_ps (__m256 __X, __m256 __Y, __m256i __C, const int __I)
813{
814  return (__m256) __builtin_ia32_vpermil2ps256 ((__v8sf)__X,
815						(__v8sf)__Y,
816						(__v8si)__C,
817						__I);
818}
819#else
/* Macro variant of _mm_permute2_pd, used when __OPTIMIZE__ is not defined:
   expands to a direct vpermil2pd builtin call.  */
#define _mm_permute2_pd(__X, __Y, __C, __I) \
  ((__m128d) __builtin_ia32_vpermil2pd ((__v2df)(__m128d)(__X), \
                                        (__v2df)(__m128d)(__Y), \
                                        (__v2di)(__m128i)(__C), \
                                        (int)(__I)))
825
/* Macro variant of _mm256_permute2_pd, used when __OPTIMIZE__ is not
   defined: expands to a direct vpermil2pd256 builtin call.  */
#define _mm256_permute2_pd(__X, __Y, __C, __I) \
  ((__m256d) __builtin_ia32_vpermil2pd256 ((__v4df)(__m256d)(__X), \
                                           (__v4df)(__m256d)(__Y), \
                                           (__v4di)(__m256i)(__C), \
                                           (int)(__I)))
831
/* Macro variant of _mm_permute2_ps, used when __OPTIMIZE__ is not defined:
   expands to a direct vpermil2ps builtin call.  */
#define _mm_permute2_ps(__X, __Y, __C, __I) \
  ((__m128) __builtin_ia32_vpermil2ps ((__v4sf)(__m128)(__X), \
                                       (__v4sf)(__m128)(__Y), \
                                       (__v4si)(__m128i)(__C), \
                                       (int)(__I)))
837
/* Macro variant of _mm256_permute2_ps, used when __OPTIMIZE__ is not
   defined: expands to a direct vpermil2ps256 builtin call.  */
#define _mm256_permute2_ps(__X, __Y, __C, __I) \
  ((__m256) __builtin_ia32_vpermil2ps256 ((__v8sf)(__m256)(__X), \
                                          (__v8sf)(__m256)(__Y), \
                                          (__v8si)(__m256i)(__C), \
                                          (int)(__I)))
843#endif /* __OPTIMIZE__ */
844
845#ifdef __DISABLE_XOP__
846#undef __DISABLE_XOP__
847#pragma GCC pop_options
848#endif /* __DISABLE_XOP__ */
849
850#endif /* _XOPMMINTRIN_H_INCLUDED */
851