1/* Copyright (C) 2017-2020 Free Software Foundation, Inc.
2
3   This file is part of GCC.
4
5   GCC is free software; you can redistribute it and/or modify
6   it under the terms of the GNU General Public License as published by
7   the Free Software Foundation; either version 3, or (at your option)
8   any later version.
9
10   GCC is distributed in the hope that it will be useful,
11   but WITHOUT ANY WARRANTY; without even the implied warranty of
12   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13   GNU General Public License for more details.
14
15   Under Section 7 of GPL version 3, you are granted additional
16   permissions described in the GCC Runtime Library Exception, version
17   3.1, as published by the Free Software Foundation.
18
19   You should have received a copy of the GNU General Public License and
20   a copy of the GCC Runtime Library Exception along with this program;
21   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
22   <http://www.gnu.org/licenses/>.  */
23
24#ifndef _IMMINTRIN_H_INCLUDED
25#error "Never use <gfniintrin.h> directly; include <immintrin.h> instead."
26#endif
27
28#ifndef _GFNIINTRIN_H_INCLUDED
29#define _GFNIINTRIN_H_INCLUDED
30
31#if !defined(__GFNI__) || !defined(__SSE2__)
32#pragma GCC push_options
33#pragma GCC target("gfni,sse2")
34#define __DISABLE_GFNI__
35#endif /* __GFNI__ */
36
37extern __inline __m128i
38__attribute__((__gnu_inline__, __always_inline__, __artificial__))
39_mm_gf2p8mul_epi8 (__m128i __A, __m128i __B)
40{
41  return (__m128i) __builtin_ia32_vgf2p8mulb_v16qi((__v16qi) __A,
42						   (__v16qi) __B);
43}
44
#ifdef __OPTIMIZE__
/* Affine transformation in GF(2^8) of the multiplicative inverse of
   each byte of __A, using the 8x8 bit matrix held in __B and the
   constant byte __C (must be a compile-time constant).  */
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_gf2p8affineinv_epi64_epi8 (__m128i __A, __m128i __B, const int __C)
{
  return (__m128i) __builtin_ia32_vgf2p8affineinvqb_v16qi ((__v16qi) __A,
							   (__v16qi) __B,
							   __C);
}

/* Affine transformation in GF(2^8) of each byte of __A, using the
   8x8 bit matrix held in __B and the constant byte __C.  */
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_gf2p8affine_epi64_epi8 (__m128i __A, __m128i __B, const int __C)
{
  return (__m128i) __builtin_ia32_vgf2p8affineqb_v16qi ((__v16qi) __A,
							(__v16qi) __B,
							__C);
}
#else
/* Without optimization the inline forms cannot guarantee that the
   constant operand folds to an immediate, so use macros instead.  */
#define _mm_gf2p8affineinv_epi64_epi8(A, B, C)				    \
  ((__m128i) __builtin_ia32_vgf2p8affineinvqb_v16qi ((__v16qi)(__m128i)(A), \
						     (__v16qi)(__m128i)(B), \
						     (int)(C)))
#define _mm_gf2p8affine_epi64_epi8(A, B, C)				    \
  ((__m128i) __builtin_ia32_vgf2p8affineqb_v16qi ((__v16qi)(__m128i)(A),    \
						  (__v16qi)(__m128i)(B),    \
						  (int)(C)))
#endif
70
71#ifdef __DISABLE_GFNI__
72#undef __DISABLE_GFNI__
73#pragma GCC pop_options
74#endif /* __DISABLE_GFNI__ */
75
76#if !defined(__GFNI__) || !defined(__AVX__)
77#pragma GCC push_options
78#pragma GCC target("gfni,avx")
79#define __DISABLE_GFNIAVX__
80#endif /* __GFNIAVX__ */
81
82extern __inline __m256i
83__attribute__((__gnu_inline__, __always_inline__, __artificial__))
84_mm256_gf2p8mul_epi8 (__m256i __A, __m256i __B)
85{
86  return (__m256i) __builtin_ia32_vgf2p8mulb_v32qi ((__v32qi) __A,
87						    (__v32qi) __B);
88}
89
#ifdef __OPTIMIZE__
/* Affine transformation in GF(2^8) of the multiplicative inverse of
   each byte of __A, using the 8x8 bit matrix held in __B and the
   constant byte __C (must be a compile-time constant).  */
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_gf2p8affineinv_epi64_epi8 (__m256i __A, __m256i __B, const int __C)
{
  return (__m256i) __builtin_ia32_vgf2p8affineinvqb_v32qi ((__v32qi) __A,
							   (__v32qi) __B,
							   __C);
}

/* Affine transformation in GF(2^8) of each byte of __A, using the
   8x8 bit matrix held in __B and the constant byte __C.  */
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_gf2p8affine_epi64_epi8 (__m256i __A, __m256i __B, const int __C)
{
  return (__m256i) __builtin_ia32_vgf2p8affineqb_v32qi ((__v32qi) __A,
							(__v32qi) __B, __C);
}
#else
/* Macro forms for use without optimization, where the inline versions
   cannot guarantee the constant operand becomes an immediate.
   (The second cast in _mm256_gf2p8affine_epi64_epi8 previously had
   stray whitespace inside its parentheses; normalized here.)  */
#define _mm256_gf2p8affineinv_epi64_epi8(A, B, C)			    \
  ((__m256i) __builtin_ia32_vgf2p8affineinvqb_v32qi ((__v32qi)(__m256i)(A), \
						     (__v32qi)(__m256i)(B), \
						     (int)(C)))
#define _mm256_gf2p8affine_epi64_epi8(A, B, C)				    \
  ((__m256i) __builtin_ia32_vgf2p8affineqb_v32qi ((__v32qi)(__m256i)(A),    \
						  (__v32qi)(__m256i)(B),    \
						  (int)(C)))
#endif
116
117#ifdef __DISABLE_GFNIAVX__
118#undef __DISABLE_GFNIAVX__
119#pragma GCC pop_options
120#endif /* __GFNIAVX__ */
121
122#if !defined(__GFNI__) || !defined(__AVX512VL__)
123#pragma GCC push_options
124#pragma GCC target("gfni,avx512vl")
125#define __DISABLE_GFNIAVX512VL__
126#endif /* __GFNIAVX512VL__ */
127
128extern __inline __m128i
129__attribute__((__gnu_inline__, __always_inline__, __artificial__))
130_mm_mask_gf2p8mul_epi8 (__m128i __A, __mmask16 __B, __m128i __C, __m128i __D)
131{
132  return (__m128i) __builtin_ia32_vgf2p8mulb_v16qi_mask ((__v16qi) __C,
133							 (__v16qi) __D,
134							 (__v16qi)__A, __B);
135}
136
137extern __inline __m128i
138__attribute__((__gnu_inline__, __always_inline__, __artificial__))
139_mm_maskz_gf2p8mul_epi8 (__mmask16 __A, __m128i __B, __m128i __C)
140{
141  return (__m128i) __builtin_ia32_vgf2p8mulb_v16qi_mask ((__v16qi) __B,
142			(__v16qi) __C, (__v16qi) _mm_setzero_si128 (), __A);
143}
144
#ifdef __OPTIMIZE__
/* Masked GF(2^8) affine-inverse transform: data __C, matrix __D,
   constant __E; bytes whose bit in __B is clear merge from __A.  */
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_gf2p8affineinv_epi64_epi8 (__m128i __A, __mmask16 __B, __m128i __C,
				    __m128i __D, const int __E)
{
  __v16qi __r
    = __builtin_ia32_vgf2p8affineinvqb_v16qi_mask ((__v16qi) __C,
						   (__v16qi) __D, __E,
						   (__v16qi) __A, __B);
  return (__m128i) __r;
}

/* Zero-masked GF(2^8) affine-inverse transform: data __B, matrix __C,
   constant __D; bytes whose bit in __A is clear become zero.  */
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_gf2p8affineinv_epi64_epi8 (__mmask16 __A, __m128i __B, __m128i __C,
				     const int __D)
{
  __v16qi __zero = (__v16qi) _mm_setzero_si128 ();
  return (__m128i) __builtin_ia32_vgf2p8affineinvqb_v16qi_mask ((__v16qi) __B,
								(__v16qi) __C,
								__D, __zero,
								__A);
}

/* Masked GF(2^8) affine transform: data __C, matrix __D, constant __E;
   bytes whose bit in __B is clear merge from __A.  */
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_gf2p8affine_epi64_epi8 (__m128i __A, __mmask16 __B, __m128i __C,
				 __m128i __D, const int __E)
{
  __v16qi __r
    = __builtin_ia32_vgf2p8affineqb_v16qi_mask ((__v16qi) __C,
						(__v16qi) __D, __E,
						(__v16qi) __A, __B);
  return (__m128i) __r;
}

/* Zero-masked GF(2^8) affine transform: data __B, matrix __C,
   constant __D; bytes whose bit in __A is clear become zero.  */
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_gf2p8affine_epi64_epi8 (__mmask16 __A, __m128i __B, __m128i __C,
				  const int __D)
{
  __v16qi __zero = (__v16qi) _mm_setzero_si128 ();
  return (__m128i) __builtin_ia32_vgf2p8affineqb_v16qi_mask ((__v16qi) __B,
							     (__v16qi) __C,
							     __D, __zero,
							     __A);
}
#else
/* Macro forms for use without optimization, where the constant operand
   must expand directly to an immediate.  */
#define _mm_mask_gf2p8affineinv_epi64_epi8(A, B, C, D, E)		    \
  ((__m128i) __builtin_ia32_vgf2p8affineinvqb_v16qi_mask (		    \
	(__v16qi)(__m128i)(C), (__v16qi)(__m128i)(D), (int)(E),		    \
	(__v16qi)(__m128i)(A), (__mmask16)(B)))
#define _mm_maskz_gf2p8affineinv_epi64_epi8(A, B, C, D)			    \
  ((__m128i) __builtin_ia32_vgf2p8affineinvqb_v16qi_mask (		    \
	(__v16qi)(__m128i)(B), (__v16qi)(__m128i)(C), (int)(D),		    \
	(__v16qi)(__m128i) _mm_setzero_si128 (), (__mmask16)(A)))
#define _mm_mask_gf2p8affine_epi64_epi8(A, B, C, D, E)			    \
  ((__m128i) __builtin_ia32_vgf2p8affineqb_v16qi_mask (			    \
	(__v16qi)(__m128i)(C), (__v16qi)(__m128i)(D), (int)(E),		    \
	(__v16qi)(__m128i)(A), (__mmask16)(B)))
#define _mm_maskz_gf2p8affine_epi64_epi8(A, B, C, D)			    \
  ((__m128i) __builtin_ia32_vgf2p8affineqb_v16qi_mask (			    \
	(__v16qi)(__m128i)(B), (__v16qi)(__m128i)(C), (int)(D),		    \
	(__v16qi)(__m128i) _mm_setzero_si128 (), (__mmask16)(A)))
#endif
204
205#ifdef __DISABLE_GFNIAVX512VL__
206#undef __DISABLE_GFNIAVX512VL__
207#pragma GCC pop_options
208#endif /* __GFNIAVX512VL__ */
209
210#if !defined(__GFNI__) || !defined(__AVX512VL__) || !defined(__AVX512BW__)
211#pragma GCC push_options
212#pragma GCC target("gfni,avx512vl,avx512bw")
213#define __DISABLE_GFNIAVX512VLBW__
214#endif /* __GFNIAVX512VLBW__ */
215
216extern __inline __m256i
217__attribute__((__gnu_inline__, __always_inline__, __artificial__))
218_mm256_mask_gf2p8mul_epi8 (__m256i __A, __mmask32 __B, __m256i __C,
219			   __m256i __D)
220{
221  return (__m256i) __builtin_ia32_vgf2p8mulb_v32qi_mask ((__v32qi) __C,
222							 (__v32qi) __D,
223							 (__v32qi)__A, __B);
224}
225
226extern __inline __m256i
227__attribute__((__gnu_inline__, __always_inline__, __artificial__))
228_mm256_maskz_gf2p8mul_epi8 (__mmask32 __A, __m256i __B, __m256i __C)
229{
230  return (__m256i) __builtin_ia32_vgf2p8mulb_v32qi_mask ((__v32qi) __B,
231			(__v32qi) __C, (__v32qi) _mm256_setzero_si256 (), __A);
232}
233
#ifdef __OPTIMIZE__
/* Masked GF(2^8) affine-inverse transform: data __C, matrix __D,
   constant __E; bytes whose bit in __B is clear merge from __A.
   (Indentation of the immediate operand previously mixed stray spaces
   into the tab run; normalized here.)  */
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_gf2p8affineinv_epi64_epi8 (__m256i __A, __mmask32 __B,
				       __m256i __C, __m256i __D, const int __E)
{
  return (__m256i) __builtin_ia32_vgf2p8affineinvqb_v32qi_mask ((__v32qi) __C,
								(__v32qi) __D,
								__E,
								(__v32qi) __A,
								__B);
}

/* Zero-masked GF(2^8) affine-inverse transform: data __B, matrix __C,
   constant __D; bytes whose bit in __A is clear become zero.  */
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_gf2p8affineinv_epi64_epi8 (__mmask32 __A, __m256i __B,
					__m256i __C, const int __D)
{
  return (__m256i) __builtin_ia32_vgf2p8affineinvqb_v32qi_mask ((__v32qi) __B,
				      (__v32qi) __C, __D,
				      (__v32qi) _mm256_setzero_si256 (), __A);
}

/* Masked GF(2^8) affine transform: data __C, matrix __D, constant __E;
   bytes whose bit in __B is clear merge from __A.  */
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_gf2p8affine_epi64_epi8 (__m256i __A, __mmask32 __B, __m256i __C,
				    __m256i __D, const int __E)
{
  return (__m256i) __builtin_ia32_vgf2p8affineqb_v32qi_mask ((__v32qi) __C,
							     (__v32qi) __D,
							     __E,
							     (__v32qi) __A,
							     __B);
}

/* Zero-masked GF(2^8) affine transform: data __B, matrix __C,
   constant __D; bytes whose bit in __A is clear become zero.  */
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_maskz_gf2p8affine_epi64_epi8 (__mmask32 __A, __m256i __B,
				     __m256i __C, const int __D)
{
  return (__m256i) __builtin_ia32_vgf2p8affineqb_v32qi_mask ((__v32qi) __B,
		(__v32qi) __C, __D, (__v32qi) _mm256_setzero_si256 (), __A);
}
#else
/* Macro forms for use without optimization, where the constant operand
   must expand directly to an immediate.  */
#define _mm256_mask_gf2p8affineinv_epi64_epi8(A, B, C, D, E)		\
  ((__m256i) __builtin_ia32_vgf2p8affineinvqb_v32qi_mask (		\
	(__v32qi)(__m256i)(C), (__v32qi)(__m256i)(D), (int)(E),		\
	(__v32qi)(__m256i)(A), (__mmask32)(B)))
#define _mm256_maskz_gf2p8affineinv_epi64_epi8(A, B, C, D)		\
  ((__m256i) __builtin_ia32_vgf2p8affineinvqb_v32qi_mask (		\
	(__v32qi)(__m256i)(B), (__v32qi)(__m256i)(C), (int)(D),		\
	(__v32qi)(__m256i) _mm256_setzero_si256 (), (__mmask32)(A)))
#define _mm256_mask_gf2p8affine_epi64_epi8(A, B, C, D, E)		\
  ((__m256i) __builtin_ia32_vgf2p8affineqb_v32qi_mask (			\
	(__v32qi)(__m256i)(C), (__v32qi)(__m256i)(D), (int)(E),		\
	(__v32qi)(__m256i)(A), (__mmask32)(B)))
#define _mm256_maskz_gf2p8affine_epi64_epi8(A, B, C, D)			\
  ((__m256i) __builtin_ia32_vgf2p8affineqb_v32qi_mask (			\
	(__v32qi)(__m256i)(B), (__v32qi)(__m256i)(C), (int)(D),		\
	(__v32qi)(__m256i) _mm256_setzero_si256 (), (__mmask32)(A)))
#endif
294
295#ifdef __DISABLE_GFNIAVX512VLBW__
296#undef __DISABLE_GFNIAVX512VLBW__
297#pragma GCC pop_options
298#endif /* __GFNIAVX512VLBW__ */
299
300#if !defined(__GFNI__) || !defined(__AVX512F__) || !defined(__AVX512BW__)
301#pragma GCC push_options
302#pragma GCC target("gfni,avx512f,avx512bw")
303#define __DISABLE_GFNIAVX512FBW__
304#endif /* __GFNIAVX512FBW__ */
305
306extern __inline __m512i
307__attribute__((__gnu_inline__, __always_inline__, __artificial__))
308_mm512_mask_gf2p8mul_epi8 (__m512i __A, __mmask64 __B, __m512i __C,
309			   __m512i __D)
310{
311  return (__m512i) __builtin_ia32_vgf2p8mulb_v64qi_mask ((__v64qi) __C,
312					(__v64qi) __D, (__v64qi)__A, __B);
313}
314
315extern __inline __m512i
316__attribute__((__gnu_inline__, __always_inline__, __artificial__))
317_mm512_maskz_gf2p8mul_epi8 (__mmask64 __A, __m512i __B, __m512i __C)
318{
319  return (__m512i) __builtin_ia32_vgf2p8mulb_v64qi_mask ((__v64qi) __B,
320			(__v64qi) __C, (__v64qi) _mm512_setzero_si512 (), __A);
321}
322extern __inline __m512i
323__attribute__((__gnu_inline__, __always_inline__, __artificial__))
324_mm512_gf2p8mul_epi8 (__m512i __A, __m512i __B)
325{
326  return (__m512i) __builtin_ia32_vgf2p8mulb_v64qi ((__v64qi) __A,
327						    (__v64qi) __B);
328}
329
#ifdef __OPTIMIZE__
/* Masked GF(2^8) affine-inverse transform: data __C, matrix __D,
   constant __E; bytes whose bit in __B is clear merge from __A.  */
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_gf2p8affineinv_epi64_epi8 (__m512i __A, __mmask64 __B, __m512i __C,
				       __m512i __D, const int __E)
{
  return (__m512i) __builtin_ia32_vgf2p8affineinvqb_v64qi_mask ((__v64qi) __C,
								(__v64qi) __D,
								__E,
								(__v64qi) __A,
								__B);
}

/* Zero-masked GF(2^8) affine-inverse transform: data __B, matrix __C,
   constant __D; bytes whose bit in __A is clear become zero.  */
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_gf2p8affineinv_epi64_epi8 (__mmask64 __A, __m512i __B,
					__m512i __C, const int __D)
{
  return (__m512i) __builtin_ia32_vgf2p8affineinvqb_v64qi_mask ((__v64qi) __B,
				(__v64qi) __C, __D,
				(__v64qi) _mm512_setzero_si512 (), __A);
}

/* Affine transformation in GF(2^8) of the multiplicative inverse of
   each byte of __A, using the 8x8 bit matrix held in __B and the
   constant byte __C.  */
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_gf2p8affineinv_epi64_epi8 (__m512i __A, __m512i __B, const int __C)
{
  return (__m512i) __builtin_ia32_vgf2p8affineinvqb_v64qi ((__v64qi) __A,
							   (__v64qi) __B, __C);
}

/* Masked GF(2^8) affine transform: data __C, matrix __D, constant __E;
   bytes whose bit in __B is clear merge from __A.  */
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_gf2p8affine_epi64_epi8 (__m512i __A, __mmask64 __B, __m512i __C,
				    __m512i __D, const int __E)
{
  return (__m512i) __builtin_ia32_vgf2p8affineqb_v64qi_mask ((__v64qi) __C,
					(__v64qi) __D, __E, (__v64qi) __A,
					__B);
}

/* Zero-masked GF(2^8) affine transform: data __B, matrix __C,
   constant __D; bytes whose bit in __A is clear become zero.  */
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_gf2p8affine_epi64_epi8 (__mmask64 __A, __m512i __B, __m512i __C,
				     const int __D)
{
  return (__m512i) __builtin_ia32_vgf2p8affineqb_v64qi_mask ((__v64qi) __B,
		  (__v64qi) __C, __D, (__v64qi) _mm512_setzero_si512 (), __A);
}

/* Affine transformation in GF(2^8) of each byte of __A, using the
   8x8 bit matrix held in __B and the constant byte __C.  */
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_gf2p8affine_epi64_epi8 (__m512i __A, __m512i __B, const int __C)
{
  return (__m512i) __builtin_ia32_vgf2p8affineqb_v64qi ((__v64qi) __A,
							(__v64qi) __B, __C);
}
#else
/* Macro forms for use without optimization, where the constant operand
   must expand directly to an immediate.  */
#define _mm512_mask_gf2p8affineinv_epi64_epi8(A, B, C, D, E)		\
  ((__m512i) __builtin_ia32_vgf2p8affineinvqb_v64qi_mask (		\
	(__v64qi)(__m512i)(C), (__v64qi)(__m512i)(D), (int)(E),		\
	(__v64qi)(__m512i)(A), (__mmask64)(B)))
#define _mm512_maskz_gf2p8affineinv_epi64_epi8(A, B, C, D)		\
  ((__m512i) __builtin_ia32_vgf2p8affineinvqb_v64qi_mask (		\
	(__v64qi)(__m512i)(B), (__v64qi)(__m512i)(C), (int)(D),		\
	(__v64qi)(__m512i) _mm512_setzero_si512 (), (__mmask64)(A)))
#define _mm512_gf2p8affineinv_epi64_epi8(A, B, C)			\
  ((__m512i) __builtin_ia32_vgf2p8affineinvqb_v64qi (			\
	(__v64qi)(__m512i)(A), (__v64qi)(__m512i)(B), (int)(C)))
#define _mm512_mask_gf2p8affine_epi64_epi8(A, B, C, D, E)		\
  ((__m512i) __builtin_ia32_vgf2p8affineqb_v64qi_mask (			\
	(__v64qi)(__m512i)(C), (__v64qi)(__m512i)(D), (int)(E),		\
	(__v64qi)(__m512i)(A), (__mmask64)(B)))
#define _mm512_maskz_gf2p8affine_epi64_epi8(A, B, C, D)			\
  ((__m512i) __builtin_ia32_vgf2p8affineqb_v64qi_mask (			\
	(__v64qi)(__m512i)(B), (__v64qi)(__m512i)(C), (int)(D),		\
	(__v64qi)(__m512i) _mm512_setzero_si512 (), (__mmask64)(A)))
#define _mm512_gf2p8affine_epi64_epi8(A, B, C)				\
  ((__m512i) __builtin_ia32_vgf2p8affineqb_v64qi (			\
	(__v64qi)(__m512i)(A), (__v64qi)(__m512i)(B), (int)(C)))
#endif
408
409#ifdef __DISABLE_GFNIAVX512FBW__
410#undef __DISABLE_GFNIAVX512FBW__
411#pragma GCC pop_options
412#endif /* __GFNIAVX512FBW__ */
413
414#endif /* _GFNIINTRIN_H_INCLUDED */
415