1/* Copyright (C) 2013-2015 Free Software Foundation, Inc.
2
3   This file is part of GCC.
4
5   GCC is free software; you can redistribute it and/or modify
6   it under the terms of the GNU General Public License as published by
7   the Free Software Foundation; either version 3, or (at your option)
8   any later version.
9
10   GCC is distributed in the hope that it will be useful,
11   but WITHOUT ANY WARRANTY; without even the implied warranty of
12   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13   GNU General Public License for more details.
14
15   Under Section 7 of GPL version 3, you are granted additional
16   permissions described in the GCC Runtime Library Exception, version
17   3.1, as published by the Free Software Foundation.
18
19   You should have received a copy of the GNU General Public License and
20   a copy of the GCC Runtime Library Exception along with this program;
21   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
22   <http://www.gnu.org/licenses/>.  */
23
24#ifndef _IMMINTRIN_H_INCLUDED
25#error "Never use <avx512erintrin.h> directly; include <immintrin.h> instead."
26#endif
27
28#ifndef _AVX512ERINTRIN_H_INCLUDED
29#define _AVX512ERINTRIN_H_INCLUDED
30
31#ifndef __AVX512ER__
32#pragma GCC push_options
33#pragma GCC target("avx512er")
34#define __DISABLE_AVX512ER__
35#endif /* __AVX512ER__ */
36
/* Internal data types for implementing the intrinsics.  */
typedef double __v8df __attribute__ ((__vector_size__ (64)));
typedef float __v16sf __attribute__ ((__vector_size__ (64)));

/* The Intel API is flexible enough that we must allow aliasing with other
   vector types, and their scalar components.  */
typedef float __m512 __attribute__ ((__vector_size__ (64), __may_alias__));
typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__));

/* Write-mask types: one bit per element of a 512-bit vector
   (8 doubles, 16 floats).  */
typedef unsigned char  __mmask8;
typedef unsigned short __mmask16;
48
49#ifdef __OPTIMIZE__
50extern __inline __m512d
51__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
52_mm512_exp2a23_round_pd (__m512d __A, int __R)
53{
54  __m512d __W;
55  return (__m512d) __builtin_ia32_exp2pd_mask ((__v8df) __A,
56					       (__v8df) __W,
57					       (__mmask8) -1, __R);
58}
59
/* Approximate 2^x (relative error at most 2^-23) for each double in
   __A, using rounding mode __R.  Result elements whose bit in __U is
   clear are copied from __W.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_exp2a23_round_pd (__m512d __W, __mmask8 __U, __m512d __A, int __R)
{
  return (__m512d) __builtin_ia32_exp2pd_mask ((__v8df) __A,
					       (__v8df) __W,
					       (__mmask8) __U, __R);
}

/* As above, but result elements whose bit in __U is clear are zeroed.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_exp2a23_round_pd (__mmask8 __U, __m512d __A, int __R)
{
  return (__m512d) __builtin_ia32_exp2pd_mask ((__v8df) __A,
					       (__v8df) _mm512_setzero_pd (),
					       (__mmask8) __U, __R);
}
77
78extern __inline __m512
79__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
80_mm512_exp2a23_round_ps (__m512 __A, int __R)
81{
82  __m512 __W;
83  return (__m512) __builtin_ia32_exp2ps_mask ((__v16sf) __A,
84					      (__v16sf) __W,
85					      (__mmask16) -1, __R);
86}
87
/* Approximate 2^x (relative error at most 2^-23) for each float in
   __A, using rounding mode __R.  Result elements whose bit in __U is
   clear are copied from __W.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_exp2a23_round_ps (__m512 __W, __mmask16 __U, __m512 __A, int __R)
{
  return (__m512) __builtin_ia32_exp2ps_mask ((__v16sf) __A,
					      (__v16sf) __W,
					      (__mmask16) __U, __R);
}

/* As above, but result elements whose bit in __U is clear are zeroed.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_exp2a23_round_ps (__mmask16 __U, __m512 __A, int __R)
{
  return (__m512) __builtin_ia32_exp2ps_mask ((__v16sf) __A,
					      (__v16sf) _mm512_setzero_ps (),
					      (__mmask16) __U, __R);
}
105
106extern __inline __m512d
107__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
108_mm512_rcp28_round_pd (__m512d __A, int __R)
109{
110  __m512d __W;
111  return (__m512d) __builtin_ia32_rcp28pd_mask ((__v8df) __A,
112						(__v8df) __W,
113						(__mmask8) -1, __R);
114}
115
/* Approximate reciprocal (relative error at most 2^-28) of each
   double in __A, using rounding mode __R.  Result elements whose bit
   in __U is clear are copied from __W.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_rcp28_round_pd (__m512d __W, __mmask8 __U, __m512d __A, int __R)
{
  return (__m512d) __builtin_ia32_rcp28pd_mask ((__v8df) __A,
						(__v8df) __W,
						(__mmask8) __U, __R);
}

/* As above, but result elements whose bit in __U is clear are zeroed.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_rcp28_round_pd (__mmask8 __U, __m512d __A, int __R)
{
  return (__m512d) __builtin_ia32_rcp28pd_mask ((__v8df) __A,
						(__v8df) _mm512_setzero_pd (),
						(__mmask8) __U, __R);
}
133
134extern __inline __m512
135__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
136_mm512_rcp28_round_ps (__m512 __A, int __R)
137{
138  __m512 __W;
139  return (__m512) __builtin_ia32_rcp28ps_mask ((__v16sf) __A,
140					       (__v16sf) __W,
141					       (__mmask16) -1, __R);
142}
143
/* Approximate reciprocal (relative error at most 2^-28) of each
   float in __A, using rounding mode __R.  Result elements whose bit
   in __U is clear are copied from __W.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_rcp28_round_ps (__m512 __W, __mmask16 __U, __m512 __A, int __R)
{
  return (__m512) __builtin_ia32_rcp28ps_mask ((__v16sf) __A,
					       (__v16sf) __W,
					       (__mmask16) __U, __R);
}

/* As above, but result elements whose bit in __U is clear are zeroed.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_rcp28_round_ps (__mmask16 __U, __m512 __A, int __R)
{
  return (__m512) __builtin_ia32_rcp28ps_mask ((__v16sf) __A,
					       (__v16sf) _mm512_setzero_ps (),
					       (__mmask16) __U, __R);
}
161
/* Approximate reciprocal (relative error at most 2^-28) of the low
   double of __B, using rounding mode __R; the upper element is
   copied from __A.  Note the builtin takes the computed operand
   first, hence the swapped argument order.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_rcp28_round_sd (__m128d __A, __m128d __B, int __R)
{
  return (__m128d) __builtin_ia32_rcp28sd_round ((__v2df) __B,
						 (__v2df) __A,
						 __R);
}

/* Single-precision variant: low float of __B is approximated, upper
   three elements are copied from __A.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_rcp28_round_ss (__m128 __A, __m128 __B, int __R)
{
  return (__m128) __builtin_ia32_rcp28ss_round ((__v4sf) __B,
						(__v4sf) __A,
						__R);
}
179
180extern __inline __m512d
181__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
182_mm512_rsqrt28_round_pd (__m512d __A, int __R)
183{
184  __m512d __W;
185  return (__m512d) __builtin_ia32_rsqrt28pd_mask ((__v8df) __A,
186						  (__v8df) __W,
187						  (__mmask8) -1, __R);
188}
189
/* Approximate reciprocal square root (relative error at most 2^-28)
   of each double in __A, using rounding mode __R.  Result elements
   whose bit in __U is clear are copied from __W.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_rsqrt28_round_pd (__m512d __W, __mmask8 __U, __m512d __A, int __R)
{
  return (__m512d) __builtin_ia32_rsqrt28pd_mask ((__v8df) __A,
						  (__v8df) __W,
						  (__mmask8) __U, __R);
}

/* As above, but result elements whose bit in __U is clear are zeroed.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_rsqrt28_round_pd (__mmask8 __U, __m512d __A, int __R)
{
  return (__m512d) __builtin_ia32_rsqrt28pd_mask ((__v8df) __A,
						  (__v8df) _mm512_setzero_pd (),
						  (__mmask8) __U, __R);
}
207
208extern __inline __m512
209__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
210_mm512_rsqrt28_round_ps (__m512 __A, int __R)
211{
212  __m512 __W;
213  return (__m512) __builtin_ia32_rsqrt28ps_mask ((__v16sf) __A,
214						 (__v16sf) __W,
215						 (__mmask16) -1, __R);
216}
217
/* Approximate reciprocal square root (relative error at most 2^-28)
   of each float in __A, using rounding mode __R.  Result elements
   whose bit in __U is clear are copied from __W.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_rsqrt28_round_ps (__m512 __W, __mmask16 __U, __m512 __A, int __R)
{
  return (__m512) __builtin_ia32_rsqrt28ps_mask ((__v16sf) __A,
						 (__v16sf) __W,
						 (__mmask16) __U, __R);
}

/* As above, but result elements whose bit in __U is clear are zeroed.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_rsqrt28_round_ps (__mmask16 __U, __m512 __A, int __R)
{
  return (__m512) __builtin_ia32_rsqrt28ps_mask ((__v16sf) __A,
						 (__v16sf) _mm512_setzero_ps (),
						 (__mmask16) __U, __R);
}
235
/* Approximate reciprocal square root (relative error at most 2^-28)
   of the low double of __B, using rounding mode __R; the upper
   element is copied from __A.  Note the builtin takes the computed
   operand first, hence the swapped argument order.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_rsqrt28_round_sd (__m128d __A, __m128d __B, int __R)
{
  return (__m128d) __builtin_ia32_rsqrt28sd_round ((__v2df) __B,
						   (__v2df) __A,
						   __R);
}

/* Single-precision variant: low float of __B is approximated, upper
   three elements are copied from __A.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_rsqrt28_round_ss (__m128 __A, __m128 __B, int __R)
{
  return (__m128) __builtin_ia32_rsqrt28ss_round ((__v4sf) __B,
						  (__v4sf) __A,
						  __R);
}
253
254#else
/* When not optimizing, the intrinsics are defined as macros instead
   of inline functions, so that the rounding-mode argument reaches
   the builtin as a literal constant expression (the builtins require
   an immediate operand).  These must mirror the inline definitions
   above.  */
#define _mm512_exp2a23_round_pd(A, C)            \
    __builtin_ia32_exp2pd_mask(A, (__v8df)_mm512_setzero_pd(), -1, C)

#define _mm512_mask_exp2a23_round_pd(W, U, A, C) \
    __builtin_ia32_exp2pd_mask(A, W, U, C)

#define _mm512_maskz_exp2a23_round_pd(U, A, C)   \
    __builtin_ia32_exp2pd_mask(A, (__v8df)_mm512_setzero_pd(), U, C)

#define _mm512_exp2a23_round_ps(A, C)            \
    __builtin_ia32_exp2ps_mask(A, (__v16sf)_mm512_setzero_ps(), -1, C)

#define _mm512_mask_exp2a23_round_ps(W, U, A, C) \
    __builtin_ia32_exp2ps_mask(A, W, U, C)

#define _mm512_maskz_exp2a23_round_ps(U, A, C)   \
    __builtin_ia32_exp2ps_mask(A, (__v16sf)_mm512_setzero_ps(), U, C)

#define _mm512_rcp28_round_pd(A, C)            \
    __builtin_ia32_rcp28pd_mask(A, (__v8df)_mm512_setzero_pd(), -1, C)

#define _mm512_mask_rcp28_round_pd(W, U, A, C) \
    __builtin_ia32_rcp28pd_mask(A, W, U, C)

#define _mm512_maskz_rcp28_round_pd(U, A, C)   \
    __builtin_ia32_rcp28pd_mask(A, (__v8df)_mm512_setzero_pd(), U, C)

#define _mm512_rcp28_round_ps(A, C)            \
    __builtin_ia32_rcp28ps_mask(A, (__v16sf)_mm512_setzero_ps(), -1, C)

#define _mm512_mask_rcp28_round_ps(W, U, A, C) \
    __builtin_ia32_rcp28ps_mask(A, W, U, C)

#define _mm512_maskz_rcp28_round_ps(U, A, C)   \
    __builtin_ia32_rcp28ps_mask(A, (__v16sf)_mm512_setzero_ps(), U, C)

#define _mm512_rsqrt28_round_pd(A, C)            \
    __builtin_ia32_rsqrt28pd_mask(A, (__v8df)_mm512_setzero_pd(), -1, C)

#define _mm512_mask_rsqrt28_round_pd(W, U, A, C) \
    __builtin_ia32_rsqrt28pd_mask(A, W, U, C)

#define _mm512_maskz_rsqrt28_round_pd(U, A, C)   \
    __builtin_ia32_rsqrt28pd_mask(A, (__v8df)_mm512_setzero_pd(), U, C)

#define _mm512_rsqrt28_round_ps(A, C)            \
    __builtin_ia32_rsqrt28ps_mask(A, (__v16sf)_mm512_setzero_ps(), -1, C)

#define _mm512_mask_rsqrt28_round_ps(W, U, A, C) \
    __builtin_ia32_rsqrt28ps_mask(A, W, U, C)

#define _mm512_maskz_rsqrt28_round_ps(U, A, C)   \
    __builtin_ia32_rsqrt28ps_mask(A, (__v16sf)_mm512_setzero_ps(), U, C)
308
/* The scalar builtins take the operand whose low element is computed
   (B) first and the upper-element pass-through source (A) second.
   The argument order here must match the inline (__OPTIMIZE__)
   definitions and the no-round macros below, which pass (B, A);
   passing (A, B) would compute the approximation on the wrong
   operand when compiled without optimization.  */
#define _mm_rcp28_round_sd(A, B, R)	\
    __builtin_ia32_rcp28sd_round(B, A, R)

#define _mm_rcp28_round_ss(A, B, R)	\
    __builtin_ia32_rcp28ss_round(B, A, R)

#define _mm_rsqrt28_round_sd(A, B, R)	\
    __builtin_ia32_rsqrt28sd_round(B, A, R)

#define _mm_rsqrt28_round_ss(A, B, R)	\
    __builtin_ia32_rsqrt28ss_round(B, A, R)
320
321#endif
322
/* Convenience forms that use the current rounding mode
   (_MM_FROUND_CUR_DIRECTION) instead of taking an explicit
   rounding-mode argument.  */
#define _mm512_exp2a23_pd(A)                    \
    _mm512_exp2a23_round_pd(A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_exp2a23_pd(W, U, A)   \
    _mm512_mask_exp2a23_round_pd(W, U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_exp2a23_pd(U, A)     \
    _mm512_maskz_exp2a23_round_pd(U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_exp2a23_ps(A)                    \
    _mm512_exp2a23_round_ps(A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_exp2a23_ps(W, U, A)   \
    _mm512_mask_exp2a23_round_ps(W, U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_exp2a23_ps(U, A)     \
    _mm512_maskz_exp2a23_round_ps(U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_rcp28_pd(A)                    \
    _mm512_rcp28_round_pd(A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rcp28_pd(W, U, A)   \
    _mm512_mask_rcp28_round_pd(W, U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rcp28_pd(U, A)     \
    _mm512_maskz_rcp28_round_pd(U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_rcp28_ps(A)                    \
    _mm512_rcp28_round_ps(A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rcp28_ps(W, U, A)   \
    _mm512_mask_rcp28_round_ps(W, U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rcp28_ps(U, A)     \
    _mm512_maskz_rcp28_round_ps(U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_rsqrt28_pd(A)                    \
    _mm512_rsqrt28_round_pd(A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rsqrt28_pd(W, U, A)   \
    _mm512_mask_rsqrt28_round_pd(W, U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rsqrt28_pd(U, A)     \
    _mm512_maskz_rsqrt28_round_pd(U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_rsqrt28_ps(A)                    \
    _mm512_rsqrt28_round_ps(A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rsqrt28_ps(W, U, A)   \
    _mm512_mask_rsqrt28_round_ps(W, U, A, _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rsqrt28_ps(U, A)     \
    _mm512_maskz_rsqrt28_round_ps(U, A, _MM_FROUND_CUR_DIRECTION)

/* Scalar no-round forms; the builtins take the computed operand (B)
   first and the upper-element source (A) second.  */
#define _mm_rcp28_sd(A, B)	\
    __builtin_ia32_rcp28sd_round(B, A, _MM_FROUND_CUR_DIRECTION)

#define _mm_rcp28_ss(A, B)	\
    __builtin_ia32_rcp28ss_round(B, A, _MM_FROUND_CUR_DIRECTION)

#define _mm_rsqrt28_sd(A, B)	\
    __builtin_ia32_rsqrt28sd_round(B, A, _MM_FROUND_CUR_DIRECTION)

#define _mm_rsqrt28_ss(A, B)	\
    __builtin_ia32_rsqrt28ss_round(B, A, _MM_FROUND_CUR_DIRECTION)
388
389#ifdef __DISABLE_AVX512ER__
390#undef __DISABLE_AVX512ER__
391#pragma GCC pop_options
392#endif /* __DISABLE_AVX512ER__ */
393
394#endif /* _AVX512ERINTRIN_H_INCLUDED */
395