1/*===---- avx512erintrin.h - AVX512ER intrinsics ---------------------------===
2 *
3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 * See https://llvm.org/LICENSE.txt for license information.
5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 *
7 *===-----------------------------------------------------------------------===
8 */
9#ifndef __IMMINTRIN_H
10#error "Never use <avx512erintrin.h> directly; include <immintrin.h> instead."
11#endif
12
13#ifndef __AVX512ERINTRIN_H
14#define __AVX512ERINTRIN_H
15
/* exp2a23 */
/*
 * Base-2 exponential approximation (AVX512ER).  The "a23" in the mnemonic
 * suggests roughly 23 bits of accuracy -- NOTE(review): confirm against the
 * Intel Intrinsics Guide.  Each operation comes in six flavors:
 *   - _round_ variants take an explicit rounding/SAE immediate R; the plain
 *     variants forward _MM_FROUND_CUR_DIRECTION (current MXCSR rounding);
 *   - unmasked forms pass an all-ones mask with a zero vector as the
 *     (ignored) passthrough operand;
 *   - _mask_ forms supply S as the merge source under mask M;
 *   - _maskz_ forms pass a zero vector so masked-off lanes are zeroed.
 */

/* packed double precision (8 x f64) */
#define _mm512_exp2a23_round_pd(A, R) \
  (__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \
                                      (__v8df)_mm512_setzero_pd(), \
                                      (__mmask8)-1, (int)(R))

#define _mm512_mask_exp2a23_round_pd(S, M, A, R) \
  (__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \
                                      (__v8df)(__m512d)(S), (__mmask8)(M), \
                                      (int)(R))

#define _mm512_maskz_exp2a23_round_pd(M, A, R) \
  (__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \
                                      (__v8df)_mm512_setzero_pd(), \
                                      (__mmask8)(M), (int)(R))

#define _mm512_exp2a23_pd(A) \
  _mm512_exp2a23_round_pd((A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_exp2a23_pd(S, M, A) \
  _mm512_mask_exp2a23_round_pd((S), (M), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_exp2a23_pd(M, A) \
  _mm512_maskz_exp2a23_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION)

/* packed single precision (16 x f32) */
#define _mm512_exp2a23_round_ps(A, R) \
  (__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \
                                     (__v16sf)_mm512_setzero_ps(), \
                                     (__mmask16)-1, (int)(R))

#define _mm512_mask_exp2a23_round_ps(S, M, A, R) \
  (__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \
                                     (__v16sf)(__m512)(S), (__mmask16)(M), \
                                     (int)(R))

#define _mm512_maskz_exp2a23_round_ps(M, A, R) \
  (__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \
                                     (__v16sf)_mm512_setzero_ps(), \
                                     (__mmask16)(M), (int)(R))

#define _mm512_exp2a23_ps(A) \
  _mm512_exp2a23_round_ps((A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_exp2a23_ps(S, M, A) \
  _mm512_mask_exp2a23_round_ps((S), (M), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_exp2a23_ps(M, A) \
  _mm512_maskz_exp2a23_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION)
64
/* rsqrt28 */
/*
 * Approximate reciprocal square root (AVX512ER).  The "28" in the mnemonic
 * suggests ~28 bits of accuracy -- NOTE(review): confirm against the Intel
 * Intrinsics Guide.  Same six-flavor scheme as the exp2a23 family:
 * explicit-rounding (_round_) vs. _MM_FROUND_CUR_DIRECTION, and unmasked /
 * merge-masked (_mask_, passthrough S) / zero-masked (_maskz_) forms.
 */

/* packed double precision (8 x f64) */
#define _mm512_rsqrt28_round_pd(A, R) \
  (__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \
                                         (__v8df)_mm512_setzero_pd(), \
                                         (__mmask8)-1, (int)(R))

#define _mm512_mask_rsqrt28_round_pd(S, M, A, R) \
  (__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \
                                         (__v8df)(__m512d)(S), (__mmask8)(M), \
                                         (int)(R))

#define _mm512_maskz_rsqrt28_round_pd(M, A, R) \
  (__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \
                                         (__v8df)_mm512_setzero_pd(), \
                                         (__mmask8)(M), (int)(R))

#define _mm512_rsqrt28_pd(A) \
  _mm512_rsqrt28_round_pd((A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rsqrt28_pd(S, M, A) \
  _mm512_mask_rsqrt28_round_pd((S), (M), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rsqrt28_pd(M, A) \
  _mm512_maskz_rsqrt28_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION)

/* packed single precision (16 x f32) */
#define _mm512_rsqrt28_round_ps(A, R) \
  (__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \
                                        (__v16sf)_mm512_setzero_ps(), \
                                        (__mmask16)-1, (int)(R))

#define _mm512_mask_rsqrt28_round_ps(S, M, A, R) \
  (__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \
                                        (__v16sf)(__m512)(S), (__mmask16)(M), \
                                        (int)(R))

#define _mm512_maskz_rsqrt28_round_ps(M, A, R) \
  (__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \
                                        (__v16sf)_mm512_setzero_ps(), \
                                        (__mmask16)(M), (int)(R))

#define _mm512_rsqrt28_ps(A) \
  _mm512_rsqrt28_round_ps((A), _MM_FROUND_CUR_DIRECTION)
107
/* Merge-masked rsqrt28 (single precision), current rounding mode.
 * Fix: parenthesize A in the expansion -- it was the only macro argument in
 * this file passed unparenthesized, a macro-hygiene hazard when the caller's
 * argument contains low-precedence operators. */
#define _mm512_mask_rsqrt28_ps(S, M, A) \
  _mm512_mask_rsqrt28_round_ps((S), (M), (A), _MM_FROUND_CUR_DIRECTION)
110
/* zero-masked rsqrt28 (single precision), current rounding mode */
#define _mm512_maskz_rsqrt28_ps(M, A) \
  _mm512_maskz_rsqrt28_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION)

/*
 * Scalar rsqrt28 (low element), single precision.  The builtin takes two
 * vector operands A and B plus a passthrough and a mask; the approximation
 * applies to the low element while upper elements are carried through --
 * NOTE(review): confirm the exact upper-element semantics against the Intel
 * Intrinsics Guide.
 */
#define _mm_rsqrt28_round_ss(A, B, R) \
  (__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (__v4sf)_mm_setzero_ps(), \
                                              (__mmask8)-1, (int)(R))

#define _mm_mask_rsqrt28_round_ss(S, M, A, B, R) \
  (__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (__v4sf)(__m128)(S), \
                                              (__mmask8)(M), (int)(R))

#define _mm_maskz_rsqrt28_round_ss(M, A, B, R) \
  (__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (__v4sf)_mm_setzero_ps(), \
                                              (__mmask8)(M), (int)(R))

#define _mm_rsqrt28_ss(A, B) \
  _mm_rsqrt28_round_ss((A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_rsqrt28_ss(S, M, A, B) \
  _mm_mask_rsqrt28_round_ss((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_rsqrt28_ss(M, A, B) \
  _mm_maskz_rsqrt28_round_ss((M), (A), (B), _MM_FROUND_CUR_DIRECTION)

/* scalar rsqrt28 (low element), double precision */
#define _mm_rsqrt28_round_sd(A, B, R) \
  (__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (__v2df)_mm_setzero_pd(), \
                                               (__mmask8)-1, (int)(R))

#define _mm_mask_rsqrt28_round_sd(S, M, A, B, R) \
  (__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (__v2df)(__m128d)(S), \
                                               (__mmask8)(M), (int)(R))

#define _mm_maskz_rsqrt28_round_sd(M, A, B, R) \
  (__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (__v2df)_mm_setzero_pd(), \
                                               (__mmask8)(M), (int)(R))

#define _mm_rsqrt28_sd(A, B) \
  _mm_rsqrt28_round_sd((A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_rsqrt28_sd(S, M, A, B) \
  _mm_mask_rsqrt28_round_sd((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_rsqrt28_sd(M, A, B) \
  _mm_maskz_rsqrt28_round_sd((M), (A), (B), _MM_FROUND_CUR_DIRECTION)
167
/* rcp28 */
/*
 * Approximate reciprocal (AVX512ER).  "28" in the mnemonic suggests ~28 bits
 * of accuracy -- NOTE(review): confirm against the Intel Intrinsics Guide.
 * Mirrors the exp2a23/rsqrt28 macro families: _round_ variants take an
 * explicit rounding/SAE immediate, plain variants use
 * _MM_FROUND_CUR_DIRECTION; unmasked / merge-masked (_mask_, passthrough S)
 * / zero-masked (_maskz_) forms; packed pd/ps plus scalar ss/sd.
 */

/* packed double precision (8 x f64) */
#define _mm512_rcp28_round_pd(A, R) \
  (__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \
                                       (__v8df)_mm512_setzero_pd(), \
                                       (__mmask8)-1, (int)(R))

#define _mm512_mask_rcp28_round_pd(S, M, A, R) \
  (__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \
                                       (__v8df)(__m512d)(S), (__mmask8)(M), \
                                       (int)(R))

#define _mm512_maskz_rcp28_round_pd(M, A, R) \
  (__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \
                                       (__v8df)_mm512_setzero_pd(), \
                                       (__mmask8)(M), (int)(R))

#define _mm512_rcp28_pd(A) \
  _mm512_rcp28_round_pd((A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rcp28_pd(S, M, A) \
  _mm512_mask_rcp28_round_pd((S), (M), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rcp28_pd(M, A) \
  _mm512_maskz_rcp28_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION)

/* packed single precision (16 x f32) */
#define _mm512_rcp28_round_ps(A, R) \
  (__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \
                                      (__v16sf)_mm512_setzero_ps(), \
                                      (__mmask16)-1, (int)(R))

#define _mm512_mask_rcp28_round_ps(S, M, A, R) \
  (__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \
                                      (__v16sf)(__m512)(S), (__mmask16)(M), \
                                      (int)(R))

#define _mm512_maskz_rcp28_round_ps(M, A, R) \
  (__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \
                                      (__v16sf)_mm512_setzero_ps(), \
                                      (__mmask16)(M), (int)(R))

#define _mm512_rcp28_ps(A) \
  _mm512_rcp28_round_ps((A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rcp28_ps(S, M, A) \
  _mm512_mask_rcp28_round_ps((S), (M), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rcp28_ps(M, A) \
  _mm512_maskz_rcp28_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION)

/* scalar rcp28 (low element), single precision */
#define _mm_rcp28_round_ss(A, B, R) \
  (__m128)__builtin_ia32_rcp28ss_round_mask((__v4sf)(__m128)(A), \
                                            (__v4sf)(__m128)(B), \
                                            (__v4sf)_mm_setzero_ps(), \
                                            (__mmask8)-1, (int)(R))

#define _mm_mask_rcp28_round_ss(S, M, A, B, R) \
  (__m128)__builtin_ia32_rcp28ss_round_mask((__v4sf)(__m128)(A), \
                                            (__v4sf)(__m128)(B), \
                                            (__v4sf)(__m128)(S), \
                                            (__mmask8)(M), (int)(R))

#define _mm_maskz_rcp28_round_ss(M, A, B, R) \
  (__m128)__builtin_ia32_rcp28ss_round_mask((__v4sf)(__m128)(A), \
                                            (__v4sf)(__m128)(B), \
                                            (__v4sf)_mm_setzero_ps(), \
                                            (__mmask8)(M), (int)(R))

#define _mm_rcp28_ss(A, B) \
  _mm_rcp28_round_ss((A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_rcp28_ss(S, M, A, B) \
  _mm_mask_rcp28_round_ss((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_rcp28_ss(M, A, B) \
  _mm_maskz_rcp28_round_ss((M), (A), (B), _MM_FROUND_CUR_DIRECTION)

/* scalar rcp28 (low element), double precision */
#define _mm_rcp28_round_sd(A, B, R) \
  (__m128d)__builtin_ia32_rcp28sd_round_mask((__v2df)(__m128d)(A), \
                                             (__v2df)(__m128d)(B), \
                                             (__v2df)_mm_setzero_pd(), \
                                             (__mmask8)-1, (int)(R))

#define _mm_mask_rcp28_round_sd(S, M, A, B, R) \
  (__m128d)__builtin_ia32_rcp28sd_round_mask((__v2df)(__m128d)(A), \
                                             (__v2df)(__m128d)(B), \
                                             (__v2df)(__m128d)(S), \
                                             (__mmask8)(M), (int)(R))

#define _mm_maskz_rcp28_round_sd(M, A, B, R) \
  (__m128d)__builtin_ia32_rcp28sd_round_mask((__v2df)(__m128d)(A), \
                                             (__v2df)(__m128d)(B), \
                                             (__v2df)_mm_setzero_pd(), \
                                             (__mmask8)(M), (int)(R))

#define _mm_rcp28_sd(A, B) \
  _mm_rcp28_round_sd((A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_rcp28_sd(S, M, A, B) \
  _mm_mask_rcp28_round_sd((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_rcp28_sd(M, A, B) \
  _mm_maskz_rcp28_round_sd((M), (A), (B), _MM_FROUND_CUR_DIRECTION)
270
271#endif /* __AVX512ERINTRIN_H */
272