/*===---- avx512erintrin.h - AVX512ER intrinsics ---------------------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
#ifndef __IMMINTRIN_H
#error "Never use <avx512erintrin.h> directly; include <immintrin.h> instead."
#endif

#ifndef __AVX512ERINTRIN_H
#define __AVX512ERINTRIN_H

/* exp2a23 */
#define _mm512_exp2a23_round_pd(A, R) \
  (__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \
                                      (__v8df)_mm512_setzero_pd(), \
                                      (__mmask8)-1, (int)(R))

#define _mm512_mask_exp2a23_round_pd(S, M, A, R) \
  (__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \
                                      (__v8df)(__m512d)(S), (__mmask8)(M), \
                                      (int)(R))

#define _mm512_maskz_exp2a23_round_pd(M, A, R) \
  (__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \
                                      (__v8df)_mm512_setzero_pd(), \
                                      (__mmask8)(M), (int)(R))

#define _mm512_exp2a23_pd(A) \
  _mm512_exp2a23_round_pd((A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_exp2a23_pd(S, M, A) \
  _mm512_mask_exp2a23_round_pd((S), (M), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_exp2a23_pd(M, A) \
  _mm512_maskz_exp2a23_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_exp2a23_round_ps(A, R) \
  (__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \
                                     (__v16sf)_mm512_setzero_ps(), \
                                     (__mmask16)-1, (int)(R))

#define _mm512_mask_exp2a23_round_ps(S, M, A, R) \
  (__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \
                                     (__v16sf)(__m512)(S), (__mmask16)(M), \
                                     (int)(R))

#define _mm512_maskz_exp2a23_round_ps(M, A, R) \
  (__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \
                                     (__v16sf)_mm512_setzero_ps(), \
                                     (__mmask16)(M), (int)(R))

#define _mm512_exp2a23_ps(A) \
  _mm512_exp2a23_round_ps((A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_exp2a23_ps(S, M, A) \
  _mm512_mask_exp2a23_round_ps((S), (M), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_exp2a23_ps(M, A) \
  _mm512_maskz_exp2a23_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION)

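/* Usage sketch for the exp2a23 family above (illustrative only; assumes a
 * target with AVX512ER enabled, e.g. -march=knl or -mavx512er, and values
 * shown are approximate):
 *
 *   __m512d x = _mm512_set1_pd(3.0);
 *   __m512d y = _mm512_exp2a23_pd(x);
 *   // each lane holds an approximation of 2^3.0 = 8.0, with relative
 *   // error bounded by 2^-23 (the "a23" in the name)
 *
 *   __mmask8 m = 0x0F;
 *   __m512d z = _mm512_maskz_exp2a23_pd(m, x);
 *   // lanes 0-3 hold the approximation; lanes 4-7 are zeroed by the mask
 */
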
/* rsqrt28 */
#define _mm512_rsqrt28_round_pd(A, R) \
  (__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \
                                         (__v8df)_mm512_setzero_pd(), \
                                         (__mmask8)-1, (int)(R))

#define _mm512_mask_rsqrt28_round_pd(S, M, A, R) \
  (__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \
                                         (__v8df)(__m512d)(S), (__mmask8)(M), \
                                         (int)(R))

#define _mm512_maskz_rsqrt28_round_pd(M, A, R) \
  (__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \
                                         (__v8df)_mm512_setzero_pd(), \
                                         (__mmask8)(M), (int)(R))

#define _mm512_rsqrt28_pd(A) \
  _mm512_rsqrt28_round_pd((A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rsqrt28_pd(S, M, A) \
  _mm512_mask_rsqrt28_round_pd((S), (M), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rsqrt28_pd(M, A) \
  _mm512_maskz_rsqrt28_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_rsqrt28_round_ps(A, R) \
  (__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \
                                        (__v16sf)_mm512_setzero_ps(), \
                                        (__mmask16)-1, (int)(R))

#define _mm512_mask_rsqrt28_round_ps(S, M, A, R) \
  (__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \
                                        (__v16sf)(__m512)(S), (__mmask16)(M), \
                                        (int)(R))

#define _mm512_maskz_rsqrt28_round_ps(M, A, R) \
  (__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \
                                        (__v16sf)_mm512_setzero_ps(), \
                                        (__mmask16)(M), (int)(R))

#define _mm512_rsqrt28_ps(A) \
  _mm512_rsqrt28_round_ps((A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rsqrt28_ps(S, M, A) \
  _mm512_mask_rsqrt28_round_ps((S), (M), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rsqrt28_ps(M, A) \
  _mm512_maskz_rsqrt28_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm_rsqrt28_round_ss(A, B, R) \
  (__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (__v4sf)_mm_setzero_ps(), \
                                              (__mmask8)-1, (int)(R))

#define _mm_mask_rsqrt28_round_ss(S, M, A, B, R) \
  (__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (__v4sf)(__m128)(S), \
                                              (__mmask8)(M), (int)(R))

#define _mm_maskz_rsqrt28_round_ss(M, A, B, R) \
  (__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \
                                              (__v4sf)(__m128)(B), \
                                              (__v4sf)_mm_setzero_ps(), \
                                              (__mmask8)(M), (int)(R))

#define _mm_rsqrt28_ss(A, B) \
  _mm_rsqrt28_round_ss((A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_rsqrt28_ss(S, M, A, B) \
  _mm_mask_rsqrt28_round_ss((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_rsqrt28_ss(M, A, B) \
  _mm_maskz_rsqrt28_round_ss((M), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_rsqrt28_round_sd(A, B, R) \
  (__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (__v2df)_mm_setzero_pd(), \
                                               (__mmask8)-1, (int)(R))

#define _mm_mask_rsqrt28_round_sd(S, M, A, B, R) \
  (__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (__v2df)(__m128d)(S), \
                                               (__mmask8)(M), (int)(R))

#define _mm_maskz_rsqrt28_round_sd(M, A, B, R) \
  (__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \
                                               (__v2df)(__m128d)(B), \
                                               (__v2df)_mm_setzero_pd(), \
                                               (__mmask8)(M), (int)(R))

#define _mm_rsqrt28_sd(A, B) \
  _mm_rsqrt28_round_sd((A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_rsqrt28_sd(S, M, A, B) \
  _mm_mask_rsqrt28_round_sd((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_rsqrt28_sd(M, A, B) \
  _mm_maskz_rsqrt28_round_sd((M), (A), (B), _MM_FROUND_CUR_DIRECTION)

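/* Usage sketch for the rsqrt28 family above (illustrative only; assumes
 * AVX512ER is enabled; the approximation has relative error bounded by
 * 2^-28):
 *
 *   __m512 v = _mm512_set1_ps(4.0f);
 *   __m512 r = _mm512_rsqrt28_ps(v);
 *   // each lane ~ 1/sqrt(4.0) = 0.5
 *
 *   // Scalar form: the low lane is computed from the second operand and
 *   // the upper lanes are copied from the first operand.
 *   __m128 a = _mm_set_ps(7.0f, 6.0f, 5.0f, 4.0f);
 *   __m128 s = _mm_rsqrt28_ss(a, a);
 *   // lane 0 ~ 1/sqrt(4.0) = 0.5; lanes 1-3 are 5.0, 6.0, 7.0 from a
 */
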
/* rcp28 */
#define _mm512_rcp28_round_pd(A, R) \
  (__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \
                                       (__v8df)_mm512_setzero_pd(), \
                                       (__mmask8)-1, (int)(R))

#define _mm512_mask_rcp28_round_pd(S, M, A, R) \
  (__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \
                                       (__v8df)(__m512d)(S), (__mmask8)(M), \
                                       (int)(R))

#define _mm512_maskz_rcp28_round_pd(M, A, R) \
  (__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \
                                       (__v8df)_mm512_setzero_pd(), \
                                       (__mmask8)(M), (int)(R))

#define _mm512_rcp28_pd(A) \
  _mm512_rcp28_round_pd((A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rcp28_pd(S, M, A) \
  _mm512_mask_rcp28_round_pd((S), (M), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rcp28_pd(M, A) \
  _mm512_maskz_rcp28_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_rcp28_round_ps(A, R) \
  (__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \
                                      (__v16sf)_mm512_setzero_ps(), \
                                      (__mmask16)-1, (int)(R))

#define _mm512_mask_rcp28_round_ps(S, M, A, R) \
  (__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \
                                      (__v16sf)(__m512)(S), (__mmask16)(M), \
                                      (int)(R))

#define _mm512_maskz_rcp28_round_ps(M, A, R) \
  (__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \
                                      (__v16sf)_mm512_setzero_ps(), \
                                      (__mmask16)(M), (int)(R))

#define _mm512_rcp28_ps(A) \
  _mm512_rcp28_round_ps((A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_mask_rcp28_ps(S, M, A) \
  _mm512_mask_rcp28_round_ps((S), (M), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm512_maskz_rcp28_ps(M, A) \
  _mm512_maskz_rcp28_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION)

#define _mm_rcp28_round_ss(A, B, R) \
  (__m128)__builtin_ia32_rcp28ss_round_mask((__v4sf)(__m128)(A), \
                                            (__v4sf)(__m128)(B), \
                                            (__v4sf)_mm_setzero_ps(), \
                                            (__mmask8)-1, (int)(R))

#define _mm_mask_rcp28_round_ss(S, M, A, B, R) \
  (__m128)__builtin_ia32_rcp28ss_round_mask((__v4sf)(__m128)(A), \
                                            (__v4sf)(__m128)(B), \
                                            (__v4sf)(__m128)(S), \
                                            (__mmask8)(M), (int)(R))

#define _mm_maskz_rcp28_round_ss(M, A, B, R) \
  (__m128)__builtin_ia32_rcp28ss_round_mask((__v4sf)(__m128)(A), \
                                            (__v4sf)(__m128)(B), \
                                            (__v4sf)_mm_setzero_ps(), \
                                            (__mmask8)(M), (int)(R))

#define _mm_rcp28_ss(A, B) \
  _mm_rcp28_round_ss((A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_rcp28_ss(S, M, A, B) \
  _mm_mask_rcp28_round_ss((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_rcp28_ss(M, A, B) \
  _mm_maskz_rcp28_round_ss((M), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_rcp28_round_sd(A, B, R) \
  (__m128d)__builtin_ia32_rcp28sd_round_mask((__v2df)(__m128d)(A), \
                                             (__v2df)(__m128d)(B), \
                                             (__v2df)_mm_setzero_pd(), \
                                             (__mmask8)-1, (int)(R))

#define _mm_mask_rcp28_round_sd(S, M, A, B, R) \
  (__m128d)__builtin_ia32_rcp28sd_round_mask((__v2df)(__m128d)(A), \
                                             (__v2df)(__m128d)(B), \
                                             (__v2df)(__m128d)(S), \
                                             (__mmask8)(M), (int)(R))

#define _mm_maskz_rcp28_round_sd(M, A, B, R) \
  (__m128d)__builtin_ia32_rcp28sd_round_mask((__v2df)(__m128d)(A), \
                                             (__v2df)(__m128d)(B), \
                                             (__v2df)_mm_setzero_pd(), \
                                             (__mmask8)(M), (int)(R))

#define _mm_rcp28_sd(A, B) \
  _mm_rcp28_round_sd((A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_mask_rcp28_sd(S, M, A, B) \
  _mm_mask_rcp28_round_sd((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION)

#define _mm_maskz_rcp28_sd(M, A, B) \
  _mm_maskz_rcp28_round_sd((M), (A), (B), _MM_FROUND_CUR_DIRECTION)

#endif /* __AVX512ERINTRIN_H */
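/* Usage sketch for the rcp28 family above (illustrative only; assumes
 * AVX512ER is enabled; relative error is bounded by 2^-28, tighter than
 * the 2^-14 of the AVX512F rcp14 forms):
 *
 *   __m512d d = _mm512_set1_pd(8.0);
 *   __m512d r = _mm512_rcp28_pd(d);
 *   // each lane ~ 1/8.0 = 0.125
 *
 *   // Explicit-rounding variant: _MM_FROUND_NO_EXC suppresses floating
 *   // point exceptions for this operation.
 *   __m512d r2 = _mm512_rcp28_round_pd(d, _MM_FROUND_NO_EXC);
 */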