avx512erintrin.h revision 353358
1317027Sdim/*===---- avx512erintrin.h - AVX512ER intrinsics ---------------------------=== 2317027Sdim * 3317027Sdim * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4317027Sdim * See https://llvm.org/LICENSE.txt for license information. 5317027Sdim * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6317027Sdim * 7317027Sdim *===-----------------------------------------------------------------------=== 8317027Sdim */ 9317027Sdim#ifndef __IMMINTRIN_H 10317027Sdim#error "Never use <avx512erintrin.h> directly; include <immintrin.h> instead." 11317027Sdim#endif 12317027Sdim 13317027Sdim#ifndef __AVX512ERINTRIN_H 14317027Sdim#define __AVX512ERINTRIN_H 15317027Sdim 16317027Sdim/* exp2a23 */ 17317027Sdim#define _mm512_exp2a23_round_pd(A, R) \ 18317027Sdim (__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \ 19317027Sdim (__v8df)_mm512_setzero_pd(), \ 20317027Sdim (__mmask8)-1, (int)(R)) 21317027Sdim 22317027Sdim#define _mm512_mask_exp2a23_round_pd(S, M, A, R) \ 23317027Sdim (__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \ 24317027Sdim (__v8df)(__m512d)(S), (__mmask8)(M), \ 25317027Sdim (int)(R)) 26317027Sdim 27317027Sdim#define _mm512_maskz_exp2a23_round_pd(M, A, R) \ 28317027Sdim (__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \ 29317027Sdim (__v8df)_mm512_setzero_pd(), \ 30317027Sdim (__mmask8)(M), (int)(R)) 31317027Sdim 32317027Sdim#define _mm512_exp2a23_pd(A) \ 33317027Sdim _mm512_exp2a23_round_pd((A), _MM_FROUND_CUR_DIRECTION) 34317027Sdim 35317027Sdim#define _mm512_mask_exp2a23_pd(S, M, A) \ 36317027Sdim _mm512_mask_exp2a23_round_pd((S), (M), (A), _MM_FROUND_CUR_DIRECTION) 37317027Sdim 38317027Sdim#define _mm512_maskz_exp2a23_pd(M, A) \ 39317027Sdim _mm512_maskz_exp2a23_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION) 40317027Sdim 41317027Sdim#define _mm512_exp2a23_round_ps(A, R) \ 42317027Sdim (__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \ 43317027Sdim (__v16sf)_mm512_setzero_ps(), \ 44317027Sdim (__mmask16)-1, (int)(R)) 45317027Sdim 46317027Sdim#define _mm512_mask_exp2a23_round_ps(S, M, A, R) \ 47317027Sdim (__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \ 48317027Sdim (__v16sf)(__m512)(S), (__mmask16)(M), \ 49317027Sdim (int)(R)) 50317027Sdim 51317027Sdim#define _mm512_maskz_exp2a23_round_ps(M, A, R) \ 52317027Sdim (__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \ 53317027Sdim (__v16sf)_mm512_setzero_ps(), \ 54317027Sdim (__mmask16)(M), (int)(R)) 55317027Sdim 56317027Sdim#define _mm512_exp2a23_ps(A) \ 57317027Sdim _mm512_exp2a23_round_ps((A), _MM_FROUND_CUR_DIRECTION) 58317027Sdim 59317027Sdim#define _mm512_mask_exp2a23_ps(S, M, A) \ 60317027Sdim _mm512_mask_exp2a23_round_ps((S), (M), (A), _MM_FROUND_CUR_DIRECTION) 61317027Sdim 62317027Sdim#define _mm512_maskz_exp2a23_ps(M, A) \ 63317027Sdim _mm512_maskz_exp2a23_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION) 64317027Sdim 65317027Sdim/* rsqrt28 */ 66#define _mm512_rsqrt28_round_pd(A, R) \ 67 (__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \ 68 (__v8df)_mm512_setzero_pd(), \ 69 (__mmask8)-1, (int)(R)) 70 71#define _mm512_mask_rsqrt28_round_pd(S, M, A, R) \ 72 (__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \ 73 (__v8df)(__m512d)(S), (__mmask8)(M), \ 74 (int)(R)) 75 76#define _mm512_maskz_rsqrt28_round_pd(M, A, R) \ 77 (__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \ 78 (__v8df)_mm512_setzero_pd(), \ 79 (__mmask8)(M), (int)(R)) 80 81#define _mm512_rsqrt28_pd(A) \ 82 _mm512_rsqrt28_round_pd((A), _MM_FROUND_CUR_DIRECTION) 83 84#define _mm512_mask_rsqrt28_pd(S, M, A) \ 85 _mm512_mask_rsqrt28_round_pd((S), (M), (A), _MM_FROUND_CUR_DIRECTION) 86 87#define _mm512_maskz_rsqrt28_pd(M, A) \ 88 _mm512_maskz_rsqrt28_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION) 89 90#define _mm512_rsqrt28_round_ps(A, R) \ 91 (__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \ 92 (__v16sf)_mm512_setzero_ps(), \ 93 (__mmask16)-1, (int)(R)) 94 95#define _mm512_mask_rsqrt28_round_ps(S, M, A, R) \ 96 (__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \ 97 (__v16sf)(__m512)(S), (__mmask16)(M), \ 98 (int)(R)) 99 100#define _mm512_maskz_rsqrt28_round_ps(M, A, R) \ 101 (__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \ 102 (__v16sf)_mm512_setzero_ps(), \ 103 (__mmask16)(M), (int)(R)) 104 105#define _mm512_rsqrt28_ps(A) \ 106 _mm512_rsqrt28_round_ps((A), _MM_FROUND_CUR_DIRECTION) 107 108#define _mm512_mask_rsqrt28_ps(S, M, A) \ 109 _mm512_mask_rsqrt28_round_ps((S), (M), A, _MM_FROUND_CUR_DIRECTION) 110 111#define _mm512_maskz_rsqrt28_ps(M, A) \ 112 _mm512_maskz_rsqrt28_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION) 113 114#define _mm_rsqrt28_round_ss(A, B, R) \ 115 (__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \ 116 (__v4sf)(__m128)(B), \ 117 (__v4sf)_mm_setzero_ps(), \ 118 (__mmask8)-1, (int)(R)) 119 120#define _mm_mask_rsqrt28_round_ss(S, M, A, B, R) \ 121 (__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \ 122 (__v4sf)(__m128)(B), \ 123 (__v4sf)(__m128)(S), \ 124 (__mmask8)(M), (int)(R)) 125 126#define _mm_maskz_rsqrt28_round_ss(M, A, B, R) \ 127 (__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \ 128 (__v4sf)(__m128)(B), \ 129 (__v4sf)_mm_setzero_ps(), \ 130 (__mmask8)(M), (int)(R)) 131 132#define _mm_rsqrt28_ss(A, B) \ 133 _mm_rsqrt28_round_ss((A), (B), _MM_FROUND_CUR_DIRECTION) 134 135#define _mm_mask_rsqrt28_ss(S, M, A, B) \ 136 _mm_mask_rsqrt28_round_ss((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION) 137 138#define _mm_maskz_rsqrt28_ss(M, A, B) \ 139 _mm_maskz_rsqrt28_round_ss((M), (A), (B), _MM_FROUND_CUR_DIRECTION) 140 141#define _mm_rsqrt28_round_sd(A, B, R) \ 142 (__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \ 143 (__v2df)(__m128d)(B), \ 144 (__v2df)_mm_setzero_pd(), \ 145 (__mmask8)-1, (int)(R)) 146 147#define _mm_mask_rsqrt28_round_sd(S, M, A, B, R) \ 148 (__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \ 149 (__v2df)(__m128d)(B), \ 150 (__v2df)(__m128d)(S), \ 151 (__mmask8)(M), (int)(R)) 152 153#define _mm_maskz_rsqrt28_round_sd(M, A, B, R) \ 154 (__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \ 155 (__v2df)(__m128d)(B), \ 156 (__v2df)_mm_setzero_pd(), \ 157 (__mmask8)(M), (int)(R)) 158 159#define _mm_rsqrt28_sd(A, B) \ 160 _mm_rsqrt28_round_sd((A), (B), _MM_FROUND_CUR_DIRECTION) 161 162#define _mm_mask_rsqrt28_sd(S, M, A, B) \ 163 _mm_mask_rsqrt28_round_sd((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION) 164 165#define _mm_maskz_rsqrt28_sd(M, A, B) \ 166 _mm_maskz_rsqrt28_round_sd((M), (A), (B), _MM_FROUND_CUR_DIRECTION) 167 168/* rcp28 */ 169#define _mm512_rcp28_round_pd(A, R) \ 170 (__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \ 171 (__v8df)_mm512_setzero_pd(), \ 172 (__mmask8)-1, (int)(R)) 173 174#define _mm512_mask_rcp28_round_pd(S, M, A, R) \ 175 (__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \ 176 (__v8df)(__m512d)(S), (__mmask8)(M), \ 177 (int)(R)) 178 179#define _mm512_maskz_rcp28_round_pd(M, A, R) \ 180 (__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \ 181 (__v8df)_mm512_setzero_pd(), \ 182 (__mmask8)(M), (int)(R)) 183 184#define _mm512_rcp28_pd(A) \ 185 _mm512_rcp28_round_pd((A), _MM_FROUND_CUR_DIRECTION) 186 187#define _mm512_mask_rcp28_pd(S, M, A) \ 188 _mm512_mask_rcp28_round_pd((S), (M), (A), _MM_FROUND_CUR_DIRECTION) 189 190#define _mm512_maskz_rcp28_pd(M, A) \ 191 _mm512_maskz_rcp28_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION) 192 193#define _mm512_rcp28_round_ps(A, R) \ 194 (__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \ 195 (__v16sf)_mm512_setzero_ps(), \ 196 (__mmask16)-1, (int)(R)) 197 198#define _mm512_mask_rcp28_round_ps(S, M, A, R) \ 199 (__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \ 200 (__v16sf)(__m512)(S), (__mmask16)(M), \ 201 (int)(R)) 202 203#define _mm512_maskz_rcp28_round_ps(M, A, R) \ 204 (__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \ 205 (__v16sf)_mm512_setzero_ps(), \ 206 (__mmask16)(M), (int)(R)) 207 208#define _mm512_rcp28_ps(A) \ 209 _mm512_rcp28_round_ps((A), _MM_FROUND_CUR_DIRECTION) 210 211#define _mm512_mask_rcp28_ps(S, M, A) \ 212 _mm512_mask_rcp28_round_ps((S), (M), (A), _MM_FROUND_CUR_DIRECTION) 213 214#define _mm512_maskz_rcp28_ps(M, A) \ 215 _mm512_maskz_rcp28_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION) 216 217#define _mm_rcp28_round_ss(A, B, R) \ 218 (__m128)__builtin_ia32_rcp28ss_round_mask((__v4sf)(__m128)(A), \ 219 (__v4sf)(__m128)(B), \ 220 (__v4sf)_mm_setzero_ps(), \ 221 (__mmask8)-1, (int)(R)) 222 223#define _mm_mask_rcp28_round_ss(S, M, A, B, R) \ 224 (__m128)__builtin_ia32_rcp28ss_round_mask((__v4sf)(__m128)(A), \ 225 (__v4sf)(__m128)(B), \ 226 (__v4sf)(__m128)(S), \ 227 (__mmask8)(M), (int)(R)) 228 229#define _mm_maskz_rcp28_round_ss(M, A, B, R) \ 230 (__m128)__builtin_ia32_rcp28ss_round_mask((__v4sf)(__m128)(A), \ 231 (__v4sf)(__m128)(B), \ 232 (__v4sf)_mm_setzero_ps(), \ 233 (__mmask8)(M), (int)(R)) 234 235#define _mm_rcp28_ss(A, B) \ 236 _mm_rcp28_round_ss((A), (B), _MM_FROUND_CUR_DIRECTION) 237 238#define _mm_mask_rcp28_ss(S, M, A, B) \ 239 _mm_mask_rcp28_round_ss((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION) 240 241#define _mm_maskz_rcp28_ss(M, A, B) \ 242 _mm_maskz_rcp28_round_ss((M), (A), (B), _MM_FROUND_CUR_DIRECTION) 243 244#define _mm_rcp28_round_sd(A, B, R) \ 245 (__m128d)__builtin_ia32_rcp28sd_round_mask((__v2df)(__m128d)(A), \ 246 (__v2df)(__m128d)(B), \ 247 (__v2df)_mm_setzero_pd(), \ 248 (__mmask8)-1, (int)(R)) 249 250#define _mm_mask_rcp28_round_sd(S, M, A, B, R) \ 251 (__m128d)__builtin_ia32_rcp28sd_round_mask((__v2df)(__m128d)(A), \ 252 (__v2df)(__m128d)(B), \ 253 (__v2df)(__m128d)(S), \ 254 (__mmask8)(M), (int)(R)) 255 256#define _mm_maskz_rcp28_round_sd(M, A, B, R) \ 257 (__m128d)__builtin_ia32_rcp28sd_round_mask((__v2df)(__m128d)(A), \ 258 (__v2df)(__m128d)(B), \ 259 (__v2df)_mm_setzero_pd(), \ 260 (__mmask8)(M), (int)(R)) 261 262#define _mm_rcp28_sd(A, B) \ 263 _mm_rcp28_round_sd((A), (B), _MM_FROUND_CUR_DIRECTION) 264 265#define _mm_mask_rcp28_sd(S, M, A, B) \ 266 _mm_mask_rcp28_round_sd((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION) 267 268#define _mm_maskz_rcp28_sd(M, A, B) \ 269 _mm_maskz_rcp28_round_sd((M), (A), (B), _MM_FROUND_CUR_DIRECTION) 270 271#endif /* __AVX512ERINTRIN_H */ 272