avx512erintrin.h revision 296417
1175164Sjhb/*===---- avx512erintrin.h - AVX512ER intrinsics ---------------------------=== 2225344Srwatson * 3175164Sjhb * Permission is hereby granted, free of charge, to any person obtaining a copy 4175164Sjhb * of this software and associated documentation files (the "Software"), to deal 5175164Sjhb * in the Software without restriction, including without limitation the rights 6175164Sjhb * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7175164Sjhb * copies of the Software, and to permit persons to whom the Software is 8175164Sjhb * furnished to do so, subject to the following conditions: 9175164Sjhb * 10175164Sjhb * The above copyright notice and this permission notice shall be included in 11175164Sjhb * all copies or substantial portions of the Software. 12175164Sjhb * 13175164Sjhb * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14175164Sjhb * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15175164Sjhb * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16175164Sjhb * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17175164Sjhb * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18175164Sjhb * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19175164Sjhb * THE SOFTWARE. 20175164Sjhb * 21175164Sjhb *===-----------------------------------------------------------------------=== 22175164Sjhb */ 23175164Sjhb#ifndef __IMMINTRIN_H 24175164Sjhb#error "Never use <avx512erintrin.h> directly; include <immintrin.h> instead." 25175164Sjhb#endif 26175164Sjhb 27175164Sjhb#ifndef __AVX512ERINTRIN_H 28175164Sjhb#define __AVX512ERINTRIN_H 29175164Sjhb 30175164Sjhb// exp2a23 31175164Sjhb#define _mm512_exp2a23_round_pd(A, R) __extension__ ({ \ 32175164Sjhb (__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \ 33175164Sjhb (__v8df)_mm512_setzero_pd(), \ 34225344Srwatson (__mmask8)-1, (R)); }) 35175164Sjhb 36175164Sjhb#define _mm512_mask_exp2a23_round_pd(S, M, A, R) __extension__ ({ \ 37175164Sjhb (__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \ 38225344Srwatson (__v8df)(__m512d)(S), \ 39175164Sjhb (__mmask8)(M), (R)); }) 40225344Srwatson 41175164Sjhb#define _mm512_maskz_exp2a23_round_pd(M, A, R) __extension__ ({ \ 42175164Sjhb (__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \ 43175164Sjhb (__v8df)_mm512_setzero_pd(), \ 44175164Sjhb (__mmask8)(M), (R)); }) 45175164Sjhb 46175164Sjhb#define _mm512_exp2a23_pd(A) \ 47223692Sjonathan _mm512_exp2a23_round_pd((A), _MM_FROUND_CUR_DIRECTION) 48223692Sjonathan 49175164Sjhb#define _mm512_mask_exp2a23_pd(S, M, A) \ 50223692Sjonathan _mm512_mask_exp2a23_round_pd((S), (M), (A), _MM_FROUND_CUR_DIRECTION) 51175164Sjhb 52175164Sjhb#define _mm512_maskz_exp2a23_pd(M, A) \ 53175164Sjhb _mm512_maskz_exp2a23_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION) 54175164Sjhb 55175164Sjhb#define _mm512_exp2a23_round_ps(A, R) __extension__ ({ \ 56175164Sjhb (__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \ 57175164Sjhb (__v16sf)_mm512_setzero_ps(), \ 58175164Sjhb (__mmask8)-1, (R)); }) 59175164Sjhb 60224914Skib#define _mm512_mask_exp2a23_round_ps(S, M, A, R) __extension__ ({ \ 61175164Sjhb (__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \ 62175164Sjhb (__v16sf)(__m512)(S), \ 63175164Sjhb (__mmask8)(M), (R)); }) 64248084Sattilio 65175164Sjhb#define _mm512_maskz_exp2a23_round_ps(M, A, R) __extension__ ({ \ 66175164Sjhb (__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \ 67175164Sjhb (__v16sf)_mm512_setzero_ps(), \ 68175164Sjhb (__mmask8)(M), (R)); }) 69175164Sjhb 70175164Sjhb#define _mm512_exp2a23_ps(A) \ 71175164Sjhb _mm512_exp2a23_round_ps((A), _MM_FROUND_CUR_DIRECTION) 72175164Sjhb 73175164Sjhb#define _mm512_mask_exp2a23_ps(S, M, A) \ 74175164Sjhb _mm512_mask_exp2a23_round_ps((S), (M), (A), _MM_FROUND_CUR_DIRECTION) 75175164Sjhb 76175164Sjhb#define _mm512_maskz_exp2a23_ps(M, A) \ 77175164Sjhb _mm512_maskz_exp2a23_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION) 78228533Sjhb 79175164Sjhb// rsqrt28 80228509Sjhb#define _mm512_rsqrt28_round_pd(A, R) __extension__ ({ \ 81175164Sjhb (__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \ 82175164Sjhb (__v8df)_mm512_setzero_pd(), \ 83229821Salc (__mmask8)-1, (R)); }) 84175164Sjhb 85175164Sjhb#define _mm512_mask_rsqrt28_round_pd(S, M, A, R) __extension__ ({ \ 86175164Sjhb (__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \ 87175164Sjhb (__v8df)(__m512d)(S), \ 88175164Sjhb (__mmask8)(M), (R)); }) 89175164Sjhb 90175164Sjhb#define _mm512_maskz_rsqrt28_round_pd(M, A, R) __extension__ ({ \ 91175164Sjhb (__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \ 92175164Sjhb (__v8df)_mm512_setzero_pd(), \ 93175164Sjhb (__mmask8)(M), (R)); }) 94175164Sjhb 95175164Sjhb#define _mm512_rsqrt28_pd(A) \ 96175164Sjhb _mm512_rsqrt28_round_pd((A), _MM_FROUND_CUR_DIRECTION) 97175164Sjhb 98175164Sjhb#define _mm512_mask_rsqrt28_pd(S, M, A) \ 99175164Sjhb _mm512_mask_rsqrt28_round_pd((S), (M), (A), _MM_FROUND_CUR_DIRECTION) 100175164Sjhb 101175164Sjhb#define _mm512_maskz_rsqrt28_pd(M, A) \ 102175164Sjhb _mm512_maskz_rsqrt28_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION) 103175164Sjhb 104175164Sjhb#define _mm512_rsqrt28_round_ps(A, R) __extension__ ({ \ 105175164Sjhb (__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \ 106175164Sjhb (__v16sf)_mm512_setzero_ps(), \ 107175164Sjhb (__mmask16)-1, (R)); }) 108175164Sjhb 109175164Sjhb#define _mm512_mask_rsqrt28_round_ps(S, M, A, R) __extension__ ({ \ 110194766Skib (__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \ 111175164Sjhb (__v16sf)(__m512)(S), \ 112175164Sjhb (__mmask16)(M), (R)); }) 113175164Sjhb 114175164Sjhb#define _mm512_maskz_rsqrt28_round_ps(M, A, R) __extension__ ({ \ 115175164Sjhb (__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \ 116175164Sjhb (__v16sf)_mm512_setzero_ps(), \ 117175164Sjhb (__mmask16)(M), (R)); }) 118175164Sjhb 119175164Sjhb#define _mm512_rsqrt28_ps(A) \ 120224914Skib _mm512_rsqrt28_round_ps((A), _MM_FROUND_CUR_DIRECTION) 121224914Skib 122175164Sjhb#define _mm512_mask_rsqrt28_ps(S, M, A) \ 123175164Sjhb _mm512_mask_rsqrt28_round_ps((S), (M), A, _MM_FROUND_CUR_DIRECTION) 124175164Sjhb 125175164Sjhb#define _mm512_maskz_rsqrt28_ps(M, A) \ 126175164Sjhb _mm512_maskz_rsqrt28_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION) 127175164Sjhb 128175164Sjhb#define _mm_rsqrt28_round_ss(A, B, R) __extension__ ({ \ 129175164Sjhb (__m128)__builtin_ia32_rsqrt28ss_round((__v4sf)(__m128)(A), \ 130175164Sjhb (__v4sf)(__m128)(B), \ 131175164Sjhb (__v4sf)_mm_setzero_ps(), \ 132175164Sjhb (__mmask8)-1, (R)); }) 133224914Skib 134224914Skib#define _mm_mask_rsqrt28_round_ss(S, M, A, B, R) __extension__ ({ \ 135175164Sjhb (__m128)__builtin_ia32_rsqrt28ss_round((__v4sf)(__m128)(A), \ 136175164Sjhb (__v4sf)(__m128)(B), \ 137175164Sjhb (__v4sf)(__m128)(S), \ 138175164Sjhb (__mmask8)(M), (R)); }) 139175164Sjhb 140175164Sjhb#define _mm_maskz_rsqrt28_round_ss(M, A, B, R) __extension__ ({ \ 141175164Sjhb (__m128)__builtin_ia32_rsqrt28ss_round((__v4sf)(__m128)(A), \ 142175164Sjhb (__v4sf)(__m128)(B), \ 143175164Sjhb (__v4sf)_mm_setzero_ps(), \ 144175164Sjhb (__mmask8)(M), (R)); }) 145175164Sjhb 146175164Sjhb#define _mm_rsqrt28_ss(A, B) \ 147175164Sjhb _mm_rsqrt28_round_ss((A), (B), _MM_FROUND_CUR_DIRECTION) 148175164Sjhb 149175164Sjhb#define _mm_mask_rsqrt28_ss(S, M, A, B) \ 150175164Sjhb _mm_mask_rsqrt28_round_ss((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION) 151175164Sjhb 152175164Sjhb#define _mm_maskz_rsqrt28_ss(M, A, B) \ 153175164Sjhb _mm_maskz_rsqrt28_round_ss((M), (A), (B), _MM_FROUND_CUR_DIRECTION) 154175164Sjhb 155175164Sjhb#define _mm_rsqrt28_round_sd(A, B, R) __extension__ ({ \ 156175164Sjhb (__m128d)__builtin_ia32_rsqrt28sd_round((__v2df)(__m128d)(A), \ 157175164Sjhb (__v2df)(__m128d)(B), \ 158175164Sjhb (__v2df)_mm_setzero_pd(), \ 159175164Sjhb (__mmask8)-1, (R)); }) 160175164Sjhb 161175164Sjhb#define _mm_mask_rsqrt28_round_sd(S, M, A, B, R) __extension__ ({ \ 162175164Sjhb (__m128d)__builtin_ia32_rsqrt28sd_round((__v2df)(__m128d)(A), \ 163175164Sjhb (__v2df)(__m128d)(B), \ 164175164Sjhb (__v2df)(__m128d)(S), \ 165175164Sjhb (__mmask8)(M), (R)); }) 166175164Sjhb 167175164Sjhb#define _mm_maskz_rsqrt28_round_sd(M, A, B, R) __extension__ ({ \ 168175164Sjhb (__m128d)__builtin_ia32_rsqrt28sd_round((__v2df)(__m128d)(A), \ 169175164Sjhb (__v2df)(__m128d)(B), \ 170175164Sjhb (__v2df)_mm_setzero_pd(), \ 171194766Skib (__mmask8)(M), (R)); }) 172175164Sjhb 173175164Sjhb#define _mm_rsqrt28_sd(A, B) \ 174175164Sjhb _mm_rsqrt28_round_sd((A), (B), _MM_FROUND_CUR_DIRECTION) 175175164Sjhb 176175164Sjhb#define _mm_mask_rsqrt28_sd(S, M, A, B) \ 177175164Sjhb _mm_mask_rsqrt28_round_sd((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION) 178175164Sjhb 179175164Sjhb#define _mm_maskz_rsqrt28_sd(M, A, B) \ 180175164Sjhb _mm_mask_rsqrt28_round_sd((M), (A), (B), _MM_FROUND_CUR_DIRECTION) 181175164Sjhb 182175164Sjhb// rcp28 183175164Sjhb#define _mm512_rcp28_round_pd(A, R) __extension__ ({ \ 184175164Sjhb (__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \ 185175164Sjhb (__v8df)_mm512_setzero_pd(), \ 186175164Sjhb (__mmask8)-1, (R)); }) 187175164Sjhb 188175164Sjhb#define _mm512_mask_rcp28_round_pd(S, M, A, R) __extension__ ({ \ 189175164Sjhb (__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \ 190175164Sjhb (__v8df)(__m512d)(S), \ 191175164Sjhb (__mmask8)(M), (R)); }) 192175164Sjhb 193175164Sjhb#define _mm512_maskz_rcp28_round_pd(M, A, R) __extension__ ({ \ 194175164Sjhb (__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \ 195175164Sjhb (__v8df)_mm512_setzero_pd(), \ 196175164Sjhb (__mmask8)(M), (R)); }) 197175164Sjhb 198175164Sjhb#define _mm512_rcp28_pd(A) \ 199175164Sjhb _mm512_rcp28_round_pd((A), _MM_FROUND_CUR_DIRECTION) 200175164Sjhb 201175164Sjhb#define _mm512_mask_rcp28_pd(S, M, A) \ 202175164Sjhb _mm512_mask_rcp28_round_pd((S), (M), (A), _MM_FROUND_CUR_DIRECTION) 203175164Sjhb 204175164Sjhb#define _mm512_maskz_rcp28_pd(M, A) \ 205175164Sjhb _mm512_maskz_rcp28_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION) 206175164Sjhb 207175164Sjhb#define _mm512_rcp28_round_ps(A, R) __extension__ ({ \ 208175164Sjhb (__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \ 209175164Sjhb (__v16sf)_mm512_setzero_ps(), \ 210175164Sjhb (__mmask16)-1, (R)); }) 211175164Sjhb 212175164Sjhb#define _mm512_mask_rcp28_round_ps(S, M, A, R) __extension__ ({ \ 213175164Sjhb (__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \ 214175164Sjhb (__v16sf)(__m512)(S), \ 215175164Sjhb (__mmask16)(M), (R)); }) 216175164Sjhb 217175164Sjhb#define _mm512_maskz_rcp28_round_ps(M, A, R) __extension__ ({ \ 218175164Sjhb (__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \ 219175164Sjhb (__v16sf)_mm512_setzero_ps(), \ 220175164Sjhb (__mmask16)(M), (R)); }) 221175164Sjhb 222224914Skib#define _mm512_rcp28_ps(A) \ 223205792Sed _mm512_rcp28_round_ps((A), _MM_FROUND_CUR_DIRECTION) 224205792Sed 225205792Sed#define _mm512_mask_rcp28_ps(S, M, A) \ 226224914Skib _mm512_mask_rcp28_round_ps((S), (M), (A), _MM_FROUND_CUR_DIRECTION) 227224914Skib 228175164Sjhb#define _mm512_maskz_rcp28_ps(M, A) \ 229175164Sjhb _mm512_maskz_rcp28_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION) 230224914Skib 231175164Sjhb#define _mm_rcp28_round_ss(A, B, R) __extension__ ({ \ 232175164Sjhb (__m128)__builtin_ia32_rcp28ss_round((__v4sf)(__m128)(A), \ 233175164Sjhb (__v4sf)(__m128)(B), \ 234175164Sjhb (__v4sf)_mm_setzero_ps(), \ 235175164Sjhb (__mmask8)-1, (R)); }) 236175164Sjhb 237175164Sjhb#define _mm_mask_rcp28_round_ss(S, M, A, B, R) __extension__ ({ \ 238175164Sjhb (__m128)__builtin_ia32_rcp28ss_round((__v4sf)(__m128)(A), \ 239175164Sjhb (__v4sf)(__m128)(B), \ 240175164Sjhb (__v4sf)(__m128)(S), \ 241175164Sjhb (__mmask8)(M), (R)); }) 242175164Sjhb 243175164Sjhb#define _mm_maskz_rcp28_round_ss(M, A, B, R) __extension__ ({ \ 244175164Sjhb (__m128)__builtin_ia32_rcp28ss_round((__v4sf)(__m128)(A), \ 245175164Sjhb (__v4sf)(__m128)(B), \ 246175164Sjhb (__v4sf)_mm_setzero_ps(), \ 247194766Skib (__mmask8)(M), (R)); }) 248175164Sjhb 249175164Sjhb#define _mm_rcp28_ss(A, B) \ 250175164Sjhb _mm_rcp28_round_ss((A), (B), _MM_FROUND_CUR_DIRECTION) 251229821Salc 252229821Salc#define _mm_mask_rcp28_ss(S, M, A, B) \ 253194766Skib _mm_mask_rcp28_round_ss((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION) 254229821Salc 255175164Sjhb#define _mm_maskz_rcp28_ss(M, A, B) \ 256175164Sjhb _mm_maskz_rcp28_round_ss((M), (A), (B), _MM_FROUND_CUR_DIRECTION) 257248084Sattilio 258175164Sjhb#define _mm_rcp28_round_sd(A, B, R) __extension__ ({ \ 259248084Sattilio (__m128d)__builtin_ia32_rcp28sd_round((__v2df)(__m128d)(A), \ 260194766Skib (__v2df)(__m128d)(B), \ 261175164Sjhb (__v2df)_mm_setzero_pd(), \ 262175164Sjhb (__mmask8)-1, (R)); }) 263175164Sjhb 264175164Sjhb#define _mm_mask_rcp28_round_sd(S, M, A, B, R) __extension__ ({ \ 265175164Sjhb (__m128d)__builtin_ia32_rcp28sd_round((__v2df)(__m128d)(A), \ 266228509Sjhb (__v2df)(__m128d)(B), \ 267228509Sjhb (__v2df)(__m128d)(S), \ 268228509Sjhb (__mmask8)(M), (R)); }) 269228509Sjhb 270228509Sjhb#define _mm_maskz_rcp28_round_sd(M, A, B, R) __extension__ ({ \ 271248084Sattilio (__m128d)__builtin_ia32_rcp28sd_round((__v2df)(__m128d)(A), \ 272228509Sjhb (__v2df)(__m128d)(B), \ 273228509Sjhb (__v2df)_mm_setzero_pd(), \ 274229821Salc (__mmask8)(M), (R)); }) 275229821Salc 276229821Salc#define _mm_rcp28_sd(A, B) \ 277229821Salc _mm_rcp28_round_sd((A), (B), _MM_FROUND_CUR_DIRECTION) 278229821Salc 279229821Salc#define _mm_mask_rcp28_sd(S, M, A, B) \ 280229821Salc _mm_mask_rcp28_round_sd((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION) 281229821Salc 282229821Salc#define _mm_maskz_rcp28_sd(M, A, B) \ 283229821Salc _mm_maskz_rcp28_round_sd((M), (A), (B), _MM_FROUND_CUR_DIRECTION) 284229821Salc 285229821Salc#endif // __AVX512ERINTRIN_H 286229821Salc