1158115Sume/* 2158115Sume * Single-precision SVE 2^x function. 3158115Sume * 4158115Sume * Copyright (c) 2023, Arm Limited. 5158115Sume * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 6158115Sume */ 7158115Sume 8158115Sume#include "sv_math.h" 9158115Sume#include "poly_sve_f32.h" 10158115Sume#include "pl_sig.h" 11158115Sume#include "pl_test.h" 12158115Sume 13158115Sumestatic const struct data 14158115Sume{ 15158115Sume float poly[5]; 16158115Sume float shift, thres; 17158115Sume} data = { 18158115Sume /* Coefficients copied from the polynomial in AdvSIMD variant, reversed for 19158115Sume compatibility with polynomial helpers. */ 20158115Sume .poly = { 0x1.62e422p-1f, 0x1.ebf9bcp-3f, 0x1.c6bd32p-5f, 0x1.3ce9e4p-7f, 21158115Sume 0x1.59977ap-10f }, 22158115Sume /* 1.5*2^17 + 127. */ 23158115Sume .shift = 0x1.903f8p17f, 24158115Sume /* Roughly 87.3. For x < -Thres, the result is subnormal and not handled 25158115Sume correctly by FEXPA. */ 26158115Sume .thres = 0x1.5d5e2ap+6f, 27158115Sume}; 28158115Sume 29158115Sumestatic svfloat32_t NOINLINE 30158115Sumespecial_case (svfloat32_t x, svfloat32_t y, svbool_t special) 31158115Sume{ 32194093Sdes return sv_call_f32 (exp2f, x, y, special); 33158115Sume} 34158115Sume 35158115Sume/* Single-precision SVE exp2f routine. Implements the same algorithm 36158115Sume as AdvSIMD exp2f. 37158115Sume Worst case error is 1.04 ULPs. 38158115Sume SV_NAME_F1 (exp2)(0x1.943b9p-1) got 0x1.ba7eb2p+0 39194091Sdes want 0x1.ba7ebp+0. */ 40158115Sumesvfloat32_t SV_NAME_F1 (exp2) (svfloat32_t x, const svbool_t pg) 41158115Sume{ 42158115Sume const struct data *d = ptr_barrier (&data); 43158115Sume /* exp2(x) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)] 44158115Sume x = n + r, with r in [-1/2, 1/2]. */ 45158115Sume svfloat32_t shift = sv_f32 (d->shift); 46158115Sume svfloat32_t z = svadd_x (pg, x, shift); 47158115Sume svfloat32_t n = svsub_x (pg, z, shift); 48158115Sume svfloat32_t r = svsub_x (pg, x, n); 49158115Sume 50158115Sume svbool_t special = svacgt (pg, x, d->thres); 51158115Sume svfloat32_t scale = svexpa (svreinterpret_u32 (z)); 52158115Sume 53158115Sume /* Polynomial evaluation: poly(r) ~ exp2(r)-1. 54194091Sdes Evaluate polynomial use hybrid scheme - offset ESTRIN by 1 for 55158115Sume coefficients 1 to 4, and apply most significant coefficient directly. */ 56158115Sume svfloat32_t r2 = svmul_x (pg, r, r); 57158115Sume svfloat32_t p14 = sv_pairwise_poly_3_f32_x (pg, r, r2, d->poly + 1); 58158115Sume svfloat32_t p0 = svmul_x (pg, r, d->poly[0]); 59158115Sume svfloat32_t poly = svmla_x (pg, p0, r2, p14); 60158115Sume 61158115Sume if (unlikely (svptest_any (pg, special))) 62158115Sume return special_case (x, svmla_x (pg, scale, scale, poly), special); 63158115Sume 64158115Sume return svmla_x (pg, scale, scale, poly); 65158115Sume} 66158115Sume 67158115SumePL_SIG (SV, F, 1, exp2, -9.9, 9.9) 68194091SdesPL_TEST_ULP (SV_NAME_F1 (exp2), 0.55) 69158115SumePL_TEST_INTERVAL (SV_NAME_F1 (exp2), 0, Thres, 40000) 70158115SumePL_TEST_INTERVAL (SV_NAME_F1 (exp2), Thres, 1, 50000) 71158115SumePL_TEST_INTERVAL (SV_NAME_F1 (exp2), 1, Thres, 50000) 72158115SumePL_TEST_INTERVAL (SV_NAME_F1 (exp2), Thres, inf, 50000) 73158115SumePL_TEST_INTERVAL (SV_NAME_F1 (exp2), -0, -0x1p-23, 40000) 74158115SumePL_TEST_INTERVAL (SV_NAME_F1 (exp2), -0x1p-23, -1, 50000) 75158115SumePL_TEST_INTERVAL (SV_NAME_F1 (exp2), -1, -0x1p23, 50000) 76158115SumePL_TEST_INTERVAL (SV_NAME_F1 (exp2), -0x1p23, -inf, 50000) 77158115SumePL_TEST_INTERVAL (SV_NAME_F1 (exp2), -0, ScaleThres, 40000) 78158115SumePL_TEST_INTERVAL (SV_NAME_F1 (exp2), ScaleThres, -1, 50000) 79158115SumePL_TEST_INTERVAL (SV_NAME_F1 (exp2), -1, ScaleThres, 50000) 80158115SumePL_TEST_INTERVAL (SV_NAME_F1 (exp2), ScaleThres, -inf, 50000) 81158115Sume