1158115Sume/*
2158115Sume * Single-precision SVE 2^x function.
3158115Sume *
4158115Sume * Copyright (c) 2023, Arm Limited.
5158115Sume * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
6158115Sume */
7158115Sume
8158115Sume#include "sv_math.h"
9158115Sume#include "poly_sve_f32.h"
10158115Sume#include "pl_sig.h"
11158115Sume#include "pl_test.h"
12158115Sume
13158115Sumestatic const struct data
14158115Sume{
15158115Sume  float poly[5];
16158115Sume  float shift, thres;
17158115Sume} data = {
18158115Sume  /* Coefficients copied from the polynomial in AdvSIMD variant, reversed for
19158115Sume     compatibility with polynomial helpers.  */
20158115Sume  .poly = { 0x1.62e422p-1f, 0x1.ebf9bcp-3f, 0x1.c6bd32p-5f, 0x1.3ce9e4p-7f,
21158115Sume	    0x1.59977ap-10f },
22158115Sume  /* 1.5*2^17 + 127.  */
23158115Sume  .shift = 0x1.903f8p17f,
24158115Sume  /* Roughly 87.3. For x < -Thres, the result is subnormal and not handled
25158115Sume     correctly by FEXPA.  */
26158115Sume  .thres = 0x1.5d5e2ap+6f,
27158115Sume};
28158115Sume
29158115Sumestatic svfloat32_t NOINLINE
30158115Sumespecial_case (svfloat32_t x, svfloat32_t y, svbool_t special)
31158115Sume{
32194093Sdes  return sv_call_f32 (exp2f, x, y, special);
33158115Sume}
34158115Sume
35158115Sume/* Single-precision SVE exp2f routine. Implements the same algorithm
36158115Sume   as AdvSIMD exp2f.
37158115Sume   Worst case error is 1.04 ULPs.
38158115Sume   SV_NAME_F1 (exp2)(0x1.943b9p-1) got 0x1.ba7eb2p+0
39194091Sdes				  want 0x1.ba7ebp+0.  */
40158115Sumesvfloat32_t SV_NAME_F1 (exp2) (svfloat32_t x, const svbool_t pg)
41158115Sume{
42158115Sume  const struct data *d = ptr_barrier (&data);
43158115Sume  /* exp2(x) = 2^n (1 + poly(r)), with 1 + poly(r) in [1/sqrt(2),sqrt(2)]
44158115Sume    x = n + r, with r in [-1/2, 1/2].  */
45158115Sume  svfloat32_t shift = sv_f32 (d->shift);
46158115Sume  svfloat32_t z = svadd_x (pg, x, shift);
47158115Sume  svfloat32_t n = svsub_x (pg, z, shift);
48158115Sume  svfloat32_t r = svsub_x (pg, x, n);
49158115Sume
50158115Sume  svbool_t special = svacgt (pg, x, d->thres);
51158115Sume  svfloat32_t scale = svexpa (svreinterpret_u32 (z));
52158115Sume
53158115Sume  /* Polynomial evaluation: poly(r) ~ exp2(r)-1.
54194091Sdes     Evaluate polynomial use hybrid scheme - offset ESTRIN by 1 for
55158115Sume     coefficients 1 to 4, and apply most significant coefficient directly.  */
56158115Sume  svfloat32_t r2 = svmul_x (pg, r, r);
57158115Sume  svfloat32_t p14 = sv_pairwise_poly_3_f32_x (pg, r, r2, d->poly + 1);
58158115Sume  svfloat32_t p0 = svmul_x (pg, r, d->poly[0]);
59158115Sume  svfloat32_t poly = svmla_x (pg, p0, r2, p14);
60158115Sume
61158115Sume  if (unlikely (svptest_any (pg, special)))
62158115Sume    return special_case (x, svmla_x (pg, scale, scale, poly), special);
63158115Sume
64158115Sume  return svmla_x (pg, scale, scale, poly);
65158115Sume}
66158115Sume
67158115SumePL_SIG (SV, F, 1, exp2, -9.9, 9.9)
68194091SdesPL_TEST_ULP (SV_NAME_F1 (exp2), 0.55)
69158115SumePL_TEST_INTERVAL (SV_NAME_F1 (exp2), 0, Thres, 40000)
70158115SumePL_TEST_INTERVAL (SV_NAME_F1 (exp2), Thres, 1, 50000)
71158115SumePL_TEST_INTERVAL (SV_NAME_F1 (exp2), 1, Thres, 50000)
72158115SumePL_TEST_INTERVAL (SV_NAME_F1 (exp2), Thres, inf, 50000)
73158115SumePL_TEST_INTERVAL (SV_NAME_F1 (exp2), -0, -0x1p-23, 40000)
74158115SumePL_TEST_INTERVAL (SV_NAME_F1 (exp2), -0x1p-23, -1, 50000)
75158115SumePL_TEST_INTERVAL (SV_NAME_F1 (exp2), -1, -0x1p23, 50000)
76158115SumePL_TEST_INTERVAL (SV_NAME_F1 (exp2), -0x1p23, -inf, 50000)
77158115SumePL_TEST_INTERVAL (SV_NAME_F1 (exp2), -0, ScaleThres, 40000)
78158115SumePL_TEST_INTERVAL (SV_NAME_F1 (exp2), ScaleThres, -1, 50000)
79158115SumePL_TEST_INTERVAL (SV_NAME_F1 (exp2), -1, ScaleThres, 50000)
80158115SumePL_TEST_INTERVAL (SV_NAME_F1 (exp2), ScaleThres, -inf, 50000)
81158115Sume