1/*
2 * Generic helpers for evaluating polynomials with various schemes.
3 *
4 * Copyright (c) 2023, Arm Limited.
5 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
6 */
7
8#ifndef VTYPE
9# error Cannot use poly_generic without defining VTYPE
10#endif
11#ifndef VWRAP
12# error Cannot use poly_generic without defining VWRAP
13#endif
14#ifndef FMA
15# error Cannot use poly_generic without defining FMA
16#endif
17
18static inline VTYPE VWRAP (pairwise_poly_3) (VTYPE x, VTYPE x2,
19					     const VTYPE *poly)
20{
21  /* At order 3, Estrin and Pairwise Horner are identical.  */
22  VTYPE p01 = FMA (poly[1], x, poly[0]);
23  VTYPE p23 = FMA (poly[3], x, poly[2]);
24  return FMA (p23, x2, p01);
25}
26
27static inline VTYPE VWRAP (estrin_4) (VTYPE x, VTYPE x2, VTYPE x4,
28				      const VTYPE *poly)
29{
30  VTYPE p03 = VWRAP (pairwise_poly_3) (x, x2, poly);
31  return FMA (poly[4], x4, p03);
32}
33static inline VTYPE VWRAP (estrin_5) (VTYPE x, VTYPE x2, VTYPE x4,
34				      const VTYPE *poly)
35{
36  VTYPE p03 = VWRAP (pairwise_poly_3) (x, x2, poly);
37  VTYPE p45 = FMA (poly[5], x, poly[4]);
38  return FMA (p45, x4, p03);
39}
40static inline VTYPE VWRAP (estrin_6) (VTYPE x, VTYPE x2, VTYPE x4,
41				      const VTYPE *poly)
42{
43  VTYPE p03 = VWRAP (pairwise_poly_3) (x, x2, poly);
44  VTYPE p45 = FMA (poly[5], x, poly[4]);
45  VTYPE p46 = FMA (poly[6], x2, p45);
46  return FMA (p46, x4, p03);
47}
48static inline VTYPE VWRAP (estrin_7) (VTYPE x, VTYPE x2, VTYPE x4,
49				      const VTYPE *poly)
50{
51  VTYPE p03 = VWRAP (pairwise_poly_3) (x, x2, poly);
52  VTYPE p47 = VWRAP (pairwise_poly_3) (x, x2, poly + 4);
53  return FMA (p47, x4, p03);
54}
55static inline VTYPE VWRAP (estrin_8) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8,
56				      const VTYPE *poly)
57{
58  return FMA (poly[8], x8, VWRAP (estrin_7) (x, x2, x4, poly));
59}
60static inline VTYPE VWRAP (estrin_9) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8,
61				      const VTYPE *poly)
62{
63  VTYPE p89 = FMA (poly[9], x, poly[8]);
64  return FMA (p89, x8, VWRAP (estrin_7) (x, x2, x4, poly));
65}
66static inline VTYPE VWRAP (estrin_10) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8,
67				       const VTYPE *poly)
68{
69  VTYPE p89 = FMA (poly[9], x, poly[8]);
70  VTYPE p8_10 = FMA (poly[10], x2, p89);
71  return FMA (p8_10, x8, VWRAP (estrin_7) (x, x2, x4, poly));
72}
73static inline VTYPE VWRAP (estrin_11) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8,
74				       const VTYPE *poly)
75{
76  VTYPE p8_11 = VWRAP (pairwise_poly_3) (x, x2, poly + 8);
77  return FMA (p8_11, x8, VWRAP (estrin_7) (x, x2, x4, poly));
78}
79static inline VTYPE VWRAP (estrin_12) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8,
80				       const VTYPE *poly)
81{
82  return FMA (VWRAP (estrin_4) (x, x2, x4, poly + 8), x8,
83	      VWRAP (estrin_7) (x, x2, x4, poly));
84}
85static inline VTYPE VWRAP (estrin_13) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8,
86				       const VTYPE *poly)
87{
88  return FMA (VWRAP (estrin_5) (x, x2, x4, poly + 8), x8,
89	      VWRAP (estrin_7) (x, x2, x4, poly));
90}
91static inline VTYPE VWRAP (estrin_14) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8,
92				       const VTYPE *poly)
93{
94  return FMA (VWRAP (estrin_6) (x, x2, x4, poly + 8), x8,
95	      VWRAP (estrin_7) (x, x2, x4, poly));
96}
97static inline VTYPE VWRAP (estrin_15) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8,
98				       const VTYPE *poly)
99{
100  return FMA (VWRAP (estrin_7) (x, x2, x4, poly + 8), x8,
101	      VWRAP (estrin_7) (x, x2, x4, poly));
102}
103static inline VTYPE VWRAP (estrin_16) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8,
104				       VTYPE x16, const VTYPE *poly)
105{
106  return FMA (poly[16], x16, VWRAP (estrin_15) (x, x2, x4, x8, poly));
107}
108static inline VTYPE VWRAP (estrin_17) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8,
109				       VTYPE x16, const VTYPE *poly)
110{
111  VTYPE p16_17 = FMA (poly[17], x, poly[16]);
112  return FMA (p16_17, x16, VWRAP (estrin_15) (x, x2, x4, x8, poly));
113}
114static inline VTYPE VWRAP (estrin_18) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8,
115				       VTYPE x16, const VTYPE *poly)
116{
117  VTYPE p16_17 = FMA (poly[17], x, poly[16]);
118  VTYPE p16_18 = FMA (poly[18], x2, p16_17);
119  return FMA (p16_18, x16, VWRAP (estrin_15) (x, x2, x4, x8, poly));
120}
121static inline VTYPE VWRAP (estrin_19) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8,
122				       VTYPE x16, const VTYPE *poly)
123{
124  VTYPE p16_19 = VWRAP (pairwise_poly_3) (x, x2, poly + 16);
125  return FMA (p16_19, x16, VWRAP (estrin_15) (x, x2, x4, x8, poly));
126}
127
128static inline VTYPE VWRAP (horner_2) (VTYPE x, const VTYPE *poly)
129{
130  VTYPE p = FMA (poly[2], x, poly[1]);
131  return FMA (x, p, poly[0]);
132}
133static inline VTYPE VWRAP (horner_3) (VTYPE x, const VTYPE *poly)
134{
135  VTYPE p = FMA (poly[3], x, poly[2]);
136  p = FMA (x, p, poly[1]);
137  p = FMA (x, p, poly[0]);
138  return p;
139}
140static inline VTYPE VWRAP (horner_4) (VTYPE x, const VTYPE *poly)
141{
142  VTYPE p = FMA (poly[4], x, poly[3]);
143  p = FMA (x, p, poly[2]);
144  p = FMA (x, p, poly[1]);
145  p = FMA (x, p, poly[0]);
146  return p;
147}
148static inline VTYPE VWRAP (horner_5) (VTYPE x, const VTYPE *poly)
149{
150  return FMA (x, VWRAP (horner_4) (x, poly + 1), poly[0]);
151}
152static inline VTYPE VWRAP (horner_6) (VTYPE x, const VTYPE *poly)
153{
154  return FMA (x, VWRAP (horner_5) (x, poly + 1), poly[0]);
155}
156static inline VTYPE VWRAP (horner_7) (VTYPE x, const VTYPE *poly)
157{
158  return FMA (x, VWRAP (horner_6) (x, poly + 1), poly[0]);
159}
160static inline VTYPE VWRAP (horner_8) (VTYPE x, const VTYPE *poly)
161{
162  return FMA (x, VWRAP (horner_7) (x, poly + 1), poly[0]);
163}
164static inline VTYPE VWRAP (horner_9) (VTYPE x, const VTYPE *poly)
165{
166  return FMA (x, VWRAP (horner_8) (x, poly + 1), poly[0]);
167}
168static inline VTYPE VWRAP (horner_10) (VTYPE x, const VTYPE *poly)
169{
170  return FMA (x, VWRAP (horner_9) (x, poly + 1), poly[0]);
171}
172static inline VTYPE VWRAP (horner_11) (VTYPE x, const VTYPE *poly)
173{
174  return FMA (x, VWRAP (horner_10) (x, poly + 1), poly[0]);
175}
176static inline VTYPE VWRAP (horner_12) (VTYPE x, const VTYPE *poly)
177{
178  return FMA (x, VWRAP (horner_11) (x, poly + 1), poly[0]);
179}
180
181static inline VTYPE VWRAP (pw_horner_4) (VTYPE x, VTYPE x2, const VTYPE *poly)
182{
183  VTYPE p01 = FMA (poly[1], x, poly[0]);
184  VTYPE p23 = FMA (poly[3], x, poly[2]);
185  VTYPE p;
186  p = FMA (x2, poly[4], p23);
187  p = FMA (x2, p, p01);
188  return p;
189}
190static inline VTYPE VWRAP (pw_horner_5) (VTYPE x, VTYPE x2, const VTYPE *poly)
191{
192  VTYPE p01 = FMA (poly[1], x, poly[0]);
193  VTYPE p23 = FMA (poly[3], x, poly[2]);
194  VTYPE p45 = FMA (poly[5], x, poly[4]);
195  VTYPE p;
196  p = FMA (x2, p45, p23);
197  p = FMA (x2, p, p01);
198  return p;
199}
200static inline VTYPE VWRAP (pw_horner_6) (VTYPE x, VTYPE x2, const VTYPE *poly)
201{
202  VTYPE p26 = VWRAP (pw_horner_4) (x, x2, poly + 2);
203  VTYPE p01 = FMA (poly[1], x, poly[0]);
204  return FMA (x2, p26, p01);
205}
206static inline VTYPE VWRAP (pw_horner_7) (VTYPE x, VTYPE x2, const VTYPE *poly)
207{
208  VTYPE p27 = VWRAP (pw_horner_5) (x, x2, poly + 2);
209  VTYPE p01 = FMA (poly[1], x, poly[0]);
210  return FMA (x2, p27, p01);
211}
212static inline VTYPE VWRAP (pw_horner_8) (VTYPE x, VTYPE x2, const VTYPE *poly)
213{
214  VTYPE p28 = VWRAP (pw_horner_6) (x, x2, poly + 2);
215  VTYPE p01 = FMA (poly[1], x, poly[0]);
216  return FMA (x2, p28, p01);
217}
218static inline VTYPE VWRAP (pw_horner_9) (VTYPE x, VTYPE x2, const VTYPE *poly)
219{
220  VTYPE p29 = VWRAP (pw_horner_7) (x, x2, poly + 2);
221  VTYPE p01 = FMA (poly[1], x, poly[0]);
222  return FMA (x2, p29, p01);
223}
224static inline VTYPE VWRAP (pw_horner_10) (VTYPE x, VTYPE x2, const VTYPE *poly)
225{
226  VTYPE p2_10 = VWRAP (pw_horner_8) (x, x2, poly + 2);
227  VTYPE p01 = FMA (poly[1], x, poly[0]);
228  return FMA (x2, p2_10, p01);
229}
230static inline VTYPE VWRAP (pw_horner_11) (VTYPE x, VTYPE x2, const VTYPE *poly)
231{
232  VTYPE p2_11 = VWRAP (pw_horner_9) (x, x2, poly + 2);
233  VTYPE p01 = FMA (poly[1], x, poly[0]);
234  return FMA (x2, p2_11, p01);
235}
236static inline VTYPE VWRAP (pw_horner_12) (VTYPE x, VTYPE x2, const VTYPE *poly)
237{
238  VTYPE p2_12 = VWRAP (pw_horner_10) (x, x2, poly + 2);
239  VTYPE p01 = FMA (poly[1], x, poly[0]);
240  return FMA (x2, p2_12, p01);
241}
242static inline VTYPE VWRAP (pw_horner_13) (VTYPE x, VTYPE x2, const VTYPE *poly)
243{
244  VTYPE p2_13 = VWRAP (pw_horner_11) (x, x2, poly + 2);
245  VTYPE p01 = FMA (poly[1], x, poly[0]);
246  return FMA (x2, p2_13, p01);
247}
248static inline VTYPE VWRAP (pw_horner_14) (VTYPE x, VTYPE x2, const VTYPE *poly)
249{
250  VTYPE p2_14 = VWRAP (pw_horner_12) (x, x2, poly + 2);
251  VTYPE p01 = FMA (poly[1], x, poly[0]);
252  return FMA (x2, p2_14, p01);
253}
254static inline VTYPE VWRAP (pw_horner_15) (VTYPE x, VTYPE x2, const VTYPE *poly)
255{
256  VTYPE p2_15 = VWRAP (pw_horner_13) (x, x2, poly + 2);
257  VTYPE p01 = FMA (poly[1], x, poly[0]);
258  return FMA (x2, p2_15, p01);
259}
260static inline VTYPE VWRAP (pw_horner_16) (VTYPE x, VTYPE x2, const VTYPE *poly)
261{
262  VTYPE p2_16 = VWRAP (pw_horner_14) (x, x2, poly + 2);
263  VTYPE p01 = FMA (poly[1], x, poly[0]);
264  return FMA (x2, p2_16, p01);
265}
266static inline VTYPE VWRAP (pw_horner_17) (VTYPE x, VTYPE x2, const VTYPE *poly)
267{
268  VTYPE p2_17 = VWRAP (pw_horner_15) (x, x2, poly + 2);
269  VTYPE p01 = FMA (poly[1], x, poly[0]);
270  return FMA (x2, p2_17, p01);
271}
272static inline VTYPE VWRAP (pw_horner_18) (VTYPE x, VTYPE x2, const VTYPE *poly)
273{
274  VTYPE p2_18 = VWRAP (pw_horner_16) (x, x2, poly + 2);
275  VTYPE p01 = FMA (poly[1], x, poly[0]);
276  return FMA (x2, p2_18, p01);
277}
278