/*
 * Generic helpers for evaluating polynomials with various schemes.
 *
 * Copyright (c) 2023, Arm Limited.
 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 */

/* This file is a template: all three macros below must be defined by the
   includer before it is included.  As used in this file:
     VTYPE          is the type of every argument, coefficient and result;
     VWRAP (name)   yields the identifier under which a helper is defined;
     FMA (a, b, c)  is invoked with three VTYPE operands - presumably it
                    computes a * b + c; confirm against the includer.

   Conventions shared by all helpers (stated under that FMA assumption):
     - the numeric suffix N is the order, and the helper reads
       poly[0] .. poly[N], with poly[i] multiplying the i-th power of x;
     - x2, x4, x8 and x16 are never computed here.  The caller passes them
       in, and they are expected to hold the matching powers of x.  */
#ifndef VTYPE
# error Cannot use poly_generic without defining VTYPE
#endif
#ifndef VWRAP
# error Cannot use poly_generic without defining VWRAP
#endif
#ifndef FMA
# error Cannot use poly_generic without defining FMA
#endif

/* Order-3 building block shared by the Estrin helpers: the pairs
   (poly[0], poly[1]) and (poly[2], poly[3]) are each combined with x, then
   the upper pair is combined with the lower one through x2.  At this order
   the Estrin and pairwise Horner schemes perform the same operations.  */
static inline VTYPE VWRAP (pairwise_poly_3) (VTYPE x, VTYPE x2,
                                             const VTYPE *poly)
{
  VTYPE lower = FMA (poly[1], x, poly[0]);
  VTYPE upper = FMA (poly[3], x, poly[2]);
  return FMA (upper, x2, lower);
}

/* Estrin scheme, orders 4 to 7.  Coefficients 0-3 are handled by
   pairwise_poly_3; whatever lies above them is gathered into one value
   which is then combined with the low part through x4.  */
static inline VTYPE VWRAP (estrin_4) (VTYPE x, VTYPE x2, VTYPE x4,
                                      const VTYPE *poly)
{
  return FMA (poly[4], x4, VWRAP (pairwise_poly_3) (x, x2, poly));
}
static inline VTYPE VWRAP (estrin_5) (VTYPE x, VTYPE x2, VTYPE x4,
                                      const VTYPE *poly)
{
  VTYPE upper = FMA (poly[5], x, poly[4]);
  return FMA (upper, x4, VWRAP (pairwise_poly_3) (x, x2, poly));
}
static inline VTYPE VWRAP (estrin_6) (VTYPE x, VTYPE x2, VTYPE x4,
                                      const VTYPE *poly)
{
  VTYPE upper = FMA (poly[5], x, poly[4]);
  upper = FMA (poly[6], x2, upper);
  return FMA (upper, x4, VWRAP (pairwise_poly_3) (x, x2, poly));
}
static inline VTYPE VWRAP (estrin_7) (VTYPE x, VTYPE x2, VTYPE x4,
                                      const VTYPE *poly)
{
  return FMA (VWRAP (pairwise_poly_3) (x, x2, poly + 4), x4,
              VWRAP (pairwise_poly_3) (x, x2, poly));
}

/* Estrin scheme, orders 8 to 15.  Coefficients 0-7 go through estrin_7;
   the coefficients from poly[8] upwards are evaluated separately and
   combined with that low part through x8.  */
static inline VTYPE VWRAP (estrin_8) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8,
                                      const VTYPE *poly)
{
  VTYPE lower = VWRAP (estrin_7) (x, x2, x4, poly);
  return FMA (poly[8], x8, lower);
}
static inline VTYPE VWRAP (estrin_9) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8,
                                      const VTYPE *poly)
{
  VTYPE lower = VWRAP (estrin_7) (x, x2, x4, poly);
  VTYPE upper = FMA (poly[9], x, poly[8]);
  return FMA (upper, x8, lower);
}
static inline VTYPE VWRAP (estrin_10) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8,
                                       const VTYPE *poly)
{
  VTYPE lower = VWRAP (estrin_7) (x, x2, x4, poly);
  VTYPE upper = FMA (poly[9], x, poly[8]);
  upper = FMA (poly[10], x2, upper);
  return FMA (upper, x8, lower);
}
static inline VTYPE VWRAP (estrin_11) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8,
                                       const VTYPE *poly)
{
  VTYPE lower = VWRAP (estrin_7) (x, x2, x4, poly);
  VTYPE upper = VWRAP (pairwise_poly_3) (x, x2, poly + 8);
  return FMA (upper, x8, lower);
}
static inline VTYPE VWRAP (estrin_12) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8,
                                       const VTYPE *poly)
{
  VTYPE lower = VWRAP (estrin_7) (x, x2, x4, poly);
  VTYPE upper = VWRAP (estrin_4) (x, x2, x4, poly + 8);
  return FMA (upper, x8, lower);
}
static inline VTYPE VWRAP (estrin_13) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8,
                                       const VTYPE *poly)
{
  VTYPE lower = VWRAP (estrin_7) (x, x2, x4, poly);
  VTYPE upper = VWRAP (estrin_5) (x, x2, x4, poly + 8);
  return FMA (upper, x8, lower);
}
static inline VTYPE VWRAP (estrin_14) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8,
                                       const VTYPE *poly)
{
  VTYPE lower = VWRAP (estrin_7) (x, x2, x4, poly);
  VTYPE upper = VWRAP (estrin_6) (x, x2, x4, poly + 8);
  return FMA (upper, x8, lower);
}
static inline VTYPE VWRAP (estrin_15) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8,
                                       const VTYPE *poly)
{
  VTYPE lower = VWRAP (estrin_7) (x, x2, x4, poly);
  VTYPE upper = VWRAP (estrin_7) (x, x2, x4, poly + 8);
  return FMA (upper, x8, lower);
}

/* Estrin scheme, orders 16 to 19.  Coefficients 0-15 go through estrin_15;
   the coefficients from poly[16] upwards are gathered into one value and
   combined with that low part through x16.  */
static inline VTYPE VWRAP (estrin_16) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8,
                                       VTYPE x16, const VTYPE *poly)
{
  VTYPE lower = VWRAP (estrin_15) (x, x2, x4, x8, poly);
  return FMA (poly[16], x16, lower);
}
static inline VTYPE VWRAP (estrin_17) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8,
                                       VTYPE x16, const VTYPE *poly)
{
  VTYPE lower = VWRAP (estrin_15) (x, x2, x4, x8, poly);
  VTYPE upper = FMA (poly[17], x, poly[16]);
  return FMA (upper, x16, lower);
}
static inline VTYPE VWRAP (estrin_18) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8,
                                       VTYPE x16, const VTYPE *poly)
{
  VTYPE lower = VWRAP (estrin_15) (x, x2, x4, x8, poly);
  VTYPE upper = FMA (poly[17], x, poly[16]);
  upper = FMA (poly[18], x2, upper);
  return FMA (upper, x16, lower);
}
static inline VTYPE VWRAP (estrin_19) (VTYPE x, VTYPE x2, VTYPE x4, VTYPE x8,
                                       VTYPE x16, const VTYPE *poly)
{
  VTYPE lower = VWRAP (estrin_15) (x, x2, x4, x8, poly);
  VTYPE upper = VWRAP (pairwise_poly_3) (x, x2, poly + 16);
  return FMA (upper, x16, lower);
}

/* Horner scheme, orders 2 to 12.  The two highest coefficients are combined
   first; every further step combines x with the running value and the
   next-lower coefficient, ending at poly[0].  Each order above 2 evaluates
   the next-lower order on poly + 1 and then performs one more step.  */
static inline VTYPE VWRAP (horner_2) (VTYPE x, const VTYPE *poly)
{
  VTYPE acc = FMA (poly[2], x, poly[1]);
  acc = FMA (x, acc, poly[0]);
  return acc;
}
static inline VTYPE VWRAP (horner_3) (VTYPE x, const VTYPE *poly)
{
  VTYPE tail = VWRAP (horner_2) (x, poly + 1);
  return FMA (x, tail, poly[0]);
}
static inline VTYPE VWRAP (horner_4) (VTYPE x, const VTYPE *poly)
{
  VTYPE tail = VWRAP (horner_3) (x, poly + 1);
  return FMA (x, tail, poly[0]);
}
static inline VTYPE VWRAP (horner_5) (VTYPE x, const VTYPE *poly)
{
  VTYPE tail = VWRAP (horner_4) (x, poly + 1);
  return FMA (x, tail, poly[0]);
}
static inline VTYPE VWRAP (horner_6) (VTYPE x, const VTYPE *poly)
{
  VTYPE tail = VWRAP (horner_5) (x, poly + 1);
  return FMA (x, tail, poly[0]);
}
static inline VTYPE VWRAP (horner_7) (VTYPE x, const VTYPE *poly)
{
  VTYPE tail = VWRAP (horner_6) (x, poly + 1);
  return FMA (x, tail, poly[0]);
}
static inline VTYPE VWRAP (horner_8) (VTYPE x, const VTYPE *poly)
{
  VTYPE tail = VWRAP (horner_7) (x, poly + 1);
  return FMA (x, tail, poly[0]);
}
static inline VTYPE VWRAP (horner_9) (VTYPE x, const VTYPE *poly)
{
  VTYPE tail = VWRAP (horner_8) (x, poly + 1);
  return FMA (x, tail, poly[0]);
}
static inline VTYPE VWRAP (horner_10) (VTYPE x, const VTYPE *poly)
{
  VTYPE tail = VWRAP (horner_9) (x, poly + 1);
  return FMA (x, tail, poly[0]);
}
static inline VTYPE VWRAP (horner_11) (VTYPE x, const VTYPE *poly)
{
  VTYPE tail = VWRAP (horner_10) (x, poly + 1);
  return FMA (x, tail, poly[0]);
}
static inline VTYPE VWRAP (horner_12) (VTYPE x, const VTYPE *poly)
{
  VTYPE tail = VWRAP (horner_11) (x, poly + 1);
  return FMA (x, tail, poly[0]);
}

/* Pairwise Horner scheme, orders 4 to 18.  Neighbouring coefficients
   (poly[2k], poly[2k + 1]) are first combined with x; the resulting pairs
   are then chained from the top down using x2.  For even orders the top
   coefficient has no partner and enters the chain on its own.  Orders 6 and
   above evaluate the helper two orders lower on poly + 2 and fold in the
   (poly[0], poly[1]) pair.  */
static inline VTYPE VWRAP (pw_horner_4) (VTYPE x, VTYPE x2, const VTYPE *poly)
{
  VTYPE pair0 = FMA (poly[1], x, poly[0]);
  VTYPE pair1 = FMA (poly[3], x, poly[2]);
  VTYPE acc = FMA (x2, poly[4], pair1);
  acc = FMA (x2, acc, pair0);
  return acc;
}
static inline VTYPE VWRAP (pw_horner_5) (VTYPE x, VTYPE x2, const VTYPE *poly)
{
  VTYPE pair0 = FMA (poly[1], x, poly[0]);
  VTYPE pair1 = FMA (poly[3], x, poly[2]);
  VTYPE pair2 = FMA (poly[5], x, poly[4]);
  VTYPE acc = FMA (x2, pair2, pair1);
  acc = FMA (x2, acc, pair0);
  return acc;
}
static inline VTYPE VWRAP (pw_horner_6) (VTYPE x, VTYPE x2, const VTYPE *poly)
{
  VTYPE head = FMA (poly[1], x, poly[0]);
  VTYPE rest = VWRAP (pw_horner_4) (x, x2, poly + 2);
  return FMA (x2, rest, head);
}
static inline VTYPE VWRAP (pw_horner_7) (VTYPE x, VTYPE x2, const VTYPE *poly)
{
  VTYPE head = FMA (poly[1], x, poly[0]);
  VTYPE rest = VWRAP (pw_horner_5) (x, x2, poly + 2);
  return FMA (x2, rest, head);
}
static inline VTYPE VWRAP (pw_horner_8) (VTYPE x, VTYPE x2, const VTYPE *poly)
{
  VTYPE head = FMA (poly[1], x, poly[0]);
  VTYPE rest = VWRAP (pw_horner_6) (x, x2, poly + 2);
  return FMA (x2, rest, head);
}
static inline VTYPE VWRAP (pw_horner_9) (VTYPE x, VTYPE x2, const VTYPE *poly)
{
  VTYPE head = FMA (poly[1], x, poly[0]);
  VTYPE rest = VWRAP (pw_horner_7) (x, x2, poly + 2);
  return FMA (x2, rest, head);
}
static inline VTYPE VWRAP (pw_horner_10) (VTYPE x, VTYPE x2, const VTYPE *poly)
{
  VTYPE head = FMA (poly[1], x, poly[0]);
  VTYPE rest = VWRAP (pw_horner_8) (x, x2, poly + 2);
  return FMA (x2, rest, head);
}
static inline VTYPE VWRAP (pw_horner_11) (VTYPE x, VTYPE x2, const VTYPE *poly)
{
  VTYPE head = FMA (poly[1], x, poly[0]);
  VTYPE rest = VWRAP (pw_horner_9) (x, x2, poly + 2);
  return FMA (x2, rest, head);
}
static inline VTYPE VWRAP (pw_horner_12) (VTYPE x, VTYPE x2, const VTYPE *poly)
{
  VTYPE head = FMA (poly[1], x, poly[0]);
  VTYPE rest = VWRAP (pw_horner_10) (x, x2, poly + 2);
  return FMA (x2, rest, head);
}
static inline VTYPE VWRAP (pw_horner_13) (VTYPE x, VTYPE x2, const VTYPE *poly)
{
  VTYPE head = FMA (poly[1], x, poly[0]);
  VTYPE rest = VWRAP (pw_horner_11) (x, x2, poly + 2);
  return FMA (x2, rest, head);
}
static inline VTYPE VWRAP (pw_horner_14) (VTYPE x, VTYPE x2, const VTYPE *poly)
{
  VTYPE head = FMA (poly[1], x, poly[0]);
  VTYPE rest = VWRAP (pw_horner_12) (x, x2, poly + 2);
  return FMA (x2, rest, head);
}
static inline VTYPE VWRAP (pw_horner_15) (VTYPE x, VTYPE x2, const VTYPE *poly)
{
  VTYPE head = FMA (poly[1], x, poly[0]);
  VTYPE rest = VWRAP (pw_horner_13) (x, x2, poly + 2);
  return FMA (x2, rest, head);
}
static inline VTYPE VWRAP (pw_horner_16) (VTYPE x, VTYPE x2, const VTYPE *poly)
{
  VTYPE head = FMA (poly[1], x, poly[0]);
  VTYPE rest = VWRAP (pw_horner_14) (x, x2, poly + 2);
  return FMA (x2, rest, head);
}
static inline VTYPE VWRAP (pw_horner_17) (VTYPE x, VTYPE x2, const VTYPE *poly)
{
  VTYPE head = FMA (poly[1], x, poly[0]);
  VTYPE rest = VWRAP (pw_horner_15) (x, x2, poly + 2);
  return FMA (x2, rest, head);
}
static inline VTYPE VWRAP (pw_horner_18) (VTYPE x, VTYPE x2, const VTYPE *poly)
{
  VTYPE head = FMA (poly[1], x, poly[0]);
  VTYPE rest = VWRAP (pw_horner_16) (x, x2, poly + 2);
  return FMA (x2, rest, head);
}