fpu_cordic.c revision 1.4
1/* $NetBSD: fpu_cordic.c,v 1.4 2016/12/06 05:58:19 isaki Exp $ */ 2 3/* 4 * Copyright (c) 2013 Tetsuya Isaki. All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 20 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 22 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 23 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28#include <sys/cdefs.h> 29__KERNEL_RCSID(0, "$NetBSD: fpu_cordic.c,v 1.4 2016/12/06 05:58:19 isaki Exp $"); 30 31#include <machine/ieee.h> 32 33#include "fpu_emulate.h" 34 35/* 36 * sfpn = shoftened fp number; the idea is from fpu_log.c but not the same. 37 * The most significant byte of sp_m0 is EXP (signed byte) and the rest 38 * of sp_m0 is fp_mant[0]. 39 */ 40struct sfpn { 41 uint32_t sp_m0; 42 uint32_t sp_m1; 43 uint32_t sp_m2; 44}; 45 46#if defined(CORDIC_BOOTSTRAP) 47/* 48 * This is a bootstrap code to generate a pre-calculated tables such as 49 * atan_table[]. However, it's just for reference. 50 * If you want to run the bootstrap, you will define CORDIC_BOOTSTRAP 51 * and modify these files as a userland application. 52 */ 53 54#include <stdio.h> 55#include <stdlib.h> 56#include <string.h> 57#include <float.h> 58 59static void prepare_cordic_const(struct fpemu *); 60static struct fpn *fpu_gain1_cordic(struct fpemu *); 61static struct fpn *fpu_atan_taylor(struct fpemu *); 62static void printf_fpn(const struct fpn *); 63static void printf_sfpn(const struct sfpn *); 64static void fpn_to_sfpn(struct sfpn *, const struct fpn *); 65 66static struct sfpn atan_table[EXT_FRACBITS]; 67static struct fpn inv_gain1; 68 69int 70main(int argc, char *argv[]) 71{ 72 struct fpemu dummyfe; 73 int i; 74 struct fpn fp; 75 76 memset(&dummyfe, 0, sizeof(dummyfe)); 77 prepare_cordic_const(&dummyfe); 78 79 /* output as source code */ 80 printf("static const struct sfpn atan_table[] = {\n"); 81 for (i = 0; i < EXT_FRACBITS; i++) { 82 printf("\t"); 83 printf_sfpn(&atan_table[i]); 84 printf(",\n"); 85 } 86 printf("};\n\n"); 87 88 printf("const struct fpn fpu_cordic_inv_gain1 =\n\t"); 89 printf_fpn(&inv_gain1); 90 printf(";\n\n"); 91} 92 93/* 94 * This routine uses fpu_const(), fpu_add(), fpu_div(), fpu_logn() 95 * and fpu_atan_taylor() as bootstrap. 96 */ 97static void 98prepare_cordic_const(struct fpemu *fe) 99{ 100 struct fpn t; 101 struct fpn x; 102 struct fpn *r; 103 int i; 104 105 /* atan_table */ 106 fpu_const(&t, FPU_CONST_1); 107 for (i = 0; i < EXT_FRACBITS; i++) { 108 /* atan(t) */ 109 CPYFPN(&fe->fe_f2, &t); 110 r = fpu_atan_taylor(fe); 111 fpn_to_sfpn(&atan_table[i], r); 112 113 /* t /= 2 */ 114 t.fp_exp--; 115 } 116 117 /* inv_gain1 = 1 / gain1cordic() */ 118 r = fpu_gain1_cordic(fe); 119 CPYFPN(&fe->fe_f2, r); 120 fpu_const(&fe->fe_f1, FPU_CONST_1); 121 r = fpu_div(fe); 122 CPYFPN(&inv_gain1, r); 123} 124 125static struct fpn * 126fpu_gain1_cordic(struct fpemu *fe) 127{ 128 struct fpn x; 129 struct fpn y; 130 struct fpn z; 131 struct fpn v; 132 133 fpu_const(&x, FPU_CONST_1); 134 fpu_const(&y, FPU_CONST_0); 135 fpu_const(&z, FPU_CONST_0); 136 CPYFPN(&v, &x); 137 v.fp_sign = !v.fp_sign; 138 139 fpu_cordit1(fe, &x, &y, &z, &v); 140 CPYFPN(&fe->fe_f2, &x); 141 return &fe->fe_f2; 142} 143 144/* 145 * arctan(x) = pi/4 (for |x| = 1) 146 * 147 * x^3 x^5 x^7 148 * arctan(x) = x - --- + --- - --- + ... (for |x| < 1) 149 * 3 5 7 150 */ 151static struct fpn * 152fpu_atan_taylor(struct fpemu *fe) 153{ 154 struct fpn res; 155 struct fpn x2; 156 struct fpn s0; 157 struct fpn *s1; 158 struct fpn *r; 159 uint32_t k; 160 161 /* arctan(1) is pi/4 */ 162 if (fe->fe_f2.fp_exp == 0) { 163 fpu_const(&fe->fe_f2, FPU_CONST_PI); 164 fe->fe_f2.fp_exp -= 2; 165 return &fe->fe_f2; 166 } 167 168 /* s0 := x */ 169 CPYFPN(&s0, &fe->fe_f2); 170 171 /* res := x */ 172 CPYFPN(&res, &fe->fe_f2); 173 174 /* x2 := x * x */ 175 CPYFPN(&fe->fe_f1, &fe->fe_f2); 176 r = fpu_mul(fe); 177 CPYFPN(&x2, r); 178 179 k = 3; 180 for (;;) { 181 /* s1 := -s0 * x2 */ 182 CPYFPN(&fe->fe_f1, &s0); 183 CPYFPN(&fe->fe_f2, &x2); 184 s1 = fpu_mul(fe); 185 s1->fp_sign ^= 1; 186 CPYFPN(&fe->fe_f1, s1); 187 188 /* s0 := s1 for next loop */ 189 CPYFPN(&s0, s1); 190 191 /* s1 := s1 / k */ 192 fpu_explode(fe, &fe->fe_f2, FTYPE_LNG, &k); 193 s1 = fpu_div(fe); 194 195 /* break if s1 is enough small */ 196 if (ISZERO(s1)) 197 break; 198 if (res.fp_exp - s1->fp_exp >= FP_NMANT) 199 break; 200 201 /* res += s1 */ 202 CPYFPN(&fe->fe_f2, s1); 203 CPYFPN(&fe->fe_f1, &res); 204 r = fpu_add(fe); 205 CPYFPN(&res, r); 206 207 k += 2; 208 } 209 210 CPYFPN(&fe->fe_f2, &res); 211 return &fe->fe_f2; 212} 213 214static void 215printf_fpn(const struct fpn *fp) 216{ 217 printf("{ %d, %d, %3d, %d, { 0x%08x, 0x%08x, 0x%08x, }, }", 218 fp->fp_class, fp->fp_sign, fp->fp_exp, fp->fp_sticky ? 1 : 0, 219 fp->fp_mant[0], fp->fp_mant[1], fp->fp_mant[2]); 220} 221 222static void 223printf_sfpn(const struct sfpn *sp) 224{ 225 printf("{ 0x%08x, 0x%08x, 0x%08x, }", 226 sp->sp_m0, sp->sp_m1, sp->sp_m2); 227} 228 229static void 230fpn_to_sfpn(struct sfpn *sp, const struct fpn *fp) 231{ 232 sp->sp_m0 = (fp->fp_exp << 24) | fp->fp_mant[0]; 233 sp->sp_m1 = fp->fp_mant[1]; 234 sp->sp_m2 = fp->fp_mant[2]; 235} 236 237#else /* CORDIC_BOOTSTRAP */ 238 239static const struct sfpn atan_table[] = { 240 { 0xff06487e, 0xd5110b46, 0x11a80000, }, 241 { 0xfe076b19, 0xc1586ed3, 0xda2b7f0d, }, 242 { 0xfd07d6dd, 0x7e4b2037, 0x58ab6e33, }, 243 { 0xfc07f56e, 0xa6ab0bdb, 0x719644b5, }, 244 { 0xfb07fd56, 0xedcb3f7a, 0x71b65937, }, 245 { 0xfa07ff55, 0x6eea5d89, 0x2a13bce7, }, 246 { 0xf907ffd5, 0x56eedca6, 0xaddf3c5f, }, 247 { 0xf807fff5, 0x556eeea5, 0xcb403117, }, 248 { 0xf707fffd, 0x5556eeed, 0xca5d8956, }, 249 { 0xf607ffff, 0x55556eee, 0xea5ca6ab, }, 250 { 0xf507ffff, 0xd55556ee, 0xeedca5c8, }, 251 { 0xf407ffff, 0xf555556e, 0xeeeea5c8, }, 252 { 0xf307ffff, 0xfd555556, 0xeeeeedc8, }, 253 { 0xf207ffff, 0xff555555, 0x6eeeeee8, }, 254 { 0xf107ffff, 0xffd55555, 0x56eeeeed, }, 255 { 0xf007ffff, 0xfff55555, 0x556eeeed, }, 256 { 0xef07ffff, 0xfffd5555, 0x5556eeed, }, 257 { 0xee07ffff, 0xffff5555, 0x55556eed, }, 258 { 0xed07ffff, 0xffffd555, 0x555556ed, }, 259 { 0xec07ffff, 0xfffff555, 0x5555556d, }, 260 { 0xeb07ffff, 0xfffffd55, 0x55555555, }, 261 { 0xea07ffff, 0xffffff55, 0x55555554, }, 262 { 0xe907ffff, 0xffffffd5, 0x55555554, }, 263 { 0xe807ffff, 0xfffffff5, 0x55555554, }, 264 { 0xe707ffff, 0xfffffffd, 0x55555554, }, 265 { 0xe607ffff, 0xffffffff, 0x55555554, }, 266 { 0xe507ffff, 0xffffffff, 0xd5555554, }, 267 { 0xe407ffff, 0xffffffff, 0xf5555554, }, 268 { 0xe307ffff, 0xffffffff, 0xfd555554, }, 269 { 0xe207ffff, 0xffffffff, 0xff555554, }, 270 { 0xe107ffff, 0xffffffff, 0xffd55554, }, 271 { 0xe007ffff, 0xffffffff, 0xfff55554, }, 272 { 0xdf07ffff, 0xffffffff, 0xfffd5554, }, 273 { 0xde07ffff, 0xffffffff, 0xffff5554, }, 274 { 0xdd07ffff, 0xffffffff, 0xffffd554, }, 275 { 0xdc07ffff, 0xffffffff, 0xfffff554, }, 276 { 0xdb07ffff, 0xffffffff, 0xfffffd54, }, 277 { 0xda07ffff, 0xffffffff, 0xffffff54, }, 278 { 0xd907ffff, 0xffffffff, 0xffffffd4, }, 279 { 0xd807ffff, 0xffffffff, 0xfffffff4, }, 280 { 0xd707ffff, 0xffffffff, 0xfffffffc, }, 281 { 0xd7040000, 0x00000000, 0x00000000, }, 282 { 0xd6040000, 0x00000000, 0x00000000, }, 283 { 0xd5040000, 0x00000000, 0x00000000, }, 284 { 0xd4040000, 0x00000000, 0x00000000, }, 285 { 0xd3040000, 0x00000000, 0x00000000, }, 286 { 0xd2040000, 0x00000000, 0x00000000, }, 287 { 0xd1040000, 0x00000000, 0x00000000, }, 288 { 0xd0040000, 0x00000000, 0x00000000, }, 289 { 0xcf040000, 0x00000000, 0x00000000, }, 290 { 0xce040000, 0x00000000, 0x00000000, }, 291 { 0xcd040000, 0x00000000, 0x00000000, }, 292 { 0xcc040000, 0x00000000, 0x00000000, }, 293 { 0xcb040000, 0x00000000, 0x00000000, }, 294 { 0xca040000, 0x00000000, 0x00000000, }, 295 { 0xc9040000, 0x00000000, 0x00000000, }, 296 { 0xc8040000, 0x00000000, 0x00000000, }, 297 { 0xc7040000, 0x00000000, 0x00000000, }, 298 { 0xc6040000, 0x00000000, 0x00000000, }, 299 { 0xc5040000, 0x00000000, 0x00000000, }, 300 { 0xc4040000, 0x00000000, 0x00000000, }, 301 { 0xc3040000, 0x00000000, 0x00000000, }, 302 { 0xc2040000, 0x00000000, 0x00000000, }, 303 { 0xc1040000, 0x00000000, 0x00000000, }, 304}; 305 306const struct fpn fpu_cordic_inv_gain1 = 307 { 1, 0, -1, 1, { 0x0004dba7, 0x6d421af2, 0xd33fafd1, }, }; 308 309#endif /* CORDIC_BOOTSTRAP */ 310 311static inline void 312sfpn_to_fpn(struct fpn *fp, const struct sfpn *s) 313{ 314 fp->fp_class = FPC_NUM; 315 fp->fp_sign = 0; 316 fp->fp_sticky = 0; 317 fp->fp_exp = s->sp_m0 >> 24; 318 if (fp->fp_exp & 0x80) { 319 fp->fp_exp |= 0xffffff00; 320 } 321 fp->fp_mant[0] = s->sp_m0 & 0x000fffff; 322 fp->fp_mant[1] = s->sp_m1; 323 fp->fp_mant[2] = s->sp_m2; 324} 325 326void 327fpu_cordit1(struct fpemu *fe, struct fpn *x0, struct fpn *y0, struct fpn *z0, 328 const struct fpn *vecmode) 329{ 330 struct fpn t; 331 struct fpn x; 332 struct fpn y; 333 struct fpn z; 334 struct fpn *r; 335 int i; 336 int sign; 337 338 fpu_const(&t, FPU_CONST_1); 339 CPYFPN(&x, x0); 340 CPYFPN(&y, y0); 341 CPYFPN(&z, z0); 342 343 for (i = 0; i < EXT_FRACBITS; i++) { 344 struct fpn x1; 345 346 /* y < vecmode */ 347 CPYFPN(&fe->fe_f1, &y); 348 CPYFPN(&fe->fe_f2, vecmode); 349 fe->fe_f2.fp_sign = !fe->fe_f2.fp_sign; 350 r = fpu_add(fe); 351 352 if ((vecmode->fp_sign == 0 && r->fp_sign) || 353 (vecmode->fp_sign && z.fp_sign == 0)) { 354 sign = 1; 355 } else { 356 sign = 0; 357 } 358 359 /* y * t */ 360 CPYFPN(&fe->fe_f1, &y); 361 CPYFPN(&fe->fe_f2, &t); 362 r = fpu_mul(fe); 363 364 /* 365 * x1 = x - y*t (if sign) 366 * x1 = x + y*t 367 */ 368 CPYFPN(&fe->fe_f2, r); 369 if (sign) 370 fe->fe_f2.fp_sign = !fe->fe_f2.fp_sign; 371 CPYFPN(&fe->fe_f1, &x); 372 r = fpu_add(fe); 373 CPYFPN(&x1, r); 374 375 /* x * t */ 376 CPYFPN(&fe->fe_f1, &x); 377 CPYFPN(&fe->fe_f2, &t); 378 r = fpu_mul(fe); 379 380 /* 381 * y = y + x*t (if sign) 382 * y = y - x*t 383 */ 384 CPYFPN(&fe->fe_f2, r); 385 if (!sign) 386 fe->fe_f2.fp_sign = !fe->fe_f2.fp_sign; 387 CPYFPN(&fe->fe_f1, &y); 388 r = fpu_add(fe); 389 CPYFPN(&y, r); 390 391 /* 392 * z = z - atan_table[i] (if sign) 393 * z = z + atan_table[i] 394 */ 395 CPYFPN(&fe->fe_f1, &z); 396 sfpn_to_fpn(&fe->fe_f2, &atan_table[i]); 397 if (sign) 398 fe->fe_f2.fp_sign = !fe->fe_f2.fp_sign; 399 r = fpu_add(fe); 400 CPYFPN(&z, r); 401 402 /* x = x1 */ 403 CPYFPN(&x, &x1); 404 405 /* t /= 2 */ 406 t.fp_exp--; 407 } 408 409 CPYFPN(x0, &x); 410 CPYFPN(y0, &y); 411 CPYFPN(z0, &z); 412} 413