fpu_cordic.c revision 1.1
1/* $NetBSD: fpu_cordic.c,v 1.1 2013/04/19 13:31:11 isaki Exp $ */ 2 3/* 4 * Copyright (c) 2013 Tetsuya Isaki. All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 20 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 21 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 22 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 23 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28#include <sys/cdefs.h> 29__KERNEL_RCSID(0, "$NetBSD: fpu_cordic.c,v 1.1 2013/04/19 13:31:11 isaki Exp $"); 30 31#include <machine/ieee.h> 32 33#include "fpu_emulate.h" 34 35/* 36 * sfpn = shoftened fp number; the idea is from fpu_log.c but not the same. 37 * The most significant byte of sp_m0 is EXP (signed byte) and the rest 38 * of sp_m0 is fp_mant[0]. 39 */ 40struct sfpn { 41 uint32_t sp_m0; 42 uint32_t sp_m1; 43 uint32_t sp_m2; 44}; 45 46#if defined(CORDIC_BOOTSTRAP) 47/* 48 * This is a bootstrap code to generate a pre-calculated tables such as 49 * atan_table[] and atanh_table[]. However, it's just for reference. 50 * If you want to run the bootstrap, you will define CORDIC_BOOTSTRAP 51 * and modify these files as a userland application. 52 */ 53 54#include <stdio.h> 55#include <stdlib.h> 56#include <string.h> 57#include <float.h> 58 59static void prepare_cordic_const(struct fpemu *); 60static struct fpn *fpu_gain1_cordic(struct fpemu *); 61static struct fpn *fpu_gain2_cordic(struct fpemu *); 62static struct fpn *fpu_atan_tayler(struct fpemu *); 63static void printf_fpn(const struct fpn *); 64static void printf_sfpn(const struct sfpn *); 65static void fpn_to_sfpn(struct sfpn *, const struct fpn *); 66 67static struct sfpn atan_table[EXT_FRACBITS]; 68static struct sfpn atanh_table[EXT_FRACBITS]; 69static struct fpn inv_gain1; 70static struct fpn inv_gain2; 71 72int 73main(int argc, char *argv[]) 74{ 75 struct fpemu dummyfe; 76 int i; 77 struct fpn fp; 78 79 memset(&dummyfe, 0, sizeof(dummyfe)); 80 prepare_cordic_const(&dummyfe); 81 82 /* output as source code */ 83 printf("static const struct sfpn atan_table[] = {\n"); 84 for (i = 0; i < EXT_FRACBITS; i++) { 85 printf("\t"); 86 printf_sfpn(&atan_table[i]); 87 printf(",\n"); 88 } 89 printf("};\n\n"); 90 91 printf("static const struct sfpn atanh_table[] = {\n"); 92 for (i = 0; i < EXT_FRACBITS; i++) { 93 printf("\t"); 94 printf_sfpn(&atanh_table[i]); 95 printf(",\n"); 96 } 97 printf("};\n\n"); 98 99 printf("const struct fpn fpu_cordic_inv_gain1 =\n\t"); 100 printf_fpn(&inv_gain1); 101 printf(";\n\n"); 102 103 printf("const struct fpn fpu_cordic_inv_gain2 =\n\t"); 104 printf_fpn(&inv_gain2); 105 printf(";\n\n"); 106} 107 108/* 109 * This routine uses fpu_const(), fpu_add(), fpu_div(), fpu_logn() 110 * and fpu_atan_tayler() as bootstrap. 111 */ 112static void 113prepare_cordic_const(struct fpemu *fe) 114{ 115 struct fpn t; 116 struct fpn x; 117 struct fpn *r; 118 int i; 119 120 /* atan_table and atanh_table */ 121 fpu_const(&t, FPU_CONST_1); 122 for (i = 0; i < EXT_FRACBITS; i++) { 123 /* atan(t) */ 124 CPYFPN(&fe->fe_f2, &t); 125 r = fpu_atan_tayler(fe); 126 fpn_to_sfpn(&atan_table[i], r); 127 128 /* t /= 2 */ 129 t.fp_exp--; 130 131 /* (1-t) */ 132 fpu_const(&fe->fe_f1, FPU_CONST_1); 133 CPYFPN(&fe->fe_f2, &t); 134 fe->fe_f2.fp_sign = 1; 135 r = fpu_add(fe); 136 CPYFPN(&x, r); 137 138 /* (1+t) */ 139 fpu_const(&fe->fe_f1, FPU_CONST_1); 140 CPYFPN(&fe->fe_f2, &t); 141 r = fpu_add(fe); 142 143 /* r = (1+t)/(1-t) */ 144 CPYFPN(&fe->fe_f1, r); 145 CPYFPN(&fe->fe_f2, &x); 146 r = fpu_div(fe); 147 148 /* r = log(r) */ 149 CPYFPN(&fe->fe_f2, r); 150 r = fpu_logn(fe); 151 152 /* r /= 2 */ 153 r->fp_exp--; 154 155 fpn_to_sfpn(&atanh_table[i], r); 156 } 157 158 /* inv_gain1 = 1 / gain1cordic() */ 159 r = fpu_gain1_cordic(fe); 160 CPYFPN(&fe->fe_f2, r); 161 fpu_const(&fe->fe_f1, FPU_CONST_1); 162 r = fpu_div(fe); 163 CPYFPN(&inv_gain1, r); 164 165 /* inv_gain2 = 1 / gain2cordic() */ 166 r = fpu_gain2_cordic(fe); 167 CPYFPN(&fe->fe_f2, r); 168 fpu_const(&fe->fe_f1, FPU_CONST_1); 169 r = fpu_div(fe); 170 CPYFPN(&inv_gain2, r); 171} 172 173static struct fpn * 174fpu_gain1_cordic(struct fpemu *fe) 175{ 176 struct fpn x; 177 struct fpn y; 178 struct fpn z; 179 struct fpn v; 180 181 fpu_const(&x, FPU_CONST_1); 182 fpu_const(&y, FPU_CONST_0); 183 fpu_const(&z, FPU_CONST_0); 184 CPYFPN(&v, &x); 185 v.fp_sign = !v.fp_sign; 186 187 fpu_cordit1(fe, &x, &y, &z, &v); 188 CPYFPN(&fe->fe_f2, &x); 189 return &fe->fe_f2; 190} 191 192static struct fpn * 193fpu_gain2_cordic(struct fpemu *fe) 194{ 195 struct fpn x; 196 struct fpn y; 197 struct fpn z; 198 struct fpn v; 199 200 fpu_const(&x, FPU_CONST_1); 201 fpu_const(&y, FPU_CONST_0); 202 fpu_const(&z, FPU_CONST_0); 203 CPYFPN(&v, &x); 204 v.fp_sign = !v.fp_sign; 205 206 fpu_cordit2(fe, &x, &y, &z, &v); 207 CPYFPN(&fe->fe_f2, &x); 208 return &fe->fe_f2; 209} 210 211/* 212 * arctan(x) = pi/4 (for |x| = 1) 213 * 214 * x^3 x^5 x^7 215 * arctan(x) = x - --- + --- - --- + ... (for |x| < 1) 216 * 3 5 7 217 */ 218static struct fpn * 219fpu_atan_tayler(struct fpemu *fe) 220{ 221 struct fpn res; 222 struct fpn x2; 223 struct fpn s0; 224 struct fpn *s1; 225 struct fpn *r; 226 uint32_t k; 227 228 /* arctan(1) is pi/4 */ 229 if (fe->fe_f2.fp_exp == 0) { 230 fpu_const(&fe->fe_f2, FPU_CONST_PI); 231 fe->fe_f2.fp_exp -= 2; 232 return &fe->fe_f2; 233 } 234 235 /* s0 := x */ 236 CPYFPN(&s0, &fe->fe_f2); 237 238 /* res := x */ 239 CPYFPN(&res, &fe->fe_f2); 240 241 /* x2 := x * x */ 242 CPYFPN(&fe->fe_f1, &fe->fe_f2); 243 r = fpu_mul(fe); 244 CPYFPN(&x2, r); 245 246 k = 3; 247 for (;;) { 248 /* s1 := -s0 * x2 */ 249 CPYFPN(&fe->fe_f1, &s0); 250 CPYFPN(&fe->fe_f2, &x2); 251 s1 = fpu_mul(fe); 252 s1->fp_sign ^= 1; 253 CPYFPN(&fe->fe_f1, s1); 254 255 /* s0 := s1 for next loop */ 256 CPYFPN(&s0, s1); 257 258 /* s1 := s1 / k */ 259 fpu_explode(fe, &fe->fe_f2, FTYPE_LNG, &k); 260 s1 = fpu_div(fe); 261 262 /* break if s1 is enough small */ 263 if (ISZERO(s1)) 264 break; 265 if (res.fp_exp - s1->fp_exp >= FP_NMANT) 266 break; 267 268 /* res += s1 */ 269 CPYFPN(&fe->fe_f2, s1); 270 CPYFPN(&fe->fe_f1, &res); 271 r = fpu_add(fe); 272 CPYFPN(&res, r); 273 274 k += 2; 275 } 276 277 CPYFPN(&fe->fe_f2, &res); 278 return &fe->fe_f2; 279} 280 281static void 282printf_fpn(const struct fpn *fp) 283{ 284 printf("{ %d, %d, %3d, %d, { 0x%08x, 0x%08x, 0x%08x, }, }", 285 fp->fp_class, fp->fp_sign, fp->fp_exp, fp->fp_sticky ? 1 : 0, 286 fp->fp_mant[0], fp->fp_mant[1], fp->fp_mant[2]); 287} 288 289static void 290printf_sfpn(const struct sfpn *sp) 291{ 292 printf("{ 0x%08x, 0x%08x, 0x%08x, }", 293 sp->sp_m0, sp->sp_m1, sp->sp_m2); 294} 295 296static void 297fpn_to_sfpn(struct sfpn *sp, const struct fpn *fp) 298{ 299 sp->sp_m0 = (fp->fp_exp << 24) | fp->fp_mant[0]; 300 sp->sp_m1 = fp->fp_mant[1]; 301 sp->sp_m2 = fp->fp_mant[2]; 302} 303 304#else /* CORDIC_BOOTSTRAP */ 305 306static const struct sfpn atan_table[] = { 307 { 0xff06487e, 0xd5110b46, 0x11a80000, }, 308 { 0xfe076b19, 0xc1586ed3, 0xda2b7f0d, }, 309 { 0xfd07d6dd, 0x7e4b2037, 0x58ab6e33, }, 310 { 0xfc07f56e, 0xa6ab0bdb, 0x719644b5, }, 311 { 0xfb07fd56, 0xedcb3f7a, 0x71b65937, }, 312 { 0xfa07ff55, 0x6eea5d89, 0x2a13bce7, }, 313 { 0xf907ffd5, 0x56eedca6, 0xaddf3c5f, }, 314 { 0xf807fff5, 0x556eeea5, 0xcb403117, }, 315 { 0xf707fffd, 0x5556eeed, 0xca5d8956, }, 316 { 0xf607ffff, 0x55556eee, 0xea5ca6ab, }, 317 { 0xf507ffff, 0xd55556ee, 0xeedca5c8, }, 318 { 0xf407ffff, 0xf555556e, 0xeeeea5c8, }, 319 { 0xf307ffff, 0xfd555556, 0xeeeeedc8, }, 320 { 0xf207ffff, 0xff555555, 0x6eeeeee8, }, 321 { 0xf107ffff, 0xffd55555, 0x56eeeeed, }, 322 { 0xf007ffff, 0xfff55555, 0x556eeeed, }, 323 { 0xef07ffff, 0xfffd5555, 0x5556eeed, }, 324 { 0xee07ffff, 0xffff5555, 0x55556eed, }, 325 { 0xed07ffff, 0xffffd555, 0x555556ed, }, 326 { 0xec07ffff, 0xfffff555, 0x5555556d, }, 327 { 0xeb07ffff, 0xfffffd55, 0x55555555, }, 328 { 0xea07ffff, 0xffffff55, 0x55555554, }, 329 { 0xe907ffff, 0xffffffd5, 0x55555554, }, 330 { 0xe807ffff, 0xfffffff5, 0x55555554, }, 331 { 0xe707ffff, 0xfffffffd, 0x55555554, }, 332 { 0xe607ffff, 0xffffffff, 0x55555554, }, 333 { 0xe507ffff, 0xffffffff, 0xd5555554, }, 334 { 0xe407ffff, 0xffffffff, 0xf5555554, }, 335 { 0xe307ffff, 0xffffffff, 0xfd555554, }, 336 { 0xe207ffff, 0xffffffff, 0xff555554, }, 337 { 0xe107ffff, 0xffffffff, 0xffd55554, }, 338 { 0xe007ffff, 0xffffffff, 0xfff55554, }, 339 { 0xdf07ffff, 0xffffffff, 0xfffd5554, }, 340 { 0xde07ffff, 0xffffffff, 0xffff5554, }, 341 { 0xdd07ffff, 0xffffffff, 0xffffd554, }, 342 { 0xdc07ffff, 0xffffffff, 0xfffff554, }, 343 { 0xdb07ffff, 0xffffffff, 0xfffffd54, }, 344 { 0xda07ffff, 0xffffffff, 0xffffff54, }, 345 { 0xd907ffff, 0xffffffff, 0xffffffd4, }, 346 { 0xd807ffff, 0xffffffff, 0xfffffff4, }, 347 { 0xd707ffff, 0xffffffff, 0xfffffffc, }, 348 { 0xd7040000, 0x00000000, 0x00000000, }, 349 { 0xd6040000, 0x00000000, 0x00000000, }, 350 { 0xd5040000, 0x00000000, 0x00000000, }, 351 { 0xd4040000, 0x00000000, 0x00000000, }, 352 { 0xd3040000, 0x00000000, 0x00000000, }, 353 { 0xd2040000, 0x00000000, 0x00000000, }, 354 { 0xd1040000, 0x00000000, 0x00000000, }, 355 { 0xd0040000, 0x00000000, 0x00000000, }, 356 { 0xcf040000, 0x00000000, 0x00000000, }, 357 { 0xce040000, 0x00000000, 0x00000000, }, 358 { 0xcd040000, 0x00000000, 0x00000000, }, 359 { 0xcc040000, 0x00000000, 0x00000000, }, 360 { 0xcb040000, 0x00000000, 0x00000000, }, 361 { 0xca040000, 0x00000000, 0x00000000, }, 362 { 0xc9040000, 0x00000000, 0x00000000, }, 363 { 0xc8040000, 0x00000000, 0x00000000, }, 364 { 0xc7040000, 0x00000000, 0x00000000, }, 365 { 0xc6040000, 0x00000000, 0x00000000, }, 366 { 0xc5040000, 0x00000000, 0x00000000, }, 367 { 0xc4040000, 0x00000000, 0x00000000, }, 368 { 0xc3040000, 0x00000000, 0x00000000, }, 369 { 0xc2040000, 0x00000000, 0x00000000, }, 370 { 0xc1040000, 0x00000000, 0x00000000, }, 371}; 372 373static const struct sfpn atanh_table[] = { 374 { 0xff0464fa, 0x9eab40c2, 0xa5dc43f6, }, 375 { 0xfe04162b, 0xbea04514, 0x69ca8e4a, }, 376 { 0xfd040562, 0x4727abbd, 0xda654b67, }, 377 { 0xfc040156, 0x22b4dd6b, 0x372a679c, }, 378 { 0xfb040055, 0x62246bb8, 0x92d60b35, }, 379 { 0xfa040015, 0x56222b47, 0x2637d656, }, 380 { 0xf9040005, 0x55622246, 0xb4dcf86e, }, 381 { 0xf8040001, 0x55562222, 0xb46bb307, }, 382 { 0xf7040000, 0x55556222, 0x246b45cd, }, 383 { 0xf6040000, 0x15555622, 0x222b465b, }, 384 { 0xf5040000, 0x05555562, 0x2222467f, }, 385 { 0xf4040000, 0x01555556, 0x22221eaf, }, 386 { 0xf3040000, 0x00555555, 0x62222213, }, 387 { 0xf2040000, 0x00155555, 0x56221221, }, 388 { 0xf1040000, 0x00055555, 0x556221a2, }, 389 { 0xf0040000, 0x00015555, 0x5556221e, }, 390 { 0xef040000, 0x00005555, 0x55552222, }, 391 { 0xee040000, 0x00001555, 0x55555222, }, 392 { 0xed040000, 0x00000555, 0x55555522, }, 393 { 0xec040000, 0x00000155, 0x55555552, }, 394 { 0xeb040000, 0x00000055, 0x554d5555, }, 395 { 0xea040000, 0x00000015, 0x55545555, }, 396 { 0xe9040000, 0x00000005, 0x55553555, }, 397 { 0xe8040000, 0x00000001, 0x55555155, }, 398 { 0xe7040000, 0x00000000, 0x555554d5, }, 399 { 0xe6040000, 0x00000000, 0x15555545, }, 400 { 0xe5040000, 0x00000000, 0x05555553, }, 401 { 0xe307ffff, 0xffffffff, 0xfaaaaaaa, }, 402 { 0xe207ffff, 0xffffffff, 0xfeaaaaaa, }, 403 { 0xe107ffff, 0xffffffff, 0xffaaaaaa, }, 404 { 0xe007ffff, 0xffffffff, 0xffeaaaaa, }, 405 { 0xdf07ffff, 0xffffffff, 0xfffaaaaa, }, 406 { 0xde07ffff, 0xffffffff, 0xfffeaaaa, }, 407 { 0xdd07ffff, 0xffffffff, 0xffffaaaa, }, 408 { 0xdc07ffff, 0xffffffff, 0xffffeaaa, }, 409 { 0xdb07ffff, 0xffffffff, 0xfffffaaa, }, 410 { 0xda07ffff, 0xffffffff, 0xfffffeaa, }, 411 { 0xd907ffff, 0xffffffff, 0xffffffaa, }, 412 { 0xd807ffff, 0xffffffff, 0xffffffea, }, 413 { 0xd707ffff, 0xffffffff, 0xfffffffa, }, 414 { 0xd607ffff, 0xffffffff, 0xfffffffe, }, 415 { 0xd507ffff, 0xfffffe00, 0x00000000, }, 416 { 0xd407ffff, 0xffffff00, 0x00000000, }, 417 { 0xd307ffff, 0xffffff80, 0x00000000, }, 418 { 0xd207ffff, 0xffffffc0, 0x00000000, }, 419 { 0xd107ffff, 0xffffffe0, 0x00000000, }, 420 { 0xd007ffff, 0xfffffff0, 0x00000000, }, 421 { 0xcf07ffff, 0xfffffff8, 0x00000000, }, 422 { 0xce07ffff, 0xfffffffc, 0x00000000, }, 423 { 0xcd07ffff, 0xfffffffe, 0x00000000, }, 424 { 0xcc07ffff, 0xffffffff, 0x00000000, }, 425 { 0xcb07ffff, 0xffffffff, 0x80000000, }, 426 { 0xca07ffff, 0xffffffff, 0xc0000000, }, 427 { 0xc907ffff, 0xffffffff, 0xe0000000, }, 428 { 0xc807ffff, 0xffffffff, 0xf0000000, }, 429 { 0xc707ffff, 0xffffffff, 0xf8000000, }, 430 { 0xc607ffff, 0xffffffff, 0xfc000000, }, 431 { 0xc507ffff, 0xffffffff, 0xfe000000, }, 432 { 0xc407ffff, 0xffffffff, 0xff000000, }, 433 { 0xc307ffff, 0xffffffff, 0xff800000, }, 434 { 0xc207ffff, 0xffffffff, 0xffc00000, }, 435 { 0xc107ffff, 0xffffffff, 0xffe00000, }, 436 { 0xc007ffff, 0xffffffff, 0xfff00000, }, 437 { 0xbf07ffff, 0xffffffff, 0xfff80000, }, 438}; 439 440const struct fpn fpu_cordic_inv_gain1 = 441 { 1, 0, -1, 1, { 0x0004dba7, 0x6d421af2, 0xd33fafd1, }, }; 442 443const struct fpn fpu_cordic_inv_gain2 = 444 { 1, 0, 0, 1, { 0x0004d483, 0xec3803fc, 0xc5ff12f8, }, }; 445 446#endif /* CORDIC_BOOTSTRAP */ 447 448static inline void 449sfpn_to_fpn(struct fpn *fp, const struct sfpn *s) 450{ 451 fp->fp_class = FPC_NUM; 452 fp->fp_sign = 0; 453 fp->fp_sticky = 0; 454 fp->fp_exp = s->sp_m0 >> 24; 455 if (fp->fp_exp & 0x80) { 456 fp->fp_exp |= 0xffffff00; 457 } 458 fp->fp_mant[0] = s->sp_m0 & 0x000fffff; 459 fp->fp_mant[1] = s->sp_m1; 460 fp->fp_mant[2] = s->sp_m2; 461} 462 463void 464fpu_cordit1(struct fpemu *fe, struct fpn *x0, struct fpn *y0, struct fpn *z0, 465 const struct fpn *vecmode) 466{ 467 struct fpn t; 468 struct fpn x; 469 struct fpn y; 470 struct fpn z; 471 struct fpn *r; 472 int i; 473 int sign; 474 475 fpu_const(&t, FPU_CONST_1); 476 CPYFPN(&x, x0); 477 CPYFPN(&y, y0); 478 CPYFPN(&z, z0); 479 480 for (i = 0; i < EXT_FRACBITS; i++) { 481 struct fpn x1; 482 483 /* y < vecmode */ 484 CPYFPN(&fe->fe_f1, &y); 485 CPYFPN(&fe->fe_f2, vecmode); 486 fe->fe_f2.fp_sign = !fe->fe_f2.fp_sign; 487 r = fpu_add(fe); 488 489 if ((vecmode->fp_sign == 0 && r->fp_sign) || 490 (vecmode->fp_sign && z.fp_sign == 0)) { 491 sign = 1; 492 } else { 493 sign = 0; 494 } 495 496 /* y * t */ 497 CPYFPN(&fe->fe_f1, &y); 498 CPYFPN(&fe->fe_f2, &t); 499 r = fpu_mul(fe); 500 501 /* 502 * x1 = x - y*t (if sign) 503 * x1 = x + y*t 504 */ 505 CPYFPN(&fe->fe_f2, r); 506 if (sign) 507 fe->fe_f2.fp_sign = !fe->fe_f2.fp_sign; 508 CPYFPN(&fe->fe_f1, &x); 509 r = fpu_add(fe); 510 CPYFPN(&x1, r); 511 512 /* x * t */ 513 CPYFPN(&fe->fe_f1, &x); 514 CPYFPN(&fe->fe_f2, &t); 515 r = fpu_mul(fe); 516 517 /* 518 * y = y + x*t (if sign) 519 * y = y - x*t 520 */ 521 CPYFPN(&fe->fe_f2, r); 522 if (!sign) 523 fe->fe_f2.fp_sign = !fe->fe_f2.fp_sign; 524 CPYFPN(&fe->fe_f1, &y); 525 r = fpu_add(fe); 526 CPYFPN(&y, r); 527 528 /* 529 * z = z - atan_table[i] (if sign) 530 * z = z + atan_table[i] 531 */ 532 CPYFPN(&fe->fe_f1, &z); 533 sfpn_to_fpn(&fe->fe_f2, &atan_table[i]); 534 if (sign) 535 fe->fe_f2.fp_sign = !fe->fe_f2.fp_sign; 536 r = fpu_add(fe); 537 CPYFPN(&z, r); 538 539 /* x = x1 */ 540 CPYFPN(&x, &x1); 541 542 /* t /= 2 */ 543 t.fp_exp--; 544 } 545 546 CPYFPN(x0, &x); 547 CPYFPN(y0, &y); 548 CPYFPN(z0, &z); 549} 550 551void 552fpu_cordit2(struct fpemu *fe, struct fpn *x0, struct fpn *y0, struct fpn *z0, 553 const struct fpn *vecmode) 554{ 555 struct fpn t; 556 struct fpn x; 557 struct fpn y; 558 struct fpn z; 559 struct fpn *r; 560 int i; 561 int k; 562 int sign; 563 564 /* t = 0.5 */ 565 fpu_const(&t, FPU_CONST_1); 566 t.fp_exp--; 567 568 CPYFPN(&x, x0); 569 CPYFPN(&y, y0); 570 CPYFPN(&z, z0); 571 572 k = 3; 573 for (i = 0; i < EXT_FRACBITS; i++) { 574 struct fpn x1; 575 int j; 576 577 for (j = 0; j < 2; j++) { 578 if ((vecmode->fp_sign == 0 && y.fp_sign) || 579 (vecmode->fp_sign && z.fp_sign == 0)) { 580 sign = 0; 581 } else { 582 sign = 1; 583 } 584 585 /* y * t */ 586 CPYFPN(&fe->fe_f1, &y); 587 CPYFPN(&fe->fe_f2, &t); 588 r = fpu_mul(fe); 589 590 /* 591 * x1 = x + y*t 592 * x1 = x - y*t (if sign) 593 */ 594 CPYFPN(&fe->fe_f2, r); 595 if (sign) 596 fe->fe_f2.fp_sign = !fe->fe_f2.fp_sign; 597 CPYFPN(&fe->fe_f1, &x); 598 r = fpu_add(fe); 599 CPYFPN(&x1, r); 600 601 /* x * t */ 602 CPYFPN(&fe->fe_f1, &x); 603 CPYFPN(&fe->fe_f2, &t); 604 r = fpu_mul(fe); 605 606 /* 607 * y = y + x*t 608 * y = y - x*t (if sign) 609 */ 610 CPYFPN(&fe->fe_f2, r); 611 if (sign) 612 fe->fe_f2.fp_sign = !fe->fe_f2.fp_sign; 613 CPYFPN(&fe->fe_f1, &y); 614 r = fpu_add(fe); 615 CPYFPN(&y, r); 616 617 /* 618 * z = z + atanh_table[i] (if sign) 619 * z = z - atanh_table[i] 620 */ 621 CPYFPN(&fe->fe_f1, &z); 622 sfpn_to_fpn(&fe->fe_f2, &atanh_table[i]); 623 if (!sign) 624 fe->fe_f2.fp_sign = !fe->fe_f2.fp_sign; 625 r = fpu_add(fe); 626 CPYFPN(&z, r); 627 628 /* x = x1 */ 629 CPYFPN(&x, &x1); 630 631 if (k > 0) { 632 k--; 633 break; 634 } else { 635 k = 3; 636 } 637 } 638 639 /* t /= 2 */ 640 t.fp_exp--; 641 } 642 643 CPYFPN(x0, &x); 644 CPYFPN(y0, &y); 645 CPYFPN(z0, &z); 646} 647