1/* Copyright (C) 2019-2020 Free Software Foundation, Inc. 2 3 This file is part of LIBF7, which is part of GCC. 4 5 GCC is free software; you can redistribute it and/or modify it under 6 the terms of the GNU General Public License as published by the Free 7 Software Foundation; either version 3, or (at your option) any later 8 version. 9 10 GCC is distributed in the hope that it will be useful, but WITHOUT ANY 11 WARRANTY; without even the implied warranty of MERCHANTABILITY or 12 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 13 for more details. 14 15 Under Section 7 of GPL version 3, you are granted additional 16 permissions described in the GCC Runtime Library Exception, version 17 3.1, as published by the Free Software Foundation. 18 19 You should have received a copy of the GNU General Public License and 20 a copy of the GCC Runtime Library Exception along with this program; 21 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 22 <http://www.gnu.org/licenses/>. */ 23 24#include "libf7.h" 25 26#ifndef __AVR_TINY__ 27 28#define ALIAS(X, Y) \ 29 F7_WEAK __attribute__((__alias__(F7_STRINGY(X)))) __typeof__(X) Y; 30 31#define DALIAS(...) // empty 32#define LALIAS(...) // empty 33 34#ifndef IN_LIBGCC2 35 36#include <stdio.h> 37#include <assert.h> 38 39#define in_libgcc false 40 41_Static_assert (sizeof (f7_t) == 10 && F7_MANT_BYTES == 7, 42 "libf7 will only work with 7-byte mantissa."); 43#else 44 45#define in_libgcc true 46 47#if __SIZEOF_DOUBLE__ == 8 48#undef DALIAS 49#define DALIAS(X,Y) \ 50 F7_WEAK __attribute__((__alias__(F7_STRINGY(X)))) __typeof__(X) Y; 51#endif 52 53#if __SIZEOF_LONG_DOUBLE__ == 8 54#undef LALIAS 55#define LALIAS(X,Y) \ 56 F7_WEAK __attribute__((__alias__(F7_STRINGY(X)))) __typeof__(X) Y; 57#endif 58 59#endif // in libgcc 60 61static F7_INLINE 62void f7_assert (bool x) 63{ 64 if (!in_libgcc && !x) 65 __builtin_abort(); 66} 67 68static F7_INLINE 69int16_t abs_ssat16 (int16_t a) 70{ 71 _Sat _Fract sa = __builtin_avr_rbits (a); 72 return __builtin_avr_bitsr (__builtin_avr_absr (sa)); 73} 74 75static F7_INLINE 76int16_t add_ssat16 (int16_t a, int16_t b) 77{ 78 _Sat _Fract sa = __builtin_avr_rbits (a); 79 _Sat _Fract sb = __builtin_avr_rbits (b); 80 return __builtin_avr_bitsr (sa + sb); 81} 82 83static F7_INLINE 84int16_t sub_ssat16 (int16_t a, int16_t b) 85{ 86 _Sat _Fract sa = __builtin_avr_rbits (a); 87 _Sat _Fract sb = __builtin_avr_rbits (b); 88 return __builtin_avr_bitsr (sa - sb); 89} 90 91static F7_INLINE 92int8_t ssat8_range (int16_t a, int8_t range) 93{ 94 if (a >= range) 95 return range; 96 if (a <= -range) 97 return -range; 98 return a; 99} 100 101 102#define IN_LIBF7_H 103 #define F7_CONST_DEF(NAME, FLAGS, M6, M5, M4, M3, M2, M1, M0, EXPO) \ 104 F7_UNUSED static const uint8_t F7_(const_##NAME##_msb) = M6; \ 105 F7_UNUSED static const int16_t F7_(const_##NAME##_expo) = EXPO; 106 #include "libf7-const.def" 107 #undef F7_CONST_DEF 108#undef IN_LIBF7_H 109 110 111/* 112 libgcc naming converntions for conversions: 113 114 __float<fmode><fmode> : Convert float modes. 115 __floatun<imode><fmode>: Convert unsigned integral to float. 116 __fix<fmode><imode> : Convert float to signed integral. 117 __fixuns<fmode><imode> : Convert float to unsigned integral. 118*/ 119 120 121#ifdef F7MOD_floatundidf_ 122F7_WEAK 123f7_double_t __floatundidf (uint64_t x) 124{ 125 f7_t xx; 126 f7_set_u64 (&xx, x); 127 return f7_get_double (&xx); 128} 129#endif // F7MOD_floatundidf_ 130 131 132#ifdef F7MOD_floatdidf_ 133F7_WEAK 134f7_double_t __floatdidf (int64_t x) 135{ 136 f7_t xx; 137 f7_set_s64 (&xx, x); 138 return f7_get_double (&xx); 139} 140#endif // F7MOD_floatdidf_ 141 142 143#ifdef F7MOD_init_ 144f7_t* f7_init_impl (uint64_t mant, uint8_t flags, f7_t *cc, int16_t expo) 145{ 146 flags &= F7_FLAGS; 147 if (f7_class_number (flags)) 148 { 149 uint8_t msb; 150 while ((__builtin_memcpy (&msb, (uint8_t*) &mant + 7, 1), msb)) 151 { 152 mant >>= 1; 153 expo = add_ssat16 (expo, 1); 154 } 155 *(uint64_t*) cc->mant = mant; 156 cc->expo = add_ssat16 (expo, F7_MANT_BITS-1); 157 158 cc = f7_normalize_asm (cc); 159 } 160 161 cc->flags = flags; 162 163 return cc; 164} 165#endif // F7MOD_init_ 166 167 168#ifdef F7MOD_set_s16_ 169f7_t* f7_set_s16_impl (f7_t *cc, int16_t i16) 170{ 171 uint16_t u16 = (uint16_t) i16; 172 uint8_t flags = 0; 173 if (i16 < 0) 174 { 175 u16 = -u16; 176 flags = F7_FLAG_sign; 177 } 178 f7_set_u16_impl (cc, u16); 179 cc->flags = flags; 180 return cc; 181} 182#endif // F7MOD_set_s16_ 183 184 185#ifdef F7MOD_set_u16_ 186f7_t* f7_set_u16_impl (f7_t *cc, uint16_t u16) 187{ 188 f7_clr (cc); 189 F7_MANT_HI2 (cc) = u16; 190 cc->expo = 15; 191 return f7_normalize_asm (cc); 192} 193#endif // F7MOD_set_u16_ 194 195 196#ifdef F7MOD_set_s32_ 197f7_t* f7_set_s32 (f7_t *cc, int32_t i32) 198{ 199 uint32_t u32 = (uint32_t) i32; 200 uint8_t flags = 0; 201 if (i32 < 0) 202 { 203 u32 = -u32; 204 flags = F7_FLAG_sign; 205 } 206 cc = f7_set_u32 (cc, u32); 207 cc->flags = flags; 208 return cc; 209} 210ALIAS (f7_set_s32, f7_floatsidf) 211#endif // F7MOD_set_s32_ 212 213 214#ifdef F7MOD_set_u32_ 215f7_t* f7_set_u32 (f7_t *cc, uint32_t u32) 216{ 217 f7_clr (cc); 218 F7_MANT_HI4 (cc) = u32; 219 cc->expo = 31; 220 return f7_normalize_asm (cc); 221} 222ALIAS (f7_set_u32, f7_floatunsidf) 223#endif // F7MOD_set_u32_ 224 225 226// IEEE 754 single 227// float = s bbbbbbbb mmmmmmmmmmmmmmmmmmmmmmm 228// 31 229// s = sign 230// b = biased exponent, bias = 127 231// m = mantissa 232 233// +0 = 0 0 0 234// -0 = 1 0 0 235// Inf = S B 0 = S * Inf, B = 0xff 236// NaN = S B M, B = 0xff, M != 0 237// Sub-normal = S 0 M = S * 0.M * 2^{1 - bias}, M != 0 238// Normal = S B M = S * 1.M * 2^{B - bias}, B = 1 ... 0xfe 239 240#define FLT_DIG_EXP 8 241#define FLT_DIG_MANT (31 - FLT_DIG_EXP) 242#define FLT_MAX_EXP ((1 << FLT_DIG_EXP) - 1) 243#define FLT_EXP_BIAS (FLT_MAX_EXP >> 1) 244 245#ifdef F7MOD_set_float_ 246F7_WEAK 247void f7_set_float (f7_t *cc, float f) 248{ 249 uint32_t val32; 250 251 _Static_assert (__SIZEOF_FLOAT__ == 4, ""); 252 _Static_assert (__FLT_MANT_DIG__ == 24, ""); 253 __builtin_memcpy (&val32, &f, __SIZEOF_FLOAT__); 254 255 uint16_t val16 = val32 >> 16; 256 val16 >>= FLT_DIG_MANT - 16; 257 258 uint8_t expo_biased = val16 & FLT_MAX_EXP; 259 bool sign = val16 & (1u << FLT_DIG_EXP); 260 261 f7_clr (cc); 262 263 uint32_t mant = val32 & ((1ul << FLT_DIG_MANT) -1); 264 265 if (mant == 0) 266 { 267 if (expo_biased == 0) 268 return; 269 if (expo_biased >= FLT_MAX_EXP) 270 return f7_set_inf (cc, sign); 271 } 272 273 if (expo_biased == 0) 274 expo_biased = 1; // Sub-normal: biased expo of 1 was encoded as 0. 275 else if (expo_biased < FLT_MAX_EXP) 276 mant |= (1ul << FLT_DIG_MANT); 277 else 278 return f7_set_nan (cc); 279 280 __builtin_memcpy (& F7_MANT_HI4 (cc), &mant, 4); 281 282 cc->expo = expo_biased - FLT_EXP_BIAS + 31 - FLT_DIG_MANT; 283 f7_normalize_asm (cc); 284 f7_set_sign (cc, sign); 285} 286ALIAS (f7_set_float, f7_extendsfdf2) 287#endif // F7MOD_set_float_ 288 289 290#ifdef F7MOD_get_float_ 291static F7_INLINE 292float make_float (uint32_t x) 293{ 294 float ff; 295 __builtin_memcpy (&ff, &x, 4); 296 return ff; 297 298} 299 300F7_WEAK 301float f7_get_float (const f7_t *aa) 302{ 303 uint8_t a_class = f7_classify (aa); 304 305 if (f7_class_nan (a_class)) 306 return make_float (0xffc00000 /* NaN: Biased expo = 0xff, mant != 0 */); 307 308 uint32_t mant; 309 __builtin_memcpy (&mant, &F7_MANT_CONST_HI4 (aa), 4); 310 311 uint8_t expo8 = 0; 312 uint8_t mant_offset = FLT_DIG_EXP; 313 int16_t c_expo = add_ssat16 (aa->expo, FLT_EXP_BIAS); 314 315 if (f7_class_zero (a_class) || c_expo <= -FLT_DIG_MANT) 316 { 317 // Zero or tiny. 318 return 0.0f; 319 } 320 else if (c_expo >= FLT_MAX_EXP || f7_class_inf (a_class)) 321 { 322 // Inf or overflow. 323 expo8 = FLT_MAX_EXP; 324 mant = 0; 325 } 326 else if (c_expo > 0) 327 { 328 // Normal. 329 expo8 = c_expo; 330 } 331 else 332 { 333 // Sub-normal: -DIG_MANT < c_expo <= 0. 334 // Encoding of 0 represents a biased exponent of 1. 335 // mant_offset in 9...31. 336 expo8 = 0; 337 mant_offset += 1 - c_expo; 338 } 339 340 uint16_t expo16 = expo8 << (FLT_DIG_MANT - 16); 341 342 if (f7_class_sign (a_class)) 343 expo16 |= 1u << (FLT_DIG_EXP + FLT_DIG_MANT - 16); 344 345 mant >>= mant_offset; 346 347 __asm ("cbr %T0%t2, 1 << (7 & %2)" "\n\t" 348 "or %C0, %A1" "\n\t" 349 "or %D0, %B1" 350 : "+d" (mant) 351 : "r" (expo16), "n" (FLT_DIG_MANT)); 352 353 return make_float (mant); 354} 355F7_PURE ALIAS (f7_get_float, f7_truncdfsf2) 356#endif // F7MOD_get_float_ 357 358#define DBL_DIG_EXP 11 359#define DBL_DIG_MANT (63 - DBL_DIG_EXP) 360#define DBL_MAX_EXP ((1 << DBL_DIG_EXP) - 1) 361#define DBL_EXP_BIAS (DBL_MAX_EXP >> 1) 362 363 364#ifdef F7MOD_set_double_ 365void f7_set_double_impl (f7_double_t val64, f7_t *cc) 366{ 367 f7_clr (cc); 368 register uint64_t mant __asm ("r18") = val64 & ((1ull << DBL_DIG_MANT) -1); 369 370 uint16_t val16 = 3[(uint16_t*) & val64]; 371 val16 >>= DBL_DIG_MANT - 48; 372 373 uint16_t expo_biased = val16 & DBL_MAX_EXP; 374 bool sign = val16 & (1u << DBL_DIG_EXP); 375 376 if (mant == 0) 377 { 378 if (expo_biased == 0) 379 return; 380 if (expo_biased >= DBL_MAX_EXP) 381 return f7_set_inf (cc, sign); 382 } 383 __asm ("" : "+r" (mant)); 384 385 if (expo_biased == 0) 386 expo_biased = 1; // Sub-normal: biased expo of 1 was encoded as 0. 387 else if (expo_biased < DBL_MAX_EXP) 388 mant |= (1ull << DBL_DIG_MANT); 389 else 390 return f7_set_nan (cc); 391 392 *(uint64_t*) & cc->mant = mant; 393 394 cc->expo = expo_biased - DBL_EXP_BIAS + 63 - DBL_DIG_MANT - 8; 395 f7_normalize_asm (cc); 396 f7_set_sign (cc, sign); 397} 398#endif // F7MOD_set_double_ 399 400 401#ifdef F7MOD_set_pdouble_ 402void f7_set_pdouble (f7_t *cc, const f7_double_t *val64) 403{ 404 f7_set_double (cc, *val64); 405} 406#endif // F7MOD_set_pdouble_ 407 408 409#ifdef F7MOD_get_double_ 410static F7_INLINE 411uint64_t clr_r18 (void) 412{ 413 extern void __clr_8 (void); 414 register uint64_t r18 __asm ("r18"); 415 __asm ("%~call %x[f]" : "=r" (r18) : [f] "i" (__clr_8)); 416 return r18; 417} 418 419static F7_INLINE 420f7_double_t make_double (uint64_t x) 421{ 422 register f7_double_t r18 __asm ("r18") = x; 423 __asm ("" : "+r" (r18)); 424 return r18; 425} 426 427F7_WEAK 428f7_double_t f7_get_double (const f7_t *aa) 429{ 430 uint8_t a_class = f7_classify (aa); 431 432 if (f7_class_nan (a_class)) 433 { 434 uint64_t nan = clr_r18() | (0x7fffull << 48); 435 return make_double (nan); 436 } 437 438 uint64_t mant; 439 __builtin_memcpy (&mant, & aa->mant, 8); 440 441 mant &= 0x00ffffffffffffff; 442 443 // FIXME: For subnormals, rounding is premature and should be 444 // done *after* the mantissa has been shifted into place 445 // (or the round value be shifted left accordingly). 446 // Round. 447 mant += 1u << (F7_MANT_BITS - (1 + DBL_DIG_MANT) - 1); 448 449 uint8_t dex; 450 register uint64_t r18 __asm ("r18") = mant; 451 // dex = Overflow ? 1 : 0. 452 __asm ("bst %T[mant]%T[bitno]" "\n\t" 453 "clr %0" "\n\t" 454 "bld %0,0" 455 : "=r" (dex), [mant] "+r" (r18) 456 : [bitno] "n" (64 - 8)); 457 458 mant = r18 >> dex; 459 460 uint16_t expo16 = 0; 461 uint16_t mant_offset = DBL_DIG_EXP - 8; 462 int16_t c_expo = add_ssat16 (aa->expo, DBL_EXP_BIAS + dex); 463 464 if (f7_class_zero (a_class) || c_expo <= -DBL_DIG_MANT) 465 { 466 // Zero or tiny. 467 return make_double (clr_r18()); 468 } 469 else if (c_expo >= DBL_MAX_EXP || f7_class_inf (a_class)) 470 { 471 // Inf or overflow. 472 expo16 = DBL_MAX_EXP; 473 mant = clr_r18(); 474 } 475 else if (c_expo > 0) 476 { 477 // Normal. 478 expo16 = c_expo; 479 } 480 else 481 { 482 // Sub-normal: -DIG_MANT < c_expo <= 0. 483 // Encoding expo of 0 represents a biased exponent of 1. 484 // mant_offset in 5...55 = 63-8. 485 mant_offset += 1 - c_expo; 486 } 487 488 expo16 <<= (DBL_DIG_MANT - 48); 489 490 if (f7_class_sign (a_class)) 491 expo16 |= 1u << (DBL_DIG_EXP + DBL_DIG_MANT - 48); 492 493 // mant >>= mant_offset; 494 mant = f7_lshrdi3 (mant, mant_offset); 495 496 r18 = mant; 497 __asm ("cbr %T0%t2, 1 << (7 & %2)" "\n\t" 498 "or %r0+6, %A1" "\n\t" 499 "or %r0+7, %B1" 500 : "+r" (r18) 501 : "r" (expo16), "n" (DBL_DIG_MANT)); 502 503 return make_double (r18); 504} 505#endif // F7MOD_get_double_ 506 507 508#ifdef F7MOD_fabs_ 509F7_WEAK 510void f7_fabs (f7_t *cc, const f7_t *aa) 511{ 512 f7_abs (cc, aa); 513} 514#endif // F7MOD_fabs_ 515 516 517#ifdef F7MOD_neg_ 518F7_WEAK 519f7_t* f7_neg (f7_t *cc, const f7_t *aa) 520{ 521 f7_copy (cc, aa); 522 523 uint8_t c_class = f7_classify (cc); 524 525 if (! f7_class_zero (c_class)) 526 cc->sign = ! f7_class_sign (c_class); 527 528 return cc; 529} 530#endif // F7MOD_neg_ 531 532 533#ifdef F7MOD_frexp_ 534F7_WEAK 535void f7_frexp (f7_t *cc, const f7_t *aa, int *expo) 536{ 537 uint8_t a_class = f7_classify (aa); 538 539 if (f7_class_nan (a_class)) 540 return f7_set_nan (cc); 541 542 if (f7_class_inf (a_class) || aa->expo == INT16_MAX) 543 return f7_set_inf (cc, f7_class_sign (a_class)); 544 545 if (! f7_msbit (aa)) 546 { 547 *expo = 0; 548 return f7_clr (cc); 549 } 550 551 *expo = 1 + aa->expo; 552 cc->flags = a_class & F7_FLAG_sign; 553 cc->expo = -1; 554 f7_copy_mant (cc, aa); 555} 556#endif // F7MOD_frexp_ 557 558#ifdef F7MOD_get_s16_ 559F7_WEAK 560int16_t f7_get_s16 (const f7_t *aa) 561{ 562 extern int16_t to_s16 (const f7_t*, uint8_t) F7ASM(f7_to_integer_asm); 563 return to_s16 (aa, 0xf); 564} 565#endif // F7MOD_get_s16_ 566 567 568#ifdef F7MOD_get_s32_ 569F7_WEAK 570int32_t f7_get_s32 (const f7_t *aa) 571{ 572 extern int32_t to_s32 (const f7_t*, uint8_t) F7ASM(f7_to_integer_asm); 573 return to_s32 (aa, 0x1f); 574} 575F7_PURE ALIAS (f7_get_s32, f7_fixdfsi) 576#endif // F7MOD_get_s32_ 577 578 579#ifdef F7MOD_get_s64_ 580 F7_WEAK 581 int64_t f7_get_s64 (const f7_t *aa) 582{ 583 extern int64_t to_s64 (const f7_t*, uint8_t) F7ASM(f7_to_integer_asm); 584 return to_s64 (aa, 0x3f); 585} 586F7_PURE ALIAS (f7_get_s64, f7_fixdfdi) 587#endif // F7MOD_get_s64_ 588 589#ifdef F7MOD_get_u16_ 590 F7_WEAK 591 uint16_t f7_get_u16 (const f7_t *aa) 592{ 593 extern uint16_t to_u16 (const f7_t*, uint8_t) F7ASM(f7_to_unsigned_asm); 594 return to_u16 (aa, 0xf); 595} 596#endif // F7MOD_get_u16_ 597 598 599#ifdef F7MOD_get_u32_ 600F7_WEAK 601uint32_t f7_get_u32 (const f7_t *aa) 602{ 603 extern uint32_t to_u32 (const f7_t*, uint8_t) F7ASM(f7_to_unsigned_asm); 604 return to_u32 (aa, 0x1f); 605} 606F7_PURE ALIAS (f7_get_u32, f7_fixunsdfsi) 607#endif // F7MOD_get_u32_ 608 609 610#ifdef F7MOD_get_u64_ 611F7_WEAK 612uint64_t f7_get_u64 (const f7_t *aa) 613{ 614 extern int64_t to_u64 (const f7_t*, uint8_t) F7ASM(f7_to_unsigned_asm); 615 return to_u64 (aa, 0x3f); 616} 617F7_PURE ALIAS (f7_get_u64, f7_fixunsdfdi) 618#endif // F7MOD_get_u64_ 619 620 621#ifdef F7MOD_cmp_unordered_ 622F7_NOINLINE 623static int8_t cmp_u8 (uint8_t a_class, uint8_t b_class, bool sign_a); 624 625F7_WEAK 626int8_t f7_cmp_unordered (const f7_t *aa, const f7_t *bb, bool with_sign) 627{ 628 uint8_t a_class = f7_classify (aa); 629 uint8_t b_class = f7_classify (bb); 630 631 uint8_t a_sign = f7_class_sign (a_class) & with_sign; 632 uint8_t b_sign = f7_class_sign (b_class) & with_sign; 633 uint8_t ab_class = a_class | b_class; 634 ab_class &= with_sign - 2; 635 636 if (f7_class_nan (ab_class)) 637 return INT8_MIN; 638 639 if (a_sign != b_sign) 640 return b_sign - a_sign; 641 642 if (f7_class_inf (ab_class)) 643 return cmp_u8 (a_class, b_class, a_sign); 644 645 if (f7_class_zero (ab_class)) 646 return cmp_u8 (b_class, a_class, a_sign); 647 648 if (aa->expo < bb->expo) 649 return a_sign ? 1 : -1; 650 651 if (aa->expo > bb->expo) 652 return a_sign ? -1 : 1; 653 654 return cmp_u8 (1 + f7_cmp_mant (aa, bb), 1, a_sign); 655} 656 657 658int8_t cmp_u8 (uint8_t a_class, uint8_t b_class, bool sign_a) 659{ 660 int8_t c; 661 __asm ("sub %[a], %[b]" "\n\t" 662 "breq 1f" "\n\t" 663 "sbc %[c], %[c]" "\n\t" 664 "sbci %[c], -1" "\n\t" 665 "sbrc %[s], 0" "\n\t" 666 "neg %[c]" "\n\t" 667 "1:" 668 : [c] "=d" (c) 669 : [a] "0" (a_class), [b] "r" (b_class), [s] "r" (sign_a)); 670 return c; 671} 672#endif // F7MOD_cmp_unordered_ 673 674 675#ifdef F7MOD_cmp_abs_ 676F7_WEAK 677int8_t f7_cmp_abs (const f7_t *aa, const f7_t *bb) 678{ 679 return f7_cmp_unordered (aa, bb, false /* no signs */); 680} 681#endif // F7MOD_cmp_abs_ 682 683 684#ifdef F7MOD_cmp_ 685F7_WEAK 686int8_t f7_cmp (const f7_t *aa, const f7_t *bb) 687{ 688 return f7_cmp_unordered (aa, bb, true /* with signs */); 689} 690#endif // F7MOD_cmp_ 691 692 693#ifdef F7MOD_abscmp_msb_ge_ 694// Compare absolute value of Number aa against a f7_t represented 695// by msb and expo. 696F7_WEAK 697bool f7_abscmp_msb_ge (const f7_t *aa, uint8_t msb, int16_t expo) 698{ 699 uint8_t a_msb = aa->mant[F7_MANT_BYTES - 1]; 700 701 if (0 == (0x80 & a_msb)) 702 // 0 or subnormal. 703 return false; 704 705 return aa->expo == expo 706 ? a_msb >= msb 707 : aa->expo > expo; 708} 709#endif // F7MOD_abscmp_msb_ge_ 710 711#ifdef F7MOD_lt_ 712F7_WEAK 713bool f7_lt_impl (const f7_t *aa, const f7_t *bb) 714{ 715 return f7_lt (aa, bb); 716} 717#endif // F7MOD_lt_ 718 719#ifdef F7MOD_le_ 720F7_WEAK 721bool f7_le_impl (const f7_t *aa, const f7_t *bb) 722{ 723 return f7_le (aa, bb); 724} 725#endif // F7MOD_le_ 726 727#ifdef F7MOD_gt_ 728F7_WEAK 729bool f7_gt_impl (const f7_t *aa, const f7_t *bb) 730{ 731 return f7_gt (aa, bb); 732} 733#endif // F7MOD_gt_ 734 735#ifdef F7MOD_ge_ 736F7_WEAK 737bool f7_ge_impl (const f7_t *aa, const f7_t *bb) 738{ 739 return f7_ge (aa, bb); 740} 741#endif // F7MOD_ge_ 742 743#ifdef F7MOD_ne_ 744F7_WEAK 745bool f7_ne_impl (const f7_t *aa, const f7_t *bb) 746{ 747 return f7_ne (aa, bb); 748} 749#endif // F7MOD_ne_ 750 751#ifdef F7MOD_eq_ 752F7_WEAK 753bool f7_eq_impl (const f7_t *aa, const f7_t *bb) 754{ 755 return f7_eq (aa, bb); 756} 757#endif // F7MOD_eq_ 758 759 760#ifdef F7MOD_unord_ 761F7_WEAK 762bool f7_unord_impl (const f7_t *aa, const f7_t *bb) 763{ 764 return f7_unordered (aa, bb); 765} 766#endif // F7MOD_unord_ 767 768 769#ifdef F7MOD_minmax_ 770F7_WEAK 771f7_t* f7_minmax (f7_t *cc, const f7_t *aa, const f7_t *bb, bool do_min) 772{ 773 int8_t cmp = f7_cmp_unordered (aa, bb, true /* with signs */); 774 775 if (cmp == INT8_MIN) 776 return (f7_set_nan (cc), cc); 777 778 if (do_min) 779 cmp = -cmp; 780 781 return f7_copy (cc, cmp >= 0 ? aa : bb); 782} 783#endif // F7MOD_minmax_ 784 785 786#ifdef F7MOD_fmax_ 787F7_WEAK 788f7_t* f7_fmax (f7_t *cc, const f7_t *aa, const f7_t *bb) 789{ 790 return f7_minmax (cc, aa, bb, false); 791} 792ALIAS (f7_fmax, f7_max) 793#endif // F7MOD_fmax_ 794 795 796#ifdef F7MOD_fmin_ 797F7_WEAK 798f7_t* f7_fmin (f7_t *cc, const f7_t *aa, const f7_t *bb) 799{ 800 return f7_minmax (cc, aa, bb, true); 801} 802ALIAS (f7_fmin, f7_min) 803#endif // F7MOD_fmin_ 804 805 806#ifdef F7MOD_mulx_ 807F7_WEAK 808uint8_t f7_mulx (f7_t *cc, const f7_t *aa, const f7_t *bb, bool no_rounding) 809{ 810 uint8_t a_class = f7_classify (aa); 811 uint8_t b_class = f7_classify (bb); 812 // From this point on, no more access aa->flags or bb->flags 813 // to avoid early-clobber when writing cc->flags. 814 815 uint8_t ab_class = a_class | b_class; 816 // If either value is NaN, return NaN. 817 if (f7_class_nan (ab_class) 818 // Any combination of Inf and 0. 819 || (f7_class_zero (ab_class) && f7_class_inf (ab_class))) 820 { 821 cc->flags = F7_FLAG_nan; 822 return 0; 823 } 824 // If either value is 0.0, return 0.0. 825 if (f7_class_zero (ab_class)) 826 { 827 f7_clr (cc); 828 return 0; 829 } 830 // We have 2 non-zero numbers-or-INF. 831 832 uint8_t c_sign = (a_class ^ b_class) & F7_FLAG_sign; 833 uint8_t c_inf = ab_class & F7_FLAG_inf; 834 cc->flags = c_sign | c_inf; 835 if (c_inf) 836 return 0; 837 838 int16_t expo = add_ssat16 (aa->expo, bb->expo); 839 // Store expo and handle expo = INT16_MIN and INT16_MAX. 840 if (f7_store_expo (cc, expo)) 841 return 0; 842 843 return f7_mul_mant_asm (cc, aa, bb, no_rounding); 844} 845#endif // F7MOD_mulx_ 846 847 848#ifdef F7MOD_square_ 849F7_WEAK 850void f7_square (f7_t *cc, const f7_t *aa) 851{ 852 f7_mul (cc, aa, aa); 853} 854#endif // F7MOD_square_ 855 856 857#ifdef F7MOD_mul_ 858F7_WEAK 859void f7_mul (f7_t *cc, const f7_t *aa, const f7_t *bb) 860{ 861 f7_mulx (cc, aa, bb, false); 862} 863#endif // F7MOD_mul_ 864 865 866#ifdef F7MOD_Iadd_ 867F7_WEAK void f7_Iadd (f7_t *cc, const f7_t *aa) { f7_add (cc, cc, aa); } 868#endif 869 870#ifdef F7MOD_Isub_ 871F7_WEAK void f7_Isub (f7_t *cc, const f7_t *aa) { f7_sub (cc, cc, aa); } 872#endif 873 874#ifdef F7MOD_Imul_ 875F7_WEAK void f7_Imul (f7_t *cc, const f7_t *aa) { f7_mul (cc, cc, aa); } 876#endif 877 878#ifdef F7MOD_Idiv_ 879F7_WEAK void f7_Idiv (f7_t *cc, const f7_t *aa) { f7_div (cc, cc, aa); } 880#endif 881 882#ifdef F7MOD_IRsub_ 883F7_WEAK void f7_IRsub (f7_t *cc, const f7_t *aa) { f7_sub (cc, aa, cc); } 884#endif 885 886#ifdef F7MOD_Ineg_ 887F7_WEAK void f7_Ineg (f7_t *cc) { f7_neg (cc, cc); } 888#endif 889 890#ifdef F7MOD_Isqrt_ 891F7_WEAK void f7_Isqrt (f7_t *cc) { f7_sqrt (cc, cc); } 892#endif 893 894#ifdef F7MOD_Isquare_ 895F7_WEAK void f7_Isquare (f7_t *cc) { f7_square (cc, cc); } 896#endif 897 898#ifdef F7MOD_Ildexp_ 899F7_WEAK f7_t* f7_Ildexp (f7_t *cc, int ex) { return f7_ldexp (cc, cc, ex); } 900#endif 901 902 903#ifdef F7MOD_add_ 904F7_WEAK 905void f7_add (f7_t *cc, const f7_t *aa, const f7_t *bb) 906{ 907 f7_addsub (cc, aa, bb, false); 908} 909#endif // F7MOD_add_ 910 911 912#ifdef F7MOD_sub_ 913F7_WEAK 914void f7_sub (f7_t *cc, const f7_t *aa, const f7_t *bb) 915{ 916 f7_addsub (cc, aa, bb, true); 917} 918#endif // F7MOD_sub_ 919 920 921#ifdef F7MOD_addsub_ 922static void return_with_sign (f7_t *cc, const f7_t *aa, int8_t c_sign) 923{ 924 __asm (";;; return with sign"); 925 f7_copy (cc, aa); 926 if (c_sign != -1) 927 f7_set_sign (cc, c_sign); 928} 929 930F7_WEAK 931void f7_addsub (f7_t *cc, const f7_t *aa, const f7_t *bb, bool neg_b) 932{ 933 uint8_t a_class = f7_classify (aa); 934 uint8_t b_class = f7_classify (bb); 935 // From this point on, no more access aa->flags or bb->flags 936 // to avoid early-clobber when writing cc->flags. 937 938 // Hande NaNs. 939 if (f7_class_nan (a_class | b_class)) 940 return f7_set_nan (cc); 941 942 bool a_sign = f7_class_sign (a_class); 943 bool b_sign = f7_class_sign (b_class) ^ neg_b; 944 945 // Add the mantissae? 946 bool do_add = a_sign == b_sign; 947 948 // Handle +Infs and -Infs. 949 bool a_inf = f7_class_inf (a_class); 950 bool b_inf = f7_class_inf (b_class); 951 952 if (a_inf && b_inf) 953 { 954 if (do_add) 955 return f7_set_inf (cc, a_sign); 956 else 957 return f7_set_nan (cc); 958 } 959 else if (a_inf) 960 return f7_set_inf (cc, a_sign); 961 else if (b_inf) 962 return f7_set_inf (cc, b_sign); 963 964 int16_t shift16 = sub_ssat16 (aa->expo, bb->expo); 965 966 // aa + 0 = aa. 967 // Also check MSBit to get rid of Subnormals and 0. 968 if (shift16 > F7_MANT_BITS || f7_is0 (bb)) 969 return return_with_sign (cc, aa, -1); 970 971 // 0 + bb = bb. 972 // 0 - bb = -bb. 973 // Also check MSBit to get rid of Subnormals and 0. 974 if (shift16 < -F7_MANT_BITS || f7_is0 (aa)) 975 return return_with_sign (cc, bb, b_sign); 976 977 // Now aa and bb are non-zero, non-NaN, non-Inf. 978 // shift > 0 ==> |a| > |b| 979 // shift < 0 ==> |a| < |b| 980 int8_t shift = (int8_t) shift16; 981 bool c_sign = a_sign; 982 if (shift < 0 983 || (shift == 0 && f7_cmp_mant (aa, bb) < 0)) 984 { 985 const f7_t *p = aa; aa = bb; bb = p; 986 c_sign = b_sign; 987 shift = -shift; 988 } 989 uint8_t shift2 = (uint8_t) (shift << 1); 990 991 cc->expo = aa->expo; 992 // From this point on, no more access aa->expo or bb->expo 993 // to avoid early-clobber when writing cc->expo. 994 995 cc->flags = c_sign; _Static_assert (F7_FLAGNO_sign == 0, ""); 996 997 // This function uses neither .expo nor .flags from either aa or bb, 998 // hence there is early-clobber for cc->expo and cc->flags. 999 f7_addsub_mant_scaled_asm (cc, aa, bb, shift2 | do_add); 1000} 1001#endif // F7MOD_addsub_ 1002 1003 1004#ifdef F7MOD_madd_msub_ 1005F7_WEAK 1006void f7_madd_msub (f7_t *cc, const f7_t *aa, const f7_t *bb, const f7_t *dd, 1007 bool neg_d) 1008{ 1009 f7_t xx7, *xx = &xx7; 1010 uint8_t x_lsb = f7_mulx (xx, aa, bb, true /* no rounding */); 1011 uint8_t x_sign = f7_signbit (xx); 1012 int16_t x_expo = xx->expo; 1013 f7_addsub (xx, xx, dd, neg_d); 1014 // Now add LSB. If cancellation occured in the add / sub, then we have the 1015 // chance of extra 8 bits of precision. Turn LSByte into f7_t. 1016 f7_clr (cc); 1017 cc->expo = sub_ssat16 (x_expo, F7_MANT_BITS); 1018 cc->mant[F7_MANT_BYTES - 1] = x_lsb; 1019 cc = f7_normalize_asm (cc); 1020 cc->flags = x_sign; 1021 f7_Iadd (cc, xx); 1022} 1023#endif // F7MOD_madd_msub_ 1024 1025#ifdef F7MOD_madd_ 1026F7_WEAK 1027void f7_madd (f7_t *cc, const f7_t *aa, const f7_t *bb, const f7_t *dd) 1028{ 1029 f7_madd_msub (cc, aa, bb, dd, false); 1030} 1031#endif // F7MOD_madd_ 1032 1033#ifdef F7MOD_msub_ 1034F7_WEAK 1035void f7_msub (f7_t *cc, const f7_t *aa, const f7_t *bb, const f7_t *dd) 1036{ 1037 f7_madd_msub (cc, aa, bb, dd, true); 1038} 1039#endif // F7MOD_msub_ 1040 1041 1042#ifdef F7MOD_ldexp_ 1043F7_WEAK 1044f7_t* f7_ldexp (f7_t *cc, const f7_t *aa, int delta) 1045{ 1046 uint8_t a_class = f7_classify (aa); 1047 1048 cc->flags = a_class; 1049 1050 // Inf and NaN. 1051 if (! f7_class_number (a_class)) 1052 return cc; 1053 1054 if (f7_msbit (aa) == 0) 1055 return (f7_clr (cc), cc); 1056 1057 int16_t expo = add_ssat16 (delta, aa->expo); 1058 // Store expo and handle expo = INT16_MIN and INT16_MAX. 1059 if (! f7_store_expo (cc, expo)) 1060 f7_copy_mant (cc, aa); 1061 1062 return cc; 1063} 1064#endif // F7MOD_ldexp_ 1065 1066 1067#if USE_LPM 1068#elif USE_LD 1069#else 1070#error need include "asm-defs.h" 1071#endif // USE_LPM 1072 1073/* 1074 Handling constants: 1075 1076 F7_PCONST (PVAR, X) 1077 1078 Set f7_t [const] *PVAR to an LD address for one 1079 of the f7_const_X[_P] constants. 1080 PVAR might be set to point to a local auto that serves 1081 as temporary storage for f7_const_X_P. PVAR is only 1082 valid in the current block. 1083 1084 const f7_t* F7_PCONST_U16 (PVAR, <ident> X) // USE_LD 1085 f7_t* F7_PCONST_U16 (PVAR, uint16_t X) // USE_LPM 1086 1087 Set f7_t [const] *PVAR to an LD address for one of the 1088 f7_const_X[_P] constants. PVAR might be set to point to a 1089 local auto that serves as temporary storage for X. PVAR is 1090 only valid in the current block. 1091 1092 F7_PCONST_VAR (PVAR, VAR) 1093 1094 VAR is a pointer variable holding the address of some f7_const_X[_P] 1095 constant. Set [const] f7_t *PVAR to a respective LD address. 1096 PVAR might be set to point to a local auto that serves 1097 as temporary storage for f7_const_X_P. PVAR is only 1098 valid in the current block. 1099 1100 F7_CONST_ADDR (<ident> CST, f7_t* PTMP) 1101 1102 Return an LD address to for some f7_const_X[_P] constant. 1103 *PTMP might be needed to hold a copy of f7_const_X_P in RAM. 1104 1105 f7_t* F7_U16_ADDR (uint16_t X, f7_t* PTMP) // USE_LPM 1106 const f7_t* F7_U16_ADDR (<cst-ident> X, <unused>) // USE_LD 1107 1108 Return an LD address to compile-time constant uint16_t X which is 1109 also known as f7_const_X[_P]. *PTMP might be set to (f7_t) X. 1110 1111 f7_t* f7_const (f7_t *PVAR, <cst-ident> X) 1112 1113 Copy f7_const_X[_P] to *PVAR. 1114 1115 f7_t* f7_copy_flash (f7_t *DST, const f7_t *SRC) 1116 1117 Copy to *DST with LD (from .rodata in flash) if the address 1118 space is linear, or with LPM (from .progmem.data) if the 1119 address space is not linear. 1120 1121 f7_t* f7_copy (f7_t *DST, const f7_t* SRC) 1122 1123 Copy to RAM using LD. 1124 1125 f7_t* f7_copy_P (f7_t *DST, const f7_t *SRC) 1126 1127 Copy to RAM using LPM. 1128*/ 1129 1130#if USE_LPM 1131 #define F7_RAW_CONST_ADDR(CST) \ 1132 & F7_(const_##CST##_P) 1133 1134 #define F7_PCONST(PVAR, CST) \ 1135 f7_t _var_for_##CST; \ 1136 f7_copy_P (& _var_for_##CST, & F7_(const_##CST##_P)); \ 1137 PVAR = & _var_for_##CST 1138 1139 #define F7_PCONST_U16(PVAR, CST) \ 1140 f7_t _var_for_##CST; \ 1141 PVAR = f7_set_u16 (& _var_for_##CST, CST) 1142 1143 #define F7_PCONST_VAR(PVAR, VAR) \ 1144 f7_t _var_for_##VAR; \ 1145 f7_copy_P (& _var_for_##VAR, VAR); \ 1146 PVAR = & _var_for_##VAR 1147 1148 #define MAYBE_const // empty 1149 1150 #define F7_CONST_ADDR(CST, PTMP) \ 1151 f7_copy_P ((PTMP), & F7_(const_##CST##_P)) 1152 1153 #define F7_U16_ADDR(CST, PTMP) \ 1154 f7_set_u16 ((PTMP), CST) 1155 1156#elif USE_LD 1157 #define F7_RAW_CONST_ADDR(CST) \ 1158 & F7_(const_##CST) 1159 1160 #define F7_PCONST(PVAR, CST) \ 1161 PVAR = & F7_(const_##CST) 1162 1163 #define F7_PCONST_U16(PVAR, CST) \ 1164 PVAR = & F7_(const_##CST) 1165 1166 #define F7_PCONST_VAR(PVAR, VAR) \ 1167 PVAR = (VAR) 1168 1169 #define F7_CONST_ADDR(CST, PTMP) \ 1170 (& F7_(const_##CST)) 1171 1172 #define F7_U16_ADDR(CST, PTMP) \ 1173 (& F7_(const_##CST)) 1174 1175 #define MAYBE_const const 1176#endif 1177 1178 1179 1180#define DD(str, X) \ 1181 do { \ 1182 LOG_PSTR (PSTR (str)); \ 1183 f7_dump (X); \ 1184 } while (0) 1185 1186#undef DD 1187#define DD(...) (void) 0 1188 1189 1190#ifdef F7MOD_sqrt_ 1191static void sqrt_worker (f7_t *cc, const f7_t *rr) 1192{ 1193 f7_t tmp7, *tmp = &tmp7; 1194 f7_t aa7, *aa = &aa7; 1195 1196 // aa in [1/2, 2) => aa->expo in { -1, 0 }. 1197 int16_t a_expo = -(rr->expo & 1); 1198 int16_t c_expo = (rr->expo - a_expo) >> 1; // FIXME: r_expo = INT_MAX??? 1199 1200 __asm ("" : "+r" (aa)); 1201 1202 f7_copy (aa, rr); 1203 aa->expo = a_expo; 1204 1205 // No use of rr or *cc past this point: We may use cc as temporary. 1206 // Approximate square-root of A by X <-- (X + A / X) / 2. 1207 1208 f7_sqrt_approx_asm (cc, aa); 1209 1210 // Iterate X <-- (X + A / X) / 2. 1211 // 3 Iterations with 16, 32, 58 bits of precision for the quotient. 1212 1213 for (uint8_t prec = 16; (prec & 0x80) == 0; prec <<= 1) 1214 { 1215 f7_divx (tmp, aa, cc, (prec & 64) ? 2 + F7_MANT_BITS : prec); 1216 f7_Iadd (cc, tmp); 1217 // This will never underflow because |c_expo| is small. 1218 cc->expo--; 1219 } 1220 1221 // Similar: |c_expo| is small, hence no ldexp needed. 1222 cc->expo += c_expo; 1223} 1224 1225F7_WEAK 1226void f7_sqrt (f7_t *cc, const f7_t *aa) 1227{ 1228 uint8_t a_class = f7_classify (aa); 1229 1230 if (f7_class_nan (a_class) || f7_class_sign (a_class)) 1231 return f7_set_nan (cc); 1232 1233 if (f7_class_inf (a_class)) 1234 return f7_set_inf (cc, 0); 1235 1236 if (f7_class_zero (a_class)) 1237 return f7_clr (cc); 1238 1239 sqrt_worker (cc, aa); 1240} 1241#endif // F7MOD_sqrt_ 1242 1243 1244#ifdef F7MOD_hypot_ 1245F7_WEAK 1246void f7_hypot (f7_t *cc, const f7_t *aa, const f7_t *bb) 1247{ 1248 f7_t xx7, *xx = &xx7; 1249 1250 f7_square (xx, aa); 1251 f7_square (cc, bb); 1252 f7_Iadd (cc, xx); 1253 f7_Isqrt (cc); 1254} 1255#endif // F7MOD_hypot_ 1256 1257 1258#ifdef F7MOD_const_m1_ 1259#include "libf7-constdef.h" 1260#endif // -1 1261 1262#ifdef F7MOD_const_1_2_ 1263#include "libf7-constdef.h" 1264#endif // 1/2 1265 1266#ifdef F7MOD_const_1_3_ 1267#include "libf7-constdef.h" 1268#endif // 1/3 1269 1270#ifdef F7MOD_const_ln2_ 1271#include "libf7-constdef.h" 1272#endif // ln2 1273 1274#ifdef F7MOD_const_1_ln2_ 1275#include "libf7-constdef.h" 1276#endif // 1_ln2 1277 1278#ifdef F7MOD_const_ln10_ 1279#include "libf7-constdef.h" 1280#endif // ln10 1281 1282#ifdef F7MOD_const_1_ln10_ 1283#include "libf7-constdef.h" 1284#endif // 1_ln10 1285 1286#ifdef F7MOD_const_1_ 1287#include "libf7-constdef.h" 1288#endif // 1 1289 1290#ifdef F7MOD_const_sqrt2_ 1291#include "libf7-constdef.h" 1292#endif // sqrt2 1293 1294#ifdef F7MOD_const_2_ 1295#include "libf7-constdef.h" 1296#endif // 2 1297 1298#ifdef F7MOD_const_pi_ 1299#include "libf7-constdef.h" 1300#endif // pi 1301 1302 1303#ifdef F7MOD_divx_ 1304 1305// C /= A 1306extern void f7_div_asm (f7_t*, const f7_t*, uint8_t); 1307 1308F7_WEAK 1309void f7_divx (f7_t *cc, const f7_t *aa, const f7_t *bb, uint8_t quot_bits) 1310{ 1311 uint8_t a_class = f7_classify (aa); 1312 uint8_t b_class = f7_classify (bb); 1313 // From this point on, no more access aa->flags or bb->flags 1314 // to avoid early-clobber when writing cc->flags. 1315 1316 // If either value is NaN, return NaN. 1317 if (f7_class_nan (a_class | b_class) 1318 // If both values are Inf or both are 0, return NaN. 1319 || f7_class_zero (a_class & b_class) 1320 || f7_class_inf (a_class & b_class) 1321 // Inf / 0 = NaN. 1322 || (f7_class_inf (a_class) && f7_class_zero (b_class))) 1323 { 1324 return f7_set_nan (cc); 1325 } 1326 1327 // 0 / B = 0 for non-zero, non-NaN B. 1328 // A / Inf = 0 for non-zero numbers A. 1329 if (f7_class_zero (a_class) || f7_class_inf (b_class)) 1330 return f7_clr (cc); 1331 1332 uint8_t c_sign = (a_class ^ b_class) & F7_FLAG_sign; 1333 1334 if (f7_class_inf (a_class) || f7_class_zero (b_class)) 1335 return f7_set_inf (cc, c_sign); 1336 1337 cc->flags = c_sign; _Static_assert (F7_FLAGNO_sign == 0, ""); 1338 int16_t expo = sub_ssat16 (aa->expo, bb->expo); 1339 1340 // Store expo and handle expo = INT16_MIN and INT16_MAX. 1341 if (f7_store_expo (cc, expo)) 1342 return; 1343 1344 f7_t ss7, *ss = &ss7; 1345 ss->flags = cc->flags; 1346 ss->expo = cc->expo; 1347 1348 f7_copy_mant (ss, aa); 1349 f7_div_asm (ss, bb, quot_bits); 1350 f7_copy (cc, ss); 1351} 1352#endif // F7MOD_divx_ 1353 1354 1355#ifdef F7MOD_div_ 1356F7_WEAK 1357void f7_div (f7_t *cc, const f7_t *aa, const f7_t *bb) 1358{ 1359 /* When f7_divx calls f7_div_asm, dividend and divisor are valid 1360 mantissae, i.e. their MSBit is set. Therefore, the quotient will 1361 be in [0x0.ff..., 0x0.40...] and to adjust it, at most 1 left-shift 1362 is needed. Compute F7_MANT_BITS + 2 bits of the quotient: 1363 One bit is used for rounding, and one bit might be consumed by the 1364 mentioned left-shift. */ 1365 1366 f7_divx (cc, aa, bb, 2 + F7_MANT_BITS); 1367} 1368#endif // F7MOD_div_ 1369 1370 1371#ifdef F7MOD_div1_ 1372F7_WEAK 1373void f7_div1 (f7_t *cc, const f7_t *aa) 1374{ 1375 F7_PCONST_U16 (const f7_t *one, 1); 1376 f7_div (cc, one, aa); 1377} 1378#endif // F7MOD_div_ 1379 1380 1381#ifdef F7MOD_fmod_ 1382F7_WEAK 1383void f7_fmod (f7_t *cc, const f7_t *aa, const f7_t *bb) 1384{ 1385 uint8_t a_class = f7_classify (aa); 1386 uint8_t b_class = f7_classify (bb); 1387 1388 if (! f7_class_number (a_class) 1389 || f7_class_nan (b_class) 1390 || f7_class_zero (b_class)) 1391 { 1392 return f7_set_nan (cc); 1393 } 1394 1395 // A == 0 and B != 0 => 0. 1396 if (f7_class_zero (a_class)) 1397 return f7_clr (cc); 1398 1399 f7_t zz7, *zz = & zz7; 1400 1401 f7_div (zz, aa, bb); 1402 1403 // Z in Z, |Z| <= |A/B|. 1404 f7_trunc (zz, zz); 1405 1406 // C = A - Z * B. 1407 f7_msub (cc, zz, bb, aa); 1408 cc->flags ^= F7_FLAG_sign; 1409} 1410#endif // F7MOD_fmod_ 1411 1412 1413#ifdef F7MOD_truncx_ 1414F7_WEAK 1415f7_t* f7_truncx (f7_t *cc, const f7_t *aa, bool do_floor) 1416{ 1417 uint8_t a_class = f7_classify (aa); 1418 1419 if (! f7_class_nonzero (a_class)) 1420 return f7_copy (cc, aa); 1421 1422 bool sign = f7_class_sign (a_class); 1423 1424 int16_t a_expo = aa->expo; 1425 1426 if (a_expo < 0) 1427 { 1428 // |A| < 1. 1429 if (sign & do_floor) 1430 return f7_set_s16 (cc, -1); 1431 1432 f7_clr (cc); 1433 return cc; 1434 } 1435 else if (a_expo >= F7_MANT_BITS - 1) 1436 // A == floor (A). 1437 return f7_copy (cc, aa); 1438 1439 f7_t tmp7, *tmp = &tmp7; 1440 1441 // Needed if aa === cc. 1442 f7_copy (tmp, aa); 1443 1444 cc->flags = sign; 1445 cc->expo = a_expo; 1446 f7_clr_mant_lsbs (cc, aa, F7_MANT_BITS - 1 - a_expo); 1447 1448 if (do_floor && cc->sign && f7_cmp_mant (cc, tmp) != 0) 1449 { 1450 F7_PCONST_U16 (const f7_t *one, 1); 1451 f7_Isub (cc, one); 1452 } 1453 1454 return cc; 1455} 1456#endif // F7MOD_truncx_ 1457 1458 1459#ifdef F7MOD_floor_ 1460F7_WEAK 1461f7_t* f7_floor (f7_t *cc, const f7_t *aa) 1462{ 1463 return f7_truncx (cc, aa, true); 1464} 1465#endif // F7MOD_floor_ 1466 1467 1468#ifdef F7MOD_trunc_ 1469F7_WEAK 1470f7_t* f7_trunc (f7_t *cc, const f7_t *aa) 1471{ 1472 return f7_truncx (cc, aa, false); 1473} 1474#endif // F7MOD_trunc_ 1475 1476 1477#ifdef F7MOD_ceil_ 1478F7_WEAK 1479void f7_ceil (f7_t *cc, const f7_t *aa) 1480{ 1481 cc = f7_copy (cc, aa); 1482 cc->flags ^= F7_FLAG_sign; 1483 cc = f7_floor (cc, cc); 1484 f7_Ineg (cc); 1485} 1486#endif // F7MOD_ceil_ 1487 1488 1489#ifdef F7MOD_round_ 1490F7_WEAK 1491void f7_round (f7_t *cc, const f7_t *aa) 1492{ 1493 f7_t tmp; 1494 (void) tmp; 1495 const f7_t *half = F7_CONST_ADDR (1_2, &tmp); 1496 1497 f7_addsub (cc, aa, half, f7_signbit (aa)); 1498 f7_trunc (cc, cc); 1499} 1500#endif // F7MOD_round_ 1501 1502 1503#ifdef F7MOD_horner_ 1504 1505// Assertion when using this function is that either cc != xx, 1506// or if cc == xx, then tmp1 must be non-NULL and tmp1 != xx. 1507// In General, the calling functions have a spare f7_t object available 1508// and can pass it down to save some stack. 1509// Moreover, the power series must have degree 1 at least. 1510 1511F7_WEAK 1512void f7_horner (f7_t *cc, const f7_t *xx, uint8_t n_coeff, const f7_t *coeff, 1513 f7_t *tmp1) 1514{ 1515 f7_assert (n_coeff > 1); 1516 1517 if (cc != xx) 1518 tmp1 = cc; 1519 else 1520 f7_assert (tmp1 != NULL && tmp1 != xx); 1521 1522 f7_t *yy = tmp1; 1523 f7_t tmp27, *tmp2 = &tmp27; 1524 1525 n_coeff--; 1526 const f7_t *pcoeff = coeff + n_coeff; 1527 1528 f7_copy_flash (yy, pcoeff); 1529 1530 while (1) 1531 { 1532 --pcoeff; 1533#if 1 1534 f7_Imul (yy, xx); 1535 const f7_t *cst = USE_LD ? pcoeff : f7_copy_P (tmp2, pcoeff); 1536 if (coeff == pcoeff) 1537 return f7_add (cc, yy, cst); 1538 f7_Iadd (yy, cst); 1539#else 1540 const f7_t *cst = USE_LD ? pcoeff : f7_copy_P (tmp2, pcoeff); 1541 f7_madd (yy, yy, xx, cst); 1542 if (coeff == pcoeff) 1543 { 1544 f7_copy (cc, yy); 1545 return; 1546 } 1547#endif 1548 } 1549 1550 __builtin_unreachable(); 1551} 1552#endif // F7MOD_horner_ 1553 1554 1555#ifdef F7MOD_log_ 1556F7_WEAK 1557void f7_log (f7_t *cc, const f7_t *aa) 1558{ 1559 f7_logx (cc, aa, NULL); 1560} 1561#endif // F7MOD_log_ 1562 1563 1564#ifdef F7MOD_log2_ 1565F7_WEAK 1566void f7_log2 (f7_t *cc, const f7_t *aa) 1567{ 1568 f7_logx (cc, aa, F7_RAW_CONST_ADDR (1_ln2)); 1569} 1570#endif // F7MOD_log2_ 1571 1572 1573#ifdef F7MOD_log10_ 1574F7_WEAK 1575void f7_log10 (f7_t *cc, const f7_t *aa) 1576{ 1577 f7_logx (cc, aa, F7_RAW_CONST_ADDR (1_ln10)); 1578} 1579#endif // F7MOD_log10_ 1580 1581 1582#ifdef F7MOD_logx_ 1583 1584#define ARRAY_NAME coeff_artanh 1585#include "libf7-array.def" 1586#undef ARRAY_NAME 1587 1588// Compute P * ln(A) if P != NULL and ln(A), otherwise. 1589// P is a LD-address if USE_LD and a LPM-address if USE_LPM. 1590// Assumption is that P > 0. 1591 1592F7_WEAK 1593void f7_logx (f7_t *cc, const f7_t *aa, const f7_t *p) 1594{ 1595 uint8_t a_class = f7_classify (aa); 1596 1597 if (f7_class_nan (a_class) || f7_class_sign (a_class)) 1598 return f7_set_nan (cc); 1599 1600 if (f7_class_inf (a_class)) 1601 return f7_set_inf (cc, 0); 1602 1603 if (f7_class_zero (a_class)) 1604 return f7_set_inf (cc, 1); 1605 1606 f7_t *yy = cc; 1607 f7_t xx7, *xx = &xx7; 1608 f7_t tmp7, *tmp = &tmp7; 1609 1610 // Y in [1, 2] = A * 2 ^ (-a_expo). 1611 int16_t a_expo = aa->expo; 1612 f7_copy (yy, aa); 1613 yy->expo = 0; 1614 1615 // Y in [1 / sqrt2, sqrt2]. 1616 1617 if (f7_abscmp_msb_ge (yy, F7_(const_sqrt2_msb), F7_(const_sqrt2_expo))) 1618 { 1619 yy->expo = -1; 1620 a_expo = add_ssat16 (a_expo, 1); 1621 } 1622 1623 const f7_t *one = F7_U16_ADDR (1, & tmp7); 1624 1625 // X := (Y - 1) / (Y + 1), |X| <= (sqrt2 - 1) / (sqrt2 + 1) ~ 0.172. 1626 f7_sub (xx, yy, one); 1627 f7_Iadd (yy, one); 1628 f7_Idiv (xx, yy); 1629 1630 // Y := X^2, |Y| < 0.03. 1631 f7_square (yy, xx); 1632 1633 // Y := artanh (X^2) / X 1634 f7_horner (yy, yy, n_coeff_artanh, coeff_artanh, tmp); 1635 1636 // C = X * Y = ln A - a_expo * ln2. 1637 f7_mul (cc, xx, yy); 1638 1639 // X := a_expo * ln2. 1640 f7_set_s16 (xx, a_expo); 1641 f7_Imul (xx, F7_CONST_ADDR (ln2, & tmp7)); 1642 1643 // C = ln A. 1644 f7_Iadd (cc, xx); 1645 1646 if (p && USE_LPM) 1647 f7_Imul (cc, f7_copy_P (tmp, p)); 1648 if (p && USE_LD) 1649 f7_Imul (cc, p); 1650} 1651#endif // F7MOD_logx_ 1652 1653 1654#ifdef F7MOD_exp_ 1655 1656#define ARRAY_NAME coeff_exp 1657#include "libf7-array.def" 1658#undef ARRAY_NAME 1659 1660#define STATIC static 1661#include "libf7-constdef.h" // ln2_low 1662#undef STATIC 1663 1664F7_WEAK 1665void f7_exp (f7_t *cc, const f7_t *aa) 1666{ 1667 uint8_t a_class = f7_classify (aa); 1668 1669 if (f7_class_nan (a_class)) 1670 return f7_set_nan (cc); 1671 1672 /* The maximal exponent of 2 for a double is 1023, hence we may limit 1673 to |A| < 1023 * ln2 ~ 709. We limit to 1024 ~ 1.99 * 2^9 */ 1674 1675 if (f7_class_inf (a_class) 1676 || (f7_class_nonzero (a_class) && aa->expo >= 9)) 1677 { 1678 if (f7_class_sign (a_class)) 1679 return f7_clr (cc); 1680 else 1681 return f7_set_inf (cc, 0); 1682 } 1683 1684 f7_t const *cst; 1685 f7_t qq7, *qq = &qq7; 1686 1687 F7_PCONST (cst, ln2); 1688 1689 // We limited |A| to 1024 and are now dividing by ln2, hence Q will 1690 // be at most 1024 / ln2 ~ 1477 and fit into 11 bits. We will 1691 // round Q anyway, hence only request 11 bits from the division and 1692 // one additional bit that might be needed to normalize the quotient. 1693 f7_divx (qq, aa, cst, 1 + 11); 1694 1695 // Use the smallest (by absolute value) remainder system. 1696 f7_round (qq, qq); 1697 int16_t q = f7_get_s16 (qq); 1698 1699 // Reducing A mod ln2 gives |C| <= ln2 / 2, C = -A mod ln2. 1700 f7_msub (cc, qq, cst, aa); 1701 1702 // Corrigendum: We added Q * ln2; now add Q times the low part of ln2 1703 // for better precision. Due to |C| < |A| this is not a no-op in general. 1704 const f7_t *yy = F7_CONST_ADDR (ln2_low, &_var_for_ln2); 1705 f7_madd (cc, qq, yy, cc); 1706 1707 // Because we computed C = -A mod ... 1708 cc->flags ^= F7_FLAG_sign; 1709 1710 // Reduce further to |C| < ln2 / 8 which is the range of our MiniMax poly. 1711 const uint8_t MAX_LN2_RED = 3; 1712 int8_t scal2 = 0; 1713 1714 while (f7_abscmp_msb_ge (cc, F7_(const_ln2_msb), 1715 F7_(const_ln2_expo) - MAX_LN2_RED)) 1716 { 1717 scal2++; 1718 cc->expo--; 1719 } 1720 1721 f7_horner (cc, cc, n_coeff_exp, coeff_exp, qq); 1722 1723 while (--scal2 >= 0) 1724 f7_Isquare (cc); 1725 1726 f7_Ildexp (cc, q); 1727} 1728#endif // F7MOD_exp_ 1729 1730 1731#ifdef F7MOD_pow10_ 1732F7_WEAK 1733void f7_pow10 (f7_t *cc, const f7_t *aa) 1734{ 1735 const f7_t *p_ln10; 1736 F7_PCONST (p_ln10, ln10); 1737 f7_mul (cc, aa, p_ln10); 1738 f7_exp (cc, cc); 1739} 1740ALIAS (f7_pow10, f7_exp10) 1741#endif // F7MOD_pow10_ 1742 1743 1744#ifdef F7MOD_cbrt_ 1745F7_WEAK 1746void f7_cbrt (f7_t *cc, const f7_t *aa) 1747{ 1748 f7_copy (cc, aa); 1749 const f7_t *p_1_3; 1750 uint8_t c_flags = cc->flags; 1751 cc->flags &= ~F7_FLAG_sign; 1752 f7_log (cc, cc); 1753 F7_PCONST (p_1_3, 1_3); 1754 f7_Imul (cc, p_1_3); 1755 f7_exp (cc, cc); 1756 1757 if (c_flags & F7_FLAG_sign) 1758 cc->flags |= F7_FLAG_sign; 1759} 1760#endif // F7MOD_cbrt_ 1761 1762 1763#ifdef F7MOD_pow_ 1764F7_WEAK 1765void f7_pow (f7_t *cc, const f7_t *aa, const f7_t *bb) 1766{ 1767#if 0 1768 f7_t slots[cc == bb]; 1769 f7_t *yy = cc == bb ? slots : cc; 1770#else 1771 f7_t yy7, *yy = &yy7; 1772#endif 1773 f7_log (yy, aa); 1774 f7_Imul (yy, bb); 1775 f7_exp (cc, yy); 1776} 1777#endif // F7MOD_pow_ 1778 1779 1780#ifdef F7MOD_powi_ 1781F7_WEAK 1782void f7_powi (f7_t *cc, const f7_t *aa, int ii) 1783{ 1784 uint16_t u16 = ii; 1785 f7_t xx27, *xx2 = &xx27; 1786 1787 if (ii < 0) 1788 u16 = -u16; 1789 1790 f7_copy (xx2, aa); 1791 1792 f7_set_u16 (cc, 1); 1793 1794 while (1) 1795 { 1796 if (u16 & 1) 1797 f7_Imul (cc, xx2); 1798 1799 if (! f7_is_nonzero (cc)) 1800 break; 1801 1802 u16 >>= 1; 1803 if (u16 == 0) 1804 break; 1805 f7_Isquare (xx2); 1806 } 1807 1808 if (ii < 0) 1809 f7_div1 (xx2, aa); 1810} 1811#endif // F7MOD_powi_ 1812 1813 1814#ifdef F7MOD_sincos_ 1815 1816#define ARRAY_NAME coeff_sin 1817 #define FOR_SIN 1818 #include "libf7-array.def" 1819 #undef FOR_SIN 1820#undef ARRAY_NAME 1821 1822#define ARRAY_NAME coeff_cos 1823 #define FOR_COS 1824 #include "libf7-array.def" 1825 #undef FOR_COS 1826#undef ARRAY_NAME 1827 1828#define STATIC static 1829#include "libf7-constdef.h" // pi_low 1830#undef STATIC 1831 1832typedef union 1833{ 1834 struct 1835 { 1836 bool neg_sin : 1; // Must be bit F7_FLAGNO_sign. 1837 bool neg_cos : 1; 1838 bool do_sin: 1; 1839 bool do_cos: 1; 1840 bool swap_sincos : 1; 1841 uint8_t res : 3; 1842 }; 1843 uint8_t bits; 1844} sincos_t; 1845 1846 1847F7_WEAK 1848void f7_sincos (f7_t *ss, f7_t *cc, const f7_t *aa) 1849{ 1850 uint8_t a_class = f7_classify (aa); 1851 1852 sincos_t sc = { .bits = a_class & F7_FLAG_sign }; 1853 if (ss != NULL) sc.do_sin = 1; 1854 if (cc != NULL) sc.do_cos = 1; 1855 1856 if (f7_class_nan (a_class) || f7_class_inf (a_class)) 1857 { 1858 if (sc.do_sin) f7_set_nan (ss); 1859 if (sc.do_cos) f7_set_nan (cc); 1860 return; 1861 } 1862 1863 f7_t pi7, *pi = &pi7; 1864 f7_t xx7, *xx = &xx7; 1865 f7_t yy7, *yy = &yy7; 1866 f7_t *hh = sc.do_sin ? ss : cc; 1867 1868 // X = |A| 1869 f7_copy (xx, aa); 1870 xx->flags = 0; 1871 1872 // Y is how often we subtract PI from X. 1873 f7_clr (yy); 1874 f7_const (pi, pi); 1875 1876 if (f7_abscmp_msb_ge (xx, F7_(const_pi_msb), F7_(const_pi_expo) + 1)) 1877 { 1878 pi->expo = 1 + F7_(const_pi_expo); // 2*pi 1879 1880 // Y = X / 2pi. 1881 f7_div (yy, xx, pi); 1882 1883 // The integral part of |A| / pi mod 2 is bit 55 - x_expo. 1884 if (yy->expo >= F7_MANT_BITS && !f7_is_zero (yy)) 1885 { 1886 // Too big for sensible calculation: Should this be NaN instead? 1887 if (sc.do_sin) f7_clr (ss); 1888 if (sc.do_cos) f7_clr (cc); 1889 return; 1890 } 1891 1892 // X -= 2pi * [ X / 2pi ] 1893 f7_floor (yy, yy); 1894 1895 f7_msub (xx, yy, pi, xx); 1896 xx->flags ^= F7_FLAG_sign; 1897 1898 // We divided by 2pi, but Y should count times we subtracted pi. 1899 yy->expo++; 1900 } 1901 1902 pi->expo = F7_(const_pi_expo); // pi 1903 f7_sub (hh, xx, pi); 1904 if (!f7_signbit (hh)) 1905 { 1906 // H = X - pi >= 0 => X >= pi 1907 // sin(x) = -sin(x - pi) 1908 // cos(x) = -cos(x - pi) 1909 f7_copy (xx, hh); 1910 // Y: We subtracted pi one more time. 1911 f7_Iadd (yy, f7_set_u16 (hh, 1)); 1912 sc.neg_sin ^= 1; 1913 sc.neg_cos ^= 1; 1914 } 1915 1916 pi->expo = F7_(const_pi_expo) - 1; // pi/2 1917 if (f7_gt (xx, pi)) 1918 { 1919 // x > pi/2 1920 // sin(x) = sin(pi - x) 1921 // cos(x) = -cos(pi - x) 1922 pi->expo = F7_(const_pi_expo); // pi 1923 f7_IRsub (xx, pi); 1924 // Y: We subtracted pi one more time (and then negated). 1925 f7_Iadd (yy, f7_set_u16 (hh, 1)); 1926 yy->flags ^= F7_FLAG_sign; 1927 sc.neg_cos ^= 1; 1928 } 1929 1930 pi->expo = F7_(const_pi_expo) - 2; // pi/4 1931 if (f7_gt (xx, pi)) 1932 { 1933 // x > pi/4 1934 // sin(x) = cos(pi/2 - x) 1935 // cos(x) = sin(pi/2 - x) 1936 pi->expo = F7_(const_pi_expo) - 1; // pi/2 1937 f7_IRsub (xx, pi); 1938 // Y: We subtracted pi/2 one more time (and then negated). 1939 f7_Iadd (yy, f7_set_1pow2 (hh, -1, 0)); 1940 yy->flags ^= F7_FLAG_sign; 1941 sc.swap_sincos = 1; 1942 } 1943 1944 if (!f7_is0 (yy)) 1945 { 1946 // Y counts how often we subtracted pi from X in order to 1947 // get 0 <= X < pi/4 as small as possible (Y is 0 mod 0.5). 1948 // Now also subtract the low part of pi: 1949 // f7_const_pi_low = pi - f7_const_pi in order to get more precise 1950 // results in the cases where the final result is close to 0. 1951 const f7_t *pi_low = F7_CONST_ADDR (pi_low, pi); 1952 //f7_const (pi, pi_low); 1953 f7_Imul (yy, pi_low); 1954 f7_Isub (xx, yy); 1955 } 1956 1957 // X in [0, pi/4]. 1958 // X^2 in [0, pi^2/16] ~ [0, 0.6169] 1959 1960 f7_square (yy, xx); 1961 1962 f7_t *x_sin = xx; 1963 f7_t *x_cos = yy; 1964 1965 if ((sc.do_sin && !sc.swap_sincos) 1966 || (sc.do_cos && sc.swap_sincos)) 1967 { 1968 f7_horner (hh, yy, n_coeff_sin, coeff_sin, NULL); 1969 f7_mul (x_sin, hh, xx); 1970 } 1971 1972 if ((sc.do_cos && !sc.swap_sincos) 1973 || (sc.do_sin && sc.swap_sincos)) 1974 { 1975 f7_horner (x_cos, yy, n_coeff_cos, coeff_cos, hh); 1976 } 1977 1978 if (sc.swap_sincos) 1979 { 1980 x_sin = yy; 1981 x_cos = xx; 1982 } 1983 1984 if (sc.do_sin) 1985 { 1986 x_sin->flags ^= sc.bits; 1987 x_sin->flags &= F7_FLAG_sign; 1988 f7_copy (ss, x_sin); 1989 } 1990 1991 if (sc.do_cos) 1992 { 1993 x_cos->flags = sc.neg_cos; 1994 f7_copy (cc, x_cos); 1995 } 1996} 1997#endif // F7MOD_sincos_ 1998 1999#ifdef F7MOD_sin_ 2000F7_WEAK 2001void f7_sin (f7_t *ss, const f7_t *aa) 2002{ 2003 f7_sincos (ss, NULL, aa); 2004} 2005#endif // F7MOD_sin_ 2006 2007#ifdef F7MOD_cos_ 2008F7_WEAK 2009void f7_cos (f7_t *cc, const f7_t *aa) 2010{ 2011 f7_sincos (NULL, cc, aa); 2012} 2013#endif // F7MOD_cos_ 2014 2015 2016#ifdef F7MOD_tan_ 2017F7_WEAK 2018void f7_tan (f7_t *tt, const f7_t *aa) 2019{ 2020 f7_t xcos; 2021 f7_sincos (tt, & xcos, aa); 2022 f7_Idiv (tt, & xcos); 2023} 2024#endif // F7MOD_tan_ 2025 2026 2027#ifdef F7MOD_cotan_ 2028F7_WEAK 2029void f7_cotan (f7_t *ct, const f7_t *aa) 2030{ 2031 f7_t xcos; 2032 f7_sincos (ct, & xcos, aa); 2033 f7_div (ct, & xcos, ct); 2034} 2035#endif // F7MOD_cotan_ 2036 2037 2038#ifdef F7MOD_sinhcosh_ 2039F7_WEAK 2040void f7_sinhcosh (f7_t *cc, const f7_t *aa, bool do_sinh) 2041{ 2042 f7_t xx7, *xx = &xx7; 2043 // C = exp(A) 2044 f7_exp (cc, aa); 2045 // X = exp(-A) 2046 f7_div (xx, f7_set_u16 (xx, 1), cc); 2047 // sinh(A) = (exp(A) - exp(-A)) / 2 2048 // cosh(A) = (exp(A) + exp(-A)) / 2 2049 f7_addsub (cc, cc, xx, do_sinh); 2050 cc->expo--; 2051} 2052#endif // F7MOD_sinhcosh_ 2053 2054 2055#ifdef F7MOD_sinh_ 2056F7_WEAK 2057void f7_sinh (f7_t *cc, const f7_t *aa) 2058{ 2059 f7_sinhcosh (cc, aa, true); 2060} 2061#endif // F7MOD_sinh_ 2062 2063 2064#ifdef F7MOD_cosh_ 2065F7_WEAK 2066void f7_cosh (f7_t *cc, const f7_t *aa) 2067{ 2068 f7_sinhcosh (cc, aa, false); 2069} 2070#endif // F7MOD_cosh_ 2071 2072 2073#ifdef F7MOD_tanh_ 2074F7_WEAK 2075void f7_tanh (f7_t *cc, const f7_t *aa) 2076{ 2077 // tanh(A) = (exp(2A) - 1) / (exp(2A) + 1) 2078 f7_t xx7, *xx = &xx7; 2079 F7_PCONST_U16 (const f7_t *one, 1); 2080 // C = 2A 2081 f7_copy (cc, aa); 2082 cc->expo++; 2083 // C = exp(2A) 2084 f7_exp (cc, cc); 2085 // X = exp(2A) + 1 2086 f7_add (xx, cc, one); 2087 // C = exp(2A) - 1 2088 f7_Isub (cc, one); 2089 // C = tanh(A) 2090 f7_Idiv (cc, xx); 2091} 2092#endif // F7MOD_tanh_ 2093 2094 2095#ifdef F7MOD_atan_ 2096 2097#define MINIMAX_6_6_IN_0_1 2098 2099#define ARRAY_NAME coeff_atan_zahler 2100#define FOR_NUMERATOR 2101#include "libf7-array.def" 2102#undef FOR_NUMERATOR 2103#undef ARRAY_NAME 2104 2105#define ARRAY_NAME coeff_atan_nenner 2106#define FOR_DENOMINATOR 2107#include "libf7-array.def" 2108#undef FOR_DENOMINATOR 2109#undef ARRAY_NAME 2110 2111#include "libf7-constdef.h" 2112 2113F7_WEAK 2114void f7_atan (f7_t *cc, const f7_t *aa) 2115{ 2116 uint8_t a_class = f7_classify (aa); 2117 uint8_t flags = a_class & F7_FLAG_sign; 2118 2119 if (f7_class_nan (a_class)) 2120 return f7_set_nan (cc); 2121 2122 f7_t yy7, *yy = &yy7; 2123 f7_t zz7, *zz = &zz7; 2124 2125 if (f7_class_inf (a_class)) 2126 { 2127 f7_set_u16 (cc, 0); 2128 goto do_Inf; 2129 } 2130 2131 // C = |A| 2132 f7_copy (cc, aa); 2133 cc->flags = 0; 2134 2135 if (!f7_class_zero (a_class) && cc->expo >= 0) 2136 { 2137 // C >= 1: use atan (x) + atan (1/x) = pi / 2 to reduce to [0, 1]. 2138 flags |= 1 << 1; 2139 f7_div (cc, f7_set_u16 (yy, 1), cc); 2140 } 2141#if !defined (MINIMAX_6_6_IN_0_1) 2142 const uint8_t const_a_msb = 0x89; 2143 const int16_t const_a_expo = -2; 2144 if (f7_abscmp_msb_ge (cc, const_a_msb, const_a_expo)) 2145 { 2146 // We have C in [0, 1] and we want to use argument reduction by means 2147 // of addition theorem atan(x) - atan(y) = atan((x - y) / (1 + xy)). 2148 // We want to split [0, 1] into [0, a] u [a, 1] in such a way that 2149 // the upper interval will be mapped to [-a, a]. The system is easy 2150 // to solve and yiels 2151 // y = 1 / sqrt (3) ~ 0.57735 atan(y) = pi / 6 2152 // a = (1 - y) / (1 + y) ~ 0.26795 ~ 0x0.8930A2F * 2^-1. 2153 flags |= 1 << 2; 2154 // C <- (C - Y) / (1 + C * Y) in [-a, a] 2155 const f7_t *cst = F7_CONST_ADDR (1_sqrt3, zz); 2156 f7_mul (yy, cc, cst); 2157 f7_Isub (cc, cst); 2158 f7_Iadd (yy, F7_U16_ADDR (1, zz)); 2159 f7_Idiv (cc, yy); 2160 } 2161#endif 2162 // C <- C * p(C^2) / q(C^2) 2163 f7_square (yy, cc); 2164 f7_horner (zz, yy, n_coeff_atan_zahler, coeff_atan_zahler, NULL); 2165 f7_Imul (zz, cc); 2166 f7_horner (cc, yy, n_coeff_atan_nenner, coeff_atan_nenner, NULL); 2167 f7_div (cc, zz, cc); 2168 2169#if !defined (MINIMAX_6_6_IN_0_1) 2170 if (flags & (1 << 2)) 2171 f7_Iadd (cc, F7_CONST_ADDR (pi_6, yy)); 2172#endif 2173 2174 if (flags & (1 << 1)) 2175 { 2176 do_Inf:; 2177 // Y = pi / 2 2178 f7_const (yy, pi); 2179 yy->expo = F7_(const_pi_expo) - 1; 2180 f7_IRsub (cc, yy); 2181 } 2182 2183 cc->flags = a_class & F7_FLAG_sign; 2184} 2185#undef MINIMAX_6_6_IN_0_1 2186#endif // F7MOD_atan_ 2187 2188 2189#ifdef F7MOD_asinacos_ 2190 2191#define ARRAY_NAME coeff_func_a_zahler 2192#define FOR_NUMERATOR 2193#include "libf7-array.def" 2194#undef FOR_NUMERATOR 2195#undef ARRAY_NAME 2196 2197#define ARRAY_NAME coeff_func_a_nenner 2198#define FOR_DENOMINATOR 2199#include "libf7-array.def" 2200#undef FOR_DENOMINATOR 2201#undef ARRAY_NAME 2202 2203typedef union 2204{ 2205 struct 2206 { 2207 bool sign : 1; // Must be bit F7_FLAGNO_sign. 2208 bool do_acos : 1; // From caller. 2209 bool have_acos : 1; // What we compute from rational approx p/q. 2210 uint8_t res : 5; 2211 }; 2212 uint8_t bits; 2213} asinacos_t; 2214 2215F7_WEAK 2216void f7_asinacos (f7_t *cc, const f7_t *aa, uint8_t what) 2217{ 2218 f7_t xx7, *xx = &xx7; 2219 f7_t xx27, *xx2 = &xx27; 2220 2221 asinacos_t flags = { .bits = what | f7_signbit (aa) }; 2222 2223 f7_abs (xx, aa); 2224 2225 int8_t cmp = f7_cmp (xx, f7_set_u16 (cc, 1)); 2226 2227 if (cmp == INT8_MIN 2228 || cmp > 0) 2229 { 2230 return f7_set_nan (cc); 2231 } 2232 2233 if (xx->expo <= -2 || f7_is_zero (xx)) 2234 { 2235 // |A| < 1/2: asin(x) = x * a(2*x^2) 2236 f7_square (xx2, xx); 2237 xx2->expo ++; 2238 } 2239 else 2240 { 2241 // |A| > 1/2: acos (1-x) = sqrt(2*x) * a(x) 2242 // C is 1 from above. 2243 f7_IRsub (xx, cc); 2244 f7_copy (xx2, xx); 2245 flags.have_acos = 1; 2246 } 2247 2248 // MiniMax [5/4] numerator. 2249 f7_horner (cc, xx2, n_coeff_func_a_zahler, coeff_func_a_zahler, NULL); 2250 2251 if (flags.have_acos) 2252 { 2253 xx->expo ++; 2254 f7_Isqrt (xx); 2255 } 2256 f7_Imul (cc, xx); 2257 2258 // MiniMax [5/4] denominator. 2259 f7_horner (xx, xx2, n_coeff_func_a_nenner, coeff_func_a_nenner, NULL); 2260 2261 f7_Idiv (cc, xx); 2262 2263 /* 2264 With the current value of C, we have: 2265 2266 | | do_asin | do_acos 2267 | C | A <= 0 | A >= 0 | A <= 0 | A >= 0 2268 ----------+------------+-----------+----------+----------+---------- 2269 have_asin | asin (|A|) | -C | C | pi/2 + C | pi/2 - C 2270 have_acos | acos (|A|) | -pi/2 + C | pi/2 - C | pi - C | C 2271 2272 Result = n_pi2 * pi/2 + C * (c_sign ? -1 : 1) 2273 Result (A, do_asin) = asin (A) 2274 Result (A, do_acos) = acos (A) 2275 2276 with 2277 c_sign = do_acos ^ have_acos ^ a_sign 2278 n_pi2 = do_acos + have_acos * (a_sign ^ do_acos) ? (-1 : 1) 2279 n_pi2 in { -1, 0, 1, 2 } 2280 */ 2281 2282 // All what matters for c_sign is bit 0. 2283 uint8_t c_sign = flags.bits; 2284 int8_t n_pi2 = flags.do_acos; 2285 c_sign ^= flags.do_acos; 2286 if (flags.have_acos) 2287 { 2288 n_pi2++; 2289 __asm ("" : "+r" (n_pi2)); 2290 if (c_sign & 1) // c_sign & 1 = a_sign ^ do_acos 2291 n_pi2 -= 2; 2292 c_sign++; 2293 } 2294 2295 cc->flags = c_sign & F7_FLAG_sign; 2296 2297 if (n_pi2) 2298 { 2299 f7_const (xx, pi); 2300 if (n_pi2 < 0) 2301 xx->sign = 1; 2302 if (n_pi2 != 2) 2303 xx->expo = F7_(const_pi_expo) - 1; 2304 2305 f7_Iadd (cc, xx); 2306 } 2307} 2308#endif // F7MOD_asinacos_ 2309 2310 2311#ifdef F7MOD_asin_ 2312F7_WEAK 2313void f7_asin (f7_t *cc, const f7_t *aa) 2314{ 2315 f7_asinacos (cc, aa, 0); 2316} 2317#endif // F7MOD_asin_ 2318 2319 2320#ifdef F7MOD_acos_ 2321F7_WEAK 2322void f7_acos (f7_t *cc, const f7_t *aa) 2323{ 2324 f7_asinacos (cc, aa, 1 << 1); 2325} 2326#endif // F7MOD_acos_ 2327 2328 2329#ifndef IN_LIBGCC2 2330 2331#ifdef F7MOD_put_C_ 2332 2333#include <stdio.h> 2334#include <avr/pgmspace.h> 2335 2336static F7_INLINE 2337uint8_t f7_hex_digit (uint8_t nibble) 2338{ 2339 nibble = (uint8_t) (nibble + '0'); 2340 if (nibble > '9') 2341 nibble = (uint8_t) (nibble + ('a' - '0' - 10)); 2342 return nibble; 2343} 2344 2345static void f7_put_hex2 (uint8_t x, FILE *stream) 2346{ 2347 putc ('0', stream); 2348 if (x) 2349 { 2350 putc ('x', stream); 2351 putc (f7_hex_digit (x >> 4), stream); 2352 putc (f7_hex_digit (x & 0xf), stream); 2353 } 2354} 2355 2356#define XPUT(str) \ 2357 fputs_P (PSTR (str), stream) 2358 2359// Write to STREAM a line that is appropriate for usage in libf7-const.def. 2360 2361F7_WEAK 2362void f7_put_CDEF (const char *name, const f7_t *aa, FILE *stream) 2363{ 2364 char buf[7]; 2365 XPUT ("F7_CONST_DEF ("); 2366 fputs (name, stream); 2367 XPUT (",\t"); 2368 uint8_t a_class = f7_classify (aa); 2369 if (! f7_class_nonzero (a_class)) 2370 { 2371 f7_put_hex2 (a_class & F7_FLAGS, stream); 2372 XPUT (",\t0,0,0,0,0,0,0,\t0)"); 2373 return; 2374 } 2375 putc ('0' + (a_class & F7_FLAGS), stream); 2376 XPUT (",\t"); 2377 2378 for (uint8_t i = 0; i < F7_MANT_BYTES; i++) 2379 { 2380 f7_put_hex2 (aa->mant[F7_MANT_BYTES-1 - i], stream); 2381 putc (',', stream); 2382 } 2383 putc ('\t', stream); 2384 2385 itoa (aa->expo, buf, 10); 2386 fputs (buf, stream); 2387 XPUT (")"); 2388} 2389 2390void f7_put_C (const f7_t *aa, FILE *stream) 2391{ 2392 char buf[7]; 2393 2394 uint8_t a_class = f7_classify (aa); 2395 if (f7_class_nan (a_class)) 2396 { 2397 XPUT ("{ .is_nan = 1 }"); 2398 return; 2399 } 2400 bool sign = a_class & F7_FLAG_sign; 2401 2402 if (f7_class_inf (a_class)) 2403 { 2404 XPUT ("{ .is_nan = 1, .sign = "); 2405 putc ('0' + sign, stream); 2406 XPUT (" }"); 2407 return; 2408 } 2409 2410 XPUT ("{ .sign = "); 2411 putc ('0' + sign, stream); 2412 2413 XPUT (", .mant = { "); 2414 for (uint8_t i = 0; i < F7_MANT_BYTES; i++) 2415 { 2416 f7_put_hex2 (aa->mant[F7_MANT_BYTES-1 - i], stream); 2417 if (i != F7_MANT_BYTES - 1) 2418 putc (',', stream); 2419 } 2420 2421 XPUT (" }, .expo = "); 2422 itoa (aa->expo, buf, 10); 2423 fputs (buf, stream); 2424 XPUT (" }"); 2425} 2426#endif //F7MOD_put_C_ 2427 2428 2429#ifdef F7MOD_dump_ 2430 2431#include <avr/pgmspace.h> 2432 2433#ifndef AVRTEST_H 2434 2435#include <stdio.h> 2436 2437static void LOG_PSTR (const char *str) 2438{ 2439 fputs_P (str, stdout); 2440} 2441 2442static void LOG_PFMT_U16 (const char *fmt, uint16_t x) 2443{ 2444 printf_P (fmt, x); 2445} 2446 2447static void LOG_PFMT_FLOAT (const char *fmt, float x) 2448{ 2449 printf_P (fmt, x); 2450} 2451 2452#define LOG_X8(X) LOG_PFMT_U16 (PSTR (" 0x%02x "), (uint8_t)(X)) 2453#define LOG_PFMT_S16(FMT, X) LOG_PFMT_U16 (FMT, (unsigned)(X)) 2454#define LOG_PFMT_ADDR(FMT, X) LOG_PFMT_U16 (FMT, (unsigned)(X)) 2455 2456#endif // AVRTEST_H 2457 2458static void dump_byte (uint8_t b) 2459{ 2460 LOG_PSTR (PSTR (" ")); 2461 for (uint8_t i = 0; i < 8; i++) 2462 { 2463 LOG_PSTR ((b & 0x80) ? PSTR ("1") : PSTR ("0")); 2464 b = (uint8_t) (b << 1); 2465 } 2466} 2467 2468void f7_dump_mant (const f7_t *aa) 2469{ 2470 LOG_PSTR (PSTR ("\tmant =")); 2471 for (int i = F7_MANT_BYTES - 1; i >= 0; i--) 2472 LOG_X8 (aa->mant[i]); 2473 LOG_PSTR (PSTR ("\n\t =")); 2474 2475 for (int i = F7_MANT_BYTES - 1; i >= 0; i--) 2476 dump_byte (aa->mant[i]); 2477 LOG_PSTR (PSTR ("\n")); 2478} 2479 2480void f7_dump (const f7_t *aa) 2481{ 2482 LOG_PFMT_ADDR (PSTR ("\n0x%04x\tflags = "), aa); 2483 dump_byte (aa->flags); 2484 uint8_t a_class = f7_classify (aa); 2485 LOG_PSTR (PSTR (" = ")); 2486 LOG_PSTR (f7_class_sign (a_class) ? PSTR ("-") : PSTR ("+")); 2487 if (f7_class_inf (a_class)) LOG_PSTR (PSTR ("Inf ")); 2488 if (f7_class_nan (a_class)) LOG_PSTR (PSTR ("NaN ")); 2489 if (f7_class_zero (a_class)) LOG_PSTR (PSTR ("0 ")); 2490 if (f7_class_number (a_class)) LOG_PSTR (PSTR ("Number ")); 2491 2492 LOG_PFMT_FLOAT (PSTR (" = %.10g\n"), f7_get_float (aa)); 2493 LOG_PFMT_S16 (PSTR ("\texpo = %d\n"), aa->expo); 2494 2495 f7_dump_mant (aa); 2496} 2497#endif // F7MOD_dump_ 2498 2499#endif // ! libgcc 2500 2501#endif // !AVR_TINY 2502