1/* Copyright (C) 2019-2020 Free Software Foundation, Inc.
2
3   This file is part of LIBF7, which is part of GCC.
4
5   GCC is free software; you can redistribute it and/or modify it under
6   the terms of the GNU General Public License as published by the Free
7   Software Foundation; either version 3, or (at your option) any later
8   version.
9
10   GCC is distributed in the hope that it will be useful, but WITHOUT ANY
11   WARRANTY; without even the implied warranty of MERCHANTABILITY or
12   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
13   for more details.
14
15   Under Section 7 of GPL version 3, you are granted additional
16   permissions described in the GCC Runtime Library Exception, version
17   3.1, as published by the Free Software Foundation.
18
19   You should have received a copy of the GNU General Public License and
20   a copy of the GCC Runtime Library Exception along with this program;
21   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
22   <http://www.gnu.org/licenses/>.  */
23
24#include "libf7.h"
25
26#ifndef __AVR_TINY__
27
28#define ALIAS(X, Y) \
29  F7_WEAK __attribute__((__alias__(F7_STRINGY(X)))) __typeof__(X) Y;
30
31#define DALIAS(...) // empty
32#define LALIAS(...) // empty
33
34#ifndef IN_LIBGCC2
35
36#include <stdio.h>
37#include <assert.h>
38
39#define in_libgcc false
40
41_Static_assert (sizeof (f7_t) == 10 && F7_MANT_BYTES == 7,
42		"libf7 will only work with 7-byte mantissa.");
43#else
44
45#define in_libgcc true
46
47#if __SIZEOF_DOUBLE__ == 8
48#undef  DALIAS
49#define DALIAS(X,Y) \
50  F7_WEAK __attribute__((__alias__(F7_STRINGY(X)))) __typeof__(X) Y;
51#endif
52
53#if __SIZEOF_LONG_DOUBLE__ == 8
54#undef  LALIAS
55#define LALIAS(X,Y) \
56  F7_WEAK __attribute__((__alias__(F7_STRINGY(X)))) __typeof__(X) Y;
57#endif
58
59#endif // in libgcc
60
61static F7_INLINE
62void f7_assert (bool x)
63{
64  if (!in_libgcc && !x)
65    __builtin_abort();
66}
67
68static F7_INLINE
69int16_t abs_ssat16 (int16_t a)
70{
71  _Sat _Fract sa = __builtin_avr_rbits (a);
72  return __builtin_avr_bitsr (__builtin_avr_absr (sa));
73}
74
75static F7_INLINE
76int16_t add_ssat16 (int16_t a, int16_t b)
77{
78  _Sat _Fract sa = __builtin_avr_rbits (a);
79  _Sat _Fract sb = __builtin_avr_rbits (b);
80  return __builtin_avr_bitsr (sa + sb);
81}
82
83static F7_INLINE
84int16_t sub_ssat16 (int16_t a, int16_t b)
85{
86  _Sat _Fract sa = __builtin_avr_rbits (a);
87  _Sat _Fract sb = __builtin_avr_rbits (b);
88  return __builtin_avr_bitsr (sa - sb);
89}
90
91static F7_INLINE
92int8_t ssat8_range (int16_t a, int8_t range)
93{
94  if (a >= range)
95    return range;
96  if (a <= -range)
97    return -range;
98  return a;
99}
100
101
102#define IN_LIBF7_H
103  #define F7_CONST_DEF(NAME, FLAGS, M6, M5, M4, M3, M2, M1, M0, EXPO) \
104    F7_UNUSED static const uint8_t F7_(const_##NAME##_msb)  = M6;     \
105    F7_UNUSED static const int16_t F7_(const_##NAME##_expo) = EXPO;
106  #include "libf7-const.def"
107  #undef F7_CONST_DEF
108#undef IN_LIBF7_H
109
110
111/*
112  libgcc naming converntions for conversions:
113
114   __float<fmode><fmode>  : Convert float modes.
115   __floatun<imode><fmode>: Convert unsigned integral to float.
116   __fix<fmode><imode>    : Convert float to signed integral.
117   __fixuns<fmode><imode> : Convert float to unsigned integral.
118*/
119
120
121#ifdef F7MOD_floatundidf_
122F7_WEAK
123f7_double_t __floatundidf (uint64_t x)
124{
125  f7_t xx;
126  f7_set_u64 (&xx, x);
127  return f7_get_double (&xx);
128}
129#endif // F7MOD_floatundidf_
130
131
132#ifdef F7MOD_floatdidf_
133F7_WEAK
134f7_double_t __floatdidf (int64_t x)
135{
136  f7_t xx;
137  f7_set_s64 (&xx, x);
138  return f7_get_double (&xx);
139}
140#endif // F7MOD_floatdidf_
141
142
143#ifdef F7MOD_init_
144f7_t* f7_init_impl (uint64_t mant, uint8_t flags, f7_t *cc, int16_t expo)
145{
146  flags &= F7_FLAGS;
147  if (f7_class_number (flags))
148    {
149      uint8_t msb;
150      while ((__builtin_memcpy (&msb, (uint8_t*) &mant + 7, 1), msb))
151	{
152	  mant >>= 1;
153	  expo = add_ssat16 (expo, 1);
154	}
155      *(uint64_t*) cc->mant = mant;
156      cc->expo = add_ssat16 (expo, F7_MANT_BITS-1);
157
158      cc = f7_normalize_asm (cc);
159    }
160
161  cc->flags = flags;
162
163  return cc;
164}
165#endif // F7MOD_init_
166
167
168#ifdef F7MOD_set_s16_
169f7_t* f7_set_s16_impl (f7_t *cc, int16_t i16)
170{
171  uint16_t u16 = (uint16_t) i16;
172  uint8_t flags = 0;
173  if (i16 < 0)
174    {
175      u16 = -u16;
176      flags = F7_FLAG_sign;
177    }
178  f7_set_u16_impl (cc, u16);
179  cc->flags = flags;
180  return cc;
181}
182#endif // F7MOD_set_s16_
183
184
185#ifdef F7MOD_set_u16_
186f7_t* f7_set_u16_impl (f7_t *cc, uint16_t u16)
187{
188  f7_clr (cc);
189  F7_MANT_HI2 (cc) = u16;
190  cc->expo = 15;
191  return f7_normalize_asm (cc);
192}
193#endif // F7MOD_set_u16_
194
195
196#ifdef F7MOD_set_s32_
197f7_t* f7_set_s32 (f7_t *cc, int32_t i32)
198{
199  uint32_t u32 = (uint32_t) i32;
200  uint8_t flags = 0;
201  if (i32 < 0)
202    {
203      u32 = -u32;
204      flags = F7_FLAG_sign;
205    }
206  cc = f7_set_u32 (cc, u32);
207  cc->flags = flags;
208  return cc;
209}
210ALIAS (f7_set_s32, f7_floatsidf)
211#endif // F7MOD_set_s32_
212
213
214#ifdef F7MOD_set_u32_
215f7_t* f7_set_u32 (f7_t *cc, uint32_t u32)
216{
217  f7_clr (cc);
218  F7_MANT_HI4 (cc) = u32;
219  cc->expo = 31;
220  return f7_normalize_asm (cc);
221}
222ALIAS (f7_set_u32, f7_floatunsidf)
223#endif // F7MOD_set_u32_
224
225
226// IEEE 754 single
227// float =  s  bbbbbbbb mmmmmmmmmmmmmmmmmmmmmmm
228//	   31
229// s = sign
230// b = biased exponent, bias = 127
231// m = mantissa
232
233// +0	      =	 0 0 0
234// -0	      =	 1 0 0
235// Inf	      =	 S B 0	=  S * Inf, B = 0xff
236// NaN	      =	 S B M,	   B = 0xff, M != 0
237// Sub-normal =	 S 0 M	=  S * 0.M * 2^{1 - bias}, M != 0
238// Normal     =  S B M  =  S * 1.M * 2^{B - bias}, B = 1 ... 0xfe
239
240#define FLT_DIG_EXP   8
241#define FLT_DIG_MANT  (31 - FLT_DIG_EXP)
242#define FLT_MAX_EXP   ((1 << FLT_DIG_EXP) - 1)
243#define FLT_EXP_BIAS  (FLT_MAX_EXP >> 1)
244
245#ifdef F7MOD_set_float_
246F7_WEAK
247void f7_set_float (f7_t *cc, float f)
248{
249  uint32_t val32;
250
251  _Static_assert (__SIZEOF_FLOAT__ == 4, "");
252  _Static_assert (__FLT_MANT_DIG__ == 24, "");
253  __builtin_memcpy (&val32, &f, __SIZEOF_FLOAT__);
254
255  uint16_t val16 = val32 >> 16;
256  val16 >>= FLT_DIG_MANT - 16;
257
258  uint8_t expo_biased = val16 & FLT_MAX_EXP;
259  bool sign = val16 & (1u << FLT_DIG_EXP);
260
261  f7_clr (cc);
262
263  uint32_t mant = val32 & ((1ul << FLT_DIG_MANT) -1);
264
265  if (mant == 0)
266    {
267      if (expo_biased == 0)
268	return;
269      if (expo_biased >= FLT_MAX_EXP)
270	return f7_set_inf (cc, sign);
271    }
272
273  if (expo_biased == 0)
274    expo_biased = 1;   // Sub-normal: biased expo of 1 was encoded as 0.
275  else if (expo_biased < FLT_MAX_EXP)
276    mant |= (1ul << FLT_DIG_MANT);
277  else
278    return f7_set_nan (cc);
279
280  __builtin_memcpy (& F7_MANT_HI4 (cc), &mant, 4);
281
282  cc->expo = expo_biased - FLT_EXP_BIAS + 31 - FLT_DIG_MANT;
283  f7_normalize_asm (cc);
284  f7_set_sign (cc, sign);
285}
286ALIAS (f7_set_float, f7_extendsfdf2)
287#endif // F7MOD_set_float_
288
289
290#ifdef F7MOD_get_float_
291static F7_INLINE
292float make_float (uint32_t x)
293{
294  float ff;
295  __builtin_memcpy (&ff, &x, 4);
296  return ff;
297
298}
299
300F7_WEAK
301float f7_get_float (const f7_t *aa)
302{
303  uint8_t a_class = f7_classify (aa);
304
305  if (f7_class_nan (a_class))
306    return make_float (0xffc00000 /* NaN: Biased expo = 0xff, mant != 0 */);
307
308  uint32_t mant;
309  __builtin_memcpy (&mant, &F7_MANT_CONST_HI4 (aa), 4);
310
311  uint8_t expo8 = 0;
312  uint8_t mant_offset = FLT_DIG_EXP;
313  int16_t c_expo = add_ssat16 (aa->expo, FLT_EXP_BIAS);
314
315  if (f7_class_zero (a_class) || c_expo <= -FLT_DIG_MANT)
316    {
317      // Zero or tiny.
318      return 0.0f;
319    }
320  else if (c_expo >= FLT_MAX_EXP || f7_class_inf (a_class))
321    {
322      // Inf or overflow.
323      expo8 = FLT_MAX_EXP;
324      mant = 0;
325    }
326  else if (c_expo > 0)
327    {
328      // Normal.
329      expo8 = c_expo;
330    }
331  else
332    {
333      // Sub-normal:  -DIG_MANT < c_expo <= 0.
334      // Encoding of 0 represents a biased exponent of 1.
335      // mant_offset in 9...31.
336      expo8 = 0;
337      mant_offset += 1 - c_expo;
338    }
339
340  uint16_t expo16 = expo8 << (FLT_DIG_MANT - 16);
341
342  if (f7_class_sign (a_class))
343    expo16 |= 1u << (FLT_DIG_EXP + FLT_DIG_MANT - 16);
344
345  mant >>= mant_offset;
346
347  __asm ("cbr %T0%t2, 1 << (7 & %2)"  "\n\t"
348	 "or  %C0, %A1"		      "\n\t"
349	 "or  %D0, %B1"
350	 : "+d" (mant)
351	 : "r" (expo16), "n" (FLT_DIG_MANT));
352
353  return make_float (mant);
354}
355F7_PURE ALIAS (f7_get_float, f7_truncdfsf2)
356#endif // F7MOD_get_float_
357
358#define DBL_DIG_EXP   11
359#define DBL_DIG_MANT  (63 - DBL_DIG_EXP)
360#define DBL_MAX_EXP   ((1 << DBL_DIG_EXP) - 1)
361#define DBL_EXP_BIAS  (DBL_MAX_EXP >> 1)
362
363
364#ifdef F7MOD_set_double_
365void f7_set_double_impl (f7_double_t val64, f7_t *cc)
366{
367  f7_clr (cc);
368  register uint64_t mant __asm ("r18") = val64 & ((1ull << DBL_DIG_MANT) -1);
369
370  uint16_t val16 = 3[(uint16_t*) & val64];
371  val16 >>= DBL_DIG_MANT - 48;
372
373  uint16_t expo_biased = val16 & DBL_MAX_EXP;
374  bool sign = val16 & (1u << DBL_DIG_EXP);
375
376  if (mant == 0)
377    {
378      if (expo_biased == 0)
379	return;
380      if (expo_biased >= DBL_MAX_EXP)
381	return f7_set_inf (cc, sign);
382    }
383  __asm ("" : "+r" (mant));
384
385  if (expo_biased == 0)
386    expo_biased = 1;   // Sub-normal: biased expo of 1 was encoded as 0.
387  else if (expo_biased < DBL_MAX_EXP)
388    mant |= (1ull << DBL_DIG_MANT);
389  else
390    return f7_set_nan (cc);
391
392  *(uint64_t*) & cc->mant = mant;
393
394  cc->expo = expo_biased - DBL_EXP_BIAS + 63 - DBL_DIG_MANT - 8;
395  f7_normalize_asm (cc);
396  f7_set_sign (cc, sign);
397}
398#endif // F7MOD_set_double_
399
400
401#ifdef F7MOD_set_pdouble_
402void f7_set_pdouble (f7_t *cc, const f7_double_t *val64)
403{
404  f7_set_double (cc, *val64);
405}
406#endif // F7MOD_set_pdouble_
407
408
409#ifdef F7MOD_get_double_
410static F7_INLINE
411uint64_t clr_r18 (void)
412{
413  extern void __clr_8 (void);
414  register uint64_t r18 __asm ("r18");
415  __asm ("%~call %x[f]" : "=r" (r18) : [f] "i" (__clr_8));
416  return r18;
417}
418
419static F7_INLINE
420f7_double_t make_double (uint64_t x)
421{
422  register f7_double_t r18 __asm ("r18") = x;
423  __asm ("" : "+r" (r18));
424  return r18;
425}
426
427F7_WEAK
428f7_double_t f7_get_double (const f7_t *aa)
429{
430  uint8_t a_class = f7_classify (aa);
431
432  if (f7_class_nan (a_class))
433    {
434      uint64_t nan = clr_r18() | (0x7fffull << 48);
435      return make_double (nan);
436    }
437
438  uint64_t mant;
439  __builtin_memcpy (&mant, & aa->mant, 8);
440
441  mant &= 0x00ffffffffffffff;
442
443  // FIXME: For subnormals, rounding is premature and should be
444  //	    done *after* the mantissa has been shifted into place
445  //	    (or the round value be shifted left accordingly).
446  // Round.
447  mant += 1u << (F7_MANT_BITS - (1 + DBL_DIG_MANT) - 1);
448
449  uint8_t dex;
450  register uint64_t r18 __asm ("r18") = mant;
451  // dex = Overflow ? 1 : 0.
452  __asm ("bst %T[mant]%T[bitno]"  "\n\t"
453	 "clr %0"		  "\n\t"
454	 "bld %0,0"
455	 : "=r" (dex), [mant] "+r" (r18)
456	 : [bitno] "n" (64 - 8));
457
458  mant = r18 >> dex;
459
460  uint16_t expo16 = 0;
461  uint16_t mant_offset = DBL_DIG_EXP - 8;
462  int16_t c_expo = add_ssat16 (aa->expo, DBL_EXP_BIAS + dex);
463
464  if (f7_class_zero (a_class) || c_expo <= -DBL_DIG_MANT)
465    {
466      // Zero or tiny.
467      return make_double (clr_r18());
468    }
469  else if (c_expo >= DBL_MAX_EXP || f7_class_inf (a_class))
470    {
471      // Inf or overflow.
472      expo16 = DBL_MAX_EXP;
473      mant = clr_r18();
474    }
475  else if (c_expo > 0)
476    {
477      // Normal.
478      expo16 = c_expo;
479    }
480  else
481    {
482      // Sub-normal:  -DIG_MANT < c_expo <= 0.
483      // Encoding expo of 0 represents a biased exponent of 1.
484      // mant_offset in 5...55 = 63-8.
485      mant_offset += 1 - c_expo;
486    }
487
488  expo16 <<= (DBL_DIG_MANT - 48);
489
490  if (f7_class_sign (a_class))
491    expo16 |= 1u << (DBL_DIG_EXP + DBL_DIG_MANT - 48);
492
493  // mant >>= mant_offset;
494  mant = f7_lshrdi3 (mant, mant_offset);
495
496  r18 = mant;
497  __asm ("cbr %T0%t2, 1 << (7 & %2)"  "\n\t"
498	 "or  %r0+6, %A1"	      "\n\t"
499	 "or  %r0+7, %B1"
500	 : "+r" (r18)
501	 : "r" (expo16), "n" (DBL_DIG_MANT));
502
503  return make_double (r18);
504}
505#endif // F7MOD_get_double_
506
507
508#ifdef F7MOD_fabs_
509F7_WEAK
510void f7_fabs (f7_t *cc, const f7_t *aa)
511{
512  f7_abs (cc, aa);
513}
514#endif // F7MOD_fabs_
515
516
517#ifdef F7MOD_neg_
518F7_WEAK
519f7_t* f7_neg (f7_t *cc, const f7_t *aa)
520{
521  f7_copy (cc, aa);
522
523  uint8_t c_class = f7_classify (cc);
524
525  if (! f7_class_zero (c_class))
526    cc->sign = ! f7_class_sign (c_class);
527
528  return cc;
529}
530#endif // F7MOD_neg_
531
532
533#ifdef F7MOD_frexp_
534F7_WEAK
535void f7_frexp (f7_t *cc, const f7_t *aa, int *expo)
536{
537  uint8_t a_class = f7_classify (aa);
538
539  if (f7_class_nan (a_class))
540    return f7_set_nan (cc);
541
542  if (f7_class_inf (a_class) || aa->expo == INT16_MAX)
543    return f7_set_inf (cc, f7_class_sign (a_class));
544
545  if (! f7_msbit (aa))
546    {
547      *expo = 0;
548      return f7_clr (cc);
549    }
550
551  *expo = 1 + aa->expo;
552  cc->flags = a_class & F7_FLAG_sign;
553  cc->expo = -1;
554  f7_copy_mant (cc, aa);
555}
556#endif // F7MOD_frexp_
557
558#ifdef F7MOD_get_s16_
559F7_WEAK
560int16_t f7_get_s16 (const f7_t *aa)
561{
562  extern int16_t to_s16 (const f7_t*, uint8_t) F7ASM(f7_to_integer_asm);
563  return to_s16 (aa, 0xf);
564}
565#endif // F7MOD_get_s16_
566
567
568#ifdef F7MOD_get_s32_
569F7_WEAK
570int32_t f7_get_s32 (const f7_t *aa)
571{
572  extern int32_t to_s32 (const f7_t*, uint8_t) F7ASM(f7_to_integer_asm);
573  return to_s32 (aa, 0x1f);
574}
575F7_PURE ALIAS (f7_get_s32, f7_fixdfsi)
576#endif // F7MOD_get_s32_
577
578
579#ifdef F7MOD_get_s64_
580  F7_WEAK
581  int64_t f7_get_s64 (const f7_t *aa)
582{
583  extern int64_t to_s64 (const f7_t*, uint8_t) F7ASM(f7_to_integer_asm);
584  return to_s64 (aa, 0x3f);
585}
586F7_PURE ALIAS (f7_get_s64, f7_fixdfdi)
587#endif // F7MOD_get_s64_
588
589#ifdef F7MOD_get_u16_
590  F7_WEAK
591  uint16_t f7_get_u16 (const f7_t *aa)
592{
593  extern uint16_t to_u16 (const f7_t*, uint8_t) F7ASM(f7_to_unsigned_asm);
594  return to_u16 (aa, 0xf);
595}
596#endif // F7MOD_get_u16_
597
598
599#ifdef F7MOD_get_u32_
600F7_WEAK
601uint32_t f7_get_u32 (const f7_t *aa)
602{
603  extern uint32_t to_u32 (const f7_t*, uint8_t) F7ASM(f7_to_unsigned_asm);
604  return to_u32 (aa, 0x1f);
605}
606F7_PURE ALIAS (f7_get_u32, f7_fixunsdfsi)
607#endif // F7MOD_get_u32_
608
609
610#ifdef F7MOD_get_u64_
611F7_WEAK
612uint64_t f7_get_u64 (const f7_t *aa)
613{
614  extern int64_t to_u64 (const f7_t*, uint8_t) F7ASM(f7_to_unsigned_asm);
615  return to_u64 (aa, 0x3f);
616}
617F7_PURE ALIAS (f7_get_u64, f7_fixunsdfdi)
618#endif // F7MOD_get_u64_
619
620
621#ifdef F7MOD_cmp_unordered_
622F7_NOINLINE
623static int8_t cmp_u8 (uint8_t a_class, uint8_t b_class, bool sign_a);
624
625F7_WEAK
626int8_t f7_cmp_unordered (const f7_t *aa, const f7_t *bb, bool with_sign)
627{
628  uint8_t a_class = f7_classify (aa);
629  uint8_t b_class = f7_classify (bb);
630
631  uint8_t a_sign = f7_class_sign (a_class) & with_sign;
632  uint8_t b_sign = f7_class_sign (b_class) & with_sign;
633  uint8_t ab_class = a_class | b_class;
634  ab_class &= with_sign - 2;
635
636  if (f7_class_nan (ab_class))
637    return INT8_MIN;
638
639  if (a_sign != b_sign)
640    return b_sign - a_sign;
641
642  if (f7_class_inf (ab_class))
643    return cmp_u8 (a_class, b_class, a_sign);
644
645  if (f7_class_zero (ab_class))
646    return cmp_u8 (b_class, a_class, a_sign);
647
648  if (aa->expo < bb->expo)
649    return a_sign ? 1 : -1;
650
651  if (aa->expo > bb->expo)
652    return a_sign ? -1 : 1;
653
654  return cmp_u8 (1 + f7_cmp_mant (aa, bb), 1, a_sign);
655}
656
657
658int8_t cmp_u8 (uint8_t a_class, uint8_t b_class, bool sign_a)
659{
660  int8_t c;
661  __asm ("sub  %[a], %[b]"    "\n\t"
662	 "breq 1f"	      "\n\t"
663	 "sbc  %[c], %[c]"    "\n\t"
664	 "sbci %[c], -1"      "\n\t"
665	 "sbrc %[s], 0"	      "\n\t"
666	 "neg  %[c]"	      "\n\t"
667	 "1:"
668	 : [c] "=d" (c)
669	 : [a] "0" (a_class), [b] "r" (b_class), [s] "r" (sign_a));
670  return c;
671}
672#endif // F7MOD_cmp_unordered_
673
674
675#ifdef F7MOD_cmp_abs_
676F7_WEAK
677int8_t f7_cmp_abs (const f7_t *aa, const f7_t *bb)
678{
679  return f7_cmp_unordered (aa, bb, false /* no signs */);
680}
681#endif // F7MOD_cmp_abs_
682
683
684#ifdef F7MOD_cmp_
685F7_WEAK
686int8_t f7_cmp (const f7_t *aa, const f7_t *bb)
687{
688  return f7_cmp_unordered (aa, bb, true /* with signs */);
689}
690#endif // F7MOD_cmp_
691
692
693#ifdef F7MOD_abscmp_msb_ge_
694// Compare absolute value of Number aa against a f7_t represented
695// by msb and expo.
696F7_WEAK
697bool f7_abscmp_msb_ge (const f7_t *aa, uint8_t msb, int16_t expo)
698{
699  uint8_t a_msb = aa->mant[F7_MANT_BYTES - 1];
700
701  if (0 == (0x80 & a_msb))
702    // 0 or subnormal.
703    return false;
704
705  return aa->expo == expo
706    ? a_msb >= msb
707    : aa->expo > expo;
708}
709#endif // F7MOD_abscmp_msb_ge_
710
711#ifdef F7MOD_lt_
712F7_WEAK
713bool f7_lt_impl (const f7_t *aa, const f7_t *bb)
714{
715  return f7_lt (aa, bb);
716}
717#endif // F7MOD_lt_
718
719#ifdef F7MOD_le_
720F7_WEAK
721bool f7_le_impl (const f7_t *aa, const f7_t *bb)
722{
723  return f7_le (aa, bb);
724}
725#endif // F7MOD_le_
726
727#ifdef F7MOD_gt_
728F7_WEAK
729bool f7_gt_impl (const f7_t *aa, const f7_t *bb)
730{
731  return f7_gt (aa, bb);
732}
733#endif // F7MOD_gt_
734
735#ifdef F7MOD_ge_
736F7_WEAK
737bool f7_ge_impl (const f7_t *aa, const f7_t *bb)
738{
739  return f7_ge (aa, bb);
740}
741#endif // F7MOD_ge_
742
743#ifdef F7MOD_ne_
744F7_WEAK
745bool f7_ne_impl (const f7_t *aa, const f7_t *bb)
746{
747  return f7_ne (aa, bb);
748}
749#endif // F7MOD_ne_
750
751#ifdef F7MOD_eq_
752F7_WEAK
753bool f7_eq_impl (const f7_t *aa, const f7_t *bb)
754{
755  return f7_eq (aa, bb);
756}
757#endif // F7MOD_eq_
758
759
760#ifdef F7MOD_unord_
761F7_WEAK
762bool f7_unord_impl (const f7_t *aa, const f7_t *bb)
763{
764  return f7_unordered (aa, bb);
765}
766#endif // F7MOD_unord_
767
768
769#ifdef F7MOD_minmax_
770F7_WEAK
771f7_t* f7_minmax (f7_t *cc, const f7_t *aa, const f7_t *bb, bool do_min)
772{
773  int8_t cmp = f7_cmp_unordered (aa, bb, true /* with signs */);
774
775  if (cmp == INT8_MIN)
776    return (f7_set_nan (cc), cc);
777
778  if (do_min)
779    cmp = -cmp;
780
781  return f7_copy (cc, cmp >= 0 ? aa : bb);
782}
783#endif // F7MOD_minmax_
784
785
786#ifdef F7MOD_fmax_
787F7_WEAK
788f7_t* f7_fmax (f7_t *cc, const f7_t *aa, const f7_t *bb)
789{
790  return f7_minmax (cc, aa, bb, false);
791}
792ALIAS (f7_fmax, f7_max)
793#endif // F7MOD_fmax_
794
795
796#ifdef F7MOD_fmin_
797F7_WEAK
798f7_t* f7_fmin (f7_t *cc, const f7_t *aa, const f7_t *bb)
799{
800  return f7_minmax (cc, aa, bb, true);
801}
802ALIAS (f7_fmin, f7_min)
803#endif // F7MOD_fmin_
804
805
806#ifdef F7MOD_mulx_
807F7_WEAK
808uint8_t f7_mulx (f7_t *cc, const f7_t *aa, const f7_t *bb, bool no_rounding)
809{
810  uint8_t a_class = f7_classify (aa);
811  uint8_t b_class = f7_classify (bb);
812  // From this point on, no more access aa->flags or bb->flags
813  // to avoid early-clobber when writing cc->flags.
814
815  uint8_t ab_class = a_class | b_class;
816  // If either value is NaN, return NaN.
817  if (f7_class_nan (ab_class)
818      // Any combination of Inf and 0.
819      || (f7_class_zero (ab_class) && f7_class_inf (ab_class)))
820    {
821      cc->flags = F7_FLAG_nan;
822      return 0;
823    }
824  // If either value is 0.0, return 0.0.
825  if (f7_class_zero (ab_class))
826    {
827      f7_clr (cc);
828      return 0;
829    }
830  // We have 2 non-zero numbers-or-INF.
831
832  uint8_t c_sign = (a_class ^ b_class) & F7_FLAG_sign;
833  uint8_t c_inf  = ab_class & F7_FLAG_inf;
834  cc->flags = c_sign | c_inf;
835  if (c_inf)
836    return 0;
837
838  int16_t expo = add_ssat16 (aa->expo, bb->expo);
839  // Store expo and handle expo = INT16_MIN  and INT16_MAX.
840  if (f7_store_expo (cc, expo))
841    return 0;
842
843  return f7_mul_mant_asm (cc, aa, bb, no_rounding);
844}
845#endif // F7MOD_mulx_
846
847
848#ifdef F7MOD_square_
849F7_WEAK
850void f7_square (f7_t *cc, const f7_t *aa)
851{
852  f7_mul (cc, aa, aa);
853}
854#endif // F7MOD_square_
855
856
857#ifdef F7MOD_mul_
858F7_WEAK
859void f7_mul (f7_t *cc, const f7_t *aa, const f7_t *bb)
860{
861  f7_mulx (cc, aa, bb, false);
862}
863#endif // F7MOD_mul_
864
865
866#ifdef F7MOD_Iadd_
867F7_WEAK void f7_Iadd (f7_t *cc, const f7_t *aa) { f7_add (cc, cc, aa); }
868#endif
869
870#ifdef F7MOD_Isub_
871F7_WEAK void f7_Isub (f7_t *cc, const f7_t *aa) { f7_sub (cc, cc, aa); }
872#endif
873
874#ifdef F7MOD_Imul_
875F7_WEAK void f7_Imul (f7_t *cc, const f7_t *aa) { f7_mul (cc, cc, aa); }
876#endif
877
878#ifdef F7MOD_Idiv_
879F7_WEAK void f7_Idiv (f7_t *cc, const f7_t *aa) { f7_div (cc, cc, aa); }
880#endif
881
882#ifdef F7MOD_IRsub_
883F7_WEAK void f7_IRsub (f7_t *cc, const f7_t *aa) { f7_sub (cc, aa, cc); }
884#endif
885
886#ifdef F7MOD_Ineg_
887F7_WEAK void f7_Ineg (f7_t *cc) { f7_neg (cc, cc); }
888#endif
889
890#ifdef F7MOD_Isqrt_
891F7_WEAK void f7_Isqrt (f7_t *cc) { f7_sqrt (cc, cc); }
892#endif
893
894#ifdef F7MOD_Isquare_
895F7_WEAK void f7_Isquare (f7_t *cc) { f7_square (cc, cc); }
896#endif
897
898#ifdef F7MOD_Ildexp_
899F7_WEAK f7_t* f7_Ildexp (f7_t *cc, int ex) { return f7_ldexp (cc, cc, ex); }
900#endif
901
902
903#ifdef F7MOD_add_
904F7_WEAK
905void f7_add (f7_t *cc, const f7_t *aa, const f7_t *bb)
906{
907  f7_addsub (cc, aa, bb, false);
908}
909#endif // F7MOD_add_
910
911
912#ifdef F7MOD_sub_
913F7_WEAK
914void f7_sub (f7_t *cc, const f7_t *aa, const f7_t *bb)
915{
916  f7_addsub (cc, aa, bb, true);
917}
918#endif // F7MOD_sub_
919
920
921#ifdef F7MOD_addsub_
922static void return_with_sign (f7_t *cc, const f7_t *aa, int8_t c_sign)
923{
924  __asm (";;; return with sign");
925  f7_copy (cc, aa);
926  if (c_sign != -1)
927    f7_set_sign (cc, c_sign);
928}
929
930F7_WEAK
931void f7_addsub (f7_t *cc, const f7_t *aa, const f7_t *bb, bool neg_b)
932{
933  uint8_t a_class = f7_classify (aa);
934  uint8_t b_class = f7_classify (bb);
935  // From this point on, no more access aa->flags or bb->flags
936  // to avoid early-clobber when writing cc->flags.
937
938  // Hande NaNs.
939  if (f7_class_nan (a_class | b_class))
940    return f7_set_nan (cc);
941
942  bool a_sign = f7_class_sign (a_class);
943  bool b_sign = f7_class_sign (b_class) ^ neg_b;
944
945  // Add the mantissae?
946  bool do_add = a_sign == b_sign;
947
948  // Handle +Infs and -Infs.
949  bool a_inf = f7_class_inf (a_class);
950  bool b_inf = f7_class_inf (b_class);
951
952  if (a_inf && b_inf)
953    {
954      if (do_add)
955	return f7_set_inf (cc, a_sign);
956      else
957	return f7_set_nan (cc);
958    }
959  else if (a_inf)
960    return f7_set_inf (cc, a_sign);
961  else if (b_inf)
962    return f7_set_inf (cc, b_sign);
963
964  int16_t shift16 = sub_ssat16 (aa->expo, bb->expo);
965
966  // aa + 0 = aa.
967  // Also check MSBit to get rid of Subnormals and 0.
968  if (shift16 > F7_MANT_BITS || f7_is0 (bb))
969    return return_with_sign (cc, aa, -1);
970
971  // 0 + bb = bb.
972  // 0 - bb = -bb.
973  // Also check MSBit to get rid of Subnormals and 0.
974  if (shift16 < -F7_MANT_BITS || f7_is0 (aa))
975    return return_with_sign (cc, bb, b_sign);
976
977  // Now aa and bb are non-zero, non-NaN, non-Inf.
978  // shift > 0 ==> |a| > |b|
979  // shift < 0 ==> |a| < |b|
980  int8_t shift = (int8_t) shift16;
981  bool c_sign = a_sign;
982  if (shift < 0
983      || (shift == 0 && f7_cmp_mant (aa, bb) < 0))
984    {
985      const f7_t *p = aa; aa = bb; bb = p;
986      c_sign = b_sign;
987      shift = -shift;
988    }
989  uint8_t shift2 = (uint8_t) (shift << 1);
990
991  cc->expo = aa->expo;
992  // From this point on, no more access aa->expo or bb->expo
993  // to avoid early-clobber when writing cc->expo.
994
995  cc->flags = c_sign;  _Static_assert (F7_FLAGNO_sign == 0, "");
996
997  // This function uses neither .expo nor .flags from either aa or bb,
998  // hence there is early-clobber for cc->expo and cc->flags.
999  f7_addsub_mant_scaled_asm (cc, aa, bb, shift2 | do_add);
1000}
1001#endif // F7MOD_addsub_
1002
1003
1004#ifdef F7MOD_madd_msub_
1005F7_WEAK
1006void f7_madd_msub (f7_t *cc, const f7_t *aa, const f7_t *bb, const f7_t *dd,
1007                   bool neg_d)
1008{
1009  f7_t xx7, *xx = &xx7;
1010  uint8_t x_lsb = f7_mulx (xx, aa, bb, true /* no rounding */);
1011  uint8_t x_sign = f7_signbit (xx);
1012  int16_t x_expo = xx->expo;
1013  f7_addsub (xx, xx, dd, neg_d);
1014  // Now add LSB.  If cancellation occured in the add / sub, then we have the
1015  // chance of extra 8 bits of precision.  Turn LSByte into f7_t.
1016  f7_clr (cc);
1017  cc->expo = sub_ssat16 (x_expo, F7_MANT_BITS);
1018  cc->mant[F7_MANT_BYTES - 1] = x_lsb;
1019  cc = f7_normalize_asm (cc);
1020  cc->flags = x_sign;
1021  f7_Iadd (cc, xx);
1022}
1023#endif // F7MOD_madd_msub_
1024
1025#ifdef F7MOD_madd_
1026F7_WEAK
1027void f7_madd (f7_t *cc, const f7_t *aa, const f7_t *bb, const f7_t *dd)
1028{
1029  f7_madd_msub (cc, aa, bb, dd, false);
1030}
1031#endif // F7MOD_madd_
1032
1033#ifdef F7MOD_msub_
1034F7_WEAK
1035void f7_msub (f7_t *cc, const f7_t *aa, const f7_t *bb, const f7_t *dd)
1036{
1037  f7_madd_msub (cc, aa, bb, dd, true);
1038}
1039#endif // F7MOD_msub_
1040
1041
1042#ifdef F7MOD_ldexp_
1043F7_WEAK
1044f7_t* f7_ldexp (f7_t *cc, const f7_t *aa, int delta)
1045{
1046  uint8_t a_class = f7_classify (aa);
1047
1048  cc->flags = a_class;
1049
1050  // Inf and NaN.
1051  if (! f7_class_number (a_class))
1052    return cc;
1053
1054  if (f7_msbit (aa) == 0)
1055    return (f7_clr (cc), cc);
1056
1057  int16_t expo = add_ssat16 (delta, aa->expo);
1058  // Store expo and handle expo = INT16_MIN  and INT16_MAX.
1059  if (! f7_store_expo (cc, expo))
1060    f7_copy_mant (cc, aa);
1061
1062  return cc;
1063}
1064#endif // F7MOD_ldexp_
1065
1066
1067#if USE_LPM
1068#elif USE_LD
1069#else
1070#error need include "asm-defs.h"
1071#endif // USE_LPM
1072
1073/*
1074  Handling constants:
1075
1076  F7_PCONST (PVAR, X)
1077
1078      Set  f7_t [const] *PVAR  to an LD address for one
1079      of the  f7_const_X[_P]  constants.
1080      PVAR might be set to point to a local auto that serves
1081      as temporary storage for f7_const_X_P.  PVAR is only
1082      valid in the current block.
1083
1084  const f7_t* F7_PCONST_U16 (PVAR, <ident> X)       // USE_LD
1085  f7_t*       F7_PCONST_U16 (PVAR, uint16_t X)      // USE_LPM
1086
1087      Set  f7_t [const] *PVAR  to an LD address for one of the
1088      f7_const_X[_P]  constants.  PVAR might be set to point to a
1089      local auto that serves as temporary storage for X.  PVAR is
1090      only valid in the current block.
1091
1092  F7_PCONST_VAR (PVAR, VAR)
1093
1094      VAR is a pointer variable holding the address of some f7_const_X[_P]
1095      constant.  Set  [const] f7_t *PVAR  to a respective LD address.
1096      PVAR might be set to point to a local auto that serves
1097      as temporary storage for f7_const_X_P.  PVAR is only
1098      valid in the current block.
1099
1100  F7_CONST_ADDR (<ident> CST, f7_t* PTMP)
1101
1102      Return an LD address to for some f7_const_X[_P] constant.
1103      *PTMP might be needed to hold a copy of f7_const_X_P in RAM.
1104
1105  f7_t*       F7_U16_ADDR (uint16_t     X, f7_t* PTMP)   // USE_LPM
1106  const f7_t* F7_U16_ADDR (<cst-ident>  X, <unused>)     // USE_LD
1107
1108      Return an LD address to compile-time constant  uint16_t X  which is
1109      also known as  f7_const_X[_P].  *PTMP might be set to  (f7_t) X.
1110
1111  f7_t* f7_const (f7_t *PVAR, <cst-ident> X)
1112
1113      Copy  f7_const_X[_P]  to *PVAR.
1114
1115  f7_t* f7_copy_flash (f7_t *DST, const f7_t *SRC)
1116
1117      Copy to *DST with LD (from .rodata in flash) if the address
1118      space is linear, or with  LPM (from .progmem.data) if the
1119      address space is not linear.
1120
1121  f7_t* f7_copy (f7_t *DST, const f7_t* SRC)
1122
1123      Copy to RAM using LD.
1124
1125  f7_t* f7_copy_P (f7_t *DST, const f7_t *SRC)
1126
1127      Copy to RAM using LPM.
1128*/
1129
1130#if USE_LPM
1131  #define F7_RAW_CONST_ADDR(CST) \
1132      & F7_(const_##CST##_P)
1133
1134  #define F7_PCONST(PVAR, CST)				    \
1135      f7_t _var_for_##CST;				    \
1136      f7_copy_P (& _var_for_##CST, & F7_(const_##CST##_P)); \
1137      PVAR = & _var_for_##CST
1138
1139  #define F7_PCONST_U16(PVAR, CST)		\
1140      f7_t _var_for_##CST;			\
1141      PVAR = f7_set_u16 (& _var_for_##CST, CST)
1142
1143  #define F7_PCONST_VAR(PVAR, VAR)		\
1144      f7_t _var_for_##VAR;			\
1145      f7_copy_P (& _var_for_##VAR, VAR);	\
1146      PVAR = & _var_for_##VAR
1147
1148  #define MAYBE_const // empty
1149
1150  #define F7_CONST_ADDR(CST, PTMP) \
1151      f7_copy_P ((PTMP), & F7_(const_##CST##_P))
1152
1153  #define F7_U16_ADDR(CST, PTMP)   \
1154      f7_set_u16 ((PTMP), CST)
1155
1156#elif USE_LD
1157  #define F7_RAW_CONST_ADDR(CST)   \
1158      & F7_(const_##CST)
1159
1160  #define F7_PCONST(PVAR, CST)	   \
1161      PVAR = & F7_(const_##CST)
1162
1163  #define F7_PCONST_U16(PVAR, CST) \
1164      PVAR = & F7_(const_##CST)
1165
1166  #define F7_PCONST_VAR(PVAR, VAR) \
1167      PVAR = (VAR)
1168
1169  #define F7_CONST_ADDR(CST, PTMP) \
1170      (& F7_(const_##CST))
1171
1172  #define F7_U16_ADDR(CST, PTMP)   \
1173      (& F7_(const_##CST))
1174
1175  #define MAYBE_const const
1176#endif
1177
1178
1179
1180#define DD(str, X)		\
1181  do {				\
1182    LOG_PSTR (PSTR (str));	\
1183    f7_dump (X);		\
1184  } while (0)
1185
1186#undef DD
1187#define DD(...) (void) 0
1188
1189
1190#ifdef F7MOD_sqrt_
1191static void sqrt_worker (f7_t *cc, const f7_t *rr)
1192{
1193  f7_t tmp7, *tmp = &tmp7;
1194  f7_t aa7, *aa = &aa7;
1195
1196  // aa in  [1/2, 2)  =>  aa->expo in { -1, 0 }.
1197  int16_t a_expo = -(rr->expo & 1);
1198  int16_t c_expo = (rr->expo - a_expo) >> 1;  // FIXME: r_expo = INT_MAX???
1199
1200  __asm ("" : "+r" (aa));
1201
1202  f7_copy (aa, rr);
1203  aa->expo = a_expo;
1204
1205  // No use of rr or *cc past this point:  We may use cc as temporary.
1206  // Approximate square-root of  A  by  X <-- (X + A / X) / 2.
1207
1208  f7_sqrt_approx_asm (cc, aa);
1209
1210  // Iterate  X <-- (X + A / X) / 2.
1211  // 3 Iterations with 16, 32, 58 bits of precision for the quotient.
1212
1213  for (uint8_t prec = 16; (prec & 0x80) == 0; prec <<= 1)
1214    {
1215      f7_divx (tmp, aa, cc, (prec & 64) ? 2 + F7_MANT_BITS : prec);
1216      f7_Iadd (cc, tmp);
1217      // This will never underflow because |c_expo| is small.
1218      cc->expo--;
1219    }
1220
1221  // Similar: |c_expo| is small, hence no ldexp needed.
1222  cc->expo += c_expo;
1223}
1224
1225F7_WEAK
1226void f7_sqrt (f7_t *cc, const f7_t *aa)
1227{
1228  uint8_t a_class = f7_classify (aa);
1229
1230  if (f7_class_nan (a_class) || f7_class_sign (a_class))
1231    return f7_set_nan (cc);
1232
1233  if (f7_class_inf (a_class))
1234    return f7_set_inf (cc, 0);
1235
1236  if (f7_class_zero (a_class))
1237    return f7_clr (cc);
1238
1239  sqrt_worker (cc, aa);
1240}
1241#endif // F7MOD_sqrt_
1242
1243
1244#ifdef F7MOD_hypot_
1245F7_WEAK
1246void f7_hypot (f7_t *cc, const f7_t *aa, const f7_t *bb)
1247{
1248  f7_t xx7, *xx = &xx7;
1249
1250  f7_square (xx, aa);
1251  f7_square (cc, bb);
1252  f7_Iadd (cc, xx);
1253  f7_Isqrt (cc);
1254}
1255#endif // F7MOD_hypot_
1256
1257
1258#ifdef F7MOD_const_m1_
1259#include "libf7-constdef.h"
1260#endif // -1
1261
1262#ifdef F7MOD_const_1_2_
1263#include "libf7-constdef.h"
1264#endif // 1/2
1265
1266#ifdef F7MOD_const_1_3_
1267#include "libf7-constdef.h"
1268#endif // 1/3
1269
1270#ifdef F7MOD_const_ln2_
1271#include "libf7-constdef.h"
1272#endif // ln2
1273
1274#ifdef F7MOD_const_1_ln2_
1275#include "libf7-constdef.h"
1276#endif // 1_ln2
1277
1278#ifdef F7MOD_const_ln10_
1279#include "libf7-constdef.h"
1280#endif // ln10
1281
1282#ifdef F7MOD_const_1_ln10_
1283#include "libf7-constdef.h"
1284#endif // 1_ln10
1285
1286#ifdef F7MOD_const_1_
1287#include "libf7-constdef.h"
1288#endif // 1
1289
1290#ifdef F7MOD_const_sqrt2_
1291#include "libf7-constdef.h"
1292#endif // sqrt2
1293
1294#ifdef F7MOD_const_2_
1295#include "libf7-constdef.h"
1296#endif // 2
1297
1298#ifdef F7MOD_const_pi_
1299#include "libf7-constdef.h"
1300#endif // pi
1301
1302
1303#ifdef F7MOD_divx_
1304
1305// C /= A
1306extern void f7_div_asm (f7_t*, const f7_t*, uint8_t);
1307
1308F7_WEAK
1309void f7_divx (f7_t *cc, const f7_t *aa, const f7_t *bb, uint8_t quot_bits)
1310{
1311  uint8_t a_class = f7_classify (aa);
1312  uint8_t b_class = f7_classify (bb);
1313  // From this point on, no more access aa->flags or bb->flags
1314  // to avoid early-clobber when writing cc->flags.
1315
1316  // If either value is NaN, return NaN.
1317  if (f7_class_nan (a_class | b_class)
1318      // If both values are Inf or both are 0, return NaN.
1319      || f7_class_zero (a_class & b_class)
1320      || f7_class_inf (a_class & b_class)
1321      // Inf / 0 = NaN.
1322      || (f7_class_inf (a_class) && f7_class_zero (b_class)))
1323    {
1324      return f7_set_nan (cc);
1325    }
1326
1327  // 0 / B   = 0  for non-zero, non-NaN B.
1328  // A / Inf = 0  for non-zero numbers A.
1329  if (f7_class_zero (a_class) || f7_class_inf (b_class))
1330    return f7_clr (cc);
1331
1332  uint8_t c_sign = (a_class ^ b_class) & F7_FLAG_sign;
1333
1334  if (f7_class_inf (a_class) || f7_class_zero (b_class))
1335    return f7_set_inf (cc, c_sign);
1336
1337  cc->flags = c_sign;     _Static_assert (F7_FLAGNO_sign == 0, "");
1338  int16_t expo = sub_ssat16 (aa->expo, bb->expo);
1339
1340  // Store expo and handle expo = INT16_MIN  and INT16_MAX.
1341  if (f7_store_expo (cc, expo))
1342    return;
1343
1344  f7_t ss7, *ss = &ss7;
1345  ss->flags = cc->flags;
1346  ss->expo  = cc->expo;
1347
1348  f7_copy_mant (ss, aa);
1349  f7_div_asm (ss, bb, quot_bits);
1350  f7_copy (cc, ss);
1351}
1352#endif // F7MOD_divx_
1353
1354
1355#ifdef F7MOD_div_
1356F7_WEAK
1357void f7_div (f7_t *cc, const f7_t *aa, const f7_t *bb)
1358{
1359  /* When f7_divx calls f7_div_asm, dividend and divisor are valid
1360     mantissae, i.e. their MSBit is set.  Therefore, the quotient will
1361     be in  [0x0.ff..., 0x0.40...]  and to adjust it, at most 1 left-shift
1362     is needed.  Compute F7_MANT_BITS + 2 bits of the quotient:
1363     One bit is used for rounding, and one bit might be consumed by the
1364     mentioned left-shift.  */
1365
1366  f7_divx (cc, aa, bb, 2 + F7_MANT_BITS);
1367}
1368#endif // F7MOD_div_
1369
1370
1371#ifdef F7MOD_div1_
1372F7_WEAK
1373void f7_div1 (f7_t *cc, const f7_t *aa)
1374{
1375  F7_PCONST_U16 (const f7_t *one, 1);
1376  f7_div (cc, one, aa);
1377}
1378#endif // F7MOD_div_
1379
1380
1381#ifdef F7MOD_fmod_
1382F7_WEAK
1383void f7_fmod (f7_t *cc, const f7_t *aa, const f7_t *bb)
1384{
1385  uint8_t a_class = f7_classify (aa);
1386  uint8_t b_class = f7_classify (bb);
1387
1388  if (! f7_class_number (a_class)
1389      || f7_class_nan (b_class)
1390      || f7_class_zero (b_class))
1391    {
1392      return f7_set_nan (cc);
1393    }
1394
1395  // A == 0 and B != 0  =>  0.
1396  if (f7_class_zero (a_class))
1397    return f7_clr (cc);
1398
1399  f7_t zz7, *zz = & zz7;
1400
1401  f7_div (zz, aa, bb);
1402
1403  // Z in Z,  |Z| <= |A/B|.
1404  f7_trunc (zz, zz);
1405
1406  // C = A - Z * B.
1407  f7_msub (cc, zz, bb, aa);
1408  cc->flags ^= F7_FLAG_sign;
1409}
1410#endif // F7MOD_fmod_
1411
1412
1413#ifdef F7MOD_truncx_
1414F7_WEAK
1415f7_t* f7_truncx (f7_t *cc, const f7_t *aa, bool do_floor)
1416{
1417  uint8_t a_class = f7_classify (aa);
1418
1419  if (! f7_class_nonzero (a_class))
1420    return f7_copy (cc, aa);
1421
1422  bool sign = f7_class_sign (a_class);
1423
1424  int16_t a_expo = aa->expo;
1425
1426  if (a_expo < 0)
1427    {
1428      // |A| < 1.
1429      if (sign & do_floor)
1430	return f7_set_s16 (cc, -1);
1431
1432      f7_clr (cc);
1433      return cc;
1434    }
1435  else if (a_expo >= F7_MANT_BITS - 1)
1436    // A == floor (A).
1437    return f7_copy (cc, aa);
1438
1439  f7_t tmp7, *tmp = &tmp7;
1440
1441  // Needed if aa === cc.
1442  f7_copy (tmp, aa);
1443
1444  cc->flags = sign;
1445  cc->expo = a_expo;
1446  f7_clr_mant_lsbs (cc, aa, F7_MANT_BITS - 1 - a_expo);
1447
1448  if (do_floor && cc->sign && f7_cmp_mant (cc, tmp) != 0)
1449    {
1450      F7_PCONST_U16 (const f7_t *one, 1);
1451      f7_Isub (cc, one);
1452    }
1453
1454  return cc;
1455}
1456#endif // F7MOD_truncx_
1457
1458
1459#ifdef F7MOD_floor_
1460F7_WEAK
1461f7_t* f7_floor (f7_t *cc, const f7_t *aa)
1462{
1463  return f7_truncx (cc, aa, true);
1464}
1465#endif // F7MOD_floor_
1466
1467
1468#ifdef F7MOD_trunc_
1469F7_WEAK
1470f7_t* f7_trunc (f7_t *cc, const f7_t *aa)
1471{
1472  return f7_truncx (cc, aa, false);
1473}
1474#endif // F7MOD_trunc_
1475
1476
1477#ifdef F7MOD_ceil_
1478F7_WEAK
1479void f7_ceil (f7_t *cc, const f7_t *aa)
1480{
1481  cc = f7_copy (cc, aa);
1482  cc->flags ^= F7_FLAG_sign;
1483  cc = f7_floor (cc, cc);
1484  f7_Ineg (cc);
1485}
1486#endif // F7MOD_ceil_
1487
1488
1489#ifdef F7MOD_round_
1490F7_WEAK
1491void f7_round (f7_t *cc, const f7_t *aa)
1492{
1493  f7_t tmp;
1494  (void) tmp;
1495  const f7_t *half = F7_CONST_ADDR (1_2, &tmp);
1496
1497  f7_addsub   (cc, aa, half, f7_signbit (aa));
1498  f7_trunc (cc, cc);
1499}
1500#endif // F7MOD_round_
1501
1502
1503#ifdef F7MOD_horner_
1504
1505// Assertion when using this function is that either cc != xx,
1506// or if cc == xx, then tmp1 must be non-NULL and tmp1 != xx.
1507// In General, the calling functions have a spare f7_t object available
1508// and can pass it down to save some stack.
1509// Moreover, the power series must have degree 1 at least.
1510
1511F7_WEAK
1512void f7_horner (f7_t *cc, const f7_t *xx, uint8_t n_coeff, const f7_t *coeff,
1513                f7_t *tmp1)
1514{
1515  f7_assert (n_coeff > 1);
1516
1517  if (cc != xx)
1518    tmp1 = cc;
1519  else
1520    f7_assert (tmp1 != NULL && tmp1 != xx);
1521
1522  f7_t *yy = tmp1;
1523  f7_t tmp27, *tmp2 = &tmp27;
1524
1525  n_coeff--;
1526  const f7_t *pcoeff = coeff + n_coeff;
1527
1528  f7_copy_flash (yy, pcoeff);
1529
1530  while (1)
1531    {
1532      --pcoeff;
1533#if 1
1534      f7_Imul (yy, xx);
1535      const f7_t *cst = USE_LD ? pcoeff : f7_copy_P (tmp2, pcoeff);
1536      if (coeff == pcoeff)
1537	return f7_add (cc, yy, cst);
1538      f7_Iadd (yy, cst);
1539#else
1540      const f7_t *cst = USE_LD ? pcoeff : f7_copy_P (tmp2, pcoeff);
1541      f7_madd (yy, yy, xx, cst);
1542      if (coeff == pcoeff)
1543        {
1544	  f7_copy (cc, yy);
1545	  return;
1546        }
1547#endif
1548    }
1549
1550  __builtin_unreachable();
1551}
1552#endif // F7MOD_horner_
1553
1554
1555#ifdef F7MOD_log_
1556F7_WEAK
1557void f7_log (f7_t *cc, const f7_t *aa)
1558{
1559    f7_logx (cc, aa, NULL);
1560}
1561#endif // F7MOD_log_
1562
1563
1564#ifdef F7MOD_log2_
1565F7_WEAK
1566void f7_log2 (f7_t *cc, const f7_t *aa)
1567{
1568  f7_logx (cc, aa, F7_RAW_CONST_ADDR (1_ln2));
1569}
1570#endif // F7MOD_log2_
1571
1572
1573#ifdef F7MOD_log10_
1574F7_WEAK
1575void f7_log10 (f7_t *cc, const f7_t *aa)
1576{
1577  f7_logx (cc, aa, F7_RAW_CONST_ADDR (1_ln10));
1578}
1579#endif // F7MOD_log10_
1580
1581
1582#ifdef F7MOD_logx_
1583
1584#define ARRAY_NAME coeff_artanh
1585#include "libf7-array.def"
1586#undef ARRAY_NAME
1587
1588// Compute P * ln(A)  if P != NULL and ln(A), otherwise.
1589// P is a LD-address if USE_LD and a LPM-address if USE_LPM.
1590// Assumption is that P > 0.
1591
1592F7_WEAK
1593void f7_logx (f7_t *cc, const f7_t *aa, const f7_t *p)
1594{
1595  uint8_t a_class = f7_classify (aa);
1596
1597  if (f7_class_nan (a_class) || f7_class_sign (a_class))
1598    return f7_set_nan (cc);
1599
1600  if (f7_class_inf (a_class))
1601    return f7_set_inf (cc, 0);
1602
1603  if (f7_class_zero (a_class))
1604    return f7_set_inf (cc, 1);
1605
1606  f7_t *yy = cc;
1607  f7_t xx7, *xx = &xx7;
1608  f7_t tmp7, *tmp = &tmp7;
1609
1610  // Y in [1, 2]  =  A * 2 ^ (-a_expo).
1611  int16_t a_expo = aa->expo;
1612  f7_copy (yy, aa);
1613  yy->expo = 0;
1614
1615  // Y in [1 / sqrt2, sqrt2].
1616
1617  if (f7_abscmp_msb_ge (yy, F7_(const_sqrt2_msb), F7_(const_sqrt2_expo)))
1618    {
1619      yy->expo = -1;
1620      a_expo = add_ssat16 (a_expo, 1);
1621    }
1622
1623  const f7_t *one = F7_U16_ADDR (1, & tmp7);
1624
1625  // X := (Y - 1) / (Y + 1),  |X| <= (sqrt2 - 1) / (sqrt2 + 1)  ~  0.172.
1626  f7_sub (xx, yy, one);
1627  f7_Iadd (yy, one);
1628  f7_Idiv (xx, yy);
1629
1630  // Y := X^2,  |Y| < 0.03.
1631  f7_square (yy, xx);
1632
1633  // Y := artanh (X^2) / X
1634  f7_horner (yy, yy, n_coeff_artanh, coeff_artanh, tmp);
1635
1636  // C = X * Y = ln A - a_expo * ln2.
1637  f7_mul (cc, xx, yy);
1638
1639  // X := a_expo * ln2.
1640  f7_set_s16 (xx, a_expo);
1641  f7_Imul (xx, F7_CONST_ADDR (ln2, & tmp7));
1642
1643  // C = ln A.
1644  f7_Iadd (cc, xx);
1645
1646  if (p && USE_LPM)
1647    f7_Imul (cc, f7_copy_P (tmp, p));
1648  if (p && USE_LD)
1649    f7_Imul (cc, p);
1650}
1651#endif // F7MOD_logx_
1652
1653
1654#ifdef F7MOD_exp_
1655
1656#define ARRAY_NAME coeff_exp
1657#include "libf7-array.def"
1658#undef ARRAY_NAME
1659
1660#define STATIC static
1661#include "libf7-constdef.h" // ln2_low
1662#undef STATIC
1663
1664F7_WEAK
1665void f7_exp (f7_t *cc, const f7_t *aa)
1666{
1667  uint8_t a_class = f7_classify (aa);
1668
1669  if (f7_class_nan (a_class))
1670    return f7_set_nan (cc);
1671
1672  /* The maximal exponent of 2 for a double is 1023, hence we may limit
1673     to  |A| < 1023 * ln2 ~ 709.  We limit to  1024 ~ 1.99 * 2^9  */
1674
1675  if (f7_class_inf (a_class)
1676      || (f7_class_nonzero (a_class) && aa->expo >= 9))
1677    {
1678      if (f7_class_sign (a_class))
1679	return f7_clr (cc);
1680      else
1681	return f7_set_inf (cc, 0);
1682    }
1683
1684  f7_t const *cst;
1685  f7_t qq7, *qq = &qq7;
1686
1687  F7_PCONST (cst, ln2);
1688
1689  // We limited |A| to 1024 and are now dividing by ln2, hence Q will
1690  // be at most 1024 / ln2 ~ 1477 and fit into 11 bits.  We will
1691  // round Q anyway, hence only request 11 bits from the division and
1692  // one additional bit that might be needed to normalize the quotient.
1693  f7_divx (qq, aa, cst, 1 + 11);
1694
1695  // Use the smallest (by absolute value) remainder system.
1696  f7_round (qq, qq);
1697  int16_t q = f7_get_s16 (qq);
1698
1699  // Reducing A mod ln2 gives |C| <= ln2 / 2,  C = -A mod ln2.
1700  f7_msub (cc, qq, cst, aa);
1701
1702  // Corrigendum:  We added Q * ln2; now add Q times the low part of ln2
1703  // for better precision.  Due to |C| < |A| this is not a no-op in general.
1704  const f7_t *yy = F7_CONST_ADDR (ln2_low, &_var_for_ln2);
1705  f7_madd (cc, qq, yy, cc);
1706
1707  // Because we computed C = -A mod ...
1708  cc->flags ^= F7_FLAG_sign;
1709
1710  // Reduce further to |C| < ln2 / 8 which is the range of our MiniMax poly.
1711  const uint8_t MAX_LN2_RED = 3;
1712  int8_t scal2 = 0;
1713
1714  while (f7_abscmp_msb_ge (cc, F7_(const_ln2_msb),
1715			   F7_(const_ln2_expo) - MAX_LN2_RED))
1716    {
1717      scal2++;
1718      cc->expo--;
1719    }
1720
1721  f7_horner (cc, cc, n_coeff_exp, coeff_exp, qq);
1722
1723  while (--scal2 >= 0)
1724    f7_Isquare (cc);
1725
1726  f7_Ildexp (cc, q);
1727}
1728#endif // F7MOD_exp_
1729
1730
1731#ifdef F7MOD_pow10_
1732F7_WEAK
1733void f7_pow10 (f7_t *cc, const f7_t *aa)
1734{
1735  const f7_t *p_ln10;
1736  F7_PCONST (p_ln10, ln10);
1737  f7_mul (cc, aa, p_ln10);
1738  f7_exp (cc, cc);
1739}
1740ALIAS (f7_pow10, f7_exp10)
1741#endif // F7MOD_pow10_
1742
1743
1744#ifdef F7MOD_cbrt_
1745F7_WEAK
1746void f7_cbrt (f7_t *cc, const f7_t *aa)
1747{
1748  f7_copy (cc, aa);
1749  const f7_t *p_1_3;
1750  uint8_t c_flags = cc->flags;
1751  cc->flags &= ~F7_FLAG_sign;
1752  f7_log (cc, cc);
1753  F7_PCONST (p_1_3, 1_3);
1754  f7_Imul (cc, p_1_3);
1755  f7_exp (cc, cc);
1756
1757  if (c_flags & F7_FLAG_sign)
1758    cc->flags |= F7_FLAG_sign;
1759}
1760#endif // F7MOD_cbrt_
1761
1762
1763#ifdef F7MOD_pow_
1764F7_WEAK
1765void f7_pow (f7_t *cc, const f7_t *aa, const f7_t *bb)
1766{
1767#if 0
1768  f7_t slots[cc == bb];
1769  f7_t *yy = cc == bb ? slots : cc;
1770#else
1771  f7_t yy7, *yy = &yy7;
1772#endif
1773  f7_log (yy, aa);
1774  f7_Imul (yy, bb);
1775  f7_exp (cc, yy);
1776}
1777#endif // F7MOD_pow_
1778
1779
1780#ifdef F7MOD_powi_
1781F7_WEAK
1782void f7_powi (f7_t *cc, const f7_t *aa, int ii)
1783{
1784  uint16_t u16 = ii;
1785  f7_t xx27, *xx2 = &xx27;
1786
1787  if (ii < 0)
1788    u16 = -u16;
1789
1790  f7_copy (xx2, aa);
1791
1792  f7_set_u16 (cc, 1);
1793
1794  while (1)
1795    {
1796      if (u16 & 1)
1797	f7_Imul (cc, xx2);
1798
1799      if (! f7_is_nonzero (cc))
1800	break;
1801
1802      u16 >>= 1;
1803      if (u16 == 0)
1804	break;
1805      f7_Isquare (xx2);
1806    }
1807
1808  if (ii < 0)
1809    f7_div1 (xx2, aa);
1810}
1811#endif // F7MOD_powi_
1812
1813
1814#ifdef F7MOD_sincos_
1815
1816#define ARRAY_NAME coeff_sin
1817  #define FOR_SIN
1818  #include "libf7-array.def"
1819  #undef  FOR_SIN
1820#undef ARRAY_NAME
1821
1822#define ARRAY_NAME coeff_cos
1823  #define FOR_COS
1824  #include "libf7-array.def"
1825  #undef  FOR_COS
1826#undef ARRAY_NAME
1827
1828#define STATIC static
1829#include "libf7-constdef.h" // pi_low
1830#undef STATIC
1831
1832typedef union
1833{
1834  struct
1835  {
1836    bool    neg_sin : 1; // Must be bit F7_FLAGNO_sign.
1837    bool    neg_cos : 1;
1838    bool    do_sin: 1;
1839    bool    do_cos: 1;
1840    bool    swap_sincos : 1;
1841    uint8_t res : 3;
1842  };
1843  uint8_t bits;
1844} sincos_t;
1845
1846
1847F7_WEAK
1848void f7_sincos (f7_t *ss, f7_t *cc, const f7_t *aa)
1849{
1850  uint8_t a_class = f7_classify (aa);
1851
1852  sincos_t sc = { .bits = a_class & F7_FLAG_sign };
1853  if (ss != NULL) sc.do_sin = 1;
1854  if (cc != NULL) sc.do_cos = 1;
1855
1856  if (f7_class_nan (a_class) || f7_class_inf (a_class))
1857    {
1858      if (sc.do_sin)  f7_set_nan (ss);
1859      if (sc.do_cos)  f7_set_nan (cc);
1860      return;
1861    }
1862
1863  f7_t pi7, *pi = &pi7;
1864  f7_t xx7, *xx = &xx7;
1865  f7_t yy7, *yy = &yy7;
1866  f7_t *hh = sc.do_sin ? ss : cc;
1867
1868  // X = |A|
1869  f7_copy (xx, aa);
1870  xx->flags = 0;
1871
1872  // Y is how often we subtract PI from X.
1873  f7_clr (yy);
1874  f7_const (pi, pi);
1875
1876  if (f7_abscmp_msb_ge (xx, F7_(const_pi_msb), F7_(const_pi_expo) + 1))
1877    {
1878      pi->expo = 1 + F7_(const_pi_expo);  // 2*pi
1879
1880      // Y = X / 2pi.
1881      f7_div (yy, xx, pi);
1882
1883      // The integral part of |A| / pi mod 2 is bit 55 - x_expo.
1884      if (yy->expo >= F7_MANT_BITS && !f7_is_zero (yy))
1885        {
1886	  // Too big for sensible calculation:  Should this be NaN instead?
1887	  if (sc.do_sin)  f7_clr (ss);
1888	  if (sc.do_cos)  f7_clr (cc);
1889	  return;
1890        }
1891
1892      // X -= 2pi * [ X / 2pi ]
1893      f7_floor (yy, yy);
1894
1895      f7_msub (xx, yy, pi, xx);
1896      xx->flags ^= F7_FLAG_sign;
1897
1898      // We divided by 2pi, but Y should count times we subtracted pi.
1899      yy->expo++;
1900    }
1901
1902  pi->expo = F7_(const_pi_expo); // pi
1903  f7_sub (hh, xx, pi);
1904  if (!f7_signbit (hh))
1905    {
1906      // H = X - pi >= 0  =>  X >= pi
1907      // sin(x) = -sin(x - pi)
1908      // cos(x) = -cos(x - pi)
1909      f7_copy (xx, hh);
1910      // Y: We subtracted pi one more time.
1911      f7_Iadd (yy, f7_set_u16 (hh, 1));
1912      sc.neg_sin ^= 1;
1913      sc.neg_cos ^= 1;
1914    }
1915
1916  pi->expo = F7_(const_pi_expo) - 1; // pi/2
1917  if (f7_gt (xx, pi))
1918    {
1919      // x > pi/2
1920      // sin(x) =  sin(pi - x)
1921      // cos(x) = -cos(pi - x)
1922      pi->expo = F7_(const_pi_expo); // pi
1923      f7_IRsub (xx, pi);
1924      // Y: We subtracted pi one more time (and then negated).
1925      f7_Iadd (yy, f7_set_u16 (hh, 1));
1926      yy->flags ^= F7_FLAG_sign;
1927      sc.neg_cos ^= 1;
1928    }
1929
1930  pi->expo = F7_(const_pi_expo) - 2; // pi/4
1931  if (f7_gt (xx, pi))
1932    {
1933      // x > pi/4
1934      // sin(x) = cos(pi/2 - x)
1935      // cos(x) = sin(pi/2 - x)
1936      pi->expo = F7_(const_pi_expo) - 1; // pi/2
1937      f7_IRsub (xx, pi);
1938      // Y: We subtracted pi/2 one more time (and then negated).
1939      f7_Iadd (yy, f7_set_1pow2 (hh, -1, 0));
1940      yy->flags ^= F7_FLAG_sign;
1941      sc.swap_sincos = 1;
1942    }
1943
1944  if (!f7_is0 (yy))
1945    {
1946      // Y counts how often we subtracted pi from X in order to
1947      // get 0 <= X < pi/4 as small as possible (Y is 0 mod 0.5).
1948      // Now also subtract the low part of pi:
1949      // f7_const_pi_low = pi - f7_const_pi  in order to get more precise
1950      // results in the cases where the final result is close to 0.
1951      const f7_t *pi_low = F7_CONST_ADDR (pi_low, pi);
1952      //f7_const (pi, pi_low);
1953      f7_Imul (yy, pi_low);
1954      f7_Isub (xx, yy);
1955    }
1956
1957  // X   in [0, pi/4].
1958  // X^2 in [0, pi^2/16] ~ [0, 0.6169]
1959
1960  f7_square (yy, xx);
1961
1962  f7_t *x_sin = xx;
1963  f7_t *x_cos = yy;
1964
1965  if ((sc.do_sin && !sc.swap_sincos)
1966      || (sc.do_cos && sc.swap_sincos))
1967    {
1968      f7_horner (hh, yy, n_coeff_sin, coeff_sin, NULL);
1969      f7_mul (x_sin, hh, xx);
1970    }
1971
1972  if ((sc.do_cos && !sc.swap_sincos)
1973      || (sc.do_sin && sc.swap_sincos))
1974    {
1975      f7_horner (x_cos, yy, n_coeff_cos, coeff_cos, hh);
1976    }
1977
1978  if (sc.swap_sincos)
1979    {
1980      x_sin = yy;
1981      x_cos = xx;
1982    }
1983
1984  if (sc.do_sin)
1985    {
1986      x_sin->flags ^= sc.bits;
1987      x_sin->flags &= F7_FLAG_sign;
1988      f7_copy (ss, x_sin);
1989    }
1990
1991  if (sc.do_cos)
1992    {
1993      x_cos->flags = sc.neg_cos;
1994      f7_copy (cc, x_cos);
1995    }
1996}
1997#endif // F7MOD_sincos_
1998
1999#ifdef F7MOD_sin_
2000F7_WEAK
2001void f7_sin (f7_t *ss, const f7_t *aa)
2002{
2003  f7_sincos (ss, NULL, aa);
2004}
2005#endif // F7MOD_sin_
2006
2007#ifdef F7MOD_cos_
2008F7_WEAK
2009void f7_cos (f7_t *cc, const f7_t *aa)
2010{
2011  f7_sincos (NULL, cc, aa);
2012}
2013#endif // F7MOD_cos_
2014
2015
2016#ifdef F7MOD_tan_
2017F7_WEAK
2018void f7_tan (f7_t *tt, const f7_t *aa)
2019{
2020  f7_t xcos;
2021  f7_sincos (tt, & xcos, aa);
2022  f7_Idiv (tt, & xcos);
2023}
2024#endif // F7MOD_tan_
2025
2026
2027#ifdef F7MOD_cotan_
2028F7_WEAK
2029void f7_cotan (f7_t *ct, const f7_t *aa)
2030{
2031  f7_t xcos;
2032  f7_sincos (ct, & xcos, aa);
2033  f7_div (ct, & xcos, ct);
2034}
2035#endif // F7MOD_cotan_
2036
2037
2038#ifdef F7MOD_sinhcosh_
2039F7_WEAK
2040void f7_sinhcosh (f7_t *cc, const f7_t *aa, bool do_sinh)
2041{
2042  f7_t xx7, *xx = &xx7;
2043  // C = exp(A)
2044  f7_exp (cc, aa);
2045  // X = exp(-A)
2046  f7_div (xx, f7_set_u16 (xx, 1), cc);
2047  // sinh(A) = (exp(A) - exp(-A)) / 2
2048  // cosh(A) = (exp(A) + exp(-A)) / 2
2049  f7_addsub (cc, cc, xx, do_sinh);
2050  cc->expo--;
2051}
2052#endif // F7MOD_sinhcosh_
2053
2054
2055#ifdef F7MOD_sinh_
2056F7_WEAK
2057void f7_sinh (f7_t *cc, const f7_t *aa)
2058{
2059  f7_sinhcosh (cc, aa, true);
2060}
2061#endif // F7MOD_sinh_
2062
2063
2064#ifdef F7MOD_cosh_
2065F7_WEAK
2066void f7_cosh (f7_t *cc, const f7_t *aa)
2067{
2068  f7_sinhcosh (cc, aa, false);
2069}
2070#endif // F7MOD_cosh_
2071
2072
2073#ifdef F7MOD_tanh_
2074F7_WEAK
2075void f7_tanh (f7_t *cc, const f7_t *aa)
2076{
2077  // tanh(A) = (exp(2A) - 1) / (exp(2A) + 1)
2078  f7_t xx7, *xx = &xx7;
2079  F7_PCONST_U16 (const f7_t *one, 1);
2080  // C = 2A
2081  f7_copy (cc, aa);
2082  cc->expo++;
2083  // C = exp(2A)
2084  f7_exp (cc, cc);
2085  // X = exp(2A) + 1
2086  f7_add (xx, cc, one);
2087  // C = exp(2A) - 1
2088  f7_Isub (cc, one);
2089  // C = tanh(A)
2090  f7_Idiv (cc, xx);
2091}
2092#endif // F7MOD_tanh_
2093
2094
2095#ifdef F7MOD_atan_
2096
2097#define MINIMAX_6_6_IN_0_1
2098
2099#define ARRAY_NAME coeff_atan_zahler
2100#define FOR_NUMERATOR
2101#include "libf7-array.def"
2102#undef FOR_NUMERATOR
2103#undef ARRAY_NAME
2104
2105#define ARRAY_NAME coeff_atan_nenner
2106#define FOR_DENOMINATOR
2107#include "libf7-array.def"
2108#undef FOR_DENOMINATOR
2109#undef ARRAY_NAME
2110
2111#include "libf7-constdef.h"
2112
2113F7_WEAK
2114void f7_atan (f7_t *cc, const f7_t *aa)
2115{
2116  uint8_t a_class = f7_classify (aa);
2117  uint8_t flags = a_class & F7_FLAG_sign;
2118
2119  if (f7_class_nan (a_class))
2120    return f7_set_nan (cc);
2121
2122  f7_t yy7, *yy = &yy7;
2123  f7_t zz7, *zz = &zz7;
2124
2125  if (f7_class_inf (a_class))
2126    {
2127      f7_set_u16 (cc, 0);
2128      goto do_Inf;
2129    }
2130
2131  // C = |A|
2132  f7_copy (cc, aa);
2133  cc->flags = 0;
2134
2135  if (!f7_class_zero (a_class) && cc->expo >= 0)
2136    {
2137      // C >= 1:  use  atan (x) + atan (1/x) = pi / 2  to reduce to [0, 1].
2138      flags |= 1 << 1;
2139      f7_div (cc, f7_set_u16 (yy, 1), cc);
2140    }
2141#if !defined (MINIMAX_6_6_IN_0_1)
2142  const uint8_t const_a_msb = 0x89;
2143  const int16_t const_a_expo = -2;
2144  if (f7_abscmp_msb_ge (cc, const_a_msb, const_a_expo))
2145    {
2146      // We have C in [0, 1] and we want to use argument reduction by means
2147      // of addition theorem  atan(x) - atan(y) = atan((x - y) / (1 + xy)).
2148      // We want to split [0, 1] into  [0, a] u [a, 1]  in such a way that
2149      // the upper interval will be mapped to  [-a, a].  The system is easy
2150      // to solve and yiels
2151      //    y = 1 / sqrt (3)       ~  0.57735     atan(y) = pi / 6
2152      //    a = (1 - y) / (1 + y)  ~  0.26795  ~  0x0.8930A2F * 2^-1.
2153      flags |= 1 << 2;
2154      // C <- (C - Y) / (1 + C * Y)  in  [-a, a]
2155      const f7_t *cst = F7_CONST_ADDR (1_sqrt3, zz);
2156      f7_mul (yy, cc, cst);
2157      f7_Isub (cc, cst);
2158      f7_Iadd (yy, F7_U16_ADDR (1, zz));
2159      f7_Idiv (cc, yy);
2160    }
2161#endif
2162  // C <- C * p(C^2) / q(C^2)
2163  f7_square (yy, cc);
2164  f7_horner (zz, yy, n_coeff_atan_zahler, coeff_atan_zahler, NULL);
2165  f7_Imul (zz, cc);
2166  f7_horner (cc, yy, n_coeff_atan_nenner, coeff_atan_nenner, NULL);
2167  f7_div (cc, zz, cc);
2168
2169#if !defined (MINIMAX_6_6_IN_0_1)
2170  if (flags & (1 << 2))
2171    f7_Iadd (cc, F7_CONST_ADDR (pi_6, yy));
2172#endif
2173
2174  if (flags & (1 << 1))
2175    {
2176    do_Inf:;
2177      // Y = pi / 2
2178      f7_const (yy, pi);
2179      yy->expo = F7_(const_pi_expo) - 1;
2180      f7_IRsub (cc, yy);
2181    }
2182
2183  cc->flags = a_class & F7_FLAG_sign;
2184}
2185#undef MINIMAX_6_6_IN_0_1
2186#endif // F7MOD_atan_
2187
2188
2189#ifdef F7MOD_asinacos_
2190
2191#define ARRAY_NAME coeff_func_a_zahler
2192#define FOR_NUMERATOR
2193#include "libf7-array.def"
2194#undef  FOR_NUMERATOR
2195#undef  ARRAY_NAME
2196
2197#define ARRAY_NAME coeff_func_a_nenner
2198#define FOR_DENOMINATOR
2199#include "libf7-array.def"
2200#undef  FOR_DENOMINATOR
2201#undef  ARRAY_NAME
2202
2203typedef union
2204{
2205  struct
2206  {
2207    bool    sign : 1;       // Must be bit F7_FLAGNO_sign.
2208    bool    do_acos : 1;    // From caller.
2209    bool    have_acos : 1;  // What we compute from rational approx p/q.
2210    uint8_t res : 5;
2211  };
2212  uint8_t bits;
2213} asinacos_t;
2214
2215F7_WEAK
2216void f7_asinacos (f7_t *cc, const f7_t *aa, uint8_t what)
2217{
2218  f7_t xx7, *xx = &xx7;
2219  f7_t xx27, *xx2 = &xx27;
2220
2221  asinacos_t flags = { .bits = what | f7_signbit (aa) };
2222
2223  f7_abs (xx, aa);
2224
2225  int8_t cmp = f7_cmp (xx, f7_set_u16 (cc, 1));
2226
2227  if (cmp == INT8_MIN
2228      || cmp > 0)
2229    {
2230      return f7_set_nan (cc);
2231    }
2232
2233  if (xx->expo <= -2 || f7_is_zero (xx))
2234    {
2235      // |A| < 1/2:  asin(x) = x * a(2*x^2)
2236      f7_square (xx2, xx);
2237      xx2->expo ++;
2238    }
2239  else
2240    {
2241      // |A| > 1/2: acos (1-x) = sqrt(2*x) * a(x)
2242      // C is 1 from above.
2243      f7_IRsub (xx, cc);
2244      f7_copy (xx2, xx);
2245      flags.have_acos = 1;
2246    }
2247
2248  // MiniMax [5/4] numerator.
2249  f7_horner (cc, xx2, n_coeff_func_a_zahler, coeff_func_a_zahler, NULL);
2250
2251  if (flags.have_acos)
2252    {
2253      xx->expo ++;
2254      f7_Isqrt (xx);
2255    }
2256  f7_Imul (cc, xx);
2257
2258  // MiniMax [5/4] denominator.
2259  f7_horner (xx, xx2, n_coeff_func_a_nenner, coeff_func_a_nenner, NULL);
2260
2261  f7_Idiv (cc, xx);
2262
2263  /*
2264      With the current value of C, we have:
2265
2266		|	     |	      do_asin	    |	    do_acos
2267		|     C	     |	A <= 0	 |  A >= 0  |  A <= 0  |  A >= 0
2268      ----------+------------+-----------+----------+----------+----------
2269      have_asin | asin (|A|) |	  -C	 |    C	    | pi/2 + C | pi/2 - C
2270      have_acos | acos (|A|) | -pi/2 + C | pi/2 - C |  pi - C  |     C
2271
2272      Result = n_pi2 * pi/2 + C * (c_sign ? -1 : 1)
2273      Result (A, do_asin) = asin (A)
2274      Result (A, do_acos) = acos (A)
2275
2276      with
2277	  c_sign = do_acos ^ have_acos ^ a_sign
2278	  n_pi2	 = do_acos + have_acos * (a_sign ^ do_acos) ? (-1 : 1)
2279	  n_pi2 in { -1, 0, 1, 2 }
2280  */
2281
2282  // All what matters for c_sign is bit 0.
2283  uint8_t c_sign = flags.bits;
2284  int8_t n_pi2 = flags.do_acos;
2285  c_sign ^= flags.do_acos;
2286  if (flags.have_acos)
2287    {
2288      n_pi2++;
2289      __asm ("" : "+r" (n_pi2));
2290      if (c_sign & 1)  // c_sign & 1  =  a_sign ^ do_acos
2291	n_pi2 -= 2;
2292      c_sign++;
2293    }
2294
2295  cc->flags = c_sign & F7_FLAG_sign;
2296
2297  if (n_pi2)
2298    {
2299      f7_const (xx, pi);
2300      if (n_pi2 < 0)
2301	xx->sign = 1;
2302      if (n_pi2 != 2)
2303	xx->expo = F7_(const_pi_expo) - 1;
2304
2305      f7_Iadd (cc, xx);
2306    }
2307}
2308#endif // F7MOD_asinacos_
2309
2310
2311#ifdef F7MOD_asin_
2312F7_WEAK
2313void f7_asin (f7_t *cc, const f7_t *aa)
2314{
2315  f7_asinacos (cc, aa, 0);
2316}
2317#endif // F7MOD_asin_
2318
2319
2320#ifdef F7MOD_acos_
2321F7_WEAK
2322void f7_acos (f7_t *cc, const f7_t *aa)
2323{
2324  f7_asinacos (cc, aa, 1 << 1);
2325}
2326#endif // F7MOD_acos_
2327
2328
2329#ifndef IN_LIBGCC2
2330
2331#ifdef F7MOD_put_C_
2332
2333#include <stdio.h>
2334#include <avr/pgmspace.h>
2335
2336static F7_INLINE
2337uint8_t f7_hex_digit (uint8_t nibble)
2338{
2339  nibble = (uint8_t) (nibble + '0');
2340  if (nibble > '9')
2341    nibble = (uint8_t) (nibble + ('a' - '0' - 10));
2342  return nibble;
2343}
2344
2345static void f7_put_hex2 (uint8_t x, FILE *stream)
2346{
2347  putc ('0', stream);
2348  if (x)
2349    {
2350      putc ('x', stream);
2351      putc (f7_hex_digit (x >> 4), stream);
2352      putc (f7_hex_digit (x & 0xf), stream);
2353    }
2354}
2355
2356#define XPUT(str) \
2357  fputs_P (PSTR (str), stream)
2358
2359// Write to STREAM a line that is appropriate for usage in libf7-const.def.
2360
2361F7_WEAK
2362void f7_put_CDEF (const char *name, const f7_t *aa, FILE *stream)
2363{
2364  char buf[7];
2365  XPUT ("F7_CONST_DEF (");
2366  fputs (name, stream);
2367  XPUT (",\t");
2368  uint8_t a_class = f7_classify (aa);
2369  if (! f7_class_nonzero (a_class))
2370    {
2371      f7_put_hex2 (a_class & F7_FLAGS, stream);
2372      XPUT (",\t0,0,0,0,0,0,0,\t0)");
2373      return;
2374    }
2375  putc ('0' + (a_class & F7_FLAGS), stream);
2376  XPUT (",\t");
2377
2378  for (uint8_t i = 0; i < F7_MANT_BYTES; i++)
2379    {
2380      f7_put_hex2 (aa->mant[F7_MANT_BYTES-1 - i], stream);
2381      putc (',', stream);
2382    }
2383  putc ('\t', stream);
2384
2385  itoa (aa->expo, buf, 10);
2386  fputs (buf, stream);
2387  XPUT (")");
2388}
2389
2390void f7_put_C (const f7_t *aa, FILE *stream)
2391{
2392  char buf[7];
2393
2394  uint8_t a_class = f7_classify (aa);
2395  if (f7_class_nan (a_class))
2396    {
2397      XPUT ("{ .is_nan = 1 }");
2398      return;
2399    }
2400  bool sign = a_class & F7_FLAG_sign;
2401
2402  if (f7_class_inf (a_class))
2403    {
2404      XPUT ("{ .is_nan = 1, .sign = ");
2405      putc ('0' + sign, stream);
2406      XPUT (" }");
2407      return;
2408    }
2409
2410  XPUT ("{ .sign = ");
2411  putc ('0' + sign, stream);
2412
2413  XPUT (", .mant = { ");
2414  for (uint8_t i = 0; i < F7_MANT_BYTES; i++)
2415    {
2416      f7_put_hex2 (aa->mant[F7_MANT_BYTES-1 - i], stream);
2417      if (i != F7_MANT_BYTES - 1)
2418	putc (',', stream);
2419    }
2420
2421  XPUT (" }, .expo = ");
2422  itoa (aa->expo, buf, 10);
2423  fputs (buf, stream);
2424  XPUT (" }");
2425}
2426#endif //F7MOD_put_C_
2427
2428
2429#ifdef F7MOD_dump_
2430
2431#include <avr/pgmspace.h>
2432
2433#ifndef AVRTEST_H
2434
2435#include <stdio.h>
2436
2437static void LOG_PSTR (const char *str)
2438{
2439  fputs_P (str, stdout);
2440}
2441
2442static void LOG_PFMT_U16 (const char *fmt, uint16_t x)
2443{
2444  printf_P (fmt, x);
2445}
2446
2447static void LOG_PFMT_FLOAT (const char *fmt, float x)
2448{
2449  printf_P (fmt, x);
2450}
2451
2452#define LOG_X8(X)               LOG_PFMT_U16 (PSTR (" 0x%02x "), (uint8_t)(X))
2453#define LOG_PFMT_S16(FMT, X)    LOG_PFMT_U16 (FMT, (unsigned)(X))
2454#define LOG_PFMT_ADDR(FMT, X)   LOG_PFMT_U16 (FMT, (unsigned)(X))
2455
2456#endif // AVRTEST_H
2457
2458static void dump_byte (uint8_t b)
2459{
2460  LOG_PSTR (PSTR (" "));
2461  for (uint8_t i = 0; i < 8; i++)
2462    {
2463      LOG_PSTR ((b & 0x80) ? PSTR ("1") : PSTR ("0"));
2464      b = (uint8_t) (b << 1);
2465    }
2466}
2467
2468void f7_dump_mant (const f7_t *aa)
2469{
2470  LOG_PSTR (PSTR ("\tmant   ="));
2471  for (int i = F7_MANT_BYTES - 1; i >= 0; i--)
2472    LOG_X8 (aa->mant[i]);
2473  LOG_PSTR (PSTR ("\n\t       ="));
2474
2475  for (int i = F7_MANT_BYTES - 1; i >= 0; i--)
2476    dump_byte (aa->mant[i]);
2477  LOG_PSTR (PSTR ("\n"));
2478}
2479
2480void f7_dump (const f7_t *aa)
2481{
2482  LOG_PFMT_ADDR (PSTR ("\n0x%04x\tflags  = "), aa);
2483  dump_byte (aa->flags);
2484  uint8_t a_class = f7_classify (aa);
2485  LOG_PSTR (PSTR ("  = "));
2486  LOG_PSTR (f7_class_sign (a_class) ? PSTR ("-") : PSTR ("+"));
2487  if (f7_class_inf (a_class))    LOG_PSTR (PSTR ("Inf "));
2488  if (f7_class_nan (a_class))    LOG_PSTR (PSTR ("NaN "));
2489  if (f7_class_zero (a_class))   LOG_PSTR (PSTR ("0 "));
2490  if (f7_class_number (a_class)) LOG_PSTR (PSTR ("Number "));
2491
2492  LOG_PFMT_FLOAT (PSTR (" = %.10g\n"), f7_get_float (aa));
2493  LOG_PFMT_S16 (PSTR ("\texpo   = %d\n"), aa->expo);
2494
2495  f7_dump_mant (aa);
2496}
2497#endif // F7MOD_dump_
2498
2499#endif // ! libgcc
2500
2501#endif // !AVR_TINY
2502