/*
 * ====================================================
 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
 *
 * Developed at SunPro, a Sun Microsystems, Inc. business.
 * Permission to use, copy, modify, and distribute this
 * software is freely granted, provided that this notice
 * is preserved.
 * ====================================================
 */

/*
 * from: @(#)fdlibm.h 5.1 93/09/24
 * $NetBSD: math_private.h,v 1.32 2024/04/03 04:40:23 kre Exp $
 */

#ifndef _MATH_PRIVATE_H_
#define _MATH_PRIVATE_H_

#include <assert.h>
#include <sys/types.h>

/* The original fdlibm code used statements like:
	n0 = ((*(int*)&one)>>29)^1;		* index of high word *
	ix0 = *(n0+(int*)&x);			* high word of x *
	ix1 = *((1-n0)+(int*)&x);		* low word of x *
   to dig two 32 bit words out of the 64 bit IEEE floating point
   value.  That is non-ANSI, and, moreover, the gcc instruction
   scheduler gets it wrong.  We instead use the following macros.
   Unlike the original code, we determine the endianness at compile
   time, not at run time; I don't see much benefit to selecting
   endianness at run time.  */
/* A union which permits us to convert between a double and two 32 bit
   ints.  */

#ifdef __arm__
#if defined(__VFP_FP__) || defined(__ARM_EABI__)
#define	IEEE_WORD_ORDER	BYTE_ORDER
#else
#define	IEEE_WORD_ORDER	BIG_ENDIAN
#endif
#else /* __arm__ */
#define	IEEE_WORD_ORDER	BYTE_ORDER
#endif

/* A union which permits us to convert between a long double and
   four 32 bit ints.  */

#if IEEE_WORD_ORDER == BIG_ENDIAN

typedef union
{
  long double value;
  struct {
    u_int32_t mswhi;
    u_int32_t mswlo;
    u_int32_t lswhi;
    u_int32_t lswlo;
  } parts32;
  struct {
    u_int64_t msw;
    u_int64_t lsw;
  } parts64;
} ieee_quad_shape_type;

#endif

#if IEEE_WORD_ORDER == LITTLE_ENDIAN

typedef union
{
  long double value;
  struct {
    u_int32_t lswlo;
    u_int32_t lswhi;
    u_int32_t mswlo;
    u_int32_t mswhi;
  } parts32;
  struct {
    u_int64_t lsw;
    u_int64_t msw;
  } parts64;
} ieee_quad_shape_type;

#endif

#if IEEE_WORD_ORDER == BIG_ENDIAN

typedef union
{
  double value;
  struct
  {
    u_int32_t msw;
    u_int32_t lsw;
  } parts;
  struct
  {
    u_int64_t w;
  } xparts;
} ieee_double_shape_type;

#endif

#if IEEE_WORD_ORDER == LITTLE_ENDIAN

typedef union
{
  double value;
  struct
  {
    u_int32_t lsw;
    u_int32_t msw;
  } parts;
  struct
  {
    u_int64_t w;
  } xparts;
} ieee_double_shape_type;

#endif

/* Get two 32 bit ints from a double.  */

#define EXTRACT_WORDS(ix0,ix1,d)				\
do {								\
  ieee_double_shape_type ew_u;					\
  ew_u.value = (d);						\
  (ix0) = ew_u.parts.msw;					\
  (ix1) = ew_u.parts.lsw;					\
} while (0)

/* Get a 64-bit int from a double. */
#define EXTRACT_WORD64(ix,d)					\
do {								\
  ieee_double_shape_type ew_u;					\
  ew_u.value = (d);						\
  (ix) = ew_u.xparts.w;						\
} while (0)


/* Get the more significant 32 bit int from a double.  */

#define GET_HIGH_WORD(i,d)					\
do {								\
  ieee_double_shape_type gh_u;					\
  gh_u.value = (d);						\
  (i) = gh_u.parts.msw;						\
} while (0)

/* Get the less significant 32 bit int from a double.  */

#define GET_LOW_WORD(i,d)					\
do {								\
  ieee_double_shape_type gl_u;					\
  gl_u.value = (d);						\
  (i) = gl_u.parts.lsw;						\
} while (0)

/* Set a double from two 32 bit ints.  */

#define INSERT_WORDS(d,ix0,ix1)					\
do {								\
  ieee_double_shape_type iw_u;					\
  iw_u.parts.msw = (ix0);					\
  iw_u.parts.lsw = (ix1);					\
  (d) = iw_u.value;						\
} while (0)

/* Set a double from a 64-bit int. */
#define INSERT_WORD64(d,ix)					\
do {								\
  ieee_double_shape_type iw_u;					\
  iw_u.xparts.w = (ix);						\
  (d) = iw_u.value;						\
} while (0)


/* Set the more significant 32 bits of a double from an int.  */

#define SET_HIGH_WORD(d,v)					\
do {								\
  ieee_double_shape_type sh_u;					\
  sh_u.value = (d);						\
  sh_u.parts.msw = (v);						\
  (d) = sh_u.value;						\
} while (0)

/* Set the less significant 32 bits of a double from an int.  */

#define SET_LOW_WORD(d,v)					\
do {								\
  ieee_double_shape_type sl_u;					\
  sl_u.value = (d);						\
  sl_u.parts.lsw = (v);						\
} while (0)
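
/*
 * For example (illustrative only), code can classify or adjust a double
 * from its words without any floating point arithmetic:
 *
 *	int32_t hx;
 *	GET_HIGH_WORD(hx, x);			* high word of x *
 *	k = ((hx >> 20) & 0x7ff) - 1023;	* unbiased exponent, x normal *
 *	SET_HIGH_WORD(x, hx & 0x7fffffff);	* x = fabs(x) *
 */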

/* A union which permits us to convert between a float and a 32 bit
   int.  */

typedef union
{
  float value;
  u_int32_t word;
} ieee_float_shape_type;

/* Get a 32 bit int from a float.  */

#define GET_FLOAT_WORD(i,d)					\
do {								\
  ieee_float_shape_type gf_u;					\
  gf_u.value = (d);						\
  (i) = gf_u.word;						\
} while (0)

/* Set a float from a 32 bit int.  */

#define SET_FLOAT_WORD(d,i)					\
do {								\
  ieee_float_shape_type sf_u;					\
  sf_u.word = (i);						\
  (d) = sf_u.value;						\
} while (0)
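
/*
 * For example (illustrative only), the float kernels typically classify
 * their argument straight from the word:
 *
 *	int32_t ix;
 *	GET_FLOAT_WORD(ix, x);
 *	ix &= 0x7fffffff;		* |x| *
 *	if (ix > 0x7f800000) ...	* x is NaN *
 *	if (ix >= 0x7f800000) ...	* x is Inf or NaN *
 */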

#define GET_EXPSIGN(u)						\
  (((u)->extu_sign << EXT_EXPBITS) | (u)->extu_exp)
#define SET_EXPSIGN(u, x)					\
  (u)->extu_exp = (x),						\
  (u)->extu_sign = ((x) >> EXT_EXPBITS)
#define GET_LDBL80_MAN(u)					\
  (((uint64_t)(u)->extu_frach << EXT_FRACLBITS) | (u)->extu_fracl)
#define SET_LDBL80_MAN(u, v)					\
  ((u)->extu_fracl = (v) & ((1ULL << EXT_FRACLBITS) - 1),	\
  (u)->extu_frach = (v) >> EXT_FRACLBITS)


/*
 * Get expsign and mantissa as 16 bit and 64 bit ints from an 80 bit long
 * double.
 */

#define	EXTRACT_LDBL80_WORDS(ix0,ix1,d)				\
do {								\
  union ieee_ext_u ew_u;					\
  ew_u.extu_ld = (d);						\
  (ix0) = GET_EXPSIGN(&ew_u);					\
  (ix1) = GET_LDBL80_MAN(&ew_u);				\
} while (0)
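
/*
 * Worked example (illustrative only; assumes EXT_EXPBITS is 15 for the
 * 80 bit format, per <machine/ieee.h>): the expsign word is the sign bit
 * followed by the 15 bit biased exponent.  For x = -2.0L, the sign is 1,
 * the biased exponent is 0x4000, and the mantissa (with its explicit
 * integer bit) is 0x8000000000000000, so EXTRACT_LDBL80_WORDS yields
 * ix0 = 0xc000 and ix1 = 0x8000000000000000.
 */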

/*
 * Get expsign and mantissa as one 16 bit and two 64 bit ints from a 128 bit
 * long double.
 */

#define	EXTRACT_LDBL128_WORDS(ix0,ix1,ix2,d)			\
do {								\
  union ieee_ext_u ew_u;					\
  ew_u.extu_ld = (d);						\
  (ix0) = GET_EXPSIGN(&ew_u);					\
  (ix1) = ew_u.extu_frach;					\
  (ix2) = ew_u.extu_fracl;					\
} while (0)

/* Get expsign as a 16 bit int from a long double.  */

#define	GET_LDBL_EXPSIGN(i,d)					\
do {								\
  union ieee_ext_u ge_u;					\
  ge_u.extu_ld = (d);						\
  (i) = GET_EXPSIGN(&ge_u);					\
} while (0)

/*
 * Set an 80 bit long double from a 16 bit int expsign and a 64 bit int
 * mantissa.
 */

#define	INSERT_LDBL80_WORDS(d,ix0,ix1)				\
do {								\
  union ieee_ext_u iw_u;					\
  SET_EXPSIGN(&iw_u, ix0);					\
  SET_LDBL80_MAN(&iw_u, ix1);					\
  (d) = iw_u.extu_ld;						\
} while (0)

/*
 * Set a 128 bit long double from a 16 bit int expsign and two 64 bit ints
 * comprising the mantissa.
 */

#define	INSERT_LDBL128_WORDS(d,ix0,ix1,ix2)			\
do {								\
  union ieee_ext_u iw_u;					\
  SET_EXPSIGN(&iw_u, ix0);					\
  iw_u.extu_frach = (ix1);					\
  iw_u.extu_fracl = (ix2);					\
  (d) = iw_u.extu_ld;						\
} while (0)

/* Set expsign of a long double from a 16 bit int.  */

#define	SET_LDBL_EXPSIGN(d,v)					\
do {								\
  union ieee_ext_u se_u;					\
  se_u.extu_ld = (d);						\
  SET_EXPSIGN(&se_u, v);					\
  (d) = se_u.extu_ld;						\
} while (0)

#ifdef __i386__
/* Long double constants are broken on i386. */
#define	LD80C(m, ex, v) {						\
	.extu_fracl = (uint32_t)(__CONCAT(m, ULL)),			\
	.extu_frach = __CONCAT(m, ULL) >> EXT_FRACLBITS,		\
	.extu_exp = (0x3fff + (ex)),					\
	.extu_sign = ((v) < 0),						\
}
#else
/**XXX: the following comment may no longer be true:  kre 20240122 **/
/* The above works on non-i386 too, but we use this to check v. */
#define	LD80C(m, ex, v)	{ .extu_ld = (v), }
#endif

/*
 * Attempt to get strict C99 semantics for assignment with non-C99 compilers.
 */
#if FLT_EVAL_METHOD == 0 || __GNUC__ == 0
#define	STRICT_ASSIGN(type, lval, rval)	((lval) = (rval))
#else
#define	STRICT_ASSIGN(type, lval, rval) do {	\
	volatile type __lval;			\
						\
	if (sizeof(type) >= sizeof(long double))	\
		(lval) = (rval);		\
	else {					\
		__lval = (rval);		\
		(lval) = __lval;		\
	}					\
} while (0)
#endif
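
/*
 * Example (illustrative only): on i386 with x87 arithmetic a double
 * expression may be held in an 80 bit register, so a plain assignment
 * such as
 *
 *	double t = x + y;
 *
 * can retain extra precision and later double-round.  Writing
 *
 *	STRICT_ASSIGN(double, t, x + y);
 *
 * forces the value through a volatile double, discarding the extra
 * precision as C99 requires for assignment.
 */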

/* Support switching the mode to FP_PE if necessary. */
#if defined(__i386__) && !defined(NO_FPSETPREC)

#include <ieeefp.h>

#define	ENTERI() ENTERIT(long double)
#define	ENTERIT(returntype)			\
	returntype __retval;			\
	fp_prec_t __oprec;			\
						\
	if ((__oprec = fpgetprec()) != FP_PE)	\
		fpsetprec(FP_PE)
#define	RETURNI(x) do {				\
	__retval = (x);				\
	if (__oprec != FP_PE)			\
		fpsetprec(__oprec);		\
	RETURNF(__retval);			\
} while (0)
#define	ENTERV()				\
	fp_prec_t __oprec;			\
						\
	if ((__oprec = fpgetprec()) != FP_PE)	\
		fpsetprec(FP_PE)
#define	RETURNV() do {				\
	if (__oprec != FP_PE)			\
		fpsetprec(__oprec);		\
	return;					\
} while (0)
#else
#define	ENTERI()
#define	ENTERIT(x)
#define	RETURNI(x)	RETURNF(x)
#define	ENTERV()
#define	RETURNV()	return
#endif

/* Default return statement if hack*_t() is not used. */
#define	RETURNF(v)	return (v)

/*
 * 2sum gives the same result as 2sumF without requiring |a| >= |b| or
 * a == 0, but is slower.
 */
#define	_2sum(a, b) do {	\
	__typeof(a) __s, __w;	\
				\
	__w = (a) + (b);	\
	__s = __w - (a);	\
	(b) = ((a) - (__w - __s)) + ((b) - __s); \
	(a) = __w;		\
} while (0)
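
/*
 * Worked example (illustrative only): with a = 1 and b = 0x1.8p-52
 * (1.5 ulps), the rounded sum is __w = 1 + 0x1p-51 (ties-to-even), so
 * _2sum leaves a = 1 + 0x1p-51 and b = -0x1p-53, and a + b reproduces
 * the exact sum 1 + 0x1.8p-52.
 */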

/*
 * 2sumF algorithm.
 *
 * "Normalize" the terms in the infinite-precision expression a + b for
 * the sum of 2 floating point values so that b is as small as possible
 * relative to 'a'.  (The resulting 'a' is the value of the expression in
 * the same precision as 'a' and the resulting b is the rounding error.)
 * |a| must be >= |b| or 0, b's type must be no larger than 'a's type, and
 * exponent overflow or underflow must not occur.  This uses a Theorem of
 * Dekker (1971).  See Knuth (1981) 4.2.2 Theorem C.  The name "TwoSum"
 * is apparently due to Shewchuk (1997).
 *
 * For this to always work, assignment of a + b to 'a' must not retain any
 * extra precision in a + b.  This is required by C standards but broken
 * in many compilers.  The brokenness cannot be worked around using
 * STRICT_ASSIGN() like we do elsewhere, since the efficiency of this
 * algorithm would be destroyed by non-null strict assignments.  (The
 * compilers are correct to be broken -- the efficiency of all floating
 * point code calculations would be destroyed similarly if they forced the
 * conversions.)
 *
 * Fortunately, a case that works well can usually be arranged by building
 * any extra precision into the type of 'a' -- 'a' should have type float_t,
 * double_t or long double.  b's type should be no larger than 'a's type.
 * Callers should use these types with scopes as large as possible, to
 * reduce their own extra-precision and efficiency problems.  In
 * particular, they shouldn't convert back and forth just to call here.
 */
#ifdef DEBUG
#define	_2sumF(a, b) do {				\
	__typeof(a) __w;				\
	volatile __typeof(a) __ia, __ib, __r, __vw;	\
							\
	__ia = (a);					\
	__ib = (b);					\
	assert(__ia == 0 || fabsl(__ia) >= fabsl(__ib));	\
							\
	__w = (a) + (b);				\
	(b) = ((a) - __w) + (b);			\
	(a) = __w;					\
							\
	/* The next 2 assertions are weak if (a) is already long double. */ \
	assert((long double)__ia + __ib == (long double)(a) + (b));	\
	__vw = __ia + __ib;				\
	__r = __ia - __vw;				\
	__r += __ib;					\
	assert(__vw == (a) && __r == (b));		\
} while (0)
#else /* !DEBUG */
#define	_2sumF(a, b) do {	\
	__typeof(a) __w;	\
				\
	__w = (a) + (b);	\
	(b) = ((a) - __w) + (b); \
	(a) = __w;		\
} while (0)
#endif /* DEBUG */

/*
 * Set x += c, where x is represented in extra precision as a + b.
 * x must be sufficiently normalized and sufficiently larger than c,
 * and the result is then sufficiently normalized.
 *
 * The details of ordering are that |a| must be >= |c| (so that (a, c)
 * can be normalized without extra work to swap 'a' with c).  The details of
 * the normalization are that b must be small relative to the normalized 'a'.
 * Normalization of (a, c) makes the normalized c tiny relative to the
 * normalized a, so b remains small relative to 'a' in the result.  However,
 * b need not ever be tiny relative to 'a'.  For example, b might be about
 * 2**20 times smaller than 'a' to give about 20 extra bits of precision.
 * That is usually enough, and adding c (which by normalization is about
 * 2**53 times smaller than a) cannot change b significantly.  However,
 * cancellation of 'a' with c in normalization of (a, c) may reduce 'a'
 * significantly relative to b.  The caller must ensure that significant
 * cancellation doesn't occur, either by having c of the same sign as 'a',
 * or by having |c| a few percent smaller than |a|.  Pre-normalization of
 * (a, b) may help.
 *
 * This is a variant of an algorithm of Kahan (see Knuth (1981) 4.2.2
 * exercise 19).  We gain considerable efficiency by requiring the terms to
 * be sufficiently normalized and sufficiently increasing.
 */
#define	_3sumF(a, b, c) do {	\
	__typeof(a) __tmp;	\
				\
	__tmp = (c);		\
	_2sumF(__tmp, (a));	\
	(b) += (a);		\
	(a) = __tmp;		\
} while (0)
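
/*
 * Example (illustrative only): when a sum is maintained in extra
 * precision as hi + lo, a sequence of smaller terms t can be folded in
 * with
 *
 *	_3sumF(hi, lo, t);
 *
 * which leaves the rounded sum renormalized in hi and accumulates the
 * residual error into lo, provided |hi| >= |t| and the terms are
 * sufficiently normalized as described above.
 */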

/*
 * Common routine to process the arguments to nan(), nanf(), and nanl().
 */
void _scan_nan(uint32_t *__words, int __num_words, const char *__s);

/*
 * Mix 0, 1 or 2 NaNs.  First add 0 to each arg.  This normally just turns
 * signaling NaNs into quiet NaNs by setting a quiet bit.  We do this
 * because we want to never return a signaling NaN, and also because we
 * don't want the quiet bit to affect the result.  Then mix the converted
 * args using the specified operation.
 *
 * When one arg is NaN, the result is typically that arg quieted.  When both
 * args are NaNs, the result is typically the quietening of the arg whose
 * mantissa is largest after quietening.  When neither arg is NaN, the
 * result may be NaN because it is indeterminate, or finite for subsequent
 * construction of a NaN as the indeterminate 0.0L/0.0L.
 *
 * Technical complications: the result in bits after rounding to the final
 * precision might depend on the runtime precision and/or on compiler
 * optimizations, especially when different register sets are used for
 * different precisions.  Try to make the result not depend on at least the
 * runtime precision by always doing the main mixing step in long double
 * precision.  Try to reduce dependencies on optimizations by adding the
 * 0's in different precisions (unless everything is in long double
 * precision).
 */
#define	nan_mix(x, y)		(nan_mix_op((x), (y), +))
#define	nan_mix_op(x, y, op)	(((x) + 0.0L) op ((y) + 0))
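
/*
 * Example (illustrative only): a two argument function can use this to
 * give a well defined quiet NaN result for NaN inputs:
 *
 *	if (isnan(x) || isnan(y))
 *		return nan_mix(x, y);
 */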

#ifdef	_COMPLEX_H

/*
 * Quoting from ISO/IEC 9899:TC2:
 *
 * 6.2.5.13 Types
 * Each complex type has the same representation and alignment requirements as
 * an array type containing exactly two elements of the corresponding real type;
 * the first element is equal to the real part, and the second element to the
 * imaginary part, of the complex number.
 */
typedef union {
	float complex z;
	float parts[2];
} float_complex;

typedef union {
	double complex z;
	double parts[2];
} double_complex;

typedef union {
	long double complex z;
	long double parts[2];
} long_double_complex;

#define	REAL_PART(z)	((z).parts[0])
#define	IMAG_PART(z)	((z).parts[1])

/*
 * Inline functions that can be used to construct complex values.
 *
 * The C99 standard intends x+I*y to be used for this, but x+I*y is
 * currently unusable in general since gcc introduces many overflow,
 * underflow, sign and efficiency bugs by rewriting I*y as
 * (0.0+I)*(y+0.0*I) and laboriously computing the full complex product.
 * In particular, I*Inf is corrupted to NaN+I*Inf, and I*-0 is corrupted
 * to -0.0+I*0.0.
 *
 * The C11 standard introduced the macros CMPLX(), CMPLXF() and CMPLXL()
 * to construct complex values.  Compilers that conform only to the C99
 * standard require the following functions to avoid the above issues.
 */

#ifndef CMPLXF
static __inline float complex
CMPLXF(float x, float y)
{
	float_complex z;

	REAL_PART(z) = x;
	IMAG_PART(z) = y;
	return (z.z);
}
#endif

#ifndef CMPLX
static __inline double complex
CMPLX(double x, double y)
{
	double_complex z;

	REAL_PART(z) = x;
	IMAG_PART(z) = y;
	return (z.z);
}
#endif

#ifndef CMPLXL
static __inline long double complex
CMPLXL(long double x, long double y)
{
	long_double_complex z;

	REAL_PART(z) = x;
	IMAG_PART(z) = y;
	return (z.z);
}
#endif
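
/*
 * Example (illustrative only): CMPLX(x, y) assembles the value from its
 * parts exactly, so CMPLX(0.0, INFINITY) has real part +0.0 and
 * CMPLXF(0.0F, -0.0F) preserves the sign of the zero imaginary part,
 * neither of which is guaranteed for x + I*y under the gcc rewriting
 * described above.
 */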

#endif	/* _COMPLEX_H */

/* ieee style elementary functions */
extern double __ieee754_sqrt __P((double));
extern double __ieee754_acos __P((double));
extern double __ieee754_acosh __P((double));
extern double __ieee754_log __P((double));
extern double __ieee754_atanh __P((double));
extern double __ieee754_asin __P((double));
extern double __ieee754_atan2 __P((double,double));
extern double __ieee754_exp __P((double));
extern double __ieee754_cosh __P((double));
extern double __ieee754_fmod __P((double,double));
extern double __ieee754_pow __P((double,double));
extern double __ieee754_lgamma_r __P((double,int *));
extern double __ieee754_gamma_r __P((double,int *));
extern double __ieee754_lgamma __P((double));
extern double __ieee754_gamma __P((double));
extern double __ieee754_log10 __P((double));
extern double __ieee754_log2 __P((double));
extern double __ieee754_sinh __P((double));
extern double __ieee754_hypot __P((double,double));
extern double __ieee754_j0 __P((double));
extern double __ieee754_j1 __P((double));
extern double __ieee754_y0 __P((double));
extern double __ieee754_y1 __P((double));
extern double __ieee754_jn __P((int,double));
extern double __ieee754_yn __P((int,double));
extern double __ieee754_remainder __P((double,double));
extern int32_t __ieee754_rem_pio2 __P((double,double*));
extern double __ieee754_scalb __P((double,double));

/* fdlibm kernel functions */
extern double __kernel_standard __P((double,double,int));
extern double __kernel_sin __P((double,double,int));
extern double __kernel_cos __P((double,double));
extern double __kernel_tan __P((double,double,int));
extern int    __kernel_rem_pio2 __P((double*,double*,int,int,int));


/* ieee style elementary float functions */
extern float __ieee754_sqrtf __P((float));
extern float __ieee754_acosf __P((float));
extern float __ieee754_acoshf __P((float));
extern float __ieee754_logf __P((float));
extern float __ieee754_atanhf __P((float));
extern float __ieee754_asinf __P((float));
extern float __ieee754_atan2f __P((float,float));
extern float __ieee754_expf __P((float));
extern float __ieee754_coshf __P((float));
extern float __ieee754_fmodf __P((float,float));
extern float __ieee754_powf __P((float,float));
extern float __ieee754_lgammaf_r __P((float,int *));
extern float __ieee754_gammaf_r __P((float,int *));
extern float __ieee754_lgammaf __P((float));
extern float __ieee754_gammaf __P((float));
extern float __ieee754_log10f __P((float));
extern float __ieee754_log2f __P((float));
extern float __ieee754_sinhf __P((float));
extern float __ieee754_hypotf __P((float,float));
extern float __ieee754_j0f __P((float));
extern float __ieee754_j1f __P((float));
extern float __ieee754_y0f __P((float));
extern float __ieee754_y1f __P((float));
extern float __ieee754_jnf __P((int,float));
extern float __ieee754_ynf __P((int,float));
extern float __ieee754_remainderf __P((float,float));
extern int32_t __ieee754_rem_pio2f __P((float,float*));
extern float __ieee754_scalbf __P((float,float));

/* float versions of fdlibm kernel functions */
extern float __kernel_sinf __P((float,float,int));
extern float __kernel_cosf __P((float,float));
extern float __kernel_tanf __P((float,float,int));
extern int   __kernel_rem_pio2f __P((float*,float*,int,int,int,const int32_t*));

/* ieee style elementary long double functions */
extern long double __ieee754_fmodl(long double, long double);
extern long double __ieee754_sqrtl(long double);

/*
 * TRUNC() is a macro that sets the trailing 27 bits in the mantissa of an
 * IEEE double variable to zero.  It must be expression-like for syntactic
 * reasons, and we implement this expression using an inline function
 * instead of a pure macro to avoid depending on the gcc feature of
 * statement-expressions.
 */
#define	TRUNC(d)	(_b_trunc(&(d)))

static __inline void
_b_trunc(volatile double *_dp)
{
	uint32_t _lw;

	GET_LOW_WORD(_lw, *_dp);
	SET_LOW_WORD(*_dp, _lw & 0xf8000000);
}
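
/*
 * Example (illustrative only): TRUNC() supports Dekker style exact
 * multiplication by splitting a double into a 26 bit head and a tail:
 *
 *	hi = x;
 *	TRUNC(hi);		* top 26 mantissa bits of x *
 *	lo = x - hi;		* remaining bits, exactly *
 *
 * so that x == hi + lo and hi * hi is exact in double precision.
 */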

struct Double {
	double	a;
	double	b;
};

/*
 * Functions internal to the math package, yet not static.
 */
double	__exp__D(double, double);
struct Double __log__D(double);

/*
 * The rnint() family rounds to the nearest integer for a restricted
 * range of args (up to about 2**MANT_DIG).  We assume that the current
 * rounding mode is FE_TONEAREST so that this can be done efficiently.
 * Extra precision causes more problems in practice, and we only centralize
 * this here to reduce those problems, and have not solved the efficiency
 * problems.  The exp2() family uses a more delicate version of this that
 * requires extracting bits from the intermediate value, so it is not
 * centralized here and should copy any solution of the efficiency problems.
 */

static inline double
rnint(double x)
{
	/*
	 * This casts to double to kill any extra precision.  This depends
	 * on the cast being applied to a double_t to avoid compiler bugs
	 * (this is a cleaner version of STRICT_ASSIGN()).  This is
	 * inefficient if there actually is extra precision, but is hard
	 * to improve on.  We use double_t in the API to minimise conversions
	 * for just calling here.  Note that we cannot easily change the
	 * magic number to the one that works directly with double_t, since
	 * the rounding precision is variable at runtime on x86 so the
	 * magic number would need to be variable.  Assuming that the
	 * rounding precision is always the default is too fragile.  This
	 * and many other complications will move when the default is
	 * changed to FP_PE.
	 */
	return ((double)(x + 0x1.8p52) - 0x1.8p52);
}
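
/*
 * Worked example (illustrative only): 0x1.8p52 is 1.5 * 2**52, so for
 * |x| < 2**51 the addition lands in a binade where the ulp is 1, and
 * the rounding step of the addition rounds x to the nearest integer;
 * subtracting 0x1.8p52 then recovers that integer exactly.  E.g. for
 * x = 2.3, x + 0x1.8p52 rounds to 0x1.8p52 + 2, and the subtraction
 * yields 2.0.
 */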

static inline float
rnintf(float x)
{
	/*
	 * As for rnint(), except we could just call that to handle the
	 * extra precision case, usually without losing efficiency.
	 */
	return ((float)(x + 0x1.8p23F) - 0x1.8p23F);
}

#ifdef LDBL_MANT_DIG
/*
 * The complications for extra precision are smaller for rnintl() since it
 * can safely assume that the rounding precision has been increased from
 * its default to FP_PE on x86.  We don't exploit that here to get small
 * optimizations from limiting the range to double.  We just need it for
 * the magic number to work with long doubles.  ld128 callers should use
 * rnint() instead of this if possible.  ld80 callers should prefer
 * rnintl() since for amd64 this avoids swapping the register set, while
 * for i386 it makes no difference (assuming FP_PE), and for other arches
 * it makes little difference.
 */
static inline long double
rnintl(long double x)
{
	return (x + __CONCAT(0x1.8p,LDBL_MANT_DIG) / 2 -
	    __CONCAT(0x1.8p,LDBL_MANT_DIG) / 2);
}
#endif /* LDBL_MANT_DIG */

/*
 * irint() and i64rint() give the same result as casting to their integer
 * return type provided their arg is a floating point integer.  They can
 * sometimes be more efficient because no rounding is required.
 */
#if (defined(amd64) || defined(__i386__)) && defined(__GNUCLIKE_ASM)
#define	irint(x)						\
    (sizeof(x) == sizeof(float) &&				\
    sizeof(__float_t) == sizeof(long double) ? irintf(x) :	\
    sizeof(x) == sizeof(double) &&				\
    sizeof(__double_t) == sizeof(long double) ? irintd(x) :	\
    sizeof(x) == sizeof(long double) ? irintl(x) : (int)(x))
#else
#define	irint(x)	((int)(x))
#endif

#define	i64rint(x)	((int64_t)(x))	/* only needed for ld128 so not opt. */

#if defined(__i386__) && defined(__GNUCLIKE_ASM)
static __inline int
irintf(float x)
{
	int n;

	__asm("fistl %0" : "=m" (n) : "t" (x));
	return (n);
}

static __inline int
irintd(double x)
{
	int n;

	__asm("fistl %0" : "=m" (n) : "t" (x));
	return (n);
}
#endif

#if (defined(__amd64__) || defined(__i386__)) && defined(__GNUCLIKE_ASM)
static __inline int
irintl(long double x)
{
	int n;

	__asm("fistl %0" : "=m" (n) : "t" (x));
	return (n);
}
#endif

/*
 * The following are fast floor macros for 0 <= |x| < 0x1p(N-1), where
 * N is the precision of the type of x.  These macros are used in the
 * half-cycle trigonometric functions (e.g., sinpi(x)).
 */
#define	FFLOORF(x, j0, ix) do {			\
	(j0) = (((ix) >> 23) & 0xff) - 0x7f;	\
	(ix) &= ~(0x007fffff >> (j0));		\
	SET_FLOAT_WORD((x), (ix));		\
} while (0)
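
/*
 * Worked example (illustrative only): for x = 3.75f, ix = 0x40700000,
 * so j0 = 0x80 - 0x7f = 1 and the mask clears all but the top j0
 * mantissa bits, giving ix = 0x40400000, i.e. x = 3.0f = floorf(3.75f).
 */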

#define	FFLOOR(x, j0, ix, lx) do {				\
	(j0) = (((ix) >> 20) & 0x7ff) - 0x3ff;			\
	if ((j0) < 20) {					\
		(ix) &= ~(0x000fffff >> (j0));			\
		(lx) = 0;					\
	} else {						\
		(lx) &= ~((uint32_t)0xffffffff >> ((j0) - 20));	\
	}							\
	INSERT_WORDS((x), (ix), (lx));				\
} while (0)

#define	FFLOORL80(x, j0, ix, lx) do {			\
	(j0) = (ix) - 0x3fff + 1;			\
	if ((j0) < 32) {				\
		(lx) = ((lx) >> 32) << 32;		\
		(lx) &= ~((((lx) << 32)-1) >> (j0));	\
	} else {					\
		uint64_t _m;				\
		_m = (uint64_t)-1 >> (j0);		\
		if ((lx) & _m) (lx) &= ~_m;		\
	}						\
	INSERT_LDBL80_WORDS((x), (ix), (lx));		\
} while (0)

#define	FFLOORL128(x, ai, ar) do {			\
	union ieee_ext_u u;				\
	uint64_t m;					\
	int e;						\
	u.extu_ld = (x);				\
	e = u.extu_exp - 16383;				\
	if (e < 48) {					\
		m = ((1llu << 49) - 1) >> (e + 1);	\
		u.extu_frach &= ~m;			\
		u.extu_fracl = 0;			\
	} else {					\
		m = (uint64_t)-1 >> (e - 48);		\
		u.extu_fracl &= ~m;			\
	}						\
	(ai) = u.extu_ld;				\
	(ar) = (x) - (ai);				\
} while (0)

#ifdef DEBUG
#if defined(__amd64__) || defined(__i386__)
#define	breakpoint()	asm("int $3")
#else
#include <signal.h>

#define	breakpoint()	raise(SIGTRAP)
#endif
#endif

#ifdef STRUCT_RETURN
#define	RETURNSP(rp) do {		\
	if (!(rp)->lo_set)		\
		RETURNF((rp)->hi);	\
	RETURNF((rp)->hi + (rp)->lo);	\
} while (0)
#define	RETURNSPI(rp) do {		\
	if (!(rp)->lo_set)		\
		RETURNI((rp)->hi);	\
	RETURNI((rp)->hi + (rp)->lo);	\
} while (0)
#endif

#define	SUM2P(x, y) ({			\
	const __typeof (x) __x = (x);	\
	const __typeof (y) __y = (y);	\
	__x + __y;			\
})

#ifndef INLINE_KERNEL_SINDF
float	__kernel_sindf(double);
#endif
#ifndef INLINE_KERNEL_COSDF
float	__kernel_cosdf(double);
#endif
#ifndef INLINE_KERNEL_TANDF
float	__kernel_tandf(double,int);
#endif

/* long double precision kernel functions */
long double __kernel_sinl(long double, long double, int);
long double __kernel_cosl(long double, long double);
long double __kernel_tanl(long double, long double, int);

#endif /* _MATH_PRIVATE_H_ */