1/* 2 * ==================================================== 3 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. 4 * 5 * Developed at SunPro, a Sun Microsystems, Inc. business. 6 * Permission to use, copy, modify, and distribute this 7 * software is freely granted, provided that this notice 8 * is preserved. 9 * ==================================================== 10 */ 11 12/* 13 * from: @(#)fdlibm.h 5.1 93/09/24 14 * $NetBSD: math_private.h,v 1.32 2024/04/03 04:40:23 kre Exp $ 15 */ 16 17#ifndef _MATH_PRIVATE_H_ 18#define _MATH_PRIVATE_H_ 19 20#include <assert.h> 21#include <sys/types.h> 22 23/* The original fdlibm code used statements like: 24 n0 = ((*(int*)&one)>>29)^1; * index of high word * 25 ix0 = *(n0+(int*)&x); * high word of x * 26 ix1 = *((1-n0)+(int*)&x); * low word of x * 27 to dig two 32 bit words out of the 64 bit IEEE floating point 28 value. That is non-ANSI, and, moreover, the gcc instruction 29 scheduler gets it wrong. We instead use the following macros. 30 Unlike the original code, we determine the endianness at compile 31 time, not at run time; I don't see much benefit to selecting 32 endianness at run time. */ 33 34/* A union which permits us to convert between a double and two 32 bit 35 ints. */ 36 37/* 38 * A union which permits us to convert between a double and two 32 bit 39 * ints. 40 */ 41 42#ifdef __arm__ 43#if defined(__VFP_FP__) || defined(__ARM_EABI__) 44#define IEEE_WORD_ORDER BYTE_ORDER 45#else 46#define IEEE_WORD_ORDER BIG_ENDIAN 47#endif 48#else /* __arm__ */ 49#define IEEE_WORD_ORDER BYTE_ORDER 50#endif 51 52/* A union which permits us to convert between a long double and 53 four 32 bit ints. */ 54 55#if IEEE_WORD_ORDER == BIG_ENDIAN 56 57typedef union 58{ 59 long double value; 60 struct { 61 u_int32_t mswhi; 62 u_int32_t mswlo; 63 u_int32_t lswhi; 64 u_int32_t lswlo; 65 } parts32; 66 struct { 67 u_int64_t msw; 68 u_int64_t lsw; 69 } parts64; 70} ieee_quad_shape_type; 71 72#endif 73 74#if IEEE_WORD_ORDER == LITTLE_ENDIAN 75 76typedef union 77{ 78 long double value; 79 struct { 80 u_int32_t lswlo; 81 u_int32_t lswhi; 82 u_int32_t mswlo; 83 u_int32_t mswhi; 84 } parts32; 85 struct { 86 u_int64_t lsw; 87 u_int64_t msw; 88 } parts64; 89} ieee_quad_shape_type; 90 91#endif 92 93#if IEEE_WORD_ORDER == BIG_ENDIAN 94 95typedef union 96{ 97 double value; 98 struct 99 { 100 u_int32_t msw; 101 u_int32_t lsw; 102 } parts; 103 struct 104 { 105 u_int64_t w; 106 } xparts; 107} ieee_double_shape_type; 108 109#endif 110 111#if IEEE_WORD_ORDER == LITTLE_ENDIAN 112 113typedef union 114{ 115 double value; 116 struct 117 { 118 u_int32_t lsw; 119 u_int32_t msw; 120 } parts; 121 struct 122 { 123 u_int64_t w; 124 } xparts; 125} ieee_double_shape_type; 126 127#endif 128 129/* Get two 32 bit ints from a double. */ 130 131#define EXTRACT_WORDS(ix0,ix1,d) \ 132do { \ 133 ieee_double_shape_type ew_u; \ 134 ew_u.value = (d); \ 135 (ix0) = ew_u.parts.msw; \ 136 (ix1) = ew_u.parts.lsw; \ 137} while (0) 138 139/* Get a 64-bit int from a double. */ 140#define EXTRACT_WORD64(ix,d) \ 141do { \ 142 ieee_double_shape_type ew_u; \ 143 ew_u.value = (d); \ 144 (ix) = ew_u.xparts.w; \ 145} while (0) 146 147 148/* Get the more significant 32 bit int from a double. */ 149 150#define GET_HIGH_WORD(i,d) \ 151do { \ 152 ieee_double_shape_type gh_u; \ 153 gh_u.value = (d); \ 154 (i) = gh_u.parts.msw; \ 155} while (0) 156 157/* Get the less significant 32 bit int from a double. */ 158 159#define GET_LOW_WORD(i,d) \ 160do { \ 161 ieee_double_shape_type gl_u; \ 162 gl_u.value = (d); \ 163 (i) = gl_u.parts.lsw; \ 164} while (0) 165 166/* Set a double from two 32 bit ints. */ 167 168#define INSERT_WORDS(d,ix0,ix1) \ 169do { \ 170 ieee_double_shape_type iw_u; \ 171 iw_u.parts.msw = (ix0); \ 172 iw_u.parts.lsw = (ix1); \ 173 (d) = iw_u.value; \ 174} while (0) 175 176/* Set a double from a 64-bit int. */ 177#define INSERT_WORD64(d,ix) \ 178do { \ 179 ieee_double_shape_type iw_u; \ 180 iw_u.xparts.w = (ix); \ 181 (d) = iw_u.value; \ 182} while (0) 183 184 185/* Set the more significant 32 bits of a double from an int. */ 186 187#define SET_HIGH_WORD(d,v) \ 188do { \ 189 ieee_double_shape_type sh_u; \ 190 sh_u.value = (d); \ 191 sh_u.parts.msw = (v); \ 192 (d) = sh_u.value; \ 193} while (0) 194 195/* Set the less significant 32 bits of a double from an int. */ 196 197#define SET_LOW_WORD(d,v) \ 198do { \ 199 ieee_double_shape_type sl_u; \ 200 sl_u.value = (d); \ 201 sl_u.parts.lsw = (v); \ 202 (d) = sl_u.value; \ 203} while (0) 204 205/* A union which permits us to convert between a float and a 32 bit 206 int. */ 207 208typedef union 209{ 210 float value; 211 u_int32_t word; 212} ieee_float_shape_type; 213 214/* Get a 32 bit int from a float. */ 215 216#define GET_FLOAT_WORD(i,d) \ 217do { \ 218 ieee_float_shape_type gf_u; \ 219 gf_u.value = (d); \ 220 (i) = gf_u.word; \ 221} while (0) 222 223/* Set a float from a 32 bit int. */ 224 225#define SET_FLOAT_WORD(d,i) \ 226do { \ 227 ieee_float_shape_type sf_u; \ 228 sf_u.word = (i); \ 229 (d) = sf_u.value; \ 230} while (0) 231 232#define GET_EXPSIGN(u) \ 233 (((u)->extu_sign << EXT_EXPBITS) | (u)->extu_exp) 234#define SET_EXPSIGN(u, x) \ 235 (u)->extu_exp = (x), \ 236 (u)->extu_sign = ((x) >> EXT_EXPBITS) 237#define GET_LDBL80_MAN(u) \ 238 (((uint64_t)(u)->extu_frach << EXT_FRACLBITS) | (u)->extu_fracl) 239#define SET_LDBL80_MAN(u, v) \ 240 ((u)->extu_fracl = (v) & ((1ULL << EXT_FRACLBITS) - 1), \ 241 (u)->extu_frach = (v) >> EXT_FRACLBITS) 242 243 244/* 245 * Get expsign and mantissa as 16 bit and 64 bit ints from an 80 bit long 246 * double. 247 */ 248 249#define EXTRACT_LDBL80_WORDS(ix0,ix1,d) \ 250do { \ 251 union ieee_ext_u ew_u; \ 252 ew_u.extu_ld = (d); \ 253 (ix0) = GET_EXPSIGN(&ew_u); \ 254 (ix1) = GET_LDBL80_MAN(&ew_u); \ 255} while (0) 256 257/* 258 * Get expsign and mantissa as one 16 bit and two 64 bit ints from a 128 bit 259 * long double. 260 */ 261 262#define EXTRACT_LDBL128_WORDS(ix0,ix1,ix2,d) \ 263do { \ 264 union ieee_ext_u ew_u; \ 265 ew_u.extu_ld = (d); \ 266 (ix0) = GET_EXPSIGN(&ew_u); \ 267 (ix1) = ew_u.extu_fracl; \ 268 (ix2) = ew_u.extu_frach; \ 269} while (0) 270 271/* Get expsign as a 16 bit int from a long double. */ 272 273#define GET_LDBL_EXPSIGN(i,d) \ 274do { \ 275 union ieee_ext_u ge_u; \ 276 ge_u.extu_ld = (d); \ 277 (i) = GET_EXPSIGN(&ge_u); \ 278} while (0) 279 280/* 281 * Set an 80 bit long double from a 16 bit int expsign and a 64 bit int 282 * mantissa. 283 */ 284 285#define INSERT_LDBL80_WORDS(d,ix0,ix1) \ 286do { \ 287 union ieee_ext_u iw_u; \ 288 SET_EXPSIGN(&iw_u, ix0); \ 289 SET_LDBL80_MAN(&iw_u, ix1); \ 290 (d) = iw_u.extu_ld; \ 291} while (0) 292 293/* 294 * Set a 128 bit long double from a 16 bit int expsign and two 64 bit ints 295 * comprising the mantissa. 296 */ 297 298#define INSERT_LDBL128_WORDS(d,ix0,ix1,ix2) \ 299do { \ 300 union ieee_ext_u iw_u; \ 301 SET_EXPSIGN(&iw_u, ix0); \ 302 iw_u.extu_frach = (ix1); \ 303 iw_u.extu_fracl = (ix2); \ 304 (d) = iw_u.extu_ld; \ 305} while (0) 306 307/* Set expsign of a long double from a 16 bit int. */ 308 309#define SET_LDBL_EXPSIGN(d,v) \ 310do { \ 311 union ieee_ext_u se_u; \ 312 se_u.extu_ld = (d); \ 313 SET_EXPSIGN(&se_u, v); \ 314 (d) = se_u.extu_ld; \ 315} while (0) 316 317#ifdef __i386__ 318/* Long double constants are broken on i386. */ 319#define LD80C(m, ex, v) { \ 320 .extu_fracl = (uint32_t)(__CONCAT(m, ULL)), \ 321 .extu_frach = __CONCAT(m, ULL) >> EXT_FRACLBITS, \ 322 .extu_exp = (0x3fff + (ex)), \ 323 .extu_sign = ((v) < 0), \ 324} 325#else 326/**XXX: the following comment may no longer be true: kre 20240122 **/ 327/* The above works on non-i386 too, but we use this to check v. */ 328#define LD80C(m, ex, v) { .extu_ld = (v), } 329#endif 330 331/* 332 * Attempt to get strict C99 semantics for assignment with non-C99 compilers. 333 */ 334#if FLT_EVAL_METHOD == 0 || __GNUC__ == 0 335#define STRICT_ASSIGN(type, lval, rval) ((lval) = (rval)) 336#else 337#define STRICT_ASSIGN(type, lval, rval) do { \ 338 volatile type __lval; \ 339 \ 340 if (sizeof(type) >= sizeof(long double)) \ 341 (lval) = (rval); \ 342 else { \ 343 __lval = (rval); \ 344 (lval) = __lval; \ 345 } \ 346} while (0) 347#endif 348 349/* Support switching the mode to FP_PE if necessary. */ 350#if defined(__i386__) && !defined(NO_FPSETPREC) 351 352#include <ieeefp.h> 353 354#define ENTERI() ENTERIT(long double) 355#define ENTERIT(returntype) \ 356 returntype __retval; \ 357 fp_prec_t __oprec; \ 358 \ 359 if ((__oprec = fpgetprec()) != FP_PE) \ 360 fpsetprec(FP_PE) 361#define RETURNI(x) do { \ 362 __retval = (x); \ 363 if (__oprec != FP_PE) \ 364 fpsetprec(__oprec); \ 365 RETURNF(__retval); \ 366} while (0) 367#define ENTERV() \ 368 fp_prec_t __oprec; \ 369 \ 370 if ((__oprec = fpgetprec()) != FP_PE) \ 371 fpsetprec(FP_PE) 372#define RETURNV() do { \ 373 if (__oprec != FP_PE) \ 374 fpsetprec(__oprec); \ 375 return; \ 376} while (0) 377#else 378#define ENTERI() 379#define ENTERIT(x) 380#define RETURNI(x) RETURNF(x) 381#define ENTERV() 382#define RETURNV() return 383#endif 384 385/* Default return statement if hack*_t() is not used. */ 386#define RETURNF(v) return (v) 387 388/* 389 * 2sum gives the same result as 2sumF without requiring |a| >= |b| or 390 * a == 0, but is slower. 391 */ 392#define _2sum(a, b) do { \ 393 __typeof(a) __s, __w; \ 394 \ 395 __w = (a) + (b); \ 396 __s = __w - (a); \ 397 (b) = ((a) - (__w - __s)) + ((b) - __s); \ 398 (a) = __w; \ 399} while (0) 400 401/* 402 * 2sumF algorithm. 403 * 404 * "Normalize" the terms in the infinite-precision expression a + b for 405 * the sum of 2 floating point values so that b is as small as possible 406 * relative to 'a'. (The resulting 'a' is the value of the expression in 407 * the same precision as 'a' and the resulting b is the rounding error.) 408 * |a| must be >= |b| or 0, b's type must be no larger than 'a's type, and 409 * exponent overflow or underflow must not occur. This uses a Theorem of 410 * Dekker (1971). See Knuth (1981) 4.2.2 Theorem C. The name "TwoSum" 411 * is apparently due to Skewchuk (1997). 412 * 413 * For this to always work, assignment of a + b to 'a' must not retain any 414 * extra precision in a + b. This is required by C standards but broken 415 * in many compilers. The brokenness cannot be worked around using 416 * STRICT_ASSIGN() like we do elsewhere, since the efficiency of this 417 * algorithm would be destroyed by non-null strict assignments. (The 418 * compilers are correct to be broken -- the efficiency of all floating 419 * point code calculations would be destroyed similarly if they forced the 420 * conversions.) 421 * 422 * Fortunately, a case that works well can usually be arranged by building 423 * any extra precision into the type of 'a' -- 'a' should have type float_t, 424 * double_t or long double. b's type should be no larger than 'a's type. 425 * Callers should use these types with scopes as large as possible, to 426 * reduce their own extra-precision and efficiciency problems. In 427 * particular, they shouldn't convert back and forth just to call here. 428 */ 429#ifdef DEBUG 430#define _2sumF(a, b) do { \ 431 __typeof(a) __w; \ 432 volatile __typeof(a) __ia, __ib, __r, __vw; \ 433 \ 434 __ia = (a); \ 435 __ib = (b); \ 436 assert(__ia == 0 || fabsl(__ia) >= fabsl(__ib)); \ 437 \ 438 __w = (a) + (b); \ 439 (b) = ((a) - __w) + (b); \ 440 (a) = __w; \ 441 \ 442 /* The next 2 assertions are weak if (a) is already long double. */ \ 443 assert((long double)__ia + __ib == (long double)(a) + (b)); \ 444 __vw = __ia + __ib; \ 445 __r = __ia - __vw; \ 446 __r += __ib; \ 447 assert(__vw == (a) && __r == (b)); \ 448} while (0) 449#else /* !DEBUG */ 450#define _2sumF(a, b) do { \ 451 __typeof(a) __w; \ 452 \ 453 __w = (a) + (b); \ 454 (b) = ((a) - __w) + (b); \ 455 (a) = __w; \ 456} while (0) 457#endif /* DEBUG */ 458 459/* 460 * Set x += c, where x is represented in extra precision as a + b. 461 * x must be sufficiently normalized and sufficiently larger than c, 462 * and the result is then sufficiently normalized. 463 * 464 * The details of ordering are that |a| must be >= |c| (so that (a, c) 465 * can be normalized without extra work to swap 'a' with c). The details of 466 * the normalization are that b must be small relative to the normalized 'a'. 467 * Normalization of (a, c) makes the normalized c tiny relative to the 468 * normalized a, so b remains small relative to 'a' in the result. However, 469 * b need not ever be tiny relative to 'a'. For example, b might be about 470 * 2**20 times smaller than 'a' to give about 20 extra bits of precision. 471 * That is usually enough, and adding c (which by normalization is about 472 * 2**53 times smaller than a) cannot change b significantly. However, 473 * cancellation of 'a' with c in normalization of (a, c) may reduce 'a' 474 * significantly relative to b. The caller must ensure that significant 475 * cancellation doesn't occur, either by having c of the same sign as 'a', 476 * or by having |c| a few percent smaller than |a|. Pre-normalization of 477 * (a, b) may help. 478 * 479 * This is a variant of an algorithm of Kahan (see Knuth (1981) 4.2.2 480 * exercise 19). We gain considerable efficiency by requiring the terms to 481 * be sufficiently normalized and sufficiently increasing. 482 */ 483#define _3sumF(a, b, c) do { \ 484 __typeof(a) __tmp; \ 485 \ 486 __tmp = (c); \ 487 _2sumF(__tmp, (a)); \ 488 (b) += (a); \ 489 (a) = __tmp; \ 490} while (0) 491 492/* 493 * Common routine to process the arguments to nan(), nanf(), and nanl(). 494 */ 495void _scan_nan(uint32_t *__words, int __num_words, const char *__s); 496 497/* 498 * Mix 0, 1 or 2 NaNs. First add 0 to each arg. This normally just turns 499 * signaling NaNs into quiet NaNs by setting a quiet bit. We do this 500 * because we want to never return a signaling NaN, and also because we 501 * don't want the quiet bit to affect the result. Then mix the converted 502 * args using the specified operation. 503 * 504 * When one arg is NaN, the result is typically that arg quieted. When both 505 * args are NaNs, the result is typically the quietening of the arg whose 506 * mantissa is largest after quietening. When neither arg is NaN, the 507 * result may be NaN because it is indeterminate, or finite for subsequent 508 * construction of a NaN as the indeterminate 0.0L/0.0L. 509 * 510 * Technical complications: the result in bits after rounding to the final 511 * precision might depend on the runtime precision and/or on compiler 512 * optimizations, especially when different register sets are used for 513 * different precisions. Try to make the result not depend on at least the 514 * runtime precision by always doing the main mixing step in long double 515 * precision. Try to reduce dependencies on optimizations by adding the 516 * the 0's in different precisions (unless everything is in long double 517 * precision). 518 */ 519#define nan_mix(x, y) (nan_mix_op((x), (y), +)) 520#define nan_mix_op(x, y, op) (((x) + 0.0L) op ((y) + 0)) 521 522#ifdef _COMPLEX_H 523 524/* 525 * Quoting from ISO/IEC 9899:TC2: 526 * 527 * 6.2.5.13 Types 528 * Each complex type has the same representation and alignment requirements as 529 * an array type containing exactly two elements of the corresponding real type; 530 * the first element is equal to the real part, and the second element to the 531 * imaginary part, of the complex number. 532 */ 533typedef union { 534 float complex z; 535 float parts[2]; 536} float_complex; 537 538typedef union { 539 double complex z; 540 double parts[2]; 541} double_complex; 542 543typedef union { 544 long double complex z; 545 long double parts[2]; 546} long_double_complex; 547 548#define REAL_PART(z) ((z).parts[0]) 549#define IMAG_PART(z) ((z).parts[1]) 550 551/* 552 * Inline functions that can be used to construct complex values. 553 * 554 * The C99 standard intends x+I*y to be used for this, but x+I*y is 555 * currently unusable in general since gcc introduces many overflow, 556 * underflow, sign and efficiency bugs by rewriting I*y as 557 * (0.0+I)*(y+0.0*I) and laboriously computing the full complex product. 558 * In particular, I*Inf is corrupted to NaN+I*Inf, and I*-0 is corrupted 559 * to -0.0+I*0.0. 560 * 561 * The C11 standard introduced the macros CMPLX(), CMPLXF() and CMPLXL() 562 * to construct complex values. Compilers that conform to the C99 563 * standard require the following functions to avoid the above issues. 564 */ 565 566#ifndef CMPLXF 567static __inline float complex 568CMPLXF(float x, float y) 569{ 570 float_complex z; 571 572 REAL_PART(z) = x; 573 IMAG_PART(z) = y; 574 return (z.z); 575} 576#endif 577 578#ifndef CMPLX 579static __inline double complex 580CMPLX(double x, double y) 581{ 582 double_complex z; 583 584 REAL_PART(z) = x; 585 IMAG_PART(z) = y; 586 return (z.z); 587} 588#endif 589 590#ifndef CMPLXL 591static __inline long double complex 592CMPLXL(long double x, long double y) 593{ 594 long_double_complex z; 595 596 REAL_PART(z) = x; 597 IMAG_PART(z) = y; 598 return (z.z); 599} 600#endif 601 602#endif /* _COMPLEX_H */ 603 604/* ieee style elementary functions */ 605extern double __ieee754_sqrt __P((double)); 606extern double __ieee754_acos __P((double)); 607extern double __ieee754_acosh __P((double)); 608extern double __ieee754_log __P((double)); 609extern double __ieee754_atanh __P((double)); 610extern double __ieee754_asin __P((double)); 611extern double __ieee754_atan2 __P((double,double)); 612extern double __ieee754_exp __P((double)); 613extern double __ieee754_cosh __P((double)); 614extern double __ieee754_fmod __P((double,double)); 615extern double __ieee754_pow __P((double,double)); 616extern double __ieee754_lgamma_r __P((double,int *)); 617extern double __ieee754_gamma_r __P((double,int *)); 618extern double __ieee754_lgamma __P((double)); 619extern double __ieee754_gamma __P((double)); 620extern double __ieee754_log10 __P((double)); 621extern double __ieee754_log2 __P((double)); 622extern double __ieee754_sinh __P((double)); 623extern double __ieee754_hypot __P((double,double)); 624extern double __ieee754_j0 __P((double)); 625extern double __ieee754_j1 __P((double)); 626extern double __ieee754_y0 __P((double)); 627extern double __ieee754_y1 __P((double)); 628extern double __ieee754_jn __P((int,double)); 629extern double __ieee754_yn __P((int,double)); 630extern double __ieee754_remainder __P((double,double)); 631extern int32_t __ieee754_rem_pio2 __P((double,double*)); 632extern double __ieee754_scalb __P((double,double)); 633 634/* fdlibm kernel function */ 635extern double __kernel_standard __P((double,double,int)); 636extern double __kernel_sin __P((double,double,int)); 637extern double __kernel_cos __P((double,double)); 638extern double __kernel_tan __P((double,double,int)); 639extern int __kernel_rem_pio2 __P((double*,double*,int,int,int)); 640 641 642/* ieee style elementary float functions */ 643extern float __ieee754_sqrtf __P((float)); 644extern float __ieee754_acosf __P((float)); 645extern float __ieee754_acoshf __P((float)); 646extern float __ieee754_logf __P((float)); 647extern float __ieee754_atanhf __P((float)); 648extern float __ieee754_asinf __P((float)); 649extern float __ieee754_atan2f __P((float,float)); 650extern float __ieee754_expf __P((float)); 651extern float __ieee754_coshf __P((float)); 652extern float __ieee754_fmodf __P((float,float)); 653extern float __ieee754_powf __P((float,float)); 654extern float __ieee754_lgammaf_r __P((float,int *)); 655extern float __ieee754_gammaf_r __P((float,int *)); 656extern float __ieee754_lgammaf __P((float)); 657extern float __ieee754_gammaf __P((float)); 658extern float __ieee754_log10f __P((float)); 659extern float __ieee754_log2f __P((float)); 660extern float __ieee754_sinhf __P((float)); 661extern float __ieee754_hypotf __P((float,float)); 662extern float __ieee754_j0f __P((float)); 663extern float __ieee754_j1f __P((float)); 664extern float __ieee754_y0f __P((float)); 665extern float __ieee754_y1f __P((float)); 666extern float __ieee754_jnf __P((int,float)); 667extern float __ieee754_ynf __P((int,float)); 668extern float __ieee754_remainderf __P((float,float)); 669extern int32_t __ieee754_rem_pio2f __P((float,float*)); 670extern float __ieee754_scalbf __P((float,float)); 671 672/* float versions of fdlibm kernel functions */ 673extern float __kernel_sinf __P((float,float,int)); 674extern float __kernel_cosf __P((float,float)); 675extern float __kernel_tanf __P((float,float,int)); 676extern int __kernel_rem_pio2f __P((float*,float*,int,int,int,const int32_t*)); 677 678/* ieee style elementary long double functions */ 679extern long double __ieee754_fmodl(long double, long double); 680extern long double __ieee754_sqrtl(long double); 681 682/* 683 * TRUNC() is a macro that sets the trailing 27 bits in the mantissa of an 684 * IEEE double variable to zero. It must be expression-like for syntactic 685 * reasons, and we implement this expression using an inline function 686 * instead of a pure macro to avoid depending on the gcc feature of 687 * statement-expressions. 688 */ 689#define TRUNC(d) (_b_trunc(&(d))) 690 691static __inline void 692_b_trunc(volatile double *_dp) 693{ 694 uint32_t _lw; 695 696 GET_LOW_WORD(_lw, *_dp); 697 SET_LOW_WORD(*_dp, _lw & 0xf8000000); 698} 699 700struct Double { 701 double a; 702 double b; 703}; 704 705/* 706 * Functions internal to the math package, yet not static. 707 */ 708double __exp__D(double, double); 709struct Double __log__D(double); 710 711/* 712 * The rnint() family rounds to the nearest integer for a restricted range 713 * range of args (up to about 2**MANT_DIG). We assume that the current 714 * rounding mode is FE_TONEAREST so that this can be done efficiently. 715 * Extra precision causes more problems in practice, and we only centralize 716 * this here to reduce those problems, and have not solved the efficiency 717 * problems. The exp2() family uses a more delicate version of this that 718 * requires extracting bits from the intermediate value, so it is not 719 * centralized here and should copy any solution of the efficiency problems. 720 */ 721 722static inline double 723rnint(double x) 724{ 725 /* 726 * This casts to double to kill any extra precision. This depends 727 * on the cast being applied to a double_t to avoid compiler bugs 728 * (this is a cleaner version of STRICT_ASSIGN()). This is 729 * inefficient if there actually is extra precision, but is hard 730 * to improve on. We use double_t in the API to minimise conversions 731 * for just calling here. Note that we cannot easily change the 732 * magic number to the one that works directly with double_t, since 733 * the rounding precision is variable at runtime on x86 so the 734 * magic number would need to be variable. Assuming that the 735 * rounding precision is always the default is too fragile. This 736 * and many other complications will move when the default is 737 * changed to FP_PE. 738 */ 739 return ((double)(x + 0x1.8p52) - 0x1.8p52); 740} 741 742static inline float 743rnintf(float x) 744{ 745 /* 746 * As for rnint(), except we could just call that to handle the 747 * extra precision case, usually without losing efficiency. 748 */ 749 return ((float)(x + 0x1.8p23F) - 0x1.8p23F); 750} 751 752#ifdef LDBL_MANT_DIG 753/* 754 * The complications for extra precision are smaller for rnintl() since it 755 * can safely assume that the rounding precision has been increased from 756 * its default to FP_PE on x86. We don't exploit that here to get small 757 * optimizations from limiting the range to double. We just need it for 758 * the magic number to work with long doubles. ld128 callers should use 759 * rnint() instead of this if possible. ld80 callers should prefer 760 * rnintl() since for amd64 this avoids swapping the register set, while 761 * for i386 it makes no difference (assuming FP_PE), and for other arches 762 * it makes little difference. 763 */ 764static inline long double 765rnintl(long double x) 766{ 767 return (x + ___CONCAT(0x1.8p,LDBL_MANT_DIG) / 2 - 768 ___CONCAT(0x1.8p,LDBL_MANT_DIG) / 2); 769} 770#endif /* LDBL_MANT_DIG */ 771 772/* 773 * irint() and i64rint() give the same result as casting to their integer 774 * return type provided their arg is a floating point integer. They can 775 * sometimes be more efficient because no rounding is required. 776 */ 777#if (defined(amd64) || defined(__i386__)) && defined(__GNUCLIKE_ASM) 778#define irint(x) \ 779 (sizeof(x) == sizeof(float) && \ 780 sizeof(__float_t) == sizeof(long double) ? irintf(x) : \ 781 sizeof(x) == sizeof(double) && \ 782 sizeof(__double_t) == sizeof(long double) ? irintd(x) : \ 783 sizeof(x) == sizeof(long double) ? irintl(x) : (int)(x)) 784#else 785#define irint(x) ((int)(x)) 786#endif 787 788#define i64rint(x) ((int64_t)(x)) /* only needed for ld128 so not opt. */ 789 790#if defined(__i386__) && defined(__GNUCLIKE_ASM) 791static __inline int 792irintf(float x) 793{ 794 int n; 795 796 __asm("fistl %0" : "=m" (n) : "t" (x)); 797 return (n); 798} 799 800static __inline int 801irintd(double x) 802{ 803 int n; 804 805 __asm("fistl %0" : "=m" (n) : "t" (x)); 806 return (n); 807} 808#endif 809 810#if (defined(__amd64__) || defined(__i386__)) && defined(__GNUCLIKE_ASM) 811static __inline int 812irintl(long double x) 813{ 814 int n; 815 816 __asm("fistl %0" : "=m" (n) : "t" (x)); 817 return (n); 818} 819#endif 820 821/* 822 * The following are fast floor macros for 0 <= |x| < 0x1p(N-1), where 823 * N is the precision of the type of x. These macros are used in the 824 * half-cycle trignometric functions (e.g., sinpi(x)). 825 */ 826#define FFLOORF(x, j0, ix) do { \ 827 (j0) = (((ix) >> 23) & 0xff) - 0x7f; \ 828 (ix) &= ~(0x007fffff >> (j0)); \ 829 SET_FLOAT_WORD((x), (ix)); \ 830} while (0) 831 832#define FFLOOR(x, j0, ix, lx) do { \ 833 (j0) = (((ix) >> 20) & 0x7ff) - 0x3ff; \ 834 if ((j0) < 20) { \ 835 (ix) &= ~(0x000fffff >> (j0)); \ 836 (lx) = 0; \ 837 } else { \ 838 (lx) &= ~((uint32_t)0xffffffff >> ((j0) - 20)); \ 839 } \ 840 INSERT_WORDS((x), (ix), (lx)); \ 841} while (0) 842 843#define FFLOORL80(x, j0, ix, lx) do { \ 844 j0 = ix - 0x3fff + 1; \ 845 if ((j0) < 32) { \ 846 (lx) = ((lx) >> 32) << 32; \ 847 (lx) &= ~((((lx) << 32)-1) >> (j0)); \ 848 } else { \ 849 uint64_t _m; \ 850 _m = (uint64_t)-1 >> (j0); \ 851 if ((lx) & _m) (lx) &= ~_m; \ 852 } \ 853 INSERT_LDBL80_WORDS((x), (ix), (lx)); \ 854} while (0) 855 856#define FFLOORL128(x, ai, ar) do { \ 857 union ieee_ext_u u; \ 858 uint64_t m; \ 859 int e; \ 860 u.extu_ld = (x); \ 861 e = u.extu_exp - 16383; \ 862 if (e < 48) { \ 863 m = ((1llu << 49) - 1) >> (e + 1); \ 864 u.extu_frach &= ~m; \ 865 u.extu_fracl = 0; \ 866 } else { \ 867 m = (uint64_t)-1 >> (e - 48); \ 868 u.extu_fracl &= ~m; \ 869 } \ 870 (ai) = u.extu_ld; \ 871 (ar) = (x) - (ai); \ 872} while (0) 873 874#ifdef DEBUG 875#if defined(__amd64__) || defined(__i386__) 876#define breakpoint() asm("int $3") 877#else 878#include <signal.h> 879 880#define breakpoint() raise(SIGTRAP) 881#endif 882#endif 883 884#ifdef STRUCT_RETURN 885#define RETURNSP(rp) do { \ 886 if (!(rp)->lo_set) \ 887 RETURNF((rp)->hi); \ 888 RETURNF((rp)->hi + (rp)->lo); \ 889} while (0) 890#define RETURNSPI(rp) do { \ 891 if (!(rp)->lo_set) \ 892 RETURNI((rp)->hi); \ 893 RETURNI((rp)->hi + (rp)->lo); \ 894} while (0) 895#endif 896 897#define SUM2P(x, y) ({ \ 898 const __typeof (x) __x = (x); \ 899 const __typeof (y) __y = (y); \ 900 __x + __y; \ 901}) 902 903#ifndef INLINE_KERNEL_SINDF 904float __kernel_sindf(double); 905#endif 906#ifndef INLINE_KERNEL_COSDF 907float __kernel_cosdf(double); 908#endif 909#ifndef INLINE_KERNEL_TANDF 910float __kernel_tandf(double,int); 911#endif 912 913/* long double precision kernel functions */ 914long double __kernel_sinl(long double, long double, int); 915long double __kernel_cosl(long double, long double); 916long double __kernel_tanl(long double, long double, int); 917 918#endif /* _MATH_PRIVATE_H_ */ 919