Deleted Added
full compact
math_private.h (241051) math_private.h (251292)
1/*
2 * ====================================================
3 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
4 *
5 * Developed at SunPro, a Sun Microsystems, Inc. business.
6 * Permission to use, copy, modify, and distribute this
7 * software is freely granted, provided that this notice
8 * is preserved.
9 * ====================================================
10 */
11
12/*
13 * from: @(#)fdlibm.h 5.1 93/09/24
1/*
2 * ====================================================
3 * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
4 *
5 * Developed at SunPro, a Sun Microsystems, Inc. business.
6 * Permission to use, copy, modify, and distribute this
7 * software is freely granted, provided that this notice
8 * is preserved.
9 * ====================================================
10 */
11
12/*
13 * from: @(#)fdlibm.h 5.1 93/09/24
14 * $FreeBSD: head/lib/msun/src/math_private.h 241051 2012-09-29 16:40:12Z kargl $
14 * $FreeBSD: head/lib/msun/src/math_private.h 251292 2013-06-03 09:14:31Z das $
15 */
16
17#ifndef _MATH_PRIVATE_H_
18#define _MATH_PRIVATE_H_
19
20#include <sys/types.h>
21#include <machine/endian.h>
22

--- 160 unchanged lines hidden (view full) ---

183
184#define SET_FLOAT_WORD(d,i) \
185do { \
186 ieee_float_shape_type sf_u; \
187 sf_u.word = (i); \
188 (d) = sf_u.value; \
189} while (0)
190
15 */
16
17#ifndef _MATH_PRIVATE_H_
18#define _MATH_PRIVATE_H_
19
20#include <sys/types.h>
21#include <machine/endian.h>
22

--- 160 unchanged lines hidden (view full) ---

183
184#define SET_FLOAT_WORD(d,i) \
185do { \
186 ieee_float_shape_type sf_u; \
187 sf_u.word = (i); \
188 (d) = sf_u.value; \
189} while (0)
190
191/*
192 * Get expsign and mantissa as 16 bit and 64 bit ints from an 80 bit long
193 * double.
194 */
195
196#define EXTRACT_LDBL80_WORDS(ix0,ix1,d) \
197do { \
198 union IEEEl2bits ew_u; \
199 ew_u.e = (d); \
200 (ix0) = ew_u.xbits.expsign; \
201 (ix1) = ew_u.xbits.man; \
202} while (0)
203
204/*
205 * Get expsign and mantissa as one 16 bit and two 64 bit ints from a 128 bit
206 * long double.
207 */
208
209#define EXTRACT_LDBL128_WORDS(ix0,ix1,ix2,d) \
210do { \
211 union IEEEl2bits ew_u; \
212 ew_u.e = (d); \
213 (ix0) = ew_u.xbits.expsign; \
214 (ix1) = ew_u.xbits.manh; \
215 (ix2) = ew_u.xbits.manl; \
216} while (0)
217
191/* Get expsign as a 16 bit int from a long double. */
192
193#define GET_LDBL_EXPSIGN(i,d) \
194do { \
195 union IEEEl2bits ge_u; \
196 ge_u.e = (d); \
197 (i) = ge_u.xbits.expsign; \
198} while (0)
199
218/* Get expsign as a 16 bit int from a long double. */
219
220#define GET_LDBL_EXPSIGN(i,d) \
221do { \
222 union IEEEl2bits ge_u; \
223 ge_u.e = (d); \
224 (i) = ge_u.xbits.expsign; \
225} while (0)
226
227/*
228 * Set an 80 bit long double from a 16 bit int expsign and a 64 bit int
229 * mantissa.
230 */
231
232#define INSERT_LDBL80_WORDS(d,ix0,ix1) \
233do { \
234 union IEEEl2bits iw_u; \
235 iw_u.xbits.expsign = (ix0); \
236 iw_u.xbits.man = (ix1); \
237 (d) = iw_u.e; \
238} while (0)
239
240/*
241 * Set a 128 bit long double from a 16 bit int expsign and two 64 bit ints
242 * comprising the mantissa.
243 */
244
245#define INSERT_LDBL128_WORDS(d,ix0,ix1,ix2) \
246do { \
247 union IEEEl2bits iw_u; \
248 iw_u.xbits.expsign = (ix0); \
249 iw_u.xbits.manh = (ix1); \
250 iw_u.xbits.manl = (ix2); \
251 (d) = iw_u.e; \
252} while (0)
253
200/* Set expsign of a long double from a 16 bit int. */
201
202#define SET_LDBL_EXPSIGN(d,v) \
203do { \
204 union IEEEl2bits se_u; \
205 se_u.e = (d); \
206 se_u.xbits.expsign = (v); \
207 (d) = se_u.e; \

--- 48 unchanged lines hidden (view full) ---

256#define ENTERI(x)
257#define RETURNI(x) RETURNF(x)
258#endif
259
260/* Default return statement if hack*_t() is not used. */
261#define RETURNF(v) return (v)
262
263/*
254/* Set expsign of a long double from a 16 bit int. */
255
256#define SET_LDBL_EXPSIGN(d,v) \
257do { \
258 union IEEEl2bits se_u; \
259 se_u.e = (d); \
260 se_u.xbits.expsign = (v); \
261 (d) = se_u.e; \

--- 48 unchanged lines hidden (view full) ---

310#define ENTERI(x)
311#define RETURNI(x) RETURNF(x)
312#endif
313
314/* Default return statement if hack*_t() is not used. */
315#define RETURNF(v) return (v)
316
317/*
318 * 2sum gives the same result as 2sumF without requiring |a| >= |b| or
319 * a == 0, but is slower.
320 */
321#define _2sum(a, b) do { \
322 __typeof(a) __s, __w; \
323 \
324 __w = (a) + (b); \
325 __s = __w - (a); \
326 (b) = ((a) - (__w - __s)) + ((b) - __s); \
327 (a) = __w; \
328} while (0)
329
330/*
331 * 2sumF algorithm.
332 *
333 * "Normalize" the terms in the infinite-precision expression a + b for
334 * the sum of 2 floating point values so that b is as small as possible
335 * relative to 'a'. (The resulting 'a' is the value of the expression in
336 * the same precision as 'a' and the resulting b is the rounding error.)
337 * |a| must be >= |b| or 0, b's type must be no larger than 'a's type, and
338 * exponent overflow or underflow must not occur. This uses a Theorem of
339 * Dekker (1971). See Knuth (1981) 4.2.2 Theorem C. The name "TwoSum"
340 * is apparently due to Skewchuk (1997).
341 *
342 * For this to always work, assignment of a + b to 'a' must not retain any
343 * extra precision in a + b. This is required by C standards but broken
344 * in many compilers. The brokenness cannot be worked around using
345 * STRICT_ASSIGN() like we do elsewhere, since the efficiency of this
346 * algorithm would be destroyed by non-null strict assignments. (The
347 * compilers are correct to be broken -- the efficiency of all floating
348 * point code calculations would be destroyed similarly if they forced the
349 * conversions.)
350 *
351 * Fortunately, a case that works well can usually be arranged by building
352 * any extra precision into the type of 'a' -- 'a' should have type float_t,
353 * double_t or long double. b's type should be no larger than 'a's type.
354 * Callers should use these types with scopes as large as possible, to
355 * reduce their own extra-precision and efficiciency problems. In
356 * particular, they shouldn't convert back and forth just to call here.
357 */
358#ifdef DEBUG
359#define _2sumF(a, b) do { \
360 __typeof(a) __w; \
361 volatile __typeof(a) __ia, __ib, __r, __vw; \
362 \
363 __ia = (a); \
364 __ib = (b); \
365 assert(__ia == 0 || fabsl(__ia) >= fabsl(__ib)); \
366 \
367 __w = (a) + (b); \
368 (b) = ((a) - __w) + (b); \
369 (a) = __w; \
370 \
371 /* The next 2 assertions are weak if (a) is already long double. */ \
372 assert((long double)__ia + __ib == (long double)(a) + (b)); \
373 __vw = __ia + __ib; \
374 __r = __ia - __vw; \
375 __r += __ib; \
376 assert(__vw == (a) && __r == (b)); \
377} while (0)
378#else /* !DEBUG */
379#define _2sumF(a, b) do { \
380 __typeof(a) __w; \
381 \
382 __w = (a) + (b); \
383 (b) = ((a) - __w) + (b); \
384 (a) = __w; \
385} while (0)
386#endif /* DEBUG */
387
388/*
389 * Set x += c, where x is represented in extra precision as a + b.
390 * x must be sufficiently normalized and sufficiently larger than c,
391 * and the result is then sufficiently normalized.
392 *
393 * The details of ordering are that |a| must be >= |c| (so that (a, c)
394 * can be normalized without extra work to swap 'a' with c). The details of
395 * the normalization are that b must be small relative to the normalized 'a'.
396 * Normalization of (a, c) makes the normalized c tiny relative to the
397 * normalized a, so b remains small relative to 'a' in the result. However,
398 * b need not ever be tiny relative to 'a'. For example, b might be about
399 * 2**20 times smaller than 'a' to give about 20 extra bits of precision.
400 * That is usually enough, and adding c (which by normalization is about
401 * 2**53 times smaller than a) cannot change b significantly. However,
402 * cancellation of 'a' with c in normalization of (a, c) may reduce 'a'
403 * significantly relative to b. The caller must ensure that significant
404 * cancellation doesn't occur, either by having c of the same sign as 'a',
405 * or by having |c| a few percent smaller than |a|. Pre-normalization of
406 * (a, b) may help.
407 *
408 * This is is a variant of an algorithm of Kahan (see Knuth (1981) 4.2.2
409 * exercise 19). We gain considerable efficiency by requiring the terms to
410 * be sufficiently normalized and sufficiently increasing.
411 */
412#define _3sumF(a, b, c) do { \
413 __typeof(a) __tmp; \
414 \
415 __tmp = (c); \
416 _2sumF(__tmp, (a)); \
417 (b) += (a); \
418 (a) = __tmp; \
419} while (0)
420
421/*
264 * Common routine to process the arguments to nan(), nanf(), and nanl().
265 */
266void _scan_nan(uint32_t *__words, int __num_words, const char *__s);
267
268#ifdef _COMPLEX_H
269
270/*
271 * C99 specifies that complex numbers have the same representation as

--- 93 unchanged lines hidden (view full) ---

365 asm("fistl %0" : "=m" (n) : "t" (x));
366 return (n);
367}
368#define HAVE_EFFICIENT_IRINTL
369#endif
370
371#endif /* __GNUCLIKE_ASM */
372
422 * Common routine to process the arguments to nan(), nanf(), and nanl().
423 */
424void _scan_nan(uint32_t *__words, int __num_words, const char *__s);
425
426#ifdef _COMPLEX_H
427
428/*
429 * C99 specifies that complex numbers have the same representation as

--- 93 unchanged lines hidden (view full) ---

523 asm("fistl %0" : "=m" (n) : "t" (x));
524 return (n);
525}
526#define HAVE_EFFICIENT_IRINTL
527#endif
528
529#endif /* __GNUCLIKE_ASM */
530
531#ifdef DEBUG
532#if defined(__amd64__) || defined(__i386__)
533#define breakpoint() asm("int $3")
534#else
535#include <signal.h>
536
537#define breakpoint() raise(SIGTRAP)
538#endif
539#endif
540
541/* Write a pari script to test things externally. */
542#ifdef DOPRINT
543#include <stdio.h>
544
545#ifndef DOPRINT_SWIZZLE
546#define DOPRINT_SWIZZLE 0
547#endif
548
549#ifdef DOPRINT_LD80
550
551#define DOPRINT_START(xp) do { \
552 uint64_t __lx; \
553 uint16_t __hx; \
554 \
555 /* Hack to give more-problematic args. */ \
556 EXTRACT_LDBL80_WORDS(__hx, __lx, *xp); \
557 __lx ^= DOPRINT_SWIZZLE; \
558 INSERT_LDBL80_WORDS(*xp, __hx, __lx); \
559 printf("x = %.21Lg; ", (long double)*xp); \
560} while (0)
561#define DOPRINT_END1(v) \
562 printf("y = %.21Lg; z = 0; show(x, y, z);\n", (long double)(v))
563#define DOPRINT_END2(hi, lo) \
564 printf("y = %.21Lg; z = %.21Lg; show(x, y, z);\n", \
565 (long double)(hi), (long double)(lo))
566
567#elif defined(DOPRINT_D64)
568
569#define DOPRINT_START(xp) do { \
570 uint32_t __hx, __lx; \
571 \
572 EXTRACT_WORDS(__hx, __lx, *xp); \
573 __lx ^= DOPRINT_SWIZZLE; \
574 INSERT_WORDS(*xp, __hx, __lx); \
575 printf("x = %.21Lg; ", (long double)*xp); \
576} while (0)
577#define DOPRINT_END1(v) \
578 printf("y = %.21Lg; z = 0; show(x, y, z);\n", (long double)(v))
579#define DOPRINT_END2(hi, lo) \
580 printf("y = %.21Lg; z = %.21Lg; show(x, y, z);\n", \
581 (long double)(hi), (long double)(lo))
582
583#elif defined(DOPRINT_F32)
584
585#define DOPRINT_START(xp) do { \
586 uint32_t __hx; \
587 \
588 GET_FLOAT_WORD(__hx, *xp); \
589 __hx ^= DOPRINT_SWIZZLE; \
590 SET_FLOAT_WORD(*xp, __hx); \
591 printf("x = %.21Lg; ", (long double)*xp); \
592} while (0)
593#define DOPRINT_END1(v) \
594 printf("y = %.21Lg; z = 0; show(x, y, z);\n", (long double)(v))
595#define DOPRINT_END2(hi, lo) \
596 printf("y = %.21Lg; z = %.21Lg; show(x, y, z);\n", \
597 (long double)(hi), (long double)(lo))
598
599#else /* !DOPRINT_LD80 && !DOPRINT_D64 (LD128 only) */
600
601#ifndef DOPRINT_SWIZZLE_HIGH
602#define DOPRINT_SWIZZLE_HIGH 0
603#endif
604
605#define DOPRINT_START(xp) do { \
606 uint64_t __lx, __llx; \
607 uint16_t __hx; \
608 \
609 EXTRACT_LDBL128_WORDS(__hx, __lx, __llx, *xp); \
610 __llx ^= DOPRINT_SWIZZLE; \
611 __lx ^= DOPRINT_SWIZZLE_HIGH; \
612 INSERT_LDBL128_WORDS(*xp, __hx, __lx, __llx); \
613 printf("x = %.36Lg; ", (long double)*xp); \
614} while (0)
615#define DOPRINT_END1(v) \
616 printf("y = %.36Lg; z = 0; show(x, y, z);\n", (long double)(v))
617#define DOPRINT_END2(hi, lo) \
618 printf("y = %.36Lg; z = %.36Lg; show(x, y, z);\n", \
619 (long double)(hi), (long double)(lo))
620
621#endif /* DOPRINT_LD80 */
622
623#else /* !DOPRINT */
624#define DOPRINT_START(xp)
625#define DOPRINT_END1(v)
626#define DOPRINT_END2(hi, lo)
627#endif /* DOPRINT */
628
629#define RETURNP(x) do { \
630 DOPRINT_END1(x); \
631 RETURNF(x); \
632} while (0)
633#define RETURNPI(x) do { \
634 DOPRINT_END1(x); \
635 RETURNI(x); \
636} while (0)
637#define RETURN2P(x, y) do { \
638 DOPRINT_END2((x), (y)); \
639 RETURNF((x) + (y)); \
640} while (0)
641#define RETURN2PI(x, y) do { \
642 DOPRINT_END2((x), (y)); \
643 RETURNI((x) + (y)); \
644} while (0)
645#ifdef STRUCT_RETURN
646#define RETURNSP(rp) do { \
647 if (!(rp)->lo_set) \
648 RETURNP((rp)->hi); \
649 RETURN2P((rp)->hi, (rp)->lo); \
650} while (0)
651#define RETURNSPI(rp) do { \
652 if (!(rp)->lo_set) \
653 RETURNPI((rp)->hi); \
654 RETURN2PI((rp)->hi, (rp)->lo); \
655} while (0)
656#endif
657#define SUM2P(x, y) ({ \
658 const __typeof (x) __x = (x); \
659 const __typeof (y) __y = (y); \
660 \
661 DOPRINT_END2(__x, __y); \
662 __x + __y; \
663})
664
373/*
374 * ieee style elementary functions
375 *
376 * We rename functions here to improve other sources' diffability
377 * against fdlibm.
378 */
379#define __ieee754_sqrt sqrt
380#define __ieee754_acos acos

--- 92 unchanged lines hidden ---
665/*
666 * ieee style elementary functions
667 *
668 * We rename functions here to improve other sources' diffability
669 * against fdlibm.
670 */
671#define __ieee754_sqrt sqrt
672#define __ieee754_acos acos

--- 92 unchanged lines hidden ---