1/* $NetBSD: softfloat.c,v 1.1 2002/05/21 23:51:07 bjh21 Exp $ */
2
3/*
4 * This version hacked for use with gcc -msoft-float by bjh21.
5 * (Mostly a case of #ifdefing out things GCC doesn't need or provides
6 *  itself).
7 */
8
9/*
10 * Things you may want to define:
11 *
12 * SOFTFLOAT_FOR_GCC - build only those functions necessary for GCC (with
13 *   -msoft-float) to work.  Include "softfloat-for-gcc.h" to get them
14 *   properly renamed.
15 */
16
17/*
18 * This differs from the standard bits32/softfloat.c in that float64
19 * is defined to be a 64-bit integer rather than a structure.  The
20 * structure is float64s, with translation between the two going via
21 * float64u.
22 */
23
24/*
25===============================================================================
26
27This C source file is part of the SoftFloat IEC/IEEE Floating-Point
28Arithmetic Package, Release 2a.
29
30Written by John R. Hauser.  This work was made possible in part by the
31International Computer Science Institute, located at Suite 600, 1947 Center
32Street, Berkeley, California 94704.  Funding was partially provided by the
33National Science Foundation under grant MIP-9311980.  The original version
34of this code was written as part of a project to build a fixed-point vector
35processor in collaboration with the University of California at Berkeley,
36overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
37is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
38arithmetic/SoftFloat.html'.
39
40THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
41has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
42TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
43PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
44AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
45
46Derivative works are acceptable, even for commercial purposes, so long as
47(1) they include prominent notice that the work is derivative, and (2) they
48include prominent notice akin to these four paragraphs for those parts of
49this code that are retained.
50
51===============================================================================
52*/
53
54#include <sys/cdefs.h>
55__FBSDID("$FreeBSD$");
56
57#ifdef SOFTFLOAT_FOR_GCC
58#include "softfloat-for-gcc.h"
59#endif
60
61#include "milieu.h"
62#include "softfloat.h"
63
64/*
65 * Conversions between floats as stored in memory and floats as
66 * SoftFloat uses them
67 */
68#ifndef FLOAT64_DEMANGLE
69#define FLOAT64_DEMANGLE(a)	(a)
70#endif
71#ifndef FLOAT64_MANGLE
72#define FLOAT64_MANGLE(a)	(a)
73#endif
74
75/*
76-------------------------------------------------------------------------------
77Floating-point rounding mode and exception flags.
78-------------------------------------------------------------------------------
79*/
80int float_rounding_mode = float_round_nearest_even;
81int float_exception_flags = 0;
82
83/*
84-------------------------------------------------------------------------------
85Primitive arithmetic functions, including multi-word arithmetic, and
86division and square root approximations.  (Can be specialized to target if
87desired.)
88-------------------------------------------------------------------------------
89*/
90#include "softfloat-macros"
91
92/*
93-------------------------------------------------------------------------------
94Functions and definitions to determine:  (1) whether tininess for underflow
95is detected before or after rounding by default, (2) what (if anything)
96happens when exceptions are raised, (3) how signaling NaNs are distinguished
97from quiet NaNs, (4) the default generated quiet NaNs, and (4) how NaNs
98are propagated from function inputs to output.  These details are target-
99specific.
100-------------------------------------------------------------------------------
101*/
102#include "softfloat-specialize"
103
104/*
105-------------------------------------------------------------------------------
106Returns the fraction bits of the single-precision floating-point value `a'.
107-------------------------------------------------------------------------------
108*/
109INLINE bits32 extractFloat32Frac( float32 a )
110{
111
112    return a & 0x007FFFFF;
113
114}
115
116/*
117-------------------------------------------------------------------------------
118Returns the exponent bits of the single-precision floating-point value `a'.
119-------------------------------------------------------------------------------
120*/
121INLINE int16 extractFloat32Exp( float32 a )
122{
123
124    return ( a>>23 ) & 0xFF;
125
126}
127
128/*
129-------------------------------------------------------------------------------
130Returns the sign bit of the single-precision floating-point value `a'.
131-------------------------------------------------------------------------------
132*/
133INLINE flag extractFloat32Sign( float32 a )
134{
135
136    return a>>31;
137
138}
139
140/*
141-------------------------------------------------------------------------------
142Normalizes the subnormal single-precision floating-point value represented
143by the denormalized significand `aSig'.  The normalized exponent and
144significand are stored at the locations pointed to by `zExpPtr' and
145`zSigPtr', respectively.
146-------------------------------------------------------------------------------
147*/
148static void
149 normalizeFloat32Subnormal( bits32 aSig, int16 *zExpPtr, bits32 *zSigPtr )
150{
151    int8 shiftCount;
152
153    shiftCount = countLeadingZeros32( aSig ) - 8;
154    *zSigPtr = aSig<<shiftCount;
155    *zExpPtr = 1 - shiftCount;
156
157}
158
159/*
160-------------------------------------------------------------------------------
161Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
162single-precision floating-point value, returning the result.  After being
163shifted into the proper positions, the three fields are simply added
164together to form the result.  This means that any integer portion of `zSig'
165will be added into the exponent.  Since a properly normalized significand
166will have an integer portion equal to 1, the `zExp' input should be 1 less
167than the desired result exponent whenever `zSig' is a complete, normalized
168significand.
169-------------------------------------------------------------------------------
170*/
171INLINE float32 packFloat32( flag zSign, int16 zExp, bits32 zSig )
172{
173
174    return ( ( (bits32) zSign )<<31 ) + ( ( (bits32) zExp )<<23 ) + zSig;
175
176}
177
178/*
179-------------------------------------------------------------------------------
180Takes an abstract floating-point value having sign `zSign', exponent `zExp',
181and significand `zSig', and returns the proper single-precision floating-
182point value corresponding to the abstract input.  Ordinarily, the abstract
183value is simply rounded and packed into the single-precision format, with
184the inexact exception raised if the abstract input cannot be represented
185exactly.  However, if the abstract value is too large, the overflow and
186inexact exceptions are raised and an infinity or maximal finite value is
187returned.  If the abstract value is too small, the input value is rounded to
188a subnormal number, and the underflow and inexact exceptions are raised if
189the abstract input cannot be represented exactly as a subnormal single-
190precision floating-point number.
191    The input significand `zSig' has its binary point between bits 30
192and 29, which is 7 bits to the left of the usual location.  This shifted
193significand must be normalized or smaller.  If `zSig' is not normalized,
194`zExp' must be 0; in that case, the result returned is a subnormal number,
195and it must not require rounding.  In the usual case that `zSig' is
196normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
197The handling of underflow and overflow follows the IEC/IEEE Standard for
198Binary Floating-Point Arithmetic.
199-------------------------------------------------------------------------------
200*/
201static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
202{
203    int8 roundingMode;
204    flag roundNearestEven;
205    int8 roundIncrement, roundBits;
206    flag isTiny;
207
208    roundingMode = float_rounding_mode;
209    roundNearestEven = roundingMode == float_round_nearest_even;
210    roundIncrement = 0x40;
211    if ( ! roundNearestEven ) {
212        if ( roundingMode == float_round_to_zero ) {
213            roundIncrement = 0;
214        }
215        else {
216            roundIncrement = 0x7F;
217            if ( zSign ) {
218                if ( roundingMode == float_round_up ) roundIncrement = 0;
219            }
220            else {
221                if ( roundingMode == float_round_down ) roundIncrement = 0;
222            }
223        }
224    }
225    roundBits = zSig & 0x7F;
226    if ( 0xFD <= (bits16) zExp ) {
227        if (    ( 0xFD < zExp )
228             || (    ( zExp == 0xFD )
229                  && ( (sbits32) ( zSig + roundIncrement ) < 0 ) )
230           ) {
231            float_raise( float_flag_overflow | float_flag_inexact );
232            return packFloat32( zSign, 0xFF, 0 ) - ( roundIncrement == 0 );
233        }
234        if ( zExp < 0 ) {
235            isTiny =
236                   ( float_detect_tininess == float_tininess_before_rounding )
237                || ( zExp < -1 )
238                || ( zSig + roundIncrement < 0x80000000 );
239            shift32RightJamming( zSig, - zExp, &zSig );
240            zExp = 0;
241            roundBits = zSig & 0x7F;
242            if ( isTiny && roundBits ) float_raise( float_flag_underflow );
243        }
244    }
245    if ( roundBits ) float_exception_flags |= float_flag_inexact;
246    zSig = ( zSig + roundIncrement )>>7;
247    zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
248    if ( zSig == 0 ) zExp = 0;
249    return packFloat32( zSign, zExp, zSig );
250
251}
252
253/*
254-------------------------------------------------------------------------------
255Takes an abstract floating-point value having sign `zSign', exponent `zExp',
256and significand `zSig', and returns the proper single-precision floating-
257point value corresponding to the abstract input.  This routine is just like
258`roundAndPackFloat32' except that `zSig' does not have to be normalized.
259Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
260floating-point exponent.
261-------------------------------------------------------------------------------
262*/
263static float32
264 normalizeRoundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
265{
266    int8 shiftCount;
267
268    shiftCount = countLeadingZeros32( zSig ) - 1;
269    return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount );
270
271}
272
273/*
274-------------------------------------------------------------------------------
275Returns the least-significant 32 fraction bits of the double-precision
276floating-point value `a'.
277-------------------------------------------------------------------------------
278*/
279INLINE bits32 extractFloat64Frac1( float64 a )
280{
281
282    return FLOAT64_DEMANGLE(a) & LIT64( 0x00000000FFFFFFFF );
283
284}
285
286/*
287-------------------------------------------------------------------------------
288Returns the most-significant 20 fraction bits of the double-precision
289floating-point value `a'.
290-------------------------------------------------------------------------------
291*/
292INLINE bits32 extractFloat64Frac0( float64 a )
293{
294
295    return ( FLOAT64_DEMANGLE(a)>>32 ) & 0x000FFFFF;
296
297}
298
299/*
300-------------------------------------------------------------------------------
301Returns the exponent bits of the double-precision floating-point value `a'.
302-------------------------------------------------------------------------------
303*/
304INLINE int16 extractFloat64Exp( float64 a )
305{
306
307    return ( FLOAT64_DEMANGLE(a)>>52 ) & 0x7FF;
308
309}
310
311/*
312-------------------------------------------------------------------------------
313Returns the sign bit of the double-precision floating-point value `a'.
314-------------------------------------------------------------------------------
315*/
316INLINE flag extractFloat64Sign( float64 a )
317{
318
319    return FLOAT64_DEMANGLE(a)>>63;
320
321}
322
323/*
324-------------------------------------------------------------------------------
325Normalizes the subnormal double-precision floating-point value represented
326by the denormalized significand formed by the concatenation of `aSig0' and
327`aSig1'.  The normalized exponent is stored at the location pointed to by
328`zExpPtr'.  The most significant 21 bits of the normalized significand are
329stored at the location pointed to by `zSig0Ptr', and the least significant
33032 bits of the normalized significand are stored at the location pointed to
331by `zSig1Ptr'.
332-------------------------------------------------------------------------------
333*/
334static void
335 normalizeFloat64Subnormal(
336     bits32 aSig0,
337     bits32 aSig1,
338     int16 *zExpPtr,
339     bits32 *zSig0Ptr,
340     bits32 *zSig1Ptr
341 )
342{
343    int8 shiftCount;
344
345    if ( aSig0 == 0 ) {
346        shiftCount = countLeadingZeros32( aSig1 ) - 11;
347        if ( shiftCount < 0 ) {
348            *zSig0Ptr = aSig1>>( - shiftCount );
349            *zSig1Ptr = aSig1<<( shiftCount & 31 );
350        }
351        else {
352            *zSig0Ptr = aSig1<<shiftCount;
353            *zSig1Ptr = 0;
354        }
355        *zExpPtr = - shiftCount - 31;
356    }
357    else {
358        shiftCount = countLeadingZeros32( aSig0 ) - 11;
359        shortShift64Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
360        *zExpPtr = 1 - shiftCount;
361    }
362
363}
364
365/*
366-------------------------------------------------------------------------------
367Packs the sign `zSign', the exponent `zExp', and the significand formed by
368the concatenation of `zSig0' and `zSig1' into a double-precision floating-
369point value, returning the result.  After being shifted into the proper
370positions, the three fields `zSign', `zExp', and `zSig0' are simply added
371together to form the most significant 32 bits of the result.  This means
372that any integer portion of `zSig0' will be added into the exponent.  Since
373a properly normalized significand will have an integer portion equal to 1,
374the `zExp' input should be 1 less than the desired result exponent whenever
375`zSig0' and `zSig1' concatenated form a complete, normalized significand.
376-------------------------------------------------------------------------------
377*/
378INLINE float64
379 packFloat64( flag zSign, int16 zExp, bits32 zSig0, bits32 zSig1 )
380{
381
382    return FLOAT64_MANGLE( ( ( (bits64) zSign )<<63 ) +
383			   ( ( (bits64) zExp )<<52 ) +
384			   ( ( (bits64) zSig0 )<<32 ) + zSig1 );
385
386
387}
388
389/*
390-------------------------------------------------------------------------------
391Takes an abstract floating-point value having sign `zSign', exponent `zExp',
392and extended significand formed by the concatenation of `zSig0', `zSig1',
393and `zSig2', and returns the proper double-precision floating-point value
394corresponding to the abstract input.  Ordinarily, the abstract value is
395simply rounded and packed into the double-precision format, with the inexact
396exception raised if the abstract input cannot be represented exactly.
397However, if the abstract value is too large, the overflow and inexact
398exceptions are raised and an infinity or maximal finite value is returned.
399If the abstract value is too small, the input value is rounded to a
400subnormal number, and the underflow and inexact exceptions are raised if the
401abstract input cannot be represented exactly as a subnormal double-precision
402floating-point number.
403    The input significand must be normalized or smaller.  If the input
404significand is not normalized, `zExp' must be 0; in that case, the result
405returned is a subnormal number, and it must not require rounding.  In the
406usual case that the input significand is normalized, `zExp' must be 1 less
407than the ``true'' floating-point exponent.  The handling of underflow and
408overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
409-------------------------------------------------------------------------------
410*/
411static float64
412 roundAndPackFloat64(
413     flag zSign, int16 zExp, bits32 zSig0, bits32 zSig1, bits32 zSig2 )
414{
415    int8 roundingMode;
416    flag roundNearestEven, increment, isTiny;
417
418    roundingMode = float_rounding_mode;
419    roundNearestEven = ( roundingMode == float_round_nearest_even );
420    increment = ( (sbits32) zSig2 < 0 );
421    if ( ! roundNearestEven ) {
422        if ( roundingMode == float_round_to_zero ) {
423            increment = 0;
424        }
425        else {
426            if ( zSign ) {
427                increment = ( roundingMode == float_round_down ) && zSig2;
428            }
429            else {
430                increment = ( roundingMode == float_round_up ) && zSig2;
431            }
432        }
433    }
434    if ( 0x7FD <= (bits16) zExp ) {
435        if (    ( 0x7FD < zExp )
436             || (    ( zExp == 0x7FD )
437                  && eq64( 0x001FFFFF, 0xFFFFFFFF, zSig0, zSig1 )
438                  && increment
439                )
440           ) {
441            float_raise( float_flag_overflow | float_flag_inexact );
442            if (    ( roundingMode == float_round_to_zero )
443                 || ( zSign && ( roundingMode == float_round_up ) )
444                 || ( ! zSign && ( roundingMode == float_round_down ) )
445               ) {
446                return packFloat64( zSign, 0x7FE, 0x000FFFFF, 0xFFFFFFFF );
447            }
448            return packFloat64( zSign, 0x7FF, 0, 0 );
449        }
450        if ( zExp < 0 ) {
451            isTiny =
452                   ( float_detect_tininess == float_tininess_before_rounding )
453                || ( zExp < -1 )
454                || ! increment
455                || lt64( zSig0, zSig1, 0x001FFFFF, 0xFFFFFFFF );
456            shift64ExtraRightJamming(
457                zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
458            zExp = 0;
459            if ( isTiny && zSig2 ) float_raise( float_flag_underflow );
460            if ( roundNearestEven ) {
461                increment = ( (sbits32) zSig2 < 0 );
462            }
463            else {
464                if ( zSign ) {
465                    increment = ( roundingMode == float_round_down ) && zSig2;
466                }
467                else {
468                    increment = ( roundingMode == float_round_up ) && zSig2;
469                }
470            }
471        }
472    }
473    if ( zSig2 ) float_exception_flags |= float_flag_inexact;
474    if ( increment ) {
475        add64( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
476        zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven );
477    }
478    else {
479        if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
480    }
481    return packFloat64( zSign, zExp, zSig0, zSig1 );
482
483}
484
485/*
486-------------------------------------------------------------------------------
487Takes an abstract floating-point value having sign `zSign', exponent `zExp',
488and significand formed by the concatenation of `zSig0' and `zSig1', and
489returns the proper double-precision floating-point value corresponding
490to the abstract input.  This routine is just like `roundAndPackFloat64'
491except that the input significand has fewer bits and does not have to be
492normalized.  In all cases, `zExp' must be 1 less than the ``true'' floating-
493point exponent.
494-------------------------------------------------------------------------------
495*/
496static float64
497 normalizeRoundAndPackFloat64(
498     flag zSign, int16 zExp, bits32 zSig0, bits32 zSig1 )
499{
500    int8 shiftCount;
501    bits32 zSig2;
502
503    if ( zSig0 == 0 ) {
504        zSig0 = zSig1;
505        zSig1 = 0;
506        zExp -= 32;
507    }
508    shiftCount = countLeadingZeros32( zSig0 ) - 11;
509    if ( 0 <= shiftCount ) {
510        zSig2 = 0;
511        shortShift64Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
512    }
513    else {
514        shift64ExtraRightJamming(
515            zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
516    }
517    zExp -= shiftCount;
518    return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 );
519
520}
521
522/*
523-------------------------------------------------------------------------------
524Returns the result of converting the 32-bit two's complement integer `a' to
525the single-precision floating-point format.  The conversion is performed
526according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
527-------------------------------------------------------------------------------
528*/
529float32 int32_to_float32( int32 a )
530{
531    flag zSign;
532
533    if ( a == 0 ) return 0;
534    if ( a == (sbits32) 0x80000000 ) return packFloat32( 1, 0x9E, 0 );
535    zSign = ( a < 0 );
536    return normalizeRoundAndPackFloat32( zSign, 0x9C, zSign ? - a : a );
537
538}
539
540/*
541-------------------------------------------------------------------------------
542Returns the result of converting the 32-bit two's complement integer `a' to
543the double-precision floating-point format.  The conversion is performed
544according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
545-------------------------------------------------------------------------------
546*/
547float64 int32_to_float64( int32 a )
548{
549    flag zSign;
550    bits32 absA;
551    int8 shiftCount;
552    bits32 zSig0, zSig1;
553
554    if ( a == 0 ) return packFloat64( 0, 0, 0, 0 );
555    zSign = ( a < 0 );
556    absA = zSign ? - a : a;
557    shiftCount = countLeadingZeros32( absA ) - 11;
558    if ( 0 <= shiftCount ) {
559        zSig0 = absA<<shiftCount;
560        zSig1 = 0;
561    }
562    else {
563        shift64Right( absA, 0, - shiftCount, &zSig0, &zSig1 );
564    }
565    return packFloat64( zSign, 0x412 - shiftCount, zSig0, zSig1 );
566
567}
568
569#ifndef SOFTFLOAT_FOR_GCC
570/*
571-------------------------------------------------------------------------------
572Returns the result of converting the single-precision floating-point value
573`a' to the 32-bit two's complement integer format.  The conversion is
574performed according to the IEC/IEEE Standard for Binary Floating-Point
575Arithmetic---which means in particular that the conversion is rounded
576according to the current rounding mode.  If `a' is a NaN, the largest
577positive integer is returned.  Otherwise, if the conversion overflows, the
578largest integer with the same sign as `a' is returned.
579-------------------------------------------------------------------------------
580*/
581int32 float32_to_int32( float32 a )
582{
583    flag aSign;
584    int16 aExp, shiftCount;
585    bits32 aSig, aSigExtra;
586    int32 z;
587    int8 roundingMode;
588
589    aSig = extractFloat32Frac( a );
590    aExp = extractFloat32Exp( a );
591    aSign = extractFloat32Sign( a );
592    shiftCount = aExp - 0x96;
593    if ( 0 <= shiftCount ) {
594        if ( 0x9E <= aExp ) {
595            if ( a != 0xCF000000 ) {
596                float_raise( float_flag_invalid );
597                if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
598                    return 0x7FFFFFFF;
599                }
600            }
601            return (sbits32) 0x80000000;
602        }
603        z = ( aSig | 0x00800000 )<<shiftCount;
604        if ( aSign ) z = - z;
605    }
606    else {
607        if ( aExp < 0x7E ) {
608            aSigExtra = aExp | aSig;
609            z = 0;
610        }
611        else {
612            aSig |= 0x00800000;
613            aSigExtra = aSig<<( shiftCount & 31 );
614            z = aSig>>( - shiftCount );
615        }
616        if ( aSigExtra ) float_exception_flags |= float_flag_inexact;
617        roundingMode = float_rounding_mode;
618        if ( roundingMode == float_round_nearest_even ) {
619            if ( (sbits32) aSigExtra < 0 ) {
620                ++z;
621                if ( (bits32) ( aSigExtra<<1 ) == 0 ) z &= ~1;
622            }
623            if ( aSign ) z = - z;
624        }
625        else {
626            aSigExtra = ( aSigExtra != 0 );
627            if ( aSign ) {
628                z += ( roundingMode == float_round_down ) & aSigExtra;
629                z = - z;
630            }
631            else {
632                z += ( roundingMode == float_round_up ) & aSigExtra;
633            }
634        }
635    }
636    return z;
637
638}
639#endif
640
641/*
642-------------------------------------------------------------------------------
643Returns the result of converting the single-precision floating-point value
644`a' to the 32-bit two's complement integer format.  The conversion is
645performed according to the IEC/IEEE Standard for Binary Floating-Point
646Arithmetic, except that the conversion is always rounded toward zero.
647If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
648the conversion overflows, the largest integer with the same sign as `a' is
649returned.
650-------------------------------------------------------------------------------
651*/
652int32 float32_to_int32_round_to_zero( float32 a )
653{
654    flag aSign;
655    int16 aExp, shiftCount;
656    bits32 aSig;
657    int32 z;
658
659    aSig = extractFloat32Frac( a );
660    aExp = extractFloat32Exp( a );
661    aSign = extractFloat32Sign( a );
662    shiftCount = aExp - 0x9E;
663    if ( 0 <= shiftCount ) {
664        if ( a != 0xCF000000 ) {
665            float_raise( float_flag_invalid );
666            if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF;
667        }
668        return (sbits32) 0x80000000;
669    }
670    else if ( aExp <= 0x7E ) {
671        if ( aExp | aSig ) float_exception_flags |= float_flag_inexact;
672        return 0;
673    }
674    aSig = ( aSig | 0x00800000 )<<8;
675    z = aSig>>( - shiftCount );
676    if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) {
677        float_exception_flags |= float_flag_inexact;
678    }
679    if ( aSign ) z = - z;
680    return z;
681
682}
683
684/*
685-------------------------------------------------------------------------------
686Returns the result of converting the single-precision floating-point value
687`a' to the double-precision floating-point format.  The conversion is
688performed according to the IEC/IEEE Standard for Binary Floating-Point
689Arithmetic.
690-------------------------------------------------------------------------------
691*/
692float64 float32_to_float64( float32 a )
693{
694    flag aSign;
695    int16 aExp;
696    bits32 aSig, zSig0, zSig1;
697
698    aSig = extractFloat32Frac( a );
699    aExp = extractFloat32Exp( a );
700    aSign = extractFloat32Sign( a );
701    if ( aExp == 0xFF ) {
702        if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a ) );
703        return packFloat64( aSign, 0x7FF, 0, 0 );
704    }
705    if ( aExp == 0 ) {
706        if ( aSig == 0 ) return packFloat64( aSign, 0, 0, 0 );
707        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
708        --aExp;
709    }
710    shift64Right( aSig, 0, 3, &zSig0, &zSig1 );
711    return packFloat64( aSign, aExp + 0x380, zSig0, zSig1 );
712
713}
714
715#ifndef SOFTFLOAT_FOR_GCC
716/*
717-------------------------------------------------------------------------------
718Rounds the single-precision floating-point value `a' to an integer,
719and returns the result as a single-precision floating-point value.  The
720operation is performed according to the IEC/IEEE Standard for Binary
721Floating-Point Arithmetic.
722-------------------------------------------------------------------------------
723*/
724float32 float32_round_to_int( float32 a )
725{
726    flag aSign;
727    int16 aExp;
728    bits32 lastBitMask, roundBitsMask;
729    int8 roundingMode;
730    float32 z;
731
732    aExp = extractFloat32Exp( a );
733    if ( 0x96 <= aExp ) {
734        if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) {
735            return propagateFloat32NaN( a, a );
736        }
737        return a;
738    }
739    if ( aExp <= 0x7E ) {
740        if ( (bits32) ( a<<1 ) == 0 ) return a;
741        float_exception_flags |= float_flag_inexact;
742        aSign = extractFloat32Sign( a );
743        switch ( float_rounding_mode ) {
744         case float_round_nearest_even:
745            if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) {
746                return packFloat32( aSign, 0x7F, 0 );
747            }
748            break;
749	 case float_round_to_zero:
750	    break;
751         case float_round_down:
752            return aSign ? 0xBF800000 : 0;
753         case float_round_up:
754            return aSign ? 0x80000000 : 0x3F800000;
755        }
756        return packFloat32( aSign, 0, 0 );
757    }
758    lastBitMask = 1;
759    lastBitMask <<= 0x96 - aExp;
760    roundBitsMask = lastBitMask - 1;
761    z = a;
762    roundingMode = float_rounding_mode;
763    if ( roundingMode == float_round_nearest_even ) {
764        z += lastBitMask>>1;
765        if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
766    }
767    else if ( roundingMode != float_round_to_zero ) {
768        if ( extractFloat32Sign( z ) ^ ( roundingMode == float_round_up ) ) {
769            z += roundBitsMask;
770        }
771    }
772    z &= ~ roundBitsMask;
773    if ( z != a ) float_exception_flags |= float_flag_inexact;
774    return z;
775
776}
777#endif
778
779/*
780-------------------------------------------------------------------------------
781Returns the result of adding the absolute values of the single-precision
782floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
783before being returned.  `zSign' is ignored if the result is a NaN.
784The addition is performed according to the IEC/IEEE Standard for Binary
785Floating-Point Arithmetic.
786-------------------------------------------------------------------------------
787*/
788static float32 addFloat32Sigs( float32 a, float32 b, flag zSign )
789{
790    int16 aExp, bExp, zExp;
791    bits32 aSig, bSig, zSig;
792    int16 expDiff;
793
794    aSig = extractFloat32Frac( a );
795    aExp = extractFloat32Exp( a );
796    bSig = extractFloat32Frac( b );
797    bExp = extractFloat32Exp( b );
798    expDiff = aExp - bExp;
799    aSig <<= 6;
800    bSig <<= 6;
801    if ( 0 < expDiff ) {
802        if ( aExp == 0xFF ) {
803            if ( aSig ) return propagateFloat32NaN( a, b );
804            return a;
805        }
806        if ( bExp == 0 ) {
807            --expDiff;
808        }
809        else {
810            bSig |= 0x20000000;
811        }
812        shift32RightJamming( bSig, expDiff, &bSig );
813        zExp = aExp;
814    }
815    else if ( expDiff < 0 ) {
816        if ( bExp == 0xFF ) {
817            if ( bSig ) return propagateFloat32NaN( a, b );
818            return packFloat32( zSign, 0xFF, 0 );
819        }
820        if ( aExp == 0 ) {
821            ++expDiff;
822        }
823        else {
824            aSig |= 0x20000000;
825        }
826        shift32RightJamming( aSig, - expDiff, &aSig );
827        zExp = bExp;
828    }
829    else {
830        if ( aExp == 0xFF ) {
831            if ( aSig | bSig ) return propagateFloat32NaN( a, b );
832            return a;
833        }
834        if ( aExp == 0 ) return packFloat32( zSign, 0, ( aSig + bSig )>>6 );
835        zSig = 0x40000000 + aSig + bSig;
836        zExp = aExp;
837        goto roundAndPack;
838    }
839    aSig |= 0x20000000;
840    zSig = ( aSig + bSig )<<1;
841    --zExp;
842    if ( (sbits32) zSig < 0 ) {
843        zSig = aSig + bSig;
844        ++zExp;
845    }
846 roundAndPack:
847    return roundAndPackFloat32( zSign, zExp, zSig );
848
849}
850
851/*
852-------------------------------------------------------------------------------
853Returns the result of subtracting the absolute values of the single-
854precision floating-point values `a' and `b'.  If `zSign' is 1, the
855difference is negated before being returned.  `zSign' is ignored if the
856result is a NaN.  The subtraction is performed according to the IEC/IEEE
857Standard for Binary Floating-Point Arithmetic.
858-------------------------------------------------------------------------------
859*/
860static float32 subFloat32Sigs( float32 a, float32 b, flag zSign )
861{
862    int16 aExp, bExp, zExp;
863    bits32 aSig, bSig, zSig;
864    int16 expDiff;
865
866    aSig = extractFloat32Frac( a );
867    aExp = extractFloat32Exp( a );
868    bSig = extractFloat32Frac( b );
869    bExp = extractFloat32Exp( b );
870    expDiff = aExp - bExp;
871    aSig <<= 7;
872    bSig <<= 7;
873    if ( 0 < expDiff ) goto aExpBigger;
874    if ( expDiff < 0 ) goto bExpBigger;
875    if ( aExp == 0xFF ) {
876        if ( aSig | bSig ) return propagateFloat32NaN( a, b );
877        float_raise( float_flag_invalid );
878        return float32_default_nan;
879    }
880    if ( aExp == 0 ) {
881        aExp = 1;
882        bExp = 1;
883    }
884    if ( bSig < aSig ) goto aBigger;
885    if ( aSig < bSig ) goto bBigger;
886    return packFloat32( float_rounding_mode == float_round_down, 0, 0 );
887 bExpBigger:
888    if ( bExp == 0xFF ) {
889        if ( bSig ) return propagateFloat32NaN( a, b );
890        return packFloat32( zSign ^ 1, 0xFF, 0 );
891    }
892    if ( aExp == 0 ) {
893        ++expDiff;
894    }
895    else {
896        aSig |= 0x40000000;
897    }
898    shift32RightJamming( aSig, - expDiff, &aSig );
899    bSig |= 0x40000000;
900 bBigger:
901    zSig = bSig - aSig;
902    zExp = bExp;
903    zSign ^= 1;
904    goto normalizeRoundAndPack;
905 aExpBigger:
906    if ( aExp == 0xFF ) {
907        if ( aSig ) return propagateFloat32NaN( a, b );
908        return a;
909    }
910    if ( bExp == 0 ) {
911        --expDiff;
912    }
913    else {
914        bSig |= 0x40000000;
915    }
916    shift32RightJamming( bSig, expDiff, &bSig );
917    aSig |= 0x40000000;
918 aBigger:
919    zSig = aSig - bSig;
920    zExp = aExp;
921 normalizeRoundAndPack:
922    --zExp;
923    return normalizeRoundAndPackFloat32( zSign, zExp, zSig );
924
925}
926
927/*
928-------------------------------------------------------------------------------
929Returns the result of adding the single-precision floating-point values `a'
930and `b'.  The operation is performed according to the IEC/IEEE Standard for
931Binary Floating-Point Arithmetic.
932-------------------------------------------------------------------------------
933*/
934float32 float32_add( float32 a, float32 b )
935{
936    flag aSign, bSign;
937
938    aSign = extractFloat32Sign( a );
939    bSign = extractFloat32Sign( b );
940    if ( aSign == bSign ) {
941        return addFloat32Sigs( a, b, aSign );
942    }
943    else {
944        return subFloat32Sigs( a, b, aSign );
945    }
946
947}
948
949/*
950-------------------------------------------------------------------------------
951Returns the result of subtracting the single-precision floating-point values
952`a' and `b'.  The operation is performed according to the IEC/IEEE Standard
953for Binary Floating-Point Arithmetic.
954-------------------------------------------------------------------------------
955*/
956float32 float32_sub( float32 a, float32 b )
957{
958    flag aSign, bSign;
959
960    aSign = extractFloat32Sign( a );
961    bSign = extractFloat32Sign( b );
962    if ( aSign == bSign ) {
963        return subFloat32Sigs( a, b, aSign );
964    }
965    else {
966        return addFloat32Sigs( a, b, aSign );
967    }
968
969}
970
971/*
972-------------------------------------------------------------------------------
973Returns the result of multiplying the single-precision floating-point values
974`a' and `b'.  The operation is performed according to the IEC/IEEE Standard
975for Binary Floating-Point Arithmetic.
976-------------------------------------------------------------------------------
977*/
978float32 float32_mul( float32 a, float32 b )
979{
980    flag aSign, bSign, zSign;
981    int16 aExp, bExp, zExp;
982    bits32 aSig, bSig, zSig0, zSig1;
983
984    aSig = extractFloat32Frac( a );
985    aExp = extractFloat32Exp( a );
986    aSign = extractFloat32Sign( a );
987    bSig = extractFloat32Frac( b );
988    bExp = extractFloat32Exp( b );
989    bSign = extractFloat32Sign( b );
990    zSign = aSign ^ bSign;
991    if ( aExp == 0xFF ) {
992        if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
993            return propagateFloat32NaN( a, b );
994        }
995        if ( ( bExp | bSig ) == 0 ) {
996            float_raise( float_flag_invalid );
997            return float32_default_nan;
998        }
999        return packFloat32( zSign, 0xFF, 0 );
1000    }
1001    if ( bExp == 0xFF ) {
1002        if ( bSig ) return propagateFloat32NaN( a, b );
1003        if ( ( aExp | aSig ) == 0 ) {
1004            float_raise( float_flag_invalid );
1005            return float32_default_nan;
1006        }
1007        return packFloat32( zSign, 0xFF, 0 );
1008    }
1009    if ( aExp == 0 ) {
1010        if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
1011        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1012    }
1013    if ( bExp == 0 ) {
1014        if ( bSig == 0 ) return packFloat32( zSign, 0, 0 );
1015        normalizeFloat32Subnormal( bSig, &bExp, &bSig );
1016    }
1017    zExp = aExp + bExp - 0x7F;
1018    aSig = ( aSig | 0x00800000 )<<7;
1019    bSig = ( bSig | 0x00800000 )<<8;
1020    mul32To64( aSig, bSig, &zSig0, &zSig1 );
1021    zSig0 |= ( zSig1 != 0 );
1022    if ( 0 <= (sbits32) ( zSig0<<1 ) ) {
1023        zSig0 <<= 1;
1024        --zExp;
1025    }
1026    return roundAndPackFloat32( zSign, zExp, zSig0 );
1027
1028}
1029
1030/*
1031-------------------------------------------------------------------------------
1032Returns the result of dividing the single-precision floating-point value `a'
1033by the corresponding value `b'.  The operation is performed according to the
1034IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1035-------------------------------------------------------------------------------
1036*/
1037float32 float32_div( float32 a, float32 b )
1038{
1039    flag aSign, bSign, zSign;
1040    int16 aExp, bExp, zExp;
1041    bits32 aSig, bSig, zSig, rem0, rem1, term0, term1;
1042
1043    aSig = extractFloat32Frac( a );
1044    aExp = extractFloat32Exp( a );
1045    aSign = extractFloat32Sign( a );
1046    bSig = extractFloat32Frac( b );
1047    bExp = extractFloat32Exp( b );
1048    bSign = extractFloat32Sign( b );
1049    zSign = aSign ^ bSign;
1050    if ( aExp == 0xFF ) {
1051        if ( aSig ) return propagateFloat32NaN( a, b );
1052        if ( bExp == 0xFF ) {
1053            if ( bSig ) return propagateFloat32NaN( a, b );
1054            float_raise( float_flag_invalid );
1055            return float32_default_nan;
1056        }
1057        return packFloat32( zSign, 0xFF, 0 );
1058    }
1059    if ( bExp == 0xFF ) {
1060        if ( bSig ) return propagateFloat32NaN( a, b );
1061        return packFloat32( zSign, 0, 0 );
1062    }
1063    if ( bExp == 0 ) {
1064        if ( bSig == 0 ) {
1065            if ( ( aExp | aSig ) == 0 ) {
1066                float_raise( float_flag_invalid );
1067                return float32_default_nan;
1068            }
1069            float_raise( float_flag_divbyzero );
1070            return packFloat32( zSign, 0xFF, 0 );
1071        }
1072        normalizeFloat32Subnormal( bSig, &bExp, &bSig );
1073    }
1074    if ( aExp == 0 ) {
1075        if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
1076        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1077    }
1078    zExp = aExp - bExp + 0x7D;
1079    aSig = ( aSig | 0x00800000 )<<7;
1080    bSig = ( bSig | 0x00800000 )<<8;
1081    if ( bSig <= ( aSig + aSig ) ) {
1082        aSig >>= 1;
1083        ++zExp;
1084    }
1085    zSig = estimateDiv64To32( aSig, 0, bSig );
1086    if ( ( zSig & 0x3F ) <= 2 ) {
1087        mul32To64( bSig, zSig, &term0, &term1 );
1088        sub64( aSig, 0, term0, term1, &rem0, &rem1 );
1089        while ( (sbits32) rem0 < 0 ) {
1090            --zSig;
1091            add64( rem0, rem1, 0, bSig, &rem0, &rem1 );
1092        }
1093        zSig |= ( rem1 != 0 );
1094    }
1095    return roundAndPackFloat32( zSign, zExp, zSig );
1096
1097}
1098
1099#ifndef SOFTFLOAT_FOR_GCC
1100/*
1101-------------------------------------------------------------------------------
1102Returns the remainder of the single-precision floating-point value `a'
1103with respect to the corresponding value `b'.  The operation is performed
1104according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1105-------------------------------------------------------------------------------
1106*/
1107float32 float32_rem( float32 a, float32 b )
1108{
1109    flag aSign, bSign, zSign;
1110    int16 aExp, bExp, expDiff;
1111    bits32 aSig, bSig, q, allZero, alternateASig;
1112    sbits32 sigMean;
1113
1114    aSig = extractFloat32Frac( a );
1115    aExp = extractFloat32Exp( a );
1116    aSign = extractFloat32Sign( a );
1117    bSig = extractFloat32Frac( b );
1118    bExp = extractFloat32Exp( b );
1119    bSign = extractFloat32Sign( b );
1120    if ( aExp == 0xFF ) {
1121        if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
1122            return propagateFloat32NaN( a, b );
1123        }
1124        float_raise( float_flag_invalid );
1125        return float32_default_nan;
1126    }
1127    if ( bExp == 0xFF ) {
1128        if ( bSig ) return propagateFloat32NaN( a, b );
1129        return a;
1130    }
1131    if ( bExp == 0 ) {
1132        if ( bSig == 0 ) {
1133            float_raise( float_flag_invalid );
1134            return float32_default_nan;
1135        }
1136        normalizeFloat32Subnormal( bSig, &bExp, &bSig );
1137    }
1138    if ( aExp == 0 ) {
1139        if ( aSig == 0 ) return a;
1140        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1141    }
1142    expDiff = aExp - bExp;
1143    aSig = ( aSig | 0x00800000 )<<8;
1144    bSig = ( bSig | 0x00800000 )<<8;
1145    if ( expDiff < 0 ) {
1146        if ( expDiff < -1 ) return a;
1147        aSig >>= 1;
1148    }
1149    q = ( bSig <= aSig );
1150    if ( q ) aSig -= bSig;
1151    expDiff -= 32;
1152    while ( 0 < expDiff ) {
1153        q = estimateDiv64To32( aSig, 0, bSig );
1154        q = ( 2 < q ) ? q - 2 : 0;
1155        aSig = - ( ( bSig>>2 ) * q );
1156        expDiff -= 30;
1157    }
1158    expDiff += 32;
1159    if ( 0 < expDiff ) {
1160        q = estimateDiv64To32( aSig, 0, bSig );
1161        q = ( 2 < q ) ? q - 2 : 0;
1162        q >>= 32 - expDiff;
1163        bSig >>= 2;
1164        aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
1165    }
1166    else {
1167        aSig >>= 2;
1168        bSig >>= 2;
1169    }
1170    do {
1171        alternateASig = aSig;
1172        ++q;
1173        aSig -= bSig;
1174    } while ( 0 <= (sbits32) aSig );
1175    sigMean = aSig + alternateASig;
1176    if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
1177        aSig = alternateASig;
1178    }
1179    zSign = ( (sbits32) aSig < 0 );
1180    if ( zSign ) aSig = - aSig;
1181    return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig );
1182
1183}
1184#endif
1185
1186#ifndef SOFTFLOAT_FOR_GCC
1187/*
1188-------------------------------------------------------------------------------
1189Returns the square root of the single-precision floating-point value `a'.
1190The operation is performed according to the IEC/IEEE Standard for Binary
1191Floating-Point Arithmetic.
1192-------------------------------------------------------------------------------
1193*/
1194float32 float32_sqrt( float32 a )
1195{
1196    flag aSign;
1197    int16 aExp, zExp;
1198    bits32 aSig, zSig, rem0, rem1, term0, term1;
1199
1200    aSig = extractFloat32Frac( a );
1201    aExp = extractFloat32Exp( a );
1202    aSign = extractFloat32Sign( a );
1203    if ( aExp == 0xFF ) {
1204        if ( aSig ) return propagateFloat32NaN( a, 0 );
1205        if ( ! aSign ) return a;
1206        float_raise( float_flag_invalid );
1207        return float32_default_nan;
1208    }
1209    if ( aSign ) {
1210        if ( ( aExp | aSig ) == 0 ) return a;
1211        float_raise( float_flag_invalid );
1212        return float32_default_nan;
1213    }
1214    if ( aExp == 0 ) {
1215        if ( aSig == 0 ) return 0;
1216        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1217    }
1218    zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E;
1219    aSig = ( aSig | 0x00800000 )<<8;
1220    zSig = estimateSqrt32( aExp, aSig ) + 2;
1221    if ( ( zSig & 0x7F ) <= 5 ) {
1222        if ( zSig < 2 ) {
1223            zSig = 0x7FFFFFFF;
1224            goto roundAndPack;
1225        }
1226        else {
1227            aSig >>= aExp & 1;
1228            mul32To64( zSig, zSig, &term0, &term1 );
1229            sub64( aSig, 0, term0, term1, &rem0, &rem1 );
1230            while ( (sbits32) rem0 < 0 ) {
1231                --zSig;
1232                shortShift64Left( 0, zSig, 1, &term0, &term1 );
1233                term1 |= 1;
1234                add64( rem0, rem1, term0, term1, &rem0, &rem1 );
1235            }
1236            zSig |= ( ( rem0 | rem1 ) != 0 );
1237        }
1238    }
1239    shift32RightJamming( zSig, 1, &zSig );
1240 roundAndPack:
1241    return roundAndPackFloat32( 0, zExp, zSig );
1242
1243}
1244#endif
1245
1246/*
1247-------------------------------------------------------------------------------
1248Returns 1 if the single-precision floating-point value `a' is equal to
1249the corresponding value `b', and 0 otherwise.  The comparison is performed
1250according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1251-------------------------------------------------------------------------------
1252*/
1253flag float32_eq( float32 a, float32 b )
1254{
1255
1256    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1257         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1258       ) {
1259        if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
1260            float_raise( float_flag_invalid );
1261        }
1262        return 0;
1263    }
1264    return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
1265
1266}
1267
1268/*
1269-------------------------------------------------------------------------------
1270Returns 1 if the single-precision floating-point value `a' is less than
1271or equal to the corresponding value `b', and 0 otherwise.  The comparison
1272is performed according to the IEC/IEEE Standard for Binary Floating-Point
1273Arithmetic.
1274-------------------------------------------------------------------------------
1275*/
1276flag float32_le( float32 a, float32 b )
1277{
1278    flag aSign, bSign;
1279
1280    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1281         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1282       ) {
1283        float_raise( float_flag_invalid );
1284        return 0;
1285    }
1286    aSign = extractFloat32Sign( a );
1287    bSign = extractFloat32Sign( b );
1288    if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
1289    return ( a == b ) || ( aSign ^ ( a < b ) );
1290
1291}
1292
1293/*
1294-------------------------------------------------------------------------------
1295Returns 1 if the single-precision floating-point value `a' is less than
1296the corresponding value `b', and 0 otherwise.  The comparison is performed
1297according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1298-------------------------------------------------------------------------------
1299*/
1300flag float32_lt( float32 a, float32 b )
1301{
1302    flag aSign, bSign;
1303
1304    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1305         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1306       ) {
1307        float_raise( float_flag_invalid );
1308        return 0;
1309    }
1310    aSign = extractFloat32Sign( a );
1311    bSign = extractFloat32Sign( b );
1312    if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
1313    return ( a != b ) && ( aSign ^ ( a < b ) );
1314
1315}
1316
1317#ifndef SOFTFLOAT_FOR_GCC /* Not needed */
1318/*
1319-------------------------------------------------------------------------------
1320Returns 1 if the single-precision floating-point value `a' is equal to
1321the corresponding value `b', and 0 otherwise.  The invalid exception is
1322raised if either operand is a NaN.  Otherwise, the comparison is performed
1323according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1324-------------------------------------------------------------------------------
1325*/
1326flag float32_eq_signaling( float32 a, float32 b )
1327{
1328
1329    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1330         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1331       ) {
1332        float_raise( float_flag_invalid );
1333        return 0;
1334    }
1335    return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
1336
1337}
1338
1339/*
1340-------------------------------------------------------------------------------
1341Returns 1 if the single-precision floating-point value `a' is less than or
1342equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
1343cause an exception.  Otherwise, the comparison is performed according to the
1344IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1345-------------------------------------------------------------------------------
1346*/
1347flag float32_le_quiet( float32 a, float32 b )
1348{
1349    flag aSign, bSign;
1350    int16 aExp, bExp;
1351
1352    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1353         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1354       ) {
1355        if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
1356            float_raise( float_flag_invalid );
1357        }
1358        return 0;
1359    }
1360    aSign = extractFloat32Sign( a );
1361    bSign = extractFloat32Sign( b );
1362    if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
1363    return ( a == b ) || ( aSign ^ ( a < b ) );
1364
1365}
1366
1367/*
1368-------------------------------------------------------------------------------
1369Returns 1 if the single-precision floating-point value `a' is less than
1370the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
1371exception.  Otherwise, the comparison is performed according to the IEC/IEEE
1372Standard for Binary Floating-Point Arithmetic.
1373-------------------------------------------------------------------------------
1374*/
1375flag float32_lt_quiet( float32 a, float32 b )
1376{
1377    flag aSign, bSign;
1378
1379    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
1380         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
1381       ) {
1382        if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
1383            float_raise( float_flag_invalid );
1384        }
1385        return 0;
1386    }
1387    aSign = extractFloat32Sign( a );
1388    bSign = extractFloat32Sign( b );
1389    if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
1390    return ( a != b ) && ( aSign ^ ( a < b ) );
1391
1392}
1393#endif /* !SOFTFLOAT_FOR_GCC */
1394
1395#ifndef SOFTFLOAT_FOR_GCC /* Not needed */
1396/*
1397-------------------------------------------------------------------------------
1398Returns the result of converting the double-precision floating-point value
1399`a' to the 32-bit two's complement integer format.  The conversion is
1400performed according to the IEC/IEEE Standard for Binary Floating-Point
1401Arithmetic---which means in particular that the conversion is rounded
1402according to the current rounding mode.  If `a' is a NaN, the largest
1403positive integer is returned.  Otherwise, if the conversion overflows, the
1404largest integer with the same sign as `a' is returned.
1405-------------------------------------------------------------------------------
1406*/
1407int32 float64_to_int32( float64 a )
1408{
1409    flag aSign;
1410    int16 aExp, shiftCount;
1411    bits32 aSig0, aSig1, absZ, aSigExtra;
1412    int32 z;
1413    int8 roundingMode;
1414
1415    aSig1 = extractFloat64Frac1( a );
1416    aSig0 = extractFloat64Frac0( a );
1417    aExp = extractFloat64Exp( a );
1418    aSign = extractFloat64Sign( a );
1419    shiftCount = aExp - 0x413;
1420    if ( 0 <= shiftCount ) {
1421        if ( 0x41E < aExp ) {
1422            if ( ( aExp == 0x7FF ) && ( aSig0 | aSig1 ) ) aSign = 0;
1423            goto invalid;
1424        }
1425        shortShift64Left(
1426            aSig0 | 0x00100000, aSig1, shiftCount, &absZ, &aSigExtra );
1427        if ( 0x80000000 < absZ ) goto invalid;
1428    }
1429    else {
1430        aSig1 = ( aSig1 != 0 );
1431        if ( aExp < 0x3FE ) {
1432            aSigExtra = aExp | aSig0 | aSig1;
1433            absZ = 0;
1434        }
1435        else {
1436            aSig0 |= 0x00100000;
1437            aSigExtra = ( aSig0<<( shiftCount & 31 ) ) | aSig1;
1438            absZ = aSig0>>( - shiftCount );
1439        }
1440    }
1441    roundingMode = float_rounding_mode;
1442    if ( roundingMode == float_round_nearest_even ) {
1443        if ( (sbits32) aSigExtra < 0 ) {
1444            ++absZ;
1445            if ( (bits32) ( aSigExtra<<1 ) == 0 ) absZ &= ~1;
1446        }
1447        z = aSign ? - absZ : absZ;
1448    }
1449    else {
1450        aSigExtra = ( aSigExtra != 0 );
1451        if ( aSign ) {
1452            z = - (   absZ
1453                    + ( ( roundingMode == float_round_down ) & aSigExtra ) );
1454        }
1455        else {
1456            z = absZ + ( ( roundingMode == float_round_up ) & aSigExtra );
1457        }
1458    }
1459    if ( ( aSign ^ ( z < 0 ) ) && z ) {
1460 invalid:
1461        float_raise( float_flag_invalid );
1462        return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
1463    }
1464    if ( aSigExtra ) float_exception_flags |= float_flag_inexact;
1465    return z;
1466
1467}
1468#endif /* !SOFTFLOAT_FOR_GCC */
1469
1470/*
1471-------------------------------------------------------------------------------
1472Returns the result of converting the double-precision floating-point value
1473`a' to the 32-bit two's complement integer format.  The conversion is
1474performed according to the IEC/IEEE Standard for Binary Floating-Point
1475Arithmetic, except that the conversion is always rounded toward zero.
1476If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
1477the conversion overflows, the largest integer with the same sign as `a' is
1478returned.
1479-------------------------------------------------------------------------------
1480*/
1481int32 float64_to_int32_round_to_zero( float64 a )
1482{
1483    flag aSign;
1484    int16 aExp, shiftCount;
1485    bits32 aSig0, aSig1, absZ, aSigExtra;
1486    int32 z;
1487
1488    aSig1 = extractFloat64Frac1( a );
1489    aSig0 = extractFloat64Frac0( a );
1490    aExp = extractFloat64Exp( a );
1491    aSign = extractFloat64Sign( a );
1492    shiftCount = aExp - 0x413;
1493    if ( 0 <= shiftCount ) {
1494        if ( 0x41E < aExp ) {
1495            if ( ( aExp == 0x7FF ) && ( aSig0 | aSig1 ) ) aSign = 0;
1496            goto invalid;
1497        }
1498        shortShift64Left(
1499            aSig0 | 0x00100000, aSig1, shiftCount, &absZ, &aSigExtra );
1500    }
1501    else {
1502        if ( aExp < 0x3FF ) {
1503            if ( aExp | aSig0 | aSig1 ) {
1504                float_exception_flags |= float_flag_inexact;
1505            }
1506            return 0;
1507        }
1508        aSig0 |= 0x00100000;
1509        aSigExtra = ( aSig0<<( shiftCount & 31 ) ) | aSig1;
1510        absZ = aSig0>>( - shiftCount );
1511    }
1512    z = aSign ? - absZ : absZ;
1513    if ( ( aSign ^ ( z < 0 ) ) && z ) {
1514 invalid:
1515        float_raise( float_flag_invalid );
1516        return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
1517    }
1518    if ( aSigExtra ) float_exception_flags |= float_flag_inexact;
1519    return z;
1520
1521}
1522
1523/*
1524-------------------------------------------------------------------------------
1525Returns the result of converting the double-precision floating-point value
1526`a' to the single-precision floating-point format.  The conversion is
1527performed according to the IEC/IEEE Standard for Binary Floating-Point
1528Arithmetic.
1529-------------------------------------------------------------------------------
1530*/
1531float32 float64_to_float32( float64 a )
1532{
1533    flag aSign;
1534    int16 aExp;
1535    bits32 aSig0, aSig1, zSig;
1536    bits32 allZero;
1537
1538    aSig1 = extractFloat64Frac1( a );
1539    aSig0 = extractFloat64Frac0( a );
1540    aExp = extractFloat64Exp( a );
1541    aSign = extractFloat64Sign( a );
1542    if ( aExp == 0x7FF ) {
1543        if ( aSig0 | aSig1 ) {
1544            return commonNaNToFloat32( float64ToCommonNaN( a ) );
1545        }
1546        return packFloat32( aSign, 0xFF, 0 );
1547    }
1548    shift64RightJamming( aSig0, aSig1, 22, &allZero, &zSig );
1549    if ( aExp ) zSig |= 0x40000000;
1550    return roundAndPackFloat32( aSign, aExp - 0x381, zSig );
1551
1552}
1553
1554#ifndef SOFTFLOAT_FOR_GCC
1555/*
1556-------------------------------------------------------------------------------
1557Rounds the double-precision floating-point value `a' to an integer,
1558and returns the result as a double-precision floating-point value.  The
1559operation is performed according to the IEC/IEEE Standard for Binary
1560Floating-Point Arithmetic.
1561-------------------------------------------------------------------------------
1562*/
1563float64 float64_round_to_int( float64 a )
1564{
1565    flag aSign;
1566    int16 aExp;
1567    bits32 lastBitMask, roundBitsMask;
1568    int8 roundingMode;
1569    float64 z;
1570
1571    aExp = extractFloat64Exp( a );
1572    if ( 0x413 <= aExp ) {
1573        if ( 0x433 <= aExp ) {
1574            if (    ( aExp == 0x7FF )
1575                 && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) ) {
1576                return propagateFloat64NaN( a, a );
1577            }
1578            return a;
1579        }
1580        lastBitMask = 1;
1581        lastBitMask = ( lastBitMask<<( 0x432 - aExp ) )<<1;
1582        roundBitsMask = lastBitMask - 1;
1583        z = a;
1584        roundingMode = float_rounding_mode;
1585        if ( roundingMode == float_round_nearest_even ) {
1586            if ( lastBitMask ) {
1587                add64( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
1588                if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
1589            }
1590            else {
1591                if ( (sbits32) z.low < 0 ) {
1592                    ++z.high;
1593                    if ( (bits32) ( z.low<<1 ) == 0 ) z.high &= ~1;
1594                }
1595            }
1596        }
1597        else if ( roundingMode != float_round_to_zero ) {
1598            if (   extractFloat64Sign( z )
1599                 ^ ( roundingMode == float_round_up ) ) {
1600                add64( z.high, z.low, 0, roundBitsMask, &z.high, &z.low );
1601            }
1602        }
1603        z.low &= ~ roundBitsMask;
1604    }
1605    else {
1606        if ( aExp <= 0x3FE ) {
1607            if ( ( ( (bits32) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
1608            float_exception_flags |= float_flag_inexact;
1609            aSign = extractFloat64Sign( a );
1610            switch ( float_rounding_mode ) {
1611             case float_round_nearest_even:
1612                if (    ( aExp == 0x3FE )
1613                     && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) )
1614                   ) {
1615                    return packFloat64( aSign, 0x3FF, 0, 0 );
1616                }
1617                break;
1618             case float_round_down:
1619                return
1620                      aSign ? packFloat64( 1, 0x3FF, 0, 0 )
1621                    : packFloat64( 0, 0, 0, 0 );
1622             case float_round_up:
1623                return
1624                      aSign ? packFloat64( 1, 0, 0, 0 )
1625                    : packFloat64( 0, 0x3FF, 0, 0 );
1626            }
1627            return packFloat64( aSign, 0, 0, 0 );
1628        }
1629        lastBitMask = 1;
1630        lastBitMask <<= 0x413 - aExp;
1631        roundBitsMask = lastBitMask - 1;
1632        z.low = 0;
1633        z.high = a.high;
1634        roundingMode = float_rounding_mode;
1635        if ( roundingMode == float_round_nearest_even ) {
1636            z.high += lastBitMask>>1;
1637            if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
1638                z.high &= ~ lastBitMask;
1639            }
1640        }
1641        else if ( roundingMode != float_round_to_zero ) {
1642            if (   extractFloat64Sign( z )
1643                 ^ ( roundingMode == float_round_up ) ) {
1644                z.high |= ( a.low != 0 );
1645                z.high += roundBitsMask;
1646            }
1647        }
1648        z.high &= ~ roundBitsMask;
1649    }
1650    if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
1651        float_exception_flags |= float_flag_inexact;
1652    }
1653    return z;
1654
1655}
1656#endif
1657
1658/*
1659-------------------------------------------------------------------------------
1660Returns the result of adding the absolute values of the double-precision
1661floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
1662before being returned.  `zSign' is ignored if the result is a NaN.
1663The addition is performed according to the IEC/IEEE Standard for Binary
1664Floating-Point Arithmetic.
1665-------------------------------------------------------------------------------
1666*/
1667static float64 addFloat64Sigs( float64 a, float64 b, flag zSign )
1668{
1669    int16 aExp, bExp, zExp;
1670    bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
1671    int16 expDiff;
1672
1673    aSig1 = extractFloat64Frac1( a );
1674    aSig0 = extractFloat64Frac0( a );
1675    aExp = extractFloat64Exp( a );
1676    bSig1 = extractFloat64Frac1( b );
1677    bSig0 = extractFloat64Frac0( b );
1678    bExp = extractFloat64Exp( b );
1679    expDiff = aExp - bExp;
1680    if ( 0 < expDiff ) {
1681        if ( aExp == 0x7FF ) {
1682            if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, b );
1683            return a;
1684        }
1685        if ( bExp == 0 ) {
1686            --expDiff;
1687        }
1688        else {
1689            bSig0 |= 0x00100000;
1690        }
1691        shift64ExtraRightJamming(
1692            bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
1693        zExp = aExp;
1694    }
1695    else if ( expDiff < 0 ) {
1696        if ( bExp == 0x7FF ) {
1697            if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
1698            return packFloat64( zSign, 0x7FF, 0, 0 );
1699        }
1700        if ( aExp == 0 ) {
1701            ++expDiff;
1702        }
1703        else {
1704            aSig0 |= 0x00100000;
1705        }
1706        shift64ExtraRightJamming(
1707            aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
1708        zExp = bExp;
1709    }
1710    else {
1711        if ( aExp == 0x7FF ) {
1712            if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
1713                return propagateFloat64NaN( a, b );
1714            }
1715            return a;
1716        }
1717        add64( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
1718        if ( aExp == 0 ) return packFloat64( zSign, 0, zSig0, zSig1 );
1719        zSig2 = 0;
1720        zSig0 |= 0x00200000;
1721        zExp = aExp;
1722        goto shiftRight1;
1723    }
1724    aSig0 |= 0x00100000;
1725    add64( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
1726    --zExp;
1727    if ( zSig0 < 0x00200000 ) goto roundAndPack;
1728    ++zExp;
1729 shiftRight1:
1730    shift64ExtraRightJamming( zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
1731 roundAndPack:
1732    return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 );
1733
1734}
1735
1736/*
1737-------------------------------------------------------------------------------
1738Returns the result of subtracting the absolute values of the double-
1739precision floating-point values `a' and `b'.  If `zSign' is 1, the
1740difference is negated before being returned.  `zSign' is ignored if the
1741result is a NaN.  The subtraction is performed according to the IEC/IEEE
1742Standard for Binary Floating-Point Arithmetic.
1743-------------------------------------------------------------------------------
1744*/
1745static float64 subFloat64Sigs( float64 a, float64 b, flag zSign )
1746{
1747    int16 aExp, bExp, zExp;
1748    bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
1749    int16 expDiff;
1750
1751    aSig1 = extractFloat64Frac1( a );
1752    aSig0 = extractFloat64Frac0( a );
1753    aExp = extractFloat64Exp( a );
1754    bSig1 = extractFloat64Frac1( b );
1755    bSig0 = extractFloat64Frac0( b );
1756    bExp = extractFloat64Exp( b );
1757    expDiff = aExp - bExp;
1758    shortShift64Left( aSig0, aSig1, 10, &aSig0, &aSig1 );
1759    shortShift64Left( bSig0, bSig1, 10, &bSig0, &bSig1 );
1760    if ( 0 < expDiff ) goto aExpBigger;
1761    if ( expDiff < 0 ) goto bExpBigger;
1762    if ( aExp == 0x7FF ) {
1763        if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
1764            return propagateFloat64NaN( a, b );
1765        }
1766        float_raise( float_flag_invalid );
1767        return float64_default_nan;
1768    }
1769    if ( aExp == 0 ) {
1770        aExp = 1;
1771        bExp = 1;
1772    }
1773    if ( bSig0 < aSig0 ) goto aBigger;
1774    if ( aSig0 < bSig0 ) goto bBigger;
1775    if ( bSig1 < aSig1 ) goto aBigger;
1776    if ( aSig1 < bSig1 ) goto bBigger;
1777    return packFloat64( float_rounding_mode == float_round_down, 0, 0, 0 );
1778 bExpBigger:
1779    if ( bExp == 0x7FF ) {
1780        if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
1781        return packFloat64( zSign ^ 1, 0x7FF, 0, 0 );
1782    }
1783    if ( aExp == 0 ) {
1784        ++expDiff;
1785    }
1786    else {
1787        aSig0 |= 0x40000000;
1788    }
1789    shift64RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
1790    bSig0 |= 0x40000000;
1791 bBigger:
1792    sub64( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
1793    zExp = bExp;
1794    zSign ^= 1;
1795    goto normalizeRoundAndPack;
1796 aExpBigger:
1797    if ( aExp == 0x7FF ) {
1798        if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, b );
1799        return a;
1800    }
1801    if ( bExp == 0 ) {
1802        --expDiff;
1803    }
1804    else {
1805        bSig0 |= 0x40000000;
1806    }
1807    shift64RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
1808    aSig0 |= 0x40000000;
1809 aBigger:
1810    sub64( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
1811    zExp = aExp;
1812 normalizeRoundAndPack:
1813    --zExp;
1814    return normalizeRoundAndPackFloat64( zSign, zExp - 10, zSig0, zSig1 );
1815
1816}
1817
1818/*
1819-------------------------------------------------------------------------------
1820Returns the result of adding the double-precision floating-point values `a'
1821and `b'.  The operation is performed according to the IEC/IEEE Standard for
1822Binary Floating-Point Arithmetic.
1823-------------------------------------------------------------------------------
1824*/
1825float64 float64_add( float64 a, float64 b )
1826{
1827    flag aSign, bSign;
1828
1829    aSign = extractFloat64Sign( a );
1830    bSign = extractFloat64Sign( b );
1831    if ( aSign == bSign ) {
1832        return addFloat64Sigs( a, b, aSign );
1833    }
1834    else {
1835        return subFloat64Sigs( a, b, aSign );
1836    }
1837
1838}
1839
1840/*
1841-------------------------------------------------------------------------------
1842Returns the result of subtracting the double-precision floating-point values
1843`a' and `b'.  The operation is performed according to the IEC/IEEE Standard
1844for Binary Floating-Point Arithmetic.
1845-------------------------------------------------------------------------------
1846*/
1847float64 float64_sub( float64 a, float64 b )
1848{
1849    flag aSign, bSign;
1850
1851    aSign = extractFloat64Sign( a );
1852    bSign = extractFloat64Sign( b );
1853    if ( aSign == bSign ) {
1854        return subFloat64Sigs( a, b, aSign );
1855    }
1856    else {
1857        return addFloat64Sigs( a, b, aSign );
1858    }
1859
1860}
1861
1862/*
1863-------------------------------------------------------------------------------
1864Returns the result of multiplying the double-precision floating-point values
1865`a' and `b'.  The operation is performed according to the IEC/IEEE Standard
1866for Binary Floating-Point Arithmetic.
1867-------------------------------------------------------------------------------
1868*/
1869float64 float64_mul( float64 a, float64 b )
1870{
1871    flag aSign, bSign, zSign;
1872    int16 aExp, bExp, zExp;
1873    bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
1874
1875    aSig1 = extractFloat64Frac1( a );
1876    aSig0 = extractFloat64Frac0( a );
1877    aExp = extractFloat64Exp( a );
1878    aSign = extractFloat64Sign( a );
1879    bSig1 = extractFloat64Frac1( b );
1880    bSig0 = extractFloat64Frac0( b );
1881    bExp = extractFloat64Exp( b );
1882    bSign = extractFloat64Sign( b );
1883    zSign = aSign ^ bSign;
1884    if ( aExp == 0x7FF ) {
1885        if (    ( aSig0 | aSig1 )
1886             || ( ( bExp == 0x7FF ) && ( bSig0 | bSig1 ) ) ) {
1887            return propagateFloat64NaN( a, b );
1888        }
1889        if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid;
1890        return packFloat64( zSign, 0x7FF, 0, 0 );
1891    }
1892    if ( bExp == 0x7FF ) {
1893        if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
1894        if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
1895 invalid:
1896            float_raise( float_flag_invalid );
1897            return float64_default_nan;
1898        }
1899        return packFloat64( zSign, 0x7FF, 0, 0 );
1900    }
1901    if ( aExp == 0 ) {
1902        if ( ( aSig0 | aSig1 ) == 0 ) return packFloat64( zSign, 0, 0, 0 );
1903        normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
1904    }
1905    if ( bExp == 0 ) {
1906        if ( ( bSig0 | bSig1 ) == 0 ) return packFloat64( zSign, 0, 0, 0 );
1907        normalizeFloat64Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
1908    }
1909    zExp = aExp + bExp - 0x400;
1910    aSig0 |= 0x00100000;
1911    shortShift64Left( bSig0, bSig1, 12, &bSig0, &bSig1 );
1912    mul64To128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
1913    add64( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
1914    zSig2 |= ( zSig3 != 0 );
1915    if ( 0x00200000 <= zSig0 ) {
1916        shift64ExtraRightJamming(
1917            zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
1918        ++zExp;
1919    }
1920    return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 );
1921
1922}
1923
1924/*
1925-------------------------------------------------------------------------------
1926Returns the result of dividing the double-precision floating-point value `a'
1927by the corresponding value `b'.  The operation is performed according to the
1928IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1929-------------------------------------------------------------------------------
1930*/
1931float64 float64_div( float64 a, float64 b )
1932{
1933    flag aSign, bSign, zSign;
1934    int16 aExp, bExp, zExp;
1935    bits32 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
1936    bits32 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
1937
1938    aSig1 = extractFloat64Frac1( a );
1939    aSig0 = extractFloat64Frac0( a );
1940    aExp = extractFloat64Exp( a );
1941    aSign = extractFloat64Sign( a );
1942    bSig1 = extractFloat64Frac1( b );
1943    bSig0 = extractFloat64Frac0( b );
1944    bExp = extractFloat64Exp( b );
1945    bSign = extractFloat64Sign( b );
1946    zSign = aSign ^ bSign;
1947    if ( aExp == 0x7FF ) {
1948        if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, b );
1949        if ( bExp == 0x7FF ) {
1950            if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
1951            goto invalid;
1952        }
1953        return packFloat64( zSign, 0x7FF, 0, 0 );
1954    }
1955    if ( bExp == 0x7FF ) {
1956        if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
1957        return packFloat64( zSign, 0, 0, 0 );
1958    }
1959    if ( bExp == 0 ) {
1960        if ( ( bSig0 | bSig1 ) == 0 ) {
1961            if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
1962 invalid:
1963                float_raise( float_flag_invalid );
1964                return float64_default_nan;
1965            }
1966            float_raise( float_flag_divbyzero );
1967            return packFloat64( zSign, 0x7FF, 0, 0 );
1968        }
1969        normalizeFloat64Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
1970    }
1971    if ( aExp == 0 ) {
1972        if ( ( aSig0 | aSig1 ) == 0 ) return packFloat64( zSign, 0, 0, 0 );
1973        normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
1974    }
1975    zExp = aExp - bExp + 0x3FD;
1976    shortShift64Left( aSig0 | 0x00100000, aSig1, 11, &aSig0, &aSig1 );
1977    shortShift64Left( bSig0 | 0x00100000, bSig1, 11, &bSig0, &bSig1 );
1978    if ( le64( bSig0, bSig1, aSig0, aSig1 ) ) {
1979        shift64Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
1980        ++zExp;
1981    }
1982    zSig0 = estimateDiv64To32( aSig0, aSig1, bSig0 );
1983    mul64By32To96( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
1984    sub96( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
1985    while ( (sbits32) rem0 < 0 ) {
1986        --zSig0;
1987        add96( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
1988    }
1989    zSig1 = estimateDiv64To32( rem1, rem2, bSig0 );
1990    if ( ( zSig1 & 0x3FF ) <= 4 ) {
1991        mul64By32To96( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
1992        sub96( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
1993        while ( (sbits32) rem1 < 0 ) {
1994            --zSig1;
1995            add96( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
1996        }
1997        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
1998    }
1999    shift64ExtraRightJamming( zSig0, zSig1, 0, 11, &zSig0, &zSig1, &zSig2 );
2000    return roundAndPackFloat64( zSign, zExp, zSig0, zSig1, zSig2 );
2001
2002}
2003
2004#ifndef SOFTFLOAT_FOR_GCC
2005/*
2006-------------------------------------------------------------------------------
2007Returns the remainder of the double-precision floating-point value `a'
2008with respect to the corresponding value `b'.  The operation is performed
2009according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2010-------------------------------------------------------------------------------
2011*/
2012float64 float64_rem( float64 a, float64 b )
2013{
2014    flag aSign, bSign, zSign;
2015    int16 aExp, bExp, expDiff;
2016    bits32 aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
2017    bits32 allZero, alternateASig0, alternateASig1, sigMean1;
2018    sbits32 sigMean0;
2019    float64 z;
2020
2021    aSig1 = extractFloat64Frac1( a );
2022    aSig0 = extractFloat64Frac0( a );
2023    aExp = extractFloat64Exp( a );
2024    aSign = extractFloat64Sign( a );
2025    bSig1 = extractFloat64Frac1( b );
2026    bSig0 = extractFloat64Frac0( b );
2027    bExp = extractFloat64Exp( b );
2028    bSign = extractFloat64Sign( b );
2029    if ( aExp == 0x7FF ) {
2030        if (    ( aSig0 | aSig1 )
2031             || ( ( bExp == 0x7FF ) && ( bSig0 | bSig1 ) ) ) {
2032            return propagateFloat64NaN( a, b );
2033        }
2034        goto invalid;
2035    }
2036    if ( bExp == 0x7FF ) {
2037        if ( bSig0 | bSig1 ) return propagateFloat64NaN( a, b );
2038        return a;
2039    }
2040    if ( bExp == 0 ) {
2041        if ( ( bSig0 | bSig1 ) == 0 ) {
2042 invalid:
2043            float_raise( float_flag_invalid );
2044            return float64_default_nan;
2045        }
2046        normalizeFloat64Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
2047    }
2048    if ( aExp == 0 ) {
2049        if ( ( aSig0 | aSig1 ) == 0 ) return a;
2050        normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
2051    }
2052    expDiff = aExp - bExp;
2053    if ( expDiff < -1 ) return a;
2054    shortShift64Left(
2055        aSig0 | 0x00100000, aSig1, 11 - ( expDiff < 0 ), &aSig0, &aSig1 );
2056    shortShift64Left( bSig0 | 0x00100000, bSig1, 11, &bSig0, &bSig1 );
2057    q = le64( bSig0, bSig1, aSig0, aSig1 );
2058    if ( q ) sub64( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
2059    expDiff -= 32;
2060    while ( 0 < expDiff ) {
2061        q = estimateDiv64To32( aSig0, aSig1, bSig0 );
2062        q = ( 4 < q ) ? q - 4 : 0;
2063        mul64By32To96( bSig0, bSig1, q, &term0, &term1, &term2 );
2064        shortShift96Left( term0, term1, term2, 29, &term1, &term2, &allZero );
2065        shortShift64Left( aSig0, aSig1, 29, &aSig0, &allZero );
2066        sub64( aSig0, 0, term1, term2, &aSig0, &aSig1 );
2067        expDiff -= 29;
2068    }
2069    if ( -32 < expDiff ) {
2070        q = estimateDiv64To32( aSig0, aSig1, bSig0 );
2071        q = ( 4 < q ) ? q - 4 : 0;
2072        q >>= - expDiff;
2073        shift64Right( bSig0, bSig1, 8, &bSig0, &bSig1 );
2074        expDiff += 24;
2075        if ( expDiff < 0 ) {
2076            shift64Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
2077        }
2078        else {
2079            shortShift64Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
2080        }
2081        mul64By32To96( bSig0, bSig1, q, &term0, &term1, &term2 );
2082        sub64( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
2083    }
2084    else {
2085        shift64Right( aSig0, aSig1, 8, &aSig0, &aSig1 );
2086        shift64Right( bSig0, bSig1, 8, &bSig0, &bSig1 );
2087    }
2088    do {
2089        alternateASig0 = aSig0;
2090        alternateASig1 = aSig1;
2091        ++q;
2092        sub64( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
2093    } while ( 0 <= (sbits32) aSig0 );
2094    add64(
2095        aSig0, aSig1, alternateASig0, alternateASig1, &sigMean0, &sigMean1 );
2096    if (    ( sigMean0 < 0 )
2097         || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
2098        aSig0 = alternateASig0;
2099        aSig1 = alternateASig1;
2100    }
2101    zSign = ( (sbits32) aSig0 < 0 );
2102    if ( zSign ) sub64( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
2103    return
2104        normalizeRoundAndPackFloat64( aSign ^ zSign, bExp - 4, aSig0, aSig1 );
2105
2106}
2107#endif
2108
2109#ifndef SOFTFLOAT_FOR_GCC
2110/*
2111-------------------------------------------------------------------------------
2112Returns the square root of the double-precision floating-point value `a'.
2113The operation is performed according to the IEC/IEEE Standard for Binary
2114Floating-Point Arithmetic.
2115-------------------------------------------------------------------------------
2116*/
2117float64 float64_sqrt( float64 a )
2118{
2119    flag aSign;
2120    int16 aExp, zExp;
2121    bits32 aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
2122    bits32 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
2123    float64 z;
2124
2125    aSig1 = extractFloat64Frac1( a );
2126    aSig0 = extractFloat64Frac0( a );
2127    aExp = extractFloat64Exp( a );
2128    aSign = extractFloat64Sign( a );
2129    if ( aExp == 0x7FF ) {
2130        if ( aSig0 | aSig1 ) return propagateFloat64NaN( a, a );
2131        if ( ! aSign ) return a;
2132        goto invalid;
2133    }
2134    if ( aSign ) {
2135        if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
2136 invalid:
2137        float_raise( float_flag_invalid );
2138        return float64_default_nan;
2139    }
2140    if ( aExp == 0 ) {
2141        if ( ( aSig0 | aSig1 ) == 0 ) return packFloat64( 0, 0, 0, 0 );
2142        normalizeFloat64Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
2143    }
2144    zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE;
2145    aSig0 |= 0x00100000;
2146    shortShift64Left( aSig0, aSig1, 11, &term0, &term1 );
2147    zSig0 = ( estimateSqrt32( aExp, term0 )>>1 ) + 1;
2148    if ( zSig0 == 0 ) zSig0 = 0x7FFFFFFF;
2149    doubleZSig0 = zSig0 + zSig0;
2150    shortShift64Left( aSig0, aSig1, 9 - ( aExp & 1 ), &aSig0, &aSig1 );
2151    mul32To64( zSig0, zSig0, &term0, &term1 );
2152    sub64( aSig0, aSig1, term0, term1, &rem0, &rem1 );
2153    while ( (sbits32) rem0 < 0 ) {
2154        --zSig0;
2155        doubleZSig0 -= 2;
2156        add64( rem0, rem1, 0, doubleZSig0 | 1, &rem0, &rem1 );
2157    }
2158    zSig1 = estimateDiv64To32( rem1, 0, doubleZSig0 );
2159    if ( ( zSig1 & 0x1FF ) <= 5 ) {
2160        if ( zSig1 == 0 ) zSig1 = 1;
2161        mul32To64( doubleZSig0, zSig1, &term1, &term2 );
2162        sub64( rem1, 0, term1, term2, &rem1, &rem2 );
2163        mul32To64( zSig1, zSig1, &term2, &term3 );
2164        sub96( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
2165        while ( (sbits32) rem1 < 0 ) {
2166            --zSig1;
2167            shortShift64Left( 0, zSig1, 1, &term2, &term3 );
2168            term3 |= 1;
2169            term2 |= doubleZSig0;
2170            add96( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
2171        }
2172        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
2173    }
2174    shift64ExtraRightJamming( zSig0, zSig1, 0, 10, &zSig0, &zSig1, &zSig2 );
2175    return roundAndPackFloat64( 0, zExp, zSig0, zSig1, zSig2 );
2176
2177}
2178#endif
2179
2180/*
2181-------------------------------------------------------------------------------
2182Returns 1 if the double-precision floating-point value `a' is equal to
2183the corresponding value `b', and 0 otherwise.  The comparison is performed
2184according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2185-------------------------------------------------------------------------------
2186*/
2187flag float64_eq( float64 a, float64 b )
2188{
2189
2190    if (    (    ( extractFloat64Exp( a ) == 0x7FF )
2191              && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
2192         || (    ( extractFloat64Exp( b ) == 0x7FF )
2193              && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
2194       ) {
2195        if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
2196            float_raise( float_flag_invalid );
2197        }
2198        return 0;
2199    }
2200    return ( a == b ) ||
2201	( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) == 0 );
2202
2203}
2204
2205/*
2206-------------------------------------------------------------------------------
2207Returns 1 if the double-precision floating-point value `a' is less than
2208or equal to the corresponding value `b', and 0 otherwise.  The comparison
2209is performed according to the IEC/IEEE Standard for Binary Floating-Point
2210Arithmetic.
2211-------------------------------------------------------------------------------
2212*/
2213flag float64_le( float64 a, float64 b )
2214{
2215    flag aSign, bSign;
2216
2217    if (    (    ( extractFloat64Exp( a ) == 0x7FF )
2218              && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
2219         || (    ( extractFloat64Exp( b ) == 0x7FF )
2220              && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
2221       ) {
2222        float_raise( float_flag_invalid );
2223        return 0;
2224    }
2225    aSign = extractFloat64Sign( a );
2226    bSign = extractFloat64Sign( b );
2227    if ( aSign != bSign )
2228	return aSign ||
2229	    ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) ==
2230	      0 );
2231    return ( a == b ) ||
2232	( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) );
2233}
2234
2235/*
2236-------------------------------------------------------------------------------
2237Returns 1 if the double-precision floating-point value `a' is less than
2238the corresponding value `b', and 0 otherwise.  The comparison is performed
2239according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2240-------------------------------------------------------------------------------
2241*/
2242flag float64_lt( float64 a, float64 b )
2243{
2244    flag aSign, bSign;
2245
2246    if (    (    ( extractFloat64Exp( a ) == 0x7FF )
2247              && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
2248         || (    ( extractFloat64Exp( b ) == 0x7FF )
2249              && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
2250       ) {
2251        float_raise( float_flag_invalid );
2252        return 0;
2253    }
2254    aSign = extractFloat64Sign( a );
2255    bSign = extractFloat64Sign( b );
2256    if ( aSign != bSign )
2257	return aSign &&
2258	    ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) !=
2259	      0 );
2260    return ( a != b ) &&
2261	( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) );
2262
2263}
2264
2265#ifndef SOFTFLOAT_FOR_GCC
2266/*
2267-------------------------------------------------------------------------------
2268Returns 1 if the double-precision floating-point value `a' is equal to
2269the corresponding value `b', and 0 otherwise.  The invalid exception is
2270raised if either operand is a NaN.  Otherwise, the comparison is performed
2271according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2272-------------------------------------------------------------------------------
2273*/
2274flag float64_eq_signaling( float64 a, float64 b )
2275{
2276
2277    if (    (    ( extractFloat64Exp( a ) == 0x7FF )
2278              && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
2279         || (    ( extractFloat64Exp( b ) == 0x7FF )
2280              && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
2281       ) {
2282        float_raise( float_flag_invalid );
2283        return 0;
2284    }
2285    return ( a == b ) || ( (bits64) ( ( a | b )<<1 ) == 0 );
2286
2287}
2288
2289/*
2290-------------------------------------------------------------------------------
2291Returns 1 if the double-precision floating-point value `a' is less than or
2292equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
2293cause an exception.  Otherwise, the comparison is performed according to the
2294IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2295-------------------------------------------------------------------------------
2296*/
2297flag float64_le_quiet( float64 a, float64 b )
2298{
2299    flag aSign, bSign;
2300
2301    if (    (    ( extractFloat64Exp( a ) == 0x7FF )
2302              && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
2303         || (    ( extractFloat64Exp( b ) == 0x7FF )
2304              && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
2305       ) {
2306        if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
2307            float_raise( float_flag_invalid );
2308        }
2309        return 0;
2310    }
2311    aSign = extractFloat64Sign( a );
2312    bSign = extractFloat64Sign( b );
2313    if ( aSign != bSign ) return aSign || ( (bits64) ( ( a | b )<<1 ) == 0 );
2314    return ( a == b ) || ( aSign ^ ( a < b ) );
2315
2316}
2317
2318/*
2319-------------------------------------------------------------------------------
2320Returns 1 if the double-precision floating-point value `a' is less than
2321the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
2322exception.  Otherwise, the comparison is performed according to the IEC/IEEE
2323Standard for Binary Floating-Point Arithmetic.
2324-------------------------------------------------------------------------------
2325*/
2326flag float64_lt_quiet( float64 a, float64 b )
2327{
2328    flag aSign, bSign;
2329
2330    if (    (    ( extractFloat64Exp( a ) == 0x7FF )
2331              && ( extractFloat64Frac0( a ) | extractFloat64Frac1( a ) ) )
2332         || (    ( extractFloat64Exp( b ) == 0x7FF )
2333              && ( extractFloat64Frac0( b ) | extractFloat64Frac1( b ) ) )
2334       ) {
2335        if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
2336            float_raise( float_flag_invalid );
2337        }
2338        return 0;
2339    }
2340    aSign = extractFloat64Sign( a );
2341    bSign = extractFloat64Sign( b );
2342    if ( aSign != bSign ) return aSign && ( (bits64) ( ( a | b )<<1 ) != 0 );
2343    return ( a != b ) && ( aSign ^ ( a < b ) );
2344
2345}
2346
2347#endif
2348