1/* $NetBSD: softfloat.c,v 1.8 2011/07/10 04:52:23 matt Exp $ */
2
3/*
4 * This version hacked for use with gcc -msoft-float by bjh21.
5 * (Mostly a case of #ifdefing out things GCC doesn't need or provides
6 *  itself).
7 */
8
9/*
10 * Things you may want to define:
11 *
12 * SOFTFLOAT_FOR_GCC - build only those functions necessary for GCC (with
13 *   -msoft-float) to work.  Include "softfloat-for-gcc.h" to get them
14 *   properly renamed.
15 */
16
17/*
18===============================================================================
19
20This C source file is part of the SoftFloat IEC/IEEE Floating-point
21Arithmetic Package, Release 2a.
22
23Written by John R. Hauser.  This work was made possible in part by the
24International Computer Science Institute, located at Suite 600, 1947 Center
25Street, Berkeley, California 94704.  Funding was partially provided by the
26National Science Foundation under grant MIP-9311980.  The original version
27of this code was written as part of a project to build a fixed-point vector
28processor in collaboration with the University of California at Berkeley,
29overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
30is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
31arithmetic/SoftFloat.html'.
32
33THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
34has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
35TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
36PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
37AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
38
39Derivative works are acceptable, even for commercial purposes, so long as
40(1) they include prominent notice that the work is derivative, and (2) they
41include prominent notice akin to these four paragraphs for those parts of
42this code that are retained.
43
44===============================================================================
45*/
46
47#include <sys/cdefs.h>
48__FBSDID("$FreeBSD$");
49
50#ifdef SOFTFLOAT_FOR_GCC
51#include "softfloat-for-gcc.h"
52#endif
53
54#include "milieu.h"
55#include "softfloat.h"
56
57/*
58 * Conversions between floats as stored in memory and floats as
59 * SoftFloat uses them
60 */
61#ifndef FLOAT64_DEMANGLE
62#define FLOAT64_DEMANGLE(a)	(a)
63#endif
64#ifndef FLOAT64_MANGLE
65#define FLOAT64_MANGLE(a)	(a)
66#endif
67
68/*
69-------------------------------------------------------------------------------
70Floating-point rounding mode, extended double-precision rounding precision,
71and exception flags.
72-------------------------------------------------------------------------------
73*/
74int float_rounding_mode = float_round_nearest_even;
75int float_exception_flags = 0;
76#ifdef FLOATX80
77int8 floatx80_rounding_precision = 80;
78#endif
79
80/*
81-------------------------------------------------------------------------------
82Primitive arithmetic functions, including multi-word arithmetic, and
83division and square root approximations.  (Can be specialized to target if
84desired.)
85-------------------------------------------------------------------------------
86*/
87#include "softfloat-macros"
88
89/*
90-------------------------------------------------------------------------------
91Functions and definitions to determine:  (1) whether tininess for underflow
92is detected before or after rounding by default, (2) what (if anything)
93happens when exceptions are raised, (3) how signaling NaNs are distinguished
94from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
95are propagated from function inputs to output.  These details are target-
96specific.
97-------------------------------------------------------------------------------
98*/
99#include "softfloat-specialize"
100
101#if !defined(SOFTFLOAT_FOR_GCC) || defined(FLOATX80) || defined(FLOAT128)
102/*
103-------------------------------------------------------------------------------
104Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
105and 7, and returns the properly rounded 32-bit integer corresponding to the
106input.  If `zSign' is 1, the input is negated before being converted to an
107integer.  Bit 63 of `absZ' must be zero.  Ordinarily, the fixed-point input
108is simply rounded to an integer, with the inexact exception raised if the
109input cannot be represented exactly as an integer.  However, if the fixed-
110point input is too large, the invalid exception is raised and the largest
111positive or negative integer is returned.
112-------------------------------------------------------------------------------
113*/
114static int32 roundAndPackInt32( flag zSign, bits64 absZ )
115{
116    int8 roundingMode;
117    flag roundNearestEven;
118    int8 roundIncrement, roundBits;
119    int32 z;
120
121    roundingMode = float_rounding_mode;
122    roundNearestEven = ( roundingMode == float_round_nearest_even );
123    roundIncrement = 0x40;
124    if ( ! roundNearestEven ) {
125        if ( roundingMode == float_round_to_zero ) {
126            roundIncrement = 0;
127        }
128        else {
129            roundIncrement = 0x7F;
130            if ( zSign ) {
131                if ( roundingMode == float_round_up ) roundIncrement = 0;
132            }
133            else {
134                if ( roundingMode == float_round_down ) roundIncrement = 0;
135            }
136        }
137    }
138    roundBits = absZ & 0x7F;
139    absZ = ( absZ + roundIncrement )>>7;
140    absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
141    z = absZ;
142    if ( zSign ) z = - z;
143    if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
144        float_raise( float_flag_invalid );
145        return zSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
146    }
147    if ( roundBits ) float_exception_flags |= float_flag_inexact;
148    return z;
149
150}
151
152/*
153-------------------------------------------------------------------------------
154Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
155`absZ1', with binary point between bits 63 and 64 (between the input words),
156and returns the properly rounded 64-bit integer corresponding to the input.
157If `zSign' is 1, the input is negated before being converted to an integer.
158Ordinarily, the fixed-point input is simply rounded to an integer, with
159the inexact exception raised if the input cannot be represented exactly as
160an integer.  However, if the fixed-point input is too large, the invalid
161exception is raised and the largest positive or negative integer is
162returned.
163-------------------------------------------------------------------------------
164*/
165static int64 roundAndPackInt64( flag zSign, bits64 absZ0, bits64 absZ1 )
166{
167    int8 roundingMode;
168    flag roundNearestEven, increment;
169    int64 z;
170
171    roundingMode = float_rounding_mode;
172    roundNearestEven = ( roundingMode == float_round_nearest_even );
173    increment = ( (sbits64) absZ1 < 0 );
174    if ( ! roundNearestEven ) {
175        if ( roundingMode == float_round_to_zero ) {
176            increment = 0;
177        }
178        else {
179            if ( zSign ) {
180                increment = ( roundingMode == float_round_down ) && absZ1;
181            }
182            else {
183                increment = ( roundingMode == float_round_up ) && absZ1;
184            }
185        }
186    }
187    if ( increment ) {
188        ++absZ0;
189        if ( absZ0 == 0 ) goto overflow;
190        absZ0 &= ~ ( ( (bits64) ( absZ1<<1 ) == 0 ) & roundNearestEven );
191    }
192    z = absZ0;
193    if ( zSign ) z = - z;
194    if ( z && ( ( z < 0 ) ^ zSign ) ) {
195 overflow:
196        float_raise( float_flag_invalid );
197        return
198              zSign ? (sbits64) LIT64( 0x8000000000000000 )
199            : LIT64( 0x7FFFFFFFFFFFFFFF );
200    }
201    if ( absZ1 ) float_exception_flags |= float_flag_inexact;
202    return z;
203
204}
205#endif
206
207/*
208-------------------------------------------------------------------------------
209Returns the fraction bits of the single-precision floating-point value `a'.
210-------------------------------------------------------------------------------
211*/
212INLINE bits32 extractFloat32Frac( float32 a )
213{
214
215    return a & 0x007FFFFF;
216
217}
218
219/*
220-------------------------------------------------------------------------------
221Returns the exponent bits of the single-precision floating-point value `a'.
222-------------------------------------------------------------------------------
223*/
224INLINE int16 extractFloat32Exp( float32 a )
225{
226
227    return ( a>>23 ) & 0xFF;
228
229}
230
231/*
232-------------------------------------------------------------------------------
233Returns the sign bit of the single-precision floating-point value `a'.
234-------------------------------------------------------------------------------
235*/
236INLINE flag extractFloat32Sign( float32 a )
237{
238
239    return a>>31;
240
241}
242
243/*
244-------------------------------------------------------------------------------
245Normalizes the subnormal single-precision floating-point value represented
246by the denormalized significand `aSig'.  The normalized exponent and
247significand are stored at the locations pointed to by `zExpPtr' and
248`zSigPtr', respectively.
249-------------------------------------------------------------------------------
250*/
251static void
252 normalizeFloat32Subnormal( bits32 aSig, int16 *zExpPtr, bits32 *zSigPtr )
253{
254    int8 shiftCount;
255
256    shiftCount = countLeadingZeros32( aSig ) - 8;
257    *zSigPtr = aSig<<shiftCount;
258    *zExpPtr = 1 - shiftCount;
259
260}
261
262/*
263-------------------------------------------------------------------------------
264Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
265single-precision floating-point value, returning the result.  After being
266shifted into the proper positions, the three fields are simply added
267together to form the result.  This means that any integer portion of `zSig'
268will be added into the exponent.  Since a properly normalized significand
269will have an integer portion equal to 1, the `zExp' input should be 1 less
270than the desired result exponent whenever `zSig' is a complete, normalized
271significand.
272-------------------------------------------------------------------------------
273*/
274INLINE float32 packFloat32( flag zSign, int16 zExp, bits32 zSig )
275{
276
277    return ( ( (bits32) zSign )<<31 ) + ( ( (bits32) zExp )<<23 ) + zSig;
278
279}
280
281/*
282-------------------------------------------------------------------------------
283Takes an abstract floating-point value having sign `zSign', exponent `zExp',
284and significand `zSig', and returns the proper single-precision floating-
285point value corresponding to the abstract input.  Ordinarily, the abstract
286value is simply rounded and packed into the single-precision format, with
287the inexact exception raised if the abstract input cannot be represented
288exactly.  However, if the abstract value is too large, the overflow and
289inexact exceptions are raised and an infinity or maximal finite value is
290returned.  If the abstract value is too small, the input value is rounded to
291a subnormal number, and the underflow and inexact exceptions are raised if
292the abstract input cannot be represented exactly as a subnormal single-
293precision floating-point number.
294    The input significand `zSig' has its binary point between bits 30
295and 29, which is 7 bits to the left of the usual location.  This shifted
296significand must be normalized or smaller.  If `zSig' is not normalized,
297`zExp' must be 0; in that case, the result returned is a subnormal number,
298and it must not require rounding.  In the usual case that `zSig' is
299normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
300The handling of underflow and overflow follows the IEC/IEEE Standard for
301Binary Floating-Point Arithmetic.
302-------------------------------------------------------------------------------
303*/
304static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
305{
306    int8 roundingMode;
307    flag roundNearestEven;
308    int8 roundIncrement, roundBits;
309    flag isTiny;
310
311    roundingMode = float_rounding_mode;
312    roundNearestEven = ( roundingMode == float_round_nearest_even );
313    roundIncrement = 0x40;
314    if ( ! roundNearestEven ) {
315        if ( roundingMode == float_round_to_zero ) {
316            roundIncrement = 0;
317        }
318        else {
319            roundIncrement = 0x7F;
320            if ( zSign ) {
321                if ( roundingMode == float_round_up ) roundIncrement = 0;
322            }
323            else {
324                if ( roundingMode == float_round_down ) roundIncrement = 0;
325            }
326        }
327    }
328    roundBits = zSig & 0x7F;
329    if ( 0xFD <= (bits16) zExp ) {
330        if (    ( 0xFD < zExp )
331             || (    ( zExp == 0xFD )
332                  && ( (sbits32) ( zSig + roundIncrement ) < 0 ) )
333           ) {
334            float_raise( float_flag_overflow | float_flag_inexact );
335            return packFloat32( zSign, 0xFF, 0 ) - ( roundIncrement == 0 );
336        }
337        if ( zExp < 0 ) {
338            isTiny =
339                   ( float_detect_tininess == float_tininess_before_rounding )
340                || ( zExp < -1 )
341                || ( zSig + roundIncrement < 0x80000000 );
342            shift32RightJamming( zSig, - zExp, &zSig );
343            zExp = 0;
344            roundBits = zSig & 0x7F;
345            if ( isTiny && roundBits ) float_raise( float_flag_underflow );
346        }
347    }
348    if ( roundBits ) float_exception_flags |= float_flag_inexact;
349    zSig = ( zSig + roundIncrement )>>7;
350    zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
351    if ( zSig == 0 ) zExp = 0;
352    return packFloat32( zSign, zExp, zSig );
353
354}
355
356/*
357-------------------------------------------------------------------------------
358Takes an abstract floating-point value having sign `zSign', exponent `zExp',
359and significand `zSig', and returns the proper single-precision floating-
360point value corresponding to the abstract input.  This routine is just like
361`roundAndPackFloat32' except that `zSig' does not have to be normalized.
362Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
363floating-point exponent.
364-------------------------------------------------------------------------------
365*/
366static float32
367 normalizeRoundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
368{
369    int8 shiftCount;
370
371    shiftCount = countLeadingZeros32( zSig ) - 1;
372    return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount );
373
374}
375
376/*
377-------------------------------------------------------------------------------
378Returns the fraction bits of the double-precision floating-point value `a'.
379-------------------------------------------------------------------------------
380*/
381INLINE bits64 extractFloat64Frac( float64 a )
382{
383
384    return FLOAT64_DEMANGLE(a) & LIT64( 0x000FFFFFFFFFFFFF );
385
386}
387
388/*
389-------------------------------------------------------------------------------
390Returns the exponent bits of the double-precision floating-point value `a'.
391-------------------------------------------------------------------------------
392*/
393INLINE int16 extractFloat64Exp( float64 a )
394{
395
396    return ( FLOAT64_DEMANGLE(a)>>52 ) & 0x7FF;
397
398}
399
400/*
401-------------------------------------------------------------------------------
402Returns the sign bit of the double-precision floating-point value `a'.
403-------------------------------------------------------------------------------
404*/
405INLINE flag extractFloat64Sign( float64 a )
406{
407
408    return FLOAT64_DEMANGLE(a)>>63;
409
410}
411
412/*
413-------------------------------------------------------------------------------
414Normalizes the subnormal double-precision floating-point value represented
415by the denormalized significand `aSig'.  The normalized exponent and
416significand are stored at the locations pointed to by `zExpPtr' and
417`zSigPtr', respectively.
418-------------------------------------------------------------------------------
419*/
420static void
421 normalizeFloat64Subnormal( bits64 aSig, int16 *zExpPtr, bits64 *zSigPtr )
422{
423    int8 shiftCount;
424
425    shiftCount = countLeadingZeros64( aSig ) - 11;
426    *zSigPtr = aSig<<shiftCount;
427    *zExpPtr = 1 - shiftCount;
428
429}
430
431/*
432-------------------------------------------------------------------------------
433Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
434double-precision floating-point value, returning the result.  After being
435shifted into the proper positions, the three fields are simply added
436together to form the result.  This means that any integer portion of `zSig'
437will be added into the exponent.  Since a properly normalized significand
438will have an integer portion equal to 1, the `zExp' input should be 1 less
439than the desired result exponent whenever `zSig' is a complete, normalized
440significand.
441-------------------------------------------------------------------------------
442*/
443INLINE float64 packFloat64( flag zSign, int16 zExp, bits64 zSig )
444{
445
446    return FLOAT64_MANGLE( ( ( (bits64) zSign )<<63 ) +
447			   ( ( (bits64) zExp )<<52 ) + zSig );
448
449}
450
451/*
452-------------------------------------------------------------------------------
453Takes an abstract floating-point value having sign `zSign', exponent `zExp',
454and significand `zSig', and returns the proper double-precision floating-
455point value corresponding to the abstract input.  Ordinarily, the abstract
456value is simply rounded and packed into the double-precision format, with
457the inexact exception raised if the abstract input cannot be represented
458exactly.  However, if the abstract value is too large, the overflow and
459inexact exceptions are raised and an infinity or maximal finite value is
460returned.  If the abstract value is too small, the input value is rounded to
461a subnormal number, and the underflow and inexact exceptions are raised if
462the abstract input cannot be represented exactly as a subnormal double-
463precision floating-point number.
464    The input significand `zSig' has its binary point between bits 62
465and 61, which is 10 bits to the left of the usual location.  This shifted
466significand must be normalized or smaller.  If `zSig' is not normalized,
467`zExp' must be 0; in that case, the result returned is a subnormal number,
468and it must not require rounding.  In the usual case that `zSig' is
469normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
470The handling of underflow and overflow follows the IEC/IEEE Standard for
471Binary Floating-Point Arithmetic.
472-------------------------------------------------------------------------------
473*/
474static float64 roundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig )
475{
476    int8 roundingMode;
477    flag roundNearestEven;
478    int16 roundIncrement, roundBits;
479    flag isTiny;
480
481    roundingMode = float_rounding_mode;
482    roundNearestEven = ( roundingMode == float_round_nearest_even );
483    roundIncrement = 0x200;
484    if ( ! roundNearestEven ) {
485        if ( roundingMode == float_round_to_zero ) {
486            roundIncrement = 0;
487        }
488        else {
489            roundIncrement = 0x3FF;
490            if ( zSign ) {
491                if ( roundingMode == float_round_up ) roundIncrement = 0;
492            }
493            else {
494                if ( roundingMode == float_round_down ) roundIncrement = 0;
495            }
496        }
497    }
498    roundBits = zSig & 0x3FF;
499    if ( 0x7FD <= (bits16) zExp ) {
500        if (    ( 0x7FD < zExp )
501             || (    ( zExp == 0x7FD )
502                  && ( (sbits64) ( zSig + roundIncrement ) < 0 ) )
503           ) {
504            float_raise( float_flag_overflow | float_flag_inexact );
505            return FLOAT64_MANGLE(
506		FLOAT64_DEMANGLE(packFloat64( zSign, 0x7FF, 0 )) -
507		( roundIncrement == 0 ));
508        }
509        if ( zExp < 0 ) {
510            isTiny =
511                   ( float_detect_tininess == float_tininess_before_rounding )
512                || ( zExp < -1 )
513                || ( zSig + roundIncrement < LIT64( 0x8000000000000000 ) );
514            shift64RightJamming( zSig, - zExp, &zSig );
515            zExp = 0;
516            roundBits = zSig & 0x3FF;
517            if ( isTiny && roundBits ) float_raise( float_flag_underflow );
518        }
519    }
520    if ( roundBits ) float_exception_flags |= float_flag_inexact;
521    zSig = ( zSig + roundIncrement )>>10;
522    zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven );
523    if ( zSig == 0 ) zExp = 0;
524    return packFloat64( zSign, zExp, zSig );
525
526}
527
528/*
529-------------------------------------------------------------------------------
530Takes an abstract floating-point value having sign `zSign', exponent `zExp',
531and significand `zSig', and returns the proper double-precision floating-
532point value corresponding to the abstract input.  This routine is just like
533`roundAndPackFloat64' except that `zSig' does not have to be normalized.
534Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
535floating-point exponent.
536-------------------------------------------------------------------------------
537*/
538static float64
539 normalizeRoundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig )
540{
541    int8 shiftCount;
542
543    shiftCount = countLeadingZeros64( zSig ) - 1;
544    return roundAndPackFloat64( zSign, zExp - shiftCount, zSig<<shiftCount );
545
546}
547
548#ifdef FLOATX80
549
550/*
551-------------------------------------------------------------------------------
552Returns the fraction bits of the extended double-precision floating-point
553value `a'.
554-------------------------------------------------------------------------------
555*/
556INLINE bits64 extractFloatx80Frac( floatx80 a )
557{
558
559    return a.low;
560
561}
562
563/*
564-------------------------------------------------------------------------------
565Returns the exponent bits of the extended double-precision floating-point
566value `a'.
567-------------------------------------------------------------------------------
568*/
569INLINE int32 extractFloatx80Exp( floatx80 a )
570{
571
572    return a.high & 0x7FFF;
573
574}
575
576/*
577-------------------------------------------------------------------------------
578Returns the sign bit of the extended double-precision floating-point value
579`a'.
580-------------------------------------------------------------------------------
581*/
582INLINE flag extractFloatx80Sign( floatx80 a )
583{
584
585    return a.high>>15;
586
587}
588
589/*
590-------------------------------------------------------------------------------
591Normalizes the subnormal extended double-precision floating-point value
592represented by the denormalized significand `aSig'.  The normalized exponent
593and significand are stored at the locations pointed to by `zExpPtr' and
594`zSigPtr', respectively.
595-------------------------------------------------------------------------------
596*/
597static void
598 normalizeFloatx80Subnormal( bits64 aSig, int32 *zExpPtr, bits64 *zSigPtr )
599{
600    int8 shiftCount;
601
602    shiftCount = countLeadingZeros64( aSig );
603    *zSigPtr = aSig<<shiftCount;
604    *zExpPtr = 1 - shiftCount;
605
606}
607
608/*
609-------------------------------------------------------------------------------
610Packs the sign `zSign', exponent `zExp', and significand `zSig' into an
611extended double-precision floating-point value, returning the result.
612-------------------------------------------------------------------------------
613*/
614INLINE floatx80 packFloatx80( flag zSign, int32 zExp, bits64 zSig )
615{
616    floatx80 z;
617
618    z.low = zSig;
619    z.high = ( ( (bits16) zSign )<<15 ) + zExp;
620    return z;
621
622}
623
624/*
625-------------------------------------------------------------------------------
626Takes an abstract floating-point value having sign `zSign', exponent `zExp',
627and extended significand formed by the concatenation of `zSig0' and `zSig1',
628and returns the proper extended double-precision floating-point value
629corresponding to the abstract input.  Ordinarily, the abstract value is
630rounded and packed into the extended double-precision format, with the
631inexact exception raised if the abstract input cannot be represented
632exactly.  However, if the abstract value is too large, the overflow and
633inexact exceptions are raised and an infinity or maximal finite value is
634returned.  If the abstract value is too small, the input value is rounded to
635a subnormal number, and the underflow and inexact exceptions are raised if
636the abstract input cannot be represented exactly as a subnormal extended
637double-precision floating-point number.
638    If `roundingPrecision' is 32 or 64, the result is rounded to the same
639number of bits as single or double precision, respectively.  Otherwise, the
640result is rounded to the full precision of the extended double-precision
641format.
642    The input significand must be normalized or smaller.  If the input
643significand is not normalized, `zExp' must be 0; in that case, the result
644returned is a subnormal number, and it must not require rounding.  The
645handling of underflow and overflow follows the IEC/IEEE Standard for Binary
646Floating-Point Arithmetic.
647-------------------------------------------------------------------------------
648*/
649static floatx80
650 roundAndPackFloatx80(
651     int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
652 )
653{
654    int8 roundingMode;
655    flag roundNearestEven, increment, isTiny;
656    int64 roundIncrement, roundMask, roundBits;
657
658    roundingMode = float_rounding_mode;
659    roundNearestEven = ( roundingMode == float_round_nearest_even );
660    if ( roundingPrecision == 80 ) goto precision80;
661    if ( roundingPrecision == 64 ) {
662        roundIncrement = LIT64( 0x0000000000000400 );
663        roundMask = LIT64( 0x00000000000007FF );
664    }
665    else if ( roundingPrecision == 32 ) {
666        roundIncrement = LIT64( 0x0000008000000000 );
667        roundMask = LIT64( 0x000000FFFFFFFFFF );
668    }
669    else {
670        goto precision80;
671    }
672    zSig0 |= ( zSig1 != 0 );
673    if ( ! roundNearestEven ) {
674        if ( roundingMode == float_round_to_zero ) {
675            roundIncrement = 0;
676        }
677        else {
678            roundIncrement = roundMask;
679            if ( zSign ) {
680                if ( roundingMode == float_round_up ) roundIncrement = 0;
681            }
682            else {
683                if ( roundingMode == float_round_down ) roundIncrement = 0;
684            }
685        }
686    }
687    roundBits = zSig0 & roundMask;
688    if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {
689        if (    ( 0x7FFE < zExp )
690             || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
691           ) {
692            goto overflow;
693        }
694        if ( zExp <= 0 ) {
695            isTiny =
696                   ( float_detect_tininess == float_tininess_before_rounding )
697                || ( zExp < 0 )
698                || ( zSig0 <= zSig0 + roundIncrement );
699            shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
700            zExp = 0;
701            roundBits = zSig0 & roundMask;
702            if ( isTiny && roundBits ) float_raise( float_flag_underflow );
703            if ( roundBits ) float_exception_flags |= float_flag_inexact;
704            zSig0 += roundIncrement;
705            if ( (sbits64) zSig0 < 0 ) zExp = 1;
706            roundIncrement = roundMask + 1;
707            if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
708                roundMask |= roundIncrement;
709            }
710            zSig0 &= ~ roundMask;
711            return packFloatx80( zSign, zExp, zSig0 );
712        }
713    }
714    if ( roundBits ) float_exception_flags |= float_flag_inexact;
715    zSig0 += roundIncrement;
716    if ( zSig0 < roundIncrement ) {
717        ++zExp;
718        zSig0 = LIT64( 0x8000000000000000 );
719    }
720    roundIncrement = roundMask + 1;
721    if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
722        roundMask |= roundIncrement;
723    }
724    zSig0 &= ~ roundMask;
725    if ( zSig0 == 0 ) zExp = 0;
726    return packFloatx80( zSign, zExp, zSig0 );
727 precision80:
728    increment = ( (sbits64) zSig1 < 0 );
729    if ( ! roundNearestEven ) {
730        if ( roundingMode == float_round_to_zero ) {
731            increment = 0;
732        }
733        else {
734            if ( zSign ) {
735                increment = ( roundingMode == float_round_down ) && zSig1;
736            }
737            else {
738                increment = ( roundingMode == float_round_up ) && zSig1;
739            }
740        }
741    }
742    if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {
743        if (    ( 0x7FFE < zExp )
744             || (    ( zExp == 0x7FFE )
745                  && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) )
746                  && increment
747                )
748           ) {
749            roundMask = 0;
750 overflow:
751            float_raise( float_flag_overflow | float_flag_inexact );
752            if (    ( roundingMode == float_round_to_zero )
753                 || ( zSign && ( roundingMode == float_round_up ) )
754                 || ( ! zSign && ( roundingMode == float_round_down ) )
755               ) {
756                return packFloatx80( zSign, 0x7FFE, ~ roundMask );
757            }
758            return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
759        }
760        if ( zExp <= 0 ) {
761            isTiny =
762                   ( float_detect_tininess == float_tininess_before_rounding )
763                || ( zExp < 0 )
764                || ! increment
765                || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) );
766            shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
767            zExp = 0;
768            if ( isTiny && zSig1 ) float_raise( float_flag_underflow );
769            if ( zSig1 ) float_exception_flags |= float_flag_inexact;
770            if ( roundNearestEven ) {
771                increment = ( (sbits64) zSig1 < 0 );
772            }
773            else {
774                if ( zSign ) {
775                    increment = ( roundingMode == float_round_down ) && zSig1;
776                }
777                else {
778                    increment = ( roundingMode == float_round_up ) && zSig1;
779                }
780            }
781            if ( increment ) {
782                ++zSig0;
783                zSig0 &=
784                    ~ ( ( (bits64) ( zSig1<<1 ) == 0 ) & roundNearestEven );
785                if ( (sbits64) zSig0 < 0 ) zExp = 1;
786            }
787            return packFloatx80( zSign, zExp, zSig0 );
788        }
789    }
790    if ( zSig1 ) float_exception_flags |= float_flag_inexact;
791    if ( increment ) {
792        ++zSig0;
793        if ( zSig0 == 0 ) {
794            ++zExp;
795            zSig0 = LIT64( 0x8000000000000000 );
796        }
797        else {
798            zSig0 &= ~ ( ( (bits64) ( zSig1<<1 ) == 0 ) & roundNearestEven );
799        }
800    }
801    else {
802        if ( zSig0 == 0 ) zExp = 0;
803    }
804    return packFloatx80( zSign, zExp, zSig0 );
805
806}
807
808/*
809-------------------------------------------------------------------------------
810Takes an abstract floating-point value having sign `zSign', exponent
811`zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
812and returns the proper extended double-precision floating-point value
813corresponding to the abstract input.  This routine is just like
814`roundAndPackFloatx80' except that the input significand does not have to be
815normalized.
816-------------------------------------------------------------------------------
817*/
818static floatx80
819 normalizeRoundAndPackFloatx80(
820     int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
821 )
822{
823    int8 shiftCount;
824
825    if ( zSig0 == 0 ) {
826        zSig0 = zSig1;
827        zSig1 = 0;
828        zExp -= 64;
829    }
830    shiftCount = countLeadingZeros64( zSig0 );
831    shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
832    zExp -= shiftCount;
833    return
834        roundAndPackFloatx80( roundingPrecision, zSign, zExp, zSig0, zSig1 );
835
836}
837
838#endif
839
840#ifdef FLOAT128
841
842/*
843-------------------------------------------------------------------------------
844Returns the least-significant 64 fraction bits of the quadruple-precision
845floating-point value `a'.
846-------------------------------------------------------------------------------
847*/
848INLINE bits64 extractFloat128Frac1( float128 a )
849{
850
851    return a.low;
852
853}
854
855/*
856-------------------------------------------------------------------------------
857Returns the most-significant 48 fraction bits of the quadruple-precision
858floating-point value `a'.
859-------------------------------------------------------------------------------
860*/
861INLINE bits64 extractFloat128Frac0( float128 a )
862{
863
864    return a.high & LIT64( 0x0000FFFFFFFFFFFF );
865
866}
867
868/*
869-------------------------------------------------------------------------------
870Returns the exponent bits of the quadruple-precision floating-point value
871`a'.
872-------------------------------------------------------------------------------
873*/
874INLINE int32 extractFloat128Exp( float128 a )
875{
876
877    return ( a.high>>48 ) & 0x7FFF;
878
879}
880
881/*
882-------------------------------------------------------------------------------
883Returns the sign bit of the quadruple-precision floating-point value `a'.
884-------------------------------------------------------------------------------
885*/
886INLINE flag extractFloat128Sign( float128 a )
887{
888
889    return a.high>>63;
890
891}
892
893/*
894-------------------------------------------------------------------------------
895Normalizes the subnormal quadruple-precision floating-point value
896represented by the denormalized significand formed by the concatenation of
897`aSig0' and `aSig1'.  The normalized exponent is stored at the location
898pointed to by `zExpPtr'.  The most significant 49 bits of the normalized
899significand are stored at the location pointed to by `zSig0Ptr', and the
900least significant 64 bits of the normalized significand are stored at the
901location pointed to by `zSig1Ptr'.
902-------------------------------------------------------------------------------
903*/
904static void
905 normalizeFloat128Subnormal(
906     bits64 aSig0,
907     bits64 aSig1,
908     int32 *zExpPtr,
909     bits64 *zSig0Ptr,
910     bits64 *zSig1Ptr
911 )
912{
913    int8 shiftCount;
914
915    if ( aSig0 == 0 ) {
916        shiftCount = countLeadingZeros64( aSig1 ) - 15;
917        if ( shiftCount < 0 ) {
918            *zSig0Ptr = aSig1>>( - shiftCount );
919            *zSig1Ptr = aSig1<<( shiftCount & 63 );
920        }
921        else {
922            *zSig0Ptr = aSig1<<shiftCount;
923            *zSig1Ptr = 0;
924        }
925        *zExpPtr = - shiftCount - 63;
926    }
927    else {
928        shiftCount = countLeadingZeros64( aSig0 ) - 15;
929        shortShift128Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
930        *zExpPtr = 1 - shiftCount;
931    }
932
933}
934
935/*
936-------------------------------------------------------------------------------
937Packs the sign `zSign', the exponent `zExp', and the significand formed
938by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
939floating-point value, returning the result.  After being shifted into the
940proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
941added together to form the most significant 32 bits of the result.  This
942means that any integer portion of `zSig0' will be added into the exponent.
943Since a properly normalized significand will have an integer portion equal
944to 1, the `zExp' input should be 1 less than the desired result exponent
945whenever `zSig0' and `zSig1' concatenated form a complete, normalized
946significand.
947-------------------------------------------------------------------------------
948*/
949INLINE float128
950 packFloat128( flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 )
951{
952    float128 z;
953
954    z.low = zSig1;
955    z.high = ( ( (bits64) zSign )<<63 ) + ( ( (bits64) zExp )<<48 ) + zSig0;
956    return z;
957
958}
959
960/*
961-------------------------------------------------------------------------------
962Takes an abstract floating-point value having sign `zSign', exponent `zExp',
963and extended significand formed by the concatenation of `zSig0', `zSig1',
964and `zSig2', and returns the proper quadruple-precision floating-point value
965corresponding to the abstract input.  Ordinarily, the abstract value is
966simply rounded and packed into the quadruple-precision format, with the
967inexact exception raised if the abstract input cannot be represented
968exactly.  However, if the abstract value is too large, the overflow and
969inexact exceptions are raised and an infinity or maximal finite value is
970returned.  If the abstract value is too small, the input value is rounded to
971a subnormal number, and the underflow and inexact exceptions are raised if
972the abstract input cannot be represented exactly as a subnormal quadruple-
973precision floating-point number.
974    The input significand must be normalized or smaller.  If the input
975significand is not normalized, `zExp' must be 0; in that case, the result
976returned is a subnormal number, and it must not require rounding.  In the
977usual case that the input significand is normalized, `zExp' must be 1 less
978than the ``true'' floating-point exponent.  The handling of underflow and
979overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
980-------------------------------------------------------------------------------
981*/
982static float128
983 roundAndPackFloat128(
984     flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1, bits64 zSig2 )
985{
986    int8 roundingMode;
987    flag roundNearestEven, increment, isTiny;
988
989    roundingMode = float_rounding_mode;
990    roundNearestEven = ( roundingMode == float_round_nearest_even );
991    increment = ( (sbits64) zSig2 < 0 );
992    if ( ! roundNearestEven ) {
993        if ( roundingMode == float_round_to_zero ) {
994            increment = 0;
995        }
996        else {
997            if ( zSign ) {
998                increment = ( roundingMode == float_round_down ) && zSig2;
999            }
1000            else {
1001                increment = ( roundingMode == float_round_up ) && zSig2;
1002            }
1003        }
1004    }
1005    if ( 0x7FFD <= (bits32) zExp ) {
1006        if (    ( 0x7FFD < zExp )
1007             || (    ( zExp == 0x7FFD )
1008                  && eq128(
1009                         LIT64( 0x0001FFFFFFFFFFFF ),
1010                         LIT64( 0xFFFFFFFFFFFFFFFF ),
1011                         zSig0,
1012                         zSig1
1013                     )
1014                  && increment
1015                )
1016           ) {
1017            float_raise( float_flag_overflow | float_flag_inexact );
1018            if (    ( roundingMode == float_round_to_zero )
1019                 || ( zSign && ( roundingMode == float_round_up ) )
1020                 || ( ! zSign && ( roundingMode == float_round_down ) )
1021               ) {
1022                return
1023                    packFloat128(
1024                        zSign,
1025                        0x7FFE,
1026                        LIT64( 0x0000FFFFFFFFFFFF ),
1027                        LIT64( 0xFFFFFFFFFFFFFFFF )
1028                    );
1029            }
1030            return packFloat128( zSign, 0x7FFF, 0, 0 );
1031        }
1032        if ( zExp < 0 ) {
1033            isTiny =
1034                   ( float_detect_tininess == float_tininess_before_rounding )
1035                || ( zExp < -1 )
1036                || ! increment
1037                || lt128(
1038                       zSig0,
1039                       zSig1,
1040                       LIT64( 0x0001FFFFFFFFFFFF ),
1041                       LIT64( 0xFFFFFFFFFFFFFFFF )
1042                   );
1043            shift128ExtraRightJamming(
1044                zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
1045            zExp = 0;
1046            if ( isTiny && zSig2 ) float_raise( float_flag_underflow );
1047            if ( roundNearestEven ) {
1048                increment = ( (sbits64) zSig2 < 0 );
1049            }
1050            else {
1051                if ( zSign ) {
1052                    increment = ( roundingMode == float_round_down ) && zSig2;
1053                }
1054                else {
1055                    increment = ( roundingMode == float_round_up ) && zSig2;
1056                }
1057            }
1058        }
1059    }
1060    if ( zSig2 ) float_exception_flags |= float_flag_inexact;
1061    if ( increment ) {
1062        add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
1063        zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven );
1064    }
1065    else {
1066        if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
1067    }
1068    return packFloat128( zSign, zExp, zSig0, zSig1 );
1069
1070}
1071
1072/*
1073-------------------------------------------------------------------------------
1074Takes an abstract floating-point value having sign `zSign', exponent `zExp',
1075and significand formed by the concatenation of `zSig0' and `zSig1', and
1076returns the proper quadruple-precision floating-point value corresponding
1077to the abstract input.  This routine is just like `roundAndPackFloat128'
1078except that the input significand has fewer bits and does not have to be
1079normalized.  In all cases, `zExp' must be 1 less than the ``true'' floating-
1080point exponent.
1081-------------------------------------------------------------------------------
1082*/
1083static float128
1084 normalizeRoundAndPackFloat128(
1085     flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 )
1086{
1087    int8 shiftCount;
1088    bits64 zSig2;
1089
1090    if ( zSig0 == 0 ) {
1091        zSig0 = zSig1;
1092        zSig1 = 0;
1093        zExp -= 64;
1094    }
1095    shiftCount = countLeadingZeros64( zSig0 ) - 15;
1096    if ( 0 <= shiftCount ) {
1097        zSig2 = 0;
1098        shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
1099    }
1100    else {
1101        shift128ExtraRightJamming(
1102            zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
1103    }
1104    zExp -= shiftCount;
1105    return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
1106
1107}
1108
1109#endif
1110
1111/*
1112-------------------------------------------------------------------------------
1113Returns the result of converting the 32-bit two's complement integer `a'
1114to the single-precision floating-point format.  The conversion is performed
1115according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1116-------------------------------------------------------------------------------
1117*/
1118float32 int32_to_float32( int32 a )
1119{
1120    flag zSign;
1121
1122    if ( a == 0 ) return 0;
1123    if ( a == (sbits32) 0x80000000 ) return packFloat32( 1, 0x9E, 0 );
1124    zSign = ( a < 0 );
1125    return normalizeRoundAndPackFloat32( zSign, 0x9C, zSign ? - a : a );
1126
1127}
1128
1129#ifndef SOFTFLOAT_FOR_GCC /* __floatunsisf is in libgcc */
1130float32 uint32_to_float32( uint32 a )
1131{
1132    if ( a == 0 ) return 0;
1133    if ( a & (bits32) 0x80000000 )
1134	return normalizeRoundAndPackFloat32( 0, 0x9D, a >> 1 );
1135    return normalizeRoundAndPackFloat32( 0, 0x9C, a );
1136}
1137#endif
1138
1139
1140/*
1141-------------------------------------------------------------------------------
1142Returns the result of converting the 32-bit two's complement integer `a'
1143to the double-precision floating-point format.  The conversion is performed
1144according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1145-------------------------------------------------------------------------------
1146*/
1147float64 int32_to_float64( int32 a )
1148{
1149    flag zSign;
1150    uint32 absA;
1151    int8 shiftCount;
1152    bits64 zSig;
1153
1154    if ( a == 0 ) return 0;
1155    zSign = ( a < 0 );
1156    absA = zSign ? - a : a;
1157    shiftCount = countLeadingZeros32( absA ) + 21;
1158    zSig = absA;
1159    return packFloat64( zSign, 0x432 - shiftCount, zSig<<shiftCount );
1160
1161}
1162
1163#ifndef SOFTFLOAT_FOR_GCC /* __floatunsidf is in libgcc */
1164float64 uint32_to_float64( uint32 a )
1165{
1166    int8 shiftCount;
1167    bits64 zSig = a;
1168
1169    if ( a == 0 ) return 0;
1170    shiftCount = countLeadingZeros32( a ) + 21;
1171    return packFloat64( 0, 0x432 - shiftCount, zSig<<shiftCount );
1172
1173}
1174#endif
1175
1176#ifdef FLOATX80
1177
1178/*
1179-------------------------------------------------------------------------------
1180Returns the result of converting the 32-bit two's complement integer `a'
1181to the extended double-precision floating-point format.  The conversion
1182is performed according to the IEC/IEEE Standard for Binary Floating-Point
1183Arithmetic.
1184-------------------------------------------------------------------------------
1185*/
1186floatx80 int32_to_floatx80( int32 a )
1187{
1188    flag zSign;
1189    uint32 absA;
1190    int8 shiftCount;
1191    bits64 zSig;
1192
1193    if ( a == 0 ) return packFloatx80( 0, 0, 0 );
1194    zSign = ( a < 0 );
1195    absA = zSign ? - a : a;
1196    shiftCount = countLeadingZeros32( absA ) + 32;
1197    zSig = absA;
1198    return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
1199
1200}
1201
1202floatx80 uint32_to_floatx80( uint32 a )
1203{
1204    int8 shiftCount;
1205    bits64 zSig = a;
1206
1207    if ( a == 0 ) return packFloatx80( 0, 0, 0 );
1208    shiftCount = countLeadingZeros32( a ) + 32;
1209    return packFloatx80( 0, 0x403E - shiftCount, zSig<<shiftCount );
1210
1211}
1212
1213#endif
1214
1215#ifdef FLOAT128
1216
1217/*
1218-------------------------------------------------------------------------------
1219Returns the result of converting the 32-bit two's complement integer `a' to
1220the quadruple-precision floating-point format.  The conversion is performed
1221according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1222-------------------------------------------------------------------------------
1223*/
1224float128 int32_to_float128( int32 a )
1225{
1226    flag zSign;
1227    uint32 absA;
1228    int8 shiftCount;
1229    bits64 zSig0;
1230
1231    if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
1232    zSign = ( a < 0 );
1233    absA = zSign ? - a : a;
1234    shiftCount = countLeadingZeros32( absA ) + 17;
1235    zSig0 = absA;
1236    return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 );
1237
1238}
1239
1240float128 uint32_to_float128( uint32 a )
1241{
1242    int8 shiftCount;
1243    bits64 zSig0 = a;
1244
1245    if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
1246    shiftCount = countLeadingZeros32( a ) + 17;
1247    return packFloat128( 0, 0x402E - shiftCount, zSig0<<shiftCount, 0 );
1248
1249}
1250
1251#endif
1252
1253#ifndef SOFTFLOAT_FOR_GCC /* __floatdi?f is in libgcc2.c */
1254/*
1255-------------------------------------------------------------------------------
1256Returns the result of converting the 64-bit two's complement integer `a'
1257to the single-precision floating-point format.  The conversion is performed
1258according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1259-------------------------------------------------------------------------------
1260*/
1261float32 int64_to_float32( int64 a )
1262{
1263    flag zSign;
1264    uint64 absA;
1265    int8 shiftCount;
1266
1267    if ( a == 0 ) return 0;
1268    zSign = ( a < 0 );
1269    absA = zSign ? - a : a;
1270    shiftCount = countLeadingZeros64( absA ) - 40;
1271    if ( 0 <= shiftCount ) {
1272        return packFloat32( zSign, 0x95 - shiftCount, absA<<shiftCount );
1273    }
1274    else {
1275        shiftCount += 7;
1276        if ( shiftCount < 0 ) {
1277            shift64RightJamming( absA, - shiftCount, &absA );
1278        }
1279        else {
1280            absA <<= shiftCount;
1281        }
1282        return roundAndPackFloat32( zSign, 0x9C - shiftCount, absA );
1283    }
1284
1285}
1286
1287/*
1288-------------------------------------------------------------------------------
1289Returns the result of converting the 64-bit two's complement integer `a'
1290to the double-precision floating-point format.  The conversion is performed
1291according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1292-------------------------------------------------------------------------------
1293*/
1294float64 int64_to_float64( int64 a )
1295{
1296    flag zSign;
1297
1298    if ( a == 0 ) return 0;
1299    if ( a == (sbits64) LIT64( 0x8000000000000000 ) ) {
1300        return packFloat64( 1, 0x43E, 0 );
1301    }
1302    zSign = ( a < 0 );
1303    return normalizeRoundAndPackFloat64( zSign, 0x43C, zSign ? - a : a );
1304
1305}
1306
1307#ifdef FLOATX80
1308
1309/*
1310-------------------------------------------------------------------------------
1311Returns the result of converting the 64-bit two's complement integer `a'
1312to the extended double-precision floating-point format.  The conversion
1313is performed according to the IEC/IEEE Standard for Binary Floating-Point
1314Arithmetic.
1315-------------------------------------------------------------------------------
1316*/
1317floatx80 int64_to_floatx80( int64 a )
1318{
1319    flag zSign;
1320    uint64 absA;
1321    int8 shiftCount;
1322
1323    if ( a == 0 ) return packFloatx80( 0, 0, 0 );
1324    zSign = ( a < 0 );
1325    absA = zSign ? - a : a;
1326    shiftCount = countLeadingZeros64( absA );
1327    return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
1328
1329}
1330
1331#endif
1332
1333#endif /* !SOFTFLOAT_FOR_GCC */
1334
1335#ifdef FLOAT128
1336
1337/*
1338-------------------------------------------------------------------------------
1339Returns the result of converting the 64-bit two's complement integer `a' to
1340the quadruple-precision floating-point format.  The conversion is performed
1341according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1342-------------------------------------------------------------------------------
1343*/
1344float128 int64_to_float128( int64 a )
1345{
1346    flag zSign;
1347    uint64 absA;
1348    int8 shiftCount;
1349    int32 zExp;
1350    bits64 zSig0, zSig1;
1351
1352    if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
1353    zSign = ( a < 0 );
1354    absA = zSign ? - a : a;
1355    shiftCount = countLeadingZeros64( absA ) + 49;
1356    zExp = 0x406E - shiftCount;
1357    if ( 64 <= shiftCount ) {
1358        zSig1 = 0;
1359        zSig0 = absA;
1360        shiftCount -= 64;
1361    }
1362    else {
1363        zSig1 = absA;
1364        zSig0 = 0;
1365    }
1366    shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
1367    return packFloat128( zSign, zExp, zSig0, zSig1 );
1368
1369}
1370
1371#endif
1372
1373#ifndef SOFTFLOAT_FOR_GCC /* Not needed */
1374/*
1375-------------------------------------------------------------------------------
1376Returns the result of converting the single-precision floating-point value
1377`a' to the 32-bit two's complement integer format.  The conversion is
1378performed according to the IEC/IEEE Standard for Binary Floating-Point
1379Arithmetic---which means in particular that the conversion is rounded
1380according to the current rounding mode.  If `a' is a NaN, the largest
1381positive integer is returned.  Otherwise, if the conversion overflows, the
1382largest integer with the same sign as `a' is returned.
1383-------------------------------------------------------------------------------
1384*/
1385int32 float32_to_int32( float32 a )
1386{
1387    flag aSign;
1388    int16 aExp, shiftCount;
1389    bits32 aSig;
1390    bits64 aSig64;
1391
1392    aSig = extractFloat32Frac( a );
1393    aExp = extractFloat32Exp( a );
1394    aSign = extractFloat32Sign( a );
1395    if ( ( aExp == 0xFF ) && aSig ) aSign = 0;
1396    if ( aExp ) aSig |= 0x00800000;
1397    shiftCount = 0xAF - aExp;
1398    aSig64 = aSig;
1399    aSig64 <<= 32;
1400    if ( 0 < shiftCount ) shift64RightJamming( aSig64, shiftCount, &aSig64 );
1401    return roundAndPackInt32( aSign, aSig64 );
1402
1403}
1404#endif /* !SOFTFLOAT_FOR_GCC */
1405
1406/*
1407-------------------------------------------------------------------------------
1408Returns the result of converting the single-precision floating-point value
1409`a' to the 32-bit two's complement integer format.  The conversion is
1410performed according to the IEC/IEEE Standard for Binary Floating-Point
1411Arithmetic, except that the conversion is always rounded toward zero.
1412If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
1413the conversion overflows, the largest integer with the same sign as `a' is
1414returned.
1415-------------------------------------------------------------------------------
1416*/
1417int32 float32_to_int32_round_to_zero( float32 a )
1418{
1419    flag aSign;
1420    int16 aExp, shiftCount;
1421    bits32 aSig;
1422    int32 z;
1423
1424    aSig = extractFloat32Frac( a );
1425    aExp = extractFloat32Exp( a );
1426    aSign = extractFloat32Sign( a );
1427    shiftCount = aExp - 0x9E;
1428    if ( 0 <= shiftCount ) {
1429        if ( a != 0xCF000000 ) {
1430            float_raise( float_flag_invalid );
1431            if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF;
1432        }
1433        return (sbits32) 0x80000000;
1434    }
1435    else if ( aExp <= 0x7E ) {
1436        if ( aExp | aSig ) float_exception_flags |= float_flag_inexact;
1437        return 0;
1438    }
1439    aSig = ( aSig | 0x00800000 )<<8;
1440    z = aSig>>( - shiftCount );
1441    if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) {
1442        float_exception_flags |= float_flag_inexact;
1443    }
1444    if ( aSign ) z = - z;
1445    return z;
1446
1447}
1448
1449#ifndef SOFTFLOAT_FOR_GCC /* __fix?fdi provided by libgcc2.c */
1450/*
1451-------------------------------------------------------------------------------
1452Returns the result of converting the single-precision floating-point value
1453`a' to the 64-bit two's complement integer format.  The conversion is
1454performed according to the IEC/IEEE Standard for Binary Floating-Point
1455Arithmetic---which means in particular that the conversion is rounded
1456according to the current rounding mode.  If `a' is a NaN, the largest
1457positive integer is returned.  Otherwise, if the conversion overflows, the
1458largest integer with the same sign as `a' is returned.
1459-------------------------------------------------------------------------------
1460*/
1461int64 float32_to_int64( float32 a )
1462{
1463    flag aSign;
1464    int16 aExp, shiftCount;
1465    bits32 aSig;
1466    bits64 aSig64, aSigExtra;
1467
1468    aSig = extractFloat32Frac( a );
1469    aExp = extractFloat32Exp( a );
1470    aSign = extractFloat32Sign( a );
1471    shiftCount = 0xBE - aExp;
1472    if ( shiftCount < 0 ) {
1473        float_raise( float_flag_invalid );
1474        if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
1475            return LIT64( 0x7FFFFFFFFFFFFFFF );
1476        }
1477        return (sbits64) LIT64( 0x8000000000000000 );
1478    }
1479    if ( aExp ) aSig |= 0x00800000;
1480    aSig64 = aSig;
1481    aSig64 <<= 40;
1482    shift64ExtraRightJamming( aSig64, 0, shiftCount, &aSig64, &aSigExtra );
1483    return roundAndPackInt64( aSign, aSig64, aSigExtra );
1484
1485}
1486
1487/*
1488-------------------------------------------------------------------------------
1489Returns the result of converting the single-precision floating-point value
1490`a' to the 64-bit two's complement integer format.  The conversion is
1491performed according to the IEC/IEEE Standard for Binary Floating-Point
1492Arithmetic, except that the conversion is always rounded toward zero.  If
1493`a' is a NaN, the largest positive integer is returned.  Otherwise, if the
1494conversion overflows, the largest integer with the same sign as `a' is
1495returned.
1496-------------------------------------------------------------------------------
1497*/
1498int64 float32_to_int64_round_to_zero( float32 a )
1499{
1500    flag aSign;
1501    int16 aExp, shiftCount;
1502    bits32 aSig;
1503    bits64 aSig64;
1504    int64 z;
1505
1506    aSig = extractFloat32Frac( a );
1507    aExp = extractFloat32Exp( a );
1508    aSign = extractFloat32Sign( a );
1509    shiftCount = aExp - 0xBE;
1510    if ( 0 <= shiftCount ) {
1511        if ( a != 0xDF000000 ) {
1512            float_raise( float_flag_invalid );
1513            if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
1514                return LIT64( 0x7FFFFFFFFFFFFFFF );
1515            }
1516        }
1517        return (sbits64) LIT64( 0x8000000000000000 );
1518    }
1519    else if ( aExp <= 0x7E ) {
1520        if ( aExp | aSig ) float_exception_flags |= float_flag_inexact;
1521        return 0;
1522    }
1523    aSig64 = aSig | 0x00800000;
1524    aSig64 <<= 40;
1525    z = aSig64>>( - shiftCount );
1526    if ( (bits64) ( aSig64<<( shiftCount & 63 ) ) ) {
1527        float_exception_flags |= float_flag_inexact;
1528    }
1529    if ( aSign ) z = - z;
1530    return z;
1531
1532}
1533#endif /* !SOFTFLOAT_FOR_GCC */
1534
1535/*
1536-------------------------------------------------------------------------------
1537Returns the result of converting the single-precision floating-point value
1538`a' to the double-precision floating-point format.  The conversion is
1539performed according to the IEC/IEEE Standard for Binary Floating-Point
1540Arithmetic.
1541-------------------------------------------------------------------------------
1542*/
1543float64 float32_to_float64( float32 a )
1544{
1545    flag aSign;
1546    int16 aExp;
1547    bits32 aSig;
1548
1549    aSig = extractFloat32Frac( a );
1550    aExp = extractFloat32Exp( a );
1551    aSign = extractFloat32Sign( a );
1552    if ( aExp == 0xFF ) {
1553        if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a ) );
1554        return packFloat64( aSign, 0x7FF, 0 );
1555    }
1556    if ( aExp == 0 ) {
1557        if ( aSig == 0 ) return packFloat64( aSign, 0, 0 );
1558        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1559        --aExp;
1560    }
1561    return packFloat64( aSign, aExp + 0x380, ( (bits64) aSig )<<29 );
1562
1563}
1564
1565#ifdef FLOATX80
1566
1567/*
1568-------------------------------------------------------------------------------
1569Returns the result of converting the single-precision floating-point value
1570`a' to the extended double-precision floating-point format.  The conversion
1571is performed according to the IEC/IEEE Standard for Binary Floating-Point
1572Arithmetic.
1573-------------------------------------------------------------------------------
1574*/
1575floatx80 float32_to_floatx80( float32 a )
1576{
1577    flag aSign;
1578    int16 aExp;
1579    bits32 aSig;
1580
1581    aSig = extractFloat32Frac( a );
1582    aExp = extractFloat32Exp( a );
1583    aSign = extractFloat32Sign( a );
1584    if ( aExp == 0xFF ) {
1585        if ( aSig ) return commonNaNToFloatx80( float32ToCommonNaN( a ) );
1586        return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
1587    }
1588    if ( aExp == 0 ) {
1589        if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
1590        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1591    }
1592    aSig |= 0x00800000;
1593    return packFloatx80( aSign, aExp + 0x3F80, ( (bits64) aSig )<<40 );
1594
1595}
1596
1597#endif
1598
1599#ifdef FLOAT128
1600
1601/*
1602-------------------------------------------------------------------------------
1603Returns the result of converting the single-precision floating-point value
1604`a' to the double-precision floating-point format.  The conversion is
1605performed according to the IEC/IEEE Standard for Binary Floating-Point
1606Arithmetic.
1607-------------------------------------------------------------------------------
1608*/
1609float128 float32_to_float128( float32 a )
1610{
1611    flag aSign;
1612    int16 aExp;
1613    bits32 aSig;
1614
1615    aSig = extractFloat32Frac( a );
1616    aExp = extractFloat32Exp( a );
1617    aSign = extractFloat32Sign( a );
1618    if ( aExp == 0xFF ) {
1619        if ( aSig ) return commonNaNToFloat128( float32ToCommonNaN( a ) );
1620        return packFloat128( aSign, 0x7FFF, 0, 0 );
1621    }
1622    if ( aExp == 0 ) {
1623        if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
1624        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1625        --aExp;
1626    }
1627    return packFloat128( aSign, aExp + 0x3F80, ( (bits64) aSig )<<25, 0 );
1628
1629}
1630
1631#endif
1632
1633#ifndef SOFTFLOAT_FOR_GCC /* Not needed */
1634/*
1635-------------------------------------------------------------------------------
1636Rounds the single-precision floating-point value `a' to an integer, and
1637returns the result as a single-precision floating-point value.  The
1638operation is performed according to the IEC/IEEE Standard for Binary
1639Floating-Point Arithmetic.
1640-------------------------------------------------------------------------------
1641*/
1642float32 float32_round_to_int( float32 a )
1643{
1644    flag aSign;
1645    int16 aExp;
1646    bits32 lastBitMask, roundBitsMask;
1647    int8 roundingMode;
1648    float32 z;
1649
1650    aExp = extractFloat32Exp( a );
1651    if ( 0x96 <= aExp ) {
1652        if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) {
1653            return propagateFloat32NaN( a, a );
1654        }
1655        return a;
1656    }
1657    if ( aExp <= 0x7E ) {
1658        if ( (bits32) ( a<<1 ) == 0 ) return a;
1659        float_exception_flags |= float_flag_inexact;
1660        aSign = extractFloat32Sign( a );
1661        switch ( float_rounding_mode ) {
1662         case float_round_nearest_even:
1663            if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) {
1664                return packFloat32( aSign, 0x7F, 0 );
1665            }
1666            break;
1667	 case float_round_to_zero:
1668	    break;
1669         case float_round_down:
1670            return aSign ? 0xBF800000 : 0;
1671         case float_round_up:
1672            return aSign ? 0x80000000 : 0x3F800000;
1673        }
1674        return packFloat32( aSign, 0, 0 );
1675    }
1676    lastBitMask = 1;
1677    lastBitMask <<= 0x96 - aExp;
1678    roundBitsMask = lastBitMask - 1;
1679    z = a;
1680    roundingMode = float_rounding_mode;
1681    if ( roundingMode == float_round_nearest_even ) {
1682        z += lastBitMask>>1;
1683        if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
1684    }
1685    else if ( roundingMode != float_round_to_zero ) {
1686        if ( extractFloat32Sign( z ) ^ ( roundingMode == float_round_up ) ) {
1687            z += roundBitsMask;
1688        }
1689    }
1690    z &= ~ roundBitsMask;
1691    if ( z != a ) float_exception_flags |= float_flag_inexact;
1692    return z;
1693
1694}
1695#endif /* !SOFTFLOAT_FOR_GCC */
1696
1697/*
1698-------------------------------------------------------------------------------
1699Returns the result of adding the absolute values of the single-precision
1700floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
1701before being returned.  `zSign' is ignored if the result is a NaN.
1702The addition is performed according to the IEC/IEEE Standard for Binary
1703Floating-Point Arithmetic.
1704-------------------------------------------------------------------------------
1705*/
1706static float32 addFloat32Sigs( float32 a, float32 b, flag zSign )
1707{
1708    int16 aExp, bExp, zExp;
1709    bits32 aSig, bSig, zSig;
1710    int16 expDiff;
1711
1712    aSig = extractFloat32Frac( a );
1713    aExp = extractFloat32Exp( a );
1714    bSig = extractFloat32Frac( b );
1715    bExp = extractFloat32Exp( b );
1716    expDiff = aExp - bExp;
1717    aSig <<= 6;
1718    bSig <<= 6;
1719    if ( 0 < expDiff ) {
1720        if ( aExp == 0xFF ) {
1721            if ( aSig ) return propagateFloat32NaN( a, b );
1722            return a;
1723        }
1724        if ( bExp == 0 ) {
1725            --expDiff;
1726        }
1727        else {
1728            bSig |= 0x20000000;
1729        }
1730        shift32RightJamming( bSig, expDiff, &bSig );
1731        zExp = aExp;
1732    }
1733    else if ( expDiff < 0 ) {
1734        if ( bExp == 0xFF ) {
1735            if ( bSig ) return propagateFloat32NaN( a, b );
1736            return packFloat32( zSign, 0xFF, 0 );
1737        }
1738        if ( aExp == 0 ) {
1739            ++expDiff;
1740        }
1741        else {
1742            aSig |= 0x20000000;
1743        }
1744        shift32RightJamming( aSig, - expDiff, &aSig );
1745        zExp = bExp;
1746    }
1747    else {
1748        if ( aExp == 0xFF ) {
1749            if ( aSig | bSig ) return propagateFloat32NaN( a, b );
1750            return a;
1751        }
1752        if ( aExp == 0 ) return packFloat32( zSign, 0, ( aSig + bSig )>>6 );
1753        zSig = 0x40000000 + aSig + bSig;
1754        zExp = aExp;
1755        goto roundAndPack;
1756    }
1757    aSig |= 0x20000000;
1758    zSig = ( aSig + bSig )<<1;
1759    --zExp;
1760    if ( (sbits32) zSig < 0 ) {
1761        zSig = aSig + bSig;
1762        ++zExp;
1763    }
1764 roundAndPack:
1765    return roundAndPackFloat32( zSign, zExp, zSig );
1766
1767}
1768
1769/*
1770-------------------------------------------------------------------------------
1771Returns the result of subtracting the absolute values of the single-
1772precision floating-point values `a' and `b'.  If `zSign' is 1, the
1773difference is negated before being returned.  `zSign' is ignored if the
1774result is a NaN.  The subtraction is performed according to the IEC/IEEE
1775Standard for Binary Floating-Point Arithmetic.
1776-------------------------------------------------------------------------------
1777*/
1778static float32 subFloat32Sigs( float32 a, float32 b, flag zSign )
1779{
1780    int16 aExp, bExp, zExp;
1781    bits32 aSig, bSig, zSig;
1782    int16 expDiff;
1783
1784    aSig = extractFloat32Frac( a );
1785    aExp = extractFloat32Exp( a );
1786    bSig = extractFloat32Frac( b );
1787    bExp = extractFloat32Exp( b );
1788    expDiff = aExp - bExp;
1789    aSig <<= 7;
1790    bSig <<= 7;
1791    if ( 0 < expDiff ) goto aExpBigger;
1792    if ( expDiff < 0 ) goto bExpBigger;
1793    if ( aExp == 0xFF ) {
1794        if ( aSig | bSig ) return propagateFloat32NaN( a, b );
1795        float_raise( float_flag_invalid );
1796        return float32_default_nan;
1797    }
1798    if ( aExp == 0 ) {
1799        aExp = 1;
1800        bExp = 1;
1801    }
1802    if ( bSig < aSig ) goto aBigger;
1803    if ( aSig < bSig ) goto bBigger;
1804    return packFloat32( float_rounding_mode == float_round_down, 0, 0 );
1805 bExpBigger:
1806    if ( bExp == 0xFF ) {
1807        if ( bSig ) return propagateFloat32NaN( a, b );
1808        return packFloat32( zSign ^ 1, 0xFF, 0 );
1809    }
1810    if ( aExp == 0 ) {
1811        ++expDiff;
1812    }
1813    else {
1814        aSig |= 0x40000000;
1815    }
1816    shift32RightJamming( aSig, - expDiff, &aSig );
1817    bSig |= 0x40000000;
1818 bBigger:
1819    zSig = bSig - aSig;
1820    zExp = bExp;
1821    zSign ^= 1;
1822    goto normalizeRoundAndPack;
1823 aExpBigger:
1824    if ( aExp == 0xFF ) {
1825        if ( aSig ) return propagateFloat32NaN( a, b );
1826        return a;
1827    }
1828    if ( bExp == 0 ) {
1829        --expDiff;
1830    }
1831    else {
1832        bSig |= 0x40000000;
1833    }
1834    shift32RightJamming( bSig, expDiff, &bSig );
1835    aSig |= 0x40000000;
1836 aBigger:
1837    zSig = aSig - bSig;
1838    zExp = aExp;
1839 normalizeRoundAndPack:
1840    --zExp;
1841    return normalizeRoundAndPackFloat32( zSign, zExp, zSig );
1842
1843}
1844
1845/*
1846-------------------------------------------------------------------------------
1847Returns the result of adding the single-precision floating-point values `a'
1848and `b'.  The operation is performed according to the IEC/IEEE Standard for
1849Binary Floating-Point Arithmetic.
1850-------------------------------------------------------------------------------
1851*/
1852float32 float32_add( float32 a, float32 b )
1853{
1854    flag aSign, bSign;
1855
1856    aSign = extractFloat32Sign( a );
1857    bSign = extractFloat32Sign( b );
1858    if ( aSign == bSign ) {
1859        return addFloat32Sigs( a, b, aSign );
1860    }
1861    else {
1862        return subFloat32Sigs( a, b, aSign );
1863    }
1864
1865}
1866
1867/*
1868-------------------------------------------------------------------------------
1869Returns the result of subtracting the single-precision floating-point values
1870`a' and `b'.  The operation is performed according to the IEC/IEEE Standard
1871for Binary Floating-Point Arithmetic.
1872-------------------------------------------------------------------------------
1873*/
1874float32 float32_sub( float32 a, float32 b )
1875{
1876    flag aSign, bSign;
1877
1878    aSign = extractFloat32Sign( a );
1879    bSign = extractFloat32Sign( b );
1880    if ( aSign == bSign ) {
1881        return subFloat32Sigs( a, b, aSign );
1882    }
1883    else {
1884        return addFloat32Sigs( a, b, aSign );
1885    }
1886
1887}
1888
1889/*
1890-------------------------------------------------------------------------------
1891Returns the result of multiplying the single-precision floating-point values
1892`a' and `b'.  The operation is performed according to the IEC/IEEE Standard
1893for Binary Floating-Point Arithmetic.
1894-------------------------------------------------------------------------------
1895*/
1896float32 float32_mul( float32 a, float32 b )
1897{
1898    flag aSign, bSign, zSign;
1899    int16 aExp, bExp, zExp;
1900    bits32 aSig, bSig;
1901    bits64 zSig64;
1902    bits32 zSig;
1903
1904    aSig = extractFloat32Frac( a );
1905    aExp = extractFloat32Exp( a );
1906    aSign = extractFloat32Sign( a );
1907    bSig = extractFloat32Frac( b );
1908    bExp = extractFloat32Exp( b );
1909    bSign = extractFloat32Sign( b );
1910    zSign = aSign ^ bSign;
1911    if ( aExp == 0xFF ) {
1912        if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
1913            return propagateFloat32NaN( a, b );
1914        }
1915        if ( ( bExp | bSig ) == 0 ) {
1916            float_raise( float_flag_invalid );
1917            return float32_default_nan;
1918        }
1919        return packFloat32( zSign, 0xFF, 0 );
1920    }
1921    if ( bExp == 0xFF ) {
1922        if ( bSig ) return propagateFloat32NaN( a, b );
1923        if ( ( aExp | aSig ) == 0 ) {
1924            float_raise( float_flag_invalid );
1925            return float32_default_nan;
1926        }
1927        return packFloat32( zSign, 0xFF, 0 );
1928    }
1929    if ( aExp == 0 ) {
1930        if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
1931        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1932    }
1933    if ( bExp == 0 ) {
1934        if ( bSig == 0 ) return packFloat32( zSign, 0, 0 );
1935        normalizeFloat32Subnormal( bSig, &bExp, &bSig );
1936    }
1937    zExp = aExp + bExp - 0x7F;
1938    aSig = ( aSig | 0x00800000 )<<7;
1939    bSig = ( bSig | 0x00800000 )<<8;
1940    shift64RightJamming( ( (bits64) aSig ) * bSig, 32, &zSig64 );
1941    zSig = zSig64;
1942    if ( 0 <= (sbits32) ( zSig<<1 ) ) {
1943        zSig <<= 1;
1944        --zExp;
1945    }
1946    return roundAndPackFloat32( zSign, zExp, zSig );
1947
1948}
1949
1950/*
1951-------------------------------------------------------------------------------
1952Returns the result of dividing the single-precision floating-point value `a'
1953by the corresponding value `b'.  The operation is performed according to the
1954IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1955-------------------------------------------------------------------------------
1956*/
1957float32 float32_div( float32 a, float32 b )
1958{
1959    flag aSign, bSign, zSign;
1960    int16 aExp, bExp, zExp;
1961    bits32 aSig, bSig, zSig;
1962
1963    aSig = extractFloat32Frac( a );
1964    aExp = extractFloat32Exp( a );
1965    aSign = extractFloat32Sign( a );
1966    bSig = extractFloat32Frac( b );
1967    bExp = extractFloat32Exp( b );
1968    bSign = extractFloat32Sign( b );
1969    zSign = aSign ^ bSign;
1970    if ( aExp == 0xFF ) {
1971        if ( aSig ) return propagateFloat32NaN( a, b );
1972        if ( bExp == 0xFF ) {
1973            if ( bSig ) return propagateFloat32NaN( a, b );
1974            float_raise( float_flag_invalid );
1975            return float32_default_nan;
1976        }
1977        return packFloat32( zSign, 0xFF, 0 );
1978    }
1979    if ( bExp == 0xFF ) {
1980        if ( bSig ) return propagateFloat32NaN( a, b );
1981        return packFloat32( zSign, 0, 0 );
1982    }
1983    if ( bExp == 0 ) {
1984        if ( bSig == 0 ) {
1985            if ( ( aExp | aSig ) == 0 ) {
1986                float_raise( float_flag_invalid );
1987                return float32_default_nan;
1988            }
1989            float_raise( float_flag_divbyzero );
1990            return packFloat32( zSign, 0xFF, 0 );
1991        }
1992        normalizeFloat32Subnormal( bSig, &bExp, &bSig );
1993    }
1994    if ( aExp == 0 ) {
1995        if ( aSig == 0 ) return packFloat32( zSign, 0, 0 );
1996        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1997    }
1998    zExp = aExp - bExp + 0x7D;
1999    aSig = ( aSig | 0x00800000 )<<7;
2000    bSig = ( bSig | 0x00800000 )<<8;
2001    if ( bSig <= ( aSig + aSig ) ) {
2002        aSig >>= 1;
2003        ++zExp;
2004    }
2005    zSig = ( ( (bits64) aSig )<<32 ) / bSig;
2006    if ( ( zSig & 0x3F ) == 0 ) {
2007        zSig |= ( (bits64) bSig * zSig != ( (bits64) aSig )<<32 );
2008    }
2009    return roundAndPackFloat32( zSign, zExp, zSig );
2010
2011}
2012
2013#ifndef SOFTFLOAT_FOR_GCC /* Not needed */
2014/*
2015-------------------------------------------------------------------------------
2016Returns the remainder of the single-precision floating-point value `a'
2017with respect to the corresponding value `b'.  The operation is performed
2018according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2019-------------------------------------------------------------------------------
2020*/
2021float32 float32_rem( float32 a, float32 b )
2022{
2023    flag aSign, bSign, zSign;
2024    int16 aExp, bExp, expDiff;
2025    bits32 aSig, bSig;
2026    bits32 q;
2027    bits64 aSig64, bSig64, q64;
2028    bits32 alternateASig;
2029    sbits32 sigMean;
2030
2031    aSig = extractFloat32Frac( a );
2032    aExp = extractFloat32Exp( a );
2033    aSign = extractFloat32Sign( a );
2034    bSig = extractFloat32Frac( b );
2035    bExp = extractFloat32Exp( b );
2036    bSign = extractFloat32Sign( b );
2037    if ( aExp == 0xFF ) {
2038        if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) {
2039            return propagateFloat32NaN( a, b );
2040        }
2041        float_raise( float_flag_invalid );
2042        return float32_default_nan;
2043    }
2044    if ( bExp == 0xFF ) {
2045        if ( bSig ) return propagateFloat32NaN( a, b );
2046        return a;
2047    }
2048    if ( bExp == 0 ) {
2049        if ( bSig == 0 ) {
2050            float_raise( float_flag_invalid );
2051            return float32_default_nan;
2052        }
2053        normalizeFloat32Subnormal( bSig, &bExp, &bSig );
2054    }
2055    if ( aExp == 0 ) {
2056        if ( aSig == 0 ) return a;
2057        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
2058    }
2059    expDiff = aExp - bExp;
2060    aSig |= 0x00800000;
2061    bSig |= 0x00800000;
2062    if ( expDiff < 32 ) {
2063        aSig <<= 8;
2064        bSig <<= 8;
2065        if ( expDiff < 0 ) {
2066            if ( expDiff < -1 ) return a;
2067            aSig >>= 1;
2068        }
2069        q = ( bSig <= aSig );
2070        if ( q ) aSig -= bSig;
2071        if ( 0 < expDiff ) {
2072            q = ( ( (bits64) aSig )<<32 ) / bSig;
2073            q >>= 32 - expDiff;
2074            bSig >>= 2;
2075            aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
2076        }
2077        else {
2078            aSig >>= 2;
2079            bSig >>= 2;
2080        }
2081    }
2082    else {
2083        if ( bSig <= aSig ) aSig -= bSig;
2084        aSig64 = ( (bits64) aSig )<<40;
2085        bSig64 = ( (bits64) bSig )<<40;
2086        expDiff -= 64;
2087        while ( 0 < expDiff ) {
2088            q64 = estimateDiv128To64( aSig64, 0, bSig64 );
2089            q64 = ( 2 < q64 ) ? q64 - 2 : 0;
2090            aSig64 = - ( ( bSig * q64 )<<38 );
2091            expDiff -= 62;
2092        }
2093        expDiff += 64;
2094        q64 = estimateDiv128To64( aSig64, 0, bSig64 );
2095        q64 = ( 2 < q64 ) ? q64 - 2 : 0;
2096        q = q64>>( 64 - expDiff );
2097        bSig <<= 6;
2098        aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q;
2099    }
2100    do {
2101        alternateASig = aSig;
2102        ++q;
2103        aSig -= bSig;
2104    } while ( 0 <= (sbits32) aSig );
2105    sigMean = aSig + alternateASig;
2106    if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
2107        aSig = alternateASig;
2108    }
2109    zSign = ( (sbits32) aSig < 0 );
2110    if ( zSign ) aSig = - aSig;
2111    return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig );
2112
2113}
2114#endif /* !SOFTFLOAT_FOR_GCC */
2115
2116#ifndef SOFTFLOAT_FOR_GCC /* Not needed */
2117/*
2118-------------------------------------------------------------------------------
2119Returns the square root of the single-precision floating-point value `a'.
2120The operation is performed according to the IEC/IEEE Standard for Binary
2121Floating-Point Arithmetic.
2122-------------------------------------------------------------------------------
2123*/
2124float32 float32_sqrt( float32 a )
2125{
2126    flag aSign;
2127    int16 aExp, zExp;
2128    bits32 aSig, zSig;
2129    bits64 rem, term;
2130
2131    aSig = extractFloat32Frac( a );
2132    aExp = extractFloat32Exp( a );
2133    aSign = extractFloat32Sign( a );
2134    if ( aExp == 0xFF ) {
2135        if ( aSig ) return propagateFloat32NaN( a, 0 );
2136        if ( ! aSign ) return a;
2137        float_raise( float_flag_invalid );
2138        return float32_default_nan;
2139    }
2140    if ( aSign ) {
2141        if ( ( aExp | aSig ) == 0 ) return a;
2142        float_raise( float_flag_invalid );
2143        return float32_default_nan;
2144    }
2145    if ( aExp == 0 ) {
2146        if ( aSig == 0 ) return 0;
2147        normalizeFloat32Subnormal( aSig, &aExp, &aSig );
2148    }
2149    zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E;
2150    aSig = ( aSig | 0x00800000 )<<8;
2151    zSig = estimateSqrt32( aExp, aSig ) + 2;
2152    if ( ( zSig & 0x7F ) <= 5 ) {
2153        if ( zSig < 2 ) {
2154            zSig = 0x7FFFFFFF;
2155            goto roundAndPack;
2156        }
2157        aSig >>= aExp & 1;
2158        term = ( (bits64) zSig ) * zSig;
2159        rem = ( ( (bits64) aSig )<<32 ) - term;
2160        while ( (sbits64) rem < 0 ) {
2161            --zSig;
2162            rem += ( ( (bits64) zSig )<<1 ) | 1;
2163        }
2164        zSig |= ( rem != 0 );
2165    }
2166    shift32RightJamming( zSig, 1, &zSig );
2167 roundAndPack:
2168    return roundAndPackFloat32( 0, zExp, zSig );
2169
2170}
2171#endif /* !SOFTFLOAT_FOR_GCC */
2172
2173/*
2174-------------------------------------------------------------------------------
2175Returns 1 if the single-precision floating-point value `a' is equal to
2176the corresponding value `b', and 0 otherwise.  The comparison is performed
2177according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2178-------------------------------------------------------------------------------
2179*/
2180flag float32_eq( float32 a, float32 b )
2181{
2182
2183    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2184         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2185       ) {
2186        if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
2187            float_raise( float_flag_invalid );
2188        }
2189        return 0;
2190    }
2191    return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
2192
2193}
2194
2195/*
2196-------------------------------------------------------------------------------
2197Returns 1 if the single-precision floating-point value `a' is less than
2198or equal to the corresponding value `b', and 0 otherwise.  The comparison
2199is performed according to the IEC/IEEE Standard for Binary Floating-Point
2200Arithmetic.
2201-------------------------------------------------------------------------------
2202*/
2203flag float32_le( float32 a, float32 b )
2204{
2205    flag aSign, bSign;
2206
2207    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2208         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2209       ) {
2210        float_raise( float_flag_invalid );
2211        return 0;
2212    }
2213    aSign = extractFloat32Sign( a );
2214    bSign = extractFloat32Sign( b );
2215    if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
2216    return ( a == b ) || ( aSign ^ ( a < b ) );
2217
2218}
2219
2220/*
2221-------------------------------------------------------------------------------
2222Returns 1 if the single-precision floating-point value `a' is less than
2223the corresponding value `b', and 0 otherwise.  The comparison is performed
2224according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2225-------------------------------------------------------------------------------
2226*/
2227flag float32_lt( float32 a, float32 b )
2228{
2229    flag aSign, bSign;
2230
2231    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2232         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2233       ) {
2234        float_raise( float_flag_invalid );
2235        return 0;
2236    }
2237    aSign = extractFloat32Sign( a );
2238    bSign = extractFloat32Sign( b );
2239    if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
2240    return ( a != b ) && ( aSign ^ ( a < b ) );
2241
2242}
2243
2244#ifndef SOFTFLOAT_FOR_GCC /* Not needed */
2245/*
2246-------------------------------------------------------------------------------
2247Returns 1 if the single-precision floating-point value `a' is equal to
2248the corresponding value `b', and 0 otherwise.  The invalid exception is
2249raised if either operand is a NaN.  Otherwise, the comparison is performed
2250according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2251-------------------------------------------------------------------------------
2252*/
2253flag float32_eq_signaling( float32 a, float32 b )
2254{
2255
2256    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2257         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2258       ) {
2259        float_raise( float_flag_invalid );
2260        return 0;
2261    }
2262    return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 );
2263
2264}
2265
2266/*
2267-------------------------------------------------------------------------------
2268Returns 1 if the single-precision floating-point value `a' is less than or
2269equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
2270cause an exception.  Otherwise, the comparison is performed according to the
2271IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2272-------------------------------------------------------------------------------
2273*/
2274flag float32_le_quiet( float32 a, float32 b )
2275{
2276    flag aSign, bSign;
2277
2278    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2279         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2280       ) {
2281        if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
2282            float_raise( float_flag_invalid );
2283        }
2284        return 0;
2285    }
2286    aSign = extractFloat32Sign( a );
2287    bSign = extractFloat32Sign( b );
2288    if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 );
2289    return ( a == b ) || ( aSign ^ ( a < b ) );
2290
2291}
2292
2293/*
2294-------------------------------------------------------------------------------
2295Returns 1 if the single-precision floating-point value `a' is less than
2296the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
2297exception.  Otherwise, the comparison is performed according to the IEC/IEEE
2298Standard for Binary Floating-Point Arithmetic.
2299-------------------------------------------------------------------------------
2300*/
2301flag float32_lt_quiet( float32 a, float32 b )
2302{
2303    flag aSign, bSign;
2304
2305    if (    ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) )
2306         || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) )
2307       ) {
2308        if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) {
2309            float_raise( float_flag_invalid );
2310        }
2311        return 0;
2312    }
2313    aSign = extractFloat32Sign( a );
2314    bSign = extractFloat32Sign( b );
2315    if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 );
2316    return ( a != b ) && ( aSign ^ ( a < b ) );
2317
2318}
2319#endif /* !SOFTFLOAT_FOR_GCC */
2320
2321#ifndef SOFTFLOAT_FOR_GCC /* Not needed */
2322/*
2323-------------------------------------------------------------------------------
2324Returns the result of converting the double-precision floating-point value
2325`a' to the 32-bit two's complement integer format.  The conversion is
2326performed according to the IEC/IEEE Standard for Binary Floating-Point
2327Arithmetic---which means in particular that the conversion is rounded
2328according to the current rounding mode.  If `a' is a NaN, the largest
2329positive integer is returned.  Otherwise, if the conversion overflows, the
2330largest integer with the same sign as `a' is returned.
2331-------------------------------------------------------------------------------
2332*/
2333int32 float64_to_int32( float64 a )
2334{
2335    flag aSign;
2336    int16 aExp, shiftCount;
2337    bits64 aSig;
2338
2339    aSig = extractFloat64Frac( a );
2340    aExp = extractFloat64Exp( a );
2341    aSign = extractFloat64Sign( a );
2342    if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
2343    if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
2344    shiftCount = 0x42C - aExp;
2345    if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig );
2346    return roundAndPackInt32( aSign, aSig );
2347
2348}
2349#endif /* !SOFTFLOAT_FOR_GCC */
2350
2351/*
2352-------------------------------------------------------------------------------
2353Returns the result of converting the double-precision floating-point value
2354`a' to the 32-bit two's complement integer format.  The conversion is
2355performed according to the IEC/IEEE Standard for Binary Floating-Point
2356Arithmetic, except that the conversion is always rounded toward zero.
2357If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
2358the conversion overflows, the largest integer with the same sign as `a' is
2359returned.
2360-------------------------------------------------------------------------------
2361*/
2362int32 float64_to_int32_round_to_zero( float64 a )
2363{
2364    flag aSign;
2365    int16 aExp, shiftCount;
2366    bits64 aSig, savedASig;
2367    int32 z;
2368
2369    aSig = extractFloat64Frac( a );
2370    aExp = extractFloat64Exp( a );
2371    aSign = extractFloat64Sign( a );
2372    if ( 0x41E < aExp ) {
2373        if ( ( aExp == 0x7FF ) && aSig ) aSign = 0;
2374        goto invalid;
2375    }
2376    else if ( aExp < 0x3FF ) {
2377        if ( aExp || aSig ) float_exception_flags |= float_flag_inexact;
2378        return 0;
2379    }
2380    aSig |= LIT64( 0x0010000000000000 );
2381    shiftCount = 0x433 - aExp;
2382    savedASig = aSig;
2383    aSig >>= shiftCount;
2384    z = aSig;
2385    if ( aSign ) z = - z;
2386    if ( ( z < 0 ) ^ aSign ) {
2387 invalid:
2388        float_raise( float_flag_invalid );
2389        return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
2390    }
2391    if ( ( aSig<<shiftCount ) != savedASig ) {
2392        float_exception_flags |= float_flag_inexact;
2393    }
2394    return z;
2395
2396}
2397
2398#ifndef SOFTFLOAT_FOR_GCC /* Not needed */
2399/*
2400-------------------------------------------------------------------------------
2401Returns the result of converting the double-precision floating-point value
2402`a' to the 64-bit two's complement integer format.  The conversion is
2403performed according to the IEC/IEEE Standard for Binary Floating-Point
2404Arithmetic---which means in particular that the conversion is rounded
2405according to the current rounding mode.  If `a' is a NaN, the largest
2406positive integer is returned.  Otherwise, if the conversion overflows, the
2407largest integer with the same sign as `a' is returned.
2408-------------------------------------------------------------------------------
2409*/
2410int64 float64_to_int64( float64 a )
2411{
2412    flag aSign;
2413    int16 aExp, shiftCount;
2414    bits64 aSig, aSigExtra;
2415
2416    aSig = extractFloat64Frac( a );
2417    aExp = extractFloat64Exp( a );
2418    aSign = extractFloat64Sign( a );
2419    if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
2420    shiftCount = 0x433 - aExp;
2421    if ( shiftCount <= 0 ) {
2422        if ( 0x43E < aExp ) {
2423            float_raise( float_flag_invalid );
2424            if (    ! aSign
2425                 || (    ( aExp == 0x7FF )
2426                      && ( aSig != LIT64( 0x0010000000000000 ) ) )
2427               ) {
2428                return LIT64( 0x7FFFFFFFFFFFFFFF );
2429            }
2430            return (sbits64) LIT64( 0x8000000000000000 );
2431        }
2432        aSigExtra = 0;
2433        aSig <<= - shiftCount;
2434    }
2435    else {
2436        shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
2437    }
2438    return roundAndPackInt64( aSign, aSig, aSigExtra );
2439
2440}
2441
2442/*
2443-------------------------------------------------------------------------------
2444Returns the result of converting the double-precision floating-point value
2445`a' to the 64-bit two's complement integer format.  The conversion is
2446performed according to the IEC/IEEE Standard for Binary Floating-Point
2447Arithmetic, except that the conversion is always rounded toward zero.
2448If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
2449the conversion overflows, the largest integer with the same sign as `a' is
2450returned.
2451-------------------------------------------------------------------------------
2452*/
2453int64 float64_to_int64_round_to_zero( float64 a )
2454{
2455    flag aSign;
2456    int16 aExp, shiftCount;
2457    bits64 aSig;
2458    int64 z;
2459
2460    aSig = extractFloat64Frac( a );
2461    aExp = extractFloat64Exp( a );
2462    aSign = extractFloat64Sign( a );
2463    if ( aExp ) aSig |= LIT64( 0x0010000000000000 );
2464    shiftCount = aExp - 0x433;
2465    if ( 0 <= shiftCount ) {
2466        if ( 0x43E <= aExp ) {
2467            if ( a != LIT64( 0xC3E0000000000000 ) ) {
2468                float_raise( float_flag_invalid );
2469                if (    ! aSign
2470                     || (    ( aExp == 0x7FF )
2471                          && ( aSig != LIT64( 0x0010000000000000 ) ) )
2472                   ) {
2473                    return LIT64( 0x7FFFFFFFFFFFFFFF );
2474                }
2475            }
2476            return (sbits64) LIT64( 0x8000000000000000 );
2477        }
2478        z = aSig<<shiftCount;
2479    }
2480    else {
2481        if ( aExp < 0x3FE ) {
2482            if ( aExp | aSig ) float_exception_flags |= float_flag_inexact;
2483            return 0;
2484        }
2485        z = aSig>>( - shiftCount );
2486        if ( (bits64) ( aSig<<( shiftCount & 63 ) ) ) {
2487            float_exception_flags |= float_flag_inexact;
2488        }
2489    }
2490    if ( aSign ) z = - z;
2491    return z;
2492
2493}
2494#endif /* !SOFTFLOAT_FOR_GCC */
2495
2496/*
2497-------------------------------------------------------------------------------
2498Returns the result of converting the double-precision floating-point value
2499`a' to the single-precision floating-point format.  The conversion is
2500performed according to the IEC/IEEE Standard for Binary Floating-Point
2501Arithmetic.
2502-------------------------------------------------------------------------------
2503*/
2504float32 float64_to_float32( float64 a )
2505{
2506    flag aSign;
2507    int16 aExp;
2508    bits64 aSig;
2509    bits32 zSig;
2510
2511    aSig = extractFloat64Frac( a );
2512    aExp = extractFloat64Exp( a );
2513    aSign = extractFloat64Sign( a );
2514    if ( aExp == 0x7FF ) {
2515        if ( aSig ) return commonNaNToFloat32( float64ToCommonNaN( a ) );
2516        return packFloat32( aSign, 0xFF, 0 );
2517    }
2518    shift64RightJamming( aSig, 22, &aSig );
2519    zSig = aSig;
2520    if ( aExp || zSig ) {
2521        zSig |= 0x40000000;
2522        aExp -= 0x381;
2523    }
2524    return roundAndPackFloat32( aSign, aExp, zSig );
2525
2526}
2527
2528#ifdef FLOATX80
2529
2530/*
2531-------------------------------------------------------------------------------
2532Returns the result of converting the double-precision floating-point value
2533`a' to the extended double-precision floating-point format.  The conversion
2534is performed according to the IEC/IEEE Standard for Binary Floating-Point
2535Arithmetic.
2536-------------------------------------------------------------------------------
2537*/
2538floatx80 float64_to_floatx80( float64 a )
2539{
2540    flag aSign;
2541    int16 aExp;
2542    bits64 aSig;
2543
2544    aSig = extractFloat64Frac( a );
2545    aExp = extractFloat64Exp( a );
2546    aSign = extractFloat64Sign( a );
2547    if ( aExp == 0x7FF ) {
2548        if ( aSig ) return commonNaNToFloatx80( float64ToCommonNaN( a ) );
2549        return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
2550    }
2551    if ( aExp == 0 ) {
2552        if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
2553        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
2554    }
2555    return
2556        packFloatx80(
2557            aSign, aExp + 0x3C00, ( aSig | LIT64( 0x0010000000000000 ) )<<11 );
2558
2559}
2560
2561#endif
2562
2563#ifdef FLOAT128
2564
2565/*
2566-------------------------------------------------------------------------------
2567Returns the result of converting the double-precision floating-point value
2568`a' to the quadruple-precision floating-point format.  The conversion is
2569performed according to the IEC/IEEE Standard for Binary Floating-Point
2570Arithmetic.
2571-------------------------------------------------------------------------------
2572*/
2573float128 float64_to_float128( float64 a )
2574{
2575    flag aSign;
2576    int16 aExp;
2577    bits64 aSig, zSig0, zSig1;
2578
2579    aSig = extractFloat64Frac( a );
2580    aExp = extractFloat64Exp( a );
2581    aSign = extractFloat64Sign( a );
2582    if ( aExp == 0x7FF ) {
2583        if ( aSig ) return commonNaNToFloat128( float64ToCommonNaN( a ) );
2584        return packFloat128( aSign, 0x7FFF, 0, 0 );
2585    }
2586    if ( aExp == 0 ) {
2587        if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
2588        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
2589        --aExp;
2590    }
2591    shift128Right( aSig, 0, 4, &zSig0, &zSig1 );
2592    return packFloat128( aSign, aExp + 0x3C00, zSig0, zSig1 );
2593
2594}
2595
2596#endif
2597
2598#ifndef SOFTFLOAT_FOR_GCC
2599/*
2600-------------------------------------------------------------------------------
2601Rounds the double-precision floating-point value `a' to an integer, and
2602returns the result as a double-precision floating-point value.  The
2603operation is performed according to the IEC/IEEE Standard for Binary
2604Floating-Point Arithmetic.
2605-------------------------------------------------------------------------------
2606*/
2607float64 float64_round_to_int( float64 a )
2608{
2609    flag aSign;
2610    int16 aExp;
2611    bits64 lastBitMask, roundBitsMask;
2612    int8 roundingMode;
2613    float64 z;
2614
2615    aExp = extractFloat64Exp( a );
2616    if ( 0x433 <= aExp ) {
2617        if ( ( aExp == 0x7FF ) && extractFloat64Frac( a ) ) {
2618            return propagateFloat64NaN( a, a );
2619        }
2620        return a;
2621    }
2622    if ( aExp < 0x3FF ) {
2623        if ( (bits64) ( a<<1 ) == 0 ) return a;
2624        float_exception_flags |= float_flag_inexact;
2625        aSign = extractFloat64Sign( a );
2626        switch ( float_rounding_mode ) {
2627         case float_round_nearest_even:
2628            if ( ( aExp == 0x3FE ) && extractFloat64Frac( a ) ) {
2629                return packFloat64( aSign, 0x3FF, 0 );
2630            }
2631            break;
2632	 case float_round_to_zero:
2633	    break;
2634         case float_round_down:
2635            return aSign ? LIT64( 0xBFF0000000000000 ) : 0;
2636         case float_round_up:
2637            return
2638            aSign ? LIT64( 0x8000000000000000 ) : LIT64( 0x3FF0000000000000 );
2639        }
2640        return packFloat64( aSign, 0, 0 );
2641    }
2642    lastBitMask = 1;
2643    lastBitMask <<= 0x433 - aExp;
2644    roundBitsMask = lastBitMask - 1;
2645    z = a;
2646    roundingMode = float_rounding_mode;
2647    if ( roundingMode == float_round_nearest_even ) {
2648        z += lastBitMask>>1;
2649        if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask;
2650    }
2651    else if ( roundingMode != float_round_to_zero ) {
2652        if ( extractFloat64Sign( z ) ^ ( roundingMode == float_round_up ) ) {
2653            z += roundBitsMask;
2654        }
2655    }
2656    z &= ~ roundBitsMask;
2657    if ( z != a ) float_exception_flags |= float_flag_inexact;
2658    return z;
2659
2660}
2661#endif
2662
2663/*
2664-------------------------------------------------------------------------------
2665Returns the result of adding the absolute values of the double-precision
2666floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
2667before being returned.  `zSign' is ignored if the result is a NaN.
2668The addition is performed according to the IEC/IEEE Standard for Binary
2669Floating-Point Arithmetic.
2670-------------------------------------------------------------------------------
2671*/
2672static float64 addFloat64Sigs( float64 a, float64 b, flag zSign )
2673{
2674    int16 aExp, bExp, zExp;
2675    bits64 aSig, bSig, zSig;
2676    int16 expDiff;
2677
2678    aSig = extractFloat64Frac( a );
2679    aExp = extractFloat64Exp( a );
2680    bSig = extractFloat64Frac( b );
2681    bExp = extractFloat64Exp( b );
2682    expDiff = aExp - bExp;
2683    aSig <<= 9;
2684    bSig <<= 9;
2685    if ( 0 < expDiff ) {
2686        if ( aExp == 0x7FF ) {
2687            if ( aSig ) return propagateFloat64NaN( a, b );
2688            return a;
2689        }
2690        if ( bExp == 0 ) {
2691            --expDiff;
2692        }
2693        else {
2694            bSig |= LIT64( 0x2000000000000000 );
2695        }
2696        shift64RightJamming( bSig, expDiff, &bSig );
2697        zExp = aExp;
2698    }
2699    else if ( expDiff < 0 ) {
2700        if ( bExp == 0x7FF ) {
2701            if ( bSig ) return propagateFloat64NaN( a, b );
2702            return packFloat64( zSign, 0x7FF, 0 );
2703        }
2704        if ( aExp == 0 ) {
2705            ++expDiff;
2706        }
2707        else {
2708            aSig |= LIT64( 0x2000000000000000 );
2709        }
2710        shift64RightJamming( aSig, - expDiff, &aSig );
2711        zExp = bExp;
2712    }
2713    else {
2714        if ( aExp == 0x7FF ) {
2715            if ( aSig | bSig ) return propagateFloat64NaN( a, b );
2716            return a;
2717        }
2718        if ( aExp == 0 ) return packFloat64( zSign, 0, ( aSig + bSig )>>9 );
2719        zSig = LIT64( 0x4000000000000000 ) + aSig + bSig;
2720        zExp = aExp;
2721        goto roundAndPack;
2722    }
2723    aSig |= LIT64( 0x2000000000000000 );
2724    zSig = ( aSig + bSig )<<1;
2725    --zExp;
2726    if ( (sbits64) zSig < 0 ) {
2727        zSig = aSig + bSig;
2728        ++zExp;
2729    }
2730 roundAndPack:
2731    return roundAndPackFloat64( zSign, zExp, zSig );
2732
2733}
2734
2735/*
2736-------------------------------------------------------------------------------
2737Returns the result of subtracting the absolute values of the double-
2738precision floating-point values `a' and `b'.  If `zSign' is 1, the
2739difference is negated before being returned.  `zSign' is ignored if the
2740result is a NaN.  The subtraction is performed according to the IEC/IEEE
2741Standard for Binary Floating-Point Arithmetic.
2742-------------------------------------------------------------------------------
2743*/
2744static float64 subFloat64Sigs( float64 a, float64 b, flag zSign )
2745{
2746    int16 aExp, bExp, zExp;
2747    bits64 aSig, bSig, zSig;
2748    int16 expDiff;
2749
2750    aSig = extractFloat64Frac( a );
2751    aExp = extractFloat64Exp( a );
2752    bSig = extractFloat64Frac( b );
2753    bExp = extractFloat64Exp( b );
2754    expDiff = aExp - bExp;
2755    aSig <<= 10;
2756    bSig <<= 10;
2757    if ( 0 < expDiff ) goto aExpBigger;
2758    if ( expDiff < 0 ) goto bExpBigger;
2759    if ( aExp == 0x7FF ) {
2760        if ( aSig | bSig ) return propagateFloat64NaN( a, b );
2761        float_raise( float_flag_invalid );
2762        return float64_default_nan;
2763    }
2764    if ( aExp == 0 ) {
2765        aExp = 1;
2766        bExp = 1;
2767    }
2768    if ( bSig < aSig ) goto aBigger;
2769    if ( aSig < bSig ) goto bBigger;
2770    return packFloat64( float_rounding_mode == float_round_down, 0, 0 );
2771 bExpBigger:
2772    if ( bExp == 0x7FF ) {
2773        if ( bSig ) return propagateFloat64NaN( a, b );
2774        return packFloat64( zSign ^ 1, 0x7FF, 0 );
2775    }
2776    if ( aExp == 0 ) {
2777        ++expDiff;
2778    }
2779    else {
2780        aSig |= LIT64( 0x4000000000000000 );
2781    }
2782    shift64RightJamming( aSig, - expDiff, &aSig );
2783    bSig |= LIT64( 0x4000000000000000 );
2784 bBigger:
2785    zSig = bSig - aSig;
2786    zExp = bExp;
2787    zSign ^= 1;
2788    goto normalizeRoundAndPack;
2789 aExpBigger:
2790    if ( aExp == 0x7FF ) {
2791        if ( aSig ) return propagateFloat64NaN( a, b );
2792        return a;
2793    }
2794    if ( bExp == 0 ) {
2795        --expDiff;
2796    }
2797    else {
2798        bSig |= LIT64( 0x4000000000000000 );
2799    }
2800    shift64RightJamming( bSig, expDiff, &bSig );
2801    aSig |= LIT64( 0x4000000000000000 );
2802 aBigger:
2803    zSig = aSig - bSig;
2804    zExp = aExp;
2805 normalizeRoundAndPack:
2806    --zExp;
2807    return normalizeRoundAndPackFloat64( zSign, zExp, zSig );
2808
2809}
2810
2811/*
2812-------------------------------------------------------------------------------
2813Returns the result of adding the double-precision floating-point values `a'
2814and `b'.  The operation is performed according to the IEC/IEEE Standard for
2815Binary Floating-Point Arithmetic.
2816-------------------------------------------------------------------------------
2817*/
2818float64 float64_add( float64 a, float64 b )
2819{
2820    flag aSign, bSign;
2821
2822    aSign = extractFloat64Sign( a );
2823    bSign = extractFloat64Sign( b );
2824    if ( aSign == bSign ) {
2825        return addFloat64Sigs( a, b, aSign );
2826    }
2827    else {
2828        return subFloat64Sigs( a, b, aSign );
2829    }
2830
2831}
2832
2833/*
2834-------------------------------------------------------------------------------
2835Returns the result of subtracting the double-precision floating-point values
2836`a' and `b'.  The operation is performed according to the IEC/IEEE Standard
2837for Binary Floating-Point Arithmetic.
2838-------------------------------------------------------------------------------
2839*/
2840float64 float64_sub( float64 a, float64 b )
2841{
2842    flag aSign, bSign;
2843
2844    aSign = extractFloat64Sign( a );
2845    bSign = extractFloat64Sign( b );
2846    if ( aSign == bSign ) {
2847        return subFloat64Sigs( a, b, aSign );
2848    }
2849    else {
2850        return addFloat64Sigs( a, b, aSign );
2851    }
2852
2853}
2854
2855/*
2856-------------------------------------------------------------------------------
2857Returns the result of multiplying the double-precision floating-point values
2858`a' and `b'.  The operation is performed according to the IEC/IEEE Standard
2859for Binary Floating-Point Arithmetic.
2860-------------------------------------------------------------------------------
2861*/
2862float64 float64_mul( float64 a, float64 b )
2863{
2864    flag aSign, bSign, zSign;
2865    int16 aExp, bExp, zExp;
2866    bits64 aSig, bSig, zSig0, zSig1;
2867
2868    aSig = extractFloat64Frac( a );
2869    aExp = extractFloat64Exp( a );
2870    aSign = extractFloat64Sign( a );
2871    bSig = extractFloat64Frac( b );
2872    bExp = extractFloat64Exp( b );
2873    bSign = extractFloat64Sign( b );
2874    zSign = aSign ^ bSign;
2875    if ( aExp == 0x7FF ) {
2876        if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
2877            return propagateFloat64NaN( a, b );
2878        }
2879        if ( ( bExp | bSig ) == 0 ) {
2880            float_raise( float_flag_invalid );
2881            return float64_default_nan;
2882        }
2883        return packFloat64( zSign, 0x7FF, 0 );
2884    }
2885    if ( bExp == 0x7FF ) {
2886        if ( bSig ) return propagateFloat64NaN( a, b );
2887        if ( ( aExp | aSig ) == 0 ) {
2888            float_raise( float_flag_invalid );
2889            return float64_default_nan;
2890        }
2891        return packFloat64( zSign, 0x7FF, 0 );
2892    }
2893    if ( aExp == 0 ) {
2894        if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
2895        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
2896    }
2897    if ( bExp == 0 ) {
2898        if ( bSig == 0 ) return packFloat64( zSign, 0, 0 );
2899        normalizeFloat64Subnormal( bSig, &bExp, &bSig );
2900    }
2901    zExp = aExp + bExp - 0x3FF;
2902    aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;
2903    bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
2904    mul64To128( aSig, bSig, &zSig0, &zSig1 );
2905    zSig0 |= ( zSig1 != 0 );
2906    if ( 0 <= (sbits64) ( zSig0<<1 ) ) {
2907        zSig0 <<= 1;
2908        --zExp;
2909    }
2910    return roundAndPackFloat64( zSign, zExp, zSig0 );
2911
2912}
2913
2914/*
2915-------------------------------------------------------------------------------
2916Returns the result of dividing the double-precision floating-point value `a'
2917by the corresponding value `b'.  The operation is performed according to
2918the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2919-------------------------------------------------------------------------------
2920*/
2921float64 float64_div( float64 a, float64 b )
2922{
2923    flag aSign, bSign, zSign;
2924    int16 aExp, bExp, zExp;
2925    bits64 aSig, bSig, zSig;
2926    bits64 rem0, rem1;
2927    bits64 term0, term1;
2928
2929    aSig = extractFloat64Frac( a );
2930    aExp = extractFloat64Exp( a );
2931    aSign = extractFloat64Sign( a );
2932    bSig = extractFloat64Frac( b );
2933    bExp = extractFloat64Exp( b );
2934    bSign = extractFloat64Sign( b );
2935    zSign = aSign ^ bSign;
2936    if ( aExp == 0x7FF ) {
2937        if ( aSig ) return propagateFloat64NaN( a, b );
2938        if ( bExp == 0x7FF ) {
2939            if ( bSig ) return propagateFloat64NaN( a, b );
2940            float_raise( float_flag_invalid );
2941            return float64_default_nan;
2942        }
2943        return packFloat64( zSign, 0x7FF, 0 );
2944    }
2945    if ( bExp == 0x7FF ) {
2946        if ( bSig ) return propagateFloat64NaN( a, b );
2947        return packFloat64( zSign, 0, 0 );
2948    }
2949    if ( bExp == 0 ) {
2950        if ( bSig == 0 ) {
2951            if ( ( aExp | aSig ) == 0 ) {
2952                float_raise( float_flag_invalid );
2953                return float64_default_nan;
2954            }
2955            float_raise( float_flag_divbyzero );
2956            return packFloat64( zSign, 0x7FF, 0 );
2957        }
2958        normalizeFloat64Subnormal( bSig, &bExp, &bSig );
2959    }
2960    if ( aExp == 0 ) {
2961        if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
2962        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
2963    }
2964    zExp = aExp - bExp + 0x3FD;
2965    aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;
2966    bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
2967    if ( bSig <= ( aSig + aSig ) ) {
2968        aSig >>= 1;
2969        ++zExp;
2970    }
2971    zSig = estimateDiv128To64( aSig, 0, bSig );
2972    if ( ( zSig & 0x1FF ) <= 2 ) {
2973        mul64To128( bSig, zSig, &term0, &term1 );
2974        sub128( aSig, 0, term0, term1, &rem0, &rem1 );
2975        while ( (sbits64) rem0 < 0 ) {
2976            --zSig;
2977            add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
2978        }
2979        zSig |= ( rem1 != 0 );
2980    }
2981    return roundAndPackFloat64( zSign, zExp, zSig );
2982
2983}
2984
2985#ifndef SOFTFLOAT_FOR_GCC
2986/*
2987-------------------------------------------------------------------------------
2988Returns the remainder of the double-precision floating-point value `a'
2989with respect to the corresponding value `b'.  The operation is performed
2990according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2991-------------------------------------------------------------------------------
2992*/
2993float64 float64_rem( float64 a, float64 b )
2994{
2995    flag aSign, bSign, zSign;
2996    int16 aExp, bExp, expDiff;
2997    bits64 aSig, bSig;
2998    bits64 q, alternateASig;
2999    sbits64 sigMean;
3000
3001    aSig = extractFloat64Frac( a );
3002    aExp = extractFloat64Exp( a );
3003    aSign = extractFloat64Sign( a );
3004    bSig = extractFloat64Frac( b );
3005    bExp = extractFloat64Exp( b );
3006    bSign = extractFloat64Sign( b );
3007    if ( aExp == 0x7FF ) {
3008        if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
3009            return propagateFloat64NaN( a, b );
3010        }
3011        float_raise( float_flag_invalid );
3012        return float64_default_nan;
3013    }
3014    if ( bExp == 0x7FF ) {
3015        if ( bSig ) return propagateFloat64NaN( a, b );
3016        return a;
3017    }
3018    if ( bExp == 0 ) {
3019        if ( bSig == 0 ) {
3020            float_raise( float_flag_invalid );
3021            return float64_default_nan;
3022        }
3023        normalizeFloat64Subnormal( bSig, &bExp, &bSig );
3024    }
3025    if ( aExp == 0 ) {
3026        if ( aSig == 0 ) return a;
3027        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
3028    }
3029    expDiff = aExp - bExp;
3030    aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<11;
3031    bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
3032    if ( expDiff < 0 ) {
3033        if ( expDiff < -1 ) return a;
3034        aSig >>= 1;
3035    }
3036    q = ( bSig <= aSig );
3037    if ( q ) aSig -= bSig;
3038    expDiff -= 64;
3039    while ( 0 < expDiff ) {
3040        q = estimateDiv128To64( aSig, 0, bSig );
3041        q = ( 2 < q ) ? q - 2 : 0;
3042        aSig = - ( ( bSig>>2 ) * q );
3043        expDiff -= 62;
3044    }
3045    expDiff += 64;
3046    if ( 0 < expDiff ) {
3047        q = estimateDiv128To64( aSig, 0, bSig );
3048        q = ( 2 < q ) ? q - 2 : 0;
3049        q >>= 64 - expDiff;
3050        bSig >>= 2;
3051        aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
3052    }
3053    else {
3054        aSig >>= 2;
3055        bSig >>= 2;
3056    }
3057    do {
3058        alternateASig = aSig;
3059        ++q;
3060        aSig -= bSig;
3061    } while ( 0 <= (sbits64) aSig );
3062    sigMean = aSig + alternateASig;
3063    if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
3064        aSig = alternateASig;
3065    }
3066    zSign = ( (sbits64) aSig < 0 );
3067    if ( zSign ) aSig = - aSig;
3068    return normalizeRoundAndPackFloat64( aSign ^ zSign, bExp, aSig );
3069
3070}
3071
3072/*
3073-------------------------------------------------------------------------------
3074Returns the square root of the double-precision floating-point value `a'.
3075The operation is performed according to the IEC/IEEE Standard for Binary
3076Floating-Point Arithmetic.
3077-------------------------------------------------------------------------------
3078*/
3079float64 float64_sqrt( float64 a )
3080{
3081    flag aSign;
3082    int16 aExp, zExp;
3083    bits64 aSig, zSig, doubleZSig;
3084    bits64 rem0, rem1, term0, term1;
3085
3086    aSig = extractFloat64Frac( a );
3087    aExp = extractFloat64Exp( a );
3088    aSign = extractFloat64Sign( a );
3089    if ( aExp == 0x7FF ) {
3090        if ( aSig ) return propagateFloat64NaN( a, a );
3091        if ( ! aSign ) return a;
3092        float_raise( float_flag_invalid );
3093        return float64_default_nan;
3094    }
3095    if ( aSign ) {
3096        if ( ( aExp | aSig ) == 0 ) return a;
3097        float_raise( float_flag_invalid );
3098        return float64_default_nan;
3099    }
3100    if ( aExp == 0 ) {
3101        if ( aSig == 0 ) return 0;
3102        normalizeFloat64Subnormal( aSig, &aExp, &aSig );
3103    }
3104    zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE;
3105    aSig |= LIT64( 0x0010000000000000 );
3106    zSig = estimateSqrt32( aExp, aSig>>21 );
3107    aSig <<= 9 - ( aExp & 1 );
3108    zSig = estimateDiv128To64( aSig, 0, zSig<<32 ) + ( zSig<<30 );
3109    if ( ( zSig & 0x1FF ) <= 5 ) {
3110        doubleZSig = zSig<<1;
3111        mul64To128( zSig, zSig, &term0, &term1 );
3112        sub128( aSig, 0, term0, term1, &rem0, &rem1 );
3113        while ( (sbits64) rem0 < 0 ) {
3114            --zSig;
3115            doubleZSig -= 2;
3116            add128( rem0, rem1, zSig>>63, doubleZSig | 1, &rem0, &rem1 );
3117        }
3118        zSig |= ( ( rem0 | rem1 ) != 0 );
3119    }
3120    return roundAndPackFloat64( 0, zExp, zSig );
3121
3122}
3123#endif
3124
3125/*
3126-------------------------------------------------------------------------------
3127Returns 1 if the double-precision floating-point value `a' is equal to the
3128corresponding value `b', and 0 otherwise.  The comparison is performed
3129according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3130-------------------------------------------------------------------------------
3131*/
3132flag float64_eq( float64 a, float64 b )
3133{
3134
3135    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
3136         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
3137       ) {
3138        if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
3139            float_raise( float_flag_invalid );
3140        }
3141        return 0;
3142    }
3143    return ( a == b ) ||
3144	( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) == 0 );
3145
3146}
3147
3148/*
3149-------------------------------------------------------------------------------
3150Returns 1 if the double-precision floating-point value `a' is less than or
3151equal to the corresponding value `b', and 0 otherwise.  The comparison is
3152performed according to the IEC/IEEE Standard for Binary Floating-Point
3153Arithmetic.
3154-------------------------------------------------------------------------------
3155*/
3156flag float64_le( float64 a, float64 b )
3157{
3158    flag aSign, bSign;
3159
3160    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
3161         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
3162       ) {
3163        float_raise( float_flag_invalid );
3164        return 0;
3165    }
3166    aSign = extractFloat64Sign( a );
3167    bSign = extractFloat64Sign( b );
3168    if ( aSign != bSign )
3169	return aSign ||
3170	    ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) ==
3171	      0 );
3172    return ( a == b ) ||
3173	( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) );
3174
3175}
3176
3177/*
3178-------------------------------------------------------------------------------
3179Returns 1 if the double-precision floating-point value `a' is less than
3180the corresponding value `b', and 0 otherwise.  The comparison is performed
3181according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3182-------------------------------------------------------------------------------
3183*/
3184flag float64_lt( float64 a, float64 b )
3185{
3186    flag aSign, bSign;
3187
3188    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
3189         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
3190       ) {
3191        float_raise( float_flag_invalid );
3192        return 0;
3193    }
3194    aSign = extractFloat64Sign( a );
3195    bSign = extractFloat64Sign( b );
3196    if ( aSign != bSign )
3197	return aSign &&
3198	    ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) !=
3199	      0 );
3200    return ( a != b ) &&
3201	( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) );
3202
3203}
3204
3205#ifndef SOFTFLOAT_FOR_GCC
3206/*
3207-------------------------------------------------------------------------------
3208Returns 1 if the double-precision floating-point value `a' is equal to the
3209corresponding value `b', and 0 otherwise.  The invalid exception is raised
3210if either operand is a NaN.  Otherwise, the comparison is performed
3211according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3212-------------------------------------------------------------------------------
3213*/
3214flag float64_eq_signaling( float64 a, float64 b )
3215{
3216
3217    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
3218         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
3219       ) {
3220        float_raise( float_flag_invalid );
3221        return 0;
3222    }
3223    return ( a == b ) || ( (bits64) ( ( a | b )<<1 ) == 0 );
3224
3225}
3226
3227/*
3228-------------------------------------------------------------------------------
3229Returns 1 if the double-precision floating-point value `a' is less than or
3230equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
3231cause an exception.  Otherwise, the comparison is performed according to the
3232IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3233-------------------------------------------------------------------------------
3234*/
3235flag float64_le_quiet( float64 a, float64 b )
3236{
3237    flag aSign, bSign;
3238
3239    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
3240         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
3241       ) {
3242        if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
3243            float_raise( float_flag_invalid );
3244        }
3245        return 0;
3246    }
3247    aSign = extractFloat64Sign( a );
3248    bSign = extractFloat64Sign( b );
3249    if ( aSign != bSign ) return aSign || ( (bits64) ( ( a | b )<<1 ) == 0 );
3250    return ( a == b ) || ( aSign ^ ( a < b ) );
3251
3252}
3253
3254/*
3255-------------------------------------------------------------------------------
3256Returns 1 if the double-precision floating-point value `a' is less than
3257the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
3258exception.  Otherwise, the comparison is performed according to the IEC/IEEE
3259Standard for Binary Floating-Point Arithmetic.
3260-------------------------------------------------------------------------------
3261*/
3262flag float64_lt_quiet( float64 a, float64 b )
3263{
3264    flag aSign, bSign;
3265
3266    if (    ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
3267         || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
3268       ) {
3269        if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
3270            float_raise( float_flag_invalid );
3271        }
3272        return 0;
3273    }
3274    aSign = extractFloat64Sign( a );
3275    bSign = extractFloat64Sign( b );
3276    if ( aSign != bSign ) return aSign && ( (bits64) ( ( a | b )<<1 ) != 0 );
3277    return ( a != b ) && ( aSign ^ ( a < b ) );
3278
3279}
3280#endif
3281
3282#ifdef FLOATX80
3283
3284/*
3285-------------------------------------------------------------------------------
3286Returns the result of converting the extended double-precision floating-
3287point value `a' to the 32-bit two's complement integer format.  The
3288conversion is performed according to the IEC/IEEE Standard for Binary
3289Floating-Point Arithmetic---which means in particular that the conversion
3290is rounded according to the current rounding mode.  If `a' is a NaN, the
3291largest positive integer is returned.  Otherwise, if the conversion
3292overflows, the largest integer with the same sign as `a' is returned.
3293-------------------------------------------------------------------------------
3294*/
3295int32 floatx80_to_int32( floatx80 a )
3296{
3297    flag aSign;
3298    int32 aExp, shiftCount;
3299    bits64 aSig;
3300
3301    aSig = extractFloatx80Frac( a );
3302    aExp = extractFloatx80Exp( a );
3303    aSign = extractFloatx80Sign( a );
3304    if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0;
3305    shiftCount = 0x4037 - aExp;
3306    if ( shiftCount <= 0 ) shiftCount = 1;
3307    shift64RightJamming( aSig, shiftCount, &aSig );
3308    return roundAndPackInt32( aSign, aSig );
3309
3310}
3311
3312/*
3313-------------------------------------------------------------------------------
3314Returns the result of converting the extended double-precision floating-
3315point value `a' to the 32-bit two's complement integer format.  The
3316conversion is performed according to the IEC/IEEE Standard for Binary
3317Floating-Point Arithmetic, except that the conversion is always rounded
3318toward zero.  If `a' is a NaN, the largest positive integer is returned.
3319Otherwise, if the conversion overflows, the largest integer with the same
3320sign as `a' is returned.
3321-------------------------------------------------------------------------------
3322*/
3323int32 floatx80_to_int32_round_to_zero( floatx80 a )
3324{
3325    flag aSign;
3326    int32 aExp, shiftCount;
3327    bits64 aSig, savedASig;
3328    int32 z;
3329
3330    aSig = extractFloatx80Frac( a );
3331    aExp = extractFloatx80Exp( a );
3332    aSign = extractFloatx80Sign( a );
3333    if ( 0x401E < aExp ) {
3334        if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0;
3335        goto invalid;
3336    }
3337    else if ( aExp < 0x3FFF ) {
3338        if ( aExp || aSig ) float_exception_flags |= float_flag_inexact;
3339        return 0;
3340    }
3341    shiftCount = 0x403E - aExp;
3342    savedASig = aSig;
3343    aSig >>= shiftCount;
3344    z = aSig;
3345    if ( aSign ) z = - z;
3346    if ( ( z < 0 ) ^ aSign ) {
3347 invalid:
3348        float_raise( float_flag_invalid );
3349        return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
3350    }
3351    if ( ( aSig<<shiftCount ) != savedASig ) {
3352        float_exception_flags |= float_flag_inexact;
3353    }
3354    return z;
3355
3356}
3357
3358/*
3359-------------------------------------------------------------------------------
3360Returns the result of converting the extended double-precision floating-
3361point value `a' to the 64-bit two's complement integer format.  The
3362conversion is performed according to the IEC/IEEE Standard for Binary
3363Floating-Point Arithmetic---which means in particular that the conversion
3364is rounded according to the current rounding mode.  If `a' is a NaN,
3365the largest positive integer is returned.  Otherwise, if the conversion
3366overflows, the largest integer with the same sign as `a' is returned.
3367-------------------------------------------------------------------------------
3368*/
3369int64 floatx80_to_int64( floatx80 a )
3370{
3371    flag aSign;
3372    int32 aExp, shiftCount;
3373    bits64 aSig, aSigExtra;
3374
3375    aSig = extractFloatx80Frac( a );
3376    aExp = extractFloatx80Exp( a );
3377    aSign = extractFloatx80Sign( a );
3378    shiftCount = 0x403E - aExp;
3379    if ( shiftCount <= 0 ) {
3380        if ( shiftCount ) {
3381            float_raise( float_flag_invalid );
3382            if (    ! aSign
3383                 || (    ( aExp == 0x7FFF )
3384                      && ( aSig != LIT64( 0x8000000000000000 ) ) )
3385               ) {
3386                return LIT64( 0x7FFFFFFFFFFFFFFF );
3387            }
3388            return (sbits64) LIT64( 0x8000000000000000 );
3389        }
3390        aSigExtra = 0;
3391    }
3392    else {
3393        shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
3394    }
3395    return roundAndPackInt64( aSign, aSig, aSigExtra );
3396
3397}
3398
3399/*
3400-------------------------------------------------------------------------------
3401Returns the result of converting the extended double-precision floating-
3402point value `a' to the 64-bit two's complement integer format.  The
3403conversion is performed according to the IEC/IEEE Standard for Binary
3404Floating-Point Arithmetic, except that the conversion is always rounded
3405toward zero.  If `a' is a NaN, the largest positive integer is returned.
3406Otherwise, if the conversion overflows, the largest integer with the same
3407sign as `a' is returned.
3408-------------------------------------------------------------------------------
3409*/
3410int64 floatx80_to_int64_round_to_zero( floatx80 a )
3411{
3412    flag aSign;
3413    int32 aExp, shiftCount;
3414    bits64 aSig;
3415    int64 z;
3416
3417    aSig = extractFloatx80Frac( a );
3418    aExp = extractFloatx80Exp( a );
3419    aSign = extractFloatx80Sign( a );
3420    shiftCount = aExp - 0x403E;
3421    if ( 0 <= shiftCount ) {
3422        aSig &= LIT64( 0x7FFFFFFFFFFFFFFF );
3423        if ( ( a.high != 0xC03E ) || aSig ) {
3424            float_raise( float_flag_invalid );
3425            if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) {
3426                return LIT64( 0x7FFFFFFFFFFFFFFF );
3427            }
3428        }
3429        return (sbits64) LIT64( 0x8000000000000000 );
3430    }
3431    else if ( aExp < 0x3FFF ) {
3432        if ( aExp | aSig ) float_exception_flags |= float_flag_inexact;
3433        return 0;
3434    }
3435    z = aSig>>( - shiftCount );
3436    if ( (bits64) ( aSig<<( shiftCount & 63 ) ) ) {
3437        float_exception_flags |= float_flag_inexact;
3438    }
3439    if ( aSign ) z = - z;
3440    return z;
3441
3442}
3443
3444/*
3445-------------------------------------------------------------------------------
3446Returns the result of converting the extended double-precision floating-
3447point value `a' to the single-precision floating-point format.  The
3448conversion is performed according to the IEC/IEEE Standard for Binary
3449Floating-Point Arithmetic.
3450-------------------------------------------------------------------------------
3451*/
3452float32 floatx80_to_float32( floatx80 a )
3453{
3454    flag aSign;
3455    int32 aExp;
3456    bits64 aSig;
3457
3458    aSig = extractFloatx80Frac( a );
3459    aExp = extractFloatx80Exp( a );
3460    aSign = extractFloatx80Sign( a );
3461    if ( aExp == 0x7FFF ) {
3462        if ( (bits64) ( aSig<<1 ) ) {
3463            return commonNaNToFloat32( floatx80ToCommonNaN( a ) );
3464        }
3465        return packFloat32( aSign, 0xFF, 0 );
3466    }
3467    shift64RightJamming( aSig, 33, &aSig );
3468    if ( aExp || aSig ) aExp -= 0x3F81;
3469    return roundAndPackFloat32( aSign, aExp, aSig );
3470
3471}
3472
3473/*
3474-------------------------------------------------------------------------------
3475Returns the result of converting the extended double-precision floating-
3476point value `a' to the double-precision floating-point format.  The
3477conversion is performed according to the IEC/IEEE Standard for Binary
3478Floating-Point Arithmetic.
3479-------------------------------------------------------------------------------
3480*/
3481float64 floatx80_to_float64( floatx80 a )
3482{
3483    flag aSign;
3484    int32 aExp;
3485    bits64 aSig, zSig;
3486
3487    aSig = extractFloatx80Frac( a );
3488    aExp = extractFloatx80Exp( a );
3489    aSign = extractFloatx80Sign( a );
3490    if ( aExp == 0x7FFF ) {
3491        if ( (bits64) ( aSig<<1 ) ) {
3492            return commonNaNToFloat64( floatx80ToCommonNaN( a ) );
3493        }
3494        return packFloat64( aSign, 0x7FF, 0 );
3495    }
3496    shift64RightJamming( aSig, 1, &zSig );
3497    if ( aExp || aSig ) aExp -= 0x3C01;
3498    return roundAndPackFloat64( aSign, aExp, zSig );
3499
3500}
3501
3502#ifdef FLOAT128
3503
3504/*
3505-------------------------------------------------------------------------------
3506Returns the result of converting the extended double-precision floating-
3507point value `a' to the quadruple-precision floating-point format.  The
3508conversion is performed according to the IEC/IEEE Standard for Binary
3509Floating-Point Arithmetic.
3510-------------------------------------------------------------------------------
3511*/
3512float128 floatx80_to_float128( floatx80 a )
3513{
3514    flag aSign;
3515    int16 aExp;
3516    bits64 aSig, zSig0, zSig1;
3517
3518    aSig = extractFloatx80Frac( a );
3519    aExp = extractFloatx80Exp( a );
3520    aSign = extractFloatx80Sign( a );
3521    if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) {
3522        return commonNaNToFloat128( floatx80ToCommonNaN( a ) );
3523    }
3524    shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
3525    return packFloat128( aSign, aExp, zSig0, zSig1 );
3526
3527}
3528
3529#endif
3530
3531/*
3532-------------------------------------------------------------------------------
3533Rounds the extended double-precision floating-point value `a' to an integer,
3534and returns the result as an extended quadruple-precision floating-point
3535value.  The operation is performed according to the IEC/IEEE Standard for
3536Binary Floating-Point Arithmetic.
3537-------------------------------------------------------------------------------
3538*/
3539floatx80 floatx80_round_to_int( floatx80 a )
3540{
3541    flag aSign;
3542    int32 aExp;
3543    bits64 lastBitMask, roundBitsMask;
3544    int8 roundingMode;
3545    floatx80 z;
3546
3547    aExp = extractFloatx80Exp( a );
3548    if ( 0x403E <= aExp ) {
3549        if ( ( aExp == 0x7FFF ) && (bits64) ( extractFloatx80Frac( a )<<1 ) ) {
3550            return propagateFloatx80NaN( a, a );
3551        }
3552        return a;
3553    }
3554    if ( aExp < 0x3FFF ) {
3555        if (    ( aExp == 0 )
3556             && ( (bits64) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) {
3557            return a;
3558        }
3559        float_exception_flags |= float_flag_inexact;
3560        aSign = extractFloatx80Sign( a );
3561        switch ( float_rounding_mode ) {
3562         case float_round_nearest_even:
3563            if ( ( aExp == 0x3FFE ) && (bits64) ( extractFloatx80Frac( a )<<1 )
3564               ) {
3565                return
3566                    packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) );
3567            }
3568            break;
3569	 case float_round_to_zero:
3570	    break;
3571         case float_round_down:
3572            return
3573                  aSign ?
3574                      packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) )
3575                : packFloatx80( 0, 0, 0 );
3576         case float_round_up:
3577            return
3578                  aSign ? packFloatx80( 1, 0, 0 )
3579                : packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) );
3580        }
3581        return packFloatx80( aSign, 0, 0 );
3582    }
3583    lastBitMask = 1;
3584    lastBitMask <<= 0x403E - aExp;
3585    roundBitsMask = lastBitMask - 1;
3586    z = a;
3587    roundingMode = float_rounding_mode;
3588    if ( roundingMode == float_round_nearest_even ) {
3589        z.low += lastBitMask>>1;
3590        if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
3591    }
3592    else if ( roundingMode != float_round_to_zero ) {
3593        if ( extractFloatx80Sign( z ) ^ ( roundingMode == float_round_up ) ) {
3594            z.low += roundBitsMask;
3595        }
3596    }
3597    z.low &= ~ roundBitsMask;
3598    if ( z.low == 0 ) {
3599        ++z.high;
3600        z.low = LIT64( 0x8000000000000000 );
3601    }
3602    if ( z.low != a.low ) float_exception_flags |= float_flag_inexact;
3603    return z;
3604
3605}
3606
3607/*
3608-------------------------------------------------------------------------------
3609Returns the result of adding the absolute values of the extended double-
3610precision floating-point values `a' and `b'.  If `zSign' is 1, the sum is
3611negated before being returned.  `zSign' is ignored if the result is a NaN.
3612The addition is performed according to the IEC/IEEE Standard for Binary
3613Floating-Point Arithmetic.
3614-------------------------------------------------------------------------------
3615*/
3616static floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
3617{
3618    int32 aExp, bExp, zExp;
3619    bits64 aSig, bSig, zSig0, zSig1;
3620    int32 expDiff;
3621
3622    aSig = extractFloatx80Frac( a );
3623    aExp = extractFloatx80Exp( a );
3624    bSig = extractFloatx80Frac( b );
3625    bExp = extractFloatx80Exp( b );
3626    expDiff = aExp - bExp;
3627    if ( 0 < expDiff ) {
3628        if ( aExp == 0x7FFF ) {
3629            if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
3630            return a;
3631        }
3632        if ( bExp == 0 ) --expDiff;
3633        shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
3634        zExp = aExp;
3635    }
3636    else if ( expDiff < 0 ) {
3637        if ( bExp == 0x7FFF ) {
3638            if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
3639            return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
3640        }
3641        if ( aExp == 0 ) ++expDiff;
3642        shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
3643        zExp = bExp;
3644    }
3645    else {
3646        if ( aExp == 0x7FFF ) {
3647            if ( (bits64) ( ( aSig | bSig )<<1 ) ) {
3648                return propagateFloatx80NaN( a, b );
3649            }
3650            return a;
3651        }
3652        zSig1 = 0;
3653        zSig0 = aSig + bSig;
3654        if ( aExp == 0 ) {
3655            normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
3656            goto roundAndPack;
3657        }
3658        zExp = aExp;
3659        goto shiftRight1;
3660    }
3661    zSig0 = aSig + bSig;
3662    if ( (sbits64) zSig0 < 0 ) goto roundAndPack;
3663 shiftRight1:
3664    shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
3665    zSig0 |= LIT64( 0x8000000000000000 );
3666    ++zExp;
3667 roundAndPack:
3668    return
3669        roundAndPackFloatx80(
3670            floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
3671
3672}
3673
3674/*
3675-------------------------------------------------------------------------------
3676Returns the result of subtracting the absolute values of the extended
3677double-precision floating-point values `a' and `b'.  If `zSign' is 1, the
3678difference is negated before being returned.  `zSign' is ignored if the
3679result is a NaN.  The subtraction is performed according to the IEC/IEEE
3680Standard for Binary Floating-Point Arithmetic.
3681-------------------------------------------------------------------------------
3682*/
3683static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
3684{
3685    int32 aExp, bExp, zExp;
3686    bits64 aSig, bSig, zSig0, zSig1;
3687    int32 expDiff;
3688    floatx80 z;
3689
3690    aSig = extractFloatx80Frac( a );
3691    aExp = extractFloatx80Exp( a );
3692    bSig = extractFloatx80Frac( b );
3693    bExp = extractFloatx80Exp( b );
3694    expDiff = aExp - bExp;
3695    if ( 0 < expDiff ) goto aExpBigger;
3696    if ( expDiff < 0 ) goto bExpBigger;
3697    if ( aExp == 0x7FFF ) {
3698        if ( (bits64) ( ( aSig | bSig )<<1 ) ) {
3699            return propagateFloatx80NaN( a, b );
3700        }
3701        float_raise( float_flag_invalid );
3702        z.low = floatx80_default_nan_low;
3703        z.high = floatx80_default_nan_high;
3704        return z;
3705    }
3706    if ( aExp == 0 ) {
3707        aExp = 1;
3708        bExp = 1;
3709    }
3710    zSig1 = 0;
3711    if ( bSig < aSig ) goto aBigger;
3712    if ( aSig < bSig ) goto bBigger;
3713    return packFloatx80( float_rounding_mode == float_round_down, 0, 0 );
3714 bExpBigger:
3715    if ( bExp == 0x7FFF ) {
3716        if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
3717        return packFloatx80( zSign ^ 1, 0x7FFF, LIT64( 0x8000000000000000 ) );
3718    }
3719    if ( aExp == 0 ) ++expDiff;
3720    shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
3721 bBigger:
3722    sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
3723    zExp = bExp;
3724    zSign ^= 1;
3725    goto normalizeRoundAndPack;
3726 aExpBigger:
3727    if ( aExp == 0x7FFF ) {
3728        if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
3729        return a;
3730    }
3731    if ( bExp == 0 ) --expDiff;
3732    shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
3733 aBigger:
3734    sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
3735    zExp = aExp;
3736 normalizeRoundAndPack:
3737    return
3738        normalizeRoundAndPackFloatx80(
3739            floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
3740
3741}
3742
3743/*
3744-------------------------------------------------------------------------------
3745Returns the result of adding the extended double-precision floating-point
3746values `a' and `b'.  The operation is performed according to the IEC/IEEE
3747Standard for Binary Floating-Point Arithmetic.
3748-------------------------------------------------------------------------------
3749*/
3750floatx80 floatx80_add( floatx80 a, floatx80 b )
3751{
3752    flag aSign, bSign;
3753
3754    aSign = extractFloatx80Sign( a );
3755    bSign = extractFloatx80Sign( b );
3756    if ( aSign == bSign ) {
3757        return addFloatx80Sigs( a, b, aSign );
3758    }
3759    else {
3760        return subFloatx80Sigs( a, b, aSign );
3761    }
3762
3763}
3764
3765/*
3766-------------------------------------------------------------------------------
3767Returns the result of subtracting the extended double-precision floating-
3768point values `a' and `b'.  The operation is performed according to the
3769IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3770-------------------------------------------------------------------------------
3771*/
3772floatx80 floatx80_sub( floatx80 a, floatx80 b )
3773{
3774    flag aSign, bSign;
3775
3776    aSign = extractFloatx80Sign( a );
3777    bSign = extractFloatx80Sign( b );
3778    if ( aSign == bSign ) {
3779        return subFloatx80Sigs( a, b, aSign );
3780    }
3781    else {
3782        return addFloatx80Sigs( a, b, aSign );
3783    }
3784
3785}
3786
3787/*
3788-------------------------------------------------------------------------------
3789Returns the result of multiplying the extended double-precision floating-
3790point values `a' and `b'.  The operation is performed according to the
3791IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3792-------------------------------------------------------------------------------
3793*/
3794floatx80 floatx80_mul( floatx80 a, floatx80 b )
3795{
3796    flag aSign, bSign, zSign;
3797    int32 aExp, bExp, zExp;
3798    bits64 aSig, bSig, zSig0, zSig1;
3799    floatx80 z;
3800
3801    aSig = extractFloatx80Frac( a );
3802    aExp = extractFloatx80Exp( a );
3803    aSign = extractFloatx80Sign( a );
3804    bSig = extractFloatx80Frac( b );
3805    bExp = extractFloatx80Exp( b );
3806    bSign = extractFloatx80Sign( b );
3807    zSign = aSign ^ bSign;
3808    if ( aExp == 0x7FFF ) {
3809        if (    (bits64) ( aSig<<1 )
3810             || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
3811            return propagateFloatx80NaN( a, b );
3812        }
3813        if ( ( bExp | bSig ) == 0 ) goto invalid;
3814        return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
3815    }
3816    if ( bExp == 0x7FFF ) {
3817        if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
3818        if ( ( aExp | aSig ) == 0 ) {
3819 invalid:
3820            float_raise( float_flag_invalid );
3821            z.low = floatx80_default_nan_low;
3822            z.high = floatx80_default_nan_high;
3823            return z;
3824        }
3825        return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
3826    }
3827    if ( aExp == 0 ) {
3828        if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
3829        normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
3830    }
3831    if ( bExp == 0 ) {
3832        if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
3833        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
3834    }
3835    zExp = aExp + bExp - 0x3FFE;
3836    mul64To128( aSig, bSig, &zSig0, &zSig1 );
3837    if ( 0 < (sbits64) zSig0 ) {
3838        shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
3839        --zExp;
3840    }
3841    return
3842        roundAndPackFloatx80(
3843            floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
3844
3845}
3846
3847/*
3848-------------------------------------------------------------------------------
3849Returns the result of dividing the extended double-precision floating-point
3850value `a' by the corresponding value `b'.  The operation is performed
3851according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3852-------------------------------------------------------------------------------
3853*/
3854floatx80 floatx80_div( floatx80 a, floatx80 b )
3855{
3856    flag aSign, bSign, zSign;
3857    int32 aExp, bExp, zExp;
3858    bits64 aSig, bSig, zSig0, zSig1;
3859    bits64 rem0, rem1, rem2, term0, term1, term2;
3860    floatx80 z;
3861
3862    aSig = extractFloatx80Frac( a );
3863    aExp = extractFloatx80Exp( a );
3864    aSign = extractFloatx80Sign( a );
3865    bSig = extractFloatx80Frac( b );
3866    bExp = extractFloatx80Exp( b );
3867    bSign = extractFloatx80Sign( b );
3868    zSign = aSign ^ bSign;
3869    if ( aExp == 0x7FFF ) {
3870        if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
3871        if ( bExp == 0x7FFF ) {
3872            if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
3873            goto invalid;
3874        }
3875        return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
3876    }
3877    if ( bExp == 0x7FFF ) {
3878        if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
3879        return packFloatx80( zSign, 0, 0 );
3880    }
3881    if ( bExp == 0 ) {
3882        if ( bSig == 0 ) {
3883            if ( ( aExp | aSig ) == 0 ) {
3884 invalid:
3885                float_raise( float_flag_invalid );
3886                z.low = floatx80_default_nan_low;
3887                z.high = floatx80_default_nan_high;
3888                return z;
3889            }
3890            float_raise( float_flag_divbyzero );
3891            return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
3892        }
3893        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
3894    }
3895    if ( aExp == 0 ) {
3896        if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
3897        normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
3898    }
3899    zExp = aExp - bExp + 0x3FFE;
3900    rem1 = 0;
3901    if ( bSig <= aSig ) {
3902        shift128Right( aSig, 0, 1, &aSig, &rem1 );
3903        ++zExp;
3904    }
3905    zSig0 = estimateDiv128To64( aSig, rem1, bSig );
3906    mul64To128( bSig, zSig0, &term0, &term1 );
3907    sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
3908    while ( (sbits64) rem0 < 0 ) {
3909        --zSig0;
3910        add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
3911    }
3912    zSig1 = estimateDiv128To64( rem1, 0, bSig );
3913    if ( (bits64) ( zSig1<<1 ) <= 8 ) {
3914        mul64To128( bSig, zSig1, &term1, &term2 );
3915        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
3916        while ( (sbits64) rem1 < 0 ) {
3917            --zSig1;
3918            add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
3919        }
3920        zSig1 |= ( ( rem1 | rem2 ) != 0 );
3921    }
3922    return
3923        roundAndPackFloatx80(
3924            floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
3925
3926}
3927
3928/*
3929-------------------------------------------------------------------------------
3930Returns the remainder of the extended double-precision floating-point value
3931`a' with respect to the corresponding value `b'.  The operation is performed
3932according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3933-------------------------------------------------------------------------------
3934*/
3935floatx80 floatx80_rem( floatx80 a, floatx80 b )
3936{
3937    flag aSign, bSign, zSign;
3938    int32 aExp, bExp, expDiff;
3939    bits64 aSig0, aSig1, bSig;
3940    bits64 q, term0, term1, alternateASig0, alternateASig1;
3941    floatx80 z;
3942
3943    aSig0 = extractFloatx80Frac( a );
3944    aExp = extractFloatx80Exp( a );
3945    aSign = extractFloatx80Sign( a );
3946    bSig = extractFloatx80Frac( b );
3947    bExp = extractFloatx80Exp( b );
3948    bSign = extractFloatx80Sign( b );
3949    if ( aExp == 0x7FFF ) {
3950        if (    (bits64) ( aSig0<<1 )
3951             || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
3952            return propagateFloatx80NaN( a, b );
3953        }
3954        goto invalid;
3955    }
3956    if ( bExp == 0x7FFF ) {
3957        if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
3958        return a;
3959    }
3960    if ( bExp == 0 ) {
3961        if ( bSig == 0 ) {
3962 invalid:
3963            float_raise( float_flag_invalid );
3964            z.low = floatx80_default_nan_low;
3965            z.high = floatx80_default_nan_high;
3966            return z;
3967        }
3968        normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
3969    }
3970    if ( aExp == 0 ) {
3971        if ( (bits64) ( aSig0<<1 ) == 0 ) return a;
3972        normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
3973    }
3974    bSig |= LIT64( 0x8000000000000000 );
3975    zSign = aSign;
3976    expDiff = aExp - bExp;
3977    aSig1 = 0;
3978    if ( expDiff < 0 ) {
3979        if ( expDiff < -1 ) return a;
3980        shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
3981        expDiff = 0;
3982    }
3983    q = ( bSig <= aSig0 );
3984    if ( q ) aSig0 -= bSig;
3985    expDiff -= 64;
3986    while ( 0 < expDiff ) {
3987        q = estimateDiv128To64( aSig0, aSig1, bSig );
3988        q = ( 2 < q ) ? q - 2 : 0;
3989        mul64To128( bSig, q, &term0, &term1 );
3990        sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
3991        shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
3992        expDiff -= 62;
3993    }
3994    expDiff += 64;
3995    if ( 0 < expDiff ) {
3996        q = estimateDiv128To64( aSig0, aSig1, bSig );
3997        q = ( 2 < q ) ? q - 2 : 0;
3998        q >>= 64 - expDiff;
3999        mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
4000        sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
4001        shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
4002        while ( le128( term0, term1, aSig0, aSig1 ) ) {
4003            ++q;
4004            sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
4005        }
4006    }
4007    else {
4008        term1 = 0;
4009        term0 = bSig;
4010    }
4011    sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
4012    if (    lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
4013         || (    eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
4014              && ( q & 1 ) )
4015       ) {
4016        aSig0 = alternateASig0;
4017        aSig1 = alternateASig1;
4018        zSign = ! zSign;
4019    }
4020    return
4021        normalizeRoundAndPackFloatx80(
4022            80, zSign, bExp + expDiff, aSig0, aSig1 );
4023
4024}
4025
4026/*
4027-------------------------------------------------------------------------------
4028Returns the square root of the extended double-precision floating-point
4029value `a'.  The operation is performed according to the IEC/IEEE Standard
4030for Binary Floating-Point Arithmetic.
4031-------------------------------------------------------------------------------
4032*/
4033floatx80 floatx80_sqrt( floatx80 a )
4034{
4035    flag aSign;
4036    int32 aExp, zExp;
4037    bits64 aSig0, aSig1, zSig0, zSig1, doubleZSig0;
4038    bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
4039    floatx80 z;
4040
4041    aSig0 = extractFloatx80Frac( a );
4042    aExp = extractFloatx80Exp( a );
4043    aSign = extractFloatx80Sign( a );
4044    if ( aExp == 0x7FFF ) {
4045        if ( (bits64) ( aSig0<<1 ) ) return propagateFloatx80NaN( a, a );
4046        if ( ! aSign ) return a;
4047        goto invalid;
4048    }
4049    if ( aSign ) {
4050        if ( ( aExp | aSig0 ) == 0 ) return a;
4051 invalid:
4052        float_raise( float_flag_invalid );
4053        z.low = floatx80_default_nan_low;
4054        z.high = floatx80_default_nan_high;
4055        return z;
4056    }
4057    if ( aExp == 0 ) {
4058        if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
4059        normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
4060    }
4061    zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
4062    zSig0 = estimateSqrt32( aExp, aSig0>>32 );
4063    shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
4064    zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
4065    doubleZSig0 = zSig0<<1;
4066    mul64To128( zSig0, zSig0, &term0, &term1 );
4067    sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
4068    while ( (sbits64) rem0 < 0 ) {
4069        --zSig0;
4070        doubleZSig0 -= 2;
4071        add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
4072    }
4073    zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
4074    if ( ( zSig1 & LIT64( 0x3FFFFFFFFFFFFFFF ) ) <= 5 ) {
4075        if ( zSig1 == 0 ) zSig1 = 1;
4076        mul64To128( doubleZSig0, zSig1, &term1, &term2 );
4077        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
4078        mul64To128( zSig1, zSig1, &term2, &term3 );
4079        sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
4080        while ( (sbits64) rem1 < 0 ) {
4081            --zSig1;
4082            shortShift128Left( 0, zSig1, 1, &term2, &term3 );
4083            term3 |= 1;
4084            term2 |= doubleZSig0;
4085            add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
4086        }
4087        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
4088    }
4089    shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
4090    zSig0 |= doubleZSig0;
4091    return
4092        roundAndPackFloatx80(
4093            floatx80_rounding_precision, 0, zExp, zSig0, zSig1 );
4094
4095}
4096
4097/*
4098-------------------------------------------------------------------------------
4099Returns 1 if the extended double-precision floating-point value `a' is
4100equal to the corresponding value `b', and 0 otherwise.  The comparison is
4101performed according to the IEC/IEEE Standard for Binary Floating-Point
4102Arithmetic.
4103-------------------------------------------------------------------------------
4104*/
4105flag floatx80_eq( floatx80 a, floatx80 b )
4106{
4107
4108    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
4109              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
4110         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
4111              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
4112       ) {
4113        if (    floatx80_is_signaling_nan( a )
4114             || floatx80_is_signaling_nan( b ) ) {
4115            float_raise( float_flag_invalid );
4116        }
4117        return 0;
4118    }
4119    return
4120           ( a.low == b.low )
4121        && (    ( a.high == b.high )
4122             || (    ( a.low == 0 )
4123                  && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) )
4124           );
4125
4126}
4127
4128/*
4129-------------------------------------------------------------------------------
4130Returns 1 if the extended double-precision floating-point value `a' is
4131less than or equal to the corresponding value `b', and 0 otherwise.  The
4132comparison is performed according to the IEC/IEEE Standard for Binary
4133Floating-Point Arithmetic.
4134-------------------------------------------------------------------------------
4135*/
4136flag floatx80_le( floatx80 a, floatx80 b )
4137{
4138    flag aSign, bSign;
4139
4140    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
4141              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
4142         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
4143              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
4144       ) {
4145        float_raise( float_flag_invalid );
4146        return 0;
4147    }
4148    aSign = extractFloatx80Sign( a );
4149    bSign = extractFloatx80Sign( b );
4150    if ( aSign != bSign ) {
4151        return
4152               aSign
4153            || (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
4154                 == 0 );
4155    }
4156    return
4157          aSign ? le128( b.high, b.low, a.high, a.low )
4158        : le128( a.high, a.low, b.high, b.low );
4159
4160}
4161
4162/*
4163-------------------------------------------------------------------------------
4164Returns 1 if the extended double-precision floating-point value `a' is
4165less than the corresponding value `b', and 0 otherwise.  The comparison
4166is performed according to the IEC/IEEE Standard for Binary Floating-Point
4167Arithmetic.
4168-------------------------------------------------------------------------------
4169*/
4170flag floatx80_lt( floatx80 a, floatx80 b )
4171{
4172    flag aSign, bSign;
4173
4174    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
4175              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
4176         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
4177              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
4178       ) {
4179        float_raise( float_flag_invalid );
4180        return 0;
4181    }
4182    aSign = extractFloatx80Sign( a );
4183    bSign = extractFloatx80Sign( b );
4184    if ( aSign != bSign ) {
4185        return
4186               aSign
4187            && (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
4188                 != 0 );
4189    }
4190    return
4191          aSign ? lt128( b.high, b.low, a.high, a.low )
4192        : lt128( a.high, a.low, b.high, b.low );
4193
4194}
4195
4196/*
4197-------------------------------------------------------------------------------
4198Returns 1 if the extended double-precision floating-point value `a' is equal
4199to the corresponding value `b', and 0 otherwise.  The invalid exception is
4200raised if either operand is a NaN.  Otherwise, the comparison is performed
4201according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4202-------------------------------------------------------------------------------
4203*/
4204flag floatx80_eq_signaling( floatx80 a, floatx80 b )
4205{
4206
4207    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
4208              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
4209         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
4210              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
4211       ) {
4212        float_raise( float_flag_invalid );
4213        return 0;
4214    }
4215    return
4216           ( a.low == b.low )
4217        && (    ( a.high == b.high )
4218             || (    ( a.low == 0 )
4219                  && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) )
4220           );
4221
4222}
4223
4224/*
4225-------------------------------------------------------------------------------
4226Returns 1 if the extended double-precision floating-point value `a' is less
4227than or equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs
4228do not cause an exception.  Otherwise, the comparison is performed according
4229to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4230-------------------------------------------------------------------------------
4231*/
4232flag floatx80_le_quiet( floatx80 a, floatx80 b )
4233{
4234    flag aSign, bSign;
4235
4236    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
4237              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
4238         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
4239              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
4240       ) {
4241        if (    floatx80_is_signaling_nan( a )
4242             || floatx80_is_signaling_nan( b ) ) {
4243            float_raise( float_flag_invalid );
4244        }
4245        return 0;
4246    }
4247    aSign = extractFloatx80Sign( a );
4248    bSign = extractFloatx80Sign( b );
4249    if ( aSign != bSign ) {
4250        return
4251               aSign
4252            || (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
4253                 == 0 );
4254    }
4255    return
4256          aSign ? le128( b.high, b.low, a.high, a.low )
4257        : le128( a.high, a.low, b.high, b.low );
4258
4259}
4260
4261/*
4262-------------------------------------------------------------------------------
4263Returns 1 if the extended double-precision floating-point value `a' is less
4264than the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause
4265an exception.  Otherwise, the comparison is performed according to the
4266IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4267-------------------------------------------------------------------------------
4268*/
4269flag floatx80_lt_quiet( floatx80 a, floatx80 b )
4270{
4271    flag aSign, bSign;
4272
4273    if (    (    ( extractFloatx80Exp( a ) == 0x7FFF )
4274              && (bits64) ( extractFloatx80Frac( a )<<1 ) )
4275         || (    ( extractFloatx80Exp( b ) == 0x7FFF )
4276              && (bits64) ( extractFloatx80Frac( b )<<1 ) )
4277       ) {
4278        if (    floatx80_is_signaling_nan( a )
4279             || floatx80_is_signaling_nan( b ) ) {
4280            float_raise( float_flag_invalid );
4281        }
4282        return 0;
4283    }
4284    aSign = extractFloatx80Sign( a );
4285    bSign = extractFloatx80Sign( b );
4286    if ( aSign != bSign ) {
4287        return
4288               aSign
4289            && (    ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
4290                 != 0 );
4291    }
4292    return
4293          aSign ? lt128( b.high, b.low, a.high, a.low )
4294        : lt128( a.high, a.low, b.high, b.low );
4295
4296}
4297
4298#endif
4299
4300#ifdef FLOAT128
4301
4302/*
4303-------------------------------------------------------------------------------
4304Returns the result of converting the quadruple-precision floating-point
4305value `a' to the 32-bit two's complement integer format.  The conversion
4306is performed according to the IEC/IEEE Standard for Binary Floating-Point
4307Arithmetic---which means in particular that the conversion is rounded
4308according to the current rounding mode.  If `a' is a NaN, the largest
4309positive integer is returned.  Otherwise, if the conversion overflows, the
4310largest integer with the same sign as `a' is returned.
4311-------------------------------------------------------------------------------
4312*/
4313int32 float128_to_int32( float128 a )
4314{
4315    flag aSign;
4316    int32 aExp, shiftCount;
4317    bits64 aSig0, aSig1;
4318
4319    aSig1 = extractFloat128Frac1( a );
4320    aSig0 = extractFloat128Frac0( a );
4321    aExp = extractFloat128Exp( a );
4322    aSign = extractFloat128Sign( a );
4323    if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0;
4324    if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
4325    aSig0 |= ( aSig1 != 0 );
4326    shiftCount = 0x4028 - aExp;
4327    if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 );
4328    return roundAndPackInt32( aSign, aSig0 );
4329
4330}
4331
4332/*
4333-------------------------------------------------------------------------------
4334Returns the result of converting the quadruple-precision floating-point
4335value `a' to the 32-bit two's complement integer format.  The conversion
4336is performed according to the IEC/IEEE Standard for Binary Floating-Point
4337Arithmetic, except that the conversion is always rounded toward zero.  If
4338`a' is a NaN, the largest positive integer is returned.  Otherwise, if the
4339conversion overflows, the largest integer with the same sign as `a' is
4340returned.
4341-------------------------------------------------------------------------------
4342*/
4343int32 float128_to_int32_round_to_zero( float128 a )
4344{
4345    flag aSign;
4346    int32 aExp, shiftCount;
4347    bits64 aSig0, aSig1, savedASig;
4348    int32 z;
4349
4350    aSig1 = extractFloat128Frac1( a );
4351    aSig0 = extractFloat128Frac0( a );
4352    aExp = extractFloat128Exp( a );
4353    aSign = extractFloat128Sign( a );
4354    aSig0 |= ( aSig1 != 0 );
4355    if ( 0x401E < aExp ) {
4356        if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0;
4357        goto invalid;
4358    }
4359    else if ( aExp < 0x3FFF ) {
4360        if ( aExp || aSig0 ) float_exception_flags |= float_flag_inexact;
4361        return 0;
4362    }
4363    aSig0 |= LIT64( 0x0001000000000000 );
4364    shiftCount = 0x402F - aExp;
4365    savedASig = aSig0;
4366    aSig0 >>= shiftCount;
4367    z = aSig0;
4368    if ( aSign ) z = - z;
4369    if ( ( z < 0 ) ^ aSign ) {
4370 invalid:
4371        float_raise( float_flag_invalid );
4372        return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
4373    }
4374    if ( ( aSig0<<shiftCount ) != savedASig ) {
4375        float_exception_flags |= float_flag_inexact;
4376    }
4377    return z;
4378
4379}
4380
4381/*
4382-------------------------------------------------------------------------------
4383Returns the result of converting the quadruple-precision floating-point
4384value `a' to the 64-bit two's complement integer format.  The conversion
4385is performed according to the IEC/IEEE Standard for Binary Floating-Point
4386Arithmetic---which means in particular that the conversion is rounded
4387according to the current rounding mode.  If `a' is a NaN, the largest
4388positive integer is returned.  Otherwise, if the conversion overflows, the
4389largest integer with the same sign as `a' is returned.
4390-------------------------------------------------------------------------------
4391*/
4392int64 float128_to_int64( float128 a )
4393{
4394    flag aSign;
4395    int32 aExp, shiftCount;
4396    bits64 aSig0, aSig1;
4397
4398    aSig1 = extractFloat128Frac1( a );
4399    aSig0 = extractFloat128Frac0( a );
4400    aExp = extractFloat128Exp( a );
4401    aSign = extractFloat128Sign( a );
4402    if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
4403    shiftCount = 0x402F - aExp;
4404    if ( shiftCount <= 0 ) {
4405        if ( 0x403E < aExp ) {
4406            float_raise( float_flag_invalid );
4407            if (    ! aSign
4408                 || (    ( aExp == 0x7FFF )
4409                      && ( aSig1 || ( aSig0 != LIT64( 0x0001000000000000 ) ) )
4410                    )
4411               ) {
4412                return LIT64( 0x7FFFFFFFFFFFFFFF );
4413            }
4414            return (sbits64) LIT64( 0x8000000000000000 );
4415        }
4416        shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 );
4417    }
4418    else {
4419        shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 );
4420    }
4421    return roundAndPackInt64( aSign, aSig0, aSig1 );
4422
4423}
4424
4425/*
4426-------------------------------------------------------------------------------
4427Returns the result of converting the quadruple-precision floating-point
4428value `a' to the 64-bit two's complement integer format.  The conversion
4429is performed according to the IEC/IEEE Standard for Binary Floating-Point
4430Arithmetic, except that the conversion is always rounded toward zero.
4431If `a' is a NaN, the largest positive integer is returned.  Otherwise, if
4432the conversion overflows, the largest integer with the same sign as `a' is
4433returned.
4434-------------------------------------------------------------------------------
4435*/
4436int64 float128_to_int64_round_to_zero( float128 a )
4437{
4438    flag aSign;
4439    int32 aExp, shiftCount;
4440    bits64 aSig0, aSig1;
4441    int64 z;
4442
4443    aSig1 = extractFloat128Frac1( a );
4444    aSig0 = extractFloat128Frac0( a );
4445    aExp = extractFloat128Exp( a );
4446    aSign = extractFloat128Sign( a );
4447    if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
4448    shiftCount = aExp - 0x402F;
4449    if ( 0 < shiftCount ) {
4450        if ( 0x403E <= aExp ) {
4451            aSig0 &= LIT64( 0x0000FFFFFFFFFFFF );
4452            if (    ( a.high == LIT64( 0xC03E000000000000 ) )
4453                 && ( aSig1 < LIT64( 0x0002000000000000 ) ) ) {
4454                if ( aSig1 ) float_exception_flags |= float_flag_inexact;
4455            }
4456            else {
4457                float_raise( float_flag_invalid );
4458                if ( ! aSign || ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) ) {
4459                    return LIT64( 0x7FFFFFFFFFFFFFFF );
4460                }
4461            }
4462            return (sbits64) LIT64( 0x8000000000000000 );
4463        }
4464        z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) );
4465        if ( (bits64) ( aSig1<<shiftCount ) ) {
4466            float_exception_flags |= float_flag_inexact;
4467        }
4468    }
4469    else {
4470        if ( aExp < 0x3FFF ) {
4471            if ( aExp | aSig0 | aSig1 ) {
4472                float_exception_flags |= float_flag_inexact;
4473            }
4474            return 0;
4475        }
4476        z = aSig0>>( - shiftCount );
4477        if (    aSig1
4478             || ( shiftCount && (bits64) ( aSig0<<( shiftCount & 63 ) ) ) ) {
4479            float_exception_flags |= float_flag_inexact;
4480        }
4481    }
4482    if ( aSign ) z = - z;
4483    return z;
4484
4485}
4486
4487#if (defined(SOFTFLOATSPARC64_FOR_GCC) || defined(SOFTFLOAT_FOR_GCC)) \
4488    && defined(SOFTFLOAT_NEED_FIXUNS)
4489/*
4490 * just like above - but do not care for overflow of signed results
4491 */
4492uint64 float128_to_uint64_round_to_zero( float128 a )
4493{
4494    flag aSign;
4495    int32 aExp, shiftCount;
4496    bits64 aSig0, aSig1;
4497    uint64 z;
4498
4499    aSig1 = extractFloat128Frac1( a );
4500    aSig0 = extractFloat128Frac0( a );
4501    aExp = extractFloat128Exp( a );
4502    aSign = extractFloat128Sign( a );
4503    if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
4504    shiftCount = aExp - 0x402F;
4505    if ( 0 < shiftCount ) {
4506        if ( 0x403F <= aExp ) {
4507            aSig0 &= LIT64( 0x0000FFFFFFFFFFFF );
4508            if (    ( a.high == LIT64( 0xC03E000000000000 ) )
4509                 && ( aSig1 < LIT64( 0x0002000000000000 ) ) ) {
4510                if ( aSig1 ) float_exception_flags |= float_flag_inexact;
4511            }
4512            else {
4513                float_raise( float_flag_invalid );
4514            }
4515            return LIT64( 0xFFFFFFFFFFFFFFFF );
4516        }
4517        z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) );
4518        if ( (bits64) ( aSig1<<shiftCount ) ) {
4519            float_exception_flags |= float_flag_inexact;
4520        }
4521    }
4522    else {
4523        if ( aExp < 0x3FFF ) {
4524            if ( aExp | aSig0 | aSig1 ) {
4525                float_exception_flags |= float_flag_inexact;
4526            }
4527            return 0;
4528        }
4529        z = aSig0>>( - shiftCount );
4530        if (aSig1 || ( shiftCount && (bits64) ( aSig0<<( shiftCount & 63 ) ) ) ) {
4531            float_exception_flags |= float_flag_inexact;
4532        }
4533    }
4534    if ( aSign ) z = - z;
4535    return z;
4536
4537}
4538#endif /* (SOFTFLOATSPARC64_FOR_GCC || SOFTFLOAT_FOR_GCC) && SOFTFLOAT_NEED_FIXUNS */
4539
4540/*
4541-------------------------------------------------------------------------------
4542Returns the result of converting the quadruple-precision floating-point
4543value `a' to the single-precision floating-point format.  The conversion
4544is performed according to the IEC/IEEE Standard for Binary Floating-Point
4545Arithmetic.
4546-------------------------------------------------------------------------------
4547*/
4548float32 float128_to_float32( float128 a )
4549{
4550    flag aSign;
4551    int32 aExp;
4552    bits64 aSig0, aSig1;
4553    bits32 zSig;
4554
4555    aSig1 = extractFloat128Frac1( a );
4556    aSig0 = extractFloat128Frac0( a );
4557    aExp = extractFloat128Exp( a );
4558    aSign = extractFloat128Sign( a );
4559    if ( aExp == 0x7FFF ) {
4560        if ( aSig0 | aSig1 ) {
4561            return commonNaNToFloat32( float128ToCommonNaN( a ) );
4562        }
4563        return packFloat32( aSign, 0xFF, 0 );
4564    }
4565    aSig0 |= ( aSig1 != 0 );
4566    shift64RightJamming( aSig0, 18, &aSig0 );
4567    zSig = aSig0;
4568    if ( aExp || zSig ) {
4569        zSig |= 0x40000000;
4570        aExp -= 0x3F81;
4571    }
4572    return roundAndPackFloat32( aSign, aExp, zSig );
4573
4574}
4575
4576/*
4577-------------------------------------------------------------------------------
4578Returns the result of converting the quadruple-precision floating-point
4579value `a' to the double-precision floating-point format.  The conversion
4580is performed according to the IEC/IEEE Standard for Binary Floating-Point
4581Arithmetic.
4582-------------------------------------------------------------------------------
4583*/
4584float64 float128_to_float64( float128 a )
4585{
4586    flag aSign;
4587    int32 aExp;
4588    bits64 aSig0, aSig1;
4589
4590    aSig1 = extractFloat128Frac1( a );
4591    aSig0 = extractFloat128Frac0( a );
4592    aExp = extractFloat128Exp( a );
4593    aSign = extractFloat128Sign( a );
4594    if ( aExp == 0x7FFF ) {
4595        if ( aSig0 | aSig1 ) {
4596            return commonNaNToFloat64( float128ToCommonNaN( a ) );
4597        }
4598        return packFloat64( aSign, 0x7FF, 0 );
4599    }
4600    shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
4601    aSig0 |= ( aSig1 != 0 );
4602    if ( aExp || aSig0 ) {
4603        aSig0 |= LIT64( 0x4000000000000000 );
4604        aExp -= 0x3C01;
4605    }
4606    return roundAndPackFloat64( aSign, aExp, aSig0 );
4607
4608}
4609
4610#ifdef FLOATX80
4611
4612/*
4613-------------------------------------------------------------------------------
4614Returns the result of converting the quadruple-precision floating-point
4615value `a' to the extended double-precision floating-point format.  The
4616conversion is performed according to the IEC/IEEE Standard for Binary
4617Floating-Point Arithmetic.
4618-------------------------------------------------------------------------------
4619*/
4620floatx80 float128_to_floatx80( float128 a )
4621{
4622    flag aSign;
4623    int32 aExp;
4624    bits64 aSig0, aSig1;
4625
4626    aSig1 = extractFloat128Frac1( a );
4627    aSig0 = extractFloat128Frac0( a );
4628    aExp = extractFloat128Exp( a );
4629    aSign = extractFloat128Sign( a );
4630    if ( aExp == 0x7FFF ) {
4631        if ( aSig0 | aSig1 ) {
4632            return commonNaNToFloatx80( float128ToCommonNaN( a ) );
4633        }
4634        return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
4635    }
4636    if ( aExp == 0 ) {
4637        if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
4638        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
4639    }
4640    else {
4641        aSig0 |= LIT64( 0x0001000000000000 );
4642    }
4643    shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
4644    return roundAndPackFloatx80( 80, aSign, aExp, aSig0, aSig1 );
4645
4646}
4647
4648#endif
4649
4650/*
4651-------------------------------------------------------------------------------
4652Rounds the quadruple-precision floating-point value `a' to an integer, and
4653returns the result as a quadruple-precision floating-point value.  The
4654operation is performed according to the IEC/IEEE Standard for Binary
4655Floating-Point Arithmetic.
4656-------------------------------------------------------------------------------
4657*/
4658float128 float128_round_to_int( float128 a )
4659{
4660    flag aSign;
4661    int32 aExp;
4662    bits64 lastBitMask, roundBitsMask;
4663    int8 roundingMode;
4664    float128 z;
4665
4666    aExp = extractFloat128Exp( a );
4667    if ( 0x402F <= aExp ) {
4668        if ( 0x406F <= aExp ) {
4669            if (    ( aExp == 0x7FFF )
4670                 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) )
4671               ) {
4672                return propagateFloat128NaN( a, a );
4673            }
4674            return a;
4675        }
4676        lastBitMask = 1;
4677        lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1;
4678        roundBitsMask = lastBitMask - 1;
4679        z = a;
4680        roundingMode = float_rounding_mode;
4681        if ( roundingMode == float_round_nearest_even ) {
4682            if ( lastBitMask ) {
4683                add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
4684                if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
4685            }
4686            else {
4687                if ( (sbits64) z.low < 0 ) {
4688                    ++z.high;
4689                    if ( (bits64) ( z.low<<1 ) == 0 ) z.high &= ~1;
4690                }
4691            }
4692        }
4693        else if ( roundingMode != float_round_to_zero ) {
4694            if (   extractFloat128Sign( z )
4695                 ^ ( roundingMode == float_round_up ) ) {
4696                add128( z.high, z.low, 0, roundBitsMask, &z.high, &z.low );
4697            }
4698        }
4699        z.low &= ~ roundBitsMask;
4700    }
4701    else {
4702        if ( aExp < 0x3FFF ) {
4703            if ( ( ( (bits64) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
4704            float_exception_flags |= float_flag_inexact;
4705            aSign = extractFloat128Sign( a );
4706            switch ( float_rounding_mode ) {
4707             case float_round_nearest_even:
4708                if (    ( aExp == 0x3FFE )
4709                     && (   extractFloat128Frac0( a )
4710                          | extractFloat128Frac1( a ) )
4711                   ) {
4712                    return packFloat128( aSign, 0x3FFF, 0, 0 );
4713                }
4714                break;
4715	     case float_round_to_zero:
4716		break;
4717             case float_round_down:
4718                return
4719                      aSign ? packFloat128( 1, 0x3FFF, 0, 0 )
4720                    : packFloat128( 0, 0, 0, 0 );
4721             case float_round_up:
4722                return
4723                      aSign ? packFloat128( 1, 0, 0, 0 )
4724                    : packFloat128( 0, 0x3FFF, 0, 0 );
4725            }
4726            return packFloat128( aSign, 0, 0, 0 );
4727        }
4728        lastBitMask = 1;
4729        lastBitMask <<= 0x402F - aExp;
4730        roundBitsMask = lastBitMask - 1;
4731        z.low = 0;
4732        z.high = a.high;
4733        roundingMode = float_rounding_mode;
4734        if ( roundingMode == float_round_nearest_even ) {
4735            z.high += lastBitMask>>1;
4736            if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
4737                z.high &= ~ lastBitMask;
4738            }
4739        }
4740        else if ( roundingMode != float_round_to_zero ) {
4741            if (   extractFloat128Sign( z )
4742                 ^ ( roundingMode == float_round_up ) ) {
4743                z.high |= ( a.low != 0 );
4744                z.high += roundBitsMask;
4745            }
4746        }
4747        z.high &= ~ roundBitsMask;
4748    }
4749    if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
4750        float_exception_flags |= float_flag_inexact;
4751    }
4752    return z;
4753
4754}
4755
4756/*
4757-------------------------------------------------------------------------------
4758Returns the result of adding the absolute values of the quadruple-precision
4759floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
4760before being returned.  `zSign' is ignored if the result is a NaN.
4761The addition is performed according to the IEC/IEEE Standard for Binary
4762Floating-Point Arithmetic.
4763-------------------------------------------------------------------------------
4764*/
4765static float128 addFloat128Sigs( float128 a, float128 b, flag zSign )
4766{
4767    int32 aExp, bExp, zExp;
4768    bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
4769    int32 expDiff;
4770
4771    aSig1 = extractFloat128Frac1( a );
4772    aSig0 = extractFloat128Frac0( a );
4773    aExp = extractFloat128Exp( a );
4774    bSig1 = extractFloat128Frac1( b );
4775    bSig0 = extractFloat128Frac0( b );
4776    bExp = extractFloat128Exp( b );
4777    expDiff = aExp - bExp;
4778    if ( 0 < expDiff ) {
4779        if ( aExp == 0x7FFF ) {
4780            if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b );
4781            return a;
4782        }
4783        if ( bExp == 0 ) {
4784            --expDiff;
4785        }
4786        else {
4787            bSig0 |= LIT64( 0x0001000000000000 );
4788        }
4789        shift128ExtraRightJamming(
4790            bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
4791        zExp = aExp;
4792    }
4793    else if ( expDiff < 0 ) {
4794        if ( bExp == 0x7FFF ) {
4795            if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
4796            return packFloat128( zSign, 0x7FFF, 0, 0 );
4797        }
4798        if ( aExp == 0 ) {
4799            ++expDiff;
4800        }
4801        else {
4802            aSig0 |= LIT64( 0x0001000000000000 );
4803        }
4804        shift128ExtraRightJamming(
4805            aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
4806        zExp = bExp;
4807    }
4808    else {
4809        if ( aExp == 0x7FFF ) {
4810            if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
4811                return propagateFloat128NaN( a, b );
4812            }
4813            return a;
4814        }
4815        add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
4816        if ( aExp == 0 ) return packFloat128( zSign, 0, zSig0, zSig1 );
4817        zSig2 = 0;
4818        zSig0 |= LIT64( 0x0002000000000000 );
4819        zExp = aExp;
4820        goto shiftRight1;
4821    }
4822    aSig0 |= LIT64( 0x0001000000000000 );
4823    add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
4824    --zExp;
4825    if ( zSig0 < LIT64( 0x0002000000000000 ) ) goto roundAndPack;
4826    ++zExp;
4827 shiftRight1:
4828    shift128ExtraRightJamming(
4829        zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
4830 roundAndPack:
4831    return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
4832
4833}
4834
4835/*
4836-------------------------------------------------------------------------------
4837Returns the result of subtracting the absolute values of the quadruple-
4838precision floating-point values `a' and `b'.  If `zSign' is 1, the
4839difference is negated before being returned.  `zSign' is ignored if the
4840result is a NaN.  The subtraction is performed according to the IEC/IEEE
4841Standard for Binary Floating-Point Arithmetic.
4842-------------------------------------------------------------------------------
4843*/
4844static float128 subFloat128Sigs( float128 a, float128 b, flag zSign )
4845{
4846    int32 aExp, bExp, zExp;
4847    bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
4848    int32 expDiff;
4849    float128 z;
4850
4851    aSig1 = extractFloat128Frac1( a );
4852    aSig0 = extractFloat128Frac0( a );
4853    aExp = extractFloat128Exp( a );
4854    bSig1 = extractFloat128Frac1( b );
4855    bSig0 = extractFloat128Frac0( b );
4856    bExp = extractFloat128Exp( b );
4857    expDiff = aExp - bExp;
4858    shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
4859    shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 );
4860    if ( 0 < expDiff ) goto aExpBigger;
4861    if ( expDiff < 0 ) goto bExpBigger;
4862    if ( aExp == 0x7FFF ) {
4863        if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
4864            return propagateFloat128NaN( a, b );
4865        }
4866        float_raise( float_flag_invalid );
4867        z.low = float128_default_nan_low;
4868        z.high = float128_default_nan_high;
4869        return z;
4870    }
4871    if ( aExp == 0 ) {
4872        aExp = 1;
4873        bExp = 1;
4874    }
4875    if ( bSig0 < aSig0 ) goto aBigger;
4876    if ( aSig0 < bSig0 ) goto bBigger;
4877    if ( bSig1 < aSig1 ) goto aBigger;
4878    if ( aSig1 < bSig1 ) goto bBigger;
4879    return packFloat128( float_rounding_mode == float_round_down, 0, 0, 0 );
4880 bExpBigger:
4881    if ( bExp == 0x7FFF ) {
4882        if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
4883        return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 );
4884    }
4885    if ( aExp == 0 ) {
4886        ++expDiff;
4887    }
4888    else {
4889        aSig0 |= LIT64( 0x4000000000000000 );
4890    }
4891    shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
4892    bSig0 |= LIT64( 0x4000000000000000 );
4893 bBigger:
4894    sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
4895    zExp = bExp;
4896    zSign ^= 1;
4897    goto normalizeRoundAndPack;
4898 aExpBigger:
4899    if ( aExp == 0x7FFF ) {
4900        if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b );
4901        return a;
4902    }
4903    if ( bExp == 0 ) {
4904        --expDiff;
4905    }
4906    else {
4907        bSig0 |= LIT64( 0x4000000000000000 );
4908    }
4909    shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
4910    aSig0 |= LIT64( 0x4000000000000000 );
4911 aBigger:
4912    sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
4913    zExp = aExp;
4914 normalizeRoundAndPack:
4915    --zExp;
4916    return normalizeRoundAndPackFloat128( zSign, zExp - 14, zSig0, zSig1 );
4917
4918}
4919
4920/*
4921-------------------------------------------------------------------------------
4922Returns the result of adding the quadruple-precision floating-point values
4923`a' and `b'.  The operation is performed according to the IEC/IEEE Standard
4924for Binary Floating-Point Arithmetic.
4925-------------------------------------------------------------------------------
4926*/
4927float128 float128_add( float128 a, float128 b )
4928{
4929    flag aSign, bSign;
4930
4931    aSign = extractFloat128Sign( a );
4932    bSign = extractFloat128Sign( b );
4933    if ( aSign == bSign ) {
4934        return addFloat128Sigs( a, b, aSign );
4935    }
4936    else {
4937        return subFloat128Sigs( a, b, aSign );
4938    }
4939
4940}
4941
4942/*
4943-------------------------------------------------------------------------------
4944Returns the result of subtracting the quadruple-precision floating-point
4945values `a' and `b'.  The operation is performed according to the IEC/IEEE
4946Standard for Binary Floating-Point Arithmetic.
4947-------------------------------------------------------------------------------
4948*/
4949float128 float128_sub( float128 a, float128 b )
4950{
4951    flag aSign, bSign;
4952
4953    aSign = extractFloat128Sign( a );
4954    bSign = extractFloat128Sign( b );
4955    if ( aSign == bSign ) {
4956        return subFloat128Sigs( a, b, aSign );
4957    }
4958    else {
4959        return addFloat128Sigs( a, b, aSign );
4960    }
4961
4962}
4963
4964/*
4965-------------------------------------------------------------------------------
4966Returns the result of multiplying the quadruple-precision floating-point
4967values `a' and `b'.  The operation is performed according to the IEC/IEEE
4968Standard for Binary Floating-Point Arithmetic.
4969-------------------------------------------------------------------------------
4970*/
4971float128 float128_mul( float128 a, float128 b )
4972{
4973    flag aSign, bSign, zSign;
4974    int32 aExp, bExp, zExp;
4975    bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
4976    float128 z;
4977
4978    aSig1 = extractFloat128Frac1( a );
4979    aSig0 = extractFloat128Frac0( a );
4980    aExp = extractFloat128Exp( a );
4981    aSign = extractFloat128Sign( a );
4982    bSig1 = extractFloat128Frac1( b );
4983    bSig0 = extractFloat128Frac0( b );
4984    bExp = extractFloat128Exp( b );
4985    bSign = extractFloat128Sign( b );
4986    zSign = aSign ^ bSign;
4987    if ( aExp == 0x7FFF ) {
4988        if (    ( aSig0 | aSig1 )
4989             || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
4990            return propagateFloat128NaN( a, b );
4991        }
4992        if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid;
4993        return packFloat128( zSign, 0x7FFF, 0, 0 );
4994    }
4995    if ( bExp == 0x7FFF ) {
4996        if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
4997        if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
4998 invalid:
4999            float_raise( float_flag_invalid );
5000            z.low = float128_default_nan_low;
5001            z.high = float128_default_nan_high;
5002            return z;
5003        }
5004        return packFloat128( zSign, 0x7FFF, 0, 0 );
5005    }
5006    if ( aExp == 0 ) {
5007        if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
5008        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
5009    }
5010    if ( bExp == 0 ) {
5011        if ( ( bSig0 | bSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
5012        normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
5013    }
5014    zExp = aExp + bExp - 0x4000;
5015    aSig0 |= LIT64( 0x0001000000000000 );
5016    shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 );
5017    mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
5018    add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
5019    zSig2 |= ( zSig3 != 0 );
5020    if ( LIT64( 0x0002000000000000 ) <= zSig0 ) {
5021        shift128ExtraRightJamming(
5022            zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
5023        ++zExp;
5024    }
5025    return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
5026
5027}
5028
5029/*
5030-------------------------------------------------------------------------------
5031Returns the result of dividing the quadruple-precision floating-point value
5032`a' by the corresponding value `b'.  The operation is performed according to
5033the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5034-------------------------------------------------------------------------------
5035*/
5036float128 float128_div( float128 a, float128 b )
5037{
5038    flag aSign, bSign, zSign;
5039    int32 aExp, bExp, zExp;
5040    bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
5041    bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
5042    float128 z;
5043
5044    aSig1 = extractFloat128Frac1( a );
5045    aSig0 = extractFloat128Frac0( a );
5046    aExp = extractFloat128Exp( a );
5047    aSign = extractFloat128Sign( a );
5048    bSig1 = extractFloat128Frac1( b );
5049    bSig0 = extractFloat128Frac0( b );
5050    bExp = extractFloat128Exp( b );
5051    bSign = extractFloat128Sign( b );
5052    zSign = aSign ^ bSign;
5053    if ( aExp == 0x7FFF ) {
5054        if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b );
5055        if ( bExp == 0x7FFF ) {
5056            if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
5057            goto invalid;
5058        }
5059        return packFloat128( zSign, 0x7FFF, 0, 0 );
5060    }
5061    if ( bExp == 0x7FFF ) {
5062        if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
5063        return packFloat128( zSign, 0, 0, 0 );
5064    }
5065    if ( bExp == 0 ) {
5066        if ( ( bSig0 | bSig1 ) == 0 ) {
5067            if ( ( aExp | aSig0 | aSig1 ) == 0 ) {
5068 invalid:
5069                float_raise( float_flag_invalid );
5070                z.low = float128_default_nan_low;
5071                z.high = float128_default_nan_high;
5072                return z;
5073            }
5074            float_raise( float_flag_divbyzero );
5075            return packFloat128( zSign, 0x7FFF, 0, 0 );
5076        }
5077        normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
5078    }
5079    if ( aExp == 0 ) {
5080        if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 );
5081        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
5082    }
5083    zExp = aExp - bExp + 0x3FFD;
5084    shortShift128Left(
5085        aSig0 | LIT64( 0x0001000000000000 ), aSig1, 15, &aSig0, &aSig1 );
5086    shortShift128Left(
5087        bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
5088    if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) {
5089        shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 );
5090        ++zExp;
5091    }
5092    zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 );
5093    mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
5094    sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 );
5095    while ( (sbits64) rem0 < 0 ) {
5096        --zSig0;
5097        add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 );
5098    }
5099    zSig1 = estimateDiv128To64( rem1, rem2, bSig0 );
5100    if ( ( zSig1 & 0x3FFF ) <= 4 ) {
5101        mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
5102        sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 );
5103        while ( (sbits64) rem1 < 0 ) {
5104            --zSig1;
5105            add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 );
5106        }
5107        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
5108    }
5109    shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 );
5110    return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
5111
5112}
5113
5114/*
5115-------------------------------------------------------------------------------
5116Returns the remainder of the quadruple-precision floating-point value `a'
5117with respect to the corresponding value `b'.  The operation is performed
5118according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5119-------------------------------------------------------------------------------
5120*/
5121float128 float128_rem( float128 a, float128 b )
5122{
5123    flag aSign, bSign, zSign;
5124    int32 aExp, bExp, expDiff;
5125    bits64 aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
5126    bits64 allZero, alternateASig0, alternateASig1, sigMean1;
5127    sbits64 sigMean0;
5128    float128 z;
5129
5130    aSig1 = extractFloat128Frac1( a );
5131    aSig0 = extractFloat128Frac0( a );
5132    aExp = extractFloat128Exp( a );
5133    aSign = extractFloat128Sign( a );
5134    bSig1 = extractFloat128Frac1( b );
5135    bSig0 = extractFloat128Frac0( b );
5136    bExp = extractFloat128Exp( b );
5137    bSign = extractFloat128Sign( b );
5138    if ( aExp == 0x7FFF ) {
5139        if (    ( aSig0 | aSig1 )
5140             || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) {
5141            return propagateFloat128NaN( a, b );
5142        }
5143        goto invalid;
5144    }
5145    if ( bExp == 0x7FFF ) {
5146        if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
5147        return a;
5148    }
5149    if ( bExp == 0 ) {
5150        if ( ( bSig0 | bSig1 ) == 0 ) {
5151 invalid:
5152            float_raise( float_flag_invalid );
5153            z.low = float128_default_nan_low;
5154            z.high = float128_default_nan_high;
5155            return z;
5156        }
5157        normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
5158    }
5159    if ( aExp == 0 ) {
5160        if ( ( aSig0 | aSig1 ) == 0 ) return a;
5161        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
5162    }
5163    expDiff = aExp - bExp;
5164    if ( expDiff < -1 ) return a;
5165    shortShift128Left(
5166        aSig0 | LIT64( 0x0001000000000000 ),
5167        aSig1,
5168        15 - ( expDiff < 0 ),
5169        &aSig0,
5170        &aSig1
5171    );
5172    shortShift128Left(
5173        bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 );
5174    q = le128( bSig0, bSig1, aSig0, aSig1 );
5175    if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
5176    expDiff -= 64;
5177    while ( 0 < expDiff ) {
5178        q = estimateDiv128To64( aSig0, aSig1, bSig0 );
5179        q = ( 4 < q ) ? q - 4 : 0;
5180        mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
5181        shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero );
5182        shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero );
5183        sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 );
5184        expDiff -= 61;
5185    }
5186    if ( -64 < expDiff ) {
5187        q = estimateDiv128To64( aSig0, aSig1, bSig0 );
5188        q = ( 4 < q ) ? q - 4 : 0;
5189        q >>= - expDiff;
5190        shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
5191        expDiff += 52;
5192        if ( expDiff < 0 ) {
5193            shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
5194        }
5195        else {
5196            shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
5197        }
5198        mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
5199        sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
5200    }
5201    else {
5202        shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 );
5203        shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 );
5204    }
5205    do {
5206        alternateASig0 = aSig0;
5207        alternateASig1 = aSig1;
5208        ++q;
5209        sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
5210    } while ( 0 <= (sbits64) aSig0 );
5211    add128(
5212        aSig0, aSig1, alternateASig0, alternateASig1, (bits64 *)&sigMean0, &sigMean1 );
5213    if (    ( sigMean0 < 0 )
5214         || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) {
5215        aSig0 = alternateASig0;
5216        aSig1 = alternateASig1;
5217    }
5218    zSign = ( (sbits64) aSig0 < 0 );
5219    if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 );
5220    return
5221        normalizeRoundAndPackFloat128( aSign ^ zSign, bExp - 4, aSig0, aSig1 );
5222
5223}
5224
5225/*
5226-------------------------------------------------------------------------------
5227Returns the square root of the quadruple-precision floating-point value `a'.
5228The operation is performed according to the IEC/IEEE Standard for Binary
5229Floating-Point Arithmetic.
5230-------------------------------------------------------------------------------
5231*/
5232float128 float128_sqrt( float128 a )
5233{
5234    flag aSign;
5235    int32 aExp, zExp;
5236    bits64 aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
5237    bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
5238    float128 z;
5239
5240    aSig1 = extractFloat128Frac1( a );
5241    aSig0 = extractFloat128Frac0( a );
5242    aExp = extractFloat128Exp( a );
5243    aSign = extractFloat128Sign( a );
5244    if ( aExp == 0x7FFF ) {
5245        if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, a );
5246        if ( ! aSign ) return a;
5247        goto invalid;
5248    }
5249    if ( aSign ) {
5250        if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a;
5251 invalid:
5252        float_raise( float_flag_invalid );
5253        z.low = float128_default_nan_low;
5254        z.high = float128_default_nan_high;
5255        return z;
5256    }
5257    if ( aExp == 0 ) {
5258        if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 );
5259        normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
5260    }
5261    zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE;
5262    aSig0 |= LIT64( 0x0001000000000000 );
5263    zSig0 = estimateSqrt32( aExp, aSig0>>17 );
5264    shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 );
5265    zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
5266    doubleZSig0 = zSig0<<1;
5267    mul64To128( zSig0, zSig0, &term0, &term1 );
5268    sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
5269    while ( (sbits64) rem0 < 0 ) {
5270        --zSig0;
5271        doubleZSig0 -= 2;
5272        add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
5273    }
5274    zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
5275    if ( ( zSig1 & 0x1FFF ) <= 5 ) {
5276        if ( zSig1 == 0 ) zSig1 = 1;
5277        mul64To128( doubleZSig0, zSig1, &term1, &term2 );
5278        sub128( rem1, 0, term1, term2, &rem1, &rem2 );
5279        mul64To128( zSig1, zSig1, &term2, &term3 );
5280        sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
5281        while ( (sbits64) rem1 < 0 ) {
5282            --zSig1;
5283            shortShift128Left( 0, zSig1, 1, &term2, &term3 );
5284            term3 |= 1;
5285            term2 |= doubleZSig0;
5286            add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
5287        }
5288        zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
5289    }
5290    shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 );
5291    return roundAndPackFloat128( 0, zExp, zSig0, zSig1, zSig2 );
5292
5293}
5294
5295/*
5296-------------------------------------------------------------------------------
5297Returns 1 if the quadruple-precision floating-point value `a' is equal to
5298the corresponding value `b', and 0 otherwise.  The comparison is performed
5299according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5300-------------------------------------------------------------------------------
5301*/
5302flag float128_eq( float128 a, float128 b )
5303{
5304
5305    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
5306              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5307         || (    ( extractFloat128Exp( b ) == 0x7FFF )
5308              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5309       ) {
5310        if (    float128_is_signaling_nan( a )
5311             || float128_is_signaling_nan( b ) ) {
5312            float_raise( float_flag_invalid );
5313        }
5314        return 0;
5315    }
5316    return
5317           ( a.low == b.low )
5318        && (    ( a.high == b.high )
5319             || (    ( a.low == 0 )
5320                  && ( (bits64) ( ( a.high | b.high )<<1 ) == 0 ) )
5321           );
5322
5323}
5324
5325/*
5326-------------------------------------------------------------------------------
5327Returns 1 if the quadruple-precision floating-point value `a' is less than
5328or equal to the corresponding value `b', and 0 otherwise.  The comparison
5329is performed according to the IEC/IEEE Standard for Binary Floating-Point
5330Arithmetic.
5331-------------------------------------------------------------------------------
5332*/
5333flag float128_le( float128 a, float128 b )
5334{
5335    flag aSign, bSign;
5336
5337    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
5338              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5339         || (    ( extractFloat128Exp( b ) == 0x7FFF )
5340              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5341       ) {
5342        float_raise( float_flag_invalid );
5343        return 0;
5344    }
5345    aSign = extractFloat128Sign( a );
5346    bSign = extractFloat128Sign( b );
5347    if ( aSign != bSign ) {
5348        return
5349               aSign
5350            || (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5351                 == 0 );
5352    }
5353    return
5354          aSign ? le128( b.high, b.low, a.high, a.low )
5355        : le128( a.high, a.low, b.high, b.low );
5356
5357}
5358
5359/*
5360-------------------------------------------------------------------------------
5361Returns 1 if the quadruple-precision floating-point value `a' is less than
5362the corresponding value `b', and 0 otherwise.  The comparison is performed
5363according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5364-------------------------------------------------------------------------------
5365*/
5366flag float128_lt( float128 a, float128 b )
5367{
5368    flag aSign, bSign;
5369
5370    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
5371              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5372         || (    ( extractFloat128Exp( b ) == 0x7FFF )
5373              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5374       ) {
5375        float_raise( float_flag_invalid );
5376        return 0;
5377    }
5378    aSign = extractFloat128Sign( a );
5379    bSign = extractFloat128Sign( b );
5380    if ( aSign != bSign ) {
5381        return
5382               aSign
5383            && (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5384                 != 0 );
5385    }
5386    return
5387          aSign ? lt128( b.high, b.low, a.high, a.low )
5388        : lt128( a.high, a.low, b.high, b.low );
5389
5390}
5391
5392/*
5393-------------------------------------------------------------------------------
5394Returns 1 if the quadruple-precision floating-point value `a' is equal to
5395the corresponding value `b', and 0 otherwise.  The invalid exception is
5396raised if either operand is a NaN.  Otherwise, the comparison is performed
5397according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5398-------------------------------------------------------------------------------
5399*/
5400flag float128_eq_signaling( float128 a, float128 b )
5401{
5402
5403    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
5404              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5405         || (    ( extractFloat128Exp( b ) == 0x7FFF )
5406              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5407       ) {
5408        float_raise( float_flag_invalid );
5409        return 0;
5410    }
5411    return
5412           ( a.low == b.low )
5413        && (    ( a.high == b.high )
5414             || (    ( a.low == 0 )
5415                  && ( (bits64) ( ( a.high | b.high )<<1 ) == 0 ) )
5416           );
5417
5418}
5419
5420/*
5421-------------------------------------------------------------------------------
5422Returns 1 if the quadruple-precision floating-point value `a' is less than
5423or equal to the corresponding value `b', and 0 otherwise.  Quiet NaNs do not
5424cause an exception.  Otherwise, the comparison is performed according to the
5425IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5426-------------------------------------------------------------------------------
5427*/
5428flag float128_le_quiet( float128 a, float128 b )
5429{
5430    flag aSign, bSign;
5431
5432    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
5433              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5434         || (    ( extractFloat128Exp( b ) == 0x7FFF )
5435              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5436       ) {
5437        if (    float128_is_signaling_nan( a )
5438             || float128_is_signaling_nan( b ) ) {
5439            float_raise( float_flag_invalid );
5440        }
5441        return 0;
5442    }
5443    aSign = extractFloat128Sign( a );
5444    bSign = extractFloat128Sign( b );
5445    if ( aSign != bSign ) {
5446        return
5447               aSign
5448            || (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5449                 == 0 );
5450    }
5451    return
5452          aSign ? le128( b.high, b.low, a.high, a.low )
5453        : le128( a.high, a.low, b.high, b.low );
5454
5455}
5456
5457/*
5458-------------------------------------------------------------------------------
5459Returns 1 if the quadruple-precision floating-point value `a' is less than
5460the corresponding value `b', and 0 otherwise.  Quiet NaNs do not cause an
5461exception.  Otherwise, the comparison is performed according to the IEC/IEEE
5462Standard for Binary Floating-Point Arithmetic.
5463-------------------------------------------------------------------------------
5464*/
5465flag float128_lt_quiet( float128 a, float128 b )
5466{
5467    flag aSign, bSign;
5468
5469    if (    (    ( extractFloat128Exp( a ) == 0x7FFF )
5470              && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) )
5471         || (    ( extractFloat128Exp( b ) == 0x7FFF )
5472              && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) )
5473       ) {
5474        if (    float128_is_signaling_nan( a )
5475             || float128_is_signaling_nan( b ) ) {
5476            float_raise( float_flag_invalid );
5477        }
5478        return 0;
5479    }
5480    aSign = extractFloat128Sign( a );
5481    bSign = extractFloat128Sign( b );
5482    if ( aSign != bSign ) {
5483        return
5484               aSign
5485            && (    ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
5486                 != 0 );
5487    }
5488    return
5489          aSign ? lt128( b.high, b.low, a.high, a.low )
5490        : lt128( a.high, a.low, b.high, b.low );
5491
5492}
5493
5494#endif
5495
5496
5497#if defined(SOFTFLOAT_FOR_GCC) && defined(SOFTFLOAT_NEED_FIXUNS)
5498
5499/*
5500 * These two routines are not part of the original softfloat distribution.
5501 *
5502 * They are based on the corresponding conversions to integer but return
5503 * unsigned numbers instead since these functions are required by GCC.
5504 *
5505 * Added by Mark Brinicombe <mark@NetBSD.org>	27/09/97
5506 *
5507 * float64 version overhauled for SoftFloat 2a [bjh21 2000-07-15]
5508 */
5509
5510/*
5511-------------------------------------------------------------------------------
5512Returns the result of converting the double-precision floating-point value
5513`a' to the 32-bit unsigned integer format.  The conversion is
5514performed according to the IEC/IEEE Standard for Binary Floating-point
5515Arithmetic, except that the conversion is always rounded toward zero.  If
5516`a' is a NaN, the largest positive integer is returned.  If the conversion
5517overflows, the largest integer positive is returned.
5518-------------------------------------------------------------------------------
5519*/
5520uint32 float64_to_uint32_round_to_zero( float64 a )
5521{
5522    flag aSign;
5523    int16 aExp, shiftCount;
5524    bits64 aSig, savedASig;
5525    uint32 z;
5526
5527    aSig = extractFloat64Frac( a );
5528    aExp = extractFloat64Exp( a );
5529    aSign = extractFloat64Sign( a );
5530
5531    if (aSign) {
5532        float_raise( float_flag_invalid );
5533    	return(0);
5534    }
5535
5536    if ( 0x41E < aExp ) {
5537        float_raise( float_flag_invalid );
5538        return 0xffffffff;
5539    }
5540    else if ( aExp < 0x3FF ) {
5541        if ( aExp || aSig ) float_exception_flags |= float_flag_inexact;
5542        return 0;
5543    }
5544    aSig |= LIT64( 0x0010000000000000 );
5545    shiftCount = 0x433 - aExp;
5546    savedASig = aSig;
5547    aSig >>= shiftCount;
5548    z = aSig;
5549    if ( ( aSig<<shiftCount ) != savedASig ) {
5550        float_exception_flags |= float_flag_inexact;
5551    }
5552    return z;
5553
5554}
5555
5556/*
5557-------------------------------------------------------------------------------
5558Returns the result of converting the single-precision floating-point value
5559`a' to the 32-bit unsigned integer format.  The conversion is
5560performed according to the IEC/IEEE Standard for Binary Floating-point
5561Arithmetic, except that the conversion is always rounded toward zero.  If
5562`a' is a NaN, the largest positive integer is returned.  If the conversion
5563overflows, the largest positive integer is returned.
5564-------------------------------------------------------------------------------
5565*/
5566uint32 float32_to_uint32_round_to_zero( float32 a )
5567{
5568    flag aSign;
5569    int16 aExp, shiftCount;
5570    bits32 aSig;
5571    uint32 z;
5572
5573    aSig = extractFloat32Frac( a );
5574    aExp = extractFloat32Exp( a );
5575    aSign = extractFloat32Sign( a );
5576    shiftCount = aExp - 0x9E;
5577
5578    if (aSign) {
5579        float_raise( float_flag_invalid );
5580    	return(0);
5581    }
5582    if ( 0 < shiftCount ) {
5583        float_raise( float_flag_invalid );
5584        return 0xFFFFFFFF;
5585    }
5586    else if ( aExp <= 0x7E ) {
5587        if ( aExp | aSig ) float_exception_flags |= float_flag_inexact;
5588        return 0;
5589    }
5590    aSig = ( aSig | 0x800000 )<<8;
5591    z = aSig>>( - shiftCount );
5592    if ( aSig<<( shiftCount & 31 ) ) {
5593        float_exception_flags |= float_flag_inexact;
5594    }
5595    return z;
5596
5597}
5598
5599#endif
5600